Verify Broken Links
Hi All,
Now we will learn how to validate broken links on a page using WebDriver and Java HTTP methods. There are many free, open-source tools available on the market, such as Xenu, that can perform the same validation, but if we have to validate many web pages (say, more than 300), entering each page URL by hand becomes a very tiresome task.
Approach:
First we will open the web page using WebDriver and get all the links that have an <A> tag. To find each link's URL, we read the “href” property value of every link present on the web page. In this way we store all the link URLs present on the web page. We have to exclude links whose href property value is JavaScript, a hash (#), a mailto: address, etc., because these links either refer to the same page or open a mail client such as Outlook (for example, the mailto link in a contact section).
Now we have all of the link URLs. One way to test them is to open each URL using WebDriver, but this approach is not advisable because it takes too much time. Instead, we can use plain HTTP requests to test them. If the response code is 200, the request is valid; otherwise it is not. Note: Sometimes a URL asks for certificate acceptance; to handle this we use a custom SSL trust manager. For URLs that require authentication, we can also use authentication methods.
Below is the code, in which we read the page URLs from an Excel sheet and validate the links for breakage. The results are saved to a text file. Please modify it as per your needs.
##################################################################
##################################################################
import java.io.File; import java.io.FileInputStream; import java.io.FileWriter; import java.io.IOException; import java.net.HttpURLConnection; import java.net.URL; import java.net.URLConnection; import java.security.cert.X509Certificate; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import javax.net.ssl.HostnameVerifier; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLContext; import javax.net.ssl.SSLSession; import javax.net.ssl.TrustManager; import javax.net.ssl.X509TrustManager; import org.apache.poi.xssf.usermodel.XSSFRow; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.openqa.selenium.By; import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebElement; import org.openqa.selenium.firefox.FirefoxDriver; public class BrokenLinks { public static void main(String[] args) throws Exception { WebDriver driver = new FirefoxDriver(); String urlxpath = "//a"; List<WebElement> elements; Map<String, String> AllLinksURL = new HashMap<String, String>(); Map<Integer, String> m = new HashMap<Integer, String>(); try { // Open the Excel file FileInputStream fis = new FileInputStream( "D:\\url.xlsx"); // Access the required test data sheet XSSFWorkbook wb = new XSSFWorkbook(fis); XSSFSheet sheet = wb.getSheet("url"); // Loop through all rows in the sheet // Start at row 1 as row 0 is our header row File file = new File("Output.txt"); // creates the file file.createNewFile(); // creates a FileWriter Object FileWriter writer = new FileWriter(file); int i = 0; try { for (int count = 0; count <= sheet.getLastRowNum(); count++) { XSSFRow row = sheet.getRow(count); String page = row.getCell(0).toString(); m.put(i, page); i++; } } catch (Exception e) { System.out.println(e); } fis.close(); Collection<String> keyset = m.values(); System.out.println(keyset); try { for (String page : keyset) { driver.get(page); 
Thread.sleep(5000); elements = driver.findElements(By.xpath(urlxpath)); System.out.println(elements.size()); // System.out.println(elements); for (WebElement ele : elements) { try { String temp = ele.getAttribute("href"); // System.out.println(temp); if (temp != null) { if (!temp.contains("javascript") && (!temp.contains("tel://"))&& (!temp.contains("mailto:"))&& (!temp.equalsIgnoreCase("#"))) AllLinksURL.put(temp, temp); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } for (String urlVal : AllLinksURL.keySet()) { try { String res = check(urlVal); // System.out.println(res); if (!res.equals("OK")) { // Writes the content to the file writer.write(page + "\t" + urlVal + "\t" + res + "\n"); writer.flush(); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } AllLinksURL.clear(); } } catch (Exception e) { System.out.println(e); } writer.close(); } catch (IOException e) { System.out.println(e); System.out.println("Url file not found"); } } public static String check(String urlString) throws Exception { String response = ""; TrustManager[] trustAllCerts = new TrustManager[] { new X509TrustManager() { public java.security.cert.X509Certificate[] getAcceptedIssuers() { return null; } public void checkClientTrusted(X509Certificate[] certs, String authType) { } public void checkServerTrusted(X509Certificate[] certs, String authType) { } } }; // Install the all-trusting trust manager SSLContext sc = SSLContext.getInstance("SSL"); sc.init(null, trustAllCerts, new java.security.SecureRandom()); HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory()); // Create all-trusting host name verifier HostnameVerifier allHostsValid = new HostnameVerifier() { public boolean verify(String hostname, SSLSession session) { return true; } }; // Install the all-trusting host verifier HttpsURLConnection.setDefaultHostnameVerifier(allHostsValid); URL url = new URL(urlString); URLConnection con = 
url.openConnection(); HttpURLConnection connection = (HttpURLConnection) con; connection.connect(); response = connection.getResponseMessage(); System.out.println("Response : " + response); connection.disconnect(); return response; } }
########################################################################
########################################################################