From 1733a1a4aa6ad40446fe6f915577d1270b1e9802 Mon Sep 17 00:00:00 2001 From: Martin Goik <goik@hdm-stuttgart.de> Date: Wed, 25 Nov 2015 20:23:18 +0100 Subject: [PATCH] Better HTTP response handling, using Java streams --- Doc/Sda1/dom.xml | 96 ++++++++++--------- .../{src/main/resources => }/fileextref.html | 10 +- P/Sda1/VerifyImgAccess/pom.xml | 8 ++ .../main/java/dom/xpath/CheckExtImage.java | 18 ++-- .../java/dom/xpath/CheckExtImageDriver.java | 2 +- .../src/main/java/dom/xpath/CheckUrl.java | 44 ++++++--- .../src/main/resources/log4j2.xml | 4 +- .../src/test/java/alignimg/TestImgSearch.java | 29 ++++-- 8 files changed, 126 insertions(+), 85 deletions(-) rename P/Sda1/VerifyImgAccess/{src/main/resources => }/fileextref.html (71%) diff --git a/Doc/Sda1/dom.xml b/Doc/Sda1/dom.xml index b7822ef32..1769014ad 100644 --- a/Doc/Sda1/dom.xml +++ b/Doc/Sda1/dom.xml @@ -1560,22 +1560,27 @@ public class DomXpath { <programlisting language="xml" xml:id="domCheckImageAccessibility"><!DOCTYPE html> -<html xmlns="http://www.w3.org/1999/xhtml"> - <head> ... 
</head> +<html xmlns="http://www.w3.org/1999/xhtml"> + <head> + <title>External Pictures</title> + </head> <body> <h1>External Pictures</h1> <p>A local image reference:<img src="inline.gif" alt="none"/></p> + <p>What about ftp?<img src="ftp://inexistent.com/q.png" alt="none"/></p> <table> <tbody> <tr> <td>An existing picture:</td> - <td><img - src="http://www.hdm-stuttgart.de/bilder_navigation/laptop.gif" - alt="none"/></td> + <td><img + src="https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif" + alt="none"/></td> </tr> <tr> <td>A non-existing picture:</td> - <td><img src="<emphasis role="bold">http://www.hdm-stuttgart.de/rotfl.gif</emphasis>" alt="none"/></td> + <td><img + src="http://www.hdm-stuttgart.de/rotfl.gif" + alt="none"/></td> </tr> </tbody> </table> @@ -1586,51 +1591,40 @@ public class DomXpath { <abbrev xlink:href="http://www.ietf.org/rfc/rfc1738.txt">URL</abbrev> image references to <emphasis>external</emphasis> Servers - starting either with <code>http://</code> , - <code>https://</code> or <code>ftp://</code> ignoring other - protocol types. Internal image references referring to the - <quote>current</quote> server typically look like <code><img - src="/images/test.gif"</code>. So in order to distinguish these - two types of references we may use the XSL built in function - <link + starting either with <code>http://</code> or + <code>https://</code> ignoring other protocol types. Internal + image references referring to the <quote>current</quote> server + typically look like <code><img src="/images/test.gif"</code>. + So in order to distinguish these two types of references we may + use the XSL built in function <link xlink:href="http://www.cafeconleche.org/books/bible2/chapters/ch17.html">starts-with()</link> - testing for the <code>http</code> or <code>ftp</code> protocol + testing for the <code>http</code> or <code>https</code> protocol definition part of an <abbrev xlink:href="http://www.ietf.org/rfc/rfc1738.txt">URL</abbrev>. 
A possible output for the example being given is:</para> - <programlisting language="none">Received 'sun.awt.image.URLImageSource' from - http://www.hdm-stuttgart.de/bilder_navigation/laptop.gif -Unable to open 'http://www.hdm-stuttgart.de/rotfl.gif'</programlisting> + <programlisting language="none">xpath.CheckUrl (CheckUrl.java:51) - Protocol 'ftp' not yet implemented +ftp://inexistent.com/q.png, HTTP Status: null +https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif, HTTP Status: OK +http://www.hdm-stuttgart.de/rotfl.gif, HTTP Status: Found</programlisting> - <para>The following code snippet shows a helpful class method to - check for both correctness of <abbrev - xlink:href="http://www.ietf.org/rfc/rfc1738.txt">URL</abbrev>'s - and accessibility of referenced objects:</para> + <para>Do not forget to provide unit tests at least for the image + network accessibility part.</para> - <programlisting language="java">package dom.xpath; -... -public class CheckUrl { - public static void checkReadability(final String urlRef) { - try { - final URL url = new URL(urlRef); - try { - final Object imgCandidate = url.getContent(); - if (null == imgCandidate) { - System.err.println("Unable to open '" + urlRef + "'"); - } else { - System.out.println("Received '" - + imgCandidate.getClass().getName() + "' from " - + urlRef); - } - } catch (IOException e) { - System.err.println("Unable to open '" + urlRef + "'"); - } - } catch (MalformedURLException e) { - System.err.println("Adress '" + urlRef + "' is malformed"); - } - } -}</programlisting> + <caution> + <para>Notice the response code for + <uri>http://www.hdm-stuttgart.de/rotfl.gif</uri>: This + resource is actually unavailable! But the web server tries to + recover by redirecting to an error page. Moreover, a web server + may decide to return misleading response codes if it decides + your user agent is unable to handle the content type in + question. 
You may catch a glimpse of related problems by + reading <link + xlink:href="http://stackoverflow.com/questions/1378199/how-to-check-if-a-url-exists-or-returns-404-with-java">How + to check if a URL exists or returns 404 with Java?</link>. For + the current exercise we will not dig deeper into the + subject.</para> + </caution> <tip> <para>Using XPath expressions in conjunction with namespaces @@ -1649,6 +1643,15 @@ public class CheckUrl { xlink:href="http://www.jdom.org/pipermail/jdom-interest/2012-May/016850.html">XPath examples</link></para> </listitem> + + <listitem> + <para>For analyzing the accessibility of referenced <xref + linkend="glo_HTTP"/> / <acronym>HTTPS</acronym> resources + see the previously mentioned discussion in <link + xlink:href="http://stackoverflow.com/questions/1378199/how-to-check-if-a-url-exists-or-returns-404-with-java">How + to check if a URL exists or returns 404 with + Java?</link>.</para> + </listitem> </itemizedlist> </tip> </question> @@ -1687,8 +1690,9 @@ public class CheckUrl { <xref linkend="glo_XHTML"/> namespace. 
We thus create a corresponding <classname xlink:href="http://www.jdom.org/docs/apidocs/org/jdom2/Namespace.html">Namespace</classname> - instance along with the desired <code>xhtml</code> - prefix:</para> + instance handling the desired one to one correspondence between + the <code>"xhtml"</code> prefix and our target namespace + <uri>http://www.w3.org/1999/xhtml</uri>:</para> <programlisting language="java">final Namespace htmlNamespace = // This will allow for XPath Namespace.getNamespace("xhtml", // expressions like xhtml:img diff --git a/P/Sda1/VerifyImgAccess/src/main/resources/fileextref.html b/P/Sda1/VerifyImgAccess/fileextref.html similarity index 71% rename from P/Sda1/VerifyImgAccess/src/main/resources/fileextref.html rename to P/Sda1/VerifyImgAccess/fileextref.html index 4f8fdaf3a..be22b98c7 100644 --- a/P/Sda1/VerifyImgAccess/src/main/resources/fileextref.html +++ b/P/Sda1/VerifyImgAccess/fileextref.html @@ -1,24 +1,18 @@ -<?xml version="1.0"?> -<!-- -The pre- html-5 way: -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" - "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> - --> <!DOCTYPE html> <html xmlns="http://www.w3.org/1999/xhtml"> - <head> <title>External Pictures</title> </head> <body> <h1>External Pictures</h1> <p>A local image reference:<img src="inline.gif" alt="none"/></p> + <p>What about ftp?<img src="ftp://inexistent.com/q.png" alt="none"/></p> <table> <tbody> <tr> <td>An existing picture:</td> <td><img - src="http://www.hdm-stuttgart.de/bilder_navigation/laptop.gif" + src="https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif" alt="none"/></td> </tr> <tr> diff --git a/P/Sda1/VerifyImgAccess/pom.xml b/P/Sda1/VerifyImgAccess/pom.xml index 9c943b7b9..b40fd8179 100644 --- a/P/Sda1/VerifyImgAccess/pom.xml +++ b/P/Sda1/VerifyImgAccess/pom.xml @@ -33,5 +33,13 @@ <version>${jaxen.jaxen.version}</version> </dependency> + <!-- HTTP/HTTPS response codes --> + <dependency> + <groupId>javax</groupId> + <artifactId>javaee-api</artifactId> + 
<version>7.0</version> + </dependency> + + </dependencies> </project> diff --git a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImage.java b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImage.java index 8c5e98cf0..1367135c6 100644 --- a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImage.java +++ b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImage.java @@ -1,7 +1,6 @@ package dom.xpath; import java.io.IOException; -import java.util.List; import org.jdom2.Document; import org.jdom2.Element; @@ -28,18 +27,21 @@ public class CheckExtImage { public void process(final String xhtmlFilename) throws JDOMException, IOException { final Document htmlInput = builder.build(xhtmlFilename); - + final Namespace htmlNamespace = // This will allow for XPath expressions like xhtml:img Namespace.getNamespace("xhtml", "http://www.w3.org/1999/xhtml"); - + final XPathExpression<Element> xpath = XPathFactory.instance().compile ( "//xhtml:img[starts-with(@src, 'http://') or starts-with(@src, 'https://') or starts-with(@src, 'ftp://')]", new ElementFilter(), null, htmlNamespace); - - final List<Element> images = xpath.evaluate (htmlInput); - for (final Element image: images) { - CheckUrl.checkReadability(image.getAttributeValue("src")); - } + xpath.evaluate(htmlInput).stream(). + map(img -> img.getAttributeValue("src")). 
+ forEach(CheckExtImage::printResult); } + + static private void printResult(String url) { + System.out.println(url + + ", HTTP Status: " + CheckUrl.checkReadability(url)); + } } \ No newline at end of file diff --git a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImageDriver.java b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImageDriver.java index 5e7fc7a93..dc051be0b 100644 --- a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImageDriver.java +++ b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImageDriver.java @@ -12,6 +12,6 @@ public class CheckExtImageDriver { */ public static void main(String[] argv) throws Exception { final CheckExtImage ao = new CheckExtImage(); - ao.process("src/main/resources/fileextref.html"); + ao.process("fileextref.html"); } } \ No newline at end of file diff --git a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckUrl.java b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckUrl.java index 6fe0a4cc8..f82f1e3c6 100644 --- a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckUrl.java +++ b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckUrl.java @@ -1,9 +1,16 @@ package dom.xpath; import java.io.IOException; +import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; +import javax.ws.rs.core.Response; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + + /** Check Readability of a given URL * @author $Author: goik $ * @version $Rev: 290 $ $Date: 2007-11-19 17:49:15 +0100 (Mo, 19. Nov 2007) $ @@ -12,28 +19,41 @@ import java.net.URL; * */ public class CheckUrl { + + static private final Logger log = LogManager.getLogger(CheckUrl.class); + /** Check whether a given object like http://someserver.com/img.gif * can be retrieved. + * * @param urlRef * The URL to be checked for read access + * + * @return The HTTP response status of the request, or null e.g. if the URL is malformed, its protocol is neither http nor https, or the connection fails. 
*/ - public static void checkReadability(final String urlRef) { + public static Response.Status checkReadability(final String urlRef) { try { final URL url = new URL(urlRef); - try { - final Object imgCandidate = url.getContent(); - if (null == imgCandidate) { - System.err.println("Unable to open '" + urlRef + "'"); - } else { - System.out.println("Received '" - + imgCandidate.getClass().getName() + "' from " - + urlRef); - } - } catch (IOException e) { - System.err.println("Unable to open '" + urlRef + "'"); + + switch(url.getProtocol()) { + + case "http": + case "https": + try { + final HttpURLConnection huc = (HttpURLConnection) url.openConnection(); + huc.setRequestMethod("GET"); + huc.connect(); + return Response.Status.fromStatusCode(huc.getResponseCode()); + } catch (final IOException e) { + log.error("Unable to connect to " + urlRef, e); + } + break; + default: + log.error("Protocol '" + url.getProtocol() + "' not yet implemented"); } + } catch (MalformedURLException e) { System.err.println("Adress '" + urlRef + "' is malformed"); } + return null; } } \ No newline at end of file diff --git a/P/Sda1/VerifyImgAccess/src/main/resources/log4j2.xml b/P/Sda1/VerifyImgAccess/src/main/resources/log4j2.xml index 885670527..3cceed50d 100644 --- a/P/Sda1/VerifyImgAccess/src/main/resources/log4j2.xml +++ b/P/Sda1/VerifyImgAccess/src/main/resources/log4j2.xml @@ -11,10 +11,10 @@ <Loggers> <!-- You my want to define class or package level per-logger rules --> - <Logger name="de.hdm_stuttgart.mi.sda1.alignimg.App" level="warn"> + <Logger name="de.hdm_stuttgart.mi.sda1.alignimg.App" level="debug"> <AppenderRef ref="A1"/> </Logger> - <Root level="warn"> + <Root level="debug"> <AppenderRef ref="STDOUT"/> </Root> </Loggers> diff --git a/P/Sda1/VerifyImgAccess/src/test/java/alignimg/TestImgSearch.java b/P/Sda1/VerifyImgAccess/src/test/java/alignimg/TestImgSearch.java index f349eb2d9..03d2ecb1d 100644 --- a/P/Sda1/VerifyImgAccess/src/test/java/alignimg/TestImgSearch.java +++ 
b/P/Sda1/VerifyImgAccess/src/test/java/alignimg/TestImgSearch.java @@ -1,18 +1,31 @@ package alignimg; -import java.io.IOException; - -import javax.xml.parsers.ParserConfigurationException; +import javax.ws.rs.core.Response; +import org.junit.Assert; import org.junit.Test; -import org.xml.sax.SAXException; +import dom.xpath.CheckUrl; + +/** + * + */ public class TestImgSearch { + /** + */ @Test - public void searchAlignImageElements() throws ParserConfigurationException, - SAXException, IOException { - - + public void findExistingImage() { + Assert.assertTrue(Response.Status.OK == + CheckUrl.checkReadability( + "https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif")); + } + /** + */ + @Test + public void findRedirectErrorPage() { + Assert.assertTrue(Response.Status.FOUND == + CheckUrl.checkReadability( + "http://www.hdm-stuttgart.de/rotfl.gif")); } } \ No newline at end of file -- GitLab