Better HTTP response handling, using Java streams

1733a1a4 · Goik Martin · ba86f7bd · 1733a1a4 · 1733a1a4 · 1733a1a4
Commit 1733a1a4 authored 9 years ago by Goik Martin
--- a/Doc/Sda1/dom.xml
+++ b/Doc/Sda1/dom.xml
@@ -1560,22 +1560,27 @@ public class DomXpath {

              <programlisting language="xml"
                              xml:id="domCheckImageAccessibility">&lt;!DOCTYPE html&gt;
-&lt;html xmlns="http://www.w3.org/1999/xhtml"&gt; 
-  &lt;head&gt; ... &lt;/head&gt;
+&lt;html xmlns="http://www.w3.org/1999/xhtml"&gt;
+  &lt;head&gt;
+    &lt;title&gt;External Pictures&lt;/title&gt;
+  &lt;/head&gt;
  &lt;body&gt;
    &lt;h1&gt;External Pictures&lt;/h1&gt;
    &lt;p&gt;A local image reference:&lt;img src="inline.gif" alt="none"/&gt;&lt;/p&gt;
+    &lt;p&gt;What about ftp?&lt;img src="ftp://inexistent.com/q.png" alt="none"/&gt;&lt;/p&gt;
    &lt;table&gt;
      &lt;tbody&gt;
        &lt;tr&gt;
          &lt;td&gt;An existing picture:&lt;/td&gt;
-          &lt;td&gt;&lt;img
-            src="http://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"
-            alt="none"/&gt;&lt;/td&gt;
+          &lt;td&gt;&lt;img 
+             src="https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"
+             alt="none"/&gt;&lt;/td&gt;
        &lt;/tr&gt;
        &lt;tr&gt;
          &lt;td&gt;A non-existing picture:&lt;/td&gt;
-          &lt;td&gt;&lt;img src="<emphasis role="bold">http://www.hdm-stuttgart.de/rotfl.gif</emphasis>" alt="none"/&gt;&lt;/td&gt;
+          &lt;td&gt;&lt;img
+              src="http://www.hdm-stuttgart.de/rotfl.gif"
+              alt="none"/&gt;&lt;/td&gt;
        &lt;/tr&gt;
      &lt;/tbody&gt;
    &lt;/table&gt;
@@ -1586,51 +1591,40 @@ public class DomXpath {
              <abbrev
              xlink:href="http://www.ietf.org/rfc/rfc1738.txt">URL</abbrev>
              image references to <emphasis>external</emphasis> Servers
-              starting either with <code>http://</code> ,
-              <code>https://</code> or <code>ftp://</code> ignoring other
-              protocol types. Internal image references referring to the
-              <quote>current</quote> server typically look like <code>&lt;img
-              src="/images/test.gif"</code>. So in order to distinguish these
-              two types of references we may use the XSL built in function
-              <link
+              starting either with <code>http://</code> or
+              <code>https://</code>, ignoring other protocol types. Internal
+              image references referring to the <quote>current</quote> server
+              typically look like <code>&lt;img src="/images/test.gif"/&gt;</code>.
+              So in order to distinguish these two types of references we may
+              use the XPath built-in function <link
              xlink:href="http://www.cafeconleche.org/books/bible2/chapters/ch17.html">starts-with()</link>
-              testing for the <code>http</code> or <code>ftp</code> protocol
+              testing for the <code>http</code> or <code>https</code> protocol
              definition part of an <abbrev
              xlink:href="http://www.ietf.org/rfc/rfc1738.txt">URL</abbrev>. A
              possible output for the example being given is:</para>

-              <programlisting language="none">Received 'sun.awt.image.URLImageSource' from
-                    http://www.hdm-stuttgart.de/bilder_navigation/laptop.gif
-Unable to open 'http://www.hdm-stuttgart.de/rotfl.gif'</programlisting>
+              <programlisting language="none">xpath.CheckUrl (CheckUrl.java:51) - Protocol 'ftp' not yet implemented
+ftp://inexistent.com/q.png, HTTP Status: null
+https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif, HTTP Status: OK
+http://www.hdm-stuttgart.de/rotfl.gif, HTTP Status: Found</programlisting>

-              <para>The following code snippet shows a helpful class method to
-              check for both correctness of <abbrev
-              xlink:href="http://www.ietf.org/rfc/rfc1738.txt">URL</abbrev>'s
-              and accessibility of referenced objects:</para>
+              <para>Do not forget to provide unit tests at least for the image
+              network accessibility part.</para>

-              <programlisting language="java">package dom.xpath;
-...
-public class CheckUrl {
-  public static void checkReadability(final String urlRef) {
-    try {
-      final URL url = new URL(urlRef);
-      try {
-        final Object imgCandidate = url.getContent();
-        if (null == imgCandidate) {
-          System.err.println("Unable to open '" + urlRef + "'");
-        } else {
-          System.out.println("Received '"
-              + imgCandidate.getClass().getName() + "' from "
-              + urlRef);
-        }
-      } catch (IOException e) {
-        System.err.println("Unable to open '" + urlRef + "'");
-      }
-    } catch (MalformedURLException e) {
-      System.err.println("Adress '" + urlRef + "' is malformed");
-    }
-  }
-}</programlisting>
+              <caution>
+                <para>Notice the response code for
+                <uri>http://www.hdm-stuttgart.de/rotfl.gif</uri>: This
+                resource is actually unavailable! But the web server tries to
+                recover by redirecting to an error page. Moreover, a web
+                server may return misleading response codes if it decides
+                that your user agent is unable to handle the content type in
+                question. You may catch a glimpse of related problems by
+                reading <link
+                xlink:href="http://stackoverflow.com/questions/1378199/how-to-check-if-a-url-exists-or-returns-404-with-java">How
+                to check if a URL exists or returns 404 with Java?</link>. For
+                the current exercise we will not dig deeper into the
+                subject.</para>
+              </caution>

              <tip>
                <para>Using XPath expressions in conjunction with namespaces
@@ -1649,6 +1643,15 @@ public class CheckUrl {
                    xlink:href="http://www.jdom.org/pipermail/jdom-interest/2012-May/016850.html">XPath
                    examples</link></para>
                  </listitem>
+
+                  <listitem>
+                    <para>For analyzing the accessibility of referenced <xref
+                    linkend="glo_HTTP"/> / <acronym>HTTPS</acronym> resources
+                    see the previously mentioned discussion in <link
+                    xlink:href="http://stackoverflow.com/questions/1378199/how-to-check-if-a-url-exists-or-returns-404-with-java">How
+                    to check if a URL exists or returns 404 with
+                    Java?</link>.</para>
+                  </listitem>
                </itemizedlist>
              </tip>
            </question>
@@ -1687,8 +1690,9 @@ public class CheckUrl {
              <xref linkend="glo_XHTML"/> namespace. We thus create a
              corresponding <classname
              xlink:href="http://www.jdom.org/docs/apidocs/org/jdom2/Namespace.html">Namespace</classname>
-              instance along with the desired <code>xhtml</code>
-              prefix:</para>
+              instance handling the desired one-to-one correspondence between
+              the <code>"xhtml"</code> prefix and our target namespace
+              <uri>http://www.w3.org/1999/xhtml</uri>:</para>

              <programlisting language="java">final Namespace htmlNamespace =     // This will allow for XPath 
   Namespace.getNamespace("xhtml", // expressions like xhtml:img

--- a/P/Sda1/VerifyImgAccess/src/main/resources/fileextref.html
+++ b/P/Sda1/VerifyImgAccess/src/main/resources/fileextref.html
-<?xml version="1.0"?>
-<!-- 
-The pre- html-5 way:
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-     -->
 <!DOCTYPE html>
 <html xmlns="http://www.w3.org/1999/xhtml">
-  
  <head>
    <title>External Pictures</title>
  </head>
  <body>
    <h1>External Pictures</h1>
    <p>A local image reference:<img src="inline.gif" alt="none"/></p>
+    <p>What about ftp?<img src="ftp://inexistent.com/q.png" alt="none"/></p>
    <table>
      <tbody>
        <tr>
          <td>An existing picture:</td>
          <td><img 
-       src="http://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"
+       src="https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"
       alt="none"/></td>
        </tr>
        <tr>

--- a/P/Sda1/VerifyImgAccess/pom.xml
+++ b/P/Sda1/VerifyImgAccess/pom.xml
@@ -33,5 +33,13 @@
 			<version>${jaxen.jaxen.version}</version>
 		</dependency>

+		<!-- HTTP/HTTPS response codes -->
+		<dependency>
+			<groupId>javax</groupId>
+			<artifactId>javaee-api</artifactId>
+			<version>7.0</version>
+		</dependency>
+
+
 	</dependencies>
 </project>
--- a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImage.java
+++ b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImage.java
 package dom.xpath;

 import java.io.IOException;
-import java.util.List;

 import org.jdom2.Document;
 import org.jdom2.Element;
@@ -28,18 +27,21 @@ public class CheckExtImage {
   public void process(final String xhtmlFilename) throws JDOMException, IOException {

      final Document htmlInput = builder.build(xhtmlFilename);  
-      
+
      final Namespace htmlNamespace = // This will allow for XPath expressions like xhtml:img
            Namespace.getNamespace("xhtml", "http://www.w3.org/1999/xhtml");
-      
+
      final XPathExpression<Element> xpath = XPathFactory.instance().compile (
            "//xhtml:img[starts-with(@src, 'http://') or starts-with(@src, 'https://') or starts-with(@src, 'ftp://')]",
            new ElementFilter(), null, htmlNamespace);
-      
-      final List<Element> images = xpath.evaluate (htmlInput);

-      for (final Element image: images) {
-         CheckUrl.checkReadability(image.getAttributeValue("src"));
-      }
+      xpath.evaluate(htmlInput).stream().
+         map(img -> img.getAttributeValue("src")).
+         forEach(CheckExtImage::printResult);
   }
+   
+   static private void printResult(String url) {
+      System.out.println(url
+            + ", HTTP Status: " + CheckUrl.checkReadability(url));
+   }  
 }
\ No newline at end of file
--- a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImageDriver.java
+++ b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImageDriver.java
@@ -12,6 +12,6 @@ public class CheckExtImageDriver {
   */
  public static void main(String[] argv) throws Exception {
    final CheckExtImage ao = new CheckExtImage();
-    ao.process("src/main/resources/fileextref.html");
+    ao.process("fileextref.html");
  }
 }
\ No newline at end of file
--- a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckUrl.java
+++ b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckUrl.java
 package dom.xpath;

 import java.io.IOException;
+import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
 import java.net.URL;

+import javax.ws.rs.core.Response;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+
 /** Check Readability of a given URL
 * @author $Author: goik $
 * @version $Rev: 290 $ $Date: 2007-11-19 17:49:15 +0100 (Mo, 19. Nov 2007) $
@@ -12,28 +19,41 @@ import java.net.URL;
 *
 */
 public class CheckUrl {
+   
+   static private final Logger log = LogManager.getLogger(CheckUrl.class);
+   
  /** Check whether a given object like http://someserver.com/img.gif
   *  can be retrieved.
+   *  
   * @param urlRef
   *  The URL to be checked for read access
+   *  
+   * @return The HTTP status of the response, or null if the URL is malformed, its protocol is neither http nor https, or the connection fails.
   */
-  public static void checkReadability(final String urlRef) {
+  public static Response.Status checkReadability(final String urlRef) {
    try {
      final URL url = new URL(urlRef);
-      try {
-        final Object imgCandidate = url.getContent();
-        if (null == imgCandidate) {
-          System.err.println("Unable to open '" + urlRef + "'");
-        } else {
-          System.out.println("Received '"
-              + imgCandidate.getClass().getName() + "' from "
-              + urlRef);
-        }
-      } catch (IOException e) {
-        System.err.println("Unable to open '" + urlRef + "'");
+      
+      switch(url.getProtocol()) {
+      
+      case "http":
+      case "https":
+         try {
+            final HttpURLConnection huc =  (HttpURLConnection) url.openConnection();
+            huc.setRequestMethod("GET"); 
+            huc.connect(); 
+            return Response.Status.fromStatusCode(huc.getResponseCode());            
+         } catch (final IOException e) {
+            log.error("Unable to connect to " + urlRef, e);
+         }
+         break;        
+         default:
+            log.error("Protocol '" + url.getProtocol() + "' not yet implemented");
      }
+      
    } catch (MalformedURLException e) {
      System.err.println("Adress '" + urlRef + "' is malformed");
    }
+    return null;
  }
 }
\ No newline at end of file
--- a/P/Sda1/VerifyImgAccess/src/main/resources/log4j2.xml
+++ b/P/Sda1/VerifyImgAccess/src/main/resources/log4j2.xml
@@ -11,10 +11,10 @@
    <Loggers>

        <!-- You may want to define class or package level per-logger rules -->
-        <Logger name="de.hdm_stuttgart.mi.sda1.alignimg.App" level="warn">
+        <Logger name="de.hdm_stuttgart.mi.sda1.alignimg.App" level="debug">
            <AppenderRef ref="A1"/>
        </Logger>
-        <Root level="warn">
+        <Root level="debug">
            <AppenderRef ref="STDOUT"/>
        </Root>
    </Loggers>

--- a/P/Sda1/VerifyImgAccess/src/test/java/alignimg/TestImgSearch.java
+++ b/P/Sda1/VerifyImgAccess/src/test/java/alignimg/TestImgSearch.java
 package alignimg;

-import java.io.IOException;
-
-import javax.xml.parsers.ParserConfigurationException;
+import javax.ws.rs.core.Response;

+import org.junit.Assert;
 import org.junit.Test;
-import org.xml.sax.SAXException;

+import dom.xpath.CheckUrl;
+
+/**
+ *
+ */
 public class TestImgSearch {

+   /**
+    */
   @Test
-   public void searchAlignImageElements() throws ParserConfigurationException,
-         SAXException, IOException {
-
-      
+   public void findExistingImage() {
+     Assert.assertTrue(Response.Status.OK == 
+           CheckUrl.checkReadability(
+                 "https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"));
+   }
+   /**
+    */
+   @Test
+   public void findRedirectErrorPage() {
+     Assert.assertTrue(Response.Status.FOUND == 
+           CheckUrl.checkReadability(
+                 "http://www.hdm-stuttgart.de/rotfl.gif"));
   }
 }
\ No newline at end of file