From 1733a1a4aa6ad40446fe6f915577d1270b1e9802 Mon Sep 17 00:00:00 2001
From: Martin Goik <goik@hdm-stuttgart.de>
Date: Wed, 25 Nov 2015 20:23:18 +0100
Subject: [PATCH] Better HTTP response handling, using Java streams

---
 Doc/Sda1/dom.xml                              | 96 ++++++++++---------
 .../{src/main/resources => }/fileextref.html  | 10 +-
 P/Sda1/VerifyImgAccess/pom.xml                |  8 ++
 .../main/java/dom/xpath/CheckExtImage.java    | 18 ++--
 .../java/dom/xpath/CheckExtImageDriver.java   |  2 +-
 .../src/main/java/dom/xpath/CheckUrl.java     | 44 ++++++---
 .../src/main/resources/log4j2.xml             |  4 +-
 .../src/test/java/alignimg/TestImgSearch.java | 29 ++++--
 8 files changed, 126 insertions(+), 85 deletions(-)
 rename P/Sda1/VerifyImgAccess/{src/main/resources => }/fileextref.html (71%)

diff --git a/Doc/Sda1/dom.xml b/Doc/Sda1/dom.xml
index b7822ef32..1769014ad 100644
--- a/Doc/Sda1/dom.xml
+++ b/Doc/Sda1/dom.xml
@@ -1560,22 +1560,27 @@ public class DomXpath {
 
               <programlisting language="xml"
                               xml:id="domCheckImageAccessibility">&lt;!DOCTYPE html&gt;
-&lt;html xmlns="http://www.w3.org/1999/xhtml"&gt; 
-  &lt;head&gt; ... &lt;/head&gt;
+&lt;html xmlns="http://www.w3.org/1999/xhtml"&gt;
+  &lt;head&gt;
+    &lt;title&gt;External Pictures&lt;/title&gt;
+  &lt;/head&gt;
   &lt;body&gt;
     &lt;h1&gt;External Pictures&lt;/h1&gt;
     &lt;p&gt;A local image reference:&lt;img src="inline.gif" alt="none"/&gt;&lt;/p&gt;
+    &lt;p&gt;What about ftp?&lt;img src="ftp://inexistent.com/q.png" alt="none"/&gt;&lt;/p&gt;
     &lt;table&gt;
       &lt;tbody&gt;
         &lt;tr&gt;
           &lt;td&gt;An existing picture:&lt;/td&gt;
-          &lt;td&gt;&lt;img
-            src="http://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"
-            alt="none"/&gt;&lt;/td&gt;
+          &lt;td&gt;&lt;img 
+             src="https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"
+             alt="none"/&gt;&lt;/td&gt;
         &lt;/tr&gt;
         &lt;tr&gt;
           &lt;td&gt;A non-existing picture:&lt;/td&gt;
-          &lt;td&gt;&lt;img src="<emphasis role="bold">http://www.hdm-stuttgart.de/rotfl.gif</emphasis>" alt="none"/&gt;&lt;/td&gt;
+          &lt;td&gt;&lt;img
+              src="http://www.hdm-stuttgart.de/rotfl.gif"
+              alt="none"/&gt;&lt;/td&gt;
         &lt;/tr&gt;
       &lt;/tbody&gt;
     &lt;/table&gt;
@@ -1586,51 +1591,40 @@ public class DomXpath {
               <abbrev
               xlink:href="http://www.ietf.org/rfc/rfc1738.txt">URL</abbrev>
               image references to <emphasis>external</emphasis> Servers
-              starting either with <code>http://</code> ,
-              <code>https://</code> or <code>ftp://</code> ignoring other
-              protocol types. Internal image references referring to the
-              <quote>current</quote> server typically look like <code>&lt;img
-              src="/images/test.gif"</code>. So in order to distinguish these
-              two types of references we may use the XSL built in function
-              <link
+              starting either with <code>http://</code> or
+              <code>https://</code> ignoring other protocol types. Internal
+              image references referring to the <quote>current</quote> server
+              typically look like <code>&lt;img src="/images/test.gif"/&gt;</code>.
+              So in order to distinguish these two types of references we may
+              use the XSL built in function <link
               xlink:href="http://www.cafeconleche.org/books/bible2/chapters/ch17.html">starts-with()</link>
-              testing for the <code>http</code> or <code>ftp</code> protocol
+              testing for the <code>http</code> or <code>https</code> protocol
               definition part of an <abbrev
               xlink:href="http://www.ietf.org/rfc/rfc1738.txt">URL</abbrev>. A
               possible output for the example being given is:</para>
 
-              <programlisting language="none">Received 'sun.awt.image.URLImageSource' from
-                    http://www.hdm-stuttgart.de/bilder_navigation/laptop.gif
-Unable to open 'http://www.hdm-stuttgart.de/rotfl.gif'</programlisting>
+              <programlisting language="none">xpath.CheckUrl (CheckUrl.java:51) - Protocol 'ftp' not yet implemented
+ftp://inexistent.com/q.png, HTTP Status: null
+https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif, HTTP Status: OK
+http://www.hdm-stuttgart.de/rotfl.gif, HTTP Status: Found</programlisting>
 
-              <para>The following code snippet shows a helpful class method to
-              check for both correctness of <abbrev
-              xlink:href="http://www.ietf.org/rfc/rfc1738.txt">URL</abbrev>'s
-              and accessibility of referenced objects:</para>
+              <para>Do not forget to provide unit tests at least for the image
+              network accessibility part.</para>
 
-              <programlisting language="java">package dom.xpath;
-...
-public class CheckUrl {
-  public static void checkReadability(final String urlRef) {
-    try {
-      final URL url = new URL(urlRef);
-      try {
-        final Object imgCandidate = url.getContent();
-        if (null == imgCandidate) {
-          System.err.println("Unable to open '" + urlRef + "'");
-        } else {
-          System.out.println("Received '"
-              + imgCandidate.getClass().getName() + "' from "
-              + urlRef);
-        }
-      } catch (IOException e) {
-        System.err.println("Unable to open '" + urlRef + "'");
-      }
-    } catch (MalformedURLException e) {
-      System.err.println("Adress '" + urlRef + "' is malformed");
-    }
-  }
-}</programlisting>
+              <caution>
+                <para>Notice the response code for
+                <uri>http://www.hdm-stuttgart.de/rotfl.gif</uri>: This
+                resource is actually unavailable! But the web server tries to
+                recover by redirecting to an error page. Moreover, a web server
+                may return misleading response codes if it decides that
+                your user agent is unable to handle the content type in
+                question. You may catch a glimpse of related problems by
+                reading <link
+                xlink:href="http://stackoverflow.com/questions/1378199/how-to-check-if-a-url-exists-or-returns-404-with-java">How
+                to check if a URL exists or returns 404 with Java?</link>. For
+                the current exercise we will not dig deeper into the
+                subject.</para>
+              </caution>
 
               <tip>
                 <para>Using XPath expressions in conjunction with namespaces
@@ -1649,6 +1643,15 @@ public class CheckUrl {
                     xlink:href="http://www.jdom.org/pipermail/jdom-interest/2012-May/016850.html">XPath
                     examples</link></para>
                   </listitem>
+
+                  <listitem>
+                    <para>For analyzing the accessibility of referenced <xref
+                    linkend="glo_HTTP"/> / <acronym>HTTPS</acronym> resources
+                    see the previously mentioned discussion in <link
+                    xlink:href="http://stackoverflow.com/questions/1378199/how-to-check-if-a-url-exists-or-returns-404-with-java">How
+                    to check if a URL exists or returns 404 with
+                    Java?</link>.</para>
+                  </listitem>
                 </itemizedlist>
               </tip>
             </question>
@@ -1687,8 +1690,9 @@ public class CheckUrl {
               <xref linkend="glo_XHTML"/> namespace. We thus create a
               corresponding <classname
               xlink:href="http://www.jdom.org/docs/apidocs/org/jdom2/Namespace.html">Namespace</classname>
-              instance along with the desired <code>xhtml</code>
-              prefix:</para>
+              instance handling the desired one-to-one correspondence between
+              the <code>"xhtml"</code> prefix and our target namespace
+              <uri>http://www.w3.org/1999/xhtml</uri>:</para>
 
               <programlisting language="java">final Namespace htmlNamespace =     // This will allow for XPath 
    Namespace.getNamespace("xhtml", // expressions like xhtml:img
diff --git a/P/Sda1/VerifyImgAccess/src/main/resources/fileextref.html b/P/Sda1/VerifyImgAccess/fileextref.html
similarity index 71%
rename from P/Sda1/VerifyImgAccess/src/main/resources/fileextref.html
rename to P/Sda1/VerifyImgAccess/fileextref.html
index 4f8fdaf3a..be22b98c7 100644
--- a/P/Sda1/VerifyImgAccess/src/main/resources/fileextref.html
+++ b/P/Sda1/VerifyImgAccess/fileextref.html
@@ -1,24 +1,18 @@
-<?xml version="1.0"?>
-<!-- 
-The pre- html-5 way:
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
-  "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-     -->
 <!DOCTYPE html>
 <html xmlns="http://www.w3.org/1999/xhtml">
-  
   <head>
     <title>External Pictures</title>
   </head>
   <body>
     <h1>External Pictures</h1>
     <p>A local image reference:<img src="inline.gif" alt="none"/></p>
+    <p>What about ftp?<img src="ftp://inexistent.com/q.png" alt="none"/></p>
     <table>
       <tbody>
         <tr>
           <td>An existing picture:</td>
           <td><img 
-       src="http://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"
+       src="https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"
        alt="none"/></td>
         </tr>
         <tr>
diff --git a/P/Sda1/VerifyImgAccess/pom.xml b/P/Sda1/VerifyImgAccess/pom.xml
index 9c943b7b9..b40fd8179 100644
--- a/P/Sda1/VerifyImgAccess/pom.xml
+++ b/P/Sda1/VerifyImgAccess/pom.xml
@@ -33,5 +33,13 @@
 			<version>${jaxen.jaxen.version}</version>
 		</dependency>
 
+		<!-- HTTP/HTTPS response codes -->
+		<dependency>
+			<groupId>javax</groupId>
+			<artifactId>javaee-api</artifactId>
+			<version>7.0</version>
+		</dependency>
+
+
 	</dependencies>
 </project>
diff --git a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImage.java b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImage.java
index 8c5e98cf0..1367135c6 100644
--- a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImage.java
+++ b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImage.java
@@ -1,7 +1,6 @@
 package dom.xpath;
 
 import java.io.IOException;
-import java.util.List;
 
 import org.jdom2.Document;
 import org.jdom2.Element;
@@ -28,18 +27,21 @@ public class CheckExtImage {
    public void process(final String xhtmlFilename) throws JDOMException, IOException {
 
       final Document htmlInput = builder.build(xhtmlFilename);  
-      
+
       final Namespace htmlNamespace = // This will allow for XPath expressions like xhtml:img
             Namespace.getNamespace("xhtml", "http://www.w3.org/1999/xhtml");
-      
+
       final XPathExpression<Element> xpath = XPathFactory.instance().compile (
             "//xhtml:img[starts-with(@src, 'http://') or starts-with(@src, 'https://') or starts-with(@src, 'ftp://')]",
             new ElementFilter(), null, htmlNamespace);
-      
-      final List<Element> images = xpath.evaluate (htmlInput);
 
-      for (final Element image: images) {
-         CheckUrl.checkReadability(image.getAttributeValue("src"));
-      }
+      xpath.evaluate(htmlInput).stream().
+         map(img -> img.getAttributeValue("src")).
+         forEach(CheckExtImage::printResult);
    }
+   
+   static private void printResult(String url) {
+      System.out.println(url
+            + ", HTTP Status: " + CheckUrl.checkReadability(url));
+   }  
 }
\ No newline at end of file
diff --git a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImageDriver.java b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImageDriver.java
index 5e7fc7a93..dc051be0b 100644
--- a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImageDriver.java
+++ b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckExtImageDriver.java
@@ -12,6 +12,6 @@ public class CheckExtImageDriver {
    */
   public static void main(String[] argv) throws Exception {
     final CheckExtImage ao = new CheckExtImage();
-    ao.process("src/main/resources/fileextref.html");
+    ao.process("fileextref.html");
   }
 }
\ No newline at end of file
diff --git a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckUrl.java b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckUrl.java
index 6fe0a4cc8..f82f1e3c6 100644
--- a/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckUrl.java
+++ b/P/Sda1/VerifyImgAccess/src/main/java/dom/xpath/CheckUrl.java
@@ -1,9 +1,16 @@
 package dom.xpath;
 
 import java.io.IOException;
+import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
 import java.net.URL;
 
+import javax.ws.rs.core.Response;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+
 /** Check Readability of a given URL
  * @author $Author: goik $
  * @version $Rev: 290 $ $Date: 2007-11-19 17:49:15 +0100 (Mo, 19. Nov 2007) $
@@ -12,28 +19,41 @@ import java.net.URL;
  *
  */
 public class CheckUrl {
+   
+   static private final Logger log = LogManager.getLogger(CheckUrl.class);
+   
   /** Check whether a given object like http://someserver.com/img.gif
    *  can be retrieved.
+   *  
    * @param urlRef
    *  The URL to be checked for read access
+   *  
+   * @return The HTTP response status, or null if the URL is malformed, its protocol is unsupported or the connection fails.
    */
-  public static void checkReadability(final String urlRef) {
+  public static Response.Status checkReadability(final String urlRef) {
     try {
       final URL url = new URL(urlRef);
-      try {
-        final Object imgCandidate = url.getContent();
-        if (null == imgCandidate) {
-          System.err.println("Unable to open '" + urlRef + "'");
-        } else {
-          System.out.println("Received '"
-              + imgCandidate.getClass().getName() + "' from "
-              + urlRef);
-        }
-      } catch (IOException e) {
-        System.err.println("Unable to open '" + urlRef + "'");
+      
+      switch(url.getProtocol()) {
+      
+      case "http":
+      case "https":
+         try {
+            final HttpURLConnection huc =  (HttpURLConnection) url.openConnection();
+            huc.setRequestMethod("GET"); 
+            huc.connect(); 
+            return Response.Status.fromStatusCode(huc.getResponseCode());            
+         } catch (final IOException e) {
+            log.error("Unable to connect to " + urlRef, e);
+         }
+         break;        
+         default:
+            log.error("Protocol '" + url.getProtocol() + "' not yet implemented");
       }
+      
     } catch (MalformedURLException e) {
       System.err.println("Adress '" + urlRef + "' is malformed");
     }
+    return null;
   }
 }
\ No newline at end of file
diff --git a/P/Sda1/VerifyImgAccess/src/main/resources/log4j2.xml b/P/Sda1/VerifyImgAccess/src/main/resources/log4j2.xml
index 885670527..3cceed50d 100644
--- a/P/Sda1/VerifyImgAccess/src/main/resources/log4j2.xml
+++ b/P/Sda1/VerifyImgAccess/src/main/resources/log4j2.xml
@@ -11,10 +11,10 @@
     <Loggers>
 
         <!-- You my want to define class or package level per-logger rules -->
-        <Logger name="de.hdm_stuttgart.mi.sda1.alignimg.App" level="warn">
+        <Logger name="de.hdm_stuttgart.mi.sda1.alignimg.App" level="debug">
             <AppenderRef ref="A1"/>
         </Logger>
-        <Root level="warn">
+        <Root level="debug">
             <AppenderRef ref="STDOUT"/>
         </Root>
     </Loggers>
diff --git a/P/Sda1/VerifyImgAccess/src/test/java/alignimg/TestImgSearch.java b/P/Sda1/VerifyImgAccess/src/test/java/alignimg/TestImgSearch.java
index f349eb2d9..03d2ecb1d 100644
--- a/P/Sda1/VerifyImgAccess/src/test/java/alignimg/TestImgSearch.java
+++ b/P/Sda1/VerifyImgAccess/src/test/java/alignimg/TestImgSearch.java
@@ -1,18 +1,31 @@
 package alignimg;
 
-import java.io.IOException;
-
-import javax.xml.parsers.ParserConfigurationException;
+import javax.ws.rs.core.Response;
 
+import org.junit.Assert;
 import org.junit.Test;
-import org.xml.sax.SAXException;
 
+import dom.xpath.CheckUrl;
+
+/**
+ * Unit tests checking the HTTP status reported by {@link CheckUrl#checkReadability(String)}.
+ */
 public class TestImgSearch {
 
+   /**
+    */
    @Test
-   public void searchAlignImageElements() throws ParserConfigurationException,
-         SAXException, IOException {
-
-      
+   public void findExistingImage() {
+     Assert.assertTrue(Response.Status.OK == 
+           CheckUrl.checkReadability(
+                 "https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"));
+   }
+   /**
+    */
+   @Test
+   public void findRedirectErrorPage() {
+     Assert.assertTrue(Response.Status.FOUND == 
+           CheckUrl.checkReadability(
+                 "http://www.hdm-stuttgart.de/rotfl.gif"));
    }
 }
\ No newline at end of file
-- 
GitLab