Skip to content
Snippets Groups Projects
Commit 1733a1a4 authored by Goik Martin's avatar Goik Martin
Browse files

Better HTTP response handling, using Java streams

parent ba86f7bd
No related branches found
No related tags found
No related merge requests found
......@@ -1560,22 +1560,27 @@ public class DomXpath {
<programlisting language="xml"
xml:id="domCheckImageAccessibility">&lt;!DOCTYPE html&gt;
&lt;html xmlns="http://www.w3.org/1999/xhtml"&gt;
&lt;head&gt; ... &lt;/head&gt;
&lt;html xmlns="http://www.w3.org/1999/xhtml"&gt;
&lt;head&gt;
&lt;title&gt;External Pictures&lt;/title&gt;
&lt;/head&gt;
&lt;body&gt;
&lt;h1&gt;External Pictures&lt;/h1&gt;
&lt;p&gt;A local image reference:&lt;img src="inline.gif" alt="none"/&gt;&lt;/p&gt;
&lt;p&gt;What about ftp?&lt;img src="ftp://inexistent.com/q.png" alt="none"/&gt;&lt;/p&gt;
&lt;table&gt;
&lt;tbody&gt;
&lt;tr&gt;
&lt;td&gt;An existing picture:&lt;/td&gt;
&lt;td&gt;&lt;img
src="http://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"
alt="none"/&gt;&lt;/td&gt;
&lt;td&gt;&lt;img
src="https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"
alt="none"/&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;tr&gt;
&lt;td&gt;A non-existing picture:&lt;/td&gt;
&lt;td&gt;&lt;img src="<emphasis role="bold">http://www.hdm-stuttgart.de/rotfl.gif</emphasis>" alt="none"/&gt;&lt;/td&gt;
&lt;td&gt;&lt;img
src="http://www.hdm-stuttgart.de/rotfl.gif"
alt="none"/&gt;&lt;/td&gt;
&lt;/tr&gt;
&lt;/tbody&gt;
&lt;/table&gt;
......@@ -1586,51 +1591,40 @@ public class DomXpath {
<abbrev
xlink:href="http://www.ietf.org/rfc/rfc1738.txt">URL</abbrev>
image references to <emphasis>external</emphasis> Servers
starting either with <code>http://</code> ,
<code>https://</code> or <code>ftp://</code> ignoring other
protocol types. Internal image references referring to the
<quote>current</quote> server typically look like <code>&lt;img
src="/images/test.gif"</code>. So in order to distinguish these
two types of references we may use the XSL built in function
<link
starting either with <code>http://</code> or
<code>https://</code> ignoring other protocol types. Internal
image references referring to the <quote>current</quote> server
typically look like <code>&lt;img src="/images/test.gif"</code>.
So in order to distinguish these two types of references we may
use the XSL built in function <link
xlink:href="http://www.cafeconleche.org/books/bible2/chapters/ch17.html">starts-with()</link>
testing for the <code>http</code> or <code>ftp</code> protocol
testing for the <code>http</code> or <code>https</code> protocol
definition part of an <abbrev
xlink:href="http://www.ietf.org/rfc/rfc1738.txt">URL</abbrev>. A
possible output for the example being given is:</para>
<programlisting language="none">Received 'sun.awt.image.URLImageSource' from
http://www.hdm-stuttgart.de/bilder_navigation/laptop.gif
Unable to open 'http://www.hdm-stuttgart.de/rotfl.gif'</programlisting>
<programlisting language="none">xpath.CheckUrl (CheckUrl.java:51) - Protocol 'ftp' not yet implemented
ftp://inexistent.com/q.png, HTTP Status: null
https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif, HTTP Status: OK
http://www.hdm-stuttgart.de/rotfl.gif, HTTP Status: Found</programlisting>
<para>The following code snippet shows a helpful class method to
check for both correctness of <abbrev
xlink:href="http://www.ietf.org/rfc/rfc1738.txt">URL</abbrev>s
and accessibility of referenced objects:</para>
<para>Do not forget to provide unit tests at least for the image
network accessibility part.</para>
<programlisting language="java">package dom.xpath;
...
public class CheckUrl {
public static void checkReadability(final String urlRef) {
try {
final URL url = new URL(urlRef);
try {
final Object imgCandidate = url.getContent();
if (null == imgCandidate) {
System.err.println("Unable to open '" + urlRef + "'");
} else {
System.out.println("Received '"
+ imgCandidate.getClass().getName() + "' from "
+ urlRef);
}
} catch (IOException e) {
System.err.println("Unable to open '" + urlRef + "'");
}
} catch (MalformedURLException e) {
System.err.println("Adress '" + urlRef + "' is malformed");
}
}
}</programlisting>
<caution>
<para>Notice the response code for
<uri>http://www.hdm-stuttgart.de/rotfl.gif</uri>: This
resource is actually unavailable! But the web server tries to
recover by redirecting to an error page. Moreover, a web server
may return misleading response codes if it decides that
your user agent is unable to handle the content type in
question. You may catch a glimpse of related problems by
reading <link
xlink:href="http://stackoverflow.com/questions/1378199/how-to-check-if-a-url-exists-or-returns-404-with-java">How
to check if a URL exists or returns 404 with Java?</link>. For
the current exercise we will not dig deeper into the
subject.</para>
</caution>
<tip>
<para>Using XPath expressions in conjunction with namespaces
......@@ -1649,6 +1643,15 @@ public class CheckUrl {
xlink:href="http://www.jdom.org/pipermail/jdom-interest/2012-May/016850.html">XPath
examples</link></para>
</listitem>
<listitem>
<para>For analyzing the accessibility of referenced <xref
linkend="glo_HTTP"/> / <acronym>HTTPS</acronym> resources
see the previously mentioned discussion in <link
xlink:href="http://stackoverflow.com/questions/1378199/how-to-check-if-a-url-exists-or-returns-404-with-java">How
to check if a URL exists or returns 404 with
Java?</link>.</para>
</listitem>
</itemizedlist>
</tip>
</question>
......@@ -1687,8 +1690,9 @@ public class CheckUrl {
<xref linkend="glo_XHTML"/> namespace. We thus create a
corresponding <classname
xlink:href="http://www.jdom.org/docs/apidocs/org/jdom2/Namespace.html">Namespace</classname>
instance along with the desired <code>xhtml</code>
prefix:</para>
instance handling the desired one to one correspondence between
the <code>"xhtml"</code> prefix and our target namespace
<uri>http://www.w3.org/1999/xhtml</uri>:</para>
<programlisting language="java">final Namespace htmlNamespace = // This will allow for XPath
Namespace.getNamespace("xhtml", // expressions like xhtml:img
......
<?xml version="1.0"?>
<!--
The pre- html-5 way:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
-->
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>External Pictures</title>
</head>
<body>
<h1>External Pictures</h1>
<p>A local image reference:<img src="inline.gif" alt="none"/></p>
<p>What about ftp?<img src="ftp://inexistent.com/q.png" alt="none"/></p>
<table>
<tbody>
<tr>
<td>An existing picture:</td>
<td><img
src="http://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"
src="https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"
alt="none"/></td>
</tr>
<tr>
......
......@@ -33,5 +33,13 @@
<version>${jaxen.jaxen.version}</version>
</dependency>
<!-- HTTP/HTTPS response codes -->
<dependency>
<groupId>javax</groupId>
<artifactId>javaee-api</artifactId>
<version>7.0</version>
</dependency>
</dependencies>
</project>
package dom.xpath;
import java.io.IOException;
import java.util.List;
import org.jdom2.Document;
import org.jdom2.Element;
......@@ -28,18 +27,21 @@ public class CheckExtImage {
public void process(final String xhtmlFilename) throws JDOMException, IOException {
final Document htmlInput = builder.build(xhtmlFilename);
final Namespace htmlNamespace = // This will allow for XPath expressions like xhtml:img
Namespace.getNamespace("xhtml", "http://www.w3.org/1999/xhtml");
final XPathExpression<Element> xpath = XPathFactory.instance().compile (
"//xhtml:img[starts-with(@src, 'http://') or starts-with(@src, 'https://') or starts-with(@src, 'ftp://')]",
new ElementFilter(), null, htmlNamespace);
final List<Element> images = xpath.evaluate (htmlInput);
for (final Element image: images) {
CheckUrl.checkReadability(image.getAttributeValue("src"));
}
xpath.evaluate(htmlInput).stream().
map(img -> img.getAttributeValue("src")).
forEach(CheckExtImage::printResult);
}
/**
 * Prints one report line for a URL to standard output: the URL itself
 * followed by the status obtained from {@code CheckUrl.checkReadability}.
 *
 * @param url
 *            The URL to be checked and reported
 */
private static void printResult(String url) {
    final Object status = CheckUrl.checkReadability(url);
    final String report = url + ", HTTP Status: " + status;
    System.out.println(report);
}
}
\ No newline at end of file
......@@ -12,6 +12,6 @@ public class CheckExtImageDriver {
*/
public static void main(String[] argv) throws Exception {
final CheckExtImage ao = new CheckExtImage();
ao.process("src/main/resources/fileextref.html");
ao.process("fileextref.html");
}
}
\ No newline at end of file
package dom.xpath;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import javax.ws.rs.core.Response;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/** Check Readability of a given URL
* @author $Author: goik $
* @version $Rev: 290 $ $Date: 2007-11-19 17:49:15 +0100 (Mo, 19. Nov 2007) $
......@@ -12,28 +19,41 @@ import java.net.URL;
*
*/
public class CheckUrl {
static private final Logger log = LogManager.getLogger(CheckUrl.class);
/** Check whether a given object like http://someserver.com/img.gif
* can be retrieved.
*
* @param urlRef
* The URL to be checked for read access
*
* @return The HTTP response status, or null if none could be determined, e.g. if the protocol in question is unsupported, the URL is malformed or the connection fails.
*/
public static void checkReadability(final String urlRef) {
public static Response.Status checkReadability(final String urlRef) {
try {
final URL url = new URL(urlRef);
try {
final Object imgCandidate = url.getContent();
if (null == imgCandidate) {
System.err.println("Unable to open '" + urlRef + "'");
} else {
System.out.println("Received '"
+ imgCandidate.getClass().getName() + "' from "
+ urlRef);
}
} catch (IOException e) {
System.err.println("Unable to open '" + urlRef + "'");
switch(url.getProtocol()) {
case "http":
case "https":
try {
final HttpURLConnection huc = (HttpURLConnection) url.openConnection();
huc.setRequestMethod("GET");
huc.connect();
return Response.Status.fromStatusCode(huc.getResponseCode());
} catch (final IOException e) {
log.error("Unable to connect to " + urlRef, e);
}
break;
default:
log.error("Protocol '" + url.getProtocol() + "' not yet implemented");
}
} catch (MalformedURLException e) {
System.err.println("Adress '" + urlRef + "' is malformed");
}
return null;
}
}
\ No newline at end of file
......@@ -11,10 +11,10 @@
<Loggers>
<!-- You may want to define class or package level per-logger rules -->
<Logger name="de.hdm_stuttgart.mi.sda1.alignimg.App" level="warn">
<Logger name="de.hdm_stuttgart.mi.sda1.alignimg.App" level="debug">
<AppenderRef ref="A1"/>
</Logger>
<Root level="warn">
<Root level="debug">
<AppenderRef ref="STDOUT"/>
</Root>
</Loggers>
......
package alignimg;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import javax.ws.rs.core.Response;
import org.junit.Assert;
import org.junit.Test;
import org.xml.sax.SAXException;
import dom.xpath.CheckUrl;
/**
*
*/
public class TestImgSearch {
/**
*/
@Test
public void searchAlignImageElements() throws ParserConfigurationException,
SAXException, IOException {
public void findExistingImage() {
Assert.assertTrue(Response.Status.OK ==
CheckUrl.checkReadability(
"https://www.hdm-stuttgart.de/bilder_navigation/laptop.gif"));
}
/**
 * Requesting a non-existing image is expected to yield status
 * {@code FOUND} rather than an error status: the server answers such
 * requests by redirecting to an error page.
 */
@Test
public void findRedirectErrorPage() {
    // assertSame reports both expected and actual status on failure,
    // whereas assertTrue(a == b) only reports a bare AssertionError.
    Assert.assertSame(Response.Status.FOUND,
        CheckUrl.checkReadability(
            "http://www.hdm-stuttgart.de/rotfl.gif"));
}
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment