From 2f99a6b86bf74ff8b618b9b429c0102ffddbbe12 Mon Sep 17 00:00:00 2001 From: Martin Goik <goik@hdm-stuttgart.de> Date: Sun, 21 Apr 2013 13:59:22 +0200 Subject: [PATCH] Exercise converting XML data to SQL INSERTs by SAX --- Doc/course.xml | 134 +++++++++++++++++- .../main/java/sax/context/Catalog2Sql.java | 40 ++++++ .../java/sax/context/Catalog2SqlDriver.java | 11 ++ .../main/java/sax/context/ImportHandler.java | 95 +++++++++++++ .../src/main/java/sax/context/products.xml | 23 +++ .../Jdbc/src/main/java/sax/context/schema.sql | 23 +++ 6 files changed, 325 insertions(+), 1 deletion(-) create mode 100644 ws/eclipse/Jdbc/src/main/java/sax/context/Catalog2Sql.java create mode 100644 ws/eclipse/Jdbc/src/main/java/sax/context/Catalog2SqlDriver.java create mode 100644 ws/eclipse/Jdbc/src/main/java/sax/context/ImportHandler.java create mode 100644 ws/eclipse/Jdbc/src/main/java/sax/context/products.xml create mode 100644 ws/eclipse/Jdbc/src/main/java/sax/context/schema.sql diff --git a/Doc/course.xml b/Doc/course.xml index b421a9dcb..73bdae33d 100644 --- a/Doc/course.xml +++ b/Doc/course.xml @@ -15433,6 +15433,120 @@ controlled for these purposes by the company.</para> </section> </chapter> + <chapter xml:id="extraExercises"> + <title>Additional exercises</title> + + <para>Exercises appearing here have been added during the current + lecture period. 
They appear here in order not to interfere with + existing exercises and their corresponding numbers.</para> + + <qandaset role="exercise"> + <qandadiv> + <qandaentry> + <question> + <para>Consider the following DTD + document instance + example:</para> + + <figure xml:id="catalogProductDescriptionsExample"> + <title>A sample catalog containing products and corresponding + descriptions.</title> + + <programlisting><!DOCTYPE catalog [ + <!ELEMENT catalog (product*)> + <!ELEMENT product (name, description*, age?)> + <!ATTLIST product id ID #REQUIRED> + <!ELEMENT name (#PCDATA)> + <!ELEMENT description (#PCDATA)> + <!ELEMENT age (#PCDATA)> +]> +<catalog> + <product id="mpt"> + <name>Monkey Picked Tea</name> + <description>Rare wild Chinese tea</description> + <description>Picked only by specially trained monkeys</description> + </product> + <product id="instantTent"> + <name>4-Person Instant Tent</name> + <description>4-person, 1-room tent</description> + <description>Pre-attached tent poles</description> + <description>Exclusive WeatherTec system.</description> + <age>15</age> + </product> +</catalog></programlisting> + </figure> + + <para>Data contained in catalog instances shall be + transferred to a relational database system. Implement and test + a <link linkend="gloss_SAX"><abbrev>SAX</abbrev></link> + application by following the steps described + below:</para> + + <glosslist> + <glossentry> + <glossterm>Database schema</glossterm> + + <glossdef> + <para>Create a database schema for a database product of your + choice (<productname>MySQL</productname>, + <productname>Oracle</productname>, ...). Your schema + should reflect the type and integrity constraints of the given + DTD. 
In particular:</para> + + <itemizedlist> + <listitem> + <para>The element <tag class="starttag">age</tag> is + optional and always expected to be of integer + type.</para> + </listitem> + + <listitem> + <para><tag class="starttag">description</tag> elements are nested in + <tag class="starttag">product</tag> elements and should thus be modeled as + a 1:n relation. Hence you need a suitable foreign + key.</para> + </listitem> + </itemizedlist> + </glossdef> + </glossentry> + + <glossentry> + <glossterm>SAX Application</glossterm> + + <glossdef> + <para>The order of appearance of the XML elements <tag + class="starttag">product</tag>, <tag + class="starttag">name</tag> and <tag + class="starttag">age</tag> does not permit a linear + generation of suitable SQL <code>INSERT</code> statements + by a <link linkend="gloss_SAX"><abbrev>SAX</abbrev></link> + content handler. Instead you will have to keep copies of + local element values when implementing + <methodname>org.xml.sax.ContentHandler.startElement(String,String,String,org.xml.sax.Attributes)</methodname> + and related callback methods. The following sequence of + <code>INSERT</code> statements corresponds to the XML data + contained in <xref + linkend="catalogProductDescriptionsExample"/>. 
You may use + these statements as a blueprint to be generated by your + <link linkend="gloss_SAX"><abbrev>SAX</abbrev></link> + application:</para> + + <programlisting><emphasis role="bold">INSERT INTO Product VALUES ('mpt', 'Monkey Picked Tea', NULL);</emphasis> +INSERT INTO Description VALUES('mpt', 'Picked only by specially trained monkeys'); +INSERT INTO Description VALUES('mpt', 'Rare wild Chinese tea'); + +<emphasis role="bold">INSERT INTO Product VALUES ('instantTent', '4-Person Instant Tent', 15);</emphasis> +INSERT INTO Description VALUES('instantTent', 'Exclusive WeatherTec system.'); +INSERT INTO Description VALUES('instantTent', '4-person, 1-room tent'); +INSERT INTO Description VALUES('instantTent', 'Pre-attached tent poles');</programlisting> + </glossdef> + </glossentry> + </glosslist> + </question> + </qandaentry> + </qandadiv> + </qandaset> + </chapter> + <appendix> <title>W3C production rules</title> @@ -18345,7 +18459,7 @@ public class PersistUser { <question> <para>Replace set by <classname>java.util.List</classname> and assure that the order of components is being preserved - in a JPA2 compliant manner. 
</para> + in a JPA2 compliant manner.</para> </question> </qandaentry> </qandadiv> @@ -18410,6 +18524,24 @@ public class PersistUser { </glossdef> </glossentry> + <glossentry xml:id="gloss_SAX"> + <glossterm><abbrev>SAX</abbrev></glossterm> + + <glossdef> + <para><link xlink:href="http://www.saxproject.org">Simple API for + XML</link>.</para> + </glossdef> + </glossentry> + + <glossentry xml:id="gloss_SQL"> + <glossterm><abbrev>SQL</abbrev></glossterm> + + <glossdef> + <para><link xlink:href="http://en.wikipedia.org/wiki/SQL">Structured + query language</link>.</para> + </glossdef> + </glossentry> + <glossentry xml:id="gloss_XML"> <glossterm><abbrev>XML</abbrev></glossterm> diff --git a/ws/eclipse/Jdbc/src/main/java/sax/context/Catalog2Sql.java b/ws/eclipse/Jdbc/src/main/java/sax/context/Catalog2Sql.java new file mode 100644 index 000000000..46e251e07 --- /dev/null +++ b/ws/eclipse/Jdbc/src/main/java/sax/context/Catalog2Sql.java @@ -0,0 +1,40 @@ +package sax.context; + +import java.io.IOException; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; + +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; + +import dom.MySaxErrorHandler; + +/** Parses catalog instances with a validating SAX parser; the registered ImportHandler prints the SQL for the RDBMS import. */ +public class Catalog2Sql { + + /** Set up a validating parser instance. 
+ * + * @throws SAXException if the parser cannot be created or the validation feature cannot be enabled + * @throws ParserConfigurationException if no parser satisfying the factory configuration is available */ + public Catalog2Sql() + + throws SAXException, ParserConfigurationException { + final SAXParserFactory saxPf = SAXParserFactory.newInstance(); + final SAXParser saxParser = saxPf.newSAXParser(); + xmlReader = saxParser.getXMLReader(); + xmlReader.setFeature("http://xml.org/sax/features/validation", true); + xmlReader.setContentHandler(new ImportHandler()); + xmlReader.setErrorHandler(new MySaxErrorHandler(System.out)); + } + /** Initiate parsing + * @param uri system identifier (URI) of the catalog document to be parsed + * @throws IOException on I/O errors raised by the underlying XML reader + * @throws SAXException on errors raised by the XML reader or its handlers */ + public void parse(final String uri) + throws IOException, SAXException{ + xmlReader.parse(uri); + } + private final XMLReader xmlReader; +} \ No newline at end of file diff --git a/ws/eclipse/Jdbc/src/main/java/sax/context/Catalog2SqlDriver.java b/ws/eclipse/Jdbc/src/main/java/sax/context/Catalog2SqlDriver.java new file mode 100644 index 000000000..824cee237 --- /dev/null +++ b/ws/eclipse/Jdbc/src/main/java/sax/context/Catalog2SqlDriver.java @@ -0,0 +1,11 @@ +package sax.context; + +public class Catalog2SqlDriver { + + /** Imports the sample catalog products.xml; the path is resolved relative to the working directory. */ + public static void main(String[] argv) throws Exception{ + final Catalog2Sql importer = new Catalog2Sql(); + importer.parse("src/main/java/sax/context/products.xml"); + } + +} diff --git a/ws/eclipse/Jdbc/src/main/java/sax/context/ImportHandler.java b/ws/eclipse/Jdbc/src/main/java/sax/context/ImportHandler.java new file mode 100644 index 000000000..e79148fe3 --- /dev/null +++ b/ws/eclipse/Jdbc/src/main/java/sax/context/ImportHandler.java @@ -0,0 +1,95 @@ +package sax.context; + +import java.util.HashSet; +import java.util.Set; + +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** SAX content handler printing SQL INSERT statements for catalog products and their descriptions to standard output. */ +public class ImportHandler implements ContentHandler { + + private Set<String> currentDescriptions = new HashSet<String>(); + String lastCharacterUnit = "" + 
,lastAgeString = null + ,currentProductId; + @Override + public void startElement(String uri, String localName, String qName, + Attributes attributes) throws SAXException { + lastCharacterUnit = ""; // text of a previous element must not leak into this one + if ("product".equals(qName)) { + currentProductId = attributes.getValue("id"); + System.out.print("INSERT INTO Product VALUES ('" + currentProductId + "'"); + } + } + /** Emit or store the text collected for the element being closed; the Product INSERT is completed at the product end tag. */ + @Override + public void endElement(final String uri, final String localName, final String qName) + throws SAXException { + switch(qName) { + case "product": + if (null == lastAgeString) { + System.out.print( ", NULL"); + } else { + try { + System.out.print(", " + Integer.parseInt(lastAgeString.trim())); + } catch (NumberFormatException ex) { + System.err.println("Age is not an integer value:" + lastAgeString); + } + lastAgeString = null; + } + + System.out.println(");"); + flushDescriptionEntries(); + break; + case "name": + System.out.print(", '" + lastCharacterUnit.replace("'", "''") + "'"); // double embedded quotes + break; + + case "description": + // Do not interfere with the current INSERT INTO Product ... + // statement. Instead postpone related INSERT INTO Description ... + // operations, see flushDescriptionEntries(). + currentDescriptions.add(lastCharacterUnit); + break; + case "age": + lastAgeString = lastCharacterUnit; + break; + } + } + private void flushDescriptionEntries() { + // Add <description> related INSERTs + for (final String description: currentDescriptions) { + System.out.println("INSERT INTO Description VALUES('" + + currentProductId + "', '" + description.replace("'", "''") + "');"); + } + // Next <product> is yet to come, so + // clear the current set of descriptions. 
+ currentDescriptions.clear(); + } + /** Append character data to the text of the current element; a parser may deliver one text node in several chunks. */ + @Override + public void characters(final char[] ch, final int start, final int length) + throws SAXException { + lastCharacterUnit += new String(ch, start, length); + } + + // We don't need these remaining callbacks + + @Override public void setDocumentLocator(Locator locator) {} + @Override public void startDocument() throws SAXException {} + @Override public void endDocument() throws SAXException {} + @Override public void startPrefixMapping(String prefix, String uri) + throws SAXException {} + @Override public void endPrefixMapping(String prefix) + throws SAXException {} + @Override public void ignorableWhitespace(char[] ch, int start, int length) + throws SAXException {} + + @Override public void processingInstruction(String target, String data) + throws SAXException {} + + @Override public void skippedEntity(String name) throws SAXException {} +} \ No newline at end of file diff --git a/ws/eclipse/Jdbc/src/main/java/sax/context/products.xml b/ws/eclipse/Jdbc/src/main/java/sax/context/products.xml new file mode 100644 index 000000000..7bf5b33fc --- /dev/null +++ b/ws/eclipse/Jdbc/src/main/java/sax/context/products.xml @@ -0,0 +1,23 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE catalog [ + <!ELEMENT catalog (product*)> + <!ELEMENT product (name, description*, age?)> + <!ATTLIST product id ID #REQUIRED> + <!ELEMENT name (#PCDATA)> + <!ELEMENT description (#PCDATA)> + <!ELEMENT age (#PCDATA)> +]> +<catalog> + <product id="mpt"> + <name>Monkey Picked Tea</name> + <description>Rare wild Chinese tea</description> + <description>Picked only by specially trained monkeys</description> + </product> + <product id="instantTent"> + <name>4-Person Instant Tent</name> + <description>4-person, 1-room tent</description> + <description>Pre-attached tent poles</description> + <description>Exclusive WeatherTec system.</description> + <age>15</age> + </product> +</catalog> \ No newline at end of file diff --git 
a/ws/eclipse/Jdbc/src/main/java/sax/context/schema.sql b/ws/eclipse/Jdbc/src/main/java/sax/context/schema.sql new file mode 100644 index 000000000..60f6bccce --- /dev/null +++ b/ws/eclipse/Jdbc/src/main/java/sax/context/schema.sql @@ -0,0 +1,23 @@ +CREATE TABLE Product ( + id CHAR(20) NOT NULL PRIMARY KEY + ,name VARCHAR(255) NOT NULL + ,age SMALLINT -- nullable: <age> is optional in the DTD +); + +CREATE TABLE Description ( + product CHAR(20) NOT NULL + ,text VARCHAR(255) NOT NULL + ,FOREIGN KEY (product) REFERENCES Product (id) -- table-level constraint: MySQL ignores inline REFERENCES +); +-- example data corresponding to products.xml -- +INSERT INTO Product VALUES ('mpt', 'Monkey Picked Tea', NULL); +INSERT INTO Description VALUES('mpt', 'Picked only by specially trained monkeys'); +INSERT INTO Description VALUES('mpt', 'Rare wild Chinese tea'); +INSERT INTO Product VALUES ('instantTent', '4-Person Instant Tent', 15); +INSERT INTO Description VALUES('instantTent', 'Exclusive WeatherTec system.'); +INSERT INTO Description VALUES('instantTent', '4-person, 1-room tent'); +INSERT INTO Description VALUES('instantTent', 'Pre-attached tent poles'); + +-- tidy up -- +DROP TABLE Description; +DROP TABLE Product; \ No newline at end of file -- GitLab