diff --git a/Sda1/P/xmlstatistics/.gitignore b/Sda1/P/xmlstatistics/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..963f79fe91b7d2ec098e0ef2241735191b79c77d --- /dev/null +++ b/Sda1/P/xmlstatistics/.gitignore @@ -0,0 +1,5 @@ +/target/ +/.settings/ +.classpath +.project +A1.log \ No newline at end of file diff --git a/Sda1/P/xmlstatistics/pom.xml b/Sda1/P/xmlstatistics/pom.xml new file mode 100644 index 0000000000000000000000000000000000000000..aa865e9101feff0087a4b9bf5fac60386300521c --- /dev/null +++ b/Sda1/P/xmlstatistics/pom.xml @@ -0,0 +1,90 @@ +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <groupId>de.hdm-stuttgart.mi.sda1</groupId> + <artifactId>xmlstatistics</artifactId> + <version>0.0.1-SNAPSHOT</version> + <packaging>jar</packaging> + + <name>xmlstatistics</name> + + <!--Fixme: Add a sensible project related domain here --> + <url>http://somedomain.org</url> + + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + </properties> + + <dependencies> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>4.11</version> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>org.apache.logging.log4j</groupId> + <artifactId>log4j-api</artifactId> + <version>2.1</version> + </dependency> + <dependency> + <groupId>org.apache.logging.log4j</groupId> + <artifactId>log4j-core</artifactId> + <version>2.1</version> + </dependency> + + <dependency> + <groupId>de.hdm-stuttgart.mi.sda1</groupId> + <artifactId>saxerrorhandler</artifactId> + <version>0.8</version> + </dependency> + + </dependencies> + + <build> + <plugins> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <version>3.1</version> + <configuration> + 
<source>1.8</source> + <target>1.8</target> + </configuration> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <version>2.10.1</version> + <configuration/> + </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>2.3</version> + <configuration> + <transformers> + <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> + <manifestEntries> + <Main-Class>de.hdm_stuttgart.mi.sda1.xmlstatistics.App</Main-Class> + </manifestEntries> + </transformer> + </transformers> + </configuration> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + </execution> + </executions> + </plugin> + + </plugins> + </build> +</project> diff --git a/Sda1/P/xmlstatistics/sample.xsl b/Sda1/P/xmlstatistics/sample.xsl new file mode 100644 index 0000000000000000000000000000000000000000..dd633bf37906976bbf75ca103bf12831f6c559f5 --- /dev/null +++ b/Sda1/P/xmlstatistics/sample.xsl @@ -0,0 +1,29 @@ +<?xml version="1.0" encoding="UTF-8"?> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:xs="http://www.w3.org/2001/XMLSchema" + xmlns:h="http://www.w3.org/1999/xhtml" + exclude-result-prefixes="xs" version="2.0"> + + <xsl:template match="/"> + <h:html> + <h:head> + <h:title></h:title> + </h:head> + <h:body> + <h:h1>A heading</h:h1> + <h:p>A paragraph</h:p> + <h:h1>Yet another heading</h:h1> + <xsl:apply-templates/> + </h:body> + </h:html> + </xsl:template> + + <xsl:template match="*"> + <xsl:message> + <xsl:text>No template defined for element '</xsl:text> + <xsl:value-of select="name(.)"/> + <xsl:text>'</xsl:text> + </xsl:message> + </xsl:template> + +</xsl:stylesheet> \ No newline at end of file diff --git a/Sda1/P/xmlstatistics/src/main/java/de/hdm_stuttgart/mi/sda1/xmlstatistics/App.java 
b/Sda1/P/xmlstatistics/src/main/java/de/hdm_stuttgart/mi/sda1/xmlstatistics/App.java new file mode 100644 index 0000000000000000000000000000000000000000..db808ac7adcea2102a20989f0435a860d456e090 --- /dev/null +++ b/Sda1/P/xmlstatistics/src/main/java/de/hdm_stuttgart/mi/sda1/xmlstatistics/App.java @@ -0,0 +1,56 @@ +package de.hdm_stuttgart.mi.sda1.xmlstatistics; + +import java.io.IOException; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; + +import de.hdm_stuttgart.mi.sda1.saxerrorhandler.handler.SaxErrorHandler; +import de.hdm_stuttgart.mi.sda1.xmlstatistics.handler.SaxContentHandler; + + +/** + * A simple SAX parser demo + * + */ +public class App { + private static final Logger log = LogManager.getLogger(App.class); + + /** + * @param args Unused + * @throws SAXException + * @throws ParserConfigurationException + * @throws IOException + */ + public static void main( String[] args ) throws ParserConfigurationException, SAXException, IOException { + + log.info("Creating SAX parser factory"); + final SAXParserFactory saxPf = SAXParserFactory.newInstance(); + + log.info("Creating SAX parser"); + final SAXParser saxParser = saxPf.newSAXParser(); + final XMLReader xmlReader = saxParser.getXMLReader(); + + log.info("Enabling namespaces and namespace prefix processing"); + xmlReader.setFeature("http://xml.org/sax/features/validation", false); + xmlReader.setFeature("http://xml.org/sax/features/namespaces", true); + xmlReader.setFeature("http://xml.org/sax/features/namespace-prefixes", true); + + log.info("Registering content- and error handler instances"); + final SaxContentHandler contentHandler = new SaxContentHandler(); + xmlReader.setContentHandler(contentHandler); + xmlReader.setErrorHandler(new SaxErrorHandler()); + + final 
String xmlDocumentInstanceFilename = "sample.xsl"; + log.info("Start parsing file '" + xmlDocumentInstanceFilename + "'"); + xmlReader.parse(xmlDocumentInstanceFilename); + contentHandler.printStatistics(); + + } +} diff --git a/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/ElementcountByName.java b/Sda1/P/xmlstatistics/src/main/java/de/hdm_stuttgart/mi/sda1/xmlstatistics/handler/ElementcountByName.java similarity index 92% rename from ws/eclipse/Jdbc/src/main/java/sax/ns/stat/ElementcountByName.java rename to Sda1/P/xmlstatistics/src/main/java/de/hdm_stuttgart/mi/sda1/xmlstatistics/handler/ElementcountByName.java index 610cd4f1fac5266e31c3e878bedc2f310d0b5648..89273bc4afe52fc828545cca62e441b4110c3c90 100644 --- a/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/ElementcountByName.java +++ b/Sda1/P/xmlstatistics/src/main/java/de/hdm_stuttgart/mi/sda1/xmlstatistics/handler/ElementcountByName.java @@ -1,4 +1,4 @@ -package sax.ns.stat; +package de.hdm_stuttgart.mi.sda1.xmlstatistics.handler; import java.util.HashMap; import java.util.Map; @@ -14,7 +14,7 @@ import java.util.Map; * arbitrary integer frequencies. * * Instances of this class will appear as {@link Map} values in - * {@link FrequencyGatherer} corresponding to a given namespace key value. + * {@link SaxContentHandler} corresponding to a given namespace key value. 
* */ @SuppressWarnings("serial") diff --git a/Sda1/P/xmlstatistics/src/main/java/de/hdm_stuttgart/mi/sda1/xmlstatistics/handler/SaxContentHandler.java b/Sda1/P/xmlstatistics/src/main/java/de/hdm_stuttgart/mi/sda1/xmlstatistics/handler/SaxContentHandler.java new file mode 100644 index 0000000000000000000000000000000000000000..ba902a17bacfaf11c74faeff8f5a6683b6dcec52 --- /dev/null +++ b/Sda1/P/xmlstatistics/src/main/java/de/hdm_stuttgart/mi/sda1/xmlstatistics/handler/SaxContentHandler.java @@ -0,0 +1,153 @@ +package de.hdm_stuttgart.mi.sda1.xmlstatistics.handler; + +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * Creating statistics on elements and namespaces + * + */ +public class SaxContentHandler implements ContentHandler { + private static final Logger log = LogManager.getLogger(SaxContentHandler.class); + + Locator locator = null; + + @Override + public void setDocumentLocator(Locator locator) { + this.locator = locator; + } + + private Map<String, ElementcountByName> frequenciesByNamespace = + new HashMap<String, ElementcountByName>(); + + /** + * @return Array of namespaces being detected + */ + public String[] getNamespaces() { + return frequenciesByNamespace.keySet().toArray(new String[0]); + } + + @Override + public void startElement(final String uri, final String localName, final String qName, + Attributes attributes) throws SAXException { + log.info(displayWithLocation("Opening '" + qName + "'")); + + ElementcountByName elementsInNamespace = frequenciesByNamespace.get(uri); + if (null == elementsInNamespace) {// Does an entry to the current namespace yet exist? 
+ elementsInNamespace = new ElementcountByName(); // Not yet, so create + frequenciesByNamespace.put(uri, elementsInNamespace); // and add a new entry. + } + elementsInNamespace.increment(localName); + } + + /** + * Printing element names with their respective number of occurrences grouped + * by namespace affiliation to standard output. + * + * Consider the following example XHTML+SVG+MathML document instance: + * <pre><html xmlns="http://www.w3.org/1999/xhtml" xmlns:svg="http://www.w3.org/2000/svg"> + <head> + <title>Sample XHTML with Equations</title> + </head> + <body> + <h1>Sample MathML:</h1> + <math xmlns="http://www.w3.org/1998/Math/MathML"> + <mstyle displaystyle="false" scriptlevel="0"> + <mrow> + <mfrac> + <mrow> + <mi mathcolor="gray">sin</mi> + <mo rspace="verythinmathspace">&#x2061;</mo> + <mi>θ</mi> + </mrow> + <mi>π</mi> + </mfrac> + </mrow> + </mstyle> + </math> + <h1>Sample SVG:</h1> + <svg:svg version="1.1" baseProfile="full" width="300px" height="200px"> + <svg:circle cx="150px" cy="100px" r="50px" fill="#ff0000" + stroke="#000000" stroke-width="5px"/> + </svg:svg> + </body> + </html></pre> + + * The resulting output will be: + * <pre><b>Namespace 'http://www.w3.org/1998/Math/MathML' contains:</b> + <math> (1 occurrence) + <mo> (1 occurrence) + <mrow> (2 occurrences) + <mfrac> (1 occurrence) + <mi> (3 occurrences) + <mstyle> (1 occurrence) + + <b>Namespace 'http://www.w3.org/1999/xhtml' contains:</b> + <body> (1 occurrence) + <title> (1 occurrence) + <html> (1 occurrence) + <h1> (2 occurrences) + <head> (1 occurrence) + + <b>Namespace 'http://www.w3.org/2000/svg' contains:</b> + <svg> (1 occurrence) + <circle> (1 occurrence)</pre> + */ + public void printStatistics () { + for (final Entry<String, ElementcountByName> entry: frequenciesByNamespace.entrySet()) { + System.out.println("Namespace '" + entry.getKey() + "' contains:"); + entry.getValue().printStatistics(); + System.out.println(); + } + } + + String displayWithLocation(final String saxMsg) { 
+ if (null == locator) { + return saxMsg; + } else { + return "File position (" + locator.getLineNumber() + ", " + locator.getColumnNumber() + "): " + saxMsg; + } + } + + // We don't need these ... + @Override + public void startDocument() throws SAXException {} + + @Override + public void endDocument() throws SAXException {} + + @Override + public void startPrefixMapping(String prefix, String uri) + throws SAXException {} + + @Override + public void endPrefixMapping(String prefix) throws SAXException {} + + @Override + public void endElement(String uri, String localName, String qName) + throws SAXException {} + + @Override + public void characters(char[] ch, int start, int length) + throws SAXException {} + + @Override + public void ignorableWhitespace(char[] ch, int start, int length) + throws SAXException {} + + @Override + public void processingInstruction(String target, String data) + throws SAXException {} + + @Override + public void skippedEntity(String name) throws SAXException {} + +} \ No newline at end of file diff --git a/Sda1/P/xmlstatistics/src/main/resources/log4j2.xml b/Sda1/P/xmlstatistics/src/main/resources/log4j2.xml new file mode 100644 index 0000000000000000000000000000000000000000..63574fb442015db24609cab03a0c999e7c9a95ad --- /dev/null +++ b/Sda1/P/xmlstatistics/src/main/resources/log4j2.xml @@ -0,0 +1,21 @@ +<?xml version="1.0" encoding="UTF-8"?> +<Configuration> + <Appenders> + <File name="A1" fileName="A1.log" append="false"> + <PatternLayout pattern="%t %-5p %c{2} - %m%n"/> + </File> + <Console name="STDOUT" target="SYSTEM_OUT"> + <PatternLayout pattern="%C{2} (%F:%L) - %m%n"/> + </Console> + </Appenders> + <Loggers> + + <!-- You may want to define class or package level per-logger rules --> + <Logger name="de.hdm_stuttgart.mi.sda1.xmlstatistics.App" level="info"> + <AppenderRef ref="A1"/> + </Logger> + <Root level="info"> + <AppenderRef ref="STDOUT"/> + </Root> + </Loggers> +</Configuration> \ No newline at end of file diff --git 
a/Sda1/P/xmlstatistics/src/test/java/de/hdm_stuttgart/mi/sda1/xmlstatistics/AppTest.java b/Sda1/P/xmlstatistics/src/test/java/de/hdm_stuttgart/mi/sda1/xmlstatistics/AppTest.java new file mode 100644 index 0000000000000000000000000000000000000000..9e0c0eaebc32e066a5243fafb79e28c6518b5262 --- /dev/null +++ b/Sda1/P/xmlstatistics/src/test/java/de/hdm_stuttgart/mi/sda1/xmlstatistics/AppTest.java @@ -0,0 +1,52 @@ +package de.hdm_stuttgart.mi.sda1.xmlstatistics; + +import java.io.IOException; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; + +import org.junit.Assert; +import org.junit.Test; +import org.xml.sax.SAXException; +import org.xml.sax.SAXNotRecognizedException; +import org.xml.sax.SAXNotSupportedException; +import org.xml.sax.XMLReader; + +import de.hdm_stuttgart.mi.sda1.saxerrorhandler.handler.SaxErrorHandler; +import de.hdm_stuttgart.mi.sda1.xmlstatistics.handler.SaxContentHandler; + +/** + * Unit test for simple App. 
+ */ +public class AppTest { + /** + * Checks the namespaces detected when parsing sample.xsl + * @throws SAXException + * @throws ParserConfigurationException + * @throws IOException + */ + @Test + public void testNamespaceSet() throws ParserConfigurationException, SAXException, IOException { + final SAXParserFactory saxPf = SAXParserFactory.newInstance(); + + final SAXParser saxParser = saxPf.newSAXParser(); + final XMLReader xmlReader = saxParser.getXMLReader(); + + xmlReader.setFeature("http://xml.org/sax/features/validation", false); + xmlReader.setFeature("http://xml.org/sax/features/namespaces", true); + xmlReader.setFeature("http://xml.org/sax/features/namespace-prefixes", true); + + final SaxContentHandler contentHandler = new SaxContentHandler(); + xmlReader.setContentHandler(contentHandler); + xmlReader.setErrorHandler(new SaxErrorHandler()); + + final String xmlDocumentInstanceFilename = "sample.xsl"; + xmlReader.parse(xmlDocumentInstanceFilename); + + Assert.assertArrayEquals(new String[]{"http://www.w3.org/1999/xhtml","http://www.w3.org/1999/XSL/Transform"}, + contentHandler.getNamespaces()); + } + + // Further tests not yet written for laziness ... 
+} diff --git a/Sda1/sda1.xml b/Sda1/sda1.xml index 7b045a4acd68e9df6a1c8b8b698bd1cf69fcde57..112efea4234e3a7faa2aa21f39c82022c4940372 100644 --- a/Sda1/sda1.xml +++ b/Sda1/sda1.xml @@ -6907,111 +6907,79 @@ INSERT INTO Description VALUES('instantTent', 2, 'Exclusive WeatherTec system.') linkend="gloss_MathML">MathML</link> sample document:</para> <programlisting language="none"><?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" - "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd" > -<html xmlns="<emphasis role="bold">http://www.w3.org/1999/xhtml</emphasis>" <co - xml:id="xhtmlCombinedNs_Xhtml"/> - xmlns:svg="<emphasis role="bold">http://www.w3.org/2000/svg</emphasis>"> <co +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:xs="http://www.w3.org/2001/XMLSchema" <co xml:id="xhtmlCombinedNs_Svg"/> - <head> - <title>Sample XHTML with Equations</title> - </head> - <body> - <h1>Sample MathML:</h1> - <math <emphasis role="bold">xmlns="http://www.w3.org/1998/Math/MathML</emphasis>"> <co - xml:id="xhtmlCombinedNs_Mathml"/> - <mstyle displaystyle="false" scriptlevel="0"> - <mrow> - <mfrac> - <mrow> - <mi mathcolor="gray">sin</mi> - <mo rspace="verythinmathspace">â¡</mo> - <mi>θ</mi> - </mrow> - <mi>Ï€</mi> - </mfrac> - </mrow> - </mstyle> - </math> - <h1>Sample SVG:</h1> - <svg:svg version="1.1" baseProfile="full" width="300px" height="200px"> - <svg:circle cx="150px" cy="100px" r="50px" fill="#ff0000" - stroke="#000000" stroke-width="5px"/> - </svg:svg> - </body> -</html></programlisting> + xmlns:h="http://www.w3.org/1999/xhtml" <co xml:id="xhtmlCombinedNs_Xhtml"/> + exclude-result-prefixes="xs" version="2.0"> + + <xsl:template match="/"> + <h:html> + <h:head> + <h:title></h:title> + </h:head> + <h:body> + <h:h1>A heading</h:h1> + <h:p>A paragraph</h:p> + <h:h1>Yet another heading</h:h1> + <xsl:apply-templates/> + </h:body> + </h:html> + </xsl:template> + + <xsl:template 
match="*"> + <xsl:message> + <xsl:text>No template defined for element '</xsl:text> + <xsl:value-of select="name(.)"/> + <xsl:text>'</xsl:text> + </xsl:message> + </xsl:template> - <para>This document does define three different namespaces - <coref linkend="xhtmlCombinedNs_Xhtml"/>, <coref - linkend="xhtmlCombinedNs_Svg"/> and <coref - linkend="xhtmlCombinedNs_Mathml"/> corresponding to the three - XML application components.</para> +</xsl:stylesheet></programlisting> + + <para>This XSL stylesheet defines two different namespaces + <coref linkend="xhtmlCombinedNs_Svg"/> and <coref + linkend="xhtmlCombinedNs_Xhtml"/>.</para> <para>Implement a <link linkend="gloss_SAX">SAX</link> - application being able to group elements of arbitrary XML + application being able to group elements from arbitrary XML documents by namespaces along with their corresponding - frequencies of occurrence. The intended output corresponding - to the previous <link linkend="gloss_XHTML">XHTM</link>L - example might look like:</para> + frequencies of occurrence. 
The intended output for the + previous <xref linkend="glo_XSL"/> example shall look + like:</para> <programlisting language="none">Namespace '<emphasis - role="bold">http://www.w3.org/1998/Math/MathML</emphasis>' contains: -<math> (1 occurrence) -<mo> (1 occurrence) -<mrow> (2 occurrences) -<mfrac> (1 occurrence) -<mi> (3 occurrences) -<mstyle> (1 occurrence) - -Namespace '<emphasis role="bold">http://www.w3.org/1999/xhtml</emphasis>' contains: -<body> (1 occurrence) -<title> (1 occurrence) -<html> (1 occurrence) -<h1> (2 occurrences) + role="bold">http://www.w3.org/1999/xhtml</emphasis>' contains: <head> (1 occurrence) +<p> (1 occurrence) +<h1> (2 occurrences) +<html> (1 occurrence) +<title> (1 occurrence) +<body> (1 occurrence) -Namespace '<emphasis role="bold">http://www.w3.org/2000/svg</emphasis>' contains: -<svg> (1 occurrence) -<circle> (1 occurrence)</programlisting> - - <para>Hints:</para> - - <itemizedlist> - <listitem> - <para>Counting frequencies and grouping by namespaces may - be achieved by using standard Java container - implementations of <classname>java.util.Map</classname>. - You may for example define sets of related XML elements - and group them by their corresponding namespaces.</para> - </listitem> - - <listitem> - <para>Enabling validation may require an entity resolver - as being described in <xref linkend="saxValidate"/>. - Otherwise you may experience long parsing times due to - downloading of referenced schema components.</para> - </listitem> - </itemizedlist> +Namespace '<emphasis role="bold">http://www.w3.org/1999/XSL/Transform</emphasis>' contains: +<stylesheet> (1 occurrence) +<template> (2 occurrences) +<value-of> (1 occurrence) +<apply-templates> (1 occurrence) +<text> (2 occurrences) +<message> (1 occurrence)</programlisting> + + <para>Hint: Counting frequencies and grouping by namespaces + may be achieved by using standard Java container + implementations of <classname>java.util.Map</classname>. 
You + may for example define sets of related XML elements and group + them by their corresponding namespaces. Thus nested maps are + required.</para> </question> <answer> - <itemizedlist> - <listitem> - <para><classname>sax.ns.stat.ElementFrequencyDriver</classname></para> - </listitem> - - <listitem> - <para><classname>sax.ns.stat.ElementFrequency</classname></para> - </listitem> - - <listitem> - <para><classname>sax.ns.stat.ElementcountByName</classname></para> - </listitem> + <annotation role="make"> + <para role="eclipse">P/xmlstatistics</para> + </annotation> - <listitem> - <para><classname>sax.ns.stat.FrequencyGatherer</classname></para> - </listitem> - </itemizedlist> + <para>The above solution contains both a running application + and an (incomplete) <xref linkend="glo_Junit"/> test.</para> </answer> </qandaentry> </qandadiv> diff --git a/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/ElementFrequency.java b/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/ElementFrequency.java deleted file mode 100644 index 51fd439f87fee7889c5b8f38593c8f2edc9cabf6..0000000000000000000000000000000000000000 --- a/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/ElementFrequency.java +++ /dev/null @@ -1,61 +0,0 @@ -package sax.ns.stat; - -import java.io.IOException; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; - -import org.apache.xml.resolver.CatalogManager; -import org.apache.xml.resolver.tools.CatalogResolver; -import org.xml.sax.SAXException; -import org.xml.sax.XMLReader; - -import dom.MySaxErrorHandler; - -/** Parsing catalog instances for import to RDBMS. */ -public class ElementFrequency { - - /** Set up a validating parser instance. - * - * @throws SAXException Parsing may fail. - * @throws ParserConfigurationException Unable to instantiate parser. 
- */ - public ElementFrequency() - - throws SAXException, ParserConfigurationException { - - final SAXParserFactory saxPf = SAXParserFactory.newInstance(); - final SAXParser saxParser = saxPf.newSAXParser(); - xmlReader = saxParser.getXMLReader(); - - // Set up resolving PUBLIC identifier - final CatalogManager cm = new CatalogManager("CatalogManager.properties"); - final CatalogResolver resolver = new CatalogResolver(cm); - xmlReader.setEntityResolver(resolver); - - xmlReader.setFeature("http://xml.org/sax/features/validation", false); - xmlReader.setFeature("http://xml.org/sax/features/namespaces", true); - xmlReader.setFeature("http://xml.org/sax/features/namespace-prefixes", true); - xmlReader.setContentHandler(frequencyGatherer); - xmlReader.setErrorHandler(new MySaxErrorHandler(System.out)); - } - /** Initiate parsing - * @param uri The resource to be parsed - * @throws IOException Access related problems - * @throws SAXException Parsing may fail - */ - public void parse(final String uri) - throws IOException, SAXException { - xmlReader.parse(uri); - } - /** - * See {@link FrequencyGatherer#printStatistics()} - */ - public void printStatistics () { - frequencyGatherer.printStatistics(); - } - final FrequencyGatherer frequencyGatherer = new FrequencyGatherer(); - private final XMLReader xmlReader; - -} \ No newline at end of file diff --git a/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/ElementFrequencyDriver.java b/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/ElementFrequencyDriver.java deleted file mode 100644 index ccbf6d7cca902d4ab7c4ad947fff891ca061dd86..0000000000000000000000000000000000000000 --- a/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/ElementFrequencyDriver.java +++ /dev/null @@ -1,12 +0,0 @@ -package sax.ns.stat; - -public class ElementFrequencyDriver { - - - public static void main(String argv[]) throws Exception{ - final ElementFrequency elementFrequency = new ElementFrequency(); - elementFrequency.parse("src/main/java/sax/ns/stat/mixed.xml"); - 
elementFrequency.printStatistics(); - } - -} diff --git a/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/FrequencyGatherer.java b/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/FrequencyGatherer.java deleted file mode 100644 index f19290e83a40fa7f332667edb608e6ba4b9802ac..0000000000000000000000000000000000000000 --- a/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/FrequencyGatherer.java +++ /dev/null @@ -1,92 +0,0 @@ -package sax.ns.stat; - -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; - -import org.xml.sax.Attributes; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.DefaultHandler; - -/** Creating statistical data. Group elements occurring in arbitrary - * document instances by their respective namespaces and count the - * number of occurrences. - */ -public class FrequencyGatherer extends DefaultHandler { - - private Map<String, ElementcountByName> frequenciesByNamespace = - new HashMap<String, ElementcountByName>(); - - @Override - public void startElement(final String uri, final String localName, final String qName, - Attributes attributes) throws SAXException { - - ElementcountByName elementsInNamespace = frequenciesByNamespace.get(uri); - if (null == elementsInNamespace) {// Does an entry to the current namespace yet exist? - elementsInNamespace = new ElementcountByName(); // Not yet, so create - frequenciesByNamespace.put(uri, elementsInNamespace); // and add a new entry. - } - elementsInNamespace.increment(localName); - } - - /** - * Printing element names with their respective number of occurrences grouped - * by namespace affiliation to standard output. 
- * - * Consider the following example XHTML+SVG+MATHML document instance: - * <pre><html xmlns="http://www.w3.org/1999/xhtml" xmlns:svg="http://www.w3.org/2000/svg"> - <head> - <title>Sample XHTML with Equations</title> - </head> - <body> - <h1>Sample MathML:</h1> - <math xmlns="http://www.w3.org/1998/Math/MathML"> - <mstyle displaystyle="false" scriptlevel="0"> - <mrow> - <mfrac> - <mrow> - <mi mathcolor="gray">sin</mi> - <mo rspace="verythinmathspace">â¡</mo> - <mi>θ</mi> - </mrow> - <mi>Ï€</mi> - </mfrac> - </mrow> - </mstyle> - </math> - <h1>Sample SVG:</h1> - <svg:svg version="1.1" baseProfile="full" width="300px" height="200px"> - <svg:circle cx="150px" cy="100px" r="50px" fill="#ff0000" - stroke="#000000" stroke-width="5px"/> - </svg:svg> - </body> -</html></pre> - - * The resulting output will be: - * <pre><b>Namespace 'http://www.w3.org/1998/Math/MathML' contains:</b> -<math> (1 occurrence) -<mo> (1 occurrence) -<mrow> (2 occurrences) -<mfrac> (1 occurrence) -<mi> (3 occurrences) -<mstyle> (1 occurrence) - -<b>Namespace 'http://www.w3.org/1999/xhtml' contains:</b> -<body> (1 occurrence) -<title> (1 occurrence) -<html> (1 occurrence) -<h1> (2 occurrences) -<head> (1 occurrence) - -<b>Namespace 'http://www.w3.org/2000/svg' contains:</b> -<svg> (1 occurrence) -<circle> (1 occurrence)</pre> - */ - public void printStatistics () { - for (final Entry<String, ElementcountByName> entry: frequenciesByNamespace.entrySet()) { - System.out.println("Namespace '" + entry.getKey() + "' contains:"); - entry.getValue().printStatistics(); - System.out.println(); - } - } -} \ No newline at end of file diff --git a/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/mixed.xml b/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/mixed.xml deleted file mode 100644 index bb346b1e93417d68f39e3a376b2782ffff755432..0000000000000000000000000000000000000000 --- a/ws/eclipse/Jdbc/src/main/java/sax/ns/stat/mixed.xml +++ /dev/null @@ -1,30 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE html 
PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" - "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd" > -<html xmlns="http://www.w3.org/1999/xhtml" xmlns:svg="http://www.w3.org/2000/svg"> - <head> - <title>Sample XHTML with Equations</title> - </head> - <body> - <h1>Sample MathML:</h1> - <math xmlns="http://www.w3.org/1998/Math/MathML"> - <mstyle displaystyle="false" scriptlevel="0"> - <mrow> - <mfrac> - <mrow> - <mi mathcolor="gray">sin</mi> - <mo rspace="verythinmathspace">â¡</mo> - <mi>θ</mi> - </mrow> - <mi>Ï€</mi> - </mfrac> - </mrow> - </mstyle> - </math> - <h1>Sample SVG:</h1> - <svg:svg version="1.1" baseProfile="full" width="300px" height="200px"> - <svg:circle cx="150px" cy="100px" r="50px" fill="#ff0000" - stroke="#000000" stroke-width="5px"/> - </svg:svg> - </body> -</html>