Copyright © 2006 Mihai Gabroveanu
Abstract
In acest curs este prezentat modul de procesare a documentelor XML utilizand limbajul Java.
Sa presupunem ca dorim sa procesam fisierul XML urmator utilizand DOM:
<?xml version="1.0"?> <!-- Documentul ce retine o carte --> <CARTE id="10"> <TITLU>XML Bible</TITLU> <AUTOR>Elliotte Rusty Harold</AUTOR> <EDITURA>IDG Books Worldwide</EDITURA> <AN_APARITIE>2002</AN_APARITIE> </CARTE>
In continuare este prezentata secventa Java ce realizeaza operatii asupra acestui XML
package edu.ucv.xml; import java.io.*; import javax.xml.parsers.*; import javax.xml.transform.*; import javax.xml.transform.dom.*; import javax.xml.transform.stream.*; import org.w3c.dom.*; import org.xml.sax.*; /** * Exemplu de procesare a documentelor XML folosind DOM * *@author Mihai Gabroveanu *@created 31 martie 2006 *@version 1.0 */ public class TestDOM { /** * Afiseaza continutul unui document XML pe ecran. * *@param doc Dcoumentul XML. *@param out Output-ul. */ public static void displayDocument(Document doc, OutputStream out) { try { TransformerFactory xformFactory = TransformerFactory.newInstance(); Transformer idTransform = xformFactory.newTransformer(); Source input = new DOMSource(doc); Result output = new StreamResult(out); idTransform.transform(input, output); } catch (TransformerException e) { } catch (TransformerFactoryConfigurationError ex) { } } /** * The main program for the TestDOM class * *@param args The command line arguments */ public static void main(String[] args) { //Se obtine o instanta de parser DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = null; factory.setValidating(false); try { docBuilder = factory.newDocumentBuilder(); } catch (ParserConfigurationException e) { System.out.println("Eroare la crearea Document Builder-ului:" + e.getMessage()); System.exit(1); } String numeFisier = "carte.xml"; Document doc = null; try { //Incarc documentul XML doc = docBuilder.parse(new File(numeFisier)); } catch (SAXException e) { System.err.println("Eroare la parsarea documentului [" + numeFisier + "]:" + e.getMessage()); System.exit(1); } catch (IOException e) { System.err.println("Eroare I/O la deschierea fisierului [" + numeFisier + "]:" + e.getMessage()); System.exit(1); } //Obtin elelementul radacina Element rootElement = doc.getDocumentElement(); System.out.println("Numele elementului radacina este:" + rootElement.getNodeName()); //Lista copiilor System.out.println("Lista copiilor pentru 
elementul:" + rootElement.getNodeName()); NodeList rootNodeList = rootElement.getChildNodes(); for (int i = 0; i < rootNodeList.getLength(); i++) { Node childNode = rootNodeList.item(i); if (childNode.getNodeType() == Node.ELEMENT_NODE) { System.out.print(" " + childNode.getNodeName()); Node firstChild = childNode.getFirstChild(); //incerc sa obtin valoarea din aceste Elemente if (firstChild != null && firstChild.getNodeType() == Node.TEXT_NODE) { //daca primul copil al lui childNode este nod text (CDATA) atunci obtin valoarea acestuia Text textNode = (Text) firstChild; System.out.println("=[" + textNode.getNodeValue() + "]"); } else { System.out.println("=[]"); } } } if (rootElement.hasAttribute("id")) { System.out.println("Elementul radacina are atributul id='" + rootElement.getAttribute("id") + "'"); } //Construiesc un element TIRAJ cu valoarea 1000 Element tirajElement = doc.createElement("TIRAJ"); Text continutTirajElement = doc.createTextNode("1000"); //Apendez la TIRAJ valoarea tirajElement.appendChild(continutTirajElement); //Adaug elementul tiraj ca fiu al lui CARTE. rootElement.appendChild(tirajElement); displayDocument(doc, System.out); } }
SAX (Simple API for XML) ofera posibilitatea parcurgerii secventiale a documentelor XML. Pe masura ce documentul este parcurs de catre parser sunt generate automat evenimente, ca de exemplu: inceputul si sfarsitul unui document, inceputul si sfarsitul unui element etc., ce pot fi interceptate in vederea realizarii anumitor operatii.
In vederea procesarii unui document XML cu SAX trebuie parcursi urmatorii pasi:
Crearea unui ”event-handler”
Crearea unui parser SAX
Asocierea dintre handler si parser
Parcurgerea documentului si tratarea evenimentelor generate
In continuare este prezentat un mic exemplu de prelucrare a unui document XML utilizand SAX:
import java.io.*; import javax.xml.parsers.*; /** * SAX Demo * *@author Mihai Gabroveanu *@created 07 aprilie 2006 *@version 1.0 */ public class SAXTest { /** * The main program for the SAXTest class * *@param args The command line arguments */ public static void main(String[] args) { String xmlFileName = "carte.xml"; DemoSAXHandler handler = new DemoSAXHandler(new OutputStreamWriter(System.out)); SAXParserFactory factory = SAXParserFactory.newInstance(); try { // Parse the input SAXParser saxParser = factory.newSAXParser(); saxParser.parse(new File(xmlFileName), handler); } catch (Throwable t) { t.printStackTrace(); } System.exit(0); } }
Clasa ce trateaza evenimentele generate la parsare este urmatoarea:
import java.io.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;

/**
 * SAX event handler that writes a readable, indented trace of the parse
 * events (document, elements, attributes, character data and processing
 * instructions) to a {@link Writer}.
 *
 * @author Mihai Gabroveanu
 * @created 07 aprilie 2006
 */
public class DemoSAXHandler extends DefaultHandler {

    /** Destination of the event trace. */
    private final Writer out;

    /** Text written once per indentation level at the start of each line. */
    private final String indentString = " ";

    /** Current indentation level; incremented on element start, decremented on element end. */
    private int indentLevel = 0;

    /**
     * Creates a handler that writes its trace to the given writer.
     *
     * @param out the writer that receives the trace
     */
    public DemoSAXHandler(Writer out) {
        this.out = out;
    }

    /**
     * Called when parsing of the document starts. Emits a start marker
     * followed by a fixed XML declaration line.
     *
     * @exception SAXException if writing the trace fails
     */
    @Override
    public void startDocument() throws SAXException {
        nl();
        nl();
        emit("START DOCUMENT");
        nl();
        emit("<?xml version='1.0' encoding='UTF-8'?>");
    }

    /**
     * Called when the end of the document is reached. Emits an end marker
     * and flushes the writer.
     *
     * @exception SAXException if writing or flushing the trace fails
     */
    @Override
    public void endDocument() throws SAXException {
        nl();
        emit("END DOCUMENT");
        try {
            nl();
            out.flush();
        } catch (IOException e) {
            throw new SAXException("I/O error", e);
        }
    }

    /**
     * Called at the start of every element. Increases the indentation and
     * emits the element name followed by one line per attribute.
     *
     * @param namespaceURI the namespace URI of the element
     * @param sName the local (simple) name; empty when the parser is not namespace-aware
     * @param qName the qualified name
     * @param attrs the attributes of the element; {@code null} is treated as no attributes
     * @exception SAXException if writing the trace fails
     */
    @Override
    public void startElement(String namespaceURI, String sName, String qName, Attributes attrs)
            throws SAXException {
        indentLevel++;
        nl();
        emit("ELEMENT: ");
        emit("<" + displayName(sName, qName));

        // Guard once so both the loop and the trailing newline are null-safe.
        int attrCount = (attrs == null) ? 0 : attrs.getLength();
        for (int i = 0; i < attrCount; i++) {
            nl();
            emit(" ATTR: ");
            emit(displayName(attrs.getLocalName(i), attrs.getQName(i)));
            emit("\t\"");
            emit(attrs.getValue(i));
            emit("\"");
        }
        if (attrCount > 0) {
            nl();
        }
        emit(">");
    }

    /**
     * Called at the end of every element. Emits the closing tag and
     * decreases the indentation.
     *
     * @param namespaceURI the namespace URI of the element
     * @param sName the local (simple) name; empty when the parser is not namespace-aware
     * @param qName the qualified name
     * @exception SAXException if writing the trace fails
     */
    @Override
    public void endElement(String namespaceURI, String sName, String qName) throws SAXException {
        nl();
        emit("END_ELM: ");
        // Same name resolution as startElement, so the closing tag is never "</>".
        emit("</" + displayName(sName, qName) + ">");
        indentLevel--;
    }

    /**
     * Called when character data is encountered. Always emits the
     * {@code CHARS:} label; the text itself is emitted only when it is not
     * whitespace-only.
     *
     * @param buf the character buffer
     * @param offset index of the first character of the data in {@code buf}
     * @param len number of characters of data
     * @exception SAXException if writing the trace fails
     */
    @Override
    public void characters(char buf[], int offset, int len) throws SAXException {
        nl();
        emit("CHARS: ");
        String s = new String(buf, offset, len);
        if (!s.trim().equals("")) {
            emit(s);
        }
    }

    /**
     * Called when a processing instruction is encountered.
     *
     * @param target the processing instruction target
     * @param data the processing instruction data
     * @exception SAXException if writing the trace fails
     */
    @Override
    public void processingInstruction(String target, String data) throws SAXException {
        nl();
        emit("PROCESS: ");
        emit("<?" + target + " " + data + "?>");
    }

    //===========================================================
    // SAX ErrorHandler methods.
    //===========================================================

    /**
     * Handles recoverable parse errors by rethrowing them.
     *
     * @param e the error reported by the parser
     * @exception SAXParseException always; the received exception is rethrown
     */
    @Override
    public void error(SAXParseException e) throws SAXParseException {
        throw e;
    }

    /**
     * Handles parser warnings by printing the line number, system id and
     * message to standard output.
     *
     * @param err the warning reported by the parser
     * @exception SAXParseException declared by the signature; not thrown by this implementation
     */
    @Override
    public void warning(SAXParseException err) throws SAXParseException {
        System.out.println("** Warning" + ", line " + err.getLineNumber() + ", uri " + err.getSystemId());
        System.out.println(" " + err.getMessage());
    }

    //===========================================================
    // Utility methods
    //===========================================================

    /**
     * Chooses the name to display: the local name, or the qualified name when
     * the local name is missing or empty.
     *
     * @param localName the local name reported by the parser
     * @param qName the qualified name reported by the parser
     * @return the name to print
     */
    private static String displayName(String localName, String qName) {
        return (localName == null || "".equals(localName)) ? qName : localName;
    }

    /**
     * Writes a string to the output and flushes it, mapping I/O errors to
     * SAX errors.
     *
     * @param s the text to write
     * @exception SAXException if an I/O error occurs
     */
    private void emit(String s) throws SAXException {
        try {
            out.write(s);
            out.flush();
        } catch (IOException e) {
            throw new SAXException("I/O error", e);
        }
    }

    /**
     * Starts a new line and indents it according to the current level.
     *
     * @exception SAXException if an I/O error occurs
     */
    private void nl() throws SAXException {
        String lineEnd = System.getProperty("line.separator");
        try {
            out.write(lineEnd);
            for (int i = 0; i < indentLevel; i++) {
                out.write(indentString);
            }
        } catch (IOException e) {
            throw new SAXException("I/O error", e);
        }
    }
}
Processing XML with Java http://www.ibiblio.org/xml/books/xmljava/
Working with XML: The Java API for Xml Processing (JAXP) Tutorial - http://java.sun.com/webservices/jaxp/dist/1.1/docs/tutorial/
Elliotte Rusty Harold, XML Bible. IDG Books Worldwide, Inc, 919 E. Hillsdale Blvd., Suite 400, Foster City, CA 94404