Copyright © 2006 Mihai Gabroveanu
Abstract
In acest curs este prezentat modul de procesare a documentelor XML utilizand limbajul Java.
Sa presupunem ca dorim sa procesam fisierul XML urmator utilizand DOM:
<?xml version="1.0"?>
<!-- Documentul ce retine o carte -->
<CARTE id="10">
<TITLU>XML Bible</TITLU>
<AUTOR>Elliotte Rusty Harold</AUTOR>
<EDITURA>IDG Books Worldwide</EDITURA>
<AN_APARITIE>2002</AN_APARITIE>
</CARTE>
In continuare este prezentata secventa Java ce realizeaza operatii asupra acestui XML
package edu.ucv.xml;
import java.io.*;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import org.w3c.dom.*;
import org.xml.sax.*;
/**
 * Example of processing XML documents with the DOM API: the document is
 * loaded, its root element and child elements are listed, a new element is
 * appended and the resulting document is written to standard output.
 *
 *@author Mihai Gabroveanu
 *@created 31 March 2006
 *@version 1.0
 */
public class TestDOM {
    /** File processed when no file name is given on the command line. */
    private static final String DEFAULT_FILE_NAME = "carte.xml";

    /**
     * Writes the content of an XML document to the given output stream.
     * Serialization failures are reported on standard error instead of being
     * silently ignored; no exception is propagated to the caller.
     *
     *@param doc The XML document.
     *@param out The stream the serialized document is written to.
     */
    public static void displayDocument(Document doc, OutputStream out) {
        try {
            TransformerFactory xformFactory = TransformerFactory.newInstance();
            // A transformer created without a stylesheet copies the source to the result unchanged.
            Transformer idTransform = xformFactory.newTransformer();
            Source input = new DOMSource(doc);
            Result output = new StreamResult(out);
            idTransform.transform(input, output);
        } catch (TransformerException e) {
            System.err.println("Eroare la afisarea documentului:" + e.getMessage());
        } catch (TransformerFactoryConfigurationError ex) {
            System.err.println("Eroare la configurarea TransformerFactory:" + ex.getMessage());
        }
    }

    /**
     * Prints every child element of the given element as {@code NAME=[value]},
     * where the value is taken from the child's first node when that node is
     * a text node, and is left empty otherwise.
     *
     *@param parent The element whose child elements are listed.
     */
    private static void printChildElements(Element parent) {
        NodeList children = parent.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node childNode = children.item(i);
            // Skip whitespace text nodes, comments, etc. between the child elements.
            if (childNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            System.out.print(" " + childNode.getNodeName());
            Node firstChild = childNode.getFirstChild();
            if (firstChild != null && firstChild.getNodeType() == Node.TEXT_NODE) {
                System.out.println("=[" + firstChild.getNodeValue() + "]");
            } else {
                System.out.println("=[]");
            }
        }
    }

    /**
     * The main program for the TestDOM class.
     *
     *@param args The command line arguments; the optional first argument is
     *      the XML file to process ("carte.xml" when absent).
     */
    public static void main(String[] args) {
        // Obtain a parser instance.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = null;
        factory.setValidating(false);
        try {
            docBuilder = factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            System.err.println("Eroare la crearea Document Builder-ului:" + e.getMessage());
            System.exit(1);
        }

        String numeFisier = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_NAME;
        Document doc = null;
        try {
            // Load the XML document.
            doc = docBuilder.parse(new File(numeFisier));
        } catch (SAXException e) {
            System.err.println("Eroare la parsarea documentului [" + numeFisier + "]:" + e.getMessage());
            System.exit(1);
        } catch (IOException e) {
            System.err.println("Eroare I/O la deschierea fisierului [" + numeFisier + "]:" + e.getMessage());
            System.exit(1);
        }

        // Get the root element.
        Element rootElement = doc.getDocumentElement();
        System.out.println("Numele elementului radacina este:" + rootElement.getNodeName());

        // List the children of the root element.
        System.out.println("Lista copiilor pentru elementul:" + rootElement.getNodeName());
        printChildElements(rootElement);

        if (rootElement.hasAttribute("id")) {
            System.out.println("Elementul radacina are atributul id='" + rootElement.getAttribute("id") + "'");
        }

        // Build a TIRAJ element with the text value 1000 ...
        Element tirajElement = doc.createElement("TIRAJ");
        Text continutTirajElement = doc.createTextNode("1000");
        tirajElement.appendChild(continutTirajElement);
        // ... and add it as the last child of the root element.
        rootElement.appendChild(tirajElement);

        displayDocument(doc, System.out);
    }
}
SAX (Simple API for XML) ofera posibilitatea parcurgerii secventiale a documentelor XML. Pe masura ce documentul este parcurs de catre parser sunt generate automat evenimente, ca de exemplu: inceputul si sfarsitul unui document, inceputul si sfarsitul unui element etc., ce pot fi interceptate in vederea realizarii anumitor operatii.
In vederea procesarii unui document XML cu SAX trebuie parcursi urmatorii pasi:
Crearea unui ”event-handler”
Crearea unui parser SAX
Asocierea dintre handler si parser
Parcurgerea documentului si tratarea evenimentelor generate
In continuare este prezentat un mic exemplu de prelucrare a unui document XML utilizand SAX:
import java.io.*;
import javax.xml.parsers.*;
/**
 * SAX demo: parses an XML file and lets a {@code DemoSAXHandler} print the
 * generated parse events to standard output.
 *
 *@author Mihai Gabroveanu
 *@created 7 April 2006
 *@version 1.0
 */
public class SAXTest {
    /**
     * The main program for the SAXTest class. The process exits with status 0
     * when the file was parsed successfully and with status 1 when parsing
     * failed.
     *
     *@param args The command line arguments; the optional first argument is
     *      the XML file to parse ("carte.xml" when absent).
     */
    public static void main(String[] args) {
        String xmlFileName = (args != null && args.length > 0) ? args[0] : "carte.xml";
        DemoSAXHandler handler = new DemoSAXHandler(new OutputStreamWriter(System.out));
        SAXParserFactory factory = SAXParserFactory.newInstance();
        int exitStatus = 0;
        try {
            // Parse the input; the handler is called back for every event.
            SAXParser saxParser = factory.newSAXParser();
            saxParser.parse(new File(xmlFileName), handler);
        } catch (Exception e) {
            // Report the failure and make it visible to the caller through the exit status.
            e.printStackTrace();
            exitStatus = 1;
        }
        System.exit(exitStatus);
    }
}
Clasa ce trateaza evenimentele generate la parsare este urmatoarea:
import java.io.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;

/**
 * SAX event handler that writes an indented, human-readable trace of the
 * events generated by the parser to a {@link Writer}.
 *
 *@author Mihai Gabroveanu
 *@created 7 April 2006
 */
public class DemoSAXHandler extends DefaultHandler {
    /** Line terminator written by {@link #nl()}. */
    private static final String LINE_END = System.getProperty("line.separator");

    /** Text written once per indentation level. */
    private static final String INDENT = " ";

    /** Destination of the event trace. */
    private final Writer out;

    /** Current indentation level (element nesting depth). */
    private int indentLevel = 0;

    /**
     * Creates a handler that writes its trace to the given writer.
     *
     *@param out The writer that receives the trace.
     */
    public DemoSAXHandler(Writer out) {
        this.out = out;
    }

    /**
     * Called when parsing of the document starts.
     *
     *@exception SAXException If writing the trace fails.
     */
    @Override
    public void startDocument() throws SAXException {
        nl();
        nl();
        emit("START DOCUMENT");
        nl();
        emit("<?xml version='1.0' encoding='UTF-8'?>");
    }

    /**
     * Called when the end of the document is reached; flushes the writer.
     *
     *@exception SAXException If writing the trace fails.
     */
    @Override
    public void endDocument() throws SAXException {
        nl();
        emit("END DOCUMENT");
        try {
            nl();
            out.flush();
        } catch (IOException e) {
            throw new SAXException("I/O error", e);
        }
    }

    /**
     * Called when the start of an element is detected. Prints the element
     * name followed by one line per attribute.
     *
     *@param namespaceURI The namespace of the element.
     *@param sName The local (simple) name.
     *@param qName The qualified name.
     *@param attrs The attribute list; may be {@code null}.
     *@exception SAXException If writing the trace fails.
     */
    @Override
    public void startElement(String namespaceURI, String sName, String qName, Attributes attrs)
            throws SAXException {
        indentLevel++;
        nl();
        emit("ELEMENT: ");
        emit("<" + displayName(sName, qName));
        // Treat a missing attribute list like an empty one instead of dereferencing null.
        int attrCount = (attrs == null) ? 0 : attrs.getLength();
        for (int i = 0; i < attrCount; i++) {
            String aName = displayName(attrs.getLocalName(i), attrs.getQName(i));
            nl();
            emit(" ATTR: ");
            emit(aName);
            emit("\t\"");
            emit(attrs.getValue(i));
            emit("\"");
        }
        if (attrCount > 0) {
            nl();
        }
        emit(">");
    }

    /**
     * Called when the end of an element is detected.
     *
     *@param namespaceURI The namespace of the element.
     *@param sName The local (simple) name.
     *@param qName The qualified name.
     *@exception SAXException If writing the trace fails.
     */
    @Override
    public void endElement(String namespaceURI, String sName, String qName) throws SAXException {
        nl();
        emit("END_ELM: ");
        // Use the same name as startElement so that start and end tags match.
        emit("</" + displayName(sName, qName) + ">");
        indentLevel--;
    }

    /**
     * Called when character data is encountered. The "CHARS: " label is
     * always printed; the text itself is printed only when it is not blank.
     *
     *@param buf The buffer holding the characters read.
     *@param offset The index of the first character in the buffer.
     *@param len The number of characters to use.
     *@exception SAXException If writing the trace fails.
     */
    @Override
    public void characters(char buf[], int offset, int len) throws SAXException {
        nl();
        emit("CHARS: ");
        String s = new String(buf, offset, len);
        if (!s.trim().equals("")) {
            emit(s);
        }
    }

    /**
     * Called when a processing instruction is encountered.
     *
     *@param target The name (target) of the processing instruction.
     *@param data The data of the processing instruction.
     *@exception SAXException If writing the trace fails.
     */
    @Override
    public void processingInstruction(String target, String data) throws SAXException {
        nl();
        emit("PROCESS: ");
        emit("<?" + target + " " + data + "?>");
    }

    //===========================================================
    // SAX ErrorHandler methods that deal with error processing.
    //===========================================================

    /**
     * Handles errors reported through the {@code error} callback by
     * rethrowing them to the caller.
     *
     *@param e The error encountered.
     *@exception SAXParseException Always; the received exception is rethrown.
     */
    @Override
    public void error(SAXParseException e) throws SAXParseException {
        throw e;
    }

    /**
     * Handles warnings by printing their location and message to standard
     * output.
     *
     *@param err The warning encountered.
     *@exception SAXParseException Declared by the signature; not thrown here.
     */
    @Override
    public void warning(SAXParseException err) throws SAXParseException {
        System.out.println("** Warning" + ", line " + err.getLineNumber() + ", uri " + err.getSystemId());
        System.out.println(" " + err.getMessage());
    }

    //===========================================================
    // Utility methods
    //===========================================================

    /**
     * Chooses the name to print for an element or attribute: the local name,
     * or the qualified name when the local name is missing or empty (no
     * namespace processing).
     *
     *@param localName The local name reported by the parser.
     *@param qName The qualified name reported by the parser.
     *@return The name to display.
     */
    private static String displayName(String localName, String qName) {
        if (localName == null || "".equals(localName)) {
            return qName;
        }
        return localName;
    }

    /**
     * Writes a string and flushes the writer, mapping I/O errors to SAX
     * errors.
     *
     *@param s The text to write.
     *@exception SAXException If an I/O error occurred.
     */
    private void emit(String s) throws SAXException {
        try {
            out.write(s);
            out.flush();
        } catch (IOException e) {
            throw new SAXException("I/O error", e);
        }
    }

    /**
     * Writes a line terminator followed by the indentation for the current
     * level.
     *
     *@exception SAXException If an I/O error occurred.
     */
    private void nl() throws SAXException {
        try {
            out.write(LINE_END);
            for (int i = 0; i < indentLevel; i++) {
                out.write(INDENT);
            }
        } catch (IOException e) {
            throw new SAXException("I/O error", e);
        }
    }
}
Processing XML with Java http://www.ibiblio.org/xml/books/xmljava/
Working with XML: The Java API for Xml Processing (JAXP) Tutorial - http://java.sun.com/webservices/jaxp/dist/1.1/docs/tutorial/
Elliotte Rusty Harold, XML Bible. IDG Books Worldwide, Inc, 919 E. Hillsdale Blvd., Suite 400, Foster City, CA 94404