xml文件:
为了更好地描述数据,我们采用xml的内容+数据的描述形式,这便于更直观地读懂数据及其表达的内容

xml解析方法:

JAVA实现XML数据解析 java xml文件解析_java

DOM方法:

比较麻烦的一个方法,需要不停地用for遍历子树,然后输出结果:

例如看user.xml

JAVA实现XML数据解析 java xml文件解析_xml_02


它地读取就是逐层地(或者逐行地根据名称)解析,逐层的异常繁琐,可以看看源码:

package xml.dom;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;


public class DomReader 
{
	public static void main(String[] a)
	{
		recursiveTraverse(); //自上而下进行访问
		System.out.println("========华丽丽的分割线==========");
		traverseBySearch();    //根据名称进行搜索		
	}
    public static void recursiveTraverse()
    {
    	try 
    	{
    		//采用Dom解析xml文件
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document document = db.parse("users.xml");
            
            //获取所有的一级子节点
            NodeList usersList = document.getChildNodes();
            System.out.println(usersList.getLength());  //1  
            
            for (int i = 0; i < usersList.getLength(); i++) 
            {
                Node users = usersList.item(i);         //1  users 
                
                NodeList userList = users.getChildNodes(); //获取二级子节点user的列表
                System.out.println("==" + userList.getLength()); //9
                
                for (int j = 0; j < userList.getLength(); j++) //9
                {
                    Node user = userList.item(j);
                    if (user.getNodeType() == Node.ELEMENT_NODE)
                    {
                    	 NodeList metaList = user.getChildNodes();
                         System.out.println("====" + metaList.getLength()); //7
                         
                         for (int k = 0; k < metaList.getLength(); k++) //7
                         {
                         	//到最后一级文本
                        	Node meta = metaList.item(k);
                        	if (meta.getNodeType() == Node.ELEMENT_NODE)
                        	{
                        		System.out.println(metaList.item(k).getNodeName() 
                        				+ ":" + metaList.item(k).getTextContent());
                        	}                                                              
                         }                    
                         System.out.println();
                    }                   
                }
            }            
        } catch (Exception e) {
            e.printStackTrace();
        } 
    }   
    
    public static void traverseBySearch()
    {
    	try 
    	{
    		//采用Dom解析xml文件
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document document = db.parse("users.xml");
            
            Element rootElement = document.getDocumentElement();         

            NodeList nodeList = rootElement.getElementsByTagName("name"); 
            if(nodeList != null) 
            { 
               for (int i = 0 ; i < nodeList.getLength(); i++) 
               { 
                  Element element = (Element)nodeList.item(i);                  
                  System.out.println(element.getNodeName() + " = " + element.getTextContent());
               } 
            }             
        } catch (Exception e) {
            e.printStackTrace();
        } 	
    }       
}

再看看DOM写入,逻辑主要是先构建对象,然后描述对象之间的关系:

import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Document;
import org.w3c.dom.Element;

public class DomWriter {

	public static void main(String[] args) {
		
		
		try {
			DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
			DocumentBuilder dbBuilder = dbFactory.newDocumentBuilder();
			
			//新创建一个Document节点
			Document document = dbBuilder.newDocument();
			if (document != null) 
			{
				Element docx = document.createElement("document");	//都是采用Document创建元素		
				Element element = document.createElement("element");
				element.setAttribute("type", "paragraph"); 
				element.setAttribute("alignment", "left"); //element增加2个属性
				
				Element object = document.createElement("object");
				object.setAttribute("type", "text");
				
				Element text = document.createElement("text");
				text.appendChild(document.createTextNode("ggggg")); //给text节点赋值
				Element bold = document.createElement("bold");
				bold.appendChild(document.createTextNode("true"));    //给bold节点赋值
				
				object.appendChild(text);      //把text节点挂在object下
				object.appendChild(bold);      //把bold节点挂在object下
				element.appendChild(object);   //把object节点挂在element下
				docx.appendChild(element);	   //把element节点挂在docx下		
				document.appendChild(docx);    //把docx挂在document下
				
				TransformerFactory transformerFactory = TransformerFactory.newInstance();
				Transformer transformer = transformerFactory.newTransformer();
				DOMSource source = new DOMSource(document);
				
				//定义目标文件
				File file = new File("dom_result.xml");
				StreamResult result = new StreamResult(file);
		 	 
				//将xml内容写入到文件中
				transformer.transform(source, result);
				
				System.out.println("write xml file successfully");
			}
		} catch (Exception e) {
			e.printStackTrace();
		}		
	}	
}

结果:

JAVA实现XML数据解析 java xml文件解析_开发语言_03


sax解析:

push方法,每行都看,然后实现接口的五个方法,进行处理逻辑的编写

先看看xml:

JAVA实现XML数据解析 java xml文件解析_xml_04


需求是书的title都弄出来

代码如下:

package xml.sax;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

public class SAXReader {
	public static void main(String[] args) throws SAXException, IOException {
		XMLReader parser = XMLReaderFactory.createXMLReader();
		BookHandler bookHandler = new BookHandler();
		parser.setContentHandler(bookHandler);
		parser.parse("books.xml");
		System.out.println(bookHandler.getNameList());
	}
}

class BookHandler extends DefaultHandler {
	private List<String> nameList;
	private boolean title = false;

	public List<String> getNameList() {
		return nameList;
	}

	// xml文档加载时
	public void startDocument() throws SAXException {
		System.out.println("Start parsing document...");
		nameList = new ArrayList<String>();
	}

	// 文档解析结束
	public void endDocument() throws SAXException {
		System.out.println("End");
	}

	// 访问某一个元素
	public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {

		if (qName.equals("title")) {
			title = true;
		}
	}

	// 结束访问元素
	public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
		// End of processing current element
		if (title) {
			title = false;
		}
	}

	// 访问元素正文
	public void characters(char[] ch, int start, int length) {
		
		if (title) {
			String bookTitle = new String(ch, start, length);
			System.out.println("Book title: " + bookTitle);
			nameList.add(bookTitle);
		}
	}

}

主要的实现逻辑就是通过一个bool变量标记当前行是不是title的,然后提起title的内容即可,缺点就是每行都要走一遍

stax解析:

基于pull模型的,可以有效地定位想抓取的内容。分为流模式和事件模式,流模式更好用,事件还是要头中尾的处理:

代码如下:

package xml.stax;

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.Iterator;

import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;


public class StaxReader {
	
	public static void main(String[] args) {
		StaxReader.readByStream();
		System.out.println("========华丽丽的分割线==========");
		StaxReader.readByEvent();
	}

	//流模式
	public static void readByStream() {
		String xmlFile = "books.xml";
		XMLInputFactory factory = XMLInputFactory.newFactory();
		XMLStreamReader streamReader = null;
		try {
			streamReader = factory.createXMLStreamReader(new FileReader(xmlFile));			
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (XMLStreamException e) {
			e.printStackTrace();
		}
		
		// 基于指针遍历
		try {
			while (streamReader.hasNext()) {
				int event = streamReader.next();
				// 如果是元素的开始
				if (event == XMLStreamConstants.START_ELEMENT) {
					// 列出所有书籍名称
					if ("title".equalsIgnoreCase(streamReader.getLocalName())) {
						System.out.println("title:" + streamReader.getElementText());
					}
				}
			}
			streamReader.close();
		} catch (XMLStreamException e) {
			e.printStackTrace();
		}
	}
	

	

		
	// 事件模式
	public static void readByEvent() {
		String xmlFile = "books.xml";
		XMLInputFactory factory = XMLInputFactory.newInstance();
		boolean titleFlag = false;
		try {
			// 创建基于迭代器的事件读取器对象
			XMLEventReader eventReader = factory.createXMLEventReader(new FileReader(xmlFile));
			// 遍历Event迭代器
			while (eventReader.hasNext()) {
				XMLEvent event = eventReader.nextEvent();
				// 如果事件对象是元素的开始
				if (event.isStartElement()) {
					// 转换成开始元素事件对象
					StartElement start = event.asStartElement();
					// 打印元素标签的本地名称
					
					String name = start.getName().getLocalPart();
					//System.out.print(start.getName().getLocalPart());	
					if(name.equals("title"))
					{
						titleFlag = true;
						System.out.print("title:");
					}
					
					// 取得所有属性
					Iterator attrs = start.getAttributes();
					while (attrs.hasNext()) {
						// 打印所有属性信息
						Attribute attr = (Attribute) attrs.next();
						//System.out.print(":" + attr.getName().getLocalPart() + "=" + attr.getValue());
					}
					//System.out.println();
				}
				//如果是正文
				if(event.isCharacters())
				{
					String s = event.asCharacters().getData();
					if(null != s && s.trim().length()>0 && titleFlag)
					{
						System.out.println(s.trim());
					}					
				}
				//如果事件对象是元素的结束
				if(event.isEndElement())
				{
					EndElement end = event.asEndElement();
					String name = end.getName().getLocalPart();
					if(name.equals("title"))
					{
						titleFlag = false;
					}
				}
			}
			eventReader.close();
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (XMLStreamException e) {
			e.printStackTrace();
		}
	}
}

总结:
大体了解一下即可,要用的时候再参考代码
自此我们确实可以对xml进行读取和写入