最近在项目中遇到需要解析XSD的情况,上网搜索了很久,发现几乎没有这方面的资料,于是只好自己实现。
先说下具体要求:
1、用JAVA解析XSD,找出所有数据节点(xs:element),生成该节点的XPath
2、在解析出的节点中随便选择几个节点作为要保留的节点
3、根据需要保留的节点信息生成XSLT

这样可以实现这样的业务:首先制定好某一个业务需要的XSD规范,然后从规范中选择针对某个业务需要关心的节点生成XSLT,那么,以后满足这个XSD规范的所有xml文档,都可以由这个生成的XSLT来转换,得到只包含我们所关心的那些数据的一个XML文档。

具体实现如下,这个只是我自己用dom4j来实现的一种方式,如果有更好的方法或者建议,希望指正。

1.XSDNode定义

/**
 * Describes one data node (an xs:element) found while parsing an XSD.
 *
 * <p>Plain mutable bean: every property is a nullable String that defaults to
 * {@code null} until its setter is called.
 */
public class XSDNode {

    // Node name (value of the element's "name" attribute)
    private String name;

    // XPath of the node in the XML instance document
    private String xPath;

    // Node description
    private String annotation;

    // Node type
    private String type;

    // Business path: same as the XPath, but with repeatable steps marked
    private String unboundedXpath;

    // Unbounded flag kept as a string; the revised XSDReader stores "1" when
    // the unbounded path differs from the plain XPath and "0" otherwise.
    private String isUnbounded;

    /** Returns the node name. */
    public String getName() {
        return name;
    }

    /** Sets the node name. */
    public void setName(String name) {
        this.name = name;
    }

    /** Returns the XPath of the node. */
    public String getXPath() {
        return xPath;
    }

    /** Sets the XPath of the node. */
    public void setXPath(String path) {
        xPath = path;
    }

    /** Returns the node description. */
    public String getAnnotation() {
        return annotation;
    }

    /** Sets the node description. */
    public void setAnnotation(String annotation) {
        this.annotation = annotation;
    }

    /** Returns the node type. */
    public String getType() {
        return type;
    }

    /** Sets the node type. */
    public void setType(String type) {
        this.type = type;
    }

    /** Returns the path in which repeatable steps are marked. */
    public String getUnboundedXpath() {
        return unboundedXpath;
    }

    /** Sets the path in which repeatable steps are marked. */
    public void setUnboundedXpath(String unboundedXpath) {
        this.unboundedXpath = unboundedXpath;
    }

    /** Returns the unbounded flag ("0" or "1" as set by the revised XSDReader). */
    public String getIsUnbounded() {
        return isUnbounded;
    }

    /**
     * Sets the unbounded flag.
     *
     * @param isUnbounded flag value; the revised XSDReader passes "0" or "1"
     */
    public void setIsUnbounded(String isUnbounded) {
        this.isUnbounded = isUnbounded;
    }
}



2.XSD文件解析 

/**
 * Walks an XSD with dom4j and collects one {@link XSDNode} for every leaf
 * xs:element found below the root data element (named by
 * {@code XMLConstants.MESSAGE}) through xs:complexType/xs:sequence nesting.
 */
public class XSDReader {

    // Leaf nodes collected by the most recent paserXSD call.
    private List<XSDNode> list = new ArrayList<XSDNode>();

    /**
     * Parses an XSD and returns the list of data-node objects.
     *
     * @param xsd system id (file path or URL) handed to {@code SAXReader.read(String)}
     * @return the leaf nodes found under the root data element
     * @throws IllegalArgumentException if the XSD has no root data element
     * @throws Exception if the document cannot be read or parsed
     */
    public List<XSDNode> paserXSD(String xsd) throws Exception {
        SAXReader saxReader = new SAXReader();
        Document doc = saxReader.read(xsd);
        Element element = doc.getRootElement();
        String basePath = "//" + XMLConstants.XSD_DEFAULT_NAMESPACE + ":element[@name=\"" + XMLConstants.MESSAGE + "\"]";
        Element dataElement = (Element) element.selectSingleNode(basePath);
        if (dataElement == null) {
            // Fail with a clear message instead of an NPE inside paseData.
            throw new IllegalArgumentException("No " + XMLConstants.XSD_DEFAULT_NAMESPACE + ":element named \""
                    + XMLConstants.MESSAGE + "\" found in XSD: " + xsd);
        }
        // Start from a fresh list so that a second call does not return the
        // nodes of an earlier call as well.
        list = new ArrayList<XSDNode>();
        String elementPath = "//" + XMLConstants.XSD_DEFAULT_NAMESPACE + ":element";
        paseData(dataElement, "//", elementPath, "//");
        return list;
    }

    /**
     * Converts an XSD data node: recurses into child elements, and for a leaf
     * creates an {@link XSDNode} and appends it to the result list.
     *
     * @param element        the xs:element being visited
     * @param xPath          XPath prefix (in the XML instance) of the parent, ending with "/"
     * @param xsdPath        XPath (in the XSD) that selects xs:element nodes at this level
     * @param unboundedXpath same as {@code xPath}, but with repeatable steps marked
     */
    public void paseData(Element element, String xPath, String xsdPath, String unboundedXpath) {
        // Read the node's name attribute
        String nodeName = element.attributeValue("name");
        // Build the XPath of the node in the XML document
        xPath += nodeName;
        unboundedXpath += nodeName;
        // maxOccurs restricts how many siblings of this node may appear;
        // any value other than "1" marks the node as repeatable, except on the root data node.
        String maxOccurs = element.attributeValue("maxOccurs");
        if (maxOccurs != null && !"1".equals(maxOccurs) && !("//" + XMLConstants.MESSAGE).equals(xPath)) {
            unboundedXpath += XMLConstants.XSD_UNBOUNDED;
        }

        // XSD path that selects this node in the schema
        String selfXsdPath = xsdPath + "[@name=\"" + nodeName + "\"]";
        // XSD path of the child xs:element nodes
        String currentXsdPath = selfXsdPath + "/" + XMLConstants.XSD_DEFAULT_NAMESPACE
                + ":complexType/" + XMLConstants.XSD_DEFAULT_NAMESPACE + ":sequence/" + XMLConstants.XSD_DEFAULT_NAMESPACE
                + ":element";
        // Find all child elements of this node
        List<Node> elementNodes = element.selectNodes(currentXsdPath);
        if (elementNodes != null && !elementNodes.isEmpty()) {
            // There are child elements, so this node is not a leaf.
            if (!xPath.endsWith("/")) {
                xPath += "/";
                unboundedXpath += "/";
            }
            Iterator<Node> nodes = elementNodes.iterator();
            while (nodes.hasNext()) {
                Element ele = (Element) nodes.next();
                paseData(ele, xPath, currentXsdPath, unboundedXpath);
            }
            return;
        }

        // This element is a leaf.
        // Read the documentation text, if any
        String annotation = "";
        Node annotationNode = element.selectSingleNode(selfXsdPath + "/" + XMLConstants.XSD_DEFAULT_NAMESPACE
                + ":annotation/" + XMLConstants.XSD_DEFAULT_NAMESPACE + ":documentation");
        if (annotationNode != null) {
            annotation = annotationNode.getText();
        }
        // Read the type: prefer the "type" attribute, otherwise fall back to
        // the base of an inline xs:simpleType/xs:restriction.
        String nodeType = "";
        Attribute type = element.attribute("type");
        if (type != null) {
            nodeType = type.getText();
        } else {
            String spath = selfXsdPath + "/" + XMLConstants.XSD_DEFAULT_NAMESPACE + ":simpleType/"
                    + XMLConstants.XSD_DEFAULT_NAMESPACE + ":restriction";
            Element typeNode = (Element) element.selectSingleNode(spath);
            if (typeNode != null) {
                Attribute base = typeNode.attribute("base");
                if (base != null) {
                    nodeType = base.getText();
                }
            }
        }

        XSDNode xsdNode = new XSDNode();
        xsdNode.setName(nodeName);
        xsdNode.setXPath(xPath);
        xsdNode.setAnnotation(annotation);
        xsdNode.setType(nodeType);
        xsdNode.setUnboundedXpath(unboundedXpath);
        list.add(xsdNode);
    }

    /** Demo entry point: prints the unbounded path of every leaf in F:/1.xsd. */
    public static void main(String[] args) {
        try {
            // paserXSD is an instance method, so it needs a reader instance.
            List<XSDNode> nodes = new XSDReader().paserXSD("F:/1.xsd");
            for (XSDNode node : nodes) {
                System.out.println(node.getUnboundedXpath());
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}


3.根据XPath生成XSLT 

/**
 * Generates an XSLT stylesheet (as text) that keeps only the nodes named by a
 * list of XPath expressions.
 */
public class XSLCreator {

    /**
     * <p>
     * Builds an XSLT from the given XPath information. A path looks like
     * //MESSAGE/BASEINFO/INFO_SOURCE/infoRegionLimitList[unbounded]/Region[unbounded]/china/jobProvince[unbounded];
     * several paths are separated by "," and the [unbounded] suffix marks the
     * repeatable steps of a path.
     * </p>
     *
     * <p>
     * The result contains a template matching "/" that emits the root data
     * element and applies templates to it, and a second template matching the
     * root data element that holds the output tree; every repeatable step is
     * wrapped in an xsl:for-each and every leaf receives an xsl:value-of.
     * </p>
     *
     * @param paths comma-separated XPath expressions
     * @return the stylesheet serialized with {@code Document.asXML()}
     * @throws Exception if building the document fails
     */
    public String createXSL(String paths) throws Exception {
        // Split the incoming XPath list
        String[] xPathsArr = paths.split(",");
        // Create the root node and the basic XSLT skeleton
        String rootElement = XMLConstants.MESSAGE;
        Document documents = DocumentHelper.createDocument();
        Element documentsRoot = documents.addElement(XMLConstants.NAMESPACE + XMLConstants.SPER + XMLConstants.STYLESHEET)
                .addAttribute(XMLConstants.VERSION, XMLConstants.VERSIONNUM);
        documentsRoot.add(new Namespace(XMLConstants.NAMESPACE, XMLConstants.NAMESPACEADDRESS));
        Element elementTemplate = documentsRoot.addElement(XMLConstants.NAMESPACE + XMLConstants.SPER + XMLConstants.TEMPLATE);
        elementTemplate.addAttribute(XMLConstants.MATCH, XMLConstants.ROOTSPER);
        Element root = elementTemplate.addElement(rootElement);
        root.addElement(XMLConstants.NAMESPACE + XMLConstants.SPER + XMLConstants.APPLYTEMPLATES).addAttribute(
                XMLConstants.SELECT, rootElement);
        // Create the template that holds the output nodes
        Element elementTemplateChild = documentsRoot.addElement(XMLConstants.NAMESPACE + XMLConstants.SPER
                + XMLConstants.TEMPLATE);
        elementTemplateChild.addAttribute(XMLConstants.MATCH, rootElement);
        // Generate the conversion structure for every path
        for (String xPath : xPathsArr) {
            // Number of unbounded steps seen so far on this path
            int unbounds = 0;
            // Path segments between the [unbounded] markers
            String[] unboundPathSplit = xPath.split(XMLConstants.XSD_UNBOUNDED_REPLATE);
            Element ele = elementTemplateChild;
            // Names of the individual steps of the path
            String[] nodes = xPath.split("/");
            // Whether the current step is an unbounded node
            boolean isUnbound = false;
            for (int i = 0; i < nodes.length; i++) {
                String node = nodes[i];
                if (node.endsWith(XMLConstants.XSD_UNBOUNDED)) {
                    // The step carries the unbounded marker: strip it and count it
                    node = node.replaceAll(XMLConstants.XSD_UNBOUNDED_REPLATE, "");
                    isUnbound = true;
                    unbounds++;
                } else {
                    isUnbound = false;
                }
                if (node != null && !node.equals("")) {
                    // If the current element already has a for-each child, descend into it first
                    Element each = ele.element(XMLConstants.XSL_ELEMENT_FOREACH);
                    if (each != null) {
                        ele = each;
                    }
                    if (ele.element(node) == null) {
                        // The step does not exist yet: create it as the new current element.
                        // If we are standing on a for-each, climb to the nearest non-for-each ancestor.
                        while (ele.getName().equals(XMLConstants.XSL_ELEMENT_FOREACH)) {
                            ele = ele.getParent();
                        }
                        if (isUnbound) {
                            // Unbounded step: wrap it in an xsl:for-each
                            ele = ele.addElement(XMLConstants.NAMESPACE + XMLConstants.SPER + XMLConstants.XSL_ELEMENT_FOREACH);
                            // Path segment that leads to this unbounded step
                            String selectPath = unboundPathSplit[unbounds - 1];
                            // Drop the leading "/" of inner segments, otherwise the XSL is invalid;
                            // the first segment starts with "//" and must be left untouched.
                            if (unbounds != 1 && selectPath.startsWith("/")) {
                                selectPath = selectPath.substring(1, selectPath.length());
                            }
                            // Add the select attribute to the for-each
                            ele.addAttribute(XMLConstants.XSL_ELEMENT_SELECT, selectPath);
                        }
                        ele = ele.addElement(node);
                    } else {
                        // The step already exists: make it the current element
                        ele = ele.element(node);
                    }
                }
                if (i == nodes.length - 1) {
                    // Last step of the path: it is the leaf, so select its data
                    if (unbounds != 0) {
                        // The path contains unbounded steps: select relative to the innermost for-each
                        if (unbounds == unboundPathSplit.length) {
                            // The leaf itself is unbounded: select the current node
                            xPath = ".";
                        } else {
                            xPath = unboundPathSplit[unbounds];
                            if (!"".equals(xPath) && xPath.startsWith("/")) {
                                xPath = xPath.substring(1, xPath.length());
                            }
                        }
                    }
                    ele.addElement(XMLConstants.NAMESPACE + XMLConstants.SPER + XMLConstants.VALUEOF).addAttribute(
                            XMLConstants.SELECT, xPath);
                }
            }
        }
        documents.setXMLEncoding(XMLConstants.XMLENCODING);
        return documents.asXML();
    }

    /** Demo entry point: prints the stylesheet generated for a sample path list. */
    public static void main(String[] args) {
        try {
            String unboundPath = "//MESSAGE/BASEINFO/CDB_INFO/infoStatus,"
                    + "//MESSAGE/BASEINFO/INFO_SOURCE/infoRegionLimitList[unbounded]/Region[unbounded]/china[unbounded]/jobProvince[unbounded],"
                    + "//MESSAGE/BASEINFO/INFO_SOURCE/infoRegionLimitList[unbounded]/Region[unbounded]/china[unbounded]/jobCity[unbounded],"
                    + "//MESSAGE/BASEINFO/INFO_SOURCE/infoRegionLimitList[unbounded]/Region[unbounded]/china[unbounded]/jobCunty[unbounded],"
                    + "//MESSAGE/BASEINFO/INFO_SOURCE/cpPrimaryKey," + "//MESSAGE/BASEINFO/INFO_SOURCE/cpCreateDate,"
                    + "//MESSAGE/BASEINFO/INFO_SOURCE/cpUpdateDate";

            // createXSL is an instance method, so it needs a creator instance.
            String result = new XSLCreator().createXSL(unboundPath);
            System.out.println(result);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}


4、相关常量定义 

/**
 * String constants shared by the XSD reader and the XSLT creator.
 */
public class XMLConstants {

    // Default data node
    public static final String MESSAGE = "MESSAGE";

    // XML encoding
    public static final String ENCODING = "UTF-8";

    // Default XSD namespace prefix
    public static final String XSD_DEFAULT_NAMESPACE = "xs";

    // Default data node defined by the XSD
    public static final String XSD_DEFAULT_DATANODE = "MESSAGE";

    // XSD complex type node
    public static final String XSD_COMPLEX_TYPE = "complexType";

    // XSD sequence node
    public static final String XSD_SEQUENCE = "sequence";

    // XSD element node
    public static final String XSD_ELEMENT = "element";

    // XSD annotation node
    public static final String XSD_ANNOTATION = "annotation";

    // XSD documentation node
    public static final String XSD_DOCUMENTATION = "documentation";

    // XSD simple type node
    public static final String XSD_SIMPLE_TYPE = "simpleType";

    // XSD restriction node
    public static final String XSD_RESTRICTION = "restriction";

    // XSD name attribute
    public static final String XSD_ATTRIBUTE_NAME = "name";

    // XSD type attribute
    public static final String XSD_ATTRIBUTE_TYPE = "type";

    // XSD base attribute
    public static final String XSD_ATTRIBUTE_base = "base";

    // Marker appended to a path step to describe an unbounded node of the XSD
    public static final String XSD_UNBOUNDED = "[unbounded]";

    // Regular expression that matches the marker above
    public static final String XSD_UNBOUNDED_REPLATE = "\\[unbounded\\]";

    public static final String XSL_ELEMENT_FOREACH = "for-each";

    public static final String XSL_ELEMENT_SELECT = "select";

    /** ************* Basic settings used when creating the XSLT ******************* */
    public static final String STYLESHEET = "stylesheet";

    public static final String VERSION = "version";

    public static final String VERSIONNUM = "1.0";

    public static final String NAMESPACE = "xsl";

    public static final String NAMESPACEADDRESS = "http://www.w3.org/1999/XSL/Transform";

    public static final String TEMPLATE = "template";

    public static final String MATCH = "match";

    public static final String APPLYTEMPLATES = "apply-templates";

    public static final String VALUEOF = "value-of";

    public static final String SELECT = "select";

    public static final String XMLENCODING = "UTF-8";

    public static final String ROOTSPER = "/";

    public static final String DOUBELROOTSPER = "//";

    public static final String SPER = ":";

    /** ************* Prefixed XSD element names used by the choice-aware XSDReader ************* */
    public static final String XS_ELEMENT = "xs:element";

    public static final String XS_SEQUENCE = "xs:sequence";

    public static final String XS_CHOICE = "xs:choice";

    public static final String XS_COMPLEXTYPE = "xs:complexType";

    public static final String XS_SIMPLETYPE = "xs:simpleType";

    public static final String XS_RESTRICTION = "xs:restriction";

    public static final String XS_ANNOTATION = "xs:annotation";

    public static final String XS_DOCUMENTATION = "xs:documentation";
}


所有代码如上,需要用到dom4j相关包。如果有问题和建议,请联系我,mail:feihu8620@gmail.com,QQ76615834



====================================================== 

修改XSDReader,使之适应XSD choice节点

====================================================== 

import java.io.ByteArrayInputStream; 

import java.util.ArrayList; 

import java.util.Iterator; 

import java.util.List; 


import org.dom4j.Attribute; 

import org.dom4j.Document; 

import org.dom4j.Element; 

import org.dom4j.Node; 

import org.dom4j.io.SAXReader; 


import com.feinno.cdb.commons.BaseConstants; 

import com.feinno.cdb.commons.XMLConstants; 

import com.feinno.cdb.po.XSDNode; 

/**
 * Walks an XSD with dom4j and collects one {@link XSDNode} for every leaf
 * xs:element found below the root data element (named by
 * {@code XMLConstants.MESSAGE}). Child elements are looked up through
 * xs:complexType/xs:sequence first and xs:complexType/xs:choice otherwise.
 */
public class XSDReader {

    // Leaf nodes collected by the most recent paserXSD call.
    private List<XSDNode> list = new ArrayList<XSDNode>();

    /**
     * Parses an XSD and returns the list of data-node objects.
     *
     * @param xsd the XSD document text; it is encoded with
     *            {@code BaseConstants.XMLENCODING} and parsed from memory
     * @return the leaf nodes found under the root data element
     * @throws IllegalArgumentException if the XSD has no root data element
     * @throws Exception if the document cannot be read or parsed
     */
    public List<XSDNode> paserXSD(String xsd) throws Exception {
        SAXReader saxReader = new SAXReader();
        ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(xsd.getBytes(BaseConstants.XMLENCODING));
        Document doc = saxReader.read(byteArrayInputStream);
        Element element = doc.getRootElement();
        String basePath = "//" + XMLConstants.XSD_DEFAULT_NAMESPACE + ":element[@name=\"" + XMLConstants.MESSAGE + "\"]";
        Element dataElement = (Element) element.selectSingleNode(basePath);
        if (dataElement == null) {
            // Fail with a clear message instead of an NPE inside paseData.
            throw new IllegalArgumentException("No " + XMLConstants.XSD_DEFAULT_NAMESPACE + ":element named \""
                    + XMLConstants.MESSAGE + "\" found in the XSD");
        }
        // Start from a fresh list so that a second call does not return the
        // nodes of an earlier call as well.
        list = new ArrayList<XSDNode>();
        String elementPath = "//" + XMLConstants.XSD_DEFAULT_NAMESPACE + ":element";
        paseData(dataElement, "//", elementPath, "//");
        return list;
    }

    /**
     * Converts an XSD data node: recurses into child elements, and for a leaf
     * creates an {@link XSDNode} and appends it to the result list.
     *
     * @param element        the xs:element being visited
     * @param xPath          XPath prefix (in the XML instance) of the parent, ending with "/"
     * @param xsdPath        XPath (in the XSD) that selects xs:element nodes at this level
     * @param unboundedXpath same as {@code xPath}, but with repeatable steps marked
     */
    public void paseData(Element element, String xPath, String xsdPath, String unboundedXpath) {
        // Read the node's name attribute
        String nodeName = element.attributeValue("name");
        // Build the XPath of the node in the XML document
        xPath += nodeName;
        unboundedXpath += nodeName;
        // maxOccurs restricts how many siblings of this node may appear;
        // any value other than "1" marks the node as repeatable, except on the root data node.
        String maxOccurs = element.attributeValue("maxOccurs");
        if (maxOccurs != null && !"1".equals(maxOccurs) && !("//" + XMLConstants.MESSAGE).equals(xPath)) {
            unboundedXpath += XMLConstants.XSD_UNBOUNDED;
        }

        // XSD path that selects this node in the schema
        String selfXsdPath = xsdPath + "[@name=\"" + nodeName + "\"]";
        // XSD path of the child elements when they are declared in a sequence
        String currentXsdPath_sequence = selfXsdPath + "/" + XMLConstants.XS_COMPLEXTYPE + "/"
                + XMLConstants.XS_SEQUENCE + "/" + XMLConstants.XS_ELEMENT;
        // XSD path of the child elements when they are declared in a choice
        String currentXsdPath_choise = selfXsdPath + "/" + XMLConstants.XS_COMPLEXTYPE + "/"
                + XMLConstants.XS_CHOICE + "/" + XMLConstants.XS_ELEMENT;
        // Find all child elements of this node; the sequence wins when it has any
        List<Node> elementNodes_sequence = element.selectNodes(currentXsdPath_sequence);
        List<Node> elementNodes_choice = element.selectNodes(currentXsdPath_choise);
        List<Node> elementNodes;
        String currentXsdPath;
        if (elementNodes_sequence != null && !elementNodes_sequence.isEmpty()) {
            elementNodes = elementNodes_sequence;
            currentXsdPath = currentXsdPath_sequence;
        } else {
            elementNodes = elementNodes_choice;
            currentXsdPath = currentXsdPath_choise;
        }

        if (elementNodes != null && !elementNodes.isEmpty()) {
            // There are child elements, so this node is not a leaf.
            if (!xPath.endsWith("/")) {
                xPath += "/";
                unboundedXpath += "/";
            }
            Iterator<Node> nodes = elementNodes.iterator();
            while (nodes.hasNext()) {
                Element ele = (Element) nodes.next();
                paseData(ele, xPath, currentXsdPath, unboundedXpath);
            }
            return;
        }

        // This element is a leaf.
        // Read the documentation text, if any
        String annotation = "";
        Node annotationNode = element.selectSingleNode(selfXsdPath + "/" + XMLConstants.XS_ANNOTATION
                + "/" + XMLConstants.XS_DOCUMENTATION);
        if (annotationNode != null) {
            annotation = annotationNode.getText();
        }
        // Read the type: prefer the "type" attribute, otherwise fall back to
        // the base of an inline xs:simpleType/xs:restriction.
        String nodeType = "";
        Attribute type = element.attribute("type");
        if (type != null) {
            nodeType = type.getText();
        } else {
            String spath = selfXsdPath + "/" + XMLConstants.XS_SIMPLETYPE + "/"
                    + XMLConstants.XS_RESTRICTION;
            Element typeNode = (Element) element.selectSingleNode(spath);
            if (typeNode != null) {
                Attribute base = typeNode.attribute("base");
                if (base != null) {
                    nodeType = base.getText();
                }
            }
        }

        XSDNode xsdNode = new XSDNode();
        xsdNode.setName(nodeName);
        xsdNode.setXPath(xPath);
        xsdNode.setAnnotation(annotation);
        xsdNode.setType(nodeType);
        xsdNode.setUnboundedXpath(unboundedXpath);
        // Equal paths mean that no step on the way to this leaf was marked unbounded.
        if (xPath.equals(unboundedXpath)) {
            xsdNode.setIsUnbounded("0");
        } else {
            xsdNode.setIsUnbounded("1");
        }
        list.add(xsdNode);
    }
}



常量定义: 

/** ******************* XSD DID related definitions ******************** */

 // Prefixed ("xs:") names of XSD elements. The revised XSDReader joins them
 // with "/" to build the XPath steps it evaluates against the schema.

 // Element declaration
 public static final String XS_ELEMENT = "xs:element"; 

 // Sequence compositor
 public static final String XS_SEQUENCE = "xs:sequence"; 

 // Choice compositor
 public static final String XS_CHOICE = "xs:choice"; 

 // Complex type definition
 public static final String XS_COMPLEXTYPE = "xs:complexType"; 

 // Simple type definition
 public static final String XS_SIMPLETYPE = "xs:simpleType"; 

 // Restriction; XSDReader reads its "base" attribute as the node type
 public static final String XS_RESTRICTION = "xs:restriction"; 

 // Annotation container
 public static final String XS_ANNOTATION = "xs:annotation"; 

 // Documentation text; XSDReader uses it as the node description
 public static final String XS_DOCUMENTATION = "xs:documentation";