1 import sys
2 import StringIO
3 import lxml
4
5 from lxml import etree
6 from StringIO import StringIO
7
# Build an XML document for the schema we are validating against. Editing the
# string (adding/removing elements) forces different validation errors.
# The trailing "x" in MadeUpDate is presumably deliberate, so that validation
# fails and the error report below is exercised.
xml = StringIO('''
<CompanyDataRequest xmlns="http://xmlgw.companieshouse.gov.uk" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://xmlgw.companieshouse.gov.uk http://xmlgw.companieshouse.gov.uk/v2-1/schema/CompanyData-v2-2.xsd">
<CompanyNumber>06937730</CompanyNumber>
<CompanyAuthenticationCode>123456</CompanyAuthenticationCode>
<MadeUpDate>2010-06-30x</MadeUpDate>
</CompanyDataRequest>
''')

# Clear any previous errors from lxml's global error log.
lxml.etree.clear_error_log()

try:
    # Get the XML schema to validate against (fetched from the URL).
    schema = lxml.etree.XMLSchema(file='http://xmlgw.companieshouse.gov.uk/v2-1/schema/CompanyData-v2-2.xsd')
    # Parse the in-memory XML.
    xml_doc = lxml.etree.parse(xml)
    # Raises DocumentInvalid with a readable message on failure.
    schema.assertValid(xml_doc)
    # Boolean form of the same check; only reached when assertValid passed.
    print('schema.validate() returns "%s".' % schema.validate(xml_doc))

except lxml.etree.XMLSchemaParseError as xspe:
    # Something wrong with the schema (getting from URL/parsing).
    print("XMLSchemaParseError occurred!")
    print(xspe)

except lxml.etree.XMLSyntaxError as xse:
    # XML not well formed.
    print("XMLSyntaxError occurred!")
    print(xse)

except lxml.etree.DocumentInvalid:
    # XML failed to validate against the schema. `schema` is always bound
    # here because only assertValid() raises this exception.
    print("DocumentInvalid occurred!")

    error = schema.error_log.last_error
    if error:
        # All the error properties (from libxml2) describing what went wrong.
        # %-formatting is used instead of "+" so that a missing (None)
        # filename is printed rather than raising TypeError.
        print('domain_name: %s' % error.domain_name)
        print('domain: %s' % error.domain)
        print('filename: %s' % error.filename)  # '<string>' because the XML came from an in-memory string
        print('level: %s' % error.level)  # an integer
        print('level_name: %s' % error.level_name)
        print('line: %s' % error.line)  # line where the error occurred
        print('message: %s' % error.message)  # human-readable description
        print('type: %s' % error.type)  # an integer
        print('type_name: %s' % error.type_name)

封装类

1 #!/usr/bin/env python
2 # -*- coding:utf-8 -*-
3 # Author:Eric.yue
4
5 import os
6 import lxml.etree as ET
7 from StringIO import StringIO
8 import chardet
9
10
class R3xmlCheck(object):
    """Check that an XML file is well formed and validate it against an XSD.

    The XML file path is given at construction time; the schema content is
    supplied by :meth:`get_xsd`. Validation outcomes are printed to standard
    output.
    """

    def __init__(self, element_xml):
        """Store the path of the XML file to check.

        :param element_xml: filesystem path of the XML document.
        """
        self.elem_xml = element_xml

    @staticmethod
    def _print_schema_error(error):
        """Print every libxml2 property of one schema error-log entry."""
        # %-formatting is used instead of "+" so that a missing (None)
        # filename is printed rather than raising TypeError.
        print('domain_name: %s' % error.domain_name)
        print('domain: %s' % error.domain)
        print('filename: %s' % error.filename)  # '<string>' because the XML came from an in-memory string
        print('level: %s' % error.level)  # an integer
        print('level_name: %s' % error.level_name)
        print('line: %s' % error.line)  # line where the error occurred
        print('message: %s' % error.message)  # human-readable description
        print('type: %s' % error.type)  # an integer
        print('type_name: %s' % error.type_name)

    def validate_xsd_xml(self, f_xml, elem_xsd):
        """Validate XML content against XSD content and print the outcome.

        :param f_xml: content of the XML document.
        :param elem_xsd: content of the XSD schema (text or bytes).
        :returns: None. Schema-parse errors, XML syntax errors and
            validation failures are caught and reported on standard output.
        """
        # Only text needs encoding. Calling .encode() on a Python 2 byte
        # string first decodes it as ASCII and fails on non-ASCII schemas.
        if not isinstance(elem_xsd, bytes):
            elem_xsd = elem_xsd.encode('utf-8')

        try:
            # NOTE(review): parsing from an in-memory buffer gives the schema
            # no base URL, so relative xs:include/xs:import entries inside it
            # presumably resolve against the working directory - confirm.
            xmlschema = ET.XMLSchema(ET.parse(StringIO(elem_xsd)))
            xml = ET.parse(StringIO(f_xml))
            # Raises DocumentInvalid with a readable message on failure.
            xmlschema.assertValid(xml)
            # Boolean form of the same check; only reached on success.
            print('schema.validate() returns "%s".' % xmlschema.validate(xml))

        except ET.XMLSchemaParseError as xspe:
            # Something wrong with the schema itself.
            print("XMLSchemaParseError occurred!")
            print(xspe)

        except ET.XMLSyntaxError as xse:
            # XML (or the XSD document) is not well formed.
            print("XMLSyntaxError occurred!")
            print(xse)

        except ET.DocumentInvalid:
            # XML failed to validate against the schema. `xmlschema` is
            # always bound here because only assertValid() raises this.
            print("DocumentInvalid occurred!")

            error = xmlschema.error_log.last_error
            if error:
                self._print_schema_error(error)

    def run(self):
        """Check well-formedness, then validate the file against the schema.

        :returns: the failure message from :meth:`validate_xml` when the file
            is missing or cannot be parsed; otherwise None, with the
            validation outcome printed by :meth:`validate_xsd_xml`.
        """
        res = self.validate_xml(self.elem_xml)
        if res["result"] is not True:
            return res["info"]

        elem_xsd = self.get_xsd()

        # Read raw bytes so the XML parser applies the document's own
        # encoding declaration. The earlier chardet step only re-encoded
        # pure-ASCII data as UTF-8, which leaves the bytes unchanged.
        with open(self.elem_xml, 'rb') as f:
            f_xml = f.read()
        self.validate_xsd_xml(f_xml.strip(), elem_xsd)

    def get_xsd(self):
        """Return the content of the schema file used for validation.

        NOTE(review): the original comment read "matching schemaLocation
        url"; presumably the schema was meant to be chosen from the
        document's schemaLocation, but the path is currently fixed.
        """
        with open("./xsd/multicacheschemas/MCCI_IN200100UV01.xsd", 'rb') as f:
            return f.read()

    def validate_xml(self, exml):
        """Report whether the file at ``exml`` exists and parses as XML.

        :param exml: filesystem path of the XML document.
        :returns: ``{'result': True}`` on success, otherwise
            ``{'result': False, 'info': <message>}``.
        """
        if not os.path.exists(exml):
            # Previously an empty dict was returned here, which made run()
            # fail with KeyError on res["result"].
            return {'result': False,
                    'info': 'File not found:{0}'.format(exml)}
        try:
            ET.parse(exml)
        except Exception as err:
            # Deliberately broad: any failure is reported to the caller
            # as a message instead of propagating.
            return {'result': False,
                    'info': 'Parsing error info:{0}'.format(err)}
        return {'result': True}
83
if __name__ == "__main__":
    # Validate the sample document. run() returns a message only when the
    # file could not be parsed; print it so the failure is not silent.
    checker = R3xmlCheck("./xsd/aa.xml")
    failure = checker.run()
    if failure:
        print(failure)