首先,读取所有 XML 文件的完整路径,并逐行写入 train.txt 文本文件中。

然后读取 train.txt,逐行取出 XML 文件路径并解析;新建一个文件夹,用于保存解析得到的 TXT 文件。写入 TXT 时,只需保存类别名和坐标信息(xmin、ymin、xmax、ymax),字段之间用 Tab 分隔。

 



#!/usr/bin/env python
# coding:utf-8
import glob
import os
import sys

try:
    import xml.etree.cElementTree as ET
except ImportError:  # cElementTree was removed in Python 3.9
    import xml.etree.ElementTree as ET

# Default locations used when the script is run directly.
XML_DIR = 'F:/snow leopard/000_IMAGE_FRAME/000_B_XML/'
LIST_FILE = 'train.txt'
OUTPUT_DIR = 'F:/snow leopard/Data preprocessing/txt'


def write_xml_list(xml_dir=XML_DIR, list_path=LIST_FILE):
    """Write the path of every XML annotation in ``xml_dir`` to ``list_path``.

    One path is written per line.  The paths are sorted because ``glob`` does
    not guarantee any particular order; sorting keeps the list file stable
    from run to run.

    Returns:
        The list of paths that was written.
    """
    xml_paths = sorted(glob.glob(os.path.join(xml_dir, '*xml')))
    with open(list_path, 'w') as list_file:
        for xml_path in xml_paths:
            list_file.write(xml_path + '\n')
    return xml_paths


def _child_text(node, tag, xml_path):
    """Return the stripped text of ``node``'s child ``tag``.

    Raises:
        ValueError: if the child is missing or holds no text, naming the
            offending file so a broken annotation is easy to locate.
    """
    text = node.findtext(tag)
    if text is None or not text.strip():
        raise ValueError(
            '%s: <%s> has no usable <%s> child' % (xml_path, node.tag, tag))
    return text.strip()


def parse_annotation(xml_path):
    """Parse one XML annotation file.

    Returns:
        A ``(stem, sizes, boxes)`` tuple where

        * ``stem`` is the text of ``<filename>`` without its extension (the
          XML file's own name is used when ``<filename>`` is absent or empty),
        * ``sizes`` is a list of ``(width, height)`` text pairs, one per
          ``<size>`` element (an entry is ``None`` when the child is absent),
        * ``boxes`` is a list of ``(name, xmin, ymin, xmax, ymax)`` string
          tuples, one per ``<object>`` element.

    Raises:
        ValueError: if an ``<object>`` lacks ``<name>``, ``<bndbox>`` or one
            of the four coordinates.
    """
    root = ET.parse(xml_path).getroot()

    image_name = (root.findtext('filename') or '').strip()
    if not image_name:
        image_name = os.path.basename(xml_path)
    # splitext copes with extensions of any length (".jpeg", none at all, ...),
    # unlike cutting a fixed four characters off the end.
    stem = os.path.splitext(image_name)[0]

    sizes = [(size.findtext('width'), size.findtext('height'))
             for size in root.findall('size')]

    boxes = []
    for obj in root.findall('object'):
        name = _child_text(obj, 'name', xml_path)
        bndbox = obj.find('bndbox')
        if bndbox is None:
            raise ValueError('%s: <object> has no <bndbox> child' % xml_path)
        coords = tuple(_child_text(bndbox, tag, xml_path)
                       for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
        boxes.append((name,) + coords)
    return stem, sizes, boxes


def convert_annotations(list_path=LIST_FILE, out_dir=OUTPUT_DIR):
    """Convert every XML file listed in ``list_path`` to a TXT file in ``out_dir``.

    For each annotation that contains at least one ``<object>``, the file
    ``<out_dir>/<stem>.txt`` is written with one line per object:
    ``name<TAB>xmin<TAB>ymin<TAB>xmax<TAB>ymax``.  Annotations without objects
    produce no TXT file.  Progress is echoed to stdout.

    The output file is rewritten (not appended to), so running the conversion
    again does not duplicate records.
    """
    with open(list_path) as list_file:
        for line in list_file:
            xml_path = line.rstrip('\r\n')
            if not xml_path:
                continue  # tolerate blank lines in the list file

            stem, sizes, boxes = parse_annotation(xml_path)
            print("*" * 10)
            print(stem)
            for width, height in sizes:
                print(width, height)

            # Created lazily, including any missing parent directories.
            if not os.path.isdir(out_dir):
                os.makedirs(out_dir)
            if not boxes:
                continue

            with open(os.path.join(out_dir, stem + '.txt'), 'w') as out_file:
                for box in boxes:
                    print(box[0])
                    print(box[1], box[2], box[3], box[4])
                    # The trailing newline keeps each object on its own line.
                    out_file.write('\t'.join(box) + '\n')


def main():
    """Build the XML list file, then convert every listed annotation to TXT."""
    write_xml_list()
    convert_annotations()


if __name__ == "__main__":
    main()