import os
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element
import gflags
import sys
from PIL import Image
# Shared flag registry; main() parses the command line into it via Flags(argv).
Flags = gflags.FLAGS
# Root of the first annotated dataset. main() passes it to read_xml() as the
# source of the objects to merge and to copy_img() as the source of the images.
gflags.DEFINE_string(
'source_dir1', 'C:/Users/Administrator/Desktop/test/test1', 'source_dir1')
# Root of the second annotated dataset. main() passes it to read_xml(), which
# searches it for the annotation that carries the same <filename>.
gflags.DEFINE_string(
'source_dir2', 'C:/Users/Administrator/Desktop/test/test2', 'source_dir2')
# Root directory that receives the merged XML files and the copied images.
gflags.DEFINE_string(
'result_dir', 'C:/Users/Administrator/Desktop/test/result', 'result_dir')
def copy_img(inputpath, outputpath):
    """Copy the ``*jpg`` files found in the sub-folders of *inputpath*.

    Every directory below *inputpath* (at any depth) is scanned and its JPEG
    files are copied to ``outputpath/<folder name>/``.  Only the folder's own
    name is kept, so nested folders end up side by side in *outputpath*.
    Files located directly in *inputpath* itself are not copied.

    The files are copied byte-for-byte: images are neither decoded nor
    re-encoded, so the copy is identical to the source file.

    Args:
        inputpath: Root directory whose sub-folders hold the images.
        outputpath: Root directory that receives one folder per source folder.
            Missing destination folders are created on demand.
    """
    import shutil  # local import: only this function needs it

    for rt, folders, _ in os.walk(inputpath):
        for folder in folders:
            folder_path = os.path.join(rt, folder)
            # Built with '/' exactly like read_xml() does, so both functions
            # address (and report) the same destination folder.
            target_dir = outputpath + '/' + folder
            for file_name in os.listdir(folder_path):
                if not file_name.endswith('jpg'):
                    continue
                oldpath = os.path.join(folder_path, file_name)
                if not os.path.isfile(oldpath):
                    # A directory whose name happens to end in 'jpg'.
                    continue
                # Create the destination lazily so folders without images do
                # not leave empty directories behind.
                if not os.path.exists(target_dir):
                    os.makedirs(target_dir)
                newpath = os.path.join(target_dir, file_name)
                shutil.copyfile(oldpath, newpath)
                print('save successfully %s' % newpath)
def delect_node(inputpath, sku):
    """Delete every ``<object>`` named *sku* from the XML files in *inputpath*.

    All files ending in ``xml`` below *inputpath* are parsed.  A top-level
    ``<object>`` element is removed from the document root when any ``<name>``
    element nested inside it has text equal to *sku*.  Files are rewritten in
    place (UTF-8, with XML declaration) only when something was removed, so
    untouched annotations keep their original bytes.

    Args:
        inputpath: Root directory that is walked recursively.
        sku: Name text identifying the objects to delete.
    """
    for rt, _, files in os.walk(inputpath):
        for file_name in files:
            if not file_name.endswith('xml'):
                continue
            filepath = os.path.join(rt, file_name)
            tree = ET.parse(filepath)
            root = tree.getroot()
            removed = False
            # findall() returns a list, so removing children of root while
            # looping over it is safe.
            for obj_node in root.findall('object'):
                names = [node.text for node in obj_node.iter('name')]
                # Decide once per object: removing the same child twice
                # (several matching <name> tags) would raise ValueError.
                if sku in names:
                    print('delect seccessful %s objedt' % sku)
                    root.remove(obj_node)
                    removed = True
            if removed:
                tree.write(filepath, encoding='utf-8', xml_declaration=True)
def get_filename_or_path(filename, inputpath):
    """Find the XML file under *inputpath* whose ``<filename>`` is *filename*.

    Every file ending in ``xml`` below *inputpath* is parsed and the text of
    its top-level ``<filename>`` element is compared with *filename*.  Files
    without a ``<filename>`` element are skipped instead of aborting the
    search.

    Args:
        filename: Text to look for in the ``<filename>`` element.
        inputpath: Root directory that is walked recursively.

    Returns:
        A ``(filename, filepath)`` tuple for the first match, or ``None`` when
        no annotation matches.
    """
    for rt, _, files in os.walk(inputpath):
        for file_name in files:
            if not file_name.endswith('xml'):
                continue
            filepath = os.path.join(rt, file_name)
            node = ET.parse(filepath).getroot().find('filename')
            if node is None:
                # No <filename> tag at all: this file cannot be the match.
                continue
            if node.text == filename:
                return (node.text, filepath)
    return None
def prettyXml(element, indent, newline, level=0):
    """Indent *element* and all of its descendants in place.

    The ``text`` and ``tail`` whitespace of the tree is rewritten so that the
    serialised document puts each child element on its own line.  Elements
    without children keep their text untouched.

    Args:
        element: The ``Element`` to format; it is modified in place.
        indent: String used for one indentation step (e.g. ``'\\t'``).
        newline: String used as line break (e.g. ``'\\n'``).
        level: Nesting depth of *element*; callers normally leave the default.
    """
    children = list(element)
    # Test the child count explicitly: the truth value of an Element is
    # deprecated and will stop meaning "has children".
    if children:
        inner_pad = newline + indent * (level + 1)
        if element.text is None or element.text.isspace():
            # No real text: only position the first child.
            element.text = inner_pad
        else:
            # Keep the text, on its own line, ahead of the first child.
            element.text = inner_pad + element.text.strip() + inner_pad
    # Leaf elements are deliberately left alone; re-indenting their text would
    # push the text itself onto a separate line.
    last = len(children) - 1
    for position, child in enumerate(children):
        if position < last:
            # Followed by a sibling: the next line starts at the same depth.
            child.tail = newline + indent * (level + 1)
        else:
            # Last child: the next line is the parent's closing tag, which
            # sits one level further out.
            child.tail = newline + indent * level
        prettyXml(child, indent, newline, level=level + 1)
def write_xml(box, name, skuname, root, tree, outpath):
    """Append one ``<object>`` annotation to *root* and save *tree*.

    The new element has the layout::

        <object>
            <bndbox><xmin/><ymin/><xmax/><ymax/></bndbox>
            <name/>
            <skuName/>
        </object>

    After appending, the whole tree is re-indented with tabs and written to
    *outpath* as UTF-8 with an XML declaration.

    Args:
        box: Sequence whose first four items are used, in order, as the
            ``xmin``, ``ymin``, ``xmax`` and ``ymax`` values.
        name: Text of the ``<name>`` element.
        skuname: Text of the ``<skuName>`` element.
        root: Root element that receives the new ``<object>``.
        tree: ``ElementTree`` that is written out; expected to own *root*.
        outpath: Destination file path.

    Raises:
        IndexError: If *box* has fewer than four items.
    """
    def as_text(value):
        # ElementTree refuses to serialise numbers, so convert them up front;
        # strings and None (empty element) pass through unchanged.
        if isinstance(value, (int, float)):
            return str(value)
        return value

    object_node = Element('object')
    bndbox_node = ET.SubElement(object_node, 'bndbox')
    for slot, tag in enumerate(('xmin', 'ymin', 'xmax', 'ymax')):
        ET.SubElement(bndbox_node, tag).text = as_text(box[slot])
    ET.SubElement(object_node, 'name').text = as_text(name)
    ET.SubElement(object_node, 'skuName').text = as_text(skuname)

    root.append(object_node)
    prettyXml(root, '\t', '\n')
    tree.write(outpath, encoding='utf-8', xml_declaration=True)
    # Report success only once the file has actually been written.
    print('save successful %s' % outpath)
def _last_text(parent, tag):
    """Return the text of the last *tag* element nested in *parent*, or None."""
    text = None
    for node in parent.iter(tag):
        text = node.text
    return text


def read_xml(inputpath1, inputpath2, outputhpath):
    """Merge the objects annotated under *inputpath1* into *inputpath2* files.

    For every file ending in ``xml`` inside a sub-folder of *inputpath1*, the
    annotation in *inputpath2* with the same ``<filename>`` text is looked up
    with get_filename_or_path().  Each ``<object>`` of the first file is then
    appended to the second document through write_xml(), and the result is
    stored as ``outputhpath/<folder name>/<xml file name>``.

    Source files that have no ``<filename>`` element, or no counterpart in
    *inputpath2*, are reported and skipped.  Source files without any
    ``<object>`` produce no output file.

    Args:
        inputpath1: Root directory of the annotations to merge in.
        inputpath2: Root directory searched for the matching annotations.
        outputhpath: Root directory for the merged files; one folder per
            source folder is created when missing.
    """
    for rt, folders, _ in os.walk(inputpath1):
        for folder in folders:
            folder_path = os.path.join(rt, folder)
            out_dir = outputhpath + '/' + folder
            for file_name in os.listdir(folder_path):
                if not file_name.endswith('xml'):
                    continue
                filepath = os.path.join(folder_path, file_name)
                if not os.path.exists(out_dir):
                    os.makedirs(out_dir)
                outpath = out_dir + '/' + file_name

                root = ET.parse(filepath).getroot()
                filename_node = root.find('filename')
                if filename_node is None:
                    print('skip %s: no filename element' % filepath)
                    continue
                match = get_filename_or_path(filename_node.text, inputpath2)
                if match is None:
                    # Without a counterpart there is nothing to merge into.
                    print('skip %s: no matching annotation in %s'
                          % (filepath, inputpath2))
                    continue
                tree2 = ET.parse(match[1])
                root2 = tree2.getroot()

                for object_node in root.iter('object'):
                    # Fresh values per object, so a missing tag yields an
                    # empty element instead of the previous object's data.
                    box = [None, None, None, None]
                    for bnd in object_node.iter('bndbox'):
                        # Coordinates are taken positionally; write_xml()
                        # labels them xmin, ymin, xmax, ymax in that order.
                        for slot, coord in enumerate(list(bnd)[:4]):
                            box[slot] = coord.text
                    name = _last_text(object_node, 'name')
                    skuname = _last_text(object_node, 'skuName')
                    write_xml(box, name, skuname, root2, tree2, outpath)
def main(argv):
    """Parse the command-line flags, then merge annotations and copy images.

    Args:
        argv: Full argument vector, program name included.
    """
    Flags(argv)
    # Optional clean-up step, currently disabled: strip one SKU from the
    # results with delect_node(Flags.result_dir, '4000000000001').
    first_source = Flags.source_dir1
    second_source = Flags.source_dir2
    destination = Flags.result_dir
    read_xml(first_source, second_source, destination)
    copy_img(first_source, destination)


if __name__ == '__main__':
    main(sys.argv)
