Python 中读取 XML
XML:指可扩展标记语言
怎样读取XML
先写一个小xml:
<?xml version="1.0" encoding="utf-8" ?>
<data>
<country name="Liechtenstein">
<rank>1</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E"/>
<neighbor name="Switzerland" direction="W"/>
</country>
<country name="Singapore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N"/>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W"/>
<neighbor name="Colombia" direction="E"/>
</country>
</data>
如图:
import xml.etree.ElementTree as ET
# Read an XML document with ElementTree and collect every country's children.
#
# Each ElementTree element exposes three pieces of data that are used below:
#   tag    - the element's name (e.g. "country"); identifies the kind of data
#   attrib - the element's attributes, stored as a dict (e.g. {"name": "Panama"})
#   text   - the text directly inside the element; optional, may be None

# 1. Load the document into memory, where it forms a tree structure.
tree = ET.parse('xml')
# 2. Get the root node of that tree.
root = tree.getroot()

# Maps each country's "name" attribute to a list holding one dict per child.
# NOTE: this dict is named `xml`, the same name as the standard-library
# package; the later `import xml.sax` in this file rebinds the name.
xml = {}
print('tag:', root.tag, 'attrib:', root.attrib, 'text:', root.text)
for ele in root:
    print('tag:', ele.tag, 'atrib:', ele.attrib, ele.attrib['name'])
    value = []
    for e in ele:
        print('tag:', e.tag, 'attrib:', e.attrib, 'text:', e.text)
        if e.text is None:
            # No text (e.g. a self-closing <neighbor .../>): keep its attributes.
            value.append(e.attrib)
        else:
            # Text-bearing child (e.g. <rank>1</rank>): keep it as {tag: text}.
            value.append({e.tag: e.text})
    xml[ele.attrib['name']] = value
print(xml)
如图:
删除子节点:
# Remove the <country> child whose "name" attribute is "Liechtenstein" from
# the root element, then save the resulting tree to a new file.
nodes = root.findall('country')
for node in nodes:
    if node.attrib['name'] == 'Liechtenstein':  # the child node to delete
        root.remove(node)
        break  # only the first match is removed
tree.write('mingbai.xml')
print('删除完成')
如图:
1.
2.
使用SAX API解析XML:
一:创建一个XML:
<?xml version="1.0" encoding="UTF-8" ?> <collection shelf = "New Arrivals"> <movie title = "Enemy Behind"> <type>War, Thriller</type> <format>DVD</format> <year>2013</year> <rating>PG</rating> <stars>10</stars> <description>Talk about a US-Japan war</description> </movie> <movie title = "Transformers"> <type>Anime, Science Fiction</type> <format>DVD</format> <year>1989</year> <rating>R</rating> <stars>8</stars> <description>A schientific fiction</description> </movie> <movie title = "Trigun"> <type>Anime, Action</type> <format>DVD</format> <episodes>4</episodes> <rating>PG</rating> <stars>10</stars> <description>Vash the Stampede!</description> </movie> <movie title = "Ishtar"> <type>Comedy</type> <format>VHS</format> <rating>PG</rating> <stars>2</stars> <description>Viewable boredom</description> </movie> </collection>
二:sax 解析xml
import xml.sax
class MovieHandler(xml.sax.ContentHandler):
    """SAX content handler that prints the details of every <movie> element.

    When a <movie> element starts, a banner and the movie's ``title``
    attribute are printed. For each of the child elements ``type``,
    ``format``, ``year``, ``rating``, ``stars`` and ``description`` the
    element's text is collected and printed when the element ends.
    """

    # Captured child elements, mapped to the label printed in front of them.
    _LABELS = {
        "type": "Type:",
        "format": "Format:",
        "year": "Year:",
        "rating": "Rating:",
        "stars": "Stars:",
        "description": "Description:",
    }

    def __init__(self):
        """Initialise the current-element marker and the text buffers."""
        super().__init__()
        self.CurrentData = ""  # name of the element currently being read
        self.type = ""
        self.format = ""
        self.year = ""
        self.rating = ""
        self.stars = ""
        self.description = ""

    def startElement(self, tag, attributes):
        """Handle the start-of-element event.

        Args:
            tag: Name of the element that is starting.
            attributes: Mapping of the element's attributes.

        Raises:
            KeyError: If a <movie> element has no ``title`` attribute.
        """
        self.CurrentData = tag
        if tag in self._LABELS:
            # Start from an empty buffer so that an empty element does not
            # print the value left over from a previous movie.
            setattr(self, tag, "")
        if tag == "movie":
            print("*****Movie*****")
            title = attributes["title"]
            print("Title:", title)

    def endElement(self, tag):
        """Handle the end-of-element event.

        Prints the text collected for the element currently being read, if
        it is one of the captured elements, then clears the marker.

        Args:
            tag: Name of the element that is ending.
        """
        label = self._LABELS.get(self.CurrentData)
        if label is not None:
            print(label, getattr(self, self.CurrentData))
        # Clear the marker so text between elements is not captured.
        self.CurrentData = ""

    def characters(self, content):
        """Handle character data read from the XML document.

        The parser may deliver the text of one element in several chunks,
        so each chunk is appended to the buffer instead of replacing it.

        Args:
            content: A chunk of character data.
        """
        if self.CurrentData in self._LABELS:
            collected = getattr(self, self.CurrentData)
            setattr(self, self.CurrentData, collected + content)
if __name__ == "__main__":
    # 1. Create an XMLReader.
    parser = xml.sax.make_parser()
    # 2. Turn the namespaces feature off.
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    # 3. Replace the parser's ContentHandler with a MovieHandler instance.
    handler = MovieHandler()
    parser.setContentHandler(handler)
    # 4. Parse the file once; the handler prints the movies as it goes.
    parser.parse("move.xml")
结果如图: