from xml.parsers import expat
import pprint
class Parser:
    """Collect ``student`` records from an XML stream using expat.

    Every ``student`` start tag opens a new record in ``Students`` that
    begins with the value of its ``id`` attribute. Each non-blank run of
    text found while inside a ``student`` element (including text of its
    child elements) is stripped and appended to that record.

    Attributes:
        Students: list of records, each ``[id, text, text, ...]``.
        isInStudentElement: True while the parser is between a
            ``student`` start tag and its matching end tag.
    """

    def __init__(self):
        self._parser = expat.ParserCreate()
        self._parser.StartElementHandler = self.start
        self._parser.EndElementHandler = self.end
        self._parser.CharacterDataHandler = self.data
        self.Students = []
        self.isInStudentElement = False
        # Pieces of the text run currently being read. Expat may deliver
        # one run of text in several callbacks (at entity references,
        # newlines or feed() chunk boundaries), so the pieces are joined
        # only when the next tag is reached.
        self._text_parts = []

    def feed(self, data):
        """Parse another chunk of the document; more data may follow."""
        self._parser.Parse(data, False)

    def close(self):
        """Signal end of data and release the underlying expat parser.

        The parser is dropped even if the final parse raises. The instance
        cannot be fed again afterwards.
        """
        try:
            self._parser.Parse("", True)  # end of data
        finally:
            del self._parser  # get rid of circular references

    def start(self, tag, attrs):
        """Start-tag handler: open a new record on ``student``.

        Raises:
            KeyError: if a ``student`` element has no ``id`` attribute.
        """
        self._flush_text()
        if tag == 'student':
            self.Students.append([attrs['id']])
            self.isInStudentElement = True

    def end(self, tag):
        """End-tag handler: store pending text, leave ``student`` scope."""
        self._flush_text()
        if tag == 'student':
            self.isInStudentElement = False

    def data(self, data):
        """Character-data handler: buffer text seen inside a ``student``."""
        if self.isInStudentElement:
            self._text_parts.append(data)

    def _flush_text(self):
        """Append the buffered text run, if non-blank, to the open record."""
        if not self._text_parts:
            return
        text = ''.join(self._text_parts).strip()
        self._text_parts = []
        if text:
            self.Students[-1].append(text)
# Demo: parse a small sample document and print the collected records.
p = Parser()
# NOTE(review): the markup of the original sample was lost; only the text
# values survived. The child tag names and the id values below are
# reconstructed placeholders -- confirm against the original document.
p.feed('''<students>
    <student id="1">
        <name>Lily</name>
        <gender>Female</gender>
        <major>Computer</major>
    </student>
    <student id="2">
        <name>Gates</name>
        <gender>Male</gender>
        <major>Biology</major>
    </student>
</students>''')
p.close()
pprint.pprint(p.Students)
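# Note: elements containing Chinese text keep running into encoding problems, so treat this example as a reference only.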