Context Navigation

source: anuga_core/source/anuga/utilities/xml_tools.py @ 5072

Last change on this file since 5072 was 5022, checked in by ole, 17 years ago
Added cairns license stub and fixed problem with empty xml tags.
File size: 7.2 KB

Line
1	"""Basic XML utilities based on minidom - the built in Document Object Model
2	"""
3
4	import sys
5	from xml.dom import minidom, Node
6	#from xml.sax import make_parser, parse as validate, handler
7
def print_tree(n, indent=0):
    """Print a DOM node, its following siblings and all their descendants.

    One line is written to standard output per node, showing the node
    name, node type and whitespace-stripped node value.  Children are
    indented four columns further than their parent.

    n: DOM node to start from; None prints nothing.
    indent: number of leading spaces used for nodes at this level.
    """

    while n is not None:
        # Build the complete line first: a single-argument print() call
        # produces the same output under Python 2 and Python 3, whereas
        # the multi-argument print statement is Python 2 only.
        fields = [' ' * indent,
                  'Node name: "%s",' % n.nodeName,
                  'Node type: "%s",' % n.nodeType,
                  'Node value: "%s"' % str(n.nodeValue).strip()]
        print(' '.join(fields))

        # Depth first: descend into the children, then move on to the
        # next sibling at the current level.
        print_tree(n.firstChild, indent + 4)
        n = n.nextSibling
22
23
def pretty_print_tree(n, indent=0):
    """Print the given node using its default string conversion.

    n: object to print.
    indent: accepted for symmetry with print_tree but currently unused.
    """

    # The parenthesised single-argument form is valid both as a
    # Python 2 print statement and as a Python 3 print() call.
    print(n)
26
def parse(fid):
    """Parse XML file descriptor and return DOM object.

    fid: source handed to minidom.parse.  When it provides a seek
         method it is rewound to the beginning first, so a file object
         that has already been read from is parsed from the start.
         Objects without seek (for example a filename string or a
         read-only stream) are passed on unchanged.

    Returns the document object produced by minidom.parse.
    """

    # FIXME (OLE): XML code should be validated against the DTD

    # Only rewind when possible; an unconditional fid.seek(0) raised
    # AttributeError for inputs that minidom.parse itself accepts.
    rewind = getattr(fid, 'seek', None)
    if rewind is not None:
        rewind(0)

    return minidom.parse(fid)
38
39
def get_elements(nodelist):
    """Return list of nodes that are ELEMENT_NODE

    nodelist: iterable of DOM nodes.

    Returns a new list holding, in their original order, those nodes
    whose nodeType equals Node.ELEMENT_NODE.
    """

    return [node for node in nodelist if node.nodeType == Node.ELEMENT_NODE]
50
51
def get_text(nodelist):
    """Return a concatenation of text fields from list of nodes

    nodelist: iterable of DOM nodes.

    Returns the nodeValue of every TEXT_NODE in nodelist, in order,
    separated by ', '.  An empty string is returned when there are no
    text nodes.
    """

    # join() places the separator only between items, which replaces
    # the former append-then-trim of a trailing ', '.
    return ', '.join([node.nodeValue for node in nodelist
                      if node.nodeType == Node.TEXT_NODE])
63
64
65
def remove_whitespace(s):
    """Remove excess whitespace including newlines from string

    s: text string.

    Returns s with leading and trailing whitespace removed and every
    internal run of whitespace (spaces, tabs, newlines) replaced by a
    single space.
    """

    # split() without arguments splits on any whitespace run and drops
    # empty fields.  The str.join method replaces string.join, which
    # no longer exists in Python 3.
    return ' '.join(s.split())
73
74	#return s.replace('\n', '')
75	#s.translate(string.maketrans)
76
77
78
79	#----------------------------
80	# XML object model
81	#----------------------------
82
class XML_element(dict):
    """Node of an ordered XML object model.

    Every instance has a tag and a value.  The value is either a text
    string (terminal node) or a list of child XML_element instances.
    An instance created without a tag represents the document root and
    uses the XML declaration as its tag.

    NOTE: the class derives from dict but never stores anything in the
    underlying dictionary; lookups walk the child list instead so that
    the order of elements is preserved.  Built-in operations that are
    not overridden here (len(), truth testing, ==) therefore still see
    an empty dictionary.
    """

    # Text types accepted as terminal values: str, plus unicode on
    # Python 2 (on Python 3 both entries are str).
    _string_types = (str, type(u''))

    def __init__(self,
                 tag=None,
                 value=None,
                 version='1.0',
                 encoding='iso-8859-1'):
        """
        value can be either
          * An XML_element
          * a list of XML_value
          * a text string

        tag: element name.  If None the element becomes the root
             element and its tag is the XML declaration built from
             version and encoding.
        """

        dict.__init__(self)

        # A single child is stored as a one-element list so that the
        # rest of the class only deals with 'text' or 'list of children'.
        if isinstance(value, XML_element):
            value = [value]

        self.value = value

        if tag is None:
            tag = '?xml version="%s" encoding="%s"?' % (version, encoding)
            self.root_element = True
        else:
            self.root_element = False

        self.tag = tag

        # FIXME: It might be better to represent these objects
        # in a proper dictionary format with
        # {tag: value, ...}
        # No, tried that - it removes any notion of ordering.

    def _is_text(self):
        """Return True if this element holds a text string."""
        return isinstance(self.value, self._string_types)

    def _children(self):
        """Return the list of child elements ([] for text or None)."""
        if self.value is None or self._is_text():
            return []
        return self.value

    def __add__(self, other):
        return str(self) + str(other)

    def __radd__(self, other):
        return str(other) + str(self)  # Python swaps self and other

    def __repr__(self):
        return str(self)

    def __str__(self, indent=0):
        """String representation of XML element

        indent: number of leading spaces for this element.  Children
                are indented four columns further, except directly
                below the root element, which adds no indentation.

        NOTE(review): text values are written as they are, without
        escaping XML special characters such as '&' or '<'.
        """

        if self.root_element:
            increment = 0
        else:
            increment = 4

        tab = ' ' * indent
        s = tab + '<%s>' % self.tag

        if self._is_text():
            s += remove_whitespace(self.value)
        else:
            s += '\n'
            s += ''.join([e.__str__(indent + increment)
                          for e in self._children()])
            s += tab

        # The root element is the XML declaration and has no end tag.
        if not self.root_element:
            s += '</%s>\n' % self.tag

        return s

    def __getitem__(self, key):
        """Return sub-tree starting at element with tag equal to specified key
        If node is terminal, its text value will be returned instead of itself.
        This will allow statements such as

        assert xmlobject['datafile']['accountable'] == 'Jane Sexton'

        If more than one element matches the given key a list of all
        matches will be returned.  If nothing matches (which is always
        the case for a terminal node) None is returned.
        """

        matches = []
        for node in self._children():
            if node.tag != key:
                continue

            if isinstance(node.value, self._string_types):
                matches.append(str(node.value))
            else:
                matches.append(node)

        if not matches:
            return None
        if len(matches) == 1:
            return matches[0]
        return matches

    def has_key(self, key):
        """Return True if a direct child element has the given tag."""
        return any(node.tag == key for node in self._children())

    def __contains__(self, key):
        """Support 'key in element' with the same meaning as has_key."""
        return self.has_key(key)

    def keys(self):
        """Return the tags of all direct child elements, in order."""
        return [str(node.tag) for node in self._children()]

    def pretty_print(self, indent=0):
        """Print the document without tags using indentation

        Returns the formatted text as a string; nothing is written to
        standard output.  indent is the number of leading spaces for
        this element, and children are indented four columns further.
        """

        s = ' ' * indent + '%s: ' % self.tag
        if self._is_text():
            s += self.value
        else:
            s += '\n'
            s += ''.join([e.pretty_print(indent + 4)
                          for e in self._children()])
        s += '\n'

        return s
217
218
def xml2object(xml, verbose=False):
    """Generate XML object model from XML file or XML text

    This is the inverse operation to the __str__ representation
    (up to whitespace).

    Input xml can be either an
      * xml file (name given as a string)
      * open xml file object

    verbose is accepted but currently unused.

    Return the object built by dom2object for the parsed document.

    Raises Exception, with the source name included in the message,
    if the input cannot be parsed.
    """

    # FIXME - can we allow xml to be string?
    # This would depend on minidom's parse function

    # Input tests
    opened_here = isinstance(xml, (str, type(u'')))
    if opened_here:
        # Binary mode leaves decoding to the XML parser; minidom.parse
        # opens filenames the same way.
        fid = open(xml, 'rb')
    else:
        fid = xml

    try:
        dom = parse(fid)
    except Exception as e:
        # Throw filename into dom exception.  File-like objects such as
        # in-memory streams may not have a name attribute, so fall back
        # to the object itself rather than failing with AttributeError.
        name = getattr(fid, 'name', fid)
        msg = 'XML file "%s" could not be parsed: ' % (name,)
        msg += str(e)
        raise Exception(msg)
    finally:
        # Only close what was opened here; a caller-supplied file
        # object remains the caller's responsibility.
        if opened_here:
            fid.close()

    return dom2object(dom)
250
251
252
def dom2object(node):
    """Convert DOM representation to XML_object hierarchy.

    node: DOM node (document or element) to convert.

    Returns an XML_element whose tag is node.nodeName (or None for the
    document node, which makes it the root element) and whose value is
      * the stripped text, if the node has non-empty text children,
      * otherwise the list of converted child nodes,
      * otherwise '' for an empty element.

    The object model cannot represent mixed content.  If a node has
    both non-empty text and child elements, the text takes precedence
    and the child elements are dropped.  Several non-empty text pieces
    are joined with a single space.
    """

    text_parts = []
    elements = []
    for n in node.childNodes:

        if n.nodeType == Node.TEXT_NODE:
            # Child is a text element - omit the dom tag #text and
            # go straight to the text value.

            msg = 'Text element has child nodes - this shouldn\'t happen'
            assert len(n.childNodes) == 0, msg

            x = n.nodeValue.strip()
            if x:
                # Empty (whitespace only) text children are skipped
                text_parts.append(x)
        else:
            # XML element
            elements.append(dom2object(n))

    # Text and elements are collected separately: previously a text
    # child replaced the list by a string, so a following element
    # failed on value.append().
    if text_parts:
        value = ' '.join(text_parts)
    elif elements:
        value = elements
    else:
        # Deal with empty elements
        value = ''

    if node.nodeType == Node.DOCUMENT_NODE:
        # Root node (document)
        tag = None
    else:
        # Normal XML node
        tag = node.nodeName

    return XML_element(tag=tag, value=value)
296
297
298
299
300
301	#=================== Useful print statement
302	#if n.nodeType == 3 and str(n.nodeValue).strip() == '':
303	# pass
304	#else:
305	# print 'Node name: "%s",' %n.nodeName,\
306	# 'Node type: "%s",' %n.nodeType,\
307	# 'Node value: "%s",' %str(n.nodeValue).strip(),\
308	# 'Node children: %d' %len(n.childNodes)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: