source: trunk/anuga_core/source/anuga/file/csv_file.py @ 7870

Last change on this file since 7870 was 7858, checked in by James Hudson, 15 years ago

Refactorings to increase code quality, fixed missing log import from sww_interrogate.

File size: 12.3 KB
1"""
2    A set of functions which extend the capabilities of the Python csv
3    module.
4   
5    CSV files have the extension .csv, which stands for Comma Separated Value
6    file. There is no standardised form for this format, so the user is provided
7    with a variety of options for parsing different styles of csv files.
8   
9    These have been left as functions to aviod confusion with the standard
10    csv module.
11"""
12
13
14import csv
15import numpy as num
16import anuga.utilities.log as log
17
18
def load_csv_as_dict(file_name, title_check_list=None, delimiter=',',
                     d_type=str):
    """
    Load the csv file as a dictionary, title as key and column info as value.
    Also create a dictionary, title as key and column index as value,
    to keep track of the column order.

    file_name        The path to the file to read.

    title_check_list List of titles that *must* be columns in the file.

    delimiter        The delimiter used to separate the fields.

    d_type           The type each value is converted to: one of float, str, int.

    Returns 2 dictionaries: ({title: column_values}, {title: column_index}).

    WARNING: By default values are returned as strings.
             To change a list of strings to a list of floats, do:
                 time = [float(x) for x in time]
    """

    # FIXME(Ole): Consider dealing with files without headers

    attribute_dic = {}
    title_index_dic = {}
    titles_stripped = [] # List of titles

    reader = csv.reader(open(file_name), delimiter=delimiter)

    # Read in and manipulate the title info
    titles = reader.next()
    for i, title in enumerate(titles):
        header = title.strip()
        titles_stripped.append(header)
        title_index_dic[header] = i
    title_count = len(titles_stripped)

    # Check required columns
    if title_check_list is not None:
        for title_check in title_check_list:
            if not title_index_dic.has_key(title_check):
                msg = 'Reading error. This column is not present: %s' \
                      % title_check
                raise IOError, msg

    # Create a dictionary of column values, indexed by column title
    for line in reader:
        n = len(line) # Number of entries
        if n < title_count:
            msg = 'Entry in file %s had %d columns ' % (file_name, n)
            msg += 'although there were %d headers' % title_count
            raise IOError, msg
        for i, val in enumerate(line[:title_count]):  # skip trailing data
            attribute_dic.setdefault(titles_stripped[i], []).append(d_type(val))

    return attribute_dic, title_index_dic

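# A minimal usage sketch (hypothetical file 'gauges.csv' with columns
# 'time' and 'stage'):
#
#     data, indices = load_csv_as_dict('gauges.csv',
#                                      title_check_list=['time', 'stage'])
#     time = [float(x) for x in data['time']]   # values arrive as strings
#     assert indices['time'] == 0               # column order is recorded
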
def load_csv_as_array(file_name, delimiter=','):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0,  1.2,       0.0
    0.1,  3.2,       1.1
    ...

    to a dictionary of numeric arrays.

    file_name The path to the file to read.
    delimiter is the delimiter used to separate the fields

    See underlying function load_csv_as_dict for more details.
    """

    X, _ = load_csv_as_dict(file_name, delimiter=delimiter)

    # Return result as a dict of arrays
    ret = {}
    for key in X.keys():
        ret[key] = num.array([float(x) for x in X[key]])

    return ret

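# A minimal usage sketch (hypothetical file 'hydrograph.csv' with the
# header shown in the docstring above):
#
#     quantities = load_csv_as_array('hydrograph.csv')
#     peak = quantities['discharge'].max()   # each value is a numpy array
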
def load_csv_as_matrix(file_name, delimiter=','):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0,  1.2,       0.0
    0.1,  3.2,       1.1
    ...

    to a numeric matrix.

    file_name The path to the file to read.
    delimiter is the delimiter used to separate the fields

    Returns the list of column titles (in file order) and the data
    as a 2D array.

    See underlying function load_csv_as_dict for more details.
    """

    X, title_indices = load_csv_as_dict(file_name, delimiter=delimiter)

    col_titles = title_indices.keys()

    # Return result as a 2D array
    ret = num.zeros((len(X[col_titles[0]]), len(title_indices)), float)

    # Build the header in column order so that header[j] labels ret[:, j]
    # (dict key order is arbitrary, but title_indices records each column's
    # position in the file)
    header = [None] * len(title_indices)
    for col_title in col_titles:
        index = title_indices[col_title]
        header[index] = col_title
        for i, x in enumerate(X[col_title]):
            ret[i, index] = float(x)

    return header, ret

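# A minimal usage sketch (same hypothetical 'hydrograph.csv' as above):
#
#     header, data = load_csv_as_matrix('hydrograph.csv')
#     # header[j] names column j of data, so this selects the discharge column
#     discharge = data[:, header.index('discharge')]
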
##
# @brief Store keyword params into a CSV file.
# @param verbose True if this function is to be verbose.
# @param kwargs Dictionary of keyword args to store.
# @note If kwargs dict contains 'file_name' key, that has the output filename.
#       If not, make up a filename in the output directory.
def store_parameters(verbose=False, **kwargs):
    """
    Store "kwargs" in a temporary csv file. If "completed" is in kwargs,
    the csv file is kwargs['file_name'], else it is
    kwargs['output_dir'] + 'detail_temp.csv'.

    kwargs must have a 'file_name' key (or an 'output_dir' key from which
    a temporary file name is built); that is the file written to. There
    might be a better way to do this using the csv module's writer.
    """

    import types

    # Check that kwargs is a dictionary
    if type(kwargs) != types.DictType:
        raise TypeError

    # Is 'completed' in kwargs?
    completed = kwargs.has_key('completed')

    # Get the file name and assert that one can be derived
    if completed:
        try:
            file_name = str(kwargs['file_name'])
        except:
            raise Exception('kwargs must have file_name')
    else:
        # Write temp file in output directory
        try:
            file_name = str(kwargs['output_dir']) + 'detail_temp.csv'
        except:
            raise Exception('kwargs must have output_dir')

    # Extract the header info and the new line info
    line = ''
    header = ''
    count = 0
    keys = kwargs.keys()
    keys.sort()

    # Use the sorted keys to create the header and line data
    for k in keys:
        header += str(k)
        line += str(kwargs[k])
        count += 1
        if count < len(kwargs):
            header += ','
            line += ','
    header += '\n'
    line += '\n'

    # Check the header info: if it is the same then append,
    # if not then create a new file
    try:
        fid = open(file_name, 'r')
        file_header = fid.readline()
        fid.close()
        if verbose:
            log.critical('read file header %s' % file_header)
    except Exception:
        msg = 'try to create new file: %s' % file_name
        if verbose:
            log.critical(msg)
        # Try to create the file; maybe the directory is bad
        try:
            fid = open(file_name, 'w')
            fid.write(header)
            fid.close()
            file_header = header
        except:
            msg = 'cannot create new file: %s' % file_name
            raise Exception, msg

    # If header is the same or this is a new file
    if file_header == str(header):
        fid = open(file_name, 'a')
        fid.write(line)
        fid.close()
    else:
        # Backup plan:
        # if the header is different and 'completed' was given, append the
        # info to the end of the detail_temp.csv file in the output directory
        file_name = str(kwargs['output_dir']) + 'detail_temp.csv'
        fid = open(file_name, 'a')
        fid.write(header)
        fid.write(line)
        fid.close()

        if verbose:
            log.critical('file %s', file_header.strip('\n'))
            log.critical('head %s', header.strip('\n'))
        if file_header.strip('\n') == str(header):
            log.critical('they equal')

        msg = 'WARNING: File header does not match input info, ' \
              'the input variables have changed, suggest you change file name'
        log.critical(msg)

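# A minimal usage sketch (hypothetical values; note that 'output_dir' must
# end with a path separator because the code appends 'detail_temp.csv'
# directly to it):
#
#     store_parameters(output_dir='results/', run_id=42, friction=0.03)
#     store_parameters(file_name='results/final.csv', run_id=42,
#                      friction=0.03, completed=True)
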
def load_csv_as_building_polygons(file_name,
                                  floor_height=3):
    """
    Convert CSV files of the form:

    easting,northing,id,floors
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated number of floors is converted to m above MSL and
    returned as a separate dictionary, also keyed by id.

    Optional parameter floor_height is the height of each building story.

    See underlying function load_csv_as_polygons for more details.
    """

    polygons, values = load_csv_as_polygons(file_name,
                                            value_name='floors',
                                            clipping_polygons=None)

    heights = {}
    for key in values.keys():
        v = float(values[key])
        heights[key] = v * floor_height

    return polygons, heights

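# A minimal usage sketch (hypothetical file 'buildings.csv' laid out as in
# the docstring above):
#
#     polygons, heights = load_csv_as_building_polygons('buildings.csv')
#     for poly_id in polygons:
#         print poly_id, len(polygons[poly_id]), heights[poly_id]
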
def load_csv_as_polygons(file_name,
                         value_name='value',
                         clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,value
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated values are returned as a separate dictionary, also keyed
    by id.

    easting:  x coordinate relative to the zone implied by the model
    northing: y coordinate relative to the zone implied by the model
    id:       tag for the polygon comprising all points with this tag
    value:    number associated with each polygon. It must be the same for
              all points in a given polygon.

    The last header, value, can take on other names such as roughness,
    floors, etc - or it can be omitted, in which case the returned values
    will be None.

    Eastings and northings will be returned as floating point values while
    id and values will be returned as strings.

    Optional argument clipping_polygons will select only those polygons
    that are fully within one or more of the clipping polygons. In other
    words, any polygon from the csv file which has at least one point not
    inside one of the clipping polygons will be excluded.

    See underlying function load_csv_as_dict for more details.
    """

    X, _ = load_csv_as_dict(file_name)

    msg = 'Polygon csv file must have 3 or 4 columns'
    assert len(X.keys()) in [3, 4], msg

    msg = 'Did not find expected column header: easting'
    assert 'easting' in X.keys(), msg

    msg = 'Did not find expected column header: northing'
    assert 'northing' in X.keys(), msg

    msg = 'Did not find expected column header: id'
    assert 'id' in X.keys(), msg

    if value_name is not None:
        msg = 'Did not find expected column header: %s' % value_name
        assert value_name in X.keys(), msg

    polygons = {}
    if len(X.keys()) == 4:
        values = {}
    else:
        values = None

    # Loop through entries and compose polygons
    excluded_polygons = {}
    past_ids = {}
    last_id = None
    for i, poly_id in enumerate(X['id']):

        # Check for duplicate polygons
        if poly_id in past_ids:
            msg = 'Polygon %s was duplicated in line %d' % (poly_id, i)
            raise Exception, msg

        if poly_id not in polygons:
            # Start new polygon
            polygons[poly_id] = []
            if values is not None:
                values[poly_id] = X[value_name][i]

            # Keep track of previous polygon ids
            if last_id is not None:
                past_ids[last_id] = i

        # Append this point to current polygon
        point = [float(X['easting'][i]), float(X['northing'][i])]

        if clipping_polygons is not None:
            exclude = True
            for clipping_polygon in clipping_polygons:
                if inside_polygon(point, clipping_polygon):
                    exclude = False
                    break

            if exclude is True:
                excluded_polygons[poly_id] = True

        polygons[poly_id].append(point)

        # Check that the value is the same across each polygon
        if values is not None:
            msg = 'Values must be the same across each polygon. '
            msg += 'I got %s in line %d but it should have been %s' \
                   % (X[value_name][i], i, values[poly_id])
            assert values[poly_id] == X[value_name][i], msg

        last_id = poly_id

    # Weed out polygons that were not wholly inside clipping polygons
    for poly_id in excluded_polygons:
        del polygons[poly_id]

    return polygons, values

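# A minimal usage sketch (hypothetical file 'regions.csv' with columns
# easting, northing, id and roughness; the clipping polygon is made up):
#
#     clip = [[422000.0, 870000.0], [423000.0, 870000.0],
#             [423000.0, 872000.0], [422000.0, 872000.0]]
#     polygons, values = load_csv_as_polygons('regions.csv',
#                                             value_name='roughness',
#                                             clipping_polygons=[clip])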