Context Navigation

csv_file.py @ 9516

Last change on this file since 9516 was 8143, checked in by wilsonr, 14 years ago
Removed '@brief' comments.
File size: 12.0 KB

Line
1	"""
2	A set of functions which extend the capabilities of the Python csv
3	module.
4
5	CSV files have the extension .csv, which stands for Comma Separated Value
6	file. There is no standardised form for this format, so the user is provided
7	with a variety of options for parsing different styles of csv files.
8
9	These have been left as functions to avoid confusion with the standard
10	csv module.
11	"""
12
13
14	import csv
15	import numpy as num
16	import anuga.utilities.log as log
17
18
def load_csv_as_dict(file_name, title_check_list=None, delimiter=',',
                     d_type=str):
    """
    Load a csv file as a dictionary, title as key and column info as value.
    Also, create a dictionary, title as key and column index as value,
    to keep track of the column order.

    The first row of the file is taken to be the header row. Titles are
    stripped of surrounding whitespace before being used as keys.

    file_name The path to the file to read.

    title_check_list List of titles that must be columns in the file.

    delimiter is the delimiter used to separate the fields

    d_type is a callable applied to every value read, typically one of
    float, str, int.

    return 2 dictionaries: ({title:column}, {title:index}).
    If the file holds a header row but no data rows, the first dictionary
    is empty.

    Raises IOError if the file is empty, if a title in title_check_list is
    not a column of the file, or if a data row has fewer entries than there
    are headers. Entries beyond the number of headers are ignored.

    WARNING: With the default d_type, values are returned as strings.
    Do this to change a list of strings to a list of floats
    time = [float(x) for x in time]
    """

    # FIXME(Ole): Consider dealing with files without headers

    attribute_dic = {}
    title_index_dic = {}
    titles_stripped = []    # List of titles, in column order

    # The context manager closes the file even when an error is raised
    with open(file_name) as fid:
        reader = csv.reader(fid, delimiter=delimiter)

        # Read in and manipulate the title info
        try:
            titles = next(reader)
        except StopIteration:
            # Do not leak the iterator protocol to callers
            msg = 'File %s is empty, no header row found' % file_name
            raise IOError(msg)

        for i, title in enumerate(titles):
            header = title.strip()
            titles_stripped.append(header)
            title_index_dic[header] = i
        title_count = len(titles_stripped)

        # Check required columns
        if title_check_list is not None:
            for title_check in title_check_list:
                if title_check not in title_index_dic:
                    msg = 'Reading error. This row is not present %s' \
                          % title_check
                    raise IOError(msg)

        # Create a dictionary of column values, indexed by column title
        for line in reader:
            n = len(line)    # Number of entries
            if n < title_count:
                msg = 'Entry in file %s had %d columns ' % (file_name, n)
                msg += 'although there were %d headers' % title_count
                raise IOError(msg)

            # Slicing skips trailing data beyond the last header
            for i, val in enumerate(line[:title_count]):
                column = attribute_dic.setdefault(titles_stripped[i], [])
                column.append(d_type(val))

    return attribute_dic, title_index_dic
76
77
78
def load_csv_as_array(file_name, delimiter=','):
    """
    Read a CSV file such as

    time, discharge, velocity
    0.0, 1.2, 0.0
    0.1, 3.2, 1.1
    ...

    and return its columns as a dictionary of numeric arrays keyed by
    column title.

    file_name The path to the file to read.
    delimiter is the delimiter used to separate the fields

    Every value is converted with float(), so a non-numeric entry raises
    ValueError.

    See underlying function load_csv_as_dict for more details.
    """

    columns, _ = load_csv_as_dict(file_name, delimiter=delimiter)

    # One float array per column title
    arrays = {}
    for title, entries in columns.items():
        numbers = [float(entry) for entry in entries]
        arrays[title] = num.array(numbers)

    return arrays
105
106
def load_csv_as_matrix(file_name, delimiter=','):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0, 1.2, 0.0
    0.1, 3.2, 1.1
    ...

    to a numeric matrix.

    file_name The path to the file to read.
    delimiter is the delimiter used to separate the fields

    return (header, matrix) where header is the list of column titles in
    file column order and matrix is a 2D float array with one row per data
    row, such that matrix[:, j] holds the values of column header[j].
    A file with a header row but no data rows gives a matrix with 0 rows.

    See underlying function load_csv_as_dict for more details.
    """

    X, title_indices = load_csv_as_dict(file_name, delimiter=delimiter)

    # Order the titles by their column index so that header[j] describes
    # column j of the matrix (dictionary order is not column order)
    header = sorted(title_indices, key=title_indices.get)

    # X is empty when the file holds a header row only
    if X:
        row_count = len(X[header[0]])
    else:
        row_count = 0

    # Return result as a 2D array
    ret = num.zeros((row_count, len(title_indices)), float)

    if X:
        for col_title in header:
            index = title_indices[col_title]
            ret[:, index] = [float(x) for x in X[col_title]]

    return header, ret
139
140
141
def store_parameters(verbose=False, **kwargs):
    """
    Store "kwargs" as one row of a csv file.

    The keyword names, sorted, form the header row and the str() of their
    values form the data row. Values are joined with ',' without quoting.

    If "completed" is in kwargs the row is written to kwargs['file_name'],
    otherwise it is written to the temporary file
    kwargs['output_dir'] + 'detail_temp.csv'. Note that output_dir is
    concatenated as is, so it must end with a path separator.

    If the target file does not exist it is created with the header row.
    If it exists and its first line equals the header, the row is appended.
    If the header differs, the header and the row are appended to
    kwargs['output_dir'] + 'detail_temp.csv' instead and a warning is
    logged (KeyError if output_dir was not supplied in that case).

    verbose If True, log progress information.

    Raises Exception if the required file_name/output_dir keyword is
    missing or if the target file cannot be created.

    There might be a better way to do this using the CSV module
    (DictWriter).
    """

    # is 'completed' in kwargs?
    completed = 'completed' in kwargs

    # Work out which file to write to
    if completed:
        if 'file_name' not in kwargs:
            raise Exception('kwargs must have file_name')
        file_name = str(kwargs['file_name'])
    else:
        # write temp file in output directory
        if 'output_dir' not in kwargs:
            raise Exception('kwargs must have output_dir')
        file_name = str(kwargs['output_dir']) + 'detail_temp.csv'

    # Use the sorted keys to create the header and line data
    keys = sorted(kwargs)
    header = ','.join([str(k) for k in keys]) + '\n'
    line = ','.join([str(kwargs[k]) for k in keys]) + '\n'

    # Read the header of an existing file, or create a new file
    try:
        with open(file_name, 'r') as fid:
            file_header = fid.readline()
    except IOError:
        if verbose:
            log.critical('try to create new file: %s' % file_name)

        # Try to create the file, maybe the directory is bad
        try:
            with open(file_name, 'w') as fid:
                fid.write(header)
        except IOError:
            msg = 'cannot create new file: %s' % file_name
            raise Exception(msg)
        file_header = header
    else:
        if verbose:
            log.critical('read file header %s' % file_header)

    # if header is same or this is a new file
    if file_header == header:
        with open(file_name, 'a') as fid:
            fid.write(line)
    else:
        # backup plan,
        # if header is different and has completed will append info to
        # end of detail_temp.csv file in output directory
        file_name = str(kwargs['output_dir']) + 'detail_temp.csv'
        with open(file_name, 'a') as fid:
            fid.write(header)
            fid.write(line)

        if verbose:
            log.critical('file %s' % file_header.strip('\n'))
            log.critical('head %s' % header.strip('\n'))
            # Compare both with the newline removed
            if file_header.strip('\n') == header.strip('\n'):
                log.critical('they equal')

        msg = 'WARNING: File header does not match input info, ' \
              'the input variables have changed, suggest you change file name'
        log.critical(msg)
237
238
239
def load_csv_as_building_polygons(file_name,
                                  floor_height=3,
                                  clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,floors
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated number of floors is multiplied by floor_height to give
    a height, returned as a separate dictionary also keyed by id.

    Optional parameter floor_height is the height of each building story.
    Optional parameter clipping_polygons is a list of polygons selecting
    buildings. It is passed on unchanged to load_csv_as_polygons.

    See load_csv_as_polygons for more details
    """

    polygons, values = load_csv_as_polygons(
        file_name,
        value_name='floors',
        clipping_polygons=clipping_polygons)

    # The 'floors' entries are read as strings, convert before scaling
    heights = {}
    for key in values:
        v = float(values[key])
        heights[key] = v*floor_height

    return polygons, heights
279
280
def load_csv_as_polygons(file_name,
                         value_name='value',
                         clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,value
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated values are returned as a separate dictionary also keyed
    by id.


    easting: x coordinate relative to zone implied by the model
    northing: y coordinate relative to zone implied by the model
    id: tag for polygon comprising points with this tag
    value: numeral associated with each polygon. These must be the same for
           all points in each polygon.

    The last header, value, can take on other names such as roughness,
    floors, etc. Pass that name as value_name. Pass value_name=None when
    the file has no value column, in which case the returned values will
    be None.

    Eastings and Northings will be returned as floating point values while
    id and values will be returned as strings.

    All points of a polygon must be on consecutive lines. An id that
    reappears after another polygon has started raises Exception.

    Optional argument: clipping_polygons will select only those polygons
    that are fully within one or more of the clipping_polygons. In other
    words any polygon from the csv file which has at least one point not
    inside one of the clipping polygons will be excluded. Only the polygons
    dictionary is pruned, the values dictionary keeps the excluded ids.

    Raises AssertionError if the columns are not as expected or if the
    value differs between points of one polygon.

    See underlying function load_csv_as_dict for more details.
    """

    X, _ = load_csv_as_dict(file_name)

    msg = 'Polygon csv file must have 3 or 4 columns'
    assert len(X) in [3, 4], msg

    for required in ('easting', 'northing', 'id'):
        msg = 'Did not find expected column header: %s' % required
        assert required in X, msg

    if value_name is not None:
        msg = 'Did not find expected column header: %s' % value_name
        assert value_name in X, msg

    if clipping_polygons is not None:
        # Imported here as it is only needed when clipping.
        # NOTE(review): module path assumed to be anuga.geometry.polygon,
        # confirm against the package layout.
        from anuga.geometry.polygon import inside_polygon

    polygons = {}
    if value_name is not None:
        values = {}
    else:
        # No value column to read
        values = None

    # Loop through entries and compose polygons
    excluded_polygons = set()
    past_ids = set()
    last_id = None
    for i, poly_id in enumerate(X['id']):

        # Check for duplicate polygons
        if poly_id in past_ids:
            msg = 'Polygon %s was duplicated in line %d' % (poly_id, i)
            raise Exception(msg)

        if poly_id not in polygons:
            # Start new polygon
            polygons[poly_id] = []
            if values is not None:
                values[poly_id] = X[value_name][i]

            # Keep track of previous polygon ids
            if last_id is not None:
                past_ids.add(last_id)

        # Append this point to current polygon
        point = [float(X['easting'][i]), float(X['northing'][i])]

        if clipping_polygons is not None:
            # NOTE(review): relies on inside_polygon returning a true value
            # exactly when the point is inside, confirm for a single point.
            exclude = True
            for clipping_polygon in clipping_polygons:
                if inside_polygon(point, clipping_polygon):
                    exclude = False
                    break

            if exclude:
                excluded_polygons.add(poly_id)

        polygons[poly_id].append(point)

        # Check that value is the same across each polygon.
        # The message is only built if the assertion fails.
        if values is not None:
            assert values[poly_id] == X[value_name][i], \
                ('Values must be the same across each polygon. '
                 'I got %s in line %d but it should have been %s'
                 % (X[value_name][i], i, values[poly_id]))

        last_id = poly_id

    # Weed out polygons that were not wholly inside clipping polygons
    for poly_id in excluded_polygons:
        del polygons[poly_id]

    return polygons, values
395
396
397
398
399

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/anuga_core/source/anuga/file/csv_file.py @ 9516

Download in other formats: