[7762] | 1 | """ |
---|
| 2 | A set of functions which extend the capabilities of the Python csv |
---|
| 3 | module. |
---|
| 4 | |
---|
[7776] | 5 | CSV files have the extension .csv, which stands for Comma Separated Value |
---|
| 6 | file. There is no standardised form for this format, so the user is provided |
---|
| 7 | with a variety of options for parsing different styles of csv files. |
---|
| 8 | |
---|
[7762] | 9 | These have been left as functions to avoid confusion with the standard |
---|
| 10 | csv module. |
---|
| 11 | """ |
---|
| 12 | |
---|
| 13 | import csv |
---|
| 14 | import numpy as num |
---|
| 15 | |
---|
| 16 | |
---|
def load_csv_as_dict(file_name, title_check_list=None, delimiter=','):
    """
    Load in the csv as a dictionary, title as key and column info as value.
    Also, create a dictionary, title as key and column index as value,
    to keep track of the column order.

    file_name        The path to the file to read.

    title_check_list List of titles that *must* be columns in the file.

    delimiter        The delimiter used to separate the fields.

    return 2 dictionaries: ({title: column_values}, {title: index}).

    Raises IOError if a required title is missing or if a data row has
    fewer entries than the header.

    WARNING: Values are returned as strings.
    Do this to change a list of strings to a list of floats
        time = [float(x) for x in time]
    """

    # FIXME(Ole): Consider dealing with files without headers
    # FIXME(Ole): Consider a wrapper automatically converting text fields
    #             to the right type by trying for: int, float, string

    attribute_dic = {}
    title_index_dic = {}
    titles_stripped = []  # List of stripped column titles

    # Context manager guarantees the handle is closed (the original used
    # the Python 2 file() builtin and never closed the file).
    with open(file_name, 'r') as csv_file:
        reader = csv.reader(csv_file, delimiter=delimiter)

        # Read in and manipulate the title info
        titles = next(reader)
        for i, title in enumerate(titles):
            header = title.strip()
            titles_stripped.append(header)
            title_index_dic[header] = i
        title_count = len(titles_stripped)

        # Check required columns
        if title_check_list is not None:
            for title_check in title_check_list:
                if title_check not in title_index_dic:
                    msg = 'Reading error. This row is not present %s' % title_check
                    raise IOError(msg)

        # Create a dictionary of column values, indexed by column title
        for line in reader:
            n = len(line)  # Number of entries
            if n < title_count:
                msg = 'Entry in file %s had %d columns ' % (file_name, n)
                msg += 'although there were %d headers' % title_count
                raise IOError(msg)
            for i, value in enumerate(line[:title_count]):  # skip trailing data
                attribute_dic.setdefault(titles_stripped[i], []).append(value)

    return attribute_dic, title_index_dic
---|
| 73 | |
---|
| 74 | |
---|
| 75 | |
---|
def load_csv_as_array(file_name, delimiter=','):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0, 1.2, 0.0
    0.1, 3.2, 1.1
    ...

    to a dictionary of numeric arrays, keyed by column title.

    file_name The path to the file to read.
    delimiter is the delimiter used to separate the fields

    See underlying function load_csv_as_dict for more details.
    """

    columns, _ = load_csv_as_dict(file_name, delimiter=delimiter)

    # Convert each column of strings into a numeric array
    return dict((title, num.array([float(entry) for entry in data]))
                for title, data in columns.items())
---|
[7762] | 102 | |
---|
| 103 | |
---|
def load_csv_as_matrix(file_name, delimiter=','):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0, 1.2, 0.0
    0.1, 3.2, 1.1
    ...

    to a numeric matrix.

    file_name The path to the file to read.
    delimiter is the delimiter used to separate the fields

    return (header, matrix) where header is the list of column titles
    (in dictionary iteration order) and matrix is a 2D float array with
    one row per data line and columns placed at each title's original
    file position.

    See underlying function load_csv_as_dict for more details.
    """

    X, title_indices = load_csv_as_dict(file_name, delimiter=delimiter)

    # list() is required: dict.keys() is a non-indexable view on Python 3
    # (the original indexed col_titles[0] directly).
    col_titles = list(title_indices.keys())

    # Pre-allocate the result: one row per data entry, one column per title
    ret = num.zeros((len(X[col_titles[0]]), len(title_indices)), float)

    header = []
    for col_title in col_titles:
        index = title_indices[col_title]  # column position in the file
        header.append(col_title)
        for i, x in enumerate(X[col_title]):
            ret[i, index] = float(x)

    return header, ret
---|
| 136 | |
---|
| 137 | |
---|
| 138 | |
---|
| 139 | ## |
---|
| 140 | # @brief Store keyword params into a CSV file. |
---|
| 141 | # @param verbose True if this function is to be verbose. |
---|
| 142 | # @param kwargs Dictionary of keyword args to store. |
---|
| 143 | # @note If kwargs dict contains 'file_name' key, that has the output filename. |
---|
| 144 | # If not, make up a filename in the output directory. |
---|
def store_parameters(verbose=False, **kwargs):
    """
    Store "kwargs" as one CSV row. If "completed" is in kwargs, the
    csv file is kwargs['file_name'], else it is
    kwargs['output_dir'] + 'detail_temp.csv'.

    The sorted keyword names form the header row; the values form the
    data row. If the target file already exists with the same header,
    the new row is appended. If the header differs, the row (with its
    header) is appended to 'detail_temp.csv' in kwargs['output_dir']
    as a backup and a warning is logged.

    verbose True if this function is to be verbose.
    kwargs  Dictionary of keyword args to store.

    Raises ValueError if the required file_name/output_dir keyword is
    missing, and Exception if the output file cannot be created.
    """

    # NOTE(review): the original referenced an undefined global 'log'
    # (only csv and numpy are imported in this module); fall back to the
    # stdlib logger so logging does not raise NameError.
    import logging
    log = logging.getLogger(__name__)

    # The original checked type(kwargs) != types.DictType, which can
    # never be true for a **kwargs parameter, so that check is dropped.

    # is 'completed' in kwargs?
    completed = 'completed' in kwargs

    # Get the output file name; the original raised string exceptions
    # here, which is a TypeError at runtime -- use ValueError instead.
    if completed:
        if 'file_name' not in kwargs:
            raise ValueError('kwargs must have file_name')
        out_file = str(kwargs['file_name'])
    else:
        # write temp file in output directory
        if 'output_dir' not in kwargs:
            raise ValueError('kwargs must have output_dir')
        out_file = str(kwargs['output_dir']) + 'detail_temp.csv'

    # Use the sorted keys to create the header and line data
    keys = sorted(kwargs)
    header = ','.join(str(k) for k in keys) + '\n'
    line = ','.join(str(kwargs[k]) for k in keys) + '\n'

    # Check the existing header; if reading fails, create a new file.
    try:
        with open(out_file, 'r') as fid:
            file_header = fid.readline()
        if verbose:
            log.critical('read file header %s' % file_header)
    except IOError:
        msg = 'try to create new file: %s' % out_file
        if verbose:
            log.critical(msg)
        # Try to create the file; maybe the directory is bad.
        try:
            with open(out_file, 'w') as fid:
                fid.write(header)
            file_header = header
        except IOError:
            msg = 'cannot create new file: %s' % out_file
            raise Exception(msg)

    # If the header matches (or this is a new file), append the row
    if file_header == str(header):
        with open(out_file, 'a') as fid:
            fid.write(line)
    else:
        # Backup plan: the header differs (the input variables changed),
        # so append header and row to details_temp.csv in output_dir.
        out_file = str(kwargs['output_dir']) + 'detail_temp.csv'
        with open(out_file, 'a') as fid:
            fid.write(header)
            fid.write(line)

        if verbose:
            log.critical('file %s', file_header.strip('\n'))
            log.critical('head %s', header.strip('\n'))
            if file_header.strip('\n') == str(header):
                log.critical('they equal')

        msg = 'WARNING: File header does not match input info, ' \
              'the input variables have changed, suggest you change file name'
        log.critical(msg)
---|
| 241 | |
---|
[7776] | 242 | |
---|
| 243 | |
---|
def csv2building_polygons(file_name,
                          floor_height=3,
                          clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,floors
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated number of floors are converted to m above MSL and
    returned as a separate dictionary also keyed by id.

    Optional parameter floor_height is the height of each building story.
    Optional parameter clipping_polygons is a list of polygons selecting
    buildings. Any building not in these polygons will be omitted.

    See csv2polygons for more details
    """

    # BUG FIX: the original hard-coded clipping_polygons=None here,
    # silently ignoring the caller's clipping polygons.
    polygons, values = csv2polygons(file_name,
                                    value_name='floors',
                                    clipping_polygons=clipping_polygons)

    # Convert number of floors to height above MSL
    heights = {}
    for key in values.keys():
        v = float(values[key])
        heights[key] = v * floor_height

    return polygons, heights
---|
| 284 | |
---|
| 285 | |
---|
| 286 | ## |
---|
| 287 | # @brief Convert CSV file into a dictionary of polygons and associated values. |
---|
| 288 | # @param filename The path to the file to read, value_name name for the 4th column |
---|
def csv2polygons(file_name,
                 value_name='value',
                 clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,value
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated values are returned as a separate dictionary also keyed by id.


    easting: x coordinate relative to zone implied by the model
    northing: y coordinate relative to zone implied by the model
    id: tag for polygon comprising points with this tag
    value: numeral associated with each polygon. These must be the same for all points in each polygon.

    The last header, value, can take on other names such as roughness, floors, etc - or it can be omitted
    in which case the returned values will be None

    Eastings and Northings will be returned as floating point values while
    id and values will be returned as strings.

    Optional argument: clipping_polygons will select only those polygons that are
    fully within one or more of the clipping_polygons. In other words any polygon from
    the csv file which has at least one point not inside one of the clipping polygons
    will be excluded

    See underlying function load_csv_as_dict for more details.
    """

    X, _ = load_csv_as_dict(file_name)

    msg = 'Polygon csv file must have 3 or 4 columns'
    assert len(X.keys()) in [3, 4], msg

    msg = 'Did not find expected column header: easting'
    assert 'easting' in X, msg

    # The original passed the undefined name 'northing' (instead of msg)
    # as the assertion message here, causing a NameError on failure.
    msg = 'Did not find expected column header: northing'
    assert 'northing' in X, msg

    # The original message wrongly said 'northing' for the id check.
    msg = 'Did not find expected column header: id'
    assert 'id' in X, msg

    if value_name is not None:
        msg = 'Did not find expected column header: %s' % value_name
        assert value_name in X, msg

    polygons = {}
    if len(X.keys()) == 4:
        values = {}
    else:
        values = None

    # Loop through entries and compose polygons
    excluded_polygons = {}
    past_ids = {}       # ids of polygons already finished (id -> line no)
    last_id = None
    for i, id in enumerate(X['id']):

        # Check for duplicate polygons: each id must appear as one
        # contiguous run of rows.
        if id in past_ids:
            msg = 'Polygon %s was duplicated in line %d' % (id, i)
            raise Exception(msg)

        if id not in polygons:
            # Start new polygon
            polygons[id] = []
            if values is not None:
                values[id] = X[value_name][i]

            # Keep track of previous polygon ids (recorded only when a
            # new polygon starts, so continuing a polygon is not flagged
            # as a duplicate).
            if last_id is not None:
                past_ids[last_id] = i

        # Append this point to current polygon
        point = [float(X['easting'][i]), float(X['northing'][i])]

        if clipping_polygons is not None:
            exclude = True
            for clipping_polygon in clipping_polygons:
                # NOTE(review): inside_polygon is not imported in this
                # module -- presumably anuga.geometry.polygon; confirm.
                if inside_polygon(point, clipping_polygon):
                    exclude = False
                    break

            if exclude is True:
                excluded_polygons[id] = True

        polygons[id].append(point)

        # Check that value is the same across each polygon.
        # Guarded: with 3-column files values is None and the original
        # message construction would have raised a KeyError.
        if values is not None:
            msg = 'Values must be the same across each polygon.'
            msg += 'I got %s in line %d but it should have been %s' % (X[value_name][i], i, values[id])
            assert values[id] == X[value_name][i], msg

        last_id = id

    # Weed out polygons that were not wholly inside clipping polygons
    for id in excluded_polygons:
        del polygons[id]

    return polygons, values
---|
| 402 | |
---|
| 403 | |
---|
| 404 | |
---|
| 405 | |
---|
| 406 | |
---|