Context Navigation

source: trunk/anuga_core/source/anuga/utilities/csv_tools.py @ 8968

Last change on this file since 8968 was 7125, checked in by rwilson, 15 years ago
Add another sanity check: is list of input files empty?
File size: 4.0 KB

Line
1	#!/usr/bin/env python
2
3	'''
4	Some CSV file utility routines.
5	'''
6
7
8	import csv
9
10
11	def merge_csv_key_values(file_title_list, output_file,
12	key_col='hours', data_col='stage'):
13	'''Select key and value columns from 'N' CSV files, write one CSV file.
14
15	file_title_list: a list of (filename, new_data_column_title) values, one
16	for each input file
17	output_file: the output CSV file path
18	key_col: column header string of key column
19	data_col: column header string of data column
20
21	The output file will look like:
22	<key_col>, <new_data_column_title1>, <new_data_column_title2>, ...
23	<key_value>, <data1>, <data2>, ...
24	<key_value>, <data1>, <data2>, ...
25	<key_value>, <data1>, <data2>, ...
26	<key_value>, <data1>, <data2>, ...
27
28	There is an assumption that the <key_value> values are the same across
29	all files for the same row. This is tested in the code below.
30	'''
31
32	def read_csv_file(filename, key_col, data_col):
33	'''Read data from a CSV file, get 'key_col' and 'data_col' columns.
34
35	Returns ((key[0], data[0]), ...).
36	'''
37
38	# start reading the CSV file
39	data = []
40	fd = open(filename, 'rb')
41	csv_reader = csv.reader(fd)
42
43	# open file, get header row, calculate required column indices
44	h = csv_reader.next()
45	header = [x.strip() for x in h]
46	if key_col not in header:
47	msg = ("Column '%s' not in file %s"
48	% (key_col, filename))
49	raise Exception, msg
50	if data_col not in header:
51	msg = ("Column '%s' not in file %s"
52	% (data_col, filename))
53	raise Exception, msg
54
55	key_index = header.index(key_col)
56	data_index = header.index(data_col)
57
58	# read data, extract columns, save
59	result = []
60	for line in csv_reader:
61	key_data = line[key_index].strip()
62	data_data = line[data_index].strip()
63	result.append((key_data, data_data))
64
65	fd.close()
66
67	return result
68
69	# get number of input files, check we have 1 or more
70	num_files = len(file_title_list)
71	if num_files == 0:
72	msg = "List 'file_title_list' is empty!?"
73	raise Exception, msg
74
75	# read data from all files
76	file_data = []
77	for (filename, title) in file_title_list:
78	data = read_csv_file(filename, key_col, data_col)
79	file_data.append((filename, title, data))
80
81	# now, file_data -> [(filename, title, [(k,v), (k,v), ...], ...]
82	# sanity check, check num rows same in all files
83	num_rows = None
84	for (fn, t, d) in file_data:
85	if num_rows is None:
86	num_rows = len(d)
87	else:
88	if num_rows != len(d):
89	msg = ('File %s has different number of rows from %s, '
90	'expected %d columns, got %d'
91	% (fn, file_data[0][0], num_rows, len(d)))
92	raise Exception, msg
93
94	# sanity check, check key values same in same rows
95	first_key_values = [v[0] for v in file_data[0][2]]
96	for (fn, t, d) in file_data:
97	key_values = [v[0] for v in d]
98	if key_values != first_key_values:
99	msg = ('Key values differ between files %s and %s!?'
100	% (fn, file_data[0][0]))
101	raise Exception, msg
102
103	# open output file
104	out_fd = open(output_file, 'w')
105	out_csv = csv.writer(out_fd)
106
107	# write column rows to output file
108	header = [key_col]
109	for (fn, col, d) in file_data:
110	header.append(col)
111	out_csv.writerow(header)
112
113	# write data rows to output file
114	file_kv_list = [x[2] for x in file_data]
115	for i in xrange(num_rows):
116	data_row = [file_kv_list[0][i][0]]
117	for file_data in file_kv_list:
118	data_row.append(file_data[i][1])
119	out_csv.writerow(data_row)
120
121	out_fd.close()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: