1 | """ |
---|
2 | A set of functions which extend the capabilities of the Python csv |
---|
3 | module. |
---|
4 | |
---|
5 | These have been left as functions to avoid confusion with the standard |
---|
6 | csv module. |
---|
7 | """ |
---|
8 | |
---|
9 | import csv |
---|
10 | import numpy as num |
---|
11 | |
---|
12 | |
---|
def load_csv_as_dict(file_name, title_check_list=None, delimiter=','):
    """
    Load a csv file as a dictionary, title as key and column info as value.
    Also, create a dictionary, title as key and column index as value,
    to keep track of the column order.

    file_name         The path to the file to read.  The first row is
                      taken as the header row; titles are stripped of
                      surrounding whitespace before being used as keys.

    title_check_list  List of titles that *must* be columns in the file.

    delimiter         is the delimiter used to separate the fields

    return 2 dictionaries: ({title:column}, {title:index}).
    A title only appears in the first dictionary once at least one data
    row has been read, so a file holding just a header row gives {}.

    Raises IOError if the file is empty, if a title from title_check_list
    is not in the header row, or if a data row has fewer entries than
    there are headers.  Entries beyond the last header are ignored.

    WARNING: Values are returned as strings.
             Do this to change a list of strings to a list of floats
                 time = [float(x) for x in time]
    """

    # FIXME(Ole): Consider dealing with files without headers
    # FIXME(Ole): Consider a wrapper automatically converting text fields
    #             to the right type by trying for: int, float, string

    attribute_dic = {}
    title_index_dic = {}

    # The with-block guarantees the file is closed, including when one of
    # the checks below raises.
    with open(file_name) as csv_file:
        reader = csv.reader(csv_file, delimiter=delimiter)

        # Read in and manipulate the title info
        try:
            titles = next(reader)
        except StopIteration:
            # Report an empty file the same way as other malformed input
            # rather than leaking StopIteration to the caller.
            msg = 'File %s is empty, no header row found' % file_name
            raise IOError(msg)

        titles_stripped = [title.strip() for title in titles]
        for i, header in enumerate(titles_stripped):
            title_index_dic[header] = i
        title_count = len(titles_stripped)

        # Check required columns
        if title_check_list is not None:
            for title_check in title_check_list:
                if title_check not in title_index_dic:
                    msg = ('Reading error. This row is not present %s'
                           % title_check)
                    raise IOError(msg)

        # Create a dictionary of column values, indexed by column title
        for line in reader:
            n = len(line)       # Number of entries
            if n < title_count:
                msg = 'Entry in file %s had %d columns ' % (file_name, n)
                msg += 'although there were %d headers' % title_count
                raise IOError(msg)
            # zip stops at the last header, which skips trailing data
            for header, value in zip(titles_stripped, line):
                attribute_dic.setdefault(header, []).append(value)

    return attribute_dic, title_index_dic
---|
69 | |
---|
70 | |
---|
71 | |
---|
def load_csv_as_array(file_name, delimiter=','):
    """
    Read a CSV file with a header row, such as

        time, discharge, velocity
        0.0,  1.2,       0.0
        0.1,  3.2,       1.1
        ...

    and return a dictionary mapping each column title to a numeric
    array holding that column's entries converted with float().

    file_name   The path to the file to read.
    delimiter   is the delimiter used to separate the fields

    The file is parsed by load_csv_as_dict; see that function for
    more details.
    """

    # Only the column data is needed here; the title -> index map that
    # load_csv_as_dict also returns is discarded.
    columns, _ = load_csv_as_dict(file_name, delimiter=delimiter)

    return dict((title, num.array([float(entry) for entry in entries]))
                for title, entries in columns.items())
---|
96 | |
---|