source: trunk/anuga_core/source/anuga/file/csv_file.py @ 7762

Last change on this file since 7762 was 7762, checked in by hudson, 14 years ago

All tests in file module pass.

File size: 2.8 KB
Line 
1"""
2    A set of functions which extend the capabilities of the Python csv
3    module.
4   
5    These have been left as functions to avoid confusion with the standard
6    csv module.
7"""
8
9import csv
10import numpy as num
11
12
def load_csv_as_dict(file_name, title_check_list=None, delimiter=','):
    """
    Load in the csv as a dictionary, title as key and column info as value.
    Also, create a dictionary, title as key and column index as value,
    to keep track of the column order.

    file_name The path to the file to read.

    title_check_list List of titles that *must* be columns in the file.

    delimiter is the delimiter used to separate the fields

    return 2 dictionaries: ({title:column}, {title:index}).

    Titles are taken from the first line of the file and have surrounding
    whitespace stripped. Column values are stored exactly as read.
    Entries beyond the number of titles on a data line are ignored.

    Raises IOError if
        - the file contains no header line,
        - a title in title_check_list is not a column of the file,
        - a data line has fewer entries than there are titles.

    WARNING: Values are returned as strings.
             Do this to change a list of strings to a list of floats
                 time = [float(x) for x in time]
    """

    # FIXME(Ole): Consider dealing with files without headers
    # FIXME(Ole): Consider a wrapper automatically converting text fields
    #             to the right type by trying for: int, float, string

    attribute_dic = {}
    title_index_dic = {}
    titles_stripped = []  # Titles in file order, whitespace removed

    # The with-block guarantees the file is closed again, also when one of
    # the errors below is raised.
    with open(file_name) as csv_file:
        reader = csv.reader(csv_file, delimiter=delimiter)

        # Read in and manipulate the title info
        try:
            titles = next(reader)
        except StopIteration:
            # Report an empty file the same way as other malformed input
            # rather than leaking StopIteration to the caller.
            msg = 'File %s is empty, no header line found' % file_name
            raise IOError(msg)

        for i, title in enumerate(titles):
            header = title.strip()
            titles_stripped.append(header)
            title_index_dic[header] = i
        title_count = len(titles_stripped)

        # Check required columns
        if title_check_list is not None:
            for title_check in title_check_list:
                if title_check not in title_index_dic:
                    # NOTE: the message says 'row' although a column title
                    # is meant; wording kept as callers may match on it.
                    msg = ('Reading error. This row is not present %s'
                           % title_check)
                    raise IOError(msg)

        # Create a dictionary of column values, indexed by column title
        for line in reader:
            n = len(line)  # Number of entries
            if n < title_count:
                msg = 'Entry in file %s had %d columns ' % (file_name, n)
                msg += 'although there were %d headers' % title_count
                raise IOError(msg)
            # Slice to skip trailing data beyond the last title
            for i, value in enumerate(line[:title_count]):
                attribute_dic.setdefault(titles_stripped[i], []).append(value)

    return attribute_dic, title_index_dic
69
70
71         
def load_csv_as_array(file_name, delimiter=','):
    """
    Read a CSV file with a header line, such as

    time, discharge, velocity
    0.0,  1.2,       0.0
    0.1,  3.2,       1.1
    ...

    and return its columns as a dictionary of numeric arrays.

    file_name The path to the file to read.
    delimiter is the delimiter used to separate the fields

    The returned dictionary maps each column title to an array holding
    that column's entries, each converted with float().

    The file itself is parsed by load_csv_as_dict; see that function
    for more details.
    """

    # Only the {title: column} dictionary is needed here; the
    # {title: index} dictionary returned alongside it is discarded.
    columns_as_text, _ = load_csv_as_dict(file_name, delimiter=delimiter)

    return dict((title, num.array([float(entry) for entry in entries]))
                for title, entries in columns_as_text.items())
96
Note: See TracBrowser for help on using the repository browser.