1"""
2    A set of functions which extend the capabilities of the Python csv
3    module.
4   
5    CSV files have the extension .csv, which stands for Comma Separated Value
6    file. There is no standardised form for this format, so the user is provided
7    with a variety of options for parsing different styles of csv files.
8   
9    These have been left as functions to aviod confusion with the standard
10    csv module.
11"""

import csv
import numpy as num

# The following two imports are needed by store_parameters and
# load_csv_as_polygons below; the exact module paths are an assumption for
# this revision of the source tree and may need adjusting.
import anuga.utilities.log as log
from anuga.geometry.polygon import inside_polygon


def load_csv_as_dict(file_name, title_check_list=None, delimiter=','):
    """
    Load the csv file as a dictionary: title as key, column data as value.
    Also create a second dictionary, title as key and column index as value,
    to keep track of the column order.

    file_name        The path to the file to read.

    title_check_list List of titles that *must* be columns in the file.

    delimiter        The delimiter used to separate the fields.

    Return two dictionaries: ({title: column_values}, {title: index}).

    WARNING: Values are returned as strings.
             To convert a list of strings to a list of floats, do e.g.
                 time = [float(x) for x in time]
    """

    # FIXME(Ole): Consider dealing with files without headers
    # FIXME(Ole): Consider a wrapper automatically converting text fields
    #             to the right type by trying for: int, float, string

    attribute_dic = {}
    title_index_dic = {}
    titles_stripped = [] # List of titles

    csv_file = open(file_name, 'rb')
    reader = csv.reader(csv_file, delimiter=delimiter)

    # Read in and manipulate the title info
    titles = reader.next()
    for i, title in enumerate(titles):
        header = title.strip()
        titles_stripped.append(header)
        title_index_dic[header] = i
    title_count = len(titles_stripped)

    # Check that all required columns are present
    if title_check_list is not None:
        for title_check in title_check_list:
            if title_check not in title_index_dic:
                msg = 'Reading error. Required column %s not present' \
                      % title_check
                raise IOError, msg

    # Create a dictionary of column values, indexed by column title
    for line in reader:
        n = len(line) # Number of entries
        if n < title_count:
            msg = 'Entry in file %s had %d columns ' % (file_name, n)
            msg += 'although there were %d headers' % title_count
            raise IOError, msg
        for i, value in enumerate(line[:title_count]):  # skip trailing data
            attribute_dic.setdefault(titles_stripped[i], []).append(value)
    csv_file.close()

    return attribute_dic, title_index_dic

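# Example use of load_csv_as_dict (a minimal sketch, assuming a hypothetical
# file 'data.csv' with columns 'time' and 'discharge'):
#
#     data, indices = load_csv_as_dict('data.csv')
#     times = [float(x) for x in data['time']]  # values arrive as strings
#     assert indices['time'] == 0               # column order is recorded
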
def load_csv_as_array(file_name, delimiter=','):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0,  1.2,       0.0
    0.1,  3.2,       1.1
    ...

    to a dictionary of numeric arrays.

    file_name The path to the file to read.
    delimiter The delimiter used to separate the fields.

    See underlying function load_csv_as_dict for more details.
    """

    X, _ = load_csv_as_dict(file_name, delimiter=delimiter)

    # Return result as a dict of arrays
    ret = {}
    for key in X.keys():
        ret[key] = num.array([float(x) for x in X[key]])

    return ret

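# Example use of load_csv_as_array (a sketch, assuming the same hypothetical
# 'data.csv' as above):
#
#     data = load_csv_as_array('data.csv')
#     peak = data['discharge'].max()  # columns come back as numpy arrays
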
def load_csv_as_matrix(file_name, delimiter=','):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0,  1.2,       0.0
    0.1,  3.2,       1.1
    ...

    to a numeric matrix.

    file_name The path to the file to read.
    delimiter The delimiter used to separate the fields.

    Return a tuple (header, matrix) where header lists the column titles
    in the same order as the matrix columns.

    See underlying function load_csv_as_dict for more details.
    """

    X, title_indices = load_csv_as_dict(file_name, delimiter=delimiter)

    # Sort the titles by column index so that the returned header order
    # matches the column order of the returned matrix
    col_titles = sorted(title_indices, key=title_indices.get)

    # Return result as a 2D array
    ret = num.zeros((len(X[col_titles[0]]), len(title_indices)), float)

    header = []
    for col_title in col_titles:
        index = title_indices[col_title]
        header.append(col_title)
        for i, x in enumerate(X[col_title]):
            ret[i, index] = float(x)

    return header, ret

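# Example use of load_csv_as_matrix (a sketch, assuming the hypothetical
# 'data.csv' above):
#
#     header, mat = load_csv_as_matrix('data.csv')
#     times = mat[:, header.index('time')]  # header[j] names column j
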
##
# @brief Store keyword params into a CSV file.
# @param verbose True if this function is to be verbose.
# @param kwargs Dictionary of keyword args to store.
# @note If kwargs dict contains 'file_name' key, that has the output filename.
#       If not, make up a filename in the output directory.
def store_parameters(verbose=False, **kwargs):
    """
    Store the keyword arguments into a CSV file.

    If 'completed' is a key of kwargs, the output file is
    kwargs['file_name'], otherwise it is
    kwargs['output_dir'] + 'detail_temp.csv'.

    There might be a better way to do this using the csv module's
    DictWriter.
    """

    # Is 'completed' in kwargs? This switches temp file vs final file
    completed = 'completed' in kwargs

    # Get the output file name (note it stays in kwargs and is written too)
    if completed:
        try:
            file = str(kwargs['file_name'])
        except KeyError:
            raise Exception, 'kwargs must have file_name'
    else:
        # write temp file in output directory
        try:
            file = str(kwargs['output_dir']) + 'detail_temp.csv'
        except KeyError:
            raise Exception, 'kwargs must have output_dir'

    # Compose the header and the data line from the sorted keys
    keys = kwargs.keys()
    keys.sort()
    header = ','.join(str(k) for k in keys) + '\n'
    line = ','.join(str(kwargs[k]) for k in keys) + '\n'

    # Check the header info: if it is the same, append; if not, fall back
    # Try to open the file and read the existing header
    try:
        fid = open(file, 'r')
        file_header = fid.readline()
        fid.close()
        if verbose: log.critical('read file header %s' % file_header)
    except IOError:
        msg = 'try to create new file: %s' % file
        if verbose: log.critical(msg)
        # try to create the file; maybe the directory is bad
        try:
            fid = open(file, 'w')
            fid.write(header)
            fid.close()
            file_header = header
        except IOError:
            msg = 'cannot create new file: %s' % file
            raise Exception, msg

    # If the header is the same, or this is a new file, append the data line
    if file_header == str(header):
        fid = open(file, 'a')
        fid.write(line)
        fid.close()
    else:
        # Backup plan: if the header is different, append both header and
        # line to detail_temp.csv in the output directory
        file = str(kwargs['output_dir']) + 'detail_temp.csv'
        fid = open(file, 'a')
        fid.write(header)
        fid.write(line)
        fid.close()

        if verbose:
            log.critical('file %s', file_header.strip('\n'))
            log.critical('head %s', header.strip('\n'))
        if file_header.strip('\n') == str(header):
            log.critical('they equal')

        msg = 'WARNING: File header does not match input info, ' \
              'the input variables have changed, suggest you change file name'
        log.critical(msg)

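# Example use of store_parameters (a sketch; 'outdir/' is a hypothetical
# output directory):
#
#     store_parameters(output_dir='outdir/', run=1, friction=0.03)
#     # ... once the run has finished:
#     store_parameters(file_name='outdir/results.csv', completed=True,
#                      run=1, friction=0.03)
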
def load_csv_as_building_polygons(file_name,
                                  floor_height=3,
                                  clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,floors
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated number of floors is converted to metres above MSL and
    returned as a separate dictionary, also keyed by id.

    Optional parameter floor_height is the height of each building storey.
    Optional parameter clipping_polygons is a list of polygons selecting
    buildings. Any building not in these polygons will be omitted.

    See underlying function load_csv_as_polygons for more details.
    """

    polygons, values = load_csv_as_polygons(file_name,
                                            value_name='floors',
                                            clipping_polygons=clipping_polygons)

    heights = {}
    for key in values.keys():
        v = float(values[key])
        heights[key] = v * floor_height

    return polygons, heights


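# Example use of load_csv_as_building_polygons (a sketch, assuming a
# hypothetical 'buildings.csv' in the format documented above):
#
#     polygons, heights = load_csv_as_building_polygons('buildings.csv',
#                                                       floor_height=3)
#     # heights['3'] == 3.0 for the single-storey building with id 3

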
##
# @brief Convert CSV file into a dictionary of polygons and associated values.
# @param file_name The path to the file to read.
# @param value_name Name of the 4th (value) column.
# @param clipping_polygons Optional list of polygons to clip against.
def load_csv_as_polygons(file_name,
                         value_name='value',
                         clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,value
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated values are returned as a separate dictionary, also keyed
    by id.

    easting:  x coordinate relative to the zone implied by the model
    northing: y coordinate relative to the zone implied by the model
    id:       tag for the polygon comprising points with this tag
    value:    numeral associated with each polygon. This must be the same
              for all points in a given polygon.

    The last header, value, can take on other names such as roughness,
    floors, etc - or it can be omitted, in which case the returned values
    will be None.

    Eastings and northings will be returned as floating point values while
    id and values will be returned as strings.

    Optional argument clipping_polygons will select only those polygons
    that are fully within one or more of the clipping polygons. In other
    words, any polygon from the csv file which has at least one point not
    inside one of the clipping polygons will be excluded.

    See underlying function load_csv_as_dict for more details.
    """

    X, _ = load_csv_as_dict(file_name)

    msg = 'Polygon csv file must have 3 or 4 columns'
    assert len(X.keys()) in [3, 4], msg

    msg = 'Did not find expected column header: easting'
    assert 'easting' in X.keys(), msg

    msg = 'Did not find expected column header: northing'
    assert 'northing' in X.keys(), msg

    msg = 'Did not find expected column header: id'
    assert 'id' in X.keys(), msg

    if value_name is not None:
        msg = 'Did not find expected column header: %s' % value_name
        assert value_name in X.keys(), msg

    polygons = {}
    if len(X.keys()) == 4:
        values = {}
    else:
        values = None

    # Loop through entries and compose polygons
    excluded_polygons = {}
    past_ids = {}
    last_id = None
    for i, id in enumerate(X['id']):

        # Check for duplicate polygons
        if id in past_ids:
            msg = 'Polygon %s was duplicated in line %d' % (id, i)
            raise Exception, msg

        if id not in polygons:
            # Start new polygon
            polygons[id] = []
            if values is not None:
                values[id] = X[value_name][i]

            # Keep track of previous polygon ids
            if last_id is not None:
                past_ids[last_id] = i

        # Append this point to current polygon
        point = [float(X['easting'][i]), float(X['northing'][i])]

        if clipping_polygons is not None:
            exclude = True
            for clipping_polygon in clipping_polygons:
                if inside_polygon(point, clipping_polygon):
                    exclude = False
                    break

            if exclude is True:
                excluded_polygons[id] = True

        polygons[id].append(point)

        # Check that the value is the same across each polygon
        if values is not None:
            msg = 'Values must be the same across each polygon. '
            msg += 'I got %s in line %d but it should have been %s' \
                   % (X[value_name][i], i, values[id])
            assert values[id] == X[value_name][i], msg

        last_id = id

    # Weed out polygons that were not wholly inside clipping polygons
    for id in excluded_polygons:
        del polygons[id]

    return polygons, values

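# Example use of load_csv_as_polygons (a sketch, assuming a hypothetical
# 'polygons.csv' in the format documented above, plus one clipping polygon
# covering only the first building):
#
#     clip = [[422600.0, 870700.0], [422700.0, 870700.0],
#             [422700.0, 870800.0], [422600.0, 870800.0]]
#     polygons, values = load_csv_as_polygons('polygons.csv',
#                                             clipping_polygons=[clip])
#     # polygon '3' has points outside clip, so only '2' is returned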