source: trunk/anuga_core/source/anuga/file/csv_file.py @ 7870

Last change on this file since 7870 was 7858, checked in by James Hudson, 15 years ago

Refactorings to increase code quality, fixed missing log import from sww_interrogate.

File size: 12.3 KB
1"""
2    A set of functions which extend the capabilities of the Python csv
3    module.
4   
5    CSV files have the extension .csv, which stands for Comma Separated Value
6    file. There is no standardised form for this format, so the user is provided
7    with a variety of options for parsing different styles of csv files.
8   
9    These have been left as functions to aviod confusion with the standard
10    csv module.
11"""
12
13
14import csv
15import numpy as num
16import anuga.utilities.log as log
17
18
def load_csv_as_dict(file_name, title_check_list=None, delimiter=',',
                     d_type=str):
    """
    Load the csv file as a dictionary, title as key and column info as value.
    Also create a dictionary, title as key and column index as value,
    to keep track of the column order.

    file_name        The path to the file to read.

    title_check_list List of titles that *must* be columns in the file.

    delimiter        The delimiter used to separate the fields.

    d_type           The type each value is converted to: one of float, str, int.

    Returns 2 dictionaries: ({title: column_values}, {title: column_index}).

    WARNING: By default values are returned as strings.
             To change a list of strings to a list of floats, do:
                 time = [float(x) for x in time]
    """

    # FIXME(Ole): Consider dealing with files without headers

    attribute_dic = {}
    title_index_dic = {}
    titles_stripped = [] # List of titles

    reader = csv.reader(open(file_name), delimiter=delimiter)

    # Read in and manipulate the title info
    titles = reader.next()
    for i, title in enumerate(titles):
        header = title.strip()
        titles_stripped.append(header)
        title_index_dic[header] = i
    title_count = len(titles_stripped)

    # Check required columns
    if title_check_list is not None:
        for title_check in title_check_list:
            if not title_index_dic.has_key(title_check):
                msg = 'Reading error. This column is not present: %s' \
                      % title_check
                raise IOError, msg

    # Create a dictionary of column values, indexed by column title
    for line in reader:
        n = len(line) # Number of entries
        if n < title_count:
            msg = 'Entry in file %s had %d columns ' % (file_name, n)
            msg += 'although there were %d headers' % title_count
            raise IOError, msg
        for i, val in enumerate(line[:title_count]):  # skip trailing data
            attribute_dic.setdefault(titles_stripped[i], []).append(d_type(val))

    return attribute_dic, title_index_dic

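# A minimal usage sketch (hypothetical file 'gauges.csv' with columns
# 'time' and 'stage'):
#
#     data, indices = load_csv_as_dict('gauges.csv',
#                                      title_check_list=['time', 'stage'])
#     time = [float(x) for x in data['time']]   # values arrive as strings
#     assert indices['time'] == 0               # column order is recorded
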
def load_csv_as_array(file_name, delimiter=','):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0,  1.2,       0.0
    0.1,  3.2,       1.1
    ...

    to a dictionary of numeric arrays.

    file_name The path to the file to read.
    delimiter is the delimiter used to separate the fields

    See underlying function load_csv_as_dict for more details.
    """

    X, _ = load_csv_as_dict(file_name, delimiter=delimiter)

    # Return result as a dict of arrays
    ret = {}
    for key in X.keys():
        ret[key] = num.array([float(x) for x in X[key]])

    return ret

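# A minimal usage sketch (hypothetical file 'hydrograph.csv' with the
# header shown in the docstring above):
#
#     quantities = load_csv_as_array('hydrograph.csv')
#     peak = quantities['discharge'].max()   # each value is a numpy array
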
def load_csv_as_matrix(file_name, delimiter=','):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0,  1.2,       0.0
    0.1,  3.2,       1.1
    ...

    to a numeric matrix.

    file_name The path to the file to read.
    delimiter is the delimiter used to separate the fields

    Returns the list of column titles (in file order) and the data
    as a 2D array.

    See underlying function load_csv_as_dict for more details.
    """

    X, title_indices = load_csv_as_dict(file_name, delimiter=delimiter)

    col_titles = title_indices.keys()

    # Return result as a 2D array
    ret = num.zeros((len(X[col_titles[0]]), len(title_indices)), float)

    # Build the header in column order so that header[j] labels ret[:, j]
    # (dict key order is arbitrary, but title_indices records each column's
    # position in the file)
    header = [None] * len(title_indices)
    for col_title in col_titles:
        index = title_indices[col_title]
        header[index] = col_title
        for i, x in enumerate(X[col_title]):
            ret[i, index] = float(x)

    return header, ret

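# A minimal usage sketch (same hypothetical 'hydrograph.csv' as above):
#
#     header, data = load_csv_as_matrix('hydrograph.csv')
#     # header[j] names column j of data, so this selects the discharge column
#     discharge = data[:, header.index('discharge')]
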
##
# @brief Store keyword params into a CSV file.
# @param verbose True if this function is to be verbose.
# @param kwargs Dictionary of keyword args to store.
# @note If kwargs dict contains 'file_name' key, that has the output filename.
#       If not, make up a filename in the output directory.
def store_parameters(verbose=False, **kwargs):
    """
    Store "kwargs" in a temporary csv file. If "completed" is in kwargs,
    the csv file is kwargs['file_name'], else it is
    kwargs['output_dir'] + 'detail_temp.csv'.

    kwargs must have a 'file_name' key (or an 'output_dir' key from which
    a temporary file name is built); that is the file written to. There
    might be a better way to do this using the csv module's writer.
    """

    import types

    # Check that kwargs is a dictionary
    if type(kwargs) != types.DictType:
        raise TypeError

    # Is 'completed' in kwargs?
    completed = kwargs.has_key('completed')

    # Get the file name and assert that one can be derived
    if completed:
        try:
            file_name = str(kwargs['file_name'])
        except:
            raise Exception('kwargs must have file_name')
    else:
        # Write temp file in output directory
        try:
            file_name = str(kwargs['output_dir']) + 'detail_temp.csv'
        except:
            raise Exception('kwargs must have output_dir')

    # Extract the header info and the new line info
    line = ''
    header = ''
    count = 0
    keys = kwargs.keys()
    keys.sort()

    # Use the sorted keys to create the header and line data
    for k in keys:
        header += str(k)
        line += str(kwargs[k])
        count += 1
        if count < len(kwargs):
            header += ','
            line += ','
    header += '\n'
    line += '\n'

    # Check the header info: if it is the same then append,
    # if not then create a new file
    try:
        fid = open(file_name, 'r')
        file_header = fid.readline()
        fid.close()
        if verbose:
            log.critical('read file header %s' % file_header)
    except Exception:
        msg = 'try to create new file: %s' % file_name
        if verbose:
            log.critical(msg)
        # Try to create the file; maybe the directory is bad
        try:
            fid = open(file_name, 'w')
            fid.write(header)
            fid.close()
            file_header = header
        except:
            msg = 'cannot create new file: %s' % file_name
            raise Exception, msg

    # If header is the same or this is a new file
    if file_header == str(header):
        fid = open(file_name, 'a')
        fid.write(line)
        fid.close()
    else:
        # Backup plan:
        # if the header is different and 'completed' was given, append the
        # info to the end of the detail_temp.csv file in the output directory
        file_name = str(kwargs['output_dir']) + 'detail_temp.csv'
        fid = open(file_name, 'a')
        fid.write(header)
        fid.write(line)
        fid.close()

        if verbose:
            log.critical('file %s', file_header.strip('\n'))
            log.critical('head %s', header.strip('\n'))
        if file_header.strip('\n') == str(header):
            log.critical('they equal')

        msg = 'WARNING: File header does not match input info, ' \
              'the input variables have changed, suggest you change file name'
        log.critical(msg)

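# A minimal usage sketch (hypothetical values; note that 'output_dir' must
# end with a path separator because the code appends 'detail_temp.csv'
# directly to it):
#
#     store_parameters(output_dir='results/', run_id=42, friction=0.03)
#     store_parameters(file_name='results/final.csv', run_id=42,
#                      friction=0.03, completed=True)
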
def load_csv_as_building_polygons(file_name,
                                  floor_height=3):
    """
    Convert CSV files of the form:

    easting,northing,id,floors
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated number of floors is converted to m above MSL and
    returned as a separate dictionary, also keyed by id.

    Optional parameter floor_height is the height of each building story.

    See underlying function load_csv_as_polygons for more details.
    """

    polygons, values = load_csv_as_polygons(file_name,
                                            value_name='floors',
                                            clipping_polygons=None)

    heights = {}
    for key in values.keys():
        v = float(values[key])
        heights[key] = v * floor_height

    return polygons, heights

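# A minimal usage sketch (hypothetical file 'buildings.csv' laid out as in
# the docstring above):
#
#     polygons, heights = load_csv_as_building_polygons('buildings.csv')
#     for poly_id in polygons:
#         print poly_id, len(polygons[poly_id]), heights[poly_id]
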
def load_csv_as_polygons(file_name,
                         value_name='value',
                         clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,value
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated values are returned as a separate dictionary, also keyed
    by id.

    easting:  x coordinate relative to the zone implied by the model
    northing: y coordinate relative to the zone implied by the model
    id:       tag for the polygon comprising all points with this tag
    value:    number associated with each polygon. It must be the same for
              all points in a given polygon.

    The last header, value, can take on other names such as roughness,
    floors, etc - or it can be omitted, in which case the returned values
    will be None.

    Eastings and northings will be returned as floating point values while
    id and values will be returned as strings.

    Optional argument clipping_polygons will select only those polygons
    that are fully within one or more of the clipping polygons. In other
    words, any polygon from the csv file which has at least one point not
    inside one of the clipping polygons will be excluded.

    See underlying function load_csv_as_dict for more details.
    """

    X, _ = load_csv_as_dict(file_name)

    msg = 'Polygon csv file must have 3 or 4 columns'
    assert len(X.keys()) in [3, 4], msg

    msg = 'Did not find expected column header: easting'
    assert 'easting' in X.keys(), msg

    msg = 'Did not find expected column header: northing'
    assert 'northing' in X.keys(), msg

    msg = 'Did not find expected column header: id'
    assert 'id' in X.keys(), msg

    if value_name is not None:
        msg = 'Did not find expected column header: %s' % value_name
        assert value_name in X.keys(), msg

    polygons = {}
    if len(X.keys()) == 4:
        values = {}
    else:
        values = None

    # Loop through entries and compose polygons
    excluded_polygons = {}
    past_ids = {}
    last_id = None
    for i, poly_id in enumerate(X['id']):

        # Check for duplicate polygons
        if poly_id in past_ids:
            msg = 'Polygon %s was duplicated in line %d' % (poly_id, i)
            raise Exception, msg

        if poly_id not in polygons:
            # Start new polygon
            polygons[poly_id] = []
            if values is not None:
                values[poly_id] = X[value_name][i]

            # Keep track of previous polygon ids
            if last_id is not None:
                past_ids[last_id] = i

        # Append this point to current polygon
        point = [float(X['easting'][i]), float(X['northing'][i])]

        if clipping_polygons is not None:
            exclude = True
            for clipping_polygon in clipping_polygons:
                if inside_polygon(point, clipping_polygon):
                    exclude = False
                    break

            if exclude is True:
                excluded_polygons[poly_id] = True

        polygons[poly_id].append(point)

        # Check that the value is the same across each polygon
        if values is not None:
            msg = 'Values must be the same across each polygon. '
            msg += 'I got %s in line %d but it should have been %s' \
                   % (X[value_name][i], i, values[poly_id])
            assert values[poly_id] == X[value_name][i], msg

        last_id = poly_id

    # Weed out polygons that were not wholly inside clipping polygons
    for poly_id in excluded_polygons:
        del polygons[poly_id]

    return polygons, values

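# A minimal usage sketch (hypothetical file 'regions.csv' with columns
# easting, northing, id and roughness; the clipping polygon is made up):
#
#     clip = [[422000.0, 870000.0], [423000.0, 870000.0],
#             [423000.0, 872000.0], [422000.0, 872000.0]]
#     polygons, values = load_csv_as_polygons('regions.csv',
#                                             value_name='roughness',
#                                             clipping_polygons=[clip])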