source: trunk/anuga_core/source/anuga/file/csv_file.py @ 7854

Last change on this file since 7854 was 7854, checked in by hudson, 14 years ago

Fixed Windows Interpolate nonexistent verbose flag.

1"""
2    A set of functions which extend the capabilities of the Python csv
3    module.
4   
5    CSV files have the extension .csv, which stands for Comma Separated Value
6    file. There is no standardised form for this format, so the user is provided
7    with a variety of options for parsing different styles of csv files.
8   
9    These have been left as functions to aviod confusion with the standard
10    csv module.
11"""
12
13import csv
14import numpy as num
15
16
def load_csv_as_dict(file_name, title_check_list=None, delimiter=',',
                     d_type=str):
    """
    Load in the csv as a dictionary, title as key and column info as value.
    Also, create a dictionary, title as key and column index as value,
    to keep track of the column order.

    file_name The path to the file to read.

    title_check_list List of titles that *must* be columns in the file.

    delimiter is the delimiter used to separate the fields.

    d_type is the type the values are converted to; one of float, str, int.

    return 2 dictionaries: ({title: column}, {title: index}).

    WARNING: By default values are returned as strings.
             Do this to change a list of strings to a list of floats
                 time = [float(x) for x in time]
    """

    # FIXME(Ole): Consider dealing with files without headers

    attribute_dic = {}
    title_index_dic = {}
    titles_stripped = [] # List of titles

    # Open in binary mode, as recommended for the Python 2 csv module
    reader = csv.reader(open(file_name, 'rb'), delimiter=delimiter)

    # Read in and manipulate the title info
    titles = reader.next()
    for i, title in enumerate(titles):
        header = title.strip()
        titles_stripped.append(header)
        title_index_dic[header] = i
    title_count = len(titles_stripped)

    # Check required columns
    if title_check_list is not None:
        for title_check in title_check_list:
            if not title_index_dic.has_key(title_check):
                msg = 'Reading error. Required column %s is not present' \
                      % title_check
                raise IOError, msg

    # Create a dictionary of column values, indexed by column title
    for line in reader:
        n = len(line) # Number of entries
        if n < title_count:
            msg = 'Entry in file %s had %d columns ' % (file_name, n)
            msg += 'although there were %d headers' % title_count
            raise IOError, msg
        for i, val in enumerate(line[:title_count]):  # skip trailing data
            attribute_dic.setdefault(titles_stripped[i], []).append(d_type(val))

    return attribute_dic, title_index_dic

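# Illustrative usage sketch (not part of the original module); the file name
# and column titles are hypothetical.
def _example_load_csv_as_dict():
    data, column_index = load_csv_as_dict('gauges.csv',
                                          title_check_list=['time', 'stage'])
    # Values arrive as strings by default; convert explicitly
    times = [float(x) for x in data['time']]
    # column_index maps a title to its position in the original file
    return times, column_index['stage']
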
def load_csv_as_array(file_name, delimiter=','):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0,  1.2,       0.0
    0.1,  3.2,       1.1
    ...

    to a dictionary of numeric arrays.

    file_name The path to the file to read.
    delimiter is the delimiter used to separate the fields.

    See underlying function load_csv_as_dict for more details.
    """

    X, _ = load_csv_as_dict(file_name, delimiter=delimiter)

    # Return result as a dict of arrays
    ret = {}
    for key in X.keys():
        ret[key] = num.array([float(x) for x in X[key]])

    return ret

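# Illustrative sketch (hypothetical file name): load_csv_as_array does the
# float conversion itself, so numpy operations are available directly.
def _example_load_csv_as_array():
    data = load_csv_as_array('hydrograph.csv')
    return data['discharge'].max()   # peak discharge as a float
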
def load_csv_as_matrix(file_name, delimiter=','):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0,  1.2,       0.0
    0.1,  3.2,       1.1
    ...

    to a numeric matrix.

    file_name The path to the file to read.
    delimiter is the delimiter used to separate the fields.

    Returns (header, matrix) where header[j] is the title of column j.

    See underlying function load_csv_as_dict for more details.
    """

    X, title_indices = load_csv_as_dict(file_name, delimiter=delimiter)

    col_titles = title_indices.keys()

    # Return result as a 2D array
    ret = num.zeros((len(X[col_titles[0]]), len(title_indices)), float)

    # Place each column at its original index and record its title there,
    # so that header order matches column order
    header = [None] * len(title_indices)
    for col_title in col_titles:
        index = title_indices[col_title]
        header[index] = col_title
        for i, x in enumerate(X[col_title]):
            ret[i, index] = float(x)

    return header, ret

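# Illustrative sketch (hypothetical file name): the matrix form is useful
# when every column is numeric; header[j] names column j of the matrix.
def _example_load_csv_as_matrix():
    header, data = load_csv_as_matrix('hydrograph.csv')
    first_row = data[0, :]           # one row across all columns
    return header, first_row
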
##
# @brief Store keyword params into a CSV file.
# @param verbose True if this function is to be verbose.
# @param kwargs Dictionary of keyword args to store.
# @note If kwargs dict contains 'file_name' key, that has the output filename.
#       If not, make up a filename in the output directory.
def store_parameters(verbose=False, **kwargs):
    """
    Store the keyword args "kwargs" in a CSV file. If "completed" is in
    kwargs, the file written is kwargs['file_name'], otherwise it is
    kwargs['output_dir'] + 'detail_temp.csv'.

    A header line built from the sorted keys is written first, followed by
    one line of values; if the file already exists and its header matches,
    only the line of values is appended.

    There might be a better way to do this using the csv module's writer.
    """

    import types

    # Check that kwargs is a dictionary
    if type(kwargs) != types.DictType:
        raise TypeError

    # is 'completed' in kwargs?
    completed = kwargs.has_key('completed')

    # Get the file name; completed runs are written to 'file_name',
    # otherwise a temp file is written in the output directory
    if completed:
        try:
            file = str(kwargs['file_name'])
        except KeyError:
            raise Exception, 'kwargs must have file_name'
    else:
        # write temp file in output directory
        try:
            file = str(kwargs['output_dir']) + 'detail_temp.csv'
        except KeyError:
            raise Exception, 'kwargs must have output_dir'

    # Extract the header info and the new line info
    line = ''
    header = ''
    count = 0
    keys = kwargs.keys()
    keys.sort()

    # Use the sorted keys to create the header and line data
    for k in keys:
        header += str(k)
        line += str(kwargs[k])
        count += 1
        if count < len(kwargs):
            header += ','
            line += ','
    header += '\n'
    line += '\n'

    # Check the header info: if the same, append; if not, create a new file
    try:
        fid = open(file, 'r')
        file_header = fid.readline()
        fid.close()
        if verbose: log.critical('read file header %s' % file_header)
    except IOError:
        msg = 'try to create new file: %s' % file
        if verbose: log.critical(msg)
        # try to create the file; maybe the directory is bad
        try:
            fid = open(file, 'w')
            fid.write(header)
            fid.close()
            file_header = header
        except IOError:
            msg = 'cannot create new file: %s' % file
            raise Exception, msg

    # if header is same or this is a new file
    if file_header == str(header):
        fid = open(file, 'a')
        fid.write(line)
        fid.close()
    else:
        # Backup plan: if the header is different, append the header and
        # line to the detail_temp.csv file in the output directory
        file = str(kwargs['output_dir']) + 'detail_temp.csv'
        fid = open(file, 'a')
        fid.write(header)
        fid.write(line)
        fid.close()

        if verbose:
            log.critical('file %s' % file_header.strip('\n'))
            log.critical('head %s' % header.strip('\n'))
            if file_header.strip('\n') == str(header):
                log.critical('they equal')

        msg = 'WARNING: File header does not match input info, ' \
              'the input variables have changed, suggest you change file name'
        log.critical(msg)

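# Illustrative sketch (hypothetical paths and parameters): a run appends its
# settings to <output_dir>/detail_temp.csv while in progress, then writes to
# its own file once 'completed' is passed.
def _example_store_parameters():
    store_parameters(verbose=False, output_dir='./results/',
                     friction=0.03, timestep=0.5)
    store_parameters(verbose=False, file_name='./results/run1.csv',
                     friction=0.03, timestep=0.5, completed=True)
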
def load_csv_as_building_polygons(file_name,
                                  floor_height=3,
                                  clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,floors
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated number of floors is converted to metres above MSL and
    returned as a separate dictionary, also keyed by id.

    Optional parameter floor_height is the height of each building storey.
    Optional parameter clipping_polygons is a list of polygons selecting
    buildings. Any building not in these polygons will be omitted.

    See load_csv_as_polygons for more details.
    """

    polygons, values = load_csv_as_polygons(file_name,
                                            value_name='floors',
                                            clipping_polygons=clipping_polygons)

    heights = {}
    for key in values.keys():
        v = float(values[key])
        heights[key] = v * floor_height

    return polygons, heights

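# Illustrative sketch (hypothetical file name): convert building footprints
# to polygons and heights in metres.
def _example_load_csv_as_building_polygons():
    polygons, heights = load_csv_as_building_polygons('buildings.csv')
    # With the default floor_height, a two-storey building comes back as 6 m
    return [(id, len(polygons[id]), heights[id]) for id in polygons]
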
##
# @brief Convert CSV file into a dictionary of polygons and associated values.
# @param file_name The path to the file to read.
# @param value_name The name of the 4th (value) column.
# @param clipping_polygons Optional list of polygons used to select polygons.
def load_csv_as_polygons(file_name,
                         value_name='value',
                         clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,value
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated values are returned as a separate dictionary, also keyed
    by id.

    easting: x coordinate relative to zone implied by the model
    northing: y coordinate relative to zone implied by the model
    id: tag for polygon comprising points with this tag
    value: numeral associated with each polygon. This must be the same for
           all points in each polygon.

    The last header, value, can take on other names such as roughness,
    floors, etc - or it can be omitted, in which case the returned values
    will be None.

    Eastings and northings will be returned as floating point values while
    id and values will be returned as strings.

    Optional argument clipping_polygons will select only those polygons that
    are fully within one or more of the clipping polygons. In other words,
    any polygon from the csv file which has at least one point not inside
    one of the clipping polygons will be excluded.

    See underlying function load_csv_as_dict for more details.
    """

    X, _ = load_csv_as_dict(file_name)

    msg = 'Polygon csv file must have 3 or 4 columns'
    assert len(X.keys()) in [3, 4], msg

    msg = 'Did not find expected column header: easting'
    assert 'easting' in X.keys(), msg

    msg = 'Did not find expected column header: northing'
    assert 'northing' in X.keys(), msg

    msg = 'Did not find expected column header: id'
    assert 'id' in X.keys(), msg

    if value_name is not None:
        msg = 'Did not find expected column header: %s' % value_name
        assert value_name in X.keys(), msg

    polygons = {}
    if len(X.keys()) == 4:
        values = {}
    else:
        values = None

    # Loop through entries and compose polygons
    excluded_polygons = {}
    past_ids = {}
    last_id = None
    for i, id in enumerate(X['id']):

        # Check for duplicate polygons
        if id in past_ids:
            msg = 'Polygon %s was duplicated in line %d' % (id, i)
            raise Exception, msg

        if id not in polygons:
            # Start new polygon
            polygons[id] = []
            if values is not None:
                values[id] = X[value_name][i]

            # Keep track of previous polygon ids
            if last_id is not None:
                past_ids[last_id] = i

        # Append this point to current polygon
        point = [float(X['easting'][i]), float(X['northing'][i])]

        if clipping_polygons is not None:
            exclude = True
            for clipping_polygon in clipping_polygons:
                if inside_polygon(point, clipping_polygon):
                    exclude = False
                    break

            if exclude is True:
                excluded_polygons[id] = True

        polygons[id].append(point)

        # Check that value is the same across each polygon
        if values is not None:
            msg = 'Values must be the same across each polygon. '
            msg += 'I got %s in line %d but it should have been %s' \
                   % (X[value_name][i], i, values[id])
            assert values[id] == X[value_name][i], msg

        last_id = id

    # Weed out polygons that were not wholly inside clipping polygons
    for id in excluded_polygons:
        del polygons[id]

    return polygons, values

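# Illustrative sketch (hypothetical file and study area): clip the polygons
# read from file to a square study area; any polygon with a point outside
# it is dropped from the result.
def _example_load_csv_as_polygons():
    study_area = [[0.0, 0.0], [1000.0, 0.0], [1000.0, 1000.0], [0.0, 1000.0]]
    polygons, values = load_csv_as_polygons('roughness.csv',
                                            value_name='roughness',
                                            clipping_polygons=[study_area])
    return polygons, values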