source: trunk/anuga_core/source/anuga/file/csv_file.py @ 9516

1"""
2    A set of functions which extend the capabilities of the Python csv
3    module.
4   
5    CSV files have the extension .csv, which stands for Comma Separated Value
6    file. There is no standardised form for this format, so the user is provided
7    with a variety of options for parsing different styles of csv files.
8   
9    These have been left as functions to aviod confusion with the standard
10    csv module.
11"""


import csv
import numpy as num
import anuga.utilities.log as log

# Needed by load_csv_as_polygons below. The polygon utilities are assumed
# to live in anuga.geometry.polygon in this version of ANUGA;
# is_inside_polygon returns a plain boolean for a single point.
from anuga.geometry.polygon import is_inside_polygon


def load_csv_as_dict(file_name, title_check_list=None, delimiter=',',
                     d_type=str):
    """
    Load in the csv as a dictionary, title as key and column info as value.
    Also, create a dictionary, title as key and column index as value,
    to keep track of the column order.

    file_name        The path to the file to read.

    title_check_list List of titles that *must* be columns in the file.

    delimiter        The delimiter used to separate the fields.

    d_type           The type each value is converted to: one of float, str, int.

    Return 2 dictionaries: ({title: column}, {title: index}).

    WARNING: By default (d_type=str) values are returned as strings.
             Do this to change a list of strings to a list of floats:
                 time = [float(x) for x in time]
    """

    # FIXME(Ole): Consider dealing with files without headers

    attribute_dic = {}
    title_index_dic = {}
    titles_stripped = [] # List of titles

    reader = csv.reader(open(file_name), delimiter=delimiter)

    # Read in and manipulate the title info
    titles = next(reader)
    for i, title in enumerate(titles):
        header = title.strip()
        titles_stripped.append(header)
        title_index_dic[header] = i
    title_count = len(titles_stripped)

    # Check required columns
    if title_check_list is not None:
        for title_check in title_check_list:
            if title_check not in title_index_dic:
                msg = 'Reading error. This column is not present: %s' % title_check
                raise IOError(msg)

    # Create a dictionary of column values, indexed by column title
    for line in reader:
        n = len(line) # Number of entries
        if n < title_count:
            msg = 'Entry in file %s had %d columns ' % (file_name, n)
            msg += 'although there were %d headers' % title_count
            raise IOError(msg)
        for i, val in enumerate(line[:title_count]):  # skip trailing data
            attribute_dic.setdefault(titles_stripped[i], []).append(d_type(val))

    return attribute_dic, title_index_dic

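# A minimal usage sketch for load_csv_as_dict (the file name and column
# titles below are hypothetical, not part of this module):
#
#     data, indices = load_csv_as_dict('gauges.csv',
#                                      title_check_list=['time', 'stage'])
#     # data['stage'] is a list of strings, one per row, and
#     # indices['stage'] is that column's position in the file.
#     stage = [float(x) for x in data['stage']]
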
def load_csv_as_array(file_name, delimiter=','):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0,  1.2,       0.0
    0.1,  3.2,       1.1
    ...

    to a dictionary of numeric arrays.

    file_name The path to the file to read.
    delimiter The delimiter used to separate the fields.

    See underlying function load_csv_as_dict for more details.
    """

    X, _ = load_csv_as_dict(file_name, delimiter=delimiter)

    # Return result as a dict of arrays
    ret = {}
    for key in X.keys():
        ret[key] = num.array([float(x) for x in X[key]])

    return ret

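# A minimal usage sketch for load_csv_as_array (the file name is
# hypothetical):
#
#     data = load_csv_as_array('hydrograph.csv')
#     # each column is now a numpy array of floats, keyed by its title
#     peak_discharge = data['discharge'].max()
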
def load_csv_as_matrix(file_name, delimiter=','):
    """
    Convert CSV files of the form:

    time, discharge, velocity
    0.0,  1.2,       0.0
    0.1,  3.2,       1.1
    ...

    to a numeric matrix.

    file_name The path to the file to read.
    delimiter The delimiter used to separate the fields.

    Return the list of column titles (in column order) and a 2D array
    with one column per title.

    See underlying function load_csv_as_dict for more details.
    """

    X, title_indices = load_csv_as_dict(file_name, delimiter=delimiter)

    col_titles = title_indices.keys()

    # Return result as a 2D array
    ret = num.zeros((len(X[col_titles[0]]), len(title_indices)), float)

    # Sort the titles by column index so the returned header matches the
    # column order of the matrix
    header = sorted(col_titles, key=lambda title: title_indices[title])
    for col_title in col_titles:
        index = title_indices[col_title]
        for i, x in enumerate(X[col_title]):
            ret[i, index] = float(x)

    return header, ret

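# A minimal usage sketch for load_csv_as_matrix (the file name is
# hypothetical):
#
#     header, matrix = load_csv_as_matrix('hydrograph.csv')
#     # header[j] is the title of column j of the matrix, so e.g.
#     velocity = matrix[:, header.index('velocity')]
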
def store_parameters(verbose=False, **kwargs):
    """
    Store the keyword arguments as one row of a CSV file.

    If 'completed' is in kwargs the file written to is kwargs['file_name'],
    otherwise it is kwargs['output_dir'] + 'detail_temp.csv'.

    There might be a better way to do this using csv.DictWriter.
    """

    # Check that kwargs is a dictionary
    if not isinstance(kwargs, dict):
        raise TypeError

    # Is 'completed' in kwargs?
    completed = 'completed' in kwargs

    # Get the file name, asserting that the required key exists
    if completed:
        try:
            file_name = str(kwargs['file_name'])
        except KeyError:
            raise Exception('kwargs must have file_name')
    else:
        # Write temp file in output directory
        try:
            file_name = str(kwargs['output_dir']) + 'detail_temp.csv'
        except KeyError:
            raise Exception('kwargs must have output_dir')

    # Extract the header info and the new line info
    line = ''
    header = ''
    count = 0
    keys = sorted(kwargs.keys())

    # Use the sorted keys to create the header and line data
    for k in keys:
        header += str(k)
        line += str(kwargs[k])
        count += 1
        if count < len(kwargs):
            header += ','
            line += ','
    header += '\n'
    line += '\n'

    # Check the header info: if it is the same, append the line;
    # if not, create a new file
    try:
        fid = open(file_name, 'r')
        file_header = fid.readline()
        fid.close()
        if verbose: log.critical('read file header %s' % file_header)
    except IOError:
        msg = 'try to create new file: %s' % file_name
        if verbose:
            log.critical(msg)
        # Try to create the file; maybe the directory is bad
        try:
            fid = open(file_name, 'w')
            fid.write(header)
            fid.close()
            file_header = header
        except IOError:
            msg = 'cannot create new file: %s' % file_name
            raise Exception(msg)

    # If the header is the same, or this is a new file, append the line
    if file_header == str(header):
        fid = open(file_name, 'a')
        fid.write(line)
        fid.close()
    else:
        # Backup plan:
        # if the header is different, append both header and line to the
        # end of the detail_temp.csv file in the output directory
        file_name = str(kwargs['output_dir']) + 'detail_temp.csv'
        fid = open(file_name, 'a')
        fid.write(header)
        fid.write(line)
        fid.close()

        if verbose:
            log.critical('file %s', file_header.strip('\n'))
            log.critical('head %s', header.strip('\n'))
        if file_header.strip('\n') == str(header):
            log.critical('they equal')

        msg = 'WARNING: File header does not match input info, ' \
              'the input variables have changed, suggest you change file name'
        log.critical(msg)

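# A minimal usage sketch for store_parameters (the keyword arguments shown
# are hypothetical examples):
#
#     store_parameters(output_dir='results/', run_id=42, friction=0.03)
#     # appends one row to results/detail_temp.csv
#
#     store_parameters(file_name='results/final.csv', run_id=42,
#                      friction=0.03, completed=True)
#     # appends one row to results/final.csv instead
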
def load_csv_as_building_polygons(file_name,
                                  floor_height=3,
                                  clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,floors
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated number of floors are converted to m above MSL and
    returned as a separate dictionary also keyed by id.

    Optional parameter floor_height is the height of each building story.
    Optional parameter clipping_polygons is a list of polygons selecting
    buildings. Any building not in these polygons will be omitted.

    See load_csv_as_polygons for more details.
    """

    polygons, values = load_csv_as_polygons(file_name,
                                            value_name='floors',
                                            clipping_polygons=clipping_polygons)

    heights = {}
    for key in values.keys():
        v = float(values[key])
        heights[key] = v * floor_height

    return polygons, heights

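# A minimal usage sketch for load_csv_as_building_polygons (the file name
# is hypothetical):
#
#     buildings, heights = load_csv_as_building_polygons('buildings.csv',
#                                                        floor_height=3)
#     # buildings['3'] is the list of [easting, northing] vertices of the
#     # building tagged '3'; heights['3'] is floors * floor_height in metres
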
def load_csv_as_polygons(file_name,
                         value_name='value',
                         clipping_polygons=None):
    """
    Convert CSV files of the form:

    easting,northing,id,value
    422664.22,870785.46,2,0
    422672.48,870780.14,2,0
    422668.17,870772.62,2,0
    422660.35,870777.17,2,0
    422664.22,870785.46,2,0
    422661.30,871215.06,3,1
    422667.50,871215.70,3,1
    422668.30,871204.86,3,1
    422662.21,871204.33,3,1
    422661.30,871215.06,3,1

    to a dictionary of polygons with id as key.
    The associated values are returned as a separate dictionary also keyed by id.

    easting: x coordinate relative to zone implied by the model
    northing: y coordinate relative to zone implied by the model
    id: tag for polygon comprising points with this tag
    value: numeric value associated with each polygon. It must be the same
           for all points in each polygon.

    The last header, value, can take on other names such as roughness,
    floors, etc - or it can be omitted, in which case the returned values
    will be None.

    Eastings and northings will be returned as floating point values while
    id and values will be returned as strings.

    Optional argument clipping_polygons will select only those polygons
    that are fully within one or more of the clipping polygons. In other
    words, any polygon from the csv file which has at least one point not
    inside one of the clipping polygons will be excluded.

    See underlying function load_csv_as_dict for more details.
    """

    X, _ = load_csv_as_dict(file_name)

    msg = 'Polygon csv file must have 3 or 4 columns'
    assert len(X.keys()) in [3, 4], msg

    msg = 'Did not find expected column header: easting'
    assert 'easting' in X.keys(), msg

    msg = 'Did not find expected column header: northing'
    assert 'northing' in X.keys(), msg

    msg = 'Did not find expected column header: id'
    assert 'id' in X.keys(), msg

    # Only insist on the value column if the file actually has four columns
    if value_name is not None and len(X.keys()) == 4:
        msg = 'Did not find expected column header: %s' % value_name
        assert value_name in X.keys(), msg

    polygons = {}
    if len(X.keys()) == 4:
        values = {}
    else:
        values = None

    # Loop through entries and compose polygons
    excluded_polygons = {}
    past_ids = {}
    last_id = None
    for i, poly_id in enumerate(X['id']):

        # Check for duplicate polygons
        if poly_id in past_ids:
            msg = 'Polygon %s was duplicated in line %d' % (poly_id, i)
            raise Exception(msg)

        if poly_id not in polygons:
            # Start new polygon
            polygons[poly_id] = []
            if values is not None:
                values[poly_id] = X[value_name][i]

            # Keep track of previous polygon ids
            if last_id is not None:
                past_ids[last_id] = i

        # Append this point to current polygon
        point = [float(X['easting'][i]), float(X['northing'][i])]

        if clipping_polygons is not None:
            exclude = True
            for clipping_polygon in clipping_polygons:
                # is_inside_polygon returns a boolean for a single point
                if is_inside_polygon(point, clipping_polygon):
                    exclude = False
                    break

            if exclude is True:
                excluded_polygons[poly_id] = True

        polygons[poly_id].append(point)

        # Check that the value is the same across each polygon
        if values is not None:
            msg = 'Values must be the same across each polygon. '
            msg += 'I got %s in line %d but it should have been %s' % \
                        (X[value_name][i], i, values[poly_id])
            assert values[poly_id] == X[value_name][i], msg

        last_id = poly_id

    # Weed out polygons that were not wholly inside clipping polygons
    for poly_id in excluded_polygons:
        del polygons[poly_id]

    return polygons, values

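# A minimal usage sketch for load_csv_as_polygons (the file name and the
# clipping polygon are hypothetical):
#
#     study_area = [[422000.0, 870000.0], [423000.0, 870000.0],
#                   [423000.0, 872000.0], [422000.0, 872000.0]]
#     polygons, values = load_csv_as_polygons('buildings.csv',
#                                             value_name='floors',
#                                             clipping_polygons=[study_area])
#     # only polygons wholly inside study_area are kept; values['3'] is the
#     # (string) 'floors' entry shared by all points of polygon '3'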