[7100] | 1 | #!/usr/bin/env python |
---|
| 2 | |
---|
| 3 | import os |
---|
| 4 | import unittest |
---|
| 5 | import tempfile |
---|
| 6 | import csv |
---|
| 7 | |
---|
[7103] | 8 | import csv_tools |
---|
[7100] | 9 | |
---|
| 10 | |
---|
# Column header templates, looked up by (column number modulo 4).  The four
# variants differ only in the blanks placed before and/or after the name.
col_text_string = dict(enumerate(('col_%d', ' col_%d', 'col_%d ', ' col_%d ')))
---|
| 18 | |
---|
class Test_CSV_utils(unittest.TestCase):
    """Unit tests for csv_tools.merge_csv_key_values()."""

    NUM_FILES = 10            # input CSV files generated by setUp()
    NUM_COLS = 6              # columns written to each input file
    NUM_LINES = 10            # data rows written to each input file
    OUTPUT_FILE = 'test.csv'  # merge output path; deleted in tearDown()
---|
| 25 | |
---|
def setUp(self):
    """Create the input CSV files shared by every test.

    NUM_FILES files are written into a fresh scratch directory
    (self.tmp_dir); their paths are stored in self.filenames.  Each file
    has one header row of NUM_COLS names taken from col_text_string and
    NUM_LINES data rows of the form [j, j, 'j.i', 'qwert', ...], where j
    is the row number and i is the index of the file.
    """

    # scratch directory that holds every generated input file
    self.tmp_dir = tempfile.mkdtemp()

    self.num_files = self.NUM_FILES
    self.filenames = []

    # header and filler cells are identical for every file, build them once
    header = [col_text_string[col % 4] % col for col in range(self.NUM_COLS)]
    filler = ['qwert'] * (self.NUM_COLS - 3)

    for file_index in range(self.NUM_FILES):
        # mkstemp() creates the file atomically; mktemp() only returned a
        # name, which left a window for another process to grab it.
        handle, path = tempfile.mkstemp(suffix='.csv', dir=self.tmp_dir)
        self.filenames.append(path)

        with os.fdopen(handle, 'w') as fd:
            writer = csv.writer(fd)
            writer.writerow(header)
            for row in range(self.NUM_LINES):
                writer.writerow([row, row, '%d.%d' % (row, file_index)]
                                + filler)
---|
| 49 | |
---|
| 50 | |
---|
def tearDown(self):
    """Delete the files and scratch directory used by a test.

    Removal is best-effort: a file that was never created (or is already
    gone) is not an error, so OSError is ignored.  Other exceptions are
    no longer swallowed.
    """

    for path in list(self.filenames) + [self.OUTPUT_FILE]:
        try:
            os.remove(path)
        except OSError:
            pass    # nothing to remove

    # setUp() makes self.tmp_dir with mkdtemp(); drop it too so repeated
    # runs do not litter the temp area.  rmdir() only succeeds when the
    # directory is empty, which keeps this safe.
    tmp_dir = getattr(self, 'tmp_dir', None)
    if tmp_dir is not None:
        try:
            os.rmdir(tmp_dir)
        except OSError:
            pass
---|
| 61 | |
---|
| 62 | |
---|
def test_merge_one_file(self):
    """Test merging a single CSV file.

    This is the same as a two column extract, with column rename.
    """

    file_title_list = [(self.filenames[0], 'test')]
    csv_tools.merge_csv_key_values(file_title_list, self.OUTPUT_FILE,
                                   key_col='col_0', data_col='col_3')

    # adjacent literals keep the expected text independent of indentation
    expected = ('col_0,test\n'
                '0,qwert\n'
                '1,qwert\n'
                '2,qwert\n'
                '3,qwert\n'
                '4,qwert\n'
                '5,qwert\n'
                '6,qwert\n'
                '7,qwert\n'
                '8,qwert\n'
                '9,qwert\n')

    got = self.get_file_contents(self.OUTPUT_FILE)
    msg = ('Merging one file,\n'
           'expected file=\n'
           '--------------------\n'
           '%s'
           '--------------------\n'
           'got file=\n'
           '--------------------\n'
           '%s'
           '--------------------\n'
           % (expected, got))
    # assertTrue replaces the failUnless alias (removed in Python 3.12)
    self.assertTrue(self.str_cmp(got, expected), msg)
---|
| 98 | |
---|
| 99 | |
---|
def test_merge_two_files(self):
    """Test merging two CSV files (key 'col_0', data 'col_3')."""

    file_title_list = [(self.filenames[0], 'test0'),
                       (self.filenames[1], 'test1')]
    csv_tools.merge_csv_key_values(file_title_list, self.OUTPUT_FILE,
                                   key_col='col_0', data_col='col_3')

    # adjacent literals keep the expected text independent of indentation
    expected = ('col_0,test0,test1\n'
                '0,qwert,qwert\n'
                '1,qwert,qwert\n'
                '2,qwert,qwert\n'
                '3,qwert,qwert\n'
                '4,qwert,qwert\n'
                '5,qwert,qwert\n'
                '6,qwert,qwert\n'
                '7,qwert,qwert\n'
                '8,qwert,qwert\n'
                '9,qwert,qwert\n')

    got = self.get_file_contents(self.OUTPUT_FILE)
    msg = ('Merging two files,\n'
           'expected file=\n'
           '--------------------\n'
           '%s'
           '--------------------\n'
           'got file=\n'
           '--------------------\n'
           '%s'
           '--------------------\n'
           % (expected, got))
    # assertTrue replaces the failUnless alias (removed in Python 3.12)
    self.assertTrue(self.str_cmp(got, expected), msg)
---|
| 133 | |
---|
| 134 | |
---|
def test_merge_two_files2(self):
    """Test merging two CSV files on a data column that differs per file.

    Uses data column 'col_2', whose values are written as '<row>.<file>'
    by setUp(), so each merged column must come from the right file.
    """

    file_title_list = [(self.filenames[0], 'test0'),
                       (self.filenames[1], 'test1')]
    csv_tools.merge_csv_key_values(file_title_list, self.OUTPUT_FILE,
                                   key_col='col_0', data_col='col_2')

    # adjacent literals keep the expected text independent of indentation
    expected = ('col_0,test0,test1\n'
                '0,0.0,0.1\n'
                '1,1.0,1.1\n'
                '2,2.0,2.1\n'
                '3,3.0,3.1\n'
                '4,4.0,4.1\n'
                '5,5.0,5.1\n'
                '6,6.0,6.1\n'
                '7,7.0,7.1\n'
                '8,8.0,8.1\n'
                '9,9.0,9.1\n')

    got = self.get_file_contents(self.OUTPUT_FILE)
    msg = ('Merging two files,\n'
           'expected file=\n'
           '--------------------\n'
           '%s'
           '--------------------\n'
           'got file=\n'
           '--------------------\n'
           '%s'
           '--------------------\n'
           % (expected, got))
    # assertTrue replaces the failUnless alias (removed in Python 3.12)
    self.assertTrue(self.str_cmp(got, expected), msg)
---|
| 168 | |
---|
| 169 | |
---|
def test_merge_four_files(self):
    """Test merging four CSV files."""

    file_title_list = [(self.filenames[0], 'test0'),
                       (self.filenames[1], 'test1'),
                       (self.filenames[2], 'test2'),
                       (self.filenames[3], 'test3')]
    csv_tools.merge_csv_key_values(file_title_list, self.OUTPUT_FILE,
                                   key_col='col_0', data_col='col_2')

    # adjacent literals keep the expected text independent of indentation
    expected = ('col_0,test0,test1,test2,test3\n'
                '0,0.0,0.1,0.2,0.3\n'
                '1,1.0,1.1,1.2,1.3\n'
                '2,2.0,2.1,2.2,2.3\n'
                '3,3.0,3.1,3.2,3.3\n'
                '4,4.0,4.1,4.2,4.3\n'
                '5,5.0,5.1,5.2,5.3\n'
                '6,6.0,6.1,6.2,6.3\n'
                '7,7.0,7.1,7.2,7.3\n'
                '8,8.0,8.1,8.2,8.3\n'
                '9,9.0,9.1,9.2,9.3\n')

    got = self.get_file_contents(self.OUTPUT_FILE)
    msg = ('Merging four files,\n'
           'expected file=\n'
           '--------------------\n'
           '%s'
           '--------------------\n'
           'got file=\n'
           '--------------------\n'
           '%s'
           '--------------------\n'
           % (expected, got))
    # assertTrue replaces the failUnless alias (removed in Python 3.12)
    self.assertTrue(self.str_cmp(got, expected), msg)
---|
| 205 | |
---|
| 206 | |
---|
def test_merge_ten_files(self):
    """Test merging ten CSV files, keyed on 'col_1'."""

    # (filename, title) pairs for files 0..9, titled 'test0'..'test9'
    file_title_list = [(self.filenames[i], 'test%d' % i)
                       for i in range(10)]
    csv_tools.merge_csv_key_values(file_title_list, self.OUTPUT_FILE,
                                   key_col='col_1', data_col='col_2')

    # adjacent literals keep the expected text independent of indentation
    expected = (
        'col_1,test0,test1,test2,test3,test4,test5,test6,test7,test8,test9\n'
        '0,0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9\n'
        '1,1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9\n'
        '2,2.0,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8,2.9\n'
        '3,3.0,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8,3.9\n'
        '4,4.0,4.1,4.2,4.3,4.4,4.5,4.6,4.7,4.8,4.9\n'
        '5,5.0,5.1,5.2,5.3,5.4,5.5,5.6,5.7,5.8,5.9\n'
        '6,6.0,6.1,6.2,6.3,6.4,6.5,6.6,6.7,6.8,6.9\n'
        '7,7.0,7.1,7.2,7.3,7.4,7.5,7.6,7.7,7.8,7.9\n'
        '8,8.0,8.1,8.2,8.3,8.4,8.5,8.6,8.7,8.8,8.9\n'
        '9,9.0,9.1,9.2,9.3,9.4,9.5,9.6,9.7,9.8,9.9\n')

    got = self.get_file_contents(self.OUTPUT_FILE)
    # the message used to say "four files" (copy/paste from the test above)
    msg = ('Merging ten files,\n'
           'expected file=\n'
           '--------------------\n'
           '%s'
           '--------------------\n'
           'got file=\n'
           '--------------------\n'
           '%s'
           '--------------------\n'
           % (expected, got))
    # assertTrue replaces the failUnless alias (removed in Python 3.12)
    self.assertTrue(self.str_cmp(got, expected), msg)
---|
| 248 | |
---|
| 249 | |
---|
def test_no_key_column(self):
    """Test merging two CSV files with expected missing key column."""

    file_title_list = [(self.filenames[0], 'test0'),
                       (self.filenames[2], 'test2')]
    # 'col_A' is not one of the headers written by setUp().
    # assertRaises replaces the failUnlessRaises alias (removed in 3.12).
    self.assertRaises(Exception,
                      csv_tools.merge_csv_key_values,
                      file_title_list,
                      self.OUTPUT_FILE,
                      key_col='col_A',
                      data_col='col_2')
---|
| 262 | |
---|
| 263 | |
---|
def test_no_input_files(self):
    """Test merging *zero* CSV files!"""

    file_title_list = []
    # assertRaises replaces the failUnlessRaises alias (removed in 3.12).
    self.assertRaises(Exception,
                      csv_tools.merge_csv_key_values,
                      file_title_list,
                      self.OUTPUT_FILE,
                      key_col='col_1',
                      data_col='col_A')
---|
| 275 | |
---|
| 276 | |
---|
def test_no_data_column(self):
    """Test merging two CSV files with expected missing data column."""

    file_title_list = [(self.filenames[0], 'test0'),
                       (self.filenames[2], 'test2')]
    # 'col_A' is not one of the headers written by setUp().
    # assertRaises replaces the failUnlessRaises alias (removed in 3.12).
    self.assertRaises(Exception,
                      csv_tools.merge_csv_key_values,
                      file_title_list,
                      self.OUTPUT_FILE,
                      key_col='col_1',
                      data_col='col_A')
---|
| 289 | |
---|
| 290 | |
---|
def test_different_num_rows(self):
    """Test merging two CSV files with different number of rows."""

    # get data from file [1]
    with open(self.filenames[1], 'r') as fd:
        lines = fd.readlines()

    test_filename = 'my_test.csv'
    try:
        # write everything except the last row to the test file
        with open(test_filename, 'w') as fd:
            fd.write(''.join(lines[:-1]))

        file_title_list = [(self.filenames[0], 'test0'),
                           (test_filename, 'test2')]
        # NOTE(review): data_col='col_A' is not a header written by
        # setUp(), so the exception may come from the missing data
        # column rather than from the row-count mismatch - confirm.
        self.assertRaises(Exception,
                          csv_tools.merge_csv_key_values,
                          file_title_list,
                          self.OUTPUT_FILE,
                          key_col='col_1',
                          data_col='col_A')
    finally:
        # clean up even when the assertion above fails
        try:
            os.remove(test_filename)
        except OSError:
            pass
---|
| 319 | |
---|
| 320 | |
---|
def test_different_key_values(self):
    """Test merging two CSV files with different key values."""

    # get data from file [1]
    with open(self.filenames[1], 'r') as fd:
        lines = fd.readlines()

    # change a row's leading value by prefixing the line with '1'
    lines[3] = '1' + lines[3]

    test_filename = 'my_test.csv'
    try:
        with open(test_filename, 'w') as fd:
            fd.write(''.join(lines))

        file_title_list = [(self.filenames[0], 'test0'),
                           (test_filename, 'test2')]
        # NOTE(review): the edit above alters the first field of the
        # line (the col_0 value) while key_col is 'col_1', and
        # data_col='col_A' is not a header written by setUp() - confirm
        # the exception is raised for the intended reason.
        self.assertRaises(Exception,
                          csv_tools.merge_csv_key_values,
                          file_title_list,
                          self.OUTPUT_FILE,
                          key_col='col_1',
                          data_col='col_A')
    finally:
        # clean up even when the assertion above fails
        try:
            os.remove(test_filename)
        except OSError:
            pass
---|
| 350 | |
---|
| 351 | |
---|
def test_latex_example(self):
    """Test merging two CSV files - example from latex doc."""

    header = ['time', 'hours', 'stage', 'depth']
    sources = [
        ('alpha.csv', [['3600', '1.00', '100.3', '10.2'],
                       ['3636', '1.01', '100.3', '10.0'],
                       ['3672', '1.02', '100.3', '9.7'],
                       ['3708', '1.03', '100.3', '8.9'],
                       ['3744', '1.04', '100.3', '7.1']]),
        ('beta.csv', [['3600', '1.00', '100.3', '11.3'],
                      ['3636', '1.01', '100.3', '10.5'],
                      ['3672', '1.02', '100.3', '10.0'],
                      ['3708', '1.03', '100.3', '9.7'],
                      ['3744', '1.04', '100.3', '8.2']]),
    ]

    try:
        for (name, rows) in sources:
            with open(name, 'w') as fd:
                writer = csv.writer(fd)
                writer.writerow(header)
                writer.writerows(rows)

        file_title_list = [('alpha.csv', 'alpha'),
                           ('beta.csv', 'beta')]
        csv_tools.merge_csv_key_values(file_title_list,
                                       'gamma.csv',
                                       key_col='hours',
                                       data_col='depth')

        # adjacent literals keep the expected text independent of indentation
        expected = ('hours,alpha,beta\n'
                    '1.00,10.2,11.3\n'
                    '1.01,10.0,10.5\n'
                    '1.02,9.7,10.0\n'
                    '1.03,8.9,9.7\n'
                    '1.04,7.1,8.2\n')

        got = self.get_file_contents('gamma.csv')
        msg = ('Merging two files,\n'
               'expected file=\n'
               '--------------------\n'
               '%s'
               '--------------------\n'
               'got file=\n'
               '--------------------\n'
               '%s'
               '--------------------\n'
               % (expected, got))
        # assertTrue replaces the failUnless alias (removed in Python 3.12)
        self.assertTrue(self.str_cmp(got, expected), msg)
    finally:
        # Remove each scratch file on its own, so one missing file does
        # not stop the others being deleted, and do it even on failure.
        for name in ('alpha.csv', 'beta.csv', 'gamma.csv'):
            try:
                os.remove(name)
            except OSError:
                pass
---|
| 409 | |
---|
| 410 | |
---|
def str_cmp(self, str1, str2):
    '''Compare 2 strings line by line, ignoring end-of-line characters.

    Returns True only when both strings contain the same lines in the
    same order.  The previous zip()-based loop stopped at the end of the
    shorter string, so missing or extra trailing rows went unnoticed.
    '''

    # splitlines() drops the line terminators ('\n' or '\r\n')
    return str1.splitlines() == str2.splitlines()
---|
| 420 | |
---|
| 421 | |
---|
def get_file_contents(self, filename):
    '''Return file contents as a string, with any '\\r' removed.'''

    # 'with' closes the handle even if the read raises
    with open(filename, 'r') as fd:
        return fd.read().replace('\r', '')
---|
| 429 | |
---|
| 430 | ################################################################################ |
---|
| 431 | |
---|
if __name__ == "__main__":
    # unittest.makeSuite() is deprecated (removed in Python 3.13); the
    # loader collects the same 'test*' methods of the case class.
    suite = unittest.TestLoader().loadTestsFromTestCase(Test_CSV_utils)
    runner = unittest.TextTestRunner()
    runner.run(suite)
---|