Changeset 4963
- Timestamp: Jan 21, 2008, 6:58:15 PM (17 years ago)
- Location: anuga_core/source/anuga/utilities
- Files: 5 edited
Legend:
- Unmodified
- Added
- Removed
anuga_core/source/anuga/utilities/data_audit.py
r4954 r4963 5 5 from os.path import join, splitext 6 6 7 from anuga.utilities.xml_tools import parse, print_tree, get_elements, get_text 7 from anuga.utilities.xml_tools import parse, pretty_print_tree, get_elements, get_text 8 from anuga.utilities.system_tools import compute_checksum 8 9 9 10 # Audit exceptions 10 11 class NotPublishable(Exception): pass 12 class FilenameMismatch(Exception): pass 13 class CRCMismatch(Exception): pass 11 14 class Invalid(Exception): pass 12 15 class WrongTags(Exception): pass 13 16 17 audit_exceptions = (NotPublishable, FilenameMismatch, CRCMismatch, Invalid, WrongTags) 14 18 15 19 def IP_verified(directory): … … 42 46 all_files_accounted_for = True 43 47 for dirpath, datafile in identify_datafiles(directory): 44 print join(dirpath, datafile) + ': ', 48 filename = join(dirpath, datafile) 49 50 print filename + ' (Checksum=%s): ' %str(compute_checksum(filename)), 45 51 46 52 basename, ext = splitext(datafile) … … 53 59 all_files_accounted_for = False 54 60 else: 55 if license_file_is_valid(fid): 61 try: 62 license_file_is_valid(fid, dirpath, verbose=False) 63 except audit_exceptions, e: 64 all_files_accounted_for = False 65 print 'LICENSE FILE NOT VALID' 66 print 'REASON:', e 67 68 #doc = parse(fid) 69 #pretty_print_tree(doc) 70 fid.seek(0) 71 print fid.read() 72 73 else: 56 74 print 'OK' 57 else: 58 print 'LICENSE FILE NOT VALID' 59 all_files_accounted_for = False 75 60 76 fid.close() 61 77 … … 115 131 116 132 117 def license_file_is_valid(fid ):133 def license_file_is_valid(fid, dirpath, verbose=False): 118 134 """Check that XML license file is valid 119 135 """ 120 136 137 license_filename = fid.name 121 138 doc = parse(fid) 122 139 #print_tree(doc) … … 127 144 128 145 if doc.nodeName != '#document': 129 msg = 'License file %s does not appear' % fid.name146 msg = 'License file %s does not appear' %license_filename 130 147 msg += 'to be a valid XML document' 131 148 msg += 'The root node has name %s' %doc.nodeName … … 133 150 
raise Invalid, msg 134 151 135 if len(doc.childNodes) != 2: 136 msg = 'License file %s must have two elements' %fid.name 137 msg += ' at the root level. They are\n ' 138 msg += '<?xml version="1.0" encoding="iso-8859-1"?>\n' 152 if len(doc.childNodes) != 1: 153 msg = 'License file %s must have only one element' %license_filename 154 msg += ' at the root level. It is\n ' 139 155 msg += '<ga_license_file>' 140 156 raise Invalid, msg … … 142 158 143 159 # Start looking at document in earnest 144 root_node = doc.childNodes[ 1]160 root_node = doc.childNodes[0] 145 161 if root_node.nodeName != 'ga_license_file': 146 msg = 'License file %s must have two elements' % fid.name162 msg = 'License file %s must have two elements' %license_filename 147 163 msg += ' at the root level. They are\n ' 148 164 msg += '<?xml version="1.0" encoding="iso-8859-1"?>\n' … … 168 184 raise WrongTags, msg 169 185 170 print186 if verbose: print 171 187 # Extract information for source section 172 188 for node in get_elements(elements[0].childNodes): 173 189 if node.nodeName == 'author': 174 190 # Do something 175 print 'Author is', get_text(node.childNodes)191 if verbose: print 'Author: ', get_text(node.childNodes) 176 192 177 193 if node.nodeName == 'svn_keywords': … … 181 197 # Extract information for datafile sections 182 198 for datanode in elements[1:]: 183 print199 if verbose: print 184 200 185 201 for node in get_elements(datanode.childNodes): … … 189 205 if node.nodeName == 'filename': 190 206 # FIXME Check correctness 191 print 'Filename is "%s"' %get_text(node.childNodes) 207 filename = join(dirpath, get_text(node.childNodes)) 208 if verbose: print 'Filename: "%s"' %filename 209 try: 210 fid = open(filename, 'r') 211 except: 212 msg = 'Specified filename %s could not be opened'\ 213 %filename 214 raise FilenameMismatch, msg 215 216 if node.nodeName == 'checksum': 217 # FIXME (Ole): This relies on crc being preceded by filename 218 reported_crc = get_text(node.childNodes) 219 if 
verbose: print 'Checksum: "%s"' %reported_crc 220 221 file_crc = str(compute_checksum(filename)) 222 223 if reported_crc != file_crc: 224 msg = 'Bad checksum (CRC).\n' 225 msg += ' The CRC reported in license file "%s" is "%s"\n'\ 226 %(license_filename, reported_crc) 227 msg += ' The CRC computed from file "%s" is "%s"'\ 228 %(filename, file_crc) 229 raise CRCMismatch, msg 230 192 231 193 232 if node.nodeName == 'accountable': 194 print 'Accountable is "%s"' %get_text(node.childNodes) 233 accountable = get_text(node.childNodes) 234 if verbose: print 'Accountable: "%s"' %accountable 235 if accountable == "": 236 msg = 'No accountable person specified' 237 raise Exception, msg 195 238 196 239 if node.nodeName == 'source': 197 print 'Source is "%s"' %get_text(node.childNodes) 240 source = get_text(node.childNodes) 241 if verbose: print 'Source: "%s"' %source 242 if source == "": 243 msg = 'No source specified' 244 raise Exception, msg 198 245 199 246 if node.nodeName == 'IP_owner': 200 print 'IP owner is "%s"' %get_text(node.childNodes) 247 ip_owner = get_text(node.childNodes) 248 if verbose: print 'IP owner: "%s"' %ip_owner 249 if ip_owner == "": 250 msg = 'No IP owner specified' 251 raise Exception, msg 252 201 253 202 254 if node.nodeName == 'IP_info': 203 print 'IP info is "%s"' %get_text(node.childNodes)255 if verbose: print 'IP info: "%s"' %get_text(node.childNodes) 204 256 205 257 206 258 if node.nodeName == 'publishable': 259 260 if verbose: print 'Publishable: %s' %fid.name 207 261 value = get_text(node.childNodes) 208 262 if value.upper() != 'YES': 209 263 msg = 'Data file %s is not flagged as publishable'\ 210 264 %fid.name 211 print msg 212 #raise NotPublishable, msg 213 else: 214 print 'Data file %s is flagged publishable' %fid.name 215 216 #FIXME (Ole): Use hash code for original datafile as an XML element 217 # USE CRC32 in zlib or hash 218 219 #for node in elements: 220 # print node 221 #print 222 223 224 225 # Check that file is deemed publishable 
226 items = doc.getElementsByTagName('publishable') 227 for i in items: 228 print i 229 #i.getAttribute() 265 raise NotPublishable, msg 266 267 268 269 # If we get this far, the license file is OK 270 return True -
anuga_core/source/anuga/utilities/mainland_only.lic
r4954 r4963 1 1 <?xml version="1.0" encoding="iso-8859-1"?> 2 3 <!DOCTYPE ga_license_file [4 <!ELEMENT ga_license_file (source, datafile+)>5 <!ELEMENT metadata (author, svn_keywords)>6 <!ELEMENT svn_keywords (author, date, revision, url, id)>7 <!ELEMENT datafile (filename, publishable, accountable,8 owner, location, IP_info)>9 <!ELEMENT filename (#PCDATA)>10 <!ELEMENT publishable (#PCDATA)>11 <!ELEMENT accountable (#PCDATA)>12 <!ELEMENT source (#PCDATA)>13 <!ELEMENT IP_owner (#PCDATA)>14 <!ELEMENT IP_info (#PCDATA)>15 ]>16 2 17 3 <ga_license_file> … … 28 14 <datafile> 29 15 <filename>mainland_only.csv</filename> 16 <checksum>-1661725548</checksum> 30 17 <publishable>No</publishable> 31 18 <accountable>Jane Sexton</accountable> 32 19 <source>Unknown</source> 33 20 <IP_owner>Geoscience Australia</IP_owner> 34 <IP_info>This is a polygon tracing the coastline at Dampier WA. The origin and license issues are undecided</IP_info> 21 <IP_info>This is a polygon comprising easting and northing locations 22 tracing parts of the coastline at Dampier WA as well as a rectangular area inland. 23 This is used to specifically set the onshore initial condition in a tsunami scenario 24 and here, it is used with a unit test in test_polygon.py. 25 26 The coastline was derived from Maritime Boundaries which is a public dataset. However, 27 rumour has it that some of it was digitised from a Landgate supplied image. 28 29 The origin and license issues are still undecided</IP_info> 35 30 </datafile> 36 31 37 32 </ga_license_file> 38 -
anuga_core/source/anuga/utilities/parse.py
r4944 r4963 22 22 23 23 from data_audit import license_file_is_valid 24 license_file_is_valid(fid )24 license_file_is_valid(fid, '.') 25 25 26 26 -
anuga_core/source/anuga/utilities/system_tools.py
r4952 r4963 172 172 173 173 174 def compute_checksum(filename ):174 def compute_checksum(filename, max_length=2**20): 175 175 """Compute the CRC32 checksum for specified file 176 177 Optional parameter max_length sets the maximum number 178 of bytes used to limit time used with large files. 179 Default = 2**20 (1MB) 176 180 """ 177 181 178 import zlib179 #FIXME(Ole): Do we need to limit the size?180 182 #from zlib import crc32 183 from binascii import crc32 #(works as well) 184 181 185 fid = open(filename) 182 183 crcval = zlib.crc32(fid.read()) 184 186 crcval = crc32(fid.read(max_length)) 185 187 return crcval -
anuga_core/source/anuga/utilities/xml_tools.py
r4944 r4963 22 22 23 23 24 def pretty_print_tree(n, indent=0): 25 print n 26 24 27 def parse(fid): 25 28 """Parse XML file descriptor and return DOM object. … … 30 33 #doc = minidom.parse(fid, make_parser()) 31 34 35 fid.seek(0) 32 36 doc = minidom.parse(fid) 33 37 return doc
Note: See TracChangeset for help on using the changeset viewer.