1 | """ Stress test GA flood database |
---|
2 | |
---|
3 | This script tests that the flood database is live. |
---|
4 | Then proceeds to crawl through the site based on a html file containing 1005 level 1 links. Detail links below are followed automatically. |
---|
5 | |
---|
6 | If the database works, this test should complete with 3715 Detail links |
---|
7 | responding succesfully |
---|
8 | |
---|
9 | |
---|
10 | Ole Nielsen, RAMP 2006 |
---|
11 | x9048 |
---|
12 | """ |
---|
13 | |
---|
14 | |
---|
15 | |
---|
# Flip to True to exercise the beta (www-b) server instead of production.
b_version = False

import urllib
import string
#from caching import cache

# Both URL prefixes share the same host; pick it once from the flag.
if b_version:
    _host = 'http://www-b.ga.gov.au/'
else:
    _host = 'http://www.ga.gov.au/'

base_url = _host + 'oracle/flood/'
servlet_base_url = _host
---|
28 | |
---|
29 | |
---|
30 | def test_site(): |
---|
31 | """Test flood database |
---|
32 | """ |
---|
33 | |
---|
34 | top_url = 'flood_input.jsp' |
---|
35 | |
---|
36 | url = base_url+top_url |
---|
37 | live = False |
---|
38 | print 'Testing %s. ' %url, |
---|
39 | T = get_page(url) |
---|
40 | |
---|
41 | for line in T: |
---|
42 | #print line.strip() |
---|
43 | if line.startswith('<title>Geoscience Australia: Online Flood Search'): |
---|
44 | live = True |
---|
45 | break |
---|
46 | |
---|
47 | if live is True: |
---|
48 | print 'OK: Page is live' |
---|
49 | else: |
---|
50 | msg = 'Page %s is not live' %url |
---|
51 | raise msg |
---|
52 | |
---|
53 | print |
---|
54 | |
---|
55 | |
---|
56 | def test_database(filename): |
---|
57 | """Read list of studies from html and try them one by one |
---|
58 | """ |
---|
59 | |
---|
60 | print 'Reading %s' %filename |
---|
61 | fid = open(filename) |
---|
62 | |
---|
63 | total_detail = 0 |
---|
64 | for i, link in enumerate(get_individual_studies(fid.readlines())): |
---|
65 | url = base_url+htmlmap(link) |
---|
66 | |
---|
67 | live = False |
---|
68 | print 'Testing link %d: %s...' %(i, link[:72]), |
---|
69 | T = get_page(url) |
---|
70 | |
---|
71 | live = False |
---|
72 | for line in T: |
---|
73 | if line.startswith('<tr><th>Dataset Name</th>') or\ |
---|
74 | line.startswith('<h2>Study title only available.</h2>'): |
---|
75 | live = True |
---|
76 | break |
---|
77 | |
---|
78 | if live is True: |
---|
79 | print 'OK: Link %d is live' %i |
---|
80 | else: |
---|
81 | msg = 'FAIL: Link %d is not live: %s' %(i,url) |
---|
82 | raise msg |
---|
83 | |
---|
84 | |
---|
85 | # Secon tier links |
---|
86 | |
---|
87 | for j, link in enumerate(get_second_tier_links(T)): |
---|
88 | url = servlet_base_url+htmlmap(link) |
---|
89 | |
---|
90 | live = False |
---|
91 | print 'Testing detail %d: %s...' %(j, link[:80]), |
---|
92 | T = get_page(url) |
---|
93 | |
---|
94 | live = False |
---|
95 | for line in T: |
---|
96 | #print line.strip() |
---|
97 | if line.startswith('<tr><td><h3>Participants in '): |
---|
98 | live = True |
---|
99 | break |
---|
100 | |
---|
101 | if live is True: |
---|
102 | print 'OK: Detail link %d is live (total=%d)' %(j, total_detail) |
---|
103 | else: |
---|
104 | for line in T: |
---|
105 | print line.strip() |
---|
106 | msg = 'FAIL: Detail link %d is not live (total=%d)' %(j, total_detail) |
---|
107 | raise msg |
---|
108 | |
---|
109 | total_detail += 1 |
---|
110 | |
---|
111 | |
---|
def get_second_tier_links(lines):
    """Scan html lines for flood detail links and yield each one.

    Yields the href fragment starting at 'servlet' and ending just
    before the closing '">'.
    """
    marker = '<a href="/servlet/FloodDetailServlet?sno'
    for html_line in lines:
        hit = html_line.find(marker)
        if hit < 0:
            continue
        begin = html_line.find('servlet', hit)
        finish = html_line.find('">', begin)
        yield html_line[begin:finish]
---|
122 | |
---|
123 | |
---|
124 | |
---|
def get_individual_studies(lines):
    """Scan html lines for individual flood study links and yield each one.

    Yields the href fragment starting at 'flood_infolist' and ending
    just before the closing '">'.
    """
    marker = '<a href="%sflood_infolist.jsp?sno' %base_url
    for html_line in lines:
        hit = html_line.find(marker)
        if hit < 0:
            continue
        begin = html_line.find('flood_infolist', hit)
        finish = html_line.find('">', begin)
        yield html_line[begin:finish]
---|
136 | |
---|
137 | |
---|
def get_page(URL):
    """Fetch URL and return its content as a list of lines.

    The connection object is closed even if reading fails (the original
    version leaked it).
    """

    F = urllib.urlopen(URL)
    try:
        T = F.readlines()
    finally:
        F.close()
    return T
---|
145 | |
---|
146 | |
---|
147 | |
---|
def htmlmap(s):
    """Return s with spaces encoded as '%20' and whitespace stripped.

    Replacement happens first, so interior and edge spaces become '%20'
    and only residual whitespace (tabs, newlines) is stripped.
    Uses str methods instead of the deprecated string-module functions;
    the unused 'import types' has been removed.
    """
    return s.replace(' ', '%20').strip()
---|
154 | |
---|
155 | |
---|
if __name__ == '__main__':
    # Verify the entry page first; any failure raises before the crawl.
    test_site()

    # Crawl the study list matching the server selected by b_version.
    if b_version is True:
        test_database('flood_studies_all_b.html')
    else:
        test_database('flood_studies_all.html')
---|
163 | |
---|