"""Cross-tabulate old and large files on a specified file system.

Usage:

    python diskreport.py /d/cit/7

If no directory is given, the current directory ('.') is used.
"""
---|
7 | |
---|
8 | |
---|
# Selection thresholds
days = 365           # Age limit in days
filesize = 10 ** 6   # Size limit in bytes (one megabyte)


# Shell fragments that are glued together into one pipeline

# Appended to a find command: long listing of every file found
ls_cmd = "-exec ls -la {} \\;"

# Keep three columns of the listing (fields 3, 5 and 9 of `ls -la`).
# NOTE(review): `$3x` is field 3 concatenated with the unset awk variable x,
# so it prints field 3 unchanged; the `x` looks like a typo - confirm.
filter_cmd = "gawk '{printf(\"%s %s %s\\n\", $3x, $5, $9)}' "

# Order by first column, then by second column numerically, largest first
sort_cmd = "sort -k 1,1 -k2,2nr "
---|
19 | |
---|
20 | |
---|
def make_filename(s):
    """Transform argument string into a suitable filename.

    Surrounding whitespace is stripped, parentheses are removed, and spaces
    and slashes are replaced by underscores.  Any run of underscores that
    results is reduced to a single underscore.

    s -- string to transform, e.g. a directory path

    Returns the transformed string.
    """
    import re

    s = s.strip()

    for unwanted in '()':
        s = s.replace(unwanted, '')

    for separator in ' /':
        s = s.replace(separator, '_')

    # Collapse only after every substitution has been made, and collapse
    # whole runs: a single '__' -> '_' pass before the '/' replacement left
    # doubled or tripled underscores behind (e.g. for 'a / b').
    return re.sub('_+', '_', s)
---|
35 | |
---|
36 | |
---|
def _files_by_user(lines):
    """Group '<user> <size> <filename>' listing lines by user.

    Returns a dictionary that maps each user name to a list of
    [filename, size] pairs, in the order the lines were supplied.  Lines
    with fewer than three whitespace-separated fields, or whose second
    field is not an integer, are skipped instead of aborting the report.
    """
    by_user = {}
    for line in lines:
        fields = line.split()
        if len(fields) < 3:
            continue
        try:
            size = int(fields[1])
        except ValueError:
            continue
        by_user.setdefault(fields[0], []).append([fields[2], size])
    return by_user


def diskstat(findcommand, outfile, headline):
    """Run a find command and write a per-user disk usage report.

    findcommand -- shell text of a find invocation; ls_cmd is appended to
                   it directly, so it should end with a space
    outfile     -- name of the report file, which is overwritten
    headline    -- start of the report's first line; the grand total in
                   MB (bytes / 1.0e6) is appended to it

    The output of the pipeline findcommand + ls_cmd | filter_cmd | sort_cmd
    is read as '<user> <size> <filename>' lines.  The report lists users by
    decreasing total size and, under each user, the files found with their
    sizes.  The headline and one summary line per user are also printed.
    """
    import os
    import tempfile

    # Private scratch file rather than a guessable name in /tmp; it is
    # removed again once the listing has been read.
    handle, listing = tempfile.mkstemp(prefix='diskreport_')
    os.close(handle)
    try:
        #Find files
        cmd = findcommand
        cmd += ls_cmd + ' | ' + filter_cmd + ' | ' + sort_cmd
        cmd += ' > "%s" 2>/dev/null' % listing  #Redirect
        os.system(cmd)

        #Organise them by name
        with open(listing) as fid:
            D = _files_by_user(fid)
    finally:
        os.remove(listing)

    #Find totals per user
    totals = {}
    for username in D:
        totals[username] = sum([size for _, size in D[username]])
    grand_total = sum(totals.values())

    #Sort users by diskusage, largest first (ties: reverse name order)
    users = sorted(totals, key=lambda name: (totals[name], name),
                   reverse=True)

    #Write final result to disk
    with open(outfile, 'w') as fid:
        print('')
        txt = headline
        txt += '(%.3f MB):' % (grand_total/1.0e6)
        fid.write(txt + '\n')
        print('--------------------------------------------------')
        print(txt)
        print('--------------------------------------------------')

        fid.write('\n')
        for username in users:
            fid.write('------------------------------------------------------\n')
            txt = '%s (%.3f MB)' % (username, totals[username]/1.0e6)
            fid.write(txt + '\n')
            fid.write('------------------------------------------------------\n')
            print(txt)

            for filename, size in D[username]:
                fid.write(' %s (%.3f MB)\n' % (filename, size/1.0e6))
            fid.write('\n')
---|
113 | |
---|
114 | |
---|
115 | |
---|
116 | |
---|
117 | |
---|
118 | import os, time, sys |
---|
119 | |
---|
120 | |
---|
# Script body: report on the directory named on the command line (default
# '.').  Writes top50_<name>.txt, oldfiles_<name>.txt and allfiles_<name>.txt
# in the current directory, where <name> is make_filename(directory).

# Everything interpolated into a shell command line is quoted, so that a
# path with spaces or shell metacharacters reaches find/du as one argument.
try:
    from shlex import quote as _shell_quote  # Python 3
except ImportError:
    from pipes import quote as _shell_quote  # Python 2


if len(sys.argv) > 1:
    directory = sys.argv[1]
else:
    directory = '.'

quoted_directory = _shell_quote(directory)
report_suffix = make_filename(directory)


print('DISKREPORT FOR DIRECTORY %s' % directory)

# Find the largest directories
topname = 'top50_%s.txt' % report_suffix
txt = 'Top 50 largest directories on %s (stated in kilo bytes)' % directory

# Write the heading from Python; the shell only appends the du listing.
with open(topname, 'w') as fid:
    fid.write(txt + '\n')

# NOTE(review): an earlier remark here mentioned broken-pipe problems and a
# two-step alternative (du into the file, then head) was left disabled;
# the single pipeline below is what actually runs.
cmd = 'du -k %s | sort -nr | head -50 >> %s' % (quoted_directory,
                                                _shell_quote(topname))
os.system(cmd)


#################################################################
# Find old files
#################################################################
diskstat('find %s -xdev -type f -atime +%d ' % (quoted_directory, days),
         'oldfiles_%s.txt' % report_suffix,
         'Statistics for files on disk %s that haven\'t been accessed for at least %d days '
         % (directory, days))


#################################################################
# Find all files
#################################################################
diskstat('find %s -xdev -type f ' % quoted_directory,
         'allfiles_%s.txt' % report_suffix,
         'Statistics for all files on disk %s '
         % directory)


# NOTE(review): the script stops here, so the two reports below are never
# produced.  Remove this exit to enable them (each one runs another find
# over the whole directory tree).
sys.exit()

#################################################################
# Find large files
#################################################################
diskstat('find %s -xdev -type f -size +%dc ' % (quoted_directory, filesize),
         'bigfiles_%s.txt' % report_suffix,
         'Statistics for files on disk %s that are larger than %.3f MB '
         % (directory, filesize/1.0e6))


#################################################################
# Find old and large files
#################################################################
# The size is printed as filesize/1.0e6, i.e. in MB (not kB).
diskstat('find %s -xdev -type f -atime +%d -size +%dc '
         % (quoted_directory, days, filesize),
         'oldbigfiles_%s.txt' % report_suffix,
         'Statistics for files on disk %s that haven\'t been accessed for at least %d days and that are larger than %.3f MB '
         % (directory, days, filesize/1.0e6))
---|
182 | |
---|
183 | |
---|
184 | |
---|
185 | |
---|