"""Cross tabulate old and large files on a specified file system.

Usage:

    python diskreport.py /d/cit/7

The reports are built by running shell pipelines such as

    find . -xdev -type f -atime +365 -exec ls -la {} \; | gawk '{printf("%s %s %s\n", $3x, $5, $9)}' | sort -k 1,1 -k2,2nr

"""
---|
13 | |
---|
14 | |
---|
# Thresholds used when selecting files
days = 365          # Passed to find's -atime test
filesize = 10 ** 6  # One megabyte; passed to find's -size test


# Shell fragments that are chained together after a find command

# Long listing of every file that find matches
ls_cmd = r'-exec ls -la {} \;'

# Keep fields 3, 5 and 9 of each listing line
filter_cmd = "gawk '" + r'{printf("%s %s %s\n", $3x, $5, $9)}' + "' "

# Sort on name and reversely by size
sort_cmd = "sort -k 1,1 -k2,2nr "
---|
25 | |
---|
26 | |
---|
def make_filename(s):
    """Transform argument string into a suitable filename.

    Surrounding whitespace is stripped, parentheses are removed, and
    spaces and slashes are replaced by underscores.  Any run of
    underscores in the result is collapsed to a single underscore.

    Parameters:
        s: the string (typically a directory path) to convert.

    Returns:
        The converted string.
    """

    s = s.strip()

    for unwanted in '()':
        s = s.replace(unwanted, '')

    for separator in ' /':
        s = s.replace(separator, '_')

    # Collapse underscores last and repeatedly: doing it before the slash
    # substitution, or in a single pass, leaves doubled underscores behind
    # for inputs such as 'a//b' or 'a / b'.
    while '__' in s:
        s = s.replace('__', '_')

    return s
---|
41 | |
---|
42 | |
---|
def diskstat(findcommand, outfile, headline):
    """Run a find command and write a per-user summary of the files found.

    The shell pipeline ``findcommand + ls_cmd | filter_cmd | sort_cmd`` is
    executed with its output redirected to a temporary file.  Each line of
    that file is read as three whitespace separated fields: user name,
    size and file name.  The files are grouped by user, users are ordered
    by decreasing total size, and the result is written to ``outfile``.
    The headline and the per-user totals are also printed to stdout.
    Sizes are reported divided by 1.0e6 and labelled MB.

    Parameters:
        findcommand: start of the shell command; ``ls_cmd`` is appended to
            it directly, so it is expected to end with a space.
        outfile: name of the report file to (over)write.
        headline: text placed in front of the grand total in the first
            line of the report.

    Returns:
        None.
    """

    # Imported locally so the function does not depend on where (or
    # whether) the enclosing file imports these modules.
    import os
    import tempfile

    # mkstemp gives a uniquely named file that only this user can access,
    # instead of a guessable name built from the current time.
    handle, listing_path = tempfile.mkstemp(prefix='diskreport_')
    os.close(handle)

    try:
        # Find files
        cmd = findcommand
        cmd += ls_cmd + ' | ' + filter_cmd + ' | ' + sort_cmd
        cmd += ' > "%s" 2>/dev/null' % listing_path  # Redirect
        print(cmd)
        os.system(cmd)

        with open(listing_path) as fid:
            lines = fid.readlines()
    finally:
        # The listing is only an intermediate result; do not leave it behind.
        os.remove(listing_path)

    # Organise files by user name, keeping the order of the sorted listing
    files_by_user = {}
    for line in lines:
        fields = line.split()
        if len(fields) < 3:
            # Blank or truncated line; nothing usable on it.
            continue

        try:
            size = int(fields[1])
        except ValueError:
            # Size field is not a number; skip rather than abort the report.
            continue

        files_by_user.setdefault(fields[0], []).append([fields[2], size])

    # Find totals per user
    totals = {}
    for username, entries in files_by_user.items():
        totals[username] = sum([size for _, size in entries])
    grand_total = sum(totals.values())

    # Users by disk usage, largest first (ties ordered by name, descending)
    users = sorted(totals, key=lambda name: (totals[name], name), reverse=True)

    # Write final result to disk
    with open(outfile, 'w') as fid:
        print('')
        txt = headline
        txt += '(%.3f MB):' % (grand_total / 1.0e6)
        fid.write(txt + '\n')
        print('--------------------------------------------------')
        print(txt)
        print('--------------------------------------------------')

        fid.write('\n')
        for username in users:
            fid.write('------------------------------------------------------\n')
            txt = '%s (%.3f MB)' % (username, totals[username] / 1.0e6)
            fid.write(txt + '\n')
            fid.write('------------------------------------------------------\n')
            print(txt)

            for filename, size in files_by_user[username]:
                fid.write(' %s (%.3f MB)\n' % (filename, size / 1.0e6))
            fid.write('\n')
---|
119 | |
---|
120 | |
---|
121 | |
---|
122 | |
---|
123 | |
---|
124 | import os, time, sys |
---|
125 | |
---|
126 | |
---|
# Directory to report on: first command line argument, defaulting to the
# current directory.
if len(sys.argv) > 1:
    target_dir = sys.argv[1]
else:
    target_dir = '.'

# The two large-file reports at the end of this script used to be cut off
# by an unconditional sys.exit() placed in front of them.  They stay
# disabled by default; set this to True to generate them as well.
include_large_file_reports = False

print('DISKREPORT FOR DIRECTORY %s' % target_dir)

name_suffix = make_filename(target_dir)


# Find the largest directories
topname = 'top50_%s.txt' % name_suffix
txt = 'Top 50 largest directories on %s (stated in kilo bytes)' % target_dir

# Write the header directly rather than through a shell "echo", so that
# shell metacharacters in the directory name cannot alter it.
with open(topname, 'w') as header_file:
    header_file.write(txt + '\n')

# Append the 50 largest entries reported by du to the header
cmd = 'du -k "%s" | sort -nr | head -50 >> "%s"' % (target_dir, topname)
os.system(cmd)


#################################################################
# Find old files
#################################################################
# The directory is quoted here, as in the du command, so that paths
# containing spaces are passed to find as one argument.
diskstat('find "%s" -xdev -type f -atime +%d ' % (target_dir, days),
         'oldfiles_%s.txt' % name_suffix,
         'Statistics for files on disk %s that haven\'t been accessed for at least %d days '
         % (target_dir, days))


#################################################################
# Find all files
#################################################################
diskstat('find "%s" -xdev -type f ' % target_dir,
         'allfiles_%s.txt' % name_suffix,
         'Statistics for all files on disk %s '
         % target_dir)


if include_large_file_reports:
    #################################################################
    # Find large files
    #################################################################
    diskstat('find "%s" -xdev -type f -size +%dc ' % (target_dir, filesize),
             'bigfiles_%s.txt' % name_suffix,
             'Statistics for files on disk %s that are larger than %.3f MB '
             % (target_dir, filesize / 1.0e6))

    #################################################################
    # Find old and large files
    #################################################################
    # filesize / 1.0e6 is the same quantity labelled MB in the report
    # above, so it is labelled MB here too.
    diskstat('find "%s" -xdev -type f -atime +%d -size +%dc '
             % (target_dir, days, filesize),
             'oldbigfiles_%s.txt' % name_suffix,
             'Statistics for files on disk %s that haven\'t been accessed for at least %d days and that are larger than %.3f MB '
             % (target_dir, days, filesize / 1.0e6))
---|
188 | |
---|
189 | |
---|
190 | |
---|
191 | |
---|