[18] | 1 | """Cross tabulate old and large files on specified file system |
---|
| 2 | |
---|
| 3 | Usage |
---|
| 4 | |
---|
[32] | 5 | python diskreport.py /d/cit/7 |
---|
| 6 | |
---|
| 7 | |
---|
| 8 | will generate things like |
---|
| 9 | |
---|
| 10 | find . -xdev -type f -atime +365 -exec ls -la {} \; | gawk '{printf("%s %s %s\n", $3x, $5, $9)}' | sort -k 1,1 -k2,2nr |
---|
| 11 | |
---|
[18] | 12 | """ |
---|
| 13 | |
---|
[28] | 14 | |
---|
# Report thresholds
days = 365            # Minimum age, in days, passed to find's -atime test
filesize = 10 ** 6    # Minimum size, in bytes, passed to find's -size test (one megabyte)


# Shell fragments that are appended to a find command by diskstat()

# Long listing of every file that find reports
ls_cmd = r'-exec ls -la {} \;'

# Keep three columns of the ls output; diskstat() reads them as
# user name, size and file name
filter_cmd = "gawk '{printf(\"%s %s %s\\n\", $3x, $5, $9)}' "

# Order by first column (name), then by second column numerically, largest first
sort_cmd = "sort -k 1,1 -k2,2nr "
---|
[18] | 25 | |
---|
[21] | 26 | |
---|
def make_filename(s):
    """Transform argument string into a suitable filename.

    Leading and trailing whitespace is stripped, parentheses are removed,
    and spaces and slashes are replaced by underscores. Any run of
    underscores left over is collapsed into a single underscore.

    Returns the transformed string.
    """

    s = s.strip()

    for unwanted in '()':
        s = s.replace(unwanted, '')

    for separator in ' /':
        s = s.replace(separator, '_')

    # Collapse last, and repeat until stable: substituting slashes can itself
    # create adjacent underscores, and one pass only halves a longer run.
    while '__' in s:
        s = s.replace('__', '_')

    return s
---|
[21] | 41 | |
---|
[24] | 42 | |
---|
def diskstat(findcommand, outfile, headline):
    """Run a find command and write a per-user disk usage report.

    findcommand: shell text of a find invocation; the module-level ls_cmd,
                 filter_cmd and sort_cmd fragments are appended to it.
    outfile:     name of the report file to (over)write.
    headline:    text for the first line of the report; the grand total
                 in MB is appended to it.

    The command output is read as lines of three whitespace-separated
    fields: user name, size and file name. Lines that do not have three
    fields, or whose size is not an integer, are skipped.

    Users are listed by decreasing total size. The headline and one summary
    line per user are also printed to standard output.
    """

    import os
    import tempfile

    # mkstemp creates a uniquely named file that only this user can access,
    # instead of a guessable name in /tmp.
    handle, listing = tempfile.mkstemp(prefix='diskreport_')
    os.close(handle)

    try:
        #Find files
        # NOTE(review): the command is run through the shell, so findcommand
        # must already be safely quoted by the caller.
        cmd = findcommand
        cmd += ls_cmd + ' | ' + filter_cmd + ' | ' + sort_cmd
        cmd += ' > "%s" 2>/dev/null' % listing  #Redirect
        print(cmd)
        os.system(cmd)

        with open(listing) as fid:
            lines = fid.readlines()
    finally:
        # The listing is only an intermediate result; never leave it behind
        os.remove(listing)

    #Organise them by name
    D = {}
    for line in lines:
        fields = line.split()
        if len(fields) < 3:
            continue  #Blank or truncated line

        try:
            size = int(fields[1])
        except ValueError:
            continue  #Second column is not a size

        D.setdefault(fields[0], []).append([fields[2], size])

    #Find totals per user
    totals = {}
    for username in D:
        totals[username] = sum([size for _, size in D[username]])
    grand_total = sum(totals.values())

    #Sort users by diskusage (largest first)
    ranking = sorted([(totals[username], username) for username in totals],
                     reverse=True)
    users = [username for _, username in ranking]

    #Report the headline on screen
    txt = headline
    txt += '(%.3f MB):' % (grand_total/1.0e6)
    print('')
    print('--------------------------------------------------')
    print(txt)
    print('--------------------------------------------------')

    #Write final result to disk
    with open(outfile, 'w') as fid:
        fid.write(txt + '\n')
        fid.write('\n')

        for username in users:
            fid.write('------------------------------------------------------\n')
            txt = '%s (%.3f MB)' % (username, totals[username]/1.0e6)
            fid.write(txt + '\n')
            fid.write('------------------------------------------------------\n')
            print(txt)

            for filename, size in D[username]:
                fid.write('  %s (%.3f MB)\n' % (filename, size/1.0e6))
            fid.write('\n')
---|
[18] | 119 | |
---|
| 120 | |
---|
| 121 | |
---|
| 122 | |
---|
[24] | 123 | |
---|
[26] | 124 | import os, time, sys |
---|
[24] | 125 | |
---|
[18] | 126 | |
---|
# Directory to report on: first command line argument, default current directory
if len(sys.argv) > 1:
    target = sys.argv[1]
else:
    target = '.'


print('DISKREPORT FOR DIRECTORY %s' % target)

# Find the largest directories
topname = 'top50_%s.txt' % (make_filename(target))
txt = 'Top 50 largest directories on %s (stated in kilo bytes)' % target

# Write the header from Python so the shell cannot expand anything in the path
with open(topname, 'w') as header:
    header.write(txt + '\n')

cmd = 'du -k "%s" | sort -nr | head -50 >> "%s"' % (target, topname)
os.system(cmd)


#################################################################
# Find old files
#################################################################
# The directory is quoted (as it is for du above) so that paths containing
# spaces reach find as a single argument.
diskstat('find "%s" -xdev -type f -atime +%d ' % (target, days),
         'oldfiles_%s.txt' % make_filename(target),
         'Statistics for files on disk %s that haven\'t been accessed for at least %d days '
         % (target, days))


#################################################################
# Find all files
#################################################################
diskstat('find "%s" -xdev -type f ' % (target),
         'allfiles_%s.txt' % make_filename(target),
         'Statistics for all files on disk %s '
         % (target))


# NOTE(review): the script stops here, so the two reports below never run.
# This looks like a leftover shortcut - confirm before removing the exit.
sys.exit()

#################################################################
# Find large files
#################################################################
diskstat('find "%s" -xdev -type f -size +%dc ' % (target, filesize),
         'bigfiles_%s.txt' % make_filename(target),
         'Statistics for files on disk %s that are larger than %.3f MB '
         % (target, filesize/1.0e6))


#################################################################
# Find old and large files
#################################################################
# The size in the headline is filesize/1.0e6, i.e. megabytes, so it is
# labelled MB like the large-files report above.
diskstat('find "%s" -xdev -type f -atime +%d -size +%dc '
         % (target, days, filesize),
         'oldbigfiles_%s.txt' % make_filename(target),
         'Statistics for files on disk %s that haven\'t been accessed for at least %d days and that are larger than %.3f MB '
         % (target, days, filesize/1.0e6))
---|
[24] | 188 | |
---|
[26] | 189 | |
---|
[21] | 190 | |
---|
| 191 | |
---|