source: tools/pytools/diskreport.py @ 1507

Last change on this file since 1507 was 32, checked in by ole, 20 years ago
File size: 4.9 KB
Line 
r"""Cross-tabulate old and large files on a specified file system.

Usage:

    python diskreport.py /d/cit/7

This will generate and run shell pipelines such as

    find . -xdev -type f -atime +365 -exec ls -la {} \; | gawk '{printf("%s %s %s\n", $3x, $5, $9)}'  | sort -k 1,1 -k2,2nr

"""
13
14
# Thresholds used when selecting files
days = 365
filesize = 1000000  # One megabyte (in bytes)



# Shell fragments that are appended to a 'find' command line.
# ls_cmd lists every match in long format; filter_cmd keeps columns
# 3, 5 and 9 of that listing; sort_cmd orders the result by the first
# column and then by the second column, numerically and descending.
ls_cmd = r'-exec ls -la {} \;'
filter_cmd = r"""gawk '{printf("%s %s %s\n", $3x, $5, $9)}' """
sort_cmd = "sort -k 1,1 -k2,2nr "
25
26
def make_filename(s):
    """Transform argument string into a suitable filename.

    Surrounding whitespace is stripped, parentheses are removed, and
    spaces and slashes are turned into underscores. Any run of
    underscores is then collapsed into a single underscore.

    Returns the transformed string.
    """

    s = s.strip()

    for char in '()':
        s = s.replace(char, '')
    for char in ' /':
        s = s.replace(char, '_')

    # Collapse only after all substitutions have been made, and repeat
    # until stable: a single replace() pass turns '___' into '__', and
    # underscores created from '/' would otherwise never be collapsed.
    while '__' in s:
        s = s.replace('__', '_')

    return s
41
42
def _parse_listing(lines):
    """Group 'username size filename' records by username.

    Lines with fewer than three whitespace separated fields, or whose
    size field is not an integer, are skipped instead of aborting the
    whole report.

    Returns a dictionary mapping username to a list of [filename, size].
    """

    files_by_user = {}
    for line in lines:
        fields = line.split()
        if len(fields) < 3:
            continue

        try:
            size = int(fields[1])
        except ValueError:
            continue

        files_by_user.setdefault(fields[0], []).append([fields[2], size])

    return files_by_user


def diskstat(findcommand, outfile, headline):
    """Run a find command and write a per-user disk usage report.

    findcommand: Beginning of a shell 'find' command line (ending with a
                 space). ls_cmd, filter_cmd and sort_cmd are appended to
                 it and the resulting pipeline is run through os.system.
    outfile:     Name of the report file to write (overwritten).
    headline:    Text for the first line of the report; the grand total
                 in MB is appended to it.

    The report lists users by decreasing total size, each followed by
    their files. A summary is also printed to standard output.
    The exit status of the shell pipeline is not checked.
    """

    import os
    import tempfile

    # mkstemp gives a uniquely named file that only this user can access;
    # the shell redirects the listing into it below.
    fd, listing = tempfile.mkstemp(prefix='diskreport_')
    os.close(fd)

    try:
        #Find files
        cmd = findcommand
        cmd += ls_cmd + ' | ' + filter_cmd + ' | ' + sort_cmd
        cmd += ' > "%s" 2>/dev/null' % listing  #Redirect
        print(cmd)
        os.system(cmd)

        #Organise them by name
        with open(listing) as fid:
            D = _parse_listing(fid)
    finally:
        # The listing is only an intermediate result
        os.remove(listing)

    #Find totals per user
    totals = {}
    for username in D:
        totals[username] = sum([size for _, size in D[username]])
    grand_total = sum(totals.values())

    #Sort users by diskusage (largest first)
    ranked = sorted([(totals[username], username) for username in totals],
                    reverse=True)
    users = [username for _, username in ranked]

    #Write final result to disk
    rule = '------------------------------------------------------\n'
    with open(outfile, 'w') as fid:
        print('')
        txt = headline
        txt += '(%.3f MB):' % (grand_total/1.0e6)
        fid.write(txt + '\n')
        print('--------------------------------------------------')
        print(txt)
        print('--------------------------------------------------')

        fid.write('\n')
        for username in users:
            fid.write(rule)
            txt = '%s (%.3f MB)' % (username, totals[username]/1.0e6)
            fid.write(txt + '\n')
            fid.write(rule)
            print(txt)

            for filename, size in D[username]:
                fid.write('    %s (%.3f MB)\n' % (filename, size/1.0e6))
            fid.write('\n')
119
120
121
122
123
124import os, time, sys
125
126
# Directory to report on: first command line argument, or the current
# directory when none is given.
if len(sys.argv) > 1:
    directory = sys.argv[1]
else:
    directory = '.'

# Shared suffix for the names of all report files
report_suffix = make_filename(directory)

print('DISKREPORT FOR DIRECTORY %s' % directory)

# Find the largest directories
topname = 'top50_%s.txt' % report_suffix
txt = 'Top 50 largest directories on %s (stated in kilo bytes)' % directory

# Write the header directly rather than through 'echo' so that the shell
# cannot interpret characters in the directory name.
fid = open(topname, 'w')
try:
    fid.write(txt + '\n')
finally:
    fid.close()

# NOTE: the original author reported "broken pipe" problems with this
# pipeline and experimented with running it in two steps.
cmd = 'du -k "%s" | sort -nr | head -50 >> "%s"' % (directory, topname)
os.system(cmd)


#################################################################
# Find old files
#################################################################
diskstat('find "%s" -xdev -type f -atime +%d ' % (directory, days),
         'oldfiles_%s.txt' % report_suffix,
         'Statistics for files on disk %s that haven\'t been accessed for at least %d days '
         % (directory, days))


#################################################################
# Find all files
#################################################################
diskstat('find "%s" -xdev -type f ' % directory,
         'allfiles_%s.txt' % report_suffix,
         'Statistics for all files on disk %s '
         % directory)


# The script stops here: the two reports below are currently disabled
# and are kept only so they can be re-enabled by removing this line.
sys.exit()

#################################################################
# Find large files
#################################################################
diskstat('find "%s" -xdev -type f -size +%dc ' % (directory, filesize),
         'bigfiles_%s.txt' % report_suffix,
         'Statistics for files on disk %s that are larger than %.3f MB '
         % (directory, filesize/1.0e6))


#################################################################
# Find old and large files
#################################################################
diskstat('find "%s" -xdev -type f -atime +%d -size +%dc '
         % (directory, days, filesize),
         'oldbigfiles_%s.txt' % report_suffix,
         'Statistics for files on disk %s that haven\'t been accessed for at least %d days and that are larger than %.3f MB '
         % (directory, days, filesize/1.0e6))
188
189         
190
191
Note: See TracBrowser for help on using the repository browser.