source: tools/pytools/diskreport.py @ 1507

Last change on this file since 1507 was 32, checked in by ole, 20 years ago
File size: 4.9 KB
RevLine 
[18]1"""Cross tabulate old and large files on specified file system
2
3Usage
4
[32]5python diskreport.py /d/cit/7
6
7
8will generate things like
9
10find . -xdev -type f -atime +365 -exec ls -la {} \; | gawk '{printf("%s %s %s\n", $3x, $5, $9)}'  | sort -k 1,1 -k2,2nr
11
[18]12"""
13
[28]14
# Selection thresholds used by the reports
days = 365          # passed to find's -atime: days since last access
filesize = 10 ** 6  # passed to find's -size (suffix 'c', i.e. bytes): one megabyte


# Shell fragments appended to a find(1) expression
ls_cmd = r'-exec ls -la {} \;'
# Reduce each 'ls -la' line to fields 3, 5 and 9 (read back as user, size, filename)
filter_cmd = "gawk '{printf(\"%s %s %s\\n\", $3x, $5, $9)}' "
# Order by the first field (name), then numerically descending on the second (size)
sort_cmd = 'sort -k 1,1 -k2,2nr '
[18]25
[21]26
def make_filename(s):
    """Transform argument string into a suitable filename.

    Surrounding whitespace is stripped, spaces and slashes become
    underscores, parentheses are removed, and every run of underscores is
    collapsed to a single underscore.
    """
    s = s.strip()
    for old, new in ((' ', '_'), ('/', '_'), ('(', ''), (')', '')):
        s = s.replace(old, new)

    # Collapse only after all substitutions, and repeat until stable: a
    # single replace() pass turns '____' into '__', and underscores created
    # from slashes or removed parentheses would otherwise be missed.
    while '__' in s:
        s = s.replace('__', '_')

    return s
[21]41
[24]42
def _parse_listing(lines):
    """Group '<user> <size> <filename>' lines by user.

    Returns a dict mapping user name to a list of [filename, size] pairs in
    input order. Lines with fewer than three fields or a non-integer size
    are skipped so one odd line does not abort the whole report.
    """
    files_by_user = {}
    for line in lines:
        fields = line.split()
        if len(fields) < 3:
            continue
        try:
            size = int(fields[1])
        except ValueError:
            continue
        files_by_user.setdefault(fields[0], []).append([fields[2], size])
    return files_by_user


def diskstat(findcommand, outfile, headline):
    """Run a find(1) query and write a per-user disk usage report.

    findcommand -- leading 'find ...' part of the shell command; ls_cmd,
                   filter_cmd and sort_cmd are appended to it
    outfile     -- path of the report file, overwritten if it exists
    headline    -- report title; the grand total in MB is appended to it

    Users appear in the report by decreasing total size, each followed by
    their files in the order produced by the shell pipeline. The headline
    and the per-user totals are also printed to stdout.
    """
    import os
    import tempfile

    # mkstemp creates the file itself with an unpredictable name, unlike a
    # hand-built '/tmp/diskreport_<timestamp>' path.
    fd, tmp_path = tempfile.mkstemp(prefix='diskreport_')
    os.close(fd)
    try:
        # Find files
        cmd = findcommand
        cmd += ls_cmd + ' | ' + filter_cmd + ' | ' + sort_cmd
        cmd += ' > "%s" 2>/dev/null' % tmp_path  # Redirect
        print(cmd)
        os.system(cmd)

        # Organise them by name
        with open(tmp_path) as fid:
            files_by_user = _parse_listing(fid)
    finally:
        # The listing is only an intermediate result; never leave it behind
        os.remove(tmp_path)

    # Find totals per user
    totals = {}
    for username, entries in files_by_user.items():
        totals[username] = sum([size for _, size in entries])
    grand_total = sum(totals.values())

    # Users by disk usage, largest first (ties broken by name, descending)
    users = sorted(totals, key=lambda name: (totals[name], name), reverse=True)

    rule = '------------------------------------------------------'
    title = headline + '(%.3f MB):' % (grand_total / 1.0e6)

    print('')
    print('--------------------------------------------------')
    print(title)
    print('--------------------------------------------------')

    # Write final result to disk
    with open(outfile, 'w') as fid:
        fid.write(title + '\n')
        fid.write('\n')
        for username in users:
            txt = '%s (%.3f MB)' % (username, totals[username] / 1.0e6)
            fid.write(rule + '\n')
            fid.write(txt + '\n')
            fid.write(rule + '\n')
            print(txt)

            for filename, size in files_by_user[username]:
                fid.write('    %s (%.3f MB)\n' % (filename, size / 1.0e6))
            fid.write('\n')
[18]119
120
121
122
[24]123
[26]124import os, time, sys
[24]125
[18]126
# Directory to report on: first command line argument, else the current one.
# NOTE: the name shadows the builtin dir(); kept because it is a
# module-level name of this script.
if len(sys.argv) > 1:
    dir = sys.argv[1]
else:
    dir = '.'


print('DISKREPORT FOR DIRECTORY %s' % dir)

# Find the largest directories
topname = 'top50_%s.txt' % make_filename(dir)
txt = 'Top 50 largest directories on %s (stated in kilo bytes)' % dir

# Write the header line from Python instead of through 'echo', so the
# directory name is never interpreted by the shell.
fid = open(topname, 'w')
try:
    fid.write(txt + '\n')
finally:
    fid.close()

# The du listing is appended below the header written above
cmd = 'du -k "%s" | sort -nr | head -50 >> %s' % (dir, topname)
os.system(cmd)


#################################################################
# Find old files
#################################################################
# The directory is double-quoted for the shell, as in the du command above,
# so that paths containing spaces reach find as a single argument.
diskstat('find "%s" -xdev -type f -atime +%d ' % (dir, days),
         'oldfiles_%s.txt' % make_filename(dir),
         'Statistics for files on disk %s that haven\'t been accessed for at least %d days '
         % (dir, days))


#################################################################
# Find all files
#################################################################
diskstat('find "%s" -xdev -type f ' % dir,
         'allfiles_%s.txt' % make_filename(dir),
         'Statistics for all files on disk %s '
         % dir)


# NOTE(review): the script stops here, so the two reports below never run.
# This looks like a leftover shortcut - confirm before removing, since each
# report triggers another full scan of the file system.
sys.exit()

#################################################################
# Find large files
#################################################################
diskstat('find "%s" -xdev -type f -size +%dc ' % (dir, filesize),
         'bigfiles_%s.txt' % make_filename(dir),
         'Statistics for files on disk %s that are larger than %.3f MB '
         % (dir, filesize / 1.0e6))


#################################################################
# Find old and large files
#################################################################
# The size is divided by 1.0e6, so the unit in the headline is MB (the
# original text said kB).
diskstat('find "%s" -xdev -type f -atime +%d -size +%dc '
         % (dir, days, filesize),
         'oldbigfiles_%s.txt' % make_filename(dir),
         'Statistics for files on disk %s that haven\'t been accessed for at least %d days and that are larger than %.3f MB '
         % (dir, days, filesize / 1.0e6))
[24]188
[26]189         
[21]190
191
Note: See TracBrowser for help on using the repository browser.