Ignore:
Timestamp:
Nov 19, 2004, 3:29:26 PM (19 years ago)
Author:
ole
Message:

Disk stats for unix (perlite@GA)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • tools/pytools/diskreport.py

    r25 r26  
    66"""
    77
     8#Useful Unix commands
     9ls_cmd = '-exec ls -la {} \\;'
     10filter_cmd = 'gawk \'{printf("%s %s %s\\n", $3x, $5, $9)}\' '
     11sortsize_cmd = 'sort -k 2,2 -nr '
     12sortname_cmd = 'sort -k 1,1 '
    813
    914
    10 def tally(FN, col=4):
     15def make_filename(s):
     16    """Transform argument string into a suitable filename
     17    """
    1118
    12     #Tally the size
    13     fid = open(FN)
     19    s = s.strip()
     20    s = s.replace(' ', '_')
     21    #s = s.replace('(', '{')
     22    #s = s.replace(')', '}')
     23    s = s.replace('(', '')
     24    s = s.replace(')', '')
     25    s = s.replace('__', '_')
     26    s = s.replace('/', '_')                   
     27       
     28    return s
     29
     30
     31def diskstat(findcommand, outfile, headline):
     32
     33    import time
     34
     35    tempfile = '/tmp/diskreport_' + str(time.time())
     36
     37    #Find files
     38    cmd = findcommand
     39    cmd += ls_cmd + ' | ' + filter_cmd + ' | ' +\
     40           sortsize_cmd + ' | ' + sortname_cmd
     41
     42    cmd += ' > %s 2>/dev/null' %tempfile #Redirect
     43    #print cmd
     44    os.system(cmd)
     45
     46    #sort_and_clean(tempfile)
     47
     48    #Organise them by name (with totals)
     49    fid = open(tempfile)
    1450    lines = fid.readlines()
    1551    fid.close()
    1652
    17     x = 0
    18     for line in lines[1:]:
    19         fields = line.split()
    20         try:
    21             x += float(fields[col])
    22         except:
    23             pass
    24        
    25     return x   
     53    D = {}
     54    for line in lines:
     55        fields = line.strip().split()
    2656
    27 def sort_and_clean(FN):
    28     fid = open(FN)
    29     lines = fid.readlines()
     57        username = fields[0]
     58        size = long(fields[1])
     59        filename = fields[2]
     60        if not D.has_key(username):
     61            D[username] = []
     62        D[username].append( [filename, size] )
     63
     64    #Find totals per user
     65    grand_total = 0
     66    totals = {}
     67    for username in D:
     68        total = 0
     69        for filename, size in D[username]:
     70            total += size
     71
     72        totals[username] = total
     73        grand_total += total
     74
     75
     76    #Sort users by diskusage
     77    S = [(totals[username], username) for username in totals] #List comprehension   
     78    S.sort()
     79    S.reverse()
     80    users = [x[1] for x in S] #Users by diskusage
     81
     82
     83    #Write final result to disk
     84    fid = open(outfile, 'w')
     85
     86    print
     87    txt = headline
     88    txt += '(%.2f MB):' %(grand_total/1.0e6)
     89    fid.write(txt + '\n')
     90    print '--------------------------------------------------'
     91    print txt
     92    print '--------------------------------------------------'   
     93   
     94
     95    fid.write('\n')
     96    for username in users:
     97        fid.write('------------------------------------------------------\n')
     98        txt = '%s (totalling %.2f MB)' %(username, totals[username]/1.0e6)
     99        fid.write(txt + '\n')
     100        fid.write('------------------------------------------------------\n')
     101        print txt
     102
     103        for filename, size in D[username]:
     104            fid.write('    %s (%.2f MB)\n' %(filename, size/1.0e6))
     105        fid.write('\n')   
     106
     107
    30108    fid.close()
    31109
    32     fid = open(FN, 'w')
    33110
    34     fid.write(lines[0]) #Header
    35111
    36     sizes = []
    37     A = []
    38     for line in lines[1:]:
    39         fields = line.split()
    40 
    41         try:
    42             x = float(fields[4])
    43         except:
    44             pass
    45             print fields
    46         else:
    47             sizes.append(x)
    48             A.append( fields )   
    49        
    50     # Schwartzian transform     
    51     S = [ (sizes[i], A[i]) for i in range(len(sizes)) ] #List comprehension   
    52     S.sort()
    53     B = [x[1] for x in S]
    54    
    55     print A, B
    56    
    57    
    58     return B
    59    
    60112
    61113
     
    65117if len(sys.argv) > 1:
    66118    dir = sys.argv[1]
    67     days = int(sys.argv[2])
    68     filesize = int(sys.argv[3])
     119    #days = int(sys.argv[2])
     120    #filesize = int(sys.argv[3])
    69121else:   
    70122    dir = '.'
    71     days = 180
    72     filesize = 500000  #One half megabyte
    73 
    74 tempfile = '/tmp/diskreport_' + str(time.time())
     123   
     124days = 365
     125filesize = 1000000  #One megabyte
    75126
    76127
     
    78129
    79130# Find the largest directories
    80 cmd = 'du -k "%s" | sort -nr > %s' %(dir, tempfile)
    81 #print cmd
    82 os.system(cmd)
    83 
    84 fid = open(tempfile)
    85 lines = fid.readlines()
    86 fid.close()
    87 
    88131txt = 'Top 50 largest directories on %s (stated in kilo bytes)' %dir
    89132os.system('echo "%s" > top50.txt' %txt)
    90 
    91 fid = open('top50.txt', 'a')
    92 for line in lines[:50]:
    93     l = line.strip()
    94     #print l
    95     fid.write(l + '\n')
    96 fid.close()
    97 
    98 
    99 txt = 'The top 50 largest directories are stored in top50.txt'
    100 print txt
    101 
    102 
    103 # Find old files
    104 #
    105 cmd = 'find %s -atime +%d > %s 2>/dev/null' %(dir, days, tempfile)
    106 #print cmd
     133cmd = 'du -k "%s" | sort -nr >> top50_%s.txt' %(dir, make_filename(dir))
    107134os.system(cmd)
    108135
    109136
    110 fid = open(tempfile)
    111 lines = fid.readlines()
    112 fid.close()
     137#################################################################
     138# Find old files
     139#################################################################
     140diskstat('find %s -xdev -type f -atime +%d ' %(dir, days),
     141         'oldfiles_%s.txt' %make_filename(dir),
     142         'Statistics for files on disk %s that haven\'t been accessed for at least %d days '\
     143         %(dir, days))
     144
     145         
     146#################################################################
     147# Find large files
     148#################################################################
     149diskstat('find %s -xdev -type f -size +%dc ' %(dir, filesize),
     150         'bigfiles_%s.txt' %make_filename(dir),         
     151         'Statistics for files on disk %s that are larger than %.2f MB '\
     152         %(dir, filesize/1.0e6))
    113153
    114154
    115 txt = 'The oldest files'
    116 os.system('echo "%s" > oldfiles.txt' %txt)
    117 oldfiles = []
    118 for line in lines:
    119     l = line.strip()
    120     oldfiles.append(l)
    121     #Use python os to get stats
    122     #Or use: find <path> -name <name> -type f -atime +30 -exec ls -all {} \;
    123     #find $1 -type f  -size +2048 -xdev -exec ls -s {} + | /bin/sort -nr
    124     #find / -xdev -size +1024 -exec ls -al {} \; | sort -r -k 5
    125    
    126     #GREAT ONE
    127     #find . -type f -size +10 -printf "%s %u %a %k %h%f \n" | sort -nr
    128    
    129     print os.stat(l)
    130     cmd = 'ls -lua "%s" >> oldfiles.txt 2>/dev/null' %l
    131     #print cmd
    132     os.system(cmd)
     155#################################################################
     156# Find old and large files
     157#################################################################
     158diskstat('find %s -xdev -type f -atime +%d -size +%dc '\
     159         %(dir, days, filesize),
     160         'oldbigfiles_%s.txt' %make_filename(dir),
     161         'Statistics for files on disk %s that haven\'t been accessed for at least %d days and that are larger than %.2f kB '\
     162         %(dir, days, filesize/1.0e6))
     163
     164         
    133165
    134166
    135 sort_and_clean('oldfiles.txt')
    136 
    137 #Tally the size
    138 x = tally('oldfiles.txt', 4)
    139 
    140 
    141 txt = '%d files haven\'t been accessed for at least %d days and take up a total of %.2f MB. ' %(len(oldfiles), days, x/1000000)
    142 print txt + 'See oldfiles.txt.'
    143 
    144 
    145 
    146 # Find big files
    147 #
    148 cmd = 'find %s -size +%dc > %s 2>/dev/null' %(dir, filesize, tempfile)
    149 #print cmd
    150 os.system(cmd)
    151 
    152 
    153 fid = open(tempfile)
    154 lines = fid.readlines()
    155 fid.close()
    156 
    157 txt = 'The biggest files'
    158 os.system('echo "%s" > bigfiles.txt' %txt)
    159 bigfiles = []
    160 for line in lines:
    161     l = line.strip()
    162     bigfiles.append(l)
    163     cmd = 'ls -lua "%s" >> bigfiles.txt 2>/dev/null' %l
    164     #print cmd
    165     os.system(cmd)     
    166 fid.close()
    167 
    168 
    169 
    170 #Tally the size
    171 x = tally('bigfiles.txt', 4)
    172 
    173 sort_and_clean('bigfiles.txt')
    174 
    175 txt = '%d files are larger than %.2f MB and take up a total of %.2f MB'\
    176       %(len(bigfiles), filesize/1000000.0, x/1000000)
    177 print txt + '. See bigfiles.txt for details'
    178 
    179 
    180 
    181 os.remove(tempfile)
    182 
    183 
    184 #Find intersection
    185 
    186 intersection = []
    187 for f in oldfiles:
    188    if f in bigfiles:
    189        intersection.append(f)
    190 
    191 
    192 os.system('echo "%s" > intersection.txt' %txt)
    193 for f in intersection:
    194     cmd = 'ls -lua "%s" 2>/dev/null >> intersection.txt' %f
    195     os.system(cmd)     
    196 
    197 
    198 x = tally('intersection.txt', 4)
    199 
    200 sort_and_clean('intersection.txt')
    201 
    202 txt = '%d files are larger than %.2f MB,'\
    203       %(len(intersection), filesize/1000000.0)
    204 txt += ' haven\'t been accessed for at least %d days' %days
    205 txt += ' and take up a total of %.2f MB' %(x/1000000)
    206 print txt
    207 
    208 
    209 
Note: See TracChangeset for help on using the changeset viewer.