source: tools/caching/caching.py @ 601

Last change on this file since 601 was 130, checked in by ole, 20 years ago

Initial import of caching tool (from ANU cvs)

File size: 65.4 KB
Line 
1# =============================================================================
2# caching.py - Supervised caching of function results.
3# Copyright (C) 1999, 2000, 2001, 2002 Ole Moller Nielsen
4#
5#    This program is free software; you can redistribute it and/or modify
6#    it under the terms of the GNU General Public License as published by
7#    the Free Software Foundation; either version 2 of the License, or
8#    (at your option) any later version.
9#
10#    This program is distributed in the hope that it will be useful,
11#    but WITHOUT ANY WARRANTY; without even the implied warranty of
12#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13#    GNU General Public License (http://www.gnu.org/copyleft/gpl.html)
14#    for more details.
15#
16#    You should have received a copy of the GNU General Public License
17#    along with this program; if not, write to the Free Software
18#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
19#
20#
21# Contact address: Ole.Nielsen@anu.edu.au
22#
23# Version 1.5.6 February 2002
24# =============================================================================
25 
26"""Module caching.py - Supervised caching of function results.
27
28Public functions:
29
30cache(func,args) -- Cache values returned from func given args.
31cachestat() --      Reports statistics about cache hits and time saved.
32test() --       Conducts a basic test of the caching functionality.
33
34See doc strings of individual functions for detailed documentation.
35"""
36
37# -----------------------------------------------------------------------------
38# Initialisation code
39
# Determine platform: unix is assumed unless os.name is a DOS/Windows flavour
#
import os
unix = 1
if os.name in ('nt', 'dos', 'win32', 'what else?'):
  unix = 0

# Pick the home directory prefix and line ending for this platform,
# then build the default caching directory name from them
#
if not unix:
  homedir = 'c:'
  CR = '\r\n'  #FIXME: Not tested under windows
else:
  homedir = '~'
  CR = '\n'

# The trailing empty element makes the name end with a path separator
cachedir = os.sep.join([homedir, '.python_cache', ''])
58
59# -----------------------------------------------------------------------------
# Options dictionary with default values - to be set by user
#

options = {}
options['cachedir'] = cachedir   # Default cache directory
options['maxfiles'] = 1000000    # Maximum number of cached files
options['savestat'] = 1          # Log caching info to stats file
options['verbose'] = 1           # Write messages to standard output
options['bin'] = 1               # Use binary format (more efficient)
options['compression'] = 1       # Use zlib compression
options['bytecode'] = 0          # Recompute if bytecode has changed
options['expire'] = 0            # Automatically remove files that have been
                                 # accessed least recently
74
75# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
76
def set_option(key, value):
  """Set a value in the options dictionary.

  USAGE:
    set_option(key, value)

  ARGUMENTS:
    key --   Key in options dictionary. (Required)
    value -- New value for key. (Required)

  DESCRIPTION:
    Assigns value to options[key]. Only keys that are already present in
    options can be set; any other key raises KeyError(key) and leaves the
    dictionary unchanged.
  """

  # The 'in' operator replaces dict.has_key(), which is deprecated and does
  # not exist in Python 3.
  if key not in options:
    raise KeyError(key)  # Key not found, raise an exception

  options[key] = value
96
97# -----------------------------------------------------------------------------
98# Function cache - the main routine
99
def cache(func, args=(), kwargs=None, dependencies=None, cachedir=None,
          verbose=None, compression=None, evaluate=0, test=0, clear=0,
          return_filename=0):
  """Supervised caching of function results.

  USAGE:
    result = cache(func, args, kwargs, dependencies, cachedir, verbose,
                   compression, evaluate, test, clear, return_filename)

  ARGUMENTS:
    func --            Function object (Required)
    args --            Arguments to func (Default: ())
    kwargs --          Keyword arguments to func (Default: {})
    dependencies --    Filenames that func depends on (Default: None)
    cachedir --        Directory for cache files (Default: options['cachedir'])
    verbose --         Flag verbose output to stdout
                       (Default: options['verbose'])
    compression --     Flag zlib compression (Default: options['compression'])
    evaluate --        Flag forced evaluation of func (Default: 0)
    test --            Flag test for cached results (Default: 0)
    clear --           Flag delete cached results (Default: 0)
    return_filename -- Flag return of cache filename (Default: 0)

  RETURNS:
    The result of func(*args, **kwargs), either computed or read from cache.
    None if test is set and no valid cached result exists, or after any of
    the clear operations. The cache base filename if return_filename is set.

  RAISES:
    TypeError if kwargs is not a dictionary.

  DESCRIPTION:
    A Python function call of the form

      result = func(arg1,...,argn)

    can be replaced by

      from caching import cache
      result = cache(func,(arg1,...,argn))

  The latter form returns the same output as the former but reuses cached
  results if the function has been computed previously in the same context.
  'result' and the arguments can be simple types, tuples, list, dictionaries or
  objects, but not unhashable types such as functions or open file objects.
  The function 'func' may be a member function of an object or a module.

  This type of caching is particularly useful for computationally intensive
  functions with few frequently used combinations of input arguments. Note that
  if the inputs or output are very large caching might not save time because
  disc access may dominate the execution time.

  If the function definition changes after a result has been cached it will be
  detected by examining the functions bytecode (co_code, co_consts,
  func_defaults, co_argcount) and it will be recomputed.

  LIMITATIONS:
    1 Caching will work with anything that can be called with the given
      arguments and pickled, so any limitation in pickle extends to caching.
    2 A function to be cached should not depend on global variables
      as wrong results may occur if globals are changed after a result has
      been cached.

  -----------------------------------------------------------------------------
  Additional functionality:

  Keyword args
    Keyword arguments (kwargs) can be added as a dictionary of keyword: value
    pairs.
    A Python function call of the form

      result = func(arg1,...,argn, kwarg1=val1,...,kwargm=valm)

    is then cached as follows

      from caching import cache
      result = cache(func,(arg1,...,argn), {kwarg1:val1,...,kwargm:valm})

    The default value of kwargs is {}

  Explicit dependencies:
    The call
      cache(func,(arg1,...,argn),dependencies = <list of filenames>)
    Checks the size, creation time and modification time of each listed file.
    If any file has changed the function is recomputed and the results stored
    again.

  Specify caching directory:
    The call
      cache(func,(arg1,...,argn), cachedir = <cachedir>)
    designates <cachedir> where cached data are stored. Use ~ to indicate users
    home directory - not $HOME. The default is ~/.python_cache on a UNIX
    platform and c:/.python_cache on a Win platform.

  Silent operation:
    The call
      cache(func,(arg1,...,argn),verbose=0)
    suppresses messages to standard output.

  Compression:
    The call
      cache(func,(arg1,...,argn),compression=0)
    disables compression. (Default: compression=1). If the requested compressed
    or uncompressed file is not there, it'll try the other version.

  Forced evaluation:
    The call
      cache(func,(arg1,...,argn),evaluate=1)
    forces the function to evaluate even though cached data may exist.

  Testing for presence of cached result:
    The call
      cache(func,(arg1,...,argn),test=1)
    retrieves cached result if it exists, otherwise None. The function will not
    be evaluated. If both evaluate and test are switched on, evaluate takes
    precedence.

  Obtain cache filenames:
    The call
      cache(func,(arg1,...,argn),return_filename=1)
    returns the hashed base filename under which this function and its
    arguments would be cached

  Clearing cached results:
    The call
      cache(func,'clear')
    clears all cached data for 'func' and
      cache('clear')
    clears all cached data.

    NOTE: The string 'clear' can be passed as an *argument* to func using
      cache(func,('clear',)) or cache(func,tuple(['clear'])).

    New form of clear:
      cache(func,(arg1,...,argn),clear=1)
    clears cached data for particular combination func and args

  """

  # Imports and input checks
  #
  import time

  # A fresh dictionary per call avoids sharing one mutable default object
  # between calls.
  if kwargs is None:
    kwargs = {}

  if not cachedir:
    cachedir = options['cachedir']

  if verbose is None:  # Do NOT write 'if not verbose:', it could be zero.
    verbose = options['verbose']

  if compression is None:  # Do NOT write 'if not compression:',
                           # it could be zero.
    compression = options['compression']

  # Create cache directory if needed
  #
  CD = checkdir(cachedir, verbose)

  # Handle the case cache('clear')
  #
  if isinstance(func, str) and func.lower() == 'clear':
    clear_cache(CD, verbose=verbose)
    return

  # Handle the case cache(func, 'clear')
  #
  if isinstance(args, str) and args.lower() == 'clear':
    clear_cache(CD, func, verbose=verbose)
    return

  # Force singleton arg into a tuple. The exact type is tested (not
  # isinstance) so that an instance of a tuple subclass is still passed to
  # func as one single argument, as before.
  #
  if type(args) is not tuple:
    args = (args,)

  # Check that kwargs is a dictionary
  #
  if not isinstance(kwargs, dict):
    raise TypeError('kwargs must be a dictionary')

  # Hash arguments (and keyword args) to integer
  #
  arghash = myhash((args, kwargs))

  # Get sizes and timestamps for files listed in dependencies.
  # Force singletons into a tuple.
  #
  if dependencies and not isinstance(dependencies, (tuple, list)):
    dependencies = (dependencies,)
  deps = get_depstats(dependencies)

  # Extract function name from func object
  #
  funcname = get_funcname(func)

  # Create cache filename
  #
  FN = funcname + '[' + repr(arghash) + ']'  # The symbol '(' does not work
                                             # under unix

  if return_filename:
    return FN

  if clear:
    # Remove every file type, compressed ('.z') or not, for this particular
    # combination of func and args.
    for file_type in file_types:
      file_name = CD + FN + '_' + file_type
      for fn in [file_name, file_name + '.z']:
        if os.access(fn, os.F_OK):
          if unix:
            os.remove(fn)
          else:
            # FIXME: os.remove doesn't work under windows
            os.system('del ' + fn)
          if verbose:
            print('MESSAGE (caching): File %s deleted' % fn)
    return None

  #-------------------------------------------------------------------

  # As documented, evaluate takes precedence over test: the function is only
  # left unevaluated when test is set and evaluation was not forced.
  #
  test_only = test and not evaluate

  # Check if previous computation has been cached
  #
  if evaluate:
    Retrieved = None  # Force evaluation of func regardless of caching status.
    reason = 4
  else:
    (T, FN, Retrieved, reason, comptime, loadtime, compressed) = \
      CacheLookup(CD, FN, func, args, kwargs, deps, verbose, compression,
                  dependencies)

  if not Retrieved:
    if test_only:  # Do not attempt to evaluate function
      T = None
    else:  # Evaluate function and save to cache
      if verbose:
        msg1(funcname, args, kwargs, reason)

      # Remove expired files automatically
      #
      if options['expire']:
        DeleteOldFiles(CD, verbose)

      # Save args before function is evaluated in case
      # they are modified by function
      #
      save_args_to_cache(CD, FN, args, kwargs, compression)

      # Execute and time function with supplied arguments
      #
      t0 = time.time()
      T = func(*args, **kwargs)
      comptime = time.time() - t0

      if verbose:
        msg2(funcname, args, kwargs, comptime, reason)

      # Save results and estimated loading time to cache
      #
      loadtime = save_results_to_cache(T, CD, FN, func, deps, comptime,
                                       funcname, dependencies, compression)
      if verbose:
        msg3(loadtime, CD, FN, deps, compression)
      compressed = compression

  # No statistics are logged for a pure test that found nothing: in that case
  # comptime, loadtime and compressed carry no timing information.
  #
  if options['savestat'] and (not test_only or Retrieved):
    addstatsline(CD, funcname, FN, Retrieved, reason, comptime, loadtime,
                 compressed)

  return T  # Return results in all cases
368
369# -----------------------------------------------------------------------------
370
def cachestat(sortidx=4, period=-1, showuser=None, cachedir=None):
  """Generate statistics of caching efficiency.

  USAGE:
    cachestat(sortidx, period, showuser, cachedir)

  ARGUMENTS:
    sortidx --  Index of the field by which the lists are sorted (default: 4)
                Legal values are
                 0: 'Name'
                 1: 'Hits'
                 2: 'CPU'
                 3: 'Time Saved'
                 4: 'Gain(%)'
                 5: 'Size'
    period --   If set to -1 all available caching history is used.
                If set 0 only the current month is used (default -1).
    showuser -- Flag for additional table showing user statistics
                (default: None).
    cachedir -- Directory for cache files (default: options['cachedir']).

  DESCRIPTION:
    Logged caching statistics is converted into summaries of the form
    --------------------------------------------------------------------------
    Function Name   Hits   Exec(s)  Cache(s)  Saved(s)   Gain(%)      Size
    --------------------------------------------------------------------------

    This public entry point only forwards its arguments to the private
    __cachestat routine and returns None.
  """

  # Arguments are forwarded positionally, in the order of this signature
  __cachestat(sortidx, period, showuser, cachedir)
401
402# -----------------------------------------------------------------------------
403
def test(cachedir=None,verbose=0,compression=None):
  """Test the functionality of caching.

  USAGE:
    test(cachedir, verbose, compression)

  ARGUMENTS:
    cachedir --    Directory for cache files (Default: options['cachedir'])
    verbose --     Flag whether caching will output its statistics (default=0)
    compression -- Flag zlib compression (Default: options['compression'])

  DESCRIPTION:
    Runs a sequence of checks against caching.cache (evaluation, retrieval,
    return_filename, presence of cache files, test and clear flags,
    dependencies, a timing comparison and the statistics files) and reports
    each one through test_OK or test_error.

    Side effects: the option 'savestat' is switched off and on again through
    set_option, 'compression' is set if given (or forced to 0 when zlib
    cannot be imported), and a file 'testfile.tmp' plus cache files are
    written to the cache directory.
  """

  import time

  # Initialise
  #
  import caching
  reload(caching)

  if not cachedir:
    cachedir = options['cachedir']

  if verbose is None:  # Do NOT write 'if not verbose:', it could be zero.
    verbose = options['verbose']

  if compression is None:  # Do NOT write 'if not compression:',
                           # it could be zero.
    compression = options['compression']
  else:
    try:
      set_option('compression', compression)
    except Exception:
      test_error('Set option failed')

  try:
    import zlib
  except ImportError:
    print('')
    print('*** Could not find zlib, default to no-compression      ***')
    print('*** Installing zlib will improve performance of caching ***')
    print('')
    compression = 0
    set_option('compression', compression)

  print('')
  print_header_box('Testing caching module - please stand by')
  print('')

  # Define a test function to be cached
  #
  def f(a,b,c,N,x=0,y='abcdefg'):
    """f(a,b,c,N)
       Do something time consuming and produce a complex result.
    """

    B = []
    for n in range(N):
      s = str(n+2.0/(n + 4.0))+'.a'*10
      B.append((a,b,c,s,n,x,y))
    return(B)

  # Check that default cachedir is OK
  #
  CD = checkdir(cachedir,verbose)

  # Make a dependency file
  #
  try:
    DepFN = CD + 'testfile.tmp'
    DepFN_wildcard = CD + 'test*.tmp'
    Depfile = open(DepFN,'w')
    Depfile.write('We are the knights who say NI!')
    Depfile.close()
    test_OK('Wrote file %s' %DepFN)
  except Exception:
    test_error('Could not open file %s for writing - check your environment' \
               % DepFN)

  # Check set_option (and switch stats off)
  #
  try:
    set_option('savestat',0)
    assert(options['savestat'] == 0)
    test_OK('Set option')
  except Exception:
    test_error('Set option failed')

  # Make some test input arguments
  #
  N = 5000  #Make N fairly small here

  a = [1,2]
  b = ('Thou shalt count the number three',4)
  c = {'Five is right out': 6, (7,8): 9}
  x = 3
  y = 'holy hand granate'

  # Test caching: first without compression (comp == 0) and, if compression
  # is enabled, once more with it (comp == 1)
  #
  if compression:
    comprange = 2
  else:
    comprange = 1

  for comp in range(comprange):

    # Evaluate and store
    #
    try:
      T1 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, evaluate=1, \
                         verbose=verbose, compression=comp)
      if comp:
        test_OK('Caching evaluation with compression')
      else:
        test_OK('Caching evaluation without compression')
    except Exception:
      if comp:
        test_error('Caching evaluation with compression failed - '
                   'try caching.test(compression=0)')
      else:
        test_error('Caching evaluation failed - try caching.test(verbose=1)')

    # Retrieve
    #
    try:
      T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                         compression=comp)

      if comp:
        test_OK('Caching retrieval with compression')
      else:
        test_OK('Caching retrieval without compression')
    except Exception:
      if comp:
        test_error('Caching retrieval with compression failed - '
                   'try caching.test(compression=0)')
      else:
        test_error('Caching retrieval failed - try caching.test(verbose=1)')

    # Reference result
    #
    T3 = f(a,b,c,N,x=x,y=y)  # Compute without caching

    if T1 == T2 and T2 == T3:
      if comp:
        test_OK('Basic caching functionality (with compression)')
      else:
        test_OK('Basic caching functionality (without compression)')
    else:
      test_error('Cached result does not match computed result')

  # Test return_filename
  #
  try:
    FN = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       return_filename=1)
    assert(FN[:2] == 'f[')
    test_OK('Return of cache filename')
  except Exception:
    test_error('Return of cache filename failed')

  # Test existence of cachefiles
  #
  try:
    (datafile,compressed0) = myopen(CD+FN+'_'+file_types[0],"rb",compression)
    (argsfile,compressed1) = myopen(CD+FN+'_'+file_types[1],"rb",compression)
    (admfile,compressed2) =  myopen(CD+FN+'_'+file_types[2],"rb",compression)
    test_OK('Presence of cache files')
    datafile.close()
    argsfile.close()
    admfile.close()
  except Exception:
    test_error('Expected cache files did not exist')

  # Test 'test' function when cache is present
  #
  try:
    T4 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, test=1)
    assert(T1 == T4)

    test_OK("Option 'test' when cache file present")
  except Exception:
    test_error("Option 'test' when cache file present failed")

  # Test that 'clear' works
  #
  try:
    caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, clear=1)
    test_OK('Clearing of cache files')
  except Exception:
    test_error('Clear does not work')

  # Test 'test' function when cache is absent
  #
  try:
    T4 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, test=1)
    assert(T4 is None)
    test_OK("Option 'test' when cache absent")
  except Exception:
    test_error("Option 'test' when cache absent failed")

  # Test dependencies
  #
  T1 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN)
  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN)

  if T1 == T2:
    test_OK('Basic dependencies functionality')
  else:
    test_error('Dependencies do not work')

  # Test basic wildcard dependency
  #
  T3 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN_wildcard)

  if T1 == T3:
    test_OK('Basic dependencies with wildcard functionality')
  else:
    test_error('Dependencies with wildcards do not work')

  # Test that changed timestamp in dependencies triggers recomputation

  # Modify dependency file
  Depfile = open(DepFN,'a')
  Depfile.write('You must cut down the mightiest tree in the forest '
                'with a Herring')
  Depfile.close()

  T3 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN, test = 1)

  if T3 is None:
    test_OK('Changed dependencies recognised')
  else:
    test_error('Changed dependencies not recognised')

  # Test recomputation when dependencies have changed
  #
  T3 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN)
  if T1 == T3:
    test_OK('Recomputed value with changed dependencies')
  else:
    test_error('Recomputed value with changed dependencies failed')

  # Performance test (with statistics)
  # Don't really rely on this as it will depend on specific computer.
  #

  set_option('savestat',1)

  N = 20*N   #Should be large on fast computers...
  tt = time.time()
  T1 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose)
  t1 = time.time() - tt

  tt = time.time()
  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose)
  t2 = time.time() - tt

  if T1 == T2:
    # Nothing is reported when the cached call was not faster; only a
    # mismatch between the two results counts as an error.
    if t1 > t2:
      test_OK('Performance test: relative time saved = %s pct' \
              %str(round((t1-t2)*100/t1,2)))
  else:
    test_error('Basic caching failed for new problem')

  # Test presence of statistics file: every file in the cache directory
  # whose name contains the statistics base name must be readable
  #
  try:
    for name in os.listdir(CD):
      if name.find(statsfile) >= 0:
        fid = open(CD+name,'r')
        fid.close()
    test_OK('Statistics files present')
  except Exception:
    # A failure here is an error, so it must not be reported as OK
    test_error('Statistics files cannot be opened')

  print_header_box('Show sample output of the caching function:')

  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=0)
  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=0)
  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=1)

  print_header_box('Show sample output of cachestat():')
  if unix:
    cachestat()
  else:
    try:
      # Probe call: fails on platforms where time.strptime is unavailable
      time.strptime('2030','%Y')
      cachestat()
    except Exception:
      print('caching.cachestat() does not work here, because it')
      print('relies on time.strptime() which is unavailable in Windows')

  print('')
  test_OK('Caching self test completed')

  # Test setoption (not yet implemented)
  #
720     
721           
722  # Test setoption (not yet implemented)
723  #
724 
725#==============================================================================
726# Auxiliary functions
727#==============================================================================
728
729# Import pickler
730# cPickle is used by functions mysave, myload, and compare
731#
732import cPickle  # 10 to 100 times faster than pickle
733pickler = cPickle
734
# Local constants (only textwidth3 is meant to be modified)
#

# Compression level for zlib; comp_level = 1 works well.
comp_level = 1

# Text widths used in reports
textwidth1 = 16   # Key fields in report forms
textwidth2 = 132  # Maximal width of textual representation of arguments
textwidth3 = 16   # Initial width of separation lines. Is modified.
textwidth4 = 50   # Text width in test_OK()

# Base filename for cached statistics.
# It will reside in the chosen cache directory.
statsfile = '.cache_stat'

# File name extensions, in this order: cached function results,
# stored function args, administrative info.
file_types = ['Result', 'Args', 'Admin']

# Verbose reasons for recomputation, indexed by reason code
Reason_msg = [
  'OK',
  'No cached result',
  'Dependencies have changed',
  'Byte code or arguments have changed',
  'Recomputation was requested by caller',
  'Cached file was unreadable',
]
757             
758# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
759
def CacheLookup(CD, FN, func, args, kwargs, deps, verbose, compression,
                dependencies):
  """Determine whether cached result exists and return info.

  USAGE:
    (T, FN, Retrieved, reason, comptime, loadtime, compressed) =
      CacheLookup(CD, FN, func, args, kwargs, deps, verbose, compression,
                  dependencies)

  INPUT ARGUMENTS:
    CD --            Cache Directory
    FN --            Suggested cache file name
    func --          Function object
    args --          Tuple of arguments
    kwargs --        Dictionary of keyword arguments
    deps --          Dependencies time stamps
    verbose --       Flag text output
    compression --   Flag zlib compression
    dependencies --  Given list of dependencies

  OUTPUT ARGUMENTS:
    T --             Cached result if present otherwise None
    FN --            File name under which new results must be saved
    Retrieved --     True if a valid cached result was found
    reason --        0: OK (if Retrieved),
                     1: No cached result,
                     2: Dependencies have changed,
                     3: Arguments or Bytecode have changed
                     4: Recomputation was forced
                     Any positive code reported by myload or load_from_cache
                     is passed on unchanged.
    comptime --      Number of seconds it took to compute the cached result
    loadtime --      Number of seconds it took to load cached result
    compressed --    Flag (0,1) if cached results were compressed or not

  DESCRIPTION:
    Determine if cached result exists as follows:
    Load in saved arguments and bytecode stored under hashed filename.
    If they are identical to current arguments and bytecode and if dependencies
    have not changed their time stamp, then return cached result.

    Otherwise return filename under which new results should be cached.
    Hash collisions are handled recursively by calling CacheLookup again with a
    modified filename.

    Every file opened here is closed again before returning, also on the
    early-return paths.
  """

  # Assess whether cached result exists - compressed or not.
  #
  if verbose:
    print('Caching: looking for cached files %s_{%s,%s,%s}.z'
          % (CD+FN, file_types[0], file_types[1], file_types[2]))
  (datafile, compressed0) = myopen(CD+FN+'_'+file_types[0], "rb", compression)
  (argsfile, compressed1) = myopen(CD+FN+'_'+file_types[1], "rb", compression)
  (admfile, compressed2) = myopen(CD+FN+'_'+file_types[2], "rb", compression)

  if not (argsfile and datafile and admfile) or \
     not (compressed0 == compressed1 and compressed0 == compressed2):
    # Cached result does not exist or files were compressed differently
    #
    # This will ensure that evaluation will take place unless all files are
    # present.

    # Some of the files may have been opened successfully - close them so
    # that no file handle is leaked.
    for fid in (datafile, argsfile, admfile):
      if fid:
        fid.close()

    reason = 1
    return (None, FN, None, reason, None, None, None)  # Recompute using
                                                       # same filename

  compressed = compressed0  # Remember if compressed files were actually used
  datafile.close()  # Only its presence was tested; it is loaded further down

  # Retrieve arguments and adm. info
  #
  R, reason = myload(argsfile, compressed)  # The original arguments
  argsfile.close()

  if reason > 0:
    admfile.close()  # Still open at this point - do not leak it
    return (None, FN, None, reason, None, None, None)  # Recompute using
                                                       # same filename
  (argsref, kwargsref) = R

  R, reason = myload(admfile, compressed)
  admfile.close()
  if reason > 0:
    return (None, FN, None, reason, None, None, None)  # Recompute using
                                                       # same filename

  depsref  = R[0]  # Dependency statistics
  comptime = R[1]  # The computation time
  coderef  = R[2]  # The byte code
  funcname = R[3]  # The function name

  # Check if dependencies have changed
  #
  if dependencies and not compare(depsref, deps):
    if verbose:
      print('MESSAGE (caching.py): Dependencies %s have changed - recomputing'
            % (dependencies,))
    # Don't use cached file - recompute
    reason = 2
    return (None, FN, None, reason, None, None, None)

  # Get bytecode from func
  #
  bytecode = get_bytecode(func)

  # Check if arguments or bytecode have changed.
  # The bytecode is only compared when options['bytecode'] is switched on.
  #
  if compare(argsref, args) and compare(kwargsref, kwargs) and \
     (not options['bytecode'] or compare(bytecode, coderef)):

    # Arguments and dependencies match. Get cached results
    #
    T, loadtime, compressed, reason = load_from_cache(CD, FN, compressed)
    # Test the reason code rather than T == None, which does not work if T
    # is a numeric array.
    if reason > 0:
      return (None, FN, None, reason, None, None, None)  # Recompute using
                                                         # same FN

    Retrieved = 1
    reason = 0

    if verbose:
      msg4(funcname, args, kwargs, deps, comptime, loadtime, CD, FN,
           compressed)

      if loadtime >= comptime:
        print('WARNING (caching.py): Caching did not yield any gain.')
        print('                      Consider executing function  ('
              + funcname + ') without caching.')
  else:

    # Non matching arguments or bytecodes signify a hash-collision.
    # This is resolved by recursive search of cache filenames
    # until either a matching or an unused filename is found.
    #
    (T, FN, Retrieved, reason, comptime, loadtime, compressed) = \
       CacheLookup(CD, FN+'x', func, args, kwargs, deps, verbose, compression,
                   dependencies)

    if not Retrieved:
      reason = 3     #The real reason is that args or bytecodes have changed.
                     #Not that the recursive search has found an unused
                     #filename

  return (T, FN, Retrieved, reason, comptime, loadtime, compressed)
911
912# -----------------------------------------------------------------------------
913
def clear_cache(CD,func=None, verbose=None):
  """Clear cache for func.

  USAGE:
     clear_cache(CD, func, verbose)

  ARGUMENTS:
     CD --       Caching directory (required)
     func --     Function object (default: None)
     verbose --  Flag verbose output (default: None, meaning that
                 options['verbose'] is used)

  DESCRIPTION:

    If func == None, clear everything in CD,
    otherwise clear only files whose names start with the name of func
    (as returned by get_funcname).

    When everything is cleared and verbose is set, the files are listed
    and the user is asked for confirmation. Without verbose the files
    are deleted without asking.
  """

  import os

  if CD[-1] != os.sep:
    CD = CD+os.sep

  if verbose is None:
    verbose = options['verbose']

  if func:
    funcname = get_funcname(func)
    if verbose:
      print('MESSAGE (caching.py): Clearing ' + CD+funcname+'*')

    for file_name in os.listdir(CD):
      if file_name[:len(funcname)] == funcname:
        # os.remove is used on every platform. The former Windows branch
        # built a shell 'del' command from the unquoted path, which broke
        # on names containing spaces or shell metacharacters.
        os.remove(CD+file_name)
    return

  file_names = os.listdir(CD)
  if not file_names:
    return

  if verbose:
    print('MESSAGE (caching.py): Remove the following files:')
    for file_name in file_names:
      print(file_name)

    A = raw_input('Delete (Y/N)[N] ?')
  else:
    A = 'Y'  # Non-verbose mode deletes without confirmation

  if A in ('Y', 'y'):
    for file_name in file_names:
      os.remove(CD+file_name)
976
977# -----------------------------------------------------------------------------
978
def DeleteOldFiles(CD,verbose=None):
  """Remove expired files

  USAGE:
    DeleteOldFiles(CD,verbose=None)

  ARGUMENTS:
    CD --       Caching directory. File names are formed as CD+name, so
                CD is expected to end with a path separator.
    verbose --  Flag verbose output (default: None, meaning that
                options['verbose'] is used)

  DESCRIPTION:
    If CD holds more than options['maxfiles'] entries, the least recently
    accessed files are removed: the excess plus a block of 1000 further
    files, so that the clean-up does not have to run on every call.
    Nothing is deleted on non-unix platforms.
  """

  import os

  if verbose is None:
    verbose = options['verbose']

  maxfiles = options['maxfiles']
  block = 1000  # How many extra files to delete per invocation

  names = os.listdir(CD)
  numfiles = len(names)
  if not unix: return  # Kept from the original: no clean-up on Windows

  if numfiles <= maxfiles:
    return

  delfiles = numfiles-maxfiles+block

  # Collect (access time, path) for regular files. Hidden files are left
  # out because the former shell pattern CD+'*' did not match them.
  aged = []
  for name in names:
    if name[:1] == '.':
      continue
    path = CD+name
    try:
      if os.path.isfile(path):
        aged.append((os.stat(path)[7], path))  # Index 7 is the access time
    except OSError:
      pass  # File vanished in the meantime

  aged.sort()  # Least recently accessed first
  expired = aged[:delfiles]

  if verbose:
    print('Deleting '+repr(delfiles)+' expired files:')
    for (atime, path) in expired:
      print(path)

  for (atime, path) in expired:
    try:
      os.remove(path)
    except OSError:
      pass  # Best effort: another process may have removed it already
1007
1008# -----------------------------------------------------------------------------
1009
def save_args_to_cache(CD,FN,args,kwargs,compression):
  """Save arguments to cache

  USAGE:
    save_args_to_cache(CD,FN,args,kwargs,compression)

  ARGUMENTS:
    CD --           Caching directory
    FN --           Base file name of the cache entry
    args --         Positional arguments to be stored
    kwargs --       Keyword arguments to be stored
    compression --  Flag zlib compression

  DESCRIPTION:
    Stores the tuple (args, kwargs) in the file
    CD+FN+'_'+file_types[1] (opened through myopen).
    Raises IOError if that file cannot be opened.
  """

  argsname = CD+FN+'_'+file_types[1]
  (argsfile, compressed) = myopen(argsname, 'wb', compression)

  if not argsfile:
    # myopen returns None on failure, so the name must come from argsname
    if options['verbose']:
      print('ERROR (caching): Could not open %s' % argsname)
    raise IOError('Could not open %s' % argsname)

  try:
    # Use the flag reported by myopen so that the stored format always
    # matches the name of the file that was actually opened.
    mysave((args,kwargs),argsfile,compressed)
  finally:
    argsfile.close()

  return
1040
1041# -----------------------------------------------------------------------------
1042
def save_results_to_cache(T, CD, FN, func, deps, comptime, funcname,
                          dependencies, compression):
  """Save computed results T and admin info to cache

  USAGE:
    save_results_to_cache(T, CD, FN, func, deps, comptime, funcname,
                          dependencies, compression)

  DESCRIPTION:
    T is written to CD+FN+'_'+file_types[0]. The admin tuple
    (deps, comptime, bytecode of func, funcname) is written to
    CD+FN+'_'+file_types[2]. The argument 'dependencies' is not used here.

    Returns the time in seconds it took to write T.
    Raises IOError if either file cannot be opened.
  """

  import time

  dataname = CD+FN+'_'+file_types[0]
  admname = CD+FN+'_'+file_types[2]

  (datafile, compressed1) = myopen(dataname,'wb',compression)
  (admfile, compressed2) = myopen(admname,'wb',compression)

  # myopen returns None on failure. Report the first file that failed.
  failed = None
  if not datafile:
    failed = dataname
  elif not admfile:
    failed = admname

  if failed:
    if datafile: datafile.close()
    if admfile: admfile.close()
    if options['verbose']:
      print('ERROR (caching): Could not open %s' % failed)
    raise IOError('Could not open %s' % failed)

  try:
    t0 = time.time()

    mysave(T,datafile,compressed1)  # Save data to cache
    datafile.close()
    savetime = time.time()-t0

    bytecode = get_bytecode(func)  # Get bytecode from function object
    admtup = (deps, comptime, bytecode, funcname)  # Gather admin info

    mysave(admtup,admfile,compressed2)  # Save admin info to cache
  finally:
    # Closing an already closed file is harmless
    datafile.close()
    admfile.close()

  return(savetime)
1092
1093# -----------------------------------------------------------------------------
1094
def load_from_cache(CD,FN,compression):
  """Load previously cached data from file FN

  USAGE:
    load_from_cache(CD,FN,compression)

  DESCRIPTION:
    Opens CD+FN+'_'+file_types[0] through myopen and loads its content.

    Returns (T, loadtime, compressed, reason), where T is the loaded
    data, loadtime the time in seconds spent loading, compressed the flag
    reported by myopen and reason the code returned by myload (0 means
    success). If the file cannot be opened, T is None and reason is 5,
    the code myload uses for an unreadable file.
  """

  import time

  (datafile, compressed) = myopen(CD+FN+'_'+file_types[0],"rb",compression)
  if not datafile:
    # Report the problem through the reason code instead of failing
    # with an AttributeError on None below.
    return None, 0.0, compressed, 5

  t0 = time.time()
  try:
    T, reason = myload(datafile,compressed)
    loadtime = time.time()-t0
  finally:
    datafile.close()

  return T, loadtime, compressed, reason
1112
1113# -----------------------------------------------------------------------------
1114
def myopen(FN,mode,compression=1):
  """Open file FN using given mode

  USAGE:
    myopen(FN,mode,compression=1)

  ARGUMENTS:
    FN --           File name to be opened
    mode --         Open mode (as in open)
    compression --  Flag zlib compression

  DESCRIPTION:
     if compression
       Attempt first to open FN + '.z'
       If this fails try to open FN
     else do the opposite
     Return file handle plus info about whether it was compressed or not.
     The handle is None if neither file could be opened.

     If a file is created (mode contains 'w' or 'a' and neither FN nor
     FN + '.z' could be read beforehand), its permissions are set to
     read/write for everybody on unix.
  """

  # Determine if file exists already (if writing was requested)
  # This info is only used to determine if access modes should be set
  #
  new_file = 0  # Assume it exists if mode was not 'w' or 'a'
  if 'w' in mode or 'a' in mode:
    new_file = 1
    for name in (FN+'.z', FN):
      try:
        probe = open(name,'r')
      except (IOError, OSError):
        continue
      probe.close()
      new_file = 0
      break

  # Order in which the two possible names are tried
  #
  if compression:
    candidates = ((FN+'.z', 1), (FN, 0))
  else:
    candidates = ((FN, 0), (FN+'.z', 1))

  handle = None
  compressed = 0
  for (name, flag) in candidates:
    try:
      handle = open(name,mode)
    except (IOError, OSError):
      continue
    compressed = flag
    break

  # Now set access rights if it is a new file
  #
  if handle and new_file:
    if unix:
      # os.chmod instead of a shell command: no quoting problems with
      # unusual file names. Failure is ignored as before.
      try:
        os.chmod(handle.name, int('666', 8))
      except OSError:
        pass
    else:
      pass  # FIXME: Take care of access rights under Windows

  return(handle,compressed)
1184
1185# -----------------------------------------------------------------------------
1186
def myload(file, compressed):
  """Load data from file

  USAGE:
    myload(file, compressed)

  ARGUMENTS:
    file --        Open file object to read from
    compressed --  Flag telling whether the content is zlib compressed

  DESCRIPTION:
    Returns (R, reason). R is the unpickled object and reason is 0 on
    success. If the content cannot be decompressed, (None, 5) is returned
    (5 = unreadable file). A MemoryError is reported (if
    options['verbose'] is set) and raised again.

    NOTE(review): the data is unpickled - presumably pickler is the
    pickle or cPickle module. Unpickling runs arbitrary code, so cache
    files must not be writable by untrusted users.
  """

  try:
    if not compressed:
      return pickler.load(file), 0

    import zlib

    packed = file.read()
    try:
      raw = zlib.decompress(packed)
    except:
      # E.g. zlib.error: Error -5 while decompressing data
      return None, 5  # (Unreadable file)

    del packed  # Free up some space
    return pickler.loads(raw), 0
  except MemoryError:
    if options['verbose']:
      print('ERROR (caching): Out of memory while loading %s, aborting' \
            %(file.name))

    # Raise the error again for now
    #
    raise MemoryError
1229
1230# -----------------------------------------------------------------------------
1231
def mysave(T,file,compression):
  """Save data T to file

  USAGE:
    mysave(T,file,compression)

  ARGUMENTS:
    T --            Object to be stored
    file --         Open file object to write to
    compression --  Flag zlib compression

  DESCRIPTION:
    T is pickled with the flag options['bin']. With compression the
    pickled string is compressed with zlib at level comp_level before it
    is written. Raises Exception if compression is requested but zlib
    cannot be imported.
  """

  bin_flag = options['bin']

  if not compression:
    pickler.dump(T,file,bin_flag)
    return

  try:
    import zlib
  except:
    print('')
    print('*** Could not find zlib ***')
    print('*** Try to run caching with compression off ***')
    print("*** caching.set_option('compression', 0) ***")
    raise Exception

  pickled = pickler.dumps(T,bin_flag)
  file.write(zlib.compress(pickled,comp_level))
1258
1259# -----------------------------------------------------------------------------
1260
def myhash(T):
  """Compute hashed integer from hashable values of tuple T

  USAGE:
    myhash(T)

  ARGUMENTS:
    T -- Tuple (lists, dictionaries and other objects are accepted too)

  DESCRIPTION:
    Tuples and lists are hashed element by element (recursively).
    Dictionaries are passed on to dicthash. Anything else is hashed with
    the built-in hash. Objects that cannot be hashed yield 1, except for
    Numeric arrays which are hashed element by element.
  """

  kind = type(T)

  if kind in (tuple, list):
    return(hash(tuple([myhash(item) for item in T])))

  if kind == dict:
    return(dicthash(T))

  try:
    return(hash(T))
  except:
    pass

  # T is not hashable: fall back to 1 unless it is a Numeric array
  #
  val = 1
  try:
    import Numeric
    if type(T) == Numeric.ArrayType:
      val = hash(tuple([myhash(e) for e in T]))
    # Could implement other Numeric types here
  except:
    pass

  return(val)
1302
1303# -----------------------------------------------------------------------------
1304
def dicthash(D):
  """Compute hashed integer from hashable values of dictionary D

  USAGE:
    dicthash(D)

  DESCRIPTION:
    Only the values of D contribute: keys are not hashed and values that
    cannot be hashed are skipped. The values are visited in the order
    given by D.keys().
  """

  # Get hash values for hashable entries
  #
  hvals = []
  for key in D.keys():
    try:
      hvals.append(hash(D[key]))
    except:
      pass

  # Hash obtained values into one value
  #
  return(hash(tuple(hvals)))
1327
1328# -----------------------------------------------------------------------------
1329
def compare(A,B):
  """Safe comparison of general objects

  USAGE:
    compare(A,B)

  DESCRIPTION:
    Returns the result of A == B. If that comparison raises an exception,
    the pickled representations of A and B are compared instead, and if
    that fails as well 0 is returned.
  """

  try:
    return(A == B)
  except:
    pass

  try:
    return(pickler.dumps(A) == pickler.dumps(B))
  except:
    return(0)
1349
1350# -----------------------------------------------------------------------------
1351
def nospace(s):
  """Replace spaces in string s with underscores

  USAGE:
    nospace(s)

  ARGUMENTS:
    s -- string

  DESCRIPTION:
    Returns a new string. All other characters are copied unchanged.
  """

  pieces = []
  for ch in s:
    if ch == ' ':
      pieces.append('_')
    else:
      pieces.append(ch)

  return(''.join(pieces))
1372
1373# -----------------------------------------------------------------------------
1374
def get_funcname(func):
  """Retrieve name of function object func (depending on its type)

  USAGE:
    get_funcname(func)

  DESCRIPTION:
    Python functions and built-in functions yield their own name.
    For any other object the repr is used, with the characters <, > and '
    blanked out and runs of whitespace collapsed.
    Spaces in the result are replaced by underscores.
  """

  import types

  if type(func) in (types.FunctionType, types.BuiltinFunctionType):
    funcname = func.__name__
  else:
    text = repr(func)
    for ch in "<>'":
      text = text.replace(ch, ' ')
    funcname = ' '.join(text.split())

  return(funcname.replace(' ', '_'))
1396
1397# -----------------------------------------------------------------------------
1398
def get_bytecode(func):
  """ Get bytecode from function object.

  USAGE:
    get_bytecode(func)

  DESCRIPTION:
    Returns the tuple (bytecode, consts, argcount, defaults) taken from
    the code object of a Python function or of the function behind a
    method. For anything else (None, 0, 0, 0) is returned.
  """

  import types

  kind = type(func)
  if kind == types.FunctionType:
    target = func
  elif kind == types.MethodType:
    target = func.im_func
  else:
    # Built-in functions are assumed not to change
    return (None, 0, 0, 0)

  code = target.func_code
  return (code.co_code, code.co_consts, code.co_argcount,
          target.func_defaults)
1426
1427# -----------------------------------------------------------------------------
1428
def get_depstats(dependencies):
  """ Build dictionary of dependency files and their size and mod. time.

  USAGE:
    get_depstats(dependencies):

  ARGUMENTS:
    dependencies -- List of file names (wildcards allowed). A single
                    string is treated as a list with one entry.

  DESCRIPTION:
    Returns a dictionary mapping each (expanded) file name to the tuple
    (size, mtime) as reported by filestat. An empty dictionary is
    returned if no dependencies are given.

    Raises Exception if a dependency is not a string or if a dependency
    given without wildcards does not exist. A wildcard pattern that
    matches nothing is ignored.
  """

  import glob, os

  d = {}
  if not dependencies:
    return(d)

  if isinstance(dependencies, str):
    dependencies = [dependencies]

  # Validate and expand any wildcards. The type check must come before
  # glob, which only accepts strings and only ever returns strings.
  #
  expanded_dependencies = []
  for FN in dependencies:
    if not isinstance(FN, str):
      errmsg = 'ERROR (caching.py): Dependency must be a string.\n'
      errmsg += '                    Dependency given: %s' %(FN,)
      raise Exception(errmsg)

    matches = glob.glob(FN)
    if not matches:
      if [c for c in '*?[' if c in FN]:
        continue  # Wildcard pattern without matches: nothing to track

      # glob yields an empty list for a missing file, so this is the only
      # place where a missing dependency can be detected.
      errmsg = 'ERROR (caching.py): Dependency '+FN+' does not exist.'
      raise Exception(errmsg)

    expanded_dependencies += matches

  for FN in expanded_dependencies:
    if not os.access(FN,os.F_OK):
      errmsg = 'ERROR (caching.py): Dependency '+FN+' does not exist.'
      raise Exception(errmsg)
    (size,atime,mtime,ctime) = filestat(FN)

    # We don't use atime because that would cause recomputation every time.
    # We don't use ctime because that is irrelevant and confusing for users.
    d[FN] = (size,mtime)

  return(d)
1465
1466# -----------------------------------------------------------------------------
1467
def filestat(FN):
  """A safe wrapper using os.stat to get basic file statistics
     The built-in os.stat breaks down if file sizes are too large (> 2GB ?)

  USAGE:
    filestat(FN)

  DESCRIPTION:
     Returns the tuple (size, atime, mtime, ctime) with size converted
     to a long integer.

     If os.stat fails, the values are obtained on unix by running 'ls'
     and parsing its output with get_lsline. On other platforms an
     Exception is raised in that case.

     To avoid the fallback Python must be compiled with
     CFLAGS="`getconf LFS_CFLAGS`" OPT="-g -O2 $CFLAGS" \
              configure
     as given in section 8.1.1 Large File Support in the Library Reference
  """

  import os, time

  try:
    st = os.stat(FN)
    size, atime, mtime, ctime = st[6], st[7], st[8], st[9]
  except:

    # Hack to get the results anyway (works only on Unix at the moment)
    #
    print('Hack to get os.stat when files are too large')

    if not unix:
      raise Exception  # FIXME: Windows case

    tmp = '/tmp/cach.tmp.' + repr(time.time()) + repr(os.getpid())
    # Unique filename, FIXME: Use random number

    # Each call yields the size together with one of the three times
    #
    os.system('ls -l --full-time --time=atime '+FN+' > '+tmp)
    (size,atime) = get_lsline(tmp)

    os.system('ls -l --full-time '+FN+' > '+tmp)
    (size,mtime) = get_lsline(tmp)

    os.system('ls -l --full-time --time=ctime '+FN+' > '+tmp)
    (size,ctime) = get_lsline(tmp)

    try:
      os.system('rm '+tmp)
      # FIXME: Gives error if file doesn't exist
    except:
      pass

  return(long(size),atime,mtime,ctime)
1525
1526# -----------------------------------------------------------------------------
1527
def get_lsline(FN):
  """get size and time for filename

  USAGE:
    get_lsline(file_name)

  DESCRIPTION:
    Read in one line 'ls -la' item from file (generated by filestat) and
    convert time to seconds since epoch. Return file size and time.

    The size is returned as the string found in the fifth column. The
    time is built from columns six to ten, which are expected to read
    like 'Mon Jan 01 12:00:00 2001'.
  """

  import time

  f = open(FN,'r')
  try:
    info = f.read().split()
  finally:
    f.close()  # The handle used to be left open

  size = info[4]

  # Weekday, month, day, time of day and year
  stamp = ' '.join([info[5], info[6], info[7], info[8], info[9]])

  t = time.mktime(time.strptime(stamp))
  return(size, t)
1556
1557# -----------------------------------------------------------------------------
1558
def checkdir(CD,verbose=None):
  """Check or create caching directory

  USAGE:
    checkdir(CD,verbose):

  ARGUMENTS:
    CD -- Directory
    verbose -- Flag verbose output (default: None)

  DESCRIPTION:
    If CD does not exist it will be created if possible.

    Returns the directory name to use: CD with ~ expanded and a trailing
    separator added. An empty CD is returned unchanged. If CD is neither
    readable and writable nor creatable, a warning is printed and
    '/tmp/' (unix) or 'C:' (otherwise) is returned instead.
  """

  import os
  import os.path

  if CD and CD[-1] != os.sep:
    CD = CD + os.sep  # Add separator for directories

  CD = os.path.expanduser(CD) # Expand ~ or ~user in pathname

  # The access flags must be combined with '|'. The former expression
  # 'os.R_OK and os.W_OK' evaluated to os.W_OK alone.
  if CD == '' or os.access(CD,os.R_OK | os.W_OK):
    return(CD)

  try:
    os.mkdir(CD)
  except OSError:
    print('WARNING: Directory %s could not be created.' % CD)
    if unix:
      CD = '/tmp/'
    else:
      CD = 'C:'
    print('Using directory %s instead' %CD)
    return(CD)

  # Change access rights if possible
  #
  if unix:
    try:
      os.chmod(CD, int('777', 8))
    except OSError:
      pass
  else:
    pass  # FIXME: What about access rights under Windows?

  if verbose: print('MESSAGE: Directory %s created.' % CD)

  return(CD)
1600
1601#==============================================================================
1602# Statistics
1603#==============================================================================
1604
def addstatsline(CD,funcname,FN,Retrieved,reason,comptime,loadtime,
                 compression):
  """Add stats entry

  USAGE:
    addstatsline(CD,funcname,FN,Retrieved,reason,comptime,loadtime,compression)

  DESCRIPTION:
    Make one entry in the stats file about one cache hit recording time saved
    and other statistics. The data are used by the function cachestat.

    The entry is appended to CD+statsfile+'.'+<month><year> and holds,
    separated by commas: date, user, FN, size of the result file,
    compression, hit flag, reason, comptime and loadtime.
    The size is -1 if the result file cannot be examined.
    The argument funcname is not used.

    Failures only produce a warning. This is just statistics.
  """

  import os, time

  TimeTuple = time.localtime(time.time())
  extension = time.strftime('%b%Y',TimeTuple)
  SFN = CD+statsfile+'.'+extension

  # myopen signals failure by returning None rather than by raising
  #
  try:
    (statfile, dummy) = myopen(SFN,'a',compression=0)
  except (IOError, OSError):
    statfile = None

  if not statfile:
    print('Warning: Stat file could not be opened')
    return

  try:
    try:
      user = os.environ.get('USER', 'Nobody')
      date = time.asctime(TimeTuple)

      if Retrieved:
        hit = '1'
      else:
        hit = '0'

      # Get size of result file
      #
      resultfile = CD+FN+'_'+file_types[0]
      if compression:
        resultfile = resultfile+'.z'

      try:
        size = os.stat(resultfile)[6]
      except OSError:
        size = -1  # Error condition, but don't crash.

      # Build entry
      #
      entry = ','.join([date,
                        user,
                        FN,
                        str(int(size)),
                        str(compression),
                        hit,
                        str(reason),
                        str(round(comptime,4)),
                        str(round(loadtime,4))]) + CR

      statfile.write(entry)
    except Exception:
      print('Warning: Writing of stat file failed')
  finally:
    statfile.close()  # Also when building or writing the entry failed
1678
1679# -----------------------------------------------------------------------------
1680
1681# FIXME: should take cachedir as an optional arg
1682#
def __cachestat(sortidx=4,period=-1,showuser=None,cachedir=None):
  """  List caching statistics.

  USAGE:
    __cachestat(sortidx=4,period=-1,showuser=None,cachedir=None):

  ARGUMENTS:
    sortidx --   Index into the fields Hits, Exec(s), Cache(s), Saved(s),
                 Gain(%), Size by which the tables are sorted (default: 4)
    period --    -1: use all available caching history (default)
                 otherwise: use only the current month
    showuser --  Flag that adds a table with statistics per user
    cachedir --  Directory holding the stats files
                 (default: options['cachedir'])

  DESCRIPTION:
      Generate statistics of caching efficiency.
      All stats files in the caching directory whose names contain the
      stats file name are read. Lines that cannot be parsed are counted
      as discarded. Only cache hits contribute to the tables, and all
      fields except Hits are averaged over the number of hits.

      Future versions will include more than one month....
      OMN 20/8/2000
  """

  import os
  import os.path
  from time import strptime, localtime, strftime, mktime, ctime, time

  # sortidx = 4    # Index into Fields[1:]. What to sort by.

  Fields = ['Name', 'Hits', 'Exec(s)', \
            'Cache(s)', 'Saved(s)', 'Gain(%)', 'Size']
  Widths = [25,7,9,9,9,9,13]
  Types = ['s','d','.2f','.2f','.2f','.2f','d']

  Dictnames = ['Function', 'User']

  if not cachedir:
    cachedir = checkdir(options['cachedir'])

  SD = os.path.expanduser(cachedir)  # Expand ~ or ~user in pathname

  if period == -1:  # Take all available stats
    SFILENAME = statsfile
  else:  # Only stats from current month
       # MAKE THIS MORE GENERAL SO period > 0 counts several months backwards!
    SFILENAME = statsfile+'.'+strftime('%b%Y',localtime(time()))

  SF = [FN for FN in os.listdir(SD) if FN.find(SFILENAME) >= 0]

  blocksize = 15000000
  total_read = 0
  total_hits = 0
  total_discarded = 0
  firstday = mktime(strptime('2030','%Y'))
             # FIXME: strptime don't exist in WINDOWS ?
  lastday = 0

  FuncDict = {}
  UserDict = {}
  for FN in SF:
    # os.path.join also copes with a cachedir given without separator
    path = os.path.join(SD,FN)
    print('Reading file  ' + path)

    infile = open(path,'r')
    try:
      while 1:
        A = infile.readlines(blocksize)
        if len(A) == 0: break
        total_read = total_read + len(A)
        for line in A:
          record = line.rstrip().split(',')

          if len(record) not in (8,9):
            total_discarded = total_discarded + 1
            continue

          # Layout: timestamp, user, name, size, compression, hit,
          # [reason,] cputime, loadtime.
          # Records with 8 fields presumably stem from the format used
          # before 'reason' was added - TODO confirm. The two times are
          # taken from the end so that both layouts can be read.
          try:
            t        = mktime(strptime(record[0]))
            user     = record[1]
            func     = record[2]
            size     = float(record[3])
            hit      = int(record[5])
            cputime  = float(record[-2])
            loadtime = float(record[-1])
          except (ValueError, OverflowError):
            total_discarded = total_discarded + 1
            continue

          if t > lastday:
            lastday = t
          if t < firstday:
            firstday = t

          # Strip hash-stamp off (only if there is one)
          #
          i = func.find('[')
          if i >= 0:
            func = func[:i]

          if not hit:
            continue  # Stats on recomputations and reasons could go here

          total_hits = total_hits + 1
          saving = cputime-loadtime

          if cputime != 0:
            rel_saving = round(100.0*saving/cputime,2)
          else:
            rel_saving = 100.0 - round(1.0*saving,2)  # A bit of a hack

          info = [1,cputime,loadtime,saving,rel_saving,size]

          UpdateDict(UserDict,user,info)
          UpdateDict(FuncDict,func,info)
    finally:
      infile.close()

  # Compute averages of all sums and write list
  #

  total_valid = total_read - total_discarded
  if total_valid == 0:
    printline(Widths,'=')
    print('CACHING STATISTICS: No valid records read')
    printline(Widths,'=')
    return

  print('')
  printline(Widths,'=')
  print('CACHING STATISTICS: '+ctime(firstday)+' to '+ctime(lastday))
  printline(Widths,'=')
  print('  Total number of valid records ' + str(total_valid))
  print('  Total number of discarded records ' + str(total_discarded))
  print('  Total number of hits ' + str(total_hits))
  print('')

  print('  Fields ' + str(Fields[2:]) + ' are averaged over number of hits')
  print('  Time is measured in seconds and size in bytes')
  print('  Tables are sorted by ' + Fields[1:][sortidx])

  if showuser:
    Dictionaries = [FuncDict, UserDict]
  else:
    Dictionaries = [FuncDict]

  for i in range(len(Dictionaries)):
    Dict = Dictionaries[i]

    # Turn the sums into averages. Entry 0 is the number of hits.
    #
    for key in Dict.keys():
      rec = Dict[key]
      for n in range(1,len(rec)):
        rec[n] = round(1.0*rec[n]/rec[0],2)

    # Sort and output
    #
    keylist = SortDict(Dict,sortidx)

    # Write Header (first column left justified, the others right)
    #
    print('')
    printline(Widths,'-')
    cells = ['%-*s' % (Widths[0], Dictnames[i] + ' ' + Fields[0])]
    for n in range(1,len(Fields)):
      cells.append('%*s' % (Widths[n], Fields[n]))
    print(' '.join(cells))
    printline(Widths,'-')

    # Output Values
    #
    for key in keylist:
      rec = Dict[key]
      label = key
      if len(label) > Widths[0]: label = label[:Widths[0]-3] + '...'
      cells = ['%-*s' % (Widths[0], label)]
      for n in range(len(rec)):
        fmt = '%' + str(Widths[n+1]) + Types[n+1]
        cells.append(fmt % rec[n])
      print(' '.join(cells))
    print('')
1877
1878#==============================================================================
1879# Auxiliary stats functions
1880#==============================================================================
1881
def UpdateDict(Dict,key,info):
  """Update dictionary by adding new values to existing.

  USAGE:
    UpdateDict(Dict,key,info)

  DESCRIPTION:
    If key is present, the entries of info are added element by element
    to the list stored under key (which is changed in place). Otherwise
    a copy of info is stored under key. Returns Dict.
  """

  if key in Dict:
    totals = Dict[key]
    for n, old in enumerate(totals):
      totals[n] = info[n] + old
  else:
    Dict[key] = info[:]  # Store a copy so that info is never shared

  return Dict
1898
1899# -----------------------------------------------------------------------------
1900
def SortDict(Dict,sortidx=0):
  """Sort dictionary

  USAGE:
    SortDict(Dict,sortidx):

  DESCRIPTION:
    Sort dictionary of lists according field number 'sortidx'.
    Returns the list of keys in ascending order of that field. Values
    that are neither lists nor tuples are treated as one-element lists.
    Raises IndexError if sortidx is too large for one of the records.
  """

  pairs = []
  for key in Dict.keys():
    rec = Dict[key]
    if type(rec) not in (list, tuple):
      rec = [rec]

    if sortidx > len(rec)-1:
      if options['verbose']:
        print('ERROR: Sorting index to large, sortidx =  ' + str(sortidx))
      raise IndexError

    pairs.append((rec[sortidx], key))

  pairs.sort()  # Orders by the chosen field first, then by key

  return([key for (val, key) in pairs])
1933
1934# -----------------------------------------------------------------------------
1935
def printline(Widths,char):
  """Print textline in fixed field.

  USAGE:
    printline(Widths,char)

  DESCRIPTION:
    Prints a line made of char only: Widths[n] characters for every
    field plus one character for each gap between two fields.
  """

  segments = [width*char for width in Widths]
  print(char.join(segments))
1950
1951#==============================================================================
1952# Messages
1953#==============================================================================
1954
def msg1(funcname,args,kwargs,reason):
  """Message 1

  USAGE:
    msg1(funcname,args,kwargs,reason):

  DESCRIPTION:
    Prints a header box announcing the evaluation of funcname, followed
    by the output of msg7(args,kwargs) and msg8(reason) and a footer.
  """

  title = 'Evaluating function %s' %funcname
  print_header_box(title)

  msg7(args,kwargs)
  msg8(reason)

  print_footer()
1999   
2000   
2001
2002# -----------------------------------------------------------------------------
2003
2004def msg2(funcname,args,kwargs,comptime,reason):
2005  """Message 2
2006
2007  USAGE:
2008    msg2(funcname,args,kwargs,comptime,reason)
2009  """
2010
2011  import string
2012
2013  #try:
2014  #  R = Reason_msg[reason]
2015  #except:
2016  #  R = 'Unknown reason' 
2017 
2018  #print_header_box('Caching statistics (storing) - %s' %R)
2019  print_header_box('Caching statistics (storing)') 
2020 
2021  msg6(funcname,args,kwargs)
2022  msg8(reason)
2023
2024  print string.ljust('| CPU time:', textwidth1) + str(round(comptime,2)) + ' seconds'
2025
2026# -----------------------------------------------------------------------------
2027
2028def msg3(savetime, CD, FN, deps,compression):
2029  """Message 3
2030
2031  USAGE:
2032    msg3(savetime, CD, FN, deps,compression)
2033  """
2034
2035  import string
2036  print string.ljust('| Loading time:', textwidth1) + str(round(savetime,2)) + \
2037                     ' seconds (estimated)'
2038  msg5(CD,FN,deps,compression)
2039
2040# -----------------------------------------------------------------------------
2041
2042def msg4(funcname,args,kwargs,deps,comptime,loadtime,CD,FN,compression):
2043  """Message 4
2044
2045  USAGE:
2046    msg4(funcname,args,kwargs,deps,comptime,loadtime,CD,FN,compression)
2047  """
2048
2049  import string
2050
2051  print_header_box('Caching statistics (retrieving)')
2052 
2053  msg6(funcname,args,kwargs)
2054  print string.ljust('| CPU time:', textwidth1) + str(round(comptime,2)) + ' seconds'
2055  print string.ljust('| Loading time:', textwidth1) + str(round(loadtime,2)) + ' seconds'
2056  print string.ljust('| Time saved:', textwidth1) + str(round(comptime-loadtime,2)) + \
2057        ' seconds'
2058  msg5(CD,FN,deps,compression)
2059
2060# -----------------------------------------------------------------------------
2061
2062def msg5(CD,FN,deps,compression):
2063  """Message 5
2064
2065  USAGE:
2066    msg5(CD,FN,deps,compression)
2067
2068  DESCRIPTION:
2069   Print dependency stats. Used by msg3 and msg4
2070  """
2071
2072  import os, time, string
2073
2074  print '|'
2075  print string.ljust('| Caching dir: ', textwidth1) + CD
2076
2077  if compression:
2078    suffix = '.z'
2079    bytetext = 'bytes, compressed'
2080  else:
2081    suffix = ''
2082    bytetext = 'bytes'
2083
2084  for file_type in file_types:
2085    file_name = FN + '_' + file_type + suffix
2086    print string.ljust('| ' + file_type + ' file: ', textwidth1) + file_name,
2087    stats = os.stat(CD+file_name)
2088    print '('+ str(stats[6]) + ' ' + bytetext + ')'
2089
2090  print '|'
2091  if len(deps) > 0:
2092    print '| Dependencies:  '
2093    dependencies  = deps.keys()
2094    dlist = []; maxd = 0
2095    tlist = []; maxt = 0
2096    slist = []; maxs = 0
2097    for d in dependencies:
2098      stats = deps[d]
2099      t = time.ctime(stats[1])
2100      s = str(stats[0])
2101      #if s[-1] == 'L':
2102      #  s = s[:-1]  # Strip rightmost 'long integer' L off.
2103      #              # FIXME: Unnecessary in versions later than 1.5.2
2104
2105      if len(d) > maxd: maxd = len(d)
2106      if len(t) > maxt: maxt = len(t)
2107      if len(s) > maxs: maxs = len(s)
2108      dlist.append(d)
2109      tlist.append(t)
2110      slist.append(s)
2111
2112    for n in range(len(dlist)):
2113      d = string.ljust(dlist[n]+':', maxd+1)
2114      t = string.ljust(tlist[n], maxt)
2115      s = string.rjust(slist[n], maxs)
2116
2117      print '| ', d, t, ' ', s, 'bytes'
2118  else:
2119    print '| No dependencies'
2120  print_footer()
2121
2122# -----------------------------------------------------------------------------
2123
2124def msg6(funcname,args,kwargs):
2125  """Message 6
2126
2127  USAGE:
2128    msg6(funcname,args,kwargs)
2129  """
2130
2131  import string
2132  print string.ljust('| Function:', textwidth1) + funcname
2133
2134  msg7(args,kwargs)
2135 
2136# -----------------------------------------------------------------------------   
2137
2138def msg7(args,kwargs):
2139  """Message 7
2140 
2141  USAGE:
2142    msg7(args,kwargs):
2143  """
2144 
2145  import string
2146 
2147  args_present = 0 
2148  if args:
2149    if len(args) == 1:
2150      print string.ljust('| Argument:', textwidth1) + mkargstr(args[0], \
2151                         textwidth2)
2152    else:
2153      print string.ljust('| Arguments:', textwidth1) + \
2154            mkargstr(args, textwidth2)
2155    args_present = 1
2156           
2157  if kwargs:
2158    if len(kwargs) == 1:
2159      print string.ljust('| Keyword Arg:', textwidth1) + mkargstr(kwargs, \
2160                         textwidth2)
2161    else:
2162      print string.ljust('| Keyword Args:', textwidth1) + \
2163            mkargstr(kwargs, textwidth2)
2164    args_present = 1
2165
2166  if not args_present:               
2167    print '| No arguments' # Default if no args or kwargs present
2168
2169# -----------------------------------------------------------------------------
2170
2171def msg8(reason):
2172  """Message 8
2173 
2174  USAGE:
2175    msg8(reason):
2176  """
2177 
2178  import string
2179   
2180  try:
2181    R = Reason_msg[reason]
2182  except:
2183    R = 'Unknown' 
2184 
2185  print string.ljust('| Reason:', textwidth1) + R
2186   
2187# -----------------------------------------------------------------------------
2188
2189def print_header_box(line):
2190  """Print line in a nice box.
2191 
2192  USAGE:
2193    print_header_box(line)
2194
2195  """
2196  global textwidth3
2197
2198  import time
2199
2200  time_stamp = time.ctime(time.time())
2201  line = time_stamp + '. ' + line
2202   
2203  N = len(line) + 1
2204  s = '+' + '-'*N + CR
2205
2206  print s + '| ' + line + CR + s,
2207
2208  textwidth3 = N
2209
2210# -----------------------------------------------------------------------------
2211   
2212def print_footer():
2213  """Print line same width as that of print_header_box.
2214  """
2215 
2216  N = textwidth3
2217  s = '+' + '-'*N + CR   
2218     
2219  print s     
2220     
2221# -----------------------------------------------------------------------------
2222
def mkargstr(args, textwidth, argstr = ''):
  """Generate a string containing first textwidth characters of arguments.

  USAGE:
    mkargstr(args, textwidth, argstr = '')

  DESCRIPTION:
    Builds a text resembling str(args), appended to the prefix argstr.
    Elements of a tuple, list or dictionary are added one at a time and
    the loop stops as soon as the text is longer than textwidth, so huge
    arguments are never rendered in full.  If the final text is longer
    than textwidth it is cut to its first textwidth characters and
    '...' is appended.

    Strings are wrapped in single quotes (no escaping is done).  Other
    non-container objects are rendered with str().  Only objects whose
    type is exactly tuple, list or dict are traversed.

    Empty containers are rendered as '()', '[]' and '{}'.
  """

  WasTruncated = 0
  argtype = type(args)

  if argtype is dict:
    argstr = argstr + '{'
    for key in args.keys():
      argstr = argstr + mkargstr(key, textwidth) + ': ' + \
               mkargstr(args[key], textwidth) + ', '
      if len(argstr) > textwidth:
        WasTruncated = 1
        break

    # Only a non-empty dictionary leaves a trailing ', ' to remove.
    # Stripping unconditionally would eat the opening brace.
    if args:
      argstr = argstr[:-2]
    argstr = argstr + '}'

  elif argtype is tuple or argtype is list:
    if argtype is tuple:
      lc = '('
      rc = ')'
    else:
      lc = '['
      rc = ']'

    argstr = argstr + lc
    for arg in args:
      argstr = argstr + mkargstr(arg, textwidth) + ', '
      if len(argstr) > textwidth:
        WasTruncated = 1
        break

    # Remove the trailing separator, if any item was added at all.
    # A singleton tuple keeps its comma and loses only the space.
    if args:
      if argtype is tuple and len(args) == 1:
        argstr = argstr[:-1]
      else:
        argstr = argstr[:-2]
    argstr = argstr + rc

  elif argtype is str:
    argstr = argstr + "'" + str(args) + "'"

  else:
    argstr = argstr + str(args)

  if len(argstr) > textwidth:
    WasTruncated = 1

  if WasTruncated:
    argstr = argstr[:textwidth]+'...'
  return(argstr)
2283
2284# -----------------------------------------------------------------------------
2285
2286def test_OK(msg):
2287  """Print OK msg if test is OK.
2288 
2289  USAGE
2290    test_OK(message)
2291  """
2292
2293  import string
2294   
2295  print string.ljust(msg, textwidth4) + ' - OK' 
2296 
2297  #raise StandardError
2298 
2299# -----------------------------------------------------------------------------
2300
2301def test_error(msg):
2302  """Print error if test fails.
2303 
2304  USAGE
2305    test_error(message)
2306  """
2307 
2308  print 'ERROR (caching.test): %s' %msg
2309  print 'Please send this code example and output to '
2310  print 'Ole.Nielsen@anu.edu.au'
2311  print
2312  print
2313 
2314  #import sys
2315  #sys.exit()
2316  raise StandardError
Note: See TracBrowser for help on using the repository browser.