source: inundation/caching/caching.py @ 1831

Last change on this file since 1831 was 1831, checked in by ole, 19 years ago

Moved selftest to unit test

File size: 65.7 KB
Line 
1# =============================================================================
2# caching.py - Supervised caching of function results.
3# Copyright (C) 1999, 2000, 2001, 2002 Ole Moller Nielsen
4# Australian National University (1999-2003)
5# Geoscience Australia (2003-present)
6#
7#    This program is free software; you can redistribute it and/or modify
8#    it under the terms of the GNU General Public License as published by
9#    the Free Software Foundation; either version 2 of the License, or
10#    (at your option) any later version.
11#
12#    This program is distributed in the hope that it will be useful,
13#    but WITHOUT ANY WARRANTY; without even the implied warranty of
14#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15#    GNU General Public License (http://www.gnu.org/copyleft/gpl.html)
16#    for more details.
17#
18#    You should have received a copy of the GNU General Public License
19#    along with this program; if not, write to the Free Software
20#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
21#
22#
23# Contact address: Ole.Nielsen@ga.gov.au
24#
25# Version 1.5.6 February 2002
26# =============================================================================
27 
28"""Module caching.py - Supervised caching of function results.
29
30Public functions:
31
32cache(func,args) -- Cache values returned from func given args.
33cachestat() --      Reports statistics about cache hits and time saved.
34test() --       Conducts a basic test of the caching functionality.
35
36See doc strings of individual functions for detailed documentation.
37"""
38
39# -----------------------------------------------------------------------------
40# Initialisation code
41
42# Determine platform
43#
44import os
# os.name values listed here are treated as non-unix platforms; everything
# else is treated as unix.
unix = int(os.name not in ['nt', 'dos', 'win32', 'what else?'])

# Pick the home directory prefix and the line terminator for the platform,
# then build the default caching directory name from the prefix.
#
if unix == 1:
  homedir, CR = '~', '\n'
else:
  homedir, CR = 'c:', '\r\n'  #FIXME: Not tested under windows

cachedir = os.sep.join([homedir, '.python_cache', ''])
60
61# -----------------------------------------------------------------------------
62# Options dictionary with default values - to be set by user
63#
64
# Module-wide settings. Each entry holds the default used when the
# corresponding choice is not given explicitly; change entries through
# set_option().
options = {
  # Default cache directory
  'cachedir': cachedir,

  # Maximum number of cached files
  'maxfiles': 1000000,

  # Log caching info to stats file
  'savestat': 1,

  # Write messages to standard output
  'verbose': 1,

  # Use binary format (more efficient)
  'bin': 1,

  # Use zlib compression
  'compression': 1,

  # Recompute if bytecode has changed
  'bytecode': 0,

  # Automatically remove files that have been accessed least recently
  'expire': 0
}
76
77# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
78
def set_option(key, value):
  """Set the value of an existing entry in the options dictionary.

  USAGE:
    set_option(key, value)

  ARGUMENTS:
    key --   Key in options dictionary. (Required)
    value -- New value for key. (Required)

  DESCRIPTION:
    Assigns value to options[key]. Only keys that are already present in
    the options dictionary are accepted, so a misspelt option name is
    reported instead of silently creating a new entry.

  RAISES:
    KeyError -- if key is not in options.
  """

  # Use the 'in' operator rather than the deprecated dict.has_key()
  if key not in options:
    raise KeyError(key)  # Key not found, raise an exception

  options[key] = value
98
99# -----------------------------------------------------------------------------
100# Function cache - the main routine
101
def cache(func, args=(), kwargs=None, dependencies=None, cachedir=None,
          verbose=None, compression=None, evaluate=0, test=0, clear=0,
          return_filename=0):
  """Supervised caching of function results.

  USAGE:
    result = cache(func, args, kwargs, dependencies, cachedir, verbose,
                   compression, evaluate, test, clear, return_filename)

  ARGUMENTS:
    func --            Function object (Required)
    args --            Arguments to func (Default: ())
    kwargs --          Keyword arguments to func
                       (Default: None, meaning no keyword arguments)
    dependencies --    Filenames that func depends on (Default: None)
    cachedir --        Directory for cache files (Default: options['cachedir'])
    verbose --         Flag verbose output to stdout
                       (Default: options['verbose'])
    compression --     Flag zlib compression (Default: options['compression'])
    evaluate --        Flag forced evaluation of func (Default: 0)
    test --            Flag test for cached results (Default: 0)
    clear --           Flag delete cached results (Default: 0)
    return_filename -- Flag return of cache filename (Default: 0)

  RAISES:
    TypeError -- if kwargs is given but is not a dictionary.

  DESCRIPTION:
    A Python function call of the form

      result = func(arg1,...,argn)

    can be replaced by

      from caching import cache
      result = cache(func,(arg1,...,argn))

  The latter form returns the same output as the former but reuses cached
  results if the function has been computed previously in the same context.
  'result' and the arguments can be simple types, tuples, list, dictionaries or
  objects, but not unhashable types such as functions or open file objects.
  The function 'func' may be a member function of an object or a module.

  This type of caching is particularly useful for computationally intensive
  functions with few frequently used combinations of input arguments. Note that
  if the inputs or output are very large caching might not save time because
  disc access may dominate the execution time.

  If the function definition changes after a result has been cached it will be
  detected by examining the functions bytecode (co_code, co_consts,
  func_defaults, co_argcount) and it will be recomputed.

  LIMITATIONS:
    1 Caching calls func(*args, **kwargs) and will work with anything that
      can be pickled, so any limitation in calling or pickle extends to
      caching.
    2 A function to be cached should not depend on global variables
      as wrong results may occur if globals are changed after a result has
      been cached.

  -----------------------------------------------------------------------------
  Additional functionality:

  Keyword args
    Keyword arguments (kwargs) can be added as a dictionary of keyword: value
    pairs, following the syntax of the built-in function apply().
    A Python function call of the form

      result = func(arg1,...,argn, kwarg1=val1,...,kwargm=valm)

    is then cached as follows

      from caching import cache
      result = cache(func,(arg1,...,argn), {kwarg1:val1,...,kwargm:valm})

    By default no keyword arguments are passed.

  Explicit dependencies:
    The call
      cache(func,(arg1,...,argn),dependencies = <list of filenames>)
    Checks the size, creation time and modification time of each listed file.
    If any file has changed the function is recomputed and the results stored
    again.

  Specify caching directory:
    The call
      cache(func,(arg1,...,argn), cachedir = <cachedir>)
    designates <cachedir> where cached data are stored. Use ~ to indicate users
    home directory - not $HOME. The default is ~/.python_cache on a UNIX
    platform and c:/.python_cache on a Win platform.

  Silent operation:
    The call
      cache(func,(arg1,...,argn),verbose=0)
    suppresses messages to standard output.

  Compression:
    The call
      cache(func,(arg1,...,argn),compression=0)
    disables compression. (Default: compression=1). If the requested compressed
    or uncompressed file is not there, it'll try the other version.

  Forced evaluation:
    The call
      cache(func,(arg1,...,argn),evaluate=1)
    forces the function to evaluate even though cached data may exist.

  Testing for presence of cached result:
    The call
      cache(func,(arg1,...,argn),test=1)
    retrieves cached result if it exists, otherwise None. The function will not
    be evaluated. If both evaluate and test are switched on, evaluate takes
    precedence.

  Obtain cache filenames:
    The call
      cache(func,(arg1,...,argn),return_filename=1)
    returns the hashed base filename under which this function and its
    arguments would be cached

  Clearing cached results:
    The call
      cache(func,'clear')
    clears all cached data for 'func' and
      cache('clear')
    clears all cached data.

    NOTE: The string 'clear' can be passed as an *argument* to func using
      cache(func,('clear',)) or cache(func,tuple(['clear'])).

    New form of clear:
      cache(func,(arg1,...,argn),clear=1)
    clears cached data for particular combination func and args

  """

  # Imports and input checks
  #
  import time

  # A fresh dictionary per call avoids sharing one mutable default object
  #
  if kwargs is None:
    kwargs = {}

  if not cachedir:
    cachedir = options['cachedir']

  if verbose is None:  # Do NOT write 'if not verbose:', it could be zero.
    verbose = options['verbose']

  if compression is None:  # Do NOT write 'if not compression:',
                           # it could be zero.
    compression = options['compression']

  # Create cache directory if needed
  #
  CD = checkdir(cachedir,verbose)

  # Handle the case cache('clear')
  #
  if isinstance(func, str) and func.lower() == 'clear':
    clear_cache(CD,verbose=verbose)
    return None

  # Handle the case cache(func, 'clear')
  #
  if isinstance(args, str) and args.lower() == 'clear':
    clear_cache(CD,func,verbose=verbose)
    return None

  # Force singleton arg into a tuple.
  # The exact type is tested on purpose: an instance of a tuple subclass is
  # treated as one single argument, as it always has been.
  #
  if type(args) is not tuple:
    args = (args,)

  # Check that kwargs is a dictionary
  #
  if not isinstance(kwargs, dict):
    raise TypeError('kwargs must be a dictionary, got %s'
                    % type(kwargs).__name__)

  # Hash arguments (and keyword args) to integer
  # FIXME: make faster hashing function
  #
  arghash = myhash((args,kwargs))

  # Get sizes and timestamps for files listed in dependencies.
  # Force singletons into a tuple.
  #
  if dependencies and type(dependencies) not in (tuple, list):
    dependencies = (dependencies,)
  deps = get_depstats(dependencies)

  # Extract function name from func object
  #
  funcname = get_funcname(func)

  # Create cache filename
  #
  FN = funcname+'['+repr(arghash)+']'  # The symbol '(' does not work under unix

  if return_filename:
    return(FN)

  if clear:
    # Remove every file (compressed or not) stored under this filename
    #
    for file_type in file_types:
      file_name = CD+FN+'_'+file_type
      for fn in (file_name, file_name + '.z'):
        if os.access(fn, os.F_OK):
          if unix:
            os.remove(fn)
          else:
            # FIXME: os.remove doesn't work under windows
            os.system('del '+fn)
          if verbose:
            print('MESSAGE (caching): File %s deleted' % fn)
    return None

  #-------------------------------------------------------------------

  # Check if previous computation has been cached
  #
  if evaluate:
    Retrieved = None  # Force evaluation of func regardless of caching status.
    reason = 4
  else:
    (T, FN, Retrieved, reason, comptime, loadtime, compressed) = \
      CacheLookup(CD, FN, func, args, kwargs, deps, verbose, compression, \
                  dependencies)

  evaluated = 0  # Set when func is actually executed by this call

  if not Retrieved:
    if test and not evaluate:
      # Do not attempt to evaluate function.
      # A forced evaluation takes precedence over test, as documented above.
      T = None
    else:  # Evaluate function and save to cache
      if verbose:
        msg1(funcname, args, kwargs,reason)

      # Remove expired files automatically
      #
      if options['expire']:
        DeleteOldFiles(CD,verbose)

      # Save args before function is evaluated in case
      # they are modified by function
      #
      save_args_to_cache(CD,FN,args,kwargs,compression)

      # Execute and time function with supplied arguments
      #
      t0 = time.time()
      T = func(*args, **kwargs)
      comptime = time.time()-t0
      evaluated = 1

      if verbose:
        msg2(funcname,args,kwargs,comptime,reason)

      # Save results and estimated loading time to cache
      #
      loadtime = save_results_to_cache(T, CD, FN, func, deps, comptime, \
                                       funcname, dependencies, compression)
      if verbose:
        msg3(loadtime, CD, FN, deps, compression)
      compressed = compression

  # Only log when there is something to report: either a cached result was
  # retrieved or the function was executed (timings are undefined otherwise).
  #
  if options['savestat'] and (Retrieved or evaluated):
    addstatsline(CD,funcname,FN,Retrieved,reason,comptime,loadtime,compressed)

  return(T)  # Return results in all cases
369
370# -----------------------------------------------------------------------------
371
def cachestat(sortidx=4, period=-1, showuser=None, cachedir=None):
  """Report how efficient caching has been.

  USAGE:
    cachestat(sortidx, period, showuser, cachedir)

  ARGUMENTS:
    sortidx --  Index of the field by which lists are sorted (default: 4)
                Legal values are
                 0: 'Name'
                 1: 'Hits'
                 2: 'CPU'
                 3: 'Time Saved'
                 4: 'Gain(%)'
                 5: 'Size'
    period --   -1: use all available caching history (default).
                 0: use only the current month.
    showuser -- Flag for additional table showing user statistics
                (default: None).
    cachedir -- Directory for cache files (default: options['cachedir']).

  DESCRIPTION:
    Public entry point; all work is delegated to __cachestat, which is
    called with the arguments unchanged. Nothing is returned.

    Logged caching statistics is converted into summaries of the form
    --------------------------------------------------------------------------
    Function Name   Hits   Exec(s)  Cache(s)  Saved(s)   Gain(%)      Size
    --------------------------------------------------------------------------
  """

  __cachestat(sortidx, period, showuser, cachedir)
402
403# -----------------------------------------------------------------------------
404
405#FIXME - moving to unit test
def test(cachedir=None,verbose=0,compression=None):
  """Test the functionality of caching.

  USAGE:
    test(cachedir, verbose, compression)

  ARGUMENTS:
    cachedir --    Directory for cache files (Default: options['cachedir'])
    verbose --     Flag whether caching will output its statistics (default=0)
    compression -- Flag zlib compression (Default: options['compression'])

  DESCRIPTION:
    Runs a sequence of checks against a freshly reloaded caching module:
    evaluation and retrieval (with and without compression), return of the
    cache filename, presence of the cache files, the 'test' and 'clear'
    options, dependencies (plain, wildcard and modified), a rough
    performance comparison and the presence of the statistics files.
    Each outcome is reported through test_OK or test_error.

    NOTE: The options 'savestat' and (when given or when zlib is missing)
    'compression' are changed by this function and are not restored.
  """

  import time

  # Initialise
  #
  import caching
  reload(caching)

  if not cachedir:
    cachedir = options['cachedir']

  if verbose is None:  # Do NOT write 'if not verbose:', it could be zero.
    verbose = options['verbose']

  if compression is None:  # Do NOT write 'if not compression:',
                           # it could be zero.
    compression = options['compression']
  else:
    try:
      set_option('compression', compression)
    except Exception:
      test_error('Set option failed')

  # Probe for zlib - the module itself is not used here
  #
  try:
    import zlib
  except ImportError:
    print('')
    print('*** Could not find zlib, default to no-compression      ***')
    print('*** Installing zlib will improve performance of caching ***')
    print('')
    compression = 0
    set_option('compression', compression)

  print('')
  print_header_box('Testing caching module - please stand by')
  print('')

  # Define a test function to be cached
  #
  def f(a,b,c,N,x=0,y='abcdefg'):
    """f(a,b,c,N)
       Do something time consuming and produce a complex result.
    """

    B = []
    for n in range(N):
      s = str(n+2.0/(n + 4.0))+'.a'*10
      B.append((a,b,c,s,n,x,y))
    return(B)

  # Check that default cachedir is OK
  #
  CD = checkdir(cachedir,verbose)

  # Make a dependency file
  #
  DepFN = CD + 'testfile.tmp'
  DepFN_wildcard = CD + 'test*.tmp'
  try:
    Depfile = open(DepFN,'w')
    try:
      Depfile.write('We are the knights who say NI!')
    finally:
      Depfile.close()
  except (IOError, OSError):
    test_error('Could not open file %s for writing - check your environment' \
               % DepFN)
  else:
    test_OK('Wrote file %s' %DepFN)

  # Check set_option (and switch stats off)
  #
  try:
    set_option('savestat',0)
    stats_off = (options['savestat'] == 0)
  except Exception:
    stats_off = 0

  if stats_off:
    test_OK('Set option')
  else:
    test_error('Set option failed')

  # Make some test input arguments
  #
  N = 5000  #Make N fairly small here

  a = [1,2]
  b = ('Thou shalt count the number three',4)
  c = {'Five is right out': 6, (7,8): 9}
  x = 3
  y = 'holy hand granate'

  def cached(n, **opts):
    """Call caching.cache for f with the standard test arguments.

    n is the problem size passed to f; opts are extra options for cache.
    verbose defaults to the value given to test().
    """
    opts.setdefault('verbose', verbose)
    return caching.cache(f, (a,b,c,n), {'x':x, 'y':y}, **opts)

  # Test caching
  #
  if compression:
    comprange = 2
  else:
    comprange = 1

  for comp in range(comprange):

    # Evaluate and store
    #
    try:
      T1 = cached(N, evaluate=1, compression=comp)
    except Exception:
      if comp:
        test_error('Caching evaluation with compression failed - try caching.test(compression=0)')
      else:
        test_error('Caching evaluation failed - try caching.test(verbose=1)')
    else:
      if comp:
        test_OK('Caching evaluation with compression')
      else:
        test_OK('Caching evaluation without compression')

    # Retrieve
    #
    try:
      T2 = cached(N, compression=comp)
    except Exception:
      if comp:
        test_error('Caching retrieval with compression failed - try caching.test(compression=0)')
      else:
        test_error('Caching retrieval failed - try caching.test(verbose=1)')
    else:
      if comp:
        test_OK('Caching retrieval with compression')
      else:
        test_OK('Caching retrieval without compression')

    # Reference result
    #
    T3 = f(a,b,c,N,x=x,y=y)  # Compute without caching

    if T1 == T2 and T2 == T3:
      if comp:
        test_OK('Basic caching functionality (with compression)')
      else:
        test_OK('Basic caching functionality (without compression)')
    else:
      test_error('Cached result does not match computed result')

  # Test return_filename
  #
  try:
    FN = cached(N, return_filename=1)
    name_ok = (FN[:2] == 'f[')
  except Exception:
    name_ok = 0

  if name_ok:
    test_OK('Return of cache filename')
  else:
    test_error('Return of cache filename failed')

  # Test existence of cachefiles.
  # Every handle is closed before the verdict is reported so that a missing
  # file can never be announced as present.
  #
  try:
    handles = [myopen(CD+FN+'_'+file_type,"rb",compression)[0]
               for file_type in file_types]
  except Exception:
    handles = [None]

  files_present = 1
  for fid in handles:
    if fid:
      fid.close()
    else:
      files_present = 0

  if files_present:
    test_OK('Presence of cache files')
  else:
    test_error('Expected cache files did not exist')

  # Test 'test' function when cache is present
  #
  try:
    T4 = cached(N, test=1)
    found = (T1 == T4)
  except Exception:
    found = 0

  if found:
    test_OK("Option 'test' when cache file present")
  else:
    test_error("Option 'test' when cache file present failed")

  # Test that 'clear' works
  #
  try:
    cached(N, clear=1)
  except Exception:
    test_error('Clear does not work')
  else:
    test_OK('Clearing of cache files')

  # Test 'test' function when cache is absent
  #
  try:
    T4 = cached(N, test=1)
    absent = (T4 is None)
  except Exception:
    absent = 0

  if absent:
    test_OK("Option 'test' when cache absent")
  else:
    test_error("Option 'test' when cache absent failed")

  # Test dependencies
  #
  T1 = cached(N, dependencies=DepFN)
  T2 = cached(N, dependencies=DepFN)

  if T1 == T2:
    test_OK('Basic dependencies functionality')
  else:
    test_error('Dependencies do not work')

  # Test basic wildcard dependency
  #
  T3 = cached(N, dependencies=DepFN_wildcard)

  if T1 == T3:
    test_OK('Basic dependencies with wildcard functionality')
  else:
    test_error('Dependencies with wildcards do not work')

  # Test that changed timestamp in dependencies triggers recomputation

  # Modify dependency file
  Depfile = open(DepFN,'a')
  try:
    Depfile.write('You must cut down the mightiest tree in the forest with a Herring')
  finally:
    Depfile.close()

  T3 = cached(N, dependencies=DepFN, test=1)

  if T3 is None:
    test_OK('Changed dependencies recognised')
  else:
    test_error('Changed dependencies not recognised')

  # Test recomputation when dependencies have changed
  #
  T3 = cached(N, dependencies=DepFN)
  if T1 == T3:
    test_OK('Recomputed value with changed dependencies')
  else:
    test_error('Recomputed value with changed dependencies failed')

  # Performance test (with statistics)
  # Don't really rely on this as it will depend on specific computer.
  #
  set_option('savestat',1)

  N = 20*N   #Should be large on fast computers...
  tt = time.time()
  T1 = cached(N)
  t1 = time.time() - tt

  tt = time.time()
  T2 = cached(N)
  t2 = time.time() - tt

  if T1 == T2:
    # A retrieval that is not faster is platform specific and not an error
    if t1 > t2:
      test_OK('Performance test: relative time saved = %s pct' \
              %str(round((t1-t2)*100/t1,2)))
  else:
    test_error('Basic caching failed for new problem')

  # Test presence of statistics file
  #
  try:
    for file_name in os.listdir(CD):
      if statsfile in file_name:
        fid = open(CD+file_name,'r')
        fid.close()
  except (IOError, OSError):
    # Report this as a failure - it used to be reported as a success
    test_error('Statistics files cannot be opened')
  else:
    test_OK('Statistics files present')

  print_header_box('Show sample output of the caching function:')

  cached(N, verbose=0)
  cached(N, verbose=0)
  cached(N, verbose=1)

  print_header_box('Show sample output of cachestat():')
  if unix:
    cachestat()
  else:
    try:
      time.strptime('2030','%Y')  # Probe: raises if strptime is unavailable
      cachestat()
    except Exception:
      print('caching.cachestat() does not work here, because it')
      print('relies on time.strptime() which is unavailable in Windows')

  print('')
  test_OK('Caching self test completed')
722     
723           
724  # Test setoption (not yet implemented)
725  #
726
727 
728#==============================================================================
729# Auxiliary functions
730#==============================================================================
731
732# Import pickler
733# cPickle is used by functions mysave, myload, and compare
734#
735import cPickle  # 10 to 100 times faster than pickle
736pickler = cPickle
737
738# Local immutable constants
739#
# Compression level for zlib. comp_level = 1 works well.
comp_level = 1

# Text width of key fields in report forms.
textwidth1 = 16

# Maximal width of textual representation of arguments.
textwidth2 = 132

# Initial width of separation lines. Is modified.
textwidth3 = 16

# Text width in test_OK()
textwidth4 = 50

# Basefilename for cached statistics.
# It will reside in the chosen cache directory.
statsfile = '.cache_stat'

# File name extensions, in this order, for cached function results,
# stored function args and administrative info.
file_types = [
  'Result',
  'Args',
  'Admin',
]

# Verbose reasons for recomputation, indexed by reason code.
Reason_msg = [
  'OK',
  'No cached result',
  'Dependencies have changed',
  'Byte code or arguments have changed',
  'Recomputation was requested by caller',
  'Cached file was unreadable',
]
760             
761# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
762
def CacheLookup(CD, FN, func, args, kwargs, deps, verbose, compression,
                dependencies):
  """Determine whether cached result exists and return info.

  USAGE:
    (T, FN, Retrieved, reason, comptime, loadtime, compressed) =
      CacheLookup(CD, FN, func, args, kwargs, deps, verbose, compression,
                  dependencies)

  INPUT ARGUMENTS:
    CD --            Cache Directory
    FN --            Suggested cache file name
    func --          Function object
    args --          Tuple of arguments
    kwargs --        Dictionary of keyword arguments
    deps --          Dependencies time stamps
    verbose --       Flag text output
    compression --   Flag zlib compression
    dependencies --  Given list of dependencies

  OUTPUT ARGUMENTS:
    T --             Cached result if present otherwise None
    FN --            File name under which new results must be saved
    Retrieved --     True if a valid cached result was found
    reason --        0: OK (if Retrieved),
                     1: No cached result,
                     2: Dependencies have changed,
                     3: Arguments or Bytecode have changed
                     4: Recomputation was forced
                     Any other positive code reported by myload or
                     load_from_cache is passed on unchanged
                     (see Reason_msg).
    comptime --      Number of seconds it took to compute cached result
    loadtime --      Number of seconds it took to load cached result
    compressed --    Flag (0,1) if cached results were compressed or not

    Whenever no valid cached result is found at this filename, every field
    except FN and reason is None.

  DESCRIPTION:
    Determine if cached result exists as follows:
    Load in saved arguments and bytecode stored under hashed filename.
    If they are identical to current arguments and bytecode and if dependencies
    have not changed their time stamp, then return cached result.
    The bytecode is only compared when options['bytecode'] is set.

    Otherwise return filename under which new results should be cached.
    Hash collisions are handled recursively by calling CacheLookup again with a
    modified filename.

    All files opened here are closed again before returning.
  """

  # Assess whether cached result exists - compressed or not.
  #
  if verbose:
    print('Caching: looking for cached files %s_{%s,%s,%s}.z'
          % (CD+FN, file_types[0], file_types[1], file_types[2]))
  (datafile,compressed0) = myopen(CD+FN+'_'+file_types[0],"rb",compression)
  (argsfile,compressed1) = myopen(CD+FN+'_'+file_types[1],"rb",compression)
  (admfile,compressed2) =  myopen(CD+FN+'_'+file_types[2],"rb",compression)

  if not (argsfile and datafile and admfile) or \
     not (compressed0 == compressed1 and compressed0 == compressed2):
    # Cached result does not exist or files were compressed differently
    #
    # This will ensure that evaluation will take place unless all files are
    # present.

    # Release the files that could be opened so no handle is leaked
    #
    for fid in (datafile, argsfile, admfile):
      if fid:
        fid.close()

    reason = 1
    return(None,FN,None,reason,None,None,None) #Recompute using same filename

  compressed = compressed0  # Remember if compressed files were actually used

  # The result itself is read later by load_from_cache; only the presence
  # of the data file was of interest here.
  #
  datafile.close()

  # Retrieve arguments and adm. info
  #
  try:
    R, reason = myload(argsfile,compressed)  # The original arguments
  finally:
    argsfile.close()

  if reason > 0:
    admfile.close()  # Not needed any more - do not leak it
    return(None,FN,None,reason,None,None,None) #Recompute using same filename

  (argsref, kwargsref) = R

  try:
    R, reason = myload(admfile,compressed)
  finally:
    admfile.close()

  if reason > 0:
    return(None,FN,None,reason,None,None,None) #Recompute using same filename

  depsref  = R[0]  # Dependency statistics
  comptime = R[1]  # The computation time
  coderef  = R[2]  # The byte code
  funcname = R[3]  # The function name

  # Check if dependencies have changed
  #
  if dependencies and not compare(depsref,deps):
    if verbose:
      print('MESSAGE (caching.py): Dependencies %s have changed - recomputing'
            % (dependencies,))
    # Don't use cached file - recompute
    reason = 2
    return(None,FN,None,reason,None,None,None)

  # Get bytecode from func
  #
  bytecode = get_bytecode(func)

  # Check if arguments or bytecode have changed
  #
  if compare(argsref,args) and compare(kwargsref,kwargs) and \
     (not options['bytecode'] or compare(bytecode,coderef)):

    # Arguments and dependencies match. Get cached results
    #
    T, loadtime, compressed, reason = load_from_cache(CD,FN,compressed)

    # Test the reason code rather than T: comparing T with None
    # doesn't work if T is a numeric array
    #
    if reason > 0:
      return(None,FN,None,reason,None,None,None) #Recompute using same FN

    Retrieved = 1
    reason = 0

    if verbose:
      msg4(funcname,args,kwargs,deps,comptime,loadtime,CD,FN,compressed)

      if loadtime >= comptime:
        print('WARNING (caching.py): Caching did not yield any gain.')
        print('                      Consider executing function  ('
              + funcname + ') without caching.')
  else:

    # Non matching arguments or bytecodes signify a hash-collision.
    # This is resolved by recursive search of cache filenames
    # until either a matching or an unused filename is found.
    #
    (T,FN,Retrieved,reason,comptime,loadtime,compressed) = \
       CacheLookup(CD,FN+'x',func,args,kwargs,deps,verbose,compression, \
                   dependencies)

    if not Retrieved:
      reason = 3     #The real reason is that args or bytecodes have changed.
                     #Not that the recursive search has found an unused filename

  return((T, FN, Retrieved, reason, comptime, loadtime, compressed))
914
915# -----------------------------------------------------------------------------
916
917def clear_cache(CD,func=None, verbose=None):
918  """Clear cache for func.
919
920  USAGE:
921     clear(CD, func, verbose)
922
923  ARGUMENTS:
924     CD --       Caching directory (required)
925     func --     Function object (default: None)
926     verbose --  Flag verbose output (default: None)
927
928  DESCRIPTION:
929
930    If func == None, clear everything,
931    otherwise clear only files pertaining to func.
932  """
933
934  import os, re
935   
936  if CD[-1] != os.sep:
937    CD = CD+os.sep
938 
939  if verbose == None:
940    verbose = options['verbose']
941
942  # FIXME: Windows version needs to be tested
943
944  if func:
945    funcname = get_funcname(func)
946    if verbose:
947      print 'MESSAGE (caching.py): Clearing', CD+funcname+'*'
948
949    file_names = os.listdir(CD)
950    for file_name in file_names:
951      #RE = re.search('^' + funcname,file_name)  #Inefficient
952      #if RE:
953      if file_name[:len(funcname)] == funcname:
954        if unix:
955          os.remove(CD+file_name)
956        else:
957          os.system('del '+CD+file_name)
958          # FIXME: os.remove doesn't work under windows
959  else:
960    file_names = os.listdir(CD)
961    if len(file_names) > 0:
962      if verbose:
963        print 'MESSAGE (caching.py): Remove the following files:'
964        for file_name in file_names:
965            print file_name
966
967        A = raw_input('Delete (Y/N)[N] ?')
968      else:
969        A = 'Y' 
970       
971      if A == 'Y' or A == 'y':
972        for file_name in file_names:
973          if unix:
974            os.remove(CD+file_name)
975          else:
976            os.system('del '+CD+file_name)
977            # FIXME: os.remove doesn't work under windows
978          #exitcode=os.system('/bin/rm '+CD+'* 2> /dev/null')
979
980# -----------------------------------------------------------------------------
981
982def DeleteOldFiles(CD,verbose=None):
983  """Remove expired files
984
985  USAGE:
986    DeleteOldFiles(CD,verbose=None)
987  """
988
989  if verbose == None:
990    verbose = options['verbose']
991
992  maxfiles = options['maxfiles']
993
994  # FIXME: Windows version
995
996  import os
997  block = 1000  # How many files to delete per invokation
998  Files = os.listdir(CD)
999  numfiles = len(Files)
1000  if not unix: return  # FIXME: Windows case ?
1001
1002  if numfiles > maxfiles:
1003    delfiles = numfiles-maxfiles+block
1004    if verbose:
1005      print 'Deleting '+`delfiles`+' expired files:'
1006      os.system('ls -lur '+CD+'* | head -' + `delfiles`)            # List them
1007    os.system('ls -ur '+CD+'* | head -' + `delfiles` + ' | xargs /bin/rm')
1008                                                                  # Delete them
1009    # FIXME: Replace this with os.listdir and os.remove
1010
1011# -----------------------------------------------------------------------------
1012
1013def save_args_to_cache(CD,FN,args,kwargs,compression):
1014  """Save arguments to cache
1015
1016  USAGE:
1017    save_args_to_cache(CD,FN,args,kwargs,compression)
1018  """
1019
1020  import time, os, sys, types
1021
1022  (argsfile, compressed) = myopen(CD+FN+'_'+file_types[1], 'wb', compression)
1023
1024  if not argsfile:
1025    if verbose:
1026      print 'ERROR (caching): Could not open %s' %argsfile.name
1027    raise IOError
1028
1029  mysave((args,kwargs),argsfile,compression)  # Save args and kwargs to cache
1030  argsfile.close()
1031
1032  # Change access rights if possible
1033  #
1034  #if unix:
1035  #  try:
1036  #    exitcode=os.system('chmod 666 '+argsfile.name)
1037  #  except:
1038  #    pass
1039  #else:
1040  #  pass  # FIXME: Take care of access rights under Windows
1041
1042  return
1043
1044# -----------------------------------------------------------------------------
1045
1046def save_results_to_cache(T, CD, FN, func, deps, comptime, funcname,
1047                          dependencies, compression):
1048  """Save computed results T and admin info to cache
1049
1050  USAGE:
1051    save_results_to_cache(T, CD, FN, func, deps, comptime, funcname,
1052                          dependencies, compression)
1053  """
1054
1055  import time, os, sys, types
1056
1057  (datafile, compressed1) = myopen(CD+FN+'_'+file_types[0],'wb',compression)
1058  (admfile, compressed2) = myopen(CD+FN+'_'+file_types[2],'wb',compression)
1059
1060  if not datafile:
1061    if verbose:
1062      print 'ERROR (caching): Could not open %s' %datafile.name
1063    raise IOError
1064
1065  if not admfile:
1066    if verbose:
1067      print 'ERROR (caching): Could not open %s' %admfile.name
1068    raise IOError
1069
1070  t0 = time.time()
1071
1072  mysave(T,datafile,compression)  # Save data to cache
1073  datafile.close()
1074  #savetime = round(time.time()-t0,2)
1075  savetime = time.time()-t0 
1076
1077  bytecode = get_bytecode(func)  # Get bytecode from function object
1078  admtup = (deps, comptime, bytecode, funcname)  # Gather admin info
1079
1080  mysave(admtup,admfile,compression)  # Save admin info to cache
1081  admfile.close()
1082
1083  # Change access rights if possible
1084  #
1085  #if unix:
1086  #  try:
1087  #    exitcode=os.system('chmod 666 '+datafile.name)
1088  #    exitcode=os.system('chmod 666 '+admfile.name)
1089  #  except:
1090  #    pass
1091  #else:
1092  #  pass  # FIXME: Take care of access rights under Windows
1093
1094  return(savetime)
1095
1096# -----------------------------------------------------------------------------
1097
def load_from_cache(CD,FN,compression):
  """Load previously cached data from file FN

  USAGE:
    load_from_cache(CD,FN,compression)

  ARGUMENTS:
    CD --           Caching directory
    FN --           Base name of the cache files
    compression --  Flag indicating whether the compressed file should be
                    tried first (see myopen)

  DESCRIPTION:
    Return the tuple (T, loadtime, compressed, reason) where
      T is the loaded data (None if it could not be loaded),
      loadtime is the time in seconds spent in myload,
      compressed tells whether the file found was compressed and
      reason is the code returned by myload (0 on success).

    If the result file cannot be opened at all, reason 5 (the code myload
    uses for an unreadable file) is returned instead of failing.
  """

  import time

  (datafile, compressed) = myopen(CD+FN+'_'+file_types[0],"rb",compression)
  if not datafile:
    # myopen hands back None when neither file variant could be opened
    return None, 0.0, compressed, 5

  t0 = time.time()
  try:
    T, reason = myload(datafile,compressed)
    loadtime = time.time()-t0
  finally:
    datafile.close()

  return T, loadtime, compressed, reason
1115
1116# -----------------------------------------------------------------------------
1117
def myopen(FN,mode,compression=1):
  """Open file FN using given mode

  USAGE:
    myopen(FN,mode,compression=1)

  ARGUMENTS:
    FN --           File name to be opened
    mode --         Open mode (as in open)
    compression --  Flag zlib compression

  DESCRIPTION:
     if compression
       Attempt first to open FN + '.z'
       If this fails try to open FN
     else do the opposite
     Return file handle plus info about whether it was compressed or not.
     The handle is None if neither file could be opened.

     If writing or appending was requested and neither FN + '.z' nor FN
     could be opened for reading beforehand, the opened file is regarded
     as new and (on unix) made readable and writable for everybody.
  """

  import os

  def try_open(name, how):
    # Return an open file object, or None if the file cannot be opened
    try:
      return open(name, how)
    except (IOError, OSError):
      return None

  # Determine if file exists already (if writing was requested)
  # This info is only used to determine if access modes should be set
  #
  new_file = 0  # Assume it exists if mode was not 'w' or 'a'
  if 'w' in mode or 'a' in mode:
    new_file = 1
    for name in (FN+'.z', FN):
      probe = try_open(name, 'r')
      if probe:
        probe.close()
        new_file = 0
        break

  # Preferred file name first, the other one as fall back
  #
  if compression:
    candidates = [(FN+'.z', 1), (FN, 0)]
  else:
    candidates = [(FN, 0), (FN+'.z', 1)]

  fid = None
  compressed = 0
  for (name, is_compressed) in candidates:
    fid = try_open(name, mode)
    if fid:
      compressed = is_compressed
      break

  # Now set access rights if it is a new file
  #
  if fid and new_file:
    if unix:
      # os.chmod is used instead of a shell command so that the file name
      # is never interpreted by a shell.
      try:
        os.chmod(fid.name, int('666', 8))  # rw-rw-rw-
      except OSError:
        pass  # As before, failure to change access rights is not fatal
    else:
      pass  # FIXME: Take care of access rights under Windows

  return(fid,compressed)
1187
1188# -----------------------------------------------------------------------------
1189
1190def myload(file, compressed):
1191  """Load data from file
1192
1193  USAGE:
1194    myload(file, compressed)
1195  """
1196
1197  reason = 0
1198  try:
1199    if compressed:
1200      import zlib
1201
1202      RsC = file.read()
1203      try:
1204        Rs  = zlib.decompress(RsC)
1205      except:
1206        #  File "./caching.py", line 1032, in load_from_cache
1207        #  T = myload(datafile,compressed)
1208        #  File "./caching.py", line 1124, in myload
1209        #  Rs  = zlib.decompress(RsC)
1210        #  zlib.error: Error -5 while decompressing data
1211        #print 'ERROR (caching): Could not decompress ', file.name
1212        #raise Exception
1213        reason = 5  #(Unreadable file)
1214        return None, reason 
1215     
1216     
1217      del RsC  # Free up some space
1218      R   = pickler.loads(Rs)
1219    else:
1220      R = pickler.load(file)
1221  except MemoryError:
1222    import sys
1223    if options['verbose']:
1224      print 'ERROR (caching): Out of memory while loading %s, aborting' \
1225            %(file.name)
1226
1227    # Raise the error again for now
1228    #
1229    raise MemoryError
1230
1231  return R, reason
1232
1233# -----------------------------------------------------------------------------
1234
1235def mysave(T,file,compression):
1236  """Save data T to file
1237
1238  USAGE:
1239    mysave(T,file,compression)
1240
1241  """
1242
1243  bin = options['bin']
1244
1245  if compression:
1246    try:
1247      import zlib
1248    except:
1249      print
1250      print '*** Could not find zlib ***'
1251      print '*** Try to run caching with compression off ***'
1252      print "*** caching.set_option('compression', 0) ***"
1253      raise Exception
1254     
1255
1256    Ts  = pickler.dumps(T,bin)
1257    TsC = zlib.compress(Ts,comp_level)
1258    file.write(TsC)
1259  else:
1260    pickler.dump(T,file,bin)
1261
1262# -----------------------------------------------------------------------------
1263
def myhash(T):
  """Compute hashed integer from hashable values of tuple T

  USAGE:
    myhash(T)

  ARGUMENTS:
    T -- Tuple

  DESCRIPTION:
    Tuples and lists are hashed recursively element by element,
    dictionaries are handed to dicthash and anything else to the
    built-in hash.
    If the built-in hash fails, Numeric arrays (when Numeric can be
    imported) are hashed element by element; everything else yields 1.
  """

  kind = type(T)

  if kind is tuple or kind is list:
    return(hash(tuple([myhash(item) for item in T])))

  if kind is dict:
    return(dicthash(T))

  try:
    return(hash(T))
  except:
    pass

  # T is not hashable. Try treating it as a Numeric array.
  #
  try:
    import Numeric
    if type(T) == Numeric.ArrayType:
      return(hash(tuple([myhash(e) for e in T])))
  except:
    pass

  return(1)  #Could implement other Numeric types here
1305
1306# -----------------------------------------------------------------------------
1307
def dicthash(D):
  """Compute hashed integer from hashable values of dictionary D

  USAGE:
    dicthash(D)

  DESCRIPTION:
    Only the values of D contribute; the keys themselves are not hashed.
    Values that cannot be hashed are skipped.
    The values are visited in the order given by D.keys().
  """

  # Get hash values for hashable entries
  #
  hvals = []
  for key in D.keys():
    try:
      hvals.append(hash(D[key]))
    except:
      pass

  # Hash obtained values into one value
  #
  combined = hash(tuple(hvals))
  return(combined)
1330
1331# -----------------------------------------------------------------------------
1332
def compare(A,B):
  """Safe comparison of general objects

  USAGE:
    compare(A,B)

  DESCRIPTION:
    Return 1 if A and B are identical, 0 otherwise.

    A == B is tried first. If the comparison itself, or taking the truth
    value of its result, fails, the pickled representations of A and B
    are compared instead. If that fails as well, 0 is returned.

    NOTE(review): For objects whose == works element by element, the truth
    value of the element-wise result decides whenever taking it does not
    fail - confirm that this is the desired behaviour for array arguments.
  """

  try:
    # The truth value is taken here, inside the guard, so that objects
    # with an ambiguous truth value fall back to the pickle comparison
    # instead of raising later in the caller.
    if A == B:
      identical = 1
    else:
      identical = 0
  except Exception:
    try:
      if pickler.dumps(A) == pickler.dumps(B):
        identical = 1
      else:
        identical = 0
    except Exception:
      identical = 0

  return(identical)
1352
1353# -----------------------------------------------------------------------------
1354
def nospace(s):
  """Replace spaces in string s with underscores

  USAGE:
    nospace(s)

  ARGUMENTS:
    s -- string

  DESCRIPTION:
    Return a copy of s in which every space character has been replaced
    by an underscore. Other whitespace is left untouched.
  """

  # One pass through the built-in string method instead of building the
  # result one character at a time.
  return(s.replace(' ', '_'))
1375
1376# -----------------------------------------------------------------------------
1377
def get_funcname(func):
  """Retrieve name of function object func (depending on its type)

  USAGE:
    get_funcname(func)

  DESCRIPTION:
    Plain Python functions yield their func_name and built-in functions
    their __name__.
    For any other object the name is built from its repr: the characters
    <, > and ' are replaced by blanks and runs of whitespace are collapsed
    into single blanks.
    In all cases the result is finally passed through nospace.
  """

  import types

  kind = type(func)

  if kind == types.FunctionType:
    name = func.func_name
  elif kind == types.BuiltinFunctionType:
    name = func.__name__
  else:
    text = repr(func)
    for char in "<>'":
      text = text.replace(char, ' ')
    name = ' '.join(text.split())

  return(nospace(name))
1399
1400# -----------------------------------------------------------------------------
1401
def get_bytecode(func):
  """ Get bytecode from function object.

  USAGE:
    get_bytecode(func)

  DESCRIPTION:
    Return the tuple (bytecode, consts, argcount, defaults) taken from the
    code object and the default arguments of func.
    For methods the underlying function (im_func) is inspected.
    For all other objects (e.g. built-in functions) the tuple
    (None, 0, 0, 0) is returned.
  """

  import types

  kind = type(func)

  if kind == types.MethodType:
    target = func.im_func
  elif kind == types.FunctionType:
    target = func
  else:
    #Built-in functions are assumed not to change
    return (None, 0, 0, 0)

  code = target.func_code
  return (code.co_code, code.co_consts, code.co_argcount,
          target.func_defaults)
1429
1430# -----------------------------------------------------------------------------
1431
def get_depstats(dependencies):
  """ Build dictionary of dependency files and their size and mod. time.

  USAGE:
    get_depstats(dependencies):

  ARGUMENTS:
    dependencies --  List of file names (wildcards allowed) or a single
                     file name. May be empty or None.

  DESCRIPTION:
    Return a dictionary mapping each dependency file, after wildcard
    expansion, to the tuple (size, mtime) obtained from filestat.

    Raise Exception if a dependency is not a string or if it does not
    match any existing file.
  """

  import os, types, glob

  d = {}
  if not dependencies:
    return(d)

  if isinstance(dependencies, types.StringType):
    # A single file name: don't iterate over its characters
    dependencies = [dependencies]

  #Expand any wildcards
  expanded_dependencies = []
  for FN in dependencies:
    # The type has to be checked before globbing, glob needs a string
    if not isinstance(FN, types.StringType):
      errmsg = 'ERROR (caching.py): Dependency must be a string.\n'
      errmsg += '                    Dependency given: %s' %(FN,)
      raise Exception(errmsg)

    expanded_FN = glob.glob(FN)
    if expanded_FN:
      expanded_dependencies += expanded_FN
    else:
      # glob returns nothing for names that match no file. Keep the name
      # so that the existence check below reports it rather than letting
      # a missing dependency pass unnoticed.
      expanded_dependencies.append(FN)

  for FN in expanded_dependencies:
    if not os.access(FN,os.F_OK):
      errmsg = 'ERROR (caching.py): Dependency '+FN+' does not exist.'
      raise Exception(errmsg)
    (size,atime,mtime,ctime) = filestat(FN)

    # We don't use atime because that would cause recomputation every time.
    # We don't use ctime because that is irrelevant and confusing for users.
    d[FN] = (size,mtime)

  return(d)
1468
1469# -----------------------------------------------------------------------------
1470
1471def filestat(FN):
1472  """A safe wrapper using os.stat to get basic file statistics
1473     The built-in os.stat breaks down if file sizes are too large (> 2GB ?)
1474
1475  USAGE:
1476    filestat(FN)
1477
1478  DESCRIPTION:
1479     Must compile Python with
1480     CFLAGS="`getconf LFS_CFLAGS`" OPT="-g -O2 $CFLAGS" \
1481              configure
1482     as given in section 8.1.1 Large File Support in the Libray Reference
1483  """
1484
1485  import os, time
1486
1487  try:
1488    stats = os.stat(FN)
1489    size  = stats[6]
1490    atime = stats[7]
1491    mtime = stats[8]
1492    ctime = stats[9]
1493  except:
1494
1495    # Hack to get the results anyway (works only on Unix at the moment)
1496    #
1497    print 'Hack to get os.stat when files are too large'
1498
1499    if unix:
1500      tmp = '/tmp/cach.tmp.'+`time.time()`+`os.getpid()`
1501      # Unique filename, FIXME: Use random number
1502
1503      # Get size and access time (atime)
1504      #
1505      exitcode=os.system('ls -l --full-time --time=atime '+FN+' > '+tmp)
1506      (size,atime) = get_lsline(tmp)
1507
1508      # Get size and modification time (mtime)
1509      #
1510      exitcode=os.system('ls -l --full-time '+FN+' > '+tmp)
1511      (size,mtime) = get_lsline(tmp)
1512
1513      # Get size and ctime
1514      #
1515      exitcode=os.system('ls -l --full-time --time=ctime '+FN+' > '+tmp)
1516      (size,ctime) = get_lsline(tmp)
1517
1518      try:
1519        exitcode=os.system('rm '+tmp)
1520        # FIXME: Gives error if file doesn't exist
1521      except:
1522        pass
1523    else:
1524      pass
1525      raise Exception  # FIXME: Windows case
1526
1527  return(long(size),atime,mtime,ctime)
1528
1529# -----------------------------------------------------------------------------
1530
def get_lsline(FN):
  """get size and time for filename

  USAGE:
    get_lsline(file_name)

  DESCRIPTION:
    Read in one line 'ls -la' item from file (generated by filestat) and
    convert time to seconds since epoch. Return file size and time.

    The whitespace separated fields 4 to 9 of the file are taken to be
    size, weekday, month, day, time of day and year. The size is returned
    as found in the file, i.e. as a string.
  """

  import time

  f = open(FN,'r')
  try:
    info = f.read().split()
  finally:
    f.close()  # Don't leave the file open, not even if reading fails

  size = info[4]
  week = info[5]
  mon  = info[6]
  day  = info[7]
  hour = info[8]
  year = info[9]

  # This is the default format expected by time.strptime
  stamp = week+' '+mon+' '+day+' '+hour+' '+year
  timetup = time.strptime(stamp)
  t = time.mktime(timetup)
  return(size, t)
1559
1560# -----------------------------------------------------------------------------
1561
def checkdir(CD,verbose=None):
  """Check or create caching directory

  USAGE:
    checkdir(CD,verbose):

  ARGUMENTS:
    CD -- Directory
    verbose -- Flag verbose output (default: None)

  DESCRIPTION:
    If CD does not exist it will be created if possible

    Return the directory to use, with ~ expanded and a trailing separator.
    If CD is not both readable and writable and cannot be created either,
    a warning is printed and '/tmp/' (unix) or 'C:' (otherwise) is
    returned instead.
  """

  import os
  import os.path

  if CD[-1] != os.sep:
    CD = CD + os.sep  # Add separator for directories

  CD = os.path.expanduser(CD) # Expand ~ or ~user in pathname

  # The access flags have to be combined with | to test for both.
  # ('os.R_OK and os.W_OK' is just os.W_OK.)
  if os.access(CD,os.R_OK | os.W_OK) or CD == '':
    return(CD)

  try:
    os.mkdir(CD)
  except OSError:
    print('WARNING: Directory ' + CD + ' could not be created.')
    if unix:
      CD = '/tmp/'
    else:
      CD = 'C:'
    print('Using directory %s instead' %CD)
    return(CD)

  # Change access rights if possible
  #
  if unix:
    try:
      os.chmod(CD, int('777', 8))  # rwxrwxrwx
    except OSError:
      pass  # Not fatal
  else:
    pass  # FIXME: What about access rights under Windows?

  if verbose: print('MESSAGE: Directory ' + CD + ' created.')

  return(CD)
1603
1604#==============================================================================
1605# Statistics
1606#==============================================================================
1607
1608def addstatsline(CD,funcname,FN,Retrieved,reason,comptime,loadtime,
1609                 compression):
1610  """Add stats entry
1611
1612  USAGE:
1613    addstatsline(CD,funcname,FN,Retrieved,reason,comptime,loadtime,compression)
1614
1615  DESCRIPTION:
1616    Make one entry in the stats file about one cache hit recording time saved
1617    and other statistics. The data are used by the function cachestat.
1618  """
1619
1620  import os, time
1621
1622  try:
1623    TimeTuple = time.localtime(time.time())
1624    extension = time.strftime('%b%Y',TimeTuple)
1625    SFN = CD+statsfile+'.'+extension
1626    #statfile = open(SFN,'a')
1627    (statfile, dummy) = myopen(SFN,'a',compression=0)
1628
1629    # Change access rights if possible
1630    #
1631    #if unix:
1632    #  try:
1633    #    exitcode=os.system('chmod 666 '+SFN)
1634    #  except:
1635    #    pass
1636  except:
1637    print 'Warning: Stat file could not be opened'
1638
1639  try:
1640    if os.environ.has_key('USER'):
1641      user = os.environ['USER']
1642    else:
1643      user = 'Nobody'
1644
1645    date = time.asctime(TimeTuple)
1646
1647    if Retrieved:
1648      hit = '1'
1649    else:
1650      hit = '0'
1651
1652    # Get size of result file
1653    #   
1654    if compression:
1655      stats = os.stat(CD+FN+'_'+file_types[0]+'.z')
1656    else:
1657      stats = os.stat(CD+FN+'_'+file_types[0])
1658 
1659    if stats: 
1660      size = stats[6]
1661    else:
1662      size = -1  # Error condition, but don't crash. This is just statistics 
1663
1664    # Build entry
1665   
1666    entry = date             + ',' +\
1667            user             + ',' +\
1668            FN               + ',' +\
1669            str(int(size))   + ',' +\
1670            str(compression) + ',' +\
1671            hit              + ',' +\
1672            str(reason)      + ',' +\
1673            str(round(comptime,4)) + ',' +\
1674            str(round(loadtime,4)) +\
1675            CR
1676           
1677    statfile.write(entry)
1678    statfile.close()
1679  except:
1680    print 'Warning: Writing of stat file failed'
1681
1682# -----------------------------------------------------------------------------
1683
1684# FIXME: should take cachedir as an optional arg
1685#
1686def __cachestat(sortidx=4,period=-1,showuser=None,cachedir=None):
1687  """  List caching statistics.
1688
1689  USAGE:
1690    __cachestat(sortidx=4,period=-1,showuser=None,cachedir=None):
1691
1692      Generate statistics of caching efficiency.
1693      The parameter sortidx determines by what field lists are sorted.
1694      If the optional keyword period is set to -1,
1695      all available caching history is used.
1696      If it is 0 only the current month is used.
1697      Future versions will include more than one month....
1698      OMN 20/8/2000
1699  """
1700
1701  import os
1702  import os.path
1703  from string import split, rstrip, find, atof, atoi
1704  from time import strptime, localtime, strftime, mktime, ctime
1705
1706  # sortidx = 4    # Index into Fields[1:]. What to sort by.
1707
1708  Fields = ['Name', 'Hits', 'Exec(s)', \
1709            'Cache(s)', 'Saved(s)', 'Gain(%)', 'Size']
1710  Widths = [25,7,9,9,9,9,13]
1711  #Types = ['s','d','d','d','d','.2f','d']
1712  Types = ['s','d','.2f','.2f','.2f','.2f','d'] 
1713
1714  Dictnames = ['Function', 'User']
1715
1716  if not cachedir:
1717    cachedir = checkdir(options['cachedir'])
1718
1719  SD = os.path.expanduser(cachedir)  # Expand ~ or ~user in pathname
1720
1721  if period == -1:  # Take all available stats
1722    SFILENAME = statsfile
1723  else:  # Only stats from current month 
1724       # MAKE THIS MORE GENERAL SO period > 0 counts several months backwards!
1725    TimeTuple = localtime(time())
1726    extension = strftime('%b%Y',TimeTuple)
1727    SFILENAME = statsfile+'.'+extension
1728
1729  DIRLIST = os.listdir(SD)
1730  SF = []
1731  for FN in DIRLIST:
1732    if find(FN,SFILENAME) >= 0:
1733      SF.append(FN)
1734
1735  blocksize = 15000000
1736  total_read = 0
1737  total_hits = 0
1738  total_discarded = 0
1739  firstday = mktime(strptime('2030','%Y'))
1740             # FIXME: strptime don't exist in WINDOWS ?
1741  lastday = 0
1742
1743  FuncDict = {}
1744  UserDict = {}
1745  for FN in SF:
1746    input = open(SD+FN,'r')
1747    print 'Reading file ', SD+FN
1748
1749    while 1:
1750      A = input.readlines(blocksize)
1751      if len(A) == 0: break
1752      total_read = total_read + len(A)
1753      for record in A:
1754        record = tuple(split(rstrip(record),','))
1755        #print record
1756
1757        if len(record) in [8,9]:
1758          n = 0
1759          timestamp = record[n]; n=n+1
1760       
1761          try:
1762            t = mktime(strptime(timestamp))
1763          except:
1764            total_discarded = total_discarded + 1         
1765            continue   
1766             
1767          if t > lastday:
1768            lastday = t
1769          if t < firstday:
1770            firstday = t
1771
1772          user     = record[n]; n=n+1
1773          func     = record[n]; n=n+1
1774
1775          # Strip hash-stamp off
1776          #
1777          i = find(func,'[')
1778          func = func[:i]
1779
1780          size        = atof(record[n]); n=n+1
1781          compression = atoi(record[n]); n=n+1
1782          hit         = atoi(record[n]); n=n+1
1783          reason      = atoi(record[n]); n=n+1   # Not used here   
1784          cputime     = atof(record[n]); n=n+1
1785          loadtime    = atof(record[n]); n=n+1
1786
1787          if hit:
1788            total_hits = total_hits + 1
1789            saving = cputime-loadtime
1790
1791            if cputime != 0:
1792              rel_saving = round(100.0*saving/cputime,2)
1793            else:
1794              #rel_saving = round(1.0*saving,2)
1795              rel_saving = 100.0 - round(1.0*saving,2)  # A bit of a hack
1796
1797            info = [1,cputime,loadtime,saving,rel_saving,size]
1798
1799            UpdateDict(UserDict,user,info)
1800            UpdateDict(FuncDict,func,info)
1801          else:
1802            pass #Stats on recomputations and their reasons could go in here
1803             
1804        else:
1805          #print 'Record discarded'
1806          #print record
1807          total_discarded = total_discarded + 1
1808
1809    input.close()
1810
1811  # Compute averages of all sums and write list
1812  #
1813
1814  if total_read == 0:
1815    printline(Widths,'=')
1816    print 'CACHING STATISTICS: No valid records read'
1817    printline(Widths,'=')
1818    return
1819
1820  print
1821  printline(Widths,'=')
1822  print 'CACHING STATISTICS: '+ctime(firstday)+' to '+ctime(lastday)
1823  printline(Widths,'=')
1824  #print '  Period:', ctime(firstday), 'to', ctime(lastday)
1825  print '  Total number of valid records', total_read
1826  print '  Total number of discarded records', total_discarded
1827  print '  Total number of hits', total_hits
1828  print
1829
1830  print '  Fields', Fields[2:], 'are averaged over number of hits'
1831  print '  Time is measured in seconds and size in bytes'
1832  print '  Tables are sorted by', Fields[1:][sortidx]
1833
1834  # printline(Widths,'-')
1835
1836  if showuser:
1837    Dictionaries = [FuncDict, UserDict]
1838  else:
1839    Dictionaries = [FuncDict]
1840
1841  i = 0
1842  for Dict in Dictionaries:
1843    for key in Dict.keys():
1844      rec = Dict[key]
1845      for n in range(len(rec)):
1846        if n > 0:
1847          rec[n] = round(1.0*rec[n]/rec[0],2)
1848      Dict[key] = rec
1849
1850    # Sort and output
1851    #
1852    keylist = SortDict(Dict,sortidx)
1853
1854    # Write Header
1855    #
1856    print
1857    #print Dictnames[i], 'statistics:'; i=i+1
1858    printline(Widths,'-')
1859    n = 0
1860    for s in Fields:
1861      if s == Fields[0]:  # Left justify
1862        s = Dictnames[i] + ' ' + s; i=i+1
1863        exec "print '%-" + str(Widths[n]) + "s'%s,"; n=n+1
1864      else:
1865        exec "print '%" + str(Widths[n]) + "s'%s,"; n=n+1
1866    print
1867    printline(Widths,'-')
1868
1869    # Output Values
1870    #
1871    for key in keylist:
1872      rec = Dict[key]
1873      n = 0
1874      if len(key) > Widths[n]: key = key[:Widths[n]-3] + '...'
1875      exec "print '%-" + str(Widths[n]) + Types[n]+"'%key,";n=n+1
1876      for val in rec:
1877        exec "print '%" + str(Widths[n]) + Types[n]+"'%val,"; n=n+1
1878      print
1879    print
1880
1881#==============================================================================
1882# Auxiliary stats functions
1883#==============================================================================
1884
def UpdateDict(Dict,key,info):
  """Update dictionary by adding new values to existing.

  USAGE:
    UpdateDict(Dict,key,info)

  DESCRIPTION:
    If key is not yet in Dict, a copy of the list info is stored under key.
    Otherwise the entries of info are added, position by position, to the
    list already stored under key.
    Dict is modified in place and returned as well.
  """

  if key not in Dict:
    Dict[key] = info[:]  # Store a copy so that info itself is never modified
    return Dict

  totals = Dict[key]
  for n, accumulated in enumerate(totals):
    totals[n] = info[n] + accumulated

  Dict[key] = totals
  return Dict
1901
1902# -----------------------------------------------------------------------------
1903
def SortDict(Dict,sortidx=0):
  """Sort dictionary

  USAGE:
    SortDict(Dict,sortidx):

  DESCRIPTION:
    Return the keys of Dict ordered by entry number 'sortidx' of their
    values in ascending order, ties being ordered by the keys themselves.
    Values that are neither lists nor tuples are treated as one-element
    lists.
    Raise IndexError if sortidx is beyond the end of a value.
  """

  pairs = []
  for key in Dict.keys():
    rec = Dict[key]
    if type(rec) not in (list, tuple):
      rec = [rec]

    if sortidx > len(rec)-1:
      if options['verbose']:
        print('ERROR: Sorting index to large, sortidx =  ' + str(sortidx))
      raise IndexError

    pairs.append((rec[sortidx], key))

  pairs.sort()

  return([key for (val, key) in pairs])  # keys sorted by sortidx
1936
1937# -----------------------------------------------------------------------------
1938
def printline(Widths,char):
  """Print textline in fixed field.

  USAGE:
    printline(Widths,char)

  DESCRIPTION:
    Print one line made of char only: Widths[n] copies for each field plus
    one extra copy between consecutive fields.
  """

  segments = [width*char for width in Widths]
  print(char.join(segments))
1953
1954#==============================================================================
1955# Messages
1956#==============================================================================
1957
def msg1(funcname,args,kwargs,reason):
  """Message 1 - a function is about to be evaluated

  USAGE:
    msg1(funcname,args,kwargs,reason):

  DESCRIPTION:
    Print a header box reading 'Evaluating function <funcname>', followed
    by the output of msg7 (given args and kwargs), the output of msg8
    (given reason) and the footer.
  """

  title = 'Evaluating function %s' %funcname
  print_header_box(title)

  msg7(args,kwargs)
  msg8(reason)

  print_footer()
1974 
1975  #
1976  # Old message
1977  #
1978  #args_present = 0
1979  #if args:
1980  #  if len(args) == 1:
1981  #    print 'with argument', mkargstr(args[0], textwidth2),
1982  #  else:
1983  #    print 'with arguments', mkargstr(args, textwidth2),
1984  #  args_present = 1     
1985  #   
1986  #if kwargs:
1987  #  if args_present:
1988  #    word = 'and'
1989  #  else:
1990  #    word = 'with'
1991  #     
1992  #  if len(kwargs) == 1:
1993  #    print word + ' keyword argument', mkargstr(kwargs, textwidth2)
1994  #  else:
1995  #    print word + ' keyword arguments', mkargstr(kwargs, textwidth2)
1996  #  args_present = 1           
1997  #else:
1998  #  print    # Newline when no keyword args present
1999  #       
2000  #if not args_present:   
2001  #  print '',  # Default if no args or kwargs present
2002   
2003   
2004
2005# -----------------------------------------------------------------------------
2006
def msg2(funcname,args,kwargs,comptime,reason):
  """Message 2 - first part of the statistics shown when storing a result.

  USAGE:
    msg2(funcname,args,kwargs,comptime,reason)

  ARGUMENTS:
    funcname -- Name of the function, passed on to msg6.
    args --     Positional arguments, passed on to msg6.
    kwargs --   Keyword arguments, passed on to msg6.
    comptime -- CPU time in seconds; printed rounded to two decimals.
    reason --   Reason code, passed on to msg8.

  DESCRIPTION:
    Prints the 'Caching statistics (storing)' header box, the function and
    argument lines (msg6), the reason line (msg8) and the CPU time line.
    No footer is printed by this function.
  """

  print_header_box('Caching statistics (storing)')

  msg6(funcname,args,kwargs)
  msg8(reason)

  cpu_label = '| CPU time:'.ljust(textwidth1)
  print(cpu_label + str(round(comptime,2)) + ' seconds')
2028
2029# -----------------------------------------------------------------------------
2030
def msg3(savetime, CD, FN, deps,compression):
  """Message 3 - estimated loading time followed by file statistics.

  USAGE:
    msg3(savetime, CD, FN, deps,compression)

  ARGUMENTS:
    savetime --    Time in seconds; printed rounded to two decimals and
                   labelled as the (estimated) loading time.
    CD, FN, deps, compression -- Passed unchanged to msg5.

  DESCRIPTION:
    Prints the 'Loading time' line and then delegates to msg5 for the
    caching directory, file and dependency statistics.
  """

  label = '| Loading time:'.ljust(textwidth1)
  print(label + str(round(savetime,2)) + ' seconds (estimated)')

  msg5(CD,FN,deps,compression)
2042
2043# -----------------------------------------------------------------------------
2044
def msg4(funcname,args,kwargs,deps,comptime,loadtime,CD,FN,compression):
  """Message 4 - statistics shown when a result is retrieved from cache.

  USAGE:
    msg4(funcname,args,kwargs,deps,comptime,loadtime,CD,FN,compression)

  ARGUMENTS:
    funcname, args, kwargs -- Passed on to msg6.
    comptime --               CPU time in seconds.
    loadtime --               Loading time in seconds.
    CD, FN, deps, compression -- Passed unchanged to msg5.

  DESCRIPTION:
    Prints the 'Caching statistics (retrieving)' header box, the function
    and argument lines (msg6), the CPU time, the loading time and their
    difference (time saved), each rounded to two decimals, and finally
    the file and dependency statistics (msg5).
  """

  def timing_line(label, seconds):
    # One '| <label>   <seconds> seconds' row, label padded to textwidth1
    return label.ljust(textwidth1) + str(round(seconds,2)) + ' seconds'

  print_header_box('Caching statistics (retrieving)')

  msg6(funcname,args,kwargs)
  print(timing_line('| CPU time:', comptime))
  print(timing_line('| Loading time:', loadtime))
  print(timing_line('| Time saved:', comptime-loadtime))
  msg5(CD,FN,deps,compression)
2062
2063# -----------------------------------------------------------------------------
2064
2065def msg5(CD,FN,deps,compression):
2066  """Message 5
2067
2068  USAGE:
2069    msg5(CD,FN,deps,compression)
2070
2071  DESCRIPTION:
2072   Print dependency stats. Used by msg3 and msg4
2073  """
2074
2075  import os, time, string
2076
2077  print '|'
2078  print string.ljust('| Caching dir: ', textwidth1) + CD
2079
2080  if compression:
2081    suffix = '.z'
2082    bytetext = 'bytes, compressed'
2083  else:
2084    suffix = ''
2085    bytetext = 'bytes'
2086
2087  for file_type in file_types:
2088    file_name = FN + '_' + file_type + suffix
2089    print string.ljust('| ' + file_type + ' file: ', textwidth1) + file_name,
2090    stats = os.stat(CD+file_name)
2091    print '('+ str(stats[6]) + ' ' + bytetext + ')'
2092
2093  print '|'
2094  if len(deps) > 0:
2095    print '| Dependencies:  '
2096    dependencies  = deps.keys()
2097    dlist = []; maxd = 0
2098    tlist = []; maxt = 0
2099    slist = []; maxs = 0
2100    for d in dependencies:
2101      stats = deps[d]
2102      t = time.ctime(stats[1])
2103      s = str(stats[0])
2104      #if s[-1] == 'L':
2105      #  s = s[:-1]  # Strip rightmost 'long integer' L off.
2106      #              # FIXME: Unnecessary in versions later than 1.5.2
2107
2108      if len(d) > maxd: maxd = len(d)
2109      if len(t) > maxt: maxt = len(t)
2110      if len(s) > maxs: maxs = len(s)
2111      dlist.append(d)
2112      tlist.append(t)
2113      slist.append(s)
2114
2115    for n in range(len(dlist)):
2116      d = string.ljust(dlist[n]+':', maxd+1)
2117      t = string.ljust(tlist[n], maxt)
2118      s = string.rjust(slist[n], maxs)
2119
2120      print '| ', d, t, ' ', s, 'bytes'
2121  else:
2122    print '| No dependencies'
2123  print_footer()
2124
2125# -----------------------------------------------------------------------------
2126
def msg6(funcname,args,kwargs):
  """Message 6 - function name followed by its arguments.

  USAGE:
    msg6(funcname,args,kwargs)

  ARGUMENTS:
    funcname -- Name of the function (string), printed after the label.
    args --     Positional arguments, passed on to msg7.
    kwargs --   Keyword arguments, passed on to msg7.

  DESCRIPTION:
    Prints '| Function:' padded to textwidth1 followed by funcname and
    then lets msg7 print the argument summary.
  """

  print('| Function:'.ljust(textwidth1) + funcname)

  msg7(args,kwargs)
2138 
2139# -----------------------------------------------------------------------------   
2140
def msg7(args,kwargs):
  """Message 7 - summary of positional and keyword arguments.

  USAGE:
    msg7(args,kwargs)

  ARGUMENTS:
    args --   Positional arguments. A single argument is shown on its own,
              several are shown as one sequence.
    kwargs -- Keyword arguments; always shown as one mapping.

  DESCRIPTION:
    Each line consists of a label padded to textwidth1 followed by the
    text produced by mkargstr, limited to textwidth2 characters.
    If both args and kwargs are empty, '| No arguments' is printed.
  """

  if args:
    if len(args) == 1:
      label, shown = '| Argument:', args[0]
    else:
      label, shown = '| Arguments:', args
    print(label.ljust(textwidth1) + mkargstr(shown, textwidth2))

  if kwargs:
    if len(kwargs) == 1:
      label = '| Keyword Arg:'
    else:
      label = '| Keyword Args:'
    print(label.ljust(textwidth1) + mkargstr(kwargs, textwidth2))

  if not (args or kwargs):
    print('| No arguments') # Default if no args or kwargs present
2171
2172# -----------------------------------------------------------------------------
2173
def msg8(reason):
  """Message 8 - reason line of a caching message.

  USAGE:
    msg8(reason)

  ARGUMENTS:
    reason -- Key or index used to look up the text in Reason_msg.

  DESCRIPTION:
    Prints '| Reason:' padded to textwidth1 followed by Reason_msg[reason].
    If the lookup fails, 'Unknown' is printed instead.
  """

  # Only a failed lookup means "unknown reason". Anything else (e.g. a
  # keyboard interrupt) must not be swallowed here.
  try:
    R = Reason_msg[reason]
  except (KeyError, IndexError, TypeError):
    R = 'Unknown'

  print('| Reason:'.ljust(textwidth1) + R)
2189   
2190# -----------------------------------------------------------------------------
2191
2192def print_header_box(line):
2193  """Print line in a nice box.
2194 
2195  USAGE:
2196    print_header_box(line)
2197
2198  """
2199  global textwidth3
2200
2201  import time
2202
2203  time_stamp = time.ctime(time.time())
2204  line = time_stamp + '. ' + line
2205   
2206  N = len(line) + 1
2207  s = '+' + '-'*N + CR
2208
2209  print s + '| ' + line + CR + s,
2210
2211  textwidth3 = N
2212
2213# -----------------------------------------------------------------------------
2214   
def print_footer():
  """Print a horizontal rule of the width stored in textwidth3.

  USAGE:
    print_footer()

  DESCRIPTION:
    textwidth3 is set by print_header_box, so the footer matches the
    width of the most recently printed header box. The rule is followed
    by CR in addition to the newline written by print.
  """

  print('+' + '-'*textwidth3 + CR)
2223     
2224# -----------------------------------------------------------------------------
2225
def mkargstr(args, textwidth, argstr = ''):
  """Generate a string containing first textwidth characters of arguments.

  USAGE:
    mkargstr(args, textwidth, argstr = '')

  ARGUMENTS:
    args --      Object to render. Tuples, lists and dictionaries are
                 rendered element by element (recursively); strings are
                 enclosed in single quotes; anything else uses str().
    textwidth -- Maximal number of characters kept before '...' is
                 appended.
    argstr --    Text that the rendering is appended to.

  RETURNS:
    argstr followed by the rendering of args. If the result is longer
    than textwidth, or if a container was not rendered completely, it is
    cut to textwidth characters and '...' is appended.

  DESCRIPTION:
    Similar to str(args) followed by truncation, but faster if args is
    huge because rendering of a container stops as soon as the text is
    longer than textwidth. Numeric arrays with more than textwidth
    elements are rendered as 'Array: <shape>'.
    Empty containers are rendered as '()', '[]' and '{}'.
  """

  WasTruncated = 0

  if type(args) not in (tuple, list, dict):
    if type(args) == str:
      argstr = argstr + "'"+str(args)+"'"
    else:
      # Truncate large Numeric arrays before using str().
      # Numeric is optional: without it there are no arrays to shorten.
      try:
        import Numeric
      except ImportError:
        Numeric = None

      if Numeric is not None and type(args) == Numeric.ArrayType:
        if len(args.flat) > textwidth:
          args = 'Array: ' + str(args.shape)

      argstr = argstr + str(args)

  elif type(args) == dict:
    argstr = argstr + "{"
    for key in args.keys():
      argstr = argstr + mkargstr(key, textwidth) + ": " + \
               mkargstr(args[key], textwidth) + ", "
      if len(argstr) > textwidth:
        WasTruncated = 1
        break

    # Strip off trailing comma and space. An empty dictionary has none,
    # and stripping would remove the opening brace instead.
    if len(args) > 0:
      argstr = argstr[:-2]
    argstr = argstr + "}"

  else:
    if type(args) == tuple:
      lc = '('
      rc = ')'
    else:
      lc = '['
      rc = ']'

    argstr = argstr + lc
    for arg in args:
      argstr = argstr + mkargstr(arg, textwidth) + ', '
      if len(argstr) > textwidth:
        WasTruncated = 1
        break

    # Strip off trailing comma and space unless singleton tuple (which
    # keeps its comma). An empty sequence has nothing to strip.
    if type(args) == tuple and len(args) == 1:
      argstr = argstr[:-1]
    elif len(args) > 0:
      argstr = argstr[:-2]
    argstr = argstr + rc

  if len(argstr) > textwidth:
    WasTruncated = 1

  if WasTruncated:
    argstr = argstr[:textwidth]+'...'
  return(argstr)
2292
2293# -----------------------------------------------------------------------------
2294
def test_OK(msg):
  """Report that a test succeeded.

  USAGE
    test_OK(message)

  DESCRIPTION:
    Prints message padded to textwidth4 followed by ' - OK'.
  """

  padded = msg.ljust(textwidth4)
  print(padded + ' - OK')
2307 
2308# -----------------------------------------------------------------------------
2309
def test_error(msg):
  """Print error if test fails, then raise StandardError.

  USAGE
    test_error(message)

  ARGUMENTS:
    message -- Description of the failure. It is printed and also
               attached to the exception that is raised.

  RAISES:
    StandardError -- always.
  """

  print('ERROR (caching.test): %s' %msg)
  print('Please send this code example and output to ')
  # Contact address as given in the header of this module
  print('Ole.Nielsen@ga.gov.au')
  print('')
  print('')

  # Carry the message so the failure is identifiable from the traceback
  raise StandardError(msg)
Note: See TracBrowser for help on using the repository browser.