source: inundation/caching/caching.py @ 2043

Last change on this file since 2043 was 2043, checked in by ole, 18 years ago

Investigated file write error on networked drives

File size: 68.0 KB
Line 
1# =============================================================================
2# caching.py - Supervised caching of function results.
3# Copyright (C) 1999, 2000, 2001, 2002 Ole Moller Nielsen
4# Australian National University (1999-2003)
5# Geoscience Australia (2003-present)
6#
7#    This program is free software; you can redistribute it and/or modify
8#    it under the terms of the GNU General Public License as published by
9#    the Free Software Foundation; either version 2 of the License, or
10#    (at your option) any later version.
11#
12#    This program is distributed in the hope that it will be useful,
13#    but WITHOUT ANY WARRANTY; without even the implied warranty of
14#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15#    GNU General Public License (http://www.gnu.org/copyleft/gpl.html)
16#    for more details.
17#
18#    You should have received a copy of the GNU General Public License
19#    along with this program; if not, write to the Free Software
20#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
21#
22#
23# Contact address: Ole.Nielsen@ga.gov.au
24#
25# Version 1.5.6 February 2002
26# =============================================================================
27 
28"""Module caching.py - Supervised caching of function results.
29
30Public functions:
31
32cache(func,args) -- Cache values returned from func given args.
33cachestat() --      Reports statistics about cache hits and time saved.
34test() --       Conducts a basic test of the caching functionality.
35
36See doc strings of individual functions for detailed documentation.
37"""
38
39# -----------------------------------------------------------------------------
40# Initialisation code
41
42# Determine platform
43#
44import os
# unix is 1 on every platform whose os.name is not one of the listed
# Windows/DOS identifiers ('what else?' is a placeholder left by the author).
unix = int(os.name not in ['nt', 'dos', 'win32', 'what else?'])

# Pick the home directory prefix and the line terminator for the platform.
#
if unix:
  homedir, CR = '~', '\n'
else:
  homedir, CR = 'c:', '\r\n'  #FIXME: Not tested under windows

# Default caching directory name, always ending in a path separator.
#
cachedir = os.sep.join([homedir, '.python_cache', ''])
60
61# -----------------------------------------------------------------------------
62# Options dictionary with default values - to be set by user
63#
64
# Default settings consulted by the caching functions.
# Change entries through set_option(key, value).
options = dict(
  cachedir=cachedir,  # Default cache directory
  maxfiles=1000000,   # Maximum number of cached files
  savestat=1,         # Log caching info to stats file
  verbose=1,          # Write messages to standard output
  bin=1,              # Use binary format (more efficient)
  compression=1,      # Use zlib compression
  bytecode=0,         # Recompute if bytecode has changed
  expire=0            # Automatically remove files that have been accessed
                      # least recently
)
76
77# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
78
def set_option(key, value):
  """Set the value of an existing entry in the options dictionary.

  USAGE:
    set_option(key, value)

  ARGUMENTS:
    key --   Key in options dictionary. (Required)
    value -- New value for key. (Required)

  DESCRIPTION:
    Assigns value to options[key].
    Raises KeyError if key is not already present in options, so that
    misspelled option names are rejected rather than silently added.
  """

  # 'in' replaces the deprecated dict.has_key() (removed in Python 3)
  if key not in options:
    raise KeyError(key)  # Key not found, raise an exception

  options[key] = value
98
99# -----------------------------------------------------------------------------
100# Function cache - the main routine
101
def cache(func, args=(), kwargs=None, dependencies=None, cachedir=None,
          verbose=None, compression=None, evaluate=0, test=0, clear=0,
          return_filename=0):
  """Supervised caching of function results.

  USAGE:
    result = cache(func, args, kwargs, dependencies, cachedir, verbose,
                   compression, evaluate, test, clear, return_filename)

  ARGUMENTS:
    func --            Function object (Required)
    args --            Arguments to func (Default: ())
    kwargs --          Keyword arguments to func (Default: None, meaning {})
    dependencies --    Filenames that func depends on (Default: None)
    cachedir --        Directory for cache files (Default: options['cachedir'])
    verbose --         Flag verbose output to stdout
                       (Default: options['verbose'])
    compression --     Flag zlib compression (Default: options['compression'])
    evaluate --        Flag forced evaluation of func (Default: 0)
    test --            Flag test for cached results (Default: 0)
    clear --           Flag delete cached results (Default: 0)
    return_filename -- Flag return of cache filename (Default: 0)

  RAISES:
    TypeError if kwargs is not a dictionary.

  DESCRIPTION:
    A Python function call of the form

      result = func(arg1,...,argn)

    can be replaced by

      from caching import cache
      result = cache(func,(arg1,...,argn))

  The latter form returns the same output as the former but reuses cached
  results if the function has been computed previously in the same context.
  'result' and the arguments can be simple types, tuples, list, dictionaries or
  objects, but not unhashable types such as functions or open file objects.
  The function 'func' may be a member function of an object or a module.

  This type of caching is particularly useful for computationally intensive
  functions with few frequently used combinations of input arguments. Note that
  if the inputs or output are very large caching might not save time because
  disc access may dominate the execution time.

  If the function definition changes after a result has been cached it will be
  detected by examining the functions bytecode (co_code, co_consts,
  func_defaults, co_argcount) and it will be recomputed.

  LIMITATIONS:
    1 Caching calls func(*args, **kwargs) and will work with anything that can
      be pickled, so any limitation in pickle extends to caching.
    2 A function to be cached should not depend on global variables
      as wrong results may occur if globals are changed after a result has
      been cached.

  -----------------------------------------------------------------------------
  Additional functionality:

  Keyword args
    Keyword arguments (kwargs) can be added as a dictionary of keyword: value
    pairs.
    A Python function call of the form

      result = func(arg1,...,argn, kwarg1=val1,...,kwargm=valm)

    is then cached as follows

      from caching import cache
      result = cache(func,(arg1,...,argn), {kwarg1:val1,...,kwargm:valm})

    Omitting kwargs is equivalent to passing {}.

  Explicit dependencies:
    The call
      cache(func,(arg1,...,argn),dependencies = <list of filenames>)
    Checks the size, creation time and modification time of each listed file.
    If any file has changed the function is recomputed and the results stored
    again.

  Specify caching directory:
    The call
      cache(func,(arg1,...,argn), cachedir = <cachedir>)
    designates <cachedir> where cached data are stored. Use ~ to indicate users
    home directory - not $HOME. The default is ~/.python_cache on a UNIX
    platform and c:/.python_cache on a Win platform.

  Silent operation:
    The call
      cache(func,(arg1,...,argn),verbose=0)
    suppresses messages to standard output.

  Compression:
    The call
      cache(func,(arg1,...,argn),compression=0)
    disables compression. (Default: compression=1). If the requested compressed
    or uncompressed file is not there, it'll try the other version.

  Forced evaluation:
    The call
      cache(func,(arg1,...,argn),evaluate=1)
    forces the function to evaluate even though cached data may exist.

  Testing for presence of cached result:
    The call
      cache(func,(arg1,...,argn),test=1)
    retrieves cached result if it exists, otherwise None. The function will not
    be evaluated. If both evaluate and test are switched on, evaluate takes
    precedence.

  Obtain cache filenames:
    The call
      cache(func,(arg1,...,argn),return_filename=1)
    returns the hashed base filename under which this function and its
    arguments would be cached

  Clearing cached results:
    The call
      cache(func,'clear')
    clears all cached data for 'func' and
      cache('clear')
    clears all cached data.

    NOTE: The string 'clear' can be passed as an *argument* to func using
      cache(func,('clear',)) or cache(func,tuple(['clear'])).

    New form of clear:
      cache(func,(arg1,...,argn),clear=1)
    clears cached data for particular combination func and args

  """

  # Imports and input checks
  #
  import time

  if kwargs is None:
    kwargs = {}  # Fresh dictionary per call (no shared mutable default)

  if not cachedir:
    cachedir = options['cachedir']

  if verbose is None:  # Do NOT write 'if not verbose:', it could be zero.
    verbose = options['verbose']

  if compression is None:  # Do NOT write 'if not compression:',
                           # it could be zero.
    compression = options['compression']

  # Create cache directory if needed
  #
  CD = checkdir(cachedir, verbose)

  # Handle the case cache('clear')
  #
  if isinstance(func, str) and func.lower() == 'clear':
    clear_cache(CD, verbose=verbose)
    return

  # Handle the case cache(func, 'clear')
  #
  if isinstance(args, str) and args.lower() == 'clear':
    clear_cache(CD, func, verbose=verbose)
    return

  # Force singleton arg into a tuple.
  # The exact type is tested on purpose: an instance of a tuple subclass
  # is still treated as ONE argument, as it always has been.
  #
  if type(args) is not tuple:
    args = (args,)

  # Check that kwargs is a dictionary
  #
  if not isinstance(kwargs, dict):
    raise TypeError('kwargs must be a dictionary, got %s'
                    % type(kwargs).__name__)

  # Hash arguments (and keyword args) to integer
  # FIXME: make faster hashing function
  #
  arghash = myhash((args, kwargs))

  # Get sizes and timestamps for files listed in dependencies.
  # Force singletons into a tuple.
  #
  if dependencies and not isinstance(dependencies, (tuple, list)):
    dependencies = (dependencies,)
  deps = get_depstats(dependencies)

  # Extract function name from func object
  #
  funcname = get_funcname(func)

  # Create cache filename
  #
  FN = funcname + '[' + repr(arghash) + ']'  # The symbol '(' does not work
                                             # under unix

  if return_filename:
    return FN

  if clear:
    # Remove every file (compressed or not) stored for this func/args pair
    for file_type in file_types:
      file_name = CD + FN + '_' + file_type
      for fn in [file_name, file_name + '.z']:
        if os.access(fn, os.F_OK):
          if unix:
            os.remove(fn)
          else:
            # FIXME: os.remove doesn't work under windows
            # NOTE(review): the file name is passed unquoted to the shell;
            # a cache directory containing spaces would break this command.
            os.system('del ' + fn)
          if verbose:
            print('MESSAGE (caching): File %s deleted' % fn)
    return None

  #-------------------------------------------------------------------

  # As documented, evaluate takes precedence over test: only skip the
  # evaluation when test is requested WITHOUT evaluate.
  #
  test_only = test and not evaluate

  # Check if previous computation has been cached
  #
  if evaluate:
    Retrieved = None  # Force evaluation of func regardless of caching status.
    reason = 4
  else:
    (T, FN, Retrieved, reason, comptime, loadtime, compressed) = \
      CacheLookup(CD, FN, func, args, kwargs, deps, verbose, compression,
                  dependencies)

  if not Retrieved:
    if test_only:  # Do not attempt to evaluate function
      T = None
    else:  # Evaluate function and save to cache
      if verbose:
        msg1(funcname, args, kwargs, reason)

      # Remove expired files automatically
      #
      if options['expire']:
        DeleteOldFiles(CD, verbose)

      # Save args before function is evaluated in case
      # they are modified by function
      #
      save_args_to_cache(CD, FN, args, kwargs, compression)

      # Execute and time function with supplied arguments
      #
      t0 = time.time()
      T = func(*args, **kwargs)
      comptime = time.time() - t0

      if verbose:
        msg2(funcname, args, kwargs, comptime, reason)

      # Save results and estimated loading time to cache
      #
      loadtime = save_results_to_cache(T, CD, FN, func, deps, comptime,
                                       funcname, dependencies, compression)
      if verbose:
        msg3(loadtime, CD, FN, deps, compression)
      compressed = compression

  # Log statistics unless this was a pure test that found nothing
  # (in that case comptime, loadtime and compressed carry no information).
  #
  if options['savestat'] and (not test_only or Retrieved):
    addstatsline(CD, funcname, FN, Retrieved, reason, comptime, loadtime,
                 compressed)

  return T  # Return results in all cases
369
370# -----------------------------------------------------------------------------
371
def cachestat(sortidx=4, period=-1, showuser=None, cachedir=None):
  """Report how effective caching has been.

  USAGE:
    cachestat(sortidx, period, showuser, cachedir)

  ARGUMENTS:
    sortidx --  Index of the field used to order the lists (default: 4)
                Legal values are
                 0: 'Name'
                 1: 'Hits'
                 2: 'CPU'
                 3: 'Time Saved'
                 4: 'Gain(%)'
                 5: 'Size'
    period --   -1 uses all available caching history,
                 0 uses the current month only (default -1).
    showuser -- Flag for an additional table with user statistics
                (default: None).
    cachedir -- Directory for cache files (default: options['cachedir']).

  DESCRIPTION:
    The logged caching statistics are turned into summaries of the form
    --------------------------------------------------------------------------
    Function Name   Hits   Exec(s)  Cache(s)  Saved(s)   Gain(%)      Size
    --------------------------------------------------------------------------
    All work is delegated to the module level helper __cachestat;
    nothing is returned.
  """

  __cachestat(sortidx, period, showuser, cachedir)
402
403# -----------------------------------------------------------------------------
404
405#Has mostly been moved to proper unit test
def test(cachedir=None, verbose=0, compression=None):
  """Test the functionality of caching.

  USAGE:
    test(cachedir, verbose, compression)

  ARGUMENTS:
    cachedir --    Directory for cache files (Default: options['cachedir'])
    verbose --     Flag whether caching will output its statistics (default=0)
    compression -- Flag zlib compression (Default: options['compression'])

  DESCRIPTION:
    Runs a sequence of checks (evaluation, retrieval, filenames, clearing,
    dependencies, performance, statistics) and reports each one through
    test_OK or test_error.

    Side effects: writes 'testfile.tmp' and cache files into the cache
    directory and changes the options 'savestat' (left at 1) and, when
    given or when zlib is missing, 'compression'.
  """

  import time

  # Initialise
  #
  import caching
  reload(caching)

  if not cachedir:
    cachedir = options['cachedir']

  if verbose is None:  # Do NOT write 'if not verbose:', it could be zero.
    verbose = options['verbose']

  if compression is None:  # Do NOT write 'if not compression:',
                           # it could be zero.
    compression = options['compression']
  else:
    try:
      set_option('compression', compression)
    except Exception:
      test_error('Set option failed')

  try:
    import zlib
  except ImportError:
    print('')
    print('*** Could not find zlib, default to no-compression      ***')
    print('*** Installing zlib will improve performance of caching ***')
    print('')
    compression = 0
    set_option('compression', compression)

  print('')
  print_header_box('Testing caching module - please stand by')
  print('')

  # Define a test function to be cached
  #
  def f(a,b,c,N,x=0,y='abcdefg'):
    """f(a,b,c,N)
       Do something time consuming and produce a complex result.
    """

    B = []
    for n in range(N):
      s = str(n+2.0/(n + 4.0))+'.a'*10
      B.append((a,b,c,s,n,x,y))
    return(B)

  # Check that default cachedir is OK
  #
  CD = checkdir(cachedir,verbose)

  # Make a dependency file
  #
  DepFN = CD + 'testfile.tmp'
  DepFN_wildcard = CD + 'test*.tmp'
  try:
    Depfile = open(DepFN,'w')
    try:
      Depfile.write('We are the knights who say NI!')
    finally:
      Depfile.close()  # Close even if the write fails
    test_OK('Wrote file %s' %DepFN)
  except Exception:
    test_error('Could not open file %s for writing - check your environment' \
               % DepFN)

  # Check set_option (and switch stats off)
  #
  try:
    set_option('savestat',0)
    assert(options['savestat'] == 0)
    test_OK('Set option')
  except Exception:
    test_error('Set option failed')

  # Make some test input arguments
  #
  N = 5000  #Make N fairly small here

  a = [1,2]
  b = ('Thou shalt count the number three',4)
  c = {'Five is right out': 6, (7,8): 9}
  x = 3
  y = 'holy hand granate'

  # Test caching - first without and then (if enabled) with compression
  #
  if compression:
    comprange = 2
  else:
    comprange = 1

  for comp in range(comprange):

    # Evaluate and store
    #
    try:
      T1 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, evaluate=1, \
                         verbose=verbose, compression=comp)
      if comp:
        test_OK('Caching evaluation with compression')
      else:
        test_OK('Caching evaluation without compression')
    except Exception:
      if comp:
        test_error('Caching evaluation with compression failed - try caching.test(compression=0)')
      else:
        test_error('Caching evaluation failed - try caching.test(verbose=1)')

    # Retrieve
    #
    try:
      T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                         compression=comp)

      if comp:
        test_OK('Caching retrieval with compression')
      else:
        test_OK('Caching retrieval without compression')
    except Exception:
      if comp:
        test_error('Caching retrieval with compression failed - try caching.test(compression=0)')
      else:
        test_error('Caching retrieval failed - try caching.test(verbose=1)')

    # Reference result
    #
    T3 = f(a,b,c,N,x=x,y=y)  # Compute without caching

    if T1 == T2 and T2 == T3:
      if comp:
        test_OK('Basic caching functionality (with compression)')
      else:
        test_OK('Basic caching functionality (without compression)')
    else:
      test_error('Cached result does not match computed result')

  # Test return_filename
  #
  try:
    FN = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       return_filename=1)
    assert(FN[:2] == 'f[')
    test_OK('Return of cache filename')
  except Exception:
    test_error('Return of cache filename failed')

  # Test existence of cachefiles
  #
  try:
    (datafile,compressed0) = myopen(CD+FN+'_'+file_types[0],"rb",compression)
    (argsfile,compressed1) = myopen(CD+FN+'_'+file_types[1],"rb",compression)
    (admfile,compressed2) =  myopen(CD+FN+'_'+file_types[2],"rb",compression)
    test_OK('Presence of cache files')
    datafile.close()
    argsfile.close()
    admfile.close()
  except Exception:
    test_error('Expected cache files did not exist')

  # Test 'test' function when cache is present
  #
  try:
    T4 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, test=1)
    assert(T1 == T4)

    test_OK("Option 'test' when cache file present")
  except Exception:
    test_error("Option 'test' when cache file present failed")

  # Test that 'clear' works
  # (only the clear=1 form is exercised; cache(f,'clear') is not tested here)
  #
  try:
    caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, clear=1)
    test_OK('Clearing of cache files')
  except Exception:
    test_error('Clear does not work')

  # Test 'test' function when cache is absent
  #
  try:
    T4 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, test=1)
    assert(T4 is None)
    test_OK("Option 'test' when cache absent")
  except Exception:
    test_error("Option 'test' when cache absent failed")

  # Test dependencies
  #
  T1 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN)
  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN)

  if T1 == T2:
    test_OK('Basic dependencies functionality')
  else:
    test_error('Dependencies do not work')

  # Test basic wildcard dependency
  #
  T3 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN_wildcard)

  if T1 == T3:
    test_OK('Basic dependencies with wildcard functionality')
  else:
    test_error('Dependencies with wildcards do not work')

  # Test that changed timestamp in dependencies triggers recomputation

  # Modify dependency file
  Depfile = open(DepFN,'a')
  try:
    Depfile.write('You must cut down the mightiest tree in the forest with a Herring')
  finally:
    Depfile.close()

  T3 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN, test = 1)

  if T3 is None:
    test_OK('Changed dependencies recognised')
  else:
    test_error('Changed dependencies not recognised')

  # Test recomputation when dependencies have changed
  #
  T3 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN)
  if T1 == T3:
    test_OK('Recomputed value with changed dependencies')
  else:
    test_error('Recomputed value with changed dependencies failed')

  # Performance test (with statistics)
  # Don't really rely on this as it will depend on specific computer.
  #

  set_option('savestat',1)

  N = 20*N   #Should be large on fast computers...
  tt = time.time()
  T1 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose)
  t1 = time.time() - tt

  tt = time.time()
  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose)
  t2 = time.time() - tt

  if T1 == T2:
    # A slower second call is tolerated silently: it may be platform specific
    if t1 > t2:
      test_OK('Performance test: relative time saved = %s pct' \
              %str(round((t1-t2)*100/t1,2)))
  else:
    test_error('Basic caching failed for new problem')

  # Test presence of statistics file
  #
  try:
    for stat_name in os.listdir(CD):
      if stat_name.find(statsfile) >= 0:
        fid = open(CD+stat_name,'r')
        fid.close()
    test_OK('Statistics files present')
  except Exception:
    # NOTE(review): a failure is reported through test_OK, not test_error.
    # This looks like deliberate leniency - confirm before changing it.
    test_OK('Statistics files cannot be opened')

  print_header_box('Show sample output of the caching function:')

  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=0)
  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=0)
  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=1)

  print_header_box('Show sample output of cachestat():')
  if unix:
    cachestat()
  else:
    try:
      time.strptime('2030','%Y')  # Probe: fails where strptime is missing
      cachestat()
    except Exception:
      print('caching.cachestat() does not work here, because it')
      print('relies on time.strptime() which is unavailable in Windows')

  print('')
  test_OK('Caching self test completed')

  # Test setoption (not yet implemented)
  #
726
727 
728#==============================================================================
729# Auxiliary functions
730#==============================================================================
731
732# Import pickler
733# cPickle is used by functions mysave, myload, and compare
734#
try:
  import cPickle  # 10 to 100 times faster than pickle
except ImportError:
  # Fall back to the pure Python implementation where the C accelerated
  # module is unavailable; the name cPickle stays bound either way.
  import pickle as cPickle
pickler = cPickle
737
738# Local immutable constants
739#
# Compression level handed to zlib; level 1 works well.
comp_level = 1

# Text widths used when formatting messages and reports.
textwidth1 = 16   # Width of key fields in report forms.
textwidth2 = 132  # Maximal width of textual representation of arguments.
textwidth3 = 16   # Initial width of separation lines. Is modified.
textwidth4 = 50   # Text width in test_OK()

# Base filename for cached statistics.
# It will reside in the chosen cache directory.
statsfile = '.cache_stat'

# File name extensions, in this order:
# cached function results, stored function args, administrative info.
file_types = ['Result', 'Args', 'Admin']

# Verbose reasons for recomputation, indexed by reason code.
Reason_msg = [
  'OK',
  'No cached result',
  'Dependencies have changed',
  'Byte code or arguments have changed',
  'Recomputation was requested by caller',
  'Cached file was unreadable',
]
760             
761# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
762
def CacheLookup(CD, FN, func, args, kwargs, deps, verbose, compression,
                dependencies):
  """Determine whether cached result exists and return info.

  USAGE:
    (T, FN, Retrieved, reason, comptime, loadtime, compressed) = \\
    CacheLookup(CD, FN, func, args, kwargs, deps, verbose, compression, \\
                dependencies)

  INPUT ARGUMENTS:
    CD --            Cache Directory
    FN --            Suggested cache file name
    func --          Function object
    args --          Tuple of arguments
    kwargs --        Dictionary of keyword arguments
    deps --          Dependencies time stamps
    verbose --       Flag text output
    compression --   Flag zlib compression
    dependencies --  Given list of dependencies

  OUTPUT ARGUMENTS:
    T --             Cached result if present otherwise None
    FN --            File name under which new results must be saved
    Retrieved --     True if a valid cached result was found
    reason --        0: OK (if Retrieved),
                     1: No cached result,
                     2: Dependencies have changed,
                     3: Arguments or Bytecode have changed
                     4: Recomputation was forced
                     Any positive code reported by myload or load_from_cache
                     is passed on unchanged.
    comptime --      Number of seconds it took to compute the cached result
    loadtime --      Number of seconds it took to load cached result
    compressed --    Flag (0,1) if cached results were compressed or not

  DESCRIPTION:
    Determine if cached result exists as follows:
    Load in saved arguments and bytecode stored under hashed filename.
    If they are identical to current arguments and bytecode and if dependencies
    have not changed their time stamp, then return cached result.
    (Bytecode is only compared when options['bytecode'] is set.)

    Otherwise return filename under which new results should be cached.
    Hash collisions are handled recursively by calling CacheLookup again with a
    modified filename.

    Every file opened here is closed again before returning.
  """

  # Assess whether cached result exists - compressed or not.
  #
  if verbose:
    print('Caching: looking for cached files %s_{%s,%s,%s}.z'
          % (CD+FN, file_types[0], file_types[1], file_types[2]))
  (datafile, compressed0) = myopen(CD+FN+'_'+file_types[0], "rb", compression)
  (argsfile, compressed1) = myopen(CD+FN+'_'+file_types[1], "rb", compression)
  (admfile, compressed2) = myopen(CD+FN+'_'+file_types[2], "rb", compression)

  if not (argsfile and datafile and admfile) or \
     not (compressed0 == compressed1 and compressed0 == compressed2):
    # Cached result does not exist or files were compressed differently
    #
    # This will ensure that evaluation will take place unless all files are
    # present.

    # Close whichever files did open so that no handles are left behind
    # (open handles can prevent later deletion of the cache files).
    for fid in (datafile, argsfile, admfile):
      if fid:
        fid.close()

    reason = 1
    return (None, FN, None, reason, None, None, None)  # Recompute using
                                                       # same filename

  compressed = compressed0  # Remember if compressed files were actually used
  datafile.close()  # Only its presence was tested; it is reloaded below

  # Retrieve arguments and adm. info
  #
  try:
    R, reason = myload(argsfile, compressed)  # The original arguments
    if reason > 0:
      return (None, FN, None, reason, None, None, None)  # Recompute using
                                                         # same filename
    (argsref, kwargsref) = R

    R, reason = myload(admfile, compressed)
    if reason > 0:
      return (None, FN, None, reason, None, None, None)  # Recompute using
                                                         # same filename
  finally:
    # Runs on every exit path, including the early returns above
    argsfile.close()
    admfile.close()

  depsref  = R[0]  # Dependency statistics
  comptime = R[1]  # The computation time
  coderef  = R[2]  # The byte code
  funcname = R[3]  # The function name

  # Check if dependencies have changed
  #
  if dependencies and not compare(depsref, deps):
    if verbose:
      print('MESSAGE (caching.py): Dependencies %s have changed - recomputing'
            % (dependencies,))
    # Don't use cached file - recompute
    reason = 2
    return (None, FN, None, reason, None, None, None)

  # Get bytecode from func
  #
  bytecode = get_bytecode(func)

  # Check if arguments or bytecode have changed
  #
  if compare(argsref, args) and compare(kwargsref, kwargs) and \
     (not options['bytecode'] or compare(bytecode, coderef)):

    # Arguments and dependencies match. Get cached results
    #
    T, loadtime, compressed, reason = load_from_cache(CD, FN, compressed)
    # Do not test 'T == None' here: that doesn't work if T is a numeric array
    if reason > 0:
      return (None, FN, None, reason, None, None, None)  # Recompute using
                                                         # same FN

    Retrieved = 1
    reason = 0

    if verbose:
      msg4(funcname, args, kwargs, deps, comptime, loadtime, CD, FN,
           compressed)

      if loadtime >= comptime:
        print('WARNING (caching.py): Caching did not yield any gain.')
        print('                      Consider executing function  ('
              + funcname + ') without caching.')
  else:

    # Non matching arguments or bytecodes signify a hash-collision.
    # This is resolved by recursive search of cache filenames
    # until either a matching or an unused filename is found.
    #
    (T, FN, Retrieved, reason, comptime, loadtime, compressed) = \
       CacheLookup(CD, FN+'x', func, args, kwargs, deps, verbose, compression,
                   dependencies)

    if not Retrieved:
      reason = 3     #The real reason is that args or bytecodes have changed.
                     #Not that the recursive search has found an unused
                     #filename

  return (T, FN, Retrieved, reason, comptime, loadtime, compressed)
914
915# -----------------------------------------------------------------------------
916
def clear_cache(CD,func=None, verbose=None):
  """Clear cache for func.

  USAGE:
     clear_cache(CD, func, verbose)

  ARGUMENTS:
     CD --       Caching directory (required)
     func --     Function object (default: None)
     verbose --  Flag verbose output (default: None, meaning that
                 options['verbose'] is used)

  DESCRIPTION:

    If func == None, clear everything,
    otherwise clear only files pertaining to func, i.e. files in CD whose
    names start with the name returned by get_funcname(func).

    When everything is to be cleared and verbose is set, the files are
    listed and the user must confirm with 'Y' or 'y' before anything is
    deleted. Without verbose the files are deleted without asking.
  """

  import os

  # File names are appended directly to CD below, so it must end with a
  # separator. (An empty CD still raises IndexError here.)
  if CD[-1] != os.sep:
    CD = CD+os.sep

  if verbose is None:
    verbose = options['verbose']

  if func:
    funcname = get_funcname(func)
    if verbose:
      print('MESSAGE (caching.py): Clearing %s' %(CD+funcname+'*'))

    # Plain prefix test; cheaper than a regular expression
    for file_name in os.listdir(CD):
      if file_name[:len(funcname)] == funcname:
        # os.remove is used on all platforms rather than handing an
        # unquoted path to the shell ('del ...') on Windows.
        os.remove(CD+file_name)
    return

  file_names = os.listdir(CD)
  if len(file_names) == 0:
    return

  if verbose:
    print('MESSAGE (caching.py): Remove the following files:')
    for file_name in file_names:
      print(file_name)

    try:
      ask = raw_input  # Python 2
    except NameError:
      ask = input      # Python 3
    A = ask('Delete (Y/N)[N] ?')
  else:
    A = 'Y'

  if A == 'Y' or A == 'y':
    for file_name in file_names:
      os.remove(CD+file_name)
979
980# -----------------------------------------------------------------------------
981
def DeleteOldFiles(CD,verbose=None):
  """Remove expired files

  USAGE:
    DeleteOldFiles(CD,verbose=None)

  ARGUMENTS:
    CD --       Caching directory
    verbose --  Flag verbose output (default: None, meaning that
                options['verbose'] is used)

  DESCRIPTION:
    If CD holds more than options['maxfiles'] entries, the least recently
    accessed regular files are removed: the excess plus a further block
    of 1000 files. Nothing is removed on non-Unix platforms.

    The work is done with os.listdir, os.stat and os.remove, so file
    names are never passed through a shell.
  """

  import os

  if verbose is None:
    verbose = options['verbose']

  maxfiles = options['maxfiles']

  block = 1000  # How many extra files to delete per invocation
  Files = os.listdir(CD)
  numfiles = len(Files)
  if not unix: return  # FIXME: Windows case ?

  if numfiles <= maxfiles:
    return

  delfiles = numfiles-maxfiles+block

  # Collect (access time, path) for every regular file
  candidates = []
  for name in Files:
    path = os.path.join(CD, name)
    try:
      if os.path.isfile(path):
        candidates.append((os.stat(path)[7], path))  # [7] is atime
    except OSError:
      pass  # File disappeared in the meantime - nothing to delete

  candidates.sort()  # Least recently accessed first
  expired = candidates[:delfiles]

  if verbose:
    print('Deleting %d expired files:' %len(expired))
    for (atime, path) in expired:
      print(path)

  for (atime, path) in expired:
    try:
      os.remove(path)
    except OSError:
      pass  # Best effort: file may be gone already or not removable
1010
1011# -----------------------------------------------------------------------------
1012
def save_args_to_cache(CD,FN,args,kwargs,compression):
  """Save arguments to cache

  USAGE:
    save_args_to_cache(CD,FN,args,kwargs,compression)

  ARGUMENTS:
    CD --           Caching directory (file names are appended directly)
    FN --           Base name of the cache files
    args --         Positional arguments to store
    kwargs --       Keyword arguments to store
    compression --  Flag zlib compression

  DESCRIPTION:
    The tuple (args, kwargs) is written with mysave to the file
    CD+FN+'_'+file_types[1], opened through myopen.
    IOError is raised if that file cannot be opened.
  """

  filename = CD+FN+'_'+file_types[1]
  (argsfile, compressed) = myopen(filename, 'wb', compression)

  if not argsfile:
    # myopen returns None on failure, so report the requested name
    if options['verbose']:
      print('ERROR (caching): Could not open %s' %filename)
    raise IOError('Could not open %s' %filename)

  try:
    mysave((args,kwargs),argsfile,compression)  # Save args and kwargs to cache
  finally:
    argsfile.close()  # Also when mysave fails

  # Access rights of newly created files are set by myopen

  return
1043
1044# -----------------------------------------------------------------------------
1045
def save_results_to_cache(T, CD, FN, func, deps, comptime, funcname,
                          dependencies, compression):
  """Save computed results T and admin info to cache

  USAGE:
    save_results_to_cache(T, CD, FN, func, deps, comptime, funcname,
                          dependencies, compression)

  ARGUMENTS:
    T --             Result to be cached
    CD --            Caching directory (file names are appended directly)
    FN --            Base name of the cache files
    func --          Function object; its bytecode is stored
    deps --          Dependency statistics stored with the admin info
    comptime --      Computation time stored with the admin info
    funcname --      Function name stored with the admin info
    dependencies --  Not used by this function
    compression --   Flag zlib compression

  DESCRIPTION:
    T is written to CD+FN+'_'+file_types[0] and the tuple
    (deps, comptime, bytecode, funcname) to CD+FN+'_'+file_types[2].
    Returns the time in seconds spent saving T.
    IOError is raised if either file cannot be opened.
  """

  import time

  dataname = CD+FN+'_'+file_types[0]
  admname = CD+FN+'_'+file_types[2]

  (datafile, compressed1) = myopen(dataname,'wb',compression)
  (admfile, compressed2) = myopen(admname,'wb',compression)

  if not datafile or not admfile:
    # myopen returns None on failure, so report the requested names and
    # make sure that a handle which did open is not leaked.
    for (handle, name) in ((datafile, dataname), (admfile, admname)):
      if handle:
        handle.close()
      elif options['verbose']:
        print('ERROR (caching): Could not open %s' %name)
    raise IOError('Could not open cache files for %s' %(CD+FN))

  try:
    t0 = time.time()

    mysave(T,datafile,compression)  # Save data to cache
    datafile.close()
    savetime = time.time()-t0

    bytecode = get_bytecode(func)  # Get bytecode from function object
    admtup = (deps, comptime, bytecode, funcname)  # Gather admin info

    mysave(admtup,admfile,compression)  # Save admin info to cache
  finally:
    # Closing an already closed file is harmless
    datafile.close()
    admfile.close()

  # Access rights of newly created files are set by myopen

  return(savetime)
1095
1096# -----------------------------------------------------------------------------
1097
def load_from_cache(CD,FN,compression):
  """Load previously cached data from file FN

  USAGE:
    load_from_cache(CD,FN,compression)

  ARGUMENTS:
    CD --           Caching directory (file names are appended directly)
    FN --           Base name of the cache files
    compression --  Flag zlib compression (preferred file variant)

  DESCRIPTION:
    Opens CD+FN+'_'+file_types[0] through myopen and reads it with myload.
    Returns the tuple (T, loadtime, compressed, reason) where reason is
    the code from myload (0 means success). If the data file cannot be
    opened, (None, 0.0, compressed, 5) is returned, i.e. the same reason
    code that myload uses for an unreadable file.
  """

  import time

  (datafile, compressed) = myopen(CD+FN+'_'+file_types[0],"rb",compression)
  if not datafile:
    # The data file is missing or unreadable: report it like a corrupted
    # file rather than failing on the None handle.
    return None, 0.0, compressed, 5

  t0 = time.time()
  try:
    T, reason = myload(datafile,compressed)
    loadtime = time.time()-t0
  finally:
    datafile.close()

  return T, loadtime, compressed, reason
1115
1116# -----------------------------------------------------------------------------
1117
def myopen(FN,mode,compression=1):
  """Open file FN using given mode

  USAGE:
    myopen(FN,mode,compression=1)

  ARGUMENTS:
    FN --           File name to be opened
    mode --         Open mode (as in open)
    compression --  Flag zlib compression

  DESCRIPTION:
     if compression
       Attempt first to open FN + '.z'
       If this fails try to open FN
     else do the opposite
     Return file handle plus info about whether it was compressed or not.
     The handle is None if neither name could be opened.

     When a file is created by this call (mode contains 'w' or 'a' and
     neither FN nor FN + '.z' could be read beforehand) its access rights
     are set to 666 on Unix.
  """

  import os, stat

  # Determine if file exists already (if writing was requested)
  # This info is only used to determine if access modes should be set
  #
  new_file = 0  # Assume it exists if mode was not 'w' or 'a'
  if 'w' in mode or 'a' in mode:
    new_file = 1
    for name in (FN+'.z', FN):
      try:
        open(name,'r').close()
      except (IOError, OSError):
        continue
      new_file = 0
      break

  # Order in which the two variants are tried: (name, compressed flag)
  if compression:
    candidates = [(FN+'.z', 1), (FN, 0)]
  else:
    candidates = [(FN, 0), (FN+'.z', 1)]

  fid = None
  compressed = 0
  for (name, flag) in candidates:
    try:
      fid = open(name,mode)
    except (IOError, OSError):
      continue
    compressed = flag
    break

  # Now set access rights if it is a new file
  #
  if fid and new_file:
    if unix:
      rw_all = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | \
               stat.S_IROTH | stat.S_IWOTH  # Same as chmod 666
      try:
        os.chmod(fid.name, rw_all)
      except OSError:
        pass  # Best effort only
    else:
      pass  # FIXME: Take care of access rights under Windows

  return(fid,compressed)
1187
1188# -----------------------------------------------------------------------------
1189
def myload(file, compressed):
  """Load data from file

  USAGE:
    myload(file, compressed)

  ARGUMENTS:
    file --        Open file object positioned at the pickled data
    compressed --  Flag indicating that the content is zlib compressed

  DESCRIPTION:
    Returns the tuple (R, reason). On success R is the unpickled object
    and reason is 0. If the content cannot be decompressed or unpickled,
    (None, 5) is returned (5 means unreadable file).
    MemoryError is reported (if options['verbose'] is set) and re-raised.

    NOTE: The data is unpickled, so cache files must come from a trusted
    source.
  """

  reason = 0
  try:
    if compressed:
      import zlib

      RsC = file.read()
      try:
        Rs = zlib.decompress(RsC)
        del RsC  # Free up some space
        R = pickler.loads(Rs)
      except MemoryError:
        raise  # Handled below
      except Exception:
        # E.g. zlib.error: Error -5 while decompressing data,
        # or a corrupted pickle
        reason = 5  #(Unreadable file)
        return None, reason
    else:
      try:
        R = pickler.load(file)
      except MemoryError:
        raise  # Handled below
      except Exception:
        #Catch e.g., file with 0 length or corrupted
        reason = 5  #(Unreadable file)
        return None, reason

  except MemoryError:
    if options['verbose']:
      print('ERROR (caching): Out of memory while loading %s, aborting' \
            %(file.name))

    # Raise the error again for now (keeping the original traceback)
    #
    raise

  return R, reason
1239
1240# -----------------------------------------------------------------------------
1241
def mysave(T,file,compression):
  """Save data T to file

  USAGE:
    mysave(T,file,compression)

  ARGUMENTS:
    T --            Object to be pickled
    file --         File object opened for (binary) writing
    compression --  Flag zlib compression

  DESCRIPTION:
    T is pickled using the protocol given by options['bin'].
    With compression the pickle is built in memory, compressed with zlib
    at level comp_level and then written; otherwise it is dumped straight
    to the file.
    Raises Exception if compression is requested but zlib is missing and
    MemoryError if the data cannot be pickled in memory.
  """

  binary = options['bin']

  if compression:
    try:
      import zlib
    except ImportError:
      print('')
      print('*** Could not find zlib ***')
      print('*** Try to run caching with compression off ***')
      print("*** caching.set_option('compression', 0) ***")
      raise Exception('Could not find zlib - ' +
                      "try caching.set_option('compression', 0)")

    try:
      Ts  = pickler.dumps(T, binary)
    except MemoryError:
      msg = '****WARNING (caching.py): Could not pickle data for compression.'
      msg += ' Try using compression = False'
      raise MemoryError(msg)
    else:
      #Compressed pickling
      TsC = zlib.compress(Ts, comp_level)
      file.write(TsC)
  else:
      #Uncompressed pickling
      pickler.dump(T, file, binary)

      # FIXME: This may not work on Windows network drives.
      # The error msg is IOError: [Errno 22] Invalid argument
      # Testing with small files was OK, though, so this looks like an
      # OS level problem: according to
      # http://www.ultraseek.com/support/faqs/4173.html
      # this IOError is raised by the Python interpreter when a disk
      # access operation fails for an I/O-related reason.
1309     
1310
1311# -----------------------------------------------------------------------------
1312
def myhash(T):
  """Compute hashed integer from hashable values of tuple T

  USAGE:
    myhash(T)

  ARGUMENTS:
    T -- Tuple

  DESCRIPTION:
    Tuples and lists are hashed recursively element by element, so that
    unhashable members (e.g. nested lists) can be handled, and a tuple
    and a list with the same elements give the same value.
    Dictionaries are passed on to dicthash.
    Anything else is hashed with the built-in hash; objects for which this
    fails yield 1, except Numeric arrays which are hashed element by
    element (if Numeric is available).
  """

  # Get hash vals for hashable entries
  #
  # Exact type tests (not isinstance) so that subclasses keep being hashed
  # by the built-in hash.
  if type(T) == tuple or type(T) == list:
    hvals = [myhash(entry) for entry in T]
    val = hash(tuple(hvals))
  elif type(T) == dict:
    val = dicthash(T)
  else:
    try:
      val = hash(T)
    except Exception:
      val = 1  # Fallback value for unhashable objects
      try:
        import Numeric
        if type(T) == Numeric.ArrayType:
          hvals = [myhash(e) for e in T]
          val = hash(tuple(hvals))
        # Could implement other Numeric types here
      except Exception:
        pass  # Numeric not available or array not hashable: keep 1

  return(val)
1354
1355# -----------------------------------------------------------------------------
1356
def dicthash(D):
  """Compute hashed integer from hashable values of dictionary D

  USAGE:
    dicthash(D)

  ARGUMENTS:
    D -- Dictionary

  DESCRIPTION:
    The values of D are hashed one by one, in the order in which D lists
    its keys, and the results are combined into one hash value.
    Values that cannot be hashed are skipped. The keys themselves do not
    enter the result.
  """

  # Get hash values for hashable entries
  #
  hvals = []
  for key in D.keys():
    try:
      hvals.append(hash(D[key]))
    except Exception:
      pass  # Unhashable value: deliberately left out

  # Hash obtained values into one value
  #
  return(hash(tuple(hvals)))
1379
1380# -----------------------------------------------------------------------------
1381
def compare(A,B):
  """Safe comparison of general objects

  USAGE:
    compare(A,B)

  DESCRIPTION:
    Return 1 (true) if A and B are identical, 0 (false) otherwise.

    A and B are compared with ==. If that raises an exception, or yields
    something other than a plain truth value (e.g. an element-wise result
    for array types), their pickled representations are compared instead.
    If pickling fails as well the objects are regarded as different.
  """

  try:
    identical = (A == B)
  except Exception:
    identical = None  # Not comparable this way

  # bool is a subclass of int, so this accepts True/False as well as 0/1
  if not isinstance(identical, int):
    try:
      identical = (pickler.dumps(A) == pickler.dumps(B))
    except Exception:
      identical = 0

  return(identical)
1401
1402# -----------------------------------------------------------------------------
1403
def nospace(s):
  """Return a copy of string s where every space is an underscore

  USAGE:
    nospace(s)

  ARGUMENTS:
    s -- string

  DESCRIPTION:
    Only the space character ' ' is replaced; other whitespace such as
    tabs or newlines is left as it is.
  """

  pieces = []
  for character in s:
    if character == ' ':
      pieces.append('_')
    else:
      pieces.append(character)

  return(''.join(pieces))
1424
1425# -----------------------------------------------------------------------------
1426
def get_funcname(func):
  """Retrieve name of function object func (depending on its type)

  USAGE:
    get_funcname(func)

  DESCRIPTION:
    For Python functions and built-in functions the name is func.__name__.
    For any other object the name is derived from repr(func): the
    characters <, > and ' are dropped and the remaining words are joined.
    In both cases spaces are replaced by underscores (as nospace does).
  """

  import types

  if type(func) == types.FunctionType or \
     type(func) == types.BuiltinFunctionType:
    funcname = func.__name__
  else:
    # NOTE(review): if repr(func) contains a memory address the resulting
    # name differs from run to run - confirm whether such objects
    # (methods, callable instances) are expected here.
    tmp = repr(func)
    for unwanted in "<>'":
      tmp = tmp.replace(unwanted, ' ')
    funcname = ' '.join(tmp.split())  # Collapse runs of whitespace

  funcname = funcname.replace(' ', '_')
  return(funcname)
1448
1449# -----------------------------------------------------------------------------
1450
def get_bytecode(func):
  """ Get bytecode from function object.

  USAGE:
    get_bytecode(func)

  DESCRIPTION:
    Returns the tuple (bytecode, consts, argcount, defaults) taken from
    the code object of a Python function or of the function underlying a
    method. For everything else (e.g. built-in functions) the tuple
    (None, 0, 0, 0) is returned.
  """

  import types

  if type(func) == types.MethodType:
    # Look at the function behind the method
    try:
      func = func.__func__
    except AttributeError:
      func = func.im_func  # Older Python 2 versions

  if type(func) == types.FunctionType:
    try:
      code = func.__code__
      defaults = func.__defaults__
    except AttributeError:
      code = func.func_code  # Older Python 2 versions
      defaults = func.func_defaults

    return (code.co_code, code.co_consts, code.co_argcount, defaults)

  # Built-in functions are assumed not to change
  return (None, 0, 0, 0)
1478
1479# -----------------------------------------------------------------------------
1480
def get_depstats(dependencies):
  """ Build dictionary of dependency files and their size and mod. time.

  USAGE:
    get_depstats(dependencies):

  ARGUMENTS:
    dependencies -- Sequence of file names, which may contain wildcards
                    (or None / empty for no dependencies)

  DESCRIPTION:
    Every name is expanded with glob and each resulting file is mapped to
    the tuple (size, mtime) obtained from filestat.
    An Exception is raised if a dependency is not a string or if it does
    not refer to any existing file.
  """

  import glob, os

  try:
    string_types = basestring  # Python 2: str and unicode
  except NameError:
    string_types = str         # Python 3

  d = {}
  if not dependencies:
    return(d)

  #Expand any wildcards
  expanded_dependencies = []
  for FN in dependencies:
    # Must be checked before globbing, which only accepts strings
    if not isinstance(FN, string_types):
      errmsg = 'ERROR (caching.py): Dependency must be a string.\n'
      errmsg += '                    Dependency given: %s' %str(FN)
      raise Exception(errmsg)

    expanded_FN = glob.glob(FN)
    if not expanded_FN:
      if os.access(FN,os.F_OK):
        # Existing file whose name happens to contain wildcard characters
        expanded_FN = [FN]
      else:
        # glob silently returns nothing for missing files
        errmsg = 'ERROR (caching.py): Dependency '+FN+' does not exist.'
        raise Exception(errmsg)

    expanded_dependencies += expanded_FN

  for FN in expanded_dependencies:
    if not os.access(FN,os.F_OK):
      errmsg = 'ERROR (caching.py): Dependency '+FN+' does not exist.'
      raise Exception(errmsg)
    (size,atime,mtime,ctime) = filestat(FN)

    # We don't use atime because that would cause recomputation every time.
    # We don't use ctime because that is irrelevant and confusing for users.
    d[FN] = (size,mtime)

  return(d)
1517
1518# -----------------------------------------------------------------------------
1519
def filestat(FN):
  """A safe wrapper using os.stat to get basic file statistics
     The built-in os.stat breaks down if file sizes are too large (> 2GB ?)

  USAGE:
    filestat(FN)

  DESCRIPTION:
     Returns the tuple (size, atime, mtime, ctime) for file FN.

     If os.stat fails, the values are (on Unix only) extracted from the
     output of 'ls -l --full-time' through get_lsline. On other platforms
     the error from os.stat is raised again.

     To avoid the fallback Python must be compiled with
     CFLAGS="`getconf LFS_CFLAGS`" OPT="-g -O2 $CFLAGS" \
              configure
     as given in section 8.1.1 Large File Support in the Library Reference
  """

  import os, tempfile

  def quote(name):
    # Single-quote name for the shell, escaping embedded single quotes
    return "'" + name.replace("'", "'\\''") + "'"

  try:
    stats = os.stat(FN)
    size  = stats[6]
    atime = stats[7]
    mtime = stats[8]
    ctime = stats[9]
  except (OSError, OverflowError):

    # Hack to get the results anyway (works only on Unix at the moment)
    #
    print('Hack to get os.stat when files are too large')

    if not unix:
      raise  # FIXME: Windows case

    # Private, unpredictable temporary file for the output of ls
    (fd, tmp) = tempfile.mkstemp(prefix='cach.tmp.')
    os.close(fd)
    try:
      target = quote(FN)+' > '+quote(tmp)

      # Get size and access time (atime)
      #
      exitcode=os.system('ls -l --full-time --time=atime '+target)
      (size,atime) = get_lsline(tmp)

      # Get size and modification time (mtime)
      #
      exitcode=os.system('ls -l --full-time '+target)
      (size,mtime) = get_lsline(tmp)

      # Get size and ctime
      #
      exitcode=os.system('ls -l --full-time --time=ctime '+target)
      (size,ctime) = get_lsline(tmp)
    finally:
      try:
        os.remove(tmp)
      except OSError:
        pass  # Already gone

  # int() yields a long automatically on Python 2 for large sizes
  return(int(size),atime,mtime,ctime)
1577
1578# -----------------------------------------------------------------------------
1579
def get_lsline(FN):
  """get size and time for filename

  USAGE:
    get_lsline(file_name)

  DESCRIPTION:
    Read in one line 'ls -la' item from file (generated by filestat) and
    convert time to seconds since epoch. Return file size and time.

    The line is split on whitespace: field 4 is the size and fields 5-9
    must form a time stamp in the default format of time.strptime, e.g.
    'Mon Jan 02 03:04:05 2006'.
    The size is returned as the string read from the file.
  """

  import time

  f = open(FN,'r')
  try:
    info = f.read().split()
  finally:
    f.close()

  size = info[4]
  week = info[5]
  mon  = info[6]
  day  = info[7]
  hour = info[8]
  year = info[9]

  timestamp = week+' '+mon+' '+day+' '+hour+' '+year
  timetup = time.strptime(timestamp)
  t = time.mktime(timetup)
  return(size, t)
1608
1609# -----------------------------------------------------------------------------
1610
def checkdir(CD,verbose=None):
  """Check or create caching directory

  USAGE:
    checkdir(CD,verbose):

  ARGUMENTS:
    CD -- Directory
    verbose -- Flag verbose output (default: None)

  DESCRIPTION:
    If CD does not exist it will be created if possible.
    Returns the directory name with ~ expanded and a trailing separator.
    If the directory is not both readable and writable and cannot be
    created, a warning is printed and '/tmp/' (Unix) or 'C:' (otherwise)
    is returned instead.
    An empty CD is returned unchanged.
  """

  import os
  import os.path
  import stat

  if CD == '':
    return(CD)  # Nothing to check or create

  if CD[-1] != os.sep:
    CD = CD + os.sep  # Add separator for directories

  CD = os.path.expanduser(CD) # Expand ~ or ~user in pathname

  # Both read and write access are needed, hence the bitwise or
  if not os.access(CD,os.R_OK | os.W_OK):
    try:
      os.mkdir(CD)
    except OSError:
      print('WARNING: Directory %s could not be created.' %CD)
      if unix:
        CD = '/tmp/'
      else:
        CD = 'C:'
      print('Using directory %s instead' %CD)
    else:
      # Change access rights if possible
      #
      if unix:
        try:
          # Same as chmod 777
          os.chmod(CD, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
        except OSError:
          pass  # Best effort only
      else:
        pass  # FIXME: What about access rights under Windows?
      if verbose: print('MESSAGE: Directory %s created.' %CD)

  return(CD)
1652
1653#==============================================================================
1654# Statistics
1655#==============================================================================
1656
def addstatsline(CD,funcname,FN,Retrieved,reason,comptime,loadtime,
                 compression):
  """Add stats entry

  USAGE:
    addstatsline(CD,funcname,FN,Retrieved,reason,comptime,loadtime,compression)

  DESCRIPTION:
    Make one entry in the stats file about one cache hit recording time saved
    and other statistics. The data are used by the function cachestat.

    The entry is appended to the file CD+statsfile+'.'+<month><year> and
    consists of the comma separated fields
      date, user, FN, size of result file, compression, hit (1 or 0),
      reason, comptime, loadtime
    The size is -1 if the result file cannot be examined.
    Failures are reported as warnings only; no exception is raised.
    The argument funcname is not used.
  """

  import os, time

  TimeTuple = time.localtime(time.time())
  extension = time.strftime('%b%Y',TimeTuple)
  SFN = CD+statsfile+'.'+extension

  try:
    (statfile, dummy) = myopen(SFN,'a',compression=0)
  except Exception:
    statfile = None

  if not statfile:
    # myopen signals failure by returning None
    print('Warning: Stat file could not be opened')
    return

  try:
    try:
      user = os.environ.get('USER', 'Nobody')
      date = time.asctime(TimeTuple)

      if Retrieved:
        hit = '1'
      else:
        hit = '0'

      # Get size of result file
      #
      resultfile = CD+FN+'_'+file_types[0]
      if compression:
        resultfile = resultfile+'.z'

      try:
        size = os.stat(resultfile)[6]
      except OSError:
        size = -1  # Error condition, but don't crash. This is just statistics

      # Build entry
      fields = [date,
                user,
                FN,
                str(int(size)),
                str(compression),
                hit,
                str(reason),
                str(round(comptime,4)),
                str(round(loadtime,4))]
      entry = ','.join(fields) + CR

      statfile.write(entry)
    finally:
      statfile.close()
  except Exception:
    print('Warning: Writing of stat file failed')
1730
1731# -----------------------------------------------------------------------------
1732
1733# FIXME: should take cachedir as an optional arg
1734#
def __cachestat(sortidx=4,period=-1,showuser=None,cachedir=None):
  """  List caching statistics.

  USAGE:
    __cachestat(sortidx=4,period=-1,showuser=None,cachedir=None):

      Generate statistics of caching efficiency.
      The parameter sortidx determines by what field lists are sorted
      (index into the fields following the name: Hits, Exec(s), Cache(s),
      Saved(s), Gain(%), Size).
      If the optional keyword period is set to -1,
      all available caching history is used.
      If it is 0 only the current month is used.
      Future versions will include more than one month....
      If showuser is set, a table per user is printed as well.
      cachedir defaults to options['cachedir'].
      OMN 20/8/2000

  DESCRIPTION:
      Reads every file in the caching directory whose name contains the
      stats file name, accumulates the records of cache hits per function
      (and per user) and prints tables of per-hit averages.
      Records that cannot be parsed are counted as discarded.
  """

  import os
  import os.path
  from time import strptime, localtime, strftime, mktime, ctime

  # sortidx = 4    # Index into Fields[1:]. What to sort by.

  Fields = ['Name', 'Hits', 'Exec(s)', \
            'Cache(s)', 'Saved(s)', 'Gain(%)', 'Size']
  Widths = [25,7,9,9,9,9,13]
  Types = ['s','d','.2f','.2f','.2f','.2f','d']

  Dictnames = ['Function', 'User']

  if not cachedir:
    cachedir = checkdir(options['cachedir'])

  SD = os.path.expanduser(cachedir)  # Expand ~ or ~user in pathname

  if period == -1:  # Take all available stats
    SFILENAME = statsfile
  else:  # Only stats from current month
       # MAKE THIS MORE GENERAL SO period > 0 counts several months backwards!
    extension = strftime('%b%Y',localtime())
    SFILENAME = statsfile+'.'+extension

  SF = []
  for FN in os.listdir(SD):
    if FN.find(SFILENAME) >= 0:
      SF.append(FN)

  total_read = 0
  total_hits = 0
  total_discarded = 0
  firstday = mktime(strptime('2030','%Y'))
             # FIXME: strptime don't exist in WINDOWS ?
  lastday = 0

  FuncDict = {}
  UserDict = {}
  for FN in SF:
    path = os.path.join(SD,FN)  # Works with or without trailing separator
    statsinput = open(path,'r')
    print('Reading file  %s' %path)

    try:
      for line in statsinput:
        total_read = total_read + 1
        record = line.rstrip().split(',')

        if len(record) not in [8,9]:
          total_discarded = total_discarded + 1
          continue

        # Fields: timestamp, user, func, size, compression, hit, reason,
        #         cputime, loadtime
        try:
          t           = mktime(strptime(record[0]))
          user        = record[1]
          func        = record[2]
          size        = float(record[3])
          compression = int(record[4])   # Not used here
          hit         = int(record[5])
          reason      = int(record[6])   # Not used here
          cputime     = float(record[7])
          loadtime    = float(record[8])
        except (ValueError, IndexError, OverflowError):
          # Malformed or incomplete record
          total_discarded = total_discarded + 1
          continue

        if t > lastday:
          lastday = t
        if t < firstday:
          firstday = t

        # Strip hash-stamp off (only if there is one)
        #
        i = func.find('[')
        if i >= 0:
          func = func[:i]

        if hit:
          total_hits = total_hits + 1
          saving = cputime-loadtime

          if cputime != 0:
            rel_saving = round(100.0*saving/cputime,2)
          else:
            rel_saving = 100.0 - round(1.0*saving,2)  # A bit of a hack

          info = [1,cputime,loadtime,saving,rel_saving,size]

          UpdateDict(UserDict,user,info)
          UpdateDict(FuncDict,func,info)
        else:
          pass #Stats on recomputations and their reasons could go in here
    finally:
      statsinput.close()

  # Compute averages of all sums and write list
  #
  total_valid = total_read - total_discarded

  if total_valid == 0:
    printline(Widths,'=')
    print('CACHING STATISTICS: No valid records read')
    printline(Widths,'=')
    return

  print('')
  printline(Widths,'=')
  print('CACHING STATISTICS: '+ctime(firstday)+' to '+ctime(lastday))
  printline(Widths,'=')
  print('  Total number of valid records %d' %total_valid)
  print('  Total number of discarded records %d' %total_discarded)
  print('  Total number of hits %d' %total_hits)
  print('')

  print('  Fields %s are averaged over number of hits' %(Fields[2:],))
  print('  Time is measured in seconds and size in bytes')
  print('  Tables are sorted by %s' %Fields[1:][sortidx])

  if showuser:
    Dictionaries = [FuncDict, UserDict]
  else:
    Dictionaries = [FuncDict]

  i = 0  # Index of the name belonging to the current dictionary
  for Dict in Dictionaries:
    # Turn the accumulated sums into averages per hit (rec[0] is the
    # number of hits and is left as it is)
    for key in Dict.keys():
      rec = Dict[key]
      for n in range(1, len(rec)):
        rec[n] = round(1.0*rec[n]/rec[0],2)

    # Sort and output
    #
    keylist = SortDict(Dict,sortidx)

    # Write Header
    #
    print('')
    printline(Widths,'-')
    cells = []
    for n in range(len(Fields)):
      if n == 0:  # Left justify
        title = Dictnames[i] + ' ' + Fields[n]
        cells.append(('%-' + str(Widths[n]) + 's') %title)
      else:
        cells.append(('%' + str(Widths[n]) + 's') %Fields[n])
    i = i+1
    print(' '.join(cells))
    printline(Widths,'-')

    # Output Values
    #
    for key in keylist:
      rec = Dict[key]
      name = key
      if len(name) > Widths[0]: name = name[:Widths[0]-3] + '...'
      cells = [('%-' + str(Widths[0]) + Types[0]) %name]
      n = 1
      for val in rec:
        cells.append(('%' + str(Widths[n]) + Types[n]) %val)
        n = n+1
      print(' '.join(cells))
    print('')
1929
1930#==============================================================================
1931# Auxiliary stats functions
1932#==============================================================================
1933
def UpdateDict(Dict,key,info):
  """Update dictionary by adding new values to existing.

  USAGE:
    UpdateDict(Dict,key,info)

  ARGUMENTS:
    Dict -- Dictionary mapping keys to lists of numbers (updated in place)
    key --  Entry to update
    info -- List of numbers to add

  DESCRIPTION:
    If key is present, info is added element by element to the list
    stored under key. Otherwise a copy of info is stored under key.
    Returns Dict.
  """

  if key in Dict:
    dinfo = Dict[key]
    for n in range(len(dinfo)):
      dinfo[n] = info[n] + dinfo[n]
  else:
    dinfo = info[:]  # Make a copy of info list

  Dict[key] = dinfo
  return Dict
1950
1951# -----------------------------------------------------------------------------
1952
def SortDict(Dict,sortidx=0):
  """Sort dictionary

  USAGE:
    SortDict(Dict,sortidx)

  ARGUMENTS:
    Dict --    Dictionary whose values are lists or tuples (records).
               Any other value is treated as a record with one field.
    sortidx -- Index of the field to sort by (default 0).

  DESCRIPTION:
    Sort dictionary of lists according to field number 'sortidx'.
    Keys are returned in ascending order of that field; records with
    equal field values are ordered by key. Dict itself is not modified.

  RETURN VALUE:
    List of the keys of Dict in sorted order.

  EXCEPTIONS:
    IndexError if sortidx is beyond the last field of a record.
  """

  pairs = []
  for key in Dict.keys():
    rec = Dict[key]
    if not isinstance(rec, (list, tuple)):
      rec = [rec]

    if sortidx > len(rec)-1:
      msg = 'Sorting index too large, sortidx = %s' % (sortidx,)
      if options['verbose']:
        # Single parenthesised argument: prints the same text whether
        # print is a statement or a function.
        print('ERROR: ' + msg)
      raise IndexError(msg)

    # Field value first so that the tuples sort by it
    pairs.append((rec[sortidx], key))

  pairs.sort()
  return [key for (val, key) in pairs]  # keys sorted by sortidx
1985
1986# -----------------------------------------------------------------------------
1987
1988def printline(Widths,char):
1989  """Print textline in fixed field.
1990
1991  USAGE:
1992    printline(Widths,char)
1993  """
1994
1995  s = ''
1996  for n in range(len(Widths)):
1997    s = s+Widths[n]*char
1998    if n > 0:
1999      s = s+char
2000
2001  print s
2002
2003#==============================================================================
2004# Messages
2005#==============================================================================
2006
def msg1(funcname,args,kwargs,reason):
  """Message 1 - print the 'Evaluating function' box.

  USAGE:
    msg1(funcname,args,kwargs,reason)

  DESCRIPTION:
    Prints a header box naming funcname, then the arguments (msg7),
    the reason (msg8) and finally a footer line.
  """

  title = 'Evaluating function %s' %funcname
  print_header_box(title)

  msg7(args,kwargs)
  msg8(reason)

  print_footer()
2023 
2024  #
2025  # Old message
2026  #
2027  #args_present = 0
2028  #if args:
2029  #  if len(args) == 1:
2030  #    print 'with argument', mkargstr(args[0], textwidth2),
2031  #  else:
2032  #    print 'with arguments', mkargstr(args, textwidth2),
2033  #  args_present = 1     
2034  #   
2035  #if kwargs:
2036  #  if args_present:
2037  #    word = 'and'
2038  #  else:
2039  #    word = 'with'
2040  #     
2041  #  if len(kwargs) == 1:
2042  #    print word + ' keyword argument', mkargstr(kwargs, textwidth2)
2043  #  else:
2044  #    print word + ' keyword arguments', mkargstr(kwargs, textwidth2)
2045  #  args_present = 1           
2046  #else:
2047  #  print    # Newline when no keyword args present
2048  #       
2049  #if not args_present:   
2050  #  print '',  # Default if no args or kwargs present
2051   
2052   
2053
2054# -----------------------------------------------------------------------------
2055
2056def msg2(funcname,args,kwargs,comptime,reason):
2057  """Message 2
2058
2059  USAGE:
2060    msg2(funcname,args,kwargs,comptime,reason)
2061  """
2062
2063  import string
2064
2065  #try:
2066  #  R = Reason_msg[reason]
2067  #except:
2068  #  R = 'Unknown reason' 
2069 
2070  #print_header_box('Caching statistics (storing) - %s' %R)
2071  print_header_box('Caching statistics (storing)') 
2072 
2073  msg6(funcname,args,kwargs)
2074  msg8(reason)
2075
2076  print string.ljust('| CPU time:', textwidth1) + str(round(comptime,2)) + ' seconds'
2077
2078# -----------------------------------------------------------------------------
2079
2080def msg3(savetime, CD, FN, deps,compression):
2081  """Message 3
2082
2083  USAGE:
2084    msg3(savetime, CD, FN, deps,compression)
2085  """
2086
2087  import string
2088  print string.ljust('| Loading time:', textwidth1) + str(round(savetime,2)) + \
2089                     ' seconds (estimated)'
2090  msg5(CD,FN,deps,compression)
2091
2092# -----------------------------------------------------------------------------
2093
2094def msg4(funcname,args,kwargs,deps,comptime,loadtime,CD,FN,compression):
2095  """Message 4
2096
2097  USAGE:
2098    msg4(funcname,args,kwargs,deps,comptime,loadtime,CD,FN,compression)
2099  """
2100
2101  import string
2102
2103  print_header_box('Caching statistics (retrieving)')
2104 
2105  msg6(funcname,args,kwargs)
2106  print string.ljust('| CPU time:', textwidth1) + str(round(comptime,2)) + ' seconds'
2107  print string.ljust('| Loading time:', textwidth1) + str(round(loadtime,2)) + ' seconds'
2108  print string.ljust('| Time saved:', textwidth1) + str(round(comptime-loadtime,2)) + \
2109        ' seconds'
2110  msg5(CD,FN,deps,compression)
2111
2112# -----------------------------------------------------------------------------
2113
2114def msg5(CD,FN,deps,compression):
2115  """Message 5
2116
2117  USAGE:
2118    msg5(CD,FN,deps,compression)
2119
2120  DESCRIPTION:
2121   Print dependency stats. Used by msg3 and msg4
2122  """
2123
2124  import os, time, string
2125
2126  print '|'
2127  print string.ljust('| Caching dir: ', textwidth1) + CD
2128
2129  if compression:
2130    suffix = '.z'
2131    bytetext = 'bytes, compressed'
2132  else:
2133    suffix = ''
2134    bytetext = 'bytes'
2135
2136  for file_type in file_types:
2137    file_name = FN + '_' + file_type + suffix
2138    print string.ljust('| ' + file_type + ' file: ', textwidth1) + file_name,
2139    stats = os.stat(CD+file_name)
2140    print '('+ str(stats[6]) + ' ' + bytetext + ')'
2141
2142  print '|'
2143  if len(deps) > 0:
2144    print '| Dependencies:  '
2145    dependencies  = deps.keys()
2146    dlist = []; maxd = 0
2147    tlist = []; maxt = 0
2148    slist = []; maxs = 0
2149    for d in dependencies:
2150      stats = deps[d]
2151      t = time.ctime(stats[1])
2152      s = str(stats[0])
2153      #if s[-1] == 'L':
2154      #  s = s[:-1]  # Strip rightmost 'long integer' L off.
2155      #              # FIXME: Unnecessary in versions later than 1.5.2
2156
2157      if len(d) > maxd: maxd = len(d)
2158      if len(t) > maxt: maxt = len(t)
2159      if len(s) > maxs: maxs = len(s)
2160      dlist.append(d)
2161      tlist.append(t)
2162      slist.append(s)
2163
2164    for n in range(len(dlist)):
2165      d = string.ljust(dlist[n]+':', maxd+1)
2166      t = string.ljust(tlist[n], maxt)
2167      s = string.rjust(slist[n], maxs)
2168
2169      print '| ', d, t, ' ', s, 'bytes'
2170  else:
2171    print '| No dependencies'
2172  print_footer()
2173
2174# -----------------------------------------------------------------------------
2175
2176def msg6(funcname,args,kwargs):
2177  """Message 6
2178
2179  USAGE:
2180    msg6(funcname,args,kwargs)
2181  """
2182
2183  import string
2184  print string.ljust('| Function:', textwidth1) + funcname
2185
2186  msg7(args,kwargs)
2187 
2188# -----------------------------------------------------------------------------   
2189
2190def msg7(args,kwargs):
2191  """Message 7
2192 
2193  USAGE:
2194    msg7(args,kwargs):
2195  """
2196 
2197  import string
2198 
2199  args_present = 0 
2200  if args:
2201    if len(args) == 1:
2202      print string.ljust('| Argument:', textwidth1) + mkargstr(args[0], \
2203                         textwidth2)
2204    else:
2205      print string.ljust('| Arguments:', textwidth1) + \
2206            mkargstr(args, textwidth2)
2207    args_present = 1
2208           
2209  if kwargs:
2210    if len(kwargs) == 1:
2211      print string.ljust('| Keyword Arg:', textwidth1) + mkargstr(kwargs, \
2212                         textwidth2)
2213    else:
2214      print string.ljust('| Keyword Args:', textwidth1) + \
2215            mkargstr(kwargs, textwidth2)
2216    args_present = 1
2217
2218  if not args_present:               
2219    print '| No arguments' # Default if no args or kwargs present
2220
2221# -----------------------------------------------------------------------------
2222
2223def msg8(reason):
2224  """Message 8
2225 
2226  USAGE:
2227    msg8(reason):
2228  """
2229 
2230  import string
2231   
2232  try:
2233    R = Reason_msg[reason]
2234  except:
2235    R = 'Unknown' 
2236 
2237  print string.ljust('| Reason:', textwidth1) + R
2238   
2239# -----------------------------------------------------------------------------
2240
2241def print_header_box(line):
2242  """Print line in a nice box.
2243 
2244  USAGE:
2245    print_header_box(line)
2246
2247  """
2248  global textwidth3
2249
2250  import time
2251
2252  time_stamp = time.ctime(time.time())
2253  line = time_stamp + '. ' + line
2254   
2255  N = len(line) + 1
2256  s = '+' + '-'*N + CR
2257
2258  print s + '| ' + line + CR + s,
2259
2260  textwidth3 = N
2261
2262# -----------------------------------------------------------------------------
2263   
2264def print_footer():
2265  """Print line same width as that of print_header_box.
2266  """
2267 
2268  N = textwidth3
2269  s = '+' + '-'*N + CR   
2270     
2271  print s     
2272     
2273# -----------------------------------------------------------------------------
2274
def mkargstr(args, textwidth, argstr = ''):
  """ Generate a string containing first textwidth characters of arguments.

  USAGE:
    mkargstr(args, textwidth, argstr = '')

  ARGUMENTS:
    args --      Object to format. Tuples, lists and dictionaries are
                 formatted item by item (recursively).
    textwidth -- Number of characters kept before '...' is appended.
    argstr --    Text that the formatted args are appended to (default '').

  DESCRIPTION:
    Similar to str(args) possibly followed by truncation, but faster if
    args is huge, because items are no longer formatted once more than
    textwidth characters have been produced.
    Strings are enclosed in single quotes. A Numeric array with more than
    textwidth elements is shown as 'Array: ' followed by its shape; this
    is skipped if the Numeric module is not installed.
    Empty tuples, lists and dictionaries are shown as (), [] and {}.

  RETURN VALUE:
    argstr followed by the formatted args. If the text was truncated, it
    consists of the first textwidth characters followed by '...'.
  """

  WasTruncated = 0

  # Exact type tests (not isinstance): instances of subclasses are left
  # to their own str() in the last branch.
  kind = type(args)

  if kind is dict:
    body = ''
    for key in args.keys():
      body = body + mkargstr(key, textwidth) + ": " + \
             mkargstr(args[key], textwidth) + ", "
      if len(argstr) + 1 + len(body) > textwidth:  # 1 is for the "{"
        WasTruncated = 1
        break

    # Strip off trailing comma and space. Only the items are stripped, so
    # that the opening brace survives when the dictionary is empty.
    argstr = argstr + "{" + body[:-2] + "}"

  elif kind is tuple or kind is list:
    if kind is tuple:
      lc, rc = '(', ')'
    else:
      lc, rc = '[', ']'

    body = ''
    for arg in args:
      body = body + mkargstr(arg, textwidth) + ', '
      if len(argstr) + 1 + len(body) > textwidth:  # 1 is for the bracket
        WasTruncated = 1
        break

    # Strip off trailing comma and space unless singleton tuple,
    # which keeps its comma: (x,)
    #
    if kind is tuple and len(args) == 1:
      body = body[:-1]
    else:
      body = body[:-2]
    argstr = argstr + lc + body + rc

  elif kind is str:
    argstr = argstr + "'"+str(args)+"'"

  else:
    # Truncate large Numeric arrays before using str().
    # Numeric is optional here: without it there is nothing to check.
    try:
      import Numeric
    except ImportError:
      Numeric = None

    if Numeric is not None and kind == Numeric.ArrayType:
      if len(args.flat) > textwidth:
        args = 'Array: ' + str(args.shape)

    argstr = argstr + str(args)

  if WasTruncated or len(argstr) > textwidth:
    argstr = argstr[:textwidth]+'...'
  return argstr
2341
2342# -----------------------------------------------------------------------------
2343
2344def test_OK(msg):
2345  """Print OK msg if test is OK.
2346 
2347  USAGE
2348    test_OK(message)
2349  """
2350
2351  import string
2352   
2353  print string.ljust(msg, textwidth4) + ' - OK' 
2354 
2355  #raise StandardError
2356 
2357# -----------------------------------------------------------------------------
2358
2359def test_error(msg):
2360  """Print error if test fails.
2361 
2362  USAGE
2363    test_error(message)
2364  """
2365 
2366  print 'ERROR (caching.test): %s' %msg
2367  print 'Please send this code example and output to '
2368  print 'Ole.Nielsen@anu.edu.au'
2369  print
2370  print
2371 
2372  #import sys
2373  #sys.exit()
2374  raise StandardError
Note: See TracBrowser for help on using the repository browser.