source: anuga_core/source/anuga/caching/caching.py @ 4198

Last change on this file since 4198 was 4198, checked in by duncan, 17 years ago

fix in cache. None was giving different hashes, on some linux boxes

File size: 68.1 KB
Line 
1# =============================================================================
2# caching.py - Supervised caching of function results.
3# Copyright (C) 1999, 2000, 2001, 2002 Ole Moller Nielsen
4# Australian National University (1999-2003)
5# Geoscience Australia (2003-present)
6#
7#    This program is free software; you can redistribute it and/or modify
8#    it under the terms of the GNU General Public License as published by
9#    the Free Software Foundation; either version 2 of the License, or
10#    (at your option) any later version.
11#
12#    This program is distributed in the hope that it will be useful,
13#    but WITHOUT ANY WARRANTY; without even the implied warranty of
14#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15#    GNU General Public License (http://www.gnu.org/copyleft/gpl.html)
16#    for more details.
17#
18#    You should have received a copy of the GNU General Public License
19#    along with this program; if not, write to the Free Software
20#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
21#
22#
23# Contact address: Ole.Nielsen@ga.gov.au
24#
25# Version 1.5.6 February 2002
26# =============================================================================
27 
28"""Module caching.py - Supervised caching of function results.
29
30Public functions:
31
32cache(func,args) -- Cache values returned from func given args.
33cachestat() --      Reports statistics about cache hits and time saved.
34test() --       Conducts a basic test of the caching functionality.
35
36See doc strings of individual functions for detailed documentation.
37"""
38
39# -----------------------------------------------------------------------------
40# Initialisation code
41
42# Determine platform
43#
44import os
# 'unix' is 0 on DOS/Windows style platforms and 1 everywhere else
unix = int(os.name not in ['nt', 'dos', 'win32', 'what else?'])

# Choose home directory placeholder and line terminator for the platform
#
if not unix:
  homedir, CR = 'c:', '\r\n'  #FIXME: Not tested under windows
else:
  homedir, CR = '~', '\n'

# Default caching directory name (always ends with a path separator)
#
cachedir = os.sep.join([homedir, '.python_cache', ''])

# -----------------------------------------------------------------------------
# Options dictionary with default values - to be set by user
# (see set_option)
#

options = {}
options['cachedir'] = cachedir   # Default cache directory
options['maxfiles'] = 1000000    # Maximum number of cached files
options['savestat'] = 1          # Log caching info to stats file
options['verbose'] = 1           # Write messages to standard output
options['bin'] = 1               # Use binary format (more efficient)
options['compression'] = 1       # Use zlib compression
options['bytecode'] = 0          # Recompute if bytecode has changed
options['expire'] = 0            # Automatically remove files that have been
                                 # accessed least recently
76
77# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
78
def set_option(key, value):
  """Set a value in the module-level options dictionary.

  USAGE:
    set_option(key, value)

  ARGUMENTS:
    key --   Key in options dictionary. (Required)
    value -- New value for key. (Required)

  DESCRIPTION:
    Overwrites options[key] with value. Only keys that already exist in
    options can be set; new keys are never added.

  RAISES:
    KeyError -- if key is not in options.
  """

  # dict.has_key() is deprecated (and gone in Python 3); the 'in' operator
  # performs the same membership test.
  if key not in options:
    raise KeyError(key)  # Key not found, raise an exception

  options[key] = value
98
99# -----------------------------------------------------------------------------
100# Function cache - the main routine
101
def cache(func, args=(), kwargs = {}, dependencies=None , cachedir=None,
          verbose=None, compression=None, evaluate=0, test=0, clear=0,
          return_filename=0):
  """Supervised caching of function results.

  USAGE:
    result = cache(func, args, kwargs, dependencies, cachedir, verbose,
                   compression, evaluate, test, clear, return_filename)

  ARGUMENTS:
    func --            Function object (Required)
    args --            Arguments to func (Default: ())
    kwargs --          Keyword arguments to func (Default: {})
    dependencies --    Filenames that func depends on (Default: None)
    cachedir --        Directory for cache files (Default: options['cachedir'])
    verbose --         Flag verbose output to stdout
                       (Default: options['verbose'])
    compression --     Flag zlib compression (Default: options['compression'])
    evaluate --        Flag forced evaluation of func (Default: 0)
    test --            Flag test for cached results (Default: 0)
    clear --           Flag delete cached results (Default: 0)
    return_filename -- Flag return of cache filename (Default: 0)

  RAISES:
    TypeError -- if kwargs is not a dictionary.

  DESCRIPTION:
    A Python function call of the form

      result = func(arg1,...,argn)

    can be replaced by

      from caching import cache
      result = cache(func,(arg1,...,argn))

  The latter form returns the same output as the former but reuses cached
  results if the function has been computed previously in the same context.
  'result' and the arguments can be simple types, tuples, list, dictionaries or
  objects, but not unhashable types such as functions or open file objects.
  The function 'func' may be a member function of an object or a module.

  This type of caching is particularly useful for computationally intensive
  functions with few frequently used combinations of input arguments. Note that
  if the inputs or output are very large caching might not save time because
  disc access may dominate the execution time.

  If the function definition changes after a result has been cached it will be
  detected by examining the functions bytecode (co_code, co_consts,
  func_defaults, co_argcount) and it will be recomputed.

  LIMITATIONS:
    1 Caching calls func(*args, **kwargs) and will work with anything that can
      be pickled, so any limitation in argument unpacking or pickle extends to
      caching.
    2 A function to be cached should not depend on global variables
      as wrong results may occur if globals are changed after a result has
      been cached.

  -----------------------------------------------------------------------------
  Additional functionality:

  Keyword args
    Keyword arguments (kwargs) can be added as a dictionary of keyword: value
    pairs.
    A Python function call of the form

      result = func(arg1,...,argn, kwarg1=val1,...,kwargm=valm)

    is then cached as follows

      from caching import cache
      result = cache(func,(arg1,...,argn), {kwarg1:val1,...,kwargm:valm})

    The default value of kwargs is {}

  Explicit dependencies:
    The call
      cache(func,(arg1,...,argn),dependencies = <list of filenames>)
    Checks the size, creation time and modification time of each listed file.
    If any file has changed the function is recomputed and the results stored
    again.

  Specify caching directory:
    The call
      cache(func,(arg1,...,argn), cachedir = <cachedir>)
    designates <cachedir> where cached data are stored. Use ~ to indicate users
    home directory - not $HOME. The default is ~/.python_cache on a UNIX
    platform and c:/.python_cache on a Win platform.

  Silent operation:
    The call
      cache(func,(arg1,...,argn),verbose=0)
    suppresses messages to standard output.

  Compression:
    The call
      cache(func,(arg1,...,argn),compression=0)
    disables compression. (Default: compression=1). If the requested compressed
    or uncompressed file is not there, it'll try the other version.

  Forced evaluation:
    The call
      cache(func,(arg1,...,argn),evaluate=1)
    forces the function to evaluate even though cached data may exist.

  Testing for presence of cached result:
    The call
      cache(func,(arg1,...,argn),test=1)
    retrieves cached result if it exists, otherwise None. The function will not
    be evaluated. If both evaluate and test are switched on, evaluate takes
    precedence.

  Obtain cache filenames:
    The call
      cache(func,(arg1,...,argn),return_filename=1)
    returns the hashed base filename under which this function and its
    arguments would be cached

  Clearing cached results:
    The call
      cache(func,'clear')
    clears all cached data for 'func' and
      cache('clear')
    clears all cached data.

    NOTE: The string 'clear' can be passed as an *argument* to func using
      cache(func,('clear',)) or cache(func,tuple(['clear'])).

    New form of clear:
      cache(func,(arg1,...,argn),clear=1)
    clears cached data for particular combination func and args

  """

  # Imports and input checks
  #
  import time

  if not cachedir:
    cachedir = options['cachedir']

  if verbose is None:  # Do NOT write 'if not verbose:', it could be zero.
    verbose = options['verbose']

  if compression is None:  # Do NOT write 'if not compression:',
                           # it could be zero.
    compression = options['compression']

  # Create cache directory if needed
  #
  CD = checkdir(cachedir,verbose)

  # Handle the case cache('clear')
  #
  if isinstance(func, str) and func.lower() == 'clear':
    clear_cache(CD,verbose=verbose)
    return

  # Handle the case cache(func, 'clear')
  #
  if isinstance(args, str) and args.lower() == 'clear':
    clear_cache(CD,func,verbose=verbose)
    return

  # Force singleton arg into a tuple.
  # The exact type is tested on purpose: an instance of a tuple subclass is
  # treated as one single argument, as it always has been.
  #
  if type(args) is not tuple:
    args = (args,)

  # Check that kwargs is a dictionary.
  # NOTE: the shared default {} is never modified by this function.
  #
  if type(kwargs) is not dict:
    raise TypeError('kwargs must be a dictionary, not %s'
                    % type(kwargs).__name__)

  #FIXME: make faster hashing function

  # Hash arguments (and keyword args) to integer
  #
  arghash = myhash((args,kwargs))

  # Get sizes and timestamps for files listed in dependencies.
  # Force singletons into a tuple.
  #
  if dependencies and not isinstance(dependencies, (tuple, list)):
    dependencies = (dependencies,)
  deps = get_depstats(dependencies)

  # Extract function name from func object
  #
  funcname = get_funcname(func)

  # Create cache filename
  #
  FN = funcname+'['+repr(arghash)+']'  # The symbol '(' does not work under unix

  if return_filename:
    return(FN)

  if clear:
    # Remove every file (compressed or not) cached for this func and args
    for file_type in file_types:
      file_name = CD+FN+'_'+file_type
      for fn in [file_name, file_name + '.z']:
        if os.access(fn, os.F_OK):
          if unix:
            os.remove(fn)
          else:
            # FIXME: os.remove doesn't work under windows
            os.system('del '+fn)
          # 'verbose' is normally the integer 1, so test its truth value;
          # 'verbose is True' would never match and silence the message.
          if verbose:
            print('MESSAGE (caching): File %s deleted' %fn)
    return None


  #-------------------------------------------------------------------

  # Check if previous computation has been cached
  #
  if evaluate:
    test = 0          # evaluate takes precedence over test (see docstring)
    Retrieved = None  # Force evaluation of func regardless of caching status.
    reason = 4
  else:
    (T, FN, Retrieved, reason, comptime, loadtime, compressed) = \
      CacheLookup(CD, FN, func, args, kwargs, deps, verbose, compression, \
                  dependencies)

  if not Retrieved:
    if test:  # Do not attempt to evaluate function
      T = None
    else:  # Evaluate function and save to cache
      if verbose:
        msg1(funcname, args, kwargs,reason)

      # Remove expired files automatically
      #
      if options['expire']:
        DeleteOldFiles(CD,verbose)

      # Save args before function is evaluated in case
      # they are modified by function
      #
      save_args_to_cache(CD,FN,args,kwargs,compression)

      # Execute and time function with supplied arguments
      #
      t0 = time.time()
      T = func(*args, **kwargs)
      comptime = time.time()-t0

      if verbose:
        msg2(funcname,args,kwargs,comptime,reason)

      # Save results and estimated loading time to cache
      #
      loadtime = save_results_to_cache(T, CD, FN, func, deps, comptime, \
                                       funcname, dependencies, compression)
      if verbose:
        msg3(loadtime, CD, FN, deps, compression)
      compressed = compression

  # A pure test that found nothing is not logged: in that case comptime,
  # loadtime and compressed carry no information.
  if options['savestat'] and (not test or Retrieved):
    addstatsline(CD,funcname,FN,Retrieved,reason,comptime,loadtime,compressed)

  return(T)  # Return results in all cases
369
370# -----------------------------------------------------------------------------
371
def cachestat(sortidx=4, period=-1, showuser=None, cachedir=None):
  """Print summaries of how much the caching has gained.

  USAGE:
    cachestat(sortidx, period, showuser, cachedir)

  ARGUMENTS:
    sortidx --  Index of the field by which the lists are sorted (default: 4)
                Legal values are
                 0: 'Name'
                 1: 'Hits'
                 2: 'CPU'
                 3: 'Time Saved'
                 4: 'Gain(%)'
                 5: 'Size'
    period --   -1: use all available caching history (default).
                 0: use the current month only.
    showuser -- Flag for additional table showing user statistics
                (default: None).
    cachedir -- Directory for cache files (default: options['cachedir']).

  DESCRIPTION:
    The logged caching statistics are condensed into tables of the form
    --------------------------------------------------------------------------
    Function Name   Hits   Exec(s)  Cache(s)  Saved(s)   Gain(%)      Size
    --------------------------------------------------------------------------
  """

  # Public front end only: the private implementation does all the work
  settings = (sortidx, period, showuser, cachedir)
  __cachestat(*settings)
402
403# -----------------------------------------------------------------------------
404
405#Has mostly been moved to proper unit test
def test(cachedir=None,verbose=0,compression=None):
  """Run a self test of the caching functionality.

  USAGE:
    test(cachedir, verbose, compression)

  ARGUMENTS:
    cachedir --    Directory for cache files (Default: options['cachedir'])
    verbose --     Flag whether caching will output its statistics (default=0)
    compression -- Flag zlib compression (Default: options['compression'])

  DESCRIPTION:
    Caches a local test function and checks, in this order: evaluation and
    retrieval (with and without compression), return_filename, presence of
    the cache files, the 'test' option, clearing, file dependencies
    (including a wildcard), recomputation after a dependency has changed,
    a simple timing comparison and the presence of statistics files.
    Outcomes are reported through test_OK() and test_error(), which are
    defined elsewhere in this module.

    Side effects visible here: a file 'testfile.tmp' is written to the cache
    directory, and the options 'savestat' and possibly 'compression' are
    changed through set_option() and not restored.
  """

  import string, time

  # Initialise
  #
  # The module is imported by name and reloaded; all cache calls below go
  # through caching.cache (presumably this very module - confirm).
  import caching
  reload(caching)

  if not cachedir:
    cachedir = options['cachedir']

  # Only reached if the caller passes verbose=None explicitly (default is 0)
  if verbose is None:  # Do NOT write 'if not verbose:', it could be zero.
    verbose = options['verbose']

  if compression == None:  # Do NOT write 'if not compression:',
                           # it could be zero.
    compression = options['compression']
  else:
    # An explicitly requested compression setting becomes the module default
    try:
      set_option('compression', compression)
    except:
      test_error('Set option failed')

  # Fall back to uncompressed caching if zlib cannot be imported
  try:
    import zlib
  except:
    print
    print '*** Could not find zlib, default to no-compression      ***'
    print '*** Installing zlib will improve performance of caching ***'
    print
    compression = 0
    set_option('compression', compression)

  print
  print_header_box('Testing caching module - please stand by')
  print

  # Define a test function to be cached
  #
  def f(a,b,c,N,x=0,y='abcdefg'):
    """f(a,b,c,N)
       Do something time consuming and produce a complex result:
       a list of N tuples (a,b,c,s,n,x,y) where s is a string built from n.
    """

    # NOTE(review): this import is not used inside f
    import string

    B = []
    for n in range(N):
      s = str(n+2.0/(n + 4.0))+'.a'*10
      B.append((a,b,c,s,n,x,y))
    return(B)

  # Check that default cachedir is OK
  #
  CD = checkdir(cachedir,verbose)


  # Make a dependency file
  #
  try:
    DepFN = CD + 'testfile.tmp'
    DepFN_wildcard = CD + 'test*.tmp'
    Depfile = open(DepFN,'w')
    Depfile.write('We are the knights who say NI!')
    Depfile.close()
    test_OK('Wrote file %s' %DepFN)
  except:
    test_error('Could not open file %s for writing - check your environment' \
               % DepFN)

  # Check set_option (and switch stats off)
  #
  try:
    set_option('savestat',0)
    assert(options['savestat'] == 0)
    test_OK('Set option')
  except:
    test_error('Set option failed')

  # Make some test input arguments
  #
  N = 5000  #Make N fairly small here

  a = [1,2]
  b = ('Thou shalt count the number three',4)
  c = {'Five is right out': 6, (7,8): 9}
  x = 3
  y = 'holy hand granate'

  # Test caching
  #
  # comp runs over 0 (no compression) and, if compression is enabled,
  # also 1 (with compression)
  if compression:
    comprange = 2
  else:
    comprange = 1

  for comp in range(comprange):

    # Evaluate and store
    #
    try:
      T1 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, evaluate=1, \
                         verbose=verbose, compression=comp)
      if comp:
        test_OK('Caching evaluation with compression')
      else:
        test_OK('Caching evaluation without compression')
    except:
      if comp:
        test_error('Caching evaluation with compression failed - try caching.test(compression=0)')
      else:
        test_error('Caching evaluation failed - try caching.test(verbose=1)')

    # Retrieve
    #
    try:
      T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                         compression=comp)

      if comp:
        test_OK('Caching retrieval with compression')
      else:
        test_OK('Caching retrieval without compression')
    except:
      if comp:
        test_error('Caching retrieval with compression failed - try caching.test(compression=0)')
      else:
        test_error('Caching retrieval failed - try caching.test(verbose=1)')

    # Reference result
    #
    T3 = f(a,b,c,N,x=x,y=y)  # Compute without caching

    # Evaluated, retrieved and directly computed results must all agree
    if T1 == T2 and T2 == T3:
      if comp:
        test_OK('Basic caching functionality (with compression)')
      else:
        test_OK('Basic caching functionality (without compression)')
    else:
      test_error('Cached result does not match computed result')


  # Test return_filename
  #
  try:
    FN = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       return_filename=1)
    assert(FN[:2] == 'f[')
    test_OK('Return of cache filename')
  except:
    test_error('Return of cache filename failed')

  # Test existence of cachefiles
  # (FN is the cache filename obtained in the previous step)

  try:
    (datafile,compressed0) = myopen(CD+FN+'_'+file_types[0],"rb",compression)
    (argsfile,compressed1) = myopen(CD+FN+'_'+file_types[1],"rb",compression)
    (admfile,compressed2) =  myopen(CD+FN+'_'+file_types[2],"rb",compression)
    test_OK('Presence of cache files')
    datafile.close()
    argsfile.close()
    admfile.close()
  except:
    test_error('Expected cache files did not exist')

  # Test 'test' function when cache is present
  #
  try:
    #T1 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
    #                   evaluate=1)
    T4 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, test=1)
    assert(T1 == T4)

    test_OK("Option 'test' when cache file present")
  except:
    test_error("Option 'test' when cache file present failed")

  # Test that 'clear' works
  # (the older form cache(f,'clear') is kept below for reference only)
  #
  #try:
  #  caching.cache(f,'clear',verbose=verbose)
  #  test_OK('Clearing of cache files')
  #except:
  #  test_error('Clear does not work')
  try:
    caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, clear=1)
    test_OK('Clearing of cache files')
  except:
    test_error('Clear does not work')



  # Test 'test' function when cache is absent
  # (the cached files were cleared in the previous step)
  #
  try:
    T4 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, test=1)
    assert(T4 is None)
    test_OK("Option 'test' when cache absent")
  except:
    test_error("Option 'test' when cache absent failed")

  # Test dependencies
  #
  T1 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN)
  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN)

  if T1 == T2:
    test_OK('Basic dependencies functionality')
  else:
    test_error('Dependencies do not work')

  # Test basic wildcard dependency
  #
  T3 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN_wildcard)

  if T1 == T3:
    test_OK('Basic dependencies with wildcard functionality')
  else:
    test_error('Dependencies with wildcards do not work')


  # Test that changed timestamp in dependencies triggers recomputation

  # Modify dependency file
  Depfile = open(DepFN,'a')
  Depfile.write('You must cut down the mightiest tree in the forest with a Herring')
  Depfile.close()

  # With test=1 nothing is recomputed, so None is expected once the
  # dependency file has changed
  T3 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN, test = 1)

  if T3 is None:
    test_OK('Changed dependencies recognised')
  else:
    test_error('Changed dependencies not recognised')

  # Test recomputation when dependencies have changed
  #
  T3 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose, \
                       dependencies=DepFN)
  if T1 == T3:
    test_OK('Recomputed value with changed dependencies')
  else:
    test_error('Recomputed value with changed dependencies failed')

  # Performance test (with statistics)
  # Don't really rely on this as it will depend on specific computer.
  #

  set_option('savestat',1)

  N = 20*N   #Should be large on fast computers...
  tt = time.time()
  T1 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose)
  t1 = time.time() - tt

  tt = time.time()
  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=verbose)
  t2 = time.time() - tt

  # Nothing is reported if the second call was not faster than the first
  if T1 == T2:
    if t1 > t2:
      test_OK('Performance test: relative time saved = %s pct' \
              %str(round((t1-t2)*100/t1,2)))
    #else:
    #  print 'WARNING: Performance a bit low - this could be specific to current platform'
  else:
    test_error('Basic caching failed for new problem')

  # Test presence of statistics file
  #
  # NOTE(review): the failure branch below reports through test_OK, not
  # test_error - confirm whether unreadable statistics files are meant to
  # be tolerated. SF is assigned but never used.
  try:
    DIRLIST = os.listdir(CD)
    SF = []
    for FN in DIRLIST:
      if string.find(FN,statsfile) >= 0:
        fid = open(CD+FN,'r')
        fid.close()
    test_OK('Statistics files present')
  except:
    test_OK('Statistics files cannot be opened')

  print_header_box('Show sample output of the caching function:')

  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=0)
  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=0)
  T2 = caching.cache(f,(a,b,c,N), {'x':x, 'y':y}, verbose=1)

  print_header_box('Show sample output of cachestat():')
  if unix:
    cachestat()
  else:
    # Probe time.strptime first; any failure is reported as cachestat()
    # being unavailable on this platform
    try:
      import time
      t = time.strptime('2030','%Y')
      cachestat()
    except:
      print 'caching.cachestat() does not work here, because it'
      print 'relies on time.strptime() which is unavailable in Windows'

  print
  test_OK('Caching self test completed')
722     
723           
724  # Test setoption (not yet implemented)
725  #
726
727 
728#==============================================================================
729# Auxiliary functions
730#==============================================================================
731
732# Import pickler
733# cPickle is used by functions mysave, myload, and compare
734#
735import cPickle  # 10 to 100 times faster than pickle
736pickler = cPickle
737
# Module-wide constants (not meant to be changed by users of the module)
#

# Compression level for zlib; level 1 works well.
comp_level = 1

# Text widths used when formatting output:
#   textwidth1 - key fields in report forms
#   textwidth2 - maximal width of textual representation of arguments
#   textwidth3 - initial width of separation lines (is modified)
#   textwidth4 - text width in test_OK()
textwidth1, textwidth2, textwidth3, textwidth4 = 16, 132, 16, 50

# Base filename for cached statistics.
# It will reside in the chosen cache directory.
statsfile = '.cache_stat'

# File name extensions, in this order: cached function results,
# stored function args, administrative info.
file_types = ['Result', 'Args', 'Admin']

# Verbose reasons for recomputation, indexed by reason code
Reason_msg = [
  'OK',
  'No cached result',
  'Dependencies have changed',
  'Byte code or arguments have changed',
  'Recomputation was requested by caller',
  'Cached file was unreadable',
]
760             
761# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
762
def CacheLookup(CD, FN, func, args, kwargs, deps, verbose, compression,
                dependencies):
  """Determine whether cached result exists and return info.

  USAGE:
    (T, FN, Retrieved, reason, comptime, loadtime, compressed) =
      CacheLookup(CD, FN, func, args, kwargs, deps, verbose, compression,
                  dependencies)

  INPUT ARGUMENTS:
    CD --            Cache Directory
    FN --            Suggested cache file name
    func --          Function object
    args --          Tuple of arguments
    kwargs --        Dictionary of keyword arguments
    deps --          Dependencies time stamps
    verbose --       Flag text output
    compression --   Flag zlib compression
    dependencies --  Given list of dependencies

  OUTPUT ARGUMENTS:
    T --             Cached result if present otherwise None
    FN --            File name under which new results must be saved
    Retrieved --     True if a valid cached result was found
    reason --        0: OK (if Retrieved),
                     1: No cached result,
                     2: Dependencies have changed,
                     3: Arguments or Bytecode have changed
                     4: Recomputation was forced
                     Any other positive value is the code handed back by
                     myload() or load_from_cache() when loading failed.
    comptime --      Number of seconds it took to compute cached result
    loadtime --      Number of seconds it took to load cached result
    compressed --    Flag (0,1) if cached results were compressed or not

    Whenever no valid cached result is returned, T, Retrieved, comptime,
    loadtime and compressed are all None.

  DESCRIPTION:
    Determine if cached result exists as follows:
    Load in saved arguments and bytecode stored under hashed filename.
    If they are identical to current arguments and bytecode and if dependencies
    have not changed their time stamp, then return cached result.

    Otherwise return filename under which new results should be cached.
    Hash collisions are handled recursively by calling CacheLookup again with a
    modified filename.

    All cache files opened here are closed again before returning.
  """

  # Assess whether cached result exists - compressed or not.
  #
  if verbose:
    print('Caching: looking for cached files %s_{%s,%s,%s}.z'\
           %(CD+FN, file_types[0], file_types[1], file_types[2]))
  (datafile,compressed0) = myopen(CD+FN+'_'+file_types[0],"rb",compression)
  (argsfile,compressed1) = myopen(CD+FN+'_'+file_types[1],"rb",compression)
  (admfile,compressed2) =  myopen(CD+FN+'_'+file_types[2],"rb",compression)

  if not (argsfile and datafile and admfile) or \
     not (compressed0 == compressed1 and compressed0 == compressed2):
    # Cached result does not exist or files were compressed differently
    #
    # This will ensure that evaluation will take place unless all files are
    # present.

    # Some of the files may have been opened successfully - close those
    # so that no file handles are leaked.
    for fid in (datafile, argsfile, admfile):
      if fid:
        fid.close()

    reason = 1
    return(None,FN,None,reason,None,None,None) #Recompute using same filename

  compressed = compressed0  # Remember if compressed files were actually used
  datafile.close()  # Only its presence mattered; data are loaded further down

  # Retrieve arguments and adm. info
  #
  R, reason = myload(argsfile,compressed)  # The original arguments
  argsfile.close()

  if reason > 0:
    admfile.close()  # Still open at this point - do not leak it
    return(None,FN,None,reason,None,None,None) #Recompute using same filename

  (argsref, kwargsref) = R

  R, reason = myload(admfile,compressed)
  admfile.close()
  if reason > 0:
    return(None,FN,None,reason,None,None,None) #Recompute using same filename


  depsref  = R[0]  # Dependency statistics
  comptime = R[1]  # The computation time
  coderef  = R[2]  # The byte code
  funcname = R[3]  # The function name

  # Check if dependencies have changed
  #
  if dependencies and not compare(depsref,deps):
    if verbose:
      print('MESSAGE (caching.py): Dependencies %s have changed - recomputing'
            % (dependencies,))
    # Don't use cached file - recompute
    reason = 2
    return(None,FN,None,reason,None,None,None)

  # Get bytecode from func
  #
  bytecode = get_bytecode(func)

  # Check if arguments or bytecode have changed
  # (bytecode is only taken into account if options['bytecode'] is set)
  #
  if compare(argsref,args) and compare(kwargsref,kwargs) and \
     (not options['bytecode'] or compare(bytecode,coderef)):

    # Arguments and dependencies match. Get cached results
    #
    T, loadtime, compressed, reason = load_from_cache(CD,FN,compressed)
    # Do not test 'T == None' here: it doesn't work if T is a numeric array
    if reason > 0:
      return(None,FN,None,reason,None,None,None) #Recompute using same FN

    Retrieved = 1
    reason = 0

    if verbose:
      msg4(funcname,args,kwargs,deps,comptime,loadtime,CD,FN,compressed)

      if loadtime >= comptime:
        print('WARNING (caching.py): Caching did not yield any gain.')
        print('                      Consider executing function  ('
              + funcname + ') without caching.')
  else:

    # Non matching arguments or bytecodes signify a hash-collision.
    # This is resolved by recursive search of cache filenames
    # until either a matching or an unused filename is found.
    #
    (T,FN,Retrieved,reason,comptime,loadtime,compressed) = \
       CacheLookup(CD,FN+'x',func,args,kwargs,deps,verbose,compression, \
                   dependencies)

    if not Retrieved:
      reason = 3     #The real reason is that args or bytecodes have changed.
                     #Not that the recursive search has found an unused filename

  return((T, FN, Retrieved, reason, comptime, loadtime, compressed))
914
915# -----------------------------------------------------------------------------
916
def _remove_cache_file(path):
  """Delete one cache file using the platform specific mechanism.

  USAGE:
    _remove_cache_file(path)
  """

  import os

  if unix:
    os.remove(path)
  else:
    # FIXME: os.remove reportedly doesn't work under windows, so the shell
    # is used. The path is quoted so that names containing spaces survive.
    os.system('del "' + path + '"')


def clear_cache(CD,func=None, verbose=None):
  """Clear cache for func.

  USAGE:
     clear_cache(CD, func, verbose)

  ARGUMENTS:
     CD --       Caching directory (required)
     func --     Function object (default: None)
     verbose --  Flag verbose output (default: None, meaning that
                 options['verbose'] is used)

  DESCRIPTION:

    If func is not given, clear everything in CD,
    otherwise clear only files whose names start with the name of func.

    When everything is cleared and verbose is set, the files are listed
    and the user is asked for confirmation on standard input.
    Without verbose the files are deleted without asking.
  """

  import os

  if CD[-1] != os.sep:
    CD = CD+os.sep

  if verbose is None:
    verbose = options['verbose']

  # FIXME: Windows version needs to be tested

  file_names = os.listdir(CD)

  if func:
    # Cache files are named after the function, so a prefix match suffices
    funcname = get_funcname(func)
    if verbose:
      print('MESSAGE (caching.py): Clearing %s' %(CD+funcname+'*'))

    for file_name in file_names:
      if file_name.startswith(funcname):
        _remove_cache_file(CD+file_name)
    return

  if len(file_names) == 0:
    return

  if verbose:
    print('MESSAGE (caching.py): Remove the following files:')
    for file_name in file_names:
      print(file_name)

    A = raw_input('Delete (Y/N)[N] ?')
  else:
    A = 'Y'

  if A == 'Y' or A == 'y':
    for file_name in file_names:
      _remove_cache_file(CD+file_name)
979
980# -----------------------------------------------------------------------------
981
def DeleteOldFiles(CD,verbose=None):
  """Remove expired files

  USAGE:
    DeleteOldFiles(CD,verbose=None)

  ARGUMENTS:
    CD --       Caching directory
    verbose --  Flag verbose output (default: None, meaning that
                options['verbose'] is used)

  DESCRIPTION:
    If CD holds more than options['maxfiles'] entries, the least recently
    accessed regular files are deleted: the excess plus an extra block
    of 1000 so that the clean-up does not trigger on every call.
    Nothing is done on non-unix platforms.
  """

  import os

  if verbose is None:
    verbose = options['verbose']

  maxfiles = options['maxfiles']

  block = 1000  # How many extra files to delete per invocation
  Files = os.listdir(CD)
  numfiles = len(Files)
  if not unix: return  # FIXME: Windows case ?

  if numfiles <= maxfiles:
    return

  delfiles = numfiles-maxfiles+block

  # Collect (access time, path) for regular files. Files that vanish in the
  # meantime (e.g. removed by another process) are simply skipped.
  candidates = []
  for name in Files:
    path = os.path.join(CD, name)
    if not os.path.isfile(path):
      continue
    try:
      candidates.append((os.path.getatime(path), path))
    except OSError:
      pass

  candidates.sort()  # Oldest access time first
  expired = [path for (atime, path) in candidates[:delfiles]]

  if verbose:
    print('Deleting %d expired files:' %len(expired))
    for path in expired:
      print(path)

  for path in expired:
    try:
      os.remove(path)
    except OSError:
      pass  # Best effort: keep going if one file cannot be removed
1010
1011# -----------------------------------------------------------------------------
1012
def save_args_to_cache(CD,FN,args,kwargs,compression):
  """Save arguments to cache

  USAGE:
    save_args_to_cache(CD,FN,args,kwargs,compression)

  ARGUMENTS:
    CD --           Caching directory (with trailing separator)
    FN --           Base name of the cache files
    args --         Positional arguments to be stored
    kwargs --       Keyword arguments to be stored
    compression --  Flag zlib compression

  DESCRIPTION:
    Store the tuple (args, kwargs) in the arguments file belonging to FN.
    Raise IOError if the file cannot be opened.
  """

  filename = CD+FN+'_'+file_types[1]
  (argsfile, compressed) = myopen(filename, 'wb', compression)

  if not argsfile:
    # myopen returns None on failure, so report the name we asked for
    msg = 'ERROR (caching): Could not open %s' %filename
    if options['verbose']:
      print(msg)
    raise IOError(msg)

  try:
    mysave((args,kwargs),argsfile,compression)  # Save args and kwargs to cache
  finally:
    argsfile.close()

  return
1043
1044# -----------------------------------------------------------------------------
1045
def save_results_to_cache(T, CD, FN, func, deps, comptime, funcname,
                          dependencies, compression):
  """Save computed results T and admin info to cache

  USAGE:
    save_results_to_cache(T, CD, FN, func, deps, comptime, funcname,
                          dependencies, compression)

  ARGUMENTS:
    T --             Result to be cached
    CD --            Caching directory (with trailing separator)
    FN --            Base name of the cache files
    func --          Function object (its bytecode is stored)
    deps --          Dependency statistics
    comptime --      Computation time
    funcname --      Name of the function
    dependencies --  Not used by this function
    compression --   Flag zlib compression

  DESCRIPTION:
    Write T to the data file and the tuple
    (deps, comptime, bytecode, funcname) to the admin file.
    Return the time it took to save T.
    Raise IOError if either file cannot be opened.
  """

  import time

  dataname = CD+FN+'_'+file_types[0]
  admname = CD+FN+'_'+file_types[2]
  (datafile, compressed1) = myopen(dataname,'wb',compression)
  (admfile, compressed2) = myopen(admname,'wb',compression)

  if not datafile or not admfile:
    # myopen returns None on failure. Close whichever file did open
    # and report the name of the one that did not.
    if not datafile:
      failed = dataname
      if admfile:
        admfile.close()
    else:
      failed = admname
      datafile.close()

    msg = 'ERROR (caching): Could not open %s' %failed
    if options['verbose']:
      print(msg)
    raise IOError(msg)

  try:
    t0 = time.time()

    mysave(T,datafile,compression)  # Save data to cache
    datafile.close()
    savetime = time.time()-t0

    bytecode = get_bytecode(func)  # Get bytecode from function object
    admtup = (deps, comptime, bytecode, funcname)  # Gather admin info

    mysave(admtup,admfile,compression)  # Save admin info to cache
  finally:
    # Closing an already closed file is harmless
    datafile.close()
    admfile.close()

  return(savetime)
1095
1096# -----------------------------------------------------------------------------
1097
def load_from_cache(CD,FN,compression):
  """Load previously cached data from file FN

  USAGE:
    load_from_cache(CD,FN,compression)

  ARGUMENTS:
    CD --           Caching directory (with trailing separator)
    FN --           Base name of the cache files
    compression --  Flag zlib compression

  DESCRIPTION:
    Return the tuple (T, loadtime, compressed, reason) where T is the
    cached result, loadtime the time spent loading it, compressed tells
    whether the file was compressed and reason is 0 on success.
    If the data file cannot be opened or read, T is None and
    reason is 5 (unreadable file).
  """

  import time

  (datafile, compressed) = myopen(CD+FN+'_'+file_types[0],"rb",compression)
  if not datafile:
    # myopen returns None if neither variant of the file could be opened
    return None, 0.0, compressed, 5

  t0 = time.time()
  try:
    T, reason = myload(datafile,compressed)
    loadtime = time.time()-t0
  finally:
    datafile.close()

  return T, loadtime, compressed, reason
1115
1116# -----------------------------------------------------------------------------
1117
def _try_open(name, mode):
  """Return an open file object for name or None if it cannot be opened."""

  try:
    return open(name, mode)
  except (IOError, OSError):
    return None


def myopen(FN,mode,compression=1):
  """Open file FN using given mode

  USAGE:
    myopen(FN,mode,compression=1)

  ARGUMENTS:
    FN --           File name to be opened
    mode --         Open mode (as in open)
    compression --  Flag zlib compression

  DESCRIPTION:
     if compression
       Attempt first to open FN + '.z'
       If this fails try to open FN
     else do the opposite
     Return file handle plus info about whether it was compressed or not.
     The file handle is None if neither name could be opened.
     On unix a newly created file is made readable and writable for
     everybody.
  """

  import os

  # Determine if file exists already (if writing was requested)
  # This info is only used to determine if access modes should be set
  #
  if 'w' in mode or 'a' in mode:
    new_file = not (os.path.exists(FN+'.z') or os.path.exists(FN))
  else:
    new_file = 0 #Assume it exists if mode was not 'w'

  # Preferred name first, the alternative as fall back
  if compression:
    candidates = [(FN+'.z', 1), (FN, 0)]
  else:
    candidates = [(FN, 0), (FN+'.z', 1)]

  handle = None
  compressed = 0
  for (name, is_compressed) in candidates:
    handle = _try_open(name, mode)
    if handle:
      compressed = is_compressed
      break

  # Now set access rights if it is a new file
  #
  if handle and new_file:
    if unix:
      try:
        os.chmod(handle.name, 438)  # 438 == 0666 octal (rw-rw-rw-)
      except OSError:
        pass  # Best effort only
    else:
      pass  # FIXME: Take care of access rights under Windows

  return(handle,compressed)
1187
1188# -----------------------------------------------------------------------------
1189
def myload(file, compressed):
  """Load data from file

  USAGE:
    myload(file, compressed)

  ARGUMENTS:
    file --        Open file object to read from
    compressed --  Flag telling whether the content is zlib compressed

  DESCRIPTION:
    Return the tuple (R, reason) where R is the unpickled object and
    reason is 0. If the content cannot be decompressed or unpickled
    (e.g. the file is empty or corrupted) return (None, 5).

    NOTE: Unpickling can execute arbitrary code, so cache files must only
    come from trusted sources.
  """

  reason = 0
  try:
    if compressed:
      import zlib

      RsC = file.read()
      try:
        Rs  = zlib.decompress(RsC)
      except Exception:
        # E.g. zlib.error: Error -5 while decompressing data
        reason = 5  #(Unreadable file)
        return None, reason

      del RsC  # Free up some space
      try:
        R   = pickler.loads(Rs)
      except MemoryError:
        raise  # Reported by the handler below
      except Exception:
        # Decompressed fine but is not a valid pickle
        reason = 5  #(Unreadable file)
        return None, reason
    else:
      try:
        R = pickler.load(file)
      except Exception:
        #Catch e.g., file with 0 length or corrupted
        reason = 5  #(Unreadable file)
        return None, reason

  except MemoryError:
    if options['verbose']:
      print('ERROR (caching): Out of memory while loading %s, aborting' \
            %(file.name))

    # Raise the error again for now (keeping the original traceback)
    #
    raise

  return R, reason
1239
1240# -----------------------------------------------------------------------------
1241
def mysave(T,file,compression):
  """Save data T to file

  USAGE:
    mysave(T,file,compression)

  ARGUMENTS:
    T --            Object to be pickled
    file --         Open file object to write to
    compression --  Flag zlib compression

  DESCRIPTION:
    Pickle T using the mode given by options['bin'].
    With compression the pickle is built in memory, compressed with zlib
    and then written, otherwise it is pickled straight to the file.
    Raise Exception if compression is requested but zlib is missing and
    MemoryError if the data cannot be pickled in memory.
  """

  bin = options['bin']

  if not compression:
    #Uncompressed pickling
    #
    # FIXME: This may not work on Windoze network drives.
    # The error msg is IOError: [Errno 22] Invalid argument
    # Testing with small files was OK, though.
    # I think this is an OS problem: IOError is what Python raises when
    # an I/O operation fails for an I/O related reason at the OS level
    # (see the description of IOError and of the errno module in the
    # Python library reference).
    pickler.dump(T, file, bin)
    return

  try:
    import zlib
  except ImportError:
    print('')
    print('*** Could not find zlib ***')
    print('*** Try to run caching with compression off ***')
    print("*** caching.set_option('compression', 0) ***")
    raise Exception('Could not find zlib - run caching with compression off')

  try:
    Ts  = pickler.dumps(T, bin)
  except MemoryError:
    msg = '****WARNING (caching.py): Could not pickle data for compression.'
    msg += ' Try using compression = False'
    raise MemoryError(msg)

  #Compressed pickling
  TsC = zlib.compress(Ts, comp_level)
  file.write(TsC)
1310
1311# -----------------------------------------------------------------------------
1312
def myhash(T):
  """Compute hashed integer from hashable values of tuple T

  USAGE:
    myhash(T)

  ARGUMENTS:
    T -- Tuple (lists, dictionaries, None and other objects are
         accepted as well)

  DESCRIPTION:
    None always hashes to 1.
    Tuples and lists (including subclasses) are hashed recursively,
    element by element. Dictionaries are passed on to dicthash.
    Anything else is hashed with the built-in hash. Objects that cannot
    be hashed yield 1, except Numeric arrays which are hashed element
    by element.
  """

  # On some architectures None gets different hash values
  if T is None:
    return(1)

  # Get hash vals for hashable entries
  #
  if isinstance(T, (tuple, list)):
    return(hash(tuple([myhash(item) for item in T])))

  if isinstance(T, dict):
    return(dicthash(T))

  try:
    return(hash(T))
  except Exception:
    pass  # Not hashable - try the special cases below

  try:
    import Numeric
  except ImportError:
    return(1)

  try:
    if type(T) == Numeric.ArrayType:
      return(hash(tuple([myhash(e) for e in T])))
  except Exception:
    pass

  return(1)  #Could implement other Numeric types here
1358
1359# -----------------------------------------------------------------------------
1360
def dicthash(D):
  """Compute hashed integer from the values of dictionary D

  USAGE:
    dicthash(D)

  DESCRIPTION:
    Every value of D is hashed with myhash, in the order given by
    D.keys(), and the collected hash values are hashed as one tuple.
    The keys themselves do not enter the result. Values for which
    hashing fails are left out.
  """

  # Collect hash values for the entries that can be hashed
  #
  hvals = []
  for key in D.keys():
    try:
      hvals.append(myhash(D[key]))
    except:
      pass

  # Fold the collected values into one
  #
  return(hash(tuple(hvals)))
1383
1384# -----------------------------------------------------------------------------
1385
def compare(A,B):
  """Safe comparison of general objects

  USAGE:
    compare(A,B)

  DESCRIPTION:
    Return the result of A == B. If that comparison raises an exception
    the pickled representations of A and B are compared instead, and if
    pickling fails as well 0 is returned.
  """

  # First choice: the objects' own notion of equality
  try:
    return(A == B)
  except:
    pass

  # Second choice: equality of the serialised forms
  try:
    return(pickler.dumps(A) == pickler.dumps(B))
  except:
    return(0)
1405
1406# -----------------------------------------------------------------------------
1407
def nospace(s):
  """Replace spaces in string s with underscores

  USAGE:
    nospace(s)

  ARGUMENTS:
    s -- string

  DESCRIPTION:
    Return a copy of s in which every space character has been
    replaced by an underscore. All other characters are kept.
  """

  pieces = []
  for ch in s:
    if ch == ' ':
      pieces.append('_')
    else:
      pieces.append(ch)

  return(''.join(pieces))
1428
1429# -----------------------------------------------------------------------------
1430
def get_funcname(func):
  """Retrieve name of function object func (depending on its type)

  USAGE:
    get_funcname(func)

  DESCRIPTION:
    For Python functions and built-in functions the name of the function
    is used. For any other object the name is built from its repr with
    the characters <, > and ' removed and runs of whitespace collapsed.
    In both cases remaining spaces are replaced by underscores.
  """

  import types

  if type(func) in (types.FunctionType, types.BuiltinFunctionType):
    name = func.__name__
  else:
    text = repr(func)
    for ch in "<>'":
      text = text.replace(ch, ' ')
    name = ' '.join(text.split())

  return(nospace(name))
1452
1453# -----------------------------------------------------------------------------
1454
def get_bytecode(func):
  """ Get bytecode from function object.

  USAGE:
    get_bytecode(func)

  DESCRIPTION:
    Return the tuple (bytecode, consts, argcount, defaults) of func.
    Python functions and methods are supported. For anything else
    (e.g. built-in functions) the tuple (None, 0, 0, 0) is returned.
  """

  import types

  kind = type(func)
  if kind == types.FunctionType:
    target = func
  elif kind == types.MethodType:
    target = func.im_func  # The plain function behind the method
  else:
    # Built-in functions are assumed not to change
    return (None, 0, 0, 0)

  code = target.func_code
  return (code.co_code, code.co_consts, code.co_argcount,
          target.func_defaults)
1482
1483# -----------------------------------------------------------------------------
1484
def get_depstats(dependencies):
  """ Build dictionary of dependency files and their size and mod. time.

  USAGE:
    get_depstats(dependencies):

  ARGUMENTS:
    dependencies -- File name or list of file names. Wildcards are
                    expanded using glob. May be None or empty.

  DESCRIPTION:
    Return a dictionary mapping each (expanded) file name to the tuple
    (size, mtime). An Exception is raised if a dependency is not a string
    or if it does not match any existing file.
  """

  import glob

  d = {}
  if not dependencies:
    return(d)

  if isinstance(dependencies, str):
    # A single file name: don't iterate over its characters
    dependencies = [dependencies]

  #Expand any wildcards
  expanded_dependencies = []
  for FN in dependencies:
    # Validate before globbing: glob only ever returns names of existing
    # files, so checks made afterwards could never fail.
    if not isinstance(FN, str):
      errmsg = 'ERROR (caching.py): Dependency must be a string.\n'
      errmsg += '                    Dependency given: %s' %(FN,)
      raise Exception(errmsg)

    expanded_FN = glob.glob(FN)
    if not expanded_FN:
      errmsg = 'ERROR (caching.py): Dependency '+FN+' does not exist.'
      raise Exception(errmsg)

    expanded_dependencies += expanded_FN

  for FN in expanded_dependencies:
    (size,atime,mtime,ctime) = filestat(FN)

    # We don't use atime because that would cause recomputation every time.
    # We don't use ctime because that is irrelevant and confusing for users.
    d[FN] = (size,mtime)

  return(d)
1521
1522# -----------------------------------------------------------------------------
1523
def filestat(FN):
  """A safe wrapper using os.stat to get basic file statistics
     The built-in os.stat breaks down if file sizes are too large (> 2GB ?)

  USAGE:
    filestat(FN)

  DESCRIPTION:
     Return the tuple (size, atime, mtime, ctime) for file FN.

     If os.stat fails, the information is obtained by parsing the output
     of ls instead (unix only, an Exception is raised elsewhere).

     To avoid the fall back, Python must be compiled with large file
     support, i.e. with the flags reported by getconf LFS_CFLAGS, as
     given in section 8.1.1 Large File Support in the Library Reference
  """

  import os, tempfile

  try:
    stats = os.stat(FN)
    size  = stats[6]
    atime = stats[7]
    mtime = stats[8]
    ctime = stats[9]
  except Exception:

    # Hack to get the results anyway (works only on Unix at the moment)
    #
    print('Hack to get os.stat when files are too large')

    if not unix:
      # FIXME: Windows case
      raise Exception('Could not get file statistics for %s' %FN)

    # Unique, unpredictable scratch file for the output of ls
    (fd, tmp) = tempfile.mkstemp(prefix='cach.tmp.')
    os.close(fd)

    try:
      # NOTE(review): FN is passed to the shell unquoted, as before
      #
      # Get size and access time (atime)
      #
      exitcode=os.system('ls -l --full-time --time=atime '+FN+' > '+tmp)
      (size,atime) = get_lsline(tmp)

      # Get size and modification time (mtime)
      #
      exitcode=os.system('ls -l --full-time '+FN+' > '+tmp)
      (size,mtime) = get_lsline(tmp)

      # Get size and ctime
      #
      exitcode=os.system('ls -l --full-time --time=ctime '+FN+' > '+tmp)
      (size,ctime) = get_lsline(tmp)
    finally:
      try:
        os.remove(tmp)
      except OSError:
        pass

  return(long(size),atime,mtime,ctime)
1581
1582# -----------------------------------------------------------------------------
1583
def get_lsline(FN):
  """get size and time for filename

  USAGE:
    get_lsline(file_name)

  DESCRIPTION:
    Read in one line 'ls -la' item from file (generated by filestat) and
    convert time to seconds since epoch. Return file size and time.

    The whitespace separated fields 5 to 9 (counting from 0) are expected
    to hold weekday, month, day, time of day and year.
    The size (field 4) is returned as a string.
  """

  import time

  f = open(FN,'r')
  try:
    info = f.read().split()
  finally:
    f.close()

  size = info[4]
  week = info[5]
  mon  = info[6]
  day  = info[7]
  hour = info[8]
  year = info[9]

  # Same layout as produced by time.asctime, the default of time.strptime
  timestamp = week+' '+mon+' '+day+' '+hour+' '+year
  timetup = time.strptime(timestamp)
  t = time.mktime(timetup)
  return(size, t)
1612
1613# -----------------------------------------------------------------------------
1614
def checkdir(CD,verbose=None):
  """Check or create caching directory

  USAGE:
    checkdir(CD,verbose):

  ARGUMENTS:
    CD -- Directory
    verbose -- Flag verbose output (default: None)

  DESCRIPTION:
    If CD does not exist it will be created if possible.
    If it cannot be created, /tmp/ (unix) or the root of drive C is
    used instead.

    Return the directory to use, with ~ expanded and with a trailing
    separator. An empty CD is returned unchanged.
  """

  import os
  import os.path

  if CD and CD[-1] != os.sep:
    CD = CD + os.sep  # Add separator for directories

  CD = os.path.expanduser(CD) # Expand ~ or ~user in pathname

  # Nothing to do if the directory is usable (both readable and writable)
  if CD == '' or os.access(CD,os.R_OK | os.W_OK):
    return(CD)

  try:
    os.mkdir(CD)
  except OSError:
    print('WARNING: Directory %s could not be created.' %CD)
    if unix:
      CD = '/tmp/'
    else:
      CD = 'C:' + os.sep  # Callers append file names directly
    print('Using directory %s instead' %CD)
    return(CD)

  # Change access rights if possible
  #
  if unix:
    try:
      os.chmod(CD, 511)  # 511 == 0777 octal (rwxrwxrwx)
    except OSError:
      pass  # Best effort only
  else:
    pass  # FIXME: What about acces rights under Windows?

  if verbose:
    print('MESSAGE: Directory %s created.' %CD)

  return(CD)
1656
1657#==============================================================================
1658# Statistics
1659#==============================================================================
1660
def addstatsline(CD,funcname,FN,Retrieved,reason,comptime,loadtime,
                 compression):
  """Add stats entry

  USAGE:
    addstatsline(CD,funcname,FN,Retrieved,reason,comptime,loadtime,compression)

  ARGUMENTS:
    CD --           Caching directory (with trailing separator)
    funcname --     Not used by this function
    FN --           Base name of the cache files
    Retrieved --    Flag telling whether the result came from the cache
    reason --       Reason code recorded with the entry
    comptime --     Computation time
    loadtime --     Time spent loading the cached result
    compression --  Flag zlib compression

  DESCRIPTION:
    Make one entry in the stats file about one cache hit recording time saved
    and other statistics. The data are used by the function cachestat.

    The entry is a comma separated line holding date, user, FN, size of
    the result file (-1 if it cannot be determined), compression, hit,
    reason, comptime and loadtime. The stats file is named after the
    current month and year.

    Problems are reported as warnings on standard output and are
    never raised.
  """

  import os, time

  TimeTuple = time.localtime(time.time())
  extension = time.strftime('%b%Y',TimeTuple)
  SFN = CD+statsfile+'.'+extension

  try:
    (statfile, dummy) = myopen(SFN,'a',compression=0)
  except Exception:
    statfile = None

  if not statfile:
    print('Warning: Stat file could not be opened')
    return

  try:
    try:
      user = os.environ.get('USER', 'Nobody')
      date = time.asctime(TimeTuple)

      if Retrieved:
        hit = '1'
      else:
        hit = '0'

      # Get size of result file
      #
      resultfile = CD+FN+'_'+file_types[0]
      if compression:
        resultfile = resultfile+'.z'

      try:
        size = os.stat(resultfile)[6]
      except OSError:
        size = -1  # Error condition, but don't crash. This is just statistics

      # Build entry
      fields = [date,
                user,
                FN,
                str(int(size)),
                str(compression),
                hit,
                str(reason),
                str(round(comptime,4)),
                str(round(loadtime,4))]

      statfile.write(','.join(fields) + CR)
    finally:
      statfile.close()
  except Exception:
    print('Warning: Writing of stat file failed')
1734
1735# -----------------------------------------------------------------------------
1736
1737# FIXME: should take cachedir as an optional arg
1738#
def __cachestat(sortidx=4,period=-1,showuser=None,cachedir=None):
  """  List caching statistics.

  USAGE:
    __cachestat(sortidx=4,period=-1,showuser=None,cachedir=None):

  ARGUMENTS:
    sortidx --   Index into Fields[1:] of the field the tables are
                 sorted by (default: 4, i.e. 'Saved(s)')
    period --    -1: use all available stats files (default),
                 otherwise: use the stats file of the current month only
    showuser --  Flag: also print a table per user (default: None)
    cachedir --  Caching directory
                 (default: None, meaning options['cachedir'])

  DESCRIPTION:
      Generate statistics of caching efficiency.
      The parameter sortidx determines by what field lists are sorted.
      If the optional keyword period is set to -1,
      all available caching history is used.
      If it is 0 only the current month is used.
      Future versions will include more than one month....
      OMN 20/8/2000

      Records are the comma separated lines written by addstatsline.
      Lines that cannot be parsed are counted as discarded.
  """

  import os
  import os.path
  from time import strptime, localtime, strftime, mktime, ctime

  # sortidx = 4    # Index into Fields[1:]. What to sort by.

  Fields = ['Name', 'Hits', 'Exec(s)', \
            'Cache(s)', 'Saved(s)', 'Gain(%)', 'Size']
  Widths = [25,7,9,9,9,9,13]
  Types = ['s','d','.2f','.2f','.2f','.2f','d']

  Dictnames = ['Function', 'User']

  if not cachedir:
    cachedir = checkdir(options['cachedir'])

  SD = os.path.expanduser(cachedir)  # Expand ~ or ~user in pathname

  if period == -1:  # Take all available stats
    SFILENAME = statsfile
  else:  # Only stats from current month
       # MAKE THIS MORE GENERAL SO period > 0 counts several months backwards!
    SFILENAME = statsfile+'.'+strftime('%b%Y',localtime())

  SF = [FN for FN in os.listdir(SD) if FN.find(SFILENAME) >= 0]

  total_read = 0
  total_hits = 0
  total_discarded = 0
  firstday = mktime(strptime('2030','%Y'))
             # FIXME: strptime don't exist in WINDOWS ?
  lastday = 0

  FuncDict = {}
  UserDict = {}
  for FN in SF:
    path = os.path.join(SD, FN)
    print('Reading file  %s' %path)

    infile = open(path,'r')
    try:
      for line in infile:
        total_read = total_read + 1
        record = line.rstrip().split(',')

        if len(record) not in [8,9]:
          total_discarded = total_discarded + 1
          continue

        try:
          t           = mktime(strptime(record[0]))
          user        = record[1]
          func        = record[2]
          size        = float(record[3])
          int(record[4])                   # Compression. Not used here
          hit         = int(record[5])
          int(record[6])                   # Reason. Not used here
          cputime     = float(record[7])
          loadtime    = float(record[8])
        except (ValueError, IndexError, OverflowError):
          # Malformed record. NOTE(review): this includes records with
          # only 8 fields, whose layout is not known here.
          total_discarded = total_discarded + 1
          continue

        if t > lastday:
          lastday = t
        if t < firstday:
          firstday = t

        # Strip hash-stamp off
        #
        i = func.find('[')
        if i >= 0:
          func = func[:i]

        if hit:
          total_hits = total_hits + 1
          saving = cputime-loadtime

          if cputime != 0:
            rel_saving = round(100.0*saving/cputime,2)
          else:
            rel_saving = 100.0 - round(1.0*saving,2)  # A bit of a hack

          info = [1,cputime,loadtime,saving,rel_saving,size]

          UpdateDict(UserDict,user,info)
          UpdateDict(FuncDict,func,info)
        else:
          pass #Stats on recomputations and their reasons could go in here
    finally:
      infile.close()

  # Compute averages of all sums and write list
  #

  total_valid = total_read - total_discarded
  if total_valid == 0:
    printline(Widths,'=')
    print('CACHING STATISTICS: No valid records read')
    printline(Widths,'=')
    return

  print('')
  printline(Widths,'=')
  print('CACHING STATISTICS: '+ctime(firstday)+' to '+ctime(lastday))
  printline(Widths,'=')
  print('  Total number of valid records %s' %total_valid)
  print('  Total number of discarded records %s' %total_discarded)
  print('  Total number of hits %s' %total_hits)
  print('')

  print('  Fields %s are averaged over number of hits' %(Fields[2:],))
  print('  Time is measured in seconds and size in bytes')
  print('  Tables are sorted by %s' %Fields[1:][sortidx])

  if showuser:
    Dictionaries = [FuncDict, UserDict]
  else:
    Dictionaries = [FuncDict]

  i = 0
  for Dict in Dictionaries:
    # Turn the sums into averages. Entry 0 is the number of hits.
    for key in Dict.keys():
      rec = Dict[key]
      for n in range(1, len(rec)):
        rec[n] = round(1.0*rec[n]/rec[0],2)

    # Sort and output
    #
    keylist = SortDict(Dict,sortidx)

    # Write Header
    #
    print('')
    printline(Widths,'-')
    header = []
    for n in range(len(Fields)):
      s = Fields[n]
      if n == 0:  # Left justify
        s = Dictnames[i] + ' ' + s; i=i+1
        header.append(('%-' + str(Widths[n]) + 's') %s)
      else:
        header.append(('%' + str(Widths[n]) + 's') %s)
    print(' '.join(header))
    printline(Widths,'-')

    # Output Values
    #
    for key in keylist:
      rec = Dict[key]
      label = key
      if len(label) > Widths[0]: label = label[:Widths[0]-3] + '...'
      row = [('%-' + str(Widths[0]) + Types[0]) %label]
      n = 1
      for val in rec:
        row.append(('%' + str(Widths[n]) + Types[n]) %val)
        n = n+1
      print(' '.join(row))
    print('')
1933
1934#==============================================================================
1935# Auxiliary stats functions
1936#==============================================================================
1937
def UpdateDict(Dict,key,info):
  """Update dictionary by adding new values to existing.

  USAGE:
    UpdateDict(Dict,key,info)

  ARGUMENTS:
    Dict -- Dictionary of lists (modified in place)
    key --  Key of the entry to update
    info -- List of values to add

  DESCRIPTION:
    If key is present in Dict, the values of info are added element by
    element to the list stored under key. Otherwise a copy of info is
    stored under key. Dict is returned.
  """

  if key in Dict:
    dinfo = Dict[key]
    for n in range(len(dinfo)):
      dinfo[n] = info[n] + dinfo[n]
  else:
    dinfo = info[:]  # Make a copy of info list

  Dict[key] = dinfo
  return Dict
1954
1955# -----------------------------------------------------------------------------
1956
def SortDict(Dict,sortidx=0):
  """Return the keys of a dictionary ordered by one field of its records.

  USAGE:
    SortDict(Dict,sortidx)

  ARGUMENTS:
    Dict --    Dictionary whose values are lists or tuples. Any other
               value is treated as a record with a single field.
    sortidx -- Number of the field to sort on (default 0).

  DESCRIPTION:
    Sort dictionary of lists according to field number 'sortidx'.
    The keys are returned as a list in ascending order of that field;
    keys whose fields compare equal are ordered by the keys themselves.

  EXCEPTIONS:
    IndexError is raised if sortidx lies beyond the end of a record.
    The problem is also printed if options['verbose'] is set.
  """

  pairs = []
  for key in Dict:
    rec = Dict[key]
    if not isinstance(rec, (list, tuple)):
      rec = [rec]  # Scalar entry: treat as a one-field record

    if sortidx > len(rec)-1:
      msg = 'Sorting index too large, sortidx = %s' % str(sortidx)
      if options['verbose']:
        print('ERROR: ' + msg)
      raise IndexError(msg)

    pairs.append((rec[sortidx], key))

  # Tuples compare field first and key second, which settles ties by key
  pairs.sort()

  return [key for (val, key) in pairs]
1989
1990# -----------------------------------------------------------------------------
1991
def printline(Widths,char):
  """Print a horizontal rule spanning a row of fixed-width fields.

  USAGE:
    printline(Widths,char)

  DESCRIPTION:
    The rule holds char repeated Widths[n] times for every field n,
    plus one extra char for every field after the first.
  """

  segments = [width*char for width in Widths]

  # Joining with char supplies the one extra char per additional field
  print(char.join(segments))
2006
2007#==============================================================================
2008# Messages
2009#==============================================================================
2010
def msg1(funcname,args,kwargs,reason):
  """Message 1 - print the 'Evaluating function' report.

  USAGE:
    msg1(funcname,args,kwargs,reason)

  DESCRIPTION:
    Prints a header box reading 'Evaluating function <funcname>',
    the arguments (msg7), the reason (msg8) and a closing footer.
  """

  print_header_box('Evaluating function %s' %funcname)

  msg7(args,kwargs)
  msg8(reason)

  print_footer()
2055   
2056   
2057
2058# -----------------------------------------------------------------------------
2059
def msg2(funcname,args,kwargs,comptime,reason):
  """Message 2 - print the first part of the 'storing' report.

  USAGE:
    msg2(funcname,args,kwargs,comptime,reason)

  DESCRIPTION:
    Prints a header box reading 'Caching statistics (storing)', the
    function name and arguments (msg6), the reason (msg8) and the
    CPU time comptime rounded to two decimals.
    No footer is printed by this function.
  """

  print_header_box('Caching statistics (storing)')

  msg6(funcname,args,kwargs)
  msg8(reason)

  cpu = str(round(comptime,2))
  print('| CPU time:'.ljust(textwidth1) + cpu + ' seconds')
2081
2082# -----------------------------------------------------------------------------
2083
def msg3(savetime, CD, FN, deps,compression):
  """Message 3 - print estimated loading time and cache file details.

  USAGE:
    msg3(savetime, CD, FN, deps,compression)

  DESCRIPTION:
    Prints savetime, rounded to two decimals, as the estimated loading
    time and then hands CD, FN, deps and compression over to msg5.
  """

  label = '| Loading time:'.ljust(textwidth1)
  print(label + str(round(savetime,2)) + ' seconds (estimated)')

  msg5(CD,FN,deps,compression)
2095
2096# -----------------------------------------------------------------------------
2097
def msg4(funcname,args,kwargs,deps,comptime,loadtime,CD,FN,compression):
  """Message 4 - print the 'retrieving' report.

  USAGE:
    msg4(funcname,args,kwargs,deps,comptime,loadtime,CD,FN,compression)

  DESCRIPTION:
    Prints a header box reading 'Caching statistics (retrieving)', the
    function name and arguments (msg6), then CPU time, loading time and
    time saved (comptime-loadtime), each rounded to two decimals, and
    finally the cache file and dependency details (msg5).
  """

  print_header_box('Caching statistics (retrieving)')

  msg6(funcname,args,kwargs)

  cpu = str(round(comptime,2))
  print('| CPU time:'.ljust(textwidth1) + cpu + ' seconds')

  loading = str(round(loadtime,2))
  print('| Loading time:'.ljust(textwidth1) + loading + ' seconds')

  saved = str(round(comptime-loadtime,2))
  print('| Time saved:'.ljust(textwidth1) + saved + ' seconds')

  msg5(CD,FN,deps,compression)
2115
2116# -----------------------------------------------------------------------------
2117
def msg5(CD,FN,deps,compression):
  """Message 5 - print cache file and dependency details.

  USAGE:
    msg5(CD,FN,deps,compression)

  ARGUMENTS:
    CD --          Caching directory. File names are appended to it as
                   they are, without inserting a separator.
    FN --          Common first part of the cache file names.
    deps --        Dictionary mapping each dependency name to a record;
                   item 0 is printed as a byte count and item 1 is
                   formatted with time.ctime.
    compression -- If true, the files are looked up with suffix '.z' and
                   their sizes are reported as compressed.

  DESCRIPTION:
    Print dependency stats. Used by msg3 and msg4.
    One line with name and size on disk is printed for every entry of
    file_types, followed by a column-aligned table of the dependencies
    (or '| No dependencies') and a footer.

  EXCEPTIONS:
    Whatever os.stat raises if one of the cache files cannot be examined.
  """

  import os
  import time

  print('|')
  print('| Caching dir: '.ljust(textwidth1) + CD)

  if compression:
    suffix = '.z'
    bytetext = 'bytes, compressed'
  else:
    suffix = ''
    bytetext = 'bytes'

  for file_type in file_types:
    file_name = FN + '_' + file_type + suffix
    label = ('| ' + file_type + ' file: ').ljust(textwidth1)

    # Examine the file before printing anything about it, so that a
    # failing os.stat cannot leave half a line on the screen
    size = os.stat(CD+file_name).st_size
    print(label + file_name + ' (' + str(size) + ' ' + bytetext + ')')

  print('|')
  if deps:
    print('| Dependencies:  ')

    # One (name, time stamp, size) row of text per dependency
    rows = []
    for d in deps:
      stats = deps[d]
      rows.append((d, time.ctime(stats[1]), str(stats[0])))

    # Column widths are those of the longest text in each column
    maxd = max([len(row[0]) for row in rows])
    maxt = max([len(row[1]) for row in rows])
    maxs = max([len(row[2]) for row in rows])

    for (d, t, s) in rows:
      d = (d+':').ljust(maxd+1)
      t = t.ljust(maxt)
      s = s.rjust(maxs)

      print('|  ' + d + ' ' + t + '   ' + s + ' bytes')
  else:
    print('| No dependencies')
  print_footer()
2177
2178# -----------------------------------------------------------------------------
2179
def msg6(funcname,args,kwargs):
  """Message 6 - print function name and arguments.

  USAGE:
    msg6(funcname,args,kwargs)

  DESCRIPTION:
    Prints funcname after the label '| Function:' and then the
    arguments by means of msg7.
  """

  label = '| Function:'.ljust(textwidth1)
  print(label + funcname)

  msg7(args,kwargs)
2191 
2192# -----------------------------------------------------------------------------   
2193
def msg7(args,kwargs):
  """Message 7 - print positional and keyword arguments.

  USAGE:
    msg7(args,kwargs)

  DESCRIPTION:
    A single positional argument is shown on its own, several are shown
    together as one sequence, and keyword arguments are shown as a
    dictionary. The texts are generated by mkargstr using textwidth2.
    If both args and kwargs are empty, '| No arguments' is printed.
  """

  if not args and not kwargs:
    print('| No arguments')  # Default if no args or kwargs present
    return

  if args:
    if len(args) == 1:
      label, shown = '| Argument:', args[0]
    else:
      label, shown = '| Arguments:', args
    print(label.ljust(textwidth1) + mkargstr(shown, textwidth2))

  if kwargs:
    if len(kwargs) == 1:
      label = '| Keyword Arg:'
    else:
      label = '| Keyword Args:'
    print(label.ljust(textwidth1) + mkargstr(kwargs, textwidth2))
2224
2225# -----------------------------------------------------------------------------
2226
def msg8(reason):
  """Message 8 - print the reason for evaluating the function.

  USAGE:
    msg8(reason)

  DESCRIPTION:
    reason is looked up in Reason_msg and the text found is printed
    after the label '| Reason:'. If reason is not a valid index or key
    of Reason_msg, the text 'Unknown' is printed instead.
  """

  try:
    R = Reason_msg[reason]
  except (LookupError, TypeError):
    # Only a failed lookup means 'Unknown'; a bare except would also
    # swallow KeyboardInterrupt and genuine programming errors
    R = 'Unknown'

  print('| Reason:'.ljust(textwidth1) + R)
2242   
2243# -----------------------------------------------------------------------------
2244
2245def print_header_box(line):
2246  """Print line in a nice box.
2247 
2248  USAGE:
2249    print_header_box(line)
2250
2251  """
2252  global textwidth3
2253
2254  import time
2255
2256  time_stamp = time.ctime(time.time())
2257  line = time_stamp + '. ' + line
2258   
2259  N = len(line) + 1
2260  s = '+' + '-'*N + CR
2261
2262  print s + '| ' + line + CR + s,
2263
2264  textwidth3 = N
2265
2266# -----------------------------------------------------------------------------
2267   
def print_footer():
  """Print a rule of the width last recorded in textwidth3.

  DESCRIPTION:
    The rule is '+' followed by textwidth3 dashes and CR. It is written
    with print, which adds a newline of its own after the rule.
  """

  print('+' + '-'*textwidth3 + CR)
2276     
2277# -----------------------------------------------------------------------------
2278
def mkargstr(args, textwidth, argstr = ''):
  """Generate a string containing first textwidth characters of arguments.

  USAGE:
    mkargstr(args, textwidth, argstr = '')

  ARGUMENTS:
    args --      Object to render. Tuples, lists and dictionaries are
                 rendered item by item, strings are put in single quotes
                 and anything else is rendered by str().
    textwidth -- Number of characters kept if the text gets longer than
                 this; three dots are then appended.
    argstr --    Text to which the rendering is appended (default '').

  DESCRIPTION:
    Much the same as str(args) possibly followed by truncation,
    but faster if args is huge, because a container is abandoned as soon
    as the text exceeds textwidth.
    Numeric arrays with more than textwidth elements are shown as
    'Array: ' followed by their shape.
    Empty tuples, lists and dictionaries are rendered as (), [] and {}.
  """

  WasTruncated = 0

  # Exact type tests on purpose: instances of subclasses are rendered
  # by their own str()
  kind = type(args)

  if kind not in (tuple, list, dict):
    if kind == str:
      argstr = argstr + "'"+str(args)+"'"
    else:
      # Truncate large Numeric arrays before using str()
      try:
        import Numeric
      except ImportError:
        Numeric = None  # Not installed, so args cannot be a Numeric array

      if Numeric is not None and kind == Numeric.ArrayType:
        if len(args.flat) > textwidth:
          args = 'Array: ' + str(args.shape)

      argstr = argstr + str(args)
  else:
    items = 0  # Number of items rendered so far

    if kind == dict:
      argstr = argstr + "{"
      for key in args:
        argstr = argstr + mkargstr(key, textwidth) + ": " + \
                 mkargstr(args[key], textwidth) + ", "
        items = items + 1
        if len(argstr) > textwidth:
          WasTruncated = 1
          break

      # Strip off trailing comma and space. Nothing must be stripped
      # from an empty dictionary, or the opening brace would be lost.
      if items > 0:
        argstr = argstr[:-2]
      argstr = argstr + "}"

    else:
      if kind == tuple:
        lc = '('
        rc = ')'
      else:
        lc = '['
        rc = ']'
      argstr = argstr + lc
      for arg in args:
        argstr = argstr + mkargstr(arg, textwidth) + ', '
        items = items + 1
        if len(argstr) > textwidth:
          WasTruncated = 1
          break

      # Strip off trailing comma and space unless singleton tuple,
      # which keeps its comma. Nothing must be stripped from an empty
      # sequence, or the opening bracket would be lost.
      if items > 0:
        if kind == tuple and len(args) == 1:
          argstr = argstr[:-1]
        else:
          argstr = argstr[:-2]
      argstr = argstr + rc

  if len(argstr) > textwidth:
    WasTruncated = 1

  if WasTruncated:
    argstr = argstr[:textwidth]+'...'
  return(argstr)
2345
2346# -----------------------------------------------------------------------------
2347
def test_OK(msg):
  """Report that a test was passed.

  USAGE
    test_OK(message)

  DESCRIPTION:
    Prints msg left justified in a field of width textwidth4,
    followed by ' - OK'.
  """

  padded = msg.ljust(textwidth4)
  print(padded + ' - OK')
2360 
2361# -----------------------------------------------------------------------------
2362
2363def test_error(msg):
2364  """Print error if test fails.
2365 
2366  USAGE
2367    test_error(message)
2368  """
2369 
2370  print 'ERROR (caching.test): %s' %msg
2371  print 'Please send this code example and output to '
2372  print 'Ole.Nielsen@anu.edu.au'
2373  print
2374  print
2375 
2376  #import sys
2377  #sys.exit()
2378  raise StandardError
Note: See TracBrowser for help on using the repository browser.