Changeset 4501


Ignore:
Timestamp:
May 28, 2007, 1:14:38 PM (18 years ago)
Author:
nick
Message:

fixed split function to run quicker and use less memory

Location:
anuga_core/source/anuga/geospatial_data
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • anuga_core/source/anuga/geospatial_data/geospatial_data.py

    r4484 r4501  
    1010from Numeric import concatenate, array, Float, shape, reshape, ravel, take, \
    1111                        size, shape
    12 from random import randint
     12#from Array import tolist
     13from RandomArray import randint
    1314from copy import deepcopy
    1415
     
    675676   
    676677    def split(self, factor=0.5, verbose=False):
    677         """Returns two geospatial_data object, first is size of the 'factor'
    678         smaller the original and the second is the remainer. The two new
     678        """Returns two geospatial_data objects; the first is 'factor' times the
     679        size of the original and the second is the remainder. The two new
    679680        objects are disjoint sets of each other.
    680681       
     
    682683        AND if factor is a decimal it will round (2.25 to 2 and 2.5 to 3)
    683684       
    684        
     685        This method creates two lists of indices which are passed into get_sample.
     686        The lists are created using random numbers, and they are disjoint sets, e.g.
     687        total_list(0,1,2,3,4,5,6,7,8,9)  random_list(1,3,6,7,9) and remainder_list(0,2,4,5,8)
     688               
    685689        Input - the factor which to split the object, if 0.1 then 10% of the
    686690            object will be returned
     
    695699        remainder_list = []
    696700        new_size = round(factor*self_size)
    697    #     print'Split original %s by %s' %(self_size, factor)
    698    #     print'New samples are %s and %s in size' %(int(round(factor*self_size)),int(self_size-new_size))
    699701       
    700702        #find unique random numbers
    701703        if verbose: print "make unique random number list and get indices"
    702         while i < new_size:
    703             random_num = randint(0,self_size-1)
    704             if random_num not in random_list:
    705                 random_list.append(random_num)
    706                 i=i+1
    707 
    708         #Make list of opposite to random_list
    709         if verbose: print "make list of opposite to random list"
    710         for i in range(0,self_size,1):
    711             remainder_list.append(i)
    712 
    713         #remove random list from remainder_list to get correct remainder_list
     704
     705        total=array(range(self_size))
     706        total_list = total.tolist()
     707        if verbose: print "total list len",len(total_list)
     708               
     709        #there will be repeated random numbers; however, this will not be a
     710        #problem as they are being 'pop'ed out of the array, so if there
     711        #are two numbers the same they will pop different indices,
     712        #still basically random
     713        ## create list of non-unique random numbers
     714        if verbose: print "create random numbers list %s long" %new_size
     715        random_num = randint(0,self_size-1,(int(new_size),))
     716        random_num = random_num.tolist()
     717
    714718        #need to sort and reverse so the pop() works correctly
    715         random_list.sort()
    716         random_list.reverse()
    717         if verbose: print "get indices of opposite to random list"
    718         for i in random_list:
    719             remainder_list.pop(i)
    720             if verbose:
    721                 if ((i/100)==(float(i)/100)): print "reached: ",i
    722            
     719        random_num.sort()
     720        random_num.reverse()       
     721       
     722        if verbose: print "make random number list and get indices"
     723        j=0
     724        k=1
     725        remainder_list = total_list[:]
     726        #pops array index (random_num) from remainder_list (which starts as the
     727        #total_list) and appends it to random_list
     728        random_num_len = len(random_num)
     729        for i in random_num:
     730            random_list.append(remainder_list.pop(i))
     731            j+=1
     732            #prints progress
     733            if verbose and round(random_num_len/10*k)==j:
     734                print '(%s/%s)' %(j, random_num_len)
     735                k+=1
     736       
     737        #FIXME: move to tests, it might take a long time
     738        #then create an array of random length between 500 and 1000,
     739        #and use a random factor between 0 and 1
     740        #setup for assertion
     741        test_total = random_list[:]
     742        test_total.extend(remainder_list)
     743        test_total.sort()
     744        msg = 'The two random lists made from the original list when added together '\
     745         'DO NOT equal the original list'
     746        assert (total_list==test_total),msg
     747
    723748        #get new samples
    724749        if verbose: print "get values of indices for random list"
  • anuga_core/source/anuga/geospatial_data/test_geospatial_data.py

    r4484 r4501  
    44import unittest
    55import os
    6 from Numeric import zeros, array, allclose, concatenate
     6from Numeric import zeros, array, allclose, concatenate,sort
    77from math import sqrt, pi
    88import tempfile
     
    22322232                      14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]}
    22332233        G = Geospatial_data(points, attributes)
    2234 #        print G.get_data_points()
    2235 #        print G.get_attributes()
    22362234
    22372235        factor = 0.21
     
    22402238        G1, G2  = G.split(factor)
    22412239       
    2242 #        print 'len(G): %s len(G1): %s len(G2): %s' %(len(G), len(G1), len(G2))
    2243 #        print 'G: ', len(G),'G1: ', len(G1), 'G2: ', len(G2)
    2244 
    22452240        assert allclose(len(G), len(G1)+len(G2))
    22462241        assert allclose(round(len(G)*factor), len(G1))
    22472242       
    2248 #        assert allclose(G == G1 + G2) must implentent __equal__
     2243#        assert allclose(G, G1+G2)# must implement __eq__ or a sort
    22492244       
    22502245         
Note: See TracChangeset for help on using the changeset viewer.