"""
Functions used to calculate the root mean square deviation.

Duncan Gray, GA - 2007 

"""


#----------------------------------------------------------------------------
# Import necessary modules
#----------------------------------------------------------------------------

# Standard modules
import os
from csv import writer
from time import localtime, strftime
from os.path import join

# Related major packages
from Numeric import zeros, Float, where, greater, less, compress, sqrt, sum
from anuga.shallow_water.data_manager import csv2dict
from anuga.utilities.numerical_tools import ensure_numeric, err, norm
from anuga.utilities.interp import interp

# Scenario specific imports
import project

def get_max_min_condition_array(min, max, vector):
    """
    Given a vector of values, and minimum and maximum values, return a
    vector of 0/1's that can be used to cut arrays so only the times
    in the min max range are used.
    
    precondition: The vector values are ascending.
    
    """
    
    SMALL_MIN = -1e10  # Not that small, but small enough
    vector = ensure_numeric(vector)
    assert min > SMALL_MIN
    no_maxs = where(less(vector,max), vector, SMALL_MIN)
    band_condition = greater(no_maxs, min)
    return band_condition

    
def auto_rrms(outputdir_tag, scenarios, quantity='stage',
              y_location_tag=':0.0'):
    """
    Given a list of scenarios that have CSV guage files, calc the
    err, Number_of_samples and rmsd for all gauges in each scenario.
    Write this info to a file for each scenario.
    """
    for run_data in scenarios:           
        location_sims = []
        location_exps = []
        for gauge_x in run_data['gauge_x']:
            gauge_x = str(gauge_x)
            location_sims.append(gauge_x + y_location_tag)
            location_exps.append(gauge_x)
        
        id = run_data['scenario_id']
        outputdir_name = id + outputdir_tag
        file_sim = join(project.output_dir,outputdir_name + '_' +  \
                        quantity + ".csv")
        file_exp = id + '_exp_' + quantity + '.csv'
        file_err = join(project.output_dir,outputdir_name + "_" + \
                        quantity + "_err.csv")
        

        simulation, _ = csv2dict(file_sim)
        experiment, _ = csv2dict(file_exp)
        
        time_sim = [float(x) for x in simulation['time']]
        time_exp = [float(x) for x in experiment['Time']]
        time_sim = ensure_numeric(time_sim)
        time_exp = ensure_numeric(time_exp)
        condition = get_max_min_condition_array(run_data['wave_times'][0],
                                                run_data['wave_times'][1],
                                                time_exp)
        time_exp_cut = compress(condition, time_exp)
        
        print "Writing to ", file_err
        
        err_list = []
        points = []
        rmsd_list = []
        for location_sim, location_exp in map(None, location_sims,
                                              location_exps):
            quantity_sim = [float(x) for x in simulation[location_sim]]
            quantity_exp = [float(x) for x in experiment[location_exp]]

            quantity_exp_cut = compress(condition, quantity_exp)

            # Now let's do interpolation
            quantity_sim_interp = interp(quantity_sim, time_sim, time_exp_cut)

            assert len(quantity_sim_interp) == len(quantity_exp_cut)
            norm = err(quantity_sim_interp,
	    	       quantity_exp_cut,
                       2, relative = False)  # 2nd norm (rel. RMS)
            err_list.append(norm)
            points.append(len(quantity_sim_interp))
            rmsd_list.append(norm/sqrt(len(quantity_sim_interp))) 
        assert len(location_exps) == len(err_list)

        # Writing the file out for one scenario
        a_writer = writer(file(file_err, "wb"))
        a_writer.writerow(["x location", "err", "Number_of_samples", "rmsd"])
        a_writer.writerows(map(None,
                               location_exps,
                               err_list,
                               points,
                               rmsd_list))


def load_sensors(quantity_file):
    """
    Load a csv file, where the first row is the column header and
    the first colum explains the rows.

    returns the data as two vectors and an array.
    
    """
    
    # Read the depth file
    dfid = open(quantity_file)
    lines = dfid.readlines()
    dfid.close()

    title = lines.pop(0)
    n_time = len(lines)
    n_sensors = len(lines[0].split(','))-1  # -1 to remove time
    times = zeros(n_time, Float)  #Time
    depths = zeros(n_time, Float)  #
    sensors = zeros((n_time,n_sensors), Float)
    quantity_locations = title.split(',')
    quantity_locations.pop(0) # remove 'time'

    # Doing j.split(':')[0] drops the y location
    locations = [float(j.split(':')[0]) for j in quantity_locations]
    
    for i, line in enumerate(lines):
        fields = line.split(',')
        fields = [float(j) for j in fields]
        times[i] = fields[0]
        sensors[i] = fields[1:] # 1: to remove time

    return times, locations, sensors                  

    
def err_files(scenarios, outputdir_tag, quantity='stage'):
    """
    Create a list of err files, for a list of scenarios.
    """
    file_errs = []
    for scenario in scenarios:
        id = scenario['scenario_id']
        outputdir_name = id + outputdir_tag
        file_err =  join(project.output_dir,outputdir_name + "_" + \
                         quantity + "_err.csv")
        file_errs.append(file_err)
    return file_errs
    

def compare_different_settings(outputdir_tag, scenarios, quantity='stage'):
    """
    Calculate the RMSD for all the tests in a scenario
    """
    files = err_files(scenarios, outputdir_tag, quantity=quantity)
    err = 0.0
    number_of_samples = 0
    for run_data, file in map(None, scenarios, files):
        
        simulation, _ = csv2dict(file)
        err_list = [float(x) for x in simulation['err']]
        number_of_samples_list = [float(x) for x in \
                                  simulation['Number_of_samples']]
        
        if number_of_samples is not 0:
            err_list.append(err)
            number_of_samples_list.append(number_of_samples)
        err, number_of_samples = err_addition(err_list, number_of_samples_list)
    rmsd = err/sqrt(number_of_samples)
    print outputdir_tag + "   " + str(rmsd)
    
    
def err_addition(err_list, number_of_samples_list):
    """
    This function 'sums' a list of errs and sums a list of samples
    
    err is the err value (sqrt(sum_over_x&y((xi - yi)^2))) for a set of values.
    number_of_samples is the number of values associated with the err.
    
    If this function gets used alot, maybe pull this out and make it an object
    """
    err = norm(ensure_numeric(err_list))
    number_of_samples = sum(ensure_numeric(number_of_samples_list))

    return err, number_of_samples

                 
#-------------------------------------------------------------
if __name__ == "__main__":
    
    from scenarios import scenarios

    # Change the outputdir_tag to change which set of data
    # the RMSD is calculated for.
    outputdir_tag = "_nolmts_wdth_0.1_z_0.0_ys_0.01_mta_0.01_A"
    auto_rrms(outputdir_tag, scenarios, "stage", y_location_tag=':0.0')
    compare_different_settings(outputdir_tag, scenarios, "stage")