""" Functions used to calculate the root mean square deviation. Duncan Gray, GA - 2007 """ #---------------------------------------------------------------------------- # Import necessary modules #---------------------------------------------------------------------------- # Standard modules import os from csv import writer from time import localtime, strftime from os.path import join # Related major packages from Numeric import zeros, Float, where, greater, less, compress, sqrt, sum from anuga.shallow_water.data_manager import csv2dict from anuga.utilities.numerical_tools import ensure_numeric, err, norm from anuga.utilities.interp import interp # Scenario specific imports import project def get_max_min_condition_array(min, max, vector): """ Given a vector of values, and minimum and maximum values, return a vector of 0/1's that can be used to cut arrays so only the times in the min max range are used. precondition: The vector values are ascending. """ SMALL_MIN = -1e10 # Not that small, but small enough vector = ensure_numeric(vector) assert min > SMALL_MIN no_maxs = where(less(vector,max), vector, SMALL_MIN) band_condition = greater(no_maxs, min) return band_condition def auto_rrms(outputdir_tag, scenarios, quantity='stage', y_location_tag=':0.0'): """ Given a list of scenarios that have CSV guage files, calc the err, Number_of_samples and rmsd for all gauges in each scenario. Write this info to a file for each scenario. """ for run_data in scenarios: location_sims = [] location_exps = [] for gauge_x in run_data['gauge_x']: gauge_x = str(gauge_x) location_sims.append(gauge_x + y_location_tag) location_exps.append(gauge_x) id = run_data['scenario_id'] outputdir_name = id + outputdir_tag file_sim = join(project.output_dir,outputdir_name + '_' + \ quantity + ".csv") file_exp = id + '_exp_' + quantity + '.csv' file_err = join(project.output_dir,outputdir_name + "_" + \ quantity + "_err.csv") simulation, _ = csv2dict(file_sim) experiment, _ = csv2dict(file_exp) time_sim = [float(x) for x in simulation['time']] time_exp = [float(x) for x in experiment['Time']] time_sim = ensure_numeric(time_sim) time_exp = ensure_numeric(time_exp) condition = get_max_min_condition_array(run_data['wave_times'][0], run_data['wave_times'][1], time_exp) time_exp_cut = compress(condition, time_exp) print "Writing to ", file_err err_list = [] points = [] rmsd_list = [] for location_sim, location_exp in map(None, location_sims, location_exps): quantity_sim = [float(x) for x in simulation[location_sim]] quantity_exp = [float(x) for x in experiment[location_exp]] quantity_exp_cut = compress(condition, quantity_exp) # Now let's do interpolation quantity_sim_interp = interp(quantity_sim, time_sim, time_exp_cut) assert len(quantity_sim_interp) == len(quantity_exp_cut) norm = err(quantity_sim_interp, quantity_exp_cut, 2, relative = False) # 2nd norm (rel. RMS) err_list.append(norm) points.append(len(quantity_sim_interp)) rmsd_list.append(norm/sqrt(len(quantity_sim_interp))) assert len(location_exps) == len(err_list) # Writing the file out for one scenario a_writer = writer(file(file_err, "wb")) a_writer.writerow(["x location", "err", "Number_of_samples", "rmsd"]) a_writer.writerows(map(None, location_exps, err_list, points, rmsd_list)) def load_sensors(quantity_file): """ Load a csv file, where the first row is the column header and the first colum explains the rows. returns the data as two vectors and an array. """ # Read the depth file dfid = open(quantity_file) lines = dfid.readlines() dfid.close() title = lines.pop(0) n_time = len(lines) n_sensors = len(lines[0].split(','))-1 # -1 to remove time times = zeros(n_time, Float) #Time depths = zeros(n_time, Float) # sensors = zeros((n_time,n_sensors), Float) quantity_locations = title.split(',') quantity_locations.pop(0) # remove 'time' # Doing j.split(':')[0] drops the y location locations = [float(j.split(':')[0]) for j in quantity_locations] for i, line in enumerate(lines): fields = line.split(',') fields = [float(j) for j in fields] times[i] = fields[0] sensors[i] = fields[1:] # 1: to remove time return times, locations, sensors def err_files(scenarios, outputdir_tag, quantity='stage'): """ Create a list of err files, for a list of scenarios. """ file_errs = [] for scenario in scenarios: id = scenario['scenario_id'] outputdir_name = id + outputdir_tag file_err = join(project.output_dir,outputdir_name + "_" + \ quantity + "_err.csv") file_errs.append(file_err) return file_errs def compare_different_settings(outputdir_tag, scenarios, quantity='stage'): """ Calculate the RMSD for all the tests in a scenario """ files = err_files(scenarios, outputdir_tag, quantity=quantity) err = 0.0 number_of_samples = 0 for run_data, file in map(None, scenarios, files): simulation, _ = csv2dict(file) err_list = [float(x) for x in simulation['err']] number_of_samples_list = [float(x) for x in \ simulation['Number_of_samples']] if number_of_samples is not 0: err_list.append(err) number_of_samples_list.append(number_of_samples) err, number_of_samples = err_addition(err_list, number_of_samples_list) rmsd = err/sqrt(number_of_samples) print outputdir_tag + " " + str(rmsd) def err_addition(err_list, number_of_samples_list): """ This function 'sums' a list of errs and sums a list of samples err is the err value (sqrt(sum_over_x&y((xi - yi)^2))) for a set of values. number_of_samples is the number of values associated with the err. If this function gets used alot, maybe pull this out and make it an object """ err = norm(ensure_numeric(err_list)) number_of_samples = sum(ensure_numeric(number_of_samples_list)) return err, number_of_samples #------------------------------------------------------------- if __name__ == "__main__": from scenarios import scenarios # Change the outputdir_tag to change which set of data # the RMSD is calculated for. outputdir_tag = "_nolmts_wdth_0.1_z_0.0_ys_0.01_mta_0.01_A" auto_rrms(outputdir_tag, scenarios, "stage", y_location_tag=':0.0') compare_different_settings(outputdir_tag, scenarios, "stage")