diff --git a/src/libs/common/utilities.cpp b/src/libs/common/utilities.cpp index 405a1ebf..1a6efd79 100644 --- a/src/libs/common/utilities.cpp +++ b/src/libs/common/utilities.cpp @@ -851,6 +851,235 @@ bool parse_string_arg_to_bool(string arg) } } +vector read_dense_binary_col_names(ifstream& in,int n_col) +{ + //first read the names of the columns + stringstream ss; + vector col_name_sizes; + int name_size = 0; + for (int i = 0; i < n_col; i++) + { + in.read((char*)&(name_size), sizeof(name_size)); + if (!in.good()) + { + ss.str(""); + ss << "read_dense_binary(), dense format error reading size column name size for column number " << i; + throw runtime_error(ss.str()); + } + + col_name_sizes.push_back(name_size); + } + int i = 0; + string name; + vector col_names; + for (auto col_name_size : col_name_sizes) + { + char* col_name = new char[col_name_size]; + in.read(col_name, col_name_size); + if (!in.good()) + { + ss.str(""); + ss << "read_dense_binary(), dense format error reading column name for column number " << i << ", size " << col_name_size; + throw runtime_error(ss.str()); + } + name = string(col_name, col_name_size); + pest_utils::strip_ip(name); + pest_utils::upper_ip(name); + col_names.push_back(name); + i++; + delete[] col_name; + } + return col_names; + +} + +bool read_dense_binary_records(ifstream& in,int n_records, int name_size,const vector& col_names,vector& row_names, vector>& rec_vec) +{ + stringstream ss; + int i; + streampos current_pos = in.tellg(); + in.seekg(0,std::ios::end); + streampos end = in.tellg(); + in.seekg(current_pos,std::ios::beg); + bool success = true; + string name; + double data; + map rec; + rec_vec.clear(); + int n_col = col_names.size(); + while (true) + { + //finished + //if ((in.bad()) || (in.eof())) + if ((i > 0) && (!in.good())) + { + break; + } + if (in.tellg() == end) { + break; + } + in.read((char*)&(name_size), sizeof(name_size)); + if (!in.good()) + { + ss.str(""); + ss << "read_dense_binary(), dense format incomplete record: error reading row name size for row number " << i << "...continuing"; + cout << ss.str(); + success = false; + break; + } + char* row_name = new char[name_size]; + in.read(row_name, name_size); + if (!in.good()) + { + ss.str(""); + ss << "read_dense_binary(), dense format incomplete record: error reading row name for row number " << i << "...continuing"; + cout << ss.str(); + success = false; + break; + } + name = string(row_name, name_size); + delete[] row_name; + pest_utils::strip_ip(name); + pest_utils::upper_ip(name); + if (!in.good()) + { + ss.str(""); + ss << "read_dense_binary(), dense format incomplete record: error skipping values for row " << i << "...continuing "; + cout << ss.str(); + success = false; + break; + } + + rec.clear(); + for (int j = 0; j < n_col; j++) + { + if (!in.good()) + { + ss.str(""); + ss << "read_dense_binary(), dense format incomplete record: error reading row,col value " << i << "," << j << "...continuing "; + cout << ss.str(); + success = false; + break; + } + in.read((char*)&(data), sizeof(data)); + rec[col_names[j]] = data; + + } + if (in.eof()) + break; + if (!in.good()) + { + ss.str(""); + ss << "read_dense_binary(), dense format incomplete record: error skipping values for row " << i << "...continuing "; + cout << ss.str(); + success = false; + break; + } + row_names.push_back(name); + i++; + } + + return success; +} + +vector read_dense_binary_remaining_row_names(ifstream& in,const vector& col_names) +{ + stringstream ss; + int name_size = 0; + string name; + int i = 0; + double data = -1.; + vector row_names; + // record current position in file + streampos current_pos = in.tellg(); + in.seekg(0,std::ios::end); + streampos end = in.tellg(); + in.seekg(current_pos,std::ios::beg); + + while (true) + { + //finished + //if ((in.bad()) || (in.eof())) + if ((i > 0) && (!in.good())) + { + break; + } + if (in.tellg() == end) { + break; + } + in.read((char*)&(name_size), sizeof(name_size)); + if (!in.good()) + { + ss.str(""); + ss << "read_dense_binary(), dense format incomplete record: error reading row name size for row number " << i << "...continuing"; + cout << ss.str(); + break; + } + char* row_name = new char[name_size]; + in.read(row_name, name_size); + if (!in.good()) + { + ss.str(""); + ss << "read_dense_binary(), dense format incomplete record: error reading row name for row number " << i << "...continuing"; + cout << ss.str(); + break; + } + name = string(row_name, name_size); + delete[] row_name; + pest_utils::strip_ip(name); + pest_utils::upper_ip(name); + if (!in.good()) + { + ss.str(""); + ss << "read_dense_binary(), dense format incomplete record: error skipping values for row " << i << "...continuing "; + cout << ss.str(); + break; + } + + //skip the values + //in.seekg(col_names.size() * sizeof(double), ios_base::cur); + char* rest_of_line = new char[sizeof(double) * col_names.size()]; + in.read(rest_of_line, sizeof(double)* col_names.size()); + delete[] rest_of_line; + if (in.eof()) + break; + if (!in.good()) + { + ss.str(""); + ss << "read_dense_binary(), dense format incomplete record: error skipping values for row " << i << "...continuing "; + cout << ss.str(); + break; + } + row_names.push_back(name); + i++; + } + in.seekg(current_pos,std::ios::beg); + return row_names; +} + +void read_binary_matrix_header(ifstream& in, int& tmp1, int& tmp2, int& tmp3) +{ + stringstream ss; + + if (!in.good()) + { + ss.str(""); + ss << "read_binary_matrix_header - stream is not good"; + throw runtime_error(ss.str()); + } + in.read((char*)&tmp1, sizeof(tmp1)); + in.read((char*)&tmp2, sizeof(tmp2)); + in.read((char*)&tmp3, sizeof(tmp3)); + if (!in.good()) + { + ss.str(""); + ss << "read_binary_matrix_header - stream is not good"; + throw runtime_error(ss.str()); + } + + in.close(); + +} void read_dense_binary(const string& filename, vector& row_names, vector& col_names, Eigen::MatrixXd& matrix) @@ -874,125 +1103,23 @@ void read_dense_binary(const string& filename, vector& row_names, vector int n_obs_and_pi; int i, j; double data; - //char* col_name; - //char* row_name; - // read header - in.read((char*)&n_par, sizeof(n_par)); - in.read((char*)&n_obs_and_pi, sizeof(n_obs_and_pi)); - in.read((char*)&n_nonzero, sizeof(n_nonzero)); + read_binary_matrix_header(in,n_par,n_obs_and_pi,n_nonzero); if ((n_par == 0) && (n_obs_and_pi < 0) && (n_nonzero < 0) && (n_obs_and_pi == n_nonzero)) { n_obs_and_pi *= -1; cout << "reading 'dense' format matrix with " << n_obs_and_pi << " columns" << endl; - //first read the names of the columns - vector col_name_sizes; - int name_size = 0; - for (int i = 0; i < n_obs_and_pi; i++) - { - in.read((char*)&(name_size), sizeof(name_size)); - if (!in.good()) - { - ss.str(""); - ss << "read_dense_binary(), dense format error reading size column name size for column number " << i; - throw runtime_error(ss.str()); - } - - col_name_sizes.push_back(name_size); - } - int i = 0; - string name; - for (auto col_name_size : col_name_sizes) - { - char* col_name = new char[col_name_size]; - in.read(col_name, col_name_size); - if (!in.good()) - { - ss.str(""); - ss << "read_dense_binary(), dense format error reading column name for column number " << i << ", size " << col_name_size; - throw runtime_error(ss.str()); - } - name = string(col_name, col_name_size); - pest_utils::strip_ip(name); - pest_utils::upper_ip(name); - col_names.push_back(name); - i++; - delete[] col_name; - } - i = 0; - double data = -1.; - // record current position in file - streampos begin_rows = in.tellg(); - in.seekg(0,std::ios::end); - streampos end = in.tellg(); - in.seekg(begin_rows,std::ios::beg); - //read the row names so we can dimension the matrix - while (true) - { - //finished - //if ((in.bad()) || (in.eof())) - if ((i > 0) && (!in.good())) - { - break; - } - if (in.tellg() == end) { - break; - } - in.read((char*)&(name_size), sizeof(name_size)); - if (!in.good()) - { - ss.str(""); - ss << "read_dense_binary(), dense format incomplete record: error reading row name size for row number " << i << "...continuing"; - cout << ss.str(); - break; - } - char* row_name = new char[name_size]; - in.read(row_name, name_size); - if (!in.good()) - { - ss.str(""); - ss << "read_dense_binary(), dense format incomplete record: error reading row name for row number " << i << "...continuing"; - cout << ss.str(); - break; - } - name = string(row_name, name_size); - delete[] row_name; - pest_utils::strip_ip(name); - pest_utils::upper_ip(name); - if (!in.good()) - { - ss.str(""); - ss << "read_dense_binary(), dense format incomplete record: error skipping values for row " << i << "...continuing "; - cout << ss.str(); - break; - } + //first read the names of the columns and the rows + col_names = read_dense_binary_col_names(in,n_obs_and_pi); + row_names = read_dense_binary_remaining_row_names(in,col_names); - //skip the values - //in.seekg(col_names.size() * sizeof(double), ios_base::cur); - char* rest_of_line = new char[sizeof(double) * col_names.size()]; - in.read(rest_of_line, sizeof(double)* col_names.size()); - delete[] rest_of_line; - if (in.eof()) - break; - if (!in.good()) - { - ss.str(""); - ss << "read_dense_binary(), dense format incomplete record: error skipping values for row " << i << "...continuing "; - cout << ss.str(); - break; - } - row_names.push_back(name); - i++; - } in.close(); in.open(filename.c_str(), ifstream::binary); //resize the matrix now that we know big it should be matrix.resize(row_names.size(), col_names.size()); - //seek back to the first row - in.seekg(begin_rows, ios_base::beg); for (int i=0;i& row_names, vector } } } + else + { + throw runtime_error("binary matrix header values do not indicate a dense matrix format"); + } } void read_binary_matrix_header(const string& filename, int& tmp1, int& tmp2, int& tmp3) diff --git a/src/programs/sweep/sweep.cpp b/src/programs/sweep/sweep.cpp index 1b47f673..a2fd5acc 100644 --- a/src/programs/sweep/sweep.cpp +++ b/src/programs/sweep/sweep.cpp @@ -116,6 +116,73 @@ map prepare_parameter_csv(Parameters pars, ifstream &csv, bool forgi return header_info; } +map prepare_parameter_densebin(Parameters pars, ifstream &csv, bool forgive) +{ + if (!csv.good()) + { + throw runtime_error("ifstream not good"); + } + + //process the header + //any missing header labels will be marked to ignore those columns later + + string line; + vector header_tokens; + if (!getline(csv, line)) + throw runtime_error("error reading header (first) line from csv file :"); + strip_ip(line); + upper_ip(line); + tokenize(line, header_tokens, ",", false); + + for (auto &t : header_tokens) + { + strip_ip(t); + } + //cout << tokens << endl; + //vector header_tokens = tokens; + + // check for parameter names that in the pest control file but that are missing from the csv file + vector missing_names; + string name; + set stokens(header_tokens.begin(),header_tokens.end()); + for (auto &p : pars) + if (stokens.find(p.first) == stokens.end()) + missing_names.push_back(p.first); + + if (missing_names.size() > 0) + { + stringstream ss; + ss << " the following pest control file parameter names were not found in the parameter csv file:" << endl; + for (auto &n : missing_names) ss << n << endl; + if (!forgive) + throw runtime_error(ss.str()); + else + cout << ss.str() << endl << "continuing anyway..." << endl; + } + + if (header_tokens[header_tokens.size() - 1].size() == 0) + header_tokens.pop_back(); + + + //build up a list of idxs to use + vector ctl_pnames = pars.get_keys(); + unordered_set s_pnames(ctl_pnames.begin(), ctl_pnames.end()); + unordered_set::iterator end = s_pnames.end(); + ctl_pnames.resize(0); + vector header_idxs; + map header_info; + for (int i = 0; i < header_tokens.size(); i++) + { + //if (find(ctl_pnames.begin(), ctl_pnames.end(), header_tokens[i]) != ctl_pnames.end()) + if (s_pnames.find(header_tokens[i]) != end) + { + //header_idxs.push_back(i); + header_info[header_tokens[i]] = i; + } + } + return header_info; +} + //pair,vector> load_parameters_from_csv(map &header_info, ifstream &csv, int chunk, const Parameters &ctl_pars, vector &run_ids, vector &sweep_pars) void load_parameters_from_csv(map& header_info, ifstream& csv, int chunk, const Parameters& ctl_pars, vector& run_ids, vector& sweep_pars) @@ -469,7 +536,7 @@ int main(int argc, char* argv[]) ifstream par_stream(par_csv_file); if (!par_stream.good()) { - throw runtime_error("could not open parameter csv file " + par_csv_file); + throw runtime_error("could not open parameter sweep file " + par_csv_file); } RunManagerAbstract *run_manager_ptr; @@ -557,11 +624,23 @@ int main(int argc, char* argv[]) jco_mat = jco.get_matrix(jco.get_sim_obs_names(), pest_scenario.get_ctl_ordered_par_names()).toDense(); } - else + else if (par_ext.compare("csv") == 0) { + cout << " --- csv file detected for par_csv" << endl; header_info = prepare_parameter_csv(pest_scenario.get_ctl_parameters(), par_stream, pest_scenario.get_pestpp_options().get_sweep_forgive()); } + else if (par_ext.compare("bin")==0) + { + cout << " --- dense binary file detected for par_csv" << endl; + header_info = prepare_parameter_densebin(pest_scenario.get_ctl_parameters(),par_stream, pest_scenario.get_pestpp_options().get_sweep_forgive()); + + + } + else + { + throw runtime_error("unrecognized parameter sweep input file extension: '"+par_ext+"'"); + } // prepare the output file ofstream obs_stream;