diff --git a/example/accuracy.cpp b/example/accuracy.cpp index a0bbe31..45c7265 100644 --- a/example/accuracy.cpp +++ b/example/accuracy.cpp @@ -26,7 +26,7 @@ void Match::addOverlap( long d , long len ) { } } -int main (int argc, char* argv[]) +int main (int argc, char* argv[]) { // cin should be <0/1> @@ -36,23 +36,23 @@ int main (int argc, char* argv[]) Match cur_match; list< Match > segments[2]; list< Match >::iterator segments_it[2]; - + stringstream ss; prev_pair = ""; - + long ss_total[2]; bool last_pair = false; - + while( true ) { if ( last_pair || getline( cin , line ) ) { - + if ( ! last_pair ) { ss.clear(); ss.str( line ); cur_match.line = line; ss >> pre >> pair >> cur_match.pos[0] >> cur_match.pos[1]; } - - if ( last_pair || pair != prev_pair ) { + + if ( last_pair || pair != prev_pair ) { if ( prev_pair != "" ) { // compute overlap for ( segments_it[0] = segments[0].begin() ; segments_it[0] != segments[0].end(); segments_it[0]++ ) @@ -95,9 +95,9 @@ int main (int argc, char* argv[]) segments[0].clear(); segments[1].clear(); } - + if ( last_pair ) break; - + segments[ pre ].push_back( cur_match ); ss_total[ pre ] += cur_match.pos[1] - cur_match.pos[0]; prev_pair = pair; @@ -106,4 +106,4 @@ int main (int argc, char* argv[]) } } return 1; -} +} diff --git a/g2.cpp b/g2.cpp index b1151aa..18a5edb 100644 --- a/g2.cpp +++ b/g2.cpp @@ -41,7 +41,7 @@ struct Marker { string id; long pos; double cm; - + string print() { stringstream ss; ss << id << '\t' << pos << '\t' << cm << endl; @@ -67,10 +67,10 @@ class Individual { string getWordString( int w ) { return hap[ w % CONST_READ_AHEAD ].to_string(); } - string print() { + string print() { stringstream ss; ss << id[0] << '\t' << id[1] << '\t' << endl; - return ss.str(); + return ss.str(); } string getID() { return id[1]; } int getNum() { return idnum; } @@ -116,13 +116,13 @@ struct Match { FOUT.write( (char*) &pid[0] , sizeof( unsigned int ) ); FOUT.write( (char*) &pid[1] , sizeof( unsigned int ) ); FOUT.write( (char*) &sid[0] , sizeof( unsigned int ) ); - FOUT.write( (char*) &sid[1] , sizeof( unsigned int ) ); + FOUT.write( (char*) &sid[1] , sizeof( unsigned int ) ); } else { - FOUT << all_ind[p.first].getID() << "\t" - << all_ind[p.second].getID() << "\t" - << getPos( interval[0] * WORD_SIZE ) << "\t" - << getPos( interval[1] * WORD_SIZE + WORD_SIZE - 1 ) << "\t" - << mlen << "\t" + FOUT << all_ind[p.first].getID() << "\t" + << all_ind[p.second].getID() << "\t" + << getPos( interval[0] * WORD_SIZE ) << "\t" + << getPos( interval[1] * WORD_SIZE + WORD_SIZE - 1 ) << "\t" + << mlen << "\t" << interval[1] - interval[0] + 1 << "\t" << gaps - PAR_GAP; FOUT << endl; @@ -161,17 +161,17 @@ class ExtendHash { p.first = 2 * ((loc - p.second/2) / num); } else { p.second = loc % num; - p.first = (loc - p.second) / num; + p.first = (loc - p.second) / num; } return p; } // Compute location from pair of individuals unsigned int pairToLocation( unsigned int i , unsigned int j ) { - + if ( !PAR_HAPLOID ) { // round everyone down to the nearest haplotype i = (i - (i % 2)) / 2; - j = (j - (j % 2)) / 2; + j = (j - (j % 2)) / 2; } unsigned int loc = (i > j) ? j * num + i : i * num + j; return loc; @@ -185,7 +185,7 @@ class ExtendHash { extend_ret = extend_hash.insert( pair< unsigned int , Match >( pairToLocation(i,j) , m) ); (extend_ret.first->second).extend(w); } - + // Remove all pairs that were not extended beyond w // int w : word # to remove prior to void clearPairsPriorTo(int w) { @@ -205,7 +205,7 @@ class ExtendHash { void extendAllPairsTo(int w) { for ( auto it = extend_hash.begin() ; it != extend_hash.end() ; it++ ) it->second.interval[1] = w; } - + // Remove all pairs // int w : word # to remove prior to void clearAllPairs() { @@ -235,10 +235,10 @@ class SeedHash { void clear() { seed_hash.clear(); } - int size() { + int size() { return seed_hash.size(); } - + // Generate a new hash for this vector of individuals unsigned long subHash( ExtendHash * e , vector vec , int w ) { SeedHash cur_sh; @@ -254,7 +254,7 @@ class SeedHash { unsigned long extendAllPairs( ExtendHash * e , int w ) { unsigned long tot_pairs = 0; for ( auto it = seed_hash.begin() ; it != seed_hash.end() ; ++it ) { - + // *** As long as the # of pairs is high, generate a sub-hash for the next word // *** Only store pairs of individuals that have collision in a small hash // *** Extend only to the haplotypes that seeded here @@ -269,10 +269,10 @@ class SeedHash { for ( int ii = i+1 ; ii < it->second.size() ; ii++ ) { e->extendPair( it->second[i] , it->second[ii] , w ); } - } + } } } - return tot_pairs; + return tot_pairs; } // Debug print all pairs in the current hash // string word : formatted output for end of line @@ -280,30 +280,30 @@ class SeedHash { int printAllPairs(string word) { int tot_pairs = 0; for ( auto it = seed_hash.begin() ; it != seed_hash.end() ; ++it ) { - tot_pairs += it->second.size() * (it->second.size() - 1) / 2; + tot_pairs += it->second.size() * (it->second.size() - 1) / 2; for ( int i = 0 ; i < it->second.size() ; i++ ) { for ( int ii = i+1 ; ii < it->second.size() ; ii++ ) { cout << all_ind[it->second[i]].getID() << "\t" << all_ind[it->second[ii]].getID() << "\t" << word << endl; - } + } } } return tot_pairs; - } + } }; double get_cpu_time(){ return (double)clock() / CLOCKS_PER_SEC; } -int main (int argc, char* argv[]) -{ +int main (int argc, char* argv[]) +{ cout << R"( - ___ ___ + ___ ___ ___ ____ ______ _ / (_)__ ___ |_ | - / _ `/ -_) __/ ' \/ / / _ \/ -_) __/ - \_, /\__/_/ /_/_/_/_/_/_//_/\__/____/ - /___/ + / _ `/ -_) __/ ' \/ / / _ \/ -_) __/ + \_, /\__/_/ /_/_/_/_/_/_//_/\__/____/ + /___/ )" << endl; @@ -311,7 +311,7 @@ int main (int argc, char* argv[]) double TIME_prev = TIME_start; float PAR_MIN_MAF = 0; float PAR_skip = 0; - + string line, discard; bool opt_error = 0; int c; @@ -330,16 +330,16 @@ int main (int argc, char* argv[]) break; case 'f': PAR_MIN_MAF = atof( optarg ); - break; + break; case 'g': PAR_GAP = atoi( optarg ); break; case 's': PAR_skip = atof( optarg ); - break; + break; case 'd': MAX_seeds = atoi( optarg ); - break; + break; default: opt_error = 1; } @@ -355,39 +355,39 @@ int main (int argc, char* argv[]) cout << "-s\tSkip words with (seeds/samples) less than than this value [default 0.0] " << PAR_skip << endl; cout << endl; if ( opt_error == 1 ) abort (); - + // load parameters if(opt_error || argc - optind != 4){ cerr << "ERROR: Incorrect number of parameters" << endl; - cerr << "Usage: g2 [options] " << endl; + cerr << "Usage: g2 [options] " << endl; return 0; } - + cout << "\thaps: " << argv[optind + 0] << endl; cout << "\tsample: " << argv[optind + 1] << endl; cout << "\tgmap: " << argv[optind + 2] << endl; cout << "\toutput: " << argv[optind + 3] << endl << endl; cout << "---" << endl << endl; - + ifstream file_haps(argv[optind + 0]); ifstream file_samp(argv[optind + 1]); ifstream file_genm(argv[optind + 2]); - + string out = string( argv[optind + 3] ); if ( PAR_BIN_OUT ) { FOUT.open( ( out + ".bmatch" ).c_str() , ios::binary ); } else { FOUT.open( argv[optind + 3] ); } - + if(!FOUT ) { cerr << argv[optind + 3] << " could not be opened" << endl; return -1; } if(!file_haps ) { cerr << argv[optind + 0] << " could not be opened" << endl; return -1; } if(!file_samp ) { cerr << argv[optind + 1] << " could not be opened" << endl; return -1; } if(!file_genm ) { cerr << argv[optind + 2] << " could not be opened" << endl; return -1; } - + string map_field[3]; stringstream ss; - + // *** read genetic map vector< pair > genetic_map; int cur_g = 0; @@ -407,7 +407,7 @@ int main (int argc, char* argv[]) cerr << "*** runtime : " << get_cpu_time() - TIME_start << "\t"; cerr << genetic_map.size() << " genetic map entries read" << endl; - + // *** read individual information // skip first two lines getline(file_samp,line); @@ -419,39 +419,39 @@ int main (int argc, char* argv[]) ss >> map_field[0] >> map_field[1]; if ( PAR_HAPLOID ) { all_ind.push_back( Individual(map_field[0],(map_field[1]+".0").c_str(),idctr) ); - idctr++; + idctr++; all_ind.push_back( Individual(map_field[0],(map_field[1]+".1").c_str(),idctr) ); } else { all_ind.push_back( Individual(map_field[0],map_field[1],idctr) ); all_ind.push_back( Individual(map_field[0],map_field[1],idctr) ); } - idctr++; + idctr++; } file_samp.close(); num_ind = all_ind.size(); - + cerr << "*** runtime : " << get_cpu_time() - TIME_start << "\t"; cerr << num_ind / 2 << " sample identifiers read" << endl; - + Marker cur_marker; // track position through genetic map cur_g = 0; int snp_ctr; char al[2] , inp; string cur_al; - + // Storage for seeds SeedHash seeds; - + hash_size word[2]; - + // Storage for extensions ExtendHash extend( PAR_HAPLOID ? num_ind : num_ind / 2 ); // Hash individual words GLOBAL_READ_WORDS = 0; GLOBAL_CURRENT_WORD = 0; - + while( 1 ) { snp_ctr = 0; while( getline(file_haps,line) ) @@ -473,7 +473,7 @@ int main (int argc, char* argv[]) cur_marker.cm = genetic_map[cur_g-1].second + (cur_marker.pos - genetic_map[cur_g-1].first) * ( genetic_map[cur_g].second - genetic_map[cur_g-1].second ) / ( genetic_map[cur_g].first - genetic_map[cur_g-1].first ); // cerr << "interpolating " << cur_marker.id << " : " << cur_marker.pos << " between " << genetic_map[cur_g-1].first << "," << genetic_map[cur_g-1].second << " and " << genetic_map[cur_g].first << "," << genetic_map[cur_g].second << " to " << cur_marker.cm << endl; } - + // restrict on MAF if ( PAR_MIN_MAF > 0 ) { int maf_ctr = 0; @@ -488,9 +488,9 @@ int main (int argc, char* argv[]) ss.clear(); ss.str( line ); ss >> map_field[0] >> map_field[0] >> map_field[0] >> al[0] >> al[1]; } - + all_markers.push_back( cur_marker ); - + // read haplotype for( unsigned int i=0; i= CONST_READ_AHEAD ) break; @@ -506,13 +506,13 @@ int main (int argc, char* argv[]) snp_ctr = 0 ; } } - + // end if read all data if( GLOBAL_CURRENT_WORD >= GLOBAL_READ_WORDS ) { cerr << "processed " << GLOBAL_CURRENT_WORD * WORD_SIZE << " / " << all_markers.size() << " SNPs" << endl; break; } - + for ( unsigned int i = 0 ; i < num_ind ; i++ ) { // cerr << i << "\t" << all_ind[i].getWordString( w ) << endl; seeds.insertIndividual( i , all_ind[i].getWordHash( GLOBAL_CURRENT_WORD ) ); @@ -539,28 +539,28 @@ int main (int argc, char* argv[]) << extend.size() << " active seeds, " << GLOBAL_SKIPPED_WORDS << " skipped words" << endl; seeds.clear(); - + for( unsigned int i=0; i< num_ind ; i++) all_ind[i].clear( GLOBAL_CURRENT_WORD ); GLOBAL_CURRENT_WORD++; } extend.clearAllPairs(); - file_haps.close(); + file_haps.close(); FOUT.close(); - + if ( PAR_BIN_OUT ) { ofstream bmid_out( ( out + ".bmid" ).c_str() ); for ( int i = 0 ; i < all_markers.size() ; i++ ) bmid_out << all_markers[i].print(); bmid_out.close(); - + ofstream bsid_out( ( out + ".bsid" ).c_str() ); for ( int i = 0 ; i < all_ind.size() ; i++ ) { bsid_out << all_ind[i].print(); if ( !PAR_HAPLOID ) i++; } bsid_out.close(); - + } cerr << "*** runtime : " << get_cpu_time() - TIME_start << endl; - + return 0; } diff --git a/parse_bmatch.cpp b/parse_bmatch.cpp index 17fdca2..53234fa 100644 --- a/parse_bmatch.cpp +++ b/parse_bmatch.cpp @@ -22,7 +22,7 @@ int main (int argc, char* argv[]) cerr << "Usage: " << argv[0] << " [BMATCH FILE] [BSID FILE] [BMID FILE]" << endl; return 0; } - + string line, discard; ifstream file_bmatch( argv[1] , ios::binary ); ifstream file_bsid( argv[2] ); @@ -30,7 +30,7 @@ int main (int argc, char* argv[]) if(!file_bmatch || !file_bsid || !file_bmid ) { cerr << "file could not be opened" << endl; return 0; } stringstream ss; - + // load samples vector< string > sample_id; while( getline(file_bsid , line) ) @@ -38,7 +38,7 @@ int main (int argc, char* argv[]) sample_id.push_back( line ); } file_bsid.close(); - + // load markers vector< Marker > marker_id; Marker cur_marker; @@ -49,7 +49,7 @@ int main (int argc, char* argv[]) marker_id.push_back( cur_marker ); } file_bmid.close(); - + // load matches unsigned int pid[2]; unsigned int sid[2]; @@ -63,7 +63,7 @@ int main (int argc, char* argv[]) file_bmatch.read( (char*) &pid[1] , sizeof( unsigned int ) ); file_bmatch.read( (char*) &sid[0] , sizeof( unsigned int ) ); file_bmatch.read( (char*) &sid[1] , sizeof( unsigned int ) ); - + cout << sample_id[ pid[0] ] << '\t' << sample_id[ pid[1] ] << '\t'