
Commit

Trim trailing whitespace
alexreg committed May 20, 2024
1 parent ebf257e commit e7e163f
Showing 2 changed files with 66 additions and 66 deletions.
122 changes: 61 additions & 61 deletions g2.cpp
@@ -41,7 +41,7 @@ struct Marker {
string id;
long pos;
double cm;

string print() {
stringstream ss;
ss << id << '\t' << pos << '\t' << cm << endl;
@@ -67,10 +67,10 @@ class Individual {
string getWordString( int w ) {
return hap[ w % CONST_READ_AHEAD ].to_string();
}
string print() {
string print() {
stringstream ss;
ss << id[0] << '\t' << id[1] << '\t' << endl;
return ss.str();
return ss.str();
}
string getID() { return id[1]; }
int getNum() { return idnum; }
@@ -116,13 +116,13 @@ struct Match {
FOUT.write( (char*) &pid[0] , sizeof( unsigned int ) );
FOUT.write( (char*) &pid[1] , sizeof( unsigned int ) );
FOUT.write( (char*) &sid[0] , sizeof( unsigned int ) );
FOUT.write( (char*) &sid[1] , sizeof( unsigned int ) );
FOUT.write( (char*) &sid[1] , sizeof( unsigned int ) );
} else {
FOUT << all_ind[p.first].getID() << "\t"
<< all_ind[p.second].getID() << "\t"
<< getPos( interval[0] * WORD_SIZE ) << "\t"
<< getPos( interval[1] * WORD_SIZE + WORD_SIZE - 1 ) << "\t"
<< mlen << "\t"
FOUT << all_ind[p.first].getID() << "\t"
<< all_ind[p.second].getID() << "\t"
<< getPos( interval[0] * WORD_SIZE ) << "\t"
<< getPos( interval[1] * WORD_SIZE + WORD_SIZE - 1 ) << "\t"
<< mlen << "\t"
<< interval[1] - interval[0] + 1 << "\t"
<< gaps - PAR_GAP;
FOUT << endl;
@@ -161,17 +161,17 @@ class ExtendHash {
p.first = 2 * ((loc - p.second/2) / num);
} else {
p.second = loc % num;
p.first = (loc - p.second) / num;
p.first = (loc - p.second) / num;
}
return p;
}
// Compute location from pair of individuals
unsigned int pairToLocation( unsigned int i , unsigned int j ) {

if ( !PAR_HAPLOID ) {
// round everyone down to the nearest haplotype
i = (i - (i % 2)) / 2;
j = (j - (j % 2)) / 2;
j = (j - (j % 2)) / 2;
}
unsigned int loc = (i > j) ? j * num + i : i * num + j;
return loc;
@@ -185,7 +185,7 @@ class ExtendHash {
extend_ret = extend_hash.insert( pair< unsigned int , Match >( pairToLocation(i,j) , m) );
(extend_ret.first->second).extend(w);
}

// Remove all pairs that were not extended beyond w
// int w : word # to remove prior to
void clearPairsPriorTo(int w) {
@@ -205,7 +205,7 @@ class ExtendHash {
void extendAllPairsTo(int w) {
for ( auto it = extend_hash.begin() ; it != extend_hash.end() ; it++ ) it->second.interval[1] = w;
}

// Remove all pairs
// int w : word # to remove prior to
void clearAllPairs() {
@@ -235,10 +235,10 @@ class SeedHash {
void clear() {
seed_hash.clear();
}
int size() {
int size() {
return seed_hash.size();
}

// Generate a new hash for this vector of individuals
unsigned long subHash( ExtendHash * e , vector<unsigned int> vec , int w ) {
SeedHash cur_sh;
@@ -254,7 +254,7 @@
unsigned long extendAllPairs( ExtendHash * e , int w ) {
unsigned long tot_pairs = 0;
for ( auto it = seed_hash.begin() ; it != seed_hash.end() ; ++it ) {

// *** As long as the # of pairs is high, generate a sub-hash for the next word
// *** Only store pairs of individuals that have collision in a small hash
// *** Extend only to the haplotypes that seeded here
@@ -269,49 +269,49 @@
for ( int ii = i+1 ; ii < it->second.size() ; ii++ ) {
e->extendPair( it->second[i] , it->second[ii] , w );
}
}
}
}
}
return tot_pairs;
return tot_pairs;
}
// Debug print all pairs in the current hash
// string word : formatted output for end of line
// returns : number of pairs evaluated
int printAllPairs(string word) {
int tot_pairs = 0;
for ( auto it = seed_hash.begin() ; it != seed_hash.end() ; ++it ) {
tot_pairs += it->second.size() * (it->second.size() - 1) / 2;
tot_pairs += it->second.size() * (it->second.size() - 1) / 2;
for ( int i = 0 ; i < it->second.size() ; i++ ) {
for ( int ii = i+1 ; ii < it->second.size() ; ii++ ) {
cout << all_ind[it->second[i]].getID() << "\t" << all_ind[it->second[ii]].getID() << "\t" << word << endl;
}
}
}
}
return tot_pairs;
}
}
};

double get_cpu_time(){
return (double)clock() / CLOCKS_PER_SEC;
}

int main (int argc, char* argv[])
{
int main (int argc, char* argv[])
{

cout << R"(
___ ___
___ ___
___ ____ ______ _ / (_)__ ___ |_ |
/ _ `/ -_) __/ ' \/ / / _ \/ -_) __/
\_, /\__/_/ /_/_/_/_/_/_//_/\__/____/
/___/
/ _ `/ -_) __/ ' \/ / / _ \/ -_) __/
\_, /\__/_/ /_/_/_/_/_/_//_/\__/____/
/___/
)" << endl;


double TIME_start = get_cpu_time();
double TIME_prev = TIME_start;
float PAR_MIN_MAF = 0;
float PAR_skip = 0;

string line, discard;
bool opt_error = 0;
int c;
@@ -330,16 +330,16 @@ int main (int argc, char* argv[])
break;
case 'f':
PAR_MIN_MAF = atof( optarg );
break;
break;
case 'g':
PAR_GAP = atoi( optarg );
break;
case 's':
PAR_skip = atof( optarg );
break;
break;
case 'd':
MAX_seeds = atoi( optarg );
break;
break;
default:
opt_error = 1;
}
@@ -355,39 +355,39 @@ int main (int argc, char* argv[])
cout << "-s\tSkip words with (seeds/samples) less than than this value [default 0.0] " << PAR_skip << endl;
cout << endl;
if ( opt_error == 1 ) abort ();

// load parameters
if(opt_error || argc - optind != 4){
cerr << "ERROR: Incorrect number of parameters" << endl;
cerr << "Usage: g2 [options] <haps file> <sample file> <genetic map file> <output file>" << endl;
cerr << "Usage: g2 [options] <haps file> <sample file> <genetic map file> <output file>" << endl;
return 0;
}

cout << "\thaps: " << argv[optind + 0] << endl;
cout << "\tsample: " << argv[optind + 1] << endl;
cout << "\tgmap: " << argv[optind + 2] << endl;
cout << "\toutput: " << argv[optind + 3] << endl << endl;
cout << "---" << endl << endl;

ifstream file_haps(argv[optind + 0]);
ifstream file_samp(argv[optind + 1]);
ifstream file_genm(argv[optind + 2]);

string out = string( argv[optind + 3] );
if ( PAR_BIN_OUT ) {
FOUT.open( ( out + ".bmatch" ).c_str() , ios::binary );
} else {
FOUT.open( argv[optind + 3] );
}

if(!FOUT ) { cerr << argv[optind + 3] << " could not be opened" << endl; return -1; }
if(!file_haps ) { cerr << argv[optind + 0] << " could not be opened" << endl; return -1; }
if(!file_samp ) { cerr << argv[optind + 1] << " could not be opened" << endl; return -1; }
if(!file_genm ) { cerr << argv[optind + 2] << " could not be opened" << endl; return -1; }

string map_field[3];
stringstream ss;

// *** read genetic map
vector< pair<long,double> > genetic_map;
int cur_g = 0;
@@ -407,7 +407,7 @@ int main (int argc, char* argv[])

cerr << "*** runtime : " << get_cpu_time() - TIME_start << "\t";
cerr << genetic_map.size() << " genetic map entries read" << endl;

// *** read individual information
// skip first two lines
getline(file_samp,line);
@@ -419,39 +419,39 @@ int main (int argc, char* argv[])
ss >> map_field[0] >> map_field[1];
if ( PAR_HAPLOID ) {
all_ind.push_back( Individual(map_field[0],(map_field[1]+".0").c_str(),idctr) );
idctr++;
idctr++;
all_ind.push_back( Individual(map_field[0],(map_field[1]+".1").c_str(),idctr) );
} else {
all_ind.push_back( Individual(map_field[0],map_field[1],idctr) );
all_ind.push_back( Individual(map_field[0],map_field[1],idctr) );
}
idctr++;
idctr++;
}
file_samp.close();
num_ind = all_ind.size();

cerr << "*** runtime : " << get_cpu_time() - TIME_start << "\t";
cerr << num_ind / 2 << " sample identifiers read" << endl;

Marker cur_marker;
// track position through genetic map
cur_g = 0;
int snp_ctr;
char al[2] , inp;
string cur_al;

// Storage for seeds
SeedHash seeds;

hash_size word[2];

// Storage for extensions
ExtendHash extend( PAR_HAPLOID ? num_ind : num_ind / 2 );

// Hash individual words
GLOBAL_READ_WORDS = 0;
GLOBAL_CURRENT_WORD = 0;

while( 1 ) {
snp_ctr = 0;
while( getline(file_haps,line) )
@@ -473,7 +473,7 @@ int main (int argc, char* argv[])
cur_marker.cm = genetic_map[cur_g-1].second + (cur_marker.pos - genetic_map[cur_g-1].first) * ( genetic_map[cur_g].second - genetic_map[cur_g-1].second ) / ( genetic_map[cur_g].first - genetic_map[cur_g-1].first );
// cerr << "interpolating " << cur_marker.id << " : " << cur_marker.pos << " between " << genetic_map[cur_g-1].first << "," << genetic_map[cur_g-1].second << " and " << genetic_map[cur_g].first << "," << genetic_map[cur_g].second << " to " << cur_marker.cm << endl;
}

// restrict on MAF
if ( PAR_MIN_MAF > 0 ) {
int maf_ctr = 0;
@@ -488,31 +488,31 @@ int main (int argc, char* argv[])
ss.clear(); ss.str( line );
ss >> map_field[0] >> map_field[0] >> map_field[0] >> al[0] >> al[1];
}

all_markers.push_back( cur_marker );

// read haplotype
for( unsigned int i=0; i<num_ind ; i++ )
{
ss >> inp;
if ( inp == '1' ) all_ind[i].setMarker( GLOBAL_READ_WORDS , snp_ctr );
}
snp_ctr++;

if ( snp_ctr % WORD_SIZE == 0 ) {
// cerr << "*** word " << GLOBAL_CURRENT_WORD << " " << cur_marker.pos << " " << cur_marker.cm << endl;
if ( ++GLOBAL_READ_WORDS >= CONST_READ_AHEAD ) break;
else cerr << "*** loading word buffer " << GLOBAL_READ_WORDS << " / " << CONST_READ_AHEAD << endl;
snp_ctr = 0 ;
}
}

// end if read all data
if( GLOBAL_CURRENT_WORD >= GLOBAL_READ_WORDS ) {
cerr << "processed " << GLOBAL_CURRENT_WORD * WORD_SIZE << " / " << all_markers.size() << " SNPs" << endl;
break;
}

for ( unsigned int i = 0 ; i < num_ind ; i++ ) {
// cerr << i << "\t" << all_ind[i].getWordString( w ) << endl;
seeds.insertIndividual( i , all_ind[i].getWordHash( GLOBAL_CURRENT_WORD ) );
@@ -539,28 +539,28 @@ int main (int argc, char* argv[])
<< extend.size() << " active seeds, "
<< GLOBAL_SKIPPED_WORDS << " skipped words" << endl;
seeds.clear();

for( unsigned int i=0; i< num_ind ; i++) all_ind[i].clear( GLOBAL_CURRENT_WORD );
GLOBAL_CURRENT_WORD++;
}
extend.clearAllPairs();
file_haps.close();
file_haps.close();
FOUT.close();

if ( PAR_BIN_OUT ) {
ofstream bmid_out( ( out + ".bmid" ).c_str() );
for ( int i = 0 ; i < all_markers.size() ; i++ ) bmid_out << all_markers[i].print();
bmid_out.close();

ofstream bsid_out( ( out + ".bsid" ).c_str() );
for ( int i = 0 ; i < all_ind.size() ; i++ ) {
bsid_out << all_ind[i].print();
if ( !PAR_HAPLOID ) i++;
}
bsid_out.close();

}
cerr << "*** runtime : " << get_cpu_time() - TIME_start << endl;

return 0;
}
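
Side note for readers of the hunks above (this block is not part of the commit): a minimal, self-contained sketch of the core pair-to-location round trip used by ExtendHash. It mirrors the encoding in pairToLocation() (smaller index times num plus larger index) and the modulo/division decoding visible in getLocation(), leaving out the haploid/diploid index adjustment; num, i, and j are illustrative values, not taken from g2.cpp.

#include <iostream>
#include <utility>

int main() {
    const unsigned int num = 1000;   // illustrative sample count (the value passed to the ExtendHash constructor)
    unsigned int i = 12, j = 345;    // an unordered pair of indices, both below num

    // Encode: scale the smaller index by num, as in pairToLocation()
    unsigned int loc = (i > j) ? j * num + i : i * num + j;

    // Decode: recover the pair with modulo and division, as in getLocation()
    std::pair<unsigned int, unsigned int> p;
    p.second = loc % num;
    p.first = (loc - p.second) / num;

    std::cout << loc << " -> ( " << p.first << " , " << p.second << " )" << std::endl;
    return 0;
}

Compiled and run, this prints 12345 -> ( 12 , 345 ): the pair survives the round trip as long as both indices are below num.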

0 comments on commit e7e163f
