
Commit

Trim trailing whitespace
alexreg committed May 20, 2024
1 parent ebf257e commit e7e163f
Showing 2 changed files with 66 additions and 66 deletions.
122 changes: 61 additions & 61 deletions g2.cpp
@@ -41,7 +41,7 @@ struct Marker {
string id;
long pos;
double cm;

string print() {
stringstream ss;
ss << id << '\t' << pos << '\t' << cm << endl;
@@ -67,10 +67,10 @@ class Individual {
string getWordString( int w ) {
return hap[ w % CONST_READ_AHEAD ].to_string();
}
string print() {
string print() {
stringstream ss;
ss << id[0] << '\t' << id[1] << '\t' << endl;
return ss.str();
return ss.str();
}
string getID() { return id[1]; }
int getNum() { return idnum; }
@@ -116,13 +116,13 @@ struct Match {
FOUT.write( (char*) &pid[0] , sizeof( unsigned int ) );
FOUT.write( (char*) &pid[1] , sizeof( unsigned int ) );
FOUT.write( (char*) &sid[0] , sizeof( unsigned int ) );
FOUT.write( (char*) &sid[1] , sizeof( unsigned int ) );
FOUT.write( (char*) &sid[1] , sizeof( unsigned int ) );
} else {
FOUT << all_ind[p.first].getID() << "\t"
<< all_ind[p.second].getID() << "\t"
<< getPos( interval[0] * WORD_SIZE ) << "\t"
<< getPos( interval[1] * WORD_SIZE + WORD_SIZE - 1 ) << "\t"
<< mlen << "\t"
FOUT << all_ind[p.first].getID() << "\t"
<< all_ind[p.second].getID() << "\t"
<< getPos( interval[0] * WORD_SIZE ) << "\t"
<< getPos( interval[1] * WORD_SIZE + WORD_SIZE - 1 ) << "\t"
<< mlen << "\t"
<< interval[1] - interval[0] + 1 << "\t"
<< gaps - PAR_GAP;
FOUT << endl;
@@ -161,17 +161,17 @@ class ExtendHash {
p.first = 2 * ((loc - p.second/2) / num);
} else {
p.second = loc % num;
p.first = (loc - p.second) / num;
p.first = (loc - p.second) / num;
}
return p;
}
// Compute location from pair of individuals
unsigned int pairToLocation( unsigned int i , unsigned int j ) {

if ( !PAR_HAPLOID ) {
// round everyone down to the nearest haplotype
i = (i - (i % 2)) / 2;
j = (j - (j % 2)) / 2;
j = (j - (j % 2)) / 2;
}
unsigned int loc = (i > j) ? j * num + i : i * num + j;
return loc;
@@ -185,7 +185,7 @@ class ExtendHash {
extend_ret = extend_hash.insert( pair< unsigned int , Match >( pairToLocation(i,j) , m) );
(extend_ret.first->second).extend(w);
}

// Remove all pairs that were not extended beyond w
// int w : word # to remove prior to
void clearPairsPriorTo(int w) {
@@ -205,7 +205,7 @@ class ExtendHash {
void extendAllPairsTo(int w) {
for ( auto it = extend_hash.begin() ; it != extend_hash.end() ; it++ ) it->second.interval[1] = w;
}

// Remove all pairs
// int w : word # to remove prior to
void clearAllPairs() {
@@ -235,10 +235,10 @@ class SeedHash {
void clear() {
seed_hash.clear();
}
int size() {
int size() {
return seed_hash.size();
}

// Generate a new hash for this vector of individuals
unsigned long subHash( ExtendHash * e , vector<unsigned int> vec , int w ) {
SeedHash cur_sh;
@@ -254,7 +254,7 @@
unsigned long extendAllPairs( ExtendHash * e , int w ) {
unsigned long tot_pairs = 0;
for ( auto it = seed_hash.begin() ; it != seed_hash.end() ; ++it ) {

// *** As long as the # of pairs is high, generate a sub-hash for the next word
// *** Only store pairs of individuals that have collision in a small hash
// *** Extend only to the haplotypes that seeded here
@@ -269,49 +269,49 @@
for ( int ii = i+1 ; ii < it->second.size() ; ii++ ) {
e->extendPair( it->second[i] , it->second[ii] , w );
}
}
}
}
}
return tot_pairs;
return tot_pairs;
}
// Debug print all pairs in the current hash
// string word : formatted output for end of line
// returns : number of pairs evaluated
int printAllPairs(string word) {
int tot_pairs = 0;
for ( auto it = seed_hash.begin() ; it != seed_hash.end() ; ++it ) {
tot_pairs += it->second.size() * (it->second.size() - 1) / 2;
tot_pairs += it->second.size() * (it->second.size() - 1) / 2;
for ( int i = 0 ; i < it->second.size() ; i++ ) {
for ( int ii = i+1 ; ii < it->second.size() ; ii++ ) {
cout << all_ind[it->second[i]].getID() << "\t" << all_ind[it->second[ii]].getID() << "\t" << word << endl;
}
}
}
}
return tot_pairs;
}
}
};

double get_cpu_time(){
return (double)clock() / CLOCKS_PER_SEC;
}

int main (int argc, char* argv[])
{
int main (int argc, char* argv[])
{

cout << R"(
___ ___
___ ___
___ ____ ______ _ / (_)__ ___ |_ |
/ _ `/ -_) __/ ' \/ / / _ \/ -_) __/
\_, /\__/_/ /_/_/_/_/_/_//_/\__/____/
/___/
/ _ `/ -_) __/ ' \/ / / _ \/ -_) __/
\_, /\__/_/ /_/_/_/_/_/_//_/\__/____/
/___/
)" << endl;


double TIME_start = get_cpu_time();
double TIME_prev = TIME_start;
float PAR_MIN_MAF = 0;
float PAR_skip = 0;

string line, discard;
bool opt_error = 0;
int c;
@@ -330,16 +330,16 @@ int main (int argc, char* argv[])
break;
case 'f':
PAR_MIN_MAF = atof( optarg );
break;
break;
case 'g':
PAR_GAP = atoi( optarg );
break;
case 's':
PAR_skip = atof( optarg );
break;
break;
case 'd':
MAX_seeds = atoi( optarg );
break;
break;
default:
opt_error = 1;
}
@@ -355,39 +355,39 @@ int main (int argc, char* argv[])
cout << "-s\tSkip words with (seeds/samples) less than than this value [default 0.0] " << PAR_skip << endl;
cout << endl;
if ( opt_error == 1 ) abort ();

// load parameters
if(opt_error || argc - optind != 4){
cerr << "ERROR: Incorrect number of parameters" << endl;
cerr << "Usage: g2 [options] <haps file> <sample file> <genetic map file> <output file>" << endl;
cerr << "Usage: g2 [options] <haps file> <sample file> <genetic map file> <output file>" << endl;
return 0;
}

cout << "\thaps: " << argv[optind + 0] << endl;
cout << "\tsample: " << argv[optind + 1] << endl;
cout << "\tgmap: " << argv[optind + 2] << endl;
cout << "\toutput: " << argv[optind + 3] << endl << endl;
cout << "---" << endl << endl;

ifstream file_haps(argv[optind + 0]);
ifstream file_samp(argv[optind + 1]);
ifstream file_genm(argv[optind + 2]);

string out = string( argv[optind + 3] );
if ( PAR_BIN_OUT ) {
FOUT.open( ( out + ".bmatch" ).c_str() , ios::binary );
} else {
FOUT.open( argv[optind + 3] );
}

if(!FOUT ) { cerr << argv[optind + 3] << " could not be opened" << endl; return -1; }
if(!file_haps ) { cerr << argv[optind + 0] << " could not be opened" << endl; return -1; }
if(!file_samp ) { cerr << argv[optind + 1] << " could not be opened" << endl; return -1; }
if(!file_genm ) { cerr << argv[optind + 2] << " could not be opened" << endl; return -1; }

string map_field[3];
stringstream ss;

// *** read genetic map
vector< pair<long,double> > genetic_map;
int cur_g = 0;
@@ -407,7 +407,7 @@ int main (int argc, char* argv[])

cerr << "*** runtime : " << get_cpu_time() - TIME_start << "\t";
cerr << genetic_map.size() << " genetic map entries read" << endl;

// *** read individual information
// skip first two lines
getline(file_samp,line);
@@ -419,39 +419,39 @@ int main (int argc, char* argv[])
ss >> map_field[0] >> map_field[1];
if ( PAR_HAPLOID ) {
all_ind.push_back( Individual(map_field[0],(map_field[1]+".0").c_str(),idctr) );
idctr++;
idctr++;
all_ind.push_back( Individual(map_field[0],(map_field[1]+".1").c_str(),idctr) );
} else {
all_ind.push_back( Individual(map_field[0],map_field[1],idctr) );
all_ind.push_back( Individual(map_field[0],map_field[1],idctr) );
}
idctr++;
idctr++;
}
file_samp.close();
num_ind = all_ind.size();

cerr << "*** runtime : " << get_cpu_time() - TIME_start << "\t";
cerr << num_ind / 2 << " sample identifiers read" << endl;

Marker cur_marker;
// track position through genetic map
cur_g = 0;
int snp_ctr;
char al[2] , inp;
string cur_al;

// Storage for seeds
SeedHash seeds;

hash_size word[2];

// Storage for extensions
ExtendHash extend( PAR_HAPLOID ? num_ind : num_ind / 2 );

// Hash individual words
GLOBAL_READ_WORDS = 0;
GLOBAL_CURRENT_WORD = 0;

while( 1 ) {
snp_ctr = 0;
while( getline(file_haps,line) )
@@ -473,7 +473,7 @@ int main (int argc, char* argv[])
cur_marker.cm = genetic_map[cur_g-1].second + (cur_marker.pos - genetic_map[cur_g-1].first) * ( genetic_map[cur_g].second - genetic_map[cur_g-1].second ) / ( genetic_map[cur_g].first - genetic_map[cur_g-1].first );
// cerr << "interpolating " << cur_marker.id << " : " << cur_marker.pos << " between " << genetic_map[cur_g-1].first << "," << genetic_map[cur_g-1].second << " and " << genetic_map[cur_g].first << "," << genetic_map[cur_g].second << " to " << cur_marker.cm << endl;
}

// restrict on MAF
if ( PAR_MIN_MAF > 0 ) {
int maf_ctr = 0;
@@ -488,31 +488,31 @@ int main (int argc, char* argv[])
ss.clear(); ss.str( line );
ss >> map_field[0] >> map_field[0] >> map_field[0] >> al[0] >> al[1];
}

all_markers.push_back( cur_marker );

// read haplotype
for( unsigned int i=0; i<num_ind ; i++ )
{
ss >> inp;
if ( inp == '1' ) all_ind[i].setMarker( GLOBAL_READ_WORDS , snp_ctr );
}
snp_ctr++;

if ( snp_ctr % WORD_SIZE == 0 ) {
// cerr << "*** word " << GLOBAL_CURRENT_WORD << " " << cur_marker.pos << " " << cur_marker.cm << endl;
if ( ++GLOBAL_READ_WORDS >= CONST_READ_AHEAD ) break;
else cerr << "*** loading word buffer " << GLOBAL_READ_WORDS << " / " << CONST_READ_AHEAD << endl;
snp_ctr = 0 ;
}
}

// end if read all data
if( GLOBAL_CURRENT_WORD >= GLOBAL_READ_WORDS ) {
cerr << "processed " << GLOBAL_CURRENT_WORD * WORD_SIZE << " / " << all_markers.size() << " SNPs" << endl;
break;
}

for ( unsigned int i = 0 ; i < num_ind ; i++ ) {
// cerr << i << "\t" << all_ind[i].getWordString( w ) << endl;
seeds.insertIndividual( i , all_ind[i].getWordHash( GLOBAL_CURRENT_WORD ) );
@@ -539,28 +539,28 @@ int main (int argc, char* argv[])
<< extend.size() << " active seeds, "
<< GLOBAL_SKIPPED_WORDS << " skipped words" << endl;
seeds.clear();

for( unsigned int i=0; i< num_ind ; i++) all_ind[i].clear( GLOBAL_CURRENT_WORD );
GLOBAL_CURRENT_WORD++;
}
extend.clearAllPairs();
file_haps.close();
file_haps.close();
FOUT.close();

if ( PAR_BIN_OUT ) {
ofstream bmid_out( ( out + ".bmid" ).c_str() );
for ( int i = 0 ; i < all_markers.size() ; i++ ) bmid_out << all_markers[i].print();
bmid_out.close();

ofstream bsid_out( ( out + ".bsid" ).c_str() );
for ( int i = 0 ; i < all_ind.size() ; i++ ) {
bsid_out << all_ind[i].print();
if ( !PAR_HAPLOID ) i++;
}
bsid_out.close();

}
cerr << "*** runtime : " << get_cpu_time() - TIME_start << endl;

return 0;
}
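
Side note for readers of the hunks above (this block is not part of the commit): a minimal, self-contained sketch of the core pair-to-location round trip used by ExtendHash. It mirrors the encoding in pairToLocation() (smaller index times num plus larger index) and the modulo/division decoding visible in getLocation(), leaving out the haploid/diploid index adjustment; num, i, and j are illustrative values, not taken from g2.cpp.

#include <iostream>
#include <utility>

int main() {
    const unsigned int num = 1000;   // illustrative sample count (the value passed to the ExtendHash constructor)
    unsigned int i = 12, j = 345;    // an unordered pair of indices, both below num

    // Encode: scale the smaller index by num, as in pairToLocation()
    unsigned int loc = (i > j) ? j * num + i : i * num + j;

    // Decode: recover the pair with modulo and division, as in getLocation()
    std::pair<unsigned int, unsigned int> p;
    p.second = loc % num;
    p.first = (loc - p.second) / num;

    std::cout << loc << " -> ( " << p.first << " , " << p.second << " )" << std::endl;
    return 0;
}

Compiled and run, this prints 12345 -> ( 12 , 345 ): the pair survives the round trip as long as both indices are below num.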

0 comments on commit e7e163f
