Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added utility functions to initialize and hold a struct for passing around the RNA info to functions for increased maintainability #671

Draft
wants to merge 10 commits into
base: dev
Choose a base branch
from
Draft
30 changes: 30 additions & 0 deletions dfsdsdf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
origin/556-optim-update-sequence-range-visibility
origin/EdPknotFixBug
origin/EternaJS-new-oligo-method
origin/HEAD -> origin/dev
origin/OligoEnergyBug
origin/batch-renderer
origin/codon-cycler
origin/complexesAndConcentrations
origin/cypress
origin/degscore_predict
origin/dependabot/npm_and_yarn/three-0.125.0
origin/dev
origin/duplicate-buttons-for-drag-and-drop
origin/ensembleDefect
origin/eterna-nova
origin/eterna2-skeleton
origin/fix-zoom-centering
origin/i18n-tempaltetag
origin/master
origin/mercedconfig
origin/new-state-managment
origin/new-toolbar-updates
origin/offline
origin/pr-new_toolbar
origin/puzzlemaker-constraints
origin/scripts-changes
origin/sheila
origin/studs
origin/trunk
origin/v2
249 changes: 69 additions & 180 deletions lib/NUPACK/emscripten/FullEnsemble.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,74 +21,33 @@ extern int Multistranded;
FullAdvancedResult* FullEnsembleWithOligos(const std::string& seqString, int temperature, float kcalDeltaRange, bool const pseudoknotted = false) {

//this chuck of code sets up the variables used to represent adn configure the sequence for the C code to use
auto autoSeqString = MakeCString(seqString);
char* string = autoSeqString.get();
int seqNum[MAXSEQLENGTH+1];



//runtime_constants.h defines NAD_INFINITY
//#define NAD_INFINITY 100000 //artificial value for positive infinity
//this is how teh dnastructure call looks for subopt.c which is waht iran when you ron normal nupack compiled code
//here are comments from pfuncutilheaders.h on hwat the attributes of dnastructures consist of

//oneDnaStruct and dnaStructures are used for enumerating sequences
// typedef struct {
// int *theStruct; //describes what is paired to what
// DBL_TYPE error; //accumulated error (from the mfe) for a structure
// DBL_TYPE correctedEnergy; //actual energy of a structure
// int slength;
// //(accounting for symmetry).
// } oneDnaStruct;
//
// typedef struct {
// oneDnaStruct *validStructs;
//int nStructs; //# of structures stored
//int nAlloc; //# of structures allocated
// int seqlength;
// DBL_TYPE minError; //minimum deviation from mfe for all seqs
//in validStructs
//
//} dnaStructures;

//this struct will store
//all the structures within the given range
dnaStructures suboptStructs = {NULL, 0, 0, 0, NAD_INFINITY};

//convert from how it comes from eterna to how nuapck needs it for joined oligos '+'
char* pc;
do {
pc = strchr(string, '&');
if (pc) (*pc) = '+';
} while(pc);
SequenceStructureInfo rna_info = SequenceStructureInfo();
setSequenceInfo(seqString, &rna_info);


int seqStringLength = strlen(string);
convertSeq(string, seqNum, seqStringLength);


//first get the ensemble through subopt
if ( pseudoknotted ) {
mfeFullWithSym_SubOpt(seqNum, seqStringLength, &suboptStructs, 5, RNA, 1 /*DANGLETYPE*/,
temperature, TRUE, (DBL_TYPE) kcalDeltaRange,
if ( rna_info.isPknot==TRUE ) {
mfeFullWithSym_SubOpt(rna_info.sequenceNumber, rna_info.sequenceLength, &rna_info.ensemebleStructs, 5, RNA, 1 /*DANGLETYPE*/,
rna_info.temperature, TRUE, (DBL_TYPE) kcalDeltaRange,
0, SODIUM_CONC, MAGNESIUM_CONC, USE_LONG_HELIX_FOR_SALT_CORRECTION);
} else {
mfeFullWithSym_SubOpt(seqNum, seqStringLength, &suboptStructs, 3, RNA, 1 /*DANGLETYPE*/,
temperature, TRUE, (DBL_TYPE) kcalDeltaRange,
mfeFullWithSym_SubOpt(rna_info.sequenceNumber, rna_info.sequenceLength, &rna_info.ensemebleStructs, 3, RNA, 1 /*DANGLETYPE*/,
rna_info.temperature, TRUE, (DBL_TYPE) kcalDeltaRange,
0, SODIUM_CONC, MAGNESIUM_CONC, USE_LONG_HELIX_FOR_SALT_CORRECTION);
}


//now process the results for return to TS
//initialize the result to return
FullAdvancedResult* result = new FullAdvancedResult();

int i, j;
//get dot bracket notation from data
for (i = 0; i < suboptStructs.nStructs; i++ ) {
oneDnaStruct currentStruct = suboptStructs.validStructs[i];
for (i = 0; i < rna_info.ensemebleStructs.nStructs; i++ ) {
oneDnaStruct currentStruct = rna_info.ensemebleStructs.validStructs[i];

//each structure reset this
std::string singlestructure = "";
for (j = 0; j < suboptStructs.seqlength; j++ ) {
for (j = 0; j < rna_info.ensemebleStructs.seqlength; j++ ) {
if ( currentStruct.theStruct[j] > j ) {
singlestructure.push_back('(');
}
Expand All @@ -98,9 +57,9 @@ FullAdvancedResult* FullEnsembleWithOligos(const std::string& seqString, int tem
else singlestructure.push_back(')');
}


char* pc;
std::string constraints = singlestructure;
for (pc = string, i = 0, j = 0; (*pc); pc++, j++) {
for (pc = rna_info.sequenceChar, i = 0, j = 0; (*pc); pc++, j++) {
auto value = ((*pc) == '+' ? '&' : constraints[i++]);
if (j < singlestructure.length()) {
singlestructure[j] = value;
Expand All @@ -120,176 +79,106 @@ FullAdvancedResult* FullEnsembleWithOligos(const std::string& seqString, int tem
result->suboptFreeEnergy.push_back(correctedEnergy);
}

clearDnaStructures(&suboptStructs);

clearDnaStructures(&rna_info.ensemebleStructs);

return result;
}


FullAdvancedResult* FullEnsembleNoBindingSite(const std::string& seqString, int temperature, float kcalDeltaRange, bool const pseudoknotted = false) {

//this chuck of code sets up the variables used to represent adn configure the sequence for the C code to use
auto autoSeqString = MakeCString(seqString);
char* string = autoSeqString.get();
int seqNum[MAXSEQLENGTH+1];
int seqStringLength = strlen(string);
//runtime_constants.h defines NAD_INFINITY
//#define NAD_INFINITY 100000 //artificial value for positive infinity
//this is how teh dnastructure call looks for subopt.c which is waht iran when you ron normal nupack compiled code
//here are comments from pfuncutilheaders.h on hwat the attributes of dnastructures consist of

//oneDnaStruct and dnaStructures are used for enumerating sequences
// typedef struct {
// int *theStruct; //describes what is paired to what
// DBL_TYPE error; //accumulated error (from the mfe) for a structure
// DBL_TYPE correctedEnergy; //actual energy of a structure
// int slength;
// //(accounting for symmetry).
// } oneDnaStruct;
//
// typedef struct {
// oneDnaStruct *validStructs;
//int nStructs; //# of structures stored
//int nAlloc; //# of structures allocated
// int seqlength;
// DBL_TYPE minError; //minimum deviation from mfe for all seqs
//in validStructs
//
//} dnaStructures;

//this struct will store
//all the structures within the given range
dnaStructures suboptStructs = {NULL, 0, 0, 0, NAD_INFINITY};
convertSeq(string, seqNum, seqStringLength);



SequenceStructureInfo rna_info = SequenceStructureInfo();
setSequenceInfo(seqString, &rna_info);
setGlobals(NULL, NULL, pseudoknotted, temperature, &rna_info);

//first get the ensemble through subopt
if ( pseudoknotted ) {
mfeFullWithSym_SubOpt(seqNum, seqStringLength, &suboptStructs, 5, RNA, 1 /*DANGLETYPE*/,
temperature, TRUE, (DBL_TYPE) kcalDeltaRange,
if ( rna_info.isPknot == TRUE ) {
mfeFullWithSym_SubOpt(rna_info.sequenceNumber, rna_info.sequenceLength, &rna_info.ensemebleStructs, 5, RNA, 1 /*DANGLETYPE*/,
rna_info.temperature, TRUE, (DBL_TYPE) kcalDeltaRange,
0, SODIUM_CONC, MAGNESIUM_CONC, USE_LONG_HELIX_FOR_SALT_CORRECTION);
} else {
mfeFullWithSym_SubOpt(seqNum, seqStringLength, &suboptStructs, 3, RNA, 1 /*DANGLETYPE*/,
temperature, TRUE, (DBL_TYPE) kcalDeltaRange,
mfeFullWithSym_SubOpt(rna_info.sequenceNumber, rna_info.sequenceLength, &rna_info.ensemebleStructs, 3, RNA, 1 /*DANGLETYPE*/,
rna_info.temperature, TRUE, (DBL_TYPE) kcalDeltaRange,
0, SODIUM_CONC, MAGNESIUM_CONC, USE_LONG_HELIX_FOR_SALT_CORRECTION);
}

//initialize the result to return


//now process the results for return to TS
//initialize the result to return

FullAdvancedResult* result = new FullAdvancedResult();

std::string mfeStructure;
//get dot bracket notation from data
for ( int i = 0; i < suboptStructs.nStructs; i++ ) {
oneDnaStruct currentStruct = suboptStructs.validStructs[i];

for ( int i = 0; i < rna_info.ensemebleStructs.nStructs; i++ ) {
rna_info.currentStruct = rna_info.ensemebleStructs.validStructs[i];

//get the secondary strucutre in dot paren notation
std::string singlestructure = getDotParens(pseudoknotted, suboptStructs.seqlength, &currentStruct);
std::string singlestructure = getDotParens(pseudoknotted, rna_info.ensemebleStructs.seqlength, &rna_info.currentStruct);

if (i == 0)
{
//this is the first one so it is the mfe
mfeStructure = singlestructure;
}

//get energies
double energyError = currentStruct.error;
double correctedEnergy = currentStruct.correctedEnergy;
double energyError = rna_info.currentStruct.error;
double correctedEnergy = rna_info.currentStruct.correctedEnergy;


//std::string test(rna_info.sequenceNumber);
std::string test;
for (int j =0; j < rna_info.sequenceLength; j++) {
test = test + std::to_string(rna_info.sequenceNumber[j]);
}
//write out data
result->suboptStructures.push_back(singlestructure);
result->suboptEnergyError.push_back(energyError);
result->suboptFreeEnergy.push_back(correctedEnergy);
}

clearDnaStructures(&suboptStructs);

clearDnaStructures(&rna_info.ensemebleStructs);


return result;
}

/*
mode is how to do the ED
1= do only ED
2 = do only MFE ED
3 = do both
*/


FullEnsembleDefectResult* GetEnsembleDefect(const std::string& seqString, const std::string& MfeStructure, int temperature, bool pseudoknot) {

FullEnsembleDefectResult* result = new FullEnsembleDefectResult();
int complexity;
DBL_TYPE nsStar_ED;
int i;
int nNicks;
int seqlength;
int tmpLength;
int seqNum[MAXSEQLENGTH+1];

//set the globals
USE_MFE=0;
ONLY_ONE_MFE=0;

if (pseudoknot == true) {
DO_PSEUDOKNOTS = 1;
} else {
DO_PSEUDOKNOTS = 0;
}

if ( !DO_PSEUDOKNOTS ) {
complexity = 3;
} else {
complexity = 5;
}


//convert sequence from string to char*
auto autoSeqString = MakeCString(seqString);
char* seqChar = autoSeqString.get();

FullEnsembleDefectResult* result = new FullEnsembleDefectResult();

SequenceStructureInfo rna_info = SequenceStructureInfo();
setSequenceInfo(seqString, &rna_info);
setStructureInfo(MfeStructure, &rna_info);
setGlobals(FALSE,FALSE,pseudoknot,temperature, &rna_info);

//convert sequence from latin based characters "A,C,U,G" into numerical representation A=1, C=2, G=3. U=4
seqlength = tmpLength = strlen(seqChar);
convertSeq(seqChar, seqNum, tmpLength);

// Get the number of strand breaks
nNicks = 0;
for (i = 0; i < tmpLength; i++) {
if (seqChar[i] == '+') {
nNicks++;
}
}

// New sequence length removing the number of breaks or pluses from the total
seqlength -= nNicks;
DBL_TYPE nsStar_ED;

// Allocate memory for storing pair probabilities
pairPr = (DBL_TYPE*) calloc( (rna_info.sequenceLength+1)*(rna_info.sequenceLength+1), sizeof(DBL_TYPE));
// Allocate memory for storing pair probabilities
pairPrPbg = (DBL_TYPE*) calloc( (rna_info.sequenceLength+1)*(rna_info.sequenceLength+1), sizeof(DBL_TYPE));
pairPrPb = (DBL_TYPE*) calloc( (rna_info.sequenceLength+1)*(rna_info.sequenceLength+1), sizeof(DBL_TYPE));

//convert the string structure to a char array
auto autoMfeStructure = MakeCString(MfeStructure);
char* MfeStructureChar = autoMfeStructure.get();

//get the pairs from the struct
int thepairs[MAXSEQLENGTH+1];
getStructureFromParens(MfeStructureChar, thepairs, seqlength);

// Allocate memory for storing pair probabilities
pairPr = (DBL_TYPE*) calloc( (seqlength+1)*(seqlength+1), sizeof(DBL_TYPE));
// Allocate memory for storing pair probabilities
pairPrPbg = (DBL_TYPE*) calloc( (seqlength+1)*(seqlength+1), sizeof(DBL_TYPE));
pairPrPb = (DBL_TYPE*) calloc( (seqlength+1)*(seqlength+1), sizeof(DBL_TYPE));
nsStar_ED = nsStarPairsOrParensFull(rna_info.sequenceLength, rna_info.sequenceNumber, rna_info.thePairs, NULL,
rna_info.complexity, RNA, 1 /*DANGLETYPE*/,
rna_info.temperature, SODIUM_CONC,
MAGNESIUM_CONC, USE_LONG_HELIX_FOR_SALT_CORRECTION);


result->ensembleDefect = (long double) nsStar_ED;
result->ensembleDefectNormalized = (long double) nsStar_ED/rna_info.sequenceLength;

nsStar_ED = nsStarPairsOrParensFull(seqlength, seqNum, thepairs, NULL,
complexity, RNA, 1 /*DANGLETYPE*/,
temperature, SODIUM_CONC,
MAGNESIUM_CONC, USE_LONG_HELIX_FOR_SALT_CORRECTION);
free(pairPrPbg);
free(pairPrPb);


result->ensembleDefect = (long double) nsStar_ED;
result->ensembleDefectNormalized = (long double) nsStar_ED/seqlength;

free(pairPr);
free(pairPrPbg);
free(pairPrPb);
return result;

return result;
}

4 changes: 3 additions & 1 deletion lib/NUPACK/emscripten/FullEnsemble.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ struct FullAdvancedResult {

struct FullEnsembleDefectResult {
double ensembleDefect;
double ensembleDefectNormalized;

double ensembleDefectNormalized;

};

FullAdvancedResult* FullEnsembleNoBindingSite (const std::string& seqString, int temperature, float kcalDeltaRange, bool const pseudoknotted);
Expand Down
Loading