Skip to content

Commit

Permalink
Merge pull request #2 from ayoshiaki/mdd
Browse files Browse the repository at this point in the history
Mdd
  • Loading branch information
ayoshiaki committed Sep 19, 2014
2 parents 753c876 + e220c2d commit 3f04b4c
Show file tree
Hide file tree
Showing 17 changed files with 1,289 additions and 81 deletions.
2 changes: 2 additions & 0 deletions app/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ ADD_EXECUTABLE(predalign predalign.cpp)
ADD_EXECUTABLE(runAllPredalignTests runAllPredalignTests.cpp)
ADD_EXECUTABLE(print_graph_from_model print_graph_from_model.cpp)
ADD_EXECUTABLE(profileHMMtests profileHMMtests.cpp)
ADD_EXECUTABLE(teste_mdd teste_mdd.cpp)

TARGET_LINK_LIBRARIES(evaluate ${APP_DEP} )
TARGET_LINK_LIBRARIES(train ${APP_DEP} )
Expand All @@ -60,6 +61,7 @@ TARGET_LINK_LIBRARIES(runAllPredalignTests ${APP_DEP} )
TARGET_LINK_LIBRARIES(posterior_decoding ${APP_DEP} )
TARGET_LINK_LIBRARIES(print_graph_from_model ${APP_DEP} )
TARGET_LINK_LIBRARIES(profileHMMtests ${APP_DEP} )
TARGET_LINK_LIBRARIES(teste_mdd ${APP_DEP} )

install(TARGETS train viterbi_decoding mea_decoding simulate testeFBGHMM testeGPHMM simulateAlignment align predalign runAllPredalignTests bayes_classifier one_file_model sliding_window evaluate kullback_positional posterior_probabilities posterior_decoding print_graph_from_model profileHMMtests
RUNTIME DESTINATION bin
Expand Down
204 changes: 204 additions & 0 deletions app/teste_mdd.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
#include <iostream>
#include <vector>
#include <fstream>

#include "Consensus.hpp"
#include "ChiSquare.hpp"
#include "MaximalDependenceDecomposition.hpp"
#include "ProbabilisticModelCreatorClient.hpp"
#include "ContextTree.hpp"

using namespace tops;
using namespace std;

int main (int argc, char ** argv)
{
cout << "TesteMDD" << endl;

/***********************************************************/
// Alphabet
AlphabetPtr alphabet = AlphabetPtr(new Alphabet());
alphabet->createSymbol("A");
alphabet->createSymbol("C");
alphabet->createSymbol("G");
alphabet->createSymbol("T");

/***********************************************************/
// Consensus Sequence
ConsensusSequence consensus_sequence;

vector<int> s1;
s1.push_back(0); s1.push_back(1); // a/c
Consensus c1(s1);
consensus_sequence.push_back(c1);

int x1[] = {0, 2, 2, 3}; // AGGT
for (int i = 0; i < 4; i++) {
vector<int> s;
s.push_back(x1[i]);
Consensus c(s);
consensus_sequence.push_back(c);
}

vector<int> s2;
s2.push_back(0); s2.push_back(2); // a/g
Consensus c2(s2);
consensus_sequence.push_back(c2);

int x2[] = {0, 2, 3}; // agt
for (int i = 0; i < 3; i++) {
vector<int> s;
s.push_back(x2[i]);
Consensus c(s);
consensus_sequence.push_back(c);
}

for (std::vector<Consensus>::iterator it = consensus_sequence.begin() ; it != consensus_sequence.end(); ++it) {
cout << it->str() << '\t';
}
cout << endl;

/***********************************************************/
// WMMs
ProbabilisticModelCreatorClient creator;
ProbabilisticModelPtr root = creator.create("_test2/root.txt");
ProbabilisticModelPtr g5 = creator.create("_test2/g5.txt");
ProbabilisticModelPtr h5 = creator.create("_test2/h5.txt");
ProbabilisticModelPtr g5gm1 = creator.create("_test2/g5gm1.txt");
ProbabilisticModelPtr g5hm1 = creator.create("_test2/g5hm1.txt");
ProbabilisticModelPtr g5gm1am2 = creator.create("_test2/g5gm1am2.txt");
ProbabilisticModelPtr g5gm1bm2 = creator.create("_test2/g5gm1bm2.txt");
ProbabilisticModelPtr g5gm1am2u6 = creator.create("_test2/g5gm1am2u6.txt");
ProbabilisticModelPtr g5gm1am2v6 = creator.create("_test2/g5gm1am2v6.txt");

/***********************************************************/
// MDD tree
MaximalDependenceDecompositionNodePtr mdd_root = MaximalDependenceDecompositionNodePtr(
new MaximalDependenceDecompositionNode("root", root, 7));
MaximalDependenceDecompositionNodePtr mdd_g5 = MaximalDependenceDecompositionNodePtr(
new MaximalDependenceDecompositionNode("g5", g5, 2));
MaximalDependenceDecompositionNodePtr mdd_h5 = MaximalDependenceDecompositionNodePtr(
new MaximalDependenceDecompositionNode("h5", h5, -1));
MaximalDependenceDecompositionNodePtr mdd_g5gm1 = MaximalDependenceDecompositionNodePtr(
new MaximalDependenceDecompositionNode("g5gm1", g5gm1, 1));
MaximalDependenceDecompositionNodePtr mdd_g5hm1 = MaximalDependenceDecompositionNodePtr(
new MaximalDependenceDecompositionNode("g5hm1", g5hm1, -1));
MaximalDependenceDecompositionNodePtr mdd_g5gm1am2 = MaximalDependenceDecompositionNodePtr(
new MaximalDependenceDecompositionNode("g5gm1am2", g5gm1am2, 8));
MaximalDependenceDecompositionNodePtr mdd_g5gm1bm2 = MaximalDependenceDecompositionNodePtr(
new MaximalDependenceDecompositionNode("g5gm1bm2", g5gm1bm2, -1));
MaximalDependenceDecompositionNodePtr mdd_g5gm1am2u6 = MaximalDependenceDecompositionNodePtr(
new MaximalDependenceDecompositionNode("g5gm1am2u6", g5gm1am2u6, -1));
MaximalDependenceDecompositionNodePtr mdd_g5gm1am2v6 = MaximalDependenceDecompositionNodePtr(
new MaximalDependenceDecompositionNode("g5gm1am2v6", g5gm1am2v6, -1));

mdd_root->setChildern(mdd_g5, mdd_h5);
mdd_g5->setChildern(mdd_g5gm1, mdd_g5hm1);
mdd_g5gm1->setChildern(mdd_g5gm1am2, mdd_g5gm1bm2);
mdd_g5gm1am2->setChildern(mdd_g5gm1am2u6, mdd_g5gm1am2v6);

/***********************************************************/
// MDD definition

MaximalDependenceDecompositionPtr mdd = MaximalDependenceDecompositionPtr(new MaximalDependenceDecomposition());
mdd->setAlphabet(alphabet);
mdd->setMDDTree(mdd_root);
mdd->setConsensusSequence(consensus_sequence);
ProbabilisticModelPtr consensus_model = creator.create("_test2/consensus_model.txt");
mdd->setConsensusModel(consensus_model);

/***********************************************************/
// Evaluate

Sequence s;
s.push_back(1);s.push_back(0);s.push_back(2);s.push_back(2);s.push_back(3);s.push_back(2);s.push_back(0);s.push_back(0);s.push_back(3);
cout << mdd->evaluate(s, 0, 9) << endl;

/***********************************************************/
// Choose

srand(time(NULL));
Sequence new_sequence;
mdd->choose(new_sequence, 9);

for (int i = 0; i < 9; i++) {
cout << new_sequence[i] << "\t";
}
cout << endl;

/***********************************************************/
// Criando SequenceEntryList
SequenceEntryList sequences;
for (int i = 0; i < 10; i++) {
SequenceEntryPtr se = SequenceEntryPtr(new SequenceEntry(alphabet));
Sequence random_sequence;
mdd->choose(random_sequence, 9);
se->setSequence(random_sequence);
se->setName("s");
sequences.push_back(se);
}

for (int i = 0; i < 10; i++) {
for (int j = 0; j < 10; j++) {
cout << sequences[i]->getSequence()[j];
}
cout << endl;
}

/***********************************************************/
// Treinando MDD
MaximalDependenceDecompositionPtr trained_mdd = MaximalDependenceDecompositionPtr(new MaximalDependenceDecomposition());
trained_mdd->setAlphabet(alphabet);
trained_mdd->setConsensusSequence(consensus_sequence);

// ProbabilisticModelPtr consensus_model = creator.create("_test2/consensus_model.txt");
trained_mdd->setConsensusModel(consensus_model);

trained_mdd->train(sequences, 2);

cout << "-------------------------" << endl;
Sequence new_sequence_from_trained_mdd;
trained_mdd->choose(new_sequence_from_trained_mdd, 9);

for (int i = 0; i < 9; i++) {
cout << new_sequence_from_trained_mdd[i] << "\t";
}
cout << endl;

// std::map<std::string, double> w;
// ContextTreePtr tree = ContextTreePtr(new ContextTree(alphabet));
// tree->initializeCounter(sequences, 0, w);
// tree->normalize();

// cout << tree->getContext(0)->getDistribution()->str() << endl;

// cout << mdd->str() << endl;

cout << "--------------------------" << endl;
ProbabilisticModelPtr mdd_from_config = creator.create("_test2/mdd.txt");
// cout << mdd_from_config->str() << endl;

// cout << trained_mdd->evaluate(new_sequence, 0, 9) << endl;
// cout << trained_mdd->evaluate(new_sequence, 0, 1) << endl;

new_sequence.push_back(0);
new_sequence.push_back(0);
new_sequence.push_back(0);
new_sequence.push_back(0);

mdd->initialize_prefix_sum_array(new_sequence);
// cout << mdd->evaluate(new_sequence, 0, 8);
cout << mdd->prefix_sum_array_compute(0, 1) << endl;
cout << mdd->prefix_sum_array_compute(0, 8) << endl;
cout << mdd->prefix_sum_array_compute(1, 9) << endl;
cout << mdd->prefix_sum_array_compute(2, 10) << endl;
cout << mdd->prefix_sum_array_compute(3, 11) << endl;
cout << mdd->prefix_sum_array_compute(4, 12) << endl;

return 0;
}





2 changes: 2 additions & 0 deletions app/train.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include "TrainInterpolatedMarkovChain.hpp"
#include "TrainSimilarityBasedSequenceWeighting.hpp"
#include "TrainPhasedMarkovChainContextAlgorithm.hpp"
#include "TrainMaximalDependenceDecomposition.hpp"
#include "TrainHMMMaximumLikelihood.hpp"
#include "RemoveSequenceFromModel.hpp"
#include "SequenceFormat.hpp"
Expand Down Expand Up @@ -114,6 +115,7 @@ int main(int argc, char ** argv) {
createModelCommand["SmoothedHistogramStanke"] = SmoothedHistogramStankePtr(new SmoothedHistogramStanke());
createModelCommand["SmoothedHistogramBurge"] = SmoothedHistogramBurgePtr(new SmoothedHistogramBurge());
createModelCommand["DiscreteIIDModel"] = TrainDiscreteIIDModelPtr(new TrainDiscreteIIDModel());
createModelCommand["MaximalDependenceDecomposition"] = TrainMaximalDependenceDecompositionPtr(new TrainMaximalDependenceDecomposition());


modelSelectionCommand["BIC"] = BayesianInformationCriteriaPtr(
Expand Down
8 changes: 7 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,13 @@ include_directories(../lang/src .)

ADD_LIBRARY(ToPS SHARED Symbol Alphabet util SparseMatrix MultipleAlignment GHMMStates DiscreteIIDModel VariableLengthMarkovChain InhomogeneousMarkovChain HiddenMarkovModel ConfigurationReader DiscreteIIDModelCreator VariableLengthMarkovChainCreator InhomogeneousMarkovChainCreator HiddenMarkovModelCreator GeneralizedHiddenMarkovModelCreator SequenceEntry SequenceFactory GeneralizedHiddenMarkovModel TrainVariableLengthMarkovChain TrainFixedLengthMarkovChain TrainInterpolatedMarkovChain TrainHMMBaumWelch TrainHMMMaximumLikelihood ContextTree TrainWeightArrayModel BayesianInformationCriteria TrainVariableLengthInhomogeneousMarkovChain ProbabilisticModelCreatorClient AkaikeInformationCriteria TrainPhasedMarkovChain FactorableModel DecodableModel InhomogeneousFactorableModel SmoothedHistogramKernelDensity FixedSequenceAtPosition FixedSequenceAtPositionCreator PhasedRunLengthDistribution PhasedRunLengthDistributionCreator SmoothedHistogramStanke TrainPhasedMarkovChainContextAlgorithm RemoveSequenceFromModel SmoothedHistogramBurge SequenceFormat TrainDiscreteIIDModel TargetModel TargetModelCreator ReverseComplementDNA ReverseComplementDNACreator ProbabilisticModelParameter TrainGHMMTransitions BernoulliModelCreator TrainInterpolatedPhasedMarkovChain SimilarityBasedSequenceWeightingCreator SimilarityBasedSequenceWeighting TrainSimilarityBasedSequenceWeighting MultipleSequentialModelCreator MultipleSequentialModel StoreLoadedModel PairHiddenMarkovModel PairHiddenMarkovModelCreator TrainPHMMBaumWelch MaximumDependenceDecomposition
crossplatform.hpp ProbabilisticModelConfiguration ProbabilisticModelParameterValue2
../lang/src/ASTNode ../lang/src/PropertyNode ../lang/src/KeyNode ../lang/src/ValueNode ../lang/src/StringNode ../lang/src/ListNode ../lang/src/ConfigurationNode ../lang/src/IntegerNode ../lang/src/FloatNode ../lang/src/ConditionalProbabilityNode ../lang/src/ConditionalProbabilityMapNode ../lang/src/ConditionNode ../lang/src/ProbabilityMapNode ../lang/src/ProbabilityNode ../lang/src/parser ../lang/src/tokens ../lang/src/lang ../lang/src/ToPSLangVisitor ProfileHiddenMarkovModel ProfileHiddenMarkovModelCreator TrainProfileHMMMaxLikelihood TrainProfileHMMBaumWelch)
../lang/src/ASTNode ../lang/src/PropertyNode ../lang/src/KeyNode ../lang/src/ValueNode ../lang/src/StringNode ../lang/src/ListNode ../lang/src/ConfigurationNode ../lang/src/IntegerNode ../lang/src/FloatNode ../lang/src/ConditionalProbabilityNode ../lang/src/ConditionalProbabilityMapNode ../lang/src/ConditionNode ../lang/src/ProbabilityMapNode ../lang/src/ProbabilityNode ../lang/src/parser ../lang/src/tokens ../lang/src/lang ../lang/src/ToPSLangVisitor ProfileHiddenMarkovModel ProfileHiddenMarkovModelCreator TrainProfileHMMMaxLikelihood TrainProfileHMMBaumWelch
Consensus
ChiSquare
MaximalDependenceDecomposition
MaximalDependenceDecompositionCreator
TrainMaximalDependenceDecomposition
)



Expand Down
78 changes: 78 additions & 0 deletions src/ChiSquare.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#include "ChiSquare.hpp"

namespace tops {

double CHI_QUARE[31][9] = {
{0.00098, 1.64, 2.71, 3.84, 5.02, 6.63, 7.88, 10.83, 12.12},
{0.051, 3.22, 4.61, 5.99, 7.38, 9.21, 10.60, 13.82, 15.20},
{0.216, 4.64, 6.25, 7.81, 9.35, 11.34, 12.84, 16.27, 17.73},
{0.48, 5.99, 7.78, 9.49, 11.14, 13.28, 14.86, 18.47, 20.00},
{0.83, 7.29, 9.24, 11.07, 12.83, 15.09, 16.75, 20.51, 22.11},
{1.24, 8.56, 10.64, 12.59, 14.45, 16.81, 18.55, 22.46, 24.10},
{1.69, 9.80, 12.02, 14.07, 16.01, 18.48, 20.28, 24.32, 26.02},
{2.18, 11.03, 13.36, 15.51, 17.53, 20.09, 21.95, 26.12, 27.87},
{2.70, 12.24, 14.68, 16.92, 19.02, 21.67, 23.59, 27.88, 29.67},
{3.25, 13.44, 15.99, 18.31, 20.48, 23.21, 25.19, 29.59, 31.42},
{3.82, 14.63, 17.28, 19.68, 21.92, 24.73, 26.76, 31.26, 33.14},
{4.40, 15.81, 18.55, 21.03, 23.34, 26.22, 28.30, 32.91, 34.82},
{5.01, 16.98, 19.81, 22.36, 24.74, 27.69, 29.82, 34.53, 36.48},
{5.63, 18.15, 21.06, 23.68, 26.12, 29.14, 31.32, 36.12, 38.11},
{6.26, 19.31, 22.31, 25.00, 27.49, 30.58, 32.80, 37.70, 39.72},
{6.91, 20.47, 23.54, 26.30, 28.85, 32.00, 34.27, 39.25, 41.31},
{7.56, 21.61, 24.77, 27.59, 30.19, 33.41, 35.72, 40.79, 42.88},
{8.23, 22.76, 25.99, 28.87, 31.53, 34.81, 37.16, 42.31, 44.43},
{8.91, 23.90, 27.20, 30.14, 32.85, 36.19, 38.58, 43.82, 45.97},
{9.59, 25.04, 28.41, 31.41, 34.17, 37.57, 40.00, 45.31, 47.50},
{10.28, 26.17, 29.62, 32.67, 35.48, 38.93, 41.40, 46.80, 49.01},
{10.98, 27.30, 30.81, 33.92, 36.78, 40.29, 42.80, 48.27, 50.51},
{11.69, 28.43, 32.01, 35.17, 38.08, 41.64, 44.18, 49.73, 52.00},
{12.40, 29.55, 33.20, 36.42, 39.36, 42.98, 45.56, 51.18, 53.48},
{13.12, 30.68, 34.38, 37.65, 40.65, 44.31, 46.93, 52.62, 54.95},
{16.79, 36.25, 40.26, 43.77, 46.98, 50.89, 53.67, 59.70, 62.16},
{24.43, 47.27, 51.81, 55.76, 59.34, 63.69, 66.77, 73.40, 76.10},
{32.36, 58.16, 63.17, 67.50, 71.42, 76.15, 79.49, 86.66, 89.56},
{40.48, 68.97, 74.40, 79.08, 83.30, 88.38, 91.95, 99.61, 102.7},
{57.15, 90.41, 96.58, 101.9, 106.6, 112.3, 116.3, 124.8, 128.3},
{74.22, 111.7, 118.5, 124.3, 129.6, 135.8, 140.2, 149.4, 153.2}
};

int chiPi(double p) {
double prob[] = {0.975, 0.2, 0.1, 0.05, 0.025, 0.01, 0.005, 0.001, 0.0005};
int pi = 0;
for (int i = 0; i < 9; i++) {
if (p >= prob[i]) {
if (i == 0)
pi = 0;
else if (abs(p - prob[i]) > abs(p - prob[i-1]))
pi = i-1;
else
pi = i;
break;
}
}
return pi;
}

int chiDfi(int df) {
if (df <= 25)
return df-1;

int dfi = 30;
while ((df - dfi) > 0) {
dfi += 10;
if (dfi == 80) {
if (abs(df - 80) > abs(df - 100))
return 30;
else
return 29;
}
}
return ((dfi - 30)/10 + 25);
}

double chiSquare(double p, int df) {
int pi = chiPi(p);
int dfi = chiDfi(df);
return CHI_QUARE[dfi][pi];
}
}
36 changes: 36 additions & 0 deletions src/ChiSquare.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* ConfigurationReader.hpp
*
* Copyright 2011 Andre Yoshiaki Kashiwabara <[email protected]>
* Ígor Bonadio <[email protected]>
* Vitor Onuchic <[email protected]>
* Alan Mitchell Durham <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/

#ifndef CHI_SQUARE_HPP
#define CHI_SQUARE_HPP
#include <boost/shared_ptr.hpp>

#include "crossplatform.hpp"


namespace tops {
double chiSquare(double p, int df);
}

#endif
Loading

0 comments on commit 3f04b4c

Please sign in to comment.