Skip to content

Commit

Permalink
implement similarity threshold
Browse files Browse the repository at this point in the history
  • Loading branch information
reednel committed Jan 16, 2024
1 parent e7fb972 commit a31103b
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 30 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ This utility has the form `./minaa.exe <G> <H> [-B=bio] [-a=alpha] [-b=beta]`.
- **-b=**: beta; the topological-biological cost matrix balancer.
- Require: a real number in range [0, 1].
- Default: 1 (100% topological data).
- **-st=**: similarity threshold; the similarity value that an aligned pair must strictly exceed to be included in the output.
- Require: a real number in range [0, 1].
- Default: 0.

#### Uncommon

Expand Down Expand Up @@ -105,9 +108,9 @@ This utility has the form `./minaa.exe <G> <H> [-B=bio] [-a=alpha] [-b=beta]`.

Here we align network0 with network1 using no biological data. `-a=0.6` sets alpha equal to 0.6, meaning 60% of the topological cost function comes from similarity calculated by GDVs, and 40% from simpler node degree data.

`./minaa.exe network0.csv network1.csv -B=bio_costs.csv -b=0.85`
`./minaa.exe network0.csv network1.csv -B=bio_costs.csv -b=0.85 -st=0.5`

Here we align network0 with network1 using topological information and the given biological cost matrix, bio_costs. Since alpha was unspecified, it defaults to 1. Since beta was set to 0.85, 85% of the cost weight is from the calculated topological cost matrix, and 15% is from the given biological cost matrix.
Here we align network0 with network1 using topological information and the given biological cost matrix, bio_costs. Since alpha was unspecified, it defaults to 1. Since beta was set to 0.85, 85% of the cost weight is from the calculated topological cost matrix, and 15% is from the given biological cost matrix. Since the similarity threshold was set to 0.5, any aligned pair with a similarity score less than or equal to 0.5 is excluded from the alignment results.

`./minaa.exe network0.csv network1.csv -Galias=control -Halias=treatment -p -t`

Expand Down
4 changes: 2 additions & 2 deletions include/file_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ namespace FileIO
void graph_to_file(std::string, std::vector<std::string>, std::vector<std::vector<unsigned>>);
void gdvs_to_file(std::string, std::vector<std::string>, std::vector<std::vector<unsigned>>);
void matrix_to_file(std::string, std::vector<std::string>, std::vector<std::string>, std::vector<std::vector<double>>);
void alignment_to_matrix_file(std::string, std::vector<std::string>, std::vector<std::string>, std::vector<std::vector<double>>);
void alignment_to_list_file(std::string, std::vector<std::string>, std::vector<std::string>, std::vector<std::vector<double>>);
void alignment_to_matrix_file(std::string, std::vector<std::string>, std::vector<std::string>, std::vector<std::vector<double>>, double);
void alignment_to_list_file(std::string, std::vector<std::string>, std::vector<std::string>, std::vector<std::vector<double>>, double);
}

#endif
12 changes: 7 additions & 5 deletions src/file_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -617,17 +617,18 @@ namespace FileIO
}

/**
* Write the given alignment to a csv file as a matrix. // COPILOTTED
* Write the given alignment to a csv file as a matrix.
*
* @param filepath The path to the output file.
* @param g_labels Labels for the G graph.
* @param h_labels Labels for the H graph.
* @param alignment The alignment matrix to write to the file.
* @param similarity_threshold The similarity threshold above which alignments are included in the output.
*
* @throws std::runtime_error If the file could not be written.
*/
void alignment_to_matrix_file(std::string filepath, std::vector<std::string> g_labels,
std::vector<std::string> h_labels, std::vector<std::vector<double>> alignment)
std::vector<std::string> h_labels, std::vector<std::vector<double>> alignment, double similarity_threshold)
{
// Create and open the file
std::ofstream fout;
Expand All @@ -652,7 +653,7 @@ namespace FileIO
fout << std::endl << g_labels[i];
for (unsigned j = 0; j < alignment[0].size(); ++j)
{
if (alignment[i][j] >= 0)
if (alignment[i][j] > similarity_threshold)
{
fout << "," << alignment[i][j];
}
Expand All @@ -673,11 +674,12 @@ namespace FileIO
* @param g_labels Labels for the G graph.
* @param h_labels Labels for the H graph.
* @param alignment The alignment matrix to write to the file.
* @param similarity_threshold The similarity threshold above which alignments are included in the output.
*
* @throws std::runtime_error If the file could not be written.
*/
void alignment_to_list_file(std::string filepath, std::vector<std::string> g_labels,
std::vector<std::string> h_labels, std::vector<std::vector<double>> alignment)
std::vector<std::string> h_labels, std::vector<std::vector<double>> alignment, double similarity_threshold)
{
// Convert the alignment matrix into a list
std::vector<std::array<double, 3>> list;
Expand All @@ -686,7 +688,7 @@ namespace FileIO
{
for (unsigned j = 0; j < alignment[0].size(); ++j)
{
if (alignment[i][j] >= 0)
if (alignment[i][j] > similarity_threshold)
{
net_cost += (1 - alignment[i][j]);
list.push_back({(double)i, (double)j, alignment[i][j]});
Expand Down
19 changes: 10 additions & 9 deletions src/minaa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,14 @@ int main(int argc, char* argv[])
auto bio_file = args[3]; // biological data file
auto alpha = std::stod(args[4]); // GDV - edge weight balancer
auto beta = std::stod(args[5]); // topological - biological balancer
auto g_alias = args[6]; // graph G alias
auto h_alias = args[7]; // graph H alias
auto bio_alias = args[8]; // biological data alias
auto do_passthrough = (args[9] == "1"); // do a passthrough of input files?
auto do_timestamp = (args[10] == "1"); // include a timestamp in the directory name?
auto do_greekstamp = (args[11] == "1"); // include a greekstamp in the directory name?
auto do_similarity_conversion = (args[12] == "1"); // convert biological similarity to costs?
auto similarity_threshold = std::stod(args[6]); // similarity threshold above which alignments report
auto g_alias = args[7]; // graph G alias
auto h_alias = args[8]; // graph H alias
auto bio_alias = args[9]; // biological data alias
auto do_passthrough = (args[10] == "1"); // do a passthrough of input files?
auto do_timestamp = (args[11] == "1"); // include a timestamp in the directory name?
auto do_greekstamp = (args[12] == "1"); // include a greekstamp in the directory name?
auto do_similarity_conversion = (args[13] == "1"); // convert biological similarity to costs?
auto do_bio = (bio_file != ""); // biological data file provided?

const auto BASE_PATH = "alignments";
Expand Down Expand Up @@ -227,8 +228,8 @@ int main(int argc, char* argv[])
// Write the alignment to csv files
FileIO::out(log, "Writing the alignment to file..................");
auto s51 = std::chrono::high_resolution_clock::now();
FileIO::alignment_to_matrix_file(directory + ALIGNMENT_MATRIX_FILENAME, g_labels, h_labels, alignment);
FileIO::alignment_to_list_file(directory + ALIGNMENT_LIST_FILENAME, g_labels, h_labels, alignment);
FileIO::alignment_to_matrix_file(directory + ALIGNMENT_MATRIX_FILENAME, g_labels, h_labels, alignment, similarity_threshold);
FileIO::alignment_to_list_file(directory + ALIGNMENT_LIST_FILENAME, g_labels, h_labels, alignment, similarity_threshold);
auto f51 = std::chrono::high_resolution_clock::now();
auto d51 = std::chrono::duration_cast<std::chrono::milliseconds>(f51-s51).count();
FileIO::out(log, "done. (" + std::to_string(d51) + "ms)\n");
Expand Down
32 changes: 20 additions & 12 deletions src/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,9 @@ namespace Util
*/
std::vector<std::string> parse_args(int argc, char* argv[])
{
std::vector<std::string> args = {"", "", "", "", "1", "1", "", "", "", "0", "0", "0", "0"};
std::vector<std::string> args = {"", "", "", "", "1", "1", "0", "", "", "", "0", "0", "0", "0"};

if (argc < 3 || argc > 13)
if (argc < 3 || argc > 14)
{
throw std::invalid_argument("Invalid number of arguments.\nUsage: ./minaaa.exe <G.csv> <H.csv> \nSee README.md for additional options and details.");
}
Expand Down Expand Up @@ -115,45 +115,53 @@ namespace Util
throw std::invalid_argument("The beta argument must be in range [0, 1].");
}
}
else if (arg.find("-st=") != std::string::npos)
{
args[6] = arg.substr(4);
if (std::stod(args[6]) < 0 || std::stod(args[6]) > 1)
{
throw std::invalid_argument("The similarity threshold argument must be in range [0, 1].");
}
}
else if (arg.find("-Galias=") != std::string::npos)
{
args[6] = arg.substr(8);
if (!FileIO::is_valid_filename(args[6]))
args[7] = arg.substr(8);
if (!FileIO::is_valid_filename(args[7]))
{
throw std::invalid_argument("The G alias contains an illegal character.");
}
}
else if (arg.find("-Halias=") != std::string::npos)
{
args[7] = arg.substr(8);
if (!FileIO::is_valid_filename(args[7]))
args[8] = arg.substr(8);
if (!FileIO::is_valid_filename(args[8]))
{
throw std::invalid_argument("The H alias contains an illegal character.");
}
}
else if (arg.find("-Balias=") != std::string::npos)
{
args[8] = arg.substr(8);
if (!FileIO::is_valid_filename(args[8]))
args[9] = arg.substr(8);
if (!FileIO::is_valid_filename(args[9]))
{
throw std::invalid_argument("The B alias contains an illegal character.");
}
}
else if (arg.find("-p") != std::string::npos)
{
args[9] = "1";
args[10] = "1";
}
else if (arg.find("-t") != std::string::npos)
{
args[10] = "1";
args[11] = "1";
}
else if (arg.find("-g") != std::string::npos)
{
args[11] = "1";
args[12] = "1";
}
else if (arg.find("-s") != std::string::npos)
{
args[12] = "1";
args[13] = "1";
}
else
{
Expand Down

0 comments on commit a31103b

Please sign in to comment.