Skip to content

Commit

Permalink
Merge pull request #15 from solislemuslab/feature/cluster
Browse files Browse the repository at this point in the history
conserved subgraphs
  • Loading branch information
reednel authored Sep 26, 2024
2 parents 379fcf9 + d7ddc49 commit 401fc6d
Show file tree
Hide file tree
Showing 8 changed files with 304 additions and 23 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ This utility has the form `./minaa.exe <G> <H> [-B=bio] [-a=alpha] [-b=beta]`.
- **-st=**: similarity threshold; The similarity value above which aligned pairs are included in the output.
- Require: a real number in range [0, 1].
- Default: 0.
- **-c**: conserved subgraphs; whether or not to output a list of the conserved subgraphs in the alignment between G and H.
- Require: none.
- Default: this list is not calculated or returned.
- Note: We define a conserved subgraph as a connected subgraph of G whose nodes are aligned to a connected subgraph of H.

#### Uncommon

Expand Down
4 changes: 2 additions & 2 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ Here we align network **g** with network **h** using topological information and
## Example 3

```bash
./minaa.exe examples/g.csv examples/h.csv -Galias=nonsmoker -Halias=smoker -p -t
./minaa.exe examples/g.csv examples/h.csv -Galias=nonsmoker -Halias=smoker -p -t -c
```

Output to: `nonsmoker-smoker-2024_01_16-22_05_34/`

Here we align network **g** with network **h**, where **g** is given the alias "nonsmoker", and **h** is given the alias "smoker". The timestamp option `-t` was specified, so the name of the output folder will be nonsmoker-smoker-T, where T is the date and time of execution. Additionally, because the passthrough option `-p` was specified, g.csv and h.csv will be passed through to the output folder as nonsmoker.csv and smoker.csv, respectively.
Here we align network **g** with network **h**, where **g** is given the alias "nonsmoker", and **h** is given the alias "smoker". The timestamp option `-t` was specified, so the name of the output folder will be nonsmoker-smoker-T, where T is the date and time of execution. Because the passthrough option `-p` was specified, g.csv and h.csv will be passed through to the output folder as nonsmoker.csv and smoker.csv, respectively. Finally, because the `-c` option was specified, the output folder will include the alignment's conserved subgraphs, in a file called `conserved_subgraphs.csv`.

## Example 4

Expand Down
128 changes: 128 additions & 0 deletions examples/nonsmoker-smoker-2024_01_16-23_10_14/conserved_subgraphs.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
"g1","h146"

"g6","h5"

"g7","h161"

"g8","h138"
"g167","h134"

"g12","h124"

"g13","h43"
"g126","h121"

"g14","h171"

"g16","h96"

"g17","h14"
"g74","h74"
"g82","h71"
"g23","h81"
"g81","h23"
"g90","h90"
"g71","h17"
"g70","h70"
"g58","h13"

"g19","h25"

"g20","h128"
"g28","h87"

"g22","h160"

"g24","h162"

"g30","h130"

"g32","h122"
"g45","h156"
"g42","h145"

"g35","h135"
"g155","h120"
"g164","h60"
"g112","h109"
"g92","h68"
"g51","h104"

"g37","h110"

"g43","h56"

"g46","h155"

"g52","h127"

"g53","h63"
"g68","h142"

"g54","h123"

"g56","h118"

"g59","h152"

"g63","h129"

"g65","h7"

"g73","h115"

"g91","h55"

"g95","h153"

"g103","h117"

"g105","h132"

"g106","h107"

"g108","h28"

"g109","h93"

"g115","h72"

"g116","h169"

"g119","h103"

"g120","h101"

"g123","h73"

"g125","h157"

"g128","h173"
"g157","h6"

"g130","h102"

"g131","h51"

"g133","h32"

"g137","h174"

"g139","h30"

"g142","h31"

"g150","h22"

"g154","h21"

"g166","h18"

"g169","h125"

"g172","h44"

"g173","h106"

"g174","h137"

1 change: 1 addition & 0 deletions include/file_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace FileIO
void matrix_to_file(std::string, std::vector<std::string>, std::vector<std::string>, std::vector<std::vector<double>>);
void alignment_to_matrix_file(std::string, std::vector<std::string>, std::vector<std::string>, std::vector<std::vector<double>>, double);
void alignment_to_list_file(std::string, std::vector<std::string>, std::vector<std::string>, std::vector<std::vector<double>>, double);
void subgraphs_to_file(std::string, std::vector<std::string>, std::vector<std::string>, std::vector<std::vector<std::pair<unsigned, unsigned>>>);
}

#endif
1 change: 1 addition & 0 deletions include/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ namespace Util
std::vector<std::vector<double>> normalize(std::vector<std::vector<double>>);
std::vector<std::vector<double>> one_minus(std::vector<std::vector<double>>);
std::vector<std::vector<double>> combine(std::vector<std::vector<double>>, std::vector<std::vector<double>>, double);
std::vector<std::vector<std::pair<unsigned, unsigned>>> conserved_subgraphs(std::vector<std::vector<unsigned>>, std::vector<std::vector<unsigned>>, std::vector<std::vector<double>>, double);
}

#endif
69 changes: 63 additions & 6 deletions src/file_io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -584,8 +584,10 @@ namespace FileIO
*
* @throws std::runtime_error If the file could not be written.
*/
void matrix_to_file(std::string filepath, std::vector<std::string> g_labels, std::vector<std::string> h_labels,
std::vector<std::vector<double>> matrix)
void matrix_to_file(std::string filepath,
std::vector<std::string> g_labels,
std::vector<std::string> h_labels,
std::vector<std::vector<double>> matrix)
{
// Create and open the file
std::ofstream fout;
Expand Down Expand Up @@ -628,8 +630,11 @@ namespace FileIO
*
* @throws std::runtime_error If the file could not be written.
*/
void alignment_to_matrix_file(std::string filepath, std::vector<std::string> g_labels,
std::vector<std::string> h_labels, std::vector<std::vector<double>> alignment, double similarity_threshold)
void alignment_to_matrix_file(std::string filepath,
std::vector<std::string> g_labels,
std::vector<std::string> h_labels,
std::vector<std::vector<double>> alignment,
double similarity_threshold)
{
// Create and open the file
std::ofstream fout;
Expand Down Expand Up @@ -680,8 +685,11 @@ namespace FileIO
*
* @throws std::runtime_error If the file could not be written.
*/
void alignment_to_list_file(std::string filepath, std::vector<std::string> g_labels,
std::vector<std::string> h_labels, std::vector<std::vector<double>> alignment, double similarity_threshold)
void alignment_to_list_file(std::string filepath,
std::vector<std::string> g_labels,
std::vector<std::string> h_labels,
std::vector<std::vector<double>> alignment,
double similarity_threshold)
{
// Convert the alignment matrix into a list
std::vector<std::array<double, 3>> list;
Expand Down Expand Up @@ -730,4 +738,53 @@ namespace FileIO
fout.close();
}

/**
 * Write the given conserved subgraphs to a csv file.
 *
 * Each subgraph is written as a run of "<g label>,<h label>" lines, one line
 * per aligned node pair, and every subgraph (including the last) is followed
 * by a single blank line.
 *
 * @param filepath The path to the output file.
 * @param g_labels Labels for the G graph.
 * @param h_labels Labels for the H graph.
 * @param subgraphs The subgraphs to write to the file. Each pair holds an
 * index into g_labels (first) and an index into h_labels (second).
 *
 * @throws std::runtime_error If the file could not be opened or written, or
 * if a pair refers to an index outside of g_labels / h_labels.
 */
void subgraphs_to_file(std::string filepath,
                       std::vector<std::string> g_labels,
                       std::vector<std::string> h_labels,
                       std::vector<std::vector<std::pair<unsigned, unsigned>>> subgraphs)
{
    // Create and open the file
    std::ofstream fout;
    fout.exceptions(std::ofstream::badbit);
    try
    {
        fout.open(filepath);
    }
    catch (const std::ofstream::failure &e)
    {
        throw std::runtime_error("Unable to open file " + filepath);
    }

    // A failed open() sets failbit, which is not part of the exception mask
    // above, so the failure has to be detected explicitly.
    if (!fout.is_open())
    {
        throw std::runtime_error("Unable to open file " + filepath);
    }

    // Write the subgraphs to the file
    for (const auto &subgraph : subgraphs)
    {
        for (const auto &pair : subgraph)
        {
            const unsigned g_index = pair.first;
            const unsigned h_index = pair.second;

            // Ensure the indices are within the bounds of the labels
            if (g_index >= g_labels.size() || h_index >= h_labels.size())
            {
                throw std::runtime_error("Index out of bounds for labels.");
            }

            // '\n' instead of std::endl: avoid flushing the stream on every line
            fout << g_labels[g_index] << "," << h_labels[h_index] << '\n';
        }
        // Blank line separates consecutive subgraphs
        fout << '\n';
    }

    // Flush once, explicitly, so that a write failure raises badbit (and thus
    // throws) before the stream is closed.
    fout.flush();
    fout.close();
}

}
21 changes: 17 additions & 4 deletions src/minaa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,11 @@ int main(int argc, char *argv[])
auto g_alias = args[7]; // graph G alias
auto h_alias = args[8]; // graph H alias
auto bio_alias = args[9]; // biological data alias
auto do_passthrough = (args[10] == "1"); // do a passthrough of input files?
auto do_timestamp = (args[11] == "1"); // include a timestamp in the directory name?
auto do_greekstamp = (args[12] == "1"); // include a greekstamp in the directory name?
auto do_similarity_conversion = (args[13] == "1"); // convert biological similarity to costs?
auto do_subgraph_id = (args[10] == "1"); // do conserved subgraph identification?
auto do_passthrough = (args[11] == "1"); // do a passthrough of input files?
auto do_timestamp = (args[12] == "1"); // include a timestamp in the directory name?
auto do_greekstamp = (args[13] == "1"); // include a greekstamp in the directory name?
auto do_similarity_conversion = (args[14] == "1"); // convert biological similarity to costs?
auto do_bio = (bio_file != ""); // biological data file provided?

const auto BASE_PATH = "alignments";
Expand All @@ -58,6 +59,7 @@ int main(int argc, char *argv[])
const auto OVERALL_COSTS_FILENAME = "overall_costs.csv";
const auto ALIGNMENT_MATRIX_FILENAME = "alignment_matrix.csv";
const auto ALIGNMENT_LIST_FILENAME = "alignment_list.csv";
const auto CONSERVED_SUBGRAPHS_FILENAME = "conserved_subgraphs.csv";

// Generate output names
auto g_name = FileIO::name_file(g_file, g_alias);
Expand Down Expand Up @@ -235,6 +237,17 @@ int main(int argc, char *argv[])
auto d51 = std::chrono::duration_cast<std::chrono::milliseconds>(f51 - s51).count();
FileIO::out(log, "done. (" + std::to_string(d51) + "ms)\n");

if (do_subgraph_id) {
// Conserved subgraph identification
FileIO::out(log, "Identifying conserved subgraphs................");
auto s60 = std::chrono::high_resolution_clock::now();
auto subgraphs = Util::conserved_subgraphs(g_graph, h_graph, alignment, similarity_threshold);
FileIO::subgraphs_to_file(directory + CONSERVED_SUBGRAPHS_FILENAME, g_labels, h_labels, subgraphs);
auto f60 = std::chrono::high_resolution_clock::now();
auto d60 = std::chrono::duration_cast<std::chrono::milliseconds>(f60 - s60).count();
FileIO::out(log, "done. (" + std::to_string(d60) + "ms)\n");
}

auto f = std::chrono::high_resolution_clock::now();
auto d = std::chrono::duration_cast<std::chrono::milliseconds>(f - s).count();
FileIO::out(log, "ALIGNMENT COMPLETED (" + std::to_string(d) + "ms)\n");
Expand Down
Loading

0 comments on commit 401fc6d

Please sign in to comment.