Skip to content

Commit

Permalink
Merge branch 'StrGenotyping'
Browse files Browse the repository at this point in the history
  • Loading branch information
edolzhenko committed May 31, 2017
2 parents 3f82452 + 8b83fea commit 8656ced
Show file tree
Hide file tree
Showing 55 changed files with 15,875 additions and 1,882 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.DS_Store
.idea
cmake-build-debug
test-example
build
30 changes: 25 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,28 @@ enable_testing()

include(ExternalProject)

option(BUILD_TESTS "Controls if unit tests are build" OFF)
if(BUILD_TESTS)
include(google_test)
######################### Google Test ############################
# Download and unpack googletest at configure time
configure_file(cmake/google_test.cmake googletest-download/CMakeLists.txt)
execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download )
if(result)
message(FATAL_ERROR "CMake step for googletest failed: ${result}")
endif()
execute_process(COMMAND ${CMAKE_COMMAND} --build .
RESULT_VARIABLE result
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download )
if(result)
message(FATAL_ERROR "Build step for googletest failed: ${result}")
endif()

# Add googletest directly to our build. This defines
# the gtest and gtest_main targets.
add_subdirectory(${CMAKE_BINARY_DIR}/googletest-src
${CMAKE_BINARY_DIR}/googletest-build)
##################################################################


ExternalProject_Add(zlib
PREFIX ${CMAKE_BINARY_DIR}/thirdparty/zlib
Expand Down Expand Up @@ -49,15 +67,17 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${Boost_INCLUDE_DIR})
include_directories(${CMAKE_BINARY_DIR}/thirdparty/htslib/include)

add_subdirectory(genotyping)
add_subdirectory(purity)
add_subdirectory(rep_align)
add_subdirectory(common)

file(GLOB SOURCES "src/*.cc")

add_executable(ExpansionHunter ${SOURCES})
target_compile_features(ExpansionHunter PRIVATE cxx_range_for)

add_dependencies(htslib zlib)
add_dependencies(ExpansionHunter htslib)
add_dependencies(common htslib)

target_link_libraries(ExpansionHunter purity rep_align pthread ${htslib_static} ${zlib_static} ${Boost_LIBRARIES})
target_link_libraries(ExpansionHunter common genotyping purity rep_align pthread ${htslib_static} ${zlib_static} ${Boost_LIBRARIES})
21 changes: 13 additions & 8 deletions cmake/google_test.cmake
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
set(GTEST_DIR "../../software/googletest/" CACHE PATH "Google Test path.")
cmake_minimum_required(VERSION 2.8.2)

add_subdirectory(${GTEST_DIR} ${CMAKE_BINARY_DIR}/gtest)
include_directories(SYSTEM ${GTEST_DIR}/googlemock/include/ ${GTEST_DIR}/googletest/include/)
project(googletest-download NONE)

function(add_google_test target)
add_executable(${target} ${ARGN})
target_link_libraries(${target} gmock_main)
add_test(${target} ${target})
endfunction()
include(ExternalProject)
ExternalProject_Add(googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG master
SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-src"
BINARY_DIR "${CMAKE_BINARY_DIR}/googletest-build"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
3 changes: 3 additions & 0 deletions common/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Collect every .cc file in this directory into `sources`.
file(GLOB sources *.cc)
# Build the collected sources into the `common` library target.
add_library(common ${sources})
# Link `common` against the libraries listed in Boost_LIBRARIES.
target_link_libraries(common ${Boost_LIBRARIES})
103 changes: 103 additions & 0 deletions common/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
//
// Expansion Hunter
// Copyright (c) 2016 Illumina, Inc.
//
// Author: Egor Dolzhenko <[email protected]>,
// Mitch Bekritsky <[email protected]>, Richard Shaw
// Concept: Michael Eberle <[email protected]>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#pragma once

#include <map>
#include <ostream>
#include <string>
#include <vector>

// Read types distinguished in this header.
enum class ReadType {
  kSpanning,
  kFlanking,
  kInrepeat,
  kOther
};

// Upper-case text label for each ReadType value.
const std::map<ReadType, std::string> kReadTypeToString = {
    {ReadType::kSpanning, "SPANNING"},
    {ReadType::kFlanking, "FLANKING"},
    {ReadType::kInrepeat, "INREPEAT"},
    {ReadType::kOther, "OTHER"}};

// Plain aggregate bundling three string fields that describe a read.
struct Read {
// Presumably the read identifier -- TODO confirm against callers.
std::string name;
// Presumably the base sequence of the read -- TODO confirm.
std::string bases;
// Presumably the per-base quality string matching `bases` -- TODO confirm.
std::string quals;
};

// Stores three integer read counts (spanning, flanking, in-repeat) and
// provides accessors, a textual form, and equality comparison.
class AlleleSupport {
 public:
  // Default construction sets every count to zero.
  AlleleSupport() : AlleleSupport(0, 0, 0) {}

  AlleleSupport(int num_spanning, int num_flanking, int num_inrepeat)
      : num_spanning_(num_spanning),
        num_flanking_(num_flanking),
        num_inrepeat_(num_inrepeat) {}

  // Getters.
  int num_spanning() const { return num_spanning_; }
  int num_flanking() const { return num_flanking_; }
  int num_inrepeat() const { return num_inrepeat_; }

  // Setters.
  void set_num_spanning(int num_spanning) { num_spanning_ = num_spanning; }
  void set_num_flanking(int num_flanking) { num_flanking_ = num_flanking; }
  void set_num_inrepeat(int num_inrepeat) { num_inrepeat_ = num_inrepeat; }

  // Formats the counts as "<spanning>-<flanking>-<inrepeat>".
  std::string ToString() const {
    std::string text = std::to_string(num_spanning_);
    text += "-";
    text += std::to_string(num_flanking_);
    text += "-";
    text += std::to_string(num_inrepeat_);
    return text;
  }

  // Two objects are equal when all three counts match.
  bool operator==(const AlleleSupport &rhs) const {
    if (num_spanning_ != rhs.num_spanning_) {
      return false;
    }
    if (num_flanking_ != rhs.num_flanking_) {
      return false;
    }
    return num_inrepeat_ == rhs.num_inrepeat_;
  }

 private:
  int num_spanning_;
  int num_flanking_;
  int num_inrepeat_;
};

// Pair of integer bounds; both are -1 after default construction.
struct Interval {
  Interval() : lower_bound_(-1), upper_bound_(-1) {}

  int lower_bound_;
  int upper_bound_;

  // Intervals are equal when both bounds match.
  bool operator==(const Interval &rhs) const {
    const bool same_lower = (lower_bound_ == rhs.lower_bound_);
    const bool same_upper = (upper_bound_ == rhs.upper_bound_);
    return same_lower && same_upper;
  }

  // Formats the interval as "<lower>-<upper>".
  std::string ToString() const {
    std::string text = std::to_string(lower_bound_);
    text += "-";
    text += std::to_string(upper_bound_);
    return text;
  }
};

// Describes one repeat allele: its size, an interval (`ci_`), per-read-type
// support counts, a supporting-read count, and the read type it is tagged with.
struct RepeatAllele {
  // Builds an allele from a size, a supporting-read count, and a read type.
  // `ci_` and `support_` are default-constructed.
  RepeatAllele(int size, int num_supporting_reads, ReadType type)
      : size_(size), num_supporting_reads_(num_supporting_reads), type_(type) {}

  // Builds an allele from a size, a read type, and explicit support counts.
  // `num_supporting_reads_` is set to -1 and `ci_` is default-constructed.
  // Initializers are listed in member declaration order, which is the order
  // in which they actually run.
  RepeatAllele(int size, ReadType type, AlleleSupport support)
      : size_(size),
        support_(support),
        num_supporting_reads_(-1),
        type_(type) {}

  // Alleles are equal when every field matches.
  bool operator==(const RepeatAllele &rhs) const {
    return size_ == rhs.size_ && ci_ == rhs.ci_ && support_ == rhs.support_ &&
           type_ == rhs.type_ &&
           num_supporting_reads_ == rhs.num_supporting_reads_;
  }

  int size_;
  Interval ci_;
  AlleleSupport support_; // TODO: Rename to "consistent".
  int num_supporting_reads_;
  ReadType type_;
};

// A repeat genotype is stored as a vector of RepeatAllele objects.
using RepeatGenotype = std::vector<RepeatAllele>;
68 changes: 26 additions & 42 deletions src/genomic_region.cc → common/genomic_region.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,40 +20,36 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#include "include/genomic_region.h"
#include "common/genomic_region.h"

#include <boost/algorithm/string/split.hpp>
using boost::algorithm::split;
#include <boost/algorithm/string/classification.hpp>
using boost::algorithm::is_any_of;
#include <boost/lexical_cast.hpp>
using boost::lexical_cast;

#include <vector>
using std::vector;
#include <string>
using std::string;
#include <iostream>
using std::endl;
using std::cerr;
#include <sstream>
using std::ostream;
using std::istream;
#include <stdexcept>

/*****************************************************************************/
using boost::algorithm::split;
using boost::algorithm::is_any_of;
using boost::lexical_cast;

Region::Region() : chrom_("chr0"), start_(0), end_(0) {}
using std::vector;
using std::ostream;
using std::istream;
using std::endl;
using std::cerr;
using std::string;

/*****************************************************************************/
Region::Region() : chrom_("chr0"), start_(0), end_(0) {}

Region::Region(const string& chrom, size_t start, size_t end,
const string& label)
Region::Region(const string &chrom, int64_t start, int64_t end,
const string &label)
: chrom_(chrom), start_(start), end_(end), label_(label) {}

/*****************************************************************************/

Region::Region(const string& encoding, const string& label) : label_(label) {
Region::Region(const string &encoding, const string &label) : label_(label) {
vector<string> components;
split(components, encoding, is_any_of(":-"));

Expand All @@ -62,13 +58,11 @@ Region::Region(const string& encoding, const string& label) : label_(label) {
}

chrom_ = components[0];
start_ = lexical_cast<size_t>(components[1]);
end_ = lexical_cast<size_t>(components[2]);
start_ = lexical_cast<int64_t>(components[1]);
end_ = lexical_cast<int64_t>(components[2]);
}

/*****************************************************************************/

bool Region::operator<(const Region& other_region) const {
bool Region::operator<(const Region &other_region) const {
if (chrom_ != other_region.chrom_) {
return chrom_ < other_region.chrom_;
}
Expand All @@ -80,54 +74,44 @@ bool Region::operator<(const Region& other_region) const {
return end_ < other_region.end_;
}

/*****************************************************************************/

bool Region::Overlaps(const Region& other_region) const {
bool Region::Overlaps(const Region &other_region) const {
if (chrom_ != other_region.chrom_) {
return false;
}

const size_t left_bound =
const int64_t left_bound =
start_ > other_region.start_ ? start_ : other_region.start_;
const size_t right_bound =
const int64_t right_bound =
end_ < other_region.end_ ? end_ : other_region.end_;

return left_bound <= right_bound;
}

/*****************************************************************************/

// Returns the region extended by extension_len upstream and downstream.
// NOTE: The right boundary of the extended region may stick past chromosome
// end.
Region Region::Extend(size_t extension_len) const {
const size_t new_start =
Region Region::Extend(int extension_len) const {
const int64_t new_start =
start_ > extension_len ? (start_ - extension_len) : 1;
const size_t new_end = end_ + extension_len;
const int64_t new_end = end_ + extension_len;
return Region(chrom_, new_start, new_end);
}

/*****************************************************************************/

const string Region::AsString() const {
const string Region::ToString() const {
std::ostringstream ostrm;
ostrm << *this;
return ostrm.str();
}

/*****************************************************************************/

istream& operator>>(istream& istrm, Region& region) {
istream &operator>>(istream &istrm, Region &region) {
string encoding;
istrm >> encoding;
region = Region(encoding);

return istrm;
}

/*****************************************************************************/

ostream& operator<<(ostream& ostrm, const Region& region) {
ostream &operator<<(ostream &ostrm, const Region &region) {
ostrm << region.chrom_ << ':' << region.start_;

if (region.end_ != region.start_) {
Expand Down
65 changes: 65 additions & 0 deletions common/genomic_region.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
//
// Expansion Hunter
// Copyright (c) 2016 Illumina, Inc.
//
// Author: Egor Dolzhenko <[email protected]>,
// Mitch Bekritsky <[email protected]>, Richard Shaw
// Concept: Michael Eberle <[email protected]>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#pragma once

#include <iostream>
#include <string>

// A labelled genomic interval: chromosome name plus integer start and end
// coordinates. Supports stream I/O, ordering, overlap tests, and extension.
class Region {
 public:
  friend std::istream &operator>>(std::istream &istrm, Region &region);
  friend std::ostream &operator<<(std::ostream &ostrm, const Region &region);

  // Creates an unset region (see is_set()).
  Region();
  // Creates a region from explicit coordinates and an optional label.
  Region(const std::string &chrom, int64_t start, int64_t end,
         const std::string &labelStr = std::string());
  // Creates a region from a textual encoding and an optional label.
  Region(const std::string &rangeStr,
         const std::string &labelStr = std::string());

  // A region counts as unset while its chromosome is the placeholder "chr0".
  bool is_set() const { return (chrom_ != "chr0"); }
  bool operator<(const Region &other_region) const;

  // True when both regions share at least one position.
  bool Overlaps(const Region &other_region) const;

  // Returns a copy widened by extension_len on both sides.
  Region Extend(int extension_len) const;

  const std::string &chrom() const { return chrom_; }
  // Coordinates are returned by value, so no const qualifier on the result.
  int64_t start() const { return start_; }
  int64_t end() const { return end_; }
  const std::string &label() const { return label_; }

  void set_start(int64_t start) { start_ = start; }
  void set_end(int64_t end) { end_ = end; }
  void set_label(const std::string &label) { label_ = label; }

  // Textual form of the region, as produced by operator<<.
  const std::string ToString() const;

 private:
  std::string chrom_;
  int64_t start_;
  int64_t end_;
  std::string label_;
};

std::istream &operator>>(std::istream &istrm, Region &region);
std::ostream &operator<<(std::ostream &ostrm, const Region &region);
Loading

0 comments on commit 8656ced

Please sign in to comment.