diff --git a/LICENSE b/LICENSE index cbcf338..8ebf01b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License -Copyright © 2018 Merck Sharp & Dohme Corp. a subsidiary of Merck & Co., Inc., Kenilworth, NJ, USA." +Copyright © 2019 Merck Sharp & Dohme Corp. a subsidiary of Merck & Co., Inc., Kenilworth, NJ, USA. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/deepbgc/pipeline/detector.py b/deepbgc/pipeline/detector.py index 664f1e4..85c57be 100644 --- a/deepbgc/pipeline/detector.py +++ b/deepbgc/pipeline/detector.py @@ -110,7 +110,7 @@ def run(self, record): for cluster_proteins in clusters: start = cluster_proteins[0].location.start end = cluster_proteins[-1].location.end - candidate_id = '{}({}-{})'.format(record.id, int(start), int(end)) + candidate_id = '{}_{}-{}.1'.format(record.id, int(start), int(end)) if self.min_nucl > 1: nucl_length = end - start diff --git a/deepbgc/util.py b/deepbgc/util.py index 37f6af3..4695ea6 100644 --- a/deepbgc/util.py +++ b/deepbgc/util.py @@ -149,8 +149,11 @@ def extract_cluster_record(cluster_feature, record): """ cluster_record = cluster_feature.extract(record) - # TODO set cluster ID - # cluster_record.id = cluster.qualifiers.get('bgc_candidate_id', ['unknown_cluster_id'])[0] + cluster_record.id = cluster_feature.qualifiers.get('bgc_candidate_id', ['unknown_cluster_id'])[0] + cluster_record.description = '' + cluster_record.annotations['source'] = record.annotations.get('source', '') + cluster_record.annotations['organism'] = record.annotations.get('organism', '') + proteins_by_id = get_proteins_by_id(get_protein_features(cluster_record)) # Remove pfams with protein not fully inside cluster borders (therefore not present in cluster_record) cluster_record.features = [f for f in cluster_record.features