Skip to content

Commit

Permalink
Deleted comments
Browse files Browse the repository at this point in the history
  • Loading branch information
mnshgl0110 committed Jul 28, 2020
1 parent d10bf51 commit d2229ac
Showing 1 changed file with 0 additions and 281 deletions.
281 changes: 0 additions & 281 deletions syri/pyxFiles/tdfunc.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1180,287 +1180,6 @@ cpdef getProfitableTrans(cpp_map[long, cpp_set[long]] graph, long[:] astart, lon
out.append([path[k] for k in range(<Py_ssize_t> path.size())])
return out

# cpdef getProfitableTrans(cpp_map[long, cpp_set[long]] graph, long[:] astart, long[:] aend, long[:] bstart, long[:] bend, np.ndarray achr, np.ndarray bchr, float[:] iden, long[:] alen, long[:] blen, long[:] inastart, long[:] inaend, long[:] inbstart, long[:] inbend, np.ndarray inachr, np.ndarray inbchr, long tUC, float tUP, int isinv = 0):
# """
# Input:
# 1) Dictionary in which each key corresponds to a trans alignment and the corresponding values are the alignments that are colinear with that key.
# 2) Coordinates of the trans alignments
# 3) Coordinates of inplace blocks. Sorted separately for reference and query genome.
# 4) tUC, tUP
#
# Output:
# 1) All trans blocks (groups of alignments) with high positive score.
# """
# cdef:
# Py_ssize_t i, j, k
# long id, current
# unsigned long cnt
# long nodecnt, edgecnt, len_in = len(inastart)
# long ast, aen, bst, ben
# float ascore, bscore, agap, bgap
# long al, bl, au, bu
# float score
# float[:] weight
# float[:] dist
# long[:] pred
# long[:] topo, indegree, source, target
# long[:] n = np.array(range(len(astart)), dtype='int')
# long[:] achrint, bchrint, inachrint, inbchrint
# cpp_set[long] rem
# cpp_que[long] q, toporder
# cpp_deq[long] path, r_path
# cpp_map[long, cpp_deq[long]] nodepath
# cpp_map[long, cpp_vec[long]] almntdata
# cpp_set[long].iterator set_it, set_it2
# cpp_deq[long].reverse_iterator deq_rit
# cpp_map[long, cpp_set[long]].iterator mapit
#
#
# nodecnt = len(n)
# topo = indegree = np.zeros(nodecnt, dtype='int64')
#
# ## Check that the keys are sorted in the graph
# mapit = graph.begin()
# id = -1
# while mapit != graph.end():
# if id >= deref(mapit).first:
# print('ERROR: unsorted outOrderedBlocks')
# else:
# id = deref(mapit).first
# inc(mapit)
#
#
# ## Remove children for which another child is present between parent and itself
# mapit = graph.begin()
# while mapit != graph.end():
# rem.clear() ## List of children to be removed
# set_it = deref(mapit).second.begin()
# while set_it != deref(mapit).second.end():
# if rem.count(deref(set_it)) == 0:
# if graph.count(deref(set_it)) == 1:
# set_it2 = graph[deref(set_it)].begin()
# while set_it2 != graph[deref(set_it)].end():
# rem.insert(deref(set_it2))
# inc(set_it2)
# inc(set_it)
# set_it = rem.begin()
# while set_it != rem.end():
# deref(mapit).second.erase(deref(set_it))
# inc(set_it)
# inc(mapit)
#
#
# # Get number of edges in the graph
# edgecnt = 0
# mapit = graph.begin()
# while mapit != graph.end():
# edgecnt += <Py_ssize_t> deref(mapit).second.size()
# inc(mapit)
#
#
# # Find the topological order in which the nodes should be processed to find paths
#
# ## Get indegree for all nodes (number of nodes == number of alignments)
# mapit = graph.begin()
# while mapit != graph.end():
# set_it = deref(mapit).second.begin()
# while set_it != deref(mapit).second.end():
# indegree[deref(set_it)]+=1
# inc(set_it)
# inc(mapit)
#
#
# ## Push all nodes with indegree=0 in queue
# for i in n:
# if indegree[i] == 0:
# q.push(i)
# cnt = 0
#
# ## Get topological ordering for the nodes
# while q.size() > 0:
# id = q.front()
# q.pop()
# toporder.push(id)
# if graph.count(id) == 1:
# set_it = graph[id].begin()
# while set_it != graph[id].end():
# indegree[deref(set_it)]-=1
# if indegree[deref(set_it)]==0:
# q.push(deref(set_it))
# inc(set_it)
# cnt += 1
# if cnt != len(indegree):
# print('ERROR: Cycle found')
# if toporder.size() != len(topo):
# print('ERROR: topological ordering didnt cover all nodes')
# for i in n:
# topo[i] = toporder.front()
# toporder.pop()
#
#
# # Get order in which the edges need to be traversed
# source = np.zeros(edgecnt, dtype='int')
# target = np.zeros(edgecnt, dtype='int')
# weight = np.zeros(edgecnt, dtype='float32')
# id = 0
# for i in topo:
# if graph.count(i) == 1:
# set_it = graph[i].begin()
# while set_it != graph[i].end():
# source[id] = i
# target[id] = deref(set_it)
# weight[id] = (aend[i] + bend[i] - astart[i] - bstart[i] + 2) * iden[i]
# id+=1
# inc(set_it)
#
#
# ## Convert array of 'string' chromosome ids to much faster numeric chromosome ids
# if list(np.unique(achr)) == list(np.unique(bchr)) == list(np.unique(inachr)) == list(np.unique(inbchr)):
# _, achrint = np.unique(achr, return_inverse = True)
# _, bchrint = np.unique(bchr, return_inverse = True)
# _, inachrint = np.unique(inachr, return_inverse = True)
# _, inbchrint = np.unique(inbchr, return_inverse = True)
# else:
# unchrs = np.unique(list(np.unique(achr)) + list(np.unique(bchr)) + list(np.unique(inachr)) + list(np.unique(inbchr)))
# unchrdict = {unchrs[i]:i for i in range(len(unchrs))}
# achrint = np.array([unchrdict[c] for c in achr], np.int)
# bchrint = np.array([unchrdict[c] for c in bchr], np.int)
# inachrint = np.array([unchrdict[c] for c in inachr], np.int)
# inbchrint = np.array([unchrdict[c] for c in inbchr], np.int)
#
#
# ## For each alignment/node calculate the number of bases which are not overlapping with the in-place blocks
# for i in n:
# ascore = 0
# bscore = 0
# ast = astart[i]
# aen = aend[i]
# bst = bstart[i]
# ben = bend[i]
#
# # print(ast, aen, bst, ben)
# for j in range(len_in):
# if achrint[i] == inachrint[j]:
# if inastart[j] > aen:
# break
# if inaend[j] < ast:
# continue
# if inastart[j] < ast:
# if inaend[j] < aen:
# ast = inaend[j]+1
# else:
# ast = aen+1
# break
# else:
# ascore += inastart[j] - ast
# if inaend[j] < aen:
# ast = inaend[j]+1
# else:
# ast = aen+1
# break
# ascore += aen - ast + 1
#
# for j in range(len_in):
# if bchrint[i] == inbchrint[j]:
# if inbstart[j] > ben:
# break
# if inbend[j] < bst:
# continue
# if inbstart[j] < bst:
# if inbend[j] < ben:
# bst = inbend[j]+1
# else:
# bst = ben+1
# break
# else:
# bscore += inbstart[j] - bst
# if inbend[j] < ben:
# bst = inbend[j]+1
# else:
# bst = ben+1
# break
# bscore += ben - bst + 1
# almntdata[i] = np.array([aend[i]-astart[i]+1, bend[i]-bstart[i]+1, ascore, bscore], dtype=np.int)
#
# out = deque()
# for i in n:
# # if i%100 == 0:
# # print(i, str(datetime.now()))
# nodepath.clear()
# pred = np.array([-1]*<Py_ssize_t>len(n), dtype = np.int)
# dist = np.array([np.float32('inf')]*<Py_ssize_t>len(n), dtype = np.float32)
# dist[i] = 0
#
# # Process vertices in topological order
# id = 0
# for j in n:
# for k in range(id, edgecnt):
# if source[k] != topo[j]:
# break
# if dist[target[k]] > dist[source[k]] + weight[k]:
# dist[target[k]] = dist[source[k]] + weight[k]
# pred[target[k]] = source[k]
# id+=1
# for j in n:
# # Find all connected paths which are profitable
# if dist[topo[j]] != float("inf"):
# current = topo[j]
# path.clear()
# while current!=i:
# if nodepath.count(current) > 0:
# deq_it = nodepath[current].rbegin()
# while deq_it != nodepath[current].rend():
# path.push_front(deref(deq_it))
# inc(deq_it)
# break
# path.push_front(current)
# current = pred[current]
# nodepath[topo[j]] = path
# path.push_front(i) ## Found the best path between two co-linear nodes
#
#
# ## Calculate score of the identified path
# ascore = float(alen[path[0]])
# bscore = float(blen[path[0]])
# agap = 0
# bgap = 0
#
# if path.size() > 1:
# for k in range(1, <Py_ssize_t> path.size()):
# ascore += float(alen[path[k]])
# bscore += float(blen[path[k]])
# agap += 0 if 0 > (astart[path[k]] - aend[path[k-1]]) else float(astart[path[k]] - aend[path[k-1]])
# if not isinv:
# bgap += 0 if 0 > (bstart[path[k]] - bend[path[k-1]]) else float(bstart[path[k]] - bend[path[k-1]])
# else:
# bgap += 0 if 0 > (bstart[path[k-1]] - bend[path[k]]) else float(bstart[path[k-1]] - bend[path[k]])
#
# score = min(((ascore - agap)/ascore),((bscore - bgap)/bscore))
#
#
# # print([path[id] for id in range(path.size())], score)
# ## Check if the alignments of the path explain sufficient unique alignments and are not excessively overlapped
#
# # Only process paths for which the alignments explain more than gaps
#
# if score > 0:
# al = 0
# bl = 0
# au = 0
# bu = 0
# for k in range(<Py_ssize_t> path.size()):
# al += almntdata[path[k]][0]
# bl += almntdata[path[k]][1]
# au += almntdata[path[k]][2]
# bu += almntdata[path[k]][3]
#
# #Trans block is selected IFF either the unique region on any genome is larger than tUC
# # or length of unique region on a genome is larger than tUP times the length of
# # the overlapping region on that genome
# if au > tUC or bu > tUC or au > tUP*al or bu > tUP*bl:
# out.append([path[k] for k in range(<Py_ssize_t> path.size())])
# return out


def blocksdata(outPlaceBlocks, inPlaceBlocks, threshold, tUC, tUP, chromo, tdgl):
logger = logging.getLogger('blocksdata.'+ chromo)
Expand Down

0 comments on commit d2229ac

Please sign in to comment.