diff --git a/CircDCJ.py b/CircDCJ.py new file mode 100644 index 0000000..df27e8b --- /dev/null +++ b/CircDCJ.py @@ -0,0 +1,136 @@ +import os +import sys +from DCJ import DCJ +from random import choice, randint +from copy import deepcopy + +class CircDCJ(DCJ): + #def __init__ (self, s): + #g = DCJ(s) + #if not g.circular(): + #print "Warning:", g, "not circular" + #self = self.circularize(g) + + def median (self, g1, g2, g3, errors=0): # pomocou median solvera + """ + Use BIOMedian solver to find median of g1, g2, and g3. + + BIOMedian solves the median problem for multicircular DCJ model, + however, the input genomes have to be circular. + """ + if not (g1.circular() and g2.circular() and g3.circular()): + print g1 + print g2 + print g3 + raise TypeError("The input genomes are not circular") + r = str(randint(0,99999)) + inf = "input" + r + outf = "input" + r + ".rst" + f = open (inf, 'w') + f.write('> Genome 1\n') + f.write('C: ' + g1.__str__()[:-2] + '\n') + f.write('> Genome 2\n') + f.write('C: ' + g2.__str__()[:-2] + '\n') + f.write('> Genome 3\n') + f.write('C: ' + g3.__str__()[:-2] + '\n') + f.close() + #print g1 + #print g2 + #print g3 + os.system('java -cp ms/BIO BIOMedian '+inf+' >> /dev/null') + f = open (outf, 'r') + s = '' + for line in f: + if line[0] == '>': continue + if line[0] == '#': break + pos = line.find(':') + s += line[pos + 1:] + ' @ ' + f.close() + os.system('rm '+inf) + os.system('rm '+outf) + print '.', + M = DCJ(s) + if self.n != M.n: + if errors == 3: + #TODO: najlepsie odchytit exception a vypisat historiu + #sys.exit() + return [g1, g2, g3] + else: + print "BIO ERROR" + return self.median_solver(g1, g2, g3, errors+1) + r = self.circularize(M) + return [r,] + + def circularize (self, g): + """ + Returns a set of circular genomes close to the genome. + """ + L, C = g.numch() + if L == 0 and C == 1: return CircDCJ(g._g) + while L > 0: + h = g.rand_neigh() + while h.numlin() >= L: + h = g.rand_neigh() + g = h + L = g.numlin() + while C > 1: + h = g.rand_neigh() + while h.numcirc() >= C: + h = g.rand_neigh() + g = h + C = g.numcirc() + return CircDCJ(h._g) + + def circularize2 (self): + """ + Returns a set of circular genomes close to the genome. + """ + print self + L, C = self.numch() + if L == 0 and C == 1: return # [self, ] + while L > 0: + h = self.rand_neigh() + while h.numlin() >= L: + h = self.rand_neigh() + self = h + L = self.numlin() + print self + while C > 1: + h = self.rand_neigh() + while h.numcirc() >= C: + h = self.rand_neigh() + self = h + C = self.numcirc() + print self + print "vysledok:", self + #return [CircDCJ(h._self)] + + def rand_neigh(self): + g = DCJ(self._g) + h = g.rand_neigh() + #while not h.circular(): + #h = g.rand_neigh() + return CircDCJ(h._g) + + def neigh(self): + c = [] + for i in xrange(0, self.n): + if self._g[i] > i: + for j in xrange(i + 1, self.n): + if self._g[j] > j: + u = deepcopy(self) + u.swap (i, self._g[i], j, self._g[j]) + if u.circular(): + c.append(u) + return c + + def betterer_neigh (self, z): + g = DCJ(self._g) + z = [DCJ(g._g) for g in z] + neigh = g.betterer_neigh(z) + if len(neigh) == 0: + print '-----' + print z[0] + print z[1] + print z[2] + return [self.circularize(h) for h in neigh] diff --git a/CircRev.py b/CircRev.py new file mode 100644 index 0000000..3eed2a0 --- /dev/null +++ b/CircRev.py @@ -0,0 +1,74 @@ +from LinRev import LinRev +from DCJ import DCJ +import os + +class CircRev(LinRev): + def __init__ (self, s): + if isinstance(s, str): + self.set (s) + else: + self.n = len(s) + self._g = s + + def set (self, s): + s = s.split() + if s[-1] != '@': + raise TypeError("The input genome is not circular") + s.pop() + s = [int(x) for x in s] + n = len(s) + for i in xrange(0,n): + if s[i] == n or s[i] == -n: + break + if s[i] == n: + s = s[i+1:n] + s[0:i] + else: + l1, l2 = s[0:i], s[i+1:n] + l1.reverse() + l2.reverse() + s = l1 + l2 + s = [-x for x in s] + #print s + self.n = n-1 + self.g, self.s, check = [0], [True], (self.n + 1) * [False] + for x in s: + self.s.append(x > 0) + x = abs(x) + if not (0 < x < n): print 'mimo' + if check[x]: print 'uz bolo...' + check[x] = True + self.g.append(x) + self.g.append (self.n + 1) + self.s.append (True) + + def __len__ (self): return self.n+1 + + def __str__ (self): + return self.toString() + ' @' + + def toString (self): + return " ".join([('' if self.s[i] else '-') + str(self.g[i]) for i in xrange(1, self.n+1)]) + ' ' + str(self.n+1) + + def numch (self): + return (1, 0) + + def dist2 (self, B): + f = open ('input', 'w') + print >>f, '> g' + print >>f, self.toString() + print >>f, '> h' + print >>f, B.toString() + f.close() + os.system('rm output') + os.system('./grimm -f input -o output -Cd') + f = open ('output', 'r') + for l in f: + if l[:18] == 'Reversal Distance:': + f.close() + return int(l[19:]) + raise Error("grimm") + + def better_neigh (self, z): + g = DCJ(self.g) + z = [DCJ(g.__str__()) for g in z] + return [CircRev(h.__str__()) for h in g.betterer_neigh(z) if h.circular()] diff --git a/DCJ.py b/DCJ.py new file mode 100644 index 0000000..e87a1eb --- /dev/null +++ b/DCJ.py @@ -0,0 +1,522 @@ +from Genome import Genome +from copy import deepcopy, copy +from random import randint, choice +import os +import sys + +class DCJ(Genome): + def __init__ (self, s): + if isinstance(s, str): + self.set (s) + else: + self.n = len(s) + self._g = s + + def set (self, s): + """ + Set the genome to the genome described in string s. + """ + s = s.split() + self.n = 2 * (len(s) - s.count("$") - s.count("@")) + self._g = self.n * [0] + first, last = -1, -1 + emptychr = True + for x in s: + if x == '$': + if not emptychr: self._g[last], first, last, emptychr = last, -1, -1, True + elif x == '@': + if not emptychr: self._g[last], self._g[first], first, last, emptychr = first, last, -1, -1, True + else: + x = int(x) + if x > 0: + x = 2 * (x - 1) + y = x + 1 + else: + x = -2 * x - 1 + y = x - 1 + if first == -1: first, last = x, x + self._g[last], self._g[x] = x, last + last, emptychr = y, False +# print self._g + + def __len__ (self): + return self.n/2 + + def _compstr (self, v, i): + """ + Auxilliary function creating the string description of the genome. + """ + j = i + s = "" + while not v[j]: + v[j] = True + if j % 2 == 0: + s = s + str(j / 2 + 1) + " " + v[j + 1] = True + j = self._g[j + 1] + else: + s = s + str(-(j / 2) - 1) + " " + v[j - 1] = True + j = self._g[j - 1] + return s + + def __str__ (self): + v = self.n * [False] + s = "" + for i in xrange(0, self.n): + if not v[i] and self._g[i] == i: + s = s + self._compstr (v, i) + "$ " + for i in xrange(0, self.n): + if not v[i]: + s = s + self._compstr (v, i) + "@ " + return s + + def _mark (self, v, i): + j = i + while not v[j]: + v[j] = True + if j % 2 == 0: j += 1 + else: j -= 1 + v[j] = True + j = self._g[j] + + def numch (self): + """ + Compute the number of chromosomes. + + returns (L, C) where L is the number of linear and C the number of circular chromosomes. + """ + v = self.n * [False] + s = "" + L, C = 0, 0 + for i in xrange(0, self.n): + if not v[i] and self._g[i] == i: + self._mark (v, i) + L += 1 + for i in xrange(0, self.n): + if not v[i]: + self._mark (v, i) + C += 1 + return L, C + + def numlin (self): + """ + Returns the number of linear chromosomes + """ + L, C = self.numch() + return L + + def numcirc (self): + """ + Returns the number of circular chromosomes + """ + L, C = self.numch() + return C + + def _comp_size (self, g, v, i, p): + j = i + while not v[j]: + v[j] = 1 + # print j, + if p == 0: j = g[j]; p = 1 + else: j = self._g[j]; p = 0 + return p + + def dist (self, B): + """ + Compute the DCJ distance from genome B. + """ + if self.n != B.n: + raise TypeError ("Computing distance of genomes with different number of markers.") + #print 'BUUG, rozne dlzky', self.n, B.n; print self; print B + op, c = 0, 0 + v = self.n * [False] + for i in xrange(0, self.n): + if not v[i] and self._g[i] == i: + # print "("+str(comp_size (A, B, v, i, 0))+")" + op = op + self._comp_size (B._g, v, i, 0) + elif not v[i] and B._g[i] == i: + # print "("+str(1-comp_size (A, B, v, i, 1))+")" + op = op + 1 - self._comp_size (B._g, v, i, 1) + for i in xrange(0, self.n): + if not v[i]: + # print "("+str(comp_size (A, B, v, i, 1))+")" + c = c + self._comp_size (B._g, v, i, 1) + # print n/2, c, op + return self.n / 2 - c - op / 2 + + def swap (self, p, q, p2, q2): + if p == q and p2 == q2: # {p} {p2} --> {p,p2} + self._g[p], self._g[p2] = p2, p + elif p == q: # {p} {p2,q2} --> {p,p2}, {q2} + self._g[p], self._g[p2], self._g[q2] = p2, p, q2 + elif p2 == q2: # {p,q} {p2} --> {p,p2}, {q} + self._g[p], self._g[p2], self._g[q] = p2, p, q + else: # {p,q} {p2,q2} --> {p,p2} {q,q2} + self._g[p], self._g[p2], self._g[q], self._g[q2] = p2, p, q2, q + + def unswap (self, p, q, p2, q2): + # nefunguje pre {p,q} {pp} + if p == q and p2 == q2: # {p} {p2} <-- {p,p2} + self._g[p], self._g[p2] == p, p2 + elif p == q: # {p} {p2,q2} <-- {p,p2}, {q2} + self._g[p], self._g[p2], self._g[q2] = p, q2, p2 + elif p2 == q2: # {p,q} {p2} <-- {p,p2}, {q} + self._g[p], self._g[p2], self._g[q] = q, p2, p + else: # {p,q} {p2,q2} --> {p,p2} {q,q2} + self._g[p], self._g[p2], self._g[q], self._g[q2] = q, q2, p, p2 + + def linear (self): + """ + Is the genome linear? + """ + L, C = self.numch() + return (C == 0) and (L == 1) + + def multilin (self): + """ + Is the genome multilinear? + """ + L, C = self.numch() + return (C == 0) + + def circular (self): + """ + Is the genome circular? + """ + L, C = self.numch() + return (L == 0) and (C == 1) + + def multicirc (self): + """ + Is the genome multicircular? + """ + L, C = self.numch() + return (L == 0) + + def tempcirc (self): + """ + Does the genome contain only one (temporary) circular chromosome? + """ + L, C = self.numch() + return (C <= 1) + + def unitempcirc (self): + """ + Is the genome linear, with possibly one (temporary) circular chromosome? + """ + L, C = self.numch() + return (L == 1) and (C <= 1) + + def linearize2 (self): + """ + Returns a set of multilinear genomes close to the genome. + """ + L, C = self.numch() + if C == 0: return [self, ] + g = DCJ(self._g) + while C > 1: g = choice ([h for h in g.neigh() if h.numcirc() < C]); C = g.numcirc() + for h in g.neigh(): + if h.multilin(): print h + return [h for h in g.neigh() if h.multilin()] + + def neigh (self): + """ + List all genomes within one DCJ operation from the genome. + + The list has size O(n^2). + """ + c = [] + for i in xrange(0, self.n): + if self._g[i] >= i: + for j in xrange(i + 1, self.n): + if self._g[j] >= j and j != self._g[i]: + u = deepcopy(self) + u.swap (i, self._g[i], j, self._g[j]) + c.append(u) + if self._g[i] != i: + j = self._g[i] + u = deepcopy(self) + u.swap (i, j, i, j) + c.append(u) + return c + + def neigh_part (self, i1, i2, j1, j2): + """ + List all genomes within one DCJ operation from the genome. + + The list has size O(n^2). + """ + c = [] + i2 = min(i2, self.n) + j2 = min(j2, self.n) + for i in xrange(i1, i2): + if self._g[i] >= i: + for j in xrange(j1, j2): + if j > i and self._g[j] >= j and j != self._g[i]: + u = deepcopy(self) + u.swap (i, self._g[i], j, self._g[j]) + c.append(u) + if self._g[i] != i: + j = self._g[i] + u = deepcopy(self) + u.swap (i, j, i, j) + c.append(u) + return c + + def neigh2 (self): + """ + List only multilinear neighbours + + The list has size O(n^2). + """ + return [g for g in self.neigh() if self.multilin(g)] + + def better_neigh (self, z): + """ + List only neighbours which improve the overall distance from the neighbouring vertices (in z). + + z is a list of genomes + The returned list has size O(n^2) in the worst case, in practice, it should be shorter. + """ + c = self.neigh() + D = sum([self.dist(g) for g in z]) + return [g for g in c if sum([g.dist(g2) for g2 in z]) < D] + + def even_better_neigh (self, g2): + """ + List neighbours that are closer to genome g2. + + It goes through all adjacencies in g2 and if they are breakpoints, it creates + a new genome with healed bp in the list of neighbours. The list has size O(n). + """ + c = [] + for i in xrange(0, self.n): + if g2._g[i] >= i: + j = g2._g[i] + if self._g[i] != j: + u = deepcopy (self) + u.swap(i, self._g[i], j, self._g[j]) + c.append(u) + return c + + def betterer_neigh (self, z): + """ + List neigbours that are closer to one of the neighbouring vertices. + """ + c = [] + c.extend (self.even_better_neigh(z[0])) + c.extend (self.even_better_neigh(z[1])) + c.extend (self.even_better_neigh(z[2])) + return c + D = sum([self.dist(g) for g in z]) + return [g for g in c if sum([g.dist(g2) for g2 in z]) < D] + + def median_score(self, g1, g2, g3): + return self.dist(g1) + self.dist(g2) + self.dist(g3) + + # dake pokusy o heuristicke mediany + def median2 (self, g1, g2, g3): + z = [g1, g2, g3] + G = self + while True: + c = G.neigh(); + D = G.median_score(g1, g2, g3) + D2, G = min([(g.median_score(g1, g2, g3), g) for g in c]) + if D2 >= D and randint(0, 3) == 0: break + return [G, ] + + def _next (self, j): + if j % 2 == 0: j += 1 + else: j -= 1 + return self._g[j] + + def circularize (self, i1, i2, j1, j2): + # zatial predpokladam, ze to ma len 2C + # a spravim vsetky moznosti ako vrazit jeden do druheho + v = self.n * [False] + c = [] + self._mark (v, 0) + for i in xrange(1, self.n): + if not v[i]: + self._mark (v, i) + k = 0 + while True: + j = i + if i1 <= k < i2: + while True: + if j1 <= j < j2: + g = deepcopy (self) + g.swap(k, self._g[k], j, self._g[j]) + c.append(g) + g = deepcopy (self) + g.swap(k, self._g[k], self._g[j], j) + c.append(g) + j = self._next(j) + if j == i: break + k = self._next(k) + if k == 0: break + return c + + def median3 (self, g1, g2, g3): + g = [0, 0, 0]; s = [set(), set(), set()] + n, g[0], g[1], g[2] = g1.n, copy(g1._g), copy(g2._g), copy(g3._g) + oth2 = [(0, 1, 2), (1, 0, 2), (2, 0, 1)] + perm3 = [(0, 1, 2), (0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)] + for i in xrange(0, n): + for j in xrange(0, 3): + if g[j][i] <= i: s[j].add((i, g[j][i])) + ii = s[0] & s[1] & s[2] + s[0] -= ii; s[1] -= ii; s[2] -= ii; + while True: + z = False + for (i, j, k) in oth2: + for a in xrange(0, n): + if g[i][a] < a: + b = g[i][a]; c, d = g[j][a], g[j][b] + if g[i][c] == d and g[j][a] == g[k][a] and g[j][b] == g[k][b] and g[j][a] != g[i][a] and g[j][b] != g[i][b]: + swap2(g[i], a, b, c, d) + z = True + if not z: break + while True: + ss = set() + z = False + for (i, j, k) in oth2: + for a in xrange(0, n): + if g[i][a] < a and g[j][a] == g[k][a] != g[i][a]: + b, c = g[i][a], g[j][a]; d = g[i][c] + ss.add((i, a, b, c, d)) + if len(ss) > 0: + (i, a, b, c, d) = choice(list(ss)) + swap2(g[i], a, b, c, d) + continue + for (i, j, k) in perm3: + for a in xrange(0, n): + if g[i][a] < a: + b = g[i][a] + x, w, y, z = g[j][a], g[k][a], g[j][b], g[k][b] + if len(set([a, b, x, y, w, z])) == 6: + ss.add((i, j, k, a, b, x, w, y, z)) + if len(ss) > 0: + (i, j, k, a, b, x, w, y, z) = choice(list(ss)) + swap2(g[j], a, x, b, y) + swap2(g[k], a, w, b, z) + continue + else: break + for (i, j, k) in oth2: + for a in xrange(0, n): + if g[i][a] < a and g[j][a] == g[k][a] == a: + swap2(g[i], a, g[i][a], a, g[i][a]) +# print Genome(g[0]) +# print Genome(g[1]) +# print Genome(g[2]) + return DCJ(g[0]).neigh() + + def rand_neigh (self): + c = deepcopy(self) + i = randint(0, c.n - 1) + j = randint(0, c.n - 1) + if i != j and i != c._g[j]: + c.swap(i, c._g[i], j, c._g[j]) + return c + + def rand_neigh2 (self, restr): +# print self, '-->', + c = self.neigh() + if restr == 1 or restr == 4: + c = [g for g in c if g.multicirc()] + elif restr == 2: + c = [g for g in c if g.unitempcirc()] + elif restr == 3: + c = [g for g in c if g.tempcirc()] + g = choice(c) +# print g, '-->', + L, C = g.numch() + if restr == 1: + if C >= 2: g = choice ([h for h in g.neigh() if h.circular()]) + elif restr == 2: + if C >= 1: g = choice ([h for h in g.neigh() if h.linear()]) + elif restr == 3: + if C >= 1: g = choice ([h for h in g.neigh() if h.multilin()]) +# print g + return g + + def path (self, B): + P = [] + C = deepcopy (self) + s = set() + for p in xrange(0, B.n): + q = B._g[p] + if q < p: continue + if p == q: + u = C._g[p] + if u != p: + s.add((p, p)) + else: + u, v = min(p, C._g[p]), min(q, C._g[q]) + if u != v: + s.add((p, q)) + k = C.dist(B) - 1 + i = 0 + while i < k: + p, q = choice (list(s)) + u, v = C._g[p], C._g[q] + if p == q: + if B._g[u] == u: + C.swap(p, u, p, u) + s.remove((u, u)) + s.remove((p, p)) + else: continue + else: + s.remove((p, q)) + if u == p and q == v: + C.swap(p, q, q, p) + elif u == p: + C.swap(q, v, p, v) + if B._g[v] == v: s.remove((v, v)) + elif v == q: + C.swap(p, u, q, u) + if B._g[u] == u: s.remove((u, u)) + else: + C.swap(p, u, q, v) + if B._g[u] == v: s.remove((min(u, v), max(u, v))) + i += 1 + P.append(deepcopy(C)) + return P + + def restrict (self, n): + s = self.__str__().split() + t = [] + for x in s: + if x == '$' or x == '@': t.append(x) + elif abs(int(x)) <= n: t.append(x) + self.set(" ".join(t)) + if not self.OK(): print 'BUUUG restrict' + + def add (self): + self._g.append(self.n) + self._g.append(self.n + 1) + self.n += 2 + if not self.OK(): print 'BUUUG add' + + def __hash__ (self): + return hash(self.__str__()) + + def __cmp__ (self, B): + return cmp (self._g, B._g) + + def OK (self): + for i in xrange(0, self.n): + if i != self._g[self._g[i]]: return False + return True + + +def swap2 (g, p, q, p2, q2): + if p == q and p2 == q2: # {p} {p2} --> {p,p2} + g[p], g[p2] = p2, p + elif p == q: # {p} {p2,q2} --> {p,p2}, {q2} + g[p], g[p2], g[q2] = p2, p, q2 + elif p2 == q2: # {p,q} {p2} --> {p,p2}, {q} + g[p], g[p2], g[q] = p2, p, q + else: # {p,q} {p2,q2} --> {p,p2} {q,q2} + g[p], g[p2], g[q], g[q2] = p2, p, q2, q diff --git a/Genome.py b/Genome.py new file mode 100644 index 0000000..b0e37e2 --- /dev/null +++ b/Genome.py @@ -0,0 +1,73 @@ +from random import choice, shuffle, sample + +class Genome: + def __init__ (self, s): + raise NotImplementedError( "Should have implemented this" ) + def set (self, s): + raise NotImplementedError( "Should have implemented this" ) + def __len__ (self): + raise NotImplementedError( "Should have implemented this" ) + def __str__ (self): + raise NotImplementedError( "Should have implemented this" ) + def numch (self): + raise NotImplementedError( "Should have implemented this" ) + def dist (self, B): + raise NotImplementedError( "Should have implemented this" ) + def neigh (self): + raise NotImplementedError( "Should have implemented this" ) + def median_solver (self, g1, g2, g3): + raise NotImplementedError( "Should have implemented this" ) + + def median_cloud (self, g1, g2, g3, MAX=300): + g = self.median_solver(g1, g2, g3)[0] + t = g.median_score(g1, g2, g3) + S = [set(), set(), set(), set()] + a = [[g], [], [], []] + i = 0 + while i < 10000 and len(S[0]) <= MAX: # len(a) > 0 and len(S) <= MAX: + if len(a[0]) > 0: g = a[0].pop() + elif len(a[1]) > 0: g = a[1].pop() + elif len(a[2]) > 0: g = a[2].pop() + elif len(a[3]) > 0: g = a[3].pop() + else: break + for h in g.neigh(): + k = h.median_score(g1, g2, g3) - t + i += 1 + if i >= 10000: break + if k > 3: continue + if k < 0: k = 0 + if not h in S[k]: + S[k].add(h) + a[k].append(h) + if len(S[0]) > MAX: break + #print len(S[0]), len(S[1]), len(S[2]), len(S[3]), i + l = list(S[0]) + if len(l) < MAX: l.extend(list(S[1])) + if len(l) < MAX: l.extend(list(S[2])) + if len(l) < MAX: l.extend(list(S[3])) + if len(l) > MAX: l = l[0:MAX] + return l + + def single_median (self, g1, g2, g3, MAX=300): + r = self.median_solver(g1, g2, g3) + if len(r) <= MAX: return r + else: return sample(r, MAX) + +def id_genome (n): + """ + Linear genome '1 2 3 ... n $' + """ + return " ".join([str(x) for x in xrange(1, n + 1)]) + " $" + +def id_cgenome (n): + return " ".join([str(x) for x in xrange(1, n + 1)]) + " @" + +def random_linear_genome (n): + g = [str(x * choice([-1, 1])) for x in xrange(1, n+1)] + shuffle(g) + return " ".join(g) + " $" + +def random_circular_genome (n): + g = [str(x * choice([-1, 1])) for x in xrange(1, n+1)] + shuffle(g) + return " ".join(g) + " @" diff --git a/History.py b/History.py new file mode 100644 index 0000000..2195a21 --- /dev/null +++ b/History.py @@ -0,0 +1,936 @@ +import os +from Tree import Tree +from Genome import * +from copy import deepcopy, copy +from random import choice, randint, shuffle, sample +from numpy.random import poisson +import time +from pyxfigs import * +from CircDCJ import CircDCJ + +chngf = open ('changes', 'a') +heurf = open ('heuristics', 'a') + +def penalty (p): +# return 0 + L, C = p + if C > 1: return 1 + return 0 +# if C > 0: return 10 +# else: return 0 +# if C > 1 or (L > 0 and C > 0): return 10 +# else: return max (0, L-2) + +def _c(g, i): + return "C%d,%d" % (g, i) +def _g(g, i, j): + assert i != j + if i > j: i, j = j, i + return "G%d,%d,%d" % (g, i, j) +def _p(g, i, j): + assert i != j + if i > j: i, j = j, i + return "P%d,%d,%d" % (g, i, j) +def _u(g, i): + return "U%d,%d" % (g, i) + +class History: + def __init__ (self, T, wdir, simul=False): + "Zo suboru T vo wdir nacita genomy v listoch (v liste moze byt aj viac moznosti)" + self.T = T + self.wdir = 'data' + os.sep + wdir + if simul: + self.g = [0] * self.T.N + return + f = open(self.wdir + os.sep + 'G', 'r') + self.g = [] + self.input = [] + for i in xrange(0, self.T.n): + self.input.append([]) + i = 0 + for line in f: + line = line.strip() + if line == '' or line[0] == '#': continue + pos = line.find(' ') + self.input[self.T.leaf[line[0:pos]]].append(Genome(line[pos + 1:])) + f.close() + #for i in xrange(0, self.T.n): + #print self.T.name[i], self.input[i] + #print self.T.n, self.T.N + for i in xrange(0, self.T.n): + self.g.append(deepcopy(choice(self.input[i]))) + for i in xrange(self.T.n, self.T.N): + if randint(0, 1): self.g.append (self.g[self.T.right[i]]) + else: self.g.append (self.g[self.T.left[i]]) + self.chng = [0] * self.T.N + self.cand() + + def __str__ (self): + s = "" + for i in xrange(0, self.T.N): + s = s + self.T.name[i] + ' ' + str(self.g[i].dist(self.g[self.T.parent[i]])) + ' ' + self.g[i].__str__() + '\n' #' ' + str(self.g[i].numch()) +'\n' + return s #+str(self.score())+'\n' + + def read (self, fn): + """ + Zo suboru fn nacita historiu v standardnom formate + (meno, vzdialenost od otca, genom). Pozor, mena sa + ignoruju - vrcholy musia ist v takom poradi, + ako su v strome. + """ + f = open(self.wdir + os.sep + fn, 'r') + #g = [] + i = 0 + for line in f: + line = line.strip() + if line == '' or line[0] == '#': continue + #print line + self.g[i] = Genome(line.split(None, 2)[2]) + i += 1 + f.close() + + def read_cand (self, fn): + """ + Zo suboru fn nacita historiu ako dalsieho kandidata. + """ + f = open(self.wdir + os.sep + fn, 'r') + #g = [] + i = 0 + tmp = CircDCJ("1 2 3 @") + for line in f: + line = line.strip() + if line == '' or line[0] == '#': continue + if i >= self.T.n: + #self.c[i].append(Genome(line.split(None, 2)[2])) + self.c[i].append(tmp.circularize(Genome(line.split(None, 2)[2]))) + ##self.heur[i].append(-1) + i += 1 + f.close() + + def cand (self): + """ + Vytvori nove zoznamy kandidatov, inicializuje ich genomami v historii + """ + self.c = [] + ##self.heur = [] + for i in xrange(0, self.T.n): + self.c.append(deepcopy(self.input[i])) + ##self.heur.append([0]*len(self.input[i])) + for i in xrange(self.T.n, self.T.N): +# if self.g[i].circular(): + self.c.append([self.g[i], ]) +# else: +# self.c.append([]) + ##self.heur.append([0,]) + ##if len(self.heur[i]) != len(self.c[i]): print 'cand' + + def cand2 (self, m): # dana dlzka genomu + """ + Vytvori nove zoznamy kandidatov, inicializuje ich genomami v historii + """ + self.c = [] + ##self.heur = [] + for g in self.g: + if g.n > 2 * m: g.restrict(m) + if 2 * m > g.n: g.add() + for i in xrange(0, self.T.n): + self.c.append(deepcopy(self.input[i])) + for g in self.c[i]: g.restrict(m) + ##self.heur.append([0]*len(self.input[i])) + for i in xrange(self.T.n, self.T.N): + self.c.append([self.g[i], ]) + ##self.heur.append([0,]) + ##if len(self.heur[i]) != len(self.c[i]): print 'cand' + + def fill_in_desc (self): + "Do kazdeho zoznamu kandidatov prida tych, ktori su v zozname pod nim" + for i in xrange(self.T.n, self.T.N): + ##if len(self.heur[i]) != len(self.c[i]): print 'pred desc' + self.c[i].extend (self.c[self.T.left[i]]) + ##self.heur[i].extend ([1]*len(self.c[self.T.left[i]])) + self.c[i].extend (self.c[self.T.right[i]]) + if len(self.c[i]) > 300: + self.c[i] = sample(self.c[i], 300) + ##self.heur[i].extend ([1]*len(self.c[right[i]])) + ##if len(self.heur[i]) != len(self.c[i]): print 'desc' + + def fill_in_middle (self): + "Do zoznamu kandidatov prida genomy na nahodnej ceste medzi lavym a pravym synom" + for i in xrange(self.T.n, self.T.N): + r, l = self.g[self.T.right[i]], self.g[self.T.left[i]] + p = l.path(r) + self.c[i].extend (p) + ##self.heur[i].extend ([2]*len(p)) + ##if len(self.heur[i]) != len(self.c[i]): print 'middle' + + def fill_paths (self): + "Do zoznamu kandidatov prida nahodne cesty medzi vsetkymi dvojicami listov v podstromoch" + desc = [] + for i in xrange(0, self.T.n): + desc.append([i, ]) + for i in xrange(self.T.n, self.T.N): + l, r = self.T.left[i], self.T.right[i] + for j in xrange(0, len(desc[l])): + for k in xrange(0, len(desc[r])): + e = self.g[desc[l][j]].path(self.g[desc[r][k]]) + self.c[i].extend(e) + ##self.heur[i].extend([3]*len(e)) + ##if len(self.heur[i]) != len(self.c[i]): print 'paths' + desc.append (desc[l] + desc[r]) + + def score (self): + """ + Score of the evolutionary history + """ + s = 0 + for i in xrange(0, self.T.N): + s = s + self.g[i].dist(self.g[self.T.parent[i]]) + for i in xrange(self.T.n, self.T.N): + s = s + penalty (self.g[i].numch()) + return s + + def write (self, fn = None): #, suffix): + """ + Write the history into a file in the working directory. + + The filename is of the form score-time. + """ + #now = time.localtime(time.time()) + if fn == None: + fn = ("%04d" % self.score()) + '-' + str(time.time()) + #+time.strftime("%d-%m-%y_%H:%M:%S", now) + f = open (self.wdir + os.sep + fn, 'w') + f.write (self.__str__()) + f.close() + + def neigh (self): + "Do zoznamu kandidatov prida okolia aktualnych genomov" + for i in xrange(self.T.n, self.T.N - 1): + e = [] + e.extend (self.g[i].neigh()) + #e.extend (self.g[i].even_better_neigh(self.g[self.T.left[i]])) + #e.extend (self.g[i].even_better_neigh(self.g[self.T.right[i]])) + if i == self.T.left[self.T.N - 1]: p = self.T.right[self.T.N - 1] + elif i == self.T.right[self.T.N - 1]: p = self.T.left[self.T.N - 1] + else: p = self.T.parent[i] + #e.extend (self.g[i].even_better_neigh(self.g[p])) + #e = self.g[i].betterer_neigh([self.g[self.T.left[i]], self.g[self.T.right[i]], self.g[p]]) + self.c[i].extend(e) + #print len(self.c[i]) + ##self.heur[i].extend([4]*len(e)) +# e = self.g[N-1].better_neigh([self.g[self.T.left[N-1]], self.g[self.T.right[N-1]]]) +# self.c[N-1].extend(e) + ##self.heur[N-1].extend([4]*len(e)) + + def neigh_part (self, i1, i2, j1, j2): + "Do zoznamu kandidatov prida okolia aktualnych genomov" + for i in xrange(self.T.n, self.T.N - 1): + e = [] + if self.g[i].circular(): + e.extend (self.g[i].neigh_part(i1, i2, j1, j2)) + else: + e.extend (self.g[i].circularize(i1, i2, j1, j2)) + self.c[i].extend(e) + + def neigh2 (self): + "Do zoznamu kandidatov prida okolia kandidatov" + for i in xrange(self.T.n, self.T.N): + k = len(self.c[i]) + for j in xrange(0, k): + e = self.c[i][j].betterer_neigh([self.g[self.T.left[i]], self.g[self.T.right[i]], self.g[self.T.parent[i]]]) + self.c[i].extend(e) + ##self.heur[i].extend([5]*len(ebetter_neigh)) + + def neigh3 (self): + "Do zoznamu kandidatov prida okolia kandidatov" + for i in xrange(self.T.n, self.T.N): + e = self.g[i].better_neigh([self.g[self.T.left[i]], self.g[self.T.right[i]], self.g[self.T.parent[i]]]) + self.c[i].extend(e) + + def median (self): + for i in xrange(self.T.n, self.T.N - 1): + if i == self.T.left[self.T.N - 1]: + self.c[i].extend(self.g[i].median(self.g[self.T.left[i]], self.g[self.T.right[i]], self.g[self.T.right[self.T.N - 1]])) + elif i == self.T.right[self.T.N - 1]: + self.c[i].extend(self.g[i].median(self.g[self.T.left[i]], self.g[self.T.right[i]], self.g[self.T.left[self.T.N - 1]])) + else: + self.c[i].extend(self.g[i].median(self.g[self.T.left[i]], self.g[self.T.right[i]], self.g[self.T.parent[i]])) + + def _replace (self, M, v, i): + global chng + self.chng[v] = self.g[v].dist(self.c[v][i]) + self.g[v] = self.c[v][i] + if v < self.T.n: return + self._replace (M, self.T.left[v], M[v][i][0]) + self._replace (M, self.T.right[v], M[v][i][1]) + + def _opt_sols (self, B, M, ns, u, i, m): + if u < self.T.n: return + v, w = self.T.left[u], self.T.right[u] + lv, lw = len(self.c[v]), len(self.c[w]) + bl = B[v][M[u][i][0]] + dl = self.c[v][M[u][i][0]].dist(self.c[u][i]) + br = B[w][M[u][i][1]] + dr = self.c[w][M[u][i][1]].dist(self.c[u][i]) + for j in xrange(0, lv): + dd = self.c[v][j].dist(self.c[u][i]) + if ns[v][j] > 0 and B[v][j] + dd == bl + dl: + ns[v][j] = -ns[v][j] + self._opt_sols (B, M, ns, v, j, bl) + for j in xrange(0, lw): + dd = self.c[w][j].dist(self.c[u][i]) + if ns[w][j] > 0 and B[w][j] + dd == br + dr: + ns[w][j] = -ns[w][j] + self._opt_sols (B, M, ns, w, j, br) + + def opt_neigh (self): + "Najde historiu v okoli s najmensim skore" + global chngf#, heurf + if len(self.c[self.T.left[self.T.N - 1]]) < len(self.c[self.T.right[self.T.N - 1]]): + self.c[self.T.N - 1] = self.c[self.T.left[self.T.N - 1]] +## self.heur[self.T.N-1] = self.heur[self.T.left[self.T.N-1]] + else: + self.c[self.T.N - 1] = self.c[self.T.right[self.T.N - 1]] +## self.heur[self.T.N-1] = self.heur[self.T.right[self.T.N-1]] + B, M = [], [] + print [len(self.c[i]) for i in xrange(self.T.n, self.T.N)] +## ns = [0]*self.T.N +## for u in xrange(0,self.T.N): +## ns[u] = [1]*len(self.c[u]) +## if len(self.heur[u]) != len(self.c[u]): print u, '!!!!!' + for u in xrange(0, self.T.n): + B.append(len(self.c[u]) * [0]) + M.append([]) + for i in xrange(0, len(self.c[u])): + M[u].append([0, 0]) + for u in xrange(self.T.n, self.T.N): + v, w = self.T.left[u], self.T.right[u] + ul, vl, wl = len(self.c[u]), len(self.c[v]), len(self.c[w]) + B.append([]); M.append([]) + for i in xrange(0, ul): + B[u].append(penalty(self.c[u][i].numch())) + M[u].append([0, 0]) + for i in xrange(0, ul): + m, mi, q = 999999, 0, 1 + for j in xrange(0, vl): + t = B[v][j] + self.c[u][i].dist(self.c[v][j]) +## if t == m: q += ns[v][j] + if t < m: m, mi = t, j # , q = t, j, ns[v][j] + B[u][i], M[u][i][0] = B[u][i] + m, mi #, ns[u][i] = B[u][i]+m, mi, q + m, mi, q = 999999, 0, 1 + for j in xrange(0, wl): + t = B[w][j] + self.c[u][i].dist(self.c[w][j]) +## if t == m: q += ns[w][j] + if t < m: m, mi = t, j # , q = t, j, ns[w][j] + B[u][i], M[u][i][1] = B[u][i] + m, mi +## ns[u][i] *= q + m, mi, q = 999999, 0, 1 + for i in xrange(0, len(self.c[self.T.N - 1])): +## if B[self.T.N-1][i] == m: q += ns[self.T.N-1][i] + if B[self.T.N - 1][i] < m: m, mi = B[self.T.N - 1][i], i #, q = B[self.T.N-1][i], i, ns[self.T.N-1][i] + +## for i in xrange(0,len(self.c[self.T.N-1])): +## if B[self.T.N-1][i] == m: +## ns[self.T.N-1][i] = -ns[self.T.N-1][i] +## self._opt_sols(B, M, ns, self.T.N-1, i, m) +## hsug = [0]*9 +## hopt = [0]*9 +## for v in xrange(self.T.n,self.T.N): +## for i in xrange(0,len(self.c[v])): +## hsug[self.heur[v][i]] += 1 +## if ns[v][i] < 0: hopt[self.heur[v][i]] += 1 +## print >> heurf, q, "optimalnych rieseni" +## for i in xrange(0,9): print >> heurf, "%d/%d" % (hopt[i],hsug[i]), +## print >> heurf + self.chng = [0] * self.T.N + self._replace (M, self.T.N - 1, mi) + print >> chngf, '\n', self.chng[self.T.n:self.T.N], m, + if m != self.score(): print "BUGGGGGGGGGGG", m, self.score() + return m + + def local_opt (self): + raise NotImplementedError( "Should have implemented this" ) +# "Najde lokalne minimum historii" +# self.cand() +# self.fill_in_desc() +# self.rand_neigh() +# self.fill_in_middle() +# self.fill_fitch() + #s, s2 = 9999, self.opt_neigh() +# s, s2 = 9999, 9998 +# print >> chngf, ' *', +# self.cand() +# s, s2 = s2, self.opt_neigh() +# print s2 + #while s2 < s: + #print "pred" + #print self + # self.cand() +# self.fill_in_middle() +# self.fill_fitch() +# self.median() + # self.median() + #self.neigh() + #print "po" + #print self + # s, s2 = s2, self.opt_neigh() + #print s2 + #print self + + def local_opt_steinerization (self): + "Najde lokalne minimum historii" + s, s2 = 9999, 9998 + while s2 < s: + s = s2 + for i in xrange(self.T.n, self.T.N - 1): + self.cand() + if i == self.T.left[self.T.N - 1]: + self.c[i].extend(self.g[i].median(self.g[self.T.left[i]], self.g[self.T.right[i]], self.g[self.T.right[self.T.N - 1]])) + elif i == self.T.right[self.T.N - 1]: + self.c[i].extend(self.g[i].median(self.g[self.T.left[i]], self.g[self.T.right[i]], self.g[self.T.left[self.T.N - 1]])) + else: + self.c[i].extend(self.g[i].median(self.g[self.T.left[i]], self.g[self.T.right[i]], self.g[self.T.parent[i]])) + s2 = self.opt_neigh() + print s2 + print self + + def local_opt_medians (self): + "Najde lokalne minimum historii" + s, s2 = 9999, 9998 + while s2 < s: + self.cand() + self.median() + s, s2 = s2, self.opt_neigh() + print s2 + print self + + def local_opt_better_neighbours (self): + "Najde lokalne minimum historii" + s, s2 = 9999, 9998 + while s2 < s: + self.cand() + self.neigh2() + s, s2 = s2, self.opt_neigh() + print s2 + print self + + def local_opt_neighbours (self): + "Najde lokalne minimum historii" + m = len(self.g[0]) + #self.cand() + #self.init_hist() + s, s2 = 9999, 9998 #self.opt_neigh() + while s2 < s: + s = s2 +# for i in xrange(0,m,20): + y = self.chunks + if self.x == -1: + rr = xrange(0,m,y) + else: + rr = [self.x] + for i in rr: + for j in xrange(i,m,y): + self.cand() + self.neigh_part(i, i+y, j, j+y) + s2 = self.opt_neigh() + print i, j, '>', s2, self.noncirc() + print self + + def noncirc (self): + num = 0 + for i in xrange(self.T.n, self.T.N): + if not self.g[i].circular(): + num += 1 + return num + + def local_opt_desc (self): + "Najde lokalne minimum historii" + self.cand() + self.fill_in_desc() + s, s2 = 9999, self.opt_neigh() + while s2 < s: + self.cand() + self.median() + s, s2 = s2, self.opt_neigh() + print s2 + print self + + def local_opt_incr_lengths (self): + "Najde lokalne minimum historii" + gl = self.input[0][0].n / 2 + for i in xrange(5, gl): + print 'LEN:', i + self.cand2(i); + self.fill_in_middle(); self.fill_in_desc() + s, s2 = 9999, self.opt_neigh() + while s2 < s: + self.cand2(i); self.fill_in_middle(); self.fill_in_desc(); self.neigh() + s, s2 = s2, self.opt_neigh() + print s2 + print "---" + + def init_hist (self): + n, N = self.T.n, self.T.N + left, right, parent = self.T.left, self.T.right, self.T.parent + desc = [] + for i in xrange(0, n): + desc.append([i]) + for i in xrange(n, N): + desc.append(desc[left[i]]+desc[right[i]]) +# print 'desc', i, desc[i] + desc2 = N*[0] + ll, rr = left[N-1], right[N-1] + desc2[ll] = copy(desc[rr]) + desc2[rr] = copy(desc[ll]) +# print 'desc2', ll, desc2[ll] +# print 'desc2', rr, desc2[rr] + for i in reversed(xrange(n, N-1)): + if i == ll or i == rr: continue + desc2[i] = copy(desc2[parent[i]]) + if left[parent[i]] == i: # lavy syn + desc2[i].extend(desc[right[parent[i]]]) + else: + desc2[i].extend(desc[left[parent[i]]]) +# print 'desc2', i, desc2[i] + for i in xrange(n, N-1): + for k in xrange(0,10): + z0 = self.g[choice(desc[left[i]])] + z1 = self.g[choice(desc[right[i]])] + z2 = self.g[choice(desc2[i])] +# print i, ':', choice(desc[left[i]]), choice(desc[right[i]]), choice(desc2[i]) + self.c[i].extend (self.g[i].median(z0, z1, z2, MAX=20)) + + def paths_opt (self): + self.cand() + self.fill_paths() + self.opt_neigh() + print self.score() + print self + + def median_opt (self): + s = 10000 + s2 = s - 1 + while s2 < s: + self.fill_in_oe(0) + s, s2 = s2, self.opt_neigh() + self.fill_in_oe(1) + s2 = self.opt_neigh() + print s2 + print "---" + + def swap_subtree (self, h, v): + if v < self.T.n: return + self.g[v], h.g[v] = h.g[v], self.g[v] + self.swap_subtree(h, self.T.left[v]) + self.swap_subtree(h, self.T.right[v]) + + def mutate (self): + # i = randint (self.T.n,self.T.N-1) + for i in xrange(0, self.T.N): + self.g[i] = self.g[i].rand_neigh().rand_neigh() #.rand_neigh() #.rand_neigh().rand_neigh() + + def rand_hist (self, restr=None): + g = range(1, len(self.input[0][0]) + 1) + for i in xrange(self.T.n, self.T.N): + for j in xrange(0, len(g)): + g[j] = g[j] * choice([-1, 1]) + shuffle(g) + if restr == 1 or restr == 4: + s = " ".join([str(x) for x in g]) + " @" + elif restr == 2 or restr == 3: + s = " ".join([str(x) for x in g]) + " $" + else: s = " ".join([str(x) for x in g]) + " " + choice('$@') + self.g[i] = Genome(s) + + def rand_neigh (self): + for i in xrange(self.T.n, self.T.N): + self.c.append([self.g[i], ]) +## self.heur.append([0,]) #TODO ??? + for j in xrange(0, 100): + self.c[i].append(self.g[i].rand_neigh()) + for j in xrange(0, 200): + self.c[i].append(self.g[i].rand_neigh().rand_neigh()) +## self.heur[i].extend([6]*100) +## self.heur[i].extend([7]*200) + #for j in xrange(0,300): + # self.c[i].append(self.g[i].rand_neigh().rand_neigh().rand_neigh()) + + def rand_opt (self): + s = self.score() + s2 = s - 1 + print s + while s2 < s: + self.cand() + self.fill_in_desc() + self.rand_neigh() + s, s2 = s2, self.opt_neigh() + print s2 + + def fill_in_rand (self): + for i in xrange(self.T.n, self.T.N): + for k in xrange(0, 300): + g = range(1, 41) + for j in xrange(0, len(g)): + g[j] = g[j] * choice([-1, 1]) + shuffle(g) + s = " ".join([str(x) for x in g]) + " " + choice('$@') + self.c[i].append (Genome(s)) +## self.heur[i].append (8) + + def fill_fitch (self): + gl = self.g[0].n + S = [] + for i in xrange(0, self.T.n): + S.append([]) + for j in xrange(0, gl): + S[i].append(set([self.g[i]._g[j], ])) + for i in xrange(self.T.n, self.T.N): + S.append([]) + u, v = self.T.left[i], self.T.right[i] + for j in xrange(0, gl): + s = S[u][j] & S[v][j] + if len(s) == 0: S[i].append(S[u][j] | S[v][j]) + else: S[i].append(s) + for i in reversed(xrange(self.T.n, self.T.N)): + u, v = self.T.left[i], self.T.right[i] + for j in xrange(0, gl): + if not((S[u][j] | S[v][j]) <= S[i][j]): S[u][j], S[v][j] = S[i][j], S[i][j] + for i in xrange(self.T.n, self.T.N): + for ii in xrange(0, 20): + f = set(range(0, gl)) + cand = Genome(copy(self.g[i]._g)) + for j in xrange(0, gl): + if j not in f: continue + g = S[i][j] & f + if len(g) == 0: k = choice(list(f)); #print "u", + else: k = choice(list(g)); #print "O", + cand._g[j], cand._g[k] = k, j + f.discard(k) + f.discard(j) + self.c[i].append(cand) + +### self.c[i].append (Genome(s)) +## self.heur[i].append (8) + + def fill_in_oe (self, p): + self.cand() + for i in xrange(self.T.n, self.T.N): + if level[i] % 2 == p: + s = [] +# s = set() + neigh = [] + for k in xrange(0, len(self.c[i])): + neigh.extend (self.c[i][k].neigh()) + self.c[i].extend (neigh) + print i, len(neigh) + for k in xrange(0, len(neigh)): + s.extend (neigh[k].neigh()) +# s.union(set(neigh[k].neigh())) # preco to tam nic nehadze? +# if k % 50 == 0: +# print '.', len(s) + self.c[i].extend (list(s)) + + def _simul (self, v, mean): + d = poisson (mean, 1)[0] + G = Genome(self.g[self.T.parent[v]]) + for i in xrange(0, d): + G = G.rand_neigh() + self.g[v] = G + if self.T.left[v] < v: self._simul(self.T.left[v], mean) + if self.T.right[v] < v: self._simul(self.T.right[v], mean) + + def simul_data (self, mean, m): + self.g[self.T.N - 1] = Genome (id_genome (m)) + self._simul (self.T.left[self.T.N - 1], mean) + self._simul (self.T.right[self.T.N - 1], mean) + f = open (self.wdir + os.sep + 'true-hist.txt', 'w') + f.write ('# ' + str(self.score()) + '\n') + f.write (self.__str__()) + f.close () + f = open (self.wdir + os.sep + 'G', 'w') + for i in xrange(0, self.T.n): + f.write(self.T.name[i]+' ' + self.g[i].__str__() + '\n') + f.close() + + def dist_tab (self): + T = [] + for i in xrange(0, self.T.n): + self.T.append([]) + for j in xrange(0, i): + T[i].append(self.g[i].dist(self.g[j])) + return T + + def _path (self, i, j): + s = [] + while level[i] > level[j]: + s.append('h' + str(i)) + i = self.T.parent[i] + while level[j] > level[i]: + s.append('h' + str(j)) + j = self.T.parent[j] + while i != j: + s.append('h' + str(i)) + s.append('h' + str(j)) + i = self.T.parent[i] + j = self.T.parent[j] + return s + + def _path2 (self, i, j): + return '+'.join(self._path(i, j)) + + def lower_bound (self, fn): + f = open (fn, 'w') + print >> f, "minimize" + print >> f, "obj:" + all = ['h' + str(i) for i in xrange(0, self.T.N - 1)] + print >> f, '+'.join(all) + print >> f, "subject to" + T = self.dist_tab() + for i in xrange(0, self.T.n): + for j in xrange(0, i): + print >> f, 'c_' + str(i) + ',' + str(j) + ':', self._path2(i, j), '>=', T[i][j] + print >> f, "bounds" + for h in all: + print >> f, h, '>= 0' + print >> f, 'generals' + print >> f, ' '.join(all) + print >> f, "end" + f.close() + + def _path3 (self, i, j, k): + t = self._path(i, j) + self._path(i, k) + self._path(j, k) + print t + t.sort() + print t + last = t[0] + lasti = i = 1 + self.T.n = len(t) + while i < self.T.n: + if t[i] != last: + t[lasti] = last = t[i] + lasti += 1 + i += 1 + t = t[:lasti] + return '+'.join(t) + + def lower_bound2 (self, fn): + f = open (fn, 'w') + print >> f, "minimize" + print >> f, "obj:" + all = ['h' + str(i) for i in xrange(0, self.T.N - 1)] + print >> f, '+'.join(all) + print >> f, "subject to" + T = self.dist_tab() + for i in xrange(0, self.T.n): + for j in xrange(0, i): + print >> f, 'c_' + str(i) + ',' + str(j) + ':', self._path2(i, j), '>=', T[i][j] + for i in xrange(0, self.T.n - 2): + for j in xrange(i + 1, self.T.n - 1): + for k in xrange(j + 1, self.T.n): + fm = open ('med/' + str(i) + '-' + str(j) + '-' + str(k) + '.md', 'r') + print >> f, 'm_' + str(i) + ',' + str(j) + ',' + str(k) + ':', self._path3(i, j, k), '>=', int(fm.read()) + fm.close() + print >> f, "bounds" + for h in all: + print >> f, h, '>= 0' + print >> f, 'generals' + print >> f, ' '.join(all) + print >> f, "end" + f.close() + + def LP (self, fn): + f = open (fn, 'w') + print >> f, "maximize" + print >> f, "obj:" + + L = self.g[0].n + cycles = [_c(g, i) for g in xrange(0, self.T.N - 1) for i in xrange(0, L)] + print >> f, '+'.join(cycles) + + print >> f, "subject to" + for g in xrange(0, self.T.n): + for i in xrange(0, L): + j = self.g[g]._g[i] + if i < j: + print g, i, j + print >> f, _g(g, i, j) + " = 1" + for g in xrange(0, self.T.N): + for i in xrange(0, L - 1): + print >> f, '+'.join([_g(g, i, j) for j in xrange(0, L) if i != j]) + " = 1" + for g in xrange(0, self.T.N - 1): + h = self.T.parent[g] + for i in xrange(0, L - 1): + for j in xrange(i + 1, L): + print >> f, _p(g, i, j), '-', _g(g, i, j), ">= 0" + print >> f, _p(g, i, j), '-', _g(h, i, j), ">= 0" + for k in xrange(0, L): + if k != i and k != j: + print >> f, _p(g, i, j), '-', _p(g, i, k), '-', _g(g, k, j), ">= -1" + print >> f, _p(g, i, j), '-', _p(g, i, k), '-', _g(h, k, j), ">= -1" + print >> f, _c(g, j), '+', _p(g, i, j) + "<= 1" + + print >> f, "binary" + print >> f, '\n'.join(cycles) + print >> f, '\n'.join([_g(g, i, j) for g in xrange(0, self.T.N - 1) for i in xrange(0, L - 1) for j in xrange(i + 1, L)]) + print >> f, '\n'.join([_p(g, i, j) for g in xrange(0, self.T.N - 1) for i in xrange(0, L - 1) for j in xrange(i + 1, L)]) + print >> f, "end" + f.close() + + + def LPnove (self, fn): + f = open (fn, 'w') + print >> f, "maximize" + print >> f, "obj:" + + L = self.g[0].n + cycles = [_c(g, i) for g in xrange(0, self.T.N - 1) for i in xrange(0, L)] + print >> f, '+'.join(cycles) + + print >> f, "subject to" + for g in xrange(0, self.T.n): + for i in xrange(0, L): + j = self.g[g]._g[i] + if i < j: + print g, i, j + print >> f, _g(g, i, j) + " = 1" + for g in xrange(0, self.T.N): + for i in xrange(0, L - 1): + print >> f, '+'.join([_g(g, i, j) for j in xrange(0, L) if i != j]) + " = 1" + for g in xrange(0, self.T.N - 1): + h = self.T.parent[g] + for i in xrange(0, L): + for j in xrange(0, L): + if i != j: + print >> f, _u(g, i), '-', _u(g, j), '+', L, _g(g, i, j), '-', L, _c(g, i), "<=", L - 1 + print >> f, _u(g, j), '-', _u(g, i), '+', L, _g(g, i, j), '-', L, _c(g, i), "<=", L - 1 + print >> f, _u(g, i), '-', _u(g, j), '+', L, _g(h, i, j), '-', L, _c(g, i), "<=", L - 1 + print >> f, _u(g, j), '-', _u(g, i), '+', L, _g(h, i, j), '-', L, _c(g, i), "<=", L - 1 + for g in xrange(0, self.T.N - 1): + for i in xrange(0, L): + print >> f, _u(g, i), '+', _c(g, i), ' >= 1' +# print >> f, _p(g,i,j), '-', _g(g,i,j), ">= 0" +# print >> f, _p(g,i,j), '-', _g(h,i,j), ">= 0" +# for k in xrange(0,L): +# if k != i and k != j: +# print >> f, _p(g,i,j), '-', _p(g,i,k), '-', _g(g,k,j), ">= -1" +# print >> f, _p(g,i,j), '-', _p(g,i,k), '-', _g(h,k,j), ">= -1" +# for i in xrange(0,L-1): +# for j in xrange(i+1,L): +# print >> f, _c(g,j), '+', _p(g,i,j) + "<= 1" + + print >> f, "binary" + print >> f, '\n'.join(cycles) + print >> f, '\n'.join([_g(g, i, j) for g in xrange(0, self.T.N - 1) for i in xrange(0, L - 1) for j in xrange(i + 1, L)]) +# print >> f, '\n'.join([_p(g, i, j) for g in xrange(0,self.T.N-1) for i in xrange(0,L-1) for j in xrange(i+1,L)]) + print >> f, "bounds" + for g in xrange(0, self.T.N - 1): + for i in xrange(0, L): + print >> f, _u(g, i), '<=', L - 1 + print >> f, "end" + f.close() + + def draw_tree (self, treewidth, dy, lengths=False): + n, N = self.T.n, self.T.N + height = N*[0] + for i in xrange(n,N): + height[i] = max(height[self.T.left[i]], height[self.T.right[i]]) + 1 + new() + posx, posy = N*[0], N*[0] + for i in xrange(0,n): + posy[i] = i*dy*1j + labelr(posy[i], "\\it "+self.T.name[i]) + #print i, height[i], self.T.level[i] + #labelr(treewidth*height[i]+posy[i], self.T.name[i]) + for i in xrange(n,N): + l, r, h, lev = self.T.left[i], self.T.right[i], height[i], self.T.level[i] + #print i, h, lev + posx[i] = -float(treewidth) / (lev+h) * h + posy[i] = (posy[l]+posy[r]) / 2.0 + line (posx[i]+posy[l], posx[l]+posy[l]) + line (posx[i]+posy[r], posx[r]+posy[r]) + line (posx[i]+posy[l], posx[i]+posy[r]) + if lengths: + setcolor (color.grey(1)) + if i == N-1: + p = posx[i]+(posy[l]+posy[r])/2 + node (p, 0.15, str(self.g[i].dist(self.g[self.T.left[i]]) + + self.g[i].dist(self.g[self.T.right[i]]))) + else: + node ((posx[i]+posx[l])/2 +posy[l], 0.15, str(self.g[i].dist(self.g[self.T.left[i]]))) + node ((posx[i]+posx[r])/2 +posy[r], 0.15, str(self.g[i].dist(self.g[self.T.right[i]]))) + setcolor (color.grey(0)) + #line (treewidth*height[i]+posy[l], treewidth*height[l]+posy[l]) + #line (treewidth*height[i]+posy[r], treewidth*height[r]+posy[r]) + #line (treewidth*height[i]+posy[l], treewidth*height[i]+posy[r]) + save("T.pdf") + + # verzia pre veroniku + def opt_neigh2 (self): + "Najde historiu v okoli s najmensim skore" + if len(self.c[self.T.left[self.T.N - 1]]) < len(self.c[self.T.right[self.T.N - 1]]): + self.c[self.T.N - 1] = self.c[self.T.left[self.T.N - 1]] + else: + self.c[self.T.N - 1] = self.c[self.T.right[self.T.N - 1]] + B, M = [], [] + print [len(self.c[i]) for i in xrange(self.T.n, self.T.N)] + for u in xrange(0, self.T.n): + B.append(len(self.c[u]) * [0]) + M.append([]) + for i in xrange(0, len(self.c[u])): + M[u].append([0, 0]) + for u in xrange(self.T.n, self.T.N): + v, w = self.T.left[u], self.T.right[u] + ul, vl, wl = len(self.c[u]), len(self.c[v]), len(self.c[w]) + B.append([]); M.append([]) + for i in xrange(0, ul): + B[u].append(penalty(self.c[u][i].numch())) + M[u].append([0, 0]) + chl = [] + chl.extend(self.c[v]) + chl.extend(self.c[w]) + DM = dist_matrix(self.c[u], chl) + for i in xrange(0, ul): + m, mi, q = 999999, 0, 1 + for j in xrange(0, vl): + t = B[v][j] + DM[i][j] + if t < m: m, mi = t, j # , q = t, j, ns[v][j] + B[u][i], M[u][i][0] = B[u][i] + m, mi #, ns[u][i] = B[u][i]+m, mi, q + m, mi, q = 999999, 0, 1 + for j in xrange(0, wl): + t = B[w][j] + DM[i][vl + j] + if t < m: m, mi = t, j # , q = t, j, ns[w][j] + B[u][i], M[u][i][1] = B[u][i] + m, mi + m, mi, q = 999999, 0, 1 + for i in xrange(0, len(self.c[self.T.N - 1])): + if B[self.T.N - 1][i] < m: m, mi = B[self.T.N - 1][i], i #, q = B[self.T.N-1][i], i, ns[self.T.N-1][i] + self._replace (M, self.T.N - 1, mi) + if m != self.score(): print "BUGGGGGGGGGGG", m, self.score() + return m + + + +# +# def lower_bound (self): +# global self.T.n, self.T.N, self.T.left, self.T.right +# desc, = [] +# for i in xrange(0,self.T.n): +# desc.append([i,]) +# for i in xrange(self.T.n,self.T.N): +# l, r = self.T.left[i], self.T.right[i] +# for j in xrange(0,len(desc[l])): +# for k in xrange(0,len(desc[r])): +# +# self.c[i].extend(self.g[desc[l][j]].path(self.g[desc[r][k]])) +# desc.append (desc[l]+desc[r]) + + + +# def fill_in_rand (self): +# global self.T.n, self.T.N +# g = range(1,27) +# for i in xrange(self.T.n,self.T.N): +# for j in xrange(0,len(g)): +# g[j] = g[j]*choice([-1,1]) +# shuffle(g) +# s = " ".join([str(x) for x in g]) + " " + choice('$@') +# self.g[i] = Genome(s) diff --git a/LinRev.py b/LinRev.py new file mode 100644 index 0000000..af22d1e --- /dev/null +++ b/LinRev.py @@ -0,0 +1,308 @@ +from Genome import Genome +from DCJ import DCJ +from copy import deepcopy, copy +from random import randint +import os + +class LinRev(Genome): + def __init__ (self, s): + if isinstance(s, str): + self.set (s) + else: + self.n = s.n + self.g = copy(s.g) + self.s = copy(s.s) + + def set (self, s): + s = s.split() + if s[-1] != '$': + raise TypeError("The input genome is not linear") + s.pop() + self.n = len(s) + self.g, self.s, check = [0], [True], (self.n + 1) * [False] + for x in s: + x = int(x) # try + self.s.append(x > 0) + x = abs(x) + if not (0 < x <= self.n): print 'mimo' + if check[x]: print 'uz bolo...' + check[x] = True + self.g.append(x) + self.g.append (self.n + 1) + self.s.append (True) + + def __len__ (self): return self.n + + def __str__ (self): + return self.toString() + ' $' + + def toString(self): + sg = [('' if self.s[i] else '-') + str(self.g[i]) for i in xrange(1, self.n + 1)] + return " ".join(sg) + + def numch (self): + return (1, 0) + + def _find_components (self, pi, si): + n, badc = len(pi) - 2, 0 + M1, M2, S1, S2, M, m, comp = [n + 1], [0], [0], [0], [n + 1], [0], [] + Max, Min, mark1, mark2 = (n + 2) * [0], (n + 2) * [n + 1], (n + 2) * [False], (n + 2) * [False] + for i in xrange(1, n + 2): + # compute M[i] + if pi[i - 1] > pi[i]: + M1.append (pi[i - 1]) + else: + while M1[-1] < pi[i]: M1.pop() + M.append (M1[-1]) + # find direct components + #print S1[-1], pi, M + s = S1[-1] + while s > 0 and (pi[s] > pi[i] or M[s] < pi[i]): + S1.pop() + s2 = S1[-1] + Max[s2] = max(Max[s], Max[s2]) + mark1[s2] |= mark1[s] + s = s2 + pii, pis = pi[i], pi[s] + if si[i] and M[i] == M[s] and i - s == pii - pis: + if pii < pis: pii, pis = pis, pii + good = mark1[s] or pii==pis+1 + comp.append ((pis, pii, good)) + if not good: + badc += 1 + mark1[s] = False + # compute m[i] + if pi[i - 1] < pi[i]: + M2.append (pi[i - 1]) + else: + while M2[-1] > pi[i]: M2.pop() + m.append (M2[-1]) + # find reversed components + s = S2[-1] + while s > 0 and (pi[s] < pi[i] or m[s] > pi[i]): + S2.pop() + s2 = S2[-1] + Min[s2] = min(Min[s], Min[s2]) + mark2[s2] |= mark2[s] + s = s2 + pii, pis = pi[i], pi[s] + if not si[i] and m[i] == m[s] and i - s == pis - pii: + if pii < pis: pii, pis = pis, pii + good = mark2[s] or pii==pis+1 + comp.append ((pis, pii, good)) + if not good: + badc += 1 + mark2[s] = False + # update stacks and marks + if si[i]: S1.append (i) + else: S2.append (i) + Max.append(pi[i]) + Min[S2[-1]] = min(Min[S2[-1]], pi[i]) + Min.append(pi[i]) + Max[S1[-1]] = max(Max[S2[-1]], pi[i]) + mark1[S1[-1]] = not si[i]; + mark2[S2[-1]] = si[i]; + return badc, comp + + def _cover(self, comp): + n = m = len(comp) # n = #components; m = #all nodes (including square nodes - these have numbers n...m-1) + parent = 2*n*[-1] + node, first, last = [], [], [-1] + i = 0 + for (x, y, good) in comp: + if x < last[-1]: # component containing + while len(first) > 0 and first[-1] > x: + parent[node[-1]] = i + node.pop() + first.pop() + last.pop() + if x > last[-1]: # new circle node + node.append(i) + first.append(x) + last.append(y) + if x == last[-1]: # chain + last_node = node[-1] + if last_node < n: # last_node is a circle node - we create a new square node + node[-1] = parent[last_node] = parent[i] = m + m += 1 + else: # square node + parent[i] = last_node + last[-1] = y + i += 1 + #print 'parents:', parent[0:m] + + last_deg3 = -1 + leaves = 0 + j = n + deg = m*[0] # degree after cutting good branches + branch = m*[0] # number of bad components on the branch + for i in xrange(0,n): + if parent[i] > j: # process square node j first + branch[parent[j]] += branch[j] + if deg[j] > 0: + deg[parent[j]] += 1 + if deg[j] > 2: + last_deg3 = j + j += 1 + good = comp[i][2] + branch[parent[i]] += branch[i] + if not good or deg[i] > 0: # if it is bad or it has bad descendant + deg[parent[i]] += 1 + if not good: + branch[i] += 1 + branch[parent[i]] += branch[i] + if deg[i] == 0: # bad leaf + leaves += 1 + if deg[i] > 2: + last_deg3 = i + while j 2: + last_deg3 = j + if parent[j] != -1: + branch[parent[j]] += branch[j] + if deg[j] > 0: + deg[parent[j]] += 1 + j += 1 + #print 'degs:', deg[0:m] + #print 'branches:', branch[0:m] + + v = last_deg3 + if last_deg3 == -1: + #print 'no deg3 vertex' + return 2 # we assume that badc > 2 + else: + badc = 0 + v = parent[v] + while v != -1: + if v < n and not comp[v][2]: + badc += 1 + #print 'toto:', v + v = parent[v] + if badc > 0: + leaves += 1 + if badc == 1: + return leaves # short branch + + #print 'leaves: ', leaves + if leaves%2==0: + return leaves + + short_branch = False + for i in xrange(0,m-1): + if deg[parent[i]] >= 3 and branch[i] == 1: + short_branch = True + break + if short_branch: + return leaves + else: + return leaves+1 +# print deg + + def cappedString (self): + sg = ["1"] + sg.extend([('' if self.s[i] else '-') + str(self.g[i]+1) for i in xrange(1, self.n + 1)]) + sg.append(str(self.n+2)) + return " ".join(sg) + " $" + + def dist (self, B): + if self.n != B.n: + raise TypeError ("Computing distance of genomes with different number of markers.") +# print 'BUUG, rozne dlzky', self.n, B.n; print self; print B + inv, invs = (self.n + 2) * [0], (self.n + 2) * [True] + for i in xrange(1, self.n + 1): + inv[B.g[i]] = i + invs[B.g[i]] = B.s[i] + inv[self.n + 1] = self.n + 1 + pi, s = [], [] + for i in xrange(0, self.n + 2): + pi.append(inv[self.g[i]]) + s.append (invs[self.g[i]] == self.s[i]) + # print pi, s + d = DCJ(self.cappedString()).dist(DCJ(B.cappedString())) + #print 'dcj: ', d + badc, comp = self._find_components (pi, s) + #print 'bad components: ', badc + #print 'comp: ', comp + if badc <= 2: + return d + badc + #print 'cover:' + return d + self._cover(comp) + + def dist2 (self, B): + f = open ('input', 'w') + print >>f, '> g' + print >>f, self + print >>f, '> h' + print >>f, B + f.close() + os.system('rm output') + os.system('./grimm -f input -o output -Ld') + f = open ('output', 'r') + for l in f: + if l[:18] == 'Reversal Distance:': + f.close() + return int(l[19:]) + raise Error("grimm") + + def rev (self, i, j): # reverse interval from i to j (inclusive) + while i < j: + self.g[i], self.s[i], self.g[j], self.s[j] = self.g[j], not self.s[j], self.g[i], not self.s[i] + i, j = i + 1, j - 1 + if i == j: self.s[i] = not self.s[i] + + def neigh (self): + c = [] + for i in xrange (1, self.n + 1): + for j in xrange (i, self.n + 1): + t = deepcopy (self) + t.rev(i, j) + c.append(t) + return c + + def neigh_part (self, i1, i2, j1, j2): + """ + List all genomes within one DCJ operation from the genome. + + The list has size O(n^2). + """ + c = [] + i1 = max(i1, 1) + i2 = max(i2, 1) + i2 = min(i2, self.n+1) + j2 = min(j2, self.n+1) + for i in xrange (i1, i2): + for j in xrange (j1, j2): + t = deepcopy (self) + t.rev(i, j) + c.append(t) + return c + + def betterer_neigh (self, z): + g = DCJ(self.g) + z = [DCJ(g.__str__()) for g in z] + return [LinRev(h.__str__()) for h in g.betterer_neigh(z) if h.linear()] + + def rand_neigh (self): + i = randint (1, self.n) + j = randint (i, self.n) + g = deepcopy (self) + g.rev(i, j) + return g + + def median (self, g1, g2, g3): + f = open ('input', 'w') + print >>f, '> g1' + print >>f, g1.toString() + print >>f, '> g2' + print >>f, g2.toString() + print >>f, '> g3' + print >>f, g3.toString() + f.close() + print os.system("ms/siepel/inv_medians -f input -n " + str(len(g1)) + " -a -o output") + ret = [] + f = open ('input', 'r') + for l in f: + if l[0] != '>': + ret.append(LinRev(l+' $')) + f.close() + return ret diff --git a/MultiLinDCJ.py b/MultiLinDCJ.py new file mode 100644 index 0000000..78192d5 --- /dev/null +++ b/MultiLinDCJ.py @@ -0,0 +1,94 @@ +import os +import sys +from DCJ import DCJ +from random import choice, randint +from copy import deepcopy + +class MultiLinDCJ(DCJ): + def median_solver (self, g1, g2, g3, errors=0): + """ + Use ASMedian solver to find median of g1, g2, and g3. + """ + if not (g1.multilin() and g2.multilin() and g3.multilin()): + print g1 + print g2 + print g3 + raise TypeError("The input genomes are not multilinear") + + r = str(randint(0,99999)) + inf = "input" + r + outf = "output" + r + f = open (inf, 'w') + f.write('> Genome 1\n') + f.write(g1.__str__().replace("$", "$\n")) + f.write('> Genome 2\n') + f.write(g2.__str__().replace("$", "$\n")) + f.write('> Genome 3\n') + f.write(g3.__str__().replace("$", "$\n")) + f.close() + os.system('java -cp ms/ASM ASMedian '+inf+' >> '+outf) + f = open (outf, 'r') + s = '' + zac = False + for line in f: + if not zac: + if line[0] == '>': zac = True + continue + pos = line.find(':') + if pos == -1: break + if line[pos - 1] == '-': s += line[pos + 1:] + ' $ ' + else: s += line[pos + 1:] + ' @ ' + f.close() + os.system('rm '+inf) + os.system('rm '+outf) + print '.' + M = DCJ(s) + if self.n != M.n: + if errors == 3: + #TODO: najlepsie odchytit exception a vypisat historiu + #sys.exit() + return [g1, g2, g3] + else: + print "ASM ERROR" + return self.median_solver(g1, g2, g3, errors+1) + r = self.linearize(M) + return r + + def linearize (self, g): + """ + Returns a set of multilinear genomes close to the genome. + """ + L, C = g.numch() + if C == 0: return [MultiLinDCJ(g._g), ] + while C > 1: g = choice ([h for h in g.neigh() if h.numcirc() < C]); C = g.numcirc() + return [MultiLinDCJ(h._g) for h in g.neigh() if h.multilin()] + + def rand_neigh(self): + u = deepcopy (self) + i, j = randint(0, self.n-1), randint(0, self.n-1) + #i, j = min (i, self._g[i]), min (j, self._g[j]) + u.swap (i, self._g[i], j, self._g[j]) + while not u.multilin(): + u = deepcopy (self) + i, j = randint(0, self.n-1), randint(0, self.n-1) + #i, j = min (i, self._g[i]), min (j, self._g[j]) + u.swap (i, self._g[i], j, self._g[j]) + return u #MultiLinDCJ(self.rand_neigh2(3)._g) + + def neigh(self): + c = [] + for i in xrange(0, self.n): + if self._g[i] >= i: + for j in xrange(i + 1, self.n): + if self._g[j] >= j and j != self._g[i]: + u = deepcopy(self) + u.swap (i, self._g[i], j, self._g[j]) + if u.multilin(): + c.append(u) + if self._g[i] != i: + j = self._g[i] + u = deepcopy(self) + u.swap (i, j, i, j) + if u.multilin(): + c.append(u) + return c diff --git a/TestDCJ.py b/TestDCJ.py new file mode 100644 index 0000000..dc776d4 --- /dev/null +++ b/TestDCJ.py @@ -0,0 +1,112 @@ +import os +import sys +from DCJ import DCJ +from random import choice, randint +from copy import deepcopy + +class TestDCJ(DCJ): + def median2 (self, g1, g2, g3): + """ + Use ASMedian solver to find median of g1, g2, and g3. + """ + TestDCJ.nMedianCalls += 1 + if TestDCJ.nMedianCalls > 10000: + sys.exit() + + if not (g1.multilin() and g2.multilin() and g3.multilin()): + print g1 + print g2 + print g3 + raise TypeError("The input genomes are not multilinear") + + r = str(randint(0,99999)) + inf = "input" + r + outf = "output" + r + f = open (inf, 'w') + f.write('> Genome 1\n') + f.write(g1.__str__().replace("$", "$\n")) + f.write('> Genome 2\n') + f.write(g2.__str__().replace("$", "$\n")) + f.write('> Genome 3\n') + f.write(g3.__str__().replace("$", "$\n")) + f.close() + os.system('java -cp ms/ASM ASMedian '+inf+' >> '+outf) + f = open (outf, 'r') + s = '' + zac = False + for line in f: + if not zac: + if line[0] == '>': zac = True + continue + pos = line.find(':') + if pos == -1: break + if line[pos - 1] == '-': s += line[pos + 1:] + ' $ ' + else: s += line[pos + 1:] + ' @ ' + f.close() + os.system('rm '+inf) + os.system('rm '+outf) + print '.' + M = DCJ(s) + if self.n != M.n: print 'MS: ' + s; sys.exit() + r = self.linearize(M) + return r + + def linearize (self, g): + """ + Returns a set of multilinear genomes close to the genome. + """ + L, C = g.numch() + if C == 0: return [TestDCJ(g._g), ] + while C > 1: g = choice ([h for h in g.neigh() if h.numcirc() < C]); C = g.numcirc() + return [TestDCJ(h._g) for h in g.neigh() if h.multilin()] + + def median (self, g1, g2, g3): + g = self.median2(g1, g2, g3)[0] + t = g.median_score(g1, g2, g3) + S = [set(), set(), set(), set()] + a = [[g], [], [], []] + i = 0 + while i < 10000 and len(S[0]) <= 300: # len(a) > 0 and len(S) <= 300: + if len(a[0]) > 0: g = a[0].pop() + elif len(a[1]) > 0: g = a[1].pop() + elif len(a[2]) > 0: g = a[2].pop() + elif len(a[3]) > 0: g = a[3].pop() + else: break + for h in g.neigh(): + k = h.median_score(g1, g2, g3) - t + i += 1 + if i >= 10000: break + if k > 3: continue + if k < 0: k = 0 + if not h in S[k]: + S[k].add(h) + a[k].append(h) + if len(S[0]) > 300: break + #print len(S[0]), len(S[1]), len(S[2]), len(S[3]), i + l = list(S[0]) + if len(l) < 300: l.extend(list(S[1])) + if len(l) < 300: l.extend(list(S[2])) + if len(l) < 300: l.extend(list(S[3])) + if len(l) > 300: l = l[0:300] + return l + + def rand_neigh(self): + return TestDCJ(self.rand_neigh2(3)._g) + + def neigh(self): + c = [] + for i in xrange(0, self.n): + if self._g[i] >= i: + for j in xrange(i + 1, self.n): + if self._g[j] >= j and j != self._g[i]: + u = deepcopy(self) + u.swap (i, self._g[i], j, self._g[j]) + if u.multilin(): + c.append(u) + if self._g[i] != i: + j = self._g[i] + u = deepcopy(self) + u.swap (i, j, i, j) + if u.multilin(): + c.append(u) + return c diff --git a/TestDCJ2.py b/TestDCJ2.py new file mode 100644 index 0000000..3a05f4a --- /dev/null +++ b/TestDCJ2.py @@ -0,0 +1,82 @@ +import os +import sys +from DCJ import DCJ +from random import choice, randint +from copy import deepcopy + +class TestDCJ2(DCJ): + def median (self, g1, g2, g3): + """ + Use ASMedian solver to find median of g1, g2, and g3. + """ + TestDCJ2.nMedianCalls += 1 + if TestDCJ2.nMedianCalls > 100000: + sys.exit() + + if not (g1.multilin() and g2.multilin() and g3.multilin()): + print g1 + print g2 + print g3 + raise TypeError("The input genomes are not multilinear") + + r = str(randint(0,99999)) + inf = "input" + r + outf = "output" + r + f = open (inf, 'w') + f.write('> Genome 1\n') + f.write(g1.__str__().replace("$", "$\n")) + f.write('> Genome 2\n') + f.write(g2.__str__().replace("$", "$\n")) + f.write('> Genome 3\n') + f.write(g3.__str__().replace("$", "$\n")) + f.close() + os.system('java -cp ms/ASM ASMedian '+inf+' >> '+outf) + f = open (outf, 'r') + s = '' + zac = False + for line in f: + if not zac: + if line[0] == '>': zac = True + continue + pos = line.find(':') + if pos == -1: break + if line[pos - 1] == '-': s += line[pos + 1:] + ' $ ' + else: s += line[pos + 1:] + ' @ ' + f.close() + os.system('rm '+inf) + os.system('rm '+outf) + print '.' + M = DCJ(s) + if self.n != M.n: print 'MS: ' + s; sys.exit() + r = self.linearize(M) + return r + + def linearize (self, g): + """ + Returns a set of multilinear genomes close to the genome. + """ + L, C = g.numch() + if C == 0: return [TestDCJ2(g._g), ] + while C > 1: g = choice ([h for h in g.neigh() if h.numcirc() < C]); C = g.numcirc() + return [TestDCJ2(h._g) for h in g.neigh() if h.multilin()] + + def rand_neigh(self): + return TestDCJ2(self.rand_neigh2(3)._g) + + def neigh(self): + c = [] + for i in xrange(0, self.n): + if self._g[i] >= i: + for j in xrange(i + 1, self.n): + if self._g[j] >= j and j != self._g[i]: + u = deepcopy(self) + u.swap (i, self._g[i], j, self._g[j]) + if u.multilin(): + c.append(u) + if self._g[i] != i: + j = self._g[i] + u = deepcopy(self) + u.swap (i, j, i, j) + if u.multilin(): + c.append(u) + return c diff --git a/Tree.py b/Tree.py new file mode 100644 index 0000000..d8a65ac --- /dev/null +++ b/Tree.py @@ -0,0 +1,64 @@ +import re + +class Tree: + # 0...n-1 su listy, n...N-1 su vnutorne vrcholy + #n = N = 0 + #name, left, right, parent, level = [], [], [], [], [] + #leaf = {} + + def __init__(self, wdir): + f = open ('data/' + wdir + '/T', 'r') + s = f.readline () + f.close () + self._set_tree(s) + self._levels (self.N - 1, 0) + #print self.n, self.N + + def _set_tree(self, s): + #print s + s = re.sub(r':[0-9.]*', '', s) + s = s.translate (None, "(.;\n").replace(",", " ").replace(")", " * ").split() + #print s + self.n = 0 + self.name = [] + self.left, self.right, self.parent = [], [], [] + self.leaf = {} + namel, namer = [], [] + for i in xrange(0, len(s)): + if s[i] == '*': + s[i] = -1 + else: + self.name.append(s[i]) + namel.append(s[i]) + namer.append(s[i]) + self.left.append(self.n) + self.right.append(self.n) + self.parent.append(self.n) + self.leaf[s[i]] = self.n + s[i] = self.n + self.n += 1 + self.N = self.n + z = [] + for i in xrange(0, len(s)): + if s[i] >= 0: + z.append(s[i]) + else: + r, l = z.pop(), z.pop() + namel.append(namel[l]) + namer.append(namer[r]) + self.name.append(namel[self.N] + '-' + namer[self.N]) + self.left.append(l) + self.right.append(r) + self.parent.append(self.N) + self.parent[l] = self.parent[r] = self.N + z.append(self.N) + self.N += 1 + self.level = self.N * [0] + # parent[left[N-1]] = right[N-1] + # parent[right[N-1]] = left[N-1] + + def _levels (self, v, l): + self.level[v] = l + if v < self.n: return + self._levels (self.left[v], l + 1) + self._levels (self.right[v], l + 1) diff --git a/argparse.py b/argparse.py new file mode 100644 index 0000000..0658472 --- /dev/null +++ b/argparse.py @@ -0,0 +1,2391 @@ +# Author: Steven J. Bethard . + +"""Command-line parsing library + +This module is an optparse-inspired command-line parsing library that: + + - handles both optional and positional arguments + - produces highly informative usage messages + - supports parsers that dispatch to sub-parsers + +The following is a simple usage example that sums integers from the +command-line and writes the result to a file:: + + parser = argparse.ArgumentParser( + description='sum the integers at the command line') + parser.add_argument( + 'integers', metavar='int', nargs='+', type=int, + help='an integer to be summed') + parser.add_argument( + '--log', default=sys.stdout, type=argparse.FileType('w'), + help='the file where the sum should be written') + args = parser.parse_args() + args.log.write('%s' % sum(args.integers)) + args.log.close() + +The module contains the following public classes: + + - ArgumentParser -- The main entry point for command-line parsing. As the + example above shows, the add_argument() method is used to populate + the parser with actions for optional and positional arguments. Then + the parse_args() method is invoked to convert the args at the + command-line into an object with attributes. + + - ArgumentError -- The exception raised by ArgumentParser objects when + there are errors with the parser's actions. Errors raised while + parsing the command-line are caught by ArgumentParser and emitted + as command-line messages. + + - FileType -- A factory for defining types of files to be created. As the + example above shows, instances of FileType are typically passed as + the type= argument of add_argument() calls. + + - Action -- The base class for parser actions. Typically actions are + selected by passing strings like 'store_true' or 'append_const' to + the action= argument of add_argument(). However, for greater + customization of ArgumentParser actions, subclasses of Action may + be defined and passed as the action= argument. + + - HelpFormatter, RawDescriptionHelpFormatter, RawTextHelpFormatter, + ArgumentDefaultsHelpFormatter -- Formatter classes which + may be passed as the formatter_class= argument to the + ArgumentParser constructor. HelpFormatter is the default, + RawDescriptionHelpFormatter and RawTextHelpFormatter tell the parser + not to change the formatting for help text, and + ArgumentDefaultsHelpFormatter adds information about argument defaults + to the help. + +All other classes in this module are considered implementation details. +(Also note that HelpFormatter and RawDescriptionHelpFormatter are only +considered public as object names -- the API of the formatter objects is +still considered an implementation detail.) +""" + +__version__ = '1.1' +__all__ = [ + 'ArgumentParser', + 'ArgumentError', + 'ArgumentTypeError', + 'FileType', + 'HelpFormatter', + 'ArgumentDefaultsHelpFormatter', + 'RawDescriptionHelpFormatter', + 'RawTextHelpFormatter', + 'MetavarTypeHelpFormatter', + 'Namespace', + 'Action', + 'ONE_OR_MORE', + 'OPTIONAL', + 'PARSER', + 'REMAINDER', + 'SUPPRESS', + 'ZERO_OR_MORE', +] + + +import collections as _collections +import copy as _copy +import os as _os +import re as _re +import sys as _sys +import textwrap as _textwrap + +from gettext import gettext as _, ngettext + + +def _callable(obj): + return hasattr(obj, '__call__') or hasattr(obj, '__bases__') + + +SUPPRESS = '==SUPPRESS==' + +OPTIONAL = '?' +ZERO_OR_MORE = '*' +ONE_OR_MORE = '+' +PARSER = 'A...' +REMAINDER = '...' +_UNRECOGNIZED_ARGS_ATTR = '_unrecognized_args' + +# ============================= +# Utility functions and classes +# ============================= + +class _AttributeHolder(object): + """Abstract base class that provides __repr__. + + The __repr__ method returns a string in the format:: + ClassName(attr=name, attr=name, ...) + The attributes are determined either by a class-level attribute, + '_kwarg_names', or by inspecting the instance __dict__. + """ + + def __repr__(self): + type_name = type(self).__name__ + arg_strings = [] + for arg in self._get_args(): + arg_strings.append(repr(arg)) + for name, value in self._get_kwargs(): + arg_strings.append('%s=%r' % (name, value)) + return '%s(%s)' % (type_name, ', '.join(arg_strings)) + + def _get_kwargs(self): + return sorted(self.__dict__.items()) + + def _get_args(self): + return [] + + +def _ensure_value(namespace, name, value): + if getattr(namespace, name, None) is None: + setattr(namespace, name, value) + return getattr(namespace, name) + + +# =============== +# Formatting Help +# =============== + +class HelpFormatter(object): + """Formatter for generating usage messages and argument help strings. + + Only the name of this class is considered a public API. All the methods + provided by the class are considered an implementation detail. + """ + + def __init__(self, + prog, + indent_increment=2, + max_help_position=24, + width=None): + + # default setting for width + if width is None: + try: + width = int(_os.environ['COLUMNS']) + except (KeyError, ValueError): + width = 80 + width -= 2 + + self._prog = prog + self._indent_increment = indent_increment + self._max_help_position = max_help_position + self._width = width + + self._current_indent = 0 + self._level = 0 + self._action_max_length = 0 + + self._root_section = self._Section(self, None) + self._current_section = self._root_section + + self._whitespace_matcher = _re.compile(r'\s+') + self._long_break_matcher = _re.compile(r'\n\n\n+') + + # =============================== + # Section and indentation methods + # =============================== + def _indent(self): + self._current_indent += self._indent_increment + self._level += 1 + + def _dedent(self): + self._current_indent -= self._indent_increment + assert self._current_indent >= 0, 'Indent decreased below 0.' + self._level -= 1 + + class _Section(object): + + def __init__(self, formatter, parent, heading=None): + self.formatter = formatter + self.parent = parent + self.heading = heading + self.items = [] + + def format_help(self): + # format the indented section + if self.parent is not None: + self.formatter._indent() + join = self.formatter._join_parts + for func, args in self.items: + func(*args) + item_help = join([func(*args) for func, args in self.items]) + if self.parent is not None: + self.formatter._dedent() + + # return nothing if the section was empty + if not item_help: + return '' + + # add the heading if the section was non-empty + if self.heading is not SUPPRESS and self.heading is not None: + current_indent = self.formatter._current_indent + heading = '%*s%s:\n' % (current_indent, '', self.heading) + else: + heading = '' + + # join the section-initial newline, the heading and the help + return join(['\n', heading, item_help, '\n']) + + def _add_item(self, func, args): + self._current_section.items.append((func, args)) + + # ======================== + # Message building methods + # ======================== + def start_section(self, heading): + self._indent() + section = self._Section(self, self._current_section, heading) + self._add_item(section.format_help, []) + self._current_section = section + + def end_section(self): + self._current_section = self._current_section.parent + self._dedent() + + def add_text(self, text): + if text is not SUPPRESS and text is not None: + self._add_item(self._format_text, [text]) + + def add_usage(self, usage, actions, groups, prefix=None): + if usage is not SUPPRESS: + args = usage, actions, groups, prefix + self._add_item(self._format_usage, args) + + def add_argument(self, action): + if action.help is not SUPPRESS: + + # find all invocations + get_invocation = self._format_action_invocation + invocations = [get_invocation(action)] + for subaction in self._iter_indented_subactions(action): + invocations.append(get_invocation(subaction)) + + # update the maximum item length + invocation_length = max([len(s) for s in invocations]) + action_length = invocation_length + self._current_indent + self._action_max_length = max(self._action_max_length, + action_length) + + # add the item to the list + self._add_item(self._format_action, [action]) + + def add_arguments(self, actions): + for action in actions: + self.add_argument(action) + + # ======================= + # Help-formatting methods + # ======================= + def format_help(self): + help = self._root_section.format_help() + if help: + help = self._long_break_matcher.sub('\n\n', help) + help = help.strip('\n') + '\n' + return help + + def _join_parts(self, part_strings): + return ''.join([part + for part in part_strings + if part and part is not SUPPRESS]) + + def _format_usage(self, usage, actions, groups, prefix): + if prefix is None: + prefix = _('usage: ') + + # if usage is specified, use that + if usage is not None: + usage = usage % dict(prog=self._prog) + + # if no optionals or positionals are available, usage is just prog + elif usage is None and not actions: + usage = '%(prog)s' % dict(prog=self._prog) + + # if optionals and positionals are available, calculate usage + elif usage is None: + prog = '%(prog)s' % dict(prog=self._prog) + + # split optionals from positionals + optionals = [] + positionals = [] + for action in actions: + if action.option_strings: + optionals.append(action) + else: + positionals.append(action) + + # build full usage string + format = self._format_actions_usage + action_usage = format(optionals + positionals, groups) + usage = ' '.join([s for s in [prog, action_usage] if s]) + + # wrap the usage parts if it's too long + text_width = self._width - self._current_indent + if len(prefix) + len(usage) > text_width: + + # break usage into wrappable parts + part_regexp = r'\(.*?\)+|\[.*?\]+|\S+' + opt_usage = format(optionals, groups) + pos_usage = format(positionals, groups) + opt_parts = _re.findall(part_regexp, opt_usage) + pos_parts = _re.findall(part_regexp, pos_usage) + assert ' '.join(opt_parts) == opt_usage + assert ' '.join(pos_parts) == pos_usage + + # helper for wrapping lines + def get_lines(parts, indent, prefix=None): + lines = [] + line = [] + if prefix is not None: + line_len = len(prefix) - 1 + else: + line_len = len(indent) - 1 + for part in parts: + if line_len + 1 + len(part) > text_width: + lines.append(indent + ' '.join(line)) + line = [] + line_len = len(indent) - 1 + line.append(part) + line_len += len(part) + 1 + if line: + lines.append(indent + ' '.join(line)) + if prefix is not None: + lines[0] = lines[0][len(indent):] + return lines + + # if prog is short, follow it with optionals or positionals + if len(prefix) + len(prog) <= 0.75 * text_width: + indent = ' ' * (len(prefix) + len(prog) + 1) + if opt_parts: + lines = get_lines([prog] + opt_parts, indent, prefix) + lines.extend(get_lines(pos_parts, indent)) + elif pos_parts: + lines = get_lines([prog] + pos_parts, indent, prefix) + else: + lines = [prog] + + # if prog is long, put it on its own line + else: + indent = ' ' * len(prefix) + parts = opt_parts + pos_parts + lines = get_lines(parts, indent) + if len(lines) > 1: + lines = [] + lines.extend(get_lines(opt_parts, indent)) + lines.extend(get_lines(pos_parts, indent)) + lines = [prog] + lines + + # join lines into usage + usage = '\n'.join(lines) + + # prefix with 'usage:' + return '%s%s\n\n' % (prefix, usage) + + def _format_actions_usage(self, actions, groups): + # find group indices and identify actions in groups + group_actions = set() + inserts = {} + for group in groups: + try: + start = actions.index(group._group_actions[0]) + except ValueError: + continue + else: + end = start + len(group._group_actions) + if actions[start:end] == group._group_actions: + for action in group._group_actions: + group_actions.add(action) + if not group.required: + if start in inserts: + inserts[start] += ' [' + else: + inserts[start] = '[' + inserts[end] = ']' + else: + if start in inserts: + inserts[start] += ' (' + else: + inserts[start] = '(' + inserts[end] = ')' + for i in range(start + 1, end): + inserts[i] = '|' + + # collect all actions format strings + parts = [] + for i, action in enumerate(actions): + + # suppressed arguments are marked with None + # remove | separators for suppressed arguments + if action.help is SUPPRESS: + parts.append(None) + if inserts.get(i) == '|': + inserts.pop(i) + elif inserts.get(i + 1) == '|': + inserts.pop(i + 1) + + # produce all arg strings + elif not action.option_strings: + default = self._get_default_metavar_for_positional(action) + part = self._format_args(action, default) + + # if it's in a group, strip the outer [] + if action in group_actions: + if part[0] == '[' and part[-1] == ']': + part = part[1:-1] + + # add the action string to the list + parts.append(part) + + # produce the first way to invoke the option in brackets + else: + option_string = action.option_strings[0] + + # if the Optional doesn't take a value, format is: + # -s or --long + if action.nargs == 0: + part = '%s' % option_string + + # if the Optional takes a value, format is: + # -s ARGS or --long ARGS + else: + default = self._get_default_metavar_for_optional(action) + args_string = self._format_args(action, default) + part = '%s %s' % (option_string, args_string) + + # make it look optional if it's not required or in a group + if not action.required and action not in group_actions: + part = '[%s]' % part + + # add the action string to the list + parts.append(part) + + # insert things at the necessary indices + for i in sorted(inserts, reverse=True): + parts[i:i] = [inserts[i]] + + # join all the action items with spaces + text = ' '.join([item for item in parts if item is not None]) + + # clean up separators for mutually exclusive groups + open = r'[\[(]' + close = r'[\])]' + text = _re.sub(r'(%s) ' % open, r'\1', text) + text = _re.sub(r' (%s)' % close, r'\1', text) + text = _re.sub(r'%s *%s' % (open, close), r'', text) + text = _re.sub(r'\(([^|]*)\)', r'\1', text) + text = text.strip() + + # return the text + return text + + def _format_text(self, text): + if '%(prog)' in text: + text = text % dict(prog=self._prog) + text_width = self._width - self._current_indent + indent = ' ' * self._current_indent + return self._fill_text(text, text_width, indent) + '\n\n' + + def _format_action(self, action): + # determine the required width and the entry label + help_position = min(self._action_max_length + 2, + self._max_help_position) + help_width = self._width - help_position + action_width = help_position - self._current_indent - 2 + action_header = self._format_action_invocation(action) + + # ho nelp; start on same line and add a final newline + if not action.help: + tup = self._current_indent, '', action_header + action_header = '%*s%s\n' % tup + + # short action name; start on the same line and pad two spaces + elif len(action_header) <= action_width: + tup = self._current_indent, '', action_width, action_header + action_header = '%*s%-*s ' % tup + indent_first = 0 + + # long action name; start on the next line + else: + tup = self._current_indent, '', action_header + action_header = '%*s%s\n' % tup + indent_first = help_position + + # collect the pieces of the action help + parts = [action_header] + + # if there was help for the action, add lines of help text + if action.help: + help_text = self._expand_help(action) + help_lines = self._split_lines(help_text, help_width) + parts.append('%*s%s\n' % (indent_first, '', help_lines[0])) + for line in help_lines[1:]: + parts.append('%*s%s\n' % (help_position, '', line)) + + # or add a newline if the description doesn't end with one + elif not action_header.endswith('\n'): + parts.append('\n') + + # if there are any sub-actions, add their help as well + for subaction in self._iter_indented_subactions(action): + parts.append(self._format_action(subaction)) + + # return a single string + return self._join_parts(parts) + + def _format_action_invocation(self, action): + if not action.option_strings: + default = self._get_default_metavar_for_positional(action) + metavar, = self._metavar_formatter(action, default)(1) + return metavar + + else: + parts = [] + + # if the Optional doesn't take a value, format is: + # -s, --long + if action.nargs == 0: + parts.extend(action.option_strings) + + # if the Optional takes a value, format is: + # -s ARGS, --long ARGS + else: + default = self._get_default_metavar_for_optional(action) + args_string = self._format_args(action, default) + for option_string in action.option_strings: + parts.append('%s %s' % (option_string, args_string)) + + return ', '.join(parts) + + def _metavar_formatter(self, action, default_metavar): + if action.metavar is not None: + result = action.metavar + elif action.choices is not None: + choice_strs = [str(choice) for choice in action.choices] + result = '{%s}' % ','.join(choice_strs) + else: + result = default_metavar + + def format(tuple_size): + if isinstance(result, tuple): + return result + else: + return (result, ) * tuple_size + return format + + def _format_args(self, action, default_metavar): + get_metavar = self._metavar_formatter(action, default_metavar) + if action.nargs is None: + result = '%s' % get_metavar(1) + elif action.nargs == OPTIONAL: + result = '[%s]' % get_metavar(1) + elif action.nargs == ZERO_OR_MORE: + result = '[%s [%s ...]]' % get_metavar(2) + elif action.nargs == ONE_OR_MORE: + result = '%s [%s ...]' % get_metavar(2) + elif action.nargs == REMAINDER: + result = '...' + elif action.nargs == PARSER: + result = '%s ...' % get_metavar(1) + else: + formats = ['%s' for _ in range(action.nargs)] + result = ' '.join(formats) % get_metavar(action.nargs) + return result + + def _expand_help(self, action): + params = dict(vars(action), prog=self._prog) + for name in list(params): + if params[name] is SUPPRESS: + del params[name] + for name in list(params): + if hasattr(params[name], '__name__'): + params[name] = params[name].__name__ + if params.get('choices') is not None: + choices_str = ', '.join([str(c) for c in params['choices']]) + params['choices'] = choices_str + return self._get_help_string(action) % params + + def _iter_indented_subactions(self, action): + try: + get_subactions = action._get_subactions + except AttributeError: + pass + else: + self._indent() + for subaction in get_subactions(): + yield subaction + self._dedent() + + def _split_lines(self, text, width): + text = self._whitespace_matcher.sub(' ', text).strip() + return _textwrap.wrap(text, width) + + def _fill_text(self, text, width, indent): + text = self._whitespace_matcher.sub(' ', text).strip() + return _textwrap.fill(text, width, initial_indent=indent, + subsequent_indent=indent) + + def _get_help_string(self, action): + return action.help + + def _get_default_metavar_for_optional(self, action): + return action.dest.upper() + + def _get_default_metavar_for_positional(self, action): + return action.dest + + +class RawDescriptionHelpFormatter(HelpFormatter): + """Help message formatter which retains any formatting in descriptions. + + Only the name of this class is considered a public API. All the methods + provided by the class are considered an implementation detail. + """ + + def _fill_text(self, text, width, indent): + return ''.join([indent + line for line in text.splitlines(True)]) + + +class RawTextHelpFormatter(RawDescriptionHelpFormatter): + """Help message formatter which retains formatting of all help text. + + Only the name of this class is considered a public API. All the methods + provided by the class are considered an implementation detail. + """ + + def _split_lines(self, text, width): + return text.splitlines() + + +class ArgumentDefaultsHelpFormatter(HelpFormatter): + """Help message formatter which adds default values to argument help. + + Only the name of this class is considered a public API. All the methods + provided by the class are considered an implementation detail. + """ + + def _get_help_string(self, action): + help = action.help + if '%(default)' not in action.help: + if action.default is not SUPPRESS: + defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] + if action.option_strings or action.nargs in defaulting_nargs: + help += ' (default: %(default)s)' + return help + + +class MetavarTypeHelpFormatter(HelpFormatter): + """Help message formatter which uses the argument 'type' as the default + metavar value (instead of the argument 'dest') + + Only the name of this class is considered a public API. All the methods + provided by the class are considered an implementation detail. + """ + + def _get_default_metavar_for_optional(self, action): + return action.type.__name__ + + def _get_default_metavar_for_positional(self, action): + return action.type.__name__ + + + +# ===================== +# Options and Arguments +# ===================== + +def _get_action_name(argument): + if argument is None: + return None + elif argument.option_strings: + return '/'.join(argument.option_strings) + elif argument.metavar not in (None, SUPPRESS): + return argument.metavar + elif argument.dest not in (None, SUPPRESS): + return argument.dest + else: + return None + + +class ArgumentError(Exception): + """An error from creating or using an argument (optional or positional). + + The string value of this exception is the message, augmented with + information about the argument that caused it. + """ + + def __init__(self, argument, message): + self.argument_name = _get_action_name(argument) + self.message = message + + def __str__(self): + if self.argument_name is None: + format = '%(message)s' + else: + format = 'argument %(argument_name)s: %(message)s' + return format % dict(message=self.message, + argument_name=self.argument_name) + + +class ArgumentTypeError(Exception): + """An error from trying to convert a command line string to a type.""" + pass + + +# ============== +# Action classes +# ============== + +class Action(_AttributeHolder): + """Information about how to convert command line strings to Python objects. + + Action objects are used by an ArgumentParser to represent the information + needed to parse a single argument from one or more strings from the + command line. The keyword arguments to the Action constructor are also + all attributes of Action instances. + + Keyword Arguments: + + - option_strings -- A list of command-line option strings which + should be associated with this action. + + - dest -- The name of the attribute to hold the created object(s) + + - nargs -- The number of command-line arguments that should be + consumed. By default, one argument will be consumed and a single + value will be produced. Other values include: + - N (an integer) consumes N arguments (and produces a list) + - '?' consumes zero or one arguments + - '*' consumes zero or more arguments (and produces a list) + - '+' consumes one or more arguments (and produces a list) + Note that the difference between the default and nargs=1 is that + with the default, a single value will be produced, while with + nargs=1, a list containing a single value will be produced. + + - const -- The value to be produced if the option is specified and the + option uses an action that takes no values. + + - default -- The value to be produced if the option is not specified. + + - type -- The type which the command-line arguments should be converted + to, should be one of 'string', 'int', 'float', 'complex' or a + callable object that accepts a single string argument. If None, + 'string' is assumed. + + - choices -- A container of values that should be allowed. If not None, + after a command-line argument has been converted to the appropriate + type, an exception will be raised if it is not a member of this + collection. + + - required -- True if the action must always be specified at the + command line. This is only meaningful for optional command-line + arguments. + + - help -- The help string describing the argument. + + - metavar -- The name to be used for the option's argument with the + help string. If None, the 'dest' value will be used as the name. + """ + + def __init__(self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None): + self.option_strings = option_strings + self.dest = dest + self.nargs = nargs + self.const = const + self.default = default + self.type = type + self.choices = choices + self.required = required + self.help = help + self.metavar = metavar + + def _get_kwargs(self): + names = [ + 'option_strings', + 'dest', + 'nargs', + 'const', + 'default', + 'type', + 'choices', + 'help', + 'metavar', + ] + return [(name, getattr(self, name)) for name in names] + + def __call__(self, parser, namespace, values, option_string=None): + raise NotImplementedError(_('.__call__() not defined')) + + +class _StoreAction(Action): + + def __init__(self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None): + if nargs == 0: + raise ValueError('nargs for store actions must be > 0; if you ' + 'have nothing to store, actions such as store ' + 'true or store const may be more appropriate') + if const is not None and nargs != OPTIONAL: + raise ValueError('nargs must be %r to supply const' % OPTIONAL) + super(_StoreAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=nargs, + const=const, + default=default, + type=type, + choices=choices, + required=required, + help=help, + metavar=metavar) + + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, values) + + +class _StoreConstAction(Action): + + def __init__(self, + option_strings, + dest, + const, + default=None, + required=False, + help=None, + metavar=None): + super(_StoreConstAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=0, + const=const, + default=default, + required=required, + help=help) + + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, self.dest, self.const) + + +class _StoreTrueAction(_StoreConstAction): + + def __init__(self, + option_strings, + dest, + default=False, + required=False, + help=None): + super(_StoreTrueAction, self).__init__( + option_strings=option_strings, + dest=dest, + const=True, + default=default, + required=required, + help=help) + + +class _StoreFalseAction(_StoreConstAction): + + def __init__(self, + option_strings, + dest, + default=True, + required=False, + help=None): + super(_StoreFalseAction, self).__init__( + option_strings=option_strings, + dest=dest, + const=False, + default=default, + required=required, + help=help) + + +class _AppendAction(Action): + + def __init__(self, + option_strings, + dest, + nargs=None, + const=None, + default=None, + type=None, + choices=None, + required=False, + help=None, + metavar=None): + if nargs == 0: + raise ValueError('nargs for append actions must be > 0; if arg ' + 'strings are not supplying the value to append, ' + 'the append const action may be more appropriate') + if const is not None and nargs != OPTIONAL: + raise ValueError('nargs must be %r to supply const' % OPTIONAL) + super(_AppendAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=nargs, + const=const, + default=default, + type=type, + choices=choices, + required=required, + help=help, + metavar=metavar) + + def __call__(self, parser, namespace, values, option_string=None): + items = _copy.copy(_ensure_value(namespace, self.dest, [])) + items.append(values) + setattr(namespace, self.dest, items) + + +class _AppendConstAction(Action): + + def __init__(self, + option_strings, + dest, + const, + default=None, + required=False, + help=None, + metavar=None): + super(_AppendConstAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=0, + const=const, + default=default, + required=required, + help=help, + metavar=metavar) + + def __call__(self, parser, namespace, values, option_string=None): + items = _copy.copy(_ensure_value(namespace, self.dest, [])) + items.append(self.const) + setattr(namespace, self.dest, items) + + +class _CountAction(Action): + + def __init__(self, + option_strings, + dest, + default=None, + required=False, + help=None): + super(_CountAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=0, + default=default, + required=required, + help=help) + + def __call__(self, parser, namespace, values, option_string=None): + new_count = _ensure_value(namespace, self.dest, 0) + 1 + setattr(namespace, self.dest, new_count) + + +class _HelpAction(Action): + + def __init__(self, + option_strings, + dest=SUPPRESS, + default=SUPPRESS, + help=None): + super(_HelpAction, self).__init__( + option_strings=option_strings, + dest=dest, + default=default, + nargs=0, + help=help) + + def __call__(self, parser, namespace, values, option_string=None): + parser.print_help() + parser.exit() + + +class _VersionAction(Action): + + def __init__(self, + option_strings, + version=None, + dest=SUPPRESS, + default=SUPPRESS, + help="show program's version number and exit"): + super(_VersionAction, self).__init__( + option_strings=option_strings, + dest=dest, + default=default, + nargs=0, + help=help) + self.version = version + + def __call__(self, parser, namespace, values, option_string=None): + version = self.version + if version is None: + version = parser.version + formatter = parser._get_formatter() + formatter.add_text(version) + parser.exit(message=formatter.format_help()) + + +class _SubParsersAction(Action): + + class _ChoicesPseudoAction(Action): + + def __init__(self, name, aliases, help): + metavar = dest = name + if aliases: + metavar += ' (%s)' % ', '.join(aliases) + sup = super(_SubParsersAction._ChoicesPseudoAction, self) + sup.__init__(option_strings=[], dest=dest, help=help, + metavar=metavar) + + def __init__(self, + option_strings, + prog, + parser_class, + dest=SUPPRESS, + help=None, + metavar=None): + + self._prog_prefix = prog + self._parser_class = parser_class + self._name_parser_map = _collections.OrderedDict() + self._choices_actions = [] + + super(_SubParsersAction, self).__init__( + option_strings=option_strings, + dest=dest, + nargs=PARSER, + choices=self._name_parser_map, + help=help, + metavar=metavar) + + def add_parser(self, name, **kwargs): + # set prog from the existing prefix + if kwargs.get('prog') is None: + kwargs['prog'] = '%s %s' % (self._prog_prefix, name) + + aliases = kwargs.pop('aliases', ()) + + # create a pseudo-action to hold the choice help + if 'help' in kwargs: + help = kwargs.pop('help') + choice_action = self._ChoicesPseudoAction(name, aliases, help) + self._choices_actions.append(choice_action) + + # create the parser and add it to the map + parser = self._parser_class(**kwargs) + self._name_parser_map[name] = parser + + # make parser available under aliases also + for alias in aliases: + self._name_parser_map[alias] = parser + + return parser + + def _get_subactions(self): + return self._choices_actions + + def __call__(self, parser, namespace, values, option_string=None): + parser_name = values[0] + arg_strings = values[1:] + + # set the parser name if requested + if self.dest is not SUPPRESS: + setattr(namespace, self.dest, parser_name) + + # select the parser + try: + parser = self._name_parser_map[parser_name] + except KeyError: + args = {'parser_name': parser_name, + 'choices': ', '.join(self._name_parser_map)} + msg = _('unknown parser %(parser_name)r (choices: %(choices)s)') % args + raise ArgumentError(self, msg) + + # parse all the remaining options into the namespace + # store any unrecognized options on the object, so that the top + # level parser can decide what to do with them + namespace, arg_strings = parser.parse_known_args(arg_strings, namespace) + if arg_strings: + vars(namespace).setdefault(_UNRECOGNIZED_ARGS_ATTR, []) + getattr(namespace, _UNRECOGNIZED_ARGS_ATTR).extend(arg_strings) + + +# ============== +# Type classes +# ============== + +class FileType(object): + """Factory for creating file object types + + Instances of FileType are typically passed as type= arguments to the + ArgumentParser add_argument() method. + + Keyword Arguments: + - mode -- A string indicating how the file is to be opened. Accepts the + same values as the builtin open() function. + - bufsize -- The file's desired buffer size. Accepts the same values as + the builtin open() function. + """ + + def __init__(self, mode='r', bufsize=-1): + self._mode = mode + self._bufsize = bufsize + + def __call__(self, string): + # the special argument "-" means sys.std{in,out} + if string == '-': + if 'r' in self._mode: + return _sys.stdin + elif 'w' in self._mode: + return _sys.stdout + else: + msg = _('argument "-" with mode %r') % self._mode + raise ValueError(msg) + + # all other arguments are used as file names + try: + return open(string, self._mode, self._bufsize) + except IOError as e: + message = _("can't open '%s': %s") + raise ArgumentTypeError(message % (string, e)) + + def __repr__(self): + args = self._mode, self._bufsize + args_str = ', '.join(repr(arg) for arg in args if arg != -1) + return '%s(%s)' % (type(self).__name__, args_str) + +# =========================== +# Optional and Positional Parsing +# =========================== + +class Namespace(_AttributeHolder): + """Simple object for storing attributes. + + Implements equality by attribute names and values, and provides a simple + string representation. + """ + + def __init__(self, **kwargs): + for name in kwargs: + setattr(self, name, kwargs[name]) + + def __eq__(self, other): + return vars(self) == vars(other) + + def __ne__(self, other): + return not (self == other) + + def __contains__(self, key): + return key in self.__dict__ + + +class _ActionsContainer(object): + + def __init__(self, + description, + prefix_chars, + argument_default, + conflict_handler): + super(_ActionsContainer, self).__init__() + + self.description = description + self.argument_default = argument_default + self.prefix_chars = prefix_chars + self.conflict_handler = conflict_handler + + # set up registries + self._registries = {} + + # register actions + self.register('action', None, _StoreAction) + self.register('action', 'store', _StoreAction) + self.register('action', 'store_const', _StoreConstAction) + self.register('action', 'store_true', _StoreTrueAction) + self.register('action', 'store_false', _StoreFalseAction) + self.register('action', 'append', _AppendAction) + self.register('action', 'append_const', _AppendConstAction) + self.register('action', 'count', _CountAction) + self.register('action', 'help', _HelpAction) + self.register('action', 'version', _VersionAction) + self.register('action', 'parsers', _SubParsersAction) + + # raise an exception if the conflict handler is invalid + self._get_handler() + + # action storage + self._actions = [] + self._option_string_actions = {} + + # groups + self._action_groups = [] + self._mutually_exclusive_groups = [] + + # defaults storage + self._defaults = {} + + # determines whether an "option" looks like a negative number + self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$') + + # whether or not there are any optionals that look like negative + # numbers -- uses a list so it can be shared and edited + self._has_negative_number_optionals = [] + + # ==================== + # Registration methods + # ==================== + def register(self, registry_name, value, object): + registry = self._registries.setdefault(registry_name, {}) + registry[value] = object + + def _registry_get(self, registry_name, value, default=None): + return self._registries[registry_name].get(value, default) + + # ================================== + # Namespace default accessor methods + # ================================== + def set_defaults(self, **kwargs): + self._defaults.update(kwargs) + + # if these defaults match any existing arguments, replace + # the previous default on the object with the new one + for action in self._actions: + if action.dest in kwargs: + action.default = kwargs[action.dest] + + def get_default(self, dest): + for action in self._actions: + if action.dest == dest and action.default is not None: + return action.default + return self._defaults.get(dest, None) + + + # ======================= + # Adding argument actions + # ======================= + def add_argument(self, *args, **kwargs): + """ + add_argument(dest, ..., name=value, ...) + add_argument(option_string, option_string, ..., name=value, ...) + """ + + # if no positional args are supplied or only one is supplied and + # it doesn't look like an option string, parse a positional + # argument + chars = self.prefix_chars + if not args or len(args) == 1 and args[0][0] not in chars: + if args and 'dest' in kwargs: + raise ValueError('dest supplied twice for positional argument') + kwargs = self._get_positional_kwargs(*args, **kwargs) + + # otherwise, we're adding an optional argument + else: + kwargs = self._get_optional_kwargs(*args, **kwargs) + + # if no default was supplied, use the parser-level default + if 'default' not in kwargs: + dest = kwargs['dest'] + if dest in self._defaults: + kwargs['default'] = self._defaults[dest] + elif self.argument_default is not None: + kwargs['default'] = self.argument_default + + # create the action object, and add it to the parser + action_class = self._pop_action_class(kwargs) + if not _callable(action_class): + raise ValueError('unknown action "%s"' % (action_class,)) + action = action_class(**kwargs) + + # raise an error if the action type is not callable + type_func = self._registry_get('type', action.type, action.type) + if not _callable(type_func): + raise ValueError('%r is not callable' % (type_func,)) + + # raise an error if the metavar does not match the type + if hasattr(self, "_get_formatter"): + try: + self._get_formatter()._format_args(action, None) + except TypeError: + raise ValueError("length of metavar tuple does not match nargs") + + return self._add_action(action) + + def add_argument_group(self, *args, **kwargs): + group = _ArgumentGroup(self, *args, **kwargs) + self._action_groups.append(group) + return group + + def add_mutually_exclusive_group(self, **kwargs): + group = _MutuallyExclusiveGroup(self, **kwargs) + self._mutually_exclusive_groups.append(group) + return group + + def _add_action(self, action): + # resolve any conflicts + self._check_conflict(action) + + # add to actions list + self._actions.append(action) + action.container = self + + # index the action by any option strings it has + for option_string in action.option_strings: + self._option_string_actions[option_string] = action + + # set the flag if any option strings look like negative numbers + for option_string in action.option_strings: + if self._negative_number_matcher.match(option_string): + if not self._has_negative_number_optionals: + self._has_negative_number_optionals.append(True) + + # return the created action + return action + + def _remove_action(self, action): + self._actions.remove(action) + + def _add_container_actions(self, container): + # collect groups by titles + title_group_map = {} + for group in self._action_groups: + if group.title in title_group_map: + msg = _('cannot merge actions - two groups are named %r') + raise ValueError(msg % (group.title)) + title_group_map[group.title] = group + + # map each action to its group + group_map = {} + for group in container._action_groups: + + # if a group with the title exists, use that, otherwise + # create a new group matching the container's group + if group.title not in title_group_map: + title_group_map[group.title] = self.add_argument_group( + title=group.title, + description=group.description, + conflict_handler=group.conflict_handler) + + # map the actions to their new group + for action in group._group_actions: + group_map[action] = title_group_map[group.title] + + # add container's mutually exclusive groups + # NOTE: if add_mutually_exclusive_group ever gains title= and + # description= then this code will need to be expanded as above + for group in container._mutually_exclusive_groups: + mutex_group = self.add_mutually_exclusive_group( + required=group.required) + + # map the actions to their new mutex group + for action in group._group_actions: + group_map[action] = mutex_group + + # add all actions to this container or their group + for action in container._actions: + group_map.get(action, self)._add_action(action) + + def _get_positional_kwargs(self, dest, **kwargs): + # make sure required is not specified + if 'required' in kwargs: + msg = _("'required' is an invalid argument for positionals") + raise TypeError(msg) + + # mark positional arguments as required if at least one is + # always required + if kwargs.get('nargs') not in [OPTIONAL, ZERO_OR_MORE]: + kwargs['required'] = True + if kwargs.get('nargs') == ZERO_OR_MORE and 'default' not in kwargs: + kwargs['required'] = True + + # return the keyword arguments with no option strings + return dict(kwargs, dest=dest, option_strings=[]) + + def _get_optional_kwargs(self, *args, **kwargs): + # determine short and long option strings + option_strings = [] + long_option_strings = [] + for option_string in args: + # error on strings that don't start with an appropriate prefix + if not option_string[0] in self.prefix_chars: + args = {'option': option_string, + 'prefix_chars': self.prefix_chars} + msg = _('invalid option string %(option)r: ' + 'must start with a character %(prefix_chars)r') + raise ValueError(msg % args) + + # strings starting with two prefix characters are long options + option_strings.append(option_string) + if option_string[0] in self.prefix_chars: + if len(option_string) > 1: + if option_string[1] in self.prefix_chars: + long_option_strings.append(option_string) + + # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x' + dest = kwargs.pop('dest', None) + if dest is None: + if long_option_strings: + dest_option_string = long_option_strings[0] + else: + dest_option_string = option_strings[0] + dest = dest_option_string.lstrip(self.prefix_chars) + if not dest: + msg = _('dest= is required for options like %r') + raise ValueError(msg % option_string) + dest = dest.replace('-', '_') + + # return the updated keyword arguments + return dict(kwargs, dest=dest, option_strings=option_strings) + + def _pop_action_class(self, kwargs, default=None): + action = kwargs.pop('action', default) + return self._registry_get('action', action, action) + + def _get_handler(self): + # determine function from conflict handler string + handler_func_name = '_handle_conflict_%s' % self.conflict_handler + try: + return getattr(self, handler_func_name) + except AttributeError: + msg = _('invalid conflict_resolution value: %r') + raise ValueError(msg % self.conflict_handler) + + def _check_conflict(self, action): + + # find all options that conflict with this option + confl_optionals = [] + for option_string in action.option_strings: + if option_string in self._option_string_actions: + confl_optional = self._option_string_actions[option_string] + confl_optionals.append((option_string, confl_optional)) + + # resolve any conflicts + if confl_optionals: + conflict_handler = self._get_handler() + conflict_handler(action, confl_optionals) + + def _handle_conflict_error(self, action, conflicting_actions): + message = ngettext('conflicting option string: %s', + 'conflicting option strings: %s', + len(conflicting_actions)) + conflict_string = ', '.join([option_string + for option_string, action + in conflicting_actions]) + raise ArgumentError(action, message % conflict_string) + + def _handle_conflict_resolve(self, action, conflicting_actions): + + # remove all conflicting options + for option_string, action in conflicting_actions: + + # remove the conflicting option + action.option_strings.remove(option_string) + self._option_string_actions.pop(option_string, None) + + # if the option now has no option string, remove it from the + # container holding it + if not action.option_strings: + action.container._remove_action(action) + + +class _ArgumentGroup(_ActionsContainer): + + def __init__(self, container, title=None, description=None, **kwargs): + # add any missing keyword arguments by checking the container + update = kwargs.setdefault + update('conflict_handler', container.conflict_handler) + update('prefix_chars', container.prefix_chars) + update('argument_default', container.argument_default) + super_init = super(_ArgumentGroup, self).__init__ + super_init(description=description, **kwargs) + + # group attributes + self.title = title + self._group_actions = [] + + # share most attributes with the container + self._registries = container._registries + self._actions = container._actions + self._option_string_actions = container._option_string_actions + self._defaults = container._defaults + self._has_negative_number_optionals = \ + container._has_negative_number_optionals + self._mutually_exclusive_groups = container._mutually_exclusive_groups + + def _add_action(self, action): + action = super(_ArgumentGroup, self)._add_action(action) + self._group_actions.append(action) + return action + + def _remove_action(self, action): + super(_ArgumentGroup, self)._remove_action(action) + self._group_actions.remove(action) + + +class _MutuallyExclusiveGroup(_ArgumentGroup): + + def __init__(self, container, required=False): + super(_MutuallyExclusiveGroup, self).__init__(container) + self.required = required + self._container = container + + def _add_action(self, action): + if action.required: + msg = _('mutually exclusive arguments must be optional') + raise ValueError(msg) + action = self._container._add_action(action) + self._group_actions.append(action) + return action + + def _remove_action(self, action): + self._container._remove_action(action) + self._group_actions.remove(action) + + +class ArgumentParser(_AttributeHolder, _ActionsContainer): + """Object for parsing command line strings into Python objects. + + Keyword Arguments: + - prog -- The name of the program (default: sys.argv[0]) + - usage -- A usage message (default: auto-generated from arguments) + - description -- A description of what the program does + - epilog -- Text following the argument descriptions + - parents -- Parsers whose arguments should be copied into this one + - formatter_class -- HelpFormatter class for printing help messages + - prefix_chars -- Characters that prefix optional arguments + - fromfile_prefix_chars -- Characters that prefix files containing + additional arguments + - argument_default -- The default value for all arguments + - conflict_handler -- String indicating how to handle conflicts + - add_help -- Add a -h/-help option + """ + + def __init__(self, + prog=None, + usage=None, + description=None, + epilog=None, + version=None, + parents=[], + formatter_class=HelpFormatter, + prefix_chars='-', + fromfile_prefix_chars=None, + argument_default=None, + conflict_handler='error', + add_help=True): + + if version is not None: + import warnings + warnings.warn( + """The "version" argument to ArgumentParser is deprecated. """ + """Please use """ + """"add_argument(..., action='version', version="N", ...)" """ + """instead""", DeprecationWarning) + + superinit = super(ArgumentParser, self).__init__ + superinit(description=description, + prefix_chars=prefix_chars, + argument_default=argument_default, + conflict_handler=conflict_handler) + + # default setting for prog + if prog is None: + prog = _os.path.basename(_sys.argv[0]) + + self.prog = prog + self.usage = usage + self.epilog = epilog + self.version = version + self.formatter_class = formatter_class + self.fromfile_prefix_chars = fromfile_prefix_chars + self.add_help = add_help + + add_group = self.add_argument_group + self._positionals = add_group(_('positional arguments')) + self._optionals = add_group(_('optional arguments')) + self._subparsers = None + + # register types + def identity(string): + return string + self.register('type', None, identity) + + # add help and version arguments if necessary + # (using explicit default to override global argument_default) + default_prefix = '-' if '-' in prefix_chars else prefix_chars[0] + if self.add_help: + self.add_argument( + default_prefix+'h', default_prefix*2+'help', + action='help', default=SUPPRESS, + help=_('show this help message and exit')) + if self.version: + self.add_argument( + default_prefix+'v', default_prefix*2+'version', + action='version', default=SUPPRESS, + version=self.version, + help=_("show program's version number and exit")) + + # add parent arguments and defaults + for parent in parents: + self._add_container_actions(parent) + try: + defaults = parent._defaults + except AttributeError: + pass + else: + self._defaults.update(defaults) + + # ======================= + # Pretty __repr__ methods + # ======================= + def _get_kwargs(self): + names = [ + 'prog', + 'usage', + 'description', + 'version', + 'formatter_class', + 'conflict_handler', + 'add_help', + ] + return [(name, getattr(self, name)) for name in names] + + # ================================== + # Optional/Positional adding methods + # ================================== + def add_subparsers(self, **kwargs): + if self._subparsers is not None: + self.error(_('cannot have multiple subparser arguments')) + + # add the parser class to the arguments if it's not present + kwargs.setdefault('parser_class', type(self)) + + if 'title' in kwargs or 'description' in kwargs: + title = _(kwargs.pop('title', 'subcommands')) + description = _(kwargs.pop('description', None)) + self._subparsers = self.add_argument_group(title, description) + else: + self._subparsers = self._positionals + + # prog defaults to the usage message of this parser, skipping + # optional arguments and with no "usage:" prefix + if kwargs.get('prog') is None: + formatter = self._get_formatter() + positionals = self._get_positional_actions() + groups = self._mutually_exclusive_groups + formatter.add_usage(self.usage, positionals, groups, '') + kwargs['prog'] = formatter.format_help().strip() + + # create the parsers action and add it to the positionals list + parsers_class = self._pop_action_class(kwargs, 'parsers') + action = parsers_class(option_strings=[], **kwargs) + self._subparsers._add_action(action) + + # return the created parsers action + return action + + def _add_action(self, action): + if action.option_strings: + self._optionals._add_action(action) + else: + self._positionals._add_action(action) + return action + + def _get_optional_actions(self): + return [action + for action in self._actions + if action.option_strings] + + def _get_positional_actions(self): + return [action + for action in self._actions + if not action.option_strings] + + # ===================================== + # Command line argument parsing methods + # ===================================== + def parse_args(self, args=None, namespace=None): + args, argv = self.parse_known_args(args, namespace) + if argv: + msg = _('unrecognized arguments: %s') + self.error(msg % ' '.join(argv)) + return args + + def parse_known_args(self, args=None, namespace=None): + # args default to the system args + if args is None: + args = _sys.argv[1:] + + # default Namespace built from parser defaults + if namespace is None: + namespace = Namespace() + + # add any action defaults that aren't present + for action in self._actions: + if action.dest is not SUPPRESS: + if not hasattr(namespace, action.dest): + if action.default is not SUPPRESS: + default = action.default + if isinstance(action.default, str): + default = self._get_value(action, default) + setattr(namespace, action.dest, default) + + # add any parser defaults that aren't present + for dest in self._defaults: + if not hasattr(namespace, dest): + setattr(namespace, dest, self._defaults[dest]) + + # parse the arguments and exit if there are any errors + try: + namespace, args = self._parse_known_args(args, namespace) + if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): + args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) + delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) + return namespace, args + except ArgumentError: + err = _sys.exc_info()[1] + self.error(str(err)) + + def _parse_known_args(self, arg_strings, namespace): + # replace arg strings that are file references + if self.fromfile_prefix_chars is not None: + arg_strings = self._read_args_from_files(arg_strings) + + # map all mutually exclusive arguments to the other arguments + # they can't occur with + action_conflicts = {} + for mutex_group in self._mutually_exclusive_groups: + group_actions = mutex_group._group_actions + for i, mutex_action in enumerate(mutex_group._group_actions): + conflicts = action_conflicts.setdefault(mutex_action, []) + conflicts.extend(group_actions[:i]) + conflicts.extend(group_actions[i + 1:]) + + # find all option indices, and determine the arg_string_pattern + # which has an 'O' if there is an option at an index, + # an 'A' if there is an argument, or a '-' if there is a '--' + option_string_indices = {} + arg_string_pattern_parts = [] + arg_strings_iter = iter(arg_strings) + for i, arg_string in enumerate(arg_strings_iter): + + # all args after -- are non-options + if arg_string == '--': + arg_string_pattern_parts.append('-') + for arg_string in arg_strings_iter: + arg_string_pattern_parts.append('A') + + # otherwise, add the arg to the arg strings + # and note the index if it was an option + else: + option_tuple = self._parse_optional(arg_string) + if option_tuple is None: + pattern = 'A' + else: + option_string_indices[i] = option_tuple + pattern = 'O' + arg_string_pattern_parts.append(pattern) + + # join the pieces together to form the pattern + arg_strings_pattern = ''.join(arg_string_pattern_parts) + + # converts arg strings to the appropriate and then takes the action + seen_actions = set() + seen_non_default_actions = set() + + def take_action(action, argument_strings, option_string=None): + seen_actions.add(action) + argument_values = self._get_values(action, argument_strings) + + # error if this argument is not allowed with other previously + # seen arguments, assuming that actions that use the default + # value don't really count as "present" + if argument_values is not action.default: + seen_non_default_actions.add(action) + for conflict_action in action_conflicts.get(action, []): + if conflict_action in seen_non_default_actions: + msg = _('not allowed with argument %s') + action_name = _get_action_name(conflict_action) + raise ArgumentError(action, msg % action_name) + + # take the action if we didn't receive a SUPPRESS value + # (e.g. from a default) + if argument_values is not SUPPRESS: + action(self, namespace, argument_values, option_string) + + # function to convert arg_strings into an optional action + def consume_optional(start_index): + + # get the optional identified at this index + option_tuple = option_string_indices[start_index] + action, option_string, explicit_arg = option_tuple + + # identify additional optionals in the same arg string + # (e.g. -xyz is the same as -x -y -z if no args are required) + match_argument = self._match_argument + action_tuples = [] + while True: + + # if we found no optional action, skip it + if action is None: + extras.append(arg_strings[start_index]) + return start_index + 1 + + # if there is an explicit argument, try to match the + # optional's string arguments to only this + if explicit_arg is not None: + arg_count = match_argument(action, 'A') + + # if the action is a single-dash option and takes no + # arguments, try to parse more single-dash options out + # of the tail of the option string + chars = self.prefix_chars + if arg_count == 0 and option_string[1] not in chars: + action_tuples.append((action, [], option_string)) + char = option_string[0] + option_string = char + explicit_arg[0] + new_explicit_arg = explicit_arg[1:] or None + optionals_map = self._option_string_actions + if option_string in optionals_map: + action = optionals_map[option_string] + explicit_arg = new_explicit_arg + else: + msg = _('ignored explicit argument %r') + raise ArgumentError(action, msg % explicit_arg) + + # if the action expect exactly one argument, we've + # successfully matched the option; exit the loop + elif arg_count == 1: + stop = start_index + 1 + args = [explicit_arg] + action_tuples.append((action, args, option_string)) + break + + # error if a double-dash option did not use the + # explicit argument + else: + msg = _('ignored explicit argument %r') + raise ArgumentError(action, msg % explicit_arg) + + # if there is no explicit argument, try to match the + # optional's string arguments with the following strings + # if successful, exit the loop + else: + start = start_index + 1 + selected_patterns = arg_strings_pattern[start:] + arg_count = match_argument(action, selected_patterns) + stop = start + arg_count + args = arg_strings[start:stop] + action_tuples.append((action, args, option_string)) + break + + # add the Optional to the list and return the index at which + # the Optional's string args stopped + assert action_tuples + for action, args, option_string in action_tuples: + take_action(action, args, option_string) + return stop + + # the list of Positionals left to be parsed; this is modified + # by consume_positionals() + positionals = self._get_positional_actions() + + # function to convert arg_strings into positional actions + def consume_positionals(start_index): + # match as many Positionals as possible + match_partial = self._match_arguments_partial + selected_pattern = arg_strings_pattern[start_index:] + arg_counts = match_partial(positionals, selected_pattern) + + # slice off the appropriate arg strings for each Positional + # and add the Positional and its args to the list + for action, arg_count in zip(positionals, arg_counts): + args = arg_strings[start_index: start_index + arg_count] + start_index += arg_count + take_action(action, args) + + # slice off the Positionals that we just parsed and return the + # index at which the Positionals' string args stopped + positionals[:] = positionals[len(arg_counts):] + return start_index + + # consume Positionals and Optionals alternately, until we have + # passed the last option string + extras = [] + start_index = 0 + if option_string_indices: + max_option_string_index = max(option_string_indices) + else: + max_option_string_index = -1 + while start_index <= max_option_string_index: + + # consume any Positionals preceding the next option + next_option_string_index = min([ + index + for index in option_string_indices + if index >= start_index]) + if start_index != next_option_string_index: + positionals_end_index = consume_positionals(start_index) + + # only try to parse the next optional if we didn't consume + # the option string during the positionals parsing + if positionals_end_index > start_index: + start_index = positionals_end_index + continue + else: + start_index = positionals_end_index + + # if we consumed all the positionals we could and we're not + # at the index of an option string, there were extra arguments + if start_index not in option_string_indices: + strings = arg_strings[start_index:next_option_string_index] + extras.extend(strings) + start_index = next_option_string_index + + # consume the next optional and any arguments for it + start_index = consume_optional(start_index) + + # consume any positionals following the last Optional + stop_index = consume_positionals(start_index) + + # if we didn't consume all the argument strings, there were extras + extras.extend(arg_strings[stop_index:]) + + # if we didn't use all the Positional objects, there were too few + # arg strings supplied. + if positionals: + self.error(_('too few arguments')) + + # make sure all required actions were present + for action in self._actions: + if action.required: + if action not in seen_actions: + name = _get_action_name(action) + self.error(_('argument %s is required') % name) + + # make sure all required groups had one option present + for group in self._mutually_exclusive_groups: + if group.required: + for action in group._group_actions: + if action in seen_non_default_actions: + break + + # if no actions were used, report the error + else: + names = [_get_action_name(action) + for action in group._group_actions + if action.help is not SUPPRESS] + msg = _('one of the arguments %s is required') + self.error(msg % ' '.join(names)) + + # return the updated namespace and the extra arguments + return namespace, extras + + def _read_args_from_files(self, arg_strings): + # expand arguments referencing files + new_arg_strings = [] + for arg_string in arg_strings: + + # for regular arguments, just add them back into the list + if arg_string[0] not in self.fromfile_prefix_chars: + new_arg_strings.append(arg_string) + + # replace arguments referencing files with the file content + else: + try: + args_file = open(arg_string[1:]) + try: + arg_strings = [] + for arg_line in args_file.read().splitlines(): + for arg in self.convert_arg_line_to_args(arg_line): + arg_strings.append(arg) + arg_strings = self._read_args_from_files(arg_strings) + new_arg_strings.extend(arg_strings) + finally: + args_file.close() + except IOError: + err = _sys.exc_info()[1] + self.error(str(err)) + + # return the modified argument list + return new_arg_strings + + def convert_arg_line_to_args(self, arg_line): + return [arg_line] + + def _match_argument(self, action, arg_strings_pattern): + # match the pattern for this action to the arg strings + nargs_pattern = self._get_nargs_pattern(action) + match = _re.match(nargs_pattern, arg_strings_pattern) + + # raise an exception if we weren't able to find a match + if match is None: + nargs_errors = { + None: _('expected one argument'), + OPTIONAL: _('expected at most one argument'), + ONE_OR_MORE: _('expected at least one argument'), + } + default = ngettext('expected %s argument', + 'expected %s arguments', + action.nargs) % action.nargs + msg = nargs_errors.get(action.nargs, default) + raise ArgumentError(action, msg) + + # return the number of arguments matched + return len(match.group(1)) + + def _match_arguments_partial(self, actions, arg_strings_pattern): + # progressively shorten the actions list by slicing off the + # final actions until we find a match + result = [] + for i in range(len(actions), 0, -1): + actions_slice = actions[:i] + pattern = ''.join([self._get_nargs_pattern(action) + for action in actions_slice]) + match = _re.match(pattern, arg_strings_pattern) + if match is not None: + result.extend([len(string) for string in match.groups()]) + break + + # return the list of arg string counts + return result + + def _parse_optional(self, arg_string): + # if it's an empty string, it was meant to be a positional + if not arg_string: + return None + + # if it doesn't start with a prefix, it was meant to be positional + if not arg_string[0] in self.prefix_chars: + return None + + # if the option string is present in the parser, return the action + if arg_string in self._option_string_actions: + action = self._option_string_actions[arg_string] + return action, arg_string, None + + # if it's just a single character, it was meant to be positional + if len(arg_string) == 1: + return None + + # if the option string before the "=" is present, return the action + if '=' in arg_string: + option_string, explicit_arg = arg_string.split('=', 1) + if option_string in self._option_string_actions: + action = self._option_string_actions[option_string] + return action, option_string, explicit_arg + + # search through all possible prefixes of the option string + # and all actions in the parser for possible interpretations + option_tuples = self._get_option_tuples(arg_string) + + # if multiple actions match, the option string was ambiguous + if len(option_tuples) > 1: + options = ', '.join([option_string + for action, option_string, explicit_arg in option_tuples]) + args = {'option': arg_string, 'matches': options} + msg = _('ambiguous option: %(option)s could match %(matches)s') + self.error(msg % args) + + # if exactly one action matched, this segmentation is good, + # so return the parsed action + elif len(option_tuples) == 1: + option_tuple, = option_tuples + return option_tuple + + # if it was not found as an option, but it looks like a negative + # number, it was meant to be positional + # unless there are negative-number-like options + if self._negative_number_matcher.match(arg_string): + if not self._has_negative_number_optionals: + return None + + # if it contains a space, it was meant to be a positional + if ' ' in arg_string: + return None + + # it was meant to be an optional but there is no such option + # in this parser (though it might be a valid option in a subparser) + return None, arg_string, None + + def _get_option_tuples(self, option_string): + result = [] + + # option strings starting with two prefix characters are only + # split at the '=' + chars = self.prefix_chars + if option_string[0] in chars and option_string[1] in chars: + if '=' in option_string: + option_prefix, explicit_arg = option_string.split('=', 1) + else: + option_prefix = option_string + explicit_arg = None + for option_string in self._option_string_actions: + if option_string.startswith(option_prefix): + action = self._option_string_actions[option_string] + tup = action, option_string, explicit_arg + result.append(tup) + + # single character options can be concatenated with their arguments + # but multiple character options always have to have their argument + # separate + elif option_string[0] in chars and option_string[1] not in chars: + option_prefix = option_string + explicit_arg = None + short_option_prefix = option_string[:2] + short_explicit_arg = option_string[2:] + + for option_string in self._option_string_actions: + if option_string == short_option_prefix: + action = self._option_string_actions[option_string] + tup = action, option_string, short_explicit_arg + result.append(tup) + elif option_string.startswith(option_prefix): + action = self._option_string_actions[option_string] + tup = action, option_string, explicit_arg + result.append(tup) + + # shouldn't ever get here + else: + self.error(_('unexpected option string: %s') % option_string) + + # return the collected option tuples + return result + + def _get_nargs_pattern(self, action): + # in all examples below, we have to allow for '--' args + # which are represented as '-' in the pattern + nargs = action.nargs + + # the default (None) is assumed to be a single argument + if nargs is None: + nargs_pattern = '(-*A-*)' + + # allow zero or one arguments + elif nargs == OPTIONAL: + nargs_pattern = '(-*A?-*)' + + # allow zero or more arguments + elif nargs == ZERO_OR_MORE: + nargs_pattern = '(-*[A-]*)' + + # allow one or more arguments + elif nargs == ONE_OR_MORE: + nargs_pattern = '(-*A[A-]*)' + + # allow any number of options or arguments + elif nargs == REMAINDER: + nargs_pattern = '([-AO]*)' + + # allow one argument followed by any number of options or arguments + elif nargs == PARSER: + nargs_pattern = '(-*A[-AO]*)' + + # all others should be integers + else: + nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs) + + # if this is an optional action, -- is not allowed + if action.option_strings: + nargs_pattern = nargs_pattern.replace('-*', '') + nargs_pattern = nargs_pattern.replace('-', '') + + # return the pattern + return nargs_pattern + + # ======================== + # Value conversion methods + # ======================== + def _get_values(self, action, arg_strings): + # for everything but PARSER args, strip out '--' + if action.nargs not in [PARSER, REMAINDER]: + arg_strings = [s for s in arg_strings if s != '--'] + + # optional argument produces a default when not present + if not arg_strings and action.nargs == OPTIONAL: + if action.option_strings: + value = action.const + else: + value = action.default + if isinstance(value, str): + value = self._get_value(action, value) + self._check_value(action, value) + + # when nargs='*' on a positional, if there were no command-line + # args, use the default if it is anything other than None + elif (not arg_strings and action.nargs == ZERO_OR_MORE and + not action.option_strings): + if action.default is not None: + value = action.default + else: + value = arg_strings + self._check_value(action, value) + + # single argument or optional argument produces a single value + elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]: + arg_string, = arg_strings + value = self._get_value(action, arg_string) + self._check_value(action, value) + + # REMAINDER arguments convert all values, checking none + elif action.nargs == REMAINDER: + value = [self._get_value(action, v) for v in arg_strings] + + # PARSER arguments convert all values, but check only the first + elif action.nargs == PARSER: + value = [self._get_value(action, v) for v in arg_strings] + self._check_value(action, value[0]) + + # all other types of nargs produce a list + else: + value = [self._get_value(action, v) for v in arg_strings] + for v in value: + self._check_value(action, v) + + # return the converted value + return value + + def _get_value(self, action, arg_string): + type_func = self._registry_get('type', action.type, action.type) + if not _callable(type_func): + msg = _('%r is not callable') + raise ArgumentError(action, msg % type_func) + + # convert the value to the appropriate type + try: + result = type_func(arg_string) + + # ArgumentTypeErrors indicate errors + except ArgumentTypeError: + name = getattr(action.type, '__name__', repr(action.type)) + msg = str(_sys.exc_info()[1]) + raise ArgumentError(action, msg) + + # TypeErrors or ValueErrors also indicate errors + except (TypeError, ValueError): + name = getattr(action.type, '__name__', repr(action.type)) + args = {'type': name, 'value': arg_string} + msg = _('invalid %(type)s value: %(value)r') + raise ArgumentError(action, msg % args) + + # return the converted value + return result + + def _check_value(self, action, value): + # converted value must be one of the choices (if specified) + if action.choices is not None and value not in action.choices: + args = {'value': value, + 'choices': ', '.join(map(repr, action.choices))} + msg = _('invalid choice: %(value)r (choose from %(choices)s)') + raise ArgumentError(action, msg % args) + + # ======================= + # Help-formatting methods + # ======================= + def format_usage(self): + formatter = self._get_formatter() + formatter.add_usage(self.usage, self._actions, + self._mutually_exclusive_groups) + return formatter.format_help() + + def format_help(self): + formatter = self._get_formatter() + + # usage + formatter.add_usage(self.usage, self._actions, + self._mutually_exclusive_groups) + + # description + formatter.add_text(self.description) + + # positionals, optionals and user-defined groups + for action_group in self._action_groups: + formatter.start_section(action_group.title) + formatter.add_text(action_group.description) + formatter.add_arguments(action_group._group_actions) + formatter.end_section() + + # epilog + formatter.add_text(self.epilog) + + # determine help from format above + return formatter.format_help() + + def format_version(self): + import warnings + warnings.warn( + 'The format_version method is deprecated -- the "version" ' + 'argument to ArgumentParser is no longer supported.', + DeprecationWarning) + formatter = self._get_formatter() + formatter.add_text(self.version) + return formatter.format_help() + + def _get_formatter(self): + return self.formatter_class(prog=self.prog) + + # ===================== + # Help-printing methods + # ===================== + def print_usage(self, file=None): + if file is None: + file = _sys.stdout + self._print_message(self.format_usage(), file) + + def print_help(self, file=None): + if file is None: + file = _sys.stdout + self._print_message(self.format_help(), file) + + def print_version(self, file=None): + import warnings + warnings.warn( + 'The print_version method is deprecated -- the "version" ' + 'argument to ArgumentParser is no longer supported.', + DeprecationWarning) + self._print_message(self.format_version(), file) + + def _print_message(self, message, file=None): + if message: + if file is None: + file = _sys.stderr + file.write(message) + + # =============== + # Exiting methods + # =============== + def exit(self, status=0, message=None): + if message: + self._print_message(message, _sys.stderr) + _sys.exit(status) + + def error(self, message): + """error(message: string) + + Prints a usage message incorporating the message to stderr and + exits. + + If you override this in a subclass, it should not return -- it + should either exit or raise an exception. + """ + self.print_usage(_sys.stderr) + args = {'prog': self.prog, 'message': message} + self.exit(2, _('%(prog)s: error: %(message)s\n') % args) diff --git a/cross.py b/cross.py new file mode 100755 index 0000000..c7b99d3 --- /dev/null +++ b/cross.py @@ -0,0 +1,45 @@ +#!/usr/bin/python +import sys +import os +from copy import * +from types import * +from random import * +from Tree import Tree +import History +from Genome import * +from DCJ import DCJ +from MultiLinDCJ import MultiLinDCJ +from LinRev import LinRev +from CircRev import CircRev + +if len(sys.argv) != 4: + print "Usage: python cross.py working-dir n" + sys.exit(1) + +History.Genome = DCJ #CircRev #MultiLinDCJ + +wdir = sys.argv[1] +n = int(sys.argv[2]) +if n < 1: n = 500 +m = int(sys.argv[3]) +if m < 1: m = 500 + +T = Tree(wdir) +h = History.History(T, wdir) +l = sorted(os.listdir('data/'+wdir+'/')) +l = [s for s in l if s[0] != '.' and s != 'T' and s != 'G'] +n = min(n, len(l)) +l = l[0:n] + +for i in xrange(0,n): + h.read_cand(l[i]) + +h.neigh2() + +for i in xrange(m,n): + h.read_cand(l[i]) + +h.opt_neigh() +h.write() +print h +print h.score() diff --git a/pivo.py b/pivo.py new file mode 100755 index 0000000..9eb7344 --- /dev/null +++ b/pivo.py @@ -0,0 +1,172 @@ +#!/usr/bin/python +import sys +import os +import argparse +from Tree import Tree +import History +from Genome import * +from DCJ import DCJ +from MultiLinDCJ import MultiLinDCJ +from CircDCJ import CircDCJ +from LinRev import LinRev +from CircRev import CircRev +from copy import copy + + +def chain (g2, h2): # we assume that the last + g, h = copy(g2), copy(h2) + d = g[-1] - 1 + h = [x+d for x in h] + g.extend(h[1:]) + return g + +def concat (g2, h2): + g, h = copy(g2), copy(h2) + d = g[-1] + h = [x+d for x in h] + g.extend(h) + return g + +def insert (f, g, h): # ZLEEE + return concat(concat(f,g), h) + +def LR (g): + return LinRev(" ".join([str(x) for x in g])+" $") + +#H, SH = [1, 3, 2, 4], [1, 7, 3, 5, 4, 6, 2, 8] +#H1 = [1, 3]; H2 = [2, 4] + +#g = CircRev (id_cgenome(23)) +#h = CircRev (random_circular_genome(23)) +#print g.dist (h) +#print g.dist2 (h) + +#g1 = MultiLinDCJ ("-1 -7 4 -2 -6 9 -3 5 -8 $") +#g2 = MultiLinDCJ ("1 7 8 9 2 3 4 5 6 $") +#g3 = MultiLinDCJ ("2 9 4 6 -3 8 1 5 -7 $") +#g1.median (g1, g2, g3) +#sys.exit(); + +parser = argparse.ArgumentParser(prog='pivo') +#parser.add_argument('integers', metavar='N', type=int, nargs='+', help='an integer for the accumulator') +#parser.add_argument('--sum', dest='accumulate', action='store_const', +# const=sum, default=max, +# help='sum the integers (default: find the max)') +parser.add_argument('wdir', + help = 'the working directory') +parser.add_argument('n', + type=int, + help = 'the number of reconstructed histories') +parser.add_argument('p', + type=int, + help = 'how often we start from the best history') +parser.add_argument('-m', '--model', + choices = ['dcj', 'lin-dcj', 'circ-dcj', 'rev', 'circ-rev'], + default = 'dcj', + #metavar = 'M', + help = 'the genome model used; either dcj, multilinear dcj,' + +'circular dcj, reversal, or circular reversal ' + +'(dcj is the default)') +parser.add_argument('-s', '--strategy', + type = int, + choices = [0, 1, 2, 3, 4], + default = 2, + #metavar = 'M', + help = '0 - steinerization\r' + +'1 - medians\n' + +'2 - medians with neighbours\n' + +'3 - all neighbours\n' + +'4 - better neighbours\n') +parser.add_argument('-x', + type=int, + default = -1, + help = 'start [when partitioning ') +parser.add_argument('-y', + type=int, + default = 20, + help = 'chunks size [when partitioning ') +parser.add_argument('-b', + type=int, + default = 0, + help = 'start [when partitioning ') + +args = parser.parse_args() + +print args + +wdir = args.wdir +n = args.n +if n < 1: n = 500 +p = args.p +if p < 0: p = 10 + +print 'model:', args.model +if args.model == 'dcj': + History.Genome = DCJ +elif args.model == 'lin-dcj': + History.Genome = MultiLinDCJ +elif args.model == 'circ-dcj': + History.Genome = CircDCJ +elif args.model == 'rev': + History.Genome = LinRev +elif args.model == 'circ-rev': + History.Genome = CircRev + +T = Tree(wdir) +h = History.History(T, wdir) +#h.rand_hist(2) + +s = args.strategy +if s == 0: + h.local_opt = h.local_opt_steinerization + Genome.median = Genome.single_median + s = 'steinerization' +elif s == 1: + h.local_opt = h.local_opt_medians + Genome.median = Genome.single_median + s = 'medians' +elif s == 2: + h.local_opt = h.local_opt_medians + Genome.median = Genome.median_cloud + s = 'medians with neighbours' +elif s == 3: + h.local_opt = h.local_opt_neighbours + s = 'all neighbours' +elif s == 4: + h.local_opt = h.local_opt_better_neighbours + s = 'better neighbours' +print 'strategy:', s + +h.x = args.x +h.chunks = args.y + +#l = os.listdir('data/'+wdir+'/') +#l = sorted ([s for s in l if '0' <= s[0] <= '9']) +#l = l[args.b : args.b+20] +#for hh in l: + #h.read(hh) + #s = h.score() + #h.local_opt() + #s2 = h.score() + #if s2 < s: + #h.write() + #print "YAY" + #print '-------------------------------------------------------------------' +#sys.exit() + +for i in xrange(0,n): + if p==0: + h.rand_hist(1) + elif i%p == 0: + l = os.listdir('data/'+wdir+'/') + l = [s for s in l if '0' <= s[0] <= '9'] + print l + if len(l) > 0: + best = min(l) + h.read (best) + print best + print h.score() + else: h.rand_hist(1) + h.local_opt() + #if p>0: h.mutate() + #h.write() diff --git a/prepare.py b/prepare.py new file mode 100755 index 0000000..fc8aee2 --- /dev/null +++ b/prepare.py @@ -0,0 +1,31 @@ +#!/usr/bin/python +import sys +import os +from Tree import Tree +import History +from Genome import * +from DCJ import DCJ +from MultiLinDCJ import MultiLinDCJ +from LinRev import LinRev +from CircRev import CircRev +from copy import copy + +if len(sys.argv) != 3: + print "Usage: python prepare.py working-dir n" + sys.exit(1) + +History.Genome = MultiLinDCJ + +print len(sys.argv) +print sys.argv + +wdir = sys.argv[1] +n = int(sys.argv[2]) +if n < 1: n = 500 + +T = Tree(wdir) +h = History.History(T, wdir) + +for i in xrange(0,n): + h.rand_hist(2) + h.write("init"+str(i)) diff --git a/pyxfigs.py b/pyxfigs.py new file mode 100644 index 0000000..7189c26 --- /dev/null +++ b/pyxfigs.py @@ -0,0 +1,247 @@ +# -*- coding: utf-8 -*- +from pyx import * +from cmath import * + +text.preamble(r""" +\parindent=0pt\ +\let\phi\varphi +\font\fivebf =cmbx10 scaled 500 +\font\sevenbf=cmbx10 scaled 700 +\font\tenbf =cmbx10 +\font\fivemb =cmmib10 scaled 500 +\font\sevenmb=cmmib10 scaled 700 +\font\tenmb =cmmib10 +\def\boldmath{\textfont0=\tenbf \scriptfont0=\sevenbf + \scriptscriptfont0=\fivebf \textfont1=\tenmb + \scriptfont1=\sevenmb \scriptscriptfont1=\fivemb} +""") +C=canvas.canvas() +u=1; v=1; addr="" + +cc = C +current_color = color.rgb.black +current_pos = 0j + +def setuv(uu,vv): global u,v; u=uu; v=vv; +def setwdir(wdir): + global addr + addr = wdir + if addr[-1] != '/': addr = addr+'/' + +def setcolor (c = None): + global current_color; + if c == None: current_color = color.rgb.black + else: current_color= c; + +def setcanvas (canvas = None): + global cc; + if canvas == None: cc = C + else: cc = canvas + +def newcanvas(): + global cc; + cc = canvas.canvas() + return cc + +def cpaste (z=0j, a=0, f=1.0, c = None): + global cc, u, v, C; + z = complex(z) + if c == None: C.insert (cc, [trafo.translate (z.real*u, z.imag*v), trafo.scale (f), trafo.rotate (a)]) + else: c.insert (cc, [trafo.translate (z.real*u, z.imag*v), trafo.scale (f), trafo.rotate (a)]) + +def paste (z=0j, a=0, f=1.0, c = None): + global cc, C; + cpaste (z, a, f, c); + if c == None: cc = C + else: cc = c + +def new(): global C, cc; C = cc = canvas.canvas(); +def newa(x,y,xt="$x$",yt="$y$"): new(); axes(-x,-y,x,y,xt,yt); +def newp(x,y,xt="",yt=""): new(); plainaxes(-x,-y,x,y,xt,yt); +def newpa(x,y,xt="$x$",yt="$y$"): new(); axes(0,0,x,y,xt,yt); +def newpp(x,y,xt="",yt=""): new(); plainaxes(0,0,x,y,xt,yt); +def newc(x,y): new(); axes(-x,-y,x,y,r"$\Re$",r"$\Im$"); +def save(f): + global C, cc; + #paste(); + C.writePDFfile(addr+f); + +def circ(z, col=None, fill=100, r=0.06): + global cc, u, v, current_color; + if col == None: col = current_color + z = complex(z) + #cc.stroke (path.circle(u*z.real, v*z.imag, 0.06*u), [col]); + cc.draw(path.circle(u*z.real,v*z.imag,r*u), [deco.stroked(), deco.filled([color.grey(0.01*fill)]), col]) +def dot(z, col=None, r=0.06): + global cc, u, v, current_color; + if col == None: col = current_color + z = complex(z) + cc.fill (path.circle(u*z.real, v*z.imag, r*u), [col]); +def bdot(z, col=None): dot(z,col,0.09) +def node(z,r,t="",col=100): + global cc, u, v + cc.draw(path.circle(u*z.real,v*z.imag,u*r), [deco.stroked(), deco.filled([color.grey(0.01*col)]), current_color]) + label(z,t) + +# styl - 0, 1, 2 = solid, dashed, dotted +st = [style.linestyle.solid, style.linestyle.dashed, style.linestyle.dotted] +# line(z,w,s) kde z, w su komplexne cisla, nakresli ciaru od z ku w stylom s +# r- relative, b- bold +# arrow(z,w,s) to iste, ale sipka; arrow2 je obojstranna sipka +def line (z, w=0j, s=0, col=None, lw=0.02): + global cc, u, v, current_color + if col == None: col = current_color + z = complex(z); w = complex(w) + cc.stroke (path.line(u*z.real, v*z.imag, u*w.real, v*w.imag), [st[s], col, style.linewidth(lw)]) +def bline (z, w=0j, s=0, col=None): line (z, w, s, col, 0.05) +def rline (z, w, s=0, col=None, lw=0.02): line (z, z+w, s, col, lw) +def rbline (z, w, s=0, col=None, lw=0.02): bline (z, z+w, s, col, lw) +def moveto (z): + global current_pos + current_pos = complex(z) +def lineto (z, s=0, col=None, lw=0.02): + global current_pos + z = complex(z) + line (current_pos, z, s, col, lw) + current_pos = z +def blineto (z, s=0, col=None): lineto (z, s, col, 0.05) +def arrow(z, w, s=0, col=None, lw = 0.02): + global cc, u, v, current_color + if col == None: col = current_color + z = complex(z); w = complex(w) + cc.stroke (path.line (u*z.real, v*z.imag, u*w.real, v*w.imag), [st[s], deco.earrow.normal, col, style.linewidth(lw)]); +def barrow(z, w, s=0, col=None): arrow (z, w, s, col, 0.05) +def arrow2(z, w=0j, s=0, col=None, lw = 0.02): + global cc, u, v, current_color + if col == None: col = current_color + z = complex(z); w = complex(w) + cc.stroke (path.line (u*z.real, v*z.imag, u*w.real, v*w.imag), [st[s], deco.earrow.normal, deco.barrow.normal, col, style.linewidth(lw)]); +def barrow2(z, w=0j, s=0, col=None): arrow2 (z, w, s, col, 0.05) + +# label(z,t) - text t na poziciu z; t/b/l/r top/bottom/left/right + kombinacie +def label(z,t): global cc,u,v; cc.text(u*z.real,v*z.imag,t,[text.halign.boxcenter,text.valign.middle]); +def labelt(z,t): global cc,u,v; cc.text(u*z.real,v*z.imag+.25,t,[text.halign.boxcenter,text.valign.bottom]); +def labelb(z,t): global cc,u,v; cc.text(u*z.real,v*z.imag-.25,t,[text.halign.boxcenter,text.valign.top]); +def labell(z,t): global cc,u,v; cc.text(u*z.real-.25,v*z.imag,t,[text.halign.boxright,text.valign.middle]); +def labelr(z,t): global cc,u,v; cc.text(u*z.real+.25,v*z.imag,t,[text.halign.boxleft,text.valign.middle]); +def labeltr(z,t): global cc,u,v; cc.text(u*z.real+.2,v*z.imag+.2,t,[text.halign.boxleft,text.valign.bottom]); +def labeltl(z,t): global cc,u,v; cc.text(u*z.real-.2,v*z.imag+.2,t,[text.halign.boxright,text.valign.bottom]); +def labelbr(z,t): global cc,u,v; cc.text(u*z.real+.2,v*z.imag-.2,t,[text.halign.boxleft,text.valign.top]); +def labelbl(z,t): global cc,u,v; cc.text(u*z.real-.2,v*z.imag-.2,t,[text.halign.boxright,text.valign.top]); +def labela(w, a, t): # TODO: rozsirit uhly a upravit tie vzdialenosti + while a < 0: a += 360 + while a > 360: a -= 360 + if a > 315 or a < 45: + labelr(w,t) + elif 45 <= a < 135: + labelt(w,t) + elif 135 <= a < 215: + labell(w,t) + else: + labelb(w,t) + + +def rect(z,w,s=0): rrect(z,w-z,s); +def rrect(z,w,s=0): global cc,u,v; cc.draw(path.rect(u*z.real,v*z.imag,u*w.real,v*w.imag), [deco.stroked(),st[s]]); +def block(z,w,col=None,t=""): rblock(z,w-z,t,col); +def rblock(z,w,t="",col=100): + global cc, u, v, current_color + cc.draw(path.rect(u*z.real,v*z.imag,u*w.real,v*w.imag), [deco.stroked(), deco.filled([color.grey(0.01*col)]), current_color]) + label(z+w/2,t) + +def circle(z, r, col=None, s=0): ellipse(z, r+r*1j, col, s) +def ellipse (z, r, col=None, s=0): + global cc, u, v, current_color; + if col == None: col = current_color + z, r = complex(z), complex(r) + cc.draw(path.circle(u*z.real,v*z.imag, 1), [deco.stroked(), col, st[s], trafo.scale(sx=r.real*u, sy=r.imag*v)]) + +def poly(p, s=0, col=None, fill=None, lw = 0.02): + global cc, u, v, current_color + if col == None: col = current_color + l = path.path(path.moveto(u*p[0].real,v*p[0].imag)) + for pt in p[1:]: + l.append(path.lineto(u*pt.real,v*pt.imag)) + l.append(path.closepath()) + if fill == None: cc.stroke(l, [st[s], col, style.linewidth(lw)]) + else: cc.fill(l, [st[s], col, style.linewidth(lw), deco.stroked(), deco.filled([fill])]) +def fpoly(p, s=0, col=None, lw = 0.02): poly(p, s, col, color.grey(0.01*col), lw); +def rpoly(p): + global cc,u,v + l = path.path(path.moveto(u*p[0].real,v*p[0].imag)) + for pt in p[1:]: + l.append(path.rlineto(u*pt.real,v*pt.imag)) + l.append(path.closepath()) + cc.stroke(l); +def rfpoly(p,col=0): + global cc,u,v + l = path.path(path.moveto(u*p[0].real,v*p[0].imag)) + for pt in p[1:]: + l.append(path.rlineto(u*pt.real,v*pt.imag)) + l.append(path.closepath()) + cc.fill(l,[deco.stroked(),deco.filled([color.grey(0.01*col)])]); + +def angle(z, r, a1, a2, s=0, col=None, lw=0.02): + global cc, u, current_color; + if col==None: col = current_color + cc.stroke(path.path(path.arc(u*z.real,v*z.imag,r*u,a1,a2)),[st[s], col, style.linewidth(lw)]); + +def aarrow(z, r, a1, a2, s=0, col=None, lw=0.02): #TODO: toto nejak lepsie + global cc, u, current_color; + if col==None: col = current_color + cc.stroke(path.path(path.arc(u*z.real,v*z.imag,r*u,a1,a2)),[st[s], deco.earrow.normal, col, style.linewidth(lw)]); + +def raarrow(z, r, a1, a2, s=0, col=None, lw=0.02): #TODO: toto nejak lepsie + global cc, u, current_color; + if col==None: col = current_color + cc.stroke(path.path(path.arc(u*z.real,v*z.imag,r*u,a1,a2)),[st[s], deco.barrow.normal, col, style.linewidth(lw)]); + +def plainaxes(x1,y1,x2,y2,xt="", yt=""): + global cc, u, v + arrow2(x1-.5+0j, x2+.5+0j); arrow2((y1-.5)*1j,(y2+.5)*1j) + labelr(x2+0.5+0j,xt); labelt((y2+0.5)*1j,yt) + +def axes (x1,y1,x2,y2,xt="$x$",yt="$y$"): + global cc,u,v + plainaxes(x1,y1,x2,y2,xt,yt) + for i in xrange(x1,x2+1): + cc.stroke(path.line(u*i,0.1,u*i,-0.1)) + if i==0: cc.text(u*i-.1,-.4,r"0",[text.halign.boxright]); + else: cc.text(u*i,-.4,str(i),[text.halign.boxcenter]); + for i in xrange(y1,y2+1): + cc.stroke(path.line(-0.1,u*i,0.1,u*i)) + if i!=0: + if i==1: cc.text(-.25,u,"$i$",[text.halign.boxright,text.valign.middle]); + elif i==-1: cc.text(-.25,-u,"$-i$",[text.halign.boxright,text.valign.middle]); + else: cc.text(-.25, u*i,"$"+str(i)+"i$",[text.halign.boxright,text.valign.middle]); + +def parrow (p, s=0, col=None, lw = 0.02): + global cc, current_color; + if col == None: col = current_color + cc.stroke(p, [st[s], deco.earrow.normal, col, style.linewidth(lw)]); +def curve (w, x, y, z, s=0, col=None, lw = 0.02): + global cc, u, v, current_color + if col==None: col = current_color + cc.stroke (path.curve (u*w.real, v*w.imag, u*x.real, v*x.imag, u*y.real, v*y.imag, u*z.real, v*z.imag,), + [st[s], col, style.linewidth(lw)]) +def bcurve (w, x, y, z, s = 0, col = None): curve (w, x, y, z, s, col, 0.05) +def carrow (w, x, y, z, s = 0, col = None, lw = 0.02): + global cc, u, v, current_color + if col==None: col = current_color + cc.stroke (path.curve (u*w.real, v*w.imag, u*x.real, v*x.imag, u*y.real, v*y.imag, u*z.real, v*z.imag), + [st[s], deco.earrow.normal, col, style.linewidth(lw)]) +def carrow2 (w, x, y, z, s = 0, col = None, lw = 0.02): + global cc, u, v, current_color + if col==None: col = current_color + cc.stroke (path.curve (u*w.real, v*w.imag, u*x.real, v*x.imag, u*y.real, v*y.imag, u*z.real, v*z.imag), + [st[s], deco.earrow.normal, deco.barrow.normal, col, style.linewidth(lw)]) + +def vlnka (w, z, n = 5, s = 0, col = None, lw = 0.02): #TODO: vseobecnejsie + global cc, u, v, current_color + if col==None: col = current_color + l = path.path(path.moveto(u*w.real,v*w.imag)) + for i in xrange(0, 85): + pt = w*(1-i/100.0)+z*(i/100.0) + 0.03*sin(i/85.0*2*pi*n)*1j + l.append(path.lineto(u*pt.real,v*pt.imag)) + l.append(path.lineto(u*z.real,v*z.imag)) + cc.stroke(l, [deformer.smoothed(2.5),deco.earrow.normal]) \ No newline at end of file diff --git a/score.py b/score.py new file mode 100755 index 0000000..20b5759 --- /dev/null +++ b/score.py @@ -0,0 +1,29 @@ +#!/usr/bin/python +import sys +import os +from copy import * +from types import * +from random import * +from Tree import Tree +import History +from Genome import * +from DCJ import DCJ +from MultiLinDCJ import MultiLinDCJ +from CircDCJ import CircDCJ +from LinRev import LinRev +from CircRev import CircRev + +if len(sys.argv) != 3: + print "Usage: python score.py working-dir file" + sys.exit(1) + +History.Genome = CircRev #DCJ #CircDCJ #Rev #MultiLinDCJ + +wdir = sys.argv[1] +f = sys.argv[2] + +T = Tree(wdir) +h = History.History(T, wdir) +h.read (f) +print h.score() +#h.draw_tree(5, 0.5) diff --git a/simul.py b/simul.py new file mode 100755 index 0000000..a25c2b9 --- /dev/null +++ b/simul.py @@ -0,0 +1,22 @@ +#!/usr/bin/python +import sys +from copy import * +from types import * +from random import * +from Genome import * +from Tree import Tree +import History +from LinRev import LinRev + +if len(sys.argv) != 4: + print "Usage: python simul.py working-dir mean #genes" + sys.exit(1) + +wdir = sys.argv[1] +mean = int(sys.argv[2]) +n = int(sys.argv[3]) + +History.Genome = LinRev +T = Tree(wdir) +h = History.History(T, wdir, True) +h.simul_data(mean,n) diff --git a/testP.py b/testP.py new file mode 100755 index 0000000..bd7c28c --- /dev/null +++ b/testP.py @@ -0,0 +1,55 @@ +#!/usr/bin/python +import sys +import os +from Tree import Tree +import History +from Genome import * +from DCJ import DCJ +from MultiLinDCJ import MultiLinDCJ +from TestDCJ import TestDCJ +from LinRev import LinRev +from CircRev import CircRev +from copy import copy + +if len(sys.argv) != 4: + print "Usage: python genm.py working-dir n p" + sys.exit(1) + +History.Genome = MultiLinDCJ #TestDCJ +#TestDCJ.nMedianCalls = 0 + +print len(sys.argv) +print sys.argv + +wdir = sys.argv[1] +n = int(sys.argv[2]) +if n < 1: n = 500 +p = int(sys.argv[3]) +if p < 0: p = 10 + +T = Tree(wdir) +h = History.History(T, wdir) +h.rand_hist(2) +suf = "" #'-'+str(n)+'-'+str(p)+'.txt' +init = 0 + +#h.rand_hist(2) +#h.local_opt = h.local_opt_desc +#h.local_opt() +h.local_opt = h.local_opt_medians +for i in xrange(0,n): + if p==0: + h.rand_hist(2) + elif i%p == 0: + l = os.listdir('data/'+wdir+'/') + l = [s for s in l if '0' <= s[0] <= '9'] + print l + if len(l) > 0: + best = min(l) + h.read (best) + print best + print h.score() + else: h.rand_hist(2) + h.mutate() + h.local_opt() + h.write() diff --git a/testS.py b/testS.py new file mode 100755 index 0000000..456f929 --- /dev/null +++ b/testS.py @@ -0,0 +1,55 @@ +#!/usr/bin/python +import sys +import os +from Tree import Tree +import History +from Genome import * +from DCJ import DCJ +from MultiLinDCJ import MultiLinDCJ +from TestDCJ2 import TestDCJ2 +from LinRev import LinRev +from CircRev import CircRev +from copy import copy + +if len(sys.argv) != 4: + print "Usage: python genm.py working-dir n p" + sys.exit(1) + +History.Genome = MultiLinDCJ #TestDCJ2 +#TestDCJ2.nMedianCalls = 0 + +print len(sys.argv) +print sys.argv + +wdir = sys.argv[1] +n = int(sys.argv[2]) +if n < 1: n = 500 +p = int(sys.argv[3]) +if p < 0: p = 10 + +T = Tree(wdir) +h = History.History(T, wdir) +h.rand_hist(2) +suf = "" #'-'+str(n)+'-'+str(p)+'.txt' +init = 0 + +#h.rand_hist(2) +#h.local_opt = h.local_opt_desc +#h.local_opt() +h.local_opt = h.local_opt_medians +for i in xrange(0,n): + if p==0: + h.rand_hist(2) + elif i%p == 0: + l = os.listdir('data/'+wdir+'/') + l = [s for s in l if '0' <= s[0] <= '9'] + print l + if len(l) > 0: + best = min(l) + h.read (best) + print best + print h.score() + else: h.rand_hist(2) + h.mutate() + h.local_opt() + h.write()