-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGeneratemiRfamInput.py
44 lines (36 loc) · 1.17 KB
/
GeneratemiRfamInput.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# -*- coding: utf-8 -*-
"""
Created on Thu May 28 21:23:49 2015
@author: Richard
"""
from miRNA_target import *
# use this script to generate a fasta file with cremanei mature sequences
# and the targetscan miRNA family input file
# generate a fasta file with mature mirna sequences
# maps the value in column 1 of each row (used as the fasta header) to the
# sequence in column 7, upper-cased and converted to RNA (T -> U)
mature = {}
# the context manager guarantees the input file is closed even if a row
# fails to parse (e.g. fewer than 7 tab-separated columns)
with open('CRM_miRNAsCoordinatesFinal.txt') as infile:
    # discard the first line (presumably a column header -- confirm)
    infile.readline()
    for line in infile:
        line = line.rstrip()
        # ignore blank lines
        if line != '':
            line = line.split('\t')
            # replace T by U
            mature[line[0]] = line[6].upper().replace('T', 'U')

# write one 2-line fasta record per entry; the file is flushed and closed
# on exit from the with block, even if a write raises
with open('Cremanei_mature.fasta', 'w') as newfile:
    for mir in mature:
        newfile.write('>' + mir + '\n')
        newfile.write(mature[mir] + '\n')
# generate the targetscan mirna family input file
# create a dict of seed sequence and list of mirna pairs
# (seed_mirnas comes from the miRNA_target module; it is indexed below as
# seed -> sequence of mirna names)
seeds = seed_mirnas('Cremanei_mature.fasta')
# open file for writing; the context manager closes (and flushes) the file
# even if a write fails part-way through
with open('Cremanei_miRfam_info.txt', 'w') as newfile:
    # loop over the seed sequences
    for motif in seeds:
        # record only 1 mirna per family, grab first mirna
        # write mirna, seed, species ID
        # NOTE(review): '31234' is presumably the NCBI taxonomy ID used for
        # C. remanei -- confirm against the targetscan species list
        newfile.write(seeds[motif][0] + '\t' + motif + '\t' + '31234' + '\n')