Skip to content

Commit

Permalink
changed to hash novel mature names
Browse files Browse the repository at this point in the history
  • Loading branch information
dmnfarrell committed Apr 24, 2017
1 parent 1d0394a commit 9f799e4
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 12 deletions.
3 changes: 2 additions & 1 deletion smallrnaseq/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@ def map_mirnas(self):
return
new.to_csv(os.path.join(out,'novel_mirna.csv'), index=False)
#pad mature novel and write to fasta for counting
novpad = base.get_mature_padded(new, idkey='id', seqkey='mature')
novpad = base.get_mature_padded(new, idkey='mature_id', seqkey='mature')
novpad = novpad.drop_duplicates('name')
utils.dataframe_to_fasta(novpad,os.path.join(out,'novel.fa'),
seqkey='sequence', idkey='name')
novel.create_report(new, cl, self.species, outfile=os.path.join(out, 'novel.html'))
Expand Down
11 changes: 6 additions & 5 deletions smallrnaseq/data/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ a:active { color: red; }
margin-right: 5px;
float: right;
height: 650px;
width: 20%;
width: 15%;
position: fixed;
overflow:hidden;
top: 30;
Expand All @@ -139,14 +139,15 @@ a:active { color: red; }
border: 0px solid transparent;
font-family: monospace, sans-serif;
overflow-y: auto;
height: 600px;
width: 400px;
display:block;
overflow-x: auto;
height: 640px;
width: 300px;
display: block;
}

.sidebar td, th {
text-align: left;
font-size: 14px;
font-size: 12px;
}

.main, .aside
Expand Down
22 changes: 16 additions & 6 deletions smallrnaseq/novel.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,11 +589,11 @@ def find_mirnas(reads, ref_fasta, score_cutoff=.8, read_cutoff=50, species='',
anchor = df.iloc[0]
st = anchor.start
end = anchor.end
m = df.loc[(abs(df.start-st)<=3) & (abs(df.end-end)<=5)]
m = df.loc[(abs(df.start-st)<=3) & (abs(df.end-end)<=5)].copy()
m['mature'] = True
X.append(m)
#remainder of reads assigned as non-mature
o = df.loc[-df.index.isin(m.index)]
o = df.loc[-df.index.isin(m.index)].copy()
o['mature'] = False
X.append(o)

Expand Down Expand Up @@ -638,9 +638,19 @@ def assign_names(df, species=''):
"""Assign name to novel mirna, precursor/mature ids should allow consistent
identification across datasets"""

df['id'] = df.apply( lambda x: species+'_novel_'+x.chrom+'_'+str(x.start),1 )
df['precursor_id'] = df.apply( lambda x: species+'_novel_'+x.chrom+'_'+str(x.start),1 )
df['mature_id'] = df.apply( lambda x: species+'_'+encode_name(x.mature), 1 )
return

def encode_name(s):
    """Hash a sequence into a short string.

    The MD5 digest of ``s`` is base64-encoded and truncated to its first
    8 characters, so the same sequence always maps to the same short name.
    Any '/' in the result is replaced with 'x'.

    Args:
        s: sequence string to hash

    Returns:
        an 8 character string derived from the hash of ``s``
    """

    import base64
    import hashlib

    digest = hashlib.md5(s.encode()).digest()
    # base64.b64encode accepts bytes on both Python 2 and 3; the previous
    # bytes.encode('base64') form only exists on Python 2.
    short = base64.b64encode(digest).decode('ascii')[:8]
    # str.replace returns a new string, so its result must be returned;
    # calling it as a bare statement left the '/' characters in place.
    return short.replace('/', 'x')

def forna_url(precursor, mature, star=None, struct=None):
"""Create link to view mirna structure in forna web app"""

Expand Down Expand Up @@ -688,13 +698,13 @@ def create_report(df, reads, species=None, outfile='report.html'):
h += '<h3>novel miRNA predictions</h3>'
h += '</div>'
h += '<div class="sidebar">'
links = df[['id','mature_reads']].copy()
links['id'] = links.id.apply(lambda x: ('<a href=#%s > %s </a>' %(x,x)))
links = df[['mature_id','mature_reads','chrom']].copy()
links['mature_id'] = links.mature_id.apply(lambda x: ('<a href=#%s > %s </a>' %(x,x)))
h += links.to_html(escape=False, classes='sidebar', index=False)
h += '</div>'

df = df.copy()
df = df.set_index('id')
df = df.set_index('mature_id')

ens_sp = pd.read_csv(os.path.join(datadir, 'ensembl_names.csv'), index_col=0)
if species in ens_sp.index:
Expand Down

0 comments on commit 9f799e4

Please sign in to comment.