diff --git a/smallrnaseq/app.py b/smallrnaseq/app.py index 25841ea..992fa73 100755 --- a/smallrnaseq/app.py +++ b/smallrnaseq/app.py @@ -193,7 +193,8 @@ def map_mirnas(self): return new.to_csv(os.path.join(out,'novel_mirna.csv'), index=False) #pad mature novel and write to fasta for counting - novpad = base.get_mature_padded(new, idkey='id', seqkey='mature') + novpad = base.get_mature_padded(new, idkey='mature_id', seqkey='mature') + novpad = novpad.drop_duplicates('name') utils.dataframe_to_fasta(novpad,os.path.join(out,'novel.fa'), seqkey='sequence', idkey='name') novel.create_report(new, cl, self.species, outfile=os.path.join(out, 'novel.html')) diff --git a/smallrnaseq/data/styles.css b/smallrnaseq/data/styles.css index 0af2627..090b033 100644 --- a/smallrnaseq/data/styles.css +++ b/smallrnaseq/data/styles.css @@ -129,7 +129,7 @@ a:active { color: red; } margin-right: 5px; float: right; height: 650px; - width: 20%; + width: 15%; position: fixed; overflow:hidden; top: 30; @@ -139,14 +139,15 @@ a:active { color: red; } border: 0px solid transparent; font-family: monospace, sans-serif; overflow-y: auto; - height: 600px; - width: 400px; - display:block; + overflow-x: auto; + height: 640px; + width: 300px; + display: block; } .sidebar td, th { text-align: left; - font-size: 14px; + font-size: 12px; } .main, .aside diff --git a/smallrnaseq/novel.py b/smallrnaseq/novel.py index 57455f2..27e9325 100644 --- a/smallrnaseq/novel.py +++ b/smallrnaseq/novel.py @@ -589,11 +589,11 @@ def find_mirnas(reads, ref_fasta, score_cutoff=.8, read_cutoff=50, species='', anchor = df.iloc[0] st = anchor.start end = anchor.end - m = df.loc[(abs(df.start-st)<=3) & (abs(df.end-end)<=5)] + m = df.loc[(abs(df.start-st)<=3) & (abs(df.end-end)<=5)].copy() m['mature'] = True X.append(m) #remainder of reads assigned as non-mature - o = df.loc[-df.index.isin(m.index)] + o = df.loc[-df.index.isin(m.index)].copy() o['mature'] = False X.append(o) @@ -638,9 +638,19 @@ def assign_names(df, 
species=''):
     """Assign name to novel mirna, precursor/mature ids should allow consistent
        identification across datasets"""
 
-    df['id'] = df.apply( lambda x: species+'_novel_'+x.chrom+'_'+str(x.start),1 )
+    df['precursor_id'] = df.apply( lambda x: species+'_novel_'+x.chrom+'_'+str(x.start),1 )
+    df['mature_id'] = df.apply( lambda x: species+'_'+encode_name(x.mature), 1 )
     return
 
+def encode_name(s):
+    """Return an 8-char tag for sequence s: base64 of its MD5 digest, with '/' mapped to 'x'"""
+    import base64
+    import hashlib
+    digest = hashlib.md5(s.encode()).digest()
+    # b64encode works on Python 2 and 3; bytes.encode('base64') does not exist on Python 3
+    tag = base64.b64encode(digest).decode()[:8]
+    return tag.replace('/', 'x')  # str.replace returns a new string, so return that result
+
 def forna_url(precursor, mature, star=None, struct=None):
     """Create link to view mirna structure in forna web app"""
 
@@ -688,13 +698,13 @@ def create_report(df, reads, species=None, outfile='report.html'): h += '