Skip to content

Commit

Permalink
remove ORDER BY from seqs and res pairs sqlite query
Browse files (browse the repository at this point in the history)
  • Loading branch information
colin-combe committed Oct 4, 2024
1 parent 3fb5a08 commit 16246e6
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 7 deletions.
15 changes: 9 additions & 6 deletions parser/process_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import sqlite3
import traceback

import orjson
import shutil
import socket
import sys
Expand Down Expand Up @@ -127,7 +126,6 @@ def json_sequences_and_residue_pairs(filepath, tmpdir):
The temporary directory to use for validation - an Sqlite DB is created here if given,
otherwise an in-memory sqlite DB is used.
"""
local_dir = os.path.dirname(filepath)
file = os.path.basename(filepath)
filewithoutext = os.path.splitext(file)[0]
temp_database = os.path.join(str(tmpdir), f'{filewithoutext}.db')
Expand Down Expand Up @@ -219,9 +217,14 @@ def json_sequences_and_residue_pairs(filepath, tmpdir):
WHERE mp1.link_site1 > 0 AND mp2.link_site1 > 0 AND pe1.is_decoy = false AND pe2.is_decoy = false
AND si.pass_threshold = true
GROUP BY pe1.dbsequence_id , dbs1.accession, (pe1.pep_start + mp1.link_site1 - 1), pe2.dbsequence_id, dbs2.accession , (pe2.pep_start + mp2.link_site1 - 1)
ORDER BY pe1.dbsequence_id , (pe1.pep_start + mp1.link_site1 - 1), pe2.dbsequence_id, (pe2.pep_start + mp2.link_site1 - 1)
;"""
# ORDER BY pe1.dbsequence_id , (pe1.pep_start + mp1.link_site1 - 1), pe2.dbsequence_id, (pe2.pep_start + mp2.link_site1 - 1)
#
# time sql execution
start_time = time.time()
cur.execute(sql)
elapsed_time = time.time() - start_time
logging.info(f"residue pair SQL execution time: {elapsed_time}")
rp_rows = cur.fetchall()
except (Exception, sqlite3.DatabaseError) as error:
raise error
Expand Down Expand Up @@ -469,8 +472,8 @@ def read_sequences_and_residue_pairs(filepath, upload_id, conn_str):
The path to the mzIdentML file to be validated.
upload_id : int
The upload id to use for the sequences and residue pairs.
sqlite_engine : sqlalchemy.engine.base.Connection
The sqlite engine to use for the sequences and residue pairs.
conn_str : str
The connection string to use for the sqlite database.
Returns
-------
Expand All @@ -484,7 +487,7 @@ def read_sequences_and_residue_pairs(filepath, upload_id, conn_str):
id_parser.parse()
except Exception as e:
print(f"Error parsing {filepath}")
raise(e)
raise e



Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def get_version(rel_path):

setup(
name="xi-mzidentml-converter",
version="0.2.6a1",
version="0.2.6a2",
description="xi-mzidentml-converter uses pyteomics (https://pyteomics.readthedocs.io/en/latest/index.html) to "
"parse mzIdentML files (v1.2.0) and extract crosslink information. Results are written to a "
"relational database (PostgreSQL or SQLite) using sqlalchemy.",
Expand Down

0 comments on commit 16246e6

Please sign in to comment.