
Commit

wip: add feature tables
jdkent committed Nov 4, 2024
1 parent 3b23585 commit 0cf1f62
Showing 2 changed files with 77 additions and 1 deletion.
51 changes: 50 additions & 1 deletion store/neurostore/models/data.py
@@ -278,7 +278,7 @@ class Study(BaseMixin, db.Model):
    public = db.Column(db.Boolean, default=True)
    level = db.Column(db.String)
    metadata_ = db.Column(JSONB)
    source = db.Column(db.String, index=True)
    source_id = db.Column(db.String, index=True)
    source_updated_at = db.Column(db.DateTime(timezone=True))
    base_study_id = db.Column(db.Text, db.ForeignKey("base_studies.id"), index=True)
@@ -538,6 +538,55 @@ class PointValue(BaseMixin, db.Model):
    user = relationship("User", backref=backref("point_values", passive_deletes=True))


class Pipeline(BaseMixin, db.Model):
__tablename__ = "pipelines"

    name = db.Column(db.String)
    description = db.Column(db.String)
    version = db.Column(db.String)
    study_dependent = db.Column(db.Boolean, default=False)
    ace_compatible = db.Column(db.Boolean, default=False)
    pubget_compatible = db.Column(db.Boolean, default=False)
    derived_from = db.Column(db.Text)


class PipelineConfig(BaseMixin, db.Model):
__tablename__ = "pipeline_configs"

    pipeline_id = db.Column(
        db.Text, db.ForeignKey("pipelines.id", ondelete="CASCADE"), index=True
    )
    config = db.Column(JSONB)
    config_hash = db.Column(db.String, index=True)
    pipeline = relationship("Pipeline", backref=backref("configs", passive_deletes=True))


class PipelineRun(BaseMixin, db.Model):
__tablename__ = "pipeline_runs"

    pipeline_id = db.Column(
        db.Text, db.ForeignKey("pipelines.id", ondelete="CASCADE"), index=True
    )
    config_id = db.Column(
        db.Text, db.ForeignKey("pipeline_configs.id", ondelete="CASCADE"), index=True
    )
    config = relationship("PipelineConfig", backref=backref("runs", passive_deletes=True))
    run_index = db.Column(db.Integer())


class PipelineRunResult(BaseMixin, db.Model):
__tablename__ = "pipeline_run_results"

    run_id = db.Column(
        db.Text, db.ForeignKey("pipeline_runs.id", ondelete="CASCADE"), index=True
    )
    base_study_id = db.Column(db.Text, db.ForeignKey("base_studies.id"), index=True)
    # the same categories of information can be extracted multiple times from a single
    # paper (e.g., multiple demographic groups, multiple software packages, etc.)
    feature_index = db.Column(db.Integer)
    feature_group = db.Column(db.String)  # e.g., task, disease, software, age
    feature = db.Column(db.String)  # e.g., stroop task, schizophrenia, fsl
    value = db.Column(db.Float)  # e.g., 0.67, 0.3, 0.5 (a measure of confidence for the result)
    run = relationship("PipelineRun", backref=backref("results", passive_deletes=True))

# from . import event_listeners # noqa E402

# del event_listeners
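
For context: the four tables above chain together, with a Pipeline owning versioned PipelineConfigs, each config being executed as one or more PipelineRuns, and each run emitting PipelineRunResult rows tied to a BaseStudy. A minimal sketch of how they might be populated (the session handling, the existing BaseStudy row, and all literal values are illustrative assumptions, not part of this commit):

# Illustrative only: wiring Pipeline -> PipelineConfig -> PipelineRun -> PipelineRunResult.
# Assumes a SQLAlchemy session (`session`) and an existing BaseStudy row (`base_study`).
pipeline = Pipeline(
    name="demographics",
    description="extract participant demographics from full text",
    version="1.0.0",
    study_dependent=False,
    ace_compatible=True,
    pubget_compatible=True,
)
config = PipelineConfig(
    pipeline=pipeline,
    config={"extractor": "example"},
    config_hash="0123abcd",  # illustrative hash of the config payload
)
session.add_all([pipeline, config])
session.flush()  # make generated ids available

run = PipelineRun(pipeline_id=pipeline.id, config=config, run_index=0)
result = PipelineRunResult(
    run=run,
    base_study_id=base_study.id,
    feature_index=0,          # first of possibly several extractions in this category
    feature_group="disease",  # task, disease, software, age, ...
    feature="schizophrenia",
    value=0.67,               # confidence for the extracted feature
)
session.add_all([run, result])
session.commit()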
27 changes: 27 additions & 0 deletions store/neurostore/tests/conftest.py
@@ -1,4 +1,6 @@
import pytest
import random
import json
from os import environ
from neurostore.models.data import Analysis, Condition, Study
from sqlalchemy.orm import scoped_session, sessionmaker
@@ -586,3 +588,28 @@ def simple_neurosynth_annotation(session, ingest_neurosynth):
    session.commit()

    return smol_annot


@pytest.fixture(scope="function")
def create_demographic_features(session, ingest_neurosynth, tmp_path):
    output_dir = tmp_path / "output" / "demographics" / "v1.0.0"
    output_dir.mkdir(exist_ok=True, parents=True)
    studies = Study.query.all()
    diseases = ["schizophrenia", "bipolar disorder", "depression", "healthy"]
    studies_data = [
        [
            {
                "age": random.randint(18, 100),
                "group": group,
            }
            for group in random.sample(diseases, k=random.randint(1, 2))
        ]
        for study in studies
    ]

    for study, study_data in zip(studies, studies_data):
        study_dir = output_dir / study.id
        study_dir.mkdir(parents=True, exist_ok=True)  # per-study subdirectory must exist before writing
        with open(study_dir / "results.json", "w") as f:
            for entry in study_data:
                json.dump(entry, f)
                f.write("\n")

    return output_dir
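
For reference, a sketch of a test that could consume this fixture (illustrative, not part of the commit): it checks that each ingested study gets a results.json containing one JSON object per line with "age" and "group" keys.

# Illustrative consumer of the create_demographic_features fixture above.
import json


def test_demographic_feature_files(create_demographic_features):
    output_dir = create_demographic_features
    result_files = list(output_dir.glob("*/results.json"))
    assert result_files  # one results.json per ingested study
    for path in result_files:
        with open(path) as f:
            entries = [json.loads(line) for line in f if line.strip()]
        assert entries
        assert all({"age", "group"} <= set(entry) for entry in entries)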
