Skip to content

Commit

Permalink
Merge pull request #115 from /issues/114
Browse files (browse the repository at this point in the history)
Fix ingesting bug that creates one file per table
  • Loading branch information
shntnu authored Dec 5, 2019
2 parents fb4aeef + c6d385c commit 3b89692
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 9 deletions.
5 changes: 2 additions & 3 deletions cytominer_database/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,11 @@ def into(input, output, name, identifier, skip_table_prefix=False):
# deprecated, use inspect.signature() or inspect.getfullargspec()
warnings.simplefilter("ignore", category=DeprecationWarning)

- target = "{}::{}".format(output, name)
- engine = create_engine(target)
+ engine = create_engine(output)
  con = engine.connect()

  df = pd.read_csv(source, index_col=0)
- df.to_sql(name=target, con=con, if_exists="append")
+ df.to_sql(name=name, con=con, if_exists="append")

def checksum(pathname, buffer_size=65536):
"""
Expand Down
8 changes: 4 additions & 4 deletions tests/commands/test_command_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ def test_run(dataset, runner):
for blob in dataset["ingest"]:
table_name = blob["table"].capitalize()

- target = "sqlite:///{}::{}".format(str(sqlite_file), table_name)
+ target = "sqlite:///{}".format(str(sqlite_file))
  engine = create_engine(target)
  con = engine.connect()

- df = pd.read_sql(target, con=con, index_col=0)
+ df = pd.read_sql(sql=table_name, con=con, index_col=0)

assert df.shape[0] == blob["nrows"]
assert df.shape[1] == blob["ncols"] + 1
Expand Down Expand Up @@ -80,11 +80,11 @@ def test_run_defaults(cellpainting, runner):
for blob in cellpainting["ingest"]:
table_name = blob["table"].capitalize()

- target = "sqlite:///{}::{}".format(str(sqlite_file), table_name)
+ target = "sqlite:///{}".format(str(sqlite_file))
  engine = create_engine(target)
  con = engine.connect()

- df = pd.read_sql(target, con=con, index_col=0)
+ df = pd.read_sql(sql=table_name, con=con, index_col=0)

assert df.shape[0] == blob["nrows"]
assert df.shape[1] == blob["ncols"] + 1
Expand Down
17 changes: 17 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ def pytest_generate_tests(metafunc):

@pytest.fixture
def cellpainting():
"""
Return configuration for a Cell Painting dataset.
- 3 compartment CSVs: Cells, Cytoplasm, Nuclei
- 1 image CSV
- No object.csv and therefore no munging
"""
return {
"config": "config.ini",
"data_dir": "tests/data_b",
Expand Down Expand Up @@ -49,6 +55,12 @@ def cellpainting():

@pytest.fixture
def htqc():
"""
Return configuration for a 3-channel image-based profiling dataset.
- 1 object CSV that comprises Cells, Cytoplasm, Nuclei
- 1 image CSV
- munging required
"""
return {
"config": "config.ini",
"data_dir": "tests/data_a",
Expand Down Expand Up @@ -82,6 +94,11 @@ def htqc():

@pytest.fixture
def qc():
"""
Return configuration for a QC dataset (only image table, no objects).
- 1 image CSV
- No object.csv and therefore no munging
"""
return {
"config": None,
"data_dir": "tests/data_c",
Expand Down
6 changes: 4 additions & 2 deletions tests/test_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,16 @@ def test_seed(dataset):
target="sqlite:///{}".format(str(sqlite_file))
)

+ assert os.path.exists(str(sqlite_file))
+
  for blob in ingest:
  table_name = blob["table"].capitalize()

- target = "sqlite:///{}::{}".format(str(sqlite_file), table_name)
+ target = "sqlite:///{}".format(str(sqlite_file))
  engine = create_engine(target)
  con = engine.connect()

- df = pd.read_sql(target, con=con, index_col=0)
+ df = pd.read_sql(sql=table_name, con=con, index_col=0)

assert df.shape[0] == blob["nrows"]
assert df.shape[1] == blob["ncols"] + 1
Expand Down

0 comments on commit 3b89692

Please sign in to comment.