-
Notifications
You must be signed in to change notification settings - Fork 6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Normalize IndexSet.data
DB storage
#122
Changes from all commits
4e9bc8e
6e51100
24a1181
4234042
4bbd26a
52f3423
2b62ae7
a7246cd
03f75c2
827a1b8
ef5616d
85dc134
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
|
||
from .. import base | ||
from .docs import IndexSetDocsRepository | ||
from .model import IndexSet | ||
from .model import IndexSet, IndexSetData | ||
|
||
|
||
class IndexSetRepository( | ||
|
@@ -60,22 +60,45 @@ def list(self, *args, **kwargs) -> list[IndexSet]: | |
return super().list(*args, **kwargs) | ||
|
||
@guard("view") | ||
def tabulate(self, *args, **kwargs) -> pd.DataFrame: | ||
return super().tabulate(*args, **kwargs) | ||
def tabulate(self, *args, include_data: bool = False, **kwargs) -> pd.DataFrame: | ||
if not include_data: | ||
return ( | ||
super() | ||
.tabulate(*args, **kwargs) | ||
.rename(columns={"_data_type": "data_type"}) | ||
) | ||
else: | ||
result = super().tabulate(*args, **kwargs).drop(labels="_data_type", axis=1) | ||
result.insert( | ||
loc=0, | ||
column="data", | ||
value=[indexset.data for indexset in self.list(**kwargs)], | ||
) | ||
return result | ||
|
||
@guard("edit") | ||
def add_elements( | ||
def add_data( | ||
self, | ||
indexset_id: int, | ||
elements: float | int | List[float | int | str] | str, | ||
data: float | int | List[float | int | str] | str, | ||
) -> None: | ||
indexset = self.get_by_id(id=indexset_id) | ||
if not isinstance(elements, list): | ||
elements = [elements] | ||
if indexset.elements is None: | ||
indexset.elements = elements | ||
else: | ||
indexset.elements = indexset.elements + elements | ||
if not isinstance(data, list): | ||
data = [data] | ||
|
||
bulk_insert_enabled_data: list[dict[str, str]] = [ | ||
{"value": str(d)} for d in data | ||
] | ||
try: | ||
self.session.execute( | ||
db.insert(IndexSetData).values(indexset__id=indexset_id), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using ORM-enabled bulk inserting should be faster than creating individual objects (as I did before). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, this got very fast in SQLAlchemy 2! |
||
bulk_insert_enabled_data, | ||
) | ||
except db.IntegrityError as e: | ||
self.session.rollback() | ||
raise indexset.DataInvalid from e | ||
|
||
indexset._data_type = type(data[0]).__name__ | ||
|
||
self.session.add(indexset) | ||
self.session.commit() |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,7 +65,7 @@ def _add_column( | |
self.columns.create( | ||
name=column_name, | ||
constrained_to_indexset=indexset.id, | ||
dtype=pd.Series(indexset.elements).dtype.name, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, this looks quite expensive for what it does; no idea how to avoid it right now, though... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For all items like |
||
dtype=pd.Series(indexset.data).dtype.name, | ||
variable_id=variable_id, | ||
unique=True, | ||
**kwargs, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I discussed how best to do this with an SQLAlchemy maintainer here. Since we are not going to use `.data` in SQL queries, we should be better served with a normal Python property.