Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Saving a list of values for a single record fails succeeds silently once and throws IntegrityError on second save #2368

Open
Zethson opened this issue Jan 21, 2025 · 2 comments
Assignees

Comments

@Zethson
Copy link
Member

Zethson commented Jan 21, 2025

Add a description

!lamin init --storage run-tests --schema bionty

import bionty as bt

bt.Disease(name=["complexdisease"]).save()  # this silently just saves but should ideally throw already because the name field should be a single value
bt.Disease(name=["complexdisease"]).save()  # this throws with an error that users do not understand

throws

---------------------------------------------------------------------------
IntegrityError                            Traceback (most recent call last)
File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/backends/utils.py:105, in CursorWrapper._execute(self, sql, params, *ignored_wrapper_args)
    104 else:
--> 105     return self.cursor.execute(sql, params)

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/backends/sqlite3/base.py:354, in SQLiteCursorWrapper.execute(self, query, params)
    353 query = self.convert_query(query, param_names=param_names)
--> 354 return super().execute(query, params)

IntegrityError: UNIQUE constraint failed: bionty_disease.uid

The above exception was the direct cause of the following exception:

IntegrityError                            Traceback (most recent call last)
Cell In[3], line 2
      1 bt.Disease(name=["complexdisease"]).save()
----> 2 bt.Disease(name=["complexdisease"]).save()

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/bionty/models.py:493, in BioRecord.save(self, *args, **kwargs)
    491 def save(self, *args, **kwargs) -> BioRecord:
    492     """Save the record and its parents recursively."""
--> 493     super().save(*args, **kwargs)
    494     # saving records of parents
    495     if hasattr(self, "_parents"):

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/lamindb/_record.py:759, in save(self, *args, **kwargs)
    757 check_name_change(self)
    758 check_key_change(self)
--> 759 super(BasicRecord, self).save(*args, **kwargs)
    760 # update _old_name and _old_key after saving
    761 _store_record_old_name(self)

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/models/base.py:892, in Model.save(self, force_insert, force_update, using, update_fields, *args)
    889     if loaded_fields:
    890         update_fields = frozenset(loaded_fields)
--> 892 self.save_base(
    893     using=using,
    894     force_insert=force_insert,
    895     force_update=force_update,
    896     update_fields=update_fields,
    897 )

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/models/base.py:998, in Model.save_base(self, raw, force_insert, force_update, using, update_fields)
    994         force_insert = self._validate_force_insert(force_insert)
    995         parent_inserted = self._save_parents(
    996             cls, using, update_fields, force_insert
    997         )
--> 998     updated = self._save_table(
    999         raw,
   1000         cls,
   1001         force_insert or parent_inserted,
   1002         force_update,
   1003         using,
   1004         update_fields,
   1005     )
   1006 # Store the database on which the object was saved
   1007 self._state.db = using

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/models/base.py:1161, in Model._save_table(self, raw, cls, force_insert, force_update, using, update_fields)
   1155 fields = [
   1156     f
   1157     for f in meta.local_concrete_fields
   1158     if not f.generated and (pk_set or f is not meta.auto_field)
   1159 ]
   1160 returning_fields = meta.db_returning_fields
-> 1161 results = self._do_insert(
   1162     cls._base_manager, using, fields, returning_fields, raw
   1163 )
   1164 if results:
   1165     for value, field in zip(results[0], returning_fields):

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/models/base.py:1202, in Model._do_insert(self, manager, using, fields, returning_fields, raw)
   1197 def _do_insert(self, manager, using, fields, returning_fields, raw):
   1198     """
   1199     Do an INSERT. If returning_fields is defined then this method should
   1200     return the newly created data for the model.
   1201     """
-> 1202     return manager._insert(
   1203         [self],
   1204         fields=fields,
   1205         returning_fields=returning_fields,
   1206         using=using,
   1207         raw=raw,
   1208     )

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/models/manager.py:87, in BaseManager._get_queryset_methods.<locals>.create_method.<locals>.manager_method(self, *args, **kwargs)
     85 @wraps(method)
     86 def manager_method(self, *args, **kwargs):
---> 87     return getattr(self.get_queryset(), name)(*args, **kwargs)

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/models/query.py:1847, in QuerySet._insert(self, objs, fields, returning_fields, raw, using, on_conflict, update_fields, unique_fields)
   1840 query = sql.InsertQuery(
   1841     self.model,
   1842     on_conflict=on_conflict,
   1843     update_fields=update_fields,
   1844     unique_fields=unique_fields,
   1845 )
   1846 query.insert_values(fields, objs, raw=raw)
-> 1847 return query.get_compiler(using=using).execute_sql(returning_fields)

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/models/sql/compiler.py:1836, in SQLInsertCompiler.execute_sql(self, returning_fields)
   1834 with self.connection.cursor() as cursor:
   1835     for sql, params in self.as_sql():
-> 1836         cursor.execute(sql, params)
   1837     if not self.returning_fields:
   1838         return []

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/backends/utils.py:79, in CursorWrapper.execute(self, sql, params)
     78 def execute(self, sql, params=None):
---> 79     return self._execute_with_wrappers(
     80         sql, params, many=False, executor=self._execute
     81     )

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/backends/utils.py:92, in CursorWrapper._execute_with_wrappers(self, sql, params, many, executor)
     90 for wrapper in reversed(self.db.execute_wrappers):
     91     executor = functools.partial(wrapper, executor)
---> 92 return executor(sql, params, many, context)

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/backends/utils.py:100, in CursorWrapper._execute(self, sql, params, *ignored_wrapper_args)
     98     warnings.warn(self.APPS_NOT_READY_WARNING_MSG, category=RuntimeWarning)
     99 self.db.validate_no_broken_transaction()
--> 100 with self.db.wrap_database_errors:
    101     if params is None:
    102         # params default might be backend specific.
    103         return self.cursor.execute(sql)

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/utils.py:91, in DatabaseErrorWrapper.__exit__(self, exc_type, exc_value, traceback)
     89 if dj_exc_type not in (DataError, IntegrityError):
     90     self.wrapper.errors_occurred = True
---> 91 raise dj_exc_value.with_traceback(traceback) from exc_value

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/backends/utils.py:105, in CursorWrapper._execute(self, sql, params, *ignored_wrapper_args)
    103     return self.cursor.execute(sql)
    104 else:
--> 105     return self.cursor.execute(sql, params)

File ~/miniforge3/envs/lamindb/lib/python3.12/site-packages/django/db/backends/sqlite3/base.py:354, in SQLiteCursorWrapper.execute(self, query, params)
    352 param_names = list(params) if isinstance(params, Mapping) else None
    353 query = self.convert_query(query, param_names=param_names)
--> 354 return super().execute(query, params)

IntegrityError: UNIQUE constraint failed: bionty_disease.uid

looking at the db

bt.Disease.df()

gives us

uid name ontology_id abbr synonyms description space_id source_id run_id created_at created_by_id _aux _branch_code
id
1 4NhM08vm ['complexdisease'] None None None None 1 None None 2025-01-21 16:00:50.990000+00:00 1 None 1
@Zethson
Copy link
Member Author

Zethson commented Jan 21, 2025

@sunnyosun should we solve that with Field validation? Something like:

class CharField(models.CharField):
    """Custom `CharField` with default values for `blank`, `default`, and `max_length`.

    Also prevents Python lists from being used as values.
    """

    def __init__(self, max_length: int = 255, **kwargs):
        kwargs["max_length"] = max_length
        kwargs.setdefault("blank", True)
        kwargs.setdefault("default", None)
        super().__init__(**kwargs)

    def clean(self, value, model_instance):
        if isinstance(value, list):
            raise ValidationError("This field does not accept lists as values.")
        return super().clean(value, model_instance)

Of course we can also extend that to other Collection types

@Zethson Zethson self-assigned this Jan 21, 2025
@sunnyosun
Copy link
Member

sunnyosun commented Jan 22, 2025

No, this will make creation very slow, it's on purpose turned off for core and bionty schema: #2177

Also clean() makes DB calls so the correct call is clean_fields() and it's already implemented: https://github.com/laminlabs/lamindb/blob/main/lamindb/_record.py#L210-L216

I think even a simple type check here is slow due to the getattr calls, need to think more how to catch such edge cases.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants