-
Notifications
You must be signed in to change notification settings - Fork 25
Migration workflow
- Add a `PYTHONPATH='<path>/rero-ils-migration'` variable in the `docker-service.yaml` file.
# create the migrations index
poetry run invenio reroils migrations index init
# create the bramois migration project (64 = organisation pid, -d = description)
poetry run invenio reroils migrations create bramois 64 -d " Migration données bibliothèque de Bramois" "rero_ils_migrations.converter_2024_bramois.BramoisConverter"
# convert the data to JSON
poetry run invenio reroils migrations data load bramois <path>/bramois.xml
# run the dedup
poetry run invenio reroils migrations data dedup bramois
# update permissions
from rero_ils.modules.cli.fixtures import load_role_policies
from rero_ils.modules.cli.fixtures import load_system_role_policies
# Grant the migration actions to the professional roles listed below.
cfg = {
"mig-search": [
"pro_full_permissions",
"pro_library_administrator",
"pro_catalog_manager"
],
"mig-read": [
"pro_full_permissions",
"pro_library_administrator",
"pro_catalog_manager"
],
"mig-access": [
"pro_full_permissions",
"pro_library_administrator",
"pro_catalog_manager"
],
"mig-update": [
"pro_full_permissions",
"pro_library_administrator",
"pro_catalog_manager"
]
}
load_role_policies(cfg)
from rero_ils.modules.documents.api import Document, DocumentsSearch

# Mark every Bibliomedia document (identified by a warning note in its
# adminMetadata) as harvested so that nobody attaches holdings to it.
query = (
    DocumentsSearch()
    .filter("exists", field="adminMetadata")
    .filter("match", adminMetadata__note="Bibliomedia")
    .exclude("term", harvested=True)
)
doc_pids = [hit.pid for hit in query.source('pid').scan()]
for pid in doc_pids:
    if doc := Document.get_record_by_pid(pid):
        admin_metadata = doc.get('adminMetadata')
        changed = False
        for note in admin_metadata.get('note', []):
            # runtime string kept verbatim: this is the exact note text to match
            if note.startswith('!!! Attention : notice Bibliomedia, ne pas se raccrocher !!!'):
                print(doc.pid, admin_metadata['note'])
                doc['harvested'] = True
                changed = True
        if changed:
            doc.update(data=doc, dbcommit=True, reindex=True)
# NOTE(review): the original snippet used RemoteEntitiesSearch without
# importing it — assumed to live in the same api module; verify the path.
from rero_ils.modules.entities.remote_entities.api import \
    RemoteEntitiesSearch, RemoteEntity
from rero_ils.modules.entities.remote_entities.sync import SyncEntity

# Re-synchronise every topical/temporal/place remote entity with its source.
sync = SyncEntity()
query = RemoteEntitiesSearch().filter(
    "terms", type=["bf:Topic", "bf:Temporal", "bf:Place"])
count = query.count()
for idx, hit in enumerate(query.source("pid").scan(), 1):
    res = sync.sync_record(hit.pid)
    print(f"{idx:>7}/{count} {hit.pid} {res}")
from rero_invenio_base.modules.tasks import run_on_worker
from rero_invenio_base.modules.utils import chunk
from rero_ils.modules.documents.api import Document, DocumentsSearch

# Code executed on the workers. The function body MUST be indented,
# otherwise `run_on_worker` receives invalid Python.
code = '''
def reindex(_ids):
    from rero_ils.modules.documents.api import Document
    n = 0
    errors = []
    for _id in _ids:
        try:
            doc = Document.get_record(_id)
            doc.reindex()
            n += 1
        except Exception as e:
            print('error', e)
            errors.append(_id)
    return (n, errors)
'''
parallel = 7


def _dispatch(label, query):
    """Chunk the ids matched by `query` and reindex each chunk on a worker."""
    ids = [str(hit.meta.id) for hit in query.source().scan()]
    # max(1, ...) keeps the chunk size valid when len(ids) < parallel
    for count, c in enumerate(chunk(ids, max(1, len(ids) // parallel)), 1):
        res = run_on_worker.delay(code, 'reindex', c)
        print(label, count, len(c), res)


# subjects
_dispatch('subjects',
          DocumentsSearch().filter("exists", field="subjects.entity.pid"))
# genreForm
_dispatch('genreForm',
          DocumentsSearch().filter("exists", field="genreForm.entity.pid"))
# from rero_ils.modules.documents.api import Document, DocumentsSearch
#
# for entity_type in ["subjects", "genreForm"]:
# query = DocumentsSearch().filter("exists", field=f"{entity_type}.entity.pid")
# count = query.count()
# for idx, hit in enumerate(query.source("pid").scan(), 1):
# if doc := Document.get_record_by_pid(hit.pid):
# print(f"{idx:>7}/{count} {entity_type} {doc.pid}")
# doc.reindex()
# copy the production NFS files to the new location (skip lost+found)
rsync -avr /network/nfs/files_prod/* /network/nfs/files/ --exclude=lost+found/
# after docker compose down
sudo rm -rf /data/ils/prod/mq/*
# refresh the static files inside the running web-ui container
docker-compose exec -u root web-ui bash -c "cp -r /invenio/var/instance/static/* /invenio/instance/static/."
poetry run invenio alembic upgrade
poetry run invenio rero es index update-mapping
from rero_ils.modules.entities.remote_entities.api import \
RemoteEntitiesIndexer, RemoteEntity
from rero_ils.modules.tasks import process_bulk_queue
# queue every remote entity for bulk indexing, then process the queue async
entities_ids = RemoteEntity.get_all_ids()
RemoteEntitiesIndexer().bulk_index(entities_ids)
process_bulk_queue.apply_async()
from invenio_search import current_search
# update operation logs templates
[p for p in current_search.put_templates()]
from invenio_search import current_search_client


def reindex(source, destination):
    """Start an asynchronous ES reindex from `source` into `destination`.

    `external_gte` versioning keeps already-newer documents untouched.
    Returns the ES task id so completion can be polled later.
    """
    res = current_search_client.reindex(
        body=dict(
            source=dict(index=source),
            dest=dict(index=destination, version_type='external_gte')
        ),
        wait_for_completion=False
    )
    return res['task']
def index_in_new(indices):
    """Create a `<name>-new` copy of each index and reindex the data into it.

    Returns the list of ES task ids to poll for completion.
    """
    tasks = []
    body = {
        "settings": {
            "number_of_shards": "8",
            "number_of_replicas": "1",
            "max_result_window": "100000"
        }}
    for index_name in indices:
        print(index_name)
        new_index_name = f'{index_name}-new'
        current_search_client.indices.create(new_index_name, body=body)
        tasks.append(reindex(index_name, new_index_name))
    return tasks
def remove_old(indices):
    """Delete every given ES index, reporting each deletion."""
    for index_name in indices:
        current_search_client.indices.delete(index_name)
        print(f'{index_name} has been deleted')
def rename_to_old(indices):
    """Re-create each original index and copy the `-new` data back into it.

    Counterpart of `index_in_new`; returns the ES task ids to poll.
    """
    tasks = []
    body = {
        "settings": {
            "number_of_shards": "8",
            "number_of_replicas": "1",
            "max_result_window": "100000"
        }}
    for index_name in indices:
        print(index_name)
        new_index_name = f'{index_name}-new'
        current_search_client.indices.create(index_name, body=body)
        tasks.append(reindex(new_index_name, index_name))
    return tasks
# Execute one line after the other
# Get the list of the operation logs indices
indices = ['operation_logs-2024']
tasks = index_in_new(indices)
# Check for completion (all entries must be True before continuing)
[current_search_client.tasks.get(t).get('completed') for t in tasks]
remove_old(indices)
tasks = rename_to_old(indices)
# wait again for the copy back to finish before dropping the -new indices
[current_search_client.tasks.get(t).get('completed') for t in tasks]
remove_old([i+'-new' for i in indices])
# harvest the ebooks OAI source from 1990-01-01
poetry run invenio reroils oaiharvester harvest -n ebooks -q -f 1990-01-01
# re-enable the automatic renewal scheduled task
poetry run invenio reroils scheduler enable_tasks -n automatic_renewal
# create new frontend image with the new configuration
docker-compose build frontend
docker-compose stop selfcheck scheduler worker frontend
# refresh static files in the web-ui container
docker-compose exec -u root web-ui bash -c "cp -r /invenio/var/instance/static/* /invenio/instance/static/."
Set `RERO_ILS_APP_BABELTHEQUE_ENABLED_VIEWS = ['vs']` and `INVENIO_RERO_ILS_FILES_FOLDER = /network/nfs/files` in the configuration.
# create new tables
poetry run invenio db create
# create new indices
poetry run invenio rero es index update-mapping
# dedicated dated index for file records
poetry run invenio index create -b rero_ils/modules/files/mappings/v7/files/record-v1.0.0.json files-record-v1.0.0-20240521
# init storage
poetry run invenio files location create --default default /network/nfs/files
from invenio_search import current_search, current_search_client
# create the `files` aliases on the new dated index
current_search_client.indices.put_alias('files-record-v1.0.0-20240521', 'files')
current_search_client.indices.put_alias('files-record-v1.0.0-20240521', 'files-record-v1.0.0')
# update permissions
from rero_ils.modules.cli.fixtures import load_role_policies
from rero_ils.modules.cli.fixtures import load_system_role_policies
# professional roles allowed to create/update/delete files
cfg = {
"file-create": [
"pro_full_permissions",
"pro_catalog_manager",
"pro_library_administrator"
],
"file-update": [
"pro_full_permissions",
"pro_catalog_manager",
"pro_library_administrator"
],
"file-delete": [
"pro_full_permissions",
"pro_catalog_manager",
"pro_library_administrator"
]
}
load_role_policies(cfg)
# any user may search and read files
sys_cfg = {
"file-search": [
"any_user"
],
"file-read": [
"any_user"
],
}
load_system_role_policies(sys_cfg)
# resolve the concrete index behind the `records` alias
index_name=`poetry run invenio rero es index info -i records`
echo $index_name
# move the data into the new dated index, then drop the old one
poetry run invenio rero es index move records $index_name records-record-v1.0.0-20240521 -v
poetry run invenio index delete $index_name
from invenio_search import current_search, current_search_client
# point the versioned alias at the new records index
current_search_client.indices.put_alias('records-record-v1.0.0-20240521', 'records-record-v1.0.0')
# same procedure for the documents index
index_name=`poetry run invenio rero es index info -i documents`
echo $index_name
poetry run invenio rero es index move documents $index_name documents-document-v0.0.1-20240521 -v
poetry run invenio index delete $index_name
from sqlalchemy import func
from invenio_db import db
from rero_ils.modules.documents.api import Document, DocumentsSearch
from invenio_search import current_search_client
from time import sleep


def change(query, fiction_statement, delay=5):
    """Set `fiction_statement` on every document matched by `query`.

    Updates the DB in one bulk SQL statement, then mirrors the change in
    ES with an update-by-query, polling the task every `delay` seconds.
    Returns the ES task response (contains `failures` on conflicts).
    """
    print(f'Get ids ({query.count()}) ...')
    ids = [hit.meta.id for hit in query.source(False).scan()]
    print('Update DB ...', end=' ')
    # Bulk update: set json->fiction_statement and bump version_id so the
    # ES external_gte versioning stays consistent.
    count = Document.model_cls.query \
        .filter(Document.model_cls.id.in_(ids)) \
        .update(
            {
                'json': func.jsonb_set(
                    Document.model_cls.json,
                    '{fiction_statement}',  # literal JSONB path, not an f-string
                    f'"{fiction_statement}"'
                ),
                'version_id': Document.model_cls.version_id + 1
            },
            synchronize_session=False
        )
    print(count)
    db.session.commit()
    # Run the same query in ES with a painless script setting the field.
    body = query.to_dict()
    body.update(
        {"script": {
            "source": f"ctx._source['fiction_statement'] = '{fiction_statement}'"
        }}
    )
    info = current_search_client.update_by_query(
        index='documents',
        body=body,
        wait_for_completion=False
    )
    task_id = info['task']
    print(f'Update ES ... task id: "{task_id}"')

    def _report(task):
        """Print the task progress on a single updating line."""
        print(
            f'updated: {task["task"]["status"]["updated"]} '
            f'conflicts: {task["task"]["status"]["version_conflicts"]}',
            end='\r'
        )

    task = current_search_client.tasks.get(task_id)
    _report(task)
    while not task['completed']:
        sleep(delay)
        task = current_search_client.tasks.get(task_id)
        _report(task)
    print()
    return task['response']
# If the change() call reported conflicts: delete the conflicting ES
# documents, wait, then reindex them from the DB (the source of truth).
# `response` is the return value of the previous `change(...)` call.
failures = response.get('failures', [])
print(f'Correct errors: {len(failures)}')
for idx, failure in enumerate(failures, 1):
    id_ = failure.get('id')
    print(idx, id_, end='\r')
    current_search_client.delete(
        index='documents',
        id=id_,
        refresh=True
    )
# give ES time to settle before reindexing
print('Sleep: 60 ')
sleep(60)
for idx, failure in enumerate(failures, 1):
    id_ = failure.get('id')
    print(idx, id_, end='\r')
    doc = Document.get_record(id_)
    try:
        doc.reindex()
    except Exception as err:
        print(idx, id_, err)
# Assign fiction statements to all documents, in three passes.
FICTIONS_TERMS = ['Fictions', 'Films de fiction']
# Fiction: documents with a fiction genre/form facet
query = DocumentsSearch() \
.filter('terms', facet_genre_form_en=FICTIONS_TERMS)
response = change(query, 'fiction')
# test
fiction_count = DocumentsSearch() \
.filter('term', fiction_statement='fiction') \
.count()
print(f'term: {query.count()} statement: {fiction_count}')
# Non fiction: local documents with subjects but no fiction facet
query = DocumentsSearch() \
.exclude('term', harvested=True) \
.exclude('terms', facet_genre_form_en=FICTIONS_TERMS) \
.filter('exists', field='subjects')
response = change(query, 'non_fiction')
# test
non_fiction_count = DocumentsSearch() \
.filter('term', fiction_statement='non_fiction') \
.count()
print(f'term: {query.count()} statement: {non_fiction_count}')
# Unspecified: everything still without a fiction_statement
query = DocumentsSearch() \
.exclude('exists', field='fiction_statement')
# fresh session for the last (largest) pass
db.session.close()
response = change(query, 'unspecified')
# test: remainder should match the unspecified count
count = DocumentsSearch().count() - fiction_count - non_fiction_count
unspecified_count = DocumentsSearch() \
.filter('term', fiction_statement='unspecified') \
.count()
print(f'{count} statement: {unspecified_count}')
Delete the field `legacy_circulation_rules` from all items (https://github.com/rero/rero-ils/pull/3671).
from rero_ils.modules.items.api import ItemsSearch, Item
from invenio_search import current_search_client
from invenio_db import db
from time import sleep
from elasticsearch_dsl import Q

# Remove the obsolete `legacy_circulation_rules` field from every item,
# first in the DB, then in ES via an update-by-query.
query = ItemsSearch() \
    .filter(
        Q('exists', field='legacy_circulation_rules')
    )
print(f'Get ids ({query.count()}) ...')
ids = [hit.meta.id for hit in query.source().scan()]
print('Change DB ... ', end='')
# Bulk SQL update: drop the key from the JSONB column and bump version_id
# so the ES external_gte versioning stays consistent.
count = Item.model_cls.query \
    .filter(Item.model_cls.id.in_(ids)) \
    .update(
        {
            Item.model_cls.json: (
                Item.model_cls.json - 'legacy_circulation_rules'
            ),
            'version_id': Item.model_cls.version_id + 1
        },
        synchronize_session=False
    )
db.session.commit()
print(count)
print('Change ES ...')
body = query.to_dict()
body.update({"script": "ctx._source.remove(\"legacy_circulation_rules\")"})
info = current_search_client.update_by_query(
    index='items',
    body=body,
    wait_for_completion=False
)
task_id = info['task']
print(f'Update ES ... task id: "{task_id}"')
task = current_search_client.tasks.get(task_id)
print(
    f'updated: {task["task"]["status"]["updated"]} '
    f'conflicts: {task["task"]["status"]["version_conflicts"]}',
    end='\r'
)
# poll until the asynchronous ES task completes
while not task['completed']:
    sleep(5)
    task = current_search_client.tasks.get(task_id)
    print(
        f'updated: {task["task"]["status"]["updated"]} '
        f'conflicts: {task["task"]["status"]["version_conflicts"]}',
        end='\r'
    )
print()
poetry run invenio alembic upgrade
# stamp each intermediate revision so the next upgrade step can run
poetry run invenio alembic stamp 2e97565eba72
poetry run invenio alembic upgrade
poetry run invenio alembic stamp 8ae99b034410
poetry run invenio alembic upgrade
poetry run invenio alembic stamp a29271fd78f8
poetry run invenio alembic upgrade
Add `files*` to `slm_daily_all.json`.
# install the updated daily snapshot lifecycle management policy
poetry run invenio rero es slm put daily slm_daily_all.json
- documents resource -> https://github.com/rero/rero-ils/commit/4f2374b5fb3fc331bd7ab5faeefba2d039bbebaa
- Add an INVENIO_WIKI_INDEX_DIR variable in the configmap of your production environment.
from invenio_db import db
from rero_ils.modules.patrons.api import Patron

# Remove the alembic revision so the upgrade recipes can be replayed.
db.session.execute("DELETE from alembic_version where version_num = 'c25ef2c50ffa'")
db.session.commit()
# run poetry run invenio alembic upgrade
# Expected alembic output:
# INFO [alembic.runtime.migration] Context impl PostgresqlImpl.
# INFO [alembic.runtime.migration] Will assume transactional DDL.
# INFO [alembic.runtime.migration] Running upgrade 04480be1593e -> 842a62b56e60, Change FK AccountsRole to string (downgrade recipe).
# INFO [alembic.runtime.migration] Running upgrade e12419831262 -> 999dcbd19ace, Add versioning information to models.
# INFO [alembic.runtime.migration] Running upgrade 999dcbd19ace -> dfbdf43a3e96, Separate login info from user table.
# INFO [alembic.runtime.migration] Running upgrade dfbdf43a3e96 -> 62efc52773d4, Create UserIdentity table.
# INFO [alembic.runtime.migration] Running upgrade 62efc52773d4 -> eb9743315a9d, Add user profile and preferences as JSON fields to the User table.
# INFO [alembic.runtime.migration] Running upgrade eb9743315a9d -> f2522cdd5fcd, Change AccountsRole primary key to string.
# INFO [alembic.runtime.migration] Running upgrade f2522cdd5fcd, 842a62b56e60 -> f9843093f686, Change FK AccountsRole to string (upgrade recipe).
# INFO [alembic.runtime.migration] Running upgrade f9843093f686 -> 037afe10e9ff, Add user moderation fields.
# INFO [alembic.runtime.migration] Running upgrade bff1f190b9bd -> aaa265b0afa6, Move UserIdentity to accounts.
# INFO [alembic.runtime.migration] Running upgrade -> 759d47cbdba7, Create oaiserver branch.
# INFO [alembic.runtime.migration] Running upgrade 759d47cbdba7 -> e655021de0de, Create oiaserver tables.
# INFO [alembic.runtime.migration] Running upgrade e655021de0de -> 5d25c1981985, Add system_created field.

# Copy every legacy userprofiles_userprofile row into the new JSON
# `user_profile` field on the user; drop the legacy table when error-free.
total = db.session.execute('SELECT COUNT(*) from userprofiles_userprofile').all()[0][0]
errors = {}
for idx, prof in enumerate(db.session.execute('SELECT * from userprofiles_userprofile').all(), 1):
    print(f'{idx}/{total}', end='\r')
    # keep only non-empty profile values
    prof = {k: v for k, v in dict(prof).items() if v}
    user_id = prof.pop('user_id')
    user = Patron._get_user_by_user_id(user_id)
    if user.username:
        # already migrated
        continue
    prof.pop('displayname', None)
    try:
        if birth_date := prof.get('birth_date'):
            prof['birth_date'] = birth_date.strftime('%Y-%m-%d')
        if username := prof.pop('username', None):
            user.username = username
        user.user_profile = prof
        db.session.merge(user)
        # commit in batches of 100
        if idx % 100 == 0:
            db.session.commit()
    except Exception as e:
        errors[user_id] = e
db.session.commit()
if errors:
    print('errors:', errors)
else:
    db.session.execute('DROP TABLE userprofiles_userprofile')
poetry run invenio rero es index update-mapping
# move vendors, acq_orders and documents to new dated indices
index_name=`poetry run invenio rero es index info -i vendors`
echo $index_name
poetry run invenio rero es index move vendors $index_name vendors-vendor-v0.0.1-20240206 -v
poetry run invenio rero es index update-mapping
poetry run invenio index delete $index_name
index_name=`poetry run invenio rero es index info -i acq_orders`
echo $index_name
poetry run invenio rero es index move acq_orders $index_name acq_orders-acq_order-v0.0.1-20240206 -v
poetry run invenio rero es index update-mapping
poetry run invenio index delete $index_name
index_name=`poetry run invenio rero es index info -i documents`
echo $index_name
poetry run invenio rero es index move documents $index_name documents-document-v0.0.1-20240206 -v
poetry run invenio rero es index update-mapping
poetry run invenio index delete $index_name
from rero_ils.modules.documents.api import Document, DocumentsSearch
from invenio_db import db

# Ensure partOf.numbering volume/issue values are strings (not integers).
query = DocumentsSearch().filter('exists', field='partOf.numbering')
total = query.count()
for idx, hit in enumerate(query.source('pid').scan(), 1):
    print(f'{idx}/{total} {hit.pid}', end='\r')
    if doc := Document.get_record_by_pid(hit.pid):
        for part_of in doc['partOf']:
            for numbering in part_of.get('numbering', []):
                if 'volume' in numbering:
                    numbering['volume'] = str(numbering['volume'])
                if 'issue' in numbering:
                    numbering['issue'] = str(numbering['issue'])
        # Write the JSON straight to the model and reindex: much faster
        # than doc.update(...) for a mass migration.
        doc.model.json = doc
        db.session.merge(doc.model)
        doc.reindex()
    # commit in batches of 100
    if idx % 100 == 0:
        db.session.commit()
db.session.commit()
# create stat cfg table
poetry run invenio db create
# create the index (dated name, aliased below)
poetry run invenio index create -b rero_ils/modules/stats_cfg/mappings/v7/stats_cfg/stat_cfg-v0.0.1.json 'stats_cfg-stat_cfg-v0.0.1-20231121'
from invenio_search import current_search, current_search_client
# create the aliases on the new dated index
current_search_client.indices.put_alias('stats_cfg-stat_cfg-v0.0.1-20231121', 'stats_cfg')
current_search_client.indices.put_alias('stats_cfg-stat_cfg-v0.0.1-20231121', 'stats_cfg-stat_cfg-v0.0.1')
# update operation logs templates
[p for p in current_search.put_templates()]
# refresh the ES mappings
poetry run invenio rero es index update-mapping
from invenio_search import current_search_client


def reindex(source, destination):
    """Start an asynchronous ES reindex from `source` into `destination`.

    `external_gte` versioning keeps already-newer documents untouched.
    Returns the ES task id so completion can be polled later.
    """
    res = current_search_client.reindex(
        body=dict(
            source=dict(index=source),
            dest=dict(index=destination, version_type='external_gte')
        ),
        wait_for_completion=False
    )
    return res['task']
def index_in_new(indices):
    """Create a `<name>-new` copy of each index and reindex the data into it.

    Returns the list of ES task ids to poll for completion.
    """
    tasks = []
    body = {
        "settings": {
            "number_of_shards": "8",
            "number_of_replicas": "1",
            "max_result_window": "100000"
        }}
    for index_name in indices:
        print(index_name)
        new_index_name = f'{index_name}-new'
        current_search_client.indices.create(new_index_name, body=body)
        tasks.append(reindex(index_name, new_index_name))
    return tasks
def remove_old(indices):
    """Delete every given ES index, reporting each deletion."""
    for index_name in indices:
        current_search_client.indices.delete(index_name)
        print(f'{index_name} has been deleted')
def rename_to_old(indices):
    """Re-create each original index and copy the `-new` data back into it.

    Counterpart of `index_in_new`; returns the ES task ids to poll.
    """
    tasks = []
    body = {
        "settings": {
            "number_of_shards": "8",
            "number_of_replicas": "1",
            "max_result_window": "100000"
        }}
    for index_name in indices:
        print(index_name)
        new_index_name = f'{index_name}-new'
        current_search_client.indices.create(index_name, body=body)
        tasks.append(reindex(new_index_name, index_name))
    return tasks
# Execute one line after the other
# Get the list of the operation logs indices
indices = list(current_search_client.indices.get_alias('operation_logs').keys())
tasks = index_in_new(indices)
# Check for completion (all entries must be True before continuing)
[current_search_client.tasks.get(t).get('completed') for t in tasks]
remove_old(indices)
tasks = rename_to_old(indices)
# wait again for the copy back to finish before dropping the -new indices
[current_search_client.tasks.get(t).get('completed') for t in tasks]
remove_old([i+'-new' for i in indices])
from invenio_access.models import Role, ActionRoles
from invenio_db import db
from rero_ils.modules.cli.fixtures import load_role_policies
# NOTE(review): takes the LAST role in the table and renames it — assumes the
# most recently created role is the one to rename; verify before running.
r1 = [r for r in Role.query.all()][-1]
r1.name
r1.name = 'pro_statistic_manager'
db.session.merge(r1)
db.session.commit()
# grant the statistics actions to the roles listed below
cfg = {
"stat-access": [
"pro_statistic_manager",
],
"stat-search": [
"pro_statistic_manager",
"pro_library_administrator"
],
"stat-read": [
"pro_statistic_manager",
"pro_library_administrator"
],
"stat_cfg-access": [
"pro_full_permissions",
"pro_statistic_manager",
"pro_library_administrator"
],
"stat_cfg-search": [
"pro_full_permissions",
"pro_statistic_manager",
"pro_library_administrator"
],
"stat_cfg-read": [
"pro_full_permissions",
"pro_statistic_manager",
"pro_library_administrator"
],
"stat_cfg-create": [
"pro_full_permissions",
"pro_statistic_manager",
"pro_library_administrator"
],
"stat_cfg-update": [
"pro_full_permissions",
"pro_statistic_manager",
"pro_library_administrator"
],
"stat_cfg-delete": [
"pro_full_permissions",
"pro_statistic_manager",
"pro_library_administrator"
]}
load_role_policies(cfg)
# removes the stat actions from the pro_read_only role
role_id = Role.query.filter_by(name='pro_read_only').first().id
ar = ActionRoles.query.filter_by(action='stat-search').filter_by(role_id=role_id).first()
db.session.delete(ar)
ar = ActionRoles.query.filter_by(action='stat-read').filter_by(role_id=role_id).first()
db.session.delete(ar)
db.session.commit()
TODO: Entities — delete the old index (the new local and remote entity indices are served through aliases).
# create the new dated remote entities index
poetry run invenio index create -b rero_ils/modules/entities/remote_entities/mappings/v7/remote_entities/remote_entity-v0.0.1.json remote_entities-remote_entity-v0.0.1-20231031
/_reindex
{
  "source": {
    "index": "entities-entity-v0.0.1-20230516"
  },
  "dest": {
    "index": "remote_entities-remote_entity-v0.0.1-20231031",
    "version_type": "external_gte"
  },
  "script": {
    "source": "ctx._source['$schema'] = 'https://bib.rero.ch/schemas/remote_entities/remote_entity-v0.0.1.json'; ctx._source['resource_type'] = 'remote'"
  }
}
poetry run invenio rero es index update-mapping
# keep the old index until the migration is verified, then delete it
#index_name=entities-entity-v0.0.1-20230516
#poetry run invenio index delete $index_name
# point every remote entity alias (new and legacy names) at the new index
poetry run invenio rero es alias put remote_entities-remote_entity-v0.0.1-20231031 remote_entities
poetry run invenio rero es alias put remote_entities-remote_entity-v0.0.1-20231031 remote_entities-remote_entity-v0.0.1
poetry run invenio rero es alias put remote_entities-remote_entity-v0.0.1-20231031 entities
poetry run invenio rero es alias put remote_entities-remote_entity-v0.0.1-20231031 entities-entity-v0.0.1
# create the local entities index and the matching aliases
poetry run invenio index create -b rero_ils/modules/entities/local_entities/mappings/v7/local_entities/local_entity-v0.0.1.json local_entities-local_entity-v0.0.1-20231031
poetry run invenio rero es alias put local_entities-local_entity-v0.0.1-20231031 local_entities
poetry run invenio rero es alias put local_entities-local_entity-v0.0.1-20231031 local_entities-local_entity-v0.0.1
poetry run invenio rero es alias put local_entities-local_entity-v0.0.1-20231031 entities
poetry run invenio rero es alias put local_entities-local_entity-v0.0.1-20231031 entities-entity-v0.0.1
# move the items index to a new dated index
index_name=`poetry run invenio rero es index info -i items`
echo $index_name
poetry run invenio rero es index move items $index_name items-item-v0.0.1-20231019
poetry run invenio index delete $index_name
poetry run invenio rero es index update-mapping
# rename the entity tables in PostgreSQL
su - postgres
psql reroils
ALTER TABLE entity_id RENAME TO remote_entity_id;
ALTER TABLE entity_metadata RENAME TO remote_entity_metadata;
from invenio_db import db
from invenio_pidstore.models import PersistentIdentifier, PIDStatus
# rename the pid type from 'ent' to 'rement' (remote entity)
PersistentIdentifier.query.filter_by(pid_type='ent').update({'pid_type': 'rement'})
db.session.commit()
from sqlalchemy import func
from rero_ils.modules.entities.remote_entities.api import RemoteEntity
from invenio_db import db
# Correct $schema for entities in db
old_schema = 'https://bib.rero.ch/schemas/entities/entity-v0.0.1.json'
schema = 'https://bib.rero.ch/schemas/remote_entities/remote_entity-v0.0.1.json'
# bulk JSONB update; synchronize_session=False is fine since we commit right after
count = RemoteEntity.model_cls.query.filter(RemoteEntity.model_cls.json['$schema'].as_string() == old_schema).update({"json": func.jsonb_set(RemoteEntity.model_cls.json, '{$schema}', f'"{schema}"')}, synchronize_session=False)
db.session.commit()
print(f"{count} updated entities")
If there are differences between the DB and ES:
# correct wrong pids for the rement pid type (dry-run flags: -v verbose, -c commit)
poetry run /network/nfs/data_ils/ils/scripts/correct_wrong_pids.py reroils utils correct-wrong-pids -t rement -v -c
poetry run invenio db create
from rero_invenio_base.modules.tasks import run_on_worker
from rero_invenio_base.modules.utils import chunk
from rero_ils.modules.documents.api import Document

# Reindex all documents in parallel on the workers.
# The worker code body MUST be indented, otherwise `run_on_worker`
# receives invalid Python.
code = '''
def reindex(_ids):
    from rero_ils.modules.documents.api import Document
    n = 0
    errors = []
    for _id in _ids:
        try:
            doc = Document.get_record(_id)
            doc.reindex()
            n += 1
        except Exception as e:
            print('error', e)
            errors.append(_id)
    return (n, errors)
'''
parallel = 7
count = 0
# max(1, ...) keeps the chunk size valid when there are fewer ids than workers
for c in chunk([str(val) for val in Document.get_all_ids()],
               max(1, Document.count() // parallel)):
    count += 1
    res = run_on_worker.delay(code, 'reindex', c)
    print(count, len(c), res)
Re-generate incorrect circulation statistics.
TODO: fix the script for pricing stats — it computes incorrect numbers because those stats seem to have no `date_range`.
import arrow
from dateutil.relativedelta import relativedelta
from datetime import datetime
from rero_ils.modules.stats.api.librarian import StatsForLibrarian
from rero_ils.modules.stats.api.pricing import StatsForPricing
from rero_ils.modules.stats.api.api import Stat, StatsSearch
from rero_ils.modules.libraries.api import LibrariesSearch

# Recompute the circulation values of every stat record created after
# 2023-07-30 and save the corrected record.
search = StatsSearch() \
    .filter('range', _created={'gte': '2023-07-30'})
for hit in list(search.source('pid').scan()):
    try:
        stat = Stat.get_record(hit.meta.id)
        if stat['type'] == 'billing':
            # pricing stats cover the period up to the day before creation
            to_date = arrow.Arrow.fromdatetime(stat.created - relativedelta(days=1))
            compute = StatsForPricing(to_date=to_date)
            for val in stat.get('values', []):
                lib_pid = val['library']['pid']
                # number_of_checkouts
                new_number_of_checkouts = compute.number_of_circ_operations(lib_pid, 'checkout')
                print(lib_pid, val['number_of_checkouts'], new_number_of_checkouts)
                val['number_of_checkouts'] = new_number_of_checkouts
                # number_of_renewals
                new_number_of_renewals = compute.number_of_circ_operations(lib_pid, 'extend')
                print(lib_pid, val['number_of_renewals'], new_number_of_renewals)
                val['number_of_renewals'] = new_number_of_renewals
                # number_of_checkins
                new_number_of_checkins = compute.number_of_circ_operations(lib_pid, 'checkin')
                print(lib_pid, val['number_of_checkins'], new_number_of_checkins)
                val['number_of_checkins'] = new_number_of_checkins
                # number_of_ill_requests replaces number_of_validated_ill_requests
                new_number_of_ill_requests = compute.number_of_ill_requests(lib_pid, ['denied'])
                print(lib_pid, val['number_of_validated_ill_requests'], new_number_of_ill_requests)
                val.pop('number_of_validated_ill_requests')
                val['number_of_ill_requests'] = new_number_of_ill_requests
                # number_of_requests
                new_number_of_requests = compute.number_of_circ_operations(lib_pid, 'request')
                print(lib_pid, val['number_of_requests'], new_number_of_requests)
                val['number_of_requests'] = new_number_of_requests
        elif stat['type'] == 'librarian':
            compute = StatsForLibrarian()
            compute.date_range = stat['date_range']
            for val in stat.get('values', []):
                lib_pid = val['library']['pid']
                # checkouts_for_transaction_library
                new_checkouts_for_transaction_library = compute.checkouts_for_transaction_library(lib_pid)
                print(lib_pid, val['checkouts_for_transaction_library'], new_checkouts_for_transaction_library)
                val['checkouts_for_transaction_library'] = new_checkouts_for_transaction_library
                # checkouts_for_owning_library
                new_checkouts_for_owning_library = compute.checkouts_for_owning_library(lib_pid)
                print(lib_pid, val['checkouts_for_owning_library'], new_checkouts_for_owning_library)
                val['checkouts_for_owning_library'] = new_checkouts_for_owning_library
                # active_patrons_by_postal_code
                new_active_patrons_by_postal_code = compute.active_patrons_by_postal_code(lib_pid)
                print(lib_pid, val['active_patrons_by_postal_code'], new_active_patrons_by_postal_code)
                val['active_patrons_by_postal_code'] = new_active_patrons_by_postal_code
                # new_active_patrons_by_postal_code
                new_new_active_patrons_by_postal_code = compute.active_patrons_by_postal_code(lib_pid, new_patrons=True)
                print(lib_pid, val['new_active_patrons_by_postal_code'], new_new_active_patrons_by_postal_code)
                val['new_active_patrons_by_postal_code'] = new_new_active_patrons_by_postal_code
                # renewals
                new_renewals = compute.renewals(lib_pid)
                print(lib_pid, val['renewals'], new_renewals)
                val['renewals'] = new_renewals
                # loans_of_transaction_library_by_item_location
                new_loans_of_transaction_library_by_item_location = compute.loans_of_transaction_library_by_item_location(lib_pid)
                print(lib_pid, val['loans_of_transaction_library_by_item_location'], new_loans_of_transaction_library_by_item_location)
                val['loans_of_transaction_library_by_item_location'] = new_loans_of_transaction_library_by_item_location
        # NOTE(review): indentation was lost in the source — the update is
        # assumed to apply once per stat record, for both types; confirm.
        stat.update(stat, commit=True, dbcommit=True, reindex=True)
    except Exception as err:
        print('ERROR', hit.pid, err)
On RERO+ instances, disable all local entity permissions for the `all_permissions` role AND for `pro_entity_manager`. We don't use this feature.
poetry run invenio rero es index update-mapping
# move the libraries index to a new dated index
index_name=`poetry run invenio rero es index info -i libraries`
echo $index_name
poetry run invenio rero es index move libraries $index_name libraries-library-v0.0.1-20230719
poetry run invenio rero es index update-mapping
poetry run invenio index delete $index_name
# run the alembic recipes one at a time
poetry run invenio alembic upgrade e63e5dfa2416
poetry run invenio alembic upgrade 64a5cc96f96e
poetry run invenio alembic upgrade 8d97be2c8ad6
from rero_ils.modules.stats.api import StatsForLibrarian, Stat

# Recompute validated_requests for the librarian statistics record 786.
stat = Stat.get_record_by_pid('786')
compute = StatsForLibrarian()
compute.date_range = stat['date_range']
for val in stat.get('values', []):
    lib_pid = val['library']['pid']
    new_v_req = compute.validated_requests(lib_pid)
    print(lib_pid, val['validated_requests'], new_v_req)
    val['validated_requests'] = new_v_req
stat.update(stat, commit=True, dbcommit=True, reindex=True)
- Copy the configuration for serial acquisition settings from the acquisition settings, and set default times for exception dates that are open but have no times: use the script below.
from rero_ils.modules.libraries.api import Library, LibrariesSearch

# Copy the acquisition settings into the new serial acquisition settings
# and give open exception dates without times a default opening time.
print('Updating libraries acquisition settings and exception dates...')
libraries = LibrariesSearch()
print(f'Found {libraries.count()}')
errors = []
default_time_libs = []
# default opening window for open exception dates that lack times
default_times = {
    'start_time': '08:00',
    'end_time': '08:10'
}
for hit in libraries.source().scan():
    lib = Library.get_record(hit.meta.id)
    default_time = False
    for date in lib.get('exception_dates', []):
        if date.get('is_open', False) and not date.get('times', []):
            default_time = True
            date['times'] = [default_times]
    if default_time:
        default_time_libs.append(lib.pid)
    try:
        if settings := lib.get('acquisition_settings'):
            lib['serial_acquisition_settings'] = settings
        lib.update(lib, True, True, True)
        print(f"Updating library, pid: {lib.get('pid')}.")
    except Exception as err:
        print(f"Error: {err} with lib pid: {lib.get('pid')}")
        errors.append(lib.pid)
print(f"Libraries updated with {len(errors)} errors.")
print(errors)
print(f'Default times set for exception open days in libraries: {default_time_libs}')
Write a script to add a default value for `loan_status`.
import click
from rero_ils.modules.ill_requests.api import ILLRequest, ILLRequestsSearch

# Add a default loan_status, derived from the request status, to every
# ILL request that has none.
click.secho('Updating ill_requests loan_status...')
ill_without_loan_status = ILLRequestsSearch().exclude('exists', field='loan_status')
click.secho(f'Found {ill_without_loan_status.count()} ill_requests without loan status')
status_mapping = {
    "pending": "PENDING",
    "validated": "ITEM_ON_LOAN",
    "denied": "PENDING",
    "closed": "ITEM_RETURNED"
}
errors = []
for hit in ill_without_loan_status.source().scan():
    ill = ILLRequest.get_record(hit.meta.id)
    try:
        status = ill.get("status")
        ill['loan_status'] = status_mapping[status]
        ill.update(ill, True, True, True)
        click.secho(f"Updating ill_request, pid: {ill.get('pid')}.")
    except Exception as err:
        click.secho(f"Error: {err} with ill_requests pid: {ill.get('pid')}")
        errors.append(ill.pid)
click.secho(f"Ill_requests updated with {len(errors)} errors.")
# WARNING: the requests that could not be updated are DELETED below —
# review `errors` before running this loop.
for pid in errors:
    ILLRequest.get_record_by_pid(pid).delete(dbcommit=True, delindex=True)
    print(pid)
from rero_ils.schedulers import current_scheduler
# Remove scheduled tasks that are no longer needed (presumably superseded by
# the entities migration — confirm against the scheduler configuration).
current_scheduler.remove('replace-idby-subjects-imported')
current_scheduler.remove('replace-idby-contribution')
current_scheduler.remove('replace-idby-subjects')
# update es mapping
poetry run invenio rero es index update-mapping
# rename contribution tables (alembic revision a710021979fe)
poetry run invenio alembic upgrade a710021979fe
# move the contribution index into the entity index:
# create the new versioned entity index from the entity mapping file
poetry run invenio index create -b rero_ils/modules/entities/mappings/v7/entities/entity-v0.0.1.json 'entities-entity-v0.0.1-20230516'
from invenio_search import current_search_client
# Point both the generic and the versioned aliases at the new entity index.
current_search_client.indices.put_alias('entities-entity-v0.0.1-20230516', 'entities')
current_search_client.indices.put_alias('entities-entity-v0.0.1-20230516', 'entities-entity-v0.0.1')
from invenio_db import db
from invenio_pidstore.models import PersistentIdentifier, PIDStatus
# Rename the PID type from contribution ('cont') to entity ('ent') directly
# in the pidstore table, then commit. (PIDStatus is imported but unused here.)
PersistentIdentifier.query.filter_by(pid_type='cont').update({'pid_type': 'ent'})
db.session.commit()
Note: remove the contributions ES alias.
from sqlalchemy import func
from rero_ils.modules.entities.api import Entity
from invenio_db import db
# Correct $schema for entities in db
# Rewrite the `$schema` URL from the old contribution schema to the new entity
# schema directly in PostgreSQL via jsonb_set — this bypasses record
# validation/extensions for speed, hence synchronize_session=False.
old_schema = 'https://bib.rero.ch/schemas/contributions/contribution-v0.0.1.json'
schema = 'https://bib.rero.ch/schemas/entities/entity-v0.0.1.json'
count = Entity.model_cls.query.filter(Entity.model_cls.json['$schema'].as_string() == old_schema).update({"json": func.jsonb_set(Entity.model_cls.json, '{$schema}', f'"{schema}"')}, synchronize_session=False)
db.session.commit()
print(f"{count} updated entities")
# Queue all entities for reindexing, then consume the queue with 7 workers.
poetry run invenio reroils index reindex -t ent
poetry run invenio reroils index run -c 7 -d
# Run the template-correction script through the interpreter — the original
# line was missing `python` (the .py file is not executable by itself; this
# matches the invocation used for correct-documents below).
poetry run python /network/nfs/data_ils/ils/scripts/entities.py reroils utils correct-templates -c -l <change_this_log_file>
Verify that template 2015 is correct (no contribution with entities having only `type`!).
# Read the current documents index name behind the alias
index_name=`poetry run invenio rero es index info -i documents`
echo $index_name
# Move the alias to the new versioned index, then delete the old index
poetry run invenio rero es index move -v documents $index_name documents-document-v0.0.1-20230531
poetry run invenio index delete $index_name
# Correct the documents with the helper script
poetry run python /network/nfs/data_ils/ils/scripts/entities.py reroils utils correct-documents -c -l <change_this_log_file>
from rero_invenio_base.modules.tasks import run_on_worker
from rero_invenio_base.modules.utils import chunk
# Fan the 'do_documents' job out to Celery workers: ship the script source and
# a chunk of document ids to each task. Chunk size = total / 14.9, i.e. roughly
# 15 chunks. `Document` must already be imported in the session.
n = 1
with open('/network/nfs/data_ils/ils/scripts/entities.py') as f:
    src = f.read()
for c in chunk([str(val) for val in Document.get_all_ids()], int(Document.count()/14.9)):
    run_on_worker.delay(src, 'do_documents', ids=c, logfile=f'/network/nfs/data_ils/ils/logs/create-authorized-access-points-2023-05-28-{n}.log', commit=True)
    n += 1
    # NOTE(review): n is printed after the increment, so this shows the NEXT
    # chunk number — confirm this is intended.
    print(f'start {n}')
from rero_ils.schedulers import current_scheduler
from invenio_cache import current_cache

# Remove the 'sync-agents' scheduled task and drop its entry from the cached
# `timestamps` dict so it no longer appears in the monitoring views.
current_scheduler.remove('sync-agents')
data = current_cache.get('timestamps')
# Guard against a cold cache (None) and an already-missing key: the original
# unguarded `data.pop('sync_agents')` crashed in both cases.
if data:
    data.pop('sync_agents', None)
    current_cache.set(key='timestamps', value=data, timeout=0)
poetry run invenio index delete contributions-contribution-v0.0.1-20230116
- update the ES mapping:
poetry run invenio rero es index update-mapping
- add the new 'pro_statistics_manager' role using the CLI:
poetry run invenio roles create -d 'Professional: Statistics manager' pro_statistics_manager
- ill request: `is_ill_pickup` and `ill_pickup_name` (new fields) -> there is an alembic script.
# fixes contributions alias by hand using elastic chrome extensions
# Note: not necessarily done in the previous script: poetry run invenio alembic upgrade add75cbcad66
poetry run invenio alembic upgrade e3eb396b39bb
- item operation history: change type of `trigger` to `keyword` (missing field definition). Reindex `operation_logs`.
from invenio_search import current_search_client

def reindex(source, destination):
    """Start an asynchronous ES reindex copying `source` into `destination`.

    Uses `version_type=external_gte` so newer documents already present in
    the destination are not overwritten. Returns the ES task id, which can
    be polled later for completion.
    """
    body = {
        'source': {'index': source},
        'dest': {
            'index': destination,
            'version_type': 'external_gte'
        }
    }
    response = current_search_client.reindex(body=body, wait_for_completion=False)
    return response['task']
def index_in_new(indices):
    """Create a `<name>-new` index for every given index and start copying
    its data over.

    Returns the list of ES task ids of the started reindex operations.
    """
    settings = {
        "settings": {
            "number_of_shards": "8",
            "number_of_replicas": "1",
            "max_result_window": "100000"
        }}
    task_ids = []
    for name in indices:
        print(name)
        target = f'{name}-new'
        current_search_client.indices.create(target, body=settings)
        task_ids.append(reindex(name, target))
    return task_ids
def remove_old(indices):
    """Delete every index in `indices`, reporting each deletion."""
    for name in indices:
        current_search_client.indices.delete(name)
        print(f'{name} has been deleted')
def rename_to_old(indices):
    """Recreate each original index and start copying data back from its
    `<name>-new` copy.

    Returns the list of ES task ids of the started reindex operations.
    """
    settings = {
        "settings": {
            "number_of_shards": "8",
            "number_of_replicas": "1",
            "max_result_window": "100000"
        }}
    task_ids = []
    for name in indices:
        print(name)
        source = f'{name}-new'
        current_search_client.indices.create(name, body=settings)
        task_ids.append(reindex(source, name))
    return task_ids
# Execute one line after the other
# Get the list of the operation logs indices
indices = list(current_search_client.indices.get_alias('operation_logs').keys())
# Copy every index into a fresh `<name>-new` index
tasks = index_in_new(indices)
# Check for completion — re-run until every entry is True before continuing
[current_search_client.tasks.get(t).get('completed') for t in tasks]
# Drop the originals, then recreate them and copy data back from the copies
remove_old(indices)
tasks = rename_to_old(indices)
# Check for completion again before deleting the temporary `-new` indices
[current_search_client.tasks.get(t).get('completed') for t in tasks]
remove_old([i+'-new' for i in indices])
poetry run invenio rero es index update-mapping
poetry run invenio alembic upgrade
In case of error, try running on the database: `update alembic_version set version_num='eec683a446e5' where version_num='e655021de0de';` and `update alembic_version set version_num='eec683a446e5' where version_num='8145a7cdef99';`. This should be done first on a test server.
# Apply the pending alembic revisions
poetry run invenio alembic upgrade 5f0b086e4b82
poetry run invenio alembic upgrade 8145a7cdef99
# Mark the revision as applied without re-running it
poetry run invenio alembic stamp 8145a7cdef99
# Load the role/system-role policy fixtures
poetry run invenio reroils fixtures import_role_policies data/role_policies.json
poetry run invenio reroils fixtures import_system_role_policies data/system_role_policies.json
Easier with: https://github.com/rero/rero-invenio-base/pull/12 (`poetry run pip install git+https://github.com/rerowep/rero-invenio-base.git@wep-es-tasks`)
# For each resource: read the current index name behind the alias, move the
# alias to the new versioned index, then delete the old index.
index_name=`poetry run invenio rero es index info -i acq_orders`
echo $index_name
poetry run invenio rero es index move acq_orders $index_name acq_orders-acq_order-v0.0.1-20230313
poetry run invenio index delete $index_name
# same for items
index_name=`poetry run invenio rero es index info -i items`
echo $index_name
poetry run invenio rero es index move items $index_name items-item-v0.0.1-20230313
poetry run invenio index delete $index_name
# same for documents
index_name=`poetry run invenio rero es index info -i documents`
echo $index_name
poetry run invenio rero es index move documents $index_name documents-document-v0.0.1-20230313
poetry run invenio index delete $index_name
# same for patrons
index_name=`poetry run invenio rero es index info -i patrons`
echo $index_name
poetry run invenio rero es index move patrons $index_name patrons-patron-v0.0.1-20230313
poetry run invenio index delete $index_name
from rero_ils.modules.loans.api import LoansSearch
from rero_ils.modules.items.api import Item
from rero_ils.modules.loans.models import LoanState

# Reindex every item that currently has a loan in one of the "requested"
# states, so their availability is refreshed in ES.
states = [
    LoanState.PENDING,
    LoanState.ITEM_AT_DESK,
    LoanState.ITEM_IN_TRANSIT_FOR_PICKUP,
    LoanState.ITEM_IN_TRANSIT_TO_HOUSE
]
# Set comprehension instead of set([...]); deduplicates item pids.
item_requested_pids = {
    hit.item_pid.value
    for hit in LoansSearch().filter('terms', state=states).source('item_pid').scan()
}
for pid in item_requested_pids:
    # Guard: get_record_by_pid may return None for a stale/deleted pid; the
    # original unconditional rec.reindex() crashed with AttributeError there.
    if rec := Item.get_record_by_pid(pid):
        rec.reindex()
from rero_invenio_base.modules.tasks import run_on_worker
from rero_invenio_base.modules.utils import chunk
from rero_ils.modules.items.api import ItemsSearch
# Source code shipped to the Celery workers: reindexes a batch of items by
# their ES ids, returning (success_count, error_ids). This is a runtime
# string — it is executed on the worker, not here.
code = '''
def reindex(_ids):
from rero_ils.modules.items.api import Item
n = 0
errors = []
for _id in _ids:
try:
doc = Item.get_record(_id)
doc.reindex()
n += 1
except Exception as e:
print('error', e)
errors.append(_id)
return (n, errors)
'''
def get_all_ids():
    """Yield the ES ids of every item of type 'issue'."""
    search = ItemsSearch().filter('term', type='issue').source().scan()
    for hit in search:
        yield hit.meta.id
# Fan out the reindex in batches of 500 ids per worker task.
for c in chunk([str(val) for val in get_all_ids()], 500):
    run_on_worker.delay(code, 'reindex', c)
—— rero.ch | Twitter | Gitter —— RERO+ catalogue | RERO ILS test version | RERO DOC | RERO MEF ——