From 69fbd6edf28faa31e7ecb921d8715cf8c35467ae Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Mon, 7 Oct 2024 10:52:31 +0200 Subject: [PATCH 01/21] Update CKAN extensions and remove deprecated config settings --- .env.example | 36 ++-- CHANGELOG.md | 190 ++++++++++++++++++ ckan/Dockerfile | 8 +- ckan/Dockerfile.dev | 8 +- .../01_setup_ckanext_config.sh | 60 ------ ckan/docker-entrypoint.d/02_setup_xloader.sh | 3 +- doc/info_envfile.md | 4 +- samples/.env.codespaces | 34 ++-- samples/.env.dev.example | 34 ++-- samples/.env.es.example | 34 ++-- 10 files changed, 270 insertions(+), 141 deletions(-) create mode 100644 CHANGELOG.md diff --git a/.env.example b/.env.example index 26baca8a..b7542310 100644 --- a/.env.example +++ b/.env.example @@ -121,7 +121,7 @@ CKAN___API_TOKEN__JWT__ENCODE__SECRET=string:CHANGE_ME CKAN___API_TOKEN__JWT__DECODE__SECRET=string:CHANGE_ME CKAN_SYSADMIN_NAME=ckan_admin CKAN_SYSADMIN_PASSWORD=test1234 -CKAN_SYSADMIN_EMAIL=your_emai@${PROXY_SERVER_NAME} +CKAN_SYSADMIN_EMAIL=your_email@example.com CKAN_STORAGE_PATH=/var/lib/ckan CKAN_LOGS_PATH=/var/log # SMTP settings @@ -146,7 +146,7 @@ CKAN__CORS__ORIGIN_WHITELIST="" CKAN__AUTH__ALLOW_DATASET_COLLABORATORS=False CKAN__AUTH__ALLOW_ADMIN_COLLABORATORS=False # Default number of facets shown in search results. Default 10. -SEARCH__FACETS__DEFAULT=4 +CKAN___SEARCH__FACETS__DEFAULT=4 # Enable or disable the DataStore SQL search backend. Default is False. 
CKAN__DATASTORE__SQLSEARCH__ENABLED=True @@ -165,7 +165,7 @@ CKAN__LOCALE_ORDER="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru CKAN__LOCALES_OFFERED="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru de pl nl bg ko_KR hu sa sl lv" # Extensions -CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester harvest pdf_view pages fluent envvars" +CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester schemingdcat_postgres_harvester schemingdcat_open_data_statistics harvest pdf_view pages fluent envvars" # ckanext-harvest CKAN__HARVEST__MQ__TYPE=redis @@ -175,8 +175,8 @@ CKAN__HARVEST__MQ__REDIS_DB=${REDIS_CKAN_DATABASE} # Clean-up mechanism for the harvest log table. The default is 30 days. 
CKAN__HARVEST__LOG_TIMEFRAME=40 -# ckanext-xloader -CKANEXT__XLOADER__JOBS__DB_URI=postgresql://${CKAN_DB_USER}:${CKAN_DB_PASSWORD}@${POSTGRES_HOST}/${CKAN_DB} +# ckanext-xloader ckanext.xloader.jobs_db.uri +CKANEXT__XLOADER__JOBS_DB__URI=postgresql://${CKAN_DB_USER}:${CKAN_DB_PASSWORD}@${POSTGRES_HOST}/${CKAN_DB} # ckanext-dcat CKANEXT__DCAT__BASE_URI=${CKAN_URL} @@ -200,24 +200,24 @@ CKANEXT__GEOVIEW__SHP_VIEWER__ENCODING=UTF-8 # ckanext-schemingdcat ## CSW Endpoint for spatial metadata -CKANEXT__SCHEMINGDCAT_GEOMETADATA_BASE_URI=${PYCSW_URL} +CKANEXT__SCHEMINGDCAT__GEOMETADATA_BASE_URI=${PYCSW_URL} ## Scheming: setup_scheming.sh -CKANEXT__SCHEMINGDCAT_DATASET_SCHEMA="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_full.yaml" -CKANEXT__SCHEMINGDCAT_GROUP_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_group.json" -CKANEXT__SCHEMINGDCAT_ORGANIZATION_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_org.json" -CKANEXT__SCHEMINGDCAT_PRESETS="ckanext.schemingdcat:schemas/default_presets.json ckanext.fluent:presets.json" +CKAN___SCHEMING__DATASET_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_full.yaml" +CKAN___SCHEMING__GROUP_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_group.json" +CKAN___SCHEMING__ORGANIZATION_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_org.json" +CKAN___SCHEMING__PRESETS="ckanext.schemingdcat:schemas/default_presets.json ckanext.fluent:presets.json" ## Facets: setup_scheming.sh -CKANEXT__SCHEMINGDCAT_FACET_LIST="dataset_scope theme groups theme_eu language dcat_type groups publisher_name publisher_type spatial_uri owner_org res_format frequency tags tag_uri conforms_to" -CKANEXT__SCHEMINGDCAT_ORGANIZATION_CUSTOM_FACETS=True -CKANEXT__SCHEMINGDCAT_GROUP_CUSTOM_FACETS=True -CKANEXT__SCHEMINGDCAT_DEFAULT_PACKAGE_ITEM_ICON="theme" -CKANEXT__SCHEMINGDCAT_DEFAULT_PACKAGE_ITEM_SHOW_SPATIAL=True -CKANEXT__SCHEMINGDCAT_SHOW_METADATA_TEMPLATES_TOOLBAR=False 
-CKANEXT__METADATA_TEMPLATES_SEARCH_IDENTIFIER="schemingdcat_xls-template" -CKANEXT__SCHEMINGDCAT_ENDPOINTS_YAML="endpoints.yaml" +CKANEXT__SCHEMINGDCAT__FACET_LIST="dataset_scope hvd_category theme groups theme_eu language dcat_type groups publisher_name publisher_type spatial_uri owner_org res_format frequency tags tag_uri conforms_to" +CKANEXT__SCHEMINGDCAT__DEFAULT_PACKAGE_ITEM_ICON="theme" +CKANEXT__SCHEMINGDCAT__DEFAULT_PACKAGE_ITEM_SHOW_SPATIAL=True +CKANEXT__SCHEMINGDCAT__SHOW_METADATA_TEMPLATES_TOOLBAR=False +CKANEXT__SCHEMINGDCAT__METADATA_TEMPLATES_SEARCH_IDENTIFIER="schemingdcat_xls-template" +CKANEXT__SCHEMINGDCAT__ENDPOINTS_YAML="endpoints.yaml" CKANEXT__SCHEMINGDCAT__SOCIAL_GITHUB="https://github.com/mjanez/ckanext-schemingdcat" CKANEXT__SCHEMINGDCAT__SOCIAL_X="https://x.com/ckanproject" CKANEXT__SCHEMINGDCAT__SOCIAL_LINKEDIN="https://www.linkedin.com/company/ckanproject" +CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS=True +CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS_THEMES=True # ckanext-pages CKANEXT__PAGES__ALOW_HTML=False diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..4ff17e30 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,190 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). + + +## Unreleased + +[Compare with latest](https://github.com/mjanez/ckan-docker/compare/v2.9.11-stable...HEAD) + +### Added + +- Add patch to fix ckanext-pages ([a646b49](https://github.com/mjanez/ckan-docker/commit/a646b491251c3554230f488f4b2ac6b4bd9ae4f8) by mjanez). +- Add docs about ckanext-schemingdcat harvesters. ([7136656](https://github.com/mjanez/ckan-docker/commit/71366560b32b5e6e6de7a2e26ec1b93afde6cd46) by mjanez). 
+ +### Fixed + +- Fix CKAN xloader API token creation ([05edcca](https://github.com/mjanez/ckan-docker/commit/05edcca600a64c8a0fb269c1d1dd034c8808fbec) by mjanez). +- Fix start_ckan.sh.override for clearer CSRF check and update PROXY_SERVER_URL in .env.codespaces ([d312797](https://github.com/mjanez/ckan-docker/commit/d31279779b432edf69cfc0e2a354895bb4c55ff2) by mjanez). + +### Removed + +- Remove --plugins options from uWSGI configuration ([3fdab45](https://github.com/mjanez/ckan-docker/commit/3fdab45448d5372bd5e1e2eb9bb4f34a5e2b3e83) by mjanez). +- Remove unnecessary files related to updating who.ini ([ae97c91](https://github.com/mjanez/ckan-docker/commit/ae97c9120b5bab908a1e8985d112639d23789618) by mjanez). +- Remove unnecesary who.ini config ([6adab9d](https://github.com/mjanez/ckan-docker/commit/6adab9d26294b8f115c4f4a9e471dde4aefdcad2) by mjanez). + + +## [v2.9.11-stable](https://github.com/mjanez/ckan-docker/releases/tag/v2.9.11-stable) - 2024-08-29 + +[Compare with v2.9.9-stable](https://github.com/mjanez/ckan-docker/compare/v2.9.9-stable...v2.9.11-stable) + +### Added + +- Add CKANEXT__SCHEMINGDCAT_ENDPOINTS_YAML ([dc3032d](https://github.com/mjanez/ckan-docker/commit/dc3032d37cef6c60ce9e4b056297539b578d2511) by mjanez). +- Add info to include dev mode and clone CKAN extensions with specific tags. ([c402d5a](https://github.com/mjanez/ckan-docker/commit/c402d5a38d92f1ed0fe234af04070a9f8be69077) by mjanez). +- Add info to clone CKAN extensions with specific tags, update reqs and autoclrf for dev mode ([274861a](https://github.com/mjanez/ckan-docker/commit/274861a28a19db85954456e166b8e3c0ad53232e) by mjanez). +- Add new fields to solr schema file ([37ed4c1](https://github.com/mjanez/ckan-docker/commit/37ed4c163e267def27e322bbd56cd1d8b8df6daf) by mjanez). +- Add slug-preview patch ([3a51ddd](https://github.com/mjanez/ckan-docker/commit/3a51ddd8295c42e74340bf1d697f0ee76fd37dde) by mjanez). 
+- Add .env docs ([f19de7e](https://github.com/mjanez/ckan-docker/commit/f19de7eb1a41dd210291c0d48b62b10eab200d16) by mjanez). +- Add CORS envvars ([fa50127](https://github.com/mjanez/ckan-docker/commit/fa501274f0a42e68010a096a7370dae9ff85d243) by mjanez). +- Add Docker workflow for building and pushing ckan-docker image from master push ([28602e0](https://github.com/mjanez/ckan-docker/commit/28602e0f0c08bca8d18e8f689bc0e96c5f30a74a) by mjanez). +- Add crond in background ([c1b819b](https://github.com/mjanez/ckan-docker/commit/c1b819b624cdf909962d9ccf444ec33c2681143c) by mjanez). +- Add harvest & xloader worker configuration ([68a9ce9](https://github.com/mjanez/ckan-docker/commit/68a9ce9eccde43ec130e0148cf3282441bd91a03) by mjanez). +- Add init script for configuring datapusher ([e439eda](https://github.com/mjanez/ckan-docker/commit/e439eda620939709e38037a3dba295e991f1db5c) by amercader). +- Add step to copy init files ([f9ebc0c](https://github.com/mjanez/ckan-docker/commit/f9ebc0c41c1bda1bfac992d5687e15c81e2c5e80) by amercader). + +### Fixed + +- Fix for ckanext-dcat v1.80 and ckanext-schemingdcat v3.2.2 ([35a6bdc](https://github.com/mjanez/ckan-docker/commit/35a6bdc56efe0c560b065d4a904643f1b3a6314d) by mjanez). +- Fix dataset_scope field type ([11d7b9e](https://github.com/mjanez/ckan-docker/commit/11d7b9ef1772dd24f790c94add05ac87a92248f3) by mjanez). +- Fix 02_setup_scheming.sh ([4cecf1a](https://github.com/mjanez/ckan-docker/commit/4cecf1a9d1d94a2087d430a8e892f6a90406425e) by mjanez). +- Fix notes/tips ([176a18a](https://github.com/mjanez/ckan-docker/commit/176a18a2ef25b38fb9d78fa79e8cf1db174e8067) by mjanez). +- Fix dev mode ([c678a29](https://github.com/mjanez/ckan-docker/commit/c678a294ecf1d11bcb23715d723b88a12970a0ac) by mjanez). +- Fix dev networks bug ([1d87acc](https://github.com/mjanez/ckan-docker/commit/1d87acc1243b4ae0285cdb1fe6ea8601170a0d59) by mjanez). 
+- Fix depends_on ckan-dev ([501f2d2](https://github.com/mjanez/ckan-docker/commit/501f2d209bac2d7b21621bff42298b68043fe95e) by mjanez). +- fix readme toc, headings, replace one screenshot of text ([32af5cf](https://github.com/mjanez/ckan-docker/commit/32af5cfbf8d1c81fd7e309d158117445067a8f13) by Ian Ward). +- Fix restore backups and plugin creation docs ([1a44ca5](https://github.com/mjanez/ckan-docker/commit/1a44ca5b8484821c39ec83c624cc5a486c2f1e66) by mjanez). +- Fix Dockerfile to install requirements for ckanext-schemingdcat ([efefb73](https://github.com/mjanez/ckan-docker/commit/efefb7338ec0bbc83421303f5a5be5a1b85c1bca) by mjanez). +- Fix scheming_dcat to schemingdcat ([f08177a](https://github.com/mjanez/ckan-docker/commit/f08177a55e60acee2b6edcf69297048556f80916) by mjanez). +- Fix supervisord command in start_ckan_development.sh.override ([c2c1ef6](https://github.com/mjanez/ckan-docker/commit/c2c1ef60040d56c665f3862afb7524d3304c68c5) by mjanez). +- Fix token revocation in 01_setup_xloader.sh ([96a4c39](https://github.com/mjanez/ckan-docker/commit/96a4c394a74bd7c62031f52f4c9902055c03414e) by mjanez). +- Fix image tag in the Dev Dockerfile ([5fc1efc](https://github.com/mjanez/ckan-docker/commit/5fc1efce33dd6d752660dac3752d0c144c5a90cc) by amercader). +- fixed line endings for windows ([bb936fa](https://github.com/mjanez/ckan-docker/commit/bb936fab7ab2c2661d1e7dc455098dd58c5afdd8) by Hendrik Lücke-Tieke). + +### Removed + +- remove broken, confusing CKAN_PORT setting ([18fbe44](https://github.com/mjanez/ckan-docker/commit/18fbe445c1782d887f6ac77813e507d529246b53) by Ian Ward). +- Remove unused harvest patch ([f033f6b](https://github.com/mjanez/ckan-docker/commit/f033f6b10c2f5b88205928b6a9df93327f9f4611) by mjanez). +- remove container names ([445d525](https://github.com/mjanez/ckan-docker/commit/445d52555de0567d7b9f40e53ba2621ec2494598) by Ian Ward). 
+- Remove unnecesary TODO ([7309d15](https://github.com/mjanez/ckan-docker/commit/7309d15bc0e8d91a245cef01a938fb762a2ef7d2) by mjanez). +- Remove timezone commands ([f653b03](https://github.com/mjanez/ckan-docker/commit/f653b0324de14e83d5fc0e26dc72c2ec714de4be) by amercader). +- remove container_name? ([2a8f844](https://github.com/mjanez/ckan-docker/commit/2a8f844ecf5564b63a3430924e8bacdc91e40c30) by Ian Ward). + +## [v2.9.9-stable](https://github.com/mjanez/ckan-docker/releases/tag/v2.9.9-stable) - 2024-02-12 + +[Compare with v2.9.8-stable](https://github.com/mjanez/ckan-docker/compare/v2.9.8-stable...v2.9.9-stable) + +### Added + +- Add info about ckanext-scheming_dcat ([da0f7b6](https://github.com/mjanez/ckan-docker/commit/da0f7b66d5198859e9af5ca2cfd0a7a279d20261) by mjanez). +- Add ckanext-fluent ([7eb35dc](https://github.com/mjanez/ckan-docker/commit/7eb35dc37084867fb8e115a932bc8c0fad6ca9c8) by mjanez). +- Add hadolint no-fail ([99b4376](https://github.com/mjanez/ckan-docker/commit/99b437691cf8046a742a6ab667fe6d48395a7a70) by mjanez). +- Add ckanext-pages patch for root_path ([5d9ad91](https://github.com/mjanez/ckan-docker/commit/5d9ad919c2f22724d074ae330d1a8e8618751d89) by mjanez). +- Add multiple Dockerfiles to hadolint ([2c99632](https://github.com/mjanez/ckan-docker/commit/2c99632c1d285b00a7b3fc0e32d7f8a911cf096a) by mjanez). +- Add hadolint to Dockerfile.ghcr image ([3c9d916](https://github.com/mjanez/ckan-docker/commit/3c9d9166f1542ce2f1f592ef7ab2893f51d19189) by mjanez). +- Add :test-build-only tag ([c8eb770](https://github.com/mjanez/ckan-docker/commit/c8eb770920f4dd527ac278498abcf86b1b5dd8e0) by mjanez). +- Add docker-manual action and logo to README ([66e231b](https://github.com/mjanez/ckan-docker/commit/66e231b343a72ecf0074323f307ebded20d9d3b7) by mjanez). +- Add CKANEXT__DCAT__DEFAULT_CATALOG_ENDPOINT ennvar ([e5447ef](https://github.com/mjanez/ckan-docker/commit/e5447ef32b2bd6a9ce420c74ba357f6ca213b6b4) by mjanez). 
+- Add geometadata_base_uri ennvar ([8392b0a](https://github.com/mjanez/ckan-docker/commit/8392b0a72fb64f900dbf52fbd8e4ef2b6f4bc9a8) by mjanez). +- Added Copying and License in README.md ([c0e6217](https://github.com/mjanez/ckan-docker/commit/c0e62170578616c73423bbb4c5c7eed80535da95) by GauravPandey-NECI). +- Add entrypoint for ckanext-pages ([09f56eb](https://github.com/mjanez/ckan-docker/commit/09f56ebce605c39030048b665caa802b1fe66bed) by mjanez). +- Add CKAN backup scripts ([5fbd5e6](https://github.com/mjanez/ckan-docker/commit/5fbd5e64b3632a6364f31bea58d2f4f90afb299c) by mjanez). +- Added steps for user creation ([6e92dee](https://github.com/mjanez/ckan-docker/commit/6e92deebfa94a957f82779a77f7f020cedbcfd3e) by GauravPandey-NECI). + +### Fixed + +- Fix bash script to backup ckan db ([49b9be0](https://github.com/mjanez/ckan-docker/commit/49b9be0c3b4057b17ae5e7370a61c2f4e2fe6d00) by mjanez). +- Fix tag name ([4c0c627](https://github.com/mjanez/ckan-docker/commit/4c0c627f8b7e84ee43a0cadbcdc607c467583f40) by mjanez). +- Fix Dockerfiles ([8a599a1](https://github.com/mjanez/ckan-docker/commit/8a599a11a4285eb15bb4f0e67bb680259f1cf080) by mjanez). +- Fix docker build context ([89610da](https://github.com/mjanez/ckan-docker/commit/89610daec4084e775bed1fd17eb20dd56054ddbd) by mjanez). +- Fix dockerfile images ([712abb1](https://github.com/mjanez/ckan-docker/commit/712abb16b2ffb955988f087a3995a7409b8462fa) by mjanez). +- Fix actions ([42d2ddb](https://github.com/mjanez/ckan-docker/commit/42d2ddb91c7144540f95401b3f3353d53e834d80) by mjanez). +- Fix docker-pr ([21b0902](https://github.com/mjanez/ckan-docker/commit/21b09029593c699b4230482a9e57ef6fb755bf21) by mjanez). +- Fix image ([a6d9194](https://github.com/mjanez/ckan-docker/commit/a6d9194f3014a45cd18d21b0232f05d0ed8a869e) by mjanez). +- Fix host_ports/ports in Apache/NGINX Dockerfiles ([39611be](https://github.com/mjanez/ckan-docker/commit/39611bec41b02052a4ba7633d3ad1c9a403bda16) by mjanez). 
+- Fix ckan-pycsw service ([94573c4](https://github.com/mjanez/ckan-docker/commit/94573c4290b57877735bdfc470b83d6834fa8e2b) by mjanez). +- Fix non-root user execution of ckan-pycsw ([f86e1be](https://github.com/mjanez/ckan-docker/commit/f86e1be5a5ed4433272c51a4fc1c29be91a4e67d) by mjanez). +- Fix map attribution ([eea2a8b](https://github.com/mjanez/ckan-docker/commit/eea2a8b4aad91595aed130ee93fac0dc7661fe00) by mjanez). +- Fix ckan-pycsw entrypoint ([f578862](https://github.com/mjanez/ckan-docker/commit/f578862769023f8d1a59a826313e46c95b77cfb0) by mjanez). +- Fix script extension ([43ca2b6](https://github.com/mjanez/ckan-docker/commit/43ca2b6a4e7f4364489b95d42f4e8ccb837cc530) by avdata99). +- Fix default .env.example ([77719fd](https://github.com/mjanez/ckan-docker/commit/77719fd2ebf56a9743cc1cd1e5e9d4439393e915) by mjanez). +- Fix 05_setup_pages.sh ([145b53f](https://github.com/mjanez/ckan-docker/commit/145b53ffaf3ddaa9722a883f14c56fd9698fefba) by mjanez). +- Fix docker-compose and remove Solr image ([5e3baba](https://github.com/mjanez/ckan-docker/commit/5e3baba522bbc0cbcaf59e943aed88df2fc1f429) by mjanez). +- Fix nginx exposed ports ([1be9948](https://github.com/mjanez/ckan-docker/commit/1be99482fa943440967dc2ec0d856bebfd2afbd4) by mjanez). +- Fix multilocations ([7ea93fc](https://github.com/mjanez/ckan-docker/commit/7ea93fcb9b4143813ab102948a5ad71af661b628) by mjanez). +- Fix postgresql entrypoint ([2c1f12e](https://github.com/mjanez/ckan-docker/commit/2c1f12e790cf7e548a6dcb7aad4c3c19ee69ca2f) by mjanez). +- Fix env files ([cf18c31](https://github.com/mjanez/ckan-docker/commit/cf18c31492b844594606ee8137ee41a1f1708572) by mjanez). +- Fix Dockerfile plugins ([922edc7](https://github.com/mjanez/ckan-docker/commit/922edc720cf611a7027bad76da2219fb47569e50) by mjanez). +- Fix dev image ([754711c](https://github.com/mjanez/ckan-docker/commit/754711c83779c29d2fa1ef8a7c777b4c2f66863f) by mjanez). 
+- Fix nginx image ([b2b2761](https://github.com/mjanez/ckan-docker/commit/b2b27613286d5897ee3e1847eabf8e04d0f420e4) by mjanez). +- Fix who.ini when is only: "/" ([a2fa917](https://github.com/mjanez/ckan-docker/commit/a2fa91798b7b812a1ead9928b70d7c43da5dc7c3) by mjanez). +- Fix nginx multiple locations ([eba3896](https://github.com/mjanez/ckan-docker/commit/eba3896b44cd2b632a9879ebf8e33bd6966c54a6) by mjanez). +- Fix README ([93831d9](https://github.com/mjanez/ckan-docker/commit/93831d96f240c9fabbdcfa8ca6d6e60804702f3f) by mjanez). + +### Removed + +- Remove hadolint when build & push ([5de535d](https://github.com/mjanez/ckan-docker/commit/5de535d018748532528f0aea33af71c0cf2fe490) by mjanez). +- Remove unnecesary patch ([4b1e735](https://github.com/mjanez/ckan-docker/commit/4b1e735fea2a95ff914239be87e4c139abfed5d0) by mjanez). +- Remove ckanext-sparql_interface ([30b699f](https://github.com/mjanez/ckan-docker/commit/30b699f4d7811dc98f284a9898799c112799bfd7) by mjanez). +- remove .env file ([aed9916](https://github.com/mjanez/ckan-docker/commit/aed99166ce4dbc7c6becb20b1f36941c87ae7fd4) by Brett). + +## [v2.9.8-stable](https://github.com/mjanez/ckan-docker/releases/tag/v2.9.8-stable) - 2023-05-09 + +[Compare with first commit](https://github.com/mjanez/ckan-docker/compare/17c07d99ff636b0899f5f4fc05795f6bbf6ba672...v2.9.8-stable) + +### Added + +- Add ckanext-facet_scheming ([eeeb652](https://github.com/mjanez/ckan-docker/commit/eeeb652acee1a8e6353d5199127415da0dcf0969) by mjanez). +- Add README.md to log/ and metadata/ ([5c3a97e](https://github.com/mjanez/ckan-docker/commit/5c3a97ea8c1dd55db8adae5e4f24b8ab9c348d8c) by mjanez). +- Add Dockerfile.ghcr ([de06443](https://github.com/mjanez/ckan-docker/commit/de0644318e2cd6cab46bdfbf096f210baa4fafa4) by mjanez). +- Add resourcedictionary ckan patch ([c0db947](https://github.com/mjanez/ckan-docker/commit/c0db94798742f9590e184f4f823ff6a3fff64f8b) by mjanez). 
+- Add logging to docker compose files ([25b49de](https://github.com/mjanez/ckan-docker/commit/25b49de65c1b953c506b5ddd319754fca54f4a1c) by mjanez). +- Add info about images ([9053bec](https://github.com/mjanez/ckan-docker/commit/9053becc76466e4d77d2e833dbb7c255388ae819) by mjanez). +- Add ckan-pycsw & httpd (RDF/CSW) bundle ([7a39ea5](https://github.com/mjanez/ckan-docker/commit/7a39ea5f410a6ef2a607e13a00e8cafe8f3b7f02) by mjanez). +- Add solr8-spatial image ([5357b1f](https://github.com/mjanez/ckan-docker/commit/5357b1f64f1f13e60170c6d2a5a8d9cba9a18739) by mnjnz). +- Add custom images of base Dockerfiles ([53d2d65](https://github.com/mjanez/ckan-docker/commit/53d2d65cb10d22af216a6e132aa552090dc08d21) by mnjnz). +- Add entrypoints - ckanext-dcat - previews - ckanext-xloader ([4985c8b](https://github.com/mjanez/ckan-docker/commit/4985c8befc9f1c33bb04b8a6f8dd80ea4555aed8) by mjanez). +- Add warning info about this repo ([64195b5](https://github.com/mjanez/ckan-docker/commit/64195b52e8d7e6c9056900b874bc1fd526d78b3a) by mjanez). +- Added enable-threads to Datapusher ([3522bf5](https://github.com/mjanez/ckan-docker/commit/3522bf54c301730a8fd2496d499b89b0517fb50c) by Brett). +- Added Sphinx doc (initial commit) ([5643f06](https://github.com/mjanez/ckan-docker/commit/5643f067de844580e4df5160f2a30124c22815d0) by Clément MOUCHET). +- Added missing Dockerfile ([3cd3b50](https://github.com/mjanez/ckan-docker/commit/3cd3b50fad50f71c29a46e344fea2028b5e27cef) by Clément MOUCHET). +- ADDED: - Data only container for Postgres db & CKAN FileStore - Nginx container (still based on official container, but copies the config for portability) UPDATED: - Postgres Dockerfile to allow custom PGDATA directory & initiate it if it's not there or is empty - fig.ml to reflect the latest changes (datapusher, nginx, data container) ([3946774](https://github.com/mjanez/ckan-docker/commit/39467745f1395d52143890a1e11488fedab0d457) by Clément MOUCHET). 
+ +### Fixed + +- Fix README ([b362ad8](https://github.com/mjanez/ckan-docker/commit/b362ad8e2fc8c93205484b2a0db226b03beee800) by mjanez). +- Fix Dockerfile.dev and add info to doc ([65c038a](https://github.com/mjanez/ckan-docker/commit/65c038afd0c118cca94672225c9c5fa1249f5948) by mjanez). +- Fix development mode ([0ba0561](https://github.com/mjanez/ckan-docker/commit/0ba0561f3189c8a946cb2431dfffac4fcd8c730c) by mjanez). +- Fix cronjobs by use scheduler ([f16a527](https://github.com/mjanez/ckan-docker/commit/f16a5270027e5960e2ddc3a1b8f88101426769da) by mjanez). +- Fix image names ([3d7c418](https://github.com/mjanez/ckan-docker/commit/3d7c4182bee1e3ac478481988d9efd58d6238b19) by mjanez). +- Fix xloader token creation ([3f90532](https://github.com/mjanez/ckan-docker/commit/3f90532d05fb3c0192060be6a49b23d833f29e03) by mjanez). +- Fix image ([05b1abe](https://github.com/mjanez/ckan-docker/commit/05b1abe4bd51c80d034aa9ca5c4e985014a3ab17) by mjanez). +- Fix APACHE_PROXY_PASS to APACHE_CKAN_PROXY_PASS ([9d47fec](https://github.com/mjanez/ckan-docker/commit/9d47feca615bfb5aef8be2a18885720ea9754a09) by mjanez). +- Fix CKAN_SITE_URLS in .env ([3152fd5](https://github.com/mjanez/ckan-docker/commit/3152fd5b48e2519d408acfec5f0ce6bfe33d6d65) by mjanez). +- Fix docker image tag ([af1ce41](https://github.com/mjanez/ckan-docker/commit/af1ce41a86936a87f385f01e3ffad6f51dfe3048) by mjanez). +- Fix workflows & docker compose files ([86b6486](https://github.com/mjanez/ckan-docker/commit/86b6486b7904418877d5508649ecbae2c8bbc9e4) by mnjnz). +- Fix docker.yml ([b222c09](https://github.com/mjanez/ckan-docker/commit/b222c0924aeed62d84c4b5e35de318d07e9dfc04) by mjanez). +- Fix .env.example ([4702b86](https://github.com/mjanez/ckan-docker/commit/4702b86449d8b27de0b1006e96ce229fea8c809e) by mnjnz). +- Fix xloader setup ([3ffba6f](https://github.com/mjanez/ckan-docker/commit/3ffba6fe0dc1c941f84dfa0a48d9467a5f9e3e6c) by mnjnz). 
+- Fix views ([d30bbad](https://github.com/mjanez/ckan-docker/commit/d30bbad8e96bc7f0d326dcc454eafa71d992be56) by mjanez). +- Fix setup worker ([4240356](https://github.com/mjanez/ckan-docker/commit/424035617f2f3a541f21e89b964eac2a11804a9e) by mjanez). +- Fix setup_xloader ([f50edb4](https://github.com/mjanez/ckan-docker/commit/f50edb467a305d94a8920262a55874b49752a98d) by mjanez). +- Fix Dockerfile ([cd3dfa0](https://github.com/mjanez/ckan-docker/commit/cd3dfa0985875ede8f28fd5713b2a08cefc3df5a) by mjanez). +- Fix link to CKAN Docs ([309078d](https://github.com/mjanez/ckan-docker/commit/309078d3e546002de66bce3474063c64bd1af3b5) by mjanez). +- fixed path of datapusher ([dbc7561](https://github.com/mjanez/ckan-docker/commit/dbc7561a5e7324018122845822e01680dd9d2547) by tajima). + +### Changed + +- changed default port to avoid CORS issues ([1750619](https://github.com/mjanez/ckan-docker/commit/1750619291e73db6258395a5026ee0b55c1b37de) by Clément MOUCHET). + +### Removed + +- Remove solr build ([e0eb54f](https://github.com/mjanez/ckan-docker/commit/e0eb54f61f7fe0a62fdb471439d16a339e4aa915) by mnjnz). +- Remove datapusher - Datapusher out of date with ckanext-xloader - Fix deprecated views ([7db1611](https://github.com/mjanez/ckan-docker/commit/7db161188eaa8d210b7b818c5cbdde92a8fcd53c) by mjanez). +- Remove datapusher ([c61176e](https://github.com/mjanez/ckan-docker/commit/c61176e20cc612a6ba00b6e998e8f371e17d9c65) by mjanez). +- Remove images directory + TODO's file ([7e24429](https://github.com/mjanez/ckan-docker/commit/7e244290a5d436ecf1ac00a51051825d550f1608) by Brett). +- Remove pdf_view from default plugins ([dcf7b52](https://github.com/mjanez/ckan-docker/commit/dcf7b52039d9e2ccb443ed9056842f4c99e131c3) by Matt). 
+ diff --git a/ckan/Dockerfile b/ckan/Dockerfile index d0eb048d..d6eeb313 100644 --- a/ckan/Dockerfile +++ b/ckan/Dockerfile @@ -18,11 +18,11 @@ COPY req_fixes req_fixes ## Spatial - v2.1.1 ## ## DCAT - v1.8.0 (Latest stable version of ckanext-dcat with minor fixes) ## ## Scheming - release-3.0.0 ## -## Resource dictionary - v1.0.1 (mjanez/Fixed version) ## +## Resource dictionary - v1.0.2 (mjanez/Fixed version) ## ## Pages - v0.5.2 ## ## PDFView - 0.0.8 ## ## Fluent - v1.0.1 (mjanez/Forked stable version) ## -## Scheming DCAT - v4.0.0 (mjanez/GeoDCAT-AP/NTI-RISP extended version) ## +## Scheming DCAT - v4.1.0 (mjanez/GeoDCAT-AP/NTI-RISP extended version) ## RUN echo ${TZ} > /etc/timezone && \ if ! [ /usr/share/zoneinfo/${TZ} -ef /etc/localtime ]; then cp /usr/share/zoneinfo/${TZ} /etc/localtime; fi && \ # Install patch utility @@ -49,7 +49,7 @@ RUN echo ${TZ} > /etc/timezone && \ echo "ckan/ckanext-scheming" && \ pip3 install --no-cache-dir -e git+https://github.com/ckan/ckanext-scheming.git@release-3.0.0#egg=ckanext-scheming && \ echo "mjanez/ckanext-resourcedictionary" && \ - pip3 install --no-cache-dir -e git+https://github.com/mjanez/ckanext-resourcedictionary.git@v1.0.1#egg=ckanext-resourcedictionary && \ + pip3 install --no-cache-dir -e git+https://github.com/mjanez/ckanext-resourcedictionary.git@v1.0.2#egg=ckanext-resourcedictionary && \ echo "ckan/ckanext-pages" && \ pip3 install --no-cache-dir -e git+https://github.com/ckan/ckanext-pages.git@v0.5.2#egg=ckanext-pages && \ echo "ckan/ckanext-pdfview" && \ @@ -57,7 +57,7 @@ RUN echo ${TZ} > /etc/timezone && \ echo "mjanez/ckanext-fluent" && \ pip3 install --no-cache-dir -e git+https://github.com/mjanez/ckanext-fluent.git@v1.0.1#egg=ckanext-fluent && \ echo "mjanez/ckanext-schemingdcat" && \ - pip3 install --no-cache-dir -e git+https://github.com/mjanez/ckanext-schemingdcat.git@v4.0.0#egg=ckanext_schemingdcat && \ + pip3 install --no-cache-dir -e 
git+https://github.com/mjanez/ckanext-schemingdcat.git@v4.1.0#egg=ckanext_schemingdcat && \ pip3 install --no-cache-dir -r ${APP_DIR}/src/ckanext-schemingdcat/requirements.txt && \ # Remove system cache apt-get clean && \ diff --git a/ckan/Dockerfile.dev b/ckan/Dockerfile.dev index 25cbcb40..8defa5b8 100644 --- a/ckan/Dockerfile.dev +++ b/ckan/Dockerfile.dev @@ -71,12 +71,12 @@ COPY patches patches RUN for d in $APP_DIR/patches/*; do \ if [ -d $d ]; then \ for f in `ls $d/*.patch | sort -g`; do \ - if [ -d $SRC_DIR/`basename "$d"` ]; then \ - cd $SRC_DIR/`basename "$d"` && \ - echo "$0: Applying patch $f to $SRC_DIR/`basename $d`" && \ + if [ -d $SRC_EXTENSIONS_DIR/`basename "$d"` ]; then \ + cd $SRC_EXTENSIONS_DIR/`basename "$d"` && \ + echo "$0: Applying patch $f to $SRC_EXTENSIONS_DIR/`basename $d`" && \ patch -p1 < "$f" ; \ else \ - echo "$0: Skipping patch $f because directory $SRC_DIR/`basename $d` does not exist. Built the extension: `basename $d`" ; \ + echo "$0: Skipping patch $f because directory $SRC_EXTENSIONS_DIR/`basename $d` does not exist. 
Built the extension: `basename $d`" ; \ fi \ done ; \ fi ; \ diff --git a/ckan/docker-entrypoint.d/01_setup_ckanext_config.sh b/ckan/docker-entrypoint.d/01_setup_ckanext_config.sh index ec958146..5e5bbd20 100644 --- a/ckan/docker-entrypoint.d/01_setup_ckanext_config.sh +++ b/ckan/docker-entrypoint.d/01_setup_ckanext_config.sh @@ -4,12 +4,6 @@ echo "[docker-entrypoint.01_setup_ckanext_config] Clear index" ckan -c $CKAN_INI search-index clear -# ckan: Update core settings -echo "[docker-entrypoint.01_setup_ckanext_config] Loading CKAN core settings in the CKAN config file" -ckan config-tool $CKAN_INI \ - "search.facets.default=$SEARCH__FACETS__DEFAULT" \ - "ckan.datastore.sqlsearch.enabled=$CKAN__DATASTORE__SQLSEARCH__ENABLED" - # Add SMTP settings if CKAN__SMTP_ENABLED is True if [ "$CKAN__SMTP_ENABLED" = "True" ]; then echo "[docker-entrypoint.01_setup_ckanext_config] Adding SMTP settings to the CKAN config file" @@ -24,60 +18,6 @@ if [ "$CKAN__SMTP_ENABLED" = "True" ]; then "error_email_from=" fi -# ckanext-schemingdcat: Update settings -echo "[docker-entrypoint.01_setup_ckanext_config] Loading ckanext-scheming and ckanext-schemingdcat settings into ckan.ini" -ckan config-tool $CKAN_INI \ - "scheming.dataset_schemas=$CKANEXT__SCHEMINGDCAT_DATASET_SCHEMA" \ - "scheming.group_schemas=$CKANEXT__SCHEMINGDCAT_GROUP_SCHEMAS" \ - "scheming.organization_schemas=$CKANEXT__SCHEMINGDCAT_ORGANIZATION_SCHEMAS" \ - "scheming.presets=$CKANEXT__SCHEMINGDCAT_PRESETS" \ - "schemingdcat.facet_list=$CKANEXT__SCHEMINGDCAT_FACET_LIST" \ - "schemingdcat.organization_custom_facets=$CKANEXT__SCHEMINGDCAT_ORGANIZATION_CUSTOM_FACETS" \ - "schemingdcat.group_custom_facets=$CKANEXT__SCHEMINGDCAT_GROUP_CUSTOM_FACETS" \ - "schemingdcat.geometadata_base_uri=$CKANEXT__SCHEMINGDCAT_GEOMETADATA_BASE_URI" \ - "schemingdcat.default_package_item_icon=$CKANEXT__SCHEMINGDCAT_DEFAULT_PACKAGE_ITEM_ICON" \ - 
"schemingdcat.default_package_item_show_spatial=$CKANEXT__SCHEMINGDCAT_DEFAULT_PACKAGE_ITEM_SHOW_SPATIAL" \ - "schemingdcat.show_metadata_templates_toolbar=$CKANEXT__SCHEMINGDCAT_SHOW_METADATA_TEMPLATES_TOOLBAR" \ - "schemingdcat.metadata_templates_search_identifier=$CKANEXT__METADATA_TEMPLATES_SEARCH_IDENTIFIER" \ - "schemingdcat.endpoints_yaml=$CKANEXT__SCHEMINGDCAT_ENDPOINTS_YAML" \ - "schemingdcat.social_github=$CKANEXT__SCHEMINGDCAT__SOCIAL_GITHUB" \ - "schemingdcat.social_x=$CKANEXT__SCHEMINGDCAT__SOCIAL_X" \ - "schemingdcat.social_linkedin=$CKANEXT__SCHEMINGDCAT__SOCIAL_LINKEDIN" - -# ckanext-dcat: Add settings to the CKAN config file -echo "[docker-entrypoint.01_setup_ckanext_config] Loading ckanext-dcat settings in the CKAN config file" -ckan config-tool $CKAN_INI \ - "ckanext.dcat.base_uri=$CKANEXT__DCAT__BASE_URI" \ - "ckanext.dcat.catalog_endpoint=$CKANEXT__DCAT__DEFAULT_CATALOG_ENDPOINT" \ - "ckanext.dcat.rdf.profiles=$CKANEXT__DCAT__RDF_PROFILES" - -# ckan previews: Add CKAN Resource views to the CKAN config file -echo "[docker-entrypoint.01_setup_ckanext_config] Loading resource views in the CKAN config file" -ckan config-tool $CKAN_INI \ - "ckan.views.default_views=$CKAN__VIEWS__DEFAULT_VIEWS" \ - "ckan.preview.json_formats=$CKAN__PREVIEW__JSON_FORMATS" \ - "ckan.preview.xml_formats=$CKAN__PREVIEW__XML_FORMATS" \ - "ckan.preview.text_formats=$CKAN__PREVIEW__TEXT_FORMATS" \ - "ckan.preview.loadable=$CKAN__PREVIEW__LOADABLE" - -# ckanext-geoview: Add geoviews CKAN config file -echo "[docker-entrypoint.01_setup_ckanext_config] Loading geoviews in the CKAN config file" -ckan config-tool $CKAN_INI \ - "ckanext.geoview.ol_viewer.formats=$CKANEXT__GEOVIEW__OL_VIEWER__FORMATS" \ - "ckanext.geoview.shp_viewer.srid=$CKANEXT__GEOVIEW__SHP_VIEWER__SRID" \ - "ckanext.geoview.shp_viewer.encoding=$CKANEXT__GEOVIEW__SHP_VIEWER__ENCODING" \ - "ckanext.geoview.geojson.max_file_size=$CKANEXT__GEOVIEW__GEOJSON__MAX_FILE_SIZE" - -# ckanext-pages: Add pages CKAN config 
file -echo "[docker-entrypoint.01_setup_ckanext_config] Loading pages config in the CKAN config file" -ckan config-tool $CKAN_INI \ - "ckan.pages.allow_html=$CKANEXT__PAGES__ALOW_HTML" \ - "ckanext.pages.organization=$CKANEXT__PAGES__ORGANIZATION" \ - "ckanext.pages.group=$CKANEXT__PAGES__GROUP" \ - "ckanext.pages.about_menu=$CKANEXT__PAGES__ABOUT_MENU" \ - "ckanext.pages.group_menu=$CKANEXT__PAGES__GROUP_MENU" \ - "ckanext.pages.organization_menu=$CKANEXT__PAGES__ORGANIZATION_MENU" - # Rebuild index echo "[docker-entrypoint.01_setup_ckanext_config] Rebuild index" ckan -c $CKAN_INI search-index rebuild \ No newline at end of file diff --git a/ckan/docker-entrypoint.d/02_setup_xloader.sh b/ckan/docker-entrypoint.d/02_setup_xloader.sh index 0caca95e..7e62b1fa 100644 --- a/ckan/docker-entrypoint.d/02_setup_xloader.sh +++ b/ckan/docker-entrypoint.d/02_setup_xloader.sh @@ -22,8 +22,7 @@ done # Add ckanext.xloader.api_token to the CKAN config file echo "[docker-entrypoint.01_setup_xloader] Loading ckanext-xloader settings in the CKAN config file" ckan config-tool $CKAN_INI \ - "ckanext.xloader.api_token=xxx" \ - "ckanext.xloader.jobs_db.uri=$CKANEXT__XLOADER__JOBS__DB_URI" + "ckanext.xloader.api_token=xxx" # Create ckanext-xloader API_TOKEN echo "[docker-entrypoint.01_setup_xloader] Set up ckanext.xloader.api_token in the CKAN config file" diff --git a/doc/info_envfile.md b/doc/info_envfile.md index 37c0178d..7a4b6565 100644 --- a/doc/info_envfile.md +++ b/doc/info_envfile.md @@ -249,7 +249,7 @@ This is a CKAN extension that allows users to harvest dataset metadata from mult ### ckanext-xloader This is a CKAN extension that allows users to load data into CKAN's data store for faster and more efficient processing. -- `CKANEXT__XLOADER__JOBS__DB_URI`: The database URI for the XLoader jobs. +- `CKANEXT__XLOADER__JOBS_DB__URI`: The database URI for the XLoader jobs. 
### ckanext-dcat This is a CKAN extension that provides support for the DCAT standard, allowing CKAN to expose and consume metadata in this format. @@ -283,7 +283,7 @@ This extension provides a way to configure and customize CKAN's dataset, resourc - `CKANEXT__SCHEMINGDCAT_GROUP_SCHEMAS`: This is the path to the group schema file. - `CKANEXT__SCHEMINGDCAT_ORGANIZATION_SCHEMAS`: This is the path to the organization schema file. - `CKANEXT__SCHEMINGDCAT_PRESETS`: This is the path to the presets file. -- `CKANEXT__SCHEMINGDCAT_FACET_LIST`: This is a list of facets for the dataset. +- `CKANEXT__SCHEMINGDCAT__FACET_LIST`: This is a list of facets for the dataset. - `CKANEXT__SCHEMINGDCAT_ORGANIZATION_CUSTOM_FACETS`: This is a boolean value to enable or disable custom facets for organizations. - `CKANEXT__SCHEMINGDCAT_GROUP_CUSTOM_FACETS`: This is a boolean value to enable or disable custom facets for groups. - `CKANEXT__SCHEMINGDCAT_DEFAULT_PACKAGE_ITEM_ICON`: The default icon for package items. diff --git a/samples/.env.codespaces b/samples/.env.codespaces index dbf58fff..706b09b7 100644 --- a/samples/.env.codespaces +++ b/samples/.env.codespaces @@ -149,7 +149,7 @@ CKAN__CORS__ORIGIN_WHITELIST="" CKAN__AUTH__ALLOW_DATASET_COLLABORATORS=False CKAN__AUTH__ALLOW_ADMIN_COLLABORATORS=False # Default number of facets shown in search results. Default 10. -SEARCH__FACETS__DEFAULT=4 +CKAN__SEARCH__FACETS__DEFAULT=4 # Enable or disable the DataStore SQL search backend. Default is False. 
CKAN__DATASTORE__SQLSEARCH__ENABLED=True @@ -168,7 +168,7 @@ CKAN__LOCALE_ORDER="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru CKAN__LOCALES_OFFERED="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru de pl nl bg ko_KR hu sa sl lv" # Extensions -CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester harvest pdf_view pages fluent envvars" +CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester schemingdcat_postgres_harvester schemingdcat_open_data_statistics harvest pdf_view pages fluent envvars" # ckanext-harvest CKAN__HARVEST__MQ__TYPE=redis @@ -178,8 +178,8 @@ CKAN__HARVEST__MQ__REDIS_DB=${REDIS_CKAN_DATABASE} # Clean-up mechanism for the harvest log table. The default is 30 days. 
CKAN__HARVEST__LOG_TIMEFRAME=40 -# ckanext-xloader -CKANEXT__XLOADER__JOBS__DB_URI=postgresql://${CKAN_DB_USER}:${CKAN_DB_PASSWORD}@${POSTGRES_HOST}/${CKAN_DB} +# ckanext-xloader ckanext.xloader.jobs_db.uri +CKANEXT__XLOADER__JOBS_DB__URI=postgresql://${CKAN_DB_USER}:${CKAN_DB_PASSWORD}@${POSTGRES_HOST}/${CKAN_DB} # ckanext-dcat CKANEXT__DCAT__BASE_URI=${CKAN_URL} @@ -203,24 +203,24 @@ CKANEXT__GEOVIEW__SHP_VIEWER__ENCODING=UTF-8 # ckanext-schemingdcat ## CSW Endpoint for spatial metadata -CKANEXT__SCHEMINGDCAT_GEOMETADATA_BASE_URI=${PYCSW_URL} +CKANEXT__SCHEMINGDCAT__GEOMETADATA_BASE_URI=${PYCSW_URL} ## Scheming: setup_scheming.sh -CKANEXT__SCHEMINGDCAT_DATASET_SCHEMA="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_full.yaml" -CKANEXT__SCHEMINGDCAT_GROUP_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_group.json" -CKANEXT__SCHEMINGDCAT_ORGANIZATION_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_org.json" -CKANEXT__SCHEMINGDCAT_PRESETS="ckanext.schemingdcat:schemas/default_presets.json ckanext.fluent:presets.json" +CKAN___SCHEMING__DATASET_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_full.yaml" +CKAN___SCHEMING__GROUP_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_group.json" +CKAN___SCHEMING__ORGANIZATION_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_org.json" +CKAN___SCHEMING__PRESETS="ckanext.schemingdcat:schemas/default_presets.json ckanext.fluent:presets.json" ## Facets: setup_scheming.sh -CKANEXT__SCHEMINGDCAT_FACET_LIST="dataset_scope theme groups theme_eu language dcat_type groups publisher_name publisher_type spatial_uri owner_org res_format frequency tags tag_uri conforms_to" -CKANEXT__SCHEMINGDCAT_ORGANIZATION_CUSTOM_FACETS=True -CKANEXT__SCHEMINGDCAT_GROUP_CUSTOM_FACETS=True -CKANEXT__SCHEMINGDCAT_DEFAULT_PACKAGE_ITEM_ICON="theme" -CKANEXT__SCHEMINGDCAT_DEFAULT_PACKAGE_ITEM_SHOW_SPATIAL=True -CKANEXT__SCHEMINGDCAT_SHOW_METADATA_TEMPLATES_TOOLBAR=False 
-CKANEXT__METADATA_TEMPLATES_SEARCH_IDENTIFIER="schemingdcat_xls-template" -CKANEXT__SCHEMINGDCAT_ENDPOINTS_YAML="endpoints.yaml" +CKANEXT__SCHEMINGDCAT__FACET_LIST="dataset_scope hvd_category theme groups theme_eu language dcat_type groups publisher_name publisher_type spatial_uri owner_org res_format frequency tags tag_uri conforms_to" +CKANEXT__SCHEMINGDCAT__DEFAULT_PACKAGE_ITEM_ICON="theme" +CKANEXT__SCHEMINGDCAT__DEFAULT_PACKAGE_ITEM_SHOW_SPATIAL=True +CKANEXT__SCHEMINGDCAT__SHOW_METADATA_TEMPLATES_TOOLBAR=False +CKANEXT__SCHEMINGDCAT__METADATA_TEMPLATES_SEARCH_IDENTIFIER="schemingdcat_xls-template" +CKANEXT__SCHEMINGDCAT__ENDPOINTS_YAML="endpoints.yaml" CKANEXT__SCHEMINGDCAT__SOCIAL_GITHUB="https://github.com/mjanez/ckanext-schemingdcat" CKANEXT__SCHEMINGDCAT__SOCIAL_X="https://x.com/ckanproject" CKANEXT__SCHEMINGDCAT__SOCIAL_LINKEDIN="https://www.linkedin.com/company/ckanproject" +CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS=True +CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS_THEMES=True # ckanext-pages CKANEXT__PAGES__ALOW_HTML=False diff --git a/samples/.env.dev.example b/samples/.env.dev.example index a0abd13f..04deb61a 100644 --- a/samples/.env.dev.example +++ b/samples/.env.dev.example @@ -147,7 +147,7 @@ CKAN__CORS__ORIGIN_WHITELIST="" CKAN__AUTH__ALLOW_DATASET_COLLABORATORS=False CKAN__AUTH__ALLOW_ADMIN_COLLABORATORS=False # Default number of facets shown in search results. Default 10. -SEARCH__FACETS__DEFAULT=4 +CKAN__SEARCH__FACETS__DEFAULT=4 # Enable or disable the DataStore SQL search backend. Default is False. 
CKAN__DATASTORE__SQLSEARCH__ENABLED=True @@ -166,7 +166,7 @@ CKAN__LOCALE_ORDER="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru CKAN__LOCALES_OFFERED="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru de pl nl bg ko_KR hu sa sl lv" # Extensions -CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester harvest pdf_view pages fluent envvars" +CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester schemingdcat_postgres_harvester schemingdcat_open_data_statistics harvest pdf_view pages fluent envvars" # ckanext-harvest CKAN__HARVEST__MQ__TYPE=redis @@ -176,8 +176,8 @@ CKAN__HARVEST__MQ__REDIS_DB=${REDIS_CKAN_DATABASE} # Clean-up mechanism for the harvest log table. The default is 30 days. 
CKAN__HARVEST__LOG_TIMEFRAME=40 -# ckanext-xloader -CKANEXT__XLOADER__JOBS__DB_URI=postgresql://${CKAN_DB_USER}:${CKAN_DB_PASSWORD}@${POSTGRES_HOST}/${CKAN_DB} +# ckanext-xloader ckanext.xloader.jobs_db.uri +CKANEXT__XLOADER__JOBS_DB__URI=postgresql://${CKAN_DB_USER}:${CKAN_DB_PASSWORD}@${POSTGRES_HOST}/${CKAN_DB} # ckanext-dcat CKANEXT__DCAT__BASE_URI=${CKAN_URL} @@ -201,24 +201,24 @@ CKANEXT__GEOVIEW__SHP_VIEWER__ENCODING=UTF-8 # ckanext-schemingdcat ## CSW Endpoint for spatial metadata -CKANEXT__SCHEMINGDCAT_GEOMETADATA_BASE_URI=${PYCSW_URL} +CKANEXT__SCHEMINGDCAT__GEOMETADATA_BASE_URI=${PYCSW_URL} ## Scheming: setup_scheming.sh -CKANEXT__SCHEMINGDCAT_DATASET_SCHEMA="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_full.yaml" -CKANEXT__SCHEMINGDCAT_GROUP_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_group.json" -CKANEXT__SCHEMINGDCAT_ORGANIZATION_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_org.json" -CKANEXT__SCHEMINGDCAT_PRESETS="ckanext.schemingdcat:schemas/default_presets.json ckanext.fluent:presets.json" +CKAN___SCHEMING__DATASET_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_full.yaml" +CKAN___SCHEMING__GROUP_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_group.json" +CKAN___SCHEMING__ORGANIZATION_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/eu_geodcat_ap_org.json" +CKAN___SCHEMING__PRESETS="ckanext.schemingdcat:schemas/default_presets.json ckanext.fluent:presets.json" ## Facets: setup_scheming.sh -CKANEXT__SCHEMINGDCAT_FACET_LIST="dataset_scope theme groups theme_eu language dcat_type groups publisher_name publisher_type spatial_uri owner_org res_format frequency tags tag_uri conforms_to" -CKANEXT__SCHEMINGDCAT_ORGANIZATION_CUSTOM_FACETS=True -CKANEXT__SCHEMINGDCAT_GROUP_CUSTOM_FACETS=True -CKANEXT__SCHEMINGDCAT_DEFAULT_PACKAGE_ITEM_ICON="theme" -CKANEXT__SCHEMINGDCAT_DEFAULT_PACKAGE_ITEM_SHOW_SPATIAL=True -CKANEXT__SCHEMINGDCAT_SHOW_METADATA_TEMPLATES_TOOLBAR=False 
-CKANEXT__METADATA_TEMPLATES_SEARCH_IDENTIFIER="schemingdcat_xls-template" -CKANEXT__SCHEMINGDCAT_ENDPOINTS_YAML="endpoints.yaml" +CKANEXT__SCHEMINGDCAT__FACET_LIST="dataset_scope hvd_category theme groups theme_eu language dcat_type groups publisher_name publisher_type spatial_uri owner_org res_format frequency tags tag_uri conforms_to" +CKANEXT__SCHEMINGDCAT__DEFAULT_PACKAGE_ITEM_ICON="theme" +CKANEXT__SCHEMINGDCAT__DEFAULT_PACKAGE_ITEM_SHOW_SPATIAL=True +CKANEXT__SCHEMINGDCAT__SHOW_METADATA_TEMPLATES_TOOLBAR=False +CKANEXT__SCHEMINGDCAT__METADATA_TEMPLATES_SEARCH_IDENTIFIER="schemingdcat_xls-template" +CKANEXT__SCHEMINGDCAT__ENDPOINTS_YAML="endpoints.yaml" CKANEXT__SCHEMINGDCAT__SOCIAL_GITHUB="https://github.com/mjanez/ckanext-schemingdcat" CKANEXT__SCHEMINGDCAT__SOCIAL_X="https://x.com/ckanproject" CKANEXT__SCHEMINGDCAT__SOCIAL_LINKEDIN="https://www.linkedin.com/company/ckanproject" +CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS=True +CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS_THEMES=True # ckanext-pages CKANEXT__PAGES__ALOW_HTML=False diff --git a/samples/.env.es.example b/samples/.env.es.example index 991dd9b4..12b46905 100644 --- a/samples/.env.es.example +++ b/samples/.env.es.example @@ -147,7 +147,7 @@ CKAN__CORS__ORIGIN_WHITELIST="" CKAN__AUTH__ALLOW_DATASET_COLLABORATORS=False CKAN__AUTH__ALLOW_ADMIN_COLLABORATORS=False # Default number of facets shown in search results. Default 10. -SEARCH__FACETS__DEFAULT=4 +CKAN__SEARCH__FACETS__DEFAULT=4 # Enable or disable the DataStore SQL search backend. Default is False. 
CKAN__DATASTORE__SQLSEARCH__ENABLED=True @@ -166,7 +166,7 @@ CKAN__LOCALE_ORDER="es en pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru CKAN__LOCALES_OFFERED="es en pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru de pl nl bg ko_KR hu sa sl lv" # Extensions -CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester harvest pdf_view pages fluent envvars" +CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester schemingdcat_postgres_harvester schemingdcat_open_data_statistics harvest pdf_view pages fluent envvars" # ckanext-harvest CKAN__HARVEST__MQ__TYPE=redis @@ -176,8 +176,8 @@ CKAN__HARVEST__MQ__REDIS_DB=${REDIS_CKAN_DATABASE} # Clean-up mechanism for the harvest log table. The default is 30 days. 
CKAN__HARVEST__LOG_TIMEFRAME=40 -# ckanext-xloader -CKANEXT__XLOADER__JOBS__DB_URI=postgresql://${CKAN_DB_USER}:${CKAN_DB_PASSWORD}@${POSTGRES_HOST}/${CKAN_DB} +# ckanext-xloader ckanext.xloader.jobs_db.uri +CKANEXT__XLOADER__JOBS_DB__URI=postgresql://${CKAN_DB_USER}:${CKAN_DB_PASSWORD}@${POSTGRES_HOST}/${CKAN_DB} # ckanext-dcat CKANEXT__DCAT__BASE_URI=${CKAN_URL} @@ -201,24 +201,24 @@ CKANEXT__GEOVIEW__SHP_VIEWER__ENCODING=UTF-8 # ckanext-schemingdcat ## CSW Endpoint for spatial metadata -CKANEXT__SCHEMINGDCAT_GEOMETADATA_BASE_URI=${PYCSW_URL} +CKANEXT__SCHEMINGDCAT__GEOMETADATA_BASE_URI=${PYCSW_URL} ## Scheming: setup_scheming.sh -CKANEXT__SCHEMINGDCAT_DATASET_SCHEMA="ckanext.schemingdcat:schemas/geodcat_ap/es_geodcat_ap_full.yaml" -CKANEXT__SCHEMINGDCAT_GROUP_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/es_geodcat_ap_group.json" -CKANEXT__SCHEMINGDCAT_ORGANIZATION_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/es_geodcat_ap_org.json" -CKANEXT__SCHEMINGDCAT_PRESETS="ckanext.schemingdcat:schemas/default_presets.json ckanext.fluent:presets.json" +CKAN___SCHEMING__DATASET_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/es_geodcat_ap_full.yaml" +CKAN___SCHEMING__GROUP_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/es_geodcat_ap_group.json" +CKAN___SCHEMING__ORGANIZATION_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/es_geodcat_ap_org.json" +CKAN___SCHEMING__PRESETS="ckanext.schemingdcat:schemas/default_presets.json ckanext.fluent:presets.json" ## Facets: setup_scheming.sh -CKANEXT__SCHEMINGDCAT_FACET_LIST="dataset_scope theme groups theme_es language dcat_type groups publisher_name publisher_type spatial_uri owner_org res_format frequency tags tag_uri conforms_to" -CKANEXT__SCHEMINGDCAT_ORGANIZATION_CUSTOM_FACETS=True -CKANEXT__SCHEMINGDCAT_GROUP_CUSTOM_FACETS=True -CKANEXT__SCHEMINGDCAT_DEFAULT_PACKAGE_ITEM_ICON="theme" -CKANEXT__SCHEMINGDCAT_DEFAULT_PACKAGE_ITEM_SHOW_SPATIAL=True -CKANEXT__SCHEMINGDCAT_SHOW_METADATA_TEMPLATES_TOOLBAR=False 
-CKANEXT__METADATA_TEMPLATES_SEARCH_IDENTIFIER="schemingdcat_xls-template" -CKANEXT__SCHEMINGDCAT_ENDPOINTS_YAML="endpoints.yaml" +CKANEXT__SCHEMINGDCAT__FACET_LIST="dataset_scope hvd_category theme groups theme_eu language dcat_type groups publisher_name publisher_type spatial_uri owner_org res_format frequency tags tag_uri conforms_to" +CKANEXT__SCHEMINGDCAT__DEFAULT_PACKAGE_ITEM_ICON="theme" +CKANEXT__SCHEMINGDCAT__DEFAULT_PACKAGE_ITEM_SHOW_SPATIAL=True +CKANEXT__SCHEMINGDCAT__SHOW_METADATA_TEMPLATES_TOOLBAR=False +CKANEXT__SCHEMINGDCAT__METADATA_TEMPLATES_SEARCH_IDENTIFIER="schemingdcat_xls-template" +CKANEXT__SCHEMINGDCAT__ENDPOINTS_YAML="endpoints.yaml" CKANEXT__SCHEMINGDCAT__SOCIAL_GITHUB="https://github.com/mjanez/ckanext-schemingdcat" CKANEXT__SCHEMINGDCAT__SOCIAL_X="https://x.com/ckanproject" CKANEXT__SCHEMINGDCAT__SOCIAL_LINKEDIN="https://www.linkedin.com/company/ckanproject" +CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS=True +CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS_THEMES=True # ckanext-pages CKANEXT__PAGES__ALOW_HTML=False From 380abfb4553518df8cf8ecbcd45a917d86d2dd59 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Mon, 7 Oct 2024 10:52:40 +0200 Subject: [PATCH 02/21] Add 'hvd_category' field to Solr schema for enhanced indexing --- solr/Dockerfile.spatial | 1 + 1 file changed, 1 insertion(+) diff --git a/solr/Dockerfile.spatial b/solr/Dockerfile.spatial index f74d6adf..b0442959 100644 --- a/solr/Dockerfile.spatial +++ b/solr/Dockerfile.spatial @@ -59,6 +59,7 @@ ENV SOLR_BBOX_FIELDS ' \ \ \ + \ \ \ \ From 52a80e49da1632c83f3b06ce94d0327500356ba2 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Tue, 8 Oct 2024 12:11:04 +0200 Subject: [PATCH 03/21] Fix uWSGI configuration to handle write errors and optimize performance --- ckan/setup/start_ckan.sh.override | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ckan/setup/start_ckan.sh.override 
b/ckan/setup/start_ckan.sh.override index ec3e3ace..68f01b49 100644 --- a/ckan/setup/start_ckan.sh.override +++ b/ckan/setup/start_ckan.sh.override @@ -57,8 +57,11 @@ chown -R ckan:ckan $CKAN_LOGS_PATH/xloader # Set the common uwsgi options. ## Add thunder-lock to prevent multiple workers from running the same job and buffer-size to prevent large headers ## To increase performance, you can adjust the number of processes (-p) (More info: https://uwsgi-docs.readthedocs.io/en/latest/Options.html & https://www.bloomberg.com/company/stories/configuring-uwsgi-production-deployment/) +# Fix uWSGI raises OSError: write error: https://stackoverflow.com/a/45393743 UWSGI_OPTS="--thunder-lock \ --ignore-sigpipe \ + --ignore-write-errors \ + --disable-write-exception \ --socket /tmp/uwsgi.sock \ --wsgi-file /srv/app/wsgi.py \ --module wsgi:application \ @@ -71,8 +74,8 @@ UWSGI_OPTS="--thunder-lock \ --harakiri $UWSGI_HARAKIRI \ --max-requests 500 \ --max-worker-lifetime 3600 \ - --reload-on-rss 1024 \ - --processes 2 \ + --reload-on-rss 2048 \ + --processes 4 \ --buffer-size 32768 \ --disable-logging \ --log-4xx \ From 3af29303da09497f7e13d7644c8f774f8461fbd7 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Wed, 9 Oct 2024 08:45:06 +0200 Subject: [PATCH 04/21] Fix patch extension --- .../{01_org_group_templates => 01_org_group_templates.patch} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename ckan/patches/ckanext-pages/{01_org_group_templates => 01_org_group_templates.patch} (100%) diff --git a/ckan/patches/ckanext-pages/01_org_group_templates b/ckan/patches/ckanext-pages/01_org_group_templates.patch similarity index 100% rename from ckan/patches/ckanext-pages/01_org_group_templates rename to ckan/patches/ckanext-pages/01_org_group_templates.patch From b6b51255c39c4879692d5fa64d3e44785c30c81e Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Fri, 11 Oct 2024 10:55:17 +0200 Subject: [PATCH 05/21] 
Update SMTP configuration in .env.example and entrypoint script for Docker --- .env.example | 5 ++++- ckan/docker-entrypoint.d/01_setup_ckanext_config.sh | 10 +++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.env.example b/.env.example index b7542310..67f4f5eb 100644 --- a/.env.example +++ b/.env.example @@ -125,12 +125,15 @@ CKAN_SYSADMIN_EMAIL=your_email@example.com CKAN_STORAGE_PATH=/var/lib/ckan CKAN_LOGS_PATH=/var/log # SMTP settings -CKAN__SMTP_ENABLED=False +CKAN_DOCKER_SMTP_ENABLED=False CKAN_SMTP_SERVER=smtp.corporateict.domain:25 CKAN_SMTP_STARTTLS=True CKAN_SMTP_USER=user CKAN_SMTP_PASSWORD=pass CKAN_SMTP_MAIL_FROM=ckan@${PROXY_SERVER_NAME} +CKAN_SMTP_REPLY_TO='' +CKAN_EMAIL_TO='' +CKAN_ERROR_EMAIL_FROM='' ## Customize which text formats the text_view plugin will show CKAN__PREVIEW__JSON_FORMATS="json jsonld" # html htm rdf+xml owl+xml xml n3 n-triples turtle plain atom csv tsv rss txt json diff --git a/ckan/docker-entrypoint.d/01_setup_ckanext_config.sh b/ckan/docker-entrypoint.d/01_setup_ckanext_config.sh index 5e5bbd20..322bd224 100644 --- a/ckan/docker-entrypoint.d/01_setup_ckanext_config.sh +++ b/ckan/docker-entrypoint.d/01_setup_ckanext_config.sh @@ -4,8 +4,8 @@ echo "[docker-entrypoint.01_setup_ckanext_config] Clear index" ckan -c $CKAN_INI search-index clear -# Add SMTP settings if CKAN__SMTP_ENABLED is True -if [ "$CKAN__SMTP_ENABLED" = "True" ]; then +# Add SMTP settings if CKAN_DOCKER_SMTP_ENABLED is True +if [ "$CKAN_DOCKER_SMTP_ENABLED" = "True" ]; then echo "[docker-entrypoint.01_setup_ckanext_config] Adding SMTP settings to the CKAN config file" ckan config-tool $CKAN_INI \ "smtp.server=$CKAN_SMTP_SERVER" \ @@ -13,9 +13,9 @@ if [ "$CKAN__SMTP_ENABLED" = "True" ]; then "smtp.user=$CKAN_SMTP_USER" \ "smtp.password=$CKAN_SMTP_PASSWORD" \ "smtp.mail_from=$CKAN_SMTP_MAIL_FROM" \ - "smtp.reply_to=" \ - "email_to=" \ - "error_email_from=" + "smtp.reply_to=$CKAN_SMTP_REPLY_TO" \ + "email_to=$CKAN_EMAIL_TO" \ + 
"error_email_from=$CKAN_ERROR_EMAIL_FROM" fi # Rebuild index From 1da030d88bb5c680abf5b9867ff03272689314c8 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Mon, 14 Oct 2024 08:21:18 +0200 Subject: [PATCH 06/21] Update README.md to clarify user creation methods in CKAN --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b1e4eab2..9e4351a2 100644 --- a/README.md +++ b/README.md @@ -643,7 +643,11 @@ If need to use a backup, restore it: ### CKAN. Manage new users -1. Create a new user from the Docker host, for example to create a new user called `user_example` +1. Create a new user directly by a sysadmin in the `{ckan_site_url}/user/register` endpoint + +2. Create new user accounts via the API [`user_create`](https://docs.ckan.org/en/2.10/api/#ckan.logic.action.create.user_create) + +3. Create a new user from the Docker host, for example to create a new user called `user_example` ```bash docker exec -it ckan -c ckan.ini user add user_example email=user_example@localhost @@ -658,7 +662,7 @@ If need to use a backup, restore it: docker exec -it ckan -c ckan.ini user remove user_example` ``` -1. Create a new user from within the ckan container. You will need to get a session on the running container +4. Create a new user from within the ckan container. 
You will need to get a session on the running container ```bash ckan -c ckan.ini user add user_example email=user_example@localhost` From 86c0a73ffddb6e9e4a256d9b0eb44d5fec9912b3 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Mon, 14 Oct 2024 08:25:22 +0200 Subject: [PATCH 07/21] Add CKANAPI scripts and configuration files for data management --- .gitignore | 8 + doc/scripts/ckanapi/.gitignore | 4 + doc/scripts/ckanapi/README.md | 175 ++++++++++++++++++++ doc/scripts/ckanapi/input/sites.example.yml | 51 ++++++ doc/scripts/ckanapi/requirements.txt | 2 + doc/scripts/ckanapi/src/__init__.py | 0 doc/scripts/ckanapi/src/__main__.py | 8 + doc/scripts/ckanapi/src/config.py | 82 +++++++++ doc/scripts/ckanapi/src/logic/__init__.py | 50 ++++++ doc/scripts/ckanapi/src/logic/create.py | 63 +++++++ doc/scripts/ckanapi/src/logic/delete.py | 1 + doc/scripts/ckanapi/src/logic/get.py | 43 +++++ doc/scripts/ckanapi/src/logic/update.py | 53 ++++++ doc/scripts/ckanapi/src/quick_ckanapi.py | 62 +++++++ 14 files changed, 602 insertions(+) create mode 100644 doc/scripts/ckanapi/.gitignore create mode 100644 doc/scripts/ckanapi/README.md create mode 100644 doc/scripts/ckanapi/input/sites.example.yml create mode 100644 doc/scripts/ckanapi/requirements.txt create mode 100644 doc/scripts/ckanapi/src/__init__.py create mode 100644 doc/scripts/ckanapi/src/__main__.py create mode 100644 doc/scripts/ckanapi/src/config.py create mode 100644 doc/scripts/ckanapi/src/logic/__init__.py create mode 100644 doc/scripts/ckanapi/src/logic/create.py create mode 100644 doc/scripts/ckanapi/src/logic/delete.py create mode 100644 doc/scripts/ckanapi/src/logic/get.py create mode 100644 doc/scripts/ckanapi/src/logic/update.py create mode 100644 doc/scripts/ckanapi/src/quick_ckanapi.py diff --git a/.gitignore b/.gitignore index ca11b090..13280de0 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,11 @@ samples/api/* # docker compose files docker-compose.local-*.yml + +# 
CKANAPI scripts +doc/scripts/ckanapi/*_ckanapi.py +!doc/scripts/ckanapi/*sample_ckan_api.py +doc/scripts/ckanapi/sites.yml +sites.yml +venv +env \ No newline at end of file diff --git a/doc/scripts/ckanapi/.gitignore b/doc/scripts/ckanapi/.gitignore new file mode 100644 index 00000000..7273b114 --- /dev/null +++ b/doc/scripts/ckanapi/.gitignore @@ -0,0 +1,4 @@ +venv +env +sites.yml +output/* \ No newline at end of file diff --git a/doc/scripts/ckanapi/README.md b/doc/scripts/ckanapi/README.md new file mode 100644 index 00000000..0184a20f --- /dev/null +++ b/doc/scripts/ckanapi/README.md @@ -0,0 +1,175 @@ +# CKAN API Script + +This script allows you to interact with a CKAN instance to perform various actions such as updating packages, exporting groups, and exporting organizations. The configuration for different CKAN instances and actions is specified in a YAML file. + +## Prerequisites + +- Python 3.x + +## Setup + +### Creating a Virtual Environment + +1. Create a virtual environment: + +```sh +python3 -m venv venv +``` + +2. Activate the virtual environment: + +- On Linux and macOS: + +```sh +source venv/bin/activate +``` + +- On Windows: + +```sh +.\venv\Scripts\activate +``` + +### Installing Dependencies + +Install the required libraries using pip: + +```sh +pip install -r requirements.txt +``` + +## Configuration + +The configuration for the CKAN instances and actions is specified in a YAML file located at `./input/sites.yml`. If this file does not exist, you can use the provided `sites.example.yml` as a template. + +### Creating `sites.yml` + +1. Copy the `sites.example.yml` file to `sites.yml`: + +```sh +cp ./input/sites.example.yml ./input/sites.yml +``` + +2. Edit the `sites.yml` file to include your CKAN instance details and the actions you want to perform. 
Below is an example configuration: + +```yaml +# ./input/sites.yml + +default: + ckan_site_url: 'https://demo.ckan.dcat-ap-3.es' + api_token: 'your_api_token' + actions: + - update_packages + - export_groups + - export_organizations + override: + theme_es: "http://datos.gob.es/kos/sector-publico/sector/medio-ambiente" + target_values: + publisher_name: + condition: "Sample Company" + override: + publisher_type: "http://purl.org/adms/publishertype/Company" + +site_1: + ckan_site_url: 'https://site1.ckan.instance' + api_token: 'site1_api_token' + actions: + - update_packages + - export_groups + - export_organizations + override: + theme_es: "http://site1.theme.url" + target_values: + publisher_name: + condition: "Site 1 Company" + override: + publisher_type: "http://site1.publisher.type" + +site_2: + ckan_site_url: 'https://site2.ckan.instance' + api_token: null + actions: + - export_groups + - export_organizations + +site_3: + ckan_site_url: 'https://site3.ckan.instance' + api_token: 'site3_api_token' + actions: + - export_groups + - export_organizations + - create_organizations + - create_groups + - create_users + organizations: './input/site_3/organizations.json' + groups: './input/site_3/groups.json' + users: './input/site_3/users.json' + +``` + +## Usage + +To run the script, use the following command: + +```sh +python -m src -s <site> +``` + +Replace `<site>` with the site configuration you want to use (e.g., `default`, `site_1`, `site_2`). + +### Example + +To run the script using the `site_2` configuration: + +```sh +python -m src -s site_2 +``` + +### Command-line Arguments + +- `-s` or `--site`: The site configuration to load (default: `default`). +- `-c` or `--config`: The path to the YAML configuration file (default: `./input/sites.yml`). + +### Notes + +- Actions that modify data (e.g., `update_packages`, `create`, `delete`) require a non-null `api_token`.
+- If only `GET` actions are specified (e.g., `export_groups`, `export_organizations`), the `api_token`, `override`, and `target_values` are not required. + +## Script Details + +The script performs the following actions based on the configuration: + +1. **Update Packages**: Updates the packages in the CKAN instance based on the specified overrides and target values. +2. **Export Groups**: Exports all groups from the CKAN instance to a JSON file (`groups.json`). +3. **Export Organizations**: Exports all organizations from the CKAN instance to a JSON file (`organizations.json`). + +### Functions + +#### Connect + +- `connect_to_ckan(ckan_site_url, api_token)`: Connects to the CKAN instance. + +#### Create + +- `create_organizations(rc, json_path)`: Creates new organizations in CKAN from a JSON file. +- `create_groups(rc, json_path)`: Creates new groups in CKAN from a JSON file. +- `create_users(rc, json_path)`: Creates new users in CKAN from a JSON file. + +#### Update + +- `update_package(rc, package_id, theme_es, publisher_name, publisher_type)`: Updates a package given its ID. + +#### Get + +- `export_groups_to_json(rc, file_path)`: Exports all groups to a JSON file. +- `export_organizations_to_json(rc, file_path)`: Exports all organizations to a JSON file. + + +### Example Output + +- `groups.json`: Contains detailed information about all groups. +- `organizations.json`: Contains detailed information about all organizations. + +## License + +This project is licensed under the MIT License. 
diff --git a/doc/scripts/ckanapi/input/sites.example.yml b/doc/scripts/ckanapi/input/sites.example.yml new file mode 100644 index 00000000..a4741bc6 --- /dev/null +++ b/doc/scripts/ckanapi/input/sites.example.yml @@ -0,0 +1,51 @@ +# ./input/sites.yml + +default: + ckan_site_url: 'https://demo.ckan.dcat-ap-3.es' + api_token: 'your_api_token' + actions: + - update_packages + - export_groups + - export_organizations + override: + theme_es: "http://datos.gob.es/kos/sector-publico/sector/medio-ambiente" + target_values: + publisher_name: + condition: "Sample Company" + override: + publisher_type: "http://purl.org/adms/publishertype/Company" + +site_1: + ckan_site_url: 'https://site1.ckan.instance' + api_token: 'site1_api_token' + actions: + - update_packages + - export_groups + - export_organizations + override: + theme_es: "http://site1.theme.url" + target_values: + publisher_name: + condition: "Site 1 Company" + override: + publisher_type: "http://site1.publisher.type" + +site_2: + ckan_site_url: 'https://site2.ckan.instance' + api_token: null + actions: + - export_groups + - export_organizations + +site_3: + ckan_site_url: 'https://site3.ckan.instance' + api_token: 'site3_api_token' + actions: + - export_groups + - export_organizations + - create_organizations + - create_groups + - create_users + organizations: './input/site_3/organizations.json' + groups: './input/site_3/groups.json' + users: './input/site_3/users.json' \ No newline at end of file diff --git a/doc/scripts/ckanapi/requirements.txt b/doc/scripts/ckanapi/requirements.txt new file mode 100644 index 00000000..7904dfa2 --- /dev/null +++ b/doc/scripts/ckanapi/requirements.txt @@ -0,0 +1,2 @@ +ckanapi==4.8 +PyYAML==6.0.2 \ No newline at end of file diff --git a/doc/scripts/ckanapi/src/__init__.py b/doc/scripts/ckanapi/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/doc/scripts/ckanapi/src/__main__.py b/doc/scripts/ckanapi/src/__main__.py new file mode 100644 index
00000000..dc1dc051 --- /dev/null +++ b/doc/scripts/ckanapi/src/__main__.py @@ -0,0 +1,8 @@ +from .quick_ckanapi import main +from .config import parse_args, load_config + +if __name__ == "__main__": + args = parse_args() + config = load_config(args.config, site=args.site) + config.site = args.site # Add the site name to the config object + main(config) \ No newline at end of file diff --git a/doc/scripts/ckanapi/src/config.py b/doc/scripts/ckanapi/src/config.py new file mode 100644 index 00000000..8da44396 --- /dev/null +++ b/doc/scripts/ckanapi/src/config.py @@ -0,0 +1,82 @@ +import yaml +import argparse +import os + +default_sites_file = './input/sites.yml' +default_site = 'default' + +class SampleConfig: + """ + Configuration class for CKAN instance settings and overrides. + + Attributes: + ckan_site_url (str): The URL of the CKAN instance. + api_token (str): The API token for accessing the CKAN instance. + actions (list): List of actions to perform. + organizations (str): Path to the JSON file containing organization data. + groups (str): Path to the JSON file containing group data. + users (str): Path to the JSON file containing user data. + override (dict): Dictionary of override values. + target_values (dict): Dictionary of target values. + """ + def __init__(self, config): + self.ckan_site_url = config['ckan_site_url'] + self.api_token = config.get('api_token') + self.actions = config['actions'] + self.organizations = config.get('organizations') + self.groups = config.get('groups') + self.users = config.get('users') + self.override = config.get('override', {}) + self.target_values = config.get('target_values', {}) + +def load_config(config_file, site='default'): + """ + Load the configuration from a YAML file. + + Args: + config_file (str): The path to the YAML configuration file. + site (str): The site configuration to load. + + Returns: + SampleConfig: The loaded configuration object. 
+ """ + if not os.path.exists(config_file): + example_file = config_file.replace('sites.yml', 'sites.example.yml') + if os.path.exists(example_file): + print(f"Warning: {config_file} not found. Please use {example_file} as a template to create your own {config_file} before running the script.") + else: + print(f"Error: {config_file} not found and no example file {example_file} available.") + exit(1) + + with open(config_file, 'r') as file: + config_data = yaml.safe_load(file) + return SampleConfig(config_data[site]) + +def parse_args(): + """ + Parse command-line arguments. + + Returns: + argparse.Namespace: The parsed arguments. + """ + parser = argparse.ArgumentParser(description='Load CKAN configuration.') + parser.add_argument('-s', '--site', type=str, default=default_site, help='The site configuration to load') + parser.add_argument('-c', '--config', type=str, default=default_sites_file, help='The path to the YAML configuration file') + return parser.parse_args() + +if __name__ == "__main__": + args = parse_args() + config = load_config(args.config, site=args.site) + print(f"Loaded configuration for site: {args.site}") + print(f"CKAN Site URL: {config.ckan_site_url}") + print(f"Actions: {config.actions}") + if config.override: + print(f"Override: {config.override}") + if config.target_values: + print(f"Target Values: {config.target_values}") + if config.organizations: + print(f"Organizations JSON Path: {config.organizations}") + if config.groups: + print(f"Groups JSON Path: {config.groups}") + if config.users: + print(f"Users JSON Path: {config.users}") \ No newline at end of file diff --git a/doc/scripts/ckanapi/src/logic/__init__.py b/doc/scripts/ckanapi/src/logic/__init__.py new file mode 100644 index 00000000..4430246f --- /dev/null +++ b/doc/scripts/ckanapi/src/logic/__init__.py @@ -0,0 +1,50 @@ +from .get import export_groups_to_json, export_organizations_to_json +from .update import update_package +from .create import create_organizations, 
create_groups, create_users +# Import other actions as needed + +def list_actions(): + """ + List all available actions in the logic module. + + Returns: + dict: A dictionary of action names and their corresponding functions. + """ + actions = { + 'export_groups': export_groups_to_json, + 'export_organizations': export_organizations_to_json, + 'update_packages': update_package, + 'create_organizations': create_organizations, + 'create_groups': create_groups, + 'create_users': create_users, + # Add other actions here + } + return actions + +def execute_action(action_name, rc, config, output_dir): + """ + Execute a specific action based on the action name. + + Args: + action_name (str): The name of the action to execute. + rc (RemoteCKAN): The CKAN instance connection. + config (SampleConfig): The configuration object. + output_dir (str): The output directory path. + """ + actions = list_actions() + if action_name in actions: + if action_name == 'update_packages': + # Get the list of all packages + package_list = rc.action.package_list() + + # Iterate over each package and update it + for package_id in package_list: + actions[action_name](rc, package_id, config.override, config.target_values) + elif action_name == 'create_organizations': + actions[action_name](rc, config.organizations) + elif action_name == 'create_groups': + actions[action_name](rc, config.groups) + elif action_name == 'create_users': + actions[action_name](rc, config.users) + else: + actions[action_name](rc, output_dir) \ No newline at end of file diff --git a/doc/scripts/ckanapi/src/logic/create.py b/doc/scripts/ckanapi/src/logic/create.py new file mode 100644 index 00000000..017e2250 --- /dev/null +++ b/doc/scripts/ckanapi/src/logic/create.py @@ -0,0 +1,63 @@ +import json +import os + +def load_json(file_path): + """ + Load JSON data from a file. + + Args: + file_path (str): The path to the JSON file. + + Returns: + list: The JSON data loaded from the file. 
+ """ + with open(file_path, 'r') as f: + return json.load(f) + +def create_organizations(rc, json_path): + """ + Create new organizations in CKAN. + + Args: + rc (RemoteCKAN): The CKAN instance connection. + json_path (str): The path to the JSON file containing organization data. + """ + organizations = load_json(json_path) + for org in organizations: + try: + rc.action.organization_create(**org) + print(f"Organization '{org['name']}' created successfully.") + except Exception as e: + print(f"Failed to create organization '{org['name']}': {e}") + +def create_groups(rc, json_path): + """ + Create new groups in CKAN. + + Args: + rc (RemoteCKAN): The CKAN instance connection. + json_path (str): The path to the JSON file containing group data. + """ + groups = load_json(json_path) + for group in groups: + try: + rc.action.group_create(**group) + print(f"Group '{group['name']}' created successfully.") + except Exception as e: + print(f"Failed to create group '{group['name']}': {e}") + +def create_users(rc, json_path): + """ + Create new users in CKAN. + + Args: + rc (RemoteCKAN): The CKAN instance connection. + json_path (str): The path to the JSON file containing user data. 
+ """ + users = load_json(json_path) + for user in users: + try: + rc.action.user_create(**user) + print(f"User '{user['name']}' created successfully.") + except Exception as e: + print(f"Failed to create user '{user['name']}': {e}") \ No newline at end of file diff --git a/doc/scripts/ckanapi/src/logic/delete.py b/doc/scripts/ckanapi/src/logic/delete.py new file mode 100644 index 00000000..b0d2ee81 --- /dev/null +++ b/doc/scripts/ckanapi/src/logic/delete.py @@ -0,0 +1 @@ +# Placeholder for delete functions \ No newline at end of file diff --git a/doc/scripts/ckanapi/src/logic/get.py b/doc/scripts/ckanapi/src/logic/get.py new file mode 100644 index 00000000..b581be48 --- /dev/null +++ b/doc/scripts/ckanapi/src/logic/get.py @@ -0,0 +1,43 @@ +import json +import os + +def export_to_json(rc, output_dir, file_name, action_list, action_show): + """ + Export data to a JSON file. + + Args: + rc (RemoteCKAN): The CKAN instance connection. + output_dir (str): The directory where the JSON file will be saved. + file_name (str): The name of the JSON file. + action_list (str): The CKAN action to list items. + action_show (str): The CKAN action to show item details. + """ + item_list = getattr(rc.action, action_list)() + items = [] + for item_id in item_list: + item = getattr(rc.action, action_show)(id=item_id) + items.append(item) + file_path = os.path.join(output_dir, f"{file_name}.json") + with open(file_path, 'w') as f: + json.dump(items, f, indent=4) + print(f"Data exported to {file_path}") + +def export_groups_to_json(rc, output_dir): + """ + Export all groups to a JSON file. + + Args: + rc (RemoteCKAN): The CKAN instance connection. + output_dir (str): The directory where the JSON file will be saved. + """ + export_to_json(rc, output_dir, 'groups', 'group_list', 'group_show') + +def export_organizations_to_json(rc, output_dir): + """ + Export all organizations to a JSON file. + + Args: + rc (RemoteCKAN): The CKAN instance connection. 
+ output_dir (str): The directory where the JSON file will be saved. + """ + export_to_json(rc, output_dir, 'organizations', 'organization_list', 'organization_show') \ No newline at end of file diff --git a/doc/scripts/ckanapi/src/logic/update.py b/doc/scripts/ckanapi/src/logic/update.py new file mode 100644 index 00000000..0b3ceaed --- /dev/null +++ b/doc/scripts/ckanapi/src/logic/update.py @@ -0,0 +1,53 @@ +def apply_overrides(package, overrides): + """ + Apply overrides to the package. + + Args: + package (dict): The package dictionary. + overrides (dict): The dictionary of fields to override. + + Returns: + dict: The updated package dictionary. + """ + for key, value in overrides.items(): + package[key] = value if isinstance(value, list) else [value] + return package + +def apply_target_values(package, target_values): + """ + Apply target values to the package based on conditions. + + Args: + package (dict): The package dictionary. + target_values (dict): The dictionary of conditions and overrides. + + Returns: + dict: The updated package dictionary. + """ + for field, details in target_values.items(): + condition = details.get('condition') + overrides = details.get('override', {}) + if package.get(field) == condition: + package = apply_overrides(package, overrides) + return package + +def update_package(rc, package_id, overrides, target_values): + """ + Update a package given its ID. + + Args: + rc (RemoteCKAN): The CKAN instance connection. + package_id (str): The ID of the package. + overrides (dict): The dictionary of fields to override. + target_values (dict): The dictionary of conditions and overrides. 
+ """ + package = rc.action.package_show(id=package_id) + + # Apply overrides + package = apply_overrides(package, overrides) + + # Apply target values based on conditions + package = apply_target_values(package, target_values) + + # Save the changes + rc.action.package_update(**package) \ No newline at end of file diff --git a/doc/scripts/ckanapi/src/quick_ckanapi.py b/doc/scripts/ckanapi/src/quick_ckanapi.py new file mode 100644 index 00000000..63e8c751 --- /dev/null +++ b/doc/scripts/ckanapi/src/quick_ckanapi.py @@ -0,0 +1,62 @@ +import os +from ckanapi import RemoteCKAN +from .config import load_config, parse_args +from .logic import execute_action, list_actions + +def connect_to_ckan(ckan_site_url, api_token): + """ + Connect to the CKAN instance. + + Args: + ckan_site_url (str): The URL of the CKAN instance. + api_token (str): The API token for accessing the CKAN instance. + + Returns: + RemoteCKAN: The CKAN instance connection. + """ + return RemoteCKAN(ckan_site_url, apikey=api_token) + +def is_modifying_action(action_name): + """ + Determine if an action modifies data based on its name. + + Args: + action_name (str): The name of the action. + + Returns: + bool: True if the action modifies data, False otherwise. + """ + modifying_prefixes = ('update_', 'create_', 'delete_') + return action_name.startswith(modifying_prefixes) + +def main(config): + """ + Main function to update all packages and export groups and organizations. + + Args: + config (SampleConfig): The configuration object. 
+ """ + # Connect to the CKAN instance + rc = connect_to_ckan(config.ckan_site_url, config.api_token) + + # Create output directories if they don't exist + output_dir = os.path.join('output', config.site) + os.makedirs(output_dir, exist_ok=True) + + # Check if there are any actions that modify data + if any(is_modifying_action(action) for action in config.actions): + if not config.api_token: + print("Error: API token is required for modifying actions.") + return + + # Execute actions based on the configuration + for action in config.actions: + execute_action(action, rc, config, output_dir) + + print("Actions completed.") + +if __name__ == "__main__": + args = parse_args() + config = load_config(args.config, site=args.site) + config.site = args.site # Add the site name to the config object + main(config) \ No newline at end of file From 2c25c97d01b99def7f13541ac3aec3499a12423d Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Mon, 14 Oct 2024 08:30:39 +0200 Subject: [PATCH 08/21] Remove Trivy vulnerability scanner steps from Docker master workflow --- .github/workflows/docker-master.yml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/.github/workflows/docker-master.yml b/.github/workflows/docker-master.yml index b0eab55a..49687428 100644 --- a/.github/workflows/docker-master.yml +++ b/.github/workflows/docker-master.yml @@ -69,16 +69,3 @@ jobs: with: dockerfile: ${{ env.CONTEXT }}${{ env.DOCKERFILE_PATH }}/${{ env.DOCKERFILE }} no-fail: true - - - name: Run Trivy container image vulnerability scanner - uses: aquasecurity/trivy-action@0.24.0 - with: - image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.VERSION }} - format: sarif - output: trivy-results.sarif - - - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v3 - if: always() - with: - sarif_file: trivy-results.sarif \ No newline at end of file From 6b9a31131b08923d3714aba219a7b84b01e4aa57 Mon Sep 17 
00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Tue, 15 Oct 2024 09:46:56 +0200 Subject: [PATCH 09/21] Add robots.txt to prevent crawling of specific site paths --- README.md | 54 ++++++++++++++++++++++++++++++++++++++++++ nginx/setup/robots.txt | 22 +++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 nginx/setup/robots.txt diff --git a/README.md b/README.md index 9e4351a2..e7bae12f 100644 --- a/README.md +++ b/README.md @@ -729,6 +729,60 @@ To have Docker Compose run automatically when you reboot a machine, you can foll sudo systemctl status ckan-docker-compose ``` +### `robots.txt` +### Configuring `robots.txt` to Mitigate Bot and Crawler Overload +To prevent bots and crawlers from overloading your CKAN API and causing service disruptions, it is essential to properly configure the `robots.txt` file in the root directory of your server. This file provides instructions to web crawlers about which parts of your site they are allowed to access and crawl. + +#### Steps to Configure `robots.txt` + +1. **Create or Edit [`nginx/setup/robots.txt`](./nginx/setup/robots.txt) in the Root Directory**: + Ensure that the `robots.txt` file is located in the root directory of your server. This is crucial because bots typically look for this file at the root level. + +2. **Disallow Specific Bots**: + To prevent specific bots, such as the SEMrushBot, from crawling certain parts of your site, add the following lines to your `robots.txt` file: + + ```txt + User-agent: SemrushBot + Disallow: /catalogo + ``` + +3. **Set Crawl Delay**: + To reduce the load on your server, you can set a crawl delay for bots. This instructs the bot to wait a specified number of seconds between requests. For example, to set a 10-second delay for SEMrushBot, add: + + ```txt + User-agent: SemrushBot + Crawl-delay: 10 + ``` + +4. **General Disallow Rules**: + You can also add general rules to disallow all bots from accessing specific directories or files. 
For example: + + ```txt + User-agent: * + Disallow: /catalog/ + Disallow: /csw/ + ``` + +5. **Example `robots.txt` File**: + Here is an example of a complete `robots.txt` file that includes the above configurations: + + ```txt + # Disallow SEMrushBot from accessing the /catalogo directory + User-agent: SemrushBot + Disallow: /catalogo + Crawl-delay: 10 + + # General disallow rules for all bots + User-agent: * + Disallow: /catalog/ + Disallow: /csw/ + ``` + +6. **Verify `robots.txt` Configuration**: + After updating the `robots.txt` file, verify that it is correctly configured by accessing it via your browser. For example, navigate to `https://{ckan_site_url}/robots.txt` and ensure that the rules are as expected. + +7. **Monitor Bot Activity**: + Continuously monitor your server logs to ensure that bots are adhering to the rules specified in the `robots.txt` file. If you notice any bots ignoring the rules, you may need to take additional measures, such as blocking their IP addresses. ## CKAN API > [!NOTE] diff --git a/nginx/setup/robots.txt b/nginx/setup/robots.txt new file mode 100644 index 00000000..c3885ee8 --- /dev/null +++ b/nginx/setup/robots.txt @@ -0,0 +1,22 @@ +# +# robots.txt +# +# This file is to prevent the crawling and indexing of certain parts +# of your site by web crawlers and spiders run by sites like Yahoo! +# and Google. By telling these "robots" where not to go on your site, +# you save bandwidth and server resources.
+# +# This file will be ignored unless it is at the root of your host: +# Used: http://example.com/robots.txt +# Ignored: http://example.com/site/robots.txt +# +# For more information about the robots.txt standard, see: +# http://www.robotstxt.org/robotstxt.html + +User-agent: * + +# Paths (clean URLs) +Disallow: / +Disallow: /catalogo/ +Disallow: /catalog/ +Disallow: /csw/ \ No newline at end of file From fc71cbfa34bada8bd5114273d354f881d21814b0 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:22:26 +0200 Subject: [PATCH 10/21] Update README.md to add Solr backup instruction --- README.md | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e7bae12f..ed1c3471 100644 --- a/README.md +++ b/README.md @@ -587,9 +587,9 @@ PostgreSQL offers the command line tools [`pg_dump`](https://www.postgresql.org/ - `your_postgres_password`: The password for the PostgreSQL user. - `/path/to/your/backup/directory`: The path to the directory where you want to store the backup files. - > [!WARNING] - > If you have changed the values of the PostgreSQL container, database or user, change them too. - > Check that `zip` package is installed: `sudo apt-get install zip` +> [!WARNING] +> If you have changed the values of the PostgreSQL container, database or user, change them too. +> Check that `zip` package is installed: `sudo apt-get install zip` 4. Save and close the file. @@ -642,6 +642,38 @@ If need to use a backup, restore it: 3. Restart the `ckan` container. +### Solr backups +To perform a backup, follow these steps: + +1. **Replicate the `ckan` core** + + ```sh + docker exec -it your_solr_container_id bash -c "curl 'http://localhost:8983/solr/ckan/replication?command=backup&wt=json'" + ``` + + Replace `your_solr_container_id` with the id of your `solr-1` container. + +2. 
**In the container, navigate to the Solr data directory:** + + ```sh + docker exec -it your_solr_container_id bash + + solr@12d91jdkas:/opt/solr-9.7.0$ cd /var/solr/data/ckan/ + + # Backup data (e.g. snapshot.20241015102836306) + solr@12d91jdkas:/var/solr/data/ckan$ tar -czvf /tmp/snapshots_backup.tgz data/snapshot.20241015102836306 + + # Backup conf + tar -czvf /tmp/conf_backup.tgz conf + ``` + +3. **Export it to the host** + + ```sh + docker cp your_solr_container_id:/tmp/snapshots_backup.tgz ./snapshots_backup.tgz + docker cp your_solr_container_id:/tmp/conf_backup.tgz ./conf_backup.tgz + ``` + ### CKAN. Manage new users 1. Create a new user directly by a sysadmin in the `{ckan_site_url}/user/register` endpoint From 006ab1550c3e9cdf82ec025948e5fa8985cf5d49 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Thu, 17 Oct 2024 12:21:02 +0200 Subject: [PATCH 11/21] Change worker user from 'ckan' to 'root' in harvester and xloader configuration files - Until https://github.com/ckan/ckan-docker/pull/172 and https://github.com/ckan/ckan-docker-base/pull/80 to ckan-docker --- ckan/setup/workers/harvester.conf | 8 ++++---- ckan/setup/workers/xloader.conf | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ckan/setup/workers/harvester.conf b/ckan/setup/workers/harvester.conf index cb1c2e3d..0300b02d 100644 --- a/ckan/setup/workers/harvester.conf +++ b/ckan/setup/workers/harvester.conf @@ -1,6 +1,6 @@ [program:ckan_gather_consumer] command=ckan harvester gather-consumer -user=ckan +user=root numprocs=1 stdout_logfile=/var/log/harvester/gather_consumer.log stdout_logfile_maxbytes=50MB @@ -13,7 +13,7 @@ priority=1 [program:ckan_fetch_consumer] command=ckan harvester fetch-consumer -user=ckan +user=root numprocs=1 stdout_logfile=/var/log/harvester/fetch_consumer.log stdout_logfile_maxbytes=50MB @@ -26,7 +26,7 @@ priority=2 [program:ckan_harvester_run] command=ckan harvester run -user=ckan +user=root numprocs=1 stdout_logfile=/var/log/harvester/ckan_harvester.log stdout_logfile_maxbytes=25MB 
@@ -39,7 +39,7 @@ priority=3 [program:ckan_harvester_clean_log] command=ckan harvester clean-harvest-log -user=ckan +user=root numprocs=1 stdout_logfile=/var/log/harvester/ckan_harvester_clean_log.log stdout_logfile_maxbytes=25MB diff --git a/ckan/setup/workers/xloader.conf b/ckan/setup/workers/xloader.conf index 7d12cef1..9e5ba810 100644 --- a/ckan/setup/workers/xloader.conf +++ b/ckan/setup/workers/xloader.conf @@ -1,6 +1,6 @@ [program:ckan_xloader] command=ckan jobs worker default -user=ckan +user=root numprocs=1 stdout_logfile=/var/log/xloader/ckan_xloader.log stdout_logfile_maxbytes=100MB From de00769a3159ebde11f8963b39b8f0d4e4b87044 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:24:30 +0200 Subject: [PATCH 12/21] Add comment to clarify permissions for CKAN i18n files in start_ckan.sh.override --- ckan/setup/start_ckan.sh.override | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ckan/setup/start_ckan.sh.override b/ckan/setup/start_ckan.sh.override index 68f01b49..2a4d7bd0 100644 --- a/ckan/setup/start_ckan.sh.override +++ b/ckan/setup/start_ckan.sh.override @@ -54,6 +54,9 @@ chown -R ckan:ckan $CKAN_LOGS_PATH/harvester mkdir -p $CKAN_LOGS_PATH/xloader chown -R ckan:ckan $CKAN_LOGS_PATH/xloader +# Ensure CKAN has the correct permissions. Uncomment to avoid i18n/*js errors: https://github.com/ckan/ckanext-pages/issues/91 +#chmod -R 777 /srv/app/src/ckan/ckan/public/base/i18n + # Set the common uwsgi options. 
## Add thunder-lock to prevent multiple workers from running the same job and buffer-size to prevent large headers ## To increase performance, you can adjust the number of processes (-p) (More info: https://uwsgi-docs.readthedocs.io/en/latest/Options.html & https://www.bloomberg.com/company/stories/configuring-uwsgi-production-deployment/) From f670147c602fbbe6d499bb3c4cf65327ecb45593 Mon Sep 17 00:00:00 2001 From: Ernesto Date: Tue, 22 Oct 2024 11:14:20 +0000 Subject: [PATCH 13/21] fixed smtp on .env.example, added SMTP test server on dev --- .env.example | 20 +++++++++++--------- docker-compose.dev.yml | 9 ++++++++- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/.env.example b/.env.example index 67f4f5eb..94b3631f 100644 --- a/.env.example +++ b/.env.example @@ -125,15 +125,17 @@ CKAN_SYSADMIN_EMAIL=your_email@example.com CKAN_STORAGE_PATH=/var/lib/ckan CKAN_LOGS_PATH=/var/log # SMTP settings -CKAN_DOCKER_SMTP_ENABLED=False -CKAN_SMTP_SERVER=smtp.corporateict.domain:25 -CKAN_SMTP_STARTTLS=True -CKAN_SMTP_USER=user -CKAN_SMTP_PASSWORD=pass -CKAN_SMTP_MAIL_FROM=ckan@${PROXY_SERVER_NAME} -CKAN_SMTP_REPLY_TO='' -CKAN_EMAIL_TO='' -CKAN_ERROR_EMAIL_FROM='' +CKAN___ACTIVITY_STREAMS_EMAIL_NOTIFICATION=True +CKAN___SMTP__ENABLED=True +CKAN___SMTP__SERVER=mailserver:25 +CKAN___SMTP__STARTTLS=False +CKAN___SMTP__USER=user +CKAN___SMTP__PASSWORD=password +CKAN___SMTP__MAIL_FROM=ckan@${PROXY_SERVER_NAME} +CKAN___SMTP__REPLY_TO=ckan@${PROXY_SERVER_NAME} +CKAN___EMAIL_TO=ckan@${PROXY_SERVER_NAME} +CKAN___ERROR_EMAIL_FROM='' + ## Customize which text formats the text_view plugin will show CKAN__PREVIEW__JSON_FORMATS="json jsonld" # html htm rdf+xml owl+xml xml n3 n-triples turtle plain atom csv tsv rss txt json diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index f7bcba6b..d36fb40a 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -134,4 +134,11 @@ services: max-file: "10" restart: unless-stopped healthcheck: - test: ["CMD", 
"redis-cli", "-e", "QUIT"] \ No newline at end of file + test: ["CMD", "redis-cli", "-e", "QUIT"] + + #mailserver: + # image: rnwood/smtp4dev + # ports: + # - "1080:80" + # restart: unless-stopped + \ No newline at end of file From d6dbff76e10315487c0027aa4b228ff5d8f34e8c Mon Sep 17 00:00:00 2001 From: Ernesto Date: Tue, 22 Oct 2024 15:23:13 +0000 Subject: [PATCH 14/21] fixed variable name in .env.example --- .env.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 94b3631f..4a04e3af 100644 --- a/.env.example +++ b/.env.example @@ -125,7 +125,7 @@ CKAN_SYSADMIN_EMAIL=your_email@example.com CKAN_STORAGE_PATH=/var/lib/ckan CKAN_LOGS_PATH=/var/log # SMTP settings -CKAN___ACTIVITY_STREAMS_EMAIL_NOTIFICATION=True +CKAN__ACTIVITY_STREAMS_EMAIL_NOTIFICATIONS=True CKAN___SMTP__ENABLED=True CKAN___SMTP__SERVER=mailserver:25 CKAN___SMTP__STARTTLS=False From 55095c4455085c7afbc48830f336341192a28a97 Mon Sep 17 00:00:00 2001 From: Ernesto Date: Tue, 22 Oct 2024 15:24:36 +0000 Subject: [PATCH 15/21] removed unnecesary code from ckan setup script --- .../docker-entrypoint.d/01_setup_ckanext_config.sh | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/ckan/docker-entrypoint.d/01_setup_ckanext_config.sh b/ckan/docker-entrypoint.d/01_setup_ckanext_config.sh index 322bd224..afe8be78 100644 --- a/ckan/docker-entrypoint.d/01_setup_ckanext_config.sh +++ b/ckan/docker-entrypoint.d/01_setup_ckanext_config.sh @@ -4,20 +4,6 @@ echo "[docker-entrypoint.01_setup_ckanext_config] Clear index" ckan -c $CKAN_INI search-index clear -# Add SMTP settings if CKAN_DOCKER_SMTP_ENABLED is True -if [ "$CKAN_DOCKER_SMTP_ENABLED" = "True" ]; then - echo "[docker-entrypoint.01_setup_ckanext_config] Adding SMTP settings to the CKAN config file" - ckan config-tool $CKAN_INI \ - "smtp.server=$CKAN_SMTP_SERVER" \ - "smtp.starttls=$CKAN_SMTP_STARTTLS" \ - "smtp.user=$CKAN_SMTP_USER" \ - "smtp.password=$CKAN_SMTP_PASSWORD" \ - 
"smtp.mail_from=$CKAN_SMTP_MAIL_FROM" \ - "smtp.reply_to=$CKAN_SMTP_REPLY_TO" \ - "email_to=$CKAN_EMAIL_TO" \ - "error_email_from=$CKAN_ERROR_EMAIL_FROM" -fi - # Rebuild index echo "[docker-entrypoint.01_setup_ckanext_config] Rebuild index" ckan -c $CKAN_INI search-index rebuild \ No newline at end of file From 0fa21a27ad899e65702fe827a3fd5a5ce0a902fc Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Wed, 23 Oct 2024 22:30:18 +0200 Subject: [PATCH 16/21] Update Scheming DCAT version to v4.2.1 in Dockerfile --- ckan/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckan/Dockerfile b/ckan/Dockerfile index d6eeb313..7dc790c3 100644 --- a/ckan/Dockerfile +++ b/ckan/Dockerfile @@ -22,7 +22,7 @@ COPY req_fixes req_fixes ## Pages - v0.5.2 ## ## PDFView - 0.0.8 ## ## Fluent - v1.0.1 (mjanez/Forked stable version) ## -## Scheming DCAT - v4.1.0 (mjanez/GeoDCAT-AP/NTI-RISP extended version) ## +## Scheming DCAT - v4.2.1 (mjanez/GeoDCAT-AP/NTI-RISP extended version) ## RUN echo ${TZ} > /etc/timezone && \ if ! 
[ /usr/share/zoneinfo/${TZ} -ef /etc/localtime ]; then cp /usr/share/zoneinfo/${TZ} /etc/localtime; fi && \ # Install patch utility @@ -57,7 +57,7 @@ RUN echo ${TZ} > /etc/timezone && \ echo "mjanez/ckanext-fluent" && \ pip3 install --no-cache-dir -e git+https://github.com/mjanez/ckanext-fluent.git@v1.0.1#egg=ckanext-fluent && \ echo "mjanez/ckanext-schemingdcat" && \ - pip3 install --no-cache-dir -e git+https://github.com/mjanez/ckanext-schemingdcat.git@v4.1.0#egg=ckanext_schemingdcat && \ + pip3 install --no-cache-dir -e git+https://github.com/mjanez/ckanext-schemingdcat.git@v4.2.1#egg=ckanext_schemingdcat && \ pip3 install --no-cache-dir -r ${APP_DIR}/src/ckanext-schemingdcat/requirements.txt && \ # Remove system cache apt-get clean && \ From e9622de37c18bd0292ce522c176463eb27d4c85e Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Fri, 25 Oct 2024 14:40:14 +0200 Subject: [PATCH 17/21] Add ckanext-openapi and OpenAPI endpoints configuration to .env.example --- .env.example | 3 ++- ckan/Dockerfile | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 67f4f5eb..f351cb17 100644 --- a/.env.example +++ b/.env.example @@ -168,7 +168,7 @@ CKAN__LOCALE_ORDER="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru CKAN__LOCALES_OFFERED="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru de pl nl bg ko_KR hu sa sl lv" # Extensions -CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester schemingdcat_postgres_harvester 
schemingdcat_open_data_statistics harvest pdf_view pages fluent envvars" +CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester schemingdcat_postgres_harvester schemingdcat_open_data_statistics harvest pdf_view pages fluent openapi envvars" # ckanext-harvest CKAN__HARVEST__MQ__TYPE=redis @@ -221,6 +221,7 @@ CKANEXT__SCHEMINGDCAT__SOCIAL_X="https://x.com/ckanproject" CKANEXT__SCHEMINGDCAT__SOCIAL_LINKEDIN="https://www.linkedin.com/company/ckanproject" CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS=True CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS_THEMES=True +CKANEXT__OPENAPI__ENDPOINTS='[{"url":"https://raw.githubusercontent.com/OAI/OpenAPI-Specification/refs/heads/main/examples/v3.0/api-with-examples.json","name":"sample","title":{"en":"OpenAPI sample 1","es":"Ejemplo de OpenAPI 1"},"description":{"en":"API with examples.","es":"API con ejemplos."}},{"url":"https://raw.githubusercontent.com/OAI/OpenAPI-Specification/refs/heads/main/examples/v3.0/petstore.json","name":"petstore","title":{"en":"Petstore OpenAPI example","es":"Ejemplo OpenAPI Petstore"},"description":{"en":"This is a sample Pet Store Server based on the OpenAPI 3.0 specification.","es":"Este es un ejemplo de Servidor de Tienda de Mascotas basado en la especificación OpenAPI 3.0."}}]' # ckanext-pages CKANEXT__PAGES__ALOW_HTML=False diff --git a/ckan/Dockerfile b/ckan/Dockerfile index 7dc790c3..440f8875 100644 --- a/ckan/Dockerfile +++ b/ckan/Dockerfile @@ -22,6 +22,7 @@ COPY req_fixes req_fixes ## Pages - v0.5.2 ## ## PDFView - 0.0.8 ## ## Fluent - v1.0.1 (mjanez/Forked 
stable version) ## +## OpenAPI - v1.0.0 (mjanez stable version) ## ## Scheming DCAT - v4.2.1 (mjanez/GeoDCAT-AP/NTI-RISP extended version) ## RUN echo ${TZ} > /etc/timezone && \ if ! [ /usr/share/zoneinfo/${TZ} -ef /etc/localtime ]; then cp /usr/share/zoneinfo/${TZ} /etc/localtime; fi && \ @@ -56,6 +57,8 @@ RUN echo ${TZ} > /etc/timezone && \ pip3 install --no-cache-dir -e git+https://github.com/ckan/ckanext-pdfview.git@0.0.8#egg=ckanext-pdfview && \ echo "mjanez/ckanext-fluent" && \ pip3 install --no-cache-dir -e git+https://github.com/mjanez/ckanext-fluent.git@v1.0.1#egg=ckanext-fluent && \ + echo "mjanez/ckanext-openapi" && \ + pip3 install --no-cache-dir -e git+https://github.com/mjanez/ckanext-openapi.git@v1.0.0#egg=ckanext-openapi && \ echo "mjanez/ckanext-schemingdcat" && \ pip3 install --no-cache-dir -e git+https://github.com/mjanez/ckanext-schemingdcat.git@v4.2.1#egg=ckanext_schemingdcat && \ pip3 install --no-cache-dir -r ${APP_DIR}/src/ckanext-schemingdcat/requirements.txt && \ From 548a01a870312f5592981a3694fb1fed4ee01c95 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Thu, 31 Oct 2024 08:33:01 +0100 Subject: [PATCH 18/21] Update actions trivy security scan step to 0.28.0 version --- .github/workflows/docker-build.yml | 2 +- .github/workflows/docker-manual.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 08b8f2be..51c8f775 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -81,7 +81,7 @@ jobs: no-fail: true - name: Run Trivy container image vulnerability scanner - uses: aquasecurity/trivy-action@0.24.0 + uses: aquasecurity/trivy-action@0.28.0 with: image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.VERSION }} format: sarif diff --git a/.github/workflows/docker-manual.yml b/.github/workflows/docker-manual.yml index c13de528..f4e5a97b 100644 --- 
a/.github/workflows/docker-manual.yml +++ b/.github/workflows/docker-manual.yml @@ -71,7 +71,7 @@ jobs: no-fail: true - name: Run Trivy container image vulnerability scanner - uses: aquasecurity/trivy-action@0.24.0 + uses: aquasecurity/trivy-action@0.28.0 with: image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.VERSION }} format: sarif From 5ef4c298c53ccd5f5f0ef9de1264e31d2cd5f809 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Mon, 4 Nov 2024 13:21:13 +0100 Subject: [PATCH 19/21] Fix email_to/error_email_from bugs Using email_to or error_email_from generate errors: https://github.com/ckan/ckan/issues/7802 --- .env.example | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 8f276738..d92e64a0 100644 --- a/.env.example +++ b/.env.example @@ -133,7 +133,8 @@ CKAN___SMTP__USER=user CKAN___SMTP__PASSWORD=password CKAN___SMTP__MAIL_FROM=ckan@${PROXY_SERVER_NAME} CKAN___SMTP__REPLY_TO=ckan@${PROXY_SERVER_NAME} -CKAN___EMAIL_TO=ckan@${PROXY_SERVER_NAME} +#WARNING: Using email_to or error_email_from generate errors: https://github.com/ckan/ckan/issues/7802 +CKAN___EMAIL_TO='' CKAN___ERROR_EMAIL_FROM='' ## Customize which text formats the text_view plugin will show From 145778bd22d3050ad37eb97257c544fc79f0ac27 Mon Sep 17 00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:57:14 +0100 Subject: [PATCH 20/21] Update environment configuration for CKAN and OpenAPI endpoints - Fix CKAN__SEARCH__FACETS__DEFAULT and CKAN_SITE_ID - Update Codespaces, DEV and ES samples. 
--- .env.example | 7 +++---- samples/.env.codespaces | 26 ++++++++++++++++---------- samples/.env.dev.example | 26 ++++++++++++++++---------- samples/.env.es.example | 30 ++++++++++++++++++------------ 4 files changed, 53 insertions(+), 36 deletions(-) diff --git a/.env.example b/.env.example index d92e64a0..c2806eac 100644 --- a/.env.example +++ b/.env.example @@ -101,7 +101,7 @@ TEST_CKAN_DATASTORE_READ_URL=postgresql://${DATASTORE_READONLY_USER}:${DATASTORE # CKAN core ## If use docker-compose.ghcr.yml only "*.*.*" versions available in: https://github.com/mjanez/ckan-docker/pkgs/container/ckan-docker CKAN_VERSION=2.10.5 -CKAN_SITE_ID=default +CKAN__SITE_ID=default # CKAN_SITE_URL = http:/ or https:/ + PROXY_SERVER_NAME. Optionally the APACHE_HOST_PORT if different from 80 CKAN_SITE_URL=${PROXY_SERVER_URL} CKAN__ROOT_PATH=${PROXY_CKAN_LOCATION}/{{LANG}} @@ -152,7 +152,7 @@ CKAN__CORS__ORIGIN_WHITELIST="" CKAN__AUTH__ALLOW_DATASET_COLLABORATORS=False CKAN__AUTH__ALLOW_ADMIN_COLLABORATORS=False # Default number of facets shown in search results. Default 10. -CKAN__SEARCH__FACETS__DEFAULT=4 +CKAN___SEARCH__FACETS__DEFAULT=4 # Enable or disable the DataStore SQL search backend. Default is False. 
CKAN__DATASTORE__SQLSEARCH__ENABLED=True @@ -224,8 +224,7 @@ CKANEXT__SCHEMINGDCAT__SOCIAL_X="https://x.com/ckanproject" CKANEXT__SCHEMINGDCAT__SOCIAL_LINKEDIN="https://www.linkedin.com/company/ckanproject" CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS=True CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS_THEMES=True -CKANEXT__OPENAPI__ENDPOINTS='[{"url":"https://raw.githubusercontent.com/OAI/OpenAPI-Specification/refs/heads/main/examples/v3.0/api-with-examples.json","name":"sample","title":{"en":"OpenAPI sample 1","es":"Ejemplo de OpenAPI 1"},"description":{"en":"API with examples.","es":"API con ejemplos."}},{"url":"https://raw.githubusercontent.com/OAI/OpenAPI-Specification/refs/heads/main/examples/v3.0/petstore.json","name":"petstore","title":{"en":"Petstore OpenAPI example","es":"Ejemplo OpenAPI Petstore"},"description":{"en":"This is a sample Pet Store Server based on the OpenAPI 3.0 specification.","es":"Este es un ejemplo de Servidor de Tienda de Mascotas basado en la especificación OpenAPI 3.0."}}]' - +CKANEXT__OPENAPI__ENDPOINTS='[{"url":"https://raw.githubusercontent.com/mjanez/ckanext-openapi/refs/heads/develop/ckanext/openapi/public/static/openapi/datastore.yaml","name":"datastore","title":{"en":"CKAN - Datastore API","es":"Portal de datos abiertos de CKAN - API Datastore"},"description":{"en":"This API provides live access to the Datastore portion of the CKAN Open Data Portal.","es":"Esta API proporciona acceso en tiempo real a la parte de Datastore del Portal de Datos Abiertos CKAN."}},{"url":"https://raw.githubusercontent.com/mjanez/ckanext-openapi/refs/heads/develop/ckanext/openapi/public/static/openapi/ckan.yaml","name":"ckan","title":{"en":"CKAN Open Data Portal - CKAN API (ES)","es":"Portal de datos abiertos de CKAN - API CKAN"},"description":{"en":"This API provides live access to the CKAN portion of the CKAN Open Data Portal.","es":"Esta API proporciona acceso en tiempo real a la parte de CKAN del Portal de Datos Abiertos CKAN."}}]' # ckanext-pages 
CKANEXT__PAGES__ALOW_HTML=False CKANEXT__PAGES__ORGANIZATION=True diff --git a/samples/.env.codespaces b/samples/.env.codespaces index 706b09b7..3d1f82f4 100644 --- a/samples/.env.codespaces +++ b/samples/.env.codespaces @@ -104,7 +104,7 @@ TEST_CKAN_DATASTORE_READ_URL=postgresql://${DATASTORE_READONLY_USER}:${DATASTORE # CKAN core ## If use docker-compose.ghcr.yml only "*.*.*" versions available in: https://github.com/mjanez/ckan-docker/pkgs/container/ckan-docker CKAN_VERSION=2.10.5 -CKAN_SITE_ID=default +CKAN__SITE_ID=default # CKAN_SITE_URL = http:/ or https:/ + PROXY_SERVER_NAME. Optionally the APACHE_HOST_PORT if different from 80 CKAN_SITE_URL=${PROXY_SERVER_URL} CKAN__ROOT_PATH=${PROXY_CKAN_LOCATION}/{{LANG}} @@ -128,12 +128,18 @@ CKAN_SYSADMIN_EMAIL=your_email@example.com CKAN_STORAGE_PATH=/var/lib/ckan CKAN_LOGS_PATH=/var/log # SMTP settings -CKAN__SMTP_ENABLED=False -CKAN_SMTP_SERVER=smtp.corporateict.domain:25 -CKAN_SMTP_STARTTLS=True -CKAN_SMTP_USER=user -CKAN_SMTP_PASSWORD=pass -CKAN_SMTP_MAIL_FROM=ckan@${PROXY_SERVER_NAME} +CKAN__ACTIVITY_STREAMS_EMAIL_NOTIFICATIONS=True +CKAN___SMTP__ENABLED=True +CKAN___SMTP__SERVER=mailserver:25 +CKAN___SMTP__STARTTLS=False +CKAN___SMTP__USER=user +CKAN___SMTP__PASSWORD=password +CKAN___SMTP__MAIL_FROM=ckan@${PROXY_SERVER_NAME} +CKAN___SMTP__REPLY_TO=ckan@${PROXY_SERVER_NAME} +#WARNING: Using email_to or error_email_from generate errors: https://github.com/ckan/ckan/issues/7802 +CKAN___EMAIL_TO='' +CKAN___ERROR_EMAIL_FROM='' + ## Customize which text formats the text_view plugin will show CKAN__PREVIEW__JSON_FORMATS="json jsonld" # html htm rdf+xml owl+xml xml n3 n-triples turtle plain atom csv tsv rss txt json @@ -149,7 +155,7 @@ CKAN__CORS__ORIGIN_WHITELIST="" CKAN__AUTH__ALLOW_DATASET_COLLABORATORS=False CKAN__AUTH__ALLOW_ADMIN_COLLABORATORS=False # Default number of facets shown in search results. Default 10. 
-CKAN__SEARCH__FACETS__DEFAULT=4 +CKAN___SEARCH__FACETS__DEFAULT=4 # Enable or disable the DataStore SQL search backend. Default is False. CKAN__DATASTORE__SQLSEARCH__ENABLED=True @@ -168,7 +174,7 @@ CKAN__LOCALE_ORDER="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru CKAN__LOCALES_OFFERED="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru de pl nl bg ko_KR hu sa sl lv" # Extensions -CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester schemingdcat_postgres_harvester schemingdcat_open_data_statistics harvest pdf_view pages fluent envvars" +CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester schemingdcat_postgres_harvester schemingdcat_open_data_statistics harvest pdf_view pages fluent openapi envvars" # ckanext-harvest CKAN__HARVEST__MQ__TYPE=redis @@ -221,7 +227,7 @@ CKANEXT__SCHEMINGDCAT__SOCIAL_X="https://x.com/ckanproject" CKANEXT__SCHEMINGDCAT__SOCIAL_LINKEDIN="https://www.linkedin.com/company/ckanproject" CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS=True CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS_THEMES=True - 
+CKANEXT__OPENAPI__ENDPOINTS='[{"url":"https://raw.githubusercontent.com/mjanez/ckanext-openapi/refs/heads/develop/ckanext/openapi/public/static/openapi/datastore.yaml","name":"datastore","title":{"en":"CKAN - Datastore API","es":"Portal de datos abiertos de CKAN - API Datastore"},"description":{"en":"This API provides live access to the Datastore portion of the CKAN Open Data Portal.","es":"Esta API proporciona acceso en tiempo real a la parte de Datastore del Portal de Datos Abiertos CKAN."}},{"url":"https://raw.githubusercontent.com/mjanez/ckanext-openapi/refs/heads/develop/ckanext/openapi/public/static/openapi/ckan.yaml","name":"ckan","title":{"en":"CKAN Open Data Portal - CKAN API (ES)","es":"Portal de datos abiertos de CKAN - API CKAN"},"description":{"en":"This API provides live access to the CKAN portion of the CKAN Open Data Portal.","es":"Esta API proporciona acceso en tiempo real a la parte de CKAN del Portal de Datos Abiertos CKAN."}}]' # ckanext-pages CKANEXT__PAGES__ALOW_HTML=False CKANEXT__PAGES__ORGANIZATION=True diff --git a/samples/.env.dev.example b/samples/.env.dev.example index 04deb61a..7a299075 100644 --- a/samples/.env.dev.example +++ b/samples/.env.dev.example @@ -102,7 +102,7 @@ TEST_CKAN_DATASTORE_READ_URL=postgresql://${DATASTORE_READONLY_USER}:${DATASTORE # CKAN core ## If use docker-compose.ghcr.yml only "*.*.*" versions available in: https://github.com/mjanez/ckan-docker/pkgs/container/ckan-docker CKAN_VERSION=2.10.5 -CKAN_SITE_ID=default +CKAN__SITE_ID=default # CKAN_SITE_URL = http:/ or https:/ + PROXY_SERVER_NAME. 
Optionally the APACHE_HOST_PORT if different from 80 CKAN_SITE_URL=${PROXY_SERVER_URL} CKAN__ROOT_PATH=${PROXY_CKAN_LOCATION}/{{LANG}} @@ -126,12 +126,18 @@ CKAN_SYSADMIN_EMAIL=your_email@example.com CKAN_STORAGE_PATH=/var/lib/ckan CKAN_LOGS_PATH=/var/log # SMTP settings -CKAN__SMTP_ENABLED=False -CKAN_SMTP_SERVER=smtp.corporateict.domain:25 -CKAN_SMTP_STARTTLS=True -CKAN_SMTP_USER=user -CKAN_SMTP_PASSWORD=pass -CKAN_SMTP_MAIL_FROM=ckan@${PROXY_SERVER_NAME} +CKAN__ACTIVITY_STREAMS_EMAIL_NOTIFICATIONS=True +CKAN___SMTP__ENABLED=True +CKAN___SMTP__SERVER=mailserver:25 +CKAN___SMTP__STARTTLS=False +CKAN___SMTP__USER=user +CKAN___SMTP__PASSWORD=password +CKAN___SMTP__MAIL_FROM=ckan@${PROXY_SERVER_NAME} +CKAN___SMTP__REPLY_TO=ckan@${PROXY_SERVER_NAME} +#WARNING: Using email_to or error_email_from generate errors: https://github.com/ckan/ckan/issues/7802 +CKAN___EMAIL_TO='' +CKAN___ERROR_EMAIL_FROM='' + ## Customize which text formats the text_view plugin will show CKAN__PREVIEW__JSON_FORMATS="json jsonld" # html htm rdf+xml owl+xml xml n3 n-triples turtle plain atom csv tsv rss txt json @@ -147,7 +153,7 @@ CKAN__CORS__ORIGIN_WHITELIST="" CKAN__AUTH__ALLOW_DATASET_COLLABORATORS=False CKAN__AUTH__ALLOW_ADMIN_COLLABORATORS=False # Default number of facets shown in search results. Default 10. -CKAN__SEARCH__FACETS__DEFAULT=4 +CKAN___SEARCH__FACETS__DEFAULT=4 # Enable or disable the DataStore SQL search backend. Default is False. 
CKAN__DATASTORE__SQLSEARCH__ENABLED=True @@ -166,7 +172,7 @@ CKAN__LOCALE_ORDER="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru CKAN__LOCALES_OFFERED="en es pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru de pl nl bg ko_KR hu sa sl lv" # Extensions -CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester schemingdcat_postgres_harvester schemingdcat_open_data_statistics harvest pdf_view pages fluent envvars" +CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester schemingdcat_postgres_harvester schemingdcat_open_data_statistics harvest pdf_view pages fluent openapi envvars" # ckanext-harvest CKAN__HARVEST__MQ__TYPE=redis @@ -219,7 +225,7 @@ CKANEXT__SCHEMINGDCAT__SOCIAL_X="https://x.com/ckanproject" CKANEXT__SCHEMINGDCAT__SOCIAL_LINKEDIN="https://www.linkedin.com/company/ckanproject" CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS=True CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS_THEMES=True - 
+CKANEXT__OPENAPI__ENDPOINTS='[{"url":"https://raw.githubusercontent.com/mjanez/ckanext-openapi/refs/heads/develop/ckanext/openapi/public/static/openapi/datastore.yaml","name":"datastore","title":{"en":"CKAN - Datastore API","es":"Portal de datos abiertos de CKAN - API Datastore"},"description":{"en":"This API provides live access to the Datastore portion of the CKAN Open Data Portal.","es":"Esta API proporciona acceso en tiempo real a la parte de Datastore del Portal de Datos Abiertos CKAN."}},{"url":"https://raw.githubusercontent.com/mjanez/ckanext-openapi/refs/heads/develop/ckanext/openapi/public/static/openapi/ckan.yaml","name":"ckan","title":{"en":"CKAN Open Data Portal - CKAN API (ES)","es":"Portal de datos abiertos de CKAN - API CKAN"},"description":{"en":"This API provides live access to the CKAN portion of the CKAN Open Data Portal.","es":"Esta API proporciona acceso en tiempo real a la parte de CKAN del Portal de Datos Abiertos CKAN."}}]' # ckanext-pages CKANEXT__PAGES__ALOW_HTML=False CKANEXT__PAGES__ORGANIZATION=True diff --git a/samples/.env.es.example b/samples/.env.es.example index 12b46905..354d8332 100644 --- a/samples/.env.es.example +++ b/samples/.env.es.example @@ -102,7 +102,7 @@ TEST_CKAN_DATASTORE_READ_URL=postgresql://${DATASTORE_READONLY_USER}:${DATASTORE # CKAN core ## If use docker-compose.ghcr.yml only "*.*.*" versions available in: https://github.com/mjanez/ckan-docker/pkgs/container/ckan-docker CKAN_VERSION=2.10.5 -CKAN_SITE_ID=default +CKAN__SITE_ID=default # CKAN_SITE_URL = http:/ or https:/ + PROXY_SERVER_NAME. 
Optionally the APACHE_HOST_PORT if different from 80 CKAN_SITE_URL=${PROXY_SERVER_URL} CKAN__ROOT_PATH=${PROXY_CKAN_LOCATION}/{{LANG}} @@ -126,12 +126,18 @@ CKAN_SYSADMIN_EMAIL=your_email@example.com CKAN_STORAGE_PATH=/var/lib/ckan CKAN_LOGS_PATH=/var/log # SMTP settings -CKAN__SMTP_ENABLED=False -CKAN_SMTP_SERVER=smtp.corporateict.domain:25 -CKAN_SMTP_STARTTLS=True -CKAN_SMTP_USER=user -CKAN_SMTP_PASSWORD=pass -CKAN_SMTP_MAIL_FROM=ckan@${PROXY_SERVER_NAME} +CKAN__ACTIVITY_STREAMS_EMAIL_NOTIFICATIONS=True +CKAN___SMTP__ENABLED=True +CKAN___SMTP__SERVER=mailserver:25 +CKAN___SMTP__STARTTLS=False +CKAN___SMTP__USER=user +CKAN___SMTP__PASSWORD=password +CKAN___SMTP__MAIL_FROM=ckan@${PROXY_SERVER_NAME} +CKAN___SMTP__REPLY_TO=ckan@${PROXY_SERVER_NAME} +#WARNING: Using email_to or error_email_from generate errors: https://github.com/ckan/ckan/issues/7802 +CKAN___EMAIL_TO='' +CKAN___ERROR_EMAIL_FROM='' + ## Customize which text formats the text_view plugin will show CKAN__PREVIEW__JSON_FORMATS="json jsonld" # html htm rdf+xml owl+xml xml n3 n-triples turtle plain atom csv tsv rss txt json @@ -147,7 +153,7 @@ CKAN__CORS__ORIGIN_WHITELIST="" CKAN__AUTH__ALLOW_DATASET_COLLABORATORS=False CKAN__AUTH__ALLOW_ADMIN_COLLABORATORS=False # Default number of facets shown in search results. Default 10. -CKAN__SEARCH__FACETS__DEFAULT=4 +CKAN___SEARCH__FACETS__DEFAULT=4 # Enable or disable the DataStore SQL search backend. Default is False. 
CKAN__DATASTORE__SQLSEARCH__ENABLED=True @@ -166,7 +172,7 @@ CKAN__LOCALE_ORDER="es en pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru CKAN__LOCALES_OFFERED="es en pt_BR ja it cs_CZ ca fr el sv sr sr@latin no sk fi ru de pl nl bg ko_KR hu sa sl lv" # Extensions -CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester schemingdcat_postgres_harvester schemingdcat_open_data_statistics harvest pdf_view pages fluent envvars" +CKAN__PLUGINS="activity stats image_view video_view audio_view webpage_view text_view datatables_view resourcedictionary datastore xloader spatial_metadata spatial_query spatial_harvest_metadata_api csw_harvester waf_harvester doc_harvester resource_proxy geo_view geojson_view wmts_view shp_view dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface schemingdcat schemingdcat_datasets schemingdcat_groups schemingdcat_organizations schemingdcat_ckan_harvester schemingdcat_xls_harvester schemingdcat_postgres_harvester schemingdcat_open_data_statistics harvest pdf_view pages fluent openapi envvars" # ckanext-harvest CKAN__HARVEST__MQ__TYPE=redis @@ -208,8 +214,8 @@ CKAN___SCHEMING__GROUP_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/es_geodc CKAN___SCHEMING__ORGANIZATION_SCHEMAS="ckanext.schemingdcat:schemas/geodcat_ap/es_geodcat_ap_org.json" CKAN___SCHEMING__PRESETS="ckanext.schemingdcat:schemas/default_presets.json ckanext.fluent:presets.json" ## Facets: setup_scheming.sh -CKANEXT__SCHEMINGDCAT__FACET_LIST="dataset_scope hvd_category theme groups theme_eu language dcat_type groups publisher_name 
publisher_type spatial_uri owner_org res_format frequency tags tag_uri conforms_to" -CKANEXT__SCHEMINGDCAT__DEFAULT_PACKAGE_ITEM_ICON="theme" +CKANEXT__SCHEMINGDCAT__FACET_LIST="dataset_scope hvd_category theme groups theme_es language dcat_type groups publisher_name publisher_type spatial_uri owner_org res_format frequency tags tag_uri conforms_to" +CKANEXT__SCHEMINGDCAT__DEFAULT_PACKAGE_ITEM_ICON="theme_es" CKANEXT__SCHEMINGDCAT__DEFAULT_PACKAGE_ITEM_SHOW_SPATIAL=True CKANEXT__SCHEMINGDCAT__SHOW_METADATA_TEMPLATES_TOOLBAR=False CKANEXT__SCHEMINGDCAT__METADATA_TEMPLATES_SEARCH_IDENTIFIER="schemingdcat_xls-template" @@ -219,7 +225,7 @@ CKANEXT__SCHEMINGDCAT__SOCIAL_X="https://x.com/ckanproject" CKANEXT__SCHEMINGDCAT__SOCIAL_LINKEDIN="https://www.linkedin.com/company/ckanproject" CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS=True CKANEXT__SCHEMINGDCAT__OPEN_DATA_STATISTICS_THEMES=True - +CKANEXT__OPENAPI__ENDPOINTS='[{"url":"https://raw.githubusercontent.com/mjanez/ckanext-openapi/refs/heads/develop/ckanext/openapi/public/static/openapi/datastore.yaml","name":"datastore","title":{"en":"CKAN - Datastore API","es":"Portal de datos abiertos de CKAN - API Datastore"},"description":{"en":"This API provides live access to the Datastore portion of the CKAN Open Data Portal.","es":"Esta API proporciona acceso en tiempo real a la parte de Datastore del Portal de Datos Abiertos CKAN."}},{"url":"https://raw.githubusercontent.com/mjanez/ckanext-openapi/refs/heads/develop/ckanext/openapi/public/static/openapi/ckan.yaml","name":"ckan","title":{"en":"CKAN Open Data Portal - CKAN API (ES)","es":"Portal de datos abiertos de CKAN - API CKAN"},"description":{"en":"This API provides live access to the CKAN portion of the CKAN Open Data Portal.","es":"Esta API proporciona acceso en tiempo real a la parte de CKAN del Portal de Datos Abiertos CKAN."}}]' # ckanext-pages CKANEXT__PAGES__ALOW_HTML=False CKANEXT__PAGES__ORGANIZATION=True From 9df8fa945759869606901fe251e7ba139b9f1379 Mon Sep 17 
00:00:00 2001 From: mjanez <96422458+mjanez@users.noreply.github.com> Date: Mon, 4 Nov 2024 20:58:01 +0100 Subject: [PATCH 21/21] Refactor CKAN API scripts: reorganize structure, update logic, and enhance configuration handling - Add ckanapi documentation as root README. --- doc/scripts/ckanapi/.gitignore | 4 - doc/scripts/ckanapi/.old/.gitignore | 6 + doc/scripts/ckanapi/.old/README.md | 175 ++++++++ .../{ => .old}/input/sites.example.yml | 0 .../ckanapi/{ => .old}/requirements.txt | 0 .../ckanapi/{ => .old}/src/__init__.py | 0 .../ckanapi/{ => .old}/src/__main__.py | 0 doc/scripts/ckanapi/{ => .old}/src/config.py | 11 + .../ckanapi/{ => .old}/src/logic/__init__.py | 6 +- .../ckanapi/{ => .old}/src/logic/create.py | 2 + .../ckanapi/{ => .old}/src/logic/delete.py | 0 .../ckanapi/{ => .old}/src/logic/get.py | 0 .../ckanapi/{ => .old}/src/logic/update.py | 11 +- .../ckanapi/{ => .old}/src/quick_ckanapi.py | 19 +- doc/scripts/ckanapi/README.md | 391 +++++++++++++----- 15 files changed, 495 insertions(+), 130 deletions(-) delete mode 100644 doc/scripts/ckanapi/.gitignore create mode 100644 doc/scripts/ckanapi/.old/.gitignore create mode 100644 doc/scripts/ckanapi/.old/README.md rename doc/scripts/ckanapi/{ => .old}/input/sites.example.yml (100%) rename doc/scripts/ckanapi/{ => .old}/requirements.txt (100%) rename doc/scripts/ckanapi/{ => .old}/src/__init__.py (100%) rename doc/scripts/ckanapi/{ => .old}/src/__main__.py (100%) rename doc/scripts/ckanapi/{ => .old}/src/config.py (83%) rename doc/scripts/ckanapi/{ => .old}/src/logic/__init__.py (92%) rename doc/scripts/ckanapi/{ => .old}/src/logic/create.py (96%) rename doc/scripts/ckanapi/{ => .old}/src/logic/delete.py (100%) rename doc/scripts/ckanapi/{ => .old}/src/logic/get.py (100%) rename doc/scripts/ckanapi/{ => .old}/src/logic/update.py (82%) rename doc/scripts/ckanapi/{ => .old}/src/quick_ckanapi.py (70%) diff --git a/doc/scripts/ckanapi/.gitignore b/doc/scripts/ckanapi/.gitignore deleted file mode 100644 index 
7273b114..00000000 --- a/doc/scripts/ckanapi/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -venv -env -sites.yml -output/* \ No newline at end of file diff --git a/doc/scripts/ckanapi/.old/.gitignore b/doc/scripts/ckanapi/.old/.gitignore new file mode 100644 index 00000000..8a6f585c --- /dev/null +++ b/doc/scripts/ckanapi/.old/.gitignore @@ -0,0 +1,6 @@ +venv +env +sites.yml +sites.*.yml +output/* +!sites.example.yml \ No newline at end of file diff --git a/doc/scripts/ckanapi/.old/README.md b/doc/scripts/ckanapi/.old/README.md new file mode 100644 index 00000000..0184a20f --- /dev/null +++ b/doc/scripts/ckanapi/.old/README.md @@ -0,0 +1,175 @@ +# CKAN API Script + +This script allows you to interact with a CKAN instance to perform various actions such as updating packages, exporting groups, and exporting organizations. The configuration for different CKAN instances and actions is specified in a YAML file. + +## Prerequisites + +- Python 3.x + +## Setup + +### Creating a Virtual Environment + +1. Create a virtual environment: + +```sh +python3 -m venv venv +``` + +2. Activate the virtual environment: + +- On Linux and macOS: + +```sh +source venv/bin/activate +``` + +- On Windows: + +```sh +.\venv\Scripts\activate +``` + +### Installing Dependencies + +Install the required libraries using pip: + +```sh +pip install -r requirements.txt +``` + +## Configuration + +The configuration for the CKAN instances and actions is specified in a YAML file located at `./input/sites.yml`. If this file does not exist, you can use the provided `sites.example.yml` as a template. + +### Creating `sites.yml` + +1. Copy the `sites.example.yml` file to `sites.yml`: + +```sh +cp ./input/sites.example.yml ./input/sites.yml +``` + +2. Edit the `sites.yml` file to include your CKAN instance details and the actions you want to perform. 
Below is an example configuration: +```yaml +# ./input/sites.yml + +default: + ckan_site_url: 'https://demo.ckan.dcat-ap-3.es' + api_token: 'your_api_token' + actions: + - update_packages + - export_groups + - export_organizations + override: + theme_es: "http://datos.gob.es/kos/sector-publico/sector/medio-ambiente" + target_values: + publisher_name: + condition: "Sample Company" + override: + publisher_type: "http://purl.org/adms/publishertype/Company" + +site_1: + ckan_site_url: 'https://site1.ckan.instance' + api_token: 'site1_api_token' + actions: + - update_packages + - export_groups + - export_organizations + override: + theme_es: "http://site1.theme.url" + target_values: + publisher_name: + condition: "Site 1 Company" + override: + publisher_type: "http://site1.publisher.type" + +site_2: + ckan_site_url: 'https://site2.ckan.instance' + api_token: null + actions: + - export_groups + - export_organizations + +site_3: + ckan_site_url: 'https://site3.ckan.instance' + api_token: 'site3_api_token' + actions: + - export_groups + - export_organizations + - create_organizations + - create_groups + - create_users + organizations: './input/site_3/organizations.json' + groups: './input/site_3/groups.json' + users: './input/site_3/users.json' + +``` + +## Usage + +To run the script, use the following command: + +```sh +python -m src -s <site_name> +``` + +Replace `<site_name>` with the site configuration you want to use (e.g., `default`, `site_1`, `site_2`). + +### Example + +To run the script using the `site_2` configuration: + +```sh +python -m src -s site_2 +``` + +### Command-line Arguments + +- `-s` or `--site`: The site configuration to load (default: `default`). +- `-c` or `--config`: The path to the YAML configuration file (default: `./input/sites.yml`). + +### Notes + +- Actions that modify data (e.g., `update_packages`, `create`, `delete`) require a non-null `api_token`. 
+- If only `GET` actions are specified (e.g., `export_groups`, `export_organizations`), the `api_token`, `override`, and `target_values` are not required. + +## Script Details + +The script performs the following actions based on the configuration: + +1. **Update Packages**: Updates the packages in the CKAN instance based on the specified overrides and target values. +2. **Export Groups**: Exports all groups from the CKAN instance to a JSON file (`groups.json`). +3. **Export Organizations**: Exports all organizations from the CKAN instance to a JSON file (`organizations.json`). + +### Functions + +#### Connect + +- `connect_to_ckan(ckan_site_url, api_token)`: Connects to the CKAN instance. + +#### Create + +- `create_organizations(rc, json_path)`: Creates new organizations in CKAN from a JSON file. +- `create_groups(rc, json_path)`: Creates new groups in CKAN from a JSON file. +- `create_users(rc, json_path)`: Creates new users in CKAN from a JSON file. + +#### Update + +- `update_package(rc, package_id, theme_es, publisher_name, publisher_type)`: Updates a package given its ID. + +#### Get + +- `export_groups_to_json(rc, file_path)`: Exports all groups to a JSON file. +- `export_organizations_to_json(rc, file_path)`: Exports all organizations to a JSON file. + + +### Example Output + +- `groups.json`: Contains detailed information about all groups. +- `organizations.json`: Contains detailed information about all organizations. + +## License + +This project is licensed under the MIT License. 
diff --git a/doc/scripts/ckanapi/input/sites.example.yml b/doc/scripts/ckanapi/.old/input/sites.example.yml similarity index 100% rename from doc/scripts/ckanapi/input/sites.example.yml rename to doc/scripts/ckanapi/.old/input/sites.example.yml diff --git a/doc/scripts/ckanapi/requirements.txt b/doc/scripts/ckanapi/.old/requirements.txt similarity index 100% rename from doc/scripts/ckanapi/requirements.txt rename to doc/scripts/ckanapi/.old/requirements.txt diff --git a/doc/scripts/ckanapi/src/__init__.py b/doc/scripts/ckanapi/.old/src/__init__.py similarity index 100% rename from doc/scripts/ckanapi/src/__init__.py rename to doc/scripts/ckanapi/.old/src/__init__.py diff --git a/doc/scripts/ckanapi/src/__main__.py b/doc/scripts/ckanapi/.old/src/__main__.py similarity index 100% rename from doc/scripts/ckanapi/src/__main__.py rename to doc/scripts/ckanapi/.old/src/__main__.py diff --git a/doc/scripts/ckanapi/src/config.py b/doc/scripts/ckanapi/.old/src/config.py similarity index 83% rename from doc/scripts/ckanapi/src/config.py rename to doc/scripts/ckanapi/.old/src/config.py index 8da44396..641a1b06 100644 --- a/doc/scripts/ckanapi/src/config.py +++ b/doc/scripts/ckanapi/.old/src/config.py @@ -17,6 +17,7 @@ class SampleConfig: groups (str): Path to the JSON file containing group data. users (str): Path to the JSON file containing user data. override (dict): Dictionary of override values. + delete (list): List of keys to delete. target_values (dict): Dictionary of target values. 
""" def __init__(self, config): @@ -27,7 +28,14 @@ def __init__(self, config): self.groups = config.get('groups') self.users = config.get('users') self.override = config.get('override', {}) + self.delete = config.get('delete', []) self.target_values = config.get('target_values', {}) + target_site_config = config.get('target_site', {}) + self.target_ckan_site_url = target_site_config.get('ckan_site_url') + self.target_api_token = target_site_config.get('api_token') + self.target_organization_id = target_site_config.get('target_organization_id') + self.delete_all_before_copy = target_site_config.get('delete_all_before_copy', False) # Nueva opción + def load_config(config_file, site='default'): """ @@ -72,6 +80,9 @@ def parse_args(): print(f"Actions: {config.actions}") if config.override: print(f"Override: {config.override}") + #FIXME: Not working, duplicate datasets. + # if config.delete: + # print(f"Delete fields: {config.delete}") if config.target_values: print(f"Target Values: {config.target_values}") if config.organizations: diff --git a/doc/scripts/ckanapi/src/logic/__init__.py b/doc/scripts/ckanapi/.old/src/logic/__init__.py similarity index 92% rename from doc/scripts/ckanapi/src/logic/__init__.py rename to doc/scripts/ckanapi/.old/src/logic/__init__.py index 4430246f..a8459b22 100644 --- a/doc/scripts/ckanapi/src/logic/__init__.py +++ b/doc/scripts/ckanapi/.old/src/logic/__init__.py @@ -39,12 +39,10 @@ def execute_action(action_name, rc, config, output_dir): # Iterate over each package and update it for package_id in package_list: - actions[action_name](rc, package_id, config.override, config.target_values) + actions[action_name](rc, package_id, config.override, config.delete, config.target_values) elif action_name == 'create_organizations': actions[action_name](rc, config.organizations) elif action_name == 'create_groups': actions[action_name](rc, config.groups) elif action_name == 'create_users': - actions[action_name](rc, config.users) - else: - 
actions[action_name](rc, output_dir) \ No newline at end of file + actions[action_name](rc, config.users) \ No newline at end of file diff --git a/doc/scripts/ckanapi/src/logic/create.py b/doc/scripts/ckanapi/.old/src/logic/create.py similarity index 96% rename from doc/scripts/ckanapi/src/logic/create.py rename to doc/scripts/ckanapi/.old/src/logic/create.py index 017e2250..cc760163 100644 --- a/doc/scripts/ckanapi/src/logic/create.py +++ b/doc/scripts/ckanapi/.old/src/logic/create.py @@ -1,5 +1,7 @@ import json import os +from ckanapi import RemoteCKAN, NotFound, ValidationError +import uuid def load_json(file_path): """ diff --git a/doc/scripts/ckanapi/src/logic/delete.py b/doc/scripts/ckanapi/.old/src/logic/delete.py similarity index 100% rename from doc/scripts/ckanapi/src/logic/delete.py rename to doc/scripts/ckanapi/.old/src/logic/delete.py diff --git a/doc/scripts/ckanapi/src/logic/get.py b/doc/scripts/ckanapi/.old/src/logic/get.py similarity index 100% rename from doc/scripts/ckanapi/src/logic/get.py rename to doc/scripts/ckanapi/.old/src/logic/get.py diff --git a/doc/scripts/ckanapi/src/logic/update.py b/doc/scripts/ckanapi/.old/src/logic/update.py similarity index 82% rename from doc/scripts/ckanapi/src/logic/update.py rename to doc/scripts/ckanapi/.old/src/logic/update.py index 0b3ceaed..2f263bbd 100644 --- a/doc/scripts/ckanapi/src/logic/update.py +++ b/doc/scripts/ckanapi/.old/src/logic/update.py @@ -31,7 +31,7 @@ def apply_target_values(package, target_values): package = apply_overrides(package, overrides) return package -def update_package(rc, package_id, overrides, target_values): +def update_package(rc, package_id, overrides, deletes, target_values=None): """ Update a package given its ID. @@ -39,8 +39,15 @@ def update_package(rc, package_id, overrides, target_values): rc (RemoteCKAN): The CKAN instance connection. package_id (str): The ID of the package. overrides (dict): The dictionary of fields to override. 
- target_values (dict): The dictionary of conditions and overrides. + deletes (list): The list of keys to delete. + target_values (dict, optional): The dictionary of conditions and overrides. Defaults to {}. + + Returns: + None """ + if target_values is None: + target_values = {} + package = rc.action.package_show(id=package_id) # Apply overrides diff --git a/doc/scripts/ckanapi/src/quick_ckanapi.py b/doc/scripts/ckanapi/.old/src/quick_ckanapi.py similarity index 70% rename from doc/scripts/ckanapi/src/quick_ckanapi.py rename to doc/scripts/ckanapi/.old/src/quick_ckanapi.py index 63e8c751..81cefe2a 100644 --- a/doc/scripts/ckanapi/src/quick_ckanapi.py +++ b/doc/scripts/ckanapi/.old/src/quick_ckanapi.py @@ -18,15 +18,15 @@ def connect_to_ckan(ckan_site_url, api_token): def is_modifying_action(action_name): """ - Determine if an action modifies data based on its name. + Determina si una acción modifica datos basándose en su nombre. Args: - action_name (str): The name of the action. + action_name (str): El nombre de la acción. Returns: - bool: True if the action modifies data, False otherwise. + bool: True si la acción modifica datos, False de lo contrario. """ - modifying_prefixes = ('update_', 'create_', 'delete_') + modifying_prefixes = ('update_', 'create_', 'delete_', 'copy_') return action_name.startswith(modifying_prefixes) def main(config): @@ -36,9 +36,6 @@ def main(config): Args: config (SampleConfig): The configuration object. 
""" - # Connect to the CKAN instance - rc = connect_to_ckan(config.ckan_site_url, config.api_token) - # Create output directories if they don't exist output_dir = os.path.join('output', config.site) os.makedirs(output_dir, exist_ok=True) @@ -51,7 +48,13 @@ def main(config): # Execute actions based on the configuration for action in config.actions: - execute_action(action, rc, config, output_dir) + if action == "copy_datasets": + # For copy_datasets, connections are handled within the function + execute_action(action, None, config, output_dir) + else: + # Connect to the CKAN instance + rc = connect_to_ckan(config.ckan_site_url, config.api_token) + execute_action(action, rc, config, output_dir) print("Actions completed.") diff --git a/doc/scripts/ckanapi/README.md b/doc/scripts/ckanapi/README.md index 0184a20f..281bb088 100644 --- a/doc/scripts/ckanapi/README.md +++ b/doc/scripts/ckanapi/README.md @@ -1,175 +1,342 @@ -# CKAN API Script +# ckanapi +A command line interface and Python module for accessing the [CKAN Action API](http://docs.ckan.org/en/latest/api/index.html#action-api-reference) -This script allows you to interact with a CKAN instance to perform various actions such as updating packages, exporting groups, and exporting organizations. The configuration for different CKAN instances and actions is specified in a YAML file. +>[!NOTE] +> `ckanapi`: https://github.com/ckan/ckanapi -## Prerequisites +## Installation -- Python 3.x +Installation with pip: +``` +pip install ckanapi +``` + +Installation with conda: +``` +conda install -c conda-forge ckanapi +``` + + +## ckanapi CLI + +The ckanapi command line interface lets you access local and +remote CKAN instances for bulk operations and simple API actions. + + +### Actions -## Setup +Simple actions with string parameters may be called directly. The +response is pretty-printed to STDOUT. -### Creating a Virtual Environment +used to adjust these limits. 
`CKANAPI_MY_SITES` (comma-delimited list of CKAN urls) +will not have the `PARALLEL_LIMIT` applied. -1. Create a virtual environment: +`dump` and `load` jobs may be resumed from the last completed +record or split across multiple servers by specifying record +start and max values. -```sh -python3 -m venv venv +#### 🔧 Dump datasets from CKAN into a local file with 4 processes + +``` +$ ckanapi dump datasets --all -O datasets.jsonl.gz -z -p 4 -r http://localhost ``` -2. Activate the virtual environment: +#### 🔧 Export datasets including private ones using search -- On Linux and macOS: +``` +$ ckanapi search datasets include_private=true -O datasets.jsonl.gz -z \ + -c /etc/ckan/production.ini -```sh -source venv/bin/activate +# Remote URL +$ ckanapi search datasets include_private=true -O datasets.jsonl.gz -z \ + -r http://myckan.org/catalog --apikey my_api_key ``` -- On Windows: +`search` is faster than `dump` because it calls `package_search` to retrieve +many records per call, paginating automatically. + +You may add parameters supported by `package_search` to filter the +records returned. + -```sh -.\venv\Scripts\activate +#### 🔧 Load/update datasets from a dataset JSON lines file with 3 processes + +``` +$ ckanapi load datasets -I datasets.jsonl.gz -z -p 3 -c /etc/ckan/production.ini + +# Remote URL +$ ckanapi load datasets -I datasets.jsonl.gz -z -p 3 -r http://myckan.org/catalog --apikey my_api_key ``` -### Installing Dependencies -Install the required libraries using pip: +### Bulk Delete + +Datasets, groups, organizations, users and related items may be deleted in +bulk with the delete command. This command accepts ids or names on the +command line or a number of different formats piped on standard input. -```sh -pip install -r requirements.txt + +```bash + ckanapi delete (datasets | groups | organizations | users | related) + (ID_OR_NAME ... 
| [-I JSONL_INPUT] [-s START] [-m MAX]) + [-p PROCESSES] [-l LOG_FILE] [-qwz] + [[-c CONFIG] [-u USER] | -r SITE_URL [-a APIKEY] [--insecure]] ``` -## Configuration +#### 🔧 All datasets (JSON list of "id" or "name" values) +``` +$ ckanapi action package_list -j | ckanapi delete datasets -The configuration for the CKAN instances and actions is specified in a YAML file located at `./input/sites.yml`. If this file does not exist, you can use the provided `sites.example.yml` as a template. +$ ckanapi action package_list -j -r http://ckan.source.org/catalog --apikey my_api_key | ckanapi delete datasets -r https://ckan.target.org -u ckan_admin -a target_api_key -### Creating `sites.yml` +``` -1. Copy the `sites.example.yml` file to `sites.yml`: +#### 🔧 Selective delete (JSON object with "results" list containing "id" values) +``` +$ ckanapi action package_search q=ponies | ckanapi delete datasets +``` -```sh -cp ./input/sites.example.yml ./input/sites.yml +#### 🔧 Processed JSON Lines (JSON objects with "id" or "name" value, one per line) ``` +$ ckanapi dump groups --all > groups.jsonl +$ grep ponies groups.jsonl | ckanapi delete groups -2. Edit the `sites.yml` file to include your CKAN instance details and the actions you want to perform. 
Below is an example configuration: -```yaml -# ./input/sites.yml +# Remote delete datasets +$ ckanapi action package_list -j -r http://source.ckan.org/catalog --apikey my_api_key > default.jsonl +$ ckanapi delete datasets -I default.jsonl -p 3 -l log.log -u ckan_admin -r http://target.ckan.org/catalog --apikey my_api_key -default: - ckan_site_url: 'https://demo.ckan.dcat-ap-3.es' - api_token: 'your_api_token' - actions: - - update_packages - - export_groups - - export_organizations - override: - theme_es: "http://datos.gob.es/kos/sector-publico/sector/medio-ambiente" - target_values: - publisher_name: - condition: "Sample Company" - override: - publisher_type: "http://purl.org/adms/publishertype/Company" +``` -site_1: - ckan_site_url: 'https://site1.ckan.instance' - api_token: 'site1_api_token' - actions: - - update_packages - - export_groups - - export_organizations - override: - theme_es: "http://site1.theme.url" - target_values: - publisher_name: - condition: "Site 1 Company" - override: - publisher_type: "http://site1.publisher.type" +#### 🔧 Text list of "id" or "name" values (one per line) +``` +$ cat users_to_remove.txt +fred +bill +larry +$ ckanapi delete users < users_to_remove.txt -site_2: - ckan_site_url: 'https://site2.ckan.instance' - api_token: null - actions: - - export_groups - - export_organizations -site_3: - ckan_site_url: 'https://site2.ckan.instance' - api_token: 'site2_api_token' - actions: - - export_groups - - export_organizations - - create_organizations - - create_groups - - create_users - organizations: './input/site_3/organizations.json' - groups: './input/site_3/groups.json' - users: './input/site_3/users.json' +## Datasets +ckanapi delete users -r http://myckan.org/catalog --apikey my_api_key < output_pre.txt ``` -## Usage -To run the script, use the following command: +### Bulk Dataset and Resource Export - datapackage.json format + +Datasets may be exported to a simplified +[datapackage.json 
format](http://dataprotocols.org/data-packages/) +(which includes the actual resources, where available). -```sh -python -m src -s +If the resource url is not available, the resource will be included +in the datapackage.json file but the actual resource data will not be downloaded. + +``` +$ ckanapi dump datasets --all --datapackages=./output_directory/ -r http://sourceckan.example.com ``` -Replace `` with the site configuration you want to use (e.g., `default`, `site_1`, `site_2`). +### Batch Actions -### Example +Run a set of actions from a JSON lines file. For local actions this is much faster than running +`ckanapi action ...` in a shell loop because the local start-up time only happens once. -To run the script using the `site_2` configuration: +Batch actions can also be run in parallel with multiple processes and errors logged, just like the +dump and load commands. -```sh -python -m src -s site_2 +#### 🔧 Update a dataset field across a number of datasets +``` +$ cat update-emails.jsonl +{"action":"package_patch","data":{"id":"dataset-1","maintainer_email":"new@example.com"}} +{"action":"package_patch","data":{"id":"dataset-2","maintainer_email":"new@example.com"}} +{"action":"package_patch","data":{"id":"dataset-3","maintainer_email":"new@example.com"}} +$ ckanapi batch -I update-emails.jsonl ``` -### Command-line Arguments +#### 🔧 Replace a set of uploaded files +``` +$ cat upload-files.jsonl +{"action":"resource_patch","data":{"id":"408e1b1d-d0ca-50ca-9ae6-aedcee37aaa9"},"files":{"upload":"data1.csv"}} +{"action":"resource_patch","data":{"id":"c1eab17f-c2d0-536d-a3f6-41a3dfe6a2c3"},"files":{"upload":"data2.csv"}} +{"action":"resource_patch","data":{"id":"8ed068c2-4d4c-5f20-90db-39d2d596ce1a"},"files":{"upload":"data3.csv"}} +$ ckanapi batch -I upload-files.jsonl --local-files +``` -- `-s` or `--site`: The site configuration to load (default: `default`). -- `-c` or `--config`: The path to the YAML configuration file (default: `./input/sites.yml`). 
+The `"files"` values in the JSON lines file are ignored unless the `--local-files` parameter is passed. +Paths in the JSON lines file reference files on the local filesystem relative to the current working +directory. -### Notes +### Shell pipelines -- Actions that modify data (e.g., `update_packages`, `create`, `delete`) require a non-null `api_token`. -- If only `GET` actions are specified (e.g., `export_groups`, `export_organizations`), the `api_token`, `override`, and `target_values` are not required. +Simple shell pipelines are possible with the CLI. -## Script Details +#### 🔧 Copy the name of a dataset to its title with 'jq' +``` +$ ckanapi action package_show id=my-dataset \ + | jq '.+{"title":.name}' \ + | ckanapi action package_update -i +``` -The script performs the following actions based on the configuration: +#### 🔧 Mirror all datasets from one CKAN instance to another +``` +$ ckanapi dump datasets --all -q -r http://sourceckan.example.com \ + | ckanapi load datasets +``` -1. **Update Packages**: Updates the packages in the CKAN instance based on the specified overrides and target values. -2. **Export Groups**: Exports all groups from the CKAN instance to a JSON file (`groups.json`). -3. **Export Organizations**: Exports all organizations from the CKAN instance to a JSON file (`organizations.json`). 
+* `ValidationError` - field errors listed in `.error_dict` +* `SearchQueryError` - error reported from SOLR index +* `SearchError` +* `CKANAPIError` - incorrect use of ckanapi or unable to parse response +* `ServerIncompatibleError` - the remote API is not a CKAN API + +When using an action shortcut or the `call_action` method +failures are raised as exceptions just like when calling `get_action` from a +CKAN plugin: + +```python +from ckanapi import RemoteCKAN, NotAuthorized +ua = 'ckanapiexample/1.0 (+http://example.com/my/website)' + +demo = RemoteCKAN('https://demo.ckan.org', apikey='phony-key', user_agent=ua) +try: + pkg = demo.action.package_create(name='my-dataset', title='not going to work') +except NotAuthorized: + print('denied') +``` -### Functions +When it is possible to `import ckan` all the ckanapi exception classes are +replaced with the CKAN exceptions with the same names. -#### Connect -- `connect_to_ckan(ckan_site_url, api_token)`: Connects to the CKAN instance. +### File Uploads -#### Create +File uploads for CKAN 2.2+ are supported by passing file-like objects to action +shortcut methods: -- `create_organizations(rc, json_path)`: Creates new organizations in CKAN from a JSON file. -- `create_groups(rc, json_path)`: Creates new groups in CKAN from a JSON file. -- `create_users(rc, json_path)`: Creates new users in CKAN from a JSON file. +```python +from ckanapi import RemoteCKAN +ua = 'ckanapiexample/1.0 (+http://example.com/my/website)' -#### Update +mysite = RemoteCKAN('http://myckan.example.com', apikey='real-key', user_agent=ua) +mysite.action.resource_create( + package_id='my-dataset-with-files', + url='dummy-value', # ignored but required by CKAN<2.6 + upload=open('/path/to/file/to/upload.csv', 'rb')) +``` -- `update_package(rc, package_id, theme_es, publisher_name, publisher_type)`: Updates a package given its ID. 
+When using `call_action` you must pass file objects separately: -#### Get +```python +mysite.call_action('resource_create', + {'package_id': 'my-dataset-with-files'}, + files={'upload': open('/path/to/file/to/upload.csv', 'rb')}) +``` + + +### List all private datasets +```py +from ckanapi import RemoteCKAN, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError +import requests + +ua = 'ckanapiexample/1.0 (+http://example.com/my/website)' + +# Configura tu instancia de CKAN y tu API key +ckan_instance = RemoteCKAN('http://myckan.example.com', apikey='api_key', user_agent=ua) + +try: + # Realiza la búsqueda de paquetes + result = ckan_instance.action.package_search( + q='*:*', + include_private=True, + include_drafts=True + ) + print(f"Datasets: {result['count']}") +except (ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError) as e: + print(f"Error: {e}") +``` + +### Session Control + +As of ckanapi 4.0 RemoteCKAN will keep your HTTP connection open using a +[requests session](http://docs.python-requests.org/en/master/user/advanced/). + +For long-running scripts make sure to close your connections by using +RemoteCKAN as a context manager: -- `export_groups_to_json(rc, file_path)`: Exports all groups to a JSON file. -- `export_organizations_to_json(rc, file_path)`: Exports all organizations to a JSON file. +```python +from ckanapi import RemoteCKAN +ua = 'ckanapiexample/1.0 (+http://example.com/my/website)' + +with RemoteCKAN('https://demo.ckan.org', user_agent=ua) as demo: + groups = demo.action.group_list(id='data-explorer') +print(groups) +``` + +Or by explicitly calling `RemoteCKAN.close()`. + +### LocalCKAN + +A similar class is provided for accessing local CKAN instances from a plugin in +the same way as remote CKAN instances. 
+Unlike [CKAN's get_action](http://docs.ckan.org/en/latest/extensions/plugins-toolkit.html?highlight=get_action#ckan.plugins.toolkit.get_action) +LocalCKAN prevents data from one action +call leaking into the next which can cause issues that are very hard to debug. + +This class defaults to using the site user with full access. + +```python +from ckanapi import LocalCKAN, ValidationError + +registry = LocalCKAN() +try: + registry.action.package_create(name='my-dataset', title='this will work fine') +except ValidationError: + print('unless my-dataset already exists') +``` + +For extra caution pass a blank username to LocalCKAN and only actions allowed +by anonymous users will be permitted. + +```python +from ckanapi import LocalCKAN + +anon = LocalCKAN(username='') +print(anon.action.status_show()) +``` + +#### Extra Logging + +To enable extra info logging for the execution of LocalCKAN ckanapi commands, you can enable the config option in your CKAN INI file. + +``` +ckanapi.log_local = True +``` + +The output of the log will look like: + +``` +INFO [ckan.ckanapi] OS User executed LocalCKAN: ckanapi +``` + +### TestAppCKAN + +A class is provided for making action requests to a +[webtest.TestApp](http://webtest.readthedocs.org/en/latest/testapp.html) +instance for use in CKAN tests: + +```python +from ckanapi import TestAppCKAN +from webtest import TestApp + +test_app = TestApp(...) +demo = TestAppCKAN(test_app, apikey='my-test-key') +groups = demo.action.group_list(id='data-explorer') +``` -### Example Output +## Tests -- `groups.json`: Contains detailed information about all groups. -- `organizations.json`: Contains detailed information about all organizations. +To run the tests: -## License + python setup.py test -This project is licensed under the MIT License.