From aa4e6901767706f6c3bcdb01fe1fe4f4b9b7f46e Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Mon, 6 May 2024 12:26:28 +0000 Subject: [PATCH 01/15] add basic job runner --- .vscode/settings.json | 3 ++ requirements/fastapi_requirements.txt | 38 +++++++------- requirements/main.txt | 38 +++++++------- requirements/satosa_scim_requirements.txt | 38 +++++++------- requirements/test_requirements.txt | 51 +++++++++++-------- requirements/webapp_requirements.txt | 44 ++++++++-------- requirements/worker_requirements.in | 1 + requirements/worker_requirements.txt | 45 +++++++++------- src/eduid/dev-extra-modules.txt | 0 src/eduid/userdb/user_cleaner/db.py | 35 +++++++++++++ src/eduid/workers/job_runner/__init__.py | 0 src/eduid/workers/job_runner/app.py | 45 ++++++++++++++++ src/eduid/workers/job_runner/config.py | 28 ++++++++++ src/eduid/workers/job_runner/context.py | 30 +++++++++++ src/eduid/workers/job_runner/jobs/skv.py | 16 ++++++ src/eduid/workers/job_runner/run.py | 10 ++++ src/eduid/workers/job_runner/scheduler.py | 42 +++++++++++++++ src/eduid/workers/job_runner/status.py | 62 +++++++++++++++++++++++ 18 files changed, 412 insertions(+), 114 deletions(-) create mode 100755 src/eduid/dev-extra-modules.txt create mode 100644 src/eduid/userdb/user_cleaner/db.py create mode 100644 src/eduid/workers/job_runner/__init__.py create mode 100644 src/eduid/workers/job_runner/app.py create mode 100644 src/eduid/workers/job_runner/config.py create mode 100644 src/eduid/workers/job_runner/context.py create mode 100644 src/eduid/workers/job_runner/jobs/skv.py create mode 100644 src/eduid/workers/job_runner/run.py create mode 100644 src/eduid/workers/job_runner/scheduler.py create mode 100644 src/eduid/workers/job_runner/status.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 3f08348cb..a97b8628a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -76,4 +76,7 @@ "Werkzeug", "zxcvbn" ], + "mypy-type-checker.args": [ + "--ignore-missing-imports" 
+ ], } \ No newline at end of file diff --git a/requirements/fastapi_requirements.txt b/requirements/fastapi_requirements.txt index 078e1ee67..7d15691ea 100644 --- a/requirements/fastapi_requirements.txt +++ b/requirements/fastapi_requirements.txt @@ -70,15 +70,15 @@ bleach==6.1.0 \ # via # -c main.txt # -r main.in -boto3==1.34.76 \ - --hash=sha256:530a4cea3d40a6bd2f15a368ea395beef1ea6dff4491823bc48bd20c7d4da655 \ - --hash=sha256:8c598382e8fb61cfa8f75056197e9b509eb52039ebc291af3b1096241ba2542c +boto3==1.34.81 \ + --hash=sha256:004dad209d37b3d2df88f41da13b7ad702a751904a335fac095897ff7a19f82b \ + --hash=sha256:18224d206a8a775bcaa562d22ed3d07854934699190e12b52fcde87aac76a80e # via # -c main.txt # -r main.in -botocore==1.34.76 \ - --hash=sha256:62e45e7374844ee39e86a96fe7f5e973eb5bf3469da028b4e3a8caba0909fb1f \ - --hash=sha256:68be44487a95132fccbc0b836fded4190dae30324f6bf822e1b6efd385ffdc83 +botocore==1.34.81 \ + --hash=sha256:85f6fd7c5715eeef7a236c50947de00f57d72e7439daed1125491014b70fab01 \ + --hash=sha256:f79bf122566cc1f09d71cc9ac9fcf52d47ba48b761cbc3f064017b36a3c40eb8 # via # -c main.txt # boto3 @@ -365,9 +365,9 @@ dnspython==2.6.1 \ # -c main.txt # email-validator # pymongo -ecdsa==0.18.0 \ - --hash=sha256:190348041559e21b22a1d65cee485282ca11a6f81d503fddb84d5017e9ed1e49 \ - --hash=sha256:80600258e7ed2f16b9aa1d7c295bd70194109ad5a30fdee0eaeefef1d4c559dd +ecdsa==0.19.0 \ + --hash=sha256:2cea9b88407fdac7bbeca0833b189e4c9c53f2ef1e1eaa29f6224dbc809b707a \ + --hash=sha256:60eaad1199659900dd0af521ed462b793bbdf867432b3948e87416ae4caf6bf8 # via # -c main.txt # python-jose @@ -661,6 +661,7 @@ lxml==5.2.1 \ --hash=sha256:b070bbe8d3f0f6147689bed981d19bbb33070225373338df755a46893528104a \ --hash=sha256:b0b58fbfa1bf7367dde8a557994e3b1637294be6cf2169810375caf8571a085c \ --hash=sha256:b560e3aa4b1d49e0e6c847d72665384db35b2f5d45f8e6a5c0072e0283430533 \ + --hash=sha256:b6241d4eee5f89453307c2f2bfa03b50362052ca0af1efecf9fef9a41a22bb4f \ 
--hash=sha256:b6787b643356111dfd4032b5bffe26d2f8331556ecb79e15dacb9275da02866e \ --hash=sha256:bcbf4af004f98793a95355980764b3d80d47117678118a44a80b721c9913436a \ --hash=sha256:beb72935a941965c52990f3a32d7f07ce869fe21c6af8b34bf6a277b33a345d3 \ @@ -847,9 +848,9 @@ packaging==24.0 \ pbkdf2==1.3 \ --hash=sha256:ac6397369f128212c43064a2b4878038dab78dab41875364554aaf2a684e6979 # via ndnkdf -phonenumbers==8.13.33 \ - --hash=sha256:991f2619f0593b36b674c345af47944ec4bae526b353cf53d707e662087be63b \ - --hash=sha256:f2d653268ece55a4f3752d9cda4be6f7465f298e6d028d522aedda13cf057201 +phonenumbers==8.13.34 \ + --hash=sha256:7c2676be07b7d0f74411e275e0660380a0ec3ee0d359f070d719424bd2c5f62e \ + --hash=sha256:bc0bb5d3bab29e28549194f6bf57cb3ca03c3dd84238af12674fe24031631bda # via # -c main.txt # -r main.in @@ -1292,9 +1293,9 @@ pyparsing==3.1.2 \ # via # -c main.txt # httplib2 -pypdf==4.1.0 \ - --hash=sha256:01c3257ec908676efd60a4537e525b89d48e0852bc92b4e0aa4cc646feda17cc \ - --hash=sha256:16cac912a05200099cef3f347c4c7e0aaf0a6d027603b8f9a973c0ea500dff89 +pypdf==4.2.0 \ + --hash=sha256:dc035581664e0ad717e3492acebc1a5fc23dba759e788e3d4a9fc9b1a32e72c1 \ + --hash=sha256:fe63f3f7d1dcda1c9374421a94c1bba6c6f8c4a62173a59b64ffd52058f846b1 # via # -c main.txt # xhtml2pdf @@ -1636,9 +1637,9 @@ typeguard==3.0.2 \ # via # -c main.txt # marshmallow-dataclass -typing-extensions==4.10.0 \ - --hash=sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475 \ - --hash=sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb +typing-extensions==4.11.0 \ + --hash=sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0 \ + --hash=sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a # via # -c main.txt # anyio @@ -1647,6 +1648,7 @@ typing-extensions==4.10.0 \ # marshmallow-dataclass # pydantic # pydantic-core + # pypdf # qrcode # typeguard # typing-inspect diff --git a/requirements/main.txt b/requirements/main.txt index de8eb77a5..16e4021fe 
100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -49,13 +49,13 @@ bleach==6.1.0 \ --hash=sha256:0a31f1837963c41d46bbf1331b8778e1308ea0791db03cc4e7357b97cf42a8fe \ --hash=sha256:3225f354cfc436b9789c66c4ee030194bee0568fbf9cbdad3bc8b5c26c5f12b6 # via -r main.in -boto3==1.34.76 \ - --hash=sha256:530a4cea3d40a6bd2f15a368ea395beef1ea6dff4491823bc48bd20c7d4da655 \ - --hash=sha256:8c598382e8fb61cfa8f75056197e9b509eb52039ebc291af3b1096241ba2542c +boto3==1.34.81 \ + --hash=sha256:004dad209d37b3d2df88f41da13b7ad702a751904a335fac095897ff7a19f82b \ + --hash=sha256:18224d206a8a775bcaa562d22ed3d07854934699190e12b52fcde87aac76a80e # via -r main.in -botocore==1.34.76 \ - --hash=sha256:62e45e7374844ee39e86a96fe7f5e973eb5bf3469da028b4e3a8caba0909fb1f \ - --hash=sha256:68be44487a95132fccbc0b836fded4190dae30324f6bf822e1b6efd385ffdc83 +botocore==1.34.81 \ + --hash=sha256:85f6fd7c5715eeef7a236c50947de00f57d72e7439daed1125491014b70fab01 \ + --hash=sha256:f79bf122566cc1f09d71cc9ac9fcf52d47ba48b761cbc3f064017b36a3c40eb8 # via # boto3 # s3transfer @@ -312,9 +312,9 @@ dnspython==2.6.1 \ # via # email-validator # pymongo -ecdsa==0.18.0 \ - --hash=sha256:190348041559e21b22a1d65cee485282ca11a6f81d503fddb84d5017e9ed1e49 \ - --hash=sha256:80600258e7ed2f16b9aa1d7c295bd70194109ad5a30fdee0eaeefef1d4c559dd +ecdsa==0.19.0 \ + --hash=sha256:2cea9b88407fdac7bbeca0833b189e4c9c53f2ef1e1eaa29f6224dbc809b707a \ + --hash=sha256:60eaad1199659900dd0af521ed462b793bbdf867432b3948e87416ae4caf6bf8 # via python-jose elementpath==4.4.0 \ --hash=sha256:cda092281afe508ece1bf65373905b30196c9426f3730cfea46059e103a131bd \ @@ -522,6 +522,7 @@ lxml==5.2.1 \ --hash=sha256:b070bbe8d3f0f6147689bed981d19bbb33070225373338df755a46893528104a \ --hash=sha256:b0b58fbfa1bf7367dde8a557994e3b1637294be6cf2169810375caf8571a085c \ --hash=sha256:b560e3aa4b1d49e0e6c847d72665384db35b2f5d45f8e6a5c0072e0283430533 \ + --hash=sha256:b6241d4eee5f89453307c2f2bfa03b50362052ca0af1efecf9fef9a41a22bb4f \ 
--hash=sha256:b6787b643356111dfd4032b5bffe26d2f8331556ecb79e15dacb9275da02866e \ --hash=sha256:bcbf4af004f98793a95355980764b3d80d47117678118a44a80b721c9913436a \ --hash=sha256:beb72935a941965c52990f3a32d7f07ce869fe21c6af8b34bf6a277b33a345d3 \ @@ -676,9 +677,9 @@ packaging==24.0 \ # via # gunicorn # marshmallow -phonenumbers==8.13.33 \ - --hash=sha256:991f2619f0593b36b674c345af47944ec4bae526b353cf53d707e662087be63b \ - --hash=sha256:f2d653268ece55a4f3752d9cda4be6f7465f298e6d028d522aedda13cf057201 +phonenumbers==8.13.34 \ + --hash=sha256:7c2676be07b7d0f74411e275e0660380a0ec3ee0d359f070d719424bd2c5f62e \ + --hash=sha256:bc0bb5d3bab29e28549194f6bf57cb3ca03c3dd84238af12674fe24031631bda # via -r main.in pillow==10.3.0 \ --hash=sha256:048ad577748b9fa4a99a0548c64f2cb8d672d5bf2e643a739ac8faff1164238c \ @@ -1048,9 +1049,9 @@ pyparsing==3.1.2 \ --hash=sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad \ --hash=sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742 # via httplib2 -pypdf==4.1.0 \ - --hash=sha256:01c3257ec908676efd60a4537e525b89d48e0852bc92b4e0aa4cc646feda17cc \ - --hash=sha256:16cac912a05200099cef3f347c4c7e0aaf0a6d027603b8f9a973c0ea500dff89 +pypdf==4.2.0 \ + --hash=sha256:dc035581664e0ad717e3492acebc1a5fc23dba759e788e3d4a9fc9b1a32e72c1 \ + --hash=sha256:fe63f3f7d1dcda1c9374421a94c1bba6c6f8c4a62173a59b64ffd52058f846b1 # via xhtml2pdf pypng==0.20220715.0 \ --hash=sha256:4a43e969b8f5aaafb2a415536c1a8ec7e341cd6a3f957fd5b5f32a4cfeed902c \ @@ -1334,15 +1335,16 @@ typeguard==3.0.2 \ --hash=sha256:bbe993854385284ab42fd5bd3bee6f6556577ce8b50696d6cb956d704f286c8e \ --hash=sha256:fee5297fdb28f8e9efcb8142b5ee219e02375509cd77ea9d270b5af826358d5a # via marshmallow-dataclass -typing-extensions==4.10.0 \ - --hash=sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475 \ - --hash=sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb +typing-extensions==4.11.0 \ + 
--hash=sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0 \ + --hash=sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a # via # anyio # jwcrypto # marshmallow-dataclass # pydantic # pydantic-core + # pypdf # qrcode # typeguard # typing-inspect diff --git a/requirements/satosa_scim_requirements.txt b/requirements/satosa_scim_requirements.txt index 9a2cd30f2..97f256407 100644 --- a/requirements/satosa_scim_requirements.txt +++ b/requirements/satosa_scim_requirements.txt @@ -68,15 +68,15 @@ bleach==6.1.0 \ # via # -c main.txt # -r main.in -boto3==1.34.76 \ - --hash=sha256:530a4cea3d40a6bd2f15a368ea395beef1ea6dff4491823bc48bd20c7d4da655 \ - --hash=sha256:8c598382e8fb61cfa8f75056197e9b509eb52039ebc291af3b1096241ba2542c +boto3==1.34.81 \ + --hash=sha256:004dad209d37b3d2df88f41da13b7ad702a751904a335fac095897ff7a19f82b \ + --hash=sha256:18224d206a8a775bcaa562d22ed3d07854934699190e12b52fcde87aac76a80e # via # -c main.txt # -r main.in -botocore==1.34.76 \ - --hash=sha256:62e45e7374844ee39e86a96fe7f5e973eb5bf3469da028b4e3a8caba0909fb1f \ - --hash=sha256:68be44487a95132fccbc0b836fded4190dae30324f6bf822e1b6efd385ffdc83 +botocore==1.34.81 \ + --hash=sha256:85f6fd7c5715eeef7a236c50947de00f57d72e7439daed1125491014b70fab01 \ + --hash=sha256:f79bf122566cc1f09d71cc9ac9fcf52d47ba48b761cbc3f064017b36a3c40eb8 # via # -c main.txt # boto3 @@ -364,9 +364,9 @@ dnspython==2.6.1 \ # -c main.txt # email-validator # pymongo -ecdsa==0.18.0 \ - --hash=sha256:190348041559e21b22a1d65cee485282ca11a6f81d503fddb84d5017e9ed1e49 \ - --hash=sha256:80600258e7ed2f16b9aa1d7c295bd70194109ad5a30fdee0eaeefef1d4c559dd +ecdsa==0.19.0 \ + --hash=sha256:2cea9b88407fdac7bbeca0833b189e4c9c53f2ef1e1eaa29f6224dbc809b707a \ + --hash=sha256:60eaad1199659900dd0af521ed462b793bbdf867432b3948e87416ae4caf6bf8 # via # -c main.txt # python-jose @@ -614,6 +614,7 @@ lxml==5.2.1 \ --hash=sha256:b070bbe8d3f0f6147689bed981d19bbb33070225373338df755a46893528104a \ 
--hash=sha256:b0b58fbfa1bf7367dde8a557994e3b1637294be6cf2169810375caf8571a085c \ --hash=sha256:b560e3aa4b1d49e0e6c847d72665384db35b2f5d45f8e6a5c0072e0283430533 \ + --hash=sha256:b6241d4eee5f89453307c2f2bfa03b50362052ca0af1efecf9fef9a41a22bb4f \ --hash=sha256:b6787b643356111dfd4032b5bffe26d2f8331556ecb79e15dacb9275da02866e \ --hash=sha256:bcbf4af004f98793a95355980764b3d80d47117678118a44a80b721c9913436a \ --hash=sha256:beb72935a941965c52990f3a32d7f07ce869fe21c6af8b34bf6a277b33a345d3 \ @@ -791,9 +792,9 @@ packaging==24.0 \ # -c main.txt # gunicorn # marshmallow -phonenumbers==8.13.33 \ - --hash=sha256:991f2619f0593b36b674c345af47944ec4bae526b353cf53d707e662087be63b \ - --hash=sha256:f2d653268ece55a4f3752d9cda4be6f7465f298e6d028d522aedda13cf057201 +phonenumbers==8.13.34 \ + --hash=sha256:7c2676be07b7d0f74411e275e0660380a0ec3ee0d359f070d719424bd2c5f62e \ + --hash=sha256:bc0bb5d3bab29e28549194f6bf57cb3ca03c3dd84238af12674fe24031631bda # via # -c main.txt # -r main.in @@ -1204,9 +1205,9 @@ pyparsing==3.1.2 \ # via # -c main.txt # httplib2 -pypdf==4.1.0 \ - --hash=sha256:01c3257ec908676efd60a4537e525b89d48e0852bc92b4e0aa4cc646feda17cc \ - --hash=sha256:16cac912a05200099cef3f347c4c7e0aaf0a6d027603b8f9a973c0ea500dff89 +pypdf==4.2.0 \ + --hash=sha256:dc035581664e0ad717e3492acebc1a5fc23dba759e788e3d4a9fc9b1a32e72c1 \ + --hash=sha256:fe63f3f7d1dcda1c9374421a94c1bba6c6f8c4a62173a59b64ffd52058f846b1 # via # -c main.txt # xhtml2pdf @@ -1541,9 +1542,9 @@ typeguard==3.0.2 \ # via # -c main.txt # marshmallow-dataclass -typing-extensions==4.10.0 \ - --hash=sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475 \ - --hash=sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb +typing-extensions==4.11.0 \ + --hash=sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0 \ + --hash=sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a # via # -c main.txt # anyio @@ -1551,6 +1552,7 @@ typing-extensions==4.10.0 \ # 
marshmallow-dataclass # pydantic # pydantic-core + # pypdf # qrcode # typeguard # typing-inspect diff --git a/requirements/test_requirements.txt b/requirements/test_requirements.txt index 6ba0c3440..ef69af923 100644 --- a/requirements/test_requirements.txt +++ b/requirements/test_requirements.txt @@ -32,6 +32,10 @@ anyio==4.3.0 \ # httpx # starlette # watchfiles +apscheduler==3.10.4 \ + --hash=sha256:e6df071b27d9be898e486bc7940a7be50b4af2e9da7c08f0744a96d4bd4cef4a \ + --hash=sha256:fb91e8a768632a4756a585f79ec834e0e27aad5860bac7eaa523d9ccefd87661 + # via -r worker_requirements.in arabic-reshaper==3.0.0 \ --hash=sha256:3f71d5034bb694204a239a6f1ebcf323ac3c5b059de02259235e2016a1a5e2dc \ --hash=sha256:ffcd13ba5ec007db71c072f5b23f420da92ac7f268512065d49e790e62237099 @@ -106,15 +110,15 @@ blinker==1.7.0 \ # flask # flask-mail # raven -boto3==1.34.76 \ - --hash=sha256:530a4cea3d40a6bd2f15a368ea395beef1ea6dff4491823bc48bd20c7d4da655 \ - --hash=sha256:8c598382e8fb61cfa8f75056197e9b509eb52039ebc291af3b1096241ba2542c +boto3==1.34.81 \ + --hash=sha256:004dad209d37b3d2df88f41da13b7ad702a751904a335fac095897ff7a19f82b \ + --hash=sha256:18224d206a8a775bcaa562d22ed3d07854934699190e12b52fcde87aac76a80e # via # -c main.txt # -r main.in -botocore==1.34.76 \ - --hash=sha256:62e45e7374844ee39e86a96fe7f5e973eb5bf3469da028b4e3a8caba0909fb1f \ - --hash=sha256:68be44487a95132fccbc0b836fded4190dae30324f6bf822e1b6efd385ffdc83 +botocore==1.34.81 \ + --hash=sha256:85f6fd7c5715eeef7a236c50947de00f57d72e7439daed1125491014b70fab01 \ + --hash=sha256:f79bf122566cc1f09d71cc9ac9fcf52d47ba48b761cbc3f064017b36a3c40eb8 # via # -c main.txt # boto3 @@ -473,9 +477,9 @@ dnspython==2.6.1 \ # -c main.txt # email-validator # pymongo -ecdsa==0.18.0 \ - --hash=sha256:190348041559e21b22a1d65cee485282ca11a6f81d503fddb84d5017e9ed1e49 \ - --hash=sha256:80600258e7ed2f16b9aa1d7c295bd70194109ad5a30fdee0eaeefef1d4c559dd +ecdsa==0.19.0 \ + --hash=sha256:2cea9b88407fdac7bbeca0833b189e4c9c53f2ef1e1eaa29f6224dbc809b707a \ + 
--hash=sha256:60eaad1199659900dd0af521ed462b793bbdf867432b3948e87416ae4caf6bf8 # via # -c main.txt # python-jose @@ -516,9 +520,9 @@ fido2==1.1.3 \ # -c main.txt # -r main.in # fido-mds -flask==3.0.2 \ - --hash=sha256:3232e0e9c850d781933cf0207523d1ece087eb8d87b23777ae38456e2fbe7c6e \ - --hash=sha256:822c03f4b799204250a7ee84b1eddc40665395333973dfb9deebfe425fefcb7d +flask==3.0.3 \ + --hash=sha256:34e815dfaa43340d1d15a5c3a02b8476004037eb4840b34910c6e21679d288f3 \ + --hash=sha256:ceb27b0af3823ea2737928a4d99d125a06175b8512c445cbd9a9ce200ef76842 # via # -r webapp_requirements.in # flask-babel @@ -811,6 +815,7 @@ lxml==5.2.1 \ --hash=sha256:b070bbe8d3f0f6147689bed981d19bbb33070225373338df755a46893528104a \ --hash=sha256:b0b58fbfa1bf7367dde8a557994e3b1637294be6cf2169810375caf8571a085c \ --hash=sha256:b560e3aa4b1d49e0e6c847d72665384db35b2f5d45f8e6a5c0072e0283430533 \ + --hash=sha256:b6241d4eee5f89453307c2f2bfa03b50362052ca0af1efecf9fef9a41a22bb4f \ --hash=sha256:b6787b643356111dfd4032b5bffe26d2f8331556ecb79e15dacb9275da02866e \ --hash=sha256:bcbf4af004f98793a95355980764b3d80d47117678118a44a80b721c9913436a \ --hash=sha256:beb72935a941965c52990f3a32d7f07ce869fe21c6af8b34bf6a277b33a345d3 \ @@ -1046,9 +1051,9 @@ pathspec==0.12.1 \ pbkdf2==1.3 \ --hash=sha256:ac6397369f128212c43064a2b4878038dab78dab41875364554aaf2a684e6979 # via ndnkdf -phonenumbers==8.13.33 \ - --hash=sha256:991f2619f0593b36b674c345af47944ec4bae526b353cf53d707e662087be63b \ - --hash=sha256:f2d653268ece55a4f3752d9cda4be6f7465f298e6d028d522aedda13cf057201 +phonenumbers==8.13.34 \ + --hash=sha256:7c2676be07b7d0f74411e275e0660380a0ec3ee0d359f070d719424bd2c5f62e \ + --hash=sha256:bc0bb5d3bab29e28549194f6bf57cb3ca03c3dd84238af12674fe24031631bda # via # -c main.txt # -r main.in @@ -1511,9 +1516,9 @@ pyparsing==3.1.2 \ # via # -c main.txt # httplib2 -pypdf==4.1.0 \ - --hash=sha256:01c3257ec908676efd60a4537e525b89d48e0852bc92b4e0aa4cc646feda17cc \ - 
--hash=sha256:16cac912a05200099cef3f347c4c7e0aaf0a6d027603b8f9a973c0ea500dff89 +pypdf==4.2.0 \ + --hash=sha256:dc035581664e0ad717e3492acebc1a5fc23dba759e788e3d4a9fc9b1a32e72c1 \ + --hash=sha256:fe63f3f7d1dcda1c9374421a94c1bba6c6f8c4a62173a59b64ffd52058f846b1 # via # -c main.txt # xhtml2pdf @@ -1599,6 +1604,7 @@ pytz==2024.1 \ --hash=sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319 # via # -c main.txt + # apscheduler # flask-babel # neo4j # pysaml2 @@ -1830,6 +1836,7 @@ six==1.16.0 \ --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 # via # -c main.txt + # apscheduler # bleach # ecdsa # html5lib @@ -1894,9 +1901,9 @@ typeguard==3.0.2 \ # via # -c main.txt # marshmallow-dataclass -typing-extensions==4.10.0 \ - --hash=sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475 \ - --hash=sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb +typing-extensions==4.11.0 \ + --hash=sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0 \ + --hash=sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a # via # -c main.txt # anyio @@ -1908,6 +1915,7 @@ typing-extensions==4.10.0 \ # mypy # pydantic # pydantic-core + # pypdf # qrcode # typeguard # typing-inspect @@ -1929,6 +1937,7 @@ tzlocal==5.2 \ --hash=sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e # via # -c main.txt + # apscheduler # pyhanko ua-parser==0.18.0 \ --hash=sha256:9d94ac3a80bcb0166823956a779186c746b50ea4c9fd9bf30fdb758553c38950 \ diff --git a/requirements/webapp_requirements.txt b/requirements/webapp_requirements.txt index 1a8451e97..e6f9e2b5e 100644 --- a/requirements/webapp_requirements.txt +++ b/requirements/webapp_requirements.txt @@ -80,15 +80,15 @@ blinker==1.7.0 \ # flask # flask-mail # raven -boto3==1.34.76 \ - --hash=sha256:530a4cea3d40a6bd2f15a368ea395beef1ea6dff4491823bc48bd20c7d4da655 \ - 
--hash=sha256:8c598382e8fb61cfa8f75056197e9b509eb52039ebc291af3b1096241ba2542c +boto3==1.34.81 \ + --hash=sha256:004dad209d37b3d2df88f41da13b7ad702a751904a335fac095897ff7a19f82b \ + --hash=sha256:18224d206a8a775bcaa562d22ed3d07854934699190e12b52fcde87aac76a80e # via # -c main.txt # -r main.in -botocore==1.34.76 \ - --hash=sha256:62e45e7374844ee39e86a96fe7f5e973eb5bf3469da028b4e3a8caba0909fb1f \ - --hash=sha256:68be44487a95132fccbc0b836fded4190dae30324f6bf822e1b6efd385ffdc83 +botocore==1.34.81 \ + --hash=sha256:85f6fd7c5715eeef7a236c50947de00f57d72e7439daed1125491014b70fab01 \ + --hash=sha256:f79bf122566cc1f09d71cc9ac9fcf52d47ba48b761cbc3f064017b36a3c40eb8 # via # -c main.txt # boto3 @@ -376,9 +376,9 @@ dnspython==2.6.1 \ # -c main.txt # email-validator # pymongo -ecdsa==0.18.0 \ - --hash=sha256:190348041559e21b22a1d65cee485282ca11a6f81d503fddb84d5017e9ed1e49 \ - --hash=sha256:80600258e7ed2f16b9aa1d7c295bd70194109ad5a30fdee0eaeefef1d4c559dd +ecdsa==0.19.0 \ + --hash=sha256:2cea9b88407fdac7bbeca0833b189e4c9c53f2ef1e1eaa29f6224dbc809b707a \ + --hash=sha256:60eaad1199659900dd0af521ed462b793bbdf867432b3948e87416ae4caf6bf8 # via # -c main.txt # python-jose @@ -414,9 +414,9 @@ fido2==1.1.3 \ # -c main.txt # -r main.in # fido-mds -flask==3.0.2 \ - --hash=sha256:3232e0e9c850d781933cf0207523d1ece087eb8d87b23777ae38456e2fbe7c6e \ - --hash=sha256:822c03f4b799204250a7ee84b1eddc40665395333973dfb9deebfe425fefcb7d +flask==3.0.3 \ + --hash=sha256:34e815dfaa43340d1d15a5c3a02b8476004037eb4840b34910c6e21679d288f3 \ + --hash=sha256:ceb27b0af3823ea2737928a4d99d125a06175b8512c445cbd9a9ce200ef76842 # via # -r webapp_requirements.in # flask-babel @@ -655,6 +655,7 @@ lxml==5.2.1 \ --hash=sha256:b070bbe8d3f0f6147689bed981d19bbb33070225373338df755a46893528104a \ --hash=sha256:b0b58fbfa1bf7367dde8a557994e3b1637294be6cf2169810375caf8571a085c \ --hash=sha256:b560e3aa4b1d49e0e6c847d72665384db35b2f5d45f8e6a5c0072e0283430533 \ + 
--hash=sha256:b6241d4eee5f89453307c2f2bfa03b50362052ca0af1efecf9fef9a41a22bb4f \ --hash=sha256:b6787b643356111dfd4032b5bffe26d2f8331556ecb79e15dacb9275da02866e \ --hash=sha256:bcbf4af004f98793a95355980764b3d80d47117678118a44a80b721c9913436a \ --hash=sha256:beb72935a941965c52990f3a32d7f07ce869fe21c6af8b34bf6a277b33a345d3 \ @@ -832,9 +833,9 @@ packaging==24.0 \ # -c main.txt # gunicorn # marshmallow -phonenumbers==8.13.33 \ - --hash=sha256:991f2619f0593b36b674c345af47944ec4bae526b353cf53d707e662087be63b \ - --hash=sha256:f2d653268ece55a4f3752d9cda4be6f7465f298e6d028d522aedda13cf057201 +phonenumbers==8.13.34 \ + --hash=sha256:7c2676be07b7d0f74411e275e0660380a0ec3ee0d359f070d719424bd2c5f62e \ + --hash=sha256:bc0bb5d3bab29e28549194f6bf57cb3ca03c3dd84238af12674fe24031631bda # via # -c main.txt # -r main.in @@ -1240,9 +1241,9 @@ pyparsing==3.1.2 \ # via # -c main.txt # httplib2 -pypdf==4.1.0 \ - --hash=sha256:01c3257ec908676efd60a4537e525b89d48e0852bc92b4e0aa4cc646feda17cc \ - --hash=sha256:16cac912a05200099cef3f347c4c7e0aaf0a6d027603b8f9a973c0ea500dff89 +pypdf==4.2.0 \ + --hash=sha256:dc035581664e0ad717e3492acebc1a5fc23dba759e788e3d4a9fc9b1a32e72c1 \ + --hash=sha256:fe63f3f7d1dcda1c9374421a94c1bba6c6f8c4a62173a59b64ffd52058f846b1 # via # -c main.txt # xhtml2pdf @@ -1575,9 +1576,9 @@ typeguard==3.0.2 \ # via # -c main.txt # marshmallow-dataclass -typing-extensions==4.10.0 \ - --hash=sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475 \ - --hash=sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb +typing-extensions==4.11.0 \ + --hash=sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0 \ + --hash=sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a # via # -c main.txt # anyio @@ -1585,6 +1586,7 @@ typing-extensions==4.10.0 \ # marshmallow-dataclass # pydantic # pydantic-core + # pypdf # qrcode # typeguard # typing-inspect diff --git a/requirements/worker_requirements.in 
b/requirements/worker_requirements.in index 86ce6b370..d291d8433 100644 --- a/requirements/worker_requirements.in +++ b/requirements/worker_requirements.in @@ -1,3 +1,4 @@ -r main.in -c main.txt jinja2 +apscheduler diff --git a/requirements/worker_requirements.txt b/requirements/worker_requirements.txt index e078aa1e1..20887e490 100644 --- a/requirements/worker_requirements.txt +++ b/requirements/worker_requirements.txt @@ -30,6 +30,10 @@ anyio==4.3.0 \ # via # -c main.txt # httpx +apscheduler==3.10.4 \ + --hash=sha256:e6df071b27d9be898e486bc7940a7be50b4af2e9da7c08f0744a96d4bd4cef4a \ + --hash=sha256:fb91e8a768632a4756a585f79ec834e0e27aad5860bac7eaa523d9ccefd87661 + # via -r worker_requirements.in arabic-reshaper==3.0.0 \ --hash=sha256:3f71d5034bb694204a239a6f1ebcf323ac3c5b059de02259235e2016a1a5e2dc \ --hash=sha256:ffcd13ba5ec007db71c072f5b23f420da92ac7f268512065d49e790e62237099 @@ -68,15 +72,15 @@ bleach==6.1.0 \ # via # -c main.txt # -r main.in -boto3==1.34.76 \ - --hash=sha256:530a4cea3d40a6bd2f15a368ea395beef1ea6dff4491823bc48bd20c7d4da655 \ - --hash=sha256:8c598382e8fb61cfa8f75056197e9b509eb52039ebc291af3b1096241ba2542c +boto3==1.34.81 \ + --hash=sha256:004dad209d37b3d2df88f41da13b7ad702a751904a335fac095897ff7a19f82b \ + --hash=sha256:18224d206a8a775bcaa562d22ed3d07854934699190e12b52fcde87aac76a80e # via # -c main.txt # -r main.in -botocore==1.34.76 \ - --hash=sha256:62e45e7374844ee39e86a96fe7f5e973eb5bf3469da028b4e3a8caba0909fb1f \ - --hash=sha256:68be44487a95132fccbc0b836fded4190dae30324f6bf822e1b6efd385ffdc83 +botocore==1.34.81 \ + --hash=sha256:85f6fd7c5715eeef7a236c50947de00f57d72e7439daed1125491014b70fab01 \ + --hash=sha256:f79bf122566cc1f09d71cc9ac9fcf52d47ba48b761cbc3f064017b36a3c40eb8 # via # -c main.txt # boto3 @@ -358,9 +362,9 @@ dnspython==2.6.1 \ # -c main.txt # email-validator # pymongo -ecdsa==0.18.0 \ - --hash=sha256:190348041559e21b22a1d65cee485282ca11a6f81d503fddb84d5017e9ed1e49 \ - 
--hash=sha256:80600258e7ed2f16b9aa1d7c295bd70194109ad5a30fdee0eaeefef1d4c559dd +ecdsa==0.19.0 \ + --hash=sha256:2cea9b88407fdac7bbeca0833b189e4c9c53f2ef1e1eaa29f6224dbc809b707a \ + --hash=sha256:60eaad1199659900dd0af521ed462b793bbdf867432b3948e87416ae4caf6bf8 # via # -c main.txt # python-jose @@ -611,6 +615,7 @@ lxml==5.2.1 \ --hash=sha256:b070bbe8d3f0f6147689bed981d19bbb33070225373338df755a46893528104a \ --hash=sha256:b0b58fbfa1bf7367dde8a557994e3b1637294be6cf2169810375caf8571a085c \ --hash=sha256:b560e3aa4b1d49e0e6c847d72665384db35b2f5d45f8e6a5c0072e0283430533 \ + --hash=sha256:b6241d4eee5f89453307c2f2bfa03b50362052ca0af1efecf9fef9a41a22bb4f \ --hash=sha256:b6787b643356111dfd4032b5bffe26d2f8331556ecb79e15dacb9275da02866e \ --hash=sha256:bcbf4af004f98793a95355980764b3d80d47117678118a44a80b721c9913436a \ --hash=sha256:beb72935a941965c52990f3a32d7f07ce869fe21c6af8b34bf6a277b33a345d3 \ @@ -787,9 +792,9 @@ packaging==24.0 \ # -c main.txt # gunicorn # marshmallow -phonenumbers==8.13.33 \ - --hash=sha256:991f2619f0593b36b674c345af47944ec4bae526b353cf53d707e662087be63b \ - --hash=sha256:f2d653268ece55a4f3752d9cda4be6f7465f298e6d028d522aedda13cf057201 +phonenumbers==8.13.34 \ + --hash=sha256:7c2676be07b7d0f74411e275e0660380a0ec3ee0d359f070d719424bd2c5f62e \ + --hash=sha256:bc0bb5d3bab29e28549194f6bf57cb3ca03c3dd84238af12674fe24031631bda # via # -c main.txt # -r main.in @@ -1194,9 +1199,9 @@ pyparsing==3.1.2 \ # via # -c main.txt # httplib2 -pypdf==4.1.0 \ - --hash=sha256:01c3257ec908676efd60a4537e525b89d48e0852bc92b4e0aa4cc646feda17cc \ - --hash=sha256:16cac912a05200099cef3f347c4c7e0aaf0a6d027603b8f9a973c0ea500dff89 +pypdf==4.2.0 \ + --hash=sha256:dc035581664e0ad717e3492acebc1a5fc23dba759e788e3d4a9fc9b1a32e72c1 \ + --hash=sha256:fe63f3f7d1dcda1c9374421a94c1bba6c6f8c4a62173a59b64ffd52058f846b1 # via # -c main.txt # xhtml2pdf @@ -1256,6 +1261,7 @@ pytz==2024.1 \ --hash=sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319 # via # -c main.txt + # 
apscheduler # neo4j # pysaml2 pyxmlsecurity[PKCS11,pkcs11]==1.0.0 \ @@ -1471,6 +1477,7 @@ six==1.16.0 \ --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 # via # -c main.txt + # apscheduler # bleach # ecdsa # html5lib @@ -1524,9 +1531,9 @@ typeguard==3.0.2 \ # via # -c main.txt # marshmallow-dataclass -typing-extensions==4.10.0 \ - --hash=sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475 \ - --hash=sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb +typing-extensions==4.11.0 \ + --hash=sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0 \ + --hash=sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a # via # -c main.txt # anyio @@ -1534,6 +1541,7 @@ typing-extensions==4.10.0 \ # marshmallow-dataclass # pydantic # pydantic-core + # pypdf # qrcode # typeguard # typing-inspect @@ -1554,6 +1562,7 @@ tzlocal==5.2 \ --hash=sha256:8d399205578f1a9342816409cc1e46a93ebd5755e39ea2d85334bea911bf0e6e # via # -c main.txt + # apscheduler # pyhanko uritools==4.0.2 \ --hash=sha256:04df2b787d0eb76200e8319382a03562fbfe4741fd66c15506b08d3b8211d573 \ diff --git a/src/eduid/dev-extra-modules.txt b/src/eduid/dev-extra-modules.txt new file mode 100755 index 000000000..e69de29bb diff --git a/src/eduid/userdb/user_cleaner/db.py b/src/eduid/userdb/user_cleaner/db.py new file mode 100644 index 000000000..a30a24d6c --- /dev/null +++ b/src/eduid/userdb/user_cleaner/db.py @@ -0,0 +1,35 @@ +from datetime import datetime +from enum import Enum + +from eduid.userdb.db.base import TUserDbDocument +from eduid.userdb.user import User +from eduid.userdb.userdb import UserDB + + +class CleanerType(str, Enum): + SKV = "Skatteverket" + + +class CleanerQueueUser(User): + """ + User version to bookkeep cleaning actions. 
+ eppn + cleaner_type + next_check_ts + """ + + cleaner_type: CleanerType + + +class CleanerQueueDB(UserDB[CleanerQueueUser]): + def __init__(self, db_uri: str, db_name: str = "eduid_user_cleaner", collection: str = "cleaner_queue"): + super().__init__(db_uri, db_name) + + indexes = { + "eppn-index-v1": {"key": [("eduPersonPrincipalName", 1)], "unique": True}, + } + self.setup_indexes(indexes) + + @classmethod + def user_from_dict(cls, data: TUserDbDocument) -> CleanerQueueUser: + return super().user_from_dict(data) diff --git a/src/eduid/workers/job_runner/__init__.py b/src/eduid/workers/job_runner/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/eduid/workers/job_runner/app.py b/src/eduid/workers/job_runner/app.py new file mode 100644 index 000000000..07c6e60c3 --- /dev/null +++ b/src/eduid/workers/job_runner/app.py @@ -0,0 +1,45 @@ +from contextlib import asynccontextmanager +from typing import Callable, Optional + +from fastapi import FastAPI + +from eduid.common.config.parsers import load_config +from eduid.workers.job_runner.config import JobRunnerConfig +from eduid.workers.job_runner.context import Context +from eduid.workers.job_runner.scheduler import JobScheduler +from eduid.workers.job_runner.status import status_router + + +class JobRunner(FastAPI): + scheduler: JobScheduler = JobScheduler(timezone="UTC") + + def __init__( + self, name: str = "job_runner", test_config: Optional[dict] = None, lifespan: Optional[Callable] = None + ): + self.config = load_config(typ=JobRunnerConfig, app_name=name, ns="worker", test_config=test_config) + super().__init__(root_path=self.config.application_root, lifespan=lifespan) + + self.context = Context(config=self.config) + self.context.logger.info(f"Starting {name} worker: {self.context.worker_name}") + + +@asynccontextmanager +async def lifespan(app: JobRunner): + app.context.logger.info("Starting scheduler...") + app.scheduler.start() + yield + app.context.logger.info("Stopping 
scheduler...") + app.scheduler.shutdown() + + +def init_app(name: str = "job_runner", test_config: Optional[dict] = None) -> JobRunner: + app = JobRunner(name, test_config, lifespan=lifespan) + app.context.logger.info(app.config) + + app.include_router(status_router) + + # schedule jobs defined in config + app.scheduler.schedule_jobs(app.context) + + app.context.logger.info("app running...") + return app diff --git a/src/eduid/workers/job_runner/config.py b/src/eduid/workers/job_runner/config.py new file mode 100644 index 000000000..d99e13f7b --- /dev/null +++ b/src/eduid/workers/job_runner/config.py @@ -0,0 +1,28 @@ +import logging + +from pydantic import field_validator + +from eduid.common.config.base import LoggingConfigMixin, RootConfig, StatsConfigMixin +from eduid.common.utils import removesuffix + +logger = logging.getLogger(__name__) + + +class JobRunnerConfig(RootConfig, LoggingConfigMixin, StatsConfigMixin): + """ + Configuration for the user-cleaner service. + """ + + application_root: str = "" + log_format: str = "{asctime} | {levelname:7} | {hostname} | {name:35} | {module:10} | {message}" + mongo_uri: str = "" + status_cache_seconds: int = 10 + jobs: dict = {} + + @field_validator("application_root") + @classmethod + def application_root_must_not_end_with_slash(cls, v: str): + if v.endswith("/"): + logger.warning(f"application_root should not end with slash ({v})") + v = removesuffix(v, "/") + return v diff --git a/src/eduid/workers/job_runner/context.py b/src/eduid/workers/job_runner/context.py new file mode 100644 index 000000000..ea284f7c3 --- /dev/null +++ b/src/eduid/workers/job_runner/context.py @@ -0,0 +1,30 @@ +import logging +from os import environ + +from eduid.common.fastapi.log import init_logging +from eduid.userdb.user_cleaner.db import CleanerQueueDB +from eduid.userdb.userdb import AmDB +from eduid.workers.job_runner.config import JobRunnerConfig + + +class Context: + def __init__(self, config: JobRunnerConfig): + self.name = 
def gather_skv_users(context: Context):
    """
    Gather and queue all users that should be checked against SKV API:s
    """
    # NOTE: fixed the malformed docstring opener (a stray quote after the
    # triple quote) and dropped the f-prefix from a string without
    # placeholders.
    context.logger.debug("gathering users to check")


def check_skv_users(context: Context):
    """
    Check all users that should be checked against SKV API:s
    """
    context.logger.debug("checking users")
EDUID_APP_DEBUG is enabled -----\n") + +app = init_app() diff --git a/src/eduid/workers/job_runner/scheduler.py b/src/eduid/workers/job_runner/scheduler.py new file mode 100644 index 000000000..9600ff945 --- /dev/null +++ b/src/eduid/workers/job_runner/scheduler.py @@ -0,0 +1,42 @@ +from apscheduler.schedulers.asyncio import AsyncIOScheduler + +from eduid.workers.job_runner.context import Context +from eduid.workers.job_runner.jobs.skv import check_skv_users, gather_skv_users + + +class JobScheduler(AsyncIOScheduler): + + def schedule_jobs(self, context: Context): + """ + Schedule all jobs configured for host or environment + """ + + environment = context.config.environment + + jobs_config = context.config.jobs + + jobs: dict = {} + + if environment in jobs_config: + context.logger.info(f"Setting up jobs for environment {environment}") + context.logger.info(f"Setting up jobs {jobs_config[environment]}") + jobs.update(jobs_config[environment]) + + if context.worker_name in jobs_config: + context.logger.info(f"Setting up jobs for worker {context.worker_name}") + context.logger.info(f"Setting up jobs {jobs_config[context.worker_name]}") + jobs.update(jobs_config[context.worker_name]) + + context.logger.info(f"Setting up jobs {jobs} for {context.worker_name} running {environment}") + + for job in jobs: + params = jobs[job] + context.logger.info(f"Setting up job {job} with parameters {params}") + + match job: + case "gather_skv_users": + self.add_job(gather_skv_users, "cron", **params, args=(context,)) + case "check_skv_users": + self.add_job(check_skv_users, "cron", **params, args=(context,)) + case _: + BaseException("unknown job in config") diff --git a/src/eduid/workers/job_runner/status.py b/src/eduid/workers/job_runner/status.py new file mode 100644 index 000000000..1249ae7e9 --- /dev/null +++ b/src/eduid/workers/job_runner/status.py @@ -0,0 +1,62 @@ +from os import environ +from typing import Mapping + +from fastapi import APIRouter, Request, Response +from 
@status_router.get("/healthy", response_model=StatusResponse)
async def healthy(request: ContextRequest, response: Response) -> Mapping:
    """
    Health-check endpoint: verifies MongoDB connectivity and that the
    APScheduler instance is running. The result is cached between calls
    (see status_cache_seconds in config).
    """
    status = get_cached_response(request, response, key="health_check")
    if not status:
        # Pessimistic default; flipped to OK only if every check passes.
        status = {
            "status": f"STATUS_FAIL_{request.app.context.name}_",
            "hostname": environ.get("HOSTNAME", "UNKNOWN"),
        }
        reasons = []
        # BUG FIX: both checks are now always evaluated. Previously the
        # scheduler check sat in an elif, so a MongoDB failure hid the
        # scheduler state from both the log and the reported reason.
        mongo_ok = check_mongo(request)
        if not mongo_ok:
            reasons.append("mongodb check failed")
            request.app.context.logger.warning("MongoDB health check failed")
        scheduler_ok = check_scheduler(request)
        if not scheduler_ok:
            reasons.append("scheduler check failed")
            request.app.context.logger.warning("APScheduler health check failed")
        if mongo_ok and scheduler_ok:
            status["status"] = f"STATUS_OK_{request.app.context.name}_"
            reasons.append("mongodb check succeeded")
        status["reason"] = ", ".join(reasons)
        set_cached_response(request, response, key="health_check", data=status)
    return status
nin against navet and terminate deregistered persons --- .../clients/amapi_client/amapi_client.py | 6 +-- src/eduid/common/clients/gnap_client/base.py | 5 +- src/eduid/common/models/amapi_user.py | 1 + src/eduid/common/rpc/msg_relay.py | 9 +++- src/eduid/common/rpc/tests/test_msg_relay.py | 11 ++++- src/eduid/userdb/user_cleaner/db.py | 30 +++++++++--- src/eduid/userdb/userdb.py | 37 +++++++------- src/eduid/workers/amapi/routers/users.py | 2 +- src/eduid/workers/job_runner/config.py | 15 ++++-- src/eduid/workers/job_runner/context.py | 14 ++++++ src/eduid/workers/job_runner/jobs/skv.py | 48 +++++++++++++++++++ src/eduid/workers/msg/tasks.py | 15 ++++-- 12 files changed, 150 insertions(+), 43 deletions(-) diff --git a/src/eduid/common/clients/amapi_client/amapi_client.py b/src/eduid/common/clients/amapi_client/amapi_client.py index 6b022b33d..bcf6a1b6f 100644 --- a/src/eduid/common/clients/amapi_client/amapi_client.py +++ b/src/eduid/common/clients/amapi_client/amapi_client.py @@ -20,15 +20,15 @@ class AMAPIClient(GNAPClient): - def __init__(self, amapi_url: str, app, auth_data=GNAPClientAuthData, **kwargs): - super().__init__(auth_data=auth_data, app=app, **kwargs) + def __init__(self, amapi_url: str, auth_data=GNAPClientAuthData, verify_tls: bool = True, **kwargs): + super().__init__(auth_data=auth_data, verify=verify_tls, **kwargs) self.amapi_url = amapi_url def _users_base_url(self) -> str: return urlappend(self.amapi_url, "users") def _put(self, base_path: str, user: str, endpoint: str, body: Any) -> httpx.Response: - return self.put(urlappend(base_path, f"{user}/{endpoint}"), json=body.json()) + return self.put(url=urlappend(base_path, f"{user}/{endpoint}"), content=body.json()) def update_user_email(self, user: str, body: UserUpdateEmailRequest) -> UserUpdateResponse: ret = self._put(base_path=self._users_base_url(), user=user, endpoint="email", body=body) diff --git a/src/eduid/common/clients/gnap_client/base.py 
class DeregisteredCauseCode(str, Enum):
    """
    Navet (Skatteverket) deregistration cause codes.

    The two-letter values are defined by the population register; the
    comments note when individual codes were introduced.
    """

    DECEASED = "AV"
    EMIGRATED = "UV"
    OLD_NIN = "GN"
    OLD_COORDINATION_NUMBER = "GS"
    # From 2006-09-20
    MISSING = "OB"
    TECHNICALLY_DEREGISTERED = "TA"
    ANNULLED_COORDINATION_NUMBER = "AS"
    # Before 2006-09-20
    OTHER_REASON = "AN"
    # From 2018-07-01
    FALSE_IDENTITY = "FI"
@patch("eduid.workers.msg.tasks.get_all_navet_data.apply_async")
def test_get_all_navet_data_deceased(self, mock_get_all_navet_data: MagicMock):
    # A birth year before 1900 makes the devel Navet data report the person
    # as deregistered/deceased (see MessageSender.get_devel_all_navet_data).
    mock_conf = {"get.return_value": self.message_sender.get_devel_all_navet_data(identity_number="189001019802")}
    ret = Mock(**mock_conf)
    # The celery task is mocked out; .apply_async(...).get() returns the
    # canned devel data above.
    mock_get_all_navet_data.return_value = ret
    # allow_deregistered must be set, or the relay would refuse the lookup.
    res = self.msg_relay.get_all_navet_data(nin="189001019802", allow_deregistered=True)
    assert res.person.deregistration_information.cause_code == DeregisteredCauseCode.DECEASED
    assert res == NavetData(**self.message_sender.get_devel_all_navet_data(identity_number="189001019802"))
def get_next_user(self, cleaner_type: CleanerType = CleanerType.SKV) -> Optional[CleanerQueueUser]:
    """
    Pop the oldest queued user of the given cleaner type.

    :param cleaner_type: which cleaning job's queue to pop from
        (defaults to SKV for backward compatibility).
    :return: the claimed user, or None if the queue is empty.
    """
    # BUG FIX 1: fetch-then-remove in two round trips is racy -- two workers
    # polling the queue could claim the same user. find_one_and_delete
    # claims the document atomically.
    # BUG FIX 2: sort ascending on creation time so the queue is FIFO;
    # descending popped the newest entry first and could starve old entries.
    doc = self._coll.find_one_and_delete(
        filter={"cleaner_type": cleaner_type}, sort=[("meta.created_ts", pymongo.ASCENDING)]
    )
    if doc is None:
        logger.debug("No document found")
        return None
    logger.debug("Found document, claimed it from the queue")
    return self.user_from_dict(doc)
def get_unterminated_users_with_nin(self) -> list[User]:
    """Return every user holding a verified NIN identity that has not been terminated."""
    verified_nin = {
        "$elemMatch": {
            "verified": True,
            "identity_type": IdentityType.NIN.value,
        }
    }
    query = {
        "identities": verified_nin,
        # Only users never marked as terminated.
        "terminated": {"$exists": False},
    }
    documents = self._get_documents_by_aggregate(match=query)
    return self._users_from_documents(documents)
class AmAPIConfig(BaseModel):
    """Connection settings for the AM API: base URL and TLS verification toggle."""

    url: str
    tls_verify: bool = True
""" @@ -19,6 +25,9 @@ class JobRunnerConfig(RootConfig, LoggingConfigMixin, StatsConfigMixin): status_cache_seconds: int = 10 jobs: dict = {} + gnap_auth_data: GNAPClientAuthData + amapi: AmAPIConfig + @field_validator("application_root") @classmethod def application_root_must_not_end_with_slash(cls, v: str): diff --git a/src/eduid/workers/job_runner/context.py b/src/eduid/workers/job_runner/context.py index ea284f7c3..184629b44 100644 --- a/src/eduid/workers/job_runner/context.py +++ b/src/eduid/workers/job_runner/context.py @@ -1,7 +1,9 @@ import logging from os import environ +from eduid.common.clients.amapi_client.amapi_client import AMAPIClient from eduid.common.fastapi.log import init_logging +from eduid.common.rpc.msg_relay import MsgRelay from eduid.userdb.user_cleaner.db import CleanerQueueDB from eduid.userdb.userdb import AmDB from eduid.workers.job_runner.config import JobRunnerConfig @@ -28,3 +30,15 @@ def __init__(self, config: JobRunnerConfig): self.cleaner_queue = CleanerQueueDB(db_uri=self.config.mongo_uri) self.logger.info(f"Database {self.cleaner_queue} initialized") + + # Setup MsgRelay + self.msg_relay = MsgRelay(self.config) + self.logger.info(f"MsgRelay {self.msg_relay} initialized") + + # Setup amapi client + self.amapi_client = AMAPIClient( + amapi_url=self.config.amapi.url, + auth_data=self.config.gnap_auth_data, + verify_tls=self.config.amapi.tls_verify, + ) + self.logger.info(f"AMAPIClient {self.amapi_client} initialized") diff --git a/src/eduid/workers/job_runner/jobs/skv.py b/src/eduid/workers/job_runner/jobs/skv.py index 103801422..5bbcbd1e0 100644 --- a/src/eduid/workers/job_runner/jobs/skv.py +++ b/src/eduid/workers/job_runner/jobs/skv.py @@ -1,3 +1,9 @@ +from eduid.common.models.amapi_user import Reason, Source, UserUpdateResponse, UserUpdateTerminateRequest +from eduid.common.rpc.msg_relay import DeregisteredCauseCode, NavetData +from eduid.userdb.exceptions import UserDoesNotExist +from eduid.userdb.meta import CleanerType +from 
def check_skv_users(context: Context):
    """
    Check all users that should be checked against SKV API:s

    Pops one user from the cleaner queue per invocation, looks the user up
    in Navet (allowing deregistered persons) and terminates the account if
    the person is deregistered for any reason other than emigration.
    """
    context.logger.debug(f"checking users")
    user = context.cleaner_queue.get_next_user()
    if user is not None:
        context.logger.debug(f"Checking if user with eppn {user.eppn} should be terminated")
        # Queued users are gathered from verified-NIN holders, so a NIN
        # identity is expected to be present here.
        assert user.identities.nin is not None  # Please mypy
        navet_data: NavetData = context.msg_relay.get_all_navet_data(
            nin=user.identities.nin.number, allow_deregistered=True
        )
        context.logger.debug(f"Navet data: {navet_data}")

        if navet_data.person.is_deregistered():
            cause = navet_data.person.deregistration_information.cause_code
            if cause is DeregisteredCauseCode.EMIGRATED:
                # Emigration does not end the person's eduID relationship.
                context.logger.debug(f"User with eppn {user.eppn} has emigrated and should not be terminated")
            else:
                context.logger.debug(f"User with eppn {user.eppn} should be terminated")
                # Record the specific reason for death; everything else is
                # reported as a generic deregistration.
                reason = Reason.USER_DECEASED if cause == DeregisteredCauseCode.DECEASED else Reason.USER_DEREGISTERED
                terminate_user(context, user.eppn, reason)
        else:
            context.logger.debug(f"User with eppn {user.eppn} is still registered")

    else:
        # Queue empty -- nothing to check this run.
        context.logger.debug(f"Nothing to do")
""" + Terminate a user + """ + request_body: UserUpdateTerminateRequest = UserUpdateTerminateRequest(reason=reason, source=Source.SKV_NAVET_V2) + response: UserUpdateResponse = context.amapi_client.update_user_terminate(user=eppn, body=request_body) + context.logger.debug(f"Terminate user response: {response}") diff --git a/src/eduid/workers/msg/tasks.py b/src/eduid/workers/msg/tasks.py index 7571bcf5f..f83c4dec9 100644 --- a/src/eduid/workers/msg/tasks.py +++ b/src/eduid/workers/msg/tasks.py @@ -234,23 +234,30 @@ def get_devel_relations() -> OrderedDict[str, Any]: def get_all_navet_data(self, identity_number: str) -> Optional[OrderedDict[str, Any]]: # Only log the message if devel_mode is enabled if MsgCelerySingleton.worker_config.devel_mode is True: - return self.get_devel_all_navet_data() + return self.get_devel_all_navet_data(identity_number) data = self._get_navet_data(identity_number) return navet_get_all_data(data) @staticmethod - def get_devel_all_navet_data() -> OrderedDict[str, Any]: + def get_devel_all_navet_data(identity_number: str = "190102031234") -> OrderedDict[str, Any]: """ Return a dict with devel data + Birthdates preceding 1900 are shown as deceased for testing purposes """ + + deregistration_information = {} + birth_year = int(identity_number[0:4]) + if birth_year < 1900: + deregistration_information = {"date": "20220315", "causeCode": "AV"} + result = OrderedDict( { "CaseInformation": {"lastChanged": "20170904141659"}, "Person": { - "PersonId": {"NationalIdentityNumber": "197609272393"}, + "PersonId": {"NationalIdentityNumber": identity_number}, "ReferenceNationalIdentityNumber": "", - "DeregistrationInformation": {}, + "DeregistrationInformation": deregistration_information, "Name": {"GivenNameMarking": "20", "GivenName": "Testaren Test", "Surname": "Testsson"}, "PostalAddresses": { "OfficialAddress": {"Address2": "ÖRGATAN 79 LGH 10", "PostalCode": "12345", "City": "LANDET"} From a24fad0ee9c5a189e9e1f7057fe3ac614b1a35b8 Mon Sep 17 00:00:00 
def get_next_user(self, cleaner_type: CleanerType) -> Optional[CleanerQueueUser]:
    """
    Pop the oldest queued user of the given cleaner type (FIFO).

    :param cleaner_type: which cleaning job's queue to pop from.
    :return: the claimed user, or None if the queue is empty.
    """
    # BUG FIX: fetching the document and removing it in a second round trip
    # is racy when several workers poll the queue -- two workers could claim
    # the same user. find_one_and_delete removes and returns it atomically.
    doc = self._coll.find_one_and_delete(
        filter={"cleaner_type": cleaner_type}, sort=[("meta.created_ts", pymongo.ASCENDING)]
    )
    if doc is None:
        logger.debug("No document found")
        return None
    logger.debug("Found document, claimed it from the queue")
    return self.user_from_dict(doc)
class BaseDBTestCase(unittest.TestCase):
    """
    Base test case that sets up a temporary database for testing.

    Subclasses get a shared MongoTemporaryInstance and its connection URI as
    class attributes; the instance is reused across test classes (get_instance
    returns a shared singleton).
    """

    # Shared temporary mongodb instance for the whole test class.
    mongodb_instance: MongoTemporaryInstance
    # Connection URI pointing at the temporary instance.
    mongo_uri: str

    @classmethod
    def setUpClass(cls):
        cls.mongodb_instance = MongoTemporaryInstance.get_instance()
        cls.mongo_uri = cls.mongodb_instance.uri
+ """ + + @classmethod + def setUpClass(cls) -> None: + return super().setUpClass() + + def setUp(self) -> None: + if "EDUID_CONFIG_YAML" not in os.environ: + os.environ["EDUID_CONFIG_YAML"] = "YAML_CONFIG_NOT_USED" + + self.datadir = pkg_resources.resource_filename(__name__, "tests/data") + + self.cleaner_queue_db = CleanerQueueDB(db_uri=self.mongo_uri) + + def tearDown(self) -> None: + super().tearDown() + if self.cleaner_queue_db: + self.cleaner_queue_db._drop_whole_collection() diff --git a/src/eduid/workers/job_runner/tests/test_user_cleaner.py b/src/eduid/workers/job_runner/tests/test_user_cleaner.py new file mode 100644 index 000000000..7cb4abf4b --- /dev/null +++ b/src/eduid/workers/job_runner/tests/test_user_cleaner.py @@ -0,0 +1,51 @@ +import time + +from eduid.userdb.fixtures.users import UserFixtures +from eduid.userdb.meta import CleanerType +from eduid.userdb.user_cleaner.db import CleanerQueueUser +from eduid.workers.job_runner.testing import CleanerQueueTestCase + + +class TestCleanerQueueDB(CleanerQueueTestCase): + users = UserFixtures() + + def setUp(self): + super().setUp() + + def test_queue_order(self): + first = self.users.mocked_user_standard + second = self.users.mocked_user_standard_2 + first_user: CleanerQueueUser = CleanerQueueUser( + eppn=first.eppn, cleaner_type=CleanerType.SKV, identities=first.identities + ) + self.cleaner_queue_db.save(first_user) + second_user: CleanerQueueUser = CleanerQueueUser( + eppn=second.eppn, cleaner_type=CleanerType.SKV, identities=second.identities + ) + self.cleaner_queue_db.save(second_user) + + first_user_from_db = self.cleaner_queue_db.get_next_user(CleanerType.SKV) + second_user_from_db = self.cleaner_queue_db.get_next_user(CleanerType.SKV) + + self.assertEqual(first_user_from_db.eppn, first.eppn) + self.assertEqual(second_user_from_db.eppn, second_user.eppn) + + def test_mixed_queue(self): + first = self.users.mocked_user_standard + second = self.users.mocked_user_standard_2 + ladok_queue_user: 
CleanerQueueUser = CleanerQueueUser( + eppn=first.eppn, cleaner_type=CleanerType.LADOK, identities=first.identities + ) + self.cleaner_queue_db.save(ladok_queue_user) + skv_queue_user: CleanerQueueUser = CleanerQueueUser( + eppn=second.eppn, cleaner_type=CleanerType.SKV, identities=second.identities + ) + self.cleaner_queue_db.save(skv_queue_user) + + first_user_from_db = self.cleaner_queue_db.get_next_user(CleanerType.SKV) + second_user_from_db = self.cleaner_queue_db.get_next_user(CleanerType.SKV) + third_user_from_db = self.cleaner_queue_db.get_next_user(CleanerType.LADOK) + + self.assertEqual(first_user_from_db.eppn, skv_queue_user.eppn) + self.assertIsNone(second_user_from_db) + self.assertEqual(third_user_from_db.eppn, ladok_queue_user.eppn) From c17d279113f24bda4d285061029e1dd62b0d37da Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Thu, 30 May 2024 11:23:16 +0000 Subject: [PATCH 04/15] don't rename class variables --- src/eduid/common/clients/gnap_client/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/eduid/common/clients/gnap_client/base.py b/src/eduid/common/clients/gnap_client/base.py index c91bb8558..b9863fa63 100644 --- a/src/eduid/common/clients/gnap_client/base.py +++ b/src/eduid/common/clients/gnap_client/base.py @@ -35,8 +35,8 @@ class GNAPClientException(Exception): class GNAPClientAuthData(BaseModel): - auth_server_url: str - auth_server_verify: bool = True + authn_server_url: str + authn_server_verify: bool = True key_name: str client_jwk: ClientJWK access: list[Union[str, Access]] = Field(default_factory=list) @@ -50,7 +50,7 @@ class GNAPBearerTokenMixin(ABC): @property def transaction_uri(self) -> str: - return urlappend(self._auth_data.auth_server_url, "transaction") + return urlappend(self._auth_data.authn_server_url, "transaction") def _create_grant_request_jws(self) -> str: req = GrantRequest( From 916fba4344f89ed53fb50b68b5cecd121b1902e0 Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Thu, 30 May 
2024 11:51:34 +0000 Subject: [PATCH 05/15] fix a few code smells --- src/eduid/workers/job_runner/jobs/skv.py | 6 +++--- src/eduid/workers/job_runner/scheduler.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/eduid/workers/job_runner/jobs/skv.py b/src/eduid/workers/job_runner/jobs/skv.py index 0b87339eb..e61e45712 100644 --- a/src/eduid/workers/job_runner/jobs/skv.py +++ b/src/eduid/workers/job_runner/jobs/skv.py @@ -12,7 +12,7 @@ def gather_skv_users(context: Context): Gather and queue all users that should be checked against SKV API:s """ - context.logger.debug(f"gathering users to check") + context.logger.debug("gathering users to check") users: list[User] = context.db.get_unterminated_users_with_nin() context.logger.debug(f"gathered {len(users)} users to check") for user in users: @@ -30,7 +30,7 @@ def check_skv_users(context: Context): """ Check all users that should be checked against SKV API:s """ - context.logger.debug(f"checking users") + context.logger.debug("checking users") user = context.cleaner_queue.get_next_user(CleanerType.SKV) if user is not None: context.logger.debug(f"Checking if user with eppn {user.eppn} should be terminated") @@ -52,7 +52,7 @@ def check_skv_users(context: Context): context.logger.debug(f"User with eppn {user.eppn} is still registered") else: - context.logger.debug(f"Nothing to do") + context.logger.debug("Nothing to do") def terminate_user(context: Context, eppn: str, reason: Reason): diff --git a/src/eduid/workers/job_runner/scheduler.py b/src/eduid/workers/job_runner/scheduler.py index 9600ff945..5dbac0d03 100644 --- a/src/eduid/workers/job_runner/scheduler.py +++ b/src/eduid/workers/job_runner/scheduler.py @@ -39,4 +39,4 @@ def schedule_jobs(self, context: Context): case "check_skv_users": self.add_job(check_skv_users, "cron", **params, args=(context,)) case _: - BaseException("unknown job in config") + raise BaseException("unknown job in config") From cc19d2ce8e621a12147a0cdf02659882b143b468 Mon 
Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Thu, 30 May 2024 12:23:01 +0000 Subject: [PATCH 06/15] use more specific exception --- src/eduid/workers/job_runner/scheduler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/eduid/workers/job_runner/scheduler.py b/src/eduid/workers/job_runner/scheduler.py index 5dbac0d03..557805940 100644 --- a/src/eduid/workers/job_runner/scheduler.py +++ b/src/eduid/workers/job_runner/scheduler.py @@ -1,5 +1,6 @@ from apscheduler.schedulers.asyncio import AsyncIOScheduler +from eduid.common.config.exceptions import BadConfiguration from eduid.workers.job_runner.context import Context from eduid.workers.job_runner.jobs.skv import check_skv_users, gather_skv_users @@ -39,4 +40,4 @@ def schedule_jobs(self, context: Context): case "check_skv_users": self.add_job(check_skv_users, "cron", **params, args=(context,)) case _: - raise BaseException("unknown job in config") + raise BadConfiguration("unknown job in config") From e97c865bdfd405de964f3ba2c025c9dafdb6c6a2 Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Tue, 4 Jun 2024 12:45:23 +0000 Subject: [PATCH 07/15] use atomic find_one_and_delete --- src/eduid/userdb/user_cleaner/db.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/eduid/userdb/user_cleaner/db.py b/src/eduid/userdb/user_cleaner/db.py index 4d0334552..5dcf31339 100644 --- a/src/eduid/userdb/user_cleaner/db.py +++ b/src/eduid/userdb/user_cleaner/db.py @@ -39,15 +39,13 @@ def user_from_dict(cls, data: TUserDbDocument) -> CleanerQueueUser: return CleanerQueueUser.from_dict(data) def get_next_user(self, cleaner_type: CleanerType) -> Optional[CleanerQueueUser]: - docs = self._get_documents_by_aggregate( - match={"cleaner_type": cleaner_type}, sort={"meta.created_ts": pymongo.ASCENDING}, limit=1 + doc = self._coll.find_one_and_delete( + filter={"cleaner_type": cleaner_type}, sort=[("meta.created_ts", pymongo.ASCENDING)] ) - logger.debug(f"Found {len(docs)} 
documents") - if len(docs) == 0: - return None - else: - doc = docs[0] - logger.debug(f"Found document with id {doc['_id']}, removing it from queue") + if doc is not None: + logger.debug("Found document") user = self.user_from_dict(doc) - self.remove_document(doc["_id"]) return user + else: + logger.debug("No document found") + return None From 13f017dbdf1d43f458ed39a09d430836768a7881 Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Wed, 3 Jul 2024 08:48:05 +0000 Subject: [PATCH 08/15] change to AmRelay --- src/eduid/userdb/user_cleaner/userdb.py | 16 ++++++++++++++++ src/eduid/workers/am/ams/__init__.py | 9 +++++++++ src/eduid/workers/job_runner/config.py | 4 ++-- src/eduid/workers/job_runner/context.py | 9 +++++++++ src/eduid/workers/job_runner/helpers.py | 22 ++++++++++++++++++++++ src/eduid/workers/job_runner/jobs/skv.py | 13 +++++++------ 6 files changed, 65 insertions(+), 8 deletions(-) create mode 100644 src/eduid/userdb/user_cleaner/userdb.py create mode 100644 src/eduid/workers/job_runner/helpers.py diff --git a/src/eduid/userdb/user_cleaner/userdb.py b/src/eduid/userdb/user_cleaner/userdb.py new file mode 100644 index 000000000..97844efe1 --- /dev/null +++ b/src/eduid/userdb/user_cleaner/userdb.py @@ -0,0 +1,16 @@ +from eduid.userdb.db.base import TUserDbDocument +from eduid.userdb.user import User +from eduid.userdb.userdb import UserDB + + +class CleanerUser(User): + pass + + +class CleanerUserDB(UserDB[CleanerUser]): + def __init__(self, db_uri: str, db_name: str = "eduid_user_cleaner", collection: str = "profiles"): + super().__init__(db_uri, db_name, collection) + + @classmethod + def user_from_dict(cls, data: TUserDbDocument) -> CleanerUser: + return CleanerUser.from_dict(data) diff --git a/src/eduid/workers/am/ams/__init__.py b/src/eduid/workers/am/ams/__init__.py index 5a0bddb2f..33296b710 100644 --- a/src/eduid/workers/am/ams/__init__.py +++ b/src/eduid/workers/am/ams/__init__.py @@ -26,6 +26,7 @@ from eduid.userdb.reset_password import 
ResetPasswordUserDB from eduid.userdb.security import SecurityUserDB from eduid.userdb.signup import SignupUserDB +from eduid.userdb.user_cleaner.userdb import CleanerUserDB from eduid.workers.am.ams.common import AttributeFetcher logger = get_task_logger(__name__) @@ -310,3 +311,11 @@ class eduid_bankid(AttributeFetcher): @classmethod def get_user_db(cls, uri: str) -> BankIDProofingUserDB: return BankIDProofingUserDB(uri) + + +class eduid_job_runner(AttributeFetcher): + whitelist_set_attrs = ["terminated"] # skv cleaner checks status of registered persons + + @classmethod + def get_user_db(cls, uri: str) -> CleanerUserDB: + return CleanerUserDB(uri) diff --git a/src/eduid/workers/job_runner/config.py b/src/eduid/workers/job_runner/config.py index 585917451..e710cf638 100644 --- a/src/eduid/workers/job_runner/config.py +++ b/src/eduid/workers/job_runner/config.py @@ -3,7 +3,7 @@ from pydantic import BaseModel, field_validator from eduid.common.clients.gnap_client.base import GNAPClientAuthData -from eduid.common.config.base import LoggingConfigMixin, MsgConfigMixin, RootConfig, StatsConfigMixin +from eduid.common.config.base import AmConfigMixin, LoggingConfigMixin, MsgConfigMixin, RootConfig, StatsConfigMixin from eduid.common.utils import removesuffix logger = logging.getLogger(__name__) @@ -14,7 +14,7 @@ class AmAPIConfig(BaseModel): tls_verify: bool = True -class JobRunnerConfig(RootConfig, LoggingConfigMixin, StatsConfigMixin, MsgConfigMixin): +class JobRunnerConfig(RootConfig, LoggingConfigMixin, StatsConfigMixin, MsgConfigMixin, AmConfigMixin): """ Configuration for the user-cleaner service. 
""" diff --git a/src/eduid/workers/job_runner/context.py b/src/eduid/workers/job_runner/context.py index 184629b44..b3f4f674d 100644 --- a/src/eduid/workers/job_runner/context.py +++ b/src/eduid/workers/job_runner/context.py @@ -3,8 +3,10 @@ from eduid.common.clients.amapi_client.amapi_client import AMAPIClient from eduid.common.fastapi.log import init_logging +from eduid.common.rpc.am_relay import AmRelay from eduid.common.rpc.msg_relay import MsgRelay from eduid.userdb.user_cleaner.db import CleanerQueueDB +from eduid.userdb.user_cleaner.userdb import CleanerUserDB from eduid.userdb.userdb import AmDB from eduid.workers.job_runner.config import JobRunnerConfig @@ -31,10 +33,17 @@ def __init__(self, config: JobRunnerConfig): self.cleaner_queue = CleanerQueueDB(db_uri=self.config.mongo_uri) self.logger.info(f"Database {self.cleaner_queue} initialized") + self.private_db = CleanerUserDB(db_uri=self.config.mongo_uri) + self.logger.info(f"Database {self.private_db} initialized") + # Setup MsgRelay self.msg_relay = MsgRelay(self.config) self.logger.info(f"MsgRelay {self.msg_relay} initialized") + # Setup AmRelay + self.am_relay = AmRelay(self.config) + self.logger.info(f"AmRelay {self.am_relay} initialized") + # Setup amapi client self.amapi_client = AMAPIClient( amapi_url=self.config.amapi.url, diff --git a/src/eduid/workers/job_runner/helpers.py b/src/eduid/workers/job_runner/helpers.py new file mode 100644 index 000000000..81168c52a --- /dev/null +++ b/src/eduid/workers/job_runner/helpers.py @@ -0,0 +1,22 @@ +from eduid.userdb.user import User +from eduid.userdb.user_cleaner.userdb import CleanerUser +from eduid.workers.job_runner.context import Context + + +def save_and_sync_user(context: Context, user: User) -> bool: + """ + Save to private userdb and propagate change to central user db. 
+ + May raise UserOutOfSync exception + + :param user: the modified user + """ + private_user = CleanerUser.from_user(user, context.private_db) + context.private_db.save(private_user) + context.logger.debug( + f"Saving user {private_user} to private userdb {context.private_db} (is_in_database: {user.meta.is_in_database})" + ) + + # Sync to central userdb + context.logger.debug(f"Syncing {user} to central userdb") + return context.am_relay.request_user_sync(user) diff --git a/src/eduid/workers/job_runner/jobs/skv.py b/src/eduid/workers/job_runner/jobs/skv.py index e61e45712..f79ddd7f3 100644 --- a/src/eduid/workers/job_runner/jobs/skv.py +++ b/src/eduid/workers/job_runner/jobs/skv.py @@ -1,3 +1,4 @@ +from eduid.common.misc.timeutil import utc_now from eduid.common.models.amapi_user import Reason, Source, UserUpdateResponse, UserUpdateTerminateRequest from eduid.common.rpc.msg_relay import DeregisteredCauseCode, NavetData from eduid.userdb.exceptions import UserDoesNotExist @@ -5,6 +6,7 @@ from eduid.userdb.user import User from eduid.userdb.user_cleaner.db import CleanerQueueUser from eduid.workers.job_runner.context import Context +from eduid.workers.job_runner.helpers import save_and_sync_user def gather_skv_users(context: Context): @@ -46,8 +48,7 @@ def check_skv_users(context: Context): context.logger.debug(f"User with eppn {user.eppn} has emigrated and should not be terminated") else: context.logger.debug(f"User with eppn {user.eppn} should be terminated") - reason = Reason.USER_DECEASED if cause == DeregisteredCauseCode.DECEASED else Reason.USER_DEREGISTERED - terminate_user(context, user.eppn, reason) + terminate_user(context, user) else: context.logger.debug(f"User with eppn {user.eppn} is still registered") @@ -55,10 +56,10 @@ def check_skv_users(context: Context): context.logger.debug("Nothing to do") -def terminate_user(context: Context, eppn: str, reason: Reason): +def terminate_user(context: Context, queue_user: CleanerQueueUser): """ Terminate a user 
""" - request_body: UserUpdateTerminateRequest = UserUpdateTerminateRequest(reason=reason, source=Source.SKV_NAVET_V2) - response: UserUpdateResponse = context.amapi_client.update_user_terminate(user=eppn, body=request_body) - context.logger.debug(f"Terminate user response: {response}") + user = context.db.get_user_by_eppn(queue_user.eppn) + user.terminated = utc_now() + save_and_sync_user(context, user) From 7f67708b71b50dacf71010293e39d871a3a679de Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Tue, 27 Aug 2024 09:03:55 +0200 Subject: [PATCH 09/15] Remove unused amapi config and setup --- src/eduid/workers/job_runner/config.py | 6 ------ src/eduid/workers/job_runner/context.py | 8 -------- 2 files changed, 14 deletions(-) diff --git a/src/eduid/workers/job_runner/config.py b/src/eduid/workers/job_runner/config.py index e710cf638..e6d85fa89 100644 --- a/src/eduid/workers/job_runner/config.py +++ b/src/eduid/workers/job_runner/config.py @@ -9,11 +9,6 @@ logger = logging.getLogger(__name__) -class AmAPIConfig(BaseModel): - url: str - tls_verify: bool = True - - class JobRunnerConfig(RootConfig, LoggingConfigMixin, StatsConfigMixin, MsgConfigMixin, AmConfigMixin): """ Configuration for the user-cleaner service. 
@@ -26,7 +21,6 @@ class JobRunnerConfig(RootConfig, LoggingConfigMixin, StatsConfigMixin, MsgConfi jobs: dict = {} gnap_auth_data: GNAPClientAuthData - amapi: AmAPIConfig @field_validator("application_root") @classmethod diff --git a/src/eduid/workers/job_runner/context.py b/src/eduid/workers/job_runner/context.py index b3f4f674d..babf359ac 100644 --- a/src/eduid/workers/job_runner/context.py +++ b/src/eduid/workers/job_runner/context.py @@ -43,11 +43,3 @@ def __init__(self, config: JobRunnerConfig): # Setup AmRelay self.am_relay = AmRelay(self.config) self.logger.info(f"AmRelay {self.am_relay} initialized") - - # Setup amapi client - self.amapi_client = AMAPIClient( - amapi_url=self.config.amapi.url, - auth_data=self.config.gnap_auth_data, - verify_tls=self.config.amapi.tls_verify, - ) - self.logger.info(f"AMAPIClient {self.amapi_client} initialized") From 3ee775dc1729d7717cbbebca582911b98091ed20 Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Tue, 27 Aug 2024 09:31:16 +0200 Subject: [PATCH 10/15] catch exceptions on navet lookup --- src/eduid/workers/job_runner/jobs/skv.py | 29 +++++++++++++----------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/eduid/workers/job_runner/jobs/skv.py b/src/eduid/workers/job_runner/jobs/skv.py index f79ddd7f3..b25798a42 100644 --- a/src/eduid/workers/job_runner/jobs/skv.py +++ b/src/eduid/workers/job_runner/jobs/skv.py @@ -37,21 +37,24 @@ def check_skv_users(context: Context): if user is not None: context.logger.debug(f"Checking if user with eppn {user.eppn} should be terminated") assert user.identities.nin is not None # Please mypy - navet_data: NavetData = context.msg_relay.get_all_navet_data( - nin=user.identities.nin.number, allow_deregistered=True - ) - context.logger.debug(f"Navet data: {navet_data}") + try: + navet_data: NavetData = context.msg_relay.get_all_navet_data( + nin=user.identities.nin.number, allow_deregistered=True + ) + context.logger.debug(f"Navet data: {navet_data}") - if 
navet_data.person.is_deregistered(): - cause = navet_data.person.deregistration_information.cause_code - if cause is DeregisteredCauseCode.EMIGRATED: - context.logger.debug(f"User with eppn {user.eppn} has emigrated and should not be terminated") + if navet_data.person.is_deregistered(): + cause = navet_data.person.deregistration_information.cause_code + if cause is DeregisteredCauseCode.EMIGRATED: + context.logger.debug(f"User with eppn {user.eppn} has emigrated and should not be terminated") + else: + context.logger.debug(f"User with eppn {user.eppn} should be terminated") + terminate_user(context, user) else: - context.logger.debug(f"User with eppn {user.eppn} should be terminated") - terminate_user(context, user) - else: - context.logger.debug(f"User with eppn {user.eppn} is still registered") - + context.logger.debug(f"User with eppn {user.eppn} is still registered") + except: + context.logger.error(f"Failed to get Navet data for user with eppn {user.eppn}") + # The user will be requeued for a new check by the next run of gather_skv_users else: context.logger.debug("Nothing to do") From 508f852d19f90856dba4a5f9325c5aa38ab8150f Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Thu, 29 Aug 2024 15:52:37 +0200 Subject: [PATCH 11/15] add type support for jobs configuration --- src/eduid/workers/job_runner/config.py | 61 +++++++++++++++++++++-- src/eduid/workers/job_runner/scheduler.py | 7 ++- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/src/eduid/workers/job_runner/config.py b/src/eduid/workers/job_runner/config.py index e6d85fa89..14e9a8ecb 100644 --- a/src/eduid/workers/job_runner/config.py +++ b/src/eduid/workers/job_runner/config.py @@ -1,6 +1,8 @@ import logging +from datetime import datetime, tzinfo +from typing import Any, Optional, Union -from pydantic import BaseModel, field_validator +from pydantic import BaseModel, ConfigDict, Field, RootModel, field_validator, model_validator from eduid.common.clients.gnap_client.base import 
GNAPClientAuthData from eduid.common.config.base import AmConfigMixin, LoggingConfigMixin, MsgConfigMixin, RootConfig, StatsConfigMixin @@ -9,6 +11,60 @@ logger = logging.getLogger(__name__) +class JobCronConfig(BaseModel): + """ + Cron configuration for a single job. + https://apscheduler.readthedocs.io/en/stable/modules/triggers/cron.html#module-apscheduler.triggers.cron + """ + + model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid") + + year: Optional[Union[int, str]] = None + month: Optional[Union[int, str]] = None + day: Optional[Union[int, str]] = None + week: Optional[Union[int, str]] = None + day_of_week: Optional[Union[int, str]] = None + hour: Optional[Union[int, str]] = None + minute: Optional[Union[int, str]] = None + second: Optional[Union[int, str]] = None + start_date: Optional[Union[datetime, str]] = None + end_date: Optional[Union[datetime, str]] = None + timezone: Optional[Union[tzinfo, str]] = None + jitter: Optional[int] = None + + @model_validator(mode="before") + @classmethod + def at_least_one_datetime_value(cls, data: Any) -> Any: + if isinstance(data, dict): + need_one_of = ["year", "month", "day", "week", "day_of_week", "hour", "minute", "second"] + assert len(data.keys() & need_one_of), f"At least one of {need_one_of} must be set" + return data + + +class JobConfig(RootModel): + """ + Configuration for a single job. + """ + + root: dict[str, JobCronConfig] + + def __iter__(self): + return iter(self.root) + + def __getitem__(self, key): + return self.root[key] + + +class JobsRootConfig(RootModel): + root: dict[str, JobConfig] + + def __iter__(self): + return iter(self.root) + + def __getitem__(self, key): + return self.root[key] + + class JobRunnerConfig(RootConfig, LoggingConfigMixin, StatsConfigMixin, MsgConfigMixin, AmConfigMixin): """ Configuration for the user-cleaner service. 
@@ -18,8 +74,7 @@ class JobRunnerConfig(RootConfig, LoggingConfigMixin, StatsConfigMixin, MsgConfi log_format: str = "{asctime} | {levelname:7} | {hostname} | {name:35} | {module:10} | {message}" mongo_uri: str = "" status_cache_seconds: int = 10 - jobs: dict = {} - + jobs: Optional[JobsRootConfig] = None gnap_auth_data: GNAPClientAuthData @field_validator("application_root") diff --git a/src/eduid/workers/job_runner/scheduler.py b/src/eduid/workers/job_runner/scheduler.py index 557805940..55e2d8441 100644 --- a/src/eduid/workers/job_runner/scheduler.py +++ b/src/eduid/workers/job_runner/scheduler.py @@ -14,7 +14,12 @@ def schedule_jobs(self, context: Context): environment = context.config.environment - jobs_config = context.config.jobs + if context.config.jobs is None: + context.logger.info(f"No jobs configured for {context.worker_name} running {environment}") + return + + jobs_config = context.config.jobs.model_dump() + context.logger.debug(f"jobs_config: {jobs_config}") jobs: dict = {} From 706dc19d7212e263a719e8356aa331f8727b36c0 Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Thu, 29 Aug 2024 15:53:08 +0200 Subject: [PATCH 12/15] add apscheduler constraint before next major release --- requirements/worker_requirements.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/worker_requirements.in b/requirements/worker_requirements.in index d291d8433..0f09ca02b 100644 --- a/requirements/worker_requirements.in +++ b/requirements/worker_requirements.in @@ -1,4 +1,4 @@ -r main.in -c main.txt jinja2 -apscheduler +apscheduler<4 # Next major version has API breaking changes \ No newline at end of file From 0e79627982d41f04f337d21ef803caeaf9d6da9c Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Thu, 29 Aug 2024 17:11:31 +0200 Subject: [PATCH 13/15] specify possible exceptions --- src/eduid/workers/job_runner/jobs/skv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/eduid/workers/job_runner/jobs/skv.py 
b/src/eduid/workers/job_runner/jobs/skv.py index b25798a42..25c35ae82 100644 --- a/src/eduid/workers/job_runner/jobs/skv.py +++ b/src/eduid/workers/job_runner/jobs/skv.py @@ -1,5 +1,6 @@ from eduid.common.misc.timeutil import utc_now from eduid.common.models.amapi_user import Reason, Source, UserUpdateResponse, UserUpdateTerminateRequest +from eduid.common.rpc.exceptions import MsgTaskFailed, NoNavetData from eduid.common.rpc.msg_relay import DeregisteredCauseCode, NavetData from eduid.userdb.exceptions import UserDoesNotExist from eduid.userdb.meta import CleanerType @@ -52,7 +53,7 @@ def check_skv_users(context: Context): terminate_user(context, user) else: context.logger.debug(f"User with eppn {user.eppn} is still registered") - except: + except (MsgTaskFailed, NoNavetData): context.logger.error(f"Failed to get Navet data for user with eppn {user.eppn}") # The user will be requeued for a new check by the next run of gather_skv_users else: From 0b891feff6935b364fee61f79d2f11b1b4eae1bc Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Fri, 30 Aug 2024 08:01:18 +0200 Subject: [PATCH 14/15] remove redundant exception class, clean up imports --- src/eduid/workers/job_runner/jobs/skv.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/eduid/workers/job_runner/jobs/skv.py b/src/eduid/workers/job_runner/jobs/skv.py index 25c35ae82..cf95e9252 100644 --- a/src/eduid/workers/job_runner/jobs/skv.py +++ b/src/eduid/workers/job_runner/jobs/skv.py @@ -1,6 +1,5 @@ from eduid.common.misc.timeutil import utc_now -from eduid.common.models.amapi_user import Reason, Source, UserUpdateResponse, UserUpdateTerminateRequest -from eduid.common.rpc.exceptions import MsgTaskFailed, NoNavetData +from eduid.common.rpc.exceptions import MsgTaskFailed from eduid.common.rpc.msg_relay import DeregisteredCauseCode, NavetData from eduid.userdb.exceptions import UserDoesNotExist from eduid.userdb.meta import CleanerType @@ -53,7 +52,7 @@ def check_skv_users(context: 
Context): terminate_user(context, user) else: context.logger.debug(f"User with eppn {user.eppn} is still registered") - except (MsgTaskFailed, NoNavetData): + except MsgTaskFailed: context.logger.error(f"Failed to get Navet data for user with eppn {user.eppn}") # The user will be requeued for a new check by the next run of gather_skv_users else: From 5e20c1f120dbb80d80be0f6309f9bec843dab459 Mon Sep 17 00:00:00 2001 From: Lasse Yledahl Date: Fri, 30 Aug 2024 13:58:29 +0200 Subject: [PATCH 15/15] redo type support and adjust logging --- src/eduid/workers/job_runner/config.py | 31 +++++------------------ src/eduid/workers/job_runner/scheduler.py | 31 +++++++++++++++-------- 2 files changed, 26 insertions(+), 36 deletions(-) diff --git a/src/eduid/workers/job_runner/config.py b/src/eduid/workers/job_runner/config.py index 14e9a8ecb..b48164cd3 100644 --- a/src/eduid/workers/job_runner/config.py +++ b/src/eduid/workers/job_runner/config.py @@ -1,8 +1,8 @@ import logging from datetime import datetime, tzinfo -from typing import Any, Optional, Union +from typing import Any, NewType, Optional, Union -from pydantic import BaseModel, ConfigDict, Field, RootModel, field_validator, model_validator +from pydantic import BaseModel, ConfigDict, field_validator, model_validator from eduid.common.clients.gnap_client.base import GNAPClientAuthData from eduid.common.config.base import AmConfigMixin, LoggingConfigMixin, MsgConfigMixin, RootConfig, StatsConfigMixin @@ -41,28 +41,9 @@ def at_least_one_datetime_value(cls, data: Any) -> Any: return data -class JobConfig(RootModel): - """ - Configuration for a single job. 
- """ - - root: dict[str, JobCronConfig] - - def __iter__(self): - return iter(self.root) - - def __getitem__(self, key): - return self.root[key] - - -class JobsRootConfig(RootModel): - root: dict[str, JobConfig] - - def __iter__(self): - return iter(self.root) - - def __getitem__(self, key): - return self.root[key] +EnvironmentOrWorkerName = NewType("EnvironmentOrWorkerName", str) +JobName = NewType("JobName", str) +JobsConfig = NewType("JobsConfig", dict[EnvironmentOrWorkerName, dict[JobName, JobCronConfig]]) class JobRunnerConfig(RootConfig, LoggingConfigMixin, StatsConfigMixin, MsgConfigMixin, AmConfigMixin): @@ -74,7 +55,7 @@ class JobRunnerConfig(RootConfig, LoggingConfigMixin, StatsConfigMixin, MsgConfi log_format: str = "{asctime} | {levelname:7} | {hostname} | {name:35} | {module:10} | {message}" mongo_uri: str = "" status_cache_seconds: int = 10 - jobs: Optional[JobsRootConfig] = None + jobs: Optional[JobsConfig] = None gnap_auth_data: GNAPClientAuthData @field_validator("application_root") diff --git a/src/eduid/workers/job_runner/scheduler.py b/src/eduid/workers/job_runner/scheduler.py index 55e2d8441..44d1e1ea3 100644 --- a/src/eduid/workers/job_runner/scheduler.py +++ b/src/eduid/workers/job_runner/scheduler.py @@ -1,6 +1,7 @@ from apscheduler.schedulers.asyncio import AsyncIOScheduler from eduid.common.config.exceptions import BadConfiguration +from eduid.workers.job_runner.config import EnvironmentOrWorkerName, JobCronConfig from eduid.workers.job_runner.context import Context from eduid.workers.job_runner.jobs.skv import check_skv_users, gather_skv_users @@ -12,31 +13,39 @@ def schedule_jobs(self, context: Context): Schedule all jobs configured for host or environment """ - environment = context.config.environment + environment = EnvironmentOrWorkerName(context.config.environment) + worker_name = EnvironmentOrWorkerName(context.worker_name) if context.config.jobs is None: - context.logger.info(f"No jobs configured for {context.worker_name} running 
{environment}") + context.logger.info("No jobs configured in config") return - jobs_config = context.config.jobs.model_dump() + jobs_config = context.config.jobs context.logger.debug(f"jobs_config: {jobs_config}") jobs: dict = {} + # Gather jobs for current environment and worker in a dictionary if environment in jobs_config: - context.logger.info(f"Setting up jobs for environment {environment}") - context.logger.info(f"Setting up jobs {jobs_config[environment]}") + context.logger.debug(f"Setting up jobs for environment {environment}") + context.logger.debug(f"Setting up jobs {jobs_config[environment]}") jobs.update(jobs_config[environment]) - if context.worker_name in jobs_config: - context.logger.info(f"Setting up jobs for worker {context.worker_name}") - context.logger.info(f"Setting up jobs {jobs_config[context.worker_name]}") - jobs.update(jobs_config[context.worker_name]) + if worker_name in jobs_config: + context.logger.debug(f"Setting up jobs for worker {worker_name}") + context.logger.debug(f"Setting up jobs {jobs_config[worker_name]}") + jobs.update(jobs_config[worker_name]) - context.logger.info(f"Setting up jobs {jobs} for {context.worker_name} running {environment}") + if len(jobs) == 0: + context.logger.info(f"No jobs configured for {worker_name} running {environment}") + return + + context.logger.info(f"Setting up jobs {jobs} for {worker_name} running {environment}") + # Add all configured jobs to the scheduler for job in jobs: - params = jobs[job] + cron_settings: JobCronConfig = jobs[job] + params = cron_settings.model_dump() context.logger.info(f"Setting up job {job} with parameters {params}") match job: