# add mets, cleaning #24
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow name and triggers.
name: gtrepo

on:
  # Run when a version-style tag such as v1.2.3 is pushed.
  push:
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+'
  # Allow manual runs; the optional input supplies the release tag name.
  workflow_dispatch:
    inputs:
      tag-name:
        description: Name of the release tag
jobs:
  # job1 runs the unitTest1 stylesheet mode and reports, through its job
  # outputs, whether ghout/pathtest.md ended up empty or non-empty.
  job1:
    name: uniTest
    runs-on: ubuntu-latest
    permissions:
      checks: write
      contents: write
    # Map a step output to a job output
    outputs:
      output1: ${{ steps.step4.outputs.test }}
      output2: ${{ steps.step4.outputs.test2 }}
    steps:
      - name: Git checkout
        id: step1
        uses: actions/checkout@v4
      # Installation Styles and Saxon
      - name: install analyse xsl-styles
        id: step2
        run: |
          git clone https://github.com/tboenig/gt-repo-scripts.git
          mv gt-repo-scripts/scripts scripts/
          rm -r gt-repo-scripts
      - name: Download and install saxon
        id: step3
        run: |
          wget https://github.com/Saxonica/Saxon-HE/releases/download/SaxonHE12-3/SaxonHE12-3J.zip
          unzip SaxonHE12-3J.zip
      # Installation and Directories
      - name: make gh-pages_out
        run: mkdir ghout
      # NOTE(review): action is referenced by the moving `master` branch;
      # consider pinning to a release tag or commit SHA.
      - name: transform METADATA.yml to METADATA.json
        uses: mikefarah/yq@master
        with:
          cmd: |
            yq -o=json METADATA.yml > METADATA.json
      - name: PathTest
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_unitTest.xsl \
          output=unitTest1 \
          -s:scripts/gt-overview_unitTest.xsl -o:ghout/pathtest.md
        shell: bash
      # Test GT-Page Folder Repo Structure
      # Sets test=empty when pathtest.md is missing or has size zero,
      # and test2=full when it exists with content.
      - name: Empty
        id: step4
        run: |
          [ -s ghout/pathtest.md ] || echo "test=empty" >> "$GITHUB_OUTPUT"
          [ ! -s ghout/pathtest.md ] || echo "test2=full" >> "$GITHUB_OUTPUT"
      # Error Logview
      - name: uniTestError
        id: step5
        if: ${{steps.step4.outputs.test2 == 'full'}}
        # Print the report to the job log; `cat` is used because the runner
        # is non-interactive and a pager adds nothing there.
        run: |
          cat ghout/pathtest.md
# job2: generates overview pages, metadata, METS/BagIt files, the release
# upload, the commits to main and the gh-pages deployment.
  job2:
    name: analyse_and_makebagit
    needs: job1
    # Run only when job1 reported ghout/pathtest.md as missing or empty.
    if: ${{needs.job1.outputs.output1 == 'empty'}}
    runs-on: ubuntu-latest
    permissions:
      checks: write
      contents: write
    steps:
      # TAG_NAME comes from the pushed ref unless a tag-name input was given.
      - name: Using tag name from ref name
        if: github.event.inputs.tag-name == ''
        run: echo "TAG_NAME=$GITHUB_REF_NAME" >> "$GITHUB_ENV"
      - name: Using tag name from input param
        if: github.event.inputs.tag-name != ''
        # The user-supplied input is handed over as an environment variable
        # rather than expanded into the script text, so it cannot inject
        # shell commands.
        env:
          INPUT_TAG_NAME: ${{ github.event.inputs.tag-name }}
        run: echo "TAG_NAME=$INPUT_TAG_NAME" >> "$GITHUB_ENV"
      - name: Git checkout
        uses: actions/checkout@v4
      # Installation Styles
      - name: install analyse xsl-styles
        run: |
          git clone https://github.com/tboenig/gt-repo-scripts.git
          mv gt-repo-scripts/scripts scripts/
          rm -r gt-repo-scripts
      # Transfer megarules.xml
      - name: install megarules.xml
        run: |
          git clone --branch gh-pages --single-branch https://github.com/OCR-D/gt-MufiLevelRules.git
          mv gt-MufiLevelRules/rules/megalevelrules.xml scripts/megalevelrules.xml
          rm -r gt-MufiLevelRules
      # Installation GT-Labelling Documentation
      - name: install labeling
        run: |
          git clone https://github.com/tboenig/gt-guidelines.git
      # Installation and Directories
      # Refresh the package index once so this and the later apt-get install
      # do not fail on stale package lists; -y keeps the install unattended.
      - name: install jq
        run: |
          sudo apt-get update
          sudo apt-get install -y jq
      - name: Download and install saxon
        run: |
          wget https://github.com/Saxonica/Saxon-HE/releases/download/SaxonHE12-3/SaxonHE12-3J.zip
          unzip SaxonHE12-3J.zip
      - name: make metadata_out
        run: mkdir metadata_out
      - name: make ocrdzip_out
        run: mkdir ocrdzip_out
      - name: make gh-pages_out
        run: mkdir ghout
      - name: make readme_out
        run: sh scripts/readmefolder.sh
      - name: readme.xml file
        run: sh scripts/xreadme.sh
      # Transformation and analyzing
      # NOTE(review): action is referenced by the moving `master` branch;
      # consider pinning to a release tag or commit SHA.
      - name: transform METADATA.yml to METADATA.json
        uses: mikefarah/yq@master
        with:
          cmd: |
            yq -o=json METADATA.yml > METADATA.json
      # In the Saxon steps below TAG_NAME is read from the step environment
      # (set via GITHUB_ENV above) and quoted, instead of being expanded
      # into the script text by the expression engine.
      - name: transform METADATA and make GT-Overview
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=METADATA repoBase="$TAG_NAME" repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag="$TAG_NAME" \
          -s:scripts/gt-overview_metadata.xsl -o:ghout/metadata.md
        shell: bash
      - name: make Compressed table view
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=TABLE repoBase="$TAG_NAME" repoName=$GITHUB_REPOSITORY \
          -s:scripts/gt-overview_metadata.xsl -o:ghout/table.md
        shell: bash
      - name: detailed table view
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=OVERVIEW repoBase="$TAG_NAME" repoName=$GITHUB_REPOSITORY \
          -s:scripts/gt-overview_metadata.xsl -o:ghout/overview.md
        shell: bash
      - name: leveling the volume and documents
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-level_parser.xsl \
          repoName=$GITHUB_REPOSITORY \
          -s:scripts/gt-level_parser.xsl -o:ghout/overview-level.md
        shell: bash
      # Writes scripts/mets.sh, which the "make validMets" step runs later.
      - name: generate mets.sh
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=METS repoBase="$TAG_NAME" repoName=$GITHUB_REPOSITORY \
          -s:scripts/gt-overview_metadata.xsl -o:scripts/mets.sh
        shell: bash
      - name: generate Metadata JSON file
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=METAJSON repoBase="$TAG_NAME" repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag="$TAG_NAME" \
          -s:scripts/gt-overview_metadata.xsl -o:metadata_out/metadata_l.json
        shell: bash
      # Pretty-print the raw JSON with jq, publish a copy in ghout and drop
      # the intermediate file.
      - name: format json file and copy to gh branch
        run: |
          jq '.' metadata_out/metadata_l.json > metadata_out/metadata.json
          cp metadata_out/metadata.json ghout/
          rm metadata_out/metadata_l.json
      - name: generate README
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=README repoBase="$TAG_NAME" repoName=$GITHUB_REPOSITORY \
          -s:scripts/gt-overview_metadata.xsl -o:README.md
        shell: bash
      - name: generate METADATA_htr_united.yml
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-metadata_htr_united.xsl repoName=${{ github.event.repository.name }} \
          -s:scripts/gt-metadata_htr_united.xsl
        shell: bash
      - name: generate METS Volume File
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=METSvolume repoBase="$TAG_NAME" repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag="$TAG_NAME" \
          -s:scripts/gt-overview_metadata.xsl -o:metadata_out/mets.xml
        shell: bash
      - name: generate release download List
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=download repoBase="$TAG_NAME" repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag="$TAG_NAME" \
          -s:scripts/gt-overview_metadata.xsl -o:ghout/download.txt
        shell: bash
      - name: delete fileGrp DEFAULT
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=METSdefault repoBase="$TAG_NAME" repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag="$TAG_NAME" \
          -s:scripts/gt-overview_metadata.xsl
        shell: bash
      - name: generate CITATION.cff
        run: |
          java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
          output=CITATION repoBase="$TAG_NAME" repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag="$TAG_NAME" \
          -s:scripts/gt-overview_metadata.xsl -o:rawCITATION.cff
        shell: bash
      # Re-indent the generated CITATION file with yq and remove the raw one.
      - name: formating CITATION.cff
        uses: mikefarah/yq@master
        with:
          cmd: |
            yq -I4 rawCITATION.cff > CITATION.cff
            rm rawCITATION.cff
      # Make metadata.md reachable as index.md in the published folder.
      - name: Index-link
        run: |
          cd ghout
          ln -s metadata.md index.md
      # Mets handling, Install OCR-D and Bagit
      - name: del invalidMets
        run: sh -ex scripts/data_mets.sh
        shell: bash
      # OCR-D is installed into a virtualenv; later steps re-activate it
      # because each step starts a fresh shell.
      - name: install ocrd, make validMets and bagit
        run: |
          sudo apt-get install -y python3 imagemagick libgeos-dev
          python3 -m venv venv
          source venv/bin/activate
          pip install -U pip 'setuptools>=61'
          pip install ocrd
          ocrd --version
      - name: make validMets
        run: |
          source venv/bin/activate
          sh -ex scripts/mets.sh
      - name: make bagit
        run: |
          source venv/bin/activate
          sh scripts/data_structure.sh
      - name: copy css styles, js javascript and yml files to ghout
        run: |
          cp scripts/table_hide.css ghout/
          cp scripts/levelparser.css ghout/
          cp scripts/lang.js ghout/
          cp scripts/_config.yml ghout/
      # NOTE(review): action is referenced by the moving `master` branch;
      # consider pinning to a release tag or commit SHA.
      - name: archive the metadata files from metadata_out folder
        uses: thedoctor0/zip-release@master
        with:
          filename: metadata-v${{ github.run_number }}.zip
          path: 'metadata_out'
      - name: copy metadata.zip to ocrdzip_out
        run: |
          cp metadata-v${{ github.run_number }}.zip ocrdzip_out/
      # Publish every zip in ocrdzip_out as a release asset for TAG_NAME.
      - name: Upload Release
        uses: ncipollo/release-action@v1
        if: env.TAG_NAME != ''
        with:
          allowUpdates: true
          artifacts: 'ocrdzip_out/*.zip'
          artifactContentType: application/zip
          body: |
            <dl>
            <dt>Version:</dt>
            <dd>${{ env.TAG_NAME }}</dd>
            <dt>Info:</dt>
            <dd>
            To make use of Ground Truth, please download the provided zip files.<br/>
            The 'ocrd.zip' files are ocr-d-bagit files.<br/>
            The 'metadata-v${{ github.run_number }}.zip' file contains metadata for the Ground Truth corpus in both METS and JSON format.<br/>
            The 'mets.xml' file enumerates all the documents and BagIt files contained within.<br/>
            The bagits correspond to the <a href="https://ocr-d.de/de/spec/ocrd_zip.html">OCR-D Bagit Spec</a>.<br/>
            The source-code-zip and source-code-tar.gz files only provide metadata, citations, license and readme information.<br/>
            If you want to use the source files, please clone the repository.
            </dd>
            </dl>
          name: Release ${{ github.run_number }}_${{ env.TAG_NAME }}
          omitNameDuringUpdate: true
          tag: ${{ env.TAG_NAME }}
          token: ${{ secrets.GITHUB_TOKEN }}
      # The three commit steps below push generated files back to main; an
      # unchanged file is tolerated through the `|| echo` fallback.
      - name: Commit README
        run: |
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add README.md
          git commit -m "[Automatic] Update readme files" || echo "Nothing to update"
          git push origin HEAD:main
      - name: Commit METADATA_htr_united.yml
        run: |
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add ${{ github.event.repository.name }}_METADATA_htr_united.yml
          git commit -m "[Automatic] Update METADATA_htr_united.yml files" || echo "Nothing to update"
          git push origin HEAD:main
      - name: Commit CITATION.cff
        run: |
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add CITATION.cff
          git commit -m "[Automatic] Update CITATION.cff files" || echo "Nothing to update"
          git push origin HEAD:main
      - name: Deploy GT-Overview to GitHub Pages 🚀
        uses: JamesIves/github-pages-deploy-action@v4
        with:
          branch: gh-pages  # The branch the action should deploy to.
          folder: ghout  # The folder the action should deploy.