From aac4e71b0b5df63a3f01d90ec175f04c6c4a8e67 Mon Sep 17 00:00:00 2001 From: Jelmer van der Linde Date: Thu, 9 Nov 2023 13:44:34 +0000 Subject: [PATCH 1/2] Build and run tests --- .github/workflows/ubuntu.yml | 34 ++++++++++++++++++++++++++++++++ tests/checksums.txt | 1 + tests/hello-world.json.expected | 1 + tests/hello-world.warc.gz | Bin 0 -> 2975 bytes 4 files changed, 36 insertions(+) create mode 100644 .github/workflows/ubuntu.yml create mode 100644 tests/checksums.txt create mode 100644 tests/hello-world.json.expected create mode 100644 tests/hello-world.warc.gz diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml new file mode 100644 index 0000000..6b1c5aa --- /dev/null +++ b/.github/workflows/ubuntu.yml @@ -0,0 +1,34 @@ +name: Ubuntu + +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - name: dependencies + run: sudo apt-get install -y build-essential libboost-test-dev libboost-program-options-dev cmake zlib1g-dev libbz2-dev liblzma-dev libicu-dev uchardet libuchardet-dev libzip-dev wget + - name: cmake + run: | + cmake -E make_directory build + cd build + cmake .. + - name: Compile + working-directory: build + run: cmake --build . -j2 + - name: Unit Tests + working-directory: build + run: ctest -j2 + # - name: Regression Tests + # working-directory: tests + # run: | + # ../build/bin/warc2text -v --json hello-world.warc.gz > hello-world.json + # shasum -c checksums.txt diff --git a/tests/checksums.txt b/tests/checksums.txt new file mode 100644 index 0000000..3e75f44 --- /dev/null +++ b/tests/checksums.txt @@ -0,0 +1 @@ +28d6c5e68f9e8c3275015ff1bd082fce3382441a hello-world.json diff --git a/tests/hello-world.json.expected b/tests/hello-world.json.expected new file mode 100644 index 0000000..9ff0d4c --- /dev/null +++ b/tests/hello-world.json.expected @@ -0,0 +1 @@ +{"f":"./hello-world.warc.gz","o":907,"s":723,"rs":11,"ps":12,"l":"en","u":"http://iipc.github.io/warc-specifications/primers/web-archive-formats/hello-world.txt","c":"text/plain","ts":"2015-07-08T21:55:13Z","p":"Hello World\n"} diff --git a/tests/hello-world.warc.gz b/tests/hello-world.warc.gz new file mode 100644 index 0000000000000000000000000000000000000000..872d294b0aeb404651c791affb2132d21e3fc40b GIT binary patch literal 2975 zcmZ9NcRUmh7{+bxu58&Zl$miQXZs;LWOG8u-t&xP9%W}IM`Y#9WY5D97m|I#p>Xyn zhsaO=_I^IkKhK}<_j#WeoJd1Kay4Y+*GT+5Xh^z9Nl5g_uJp<}F<+87ZjbU(Ef;mn z2WgYjR}(&gWIKYy=A-83-tf9je#zBhoN4Or^3~X2q&W>9lidp$l?@Y*rng&?Z)Z}t zc+Ex%n!28YJnU4}#+EEB5~Ki&kFZ(S%&u?CFjddl>$)KYdhN)3fd|XXh+EoPC;M%H z0?T&+Ah0f((iki~WKC>{&eSsdJ!s7T+NuLXUx!wzB-V!q;d(BNA>@duU??*J0!*m{ zVxtqk7OSI}CNCGHS|5!_SR59x>qfG>Xr-t>A6mH;K1|HnHo>fe3C5N?wzf9nwhf0C z=iW!08J*lT!9BDu_SmN^a~7uLiJ#>jF7v44)pURLIotq2Tbh})*5e~hqu)dHh)XjjFDrK%4>8Y=embp% z4F61PojveCNkN5dH1*R~l%13aR@FvgGyBhG875s78E07RtB__JUeA4F10=F4?Kncj z^owggruBVK(aCSk`v!_2C5NvF#%GwU>lk7IQ&Ok1k{vjCHhLap% ze3xcK*Q)@Ea?+-C-82LXF3TOHb5ia&UU28SBuhk>)%1S1yYy_Ay<2!11qNfw&s*pU z&Ict9j$b+5`FvTkB?^Wuh{)ovLR5W<3g5%(e*2#!0<&akj zoiW+uqvDbifbMyUR=X?Z7045Jk0YN|IP%A3-yF{+Rqe)2cRs$;l{^ZTAg^UbQYiW; zsp2?85|Ka7si^NSeg6yXUzihDp32p6dZt_XzmcRzEWKV<-e_fF2lVT?%Jq%Z&$sxJ zCnGzA-$s_PvGEEXbl3Ximo-DJu+Q7WM&}+tUm~18NlBlv-k~Kg|L@Ruqy1GOk9Y7K z7AS5ERN8gaB!(Fp3LXr7Q{nw-KN#QN2T1DciIWLpQa$h2esoNrn3c3l*sZ?=F;E{# z>0MBi`6k^QVkBGbAA7=FG&`UK2>;&Ao8Bc#G&n+9pBM@b2x8!0DTq z6zq8D8r`ED&V&AC2MPGYWV62crueZ=57g^W!*W?+W0*Rp9V;KW_-!2|&^iJ6dx9E! zKdDj&t6&sK4qwBw(>l%i>i=rl!Ga}O?t@t<9r`2#Ys?SA3ts(Dz?J+t_G;*`()6E! zPhfJ+v#a!x?m;eLW*2kq*6O|iP>FP*ROR-i;uZIfj^SpIH*(1trEAXtvk9C$-caU@ zI}Yf&PYCGZbT1WU-9rbvx2#xKa{#p}gv;uM5*kyOrU&_bE-+Gl;)N|Gc%uqLcFQ(A zg3kC{-BJzv08Tp`H}ndX*clod<@&hZ5N&GfdM{WV9cS)LLWw`>QcM5|SaMg|ikl-` zA2`0JM5Y%%UFy}6Ke|?1TA{9qrk?m@Lsdj~0v;@L`LLdqQTJiln++JWP5B|uHkjrj zOghMBoff!QS5vS-M~%^(2p;K)Fnfo%REDRW{;Om{h(8+TgG*;wbR;eAKuB}?_PG0($~>kOoRTa^O(`k z{3=?#4f9;c1*cj6U-l2({t!M?i8fMOhf4(m6=oksL6Zo}f@hzYvi?T*-$=_}^;7wk zveFE_qa~RUOWG%N=TW#DR-lm&u}ydj=>>Y{re-S9^j7=22Ny&KK;qEiagg6$tL?7r zo02qogS0(QD$3!l@qIj2rgKU^wXfI2vzfAW1#P`#xkwzzj)$bIP?Y{BnMbNBLeKxCOhxcdO*d$6`RVmC<8Ed}CitWqnzps+mD*ZS;0PdU7EVwa>yog3llv z{YBMEQEqPjehM->_t*F`F@1ig1v#m;s2`z_2&?b1ESXjP&!ocv;Ra39>{|o7`K>a+ z3L689(gp+5USs5prv-Z#4J3Q(Yn-u)UoZcz=-<_SxbkGK8o~ONwKwkXV}5l)$3Uc> z=S9CX)k|==Tl7`GSeOucuN6(<&YQyJ{PFTRm0>k*Ve9B1|LDwQPFnM6N_obxsE~3f zh73h@4+mR_wld{xk8gZ}UT~3@P6-`5;ak47ru!CK97$tLIwUfpQUbWGjz(ClEqO)i z;9zj-BV|6}So%q#PYN&W2g81T%}uN%9U$2#fgs2SwN`|o@GztxD^N1znhCgwv1>UaYb7G1A{H&A3)n#<@Nq?IKAH&o(P+s8zL@GXI5z zCkrZvByCL*^#QW#%y4Up)daig}GY%%W)O0!Ubu74t<)4Pkcwop_wJJ=-* zuB~sKLBd*>#HA(VVH$LFOEVKM;e?tSwaz}93q@U7ZNGEoDr$T+Z_uljXAquJ`OGoV zwwX3O2{Q3?$9s6RcX#A(bt+c>4qbGCb1kjm9*3j9pkUwtF{JcDWGKpDn0BLTv8O!s zEHyf&UXVcb6A^Nt3=+seN07#qy%+vV=3inaS6)3i2?^53+F)LiIlOuD5})%@FOOUP zLzt0bm zk0(>Z%p7xs#Z12l*_={oU8xb{lOm`dj_CIK%+QE_RAxA1pAA9jEaZ?u1Ic6M#lBO} z|5UDs&XO#3GdrWCOiff6dj}bo6%@)(QtKZar*LFnR!Z)@#W&7`v4A>yCwbg~ZIE*V zk~5MX(x+NP^`6lKbcMXDoVRE<|BMYP`GKhCZtPA+L^Cc7w6?oC`9=lD3mIVhn!6ln z&*8v95!0ZN9TW+>AiMA(3D^uwSI3uljk~q#J{Z>uj;`=ERR2x70l%!C5cya+P|c|{r!3@gIz;v2{}Fs9_ek( kI@&VR8MAR8P{+jC8Vi-$cku!B>>o#v%uZbX`J0{p0WQ~=#sB~S literal 0 HcmV?d00001 From cbbacd50a6f4f8fe37fea454f495e896c7c0eae4 Mon Sep 17 00:00:00 2001 From: Jelmer van der Linde Date: Fri, 10 Nov 2023 17:42:48 +0000 Subject: [PATCH 2/2] Run base64 tests as well --- .github/workflows/ubuntu.yml | 16 +++++++++++----- tests/checksums.txt | 4 ++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 6b1c5aa..e8f82b1 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -27,8 +27,14 @@ jobs: - name: Unit Tests working-directory: build run: ctest -j2 - # - name: Regression Tests - # working-directory: tests - # run: | - # ../build/bin/warc2text -v --json hello-world.warc.gz > hello-world.json - # shasum -c checksums.txt + - name: Regression Tests + working-directory: tests + run: | + ../build/bin/warc2text \ + -f url,text,mime,html \ + -o output \ + hello-world.warc.gz + # ../build/bin/warc2text \ + # --json \ + # hello-world.warc.gz > hello-world.json + shasum -c checksums.txt diff --git a/tests/checksums.txt b/tests/checksums.txt index 3e75f44..4ed29f8 100644 --- a/tests/checksums.txt +++ b/tests/checksums.txt @@ -1 +1,5 @@ 28d6c5e68f9e8c3275015ff1bd082fce3382441a hello-world.json +84a4f15ca77e1aafe8f33e5b94c6010e23cdf15b output/en/html.gz +8baa00c2a8c791da25c52a638711fc1692ef266f output/en/mime.gz +9728d8b84aca564f9f1907825711926b69e83028 output/en/text.gz +5a6bead02cc1b50ca216665a71146420d6c8bb55 output/en/url.gz