From 83c5ff67696a9c7783f798d8a14b816bc445ce20 Mon Sep 17 00:00:00 2001 From: Andrii Nikitin Date: Tue, 24 Oct 2023 14:10:00 +0200 Subject: [PATCH] fix handling combinations of metalink / zsync / mirrorlist / symlinks --- lib/MirrorCache/Datamodule.pm | 2 +- lib/MirrorCache/WebAPI/Plugin/Dir.pm | 18 ++++++---- .../WebAPI/Plugin/RenderFileFromMirror.pm | 2 +- .../03-headquarter-subsidiaries-hashes.sh | 33 +++++++++++++++---- t/environ/03-headquarter-subsidiaries.sh | 23 +++++++------ 5 files changed, 54 insertions(+), 24 deletions(-) diff --git a/lib/MirrorCache/Datamodule.pm b/lib/MirrorCache/Datamodule.pm index 3001b91c..974a9d49 100644 --- a/lib/MirrorCache/Datamodule.pm +++ b/lib/MirrorCache/Datamodule.pm @@ -397,7 +397,7 @@ sub _init_headers($self) { $self->accept_meta4(1) if $headers->accept =~ m/\bapplication\/metalink4/i; $self->accept_zsync(1) if $headers->accept =~ m/\bapplication\/x-zsync/i; - $self->accept_all(1) if $headers->accept =~ m/\*\/\*/; + $self->accept_all(1) if scalar($headers->accept =~ m/\*\/\*/) && scalar($headers->accept ne '*/*'); } sub _init_req($self) { diff --git a/lib/MirrorCache/WebAPI/Plugin/Dir.pm b/lib/MirrorCache/WebAPI/Plugin/Dir.pm index 8927a281..f5f8de97 100644 --- a/lib/MirrorCache/WebAPI/Plugin/Dir.pm +++ b/lib/MirrorCache/WebAPI/Plugin/Dir.pm @@ -260,12 +260,17 @@ sub _redirect_project_ln_geo { sub _redirect_normalized { my $dm = shift; - return undef if $dm->accept; + return undef if $dm->accept || $dm->btih || $dm->torrent || $dm->magnet; my ($path, $trailing_slash, $original_path) = $dm->path; return undef if $path eq '/'; - $path = $path . '.metalink' if $dm->metalink; - $path = $path . '.meta4' if $dm->meta4; - return $dm->c->redirect_to($dm->route . $path . $trailing_slash . $dm->query1) unless $original_path eq $path || ($dm->extra && !$dm->metalink && !$dm->meta4); + my $path1 = $path; + $path1 = $path1 . '.zsync' if $dm->zsync; + $path1 = $path1 . '.metalink' if $dm->metalink; + $path1 = $path1 . '.meta4' if $dm->meta4; + $path1 = $path1 . '.mirrorlist' if $dm->mirrorlist; + $dm->c->log->error('DIR::redirect_normalized', $path, $path1, $original_path, $dm->original_path, $dm->_original_path) if $MCDEBUG; + return $dm->c->redirect_to($dm->route . $path . $trailing_slash . $dm->query1) unless $original_path eq $path || $original_path eq $path1 || ($dm->accept); + $dm->c->log->error('DIR::redirect_normalized2') if $MCDEBUG; return undef; } @@ -465,7 +470,8 @@ sub _guess_what_to_render { if ($dm->extra) { $c->log->error($c->dumper('guess what to render extra : ', $dm->extra, $dm->accept_all)) if $MCDEBUG; return $root->render_file($dm, $dm->original_path) if $dm->accept_all && !$trailing_slash && $dm->accept; - if (!$root->is_remote && !$dm->accept) { # for local we can check if it is the file we requested + + if (!$root->is_remote && $dm->accept_all) { # for local we can check if it is the file we requested return $root->render_file($dm, $dm->original_path) if $root->is_file($dm->original_path); } # the file is unknown, we cannot show generate meither mirrorlist or metalink @@ -701,7 +707,7 @@ sub _render_small { $c->log->error('DIR::render_small3') if $MCDEBUG; my ($path, undef) = $dm->path; my $full; - return $root->render_file_if_small($dm, $path, $small_file_size) unless $root_nfs; + return $root->render_file_if_small($dm, $path, $small_file_size) unless $root->is_remote; $c->log->error('DIR::render_small4') if $MCDEBUG; my $original_path = $dm->path; return undef if $original_path ne $path || $dm->extra; diff --git a/lib/MirrorCache/WebAPI/Plugin/RenderFileFromMirror.pm b/lib/MirrorCache/WebAPI/Plugin/RenderFileFromMirror.pm index 12d7c3e8..b6ae9e2e 100644 --- a/lib/MirrorCache/WebAPI/Plugin/RenderFileFromMirror.pm +++ b/lib/MirrorCache/WebAPI/Plugin/RenderFileFromMirror.pm @@ -153,7 +153,7 @@ sub register { return undef; } $c->log->error($c->dumper('RENDER FILE_ID', $file->{id})) if $MCDEBUG; - $c->res->headers->vary('Accept, COUNTRY'); + $c->res->headers->vary('Accept, COUNTRY, X-COUNTRY'); my $baseurl; # just hostname + eventual urldir (without folder and file) my $fullurl; # baseurl with path and filename if ($dm->metalink || $dm->meta4 || $dm->torrent || $dm->zsync || $dm->magnet) { diff --git a/t/environ/03-headquarter-subsidiaries-hashes.sh b/t/environ/03-headquarter-subsidiaries-hashes.sh index cdd527f8..bb25ab4d 100755 --- a/t/environ/03-headquarter-subsidiaries-hashes.sh +++ b/t/environ/03-headquarter-subsidiaries-hashes.sh @@ -66,6 +66,7 @@ echo Step 2. Add more files to folder1 and make sure only new hashes are transfe for i in 9 6 7 8; do echo 1111111112 > mc$i/dt/folder1/file1.1.dat echo 1111111112 > mc$i/dt/folder1/file4.1.dat + echo 13 > mc$i/dt/folder1/file4.1.dat.zsync mc$i/backstage/job -e folder_sync -a '["/folder1"]' mc$i/backstage/shoot mc$i/backstage/shoot -q hashes @@ -76,6 +77,8 @@ done echo Step 3. Add media symlinks and make sure they are imported properly for i in 9 6 7 8; do ( cd mc$i/dt/folder1/ && ln -s file4.1.dat file-Media.iso ) + ( cd mc$i/dt/folder1/ && ln -s file4.1.dat xcurr.dat ) + ( cd mc$i/dt/folder1/ && ln -s file4.1.dat.zsync xcurr.dat.zsync ) mc$i/backstage/job -e folder_sync -a '["/folder1"]' mc$i/backstage/shoot mc$i/backstage/shoot -q hashes @@ -101,21 +104,39 @@ for i in 6 7 8 9; do mc$i/backstage/shoot MIRRORCACHE_HASHES_IMPORT_RETRY_DELAY=$DELAY mc$i/backstage/shoot -q hashes if test $i != 9; then - test -z $(mc$i/sql "select md5 from hash where file_id=5") - test -z $(mc$i/sql "select md5 from hash where file_id=6") + test -z $(mc$i/sql "select md5 from hash where file_id=8") + test -z $(mc$i/sql "select md5 from hash where file_id=9") else - test $(mc$i/sql "select md5 from hash where file_id=5") == $(mc$i/sql 'select md5 from hash where file_id=6') - test $(mc$i/sql "select md5 from hash where file_id=3") != $(mc$i/sql 'select md5 from hash where file_id=6') + test $(mc$i/sql "select md5 from hash where file_id=8") == $(mc$i/sql 'select md5 from hash where file_id=9') + test $(mc$i/sql "select md5 from hash where file_id=3") != $(mc$i/sql 'select md5 from hash where file_id=8') fi done sleep $DELAY +mc9/curl -I /download/folder1/file-Media.iso | grep 'Location: /download/folder1/file4.1.dat' +mc9/curl -I /download/folder1/file-Media.iso.metalink | grep 'Location: /download/folder1/file4.1.dat?metalink=1' +mc9/curl -I /download/folder1/file-Media.iso.mirrorlist | grep 'Location: /download/folder1/file4.1.dat?mirrorlist=1' + +mc9/curl -I /download/folder1/xcurr.dat | grep "Location: http://$na_address/download/folder1/xcurr.dat" +mc9/curl -I /download/folder1/xcurr.dat.metalink | grep "Location: http://$na_address/download/folder1/xcurr.dat.metalink" +mc9/curl -I /download/folder1/xcurr.dat?meta4 | grep "Location: http://$na_address/download/folder1/xcurr.dat.meta4" +mc9/curl -I /download/folder1/xcurr.dat.mirrorlist | grep '200 OK' + +mc9/curl -I /download/folder1/xcurr.dat.zsync.mirrorlist | grep '200 OK' + +mc6/curl -IL /download/folder1/xcurr.dat | grep "200 OK" +mc6/curl -IL /download/folder1/xcurr.dat.metalink | grep "200 OK" +mc6/curl -IL /download/folder1/xcurr.dat?meta4 | grep "200 OK" +mc6/curl -IL /download/folder1/xcurr.dat.mirrorlist | grep '200 OK' + + + # now the hashes on subsidiaries should be retried and match the headquarter for i in 6 7 8; do mc$i/backstage/shoot -q hashes - test $(mc$i/sql "select md5 from hash where file_id=5") == $(mc9/sql 'select md5 from hash where file_id=6') - test $(mc$i/sql "select md5 from hash where file_id=6") == $(mc9/sql 'select md5 from hash where file_id=6') + test $(mc$i/sql "select md5 from hash where file_id=8") == $(mc9/sql 'select md5 from hash where file_id=9') + test $(mc$i/sql "select md5 from hash where file_id=3") != $(mc9/sql 'select md5 from hash where file_id=9') done echo success diff --git a/t/environ/03-headquarter-subsidiaries.sh b/t/environ/03-headquarter-subsidiaries.sh index 35718119..7102fcf4 100755 --- a/t/environ/03-headquarter-subsidiaries.sh +++ b/t/environ/03-headquarter-subsidiaries.sh @@ -9,7 +9,8 @@ set -ex SMALL_FILE_SIZE=3 HUGE_FILE_SIZE=9 -FAKEURL="notexists${RANDOM}.com" +FAKEURL1="notexists${RANDOM}.com" +FAKEURL2="notexists${RANDOM}.com" for i in 6 7 8 9; do x=$(environ mc$i $(pwd)) @@ -18,6 +19,8 @@ for i in 6 7 8 9; do echo -n 123 > $x/dt/folder1/filesmall1.1.dat echo -n 123456789 > $x/dt/folder1/filehuge1.1.dat echo '[]' > $x/dt/folder1/file.json + mkdir $x/dt/folder1/media.1 + echo 1 > $x/dt/folder1/media.1/media eval mc$i=$x done @@ -33,7 +36,9 @@ as_interface=127.0.0.4 # deploy db $mc9/gen_env MIRRORCACHE_TOP_FOLDERS='folder1 folder2 folder3' \ MIRRORCACHE_HUGE_FILE_SIZE=$HUGE_FILE_SIZE \ - MIRRORCACHE_REDIRECT_HUGE=$FAKEURL \ + MIRRORCACHE_REDIRECT=$FAKEURL1 \ + MIRRORCACHE_REDIRECT_HUGE=$FAKEURL2 \ + MIRRORCACHE_ROOT_NFS="$mc9/dt" \ MIRRORCACHE_SMALL_FILE_SIZE=$SMALL_FILE_SIZE $mc9/backstage/shoot @@ -43,15 +48,13 @@ $mc9/db/sql "insert into subsidiary(hostname,region) select '$eu_address','eu'" $mc9/db/sql "insert into subsidiary(hostname,region) select '$as_address','as'" $mc9/start -$mc6/gen_env MIRRORCACHE_REGION=na $mc6/start -$mc7/gen_env MIRRORCACHE_REGION=eu $mc7/start -$mc8/gen_env MIRRORCACHE_REGION=as $mc8/start echo the root folder is not redirected curl --interface $eu_interface -Is http://$hq_address/ | grep '200 OK' +curl --interface $eu_interface -Is http://$hq_address/download/folder1/media.1/media | grep '200 OK' echo check redirection from headquarter curl --interface $na_interface -Is http://$hq_address/download/folder1/filebig1.1.dat | grep "Location: http://$na_address/download/folder1/filebig1.1.dat" @@ -81,8 +84,8 @@ echo check small files are not redirected curl --interface $na_interface -Is http://$hq_address/download/folder1/filebig1.1.dat | grep "Location: http://$na_address/download/folder1/filebig1.1.dat" curl --interface $na_interface -Is http://$hq_address/download/folder1/filesmall1.1.dat | grep "200 OK" -echo check huge files are redirected to FAKEURL -curl --interface $hq_interface -Is http://$hq_address/download/folder1/filehuge1.1.dat | grep "Location: http://$FAKEURL/folder1/filehuge1.1.dat" +echo check huge files are redirected to FAKEURL2 +curl --interface $hq_interface -Is http://$hq_address/download/folder1/filehuge1.1.dat | grep "Location: http://$FAKEURL2/folder1/filehuge1.1.dat" echo test cache-control curl --interface $na_interface -Is http://$hq_address/download/folder1/filebig1.1.dat | grep -i 'cache-control' @@ -114,11 +117,11 @@ test $rc -gt 0 echo unless /download is asked explicitly $mc9/curl -H 'User-Agent: Chromium/xyz' /download/folder1/ | grep file.json -echo check metalink/mirrorlist for huge files reference FAKEURL, but need to scan them first +echo check metalink/mirrorlist for huge files reference FAKEURL2, but need to scan them first $mc9/backstage/job -e folder_sync_schedule_from_misses $mc9/backstage/job -e folder_sync_schedule $mc9/backstage/shoot -curl --interface $hq_interface -s http://$hq_address/download/folder1/filehuge1.1.dat.metalink | grep "http://$FAKEURL/folder1/filehuge1.1.dat" -curl --interface $hq_interface -s http://$hq_address/download/folder1/filehuge1.1.dat.mirrorlist | grep "http://$FAKEURL/folder1/filehuge1.1.dat" +curl --interface $hq_interface -s http://$hq_address/download/folder1/filehuge1.1.dat.metalink | grep "http://$FAKEURL2/folder1/filehuge1.1.dat" +curl --interface $hq_interface -s http://$hq_address/download/folder1/filehuge1.1.dat.mirrorlist | grep "http://$FAKEURL2/folder1/filehuge1.1.dat" echo success