From 5d7bf0bad948d7755bfaf0886321fd893dfa870c Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 2 Oct 2024 03:19:53 +0200 Subject: [PATCH] remove index option from scripts and reimplement it for backup The index option is meant to control the index that has all of our ES data. But in the future, each document type will need to be in a separate index. So the idea of having a global configurable index doesn't make sense. The backup script needs to be able to operate on all indexes, and works generically with any type. Reimplement an index option for it, allowing multiple indexes to be specified. --- bin/cron/author.sh | 5 +-- bin/cron/backups.sh | 10 ------ lib/MetaCPAN/Model.pm | 2 +- lib/MetaCPAN/Role/Script.pm | 26 +------------- lib/MetaCPAN/Script/Backup.pm | 65 ++++++++++++++++++++-------------- lib/MetaCPAN/Types/TypeTiny.pm | 7 ++++ 6 files changed, 49 insertions(+), 66 deletions(-) diff --git a/bin/cron/author.sh b/bin/cron/author.sh index c94913643..e3f279773 100755 --- a/bin/cron/author.sh +++ b/bin/cron/author.sh @@ -3,7 +3,4 @@ # export ES_SCRIPT_INDEX=author_01 # /home/metacpan/bin/metacpan-api-carton-exec bin/metacpan author --index author_01 -export ES_SCRIPT_INDEX=cpan_v1_01 -/home/metacpan/bin/metacpan-api-carton-exec bin/metacpan author --index cpan_v1_01 - -unset ES_SCRIPT_INDEX \ No newline at end of file +/home/metacpan/bin/metacpan-api-carton-exec bin/metacpan author diff --git a/bin/cron/backups.sh b/bin/cron/backups.sh index 0c68600e9..16ed9f950 100755 --- a/bin/cron/backups.sh +++ b/bin/cron/backups.sh @@ -1,16 +1,6 @@ #!/bin/sh -#export ES_SCRIPT_INDEX=favorite_01 -#/home/metacpan/bin/metacpan-api-carton-exec bin/metacpan backup --index favorite_01 --type favorite - -#export ES_SCRIPT_INDEX=author_01 -#/home/metacpan/bin/metacpan-api-carton-exec bin/metacpan backup --index author_01 --type author - -export ES_SCRIPT_INDEX=cpan_v1_01 /home/metacpan/bin/metacpan-api-carton-exec bin/metacpan backup --index cpan_v1_01 --type favorite /home/metacpan/bin/metacpan-api-carton-exec bin/metacpan backup --index cpan_v1_01 --type author -export ES_SCRIPT_INDEX=user /home/metacpan/bin/metacpan-api-carton-exec bin/metacpan backup --index user - -unset ES_SCRIPT_INDEX \ No newline at end of file diff --git a/lib/MetaCPAN/Model.pm b/lib/MetaCPAN/Model.pm index 01058e600..3272711f8 100644 --- a/lib/MetaCPAN/Model.pm +++ b/lib/MetaCPAN/Model.pm @@ -44,7 +44,7 @@ analyzer edge => ( index cpan => ( namespace => 'MetaCPAN::Document', - alias_for => ( $ENV{'ES_SCRIPT_INDEX'} || 'cpan_v1_01' ), + alias_for => 'cpan_v1_01', shards => 3 ); diff --git a/lib/MetaCPAN/Role/Script.pm b/lib/MetaCPAN/Role/Script.pm index d09df44d5..5eb94d89b 100644 --- a/lib/MetaCPAN/Role/Script.pm +++ b/lib/MetaCPAN/Role/Script.pm @@ -86,16 +86,6 @@ has model => ( traits => ['NoGetopt'], ); -has index => ( - reader => '_index', - is => 'ro', - isa => Str, - lazy => 1, - default => 'cpan', - documentation => - 'Index to use, defaults to "cpan" (when used: also export ES_SCRIPT_INDEX)', -); - has cluster_info => ( isa => HashRef, traits => ['Hash'], @@ -158,20 +148,6 @@ has queue => ( documentation => 'add indexing jobs to the minion queue', ); -sub BUILDARGS { - my ( $self, @args ) = @_; - my %args = @args == 1 ? %{ $args[0] } : @args; - - if ( exists $args{index} ) { - die - "when setting --index, please export ES_SCRIPT_INDEX to the same value\n" - unless $ENV{ES_SCRIPT_INDEX} - and $args{index} eq $ENV{ES_SCRIPT_INDEX}; - } - - return \%args; -} - sub handle_error { my ( $self, $error, $die_always ) = @_; @@ -194,7 +170,7 @@ sub print_error { sub index { my $self = shift; - return $self->model->index( $self->_index ); + return $self->model->index('cpan'); } sub _build_model { diff --git a/lib/MetaCPAN/Script/Backup.pm b/lib/MetaCPAN/Script/Backup.pm index 9f0c7c3ca..700150f8a 100644 --- a/lib/MetaCPAN/Script/Backup.pm +++ b/lib/MetaCPAN/Script/Backup.pm @@ -8,7 +8,7 @@ use Cpanel::JSON::XS qw( decode_json encode_json ); use DateTime (); use IO::Zlib (); use Log::Contextual qw( :log :dlog ); -use MetaCPAN::Types::TypeTiny qw( Bool Int Path Str ); +use MetaCPAN::Types::TypeTiny qw( Bool Int Path Str CommaSepOption ); use Moose; use Try::Tiny qw( catch try ); @@ -22,6 +22,15 @@ has batch_size => ( 'Number of documents to restore in one batch, defaults to 100', ); +has index => ( + reader => '_index', + is => 'ro', + isa => CommaSepOption, + coerce => 1, + default => 'cpan', + documentation => 'ES indexes to backup, defaults to "cpan"', +); + has type => ( is => 'ro', isa => Str, @@ -61,34 +70,38 @@ sub run { return $self->run_restore if $self->restore; my $es = $self->es; - $self->index->refresh; - - my $filename = join( '-', - DateTime->now->strftime('%F'), - grep {defined} $self->index->name, - $self->type ); - - my $file = $self->home->child( qw(var backup), "$filename.json.gz" ); - $file->parent->mkpath unless ( -e $file->parent ); - my $fh = IO::Zlib->new( "$file", 'wb4' ); - - my $scroll = $es->scroll_helper( - index => $self->index->name, - $self->type ? ( type => $self->type ) : (), - size => $self->size, - fields => [qw(_parent _source)], - scroll => '1m', - body => { - sort => '_doc', - }, - ); - log_info { 'Backing up ', $scroll->total, ' documents' }; + for my $index ( @{ $self->_index } ) { + + $self->es->indices->refresh( index => $index ); - while ( my $result = $scroll->next ) { - print $fh encode_json($result), $/; + my $filename = join( '-', + DateTime->now->strftime('%F'), + grep {defined} $index, + $self->type ); + + my $file = $self->home->child( qw(var backup), "$filename.json.gz" ); + $file->parent->mkpath unless ( -e $file->parent ); + my $fh = IO::Zlib->new( "$file", 'wb4' ); + + my $scroll = $es->scroll_helper( + index => $index, + $self->type ? ( type => $self->type ) : (), + size => $self->size, + fields => [qw(_parent _source)], + scroll => '1m', + body => { + sort => '_doc', + }, + ); + + log_info { 'Backing up ', $scroll->total, ' documents' }; + + while ( my $result = $scroll->next ) { + print $fh encode_json($result), $/; + } + close $fh; } - close $fh; log_info {'done'}; } diff --git a/lib/MetaCPAN/Types/TypeTiny.pm b/lib/MetaCPAN/Types/TypeTiny.pm index 5985af286..d63ac5808 100644 --- a/lib/MetaCPAN/Types/TypeTiny.pm +++ b/lib/MetaCPAN/Types/TypeTiny.pm @@ -18,6 +18,8 @@ use Type::Library -base, -declare => ( qw( Logger HashRefCPANMeta + + CommaSepOption ) ); use Type::Utils qw( as coerce declare extends from via ); @@ -126,4 +128,9 @@ if ( eval { require MooseX::Getopt; 1 } ) { } } +declare CommaSepOption, as ArrayRef [ StrMatch [qr{^[^, ]+$}] ]; +coerce CommaSepOption, from ArrayRef [Str], via { + return [ map split(/\s*,\s*/), @$_ ]; +}; + 1;