diff --git a/lib/ecs_deploy/auto_scaler/auto_scaling_group_config.rb b/lib/ecs_deploy/auto_scaler/auto_scaling_group_config.rb index 15590ed..36057f1 100644 --- a/lib/ecs_deploy/auto_scaler/auto_scaling_group_config.rb +++ b/lib/ecs_deploy/auto_scaler/auto_scaling_group_config.rb @@ -63,10 +63,14 @@ def cluster_resource_manager ) end + # NOTE: InstanceDrainer calls this method when it receives spot instance interruption warnings def detach_instances(instance_ids:, should_decrement_desired_capacity:) return if instance_ids.empty? - instance_ids.each_slice(MAX_DETACHABLE_INSTANCE_COUNT) do |ids| + # detach only detachable instances + detachable_instance_ids = instance_ids & describe_detachable_instances.map(&:instance_id) + + detachable_instance_ids.each_slice(MAX_DETACHABLE_INSTANCE_COUNT) do |ids| client.detach_instances( auto_scaling_group_name: name, instance_ids: ids, @@ -181,7 +185,9 @@ def describe_detachable_instances client.describe_auto_scaling_groups({ auto_scaling_group_names: [name] }).auto_scaling_groups[0].instances.reject do |i| # The lifecycle state of terminated instances becomes "Detaching", "Terminating", "Terminating:Wait", or "Terminating:Proceed", # and we can't detach instances in such a state. - i.lifecycle_state.start_with?("Terminating") || i.lifecycle_state == "Detaching" + i.lifecycle_state.start_with?("Terminating") || i.lifecycle_state == "Detaching" || + # EC2 instance sometimes stays in Pending state for more than 10 minutes + i.lifecycle_state == "Pending" end end diff --git a/spec/ecs_deploy/auto_scaler/auto_scaling_group_config_spec.rb b/spec/ecs_deploy/auto_scaler/auto_scaling_group_config_spec.rb index dca848b..49c4388 100644 --- a/spec/ecs_deploy/auto_scaler/auto_scaling_group_config_spec.rb +++ b/spec/ecs_deploy/auto_scaler/auto_scaling_group_config_spec.rb @@ -308,4 +308,85 @@ end end end + + describe "#detach_instances" do + subject(:auto_scaling_group_config) do + described_class.new({ + "name" => asg_name, + "region" => "ap-northeast-1", + "buffer" => 0, + "services" => [], + }, Logger.new(nil)) + end + + let(:asg_name) { "asg_name" } + let(:auto_scaling_group_instances) do + [ + Aws::AutoScaling::Types::Instance.new( + instance_id: "i-000000", + availability_zone: "ap-notrheast-1a", + lifecycle_state: "InService", + health_status: "Healthy", + launch_template: "launch_template", + protected_from_scale_in: true, + ), + Aws::AutoScaling::Types::Instance.new( + instance_id: "i-222222", + availability_zone: "ap-notrheast-1c", + lifecycle_state: "Standby", + health_status: "Healthy", + launch_template: "launch_template", + protected_from_scale_in: true, + ), + Aws::AutoScaling::Types::Instance.new( + instance_id: "i-333333", + availability_zone: "ap-notrheast-1c", + lifecycle_state: "Terminating", + health_status: "", + launch_template: "launch_template", + protected_from_scale_in: true, + ), + Aws::AutoScaling::Types::Instance.new( + instance_id: "i-444444", + availability_zone: "ap-notrheast-1c", + lifecycle_state: "Pending", + health_status: "", + launch_template: "launch_template", + protected_from_scale_in: true, + ), + ] + end + + before do + allow_any_instance_of(Aws::AutoScaling::Client).to receive(:describe_auto_scaling_groups).with( + auto_scaling_group_names: [asg_name], + ).and_return( + double( + auto_scaling_groups: [ + double( + desired_capacity: auto_scaling_group_instances.size, + instances: auto_scaling_group_instances.map do |i| + double( + availability_zone: i.availability_zone, + instance_id: i.instance_id, + lifecycle_state: i.lifecycle_state, + ) + end, + ) + ] + ) + ) + end + + it "detaches only detachable instances" do + expect_any_instance_of(Aws::AutoScaling::Client).to receive(:detach_instances).with( + auto_scaling_group_name: asg_name, + instance_ids: ["i-000000", "i-222222"], + should_decrement_desired_capacity: false, + ) + + auto_scaling_group_config.detach_instances(instance_ids: ["i-000000", "i-222222", "i-333333"], should_decrement_desired_capacity: false) + end + end + end