Commit ba4114d2 authored by Dylan Griffith's avatar Dylan Griffith

Refactor ingress IP address waiting code (#42643)

parent f0b27f9b
......@@ -5,6 +5,7 @@ module Clusters
include ::Clusters::Concerns::ApplicationCore
include ::Clusters::Concerns::ApplicationStatus
include AfterCommitQueue
default_value_for :ingress_type, :nginx
default_value_for :version, :nginx
......@@ -15,6 +16,15 @@ module Clusters
IP_ADDRESS_FETCH_RETRIES = 3
state_machine :status do
before_transition any => [:installed] do |application|
application.run_after_commit do
ClusterWaitForIngressIpAddressWorker.perform_in(
ClusterWaitForIngressIpAddressWorker::INTERVAL, application.name, application.id, IP_ADDRESS_FETCH_RETRIES)
end
end
end
def chart
'stable/nginx-ingress'
end
......@@ -26,11 +36,6 @@ module Clusters
def install_command
Gitlab::Kubernetes::Helm::InstallCommand.new(name, chart: chart, chart_values_file: chart_values_file)
end
def post_install
ClusterWaitForIngressIpAddressWorker.perform_in(
ClusterWaitForIngressIpAddressWorker::INTERVAL, name, id, IP_ADDRESS_FETCH_RETRIES)
end
end
end
end
......@@ -23,10 +23,6 @@ module Clusters
def name
self.class.application_name
end
def post_install
# Override for any extra work that needs to be done after install
end
end
end
end
......
module Clusters
module Applications
class CheckIngressIpAddressService < BaseHelmService
Error = Class.new(StandardError)
LEASE_TIMEOUT = 3.seconds.to_i
def execute(retries_remaining)
return if app.external_ip
return unless try_obtain_lease
def execute
return true if app.external_ip
return true unless try_obtain_lease
service = get_service
if service.status.loadBalancer.ingress
resolve_external_ip(service)
else
retry_if_necessary(retries_remaining)
false
end
rescue KubeException
retry_if_necessary(retries_remaining)
rescue KubeException => e
raise Error, "#{e.class}: #{e.message}"
end
private
......@@ -28,19 +30,12 @@ module Clusters
end
def resolve_external_ip(service)
app.update!( external_ip: service.status.loadBalancer.ingress[0].ip)
app.update!(external_ip: service.status.loadBalancer.ingress[0].ip)
end
def get_service
kubeclient.get_service('ingress-nginx-ingress-controller', Gitlab::Kubernetes::Helm::NAMESPACE)
end
def retry_if_necessary(retries_remaining)
if retries_remaining > 0
ClusterWaitForIngressIpAddressWorker.perform_in(
ClusterWaitForIngressIpAddressWorker::INTERVAL, app.name, app.id, retries_remaining - 1)
end
end
end
end
end
......@@ -20,7 +20,6 @@ module Clusters
def on_success
app.make_installed!
app.post_install
ensure
remove_installation_pod
end
......
......@@ -4,11 +4,22 @@ class ClusterWaitForIngressIpAddressWorker
include ClusterApplications
INTERVAL = 10.seconds
TIMEOUT = 20.minutes
def perform(app_name, app_id, retries_remaining)
find_application(app_name, app_id) do |app|
Clusters::Applications::CheckIngressIpAddressService.new(app).execute(retries_remaining)
result = Clusters::Applications::CheckIngressIpAddressService.new(app).execute
retry_if_necessary(app_name, app_id, retries_remaining) unless result
end
rescue Clusters::Applications::CheckIngressIpAddressService::Error => e
retry_if_necessary(app_name, app_id, retries_remaining)
raise e
end
private
def retry_if_necessary(app_name, app_id, retries_remaining)
if retries_remaining > 0
self.class.perform_in(INTERVAL, app_name, app_id, retries_remaining - 1)
end
end
end
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class AddExternalIpToClustersApplicationsIngress < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false
# When a migration requires downtime you **must** uncomment the following
# constant and define a short and easy to understand explanation as to why the
# migration requires downtime.
# DOWNTIME_REASON = ''
# When using the methods "add_concurrent_index", "remove_concurrent_index" or
# "add_column_with_default" you must disable the use of transactions
# as these methods can not run in an existing transaction.
# When using "add_concurrent_index" or "remove_concurrent_index" methods make sure
# that either of them is the _only_ method called in the migration,
# any other changes should go in a separate migration.
# This ensures that upon failure _only_ the index creation or removing fails
# and can be retried or reverted easily.
#
# To disable transactions uncomment the following line and remove these
# comments:
# disable_ddl_transaction!
def change
add_column :clusters_applications_ingress, :external_ip, :string
end
......
......@@ -31,7 +31,7 @@
}
},
"status_reason": { "type": ["string", "null"] },
"external_ip": { "type": ["string", null] }
"external_ip": { "type": ["string", "null"] }
},
"required" : [ "name", "status" ]
}
......
......@@ -4,16 +4,19 @@ describe Clusters::Applications::Ingress do
it { is_expected.to belong_to(:cluster) }
it { is_expected.to validate_presence_of(:cluster) }
include_examples 'cluster application specs', described_class
before do
allow(ClusterWaitForIngressIpAddressWorker).to receive(:perform_in)
end
describe '#post_install' do
let(:application) { create(:clusters_applications_ingress, :installed) }
include_examples 'cluster application specs', described_class
describe '#make_installed!' do
before do
allow(ClusterWaitForIngressIpAddressWorker).to receive(:perform_in)
application.post_install
application.make_installed!
end
let(:application) { create(:clusters_applications_ingress, :installing) }
it 'schedules a ClusterWaitForIngressIpAddressWorker' do
expect(ClusterWaitForIngressIpAddressWorker).to have_received(:perform_in)
.with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', application.id, 3)
......
require 'spec_helper'
describe Clusters::Applications::CheckIngressIpAddressService do
subject { service.execute }
let(:application) { create(:clusters_applications_ingress, :installed) }
let(:service) { described_class.new(application) }
let(:kubeclient) { double(::Kubeclient::Client, get_service: kube_service) }
let(:ingress) { [{ ip: '111.222.111.222' }] }
let(:exclusive_lease) { instance_double(Gitlab::ExclusiveLease, try_obtain: true) }
let(:kube_service) do
::Kubeclient::Resource.new(
{
......@@ -14,9 +19,6 @@ describe Clusters::Applications::CheckIngressIpAddressService do
}
)
end
let(:kubeclient) { double(::Kubeclient::Client, get_service: kube_service) }
let(:ingress) { [{ ip: '111.222.111.222' }] }
let(:exclusive_lease) { instance_double(Gitlab::ExclusiveLease, try_obtain: true) }
before do
allow(application.cluster).to receive(:kubeclient).and_return(kubeclient)
......@@ -28,10 +30,10 @@ describe Clusters::Applications::CheckIngressIpAddressService do
describe '#execute' do
context 'when the ingress ip address is available' do
it 'updates the external_ip for the app and does not schedule another worker' do
expect(ClusterWaitForIngressIpAddressWorker).not_to receive(:perform_in)
it { is_expected.to eq(true) }
service.execute(1)
it 'updates the external_ip for the app' do
subject
expect(application.external_ip).to eq('111.222.111.222')
end
......@@ -40,21 +42,7 @@ describe Clusters::Applications::CheckIngressIpAddressService do
context 'when the ingress ip address is not available' do
let(:ingress) { nil }
it 'it schedules another worker with 1 less retry' do
expect(ClusterWaitForIngressIpAddressWorker)
.to receive(:perform_in)
.with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', application.id, 0)
service.execute(1)
end
context 'when no more retries remaining' do
it 'does not schedule another worker' do
expect(ClusterWaitForIngressIpAddressWorker).not_to receive(:perform_in)
service.execute(0)
end
end
it { is_expected.to eq(false) }
end
context 'when the exclusive lease cannot be obtained' do
......@@ -64,22 +52,24 @@ describe Clusters::Applications::CheckIngressIpAddressService do
.and_return(false)
end
it { is_expected.to eq(true) }
it 'does not call kubeclient' do
expect(kubeclient).not_to receive(:get_service)
subject
service.execute(1)
expect(kubeclient).not_to have_received(:get_service)
end
end
context 'when there is already an external_ip' do
let(:application) { create(:clusters_applications_ingress, :installed, external_ip: '001.111.002.111') }
it 'does nothing' do
expect(kubeclient).not_to receive(:get_service)
it { is_expected.to eq(true) }
service.execute(1)
it 'does not call kubeclient' do
subject
expect(application.external_ip).to eq('001.111.002.111')
expect(kubeclient).not_to have_received(:get_service)
end
end
......@@ -88,12 +78,9 @@ describe Clusters::Applications::CheckIngressIpAddressService do
allow(kubeclient).to receive(:get_service).and_raise(KubeException.new(500, 'something blew up', nil))
end
it 'it schedules another worker with 1 less retry' do
expect(ClusterWaitForIngressIpAddressWorker)
.to receive(:perform_in)
.with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', application.id, 0)
service.execute(1)
it 'it raises Clusters::Applications::CheckIngressIpAddressServiceError' do
expect { subject }
.to raise_error(Clusters::Applications::CheckIngressIpAddressService::Error, "KubeException: something blew up")
end
end
end
......
......@@ -2,19 +2,94 @@ require 'spec_helper'
describe ClusterWaitForIngressIpAddressWorker do
describe '#perform' do
let(:service) { instance_double(Clusters::Applications::CheckIngressIpAddressService) }
let(:service) { instance_double(Clusters::Applications::CheckIngressIpAddressService, execute: true) }
let(:application) { instance_double(Clusters::Applications::Ingress) }
let(:worker) { described_class.new }
it 'finds the application and calls CheckIngressIpAddressService#execute' do
expect(worker).to receive(:find_application).with('ingress', 117).and_yield(application)
expect(Clusters::Applications::CheckIngressIpAddressService)
before do
allow(worker)
.to receive(:find_application)
.with('ingress', 117)
.and_yield(application)
allow(Clusters::Applications::CheckIngressIpAddressService)
.to receive(:new)
.with(application)
.and_return(service)
expect(service).to receive(:execute).with(2)
allow(described_class)
.to receive(:perform_in)
end
it 'finds the application and calls CheckIngressIpAddressService#execute' do
worker.perform('ingress', 117, 2)
expect(service).to have_received(:execute)
end
context 'when the service succeeds' do
it 'does not schedule another worker' do
worker.perform('ingress', 117, 2)
expect(described_class)
.not_to have_received(:perform_in)
end
end
context 'when the service fails' do
before do
allow(service)
.to receive(:execute)
.and_return(false)
end
context 'when there are retries remaining' do
it 'schedules another worker with 1 less retry' do
worker.perform('ingress', 117, 2)
expect(described_class)
.to have_received(:perform_in)
.with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', 117, 1)
end
end
context 'when there are no retries_remaining' do
it 'does not schedule another worker' do
worker.perform('ingress', 117, 0)
expect(described_class)
.not_to have_received(:perform_in)
end
end
end
context 'when the update raises exception' do
before do
allow(service)
.to receive(:execute)
.and_raise(Clusters::Applications::CheckIngressIpAddressService::Error, "something went wrong")
end
context 'when there are retries remaining' do
it 'schedules another worker with 1 less retry and re-raises the error' do
expect { worker.perform('ingress', 117, 2) }
.to raise_error(Clusters::Applications::CheckIngressIpAddressService::Error, "something went wrong")
expect(described_class)
.to have_received(:perform_in)
.with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', 117, 1)
end
end
context 'when there are no retries_remaining' do
it 'does not schedule another worker but re-raises the error' do
expect { worker.perform('ingress', 117, 0) }
.to raise_error(Clusters::Applications::CheckIngressIpAddressService::Error, "something went wrong")
expect(described_class)
.not_to have_received(:perform_in)
end
end
end
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment