diff --git a/Gemfile b/Gemfile
index 515b42dc3847cb0e3b6761b5e51fc45d53465fe8..9ef4b79137553699f9cdfc47b0532f4fc26fa5ce 100644
--- a/Gemfile
+++ b/Gemfile
@@ -304,6 +304,12 @@ group :metrics do
   gem 'raindrops', '~> 0.18'
 end
 
+group :tracing do
+  # OpenTracing
+  gem 'opentracing', '~> 0.4.3'
+  gem 'jaeger-client', '~> 0.10.0'
+end
+
 group :development do
   gem 'foreman', '~> 0.84.0'
   gem 'brakeman', '~> 4.2', require: false
diff --git a/Gemfile.lock b/Gemfile.lock
index fda6e8ff9753e90c202dc4c19b9c312d2b0777cf..122e22af167680f286934d4d643bc1154f42607a 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -392,6 +392,9 @@ GEM
       cause
       json
     ipaddress (0.8.3)
+    jaeger-client (0.10.0)
+      opentracing (~> 0.3)
+      thrift
     jira-ruby (1.4.1)
       activesupport
       multipart-post
@@ -547,6 +550,7 @@ GEM
       activesupport
       nokogiri (>= 1.4.4)
       omniauth (~> 1.0)
+    opentracing (0.4.3)
     org-ruby (0.9.12)
       rubypants (~> 0.2)
     orm_adapter (0.5.0)
@@ -870,6 +874,7 @@ GEM
       rack (>= 1, < 3)
     thor (0.19.4)
     thread_safe (0.3.6)
+    thrift (0.11.0.0)
     tilt (2.0.8)
     timecop (0.8.1)
     timfel-krb5-auth (0.8.3)
@@ -1040,6 +1045,7 @@ DEPENDENCIES
   httparty (~> 0.13.3)
   icalendar
   influxdb (~> 0.2)
+  jaeger-client (~> 0.10.0)
   jira-ruby (~> 1.4)
   jquery-atwho-rails (~> 1.3.2)
   js_regex (~> 2.2.1)
@@ -1080,6 +1086,7 @@ DEPENDENCIES
   omniauth-shibboleth (~> 1.3.0)
   omniauth-twitter (~> 1.4)
   omniauth_crowd (~> 2.2.0)
+  opentracing (~> 0.4.3)
   org-ruby (~> 0.9.12)
   peek (~> 1.0.1)
   peek-gc (~> 0.0.2)
diff --git a/changelogs/unreleased/an-opentracing-factory.yml b/changelogs/unreleased/an-opentracing-factory.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c04736f3e63bc64760ba108481e592b0c984fb45
--- /dev/null
+++ b/changelogs/unreleased/an-opentracing-factory.yml
@@ -0,0 +1,5 @@
+---
+title: Conditionally initialize the global opentracing tracer
+merge_request: 24186
+author:
+type: other
diff --git a/config/initializers/tracing.rb b/config/initializers/tracing.rb
new file mode 100644
index 0000000000000000000000000000000000000000..be95f30d075386627069179eec5d13007ece1db3
--- /dev/null
+++ b/config/initializers/tracing.rb
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+if Gitlab::Tracing.enabled?
+  require 'opentracing'
+
+  # In multi-processed clustered architectures (puma, unicorn) don't
+  # start tracing until the worker processes are spawned. This works
+  # around issues when the opentracing implementation spawns threads
+  Gitlab::Cluster::LifecycleEvents.on_worker_start do
+    tracer = Gitlab::Tracing::Factory.create_tracer(Gitlab.process_name, Gitlab::Tracing.connection_string)
+    OpenTracing.global_tracer = tracer if tracer
+  end
+end
diff --git a/lib/gitlab/tracing.rb b/lib/gitlab/tracing.rb
new file mode 100644
index 0000000000000000000000000000000000000000..3c4db42ac066807ab8a18d0942f2ea68d67e49c8
--- /dev/null
+++ b/lib/gitlab/tracing.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Tracing
+    # Only enable tracing when the `GITLAB_TRACING` env var is configured. Note that we avoid using ApplicationSettings since
+    # the same environment variable needs to be configured for Workhorse, Gitaly and any other components which
+    # emit tracing. Since other components may start before Rails, and may not have access to ApplicationSettings,
+    # an env var makes more sense.
+    def self.enabled?
+      connection_string.present?
+    end
+
+    def self.connection_string
+      ENV['GITLAB_TRACING']
+    end
+  end
+end
diff --git a/lib/gitlab/tracing/factory.rb b/lib/gitlab/tracing/factory.rb
new file mode 100644
index 0000000000000000000000000000000000000000..fc714164353e1a8caf7b1f91999c085a406ad248
--- /dev/null
+++ b/lib/gitlab/tracing/factory.rb
@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+
+require "cgi"
+require "uri"
+
+module Gitlab
+  module Tracing
+    # Builds a tracer from a connection string of the form
+    # `opentracing://DRIVER?OPTION=VALUE&OPTION=VALUE`
+    class Factory
+      OPENTRACING_SCHEME = "opentracing"
+
+      # Creates a tracer for the driver named in the connection string.
+      #
+      # Returns nil when the connection string is blank. Also returns nil,
+      # after printing a warning, when the string is invalid, names an
+      # unknown driver, or the driver raises while building the tracer.
+      def self.create_tracer(service_name, connection_string)
+        return unless connection_string.present?
+
+        begin
+          opentracing_details = parse_connection_string(connection_string)
+          driver_name = opentracing_details[:driver_name]
+
+          case driver_name
+          when "jaeger"
+            JaegerFactory.create_tracer(service_name, opentracing_details[:options])
+          else
+            raise "Unknown driver: #{driver_name}"
+          end
+        rescue => e
+          # Can't create the tracer? Warn and continue sans tracer
+          warn "Unable to instantiate tracer: #{e}"
+          nil
+        end
+      end
+
+      # Splits the connection string into the driver name (the URI host)
+      # and the driver options (the query string). Raises when the URI is
+      # not a valid tracing connection string.
+      def self.parse_connection_string(connection_string)
+        parsed = URI.parse(connection_string)
+
+        unless valid_uri?(parsed)
+          raise "Invalid tracing connection string"
+        end
+
+        {
+          driver_name: parsed.host,
+          options: parse_query(parsed.query)
+        }
+      end
+      private_class_method :parse_connection_string
+
+      # Converts a query string into a hash with symbol keys. When a
+      # parameter is repeated, only its first value is kept.
+      def self.parse_query(query)
+        return {} unless query
+
+        CGI.parse(query).symbolize_keys.transform_values(&:first)
+      end
+      private_class_method :parse_query
+
+      # A valid URI uses the `opentracing` scheme, has a driver name made
+      # up of lowercase letters, digits and underscores, and has no path.
+      def self.valid_uri?(uri)
+        return false unless uri
+
+        # Anchor with \A and \z so the whole host has to match; ^ and $
+        # only anchor to line boundaries
+        uri.scheme == OPENTRACING_SCHEME &&
+          uri.host.to_s.match?(/\A[a-z0-9_]+\z/) &&
+          uri.path.empty?
+      end
+      private_class_method :valid_uri?
+    end
+  end
+end
diff --git a/lib/gitlab/tracing/jaeger_factory.rb b/lib/gitlab/tracing/jaeger_factory.rb
new file mode 100644
index 0000000000000000000000000000000000000000..0726f6b67f4e2be7325d1dbf46b44c06241bb985
--- /dev/null
+++ b/lib/gitlab/tracing/jaeger_factory.rb
@@ -0,0 +1,112 @@
+# frozen_string_literal: true
+
+require 'jaeger/client'
+
+module Gitlab
+  module Tracing
+    # Builds a Jaeger tracer from the options of a tracing connection string
+    class JaegerFactory
+      # When the probabilistic sampler is used, by default 0.1% of requests will be traced
+      DEFAULT_PROBABILISTIC_RATE = 0.001
+
+      # The default port for the Jaeger agent UDP listener
+      DEFAULT_UDP_PORT = 6831
+
+      # Reduce this from default of 10 seconds as the Ruby jaeger
+      # client doesn't have overflow control, leading to very large
+      # messages which fail to send over UDP (max packet = 64k)
+      # Flush more often, with smaller packets
+      FLUSH_INTERVAL = 5
+
+      # Creates a Jaeger tracer for `service_name`.
+      #
+      # Recognised options are sampler, sampler_param, http_endpoint,
+      # udp_endpoint, strict_parsing and debug. Any other option produces a
+      # warning, or raises when strict_parsing is set.
+      def self.create_tracer(service_name, options)
+        # Drop unset entries rather than handing Jaeger::Client.build an
+        # explicit nil sampler or reporter in place of its own defaults
+        kwargs = {
+          service_name: service_name,
+          sampler: get_sampler(options[:sampler], options[:sampler_param]),
+          reporter: get_reporter(service_name, options[:http_endpoint], options[:udp_endpoint])
+        }.compact
+
+        extra_params = options.except(:sampler, :sampler_param, :http_endpoint, :udp_endpoint, :strict_parsing, :debug) # rubocop: disable CodeReuse/ActiveRecord
+        if extra_params.present?
+          message = "jaeger tracer: invalid option: #{extra_params.keys.join(", ")}"
+
+          if options[:strict_parsing]
+            raise message
+          else
+            warn message
+          end
+        end
+
+        Jaeger::Client.build(**kwargs)
+      end
+
+      # Returns the sampler for `sampler_type` ("probabilistic" or "const"),
+      # or nil for any other value.
+      def self.get_sampler(sampler_type, sampler_param)
+        case sampler_type
+        when "probabilistic"
+          sampler_rate = sampler_param ? sampler_param.to_f : DEFAULT_PROBABILISTIC_RATE
+          Jaeger::Samplers::Probabilistic.new(rate: sampler_rate)
+        when "const"
+          const_value = sampler_param == "1"
+          Jaeger::Samplers::Const.new(const_value)
+        else
+          nil
+        end
+      end
+      private_class_method :get_sampler
+
+      # Returns a remote reporter sending over HTTP when `http_endpoint` is
+      # given, otherwise over UDP when `udp_endpoint` is given, otherwise nil.
+      def self.get_reporter(service_name, http_endpoint, udp_endpoint)
+        encoder = Jaeger::Encoders::ThriftEncoder.new(service_name: service_name)
+
+        if http_endpoint.present?
+          sender = get_http_sender(encoder, http_endpoint)
+        elsif udp_endpoint.present?
+          sender = get_udp_sender(encoder, udp_endpoint)
+        else
+          return nil
+        end
+
+        Jaeger::Reporters::RemoteReporter.new(
+          sender: sender,
+          flush_interval: FLUSH_INTERVAL
+        )
+      end
+      private_class_method :get_reporter
+
+      # Builds an HTTP sender configured with `address` as its URL
+      def self.get_http_sender(encoder, address)
+        Jaeger::HttpSender.new(
+          url: address,
+          encoder: encoder,
+          logger: Logger.new(STDOUT)
+        )
+      end
+      private_class_method :get_http_sender
+
+      # Builds a UDP sender from a `host` or `host:port` address, using
+      # DEFAULT_UDP_PORT when no port is given
+      def self.get_udp_sender(encoder, address)
+        pair = address.split(":", 2)
+        host = pair[0]
+        port = pair[1] ? pair[1].to_i : DEFAULT_UDP_PORT
+
+        Jaeger::UdpSender.new(
+          host: host,
+          port: port,
+          encoder: encoder,
+          logger: Logger.new(STDOUT)
+        )
+      end
+      private_class_method :get_udp_sender
+    end
+  end
+end
diff --git a/spec/lib/gitlab/tracing/factory_spec.rb b/spec/lib/gitlab/tracing/factory_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..945490f0988ec31fd83de1fede51be46092bc757
--- /dev/null
+++ b/spec/lib/gitlab/tracing/factory_spec.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+
+describe Gitlab::Tracing::Factory do
+  describe '.create_tracer' do
+    let(:service_name) { 'rspec' }
+
+    context "when tracing is not configured" do
+      it 'ignores null connection strings' do
+        expect(described_class.create_tracer(service_name, nil)).to be_nil
+      end
+
+      it 'ignores empty connection strings' do
+        expect(described_class.create_tracer(service_name, '')).to be_nil
+      end
+
+      it 'ignores unknown implementations' do
+        expect(described_class.create_tracer(service_name, 'opentracing://invalid_driver')).to be_nil
+      end
+
+      it 'ignores invalid connection strings' do
+        expect(described_class.create_tracer(service_name, 'open?tracing')).to be_nil
+      end
+    end
+
+    context "when tracing is configured with jaeger" do
+      let(:mock_tracer) { double('tracer') }
+
+      it 'processes default connections' do
+        expect(Gitlab::Tracing::JaegerFactory).to receive(:create_tracer).with(service_name, {}).and_return(mock_tracer)
+
+        expect(described_class.create_tracer(service_name, 'opentracing://jaeger')).to be(mock_tracer)
+      end
+
+      it 'processes connections with parameters' do
+        expect(Gitlab::Tracing::JaegerFactory).to receive(:create_tracer).with(service_name, { a: '1', b: '2', c: '3' }).and_return(mock_tracer)
+
+        expect(described_class.create_tracer(service_name, 'opentracing://jaeger?a=1&b=2&c=3')).to be(mock_tracer)
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/tracing/jaeger_factory_spec.rb b/spec/lib/gitlab/tracing/jaeger_factory_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..3bffeb2883058ce7e7f96ea7fc5d777ac1614ed0
--- /dev/null
+++ b/spec/lib/gitlab/tracing/jaeger_factory_spec.rb
@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+
+describe Gitlab::Tracing::JaegerFactory do
+  describe '.create_tracer' do
+    let(:service_name) { 'rspec' }
+
+    it 'processes default connections' do
+      expect(described_class.create_tracer(service_name, {})).to respond_to(:active_span)
+    end
+
+    it 'handles debug options' do
+      expect(described_class.create_tracer(service_name, { debug: "1" })).to respond_to(:active_span)
+    end
+
+    it 'handles const sampler' do
+      expect(described_class.create_tracer(service_name, { sampler: "const", sampler_param: "1" })).to respond_to(:active_span)
+    end
+
+    it 'handles probabilistic sampler' do
+      expect(described_class.create_tracer(service_name, { sampler: "probabilistic", sampler_param: "0.5" })).to respond_to(:active_span)
+    end
+
+    it 'handles http_endpoint configurations' do
+      expect(described_class.create_tracer(service_name, { http_endpoint: "http://localhost:1234" })).to respond_to(:active_span)
+    end
+
+    it 'handles udp_endpoint configurations' do
+      expect(described_class.create_tracer(service_name, { udp_endpoint: "localhost:4321" })).to respond_to(:active_span)
+    end
+
+    it 'ignores invalid parameters' do
+      expect(described_class.create_tracer(service_name, { invalid: "true" })).to respond_to(:active_span)
+    end
+
+    it 'accepts the debug parameter when strict_parsing is set' do
+      expect(described_class.create_tracer(service_name, { debug: "1", strict_parsing: "1" })).to respond_to(:active_span)
+    end
+
+    it 'rejects invalid parameters when strict_parsing is set' do
+      expect { described_class.create_tracer(service_name, { invalid: "true", strict_parsing: "1" }) }.to raise_error(StandardError)
+    end
+  end
+end