Commit 92e15071 authored by Douwe Maan's avatar Douwe Maan

Merge branch 'pawel/reduce_cardinality_of_prometheus_metrics' into 'master'

Reduce cardinality of some of GitLab's Prometheus metrics and fix observed duration reporting.

Closes #41045

See merge request gitlab-org/gitlab-ce!15881
parents 866c97fb 10af36f0
---
title: Reduce the number of buckets in gitlab_cache_operation_duration_seconds metric
merge_request: 15881
author:
type: changed
......@@ -38,6 +38,7 @@ module Gitlab
# This is memoized since this method is called for every instrumented
# method. Loading data from an external cache on every method call slows
# things down too much.
# in milliseconds
@method_call_threshold ||= settings[:method_call_threshold]
end
......
......@@ -35,8 +35,8 @@ module Gitlab
@transaction = transaction
@name = name
@labels = { module: @module_name, method: @method_name }
@real_time = 0
@cpu_time = 0
@real_time = 0.0
@cpu_time = 0.0
@call_count = 0
end
......@@ -54,7 +54,7 @@ module Gitlab
@call_count += 1
if call_measurement_enabled? && above_threshold?
self.class.call_duration_histogram.observe(@transaction.labels.merge(labels), real_time / 1000.0)
self.class.call_duration_histogram.observe(@transaction.labels.merge(labels), real_time)
end
retval
......@@ -65,8 +65,8 @@ module Gitlab
Metric.new(
Instrumentation.series,
{
duration: real_time,
cpu_duration: cpu_time,
duration: real_time.in_milliseconds.to_i,
cpu_duration: cpu_time.in_milliseconds.to_i,
call_count: call_count
},
method: @name
......@@ -76,7 +76,7 @@ module Gitlab
# Returns true if the total runtime of this method exceeds the method call
# threshold.
def above_threshold?
real_time >= Metrics.method_call_threshold
real_time.in_milliseconds >= Metrics.method_call_threshold
end
def call_measurement_enabled?
......
......@@ -52,7 +52,7 @@ module Gitlab
metrics[:memory_usage].set(labels, System.memory_usage)
metrics[:file_descriptors].set(labels, System.file_descriptor_count)
metrics[:sampler_duration].observe(labels.merge(worker_label), (System.monotonic_time - start_time) / 1000.0)
metrics[:sampler_duration].observe(labels.merge(worker_label), System.monotonic_time - start_time)
ensure
GC::Profiler.clear
end
......
......@@ -66,7 +66,7 @@ module Gitlab
:gitlab_cache_operation_duration_seconds,
'Cache access time',
Transaction::BASE_LABELS.merge({ action: nil }),
[0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.500, 2.0, 10.0]
[0.001, 0.01, 0.1, 1, 10]
)
end
......
......@@ -35,27 +35,27 @@ module Gitlab
if Process.const_defined?(:CLOCK_THREAD_CPUTIME_ID)
def self.cpu_time
Process
.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :millisecond)
.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID, :float_second)
end
else
def self.cpu_time
Process
.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :millisecond)
.clock_gettime(Process::CLOCK_PROCESS_CPUTIME_ID, :float_second)
end
end
# Returns the current real time in a given precision.
#
# Returns the time as a Fixnum.
def self.real_time(precision = :millisecond)
# Returns the time as a Float for precision = :float_second.
def self.real_time(precision = :float_second)
Process.clock_gettime(Process::CLOCK_REALTIME, precision)
end
# Returns the current monotonic clock time in a given precision.
# Returns the current monotonic clock time as seconds with microseconds precision.
#
# Returns the time as a Fixnum.
def self.monotonic_time(precision = :millisecond)
Process.clock_gettime(Process::CLOCK_MONOTONIC, precision)
# Returns the time as a Float.
def self.monotonic_time
Process.clock_gettime(Process::CLOCK_MONOTONIC, :float_second)
end
end
end
......
......@@ -35,6 +35,10 @@ module Gitlab
@finished_at ? (@finished_at - @started_at) : 0.0
end
def duration_milliseconds
duration.in_milliseconds.to_i
end
def allocated_memory
@memory_after - @memory_before
end
......@@ -50,7 +54,7 @@ module Gitlab
@memory_after = System.memory_usage
@finished_at = System.monotonic_time
self.class.metric_transaction_duration_seconds.observe(labels, duration * 1000)
self.class.metric_transaction_duration_seconds.observe(labels, duration)
self.class.metric_transaction_allocated_memory_bytes.observe(labels, allocated_memory * 1024.0)
Thread.current[THREAD_KEY] = nil
......@@ -97,7 +101,7 @@ module Gitlab
end
def track_self
values = { duration: duration, allocated_memory: allocated_memory }
values = { duration: duration_milliseconds, allocated_memory: allocated_memory }
@values.each do |name, value|
values[name] = value
......
......@@ -96,14 +96,17 @@ describe Gitlab::Metrics::MethodCall do
describe '#to_metric' do
it 'returns a Metric instance' do
expect(method_call).to receive(:real_time).and_return(4.0001)
expect(method_call).to receive(:cpu_time).and_return(3.0001)
method_call.measure { 'foo' }
metric = method_call.to_metric
expect(metric).to be_an_instance_of(Gitlab::Metrics::Metric)
expect(metric.series).to eq('rails_method_calls')
expect(metric.values[:duration]).to be_a_kind_of(Numeric)
expect(metric.values[:cpu_duration]).to be_a_kind_of(Numeric)
expect(metric.values[:duration]).to eq(4000)
expect(metric.values[:cpu_duration]).to eq(3000)
expect(metric.values[:call_count]).to be_an(Integer)
expect(metric.tags).to eq({ method: 'Foo#bar' })
......@@ -116,13 +119,13 @@ describe Gitlab::Metrics::MethodCall do
end
it 'returns false when the total call time is not above the threshold' do
expect(method_call).to receive(:real_time).and_return(9)
expect(method_call).to receive(:real_time).and_return(0.009)
expect(method_call.above_threshold?).to eq(false)
end
it 'returns true when the total call time is above the threshold' do
expect(method_call).to receive(:real_time).and_return(9000)
expect(method_call).to receive(:real_time).and_return(9)
expect(method_call.above_threshold?).to eq(true)
end
......
......@@ -29,19 +29,19 @@ describe Gitlab::Metrics::System do
describe '.cpu_time' do
it 'returns a Fixnum' do
expect(described_class.cpu_time).to be_an(Integer)
expect(described_class.cpu_time).to be_an(Float)
end
end
describe '.real_time' do
it 'returns a Fixnum' do
expect(described_class.real_time).to be_an(Integer)
expect(described_class.real_time).to be_an(Float)
end
end
describe '.monotonic_time' do
it 'returns a Fixnum' do
expect(described_class.monotonic_time).to be_an(Integer)
it 'returns a Float' do
expect(described_class.monotonic_time).to be_an(Float)
end
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment