Commit 1065f8ce authored by Andrew Newdigate's avatar Andrew Newdigate

Add experimental support for Puma

This allows us (and others) to test drive Puma without it affecting all
users. Puma can be enabled by setting the environment variable
"EXPERIMENTAL_PUMA" to a non empty value.
parent 605e952e
......@@ -40,6 +40,7 @@ eslint-report.html
/config/redis.queues.yml
/config/redis.shared_state.yml
/config/unicorn.rb
/config/puma.rb
/config/secrets.yml
/config/sidekiq.yml
/config/registry.key
......
......@@ -153,6 +153,11 @@ group :unicorn do
gem 'unicorn-worker-killer', '~> 0.4.4'
end
group :puma do
gem 'puma', '~> 3.12', require: false
gem 'puma_worker_killer', require: false
end
# State machine
gem 'state_machines-activerecord', '~> 0.5.1'
......
......@@ -598,6 +598,10 @@ GEM
pry-rails (0.3.6)
pry (>= 0.10.4)
public_suffix (3.0.3)
puma (3.12.0)
puma_worker_killer (0.1.0)
get_process_mem (~> 0.2)
puma (>= 2.7, < 4)
pyu-ruby-sasl (0.0.3.3)
rack (1.6.10)
rack-accept (0.4.5)
......@@ -1076,6 +1080,8 @@ DEPENDENCIES
prometheus-client-mmap (~> 0.9.4)
pry-byebug (~> 3.4.1)
pry-rails (~> 0.3.4)
puma (~> 3.12)
puma_worker_killer
rack-attack (~> 4.4.1)
rack-cors (~> 1.0.0)
rack-oauth2 (~> 1.2.1)
......
......@@ -602,6 +602,10 @@ GEM
pry-rails (0.3.6)
pry (>= 0.10.4)
public_suffix (3.0.3)
puma (3.12.0)
puma_worker_killer (0.1.0)
get_process_mem (~> 0.2)
puma (>= 2.7, < 4)
pyu-ruby-sasl (0.0.3.3)
rack (2.0.5)
rack-accept (0.4.5)
......@@ -1085,6 +1089,8 @@ DEPENDENCIES
prometheus-client-mmap (~> 0.9.4)
pry-byebug (~> 3.4.1)
pry-rails (~> 0.3.4)
puma (~> 3.12)
puma_worker_killer
rack-attack (~> 4.4.1)
rack-cors (~> 1.0.0)
rack-oauth2 (~> 1.2.1)
......
......@@ -3,6 +3,11 @@
cd $(dirname $0)/..
app_root=$(pwd)
# Switch to experimental PUMA configuration
if [ -n "${EXPERIMENTAL_PUMA}" ]; then
exec bin/web_puma "$@"
fi
unicorn_pidfile="$app_root/tmp/pids/unicorn.pid"
unicorn_config="$app_root/config/unicorn.rb"
unicorn_cmd="bundle exec unicorn_rails -c $unicorn_config -E $RAILS_ENV"
......
#!/bin/sh
set -e
cd $(dirname $0)/..
app_root=$(pwd)
puma_pidfile="$app_root/tmp/pids/puma.pid"
puma_config="$app_root/config/puma.rb"
spawn_puma()
{
exec bundle exec puma --config "${puma_config}" "$@"
}
get_puma_pid()
{
pid=$(cat "${puma_pidfile}")
if [ -z "$pid" ] ; then
echo "Could not find a PID in $puma_pidfile"
exit 1
fi
echo "${pid}"
}
start()
{
spawn_puma -d
}
start_foreground()
{
spawn_puma
}
stop()
{
get_puma_pid
kill -QUIT "$(get_puma_pid)"
}
reload()
{
kill -USR2 "$(get_puma_pid)"
}
case "$1" in
start)
start
;;
start_foreground)
start_foreground
;;
stop)
stop
;;
reload)
reload
;;
*)
echo "Usage: RAILS_ENV=your_env $0 {start|stop|reload}"
;;
esac
---
title: Experimental support for running Puma multithreaded web-server
merge_request: 22372
author:
type: performance
......@@ -26,9 +26,25 @@ Sidekiq.configure_server do |config|
end
if !Rails.env.test? && Gitlab::Metrics.prometheus_metrics_enabled?
unless Sidekiq.server?
Gitlab::Metrics::Samplers::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start
Gitlab::Cluster::LifecycleEvents.on_worker_start do
defined?(::Prometheus::Client.reinitialize_on_pid_change) && Prometheus::Client.reinitialize_on_pid_change
unless Sidekiq.server?
Gitlab::Metrics::Samplers::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_sampler_interval).start
end
Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start
end
end
Gitlab::Metrics::Samplers::RubySampler.initialize_instance(Settings.monitoring.ruby_sampler_interval).start
Gitlab::Cluster::LifecycleEvents.on_master_restart do
# The following is necessary to ensure stale Prometheus metrics don't
# accumulate over time. It needs to be done in this hook as opposed to
# inside an init script to ensure metrics files aren't deleted after new
# unicorn workers start after a SIGUSR2 is received.
prometheus_multiproc_dir = ENV['prometheus_multiproc_dir']
if prometheus_multiproc_dir
old_metrics = Dir[File.join(prometheus_multiproc_dir, '*.db')]
FileUtils.rm_rf(old_metrics)
end
end
......@@ -158,7 +158,9 @@ if Gitlab::Metrics.enabled? && !Rails.env.test?
GC::Profiler.enable
Gitlab::Metrics::Samplers::InfluxSampler.initialize_instance.start
Gitlab::Cluster::LifecycleEvents.on_worker_start do
Gitlab::Metrics::Samplers::InfluxSampler.initialize_instance.start
end
module TrackNewRedisConnections
def connect(*args)
......
# frozen_string_literal: true
# Don't handle sidekiq configuration as it
# has its own special active record configuration here
if defined?(ActiveRecord::Base) && !Sidekiq.server?
Gitlab::Cluster::LifecycleEvents.on_worker_start do
ActiveSupport.on_load(:active_record) do
ActiveRecord::Base.establish_connection
Rails.logger.debug("ActiveRecord connection established")
end
end
end
if defined?(ActiveRecord::Base)
Gitlab::Cluster::LifecycleEvents.on_before_fork do
# the following is highly recommended for Rails + "preload_app true"
# as there's no need for the master process to hold a connection
ActiveRecord::Base.connection.disconnect!
Rails.logger.debug("ActiveRecord connection disconnected")
end
end
# frozen_string_literal: true
if /darwin/ =~ RUBY_PLATFORM
Gitlab::Cluster::LifecycleEvents.on_before_fork do
require 'fiddle'
# Dynamically load Foundation.framework, ~implicitly~ initialising
# the Objective-C runtime before any forking happens in Unicorn
#
# From https://bugs.ruby-lang.org/issues/14009
Fiddle.dlopen '/System/Library/Frameworks/Foundation.framework/Foundation'
end
end
# frozen_string_literal: true
if ENV['ENABLE_RBTRACE']
Gitlab::Cluster::LifecycleEvents.on_worker_start do
# Unicorn clears out signals before it forks, so rbtrace won't work
# unless it is enabled after the fork.
require 'rbtrace'
end
end
......@@ -14,8 +14,6 @@ Sidekiq.default_worker_options = { retry: 3 }
enable_json_logs = Gitlab.config.sidekiq.log_format == 'json'
Sidekiq.configure_server do |config|
require 'rbtrace' if ENV['ENABLE_RBTRACE']
config.redis = queues_config_hash
config.server_middleware do |chain|
......
# frozen_string_literal: true
# -----------------------------------------------------------------------
# This file is used by the GDK to generate a default config/puma.rb file
# Note that `/home/git` will be substituted for the actual GDK root
# directory when this file is generated
# -----------------------------------------------------------------------
# Load "path" as a rackup file.
#
# The default is "config.ru".
#
rackup 'config.ru'
pidfile '/home/git/gitlab/tmp/pids/puma.pid'
state_path '/home/git/gitlab/tmp/pids/puma.state'
stdout_redirect '/home/git/gitlab/log/puma.stdout.log',
'/home/git/gitlab/log/puma.stderr.log',
true
# Configure "min" to be the minimum number of threads to use to answer
# requests and "max" the maximum.
#
# The default is "0, 16".
#
threads 1, 4
# By default, workers accept all requests and queue them to pass to handlers.
# When false, workers accept the number of simultaneous requests configured.
#
# Queueing requests generally improves performance, but can cause deadlocks if
# the app is waiting on a request to itself. See https://github.com/puma/puma/issues/612
#
# When set to false this may require a reverse proxy to handle slow clients and
# queue requests before they reach puma. This is due to disabling HTTP keepalive
queue_requests false
# Bind the server to "url". "tcp://", "unix://" and "ssl://" are the only
# accepted protocols.
bind 'unix:///home/git/gitlab.socket'
workers 2
require_relative "/home/git/gitlab/lib/gitlab/cluster/lifecycle_events"
require_relative "/home/git/gitlab/lib/gitlab/cluster/puma_worker_killer_initializer"
on_restart do
# Signal application hooks that we're about to restart
Gitlab::Cluster::LifecycleEvents.do_master_restart
end
before_fork do
# Signal to the puma killer
Gitlab::Cluster::PumaWorkerKillerInitializer.start @config.options unless ENV['DISABLE_PUMA_WORKER_KILLER']
# Signal application hooks that we're about to fork
Gitlab::Cluster::LifecycleEvents.do_before_fork
end
Gitlab::Cluster::LifecycleEvents.set_puma_options @config.options
on_worker_boot do
# Signal application hooks of worker start
Gitlab::Cluster::LifecycleEvents.do_worker_start
end
# Preload the application before starting the workers; this conflicts with
# phased restart feature. (off by default)
preload_app!
tag 'gitlab-puma-worker'
# Verifies that all workers have checked in to the master process within
# the given timeout. If not the worker process will be restarted. Default
# value is 60 seconds.
#
worker_timeout 60
......@@ -81,22 +81,16 @@ preload_app true
# fast LAN.
check_client_connection false
require_relative "/home/git/gitlab/lib/gitlab/cluster/lifecycle_events"
before_exec do |server|
# The following is necessary to ensure stale Prometheus metrics don't
# accumulate over time. It needs to be done in this hook as opposed to
# inside an init script to ensure metrics files aren't deleted after new
# unicorn workers start after a SIGUSR2 is received.
if ENV['prometheus_multiproc_dir']
old_metrics = Dir[File.join(ENV['prometheus_multiproc_dir'], '*.db')]
FileUtils.rm_rf(old_metrics)
end
# Signal application hooks that we're about to restart
Gitlab::Cluster::LifecycleEvents.do_master_restart
end
before_fork do |server, worker|
# the following is highly recommended for Rails + "preload_app true"
# as there's no need for the master process to hold a connection
defined?(ActiveRecord::Base) &&
ActiveRecord::Base.connection.disconnect!
# Signal application hooks that we're about to fork
Gitlab::Cluster::LifecycleEvents.do_before_fork
# The following is only recommended for memory/DB-constrained
# installations. It is not needed if your system can house
......@@ -124,25 +118,10 @@ before_fork do |server, worker|
end
after_fork do |server, worker|
# Unicorn clears out signals before it forks, so rbtrace won't work
# unless it is enabled after the fork.
require 'rbtrace' if ENV['ENABLE_RBTRACE']
# Signal application hooks of worker start
Gitlab::Cluster::LifecycleEvents.do_worker_start
# per-process listener ports for debugging/admin/migrations
# addr = "127.0.0.1:#{9293 + worker.nr}"
# server.listen(addr, :tries => -1, :delay => 5, :tcp_nopush => true)
# the following is *required* for Rails + "preload_app true",
defined?(ActiveRecord::Base) &&
ActiveRecord::Base.establish_connection
# reset prometheus client, this will cause any opened metrics files to be closed
defined?(::Prometheus::Client.reinitialize_on_pid_change) &&
Prometheus::Client.reinitialize_on_pid_change
# if preload_app is true, then you may also want to check and
# restart any other shared sockets/descriptors such as Memcached,
# and Redis. TokyoCabinet file handles are safe to reuse
# between any number of forked children (assuming your kernel
# correctly implements pread()/pwrite() system calls)
end
# frozen_string_literal: true
# -------------------------------------------------------------------------
# This file is used by the GDK to generate a default config/unicorn.rb file
# Note that `/home/git` will be substituted for the actual GDK root
# directory when this file is generated
# -------------------------------------------------------------------------
worker_processes 2
timeout 60
listen '/home/git/gitlab.socket'
preload_app true
check_client_connection false
require_relative "/home/git/gitlab/lib/gitlab/cluster/lifecycle_events"
before_exec do |server|
# Signal application hooks that we're about to restart
Gitlab::Cluster::LifecycleEvents.do_master_restart
end
before_fork do |server, worker|
# the following is highly recommended for Rails + "preload_app true"
# as there's no need for the master process to hold a connection
defined?(ActiveRecord::Base) &&
ActiveRecord::Base.connection.disconnect!
if /darwin/ =~ RUBY_PLATFORM
require 'fiddle'
# Dynamically load Foundation.framework, ~implicitly~ initialising
# the Objective-C runtime before any forking happens in Unicorn
#
# From https://bugs.ruby-lang.org/issues/14009
Fiddle.dlopen '/System/Library/Frameworks/Foundation.framework/Foundation'
# Signal application hooks that we're about to fork
Gitlab::Cluster::LifecycleEvents.do_before_fork
# The following is only recommended for memory/DB-constrained
# installations. It is not needed if your system can house
# twice as many worker_processes as you have configured.
#
# This allows a new master process to incrementally
# phase out the old master process with SIGTTOU to avoid a
# thundering herd (especially in the "preload_app false" case)
# when doing a transparent upgrade. The last worker spawned
# will then kill off the old master process with a SIGQUIT.
old_pid = "#{server.config[:pid]}.oldbin"
if old_pid != server.pid
begin
sig = (worker.nr + 1) >= server.worker_processes ? :QUIT : :TTOU
Process.kill(sig, File.read(old_pid).to_i)
rescue Errno::ENOENT, Errno::ESRCH
end
end
#
# Throttle the master from forking too quickly by sleeping. Due
# to the implementation of standard Unix signal handlers, this
# helps (but does not completely) prevent identical, repeated signals
# from being lost when the receiving process is busy.
# sleep 1
end
after_fork do |server, worker|
# Unicorn clears out signals before it forks, so rbtrace won't work
# unless it is enabled after the fork.
require 'rbtrace' if ENV['ENABLE_RBTRACE']
# Signal application hooks of worker start
Gitlab::Cluster::LifecycleEvents.do_worker_start
# the following is *required* for Rails + "preload_app true",
defined?(ActiveRecord::Base) &&
ActiveRecord::Base.establish_connection
# per-process listener ports for debugging/admin/migrations
# addr = "127.0.0.1:#{9293 + worker.nr}"
# server.listen(addr, :tries => -1, :delay => 5, :tcp_nopush => true)
end
This diff is collapsed.
# frozen_string_literal: true
module Gitlab
module Cluster
#
# LifecycleEvents lets Rails initializers register application startup hooks
# that are sensitive to forking. For example, to defer the creation of
# watchdog threads. This lets us abstract away the Unix process
# lifecycles of Unicorn, Sidekiq, Puma, Puma Cluster, etc.
#
# We have three lifecycle events.
#
# - before_fork (only in forking processes)
# - worker_start
# - before_master_restart (only in forking processes)
#
# Blocks will be executed in the order in which they are registered.
#
class LifecycleEvents
class << self
#
# Hook registration methods (called from initializers)
#
def on_worker_start(&block)
if in_clustered_environment?
# Defer block execution
(@worker_start_hooks ||= []) << block
else
yield
end
end
def on_before_fork(&block)
return unless in_clustered_environment?
# Defer block execution
(@before_fork_hooks ||= []) << block
end
def on_master_restart(&block)
return unless in_clustered_environment?
# Defer block execution
(@master_restart_hooks ||= []) << block
end
#
# Lifecycle integration methods (called from unicorn.rb, puma.rb, etc.)
#
def do_worker_start
@worker_start_hooks&.each do |block|
block.call
end
end
def do_before_fork
@before_fork_hooks&.each do |block|
block.call
end
end
def do_master_restart
@master_restart_hooks && @master_restart_hooks.each do |block|
block.call
end
end
# Puma doesn't use singletons (which is good) but
# this means we need to pass through whether the
# puma server is running in single mode or cluster mode
def set_puma_options(options)
@puma_options = options
end
private
def in_clustered_environment?
# Sidekiq doesn't fork
return false if Sidekiq.server?
# Unicorn always forks
return true if defined?(::Unicorn)
# Puma sometimes forks
return true if in_clustered_puma?
# Default assumption is that we don't fork
false
end
def in_clustered_puma?
return false unless defined?(::Puma)
@puma_options && @puma_options[:workers] && @puma_options[:workers] > 0
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Cluster
class PumaWorkerKillerInitializer
def self.start(puma_options, puma_per_worker_max_memory_mb: 650)
require 'puma_worker_killer'
PumaWorkerKiller.config do |config|
# Note! ram is expressed in megabytes (whereas GITLAB_UNICORN_MEMORY_MAX is in bytes)
# Importantly RAM is for _all_workers (ie, the cluster),
# not each worker as is the case with GITLAB_UNICORN_MEMORY_MAX
worker_count = puma_options[:workers] || 1
config.ram = worker_count * puma_per_worker_max_memory_mb
config.frequency = 20 # seconds
# We just want to limit to a fixed maximum, unrelated to the total amount
# of available RAM.
config.percent_usage = 0.98
# Ideally we'll never hit the maximum amount of memory. If so the worker
# is restarted already, thus periodically restarting workers shouldn't be
# needed.
config.rolling_restart_frequency = false
end
PumaWorkerKiller.start
end
end
end
end
# frozen_string_literal: true
app = proc do |env|
if env['REQUEST_METHOD'] == 'GET'
[200, {}, ["#{Process.pid}"]]
else
Process.kill(env['QUERY_STRING'], Process.pid)
[200, {}, ['Bye!']]
end
end
run app
# frozen_string_literal: true
# Note: this file is used for testing puma in `spec/rack_servers/puma_spec.rb` only
# Note: as per the convention in `config/puma.example.development.rb`,
# this file will replace `/home/git` with the actual working directory
directory '/home/git'
threads 1, 10
queue_requests false
pidfile '/home/git/gitlab/tmp/pids/puma.pid'
bind 'unix:///home/git/gitlab/tmp/tests/puma.socket'
workers 1
preload_app!
worker_timeout 60
require_relative "/home/git/gitlab/lib/gitlab/cluster/lifecycle_events"
require_relative "/home/git/gitlab/lib/gitlab/cluster/puma_worker_killer_initializer"
before_fork do
Gitlab::Cluster::PumaWorkerKillerInitializer.start @config.options
Gitlab::Cluster::LifecycleEvents.do_before_fork
end
Gitlab::Cluster::LifecycleEvents.set_puma_options @config.options
on_worker_boot do
Gitlab::Cluster::LifecycleEvents.do_worker_start
File.write('/home/git/gitlab/tmp/tests/puma-worker-ready', Process.pid)
end
on_restart do
Gitlab::Cluster::LifecycleEvents.do_master_restart
end
# frozen_string_literal: true
require 'fileutils'
require 'excon'
require 'spec_helper'
describe 'Puma' do
before(:all) do
project_root = File.expand_path('../..', __dir__)
config_lines = File.read('spec/rack_servers/configs/puma.rb')
.gsub('/home/git/gitlab', project_root)
.gsub('/home/git', project_root)
config_path = File.join(project_root, "tmp/tests/puma.rb")
@socket_path = File.join(project_root, 'tmp/tests/puma.socket')
File.write(config_path, config_lines)
cmd = %W[puma -e test -C #{config_path} #{File.join(__dir__, 'configs/config.ru')}]
@puma_master_pid = spawn(*cmd)
wait_puma_boot!(@puma_master_pid, File.join(project_root, 'tmp/tests/puma-worker-ready'))
WebMock.allow_net_connect!
end
%w[SIGQUIT SIGTERM SIGKILL].each do |signal|
it "has a worker that self-terminates on signal #{signal}" do
response = Excon.get('unix://', socket: @socket_path)
expect(response.status).to eq(200)
worker_pid = response.body.to_i
expect(worker_pid).to be > 0
begin
Excon.post("unix://?#{signal}", socket: @socket_path)
rescue Excon::Error::Socket
# The connection may be closed abruptly
end
expect(pid_gone?(worker_pid)).to eq(true)
end
end
after(:all) do
begin
WebMock.disable_net_connect!(allow_localhost: true)
Process.kill('TERM', @puma_master_pid)
rescue Errno::ESRCH
end
end
def wait_puma_boot!(master_pid, ready_file)
# We have seen the boot timeout after 2 minutes in CI so let's set it to 5 minutes.
timeout = 5 * 60
timeout.times do
return if File.exist?(ready_file)
pid = Process.waitpid(master_pid, Process::WNOHANG)
raise "puma failed to boot: #{$?}" unless pid.nil?
sleep 1
end
raise "puma boot timed out after #{timeout} seconds"
end
def pid_gone?(pid)
# Worker termination should take less than a second. That makes 10
# seconds a generous timeout.
10.times do
begin
Process.kill(0, pid)
rescue Errno::ESRCH
return true
end
sleep 1
end
false
end
end
# frozen_string_literal: true
require 'fileutils'
require 'excon'
......@@ -6,12 +8,16 @@ require 'spec_helper'
describe 'Unicorn' do
before(:all) do
config_lines = File.read('config/unicorn.rb.example').split("\n")
project_root = File.expand_path('../..', __dir__)
config_lines = File.read('config/unicorn.rb.example')
.gsub('/home/git/gitlab', project_root)
.gsub('/home/git', project_root)
.split("\n")
# Remove these because they make setup harder.
config_lines = config_lines.reject do |line|
%w[