Added helper methods for database migrations

These helpers can be used to perform migrations without taking down the
entire application.

For example, the method "add_column_with_default" can be used to add a
new column with a default value without locking the entire table.
parent 3dc64764
module Gitlab
module Database
module MigrationHelpers
# Creates a new index, concurrently when supported
#
# On PostgreSQL this method creates an index concurrently, on MySQL this
# creates a regular index.
#
# Example:
#
# add_concurrent_index :users, :some_column
#
# See Rails' `add_index` for more info on the available arguments.
def add_concurrent_index(*args)
if transaction_open?
raise 'add_concurrent_index can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
'in the body of your migration class'
end
if Database.postgresql?
args << { algorithm: :concurrently }
end
add_index(*args)
end
# Updates the value of a column in batches.
#
# This method updates the table in batches of 5% of the total row count.
# Any data inserted while running this method (or after it has finished
# running) is _not_ updated automatically.
#
# This method _only_ updates rows where the column's value is set to NULL.
#
# table - The name of the table.
# column - The name of the column to update.
# value - The value for the column.
def update_column_in_batches(table, column, value)
quoted_table = quote_table_name(table)
quoted_column = quote_column_name(column)
quoted_value = quote(value)
processed = 0
total = exec_query("SELECT COUNT(*) AS count FROM #{quoted_table}").
to_hash.
first['count'].
to_i
# Update in batches of 5% with an upper limit of 5000 rows.
batch_size = ((total / 100.0) * 5.0).ceil
while processed < total
start_row = exec_query(%Q{
SELECT id
FROM #{quoted_table}
ORDER BY id ASC
LIMIT 1 OFFSET #{processed}
}).to_hash.first
stop_row = exec_query(%Q{
SELECT id
FROM #{quoted_table}
ORDER BY id ASC
LIMIT 1 OFFSET #{processed + batch_size}
}.strip_heredoc).to_hash.first
query = %Q{
UPDATE #{quoted_table}
SET #{quoted_column} = #{quoted_value}
WHERE id >= #{start_row['id']}
}
if stop_row
query += " AND id < #{stop_row['id']}"
end
execute(query.strip_heredoc)
processed += batch_size
end
end
# Adds a column with a default value without locking an entire table.
#
# This method runs the following steps:
#
# 1. Add the column with a default value of NULL.
# 2. Update all existing rows in batches.
# 3. Change the default value of the column to the specified value.
# 4. Update any remaining rows.
#
# These steps ensure a column can be added to a large and commonly used
# table without locking the entire table for the duration of the table
# modification.
#
# table - The name of the table to update.
# column - The name of the column to add.
# type - The column type (e.g. `:integer`).
# default - The default value for the column.
# allow_null - When set to `true` the column will allow NULL values, the
# default is to not allow NULL values.
def add_column_with_default(table, column, type, default:, allow_null: false)
if transaction_open?
raise 'add_column_with_default can not be run inside a transaction, ' \
'you can disable transactions by calling disable_ddl_transaction! ' \
'in the body of your migration class'
end
transaction do
add_column(table, column, type, default: nil)
# Changing the default before the update ensures any newly inserted
# rows already use the proper default value.
change_column_default(table, column, default)
end
begin
transaction do
update_column_in_batches(table, column, default)
end
rescue Exception => error
remove_column(table, column)
raise error
end
change_column_null(table, column, false) unless allow_null
end
end
end
end
require 'spec_helper'
describe Gitlab::Database::MigrationHelpers, lib: true do
let(:model) do
Class.new do
include Gitlab::Database::MigrationHelpers
def method_missing(name, *args, &block)
ActiveRecord::Base.connection.send(name, *args, &block)
end
end.new
end
describe '#add_concurrent_index' do
context 'outside a transaction' do
before do
expect(model).to receive(:transaction_open?).and_return(false)
end
context 'using PostgreSQL' do
it 'creates the index concurrently' do
expect(Gitlab::Database).to receive(:postgresql?).and_return(true)
expect(model).to receive(:add_index).
with(:users, :foo, algorithm: :concurrently)
model.add_concurrent_index(:users, :foo)
end
end
context 'using MySQL' do
it 'creates a regular index' do
expect(Gitlab::Database).to receive(:postgresql?).and_return(false)
expect(model).to receive(:add_index).
with(:users, :foo)
model.add_concurrent_index(:users, :foo)
end
end
end
context 'inside a transaction' do
it 'raises RuntimeError' do
expect(model).to receive(:transaction_open?).and_return(true)
expect { model.add_concurrent_index(:users, :foo) }.
to raise_error(RuntimeError)
end
end
end
describe '#update_column_in_batches' do
before do
create_list(:empty_project, 5)
end
it 'updates all the rows in a table' do
model.update_column_in_batches(:projects, :import_error, 'foo')
expect(Project.where(import_error: 'foo').count).to eq(5)
end
end
describe '#add_column_with_default' do
context 'outside of a transaction' do
before do
expect(model).to receive(:transaction_open?).and_return(false)
expect(model).to receive(:transaction).twice.and_yield
expect(model).to receive(:add_column).
with(:projects, :foo, :integer, default: nil)
expect(model).to receive(:change_column_default).
with(:projects, :foo, 10)
end
it 'adds the column while allowing NULL values' do
expect(model).to receive(:update_column_in_batches).
with(:projects, :foo, 10)
expect(model).not_to receive(:change_column_null)
model.add_column_with_default(:projects, :foo, :integer,
default: 10,
allow_null: true)
end
it 'adds the column while not allowing NULL values' do
expect(model).to receive(:update_column_in_batches).
with(:projects, :foo, 10)
expect(model).to receive(:change_column_null).
with(:projects, :foo, false)
model.add_column_with_default(:projects, :foo, :integer, default: 10)
end
it 'removes the added column whenever updating the rows fails' do
expect(model).to receive(:update_column_in_batches).
with(:projects, :foo, 10).
and_raise(RuntimeError)
expect(model).to receive(:remove_column).
with(:projects, :foo)
expect {
model.add_column_with_default(:projects, :foo, :integer, default: 10)
}.to raise_error(RuntimeError)
end
end
context 'inside a transaction' do
it 'raises RuntimeError' do
expect(model).to receive(:transaction_open?).and_return(true)
expect {
model.add_column_with_default(:projects, :foo, :integer, default: 10)
}.to raise_error(RuntimeError)
end
end
end
end
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment