From e867099091e19e8cae47d928761b869ec20db2c3 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Mon, 9 May 2016 14:14:32 +0200 Subject: [PATCH] Added helper methods for database migrations These helpers can be used to perform migrations without taking down the entire application. For example, the method "add_column_with_default" can be used to add a new column with a default value without locking the entire table. --- lib/gitlab/database/migration_helpers.rb | 132 ++++++++++++++++++ .../gitlab/database/migration_helpers_spec.rb | 124 ++++++++++++++++ 2 files changed, 256 insertions(+) create mode 100644 lib/gitlab/database/migration_helpers.rb create mode 100644 spec/lib/gitlab/database/migration_helpers_spec.rb diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb new file mode 100644 index 00000000000..7aa8c90e6d1 --- /dev/null +++ b/lib/gitlab/database/migration_helpers.rb @@ -0,0 +1,132 @@ +module Gitlab + module Database + module MigrationHelpers + # Creates a new index, concurrently when supported + # + # On PostgreSQL this method creates an index concurrently, on MySQL this + # creates a regular index. + # + # Example: + # + # add_concurrent_index :users, :some_column + # + # See Rails' `add_index` for more info on the available arguments. + def add_concurrent_index(*args) + if transaction_open? + raise 'add_concurrent_index can not be run inside a transaction, ' \ + 'you can disable transactions by calling disable_ddl_transaction! ' \ + 'in the body of your migration class' + end + + if Database.postgresql? + args << { algorithm: :concurrently } + end + + add_index(*args) + end + + # Updates the value of a column in batches. + # + # This method updates the table in batches of 5% of the total row count. + # Any data inserted while running this method (or after it has finished + # running) is _not_ updated automatically. + # + # This method _only_ updates rows where the column's value is set to NULL. + # + # table - The name of the table. + # column - The name of the column to update. + # value - The value for the column. + def update_column_in_batches(table, column, value) + quoted_table = quote_table_name(table) + quoted_column = quote_column_name(column) + quoted_value = quote(value) + processed = 0 + + total = exec_query("SELECT COUNT(*) AS count FROM #{quoted_table}"). + to_hash. + first['count']. + to_i + + # Update in batches of 5% with an upper limit of 5000 rows. + batch_size = ((total / 100.0) * 5.0).ceil + + while processed < total + start_row = exec_query(%Q{ + SELECT id + FROM #{quoted_table} + ORDER BY id ASC + LIMIT 1 OFFSET #{processed} + }).to_hash.first + + stop_row = exec_query(%Q{ + SELECT id + FROM #{quoted_table} + ORDER BY id ASC + LIMIT 1 OFFSET #{processed + batch_size} + }.strip_heredoc).to_hash.first + + query = %Q{ + UPDATE #{quoted_table} + SET #{quoted_column} = #{quoted_value} + WHERE id >= #{start_row['id']} + } + + if stop_row + query += " AND id < #{stop_row['id']}" + end + + execute(query.strip_heredoc) + + processed += batch_size + end + end + + # Adds a column with a default value without locking an entire table. + # + # This method runs the following steps: + # + # 1. Add the column with a default value of NULL. + # 2. Update all existing rows in batches. + # 3. Change the default value of the column to the specified value. + # 4. Update any remaining rows. + # + # These steps ensure a column can be added to a large and commonly used + # table without locking the entire table for the duration of the table + # modification. + # + # table - The name of the table to update. + # column - The name of the column to add. + # type - The column type (e.g. `:integer`). + # default - The default value for the column. + # allow_null - When set to `true` the column will allow NULL values, the + # default is to not allow NULL values. + def add_column_with_default(table, column, type, default:, allow_null: false) + if transaction_open? + raise 'add_column_with_default can not be run inside a transaction, ' \ + 'you can disable transactions by calling disable_ddl_transaction! ' \ + 'in the body of your migration class' + end + + transaction do + add_column(table, column, type, default: nil) + + # Changing the default before the update ensures any newly inserted + # rows already use the proper default value. + change_column_default(table, column, default) + end + + begin + transaction do + update_column_in_batches(table, column, default) + end + rescue Exception => error + remove_column(table, column) + + raise error + end + + change_column_null(table, column, false) unless allow_null + end + end + end +end diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb new file mode 100644 index 00000000000..ad2f3cb7e45 --- /dev/null +++ b/spec/lib/gitlab/database/migration_helpers_spec.rb @@ -0,0 +1,124 @@ +require 'spec_helper' + +describe Gitlab::Database::MigrationHelpers, lib: true do + let(:model) do + Class.new do + include Gitlab::Database::MigrationHelpers + + def method_missing(name, *args, &block) + ActiveRecord::Base.connection.send(name, *args, &block) + end + end.new + end + + describe '#add_concurrent_index' do + context 'outside a transaction' do + before do + expect(model).to receive(:transaction_open?).and_return(false) + end + + context 'using PostgreSQL' do + it 'creates the index concurrently' do + expect(Gitlab::Database).to receive(:postgresql?).and_return(true) + + expect(model).to receive(:add_index). + with(:users, :foo, algorithm: :concurrently) + + model.add_concurrent_index(:users, :foo) + end + end + + context 'using MySQL' do + it 'creates a regular index' do + expect(Gitlab::Database).to receive(:postgresql?).and_return(false) + + expect(model).to receive(:add_index). + with(:users, :foo) + + model.add_concurrent_index(:users, :foo) + end + end + end + + context 'inside a transaction' do + it 'raises RuntimeError' do + expect(model).to receive(:transaction_open?).and_return(true) + + expect { model.add_concurrent_index(:users, :foo) }. + to raise_error(RuntimeError) + end + end + end + + describe '#update_column_in_batches' do + before do + create_list(:empty_project, 5) + end + + it 'updates all the rows in a table' do + model.update_column_in_batches(:projects, :import_error, 'foo') + + expect(Project.where(import_error: 'foo').count).to eq(5) + end + end + + describe '#add_column_with_default' do + context 'outside of a transaction' do + before do + expect(model).to receive(:transaction_open?).and_return(false) + + expect(model).to receive(:transaction).twice.and_yield + + expect(model).to receive(:add_column). + with(:projects, :foo, :integer, default: nil) + + expect(model).to receive(:change_column_default). + with(:projects, :foo, 10) + end + + it 'adds the column while allowing NULL values' do + expect(model).to receive(:update_column_in_batches). + with(:projects, :foo, 10) + + expect(model).not_to receive(:change_column_null) + + model.add_column_with_default(:projects, :foo, :integer, + default: 10, + allow_null: true) + end + + it 'adds the column while not allowing NULL values' do + expect(model).to receive(:update_column_in_batches). + with(:projects, :foo, 10) + + expect(model).to receive(:change_column_null). + with(:projects, :foo, false) + + model.add_column_with_default(:projects, :foo, :integer, default: 10) + end + + it 'removes the added column whenever updating the rows fails' do + expect(model).to receive(:update_column_in_batches). + with(:projects, :foo, 10). + and_raise(RuntimeError) + + expect(model).to receive(:remove_column). + with(:projects, :foo) + + expect { + model.add_column_with_default(:projects, :foo, :integer, default: 10) + }.to raise_error(RuntimeError) + end + end + + context 'inside a transaction' do + it 'raises RuntimeError' do + expect(model).to receive(:transaction_open?).and_return(true) + + expect { + model.add_column_with_default(:projects, :foo, :integer, default: 10) + }.to raise_error(RuntimeError) + end + end + end +end -- GitLab