Commit a58d0a01 authored by Yorick Peterse's avatar Yorick Peterse

Merge branch 'ab-49754-gh-importer-internal-ids' into 'master'

GitHub importer: Keep track of internal_ids

Closes #49754

See merge request gitlab-org/gitlab-ce!20926
parents c2f14af7 729de4f1
......@@ -111,7 +111,7 @@ class InternalId < ActiveRecord::Base
# Generates next internal id and returns it
def generate
subject.transaction do
InternalId.transaction do
# Create a record in internal_ids if one does not yet exist
# and increment its last value
#
......@@ -125,7 +125,7 @@ class InternalId < ActiveRecord::Base
#
# Note this will acquire a ROW SHARE lock on the InternalId record
def track_greatest(new_value)
subject.transaction do
InternalId.transaction do
(lookup || create_record).track_greatest_and_save!(new_value)
end
end
......@@ -148,7 +148,7 @@ class InternalId < ActiveRecord::Base
# violation. We can safely roll-back the nested transaction and perform
# a lookup instead to retrieve the record.
def create_record
subject.transaction(requires_new: true) do
InternalId.transaction(requires_new: true) do
InternalId.create!(
**scope,
usage: usage_value,
......
---
title: Add migration to cleanup internal_ids inconsistency.
merge_request: 20926
author:
type: fixed
# frozen_string_literal: true
class DeleteInconsistentInternalIdRecords < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
# This migration cleans up any inconsistent records in internal_ids.
#
# That is, it deletes records that track a `last_value` that is
# smaller than the maximum internal id (usually `iid`) found in
# the corresponding model records.
def up
disable_statement_timeout do
delete_internal_id_records('issues', 'project_id')
delete_internal_id_records('merge_requests', 'project_id', 'target_project_id')
delete_internal_id_records('deployments', 'project_id')
delete_internal_id_records('milestones', 'project_id')
delete_internal_id_records('milestones', 'namespace_id', 'group_id')
delete_internal_id_records('ci_pipelines', 'project_id')
end
end
class InternalId < ActiveRecord::Base
self.table_name = 'internal_ids'
enum usage: { issues: 0, merge_requests: 1, deployments: 2, milestones: 3, epics: 4, ci_pipelines: 5 }
end
private
def delete_internal_id_records(base_table, scope_column_name, base_scope_column_name = scope_column_name)
sql = <<~SQL
SELECT id FROM ( -- workaround for MySQL
SELECT internal_ids.id FROM (
SELECT #{base_scope_column_name} AS #{scope_column_name}, max(iid) as maximum_iid from #{base_table} GROUP BY #{scope_column_name}
) maxima JOIN internal_ids USING (#{scope_column_name})
WHERE internal_ids.usage=#{InternalId.usages.fetch(base_table)} AND maxima.maximum_iid > internal_ids.last_value
) internal_ids
SQL
InternalId.where("id IN (#{sql})").tap do |ids| # rubocop:disable GitlabSecurity/SqlInjection
say "Deleting internal_id records for #{base_table}: #{ids.pluck(:project_id, :last_value)}" unless ids.empty?
end.delete_all
end
end
......@@ -15,10 +15,12 @@ module Gitlab
end
# Bulk inserts the given rows into the database.
def bulk_insert(model, rows, batch_size: 100)
def bulk_insert(model, rows, batch_size: 100, pre_hook: nil)
rows.each_slice(batch_size) do |slice|
pre_hook.call(slice) if pre_hook
Gitlab::Database.bulk_insert(model.table_name, slice)
end
rows
end
end
end
......
......@@ -55,7 +55,11 @@ module Gitlab
updated_at: issue.updated_at
}
GithubImport.insert_and_return_id(attributes, project.issues)
GithubImport.insert_and_return_id(attributes, project.issues).tap do |id|
# We use .insert_and_return_id which effectively disables all callbacks.
# Trigger iid logic here to make sure we track internal id values consistently.
project.issues.find(id).ensure_project_iid!
end
rescue ActiveRecord::InvalidForeignKey
# It's possible the project has been deleted since scheduling this
# job. In this case we'll just skip creating the issue.
......
......@@ -17,10 +17,20 @@ module Gitlab
end
def execute
bulk_insert(Milestone, build_milestones)
# We insert records in bulk, by-passing any standard model callbacks.
# The pre_hook here makes sure we track internal ids consistently.
# Note this has to be called before performing an insert of a batch
# because we're outside a transaction scope here.
bulk_insert(Milestone, build_milestones, pre_hook: method(:track_greatest_iid))
build_milestones_cache
end
def track_greatest_iid(slice)
greatest_iid = slice.max { |e| e[:iid] }[:iid]
InternalId.track_greatest(nil, { project: project }, :milestones, greatest_iid, ->(_) { project.milestones.maximum(:iid) })
end
def build_milestones
build_database_rows(each_milestone)
end
......
......@@ -76,7 +76,13 @@ module Gitlab
merge_request_id = GithubImport
.insert_and_return_id(attributes, project.merge_requests)
[project.merge_requests.find(merge_request_id), false]
merge_request = project.merge_requests.find(merge_request_id)
# We use .insert_and_return_id which effectively disables all callbacks.
# Trigger iid logic here to make sure we track internal id values consistently.
merge_request.ensure_target_project_iid!
[merge_request, false]
end
rescue ActiveRecord::InvalidForeignKey
# It's possible the project has been deleted since scheduling this
......
......@@ -58,5 +58,17 @@ describe Gitlab::GithubImport::BulkImporting do
importer.bulk_insert(model, rows, batch_size: 5)
end
it 'calls pre_hook for each slice if given' do
rows = [{ title: 'Foo' }] * 10
model = double(:model, table_name: 'kittens')
pre_hook = double('pre_hook', call: nil)
allow(Gitlab::Database).to receive(:bulk_insert)
expect(pre_hook).to receive(:call).with(rows[0..4])
expect(pre_hook).to receive(:call).with(rows[5..9])
importer.bulk_insert(model, rows, batch_size: 5, pre_hook: pre_hook)
end
end
end
......@@ -78,6 +78,11 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
.to receive(:id_for)
.with(issue)
.and_return(milestone.id)
allow(importer.user_finder)
.to receive(:author_id_for)
.with(issue)
.and_return([user.id, true])
end
context 'when the issue author could be found' do
......@@ -172,6 +177,23 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
expect(importer.create_issue).to be_a_kind_of(Numeric)
end
it 'triggers internal_id functionality to track greatest iids' do
allow(importer.user_finder)
.to receive(:author_id_for)
.with(issue)
.and_return([user.id, true])
issue = build_stubbed(:issue, project: project)
allow(Gitlab::GithubImport)
.to receive(:insert_and_return_id)
.and_return(issue.id)
allow(project.issues).to receive(:find).with(issue.id).and_return(issue)
expect(issue).to receive(:ensure_project_iid!)
importer.create_issue
end
end
describe '#create_assignees' do
......
......@@ -29,13 +29,25 @@ describe Gitlab::GithubImport::Importer::MilestonesImporter, :clean_gitlab_redis
expect(importer)
.to receive(:bulk_insert)
.with(Milestone, [milestone_hash])
.with(Milestone, [milestone_hash], any_args)
expect(importer)
.to receive(:build_milestones_cache)
importer.execute
end
it 'tracks internal ids' do
milestone_hash = { iid: 1, title: '1.0', project_id: project.id }
allow(importer)
.to receive(:build_milestones)
.and_return([milestone_hash])
expect(InternalId).to receive(:track_greatest)
.with(nil, { project: project }, :milestones, 1, any_args)
importer.execute
end
end
describe '#build_milestones' do
......
......@@ -111,6 +111,16 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
expect(mr).to be_instance_of(MergeRequest)
expect(exists).to eq(false)
end
it 'triggers internal_id functionality to track greatest iids' do
mr = build_stubbed(:merge_request, source_project: project, target_project: project)
allow(Gitlab::GithubImport).to receive(:insert_and_return_id).and_return(mr.id)
allow(project.merge_requests).to receive(:find).with(mr.id).and_return(mr)
expect(mr).to receive(:ensure_target_project_iid!)
importer.create_merge_request
end
end
context 'when the author could not be found' do
......
# frozen_string_literal: true
# rubocop:disable RSpec/FactoriesInMigrationSpecs
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20180723130817_delete_inconsistent_internal_id_records.rb')
describe DeleteInconsistentInternalIdRecords, :migration do
let!(:project1) { create(:project) }
let!(:project2) { create(:project) }
let!(:project3) { create(:project) }
let(:internal_id_query) { ->(project) { InternalId.where(usage: InternalId.usages[scope.to_s.tableize], project: project) } }
let(:create_models) do
3.times { create(scope, project: project1) }
3.times { create(scope, project: project2) }
3.times { create(scope, project: project3) }
end
shared_examples_for 'deleting inconsistent internal_id records' do
before do
create_models
internal_id_query.call(project1).first.tap do |iid|
iid.last_value = iid.last_value - 2
# This is an inconsistent record
iid.save!
end
internal_id_query.call(project3).first.tap do |iid|
iid.last_value = iid.last_value + 2
# This is a consistent record
iid.save!
end
end
it "deletes inconsistent issues" do
expect { migrate! }.to change { internal_id_query.call(project1).size }.from(1).to(0)
end
it "retains consistent issues" do
expect { migrate! }.not_to change { internal_id_query.call(project2).size }
end
it "retains consistent records, especially those with a greater last_value" do
expect { migrate! }.not_to change { internal_id_query.call(project3).size }
end
end
context 'for issues' do
let(:scope) { :issue }
it_behaves_like 'deleting inconsistent internal_id records'
end
context 'for merge_requests' do
let(:scope) { :merge_request }
let(:create_models) do
3.times { |i| create(scope, target_project: project1, source_project: project1, source_branch: i.to_s) }
3.times { |i| create(scope, target_project: project2, source_project: project2, source_branch: i.to_s) }
3.times { |i| create(scope, target_project: project3, source_project: project3, source_branch: i.to_s) }
end
it_behaves_like 'deleting inconsistent internal_id records'
end
context 'for deployments' do
let(:scope) { :deployment }
it_behaves_like 'deleting inconsistent internal_id records'
end
context 'for milestones (by project)' do
let(:scope) { :milestone }
it_behaves_like 'deleting inconsistent internal_id records'
end
context 'for ci_pipelines' do
let(:scope) { :ci_pipeline }
it_behaves_like 'deleting inconsistent internal_id records'
end
context 'for milestones (by group)' do
# milestones (by group) is a little different than all of the other models
let!(:group1) { create(:group) }
let!(:group2) { create(:group) }
let!(:group3) { create(:group) }
let(:internal_id_query) { ->(group) { InternalId.where(usage: InternalId.usages['milestones'], namespace: group) } }
before do
3.times { create(:milestone, group: group1) }
3.times { create(:milestone, group: group2) }
3.times { create(:milestone, group: group3) }
internal_id_query.call(group1).first.tap do |iid|
iid.last_value = iid.last_value - 2
# This is an inconsistent record
iid.save!
end
internal_id_query.call(group3).first.tap do |iid|
iid.last_value = iid.last_value + 2
# This is a consistent record
iid.save!
end
end
it "deletes inconsistent issues" do
expect { migrate! }.to change { internal_id_query.call(group1).size }.from(1).to(0)
end
it "retains consistent issues" do
expect { migrate! }.not_to change { internal_id_query.call(group2).size }
end
it "retains consistent records, especially those with a greater last_value" do
expect { migrate! }.not_to change { internal_id_query.call(group3).size }
end
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment