Commit 5f6f0cd6 authored by Douglas Barbosa Alexandre

Merge branch '53966-refactor-hashed-storage' into 'master'

Refactor Hashed Storage for future Rollback functionality

See merge request gitlab-org/gitlab-ce!24402
parents 1d0580d4 41712ebe
......@@ -62,7 +62,7 @@ module Projects
def rename_or_migrate_repository!
success =
if migrate_to_hashed_storage?
::Projects::HashedStorageMigrationService
::Projects::HashedStorage::MigrationService
.new(project, full_path_before)
.execute
else
......
# frozen_string_literal: true
module Projects
  module HashedStorage
    # Raised when there is an error with the Hashed Storage migration
    RepositoryMigrationError = Class.new(StandardError)

    # Error class reserved for failures during a Hashed Storage rollback
    RepositoryRollbackError = Class.new(StandardError)

    # Common behaviour for services that move a project's repository, and its
    # wiki repository, from one disk path to another through gitlab_shell.
    #
    # `new_disk_path` and `old_storage_version` are exposed as readers but are
    # not assigned here; subclasses are expected to set them before relying on
    # helpers such as #rollback_folder_move.
    class BaseRepositoryService < BaseService
      include Gitlab::ShellAdapter

      attr_reader :old_disk_path,
                  :new_disk_path,
                  :old_wiki_disk_path,
                  :old_storage_version,
                  :logger,
                  :move_wiki

      # @param project [Project] project whose repositories are being moved
      # @param old_disk_path [String] repository disk path before the move, without the ".git" suffix
      # @param logger [#warn, nil] logger to report to; falls back to Gitlab::AppLogger
      def initialize(project, old_disk_path, logger: nil)
        @project = project
        @old_disk_path = old_disk_path
        @old_wiki_disk_path = "#{old_disk_path}.wiki"
        @logger = logger || Gitlab::AppLogger

        # Must run last: the check reads both the project and the wiki path set above.
        @move_wiki = has_wiki?
      end

      protected

      # Tells whether a wiki repository exists at the old wiki disk path.
      #
      # rubocop: disable CodeReuse/ActiveRecord
      def has_wiki?
        wiki_repository = "#{old_wiki_disk_path}.git"

        gitlab_shell.exists?(project.repository_storage, wiki_repository)
      end
      # rubocop: enable CodeReuse/ActiveRecord

      # Moves a repository between two disk paths on the project's storage.
      #
      # @param from_name [String] current disk path, without the ".git" suffix
      # @param to_name [String] destination disk path, without the ".git" suffix
      # @return the result of gitlab_shell.mv_repository when the source exists,
      #   true when only the target exists, false when neither can be found
      #
      # rubocop: disable CodeReuse/ActiveRecord
      def move_repository(from_name, to_name)
        source_present = gitlab_shell.exists?(project.repository_storage, "#{from_name}.git")
        target_present = gitlab_shell.exists?(project.repository_storage, "#{to_name}.git")

        if source_present
          gitlab_shell.mv_repository(project.repository_storage, from_name, to_name)
        elsif target_present
          # The repository has already been moved.
          true
        else
          # Neither the original nor the target holds a repository. Log it, as
          # this could be an issue if the project was not originally empty.
          logger.warn "Can't find a repository on either source or target paths for #{project.full_path} (ID=#{project.id}) ..."
          false
        end
      end
      # rubocop: enable CodeReuse/ActiveRecord

      # Moves the repository and then the wiki repository from the new disk
      # path back to their old locations. Returns the result of the wiki move.
      def rollback_folder_move
        move_repository(new_disk_path, old_disk_path)

        new_wiki_disk_path = "#{new_disk_path}.wiki"
        move_repository(new_wiki_disk_path, old_wiki_disk_path)
      end
    end
  end
end
......@@ -12,6 +12,7 @@ module Projects
@logger = logger || Rails.logger
@old_disk_path = old_disk_path
@new_disk_path = project.disk_path
@skipped = false
end
def execute
......@@ -32,24 +33,29 @@ module Projects
result
end
# Reports whether the attachments folder move was skipped.
#
# The flag starts out false in the initializer and is set to true by
# move_folder! when the source path does not exist or is not a directory.
#
# @return [Boolean]
def skipped?
@skipped
end
private
def move_folder!(old_disk_path, new_disk_path)
unless File.directory?(old_disk_path)
logger.info("Skipped attachments migration from '#{old_disk_path}' to '#{new_disk_path}', source path doesn't exist or is not a directory (PROJECT_ID=#{project.id})")
return
def move_folder!(old_path, new_path)
unless File.directory?(old_path)
logger.info("Skipped attachments migration from '#{old_path}' to '#{new_path}', source path doesn't exist or is not a directory (PROJECT_ID=#{project.id})")
@skipped = true
return true
end
if File.exist?(new_disk_path)
logger.error("Cannot migrate attachments from '#{old_disk_path}' to '#{new_disk_path}', target path already exist (PROJECT_ID=#{project.id})")
raise AttachmentMigrationError, "Target path '#{new_disk_path}' already exist"
if File.exist?(new_path)
logger.error("Cannot migrate attachments from '#{old_path}' to '#{new_path}', target path already exist (PROJECT_ID=#{project.id})")
raise AttachmentMigrationError, "Target path '#{new_path}' already exist"
end
# Create hashed storage base path folder
FileUtils.mkdir_p(File.dirname(new_disk_path))
FileUtils.mkdir_p(File.dirname(new_path))
FileUtils.mv(old_disk_path, new_disk_path)
logger.info("Migrated project attachments from '#{old_disk_path}' to '#{new_disk_path}' (PROJECT_ID=#{project.id})")
FileUtils.mv(old_path, new_path)
logger.info("Migrated project attachments from '#{old_path}' to '#{new_path}' (PROJECT_ID=#{project.id})")
true
end
......
......@@ -2,21 +2,7 @@
module Projects
module HashedStorage
RepositoryMigrationError = Class.new(StandardError)
class MigrateRepositoryService < BaseService
include Gitlab::ShellAdapter
attr_reader :old_disk_path, :new_disk_path, :old_wiki_disk_path, :old_storage_version, :logger, :move_wiki
def initialize(project, old_disk_path, logger: nil)
@project = project
@logger = logger || Rails.logger
@old_disk_path = old_disk_path
@old_wiki_disk_path = "#{old_disk_path}.wiki"
@move_wiki = has_wiki?
end
class MigrateRepositoryService < BaseRepositoryService
def execute
try_to_set_repository_read_only!
......@@ -61,36 +47,6 @@ module Projects
raise RepositoryMigrationError, migration_error
end
end
# rubocop: disable CodeReuse/ActiveRecord
def has_wiki?
gitlab_shell.exists?(project.repository_storage, "#{old_wiki_disk_path}.git")
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def move_repository(from_name, to_name)
from_exists = gitlab_shell.exists?(project.repository_storage, "#{from_name}.git")
to_exists = gitlab_shell.exists?(project.repository_storage, "#{to_name}.git")
# If we don't find the repository on either original or target we should log that as it could be an issue if the
# project was not originally empty.
if !from_exists && !to_exists
logger.warn "Can't find a repository on either source or target paths for #{project.full_path} (ID=#{project.id}) ..."
return false
elsif !from_exists
# Repository have been moved already.
return true
end
gitlab_shell.mv_repository(project.repository_storage, from_name, to_name)
end
# rubocop: enable CodeReuse/ActiveRecord
def rollback_folder_move
move_repository(new_disk_path, old_disk_path)
move_repository("#{new_disk_path}.wiki", old_wiki_disk_path)
end
end
end
end
# frozen_string_literal: true
module Projects
  module HashedStorage
    # Migrates a project from Legacy to Hashed Storage by running the
    # repository step and then the attachments step, skipping any step the
    # project has already completed.
    class MigrationService < BaseService
      attr_reader :logger, :old_disk_path

      # @param project [Project] project to migrate
      # @param old_disk_path [String] disk path the project used before the migration
      # @param logger [Object, nil] logger handed to the step services; falls back to Gitlab::AppLogger
      def initialize(project, old_disk_path, logger: nil)
        @project = project
        @old_disk_path = old_disk_path
        @logger = logger || Gitlab::AppLogger
      end

      # @return [Boolean] false as soon as a required step reports failure,
      #   true when every step either succeeded or was not needed
      def execute
        # Migrate repository from Legacy to Hashed Storage
        return false unless project.hashed_storage?(:repository) || migrate_repository

        # Migrate attachments from Legacy to Hashed Storage
        return false unless project.hashed_storage?(:attachments) || migrate_attachments

        true
      end

      private

      # Runs the repository migration step and returns its result.
      def migrate_repository
        repository_service = HashedStorage::MigrateRepositoryService.new(project, old_disk_path, logger: logger)
        repository_service.execute
      end

      # Runs the attachments migration step and returns its result.
      def migrate_attachments
        attachments_service = HashedStorage::MigrateAttachmentsService.new(project, old_disk_path, logger: logger)
        attachments_service.execute
      end
    end
  end
end
# frozen_string_literal: true
module Projects
  # Migrates a project from Legacy to Hashed Storage: first the repository,
  # then the attachments, skipping whichever part is already on Hashed Storage.
  class HashedStorageMigrationService < BaseService
    attr_reader :logger, :old_disk_path

    # @param project [Project] project to migrate
    # @param old_disk_path [String] disk path the project used before the migration
    # @param logger [Object, nil] logger handed to the step services; falls back to Rails.logger
    def initialize(project, old_disk_path, logger: nil)
      @project = project
      @old_disk_path = old_disk_path
      @logger = logger || Rails.logger
    end

    # @return [true, nil] nil when the repository migration reports failure,
    #   true otherwise
    def execute
      # Migrate repository from Legacy to Hashed Storage
      return if !project.hashed_storage?(:repository) && !migrate_repository

      # Migrate attachments from Legacy to Hashed Storage
      # NOTE(review): the attachments result is deliberately not inspected
      # here, so a falsy result from that step still ends in `true` — confirm
      # this is intended.
      migrate_attachments unless project.hashed_storage?(:attachments)

      true
    end

    private

    # Runs the repository migration step and returns its result.
    def migrate_repository
      HashedStorage::MigrateRepositoryService
        .new(project, old_disk_path, logger: logger)
        .execute
    end

    # Runs the attachments migration step and returns its result.
    def migrate_attachments
      HashedStorage::MigrateAttachmentsService
        .new(project, old_disk_path, logger: logger)
        .execute
    end
  end
end
......@@ -11,7 +11,6 @@
.form-text.text-muted
Enable immutable, hash-based paths and repository names to store repositories on disk. This prevents
repositories from having to be moved or renamed when the Project URL changes and may improve disk I/O performance.
%em (EXPERIMENTAL)
.form-group
= f.label :repository_storages, 'Storage paths for new projects', class: 'label-bold'
= f.select :repository_storages, repository_storages_options_for_select(@application_setting.repository_storages),
......
......@@ -45,6 +45,8 @@
- github_importer:github_import_stage_import_pull_requests
- github_importer:github_import_stage_import_repository
- hashed_storage:hashed_storage_migrator
- mail_scheduler:mail_scheduler_issue_due
- mail_scheduler:mail_scheduler_notification_service
......@@ -131,7 +133,6 @@
- repository_fork
- repository_import
- repository_remove_remote
- storage_migrator
- system_hook_push
- update_merge_requests
- upload_checksum
......
# frozen_string_literal: true
module HashedStorage
  # Background worker that hands a range of project IDs to
  # Gitlab::HashedStorage::Migrator#bulk_migrate.
  class MigratorWorker
    include ApplicationWorker

    queue_namespace :hashed_storage

    # @param [Integer] start initial ID of the batch
    # @param [Integer] finish last ID of the batch
    def perform(start, finish)
      Gitlab::HashedStorage::Migrator.new.bulk_migrate(start: start, finish: finish)
    end
  end
end
......@@ -4,21 +4,25 @@ class ProjectMigrateHashedStorageWorker
include ApplicationWorker
LEASE_TIMEOUT = 30.seconds.to_i
LEASE_KEY_SEGMENT = 'project_migrate_hashed_storage_worker'.freeze
# rubocop: disable CodeReuse/ActiveRecord
def perform(project_id, old_disk_path = nil)
project = Project.find_by(id: project_id)
return if project.nil? || project.pending_delete?
uuid = lease_for(project_id).try_obtain
if uuid
::Projects::HashedStorageMigrationService.new(project, old_disk_path || project.full_path, logger: logger).execute
project = Project.find_by(id: project_id)
return if project.nil? || project.pending_delete?
old_disk_path ||= project.disk_path
::Projects::HashedStorage::MigrationService.new(project, old_disk_path, logger: logger).execute
else
false
return false
end
rescue => ex
ensure
cancel_lease_for(project_id, uuid) if uuid
raise ex
end
# rubocop: enable CodeReuse/ActiveRecord
......@@ -29,7 +33,8 @@ class ProjectMigrateHashedStorageWorker
private
def lease_key(project_id)
"project_migrate_hashed_storage_worker:#{project_id}"
# we share the same lease key for both migration and rollback so they don't run simultaneously
"#{LEASE_KEY_SEGMENT}:#{project_id}"
end
def cancel_lease_for(project_id, uuid)
......
# frozen_string_literal: true
# Background worker that hands a range of project IDs to
# Gitlab::HashedStorage::Migrator#bulk_migrate using positional arguments.
class StorageMigratorWorker
  include ApplicationWorker

  # @param start first value passed on to bulk_migrate
  # @param finish second value passed on to bulk_migrate
  def perform(start, finish)
    Gitlab::HashedStorage::Migrator.new.bulk_migrate(start, finish)
  end
end
......@@ -68,7 +68,7 @@
- [background_migration, 1]
- [gcp_cluster, 1]
- [project_migrate_hashed_storage, 1]
- [storage_migrator, 1]
- [hashed_storage, 1]
- [pages_domain_verification, 1]
- [object_storage_upload, 1]
- [object_storage, 1]
......
# frozen_string_literal: true
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
# Renames the Sidekiq queue used by the storage migrator worker from
# 'storage_migrator' to the namespaced 'hashed_storage:hashed_storage_migrator'.
class MigrateStorageMigratorSidekiqQueue < ActiveRecord::Migration[5.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  # Hands the legacy queue over to the namespaced queue.
  def up
    sidekiq_queue_migrate('storage_migrator', to: 'hashed_storage:hashed_storage_migrator')
  end

  # Reverts #up by handing the namespaced queue back to the legacy one.
  def down
    sidekiq_queue_migrate('hashed_storage:hashed_storage_migrator', to: 'storage_migrator')
  end
end
......@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20190115054216) do
ActiveRecord::Schema.define(version: 20190124200344) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......
......@@ -11,21 +11,21 @@ module Gitlab
# Schedule a range of projects to be bulk migrated with #bulk_migrate asynchronously
#
# @param [Object] start first project id for the range
# @param [Object] finish last project id for the range
def bulk_schedule(start, finish)
StorageMigratorWorker.perform_async(start, finish)
# @param [Integer] start first project id for the range
# @param [Integer] finish last project id for the range
def bulk_schedule(start:, finish:)
::HashedStorage::MigratorWorker.perform_async(start, finish)
end
# Start migration of projects from specified range
#
# Flagging a project to be migrated is a synchronous action,
# Flagging a project to be migrated is a synchronous action
# but the migration runs through async jobs
#
# @param [Object] start first project id for the range
# @param [Object] finish last project id for the range
# @param [Integer] start first project id for the range
# @param [Integer] finish last project id for the range
# rubocop: disable CodeReuse/ActiveRecord
def bulk_migrate(start, finish)
def bulk_migrate(start:, finish:)
projects = build_relation(start, finish)
projects.with_route.find_each(batch_size: BATCH_SIZE) do |project|
......@@ -34,9 +34,9 @@ module Gitlab
end
# rubocop: enable CodeReuse/ActiveRecord
# Flag a project to be migrated
# Flag a project to be migrated to Hashed Storage
#
# @param [Object] project that will be migrated
# @param [Project] project that will be migrated
def migrate(project)
Rails.logger.info "Starting storage migration of #{project.full_path} (ID=#{project.id})..."
......@@ -45,6 +45,10 @@ module Gitlab
Rails.logger.error("#{err.message} migrating storage of #{project.full_path} (ID=#{project.id}), trace - #{err.backtrace}")
end
# Placeholder for rolling back a project's storage migration.
#
# The body is currently empty, so calling this does nothing and returns nil.
#
# @param project the project to roll back (presumably a Project, as in #migrate — confirm once implemented)
def rollback(project)
# TODO: implement rollback strategy
end
private
# rubocop: disable CodeReuse/ActiveRecord
......
......@@ -37,7 +37,7 @@ namespace :gitlab do
print "Enqueuing migration of #{legacy_projects_count} projects in batches of #{helper.batch_size}"
helper.project_id_batches do |start, finish|
storage_migrator.bulk_schedule(start, finish)
storage_migrator.bulk_schedule(start: start, finish: finish)
print '.'
end
......
......@@ -4,7 +4,7 @@ describe Gitlab::HashedStorage::Migrator do
describe '#bulk_schedule' do
it 'schedules job to StorageMigratorWorker' do
Sidekiq::Testing.fake! do
expect { subject.bulk_schedule(1, 5) }.to change(StorageMigratorWorker.jobs, :size).by(1)
expect { subject.bulk_schedule(start: 1, finish: 5) }.to change(HashedStorage::MigratorWorker.jobs, :size).by(1)
end
end
end
......@@ -15,13 +15,13 @@ describe Gitlab::HashedStorage::Migrator do
it 'enqueue jobs to ProjectMigrateHashedStorageWorker' do
Sidekiq::Testing.fake! do
expect { subject.bulk_migrate(ids.min, ids.max) }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(2)
expect { subject.bulk_migrate(start: ids.min, finish: ids.max) }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(2)
end
end
it 'rescues and log exceptions' do
allow_any_instance_of(Project).to receive(:migrate_to_hashed_storage!).and_raise(StandardError)
expect { subject.bulk_migrate(ids.min, ids.max) }.not_to raise_error
expect { subject.bulk_migrate(start: ids.min, finish: ids.max) }.not_to raise_error
end
it 'delegates each project in specified range to #migrate' do
......@@ -29,12 +29,12 @@ describe Gitlab::HashedStorage::Migrator do
expect(subject).to receive(:migrate).with(project)
end
subject.bulk_migrate(ids.min, ids.max)
subject.bulk_migrate(start: ids.min, finish: ids.max)
end
it 'has migrated projects set as writable' do
perform_enqueued_jobs do
subject.bulk_migrate(ids.min, ids.max)
subject.bulk_migrate(start: ids.min, finish: ids.max)
end
projects.each do |project|
......@@ -46,7 +46,7 @@ describe Gitlab::HashedStorage::Migrator do
describe '#migrate' do
let(:project) { create(:project, :legacy_storage, :empty_repo) }
it 'enqueues job to ProjectMigrateHashedStorageWorker' do
it 'enqueues project migration job' do
Sidekiq::Testing.fake! do
expect { subject.migrate(project) }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(1)
end
......@@ -58,7 +58,7 @@ describe Gitlab::HashedStorage::Migrator do
expect { subject.migrate(project) }.not_to raise_error
end
it 'migrate project' do
it 'migrates project storage' do
perform_enqueued_jobs do
subject.migrate(project)
end
......@@ -73,5 +73,19 @@ describe Gitlab::HashedStorage::Migrator do
expect(project.reload.repository_read_only?).to be_falsey
end
context 'when project is already on hashed storage' do
let(:project) { create(:project, :empty_repo) }
it 'doesnt enqueue any migration job' do
Sidekiq::Testing.fake! do
expect { subject.migrate(project) }.not_to change(ProjectMigrateHashedStorageWorker.jobs, :size)
end
end
it 'returns false' do
expect(subject.migrate(project)).to be_falsey
end
end
end
end
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20190124200344_migrate_storage_migrator_sidekiq_queue.rb')
describe MigrateStorageMigratorSidekiqQueue, :sidekiq, :redis do
  include Gitlab::Database::MigrationHelpers

  # Builds an anonymous Sidekiq worker class bound to the given queue, so a
  # job can be pushed onto a queue without depending on a real worker class.
  def stubbed_worker(queue:)
    Class.new do
      include Sidekiq::Worker

      sidekiq_options queue: queue
    end
  end

  context 'when there are jobs in the queues' do
    it 'correctly migrates queue when migrating up' do
      Sidekiq::Testing.disable! do
        # Seed one job on the legacy queue before running the migration.
        legacy_worker = stubbed_worker(queue: :storage_migrator)
        legacy_worker.perform_async(1, 5)

        described_class.new.up

        expect(sidekiq_queue_length('storage_migrator')).to eq(0)
        expect(sidekiq_queue_length('hashed_storage:hashed_storage_migrator')).to eq(1)
      end
    end

    it 'correctly migrates queue when migrating down' do
      Sidekiq::Testing.disable! do
        # Seed one job on the namespaced queue before reverting the migration.
        namespaced_worker = stubbed_worker(queue: :'hashed_storage:hashed_storage_migrator')
        namespaced_worker.perform_async(1, 5)

        described_class.new.down

        expect(sidekiq_queue_length('storage_migrator')).to eq(1)
        expect(sidekiq_queue_length('hashed_storage:hashed_storage_migrator')).to eq(0)
      end
    end
  end

  context 'when there are no jobs in the queues' do
    it 'does not raise error when migrating up' do
      expect { described_class.new.up }.not_to raise_error
    end

    it 'does not raise error when migrating down' do
      expect { described_class.new.down }.not_to raise_error
    end
  end
end
......@@ -3224,7 +3224,7 @@ describe Project do
end
context 'legacy storage' do
let(:project) { create(:project, :repository, :legacy_storage) }
set(:project) { create(:project, :repository, :legacy_storage) }
let(:gitlab_shell) { Gitlab::Shell.new }
let(:project_storage) { project.send(:storage) }
......@@ -3279,13 +3279,14 @@ describe Project do
end
describe '#migrate_to_hashed_storage!' do
let(:project) { create(:project, :empty_repo, :legacy_storage) }
it 'returns true' do
expect(project.migrate_to_hashed_storage!).to be_truthy
end
it 'does not validate project visibility' do
expect(project).not_to receive(:visibility_level_allowed_as_fork)
expect(project).not_to receive(:visibility_level_allowed_by_group)
it 'does not run validation' do
expect(project).not_to receive(:valid?)
project.migrate_to_hashed_storage!
end
......@@ -3315,7 +3316,7 @@ describe Project do
end
context 'hashed storage' do
let(:project) { create(:project, :repository, skip_disk_validation: true) }
set(:project) { create(:project, :repository, skip_disk_validation: true) }
let(:gitlab_shell) { Gitlab::Shell.new }
let(:hash) { Digest::SHA2.hexdigest(project.id.to_s) }
let(:hashed_prefix) { File.join('@hashed', hash[0..1], hash[2..3]) }
......@@ -3372,6 +3373,8 @@ describe Project do
end
describe '#migrate_to_hashed_storage!' do
let(:project) { create(:project, :repository, skip_disk_validation: true) }
it 'returns nil' do
expect(project.migrate_to_hashed_storage!).to be_nil
end
......@@ -3381,10 +3384,12 @@ describe Project do
end
context 'when partially migrated' do
it 'returns true' do
it 'enqueues a job' do
project = create(:project, storage_version: 1, skip_disk_validation: true)
expect(project.migrate_to_hashed_storage!).to be_truthy
Sidekiq::Testing.fake! do
expect { project.migrate_to_hashed_storage! }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(1)