Commit 3977421e authored by Douglas Barbosa Alexandre

Merge branch '53966-make-hashed-storage-migration-safer-and-more-inviting' into 'master'

Hashed Storage rollback mechanism

See merge request gitlab-org/gitlab-ce!23955
parents 25c91fa4 3524a618
......@@ -1970,9 +1970,19 @@ class Project < ActiveRecord::Base
return unless storage_upgradable?
if git_transfer_in_progress?
ProjectMigrateHashedStorageWorker.perform_in(Gitlab::ReferenceCounter::REFERENCE_EXPIRE_TIME, id)
HashedStorage::ProjectMigrateWorker.perform_in(Gitlab::ReferenceCounter::REFERENCE_EXPIRE_TIME, id)
else
ProjectMigrateHashedStorageWorker.perform_async(id)
HashedStorage::ProjectMigrateWorker.perform_async(id)
end
end
def rollback_to_legacy_storage!
return if legacy_storage?
if git_transfer_in_progress?
HashedStorage::ProjectRollbackWorker.perform_in(Gitlab::ReferenceCounter::REFERENCE_EXPIRE_TIME, id)
else
HashedStorage::ProjectRollbackWorker.perform_async(id)
end
end
......
# frozen_string_literal: true
module Projects
module HashedStorage
AttachmentMigrationError = Class.new(StandardError)
AttachmentCannotMoveError = Class.new(StandardError)
class BaseAttachmentService < BaseService
# Returns the disk_path value before the execution
attr_reader :old_disk_path
# Returns the disk_path value after the execution
attr_reader :new_disk_path
# Returns the logger currently in use
attr_reader :logger
# Return whether this operation was skipped or not
#
# @return [Boolean] true if skipped of false otherwise
def skipped?
@skipped
end
protected
def move_folder!(old_path, new_path)
unless File.directory?(old_path)
logger.info("Skipped attachments move from '#{old_path}' to '#{new_path}', source path doesn't exist or is not a directory (PROJECT_ID=#{project.id})")
@skipped = true
return true
end
if File.exist?(new_path)
logger.error("Cannot move attachments from '#{old_path}' to '#{new_path}', target path already exist (PROJECT_ID=#{project.id})")
raise AttachmentCannotMoveError, "Target path '#{new_path}' already exists"
end
# Create base path folder on the new storage layout
FileUtils.mkdir_p(File.dirname(new_path))
FileUtils.mv(old_path, new_path)
logger.info("Project attachments moved from '#{old_path}' to '#{new_path}' (PROJECT_ID=#{project.id})")
true
end
end
end
end
......@@ -2,11 +2,8 @@
module Projects
module HashedStorage
# Returned when there is an error with the Hashed Storage migration
RepositoryMigrationError = Class.new(StandardError)
# Returned when there is an error with the Hashed Storage rollback
RepositoryRollbackError = Class.new(StandardError)
# Returned when repository can't be made read-only because there is already a git transfer in progress
RepositoryInUseError = Class.new(StandardError)
class BaseRepositoryService < BaseService
include Gitlab::ShellAdapter
......@@ -38,7 +35,10 @@ module Projects
# project was not originally empty.
if !from_exists && !to_exists
logger.warn "Can't find a repository on either source or target paths for #{project.full_path} (ID=#{project.id}) ..."
return false
# We return true so we still reflect the change in the database.
# Next time the repository is (re)created it will be under the new storage layout
return true
elsif !from_exists
# Repository has already been moved.
return true
......@@ -52,6 +52,16 @@ module Projects
move_repository(new_disk_path, old_disk_path)
move_repository("#{new_disk_path}.wiki", old_wiki_disk_path)
end
def try_to_set_repository_read_only!
# Mitigate any push operation to start during migration
unless project.set_repository_read_only!
migration_error = "Target repository '#{old_disk_path}' cannot be made read-only as there is a git transfer in progress"
logger.error migration_error
raise RepositoryInUseError, migration_error
end
end
end
end
end
......@@ -2,62 +2,37 @@
module Projects
module HashedStorage
AttachmentMigrationError = Class.new(StandardError)
class MigrateAttachmentsService < BaseService
attr_reader :logger, :old_disk_path, :new_disk_path
class MigrateAttachmentsService < BaseAttachmentService
def initialize(project, old_disk_path, logger: nil)
@project = project
@logger = logger || Rails.logger
@old_disk_path = old_disk_path
@new_disk_path = project.disk_path
@skipped = false
end
def execute
origin = FileUploader.absolute_base_dir(project)
# It's possible that old_disk_path does not match project.disk_path. For example, that happens when we rename a project
# It's possible that old_disk_path does not match project.disk_path.
# For example, that happens when we rename a project
origin.sub!(/#{Regexp.escape(project.full_path)}\z/, old_disk_path)
project.storage_version = ::Project::HASHED_STORAGE_FEATURES[:attachments]
target = FileUploader.absolute_base_dir(project)
result = move_folder!(origin, target)
project.save!
if result && block_given?
yield
end
result
end
def skipped?
@skipped
end
@new_disk_path = project.disk_path
private
result = move_folder!(origin, target)
def move_folder!(old_path, new_path)
unless File.directory?(old_path)
logger.info("Skipped attachments migration from '#{old_path}' to '#{new_path}', source path doesn't exist or is not a directory (PROJECT_ID=#{project.id})")
@skipped = true
return true
end
if result
project.save!
if File.exist?(new_path)
logger.error("Cannot migrate attachments from '#{old_path}' to '#{new_path}', target path already exist (PROJECT_ID=#{project.id})")
raise AttachmentMigrationError, "Target path '#{new_path}' already exist"
yield if block_given?
else
# Rollback changes
project.rollback!
end
# Create hashed storage base path folder
FileUtils.mkdir_p(File.dirname(new_path))
FileUtils.mv(old_path, new_path)
logger.info("Migrated project attachments from '#{old_path}' to '#{new_path}' (PROJECT_ID=#{project.id})")
true
result
end
end
end
......
......@@ -15,7 +15,7 @@ module Projects
result = move_repository(old_disk_path, new_disk_path)
if move_wiki
result &&= move_repository("#{old_wiki_disk_path}", "#{new_disk_path}.wiki")
result &&= move_repository(old_wiki_disk_path, "#{new_disk_path}.wiki")
end
if result
......@@ -35,18 +35,6 @@ module Projects
result
end
private
def try_to_set_repository_read_only!
# Mitigate any push operation to start during migration
unless project.set_repository_read_only!
migration_error = "Target repository '#{old_disk_path}' cannot be made read-only as there is a git transfer in progress"
logger.error migration_error
raise RepositoryMigrationError, migration_error
end
end
end
end
end
# frozen_string_literal: true
module Projects
module HashedStorage
class RollbackAttachmentsService < BaseAttachmentService
def initialize(project, logger: nil)
@project = project
@logger = logger || Rails.logger
@old_disk_path = project.disk_path
end
def execute
origin = FileUploader.absolute_base_dir(project)
project.storage_version = ::Project::HASHED_STORAGE_FEATURES[:repository]
target = FileUploader.absolute_base_dir(project)
@new_disk_path = FileUploader.base_dir(project)
result = move_folder!(origin, target)
if result
project.save!
yield if block_given?
else
# Rollback changes
project.rollback!
end
result
end
end
end
end
# frozen_string_literal: true
module Projects
module HashedStorage
class RollbackRepositoryService < BaseRepositoryService
def execute
try_to_set_repository_read_only!
@old_storage_version = project.storage_version
project.storage_version = nil
project.ensure_storage_path_exists
@new_disk_path = project.disk_path
result = move_repository(old_disk_path, new_disk_path)
if move_wiki
result &&= move_repository(old_wiki_disk_path, "#{new_disk_path}.wiki")
end
if result
project.write_repository_config
project.track_project_repository
else
rollback_folder_move
project.storage_version = ::Project::HASHED_STORAGE_FEATURES[:repository]
end
project.repository_read_only = false
project.save!
if result && block_given?
yield
end
result
end
end
end
end
# frozen_string_literal: true
module Projects
module HashedStorage
class RollbackService < BaseService
attr_reader :logger, :old_disk_path
def initialize(project, old_disk_path, logger: nil)
@project = project
@old_disk_path = old_disk_path
@logger = logger || Rails.logger
end
def execute
# Rollback attachments from Hashed Storage to Legacy
if project.hashed_storage?(:attachments)
return false unless rollback_attachments
end
# Rollback repository from Hashed Storage to Legacy
if project.hashed_storage?(:repository)
rollback_repository
end
end
private
def rollback_attachments
HashedStorage::RollbackAttachmentsService.new(project, logger: logger).execute
end
def rollback_repository
HashedStorage::RollbackRepositoryService.new(project, old_disk_path, logger: logger).execute
end
end
end
end
......@@ -47,6 +47,9 @@
- github_importer:github_import_stage_import_repository
- hashed_storage:hashed_storage_migrator
- hashed_storage:hashed_storage_rollbacker
- hashed_storage:hashed_storage_project_migrate
- hashed_storage:hashed_storage_project_rollback
- mail_scheduler:mail_scheduler_issue_due
- mail_scheduler:mail_scheduler_notification_service
......@@ -126,7 +129,6 @@
- project_cache
- project_destroy
- project_export
- project_migrate_hashed_storage
- project_service
- propagate_service_template
- reactive_caching
......
# frozen_string_literal: true
module HashedStorage
class BaseWorker
include ExclusiveLeaseGuard
LEASE_TIMEOUT = 30.seconds.to_i
LEASE_KEY_SEGMENT = 'project_migrate_hashed_storage_worker'.freeze
protected
def lease_key
# we share the same lease key for both migration and rollback so they don't run simultaneously
"#{LEASE_KEY_SEGMENT}:#{project_id}"
end
def lease_timeout
LEASE_TIMEOUT
end
end
end
# frozen_string_literal: true
module HashedStorage
class ProjectMigrateWorker < BaseWorker
include ApplicationWorker
queue_namespace :hashed_storage
attr_reader :project_id
# rubocop: disable CodeReuse/ActiveRecord
def perform(project_id, old_disk_path = nil)
@project_id = project_id # we need to set this in order to create the lease_key
try_obtain_lease do
project = Project.without_deleted.find_by(id: project_id)
break unless project
old_disk_path ||= project.disk_path
::Projects::HashedStorage::MigrationService.new(project, old_disk_path, logger: logger).execute
end
end
# rubocop: enable CodeReuse/ActiveRecord
end
end
# frozen_string_literal: true
module HashedStorage
class ProjectRollbackWorker < BaseWorker
include ApplicationWorker
queue_namespace :hashed_storage
attr_reader :project_id
# rubocop: disable CodeReuse/ActiveRecord
def perform(project_id, old_disk_path = nil)
@project_id = project_id # we need to set this in order to create the lease_key
try_obtain_lease do
project = Project.without_deleted.find_by(id: project_id)
break unless project
old_disk_path ||= project.disk_path
::Projects::HashedStorage::RollbackService.new(project, old_disk_path, logger: logger).execute
end
end
# rubocop: enable CodeReuse/ActiveRecord
end
end
# frozen_string_literal: true
module HashedStorage
class RollbackerWorker
include ApplicationWorker
queue_namespace :hashed_storage
# @param [Integer] start initial ID of the batch
# @param [Integer] finish last ID of the batch
def perform(start, finish)
migrator = Gitlab::HashedStorage::Migrator.new
migrator.bulk_rollback(start: start, finish: finish)
end
end
end
# frozen_string_literal: true
class ProjectMigrateHashedStorageWorker
include ApplicationWorker
LEASE_TIMEOUT = 30.seconds.to_i
LEASE_KEY_SEGMENT = 'project_migrate_hashed_storage_worker'.freeze
# rubocop: disable CodeReuse/ActiveRecord
def perform(project_id, old_disk_path = nil)
uuid = lease_for(project_id).try_obtain
if uuid
project = Project.find_by(id: project_id)
return if project.nil? || project.pending_delete?
old_disk_path ||= project.disk_path
::Projects::HashedStorage::MigrationService.new(project, old_disk_path, logger: logger).execute
else
return false
end
ensure
cancel_lease_for(project_id, uuid) if uuid
end
# rubocop: enable CodeReuse/ActiveRecord
def lease_for(project_id)
Gitlab::ExclusiveLease.new(lease_key(project_id), timeout: LEASE_TIMEOUT)
end
private
def lease_key(project_id)
# we share the same lease key for both migration and rollback so they don't run simultaneously
"#{LEASE_KEY_SEGMENT}:#{project_id}"
end
def cancel_lease_for(project_id, uuid)
Gitlab::ExclusiveLease.cancel(lease_key(project_id), uuid)
end
end
---
title: Hashed Storage rollback mechanism
merge_request: 23955
author:
type: added
......@@ -68,6 +68,7 @@
- [background_migration, 1]
- [gcp_cluster, 1]
- [project_migrate_hashed_storage, 1]
- [project_rollback_hashed_storage, 1]
- [hashed_storage, 1]
- [pages_domain_verification, 1]
- [object_storage_upload, 1]
......
# frozen_string_literal: true
class MigrateProjectMigrateSidekiqQueue < ActiveRecord::Migration[5.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
DOWNTIME = false
def up
sidekiq_queue_migrate 'project_migrate_hashed_storage', to: 'hashed_storage:hashed_storage_project_migrate'
end
def down
sidekiq_queue_migrate 'hashed_storage:hashed_storage_project_migrate', to: 'project_migrate_hashed_storage'
end
end
......@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20190228092516) do
ActiveRecord::Schema.define(version: 20190301081611) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......
......@@ -13,10 +13,18 @@ module Gitlab
#
# @param [Integer] start first project id for the range
# @param [Integer] finish last project id for the range
def bulk_schedule(start:, finish:)
def bulk_schedule_migration(start:, finish:)
::HashedStorage::MigratorWorker.perform_async(start, finish)
end
# Schedule a range of projects to be rolled back in bulk with #bulk_rollback asynchronously
#
# @param [Integer] start first project id for the range
# @param [Integer] finish last project id for the range
def bulk_schedule_rollback(start:, finish:)
::HashedStorage::RollbackerWorker.perform_async(start, finish)
end
# Start migration of projects from specified range
#
# Flagging a project to be migrated is a synchronous action
......@@ -34,6 +42,23 @@ module Gitlab
end
# rubocop: enable CodeReuse/ActiveRecord
# Start rollback of projects from specified range
#
# Flagging a project to be rolled back is a synchronous action
# but the rollback runs through async jobs
#
# @param [Integer] start first project id for the range
# @param [Integer] finish last project id for the range
# rubocop: disable CodeReuse/ActiveRecord
def bulk_rollback(start:, finish:)
projects = build_relation(start, finish)
projects.with_route.find_each(batch_size: BATCH_SIZE) do |project|
rollback(project)
end
end
# rubocop: enable CodeReuse/ActiveRecord
# Flag a project to be migrated to Hashed Storage
#
# @param [Project] project that will be migrated
......@@ -45,8 +70,15 @@ module Gitlab
Rails.logger.error("#{err.message} migrating storage of #{project.full_path} (ID=#{project.id}), trace - #{err.backtrace}")
end
# Flag a project to be rolled-back to Legacy Storage
#
# @param [Project] project that will be rolled-back
def rollback(project)
# TODO: implement rollback strategy
Rails.logger.info "Starting storage rollback of #{project.full_path} (ID=#{project.id})..."
project.rollback_to_legacy_storage!
rescue => err
Rails.logger.error("#{err.message} rolling-back storage of #{project.full_path} (ID=#{project.id}), trace - #{err.backtrace}")
end
private
......
......@@ -24,7 +24,7 @@ module Gitlab
end
# rubocop: disable CodeReuse/ActiveRecord
def self.project_id_batches(&block)
def self.project_id_batches_migration(&block)
Project.with_unmigrated_storage.in_batches(of: batch_size, start: range_from, finish: range_to) do |relation| # rubocop: disable Cop/InBatches
ids = relation.pluck(:id)
......@@ -33,6 +33,16 @@ module Gitlab
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def self.project_id_batches_rollback(&block)
Project.with_storage_feature(:repository).in_batches(of: batch_size, start: range_from, finish: range_to) do |relation| # rubocop: disable Cop/InBatches
ids = relation.pluck(:id)
yield ids.min, ids.max
end
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def self.legacy_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version < :version OR projects.storage_version IS NULL', version: Project::HASHED_STORAGE_FEATURES[:attachments])
......
......@@ -36,8 +36,54 @@ namespace :gitlab do
print "Enqueuing migration of #{legacy_projects_count} projects in batches of #{helper.batch_size}"