Port cleanup tasks to use Gitaly

Rake tasks cleaning up the Git storage were still using direct disk
access, which won't work if the disks aren't attached. To mitigate this,
a migration issue was created.

To port gitlab:cleanup:dirs and gitlab:cleanup:repos, a new RPC,
ListDirectories, was required. This was implemented in Gitaly, through
https://gitlab.com/gitlab-org/gitaly/merge_requests/868.

To be able to use the new RPC the Gitaly server was bumped to v0.120.

This is an RPC that will not use feature gates, as this doesn't scale on
.com so there is no way to test it at scale. Furthermore, we _know_ it
doesn't scale, but this might be a useful task for smaller instances.

Lastly, the tests are slightly updated to also work when the disk isn't
attached. Even though this is not planned, it was very little effort and
thus I applied the boy scout rule.

Closes https://gitlab.com/gitlab-org/gitaly/issues/954
Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/40529
parent c380d3ac
---
title: Administrative cleanup rake tasks now leverage Gitaly
merge_request: 21588
author:
type: changed
......@@ -5,6 +5,14 @@ module Gitlab
@storage = storage
end
# Lists all directories in the git storage directory, lexically ordered.
#
# depth - value forwarded as the `depth` field of the ListDirectories
#         request (defaults to 1).
#
# Returns the `paths` of every response, flattened into a single list.
def list_directories(depth: 1)
list_request = Gitaly::ListDirectoriesRequest.new(storage_name: @storage, depth: depth)
responses = GitalyClient.call(@storage, :storage_service, :list_directories, list_request)

responses.flat_map { |response| response.paths }
end
# Delete all repositories in the storage. This is a slow and VERY DESTRUCTIVE operation.
def delete_all_repositories
request = Gitaly::DeleteAllRepositoriesRequest.new(storage_name: @storage)
......
# Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/954
#
# frozen_string_literal: true
require 'set'
namespace :gitlab do
namespace :cleanup do
HASHED_REPOSITORY_NAME = '@hashed'.freeze
desc "GitLab | Cleanup | Clean namespaces"
task dirs: :gitlab_environment do
warn_user_is_not_gitlab
namespaces = Set.new(Namespace.pluck(:path))
namespaces << Storage::HashedProject::ROOT_PATH_PREFIX
namespaces = Namespace.pluck(:path)
namespaces << HASHED_REPOSITORY_NAME # add so that it will be ignored
Gitlab.config.repositories.storages.each do |name, repository_storage|
git_base_path = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path }
all_dirs = Dir.glob(git_base_path + '/*')
Gitaly::Server.all.each do |server|
all_dirs = Gitlab::GitalyClient::StorageService
.new(server.storage)
.list_directories(depth: 0)
.reject { |dir| dir.ends_with?('.git') || namespaces.include?(File.basename(dir)) }
puts git_base_path.color(:yellow)
puts "Looking for directories to remove... "
all_dirs.reject! do |dir|
# skip if git repo
dir =~ /.git$/
end
all_dirs.reject! do |dir|
dir_name = File.basename dir
# skip if namespace present
namespaces.include?(dir_name)
end
all_dirs.each do |dir_path|
if remove?
if FileUtils.rm_rf dir_path
puts "Removed...#{dir_path}".color(:red)
else
puts "Cannot remove #{dir_path}".color(:red)
begin
Gitlab::GitalyClient::NamespaceService.new(server.storage)
.remove(dir_path)
puts "Removed...#{dir_path}"
rescue StandardError => e
puts "Cannot remove #{dir_path}: #{e.message}".color(:red)
end
else
puts "Can be removed: #{dir_path}".color(:red)
......@@ -49,29 +38,29 @@ namespace :gitlab do
desc "GitLab | Cleanup | Clean repositories"
task repos: :gitlab_environment do
warn_user_is_not_gitlab
move_suffix = "+orphaned+#{Time.now.to_i}"
Gitlab.config.repositories.storages.each do |name, repository_storage|
repo_root = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path }
# Look for global repos (legacy, depth 1) and normal repos (depth 2)
IO.popen(%W(find #{repo_root} -mindepth 1 -maxdepth 2 -name *.git)) do |find|
find.each_line do |path|
path.chomp!
repo_with_namespace = path
.sub(repo_root, '')
.sub(%r{^/*}, '')
.chomp('.git')
.chomp('.wiki')
# TODO ignoring hashed repositories for now. But revisit to fully support
# possible orphaned hashed repos
next if repo_with_namespace.start_with?("#{HASHED_REPOSITORY_NAME}/") || Project.find_by_full_path(repo_with_namespace)
new_path = path + move_suffix
puts path.inspect + ' -> ' + new_path.inspect
File.rename(path, new_path)
Gitaly::Server.all.each do |server|
Gitlab::GitalyClient::StorageService
.new(server.storage)
.list_directories
.each do |path|
repo_with_namespace = path.chomp('.git').chomp('.wiki')
# TODO ignoring hashed repositories for now. But revisit to fully support
# possible orphaned hashed repos
next if repo_with_namespace.start_with?(Storage::HashedProject::ROOT_PATH_PREFIX)
next if Project.find_by_full_path(repo_with_namespace)
new_path = path + move_suffix
puts path.inspect + ' -> ' + new_path.inspect
begin
Gitlab::GitalyClient::NamespaceService
.new(server.storage)
.rename(path, new_path)
rescue StandardError => e
puts "Error occured while moving the repository: #{e.message}".color(:red)
end
end
end
......
......@@ -6,6 +6,8 @@ describe 'gitlab:cleanup rake tasks' do
end
describe 'cleanup namespaces and repos' do
let(:gitlab_shell) { Gitlab::Shell.new }
let(:storage) { storages.keys.first }
let(:storages) do
{
'default' => Gitlab::GitalyClient::StorageSettings.new(@default_storage_hash.merge('path' => 'tmp/tests/default_storage'))
......@@ -17,53 +19,56 @@ describe 'gitlab:cleanup rake tasks' do
end
before do
FileUtils.mkdir(Settings.absolute('tmp/tests/default_storage'))
allow(Gitlab.config.repositories).to receive(:storages).and_return(storages)
end
after do
FileUtils.rm_rf(Settings.absolute('tmp/tests/default_storage'))
Gitlab::GitalyClient::StorageService.new(storage).delete_all_repositories
end
describe 'cleanup:repos' do
before do
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/broken/project.git'))
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))
gitlab_shell.add_namespace(storage, 'broken/project.git')
gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git')
end
it 'moves it to an orphaned path' do
run_rake_task('gitlab:cleanup:repos')
repo_list = Dir['tmp/tests/default_storage/broken/*']
now = Time.now
Timecop.freeze(now) do
run_rake_task('gitlab:cleanup:repos')
repo_list = Gitlab::GitalyClient::StorageService.new(storage).list_directories(depth: 0)
expect(repo_list.first).to include('+orphaned+')
expect(repo_list.last).to include("broken+orphaned+#{now.to_i}")
end
end
it 'ignores @hashed repos' do
run_rake_task('gitlab:cleanup:repos')
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy
expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true)
end
end
describe 'cleanup:dirs' do
it 'removes missing namespaces' do
FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_1/project.git"))
FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_2/project.git"))
allow(Namespace).to receive(:pluck).and_return('namespace_1')
gitlab_shell.add_namespace(storage, "namespace_1/project.git")
gitlab_shell.add_namespace(storage, "namespace_2/project.git")
allow(Namespace).to receive(:pluck).and_return(['namespace_1'])
stub_env('REMOVE', 'true')
run_rake_task('gitlab:cleanup:dirs')
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_1'))).to be_truthy
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_2'))).to be_falsey
expect(gitlab_shell.exists?(storage, 'namespace_1')).to be(true)
expect(gitlab_shell.exists?(storage, 'namespace_2')).to be(false)
end
it 'ignores @hashed directory' do
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))
gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git')
run_rake_task('gitlab:cleanup:dirs')
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy
expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true)
end
end
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment