Use BFG object maps to clean projects

parent 79b44c16
...@@ -50,10 +50,11 @@ function hideOrShowHelpBlock(form) { ...@@ -50,10 +50,11 @@ function hideOrShowHelpBlock(form) {
} }
$(() => { $(() => {
const $form = $('form.js-requires-input'); $('form.js-requires-input').each((i, el) => {
if ($form) { const $form = $(el);
$form.requiresInput(); $form.requiresInput();
hideOrShowHelpBlock($form); hideOrShowHelpBlock($form);
$('.select2.js-select-namespace').change(() => hideOrShowHelpBlock($form)); $('.select2.js-select-namespace').change(() => hideOrShowHelpBlock($form));
} });
}); });
/**
 * Wires a visible "choose file" button to a (typically hidden) file input and
 * mirrors the chosen file's name into the `.js-filename` element of the form
 * that encloses the button.
 *
 * Does nothing when either element is missing, so it is safe to call on pages
 * where the upload widget has not been rendered.
 *
 * @param {string} buttonSelector - CSS selector of the trigger button.
 * @param {string} fileSelector - CSS selector of the file input.
 * @returns {void}
 */
const initFileUpload = (buttonSelector, fileSelector) => {
  const btn = document.querySelector(buttonSelector);
  const fileInput = document.querySelector(fileSelector);

  // querySelector returns null for unmatched selectors; bail out instead of
  // throwing a TypeError on `btn.closest` below.
  if (!btn || !fileInput) return;

  const form = btn.closest('form');

  btn.addEventListener('click', () => {
    fileInput.click();
  });

  fileInput.addEventListener('change', () => {
    // Keep only what follows the last `/` or `\` in the input's value.
    form.querySelector('.js-filename').textContent = fileInput.value.replace(/^.*[\\/]/, '');
  });
};

export default initFileUpload;
...@@ -3,8 +3,8 @@ import initSettingsPanels from '~/settings_panels'; ...@@ -3,8 +3,8 @@ import initSettingsPanels from '~/settings_panels';
import setupProjectEdit from '~/project_edit'; import setupProjectEdit from '~/project_edit';
import initConfirmDangerModal from '~/confirm_danger_modal'; import initConfirmDangerModal from '~/confirm_danger_modal';
import mountBadgeSettings from '~/pages/shared/mount_badge_settings'; import mountBadgeSettings from '~/pages/shared/mount_badge_settings';
import fileUpload from '~/lib/utils/file_upload';
import initProjectLoadingSpinner from '../shared/save_project_loader'; import initProjectLoadingSpinner from '../shared/save_project_loader';
import projectAvatar from '../shared/project_avatar';
import initProjectPermissionsSettings from '../shared/permissions'; import initProjectPermissionsSettings from '../shared/permissions';
document.addEventListener('DOMContentLoaded', () => { document.addEventListener('DOMContentLoaded', () => {
...@@ -12,7 +12,7 @@ document.addEventListener('DOMContentLoaded', () => { ...@@ -12,7 +12,7 @@ document.addEventListener('DOMContentLoaded', () => {
setupProjectEdit(); setupProjectEdit();
// Initialize expandable settings panels // Initialize expandable settings panels
initSettingsPanels(); initSettingsPanels();
projectAvatar(); fileUpload('.js-choose-project-avatar-button', '.js-project-avatar-input');
initProjectPermissionsSettings(); initProjectPermissionsSettings();
initConfirmDangerModal(); initConfirmDangerModal();
mountBadgeSettings(PROJECT_BADGE); mountBadgeSettings(PROJECT_BADGE);
......
...@@ -7,6 +7,7 @@ import initDeployKeys from '~/deploy_keys'; ...@@ -7,6 +7,7 @@ import initDeployKeys from '~/deploy_keys';
import ProtectedBranchCreate from '~/protected_branches/protected_branch_create'; import ProtectedBranchCreate from '~/protected_branches/protected_branch_create';
import ProtectedBranchEditList from '~/protected_branches/protected_branch_edit_list'; import ProtectedBranchEditList from '~/protected_branches/protected_branch_edit_list';
import DueDateSelectors from '~/due_date_select'; import DueDateSelectors from '~/due_date_select';
import fileUpload from '~/lib/utils/file_upload';
export default () => { export default () => {
new ProtectedTagCreate(); new ProtectedTagCreate();
...@@ -16,4 +17,5 @@ export default () => { ...@@ -16,4 +17,5 @@ export default () => {
new ProtectedBranchCreate(); new ProtectedBranchCreate();
new ProtectedBranchEditList(); new ProtectedBranchEditList();
new DueDateSelectors(); new DueDateSelectors();
fileUpload('.js-choose-file', '.js-object-map-input');
}; };
import $ from 'jquery';
/**
 * Hooks up the project avatar picker: clicking the "choose avatar" button
 * triggers a click on the `.js-project-avatar-input` field of the same form,
 * and changing that field writes the selected file's base name into the
 * form's `.js-avatar-filename` element.
 */
export default function projectAvatar() {
  // `.on()` is used instead of `.bind()`, which jQuery deprecated in 3.0.
  $('.js-choose-project-avatar-button').on('click', function onClickAvatar() {
    const form = $(this).closest('form');
    return form.find('.js-project-avatar-input').click();
  });

  $('.js-project-avatar-input').on('change', function onChangeAvatarInput() {
    const form = $(this).closest('form');
    // Keep only what follows the last `/` or `\` in the input's value.
    const filename = $(this)
      .val()
      .replace(/^.*[\\/]/, '');
    return form.find('.js-avatar-filename').text(filename);
  });
}
...@@ -5,6 +5,7 @@ module Projects ...@@ -5,6 +5,7 @@ module Projects
class RepositoryController < Projects::ApplicationController class RepositoryController < Projects::ApplicationController
before_action :authorize_admin_project! before_action :authorize_admin_project!
before_action :remote_mirror, only: [:show] before_action :remote_mirror, only: [:show]
before_action :check_cleanup_feature_flag!, only: :cleanup
def show def show
render_show render_show
...@@ -20,8 +21,26 @@ module Projects ...@@ -20,8 +21,26 @@ module Projects
render_show render_show
end end
# Stores the uploaded BFG object map on the project via
# Projects::UpdateService and, when that succeeds, enqueues
# RepositoryCleanupWorker for the project and current user. Always redirects
# back to the repository settings page with a flash message.
def cleanup
  object_map_params = params.require(:project).permit(:bfg_object_map)
  update_result = Projects::UpdateService.new(project, current_user, object_map_params).execute
  upload_saved = update_result[:status] == :success

  if upload_saved
    RepositoryCleanupWorker.perform_async(project.id, current_user.id)
    flash[:notice] = _('Repository cleanup has started. You will receive an email once the cleanup operation is complete.')
  else
    flash[:alert] = _('Failed to upload object map file')
  end

  redirect_to project_settings_repository_path(project)
end
private private
# Responds with a 404 unless the :project_cleanup feature flag is enabled for
# the project.
def check_cleanup_feature_flag!
  return if ::Feature.enabled?(:project_cleanup, project)

  render_404
end
def render_show def render_show
@deploy_keys = DeployKeysPresenter.new(@project, current_user: current_user) @deploy_keys = DeployKeysPresenter.new(@project, current_user: current_user)
@deploy_tokens = @project.deploy_tokens.active @deploy_tokens = @project.deploy_tokens.active
......
...@@ -257,6 +257,10 @@ module ProjectsHelper ...@@ -257,6 +257,10 @@ module ProjectsHelper
"xcode://clone?repo=#{CGI.escape(default_url_to_repo(project))}" "xcode://clone?repo=#{CGI.escape(default_url_to_repo(project))}"
end end
# Link to the BFG Repo-Cleaner homepage, opened in a new tab.
def link_to_bfg
  bfg_homepage = 'https://rtyley.github.io/bfg-repo-cleaner/'

  link_to('BFG', bfg_homepage, target: '_blank', rel: 'noopener noreferrer')
end
def legacy_render_context(params) def legacy_render_context(params)
params[:legacy_render] ? { markdown_engine: :redcarpet } : {} params[:legacy_render] ? { markdown_engine: :redcarpet } : {}
end end
......
...@@ -24,6 +24,21 @@ module Emails ...@@ -24,6 +24,21 @@ module Emails
subject: subject("Project export error")) subject: subject("Project export error"))
end end
# Email sent to +user+'s notification address once the repository cleanup of
# +project+ has completed. Exposes @project and @user to the template.
def repository_cleanup_success_email(project, user)
  @project = project
  @user = user

  mail(
    to: user.notification_email,
    subject: subject("Project cleanup has completed")
  )
end
# Email sent to +user+'s notification address when the repository cleanup of
# +project+ failed. Exposes @project, @user and @error to the template.
def repository_cleanup_failure_email(project, user, error)
  @project = project
  @user = user
  @error = error

  mail(
    to: user.notification_email,
    subject: subject("Project cleanup failure")
  )
end
def repository_push_email(project_id, opts = {}) def repository_push_email(project_id, opts = {})
@message = @message =
Gitlab::Email::Message::RepositoryPush.new(self, project_id, opts) Gitlab::Email::Message::RepositoryPush.new(self, project_id, opts)
......
...@@ -339,6 +339,7 @@ class Project < ActiveRecord::Base ...@@ -339,6 +339,7 @@ class Project < ActiveRecord::Base
presence: true, presence: true,
inclusion: { in: ->(_object) { Gitlab.config.repositories.storages.keys } } inclusion: { in: ->(_object) { Gitlab.config.repositories.storages.keys } }
validates :variables, variable_duplicates: { scope: :environment_scope } validates :variables, variable_duplicates: { scope: :environment_scope }
validates :bfg_object_map, file_size: { maximum: :max_attachment_size }
# Scopes # Scopes
scope :pending_delete, -> { where(pending_delete: true) } scope :pending_delete, -> { where(pending_delete: true) }
...@@ -412,6 +413,9 @@ class Project < ActiveRecord::Base ...@@ -412,6 +413,9 @@ class Project < ActiveRecord::Base
only_integer: true, only_integer: true,
message: 'needs to be beetween 10 minutes and 1 month' } message: 'needs to be beetween 10 minutes and 1 month' }
# Used by Projects::CleanupService to hold a map of rewritten object IDs
mount_uploader :bfg_object_map, AttachmentUploader
# Returns a project, if it is not about to be removed. # Returns a project, if it is not about to be removed.
# #
# id - The ID of the project to retrieve. # id - The ID of the project to retrieve.
...@@ -1973,6 +1977,10 @@ class Project < ActiveRecord::Base ...@@ -1973,6 +1977,10 @@ class Project < ActiveRecord::Base
Ability.allowed?(user, :read_project_snippet, self) Ability.allowed?(user, :read_project_snippet, self)
end end
# The max_attachment_size application setting converted with
# `.megabytes.to_i` (the setting is read as a number of megabytes).
def max_attachment_size
  limit_in_megabytes = Gitlab::CurrentSettings.max_attachment_size

  limit_in_megabytes.megabytes.to_i
end
private private
def use_hashed_storage def use_hashed_storage
......
...@@ -466,6 +466,14 @@ class NotificationService ...@@ -466,6 +466,14 @@ class NotificationService
end end
end end
# Queues the "cleanup succeeded" email for +user+ about +project+.
def repository_cleanup_success(project, user)
  email = mailer.send(:repository_cleanup_success_email, project, user)
  email.deliver_later
end
# Queues the "cleanup failed" email for +user+ about +project+, passing
# +error+ through to the mailer.
def repository_cleanup_failure(project, user, error)
  email = mailer.send(:repository_cleanup_failure_email, project, user, error)
  email.deliver_later
end
protected protected
def new_resource_email(target, method) def new_resource_email(target, method)
......
# frozen_string_literal: true
module Projects
# The CleanupService removes data from the project repository following a
# BFG rewrite: https://rtyley.github.io/bfg-repo-cleaner/
#
# Before executing this service, all refs rewritten by BFG should have been
# pushed to the repository
class CleanupService < BaseService
# Raised by #apply_bfg_object_map! when the project has no uploaded object map.
# NOTE(review): this is one pre-built StandardError instance rather than an
# exception class, so every raise reuses the same object - confirm intended.
NoUploadError = StandardError.new("Couldn't find uploaded object map")
include Gitlab::Utils::StrongMemoize
# Attempt to clean up the project following the push. Warning: this is
# destructive!
#
# The object map is read from the project's bfg_object_map upload (this
# method takes no arguments). It is expected to contain a line per rewritten
# object, with the old and new SHAs space-separated. It can be used to update
# or remove content that references the objects that BFG has altered
#
# Currently, only the project repository is modified by this service, but we
# may wish to modify other data sources in the future.
#
# Raises NoUploadError when no object map has been uploaded. The uploaded
# file is only removed after every earlier step has completed.
def execute
apply_bfg_object_map!
# Remove older objects that are no longer referenced. The worker is
# instantiated and performed inline here rather than enqueued.
GitGarbageCollectWorker.new.perform(project.id, :gc)
# The cache may now be inaccurate, and holding onto it could prevent
# bugs assuming the presence of some object from manifesting for some
# time. Better to feel the pain immediately.
project.repository.expire_all_method_caches
# The map has been applied; delete the uploaded file.
project.bfg_object_map.remove!
end
private
# Opens the uploaded object map and hands the IO to the repository cleaner.
# Raises NoUploadError unless the upload exists.
def apply_bfg_object_map!
raise NoUploadError unless project.bfg_object_map.exists?
project.bfg_object_map.open do |io|
repository_cleaner.apply_bfg_object_map(io)
end
end
# Memoized Gitlab::Git::RepositoryCleaner built from `repository.raw`
# (`repository` is presumably provided by BaseService - confirm).
def repository_cleaner
@repository_cleaner ||= Gitlab::Git::RepositoryCleaner.new(repository.raw)
end
end
end
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
%p %p
= message = message
%p %p
= s_('403|Please contact your GitLab administrator to get the permission.') = s_('403|Please contact your GitLab administrator to get permission.')
.action-container.js-go-back{ style: 'display: none' } .action-container.js-go-back{ style: 'display: none' }
%a{ href: 'javascript:history.back()', class: 'btn btn-success' } %a{ href: 'javascript:history.back()', class: 'btn btn-success' }
= s_('Go Back') = s_('Go Back')
......
Repository cleanup failed on <%= @project.web_url %>
<%= @error %>
Repository cleanup succeeded on <%= @project.web_url %>
Repository size is now <%= "%.1f" % (@project.repository.size || 0) %> MiB
-# Repository cleanup settings panel: lets the user upload a BFG object map and
-# start the cleanup. Rendered only when the :project_cleanup feature flag is
-# enabled for the project.
- return unless Feature.enabled?(:project_cleanup, @project)

- expanded = Rails.env.test?

%section.settings.no-animate#cleanup{ class: ('expanded' if expanded) }
  .settings-header
    %h4= _('Repository cleanup')
    %button.btn.js-settings-toggle
      = expanded ? _('Collapse') : _('Expand')
    %p
      -# Interpolate AFTER the gettext lookup so the msgid stays a constant,
      -# translatable string (same pattern as the max file size text below).
      = _("Clean up after running %{bfg} on the repository").html_safe % { bfg: link_to_bfg }
      = link_to icon('question-circle'),
          help_page_path('user/project/repository/reducing_the_repo_size_using_git.md'),
          target: '_blank', rel: 'noopener noreferrer'

  .settings-content
    - url = cleanup_namespace_project_settings_repository_path(@project.namespace, @project)
    = form_for @project, url: url, method: :post, authenticity_token: true, html: { class: 'js-requires-input' } do |f|
      %fieldset.prepend-top-0.append-bottom-10
        .append-bottom-10
          %h5.prepend-top-0
            = _("Upload object map")
          %button.btn.btn-default.js-choose-file{ type: "button" }
            = _("Choose a file")
          %span.prepend-left-default.js-filename
            = _("No file selected")
          = f.file_field :bfg_object_map, accept: 'text/plain', class: "hidden js-object-map-input", required: true
          .form-text.text-muted
            = _("The maximum file size allowed is %{max_attachment_size}mb") % { max_attachment_size: Gitlab::CurrentSettings.max_attachment_size }
      = f.submit _('Start cleanup'), class: 'btn btn-success'
...@@ -53,7 +53,7 @@ ...@@ -53,7 +53,7 @@
= _("Project avatar in repository: %{link}").html_safe % { link: @project.avatar_in_git } = _("Project avatar in repository: %{link}").html_safe % { link: @project.avatar_in_git }
.prepend-top-5.append-bottom-10 .prepend-top-5.append-bottom-10
%button.btn.js-choose-project-avatar-button{ type: 'button' }= _("Choose file...") %button.btn.js-choose-project-avatar-button{ type: 'button' }= _("Choose file...")
%span.file_name.prepend-left-default.js-avatar-filename= _("No file chosen") %span.file_name.prepend-left-default.js-filename= _("No file chosen")
= f.file_field :avatar, class: "js-project-avatar-input hidden" = f.file_field :avatar, class: "js-project-avatar-input hidden"
.form-text.text-muted= _("The maximum file size allowed is 200KB.") .form-text.text-muted= _("The maximum file size allowed is 200KB.")
- if @project.avatar? - if @project.avatar?
......
...@@ -13,3 +13,4 @@ ...@@ -13,3 +13,4 @@
= render "projects/protected_tags/index" = render "projects/protected_tags/index"
= render @deploy_keys = render @deploy_keys
= render "projects/deploy_tokens/index" = render "projects/deploy_tokens/index"
= render "projects/cleanup/show"
...@@ -133,3 +133,4 @@ ...@@ -133,3 +133,4 @@
- create_note_diff_file - create_note_diff_file
- delete_diff_files - delete_diff_files
- detect_repository_languages - detect_repository_languages
- repository_cleanup
# frozen_string_literal: true
# Background job that runs Projects::CleanupService for a project and notifies
# the given user about the outcome.
class RepositoryCleanupWorker
  include ApplicationWorker

  sidekiq_options retry: 3

  # Once all retries are used up, report the failure to the user, unless the
  # job failed because a record could not be found.
  sidekiq_retries_exhausted do |job, exception|
    next if exception.is_a?(ActiveRecord::RecordNotFound)

    failure_args = job['args'] + [job['error_message']]
    new.perform_failure(*failure_args)
  end

  # Runs the cleanup, then sends the success notification.
  def perform(project_id, user_id)
    target_project = Project.find(project_id)
    requester = User.find(user_id)

    Projects::CleanupService.new(target_project, requester).execute

    notification_service.repository_cleanup_success(target_project, requester)
  end

  # Deletes the uploaded object map and sends the failure notification.
  def perform_failure(project_id, user_id, error)
    target_project = Project.find(project_id)
    requester = User.find(user_id)

    # Ensure the file is removed
    target_project.bfg_object_map.remove!

    notification_service.repository_cleanup_failure(target_project, requester, error)
  end

  private

  def notification_service
    @notification_service ||= NotificationService.new
  end
end
---
title: Use BFG object maps to clean projects
merge_request: 23189
author:
type: added
...@@ -432,6 +432,7 @@ constraints(::Constraints::ProjectUrlConstrainer.new) do ...@@ -432,6 +432,7 @@ constraints(::Constraints::ProjectUrlConstrainer.new) do
resource :integrations, only: [:show] resource :integrations, only: [:show]
resource :repository, only: [:show], controller: :repository do resource :repository, only: [:show], controller: :repository do
post :create_deploy_token, path: 'deploy_token/create' post :create_deploy_token, path: 'deploy_token/create'
post :cleanup
end end
end end
......
...@@ -81,3 +81,4 @@ ...@@ -81,3 +81,4 @@
- [delete_diff_files, 1] - [delete_diff_files, 1]
- [detect_repository_languages, 1] - [detect_repository_languages, 1]
- [auto_devops, 2] - [auto_devops, 2]
- [repository_cleanup, 1]
# frozen_string_literal: true
# Adds the string column `bfg_object_map` to the `projects` table.
class AddProjectBfgObjectMapColumn < ActiveRecord::Migration[5.0]
DOWNTIME = false
# Written as `change`, so the same definition serves for rollback.
def change
add_column :projects, :bfg_object_map, :string
end
end
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20181129104944) do ActiveRecord::Schema.define(version: 20181203002526) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
...@@ -1684,6 +1684,7 @@ ActiveRecord::Schema.define(version: 20181129104944) do ...@@ -1684,6 +1684,7 @@ ActiveRecord::Schema.define(version: 20181129104944) do
t.boolean "remote_mirror_available_overridden" t.boolean "remote_mirror_available_overridden"
t.bigint "pool_repository_id" t.bigint "pool_repository_id"
t.string "runners_token_encrypted" t.string "runners_token_encrypted"
t.string "bfg_object_map"
t.index ["ci_id"], name: "index_projects_on_ci_id", using: :btree t.index ["ci_id"], name: "index_projects_on_ci_id", using: :btree
t.index ["created_at"], name: "index_projects_on_created_at", using: :btree t.index ["created_at"], name: "index_projects_on_created_at", using: :btree
t.index ["creator_id"], name: "index_projects_on_creator_id", using: :btree t.index ["creator_id"], name: "index_projects_on_creator_id", using: :btree
......
# Reducing the repository size using Git # Reducing the repository size using Git
A GitLab Enterprise Edition administrator can set a [repository size limit][admin-repo-size] A GitLab Enterprise Edition administrator can set a [repository size limit][admin-repo-size]
which will prevent you to exceed it. which will prevent you from exceeding it.
When a project has reached its size limit, you will not be able to push to it, When a project has reached its size limit, you will not be able to push to it,
create a new merge request, or merge existing ones. You will still be able to create a new merge request, or merge existing ones. You will still be able to
create new issues, and clone the project though. Uploading LFS objects will create new issues, and clone the project though. Uploading LFS objects will
also be denied. also be denied.
In order to lift these restrictions, the administrator of the GitLab instance
needs to increase the limit on the particular project that exceeded it or you
need to instruct Git to rewrite changes.
If you exceed the repository size limit, your first thought might be to remove If you exceed the repository size limit, your first thought might be to remove
some data, make a new commit and push back to the repository. Unfortunately, some data, make a new commit and push back to the repository. Perhaps you can
it's not so easy and that workflow won't work. Deleting files in a commit doesn't move some blobs to LFS, or remove some old dependency updates from history.
actually reduce the size of the repo since the earlier commits and blobs are Unfortunately, it's not so easy and that workflow won't work. Deleting files in
still around. What you need to do is rewrite history with Git's a commit doesn't actually reduce the size of the repo since the earlier commits
[`filter-branch` option][gitscm]. and blobs are still around. What you need to do is rewrite history with Git's
[`filter-branch` option][gitscm], or a tool like the [BFG Repo-Cleaner][bfg].
Note that even with that method, until `git gc` runs on the GitLab side, the Note that even with that method, until `git gc` runs on the GitLab side, the
"removed" commits and blobs will still be around. And if a commit was ever "removed" commits and blobs will still be around. You also need to be able to
included in an MR, or if a build was run for a commit, or if a user commented push the rewritten history to GitLab, which may be impossible if you've already
on it, it will be kept around too. So, in these cases the size will not decrease. exceeded the maximum size limit.
The only fool proof way to actually decrease the repository size is to prune all
the unneeded stuff locally, and then create a new project on GitLab and start
using that instead.
With that being said, you can try reducing your repository size with the In order to lift these restrictions, the administrator of the GitLab instance
following method. needs to increase the limit on the particular project that exceeded it, so it's
always better to spot that you're approaching the limit and act proactively to
## Using `git filter-branch` to purge files stay underneath it. If you hit the limit, and your admin can't - or won't -
temporarily increase it for you, your only option is to prune all the unneeded
stuff locally, and then create a new project on GitLab and start using that
instead.
If you can continue to use the original project, we recommend [using the
BFG Repo-Cleaner](#using-the-bfg-repo-cleaner). It's faster and simpler than
`git filter-branch`, and GitLab can use its account of what has changed to clean
up its own internal state, maximizing the space saved.
> **Warning:** > **Warning:**
> Make sure to first make a copy of your repository since rewriting history will > Make sure to first make a copy of your repository since rewriting history will
> purge the files and information you are about to delete. Also make sure to > purge the files and information you are about to delete. Also make sure to
> inform any collaborators to not use `pull` after your changes, but use `rebase`. > inform any collaborators to not use `pull` after your changes, but use `rebase`.
> **Warning:**
> This process is not suitable for removing sensitive data like passwords or keys
> from your repository. Information about commits, including file content, is
> cached in the database, and will remain visible even after they have been
> removed from the repository.
## Using the BFG Repo-Cleaner
> [Introduced](https://gitlab.com/gitlab-org/gitlab-ce/issues/19376) in GitLab 11.6.
1. [Install BFG](https://rtyley.github.io/bfg-repo-cleaner/).
1. Navigate to your repository:
```
cd my_repository/
```
1. Change to the branch you want to remove the big file from:
```
git checkout master
```
1. Create a commit removing the large file from the branch, if it still exists:
```
git rm path/to/big_file.mpg
git commit -m 'Remove unneeded large file'
```
1. Rewrite history:
```
bfg --delete-files path/to/big_file.mpg
```
An object map file will be written to `object-id-map.old-new.txt`. Keep it
around - you'll need it for the final step!
1. Force-push the changes to GitLab: