Use BFG object maps to clean projects

parent 79b44c16
......@@ -50,10 +50,11 @@ function hideOrShowHelpBlock(form) {
$(() => {
const $form = $('form.js-requires-input');
if ($form) {
$('form.js-requires-input').each((i, el) => {
const $form = $(el);
$('.select2.js-select-namespace').change(() => hideOrShowHelpBlock($form));
export default (buttonSelector, fileSelector) => {
const btn = document.querySelector(buttonSelector);
const fileInput = document.querySelector(fileSelector);
const form = btn.closest('form');
btn.addEventListener('click', () => {;
fileInput.addEventListener('change', () => {
form.querySelector('.js-filename').textContent = fileInput.value.replace(/^.*[\\\/]/, ''); // eslint-disable-line no-useless-escape
......@@ -3,8 +3,8 @@ import initSettingsPanels from '~/settings_panels';
import setupProjectEdit from '~/project_edit';
import initConfirmDangerModal from '~/confirm_danger_modal';
import mountBadgeSettings from '~/pages/shared/mount_badge_settings';
import fileUpload from '~/lib/utils/file_upload';
import initProjectLoadingSpinner from '../shared/save_project_loader';
import projectAvatar from '../shared/project_avatar';
import initProjectPermissionsSettings from '../shared/permissions';
document.addEventListener('DOMContentLoaded', () => {
......@@ -12,7 +12,7 @@ document.addEventListener('DOMContentLoaded', () => {
// Initialize expandable settings panels
fileUpload('.js-choose-project-avatar-button', '.js-project-avatar-input');
......@@ -7,6 +7,7 @@ import initDeployKeys from '~/deploy_keys';
import ProtectedBranchCreate from '~/protected_branches/protected_branch_create';
import ProtectedBranchEditList from '~/protected_branches/protected_branch_edit_list';
import DueDateSelectors from '~/due_date_select';
import fileUpload from '~/lib/utils/file_upload';
export default () => {
new ProtectedTagCreate();
......@@ -16,4 +17,5 @@ export default () => {
new ProtectedBranchCreate();
new ProtectedBranchEditList();
new DueDateSelectors();
fileUpload('.js-choose-file', '.js-object-map-input');
import $ from 'jquery';
export default function projectAvatar() {
$('.js-choose-project-avatar-button').bind('click', function onClickAvatar() {
const form = $(this).closest('form');
return form.find('.js-project-avatar-input').click();
$('.js-project-avatar-input').bind('change', function onClickAvatarInput() {
const form = $(this).closest('form');
const filename = $(this)
.replace(/^.*[\\\/]/, ''); // eslint-disable-line no-useless-escape
return form.find('.js-avatar-filename').text(filename);
......@@ -5,6 +5,7 @@ module Projects
class RepositoryController < Projects::ApplicationController
before_action :authorize_admin_project!
before_action :remote_mirror, only: [:show]
before_action :check_cleanup_feature_flag!, only: :cleanup
def show
......@@ -20,8 +21,26 @@ module Projects
def cleanup
cleanup_params = params.require(:project).permit(:bfg_object_map)
result =, current_user, cleanup_params).execute
if result[:status] == :success
flash[:notice] = _('Repository cleanup has started. You will receive an email once the cleanup operation is complete.')
flash[:alert] = _('Failed to upload object map file')
redirect_to project_settings_repository_path(project)
# Guard for the `cleanup` action: responds with a 404 unless the
# :project_cleanup feature flag is enabled for the current project.
def check_cleanup_feature_flag!
render_404 unless ::Feature.enabled?(:project_cleanup, project)
def render_show
@deploy_keys =, current_user: current_user)
@deploy_tokens =
......@@ -257,6 +257,10 @@ module ProjectsHelper
def link_to_bfg
link_to 'BFG', '', target: '_blank', rel: 'noopener noreferrer'
# Returns the extra rendering options for a request.
#
# params - a hash-like object; only the :legacy_render key is read.
#
# Returns `{ markdown_engine: :redcarpet }` when `params[:legacy_render]` is
# truthy, otherwise an empty hash.
def legacy_render_context(params)
params[:legacy_render] ? { markdown_engine: :redcarpet } : {}
......@@ -24,6 +24,21 @@ module Emails
subject: subject("Project export error"))
# Builds the email telling a user that a repository cleanup has completed.
#
# project - the project whose repository was cleaned up.
# user    - the recipient; the mail goes to `user.notification_email`.
#
# Both arguments are stored in instance variables
# (presumably for use by the mail template; confirm against the view).
def repository_cleanup_success_email(project, user)
@project = project
@user = user
mail(to: user.notification_email, subject: subject("Project cleanup has completed"))
# Builds the email telling a user that a repository cleanup has failed.
#
# project - the project whose repository cleanup failed.
# user    - the recipient; the mail goes to `user.notification_email`.
# error   - the failure description, stored in @error
#           (presumably rendered by the mail template; confirm against the view).
def repository_cleanup_failure_email(project, user, error)
@project = project
@user = user
@error = error
mail(to: user.notification_email, subject: subject("Project cleanup failure"))
def repository_push_email(project_id, opts = {})
@message =, project_id, opts)
......@@ -339,6 +339,7 @@ class Project < ActiveRecord::Base
presence: true,
inclusion: { in: ->(_object) { Gitlab.config.repositories.storages.keys } }
validates :variables, variable_duplicates: { scope: :environment_scope }
validates :bfg_object_map, file_size: { maximum: :max_attachment_size }
# Scopes
scope :pending_delete, -> { where(pending_delete: true) }
......@@ -412,6 +413,9 @@ class Project < ActiveRecord::Base
only_integer: true,
message: 'needs to be beetween 10 minutes and 1 month' }
# Used by Projects::CleanupService to hold a map of rewritten object IDs
mount_uploader :bfg_object_map, AttachmentUploader
# Returns a project, if it is not about to be removed.
# id - The ID of the project to retrieve.
......@@ -1973,6 +1977,10 @@ class Project < ActiveRecord::Base
Ability.allowed?(user, :read_project_snippet, self)
def max_attachment_size
def use_hashed_storage
......@@ -466,6 +466,14 @@ class NotificationService
# Enqueues (via `deliver_later`) the `repository_cleanup_success_email`
# mailer message for the given project and user.
def repository_cleanup_success(project, user)
mailer.send(:repository_cleanup_success_email, project, user).deliver_later
# Enqueues (via `deliver_later`) the `repository_cleanup_failure_email`
# mailer message for the given project and user, passing `error` through
# to the mailer unchanged.
def repository_cleanup_failure(project, user, error)
mailer.send(:repository_cleanup_failure_email, project, user, error).deliver_later
def new_resource_email(target, method)
# frozen_string_literal: true
module Projects
# The CleanupService removes data from the project repository following a
# BFG rewrite:
# Before executing this service, all refs rewritten by BFG should have been
# pushed to the repository
class CleanupService < BaseService
NoUploadError ="Couldn't find uploaded object map")
include Gitlab::Utils::StrongMemoize
# Attempt to clean up the project following the push. Warning: this is
# destructive!
# path is the path of an upload of a BFG object map file. It contains a line
# per rewritten object, with the old and new SHAs space-separated. It can be
# used to update or remove content that references the objects that BFG has
# altered
# Currently, only the project repository is modified by this service, but we
# may wish to modify other data sources in the future.
def execute
# Remove older objects that are no longer referenced, :gc)
# The cache may now be inaccurate, and holding onto it could prevent
# bugs assuming the presence of some object from manifesting for some
# time. Better to feel the pain immediately.
def apply_bfg_object_map!
raise NoUploadError unless project.bfg_object_map.exists? do |io|
def repository_cleaner
@repository_cleaner ||=
......@@ -9,7 +9,7 @@
= message
= s_('403|Please contact your GitLab administrator to get the permission.')
= s_('403|Please contact your GitLab administrator to get permission.')
.action-container.js-go-back{ style: 'display: none' }
%a{ href: 'javascript:history.back()', class: 'btn btn-success' }
= s_('Go Back')
Repository cleanup failed on <%= @project.web_url %>
<%= @error %>
Repository cleanup succeeded on <%= @project.web_url %>
Repository size is now <%= "%.1f" % (@project.repository.size || 0) %> MiB
- return unless Feature.enabled?(:project_cleanup, @project)
- expanded = Rails.env.test?{ class: ('expanded' if expanded) }
%h4= _('Repository cleanup')
= expanded ? _('Collapse') : _('Expand')
= _("Clean up after running %{bfg} on the repository" % { bfg: link_to_bfg }).html_safe
= link_to icon('question-circle'),
target: '_blank', rel: 'noopener noreferrer'
- url = cleanup_namespace_project_settings_repository_path(@project.namespace, @project)
= form_for @project, url: url, method: :post, authenticity_token: true, html: { class: 'js-requires-input' } do |f|
= _("Upload object map")
%button.btn.btn-default.js-choose-file{ type: "button" }
= _("Choose a file")
= _("No file selected")
= f.file_field :bfg_object_map, accept: 'text/plain', class: "hidden js-object-map-input", required: true
= _("The maximum file size allowed is %{max_attachment_size}mb") % { max_attachment_size: Gitlab::CurrentSettings.max_attachment_size }
= f.submit _('Start cleanup'), class: 'btn btn-success'
......@@ -53,7 +53,7 @@
= _("Project avatar in repository: %{link}").html_safe % { link: @project.avatar_in_git }
%button.btn.js-choose-project-avatar-button{ type: 'button' }= _("Choose file...")
%span.file_name.prepend-left-default.js-avatar-filename= _("No file chosen")
%span.file_name.prepend-left-default.js-filename= _("No file chosen")
= f.file_field :avatar, class: "js-project-avatar-input hidden"
.form-text.text-muted= _("The maximum file size allowed is 200KB.")
- if @project.avatar?
......@@ -13,3 +13,4 @@
= render "projects/protected_tags/index"
= render @deploy_keys
= render "projects/deploy_tokens/index"
= render "projects/cleanup/show"
......@@ -133,3 +133,4 @@
- create_note_diff_file
- delete_diff_files
- detect_repository_languages
- repository_cleanup
# frozen_string_literal: true
class RepositoryCleanupWorker
include ApplicationWorker
sidekiq_options retry: 3
sidekiq_retries_exhausted do |msg, err|
next if err.is_a?(ActiveRecord::RecordNotFound)
args = msg['args'] + [msg['error_message']]
def perform(project_id, user_id)
project = Project.find(project_id)
user = User.find(user_id), user).execute
notification_service.repository_cleanup_success(project, user)
def perform_failure(project_id, user_id, error)
project = Project.find(project_id)
user = User.find(user_id)
# Ensure the file is removed
notification_service.repository_cleanup_failure(project, user, error)
def notification_service
@notification_service ||=
title: Use BFG object maps to clean projects
merge_request: 23189
type: added
......@@ -432,6 +432,7 @@ constraints( do
resource :integrations, only: [:show]
resource :repository, only: [:show], controller: :repository do
post :create_deploy_token, path: 'deploy_token/create'
post :cleanup
......@@ -81,3 +81,4 @@
- [delete_diff_files, 1]
- [detect_repository_languages, 1]
- [auto_devops, 2]
- [repository_cleanup, 1]
# frozen_string_literal: true
# Migration that adds the `bfg_object_map` string column to the `projects`
# table.
class AddProjectBfgObjectMapColumn < ActiveRecord::Migration[5.0]
# NOTE(review): presumably declares that this migration can run without
# downtime; confirm against the project's migration conventions.
DOWNTIME = false
# Adds projects.bfg_object_map as a string column.
def change
add_column :projects, :bfg_object_map, :string
......@@ -10,7 +10,7 @@
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20181129104944) do
ActiveRecord::Schema.define(version: 20181203002526) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -1684,6 +1684,7 @@ ActiveRecord::Schema.define(version: 20181129104944) do
t.boolean "remote_mirror_available_overridden"
t.bigint "pool_repository_id"
t.string "runners_token_encrypted"
t.string "bfg_object_map"
t.index ["ci_id"], name: "index_projects_on_ci_id", using: :btree
t.index ["created_at"], name: "index_projects_on_created_at", using: :btree
t.index ["creator_id"], name: "index_projects_on_creator_id", using: :btree
# Reducing the repository size using Git
A GitLab Enterprise Edition administrator can set a [repository size limit][admin-repo-size]
which will prevent you to exceed it.
which will prevent you from exceeding it.
When a project has reached its size limit, you will not be able to push to it,
create a new merge request, or merge existing ones. You will still be able to
create new issues, and clone the project though. Uploading LFS objects will
also be denied.
In order to lift these restrictions, the administrator of the GitLab instance
needs to increase the limit on the particular project that exceeded it or you
need to instruct Git to rewrite changes.
If you exceed the repository size limit, your first thought might be to remove
some data, make a new commit and push back to the repository. Unfortunately,
it's not so easy and that workflow won't work. Deleting files in a commit doesn't
actually reduce the size of the repo since the earlier commits and blobs are
still around. What you need to do is rewrite history with Git's
[`filter-branch` option][gitscm].
some data, make a new commit and push back to the repository. Perhaps you can
move some blobs to LFS, or remove some old dependency updates from history.
Unfortunately, it's not so easy and that workflow won't work. Deleting files in
a commit doesn't actually reduce the size of the repo since the earlier commits
and blobs are still around. What you need to do is rewrite history with Git's
[`filter-branch` option][gitscm], or a tool like the [BFG Repo-Cleaner][bfg].
Note that even with that method, until `git gc` runs on the GitLab side, the
"removed" commits and blobs will still be around. And if a commit was ever
included in an MR, or if a build was run for a commit, or if a user commented
on it, it will be kept around too. So, in these cases the size will not decrease.
The only foolproof way to actually decrease the repository size is to prune all
the unneeded stuff locally, and then create a new project on GitLab and start
using that instead.
"removed" commits and blobs will still be around. You also need to be able to
push the rewritten history to GitLab, which may be impossible if you've already
exceeded the maximum size limit.
With that being said, you can try reducing your repository size with the
following method.
## Using `git filter-branch` to purge files
In order to lift these restrictions, the administrator of the GitLab instance
needs to increase the limit on the particular project that exceeded it, so it's
always better to spot that you're approaching the limit and act proactively to
stay underneath it. If you hit the limit, and your admin can't - or won't -
temporarily increase it for you, your only option is to prune all the unneeded
stuff locally, and then create a new project on GitLab and start using that
instead.
If you can continue to use the original project, we recommend [using the
BFG Repo-Cleaner](#using-the-bfg-repo-cleaner). It's faster and simpler than
`git filter-branch`, and GitLab can use its account of what has changed to clean
up its own internal state, maximizing the space saved.
> **Warning:**
> Make sure to first make a copy of your repository since rewriting history will
> purge the files and information you are about to delete. Also make sure to
> tell any collaborators not to use `pull` after your changes, but to use `rebase` instead.
> **Warning:**
> This process is not suitable for removing sensitive data like passwords or keys
> from your repository. Information about commits, including file content, is
> cached in the database, and will remain visible even after they have been
> removed from the repository.
## Using the BFG Repo-Cleaner
> Introduced in GitLab 11.6.
1. [Install BFG][bfg].
1. Navigate to your repository:
cd my_repository/
1. Change to the branch you want to remove the big file from:
git checkout master
1. Create a commit removing the large file from the branch, if it still exists:
git rm path/to/big_file.mpg
git commit -m 'Remove unneeded large file'
1. Rewrite history:
bfg --delete-files path/to/big_file.mpg
An object map file will be written to `object-id-map.old-new.txt`. Keep it
around - you'll need it for the final step!
1. Force-push the changes to GitLab:
git push --force-with-lease origin master
If this step fails, someone has changed the `master` branch while you were
rewriting history. You could restore the branch and re-run BFG to preserve
their changes, or use `git push --force` to overwrite their changes.
1. Navigate to **Project > Settings > Repository > Repository Cleanup**:
![Repository settings cleanup form](img/repository_cleanup.png)
Upload the `object-id-map.old-new.txt` file and press **Start cleanup**.
This will remove any internal git references to the old commits, and run
`git gc` against the repository. You will receive an email once it has
completed.
## Using `git filter-branch`
1. Navigate to your repository:
......@@ -70,11 +132,6 @@ following method.
Your repository should now be below the size limit.
> **Note:**
> As an alternative to `filter-branch`, you can use the `bfg` tool with a
> command like: `bfg --delete-files path/to/big_file.mpg`. Read the
> [BFG Repo-Cleaner][bfg] documentation for more information.
# frozen_string_literal: true
module Gitlab
module Git
class RepositoryCleaner
include Gitlab::Git::WrapsGitalyErrors
attr_reader :repository
# 'repository' is a Gitlab::Git::Repository
def initialize(repository)
@repository = repository
def apply_bfg_object_map(io)
wrapped_gitaly_errors do
def gitaly_cleanup_client
@gitaly_cleanup_client ||=
# frozen_string_literal: true
module Gitlab
module GitalyClient
class CleanupService
attr_reader :repository, :gitaly_repo, :storage
# 'repository' is a Gitlab::Git::Repository
def initialize(repository)
@repository = repository
@gitaly_repo = repository.gitaly_repository
@storage =
def apply_bfg_object_map(io)
first_request = gitaly_repo)
enum = do |y|
y.yield first_request
while data =
y.yield data)
timeout: GitalyClient.no_timeout