Batchload blobs for diff generation

After installing a new gem, batch-loader, a construct can be used to
queue data to be fetched in bulk. The gem was also introduced in both
gitlab-org/gitlab-ce!14680 and gitlab-org/gitlab-ce!14846, but those mrs
are not merged yet.

For the generation of diffs, both the old blob and the new blob need to
be loaded. This for every file in the diff, too. Now we collect all
these so we do 1 fetch. Three `.allow_n_plus_1_calls` have been removed,
which I expect to be valid, but this needs to be confirmed by a full CI
run.

Possibly closes:
- https://gitlab.com/gitlab-org/gitlab-ce/issues/37445
- https://gitlab.com/gitlab-org/gitlab-ce/issues/37599
- https://gitlab.com/gitlab-org/gitlab-ce/issues/37431
parent 6dd89059
......@@ -263,6 +263,8 @@ gem 'gettext_i18n_rails', '~> 1.8.0'
gem 'gettext_i18n_rails_js', '~> 1.2.0'
gem 'gettext', '~> 3.2.2', require: false, group: :development
gem 'batch-loader'
# Perf bar
gem 'peek', '~> 1.0.1'
gem 'peek-gc', '~> 0.0.2'
......
......@@ -73,6 +73,7 @@ GEM
thread_safe (~> 0.3, >= 0.3.1)
babosa (1.0.2)
base32 (0.3.2)
batch-loader (1.1.1)
bcrypt (3.1.11)
bcrypt_pbkdf (1.0.0)
benchmark-ips (2.3.0)
......@@ -982,6 +983,7 @@ DEPENDENCIES
awesome_print (~> 1.2.0)
babosa (~> 1.0.2)
base32 (~> 0.3.0)
batch-loader
bcrypt_pbkdf (~> 1.0)
benchmark-ips (~> 2.3.0)
better_errors (~> 2.1.0)
......
......@@ -22,12 +22,7 @@ class Projects::CommitController < Projects::ApplicationController
apply_diff_view_cookie!
respond_to do |format|
format.html do
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37599
Gitlab::GitalyClient.allow_n_plus_1_calls do
render
end
end
format.html { render }
format.diff { render text: @commit.to_diff }
format.patch { render text: @commit.to_patch }
end
......@@ -112,7 +107,7 @@ class Projects::CommitController < Projects::ApplicationController
end
def commit
@noteable = @commit ||= @project.commit(params[:id])
@noteable = @commit ||= @project.commit_by(oid: params[:id])
end
def define_commit_vars
......
......@@ -10,10 +10,7 @@ class Projects::MergeRequests::DiffsController < Projects::MergeRequests::Applic
def show
@environment = @merge_request.environments_for(current_user).last
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37431
Gitlab::GitalyClient.allow_n_plus_1_calls do
render json: { html: view_to_html_string("projects/merge_requests/diffs/_diffs") }
end
render json: { html: view_to_html_string("projects/merge_requests/diffs/_diffs") }
end
def diff_for_path
......
......@@ -76,12 +76,24 @@ class Blob < SimpleDelegator
new(blob, project)
end
def self.lazy(project, commit_id, path)
BatchLoader.for(commit_id: commit_id, path: path).batch do |items, loader|
project.repository.blobs_at(items.map(&:values)).each do |blob|
loader.call({ commit_id: blob.commit_id, path: blob.path }, blob) if blob
end
end
end
def initialize(blob, project = nil)
@project = project
super(blob)
end
def inspect
"#<#{self.class.name} oid:#{id[0..8]} commit:#{commit_id[0..8]} path:#{path}>"
end
# Returns the data of the blob.
#
# If the blob is a text based blob the content is converted to UTF-8 and any
......@@ -95,7 +107,10 @@ class Blob < SimpleDelegator
end
def load_all_data!
super(project.repository) if project
# Endpoint needed: gitlab-org/gitaly#756
Gitlab::GitalyClient.allow_n_plus_1_calls do
super(project.repository) if project
end
end
def no_highlighting?
......
......@@ -84,7 +84,7 @@ class Commit
end
def id
@raw.id
raw.id
end
def ==(other)
......@@ -361,7 +361,7 @@ class Commit
@deltas ||= raw.deltas
end
def diffs(diff_options = nil)
def diffs(diff_options = {})
Gitlab::Diff::FileCollection::Commit.new(self, diff_options: diff_options)
end
......
......@@ -478,6 +478,11 @@ class Repository
nil
end
# items is an Array like: [[oid, path], [oid1, path1]]
def blobs_at(items)
raw_repository.batch_blobs(items).map { |blob| Blob.decorate(blob, project) }
end
def root_ref
if raw_repository
raw_repository.root_ref
......
---
title: Fetch blobs in bulk when generating diffs
merge_request:
author:
type: performance
Rails.application.config.middleware.use(BatchLoader::Middleware)
......@@ -25,6 +25,10 @@ module Gitlab
@repository = repository
@diff_refs = diff_refs
@fallback_diff_refs = fallback_diff_refs
# Ensure items are collected in the the batch
new_blob
old_blob
end
def position(position_marker, position_type: :text)
......@@ -95,21 +99,15 @@ module Gitlab
end
def new_blob
return @new_blob if defined?(@new_blob)
sha = new_content_sha
return @new_blob = nil unless sha
return unless new_content_sha
@new_blob = repository.blob_at(sha, file_path)
Blob.lazy(repository.project, new_content_sha, file_path)
end
def old_blob
return @old_blob if defined?(@old_blob)
sha = old_content_sha
return @old_blob = nil unless sha
return unless old_content_sha
@old_blob = repository.blob_at(sha, old_path)
Blob.lazy(repository.project, old_content_sha, old_path)
end
def content_sha
......
......@@ -22,10 +22,7 @@ module Gitlab
end
def diff_files
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37445
Gitlab::GitalyClient.allow_n_plus_1_calls do
@diff_files ||= @diffs.decorate! { |diff| decorate_diff!(diff) }
end
@diff_files ||= @diffs.decorate! { |diff| decorate_diff!(diff) }
end
def diff_file_with_old_path(old_path)
......
......@@ -179,6 +179,8 @@ module Gitlab
)
end
end
rescue Rugged::ReferenceError
nil
end
def rugged_raw(repository, sha, limit:)
......
......@@ -1161,6 +1161,11 @@ module Gitlab
Gitlab::Git::Blob.find(self, sha, path) unless Gitlab::Git.blank_ref?(sha)
end
# Items should be of format [[commit_id, path], [commit_id1, path1]]
def batch_blobs(items, blob_size_limit: nil)
Gitlab::Git::Blob.batch(self, items, blob_size_limit: blob_size_limit)
end
def commit_index(user, branch_name, index, options)
committer = user_to_committer(user)
......
require 'spec_helper'
describe Projects::CommitController do
let(:project) { create(:project, :repository) }
let(:user) { create(:user) }
set(:project) { create(:project, :repository) }
set(:user) { create(:user) }
let(:commit) { project.commit("master") }
let(:master_pickable_sha) { '7d3b0f7cff5f37573aea97cebfd5692ea1689924' }
let(:master_pickable_commit) { project.commit(master_pickable_sha) }
before do
sign_in(user)
project.team << [user, :master]
project.add_master(user)
end
describe 'GET show' do
......
......@@ -116,12 +116,8 @@ describe Gitlab::Diff::File do
end
context 'when renamed' do
let(:commit) { project.commit('6907208d755b60ebeacb2e9dfea74c92c3449a1f') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('files/js/commit.coffee') }
before do
allow(diff_file.new_blob).to receive(:id).and_return(diff_file.old_blob.id)
end
let(:commit) { project.commit('94bb47ca1297b7b3731ff2a36923640991e9236f') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('CHANGELOG.md') }
it 'returns false' do
expect(diff_file.content_changed?).to be_falsey
......
......@@ -16,6 +16,23 @@ describe Blob do
end
end
describe '.lazy' do
let(:project) { create(:project, :repository) }
let(:commit) { project.commit_by(oid: 'e63f41fe459e62e1228fcef60d7189127aeba95a') }
it 'fetches all blobs when the first is accessed' do
changelog = described_class.lazy(project, commit.id, 'CHANGELOG')
contributing = described_class.lazy(project, commit.id, 'CONTRIBUTING.md')
expect(Gitlab::Git::Blob).to receive(:batch).once.and_call_original
expect(Gitlab::Git::Blob).not_to receive(:find)
# Access property so the values are loaded
changelog.id
contributing.id
end
end
describe '#data' do
context 'using a binary blob' do
it 'returns the data as-is' do
......
......@@ -32,10 +32,8 @@ describe DiffViewer::Base do
end
context 'when the binaryness does not match' do
before do
allow(diff_file.old_blob).to receive(:binary?).and_return(false)
allow(diff_file.new_blob).to receive(:binary?).and_return(false)
end
let(:commit) { project.commit_by(oid: 'ae73cb07c9eeaf35924a10f713b364d32b2dd34f') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('Gemfile.zip') }
it 'returns false' do
expect(viewer_class.can_render?(diff_file)).to be_falsey
......@@ -60,8 +58,7 @@ describe DiffViewer::Base do
context 'when the binaryness does not match' do
before do
allow(diff_file.old_blob).to receive(:binary?).and_return(true)
allow(diff_file.new_blob).to receive(:binary?).and_return(true)
allow_any_instance_of(Blob).to receive(:binary?).and_return(true)
end
it 'returns false' do
......@@ -77,12 +74,12 @@ describe DiffViewer::Base do
end
context 'when the file was renamed and only the old blob is supported' do
let(:commit) { project.commit('2f63565e7aac07bcdadb654e253078b727143ec4') }
let(:commit) { project.commit_by(oid: '2f63565e7aac07bcdadb654e253078b727143ec4') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('files/images/6049019_460s.jpg') }
before do
allow(diff_file).to receive(:renamed_file?).and_return(true)
allow(diff_file.new_blob).to receive(:extension).and_return('jpeg')
viewer_class.extensions = %w(notjpg)
end
it 'returns false' do
......@@ -94,8 +91,7 @@ describe DiffViewer::Base do
describe '#collapsed?' do
context 'when the combined blob size is larger than the collapse limit' do
before do
allow(diff_file.old_blob).to receive(:raw_size).and_return(512.kilobytes)
allow(diff_file.new_blob).to receive(:raw_size).and_return(513.kilobytes)
allow(diff_file).to receive(:raw_size).and_return(1025.kilobytes)
end
it 'returns true' do
......@@ -113,8 +109,7 @@ describe DiffViewer::Base do
describe '#too_large?' do
context 'when the combined blob size is larger than the size limit' do
before do
allow(diff_file.old_blob).to receive(:raw_size).and_return(2.megabytes)
allow(diff_file.new_blob).to receive(:raw_size).and_return(4.megabytes)
allow(diff_file).to receive(:raw_size).and_return(6.megabytes)
end
it 'returns true' do
......@@ -132,8 +127,7 @@ describe DiffViewer::Base do
describe '#render_error' do
context 'when the combined blob size is larger than the size limit' do
before do
allow(diff_file.old_blob).to receive(:raw_size).and_return(2.megabytes)
allow(diff_file.new_blob).to receive(:raw_size).and_return(4.megabytes)
allow(diff_file).to receive(:raw_size).and_return(6.megabytes)
end
it 'returns :too_large' do
......
require 'spec_helper'
describe DiffViewer::ServerSide do
let(:project) { create(:project, :repository) }
let(:commit) { project.commit('570e7b2abdd848b95f2f578043fc23bd6f6fd24d') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('files/ruby/popen.rb') }
set(:project) { create(:project, :repository) }
let(:commit) { project.commit_by(oid: '570e7b2abdd848b95f2f578043fc23bd6f6fd24d') }
let!(:diff_file) { commit.diffs.diff_file_with_new_path('files/ruby/popen.rb') }
let(:viewer_class) do
Class.new(DiffViewer::Base) do
......@@ -15,8 +15,7 @@ describe DiffViewer::ServerSide do
describe '#prepare!' do
it 'loads all diff file data' do
expect(diff_file.old_blob).to receive(:load_all_data!)
expect(diff_file.new_blob).to receive(:load_all_data!)
expect(Blob).to receive(:lazy).at_least(:twice)
subject.prepare!
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment