Commit 1f99589a authored by Sean McGivern, committed by Alejandro Rodríguez
Browse files

Merge branch 'smarter-cache-invalidation' into 'master'

Smarter cache invalidation

Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/23550

See merge request !7360
parent f2522c0c
......@@ -1086,7 +1086,7 @@ def change_head(branch)
"refs/heads/#{branch}",
force: true)
repository.copy_gitattributes(branch)
repository.expire_avatar_cache(branch)
repository.expire_avatar_cache
reload_default_branch
end
......
require 'securerandom'
class Repository
include Gitlab::ShellAdapter
attr_accessor :path_with_namespace, :project
class CommitError < StandardError; end
# Files to use as a project avatar in case no avatar was uploaded via the web
# UI.
AVATAR_FILES = %w{logo.png logo.jpg logo.gif}.freeze
# Methods that cache data from the Git repository.
#
# Each entry in this Array should have a corresponding method with the exact
# same name. The cache key used by those methods must also match method's
# name.
#
# For example, for entry `:readme` there's a method called `readme` which
# stores its data in the `readme` cache key.
# Frozen so the shared list cannot be mutated by accident at runtime.
CACHED_METHODS = %i(size commit_count readme version contribution_guide
                    changelog license_blob license_key gitignore koding_yml
                    gitlab_ci_yml branch_names tag_names branch_count
                    tag_count avatar exists? empty? root_ref).freeze
# Certain method caches should be refreshed when certain types of files are
# changed. This Hash maps file types (as returned by Gitlab::FileDetector) to
# the corresponding methods to call for refreshing caches.
# Frozen so the shared mapping cannot be mutated by accident at runtime.
METHOD_CACHES_FOR_FILE_TYPES = {
  readme: :readme,
  changelog: :changelog,
  license: %i(license_blob license_key),
  contributing: :contribution_guide,
  version: :version,
  gitignore: :gitignore,
  koding: :koding_yml,
  gitlab_ci: :gitlab_ci_yml,
  avatar: :avatar
}.freeze
# Wraps around the given method and caches its output in Redis and an instance
# variable.
#
# This only works for methods that do not take any arguments.
#
# name     - The name (Symbol) of an already defined instance method.
# fallback - The value handed to `cache_method_output` as its fallback.
def self.cache_method(name, fallback: nil)
  original = :"_uncached_#{name}"

  # Keep the original implementation reachable under the `_uncached_` name so
  # the wrapper defined below can still call it.
  alias_method(original, name)

  define_method(name) do
    cache_method_output(name, fallback: fallback) { __send__(original) }
  end
end
def self.storages
Gitlab.config.repositories.storages
......@@ -37,24 +75,6 @@ def path_to_repo
)
end
# Returns whether the repository exists. The answer is memoized in @exists and
# stored under the :exists? cache key; a missing repository counts as `false`.
def exists?
  if @exists.nil?
    @exists = cache.fetch(:exists?) do
      begin
        !!(raw_repository && raw_repository.rugged)
      rescue Gitlab::Git::Repository::NoRepository
        false
      end
    end
  end

  @exists
end
# Returns whether the raw repository is empty. The answer is memoized in
# @empty and stored under the :empty? cache key.
def empty?
  @empty = cache.fetch(:empty?) { raw_repository.empty? } if @empty.nil?

  @empty
end
#
Git repository can contain some hidden refs like:
# /refs/notes/*
......@@ -221,10 +241,6 @@ def ref_names
branch_names + tag_names
end
# Returns the names of all branches. The result is memoized in @branch_names
# and stored under the :branch_names cache key.
def branch_names
@branch_names ||= cache.fetch(:branch_names) { branches.map(&:name) }
end
# Returns true when `branch_name` is one of the names listed by
# `branch_names`.
def branch_exists?(branch_name)
branch_names.include?(branch_name)
end
......@@ -274,34 +290,6 @@ def kept_around?(sha)
ref_exists?(keep_around_ref_name(sha))
end
# Returns the tag names reported by the raw repository, stored under the
# :tag_names cache key.
def tag_names
cache.fetch(:tag_names) { raw_repository.tag_names }
end
# Returns the number of commits on the root ref, stored under the
# :commit_count cache key.
def commit_count
cache.fetch(:commit_count) do
begin
raw_repository.commit_count(self.root_ref)
rescue
# Any StandardError raised while counting is reported as zero commits.
0
end
end
end
# Returns the number of branches. The result is memoized in @branch_count and
# stored under the :branch_count cache key.
def branch_count
@branch_count ||= cache.fetch(:branch_count) { branches.size }
end
# Returns the number of tags known to Rugged. The result is memoized in
# @tag_count and stored under the :tag_count cache key.
def tag_count
@tag_count ||= cache.fetch(:tag_count) { raw_repository.rugged.tags.count }
end
# Returns the repository size in megabytes, as reported by the raw
# repository. The value is stored under the :size cache key.
def size
cache.fetch(:size) { raw_repository.size }
end
def diverging_commit_counts(branch)
root_ref_hash = raw_repository.rev_parse_target(root_ref).oid
cache.fetch(:"diverging_commit_counts_#{branch.name}") do
......@@ -317,48 +305,55 @@ def diverging_commit_counts(branch)
end
end
# Keys for data that can be affected for any commit push.
def cache_keys
  %i(size commit_count
     readme version contribution_guide changelog
     license_blob license_key gitignore koding_yml)
end

# Expires the cached tag names and tag count, and drops the memoized tags.
def expire_tags_cache
  expire_method_caches(%i(tag_names tag_count))
  @tags = nil
end

# Keys for data on branch/tag operations.
def cache_keys_for_branches_and_tags
  %i(branch_names tag_names branch_count tag_count)
end

# Expires the cached branch names and branch count, and drops the memoized
# local branches.
def expire_branches_cache
  expire_method_caches(%i(branch_names branch_count))
  @local_branches = nil
end

# Calls the method named after every key from `cache_keys` and
# `cache_keys_for_branches_and_tags` that is not yet present in the cache.
def build_cache
  (cache_keys + cache_keys_for_branches_and_tags).each do |key|
    send(key) unless cache.exist?(key)
  end
end

# Expires the cached repository size and commit count.
def expire_statistics_caches
  expire_method_caches(%i(size commit_count))
end

# Expires the caches of every method listed in CACHED_METHODS.
def expire_all_method_caches
  expire_method_caches(CACHED_METHODS)
end
# Expires the caches of a specific set of methods
#
# methods - An Array of method names (Symbols). For each one the cache key of
#           the same name is expired and the matching memoized instance
#           variable, if set, is removed.
def expire_method_caches(methods)
  methods.each do |method_name|
    cache.expire(method_name)

    memo = cache_instance_variable_name(method_name)
    next unless instance_variable_defined?(memo)

    remove_instance_variable(memo)
  end
end
# Expires every key listed in `cache_keys`, then the branch and avatar caches.
#
# NOTE(review): the arguments are forwarded to `expire_avatar_cache`; only the
# definition that takes (branch_name, revision) accepts them - confirm which
# definition is meant to be in effect.
def expire_cache(branch_name = nil, revision = nil)
  cache_keys.each do |key|
    cache.expire(key)
  end

  expire_branch_cache(branch_name)
  expire_avatar_cache(branch_name, revision)
end

# Expires the cached avatar.
def expire_avatar_cache
  expire_method_caches(%i(avatar))
end

# Refreshes the method caches of this repository.
#
# types - An Array of file types (e.g. `:readme`) used to refresh extra
#         caches.
def refresh_method_caches(types)
  # Types without an entry in METHOD_CACHES_FOR_FILE_TYPES contribute nothing.
  to_refresh = types.flat_map do |type|
    Array(METHOD_CACHES_FOR_FILE_TYPES[type.to_sym])
  end

  expire_method_caches(to_refresh)

  # This ensures this particular cache is flushed after the first commit to a
  # new repository.
  expire_emptiness_caches if empty?

  # Call each method again so its cache is repopulated right away.
  to_refresh.each { |method| send(method) }
end
def expire_branch_cache(branch_name = nil)
......@@ -377,15 +372,14 @@ def expire_branch_cache(branch_name = nil)
end
# Expires the cached root ref. `expire_method_caches` expires the cache key
# and removes the memoized instance variable, so no manual reset is needed.
def expire_root_ref_cache
  expire_method_caches(%i(root_ref))
end
# Expires the cache(s) used to determine if a repository is empty or not.
#
# Nothing is expired unless `empty?` currently reports true. The memoized
# value must not be reset to nil before that check: a defined-but-nil memo is
# returned as-is by the cached `empty?`, which would make this method return
# early every time.
def expire_emptiness_caches
  return unless empty?

  expire_method_caches(%i(empty?))
  expire_has_visible_content_cache
end
......@@ -394,51 +388,22 @@ def expire_has_visible_content_cache
@has_visible_content = nil
end
# Expires the :branch_count cache key and resets the @branch_count memo.
def expire_branch_count_cache
cache.expire(:branch_count)
@branch_count = nil
end
# Expires the :tag_count cache key and resets the @tag_count memo.
def expire_tag_count_cache
cache.expire(:tag_count)
@tag_count = nil
end
# Returns a Hash memoized in @lookup_cache, created empty on first use.
def lookup_cache
@lookup_cache ||= {}
end
# Expires the cached avatar, unless the push provably did not touch it.
#
# branch_name - The branch that was pushed to (optional).
# revision    - The pushed revision (optional).
def expire_avatar_cache(branch_name = nil, revision = nil)
  # Avatars are pulled from the default branch, thus if somebody pushes to a
  # different branch there's no need to expire anything.
  return if branch_name && branch_name != root_ref

  # We don't want to flush the cache if the commit didn't actually make any
  # changes to any of the possible avatar files.
  if revision && (commit = self.commit(revision))
    touches_avatar = commit.raw_diffs(deltas_only: true).
      any? { |diff| AVATAR_FILES.include?(diff.new_path) }

    return unless touches_avatar
  end

  # Goes through expire_method_caches so the memoized instance variable is
  # removed rather than left defined as nil.
  expire_method_caches(%i(avatar))
end
# Expires the cached result of `exists?`. `expire_method_caches` expires the
# cache key and removes the memoized instance variable, so no manual reset is
# needed.
def expire_exists_cache
  expire_method_caches(%i(exists?))
end
# expire cache that doesn't depend on repository data (when expiring)
#
# Runs the tag, branch, root ref, emptiness, existence and statistics expiry
# helpers one after another.
def expire_content_cache
expire_tags_cache
expire_tag_count_cache
expire_branches_cache
expire_branch_count_cache
expire_root_ref_cache
expire_emptiness_caches
expire_exists_cache
expire_statistics_caches
end
# Runs code after a repository has been created.
......@@ -453,9 +418,8 @@ def after_create
# Runs code just before a repository is deleted.
def before_delete
expire_exists_cache
expire_cache if exists?
expire_all_method_caches
expire_branch_cache if exists?
expire_content_cache
repository_event(:remove_repository)
......@@ -472,9 +436,9 @@ def before_change_head
# Runs code before pushing (= creating or removing) a tag.
#
# Expires the general, statistics, emptiness and tag caches, then emits a
# :push_tag repository event.
def before_push_tag
expire_cache
expire_statistics_caches
expire_emptiness_caches
expire_tags_cache
expire_tag_count_cache
repository_event(:push_tag)
end
......@@ -482,7 +446,7 @@ def before_push_tag
# Runs code before removing a tag.
#
# Expires the tag and statistics caches, then emits a :remove_tag repository
# event.
def before_remove_tag
expire_tags_cache
expire_tag_count_cache
expire_statistics_caches
repository_event(:remove_tag)
end
......@@ -494,12 +458,14 @@ def before_import
# Runs code after a repository has been forked/imported.
#
# Expires the content caches, calls `build_cache`, then expires the tag and
# branch caches.
def after_import
expire_content_cache
build_cache
expire_tags_cache
expire_branches_cache
end
# Runs code after a new commit has been pushed.
#
# branch_name - The name of the branch that was pushed to.
# _revision   - Optional and ignored; accepted so callers that still pass the
#               pushed revision as a second argument keep working.
def after_push_commit(branch_name, _revision = nil)
  expire_statistics_caches
  expire_branch_cache(branch_name)

  repository_event(:push_commit, branch: branch_name)
end
......@@ -508,7 +474,6 @@ def after_push_commit(branch_name, revision)
# Expires the branch, visible-content and branch-count caches, then emits a
# :push_branch repository event.
def after_create_branch
expire_branches_cache
expire_has_visible_content_cache
expire_branch_count_cache
repository_event(:push_branch)
end
......@@ -523,7 +488,6 @@ def before_remove_branch
# Runs code after an existing branch has been removed.
#
# Expires the visible-content, branch-count and branch caches.
def after_remove_branch
expire_has_visible_content_cache
expire_branch_count_cache
expire_branches_cache
end
......@@ -550,86 +514,127 @@ def blob_by_oid(oid)
Gitlab::Git::Blob.raw(self, oid)
end
# Returns the root ref reported by the raw repository.
#
# Raises Rugged::ReferenceError when there is no raw repository.
def root_ref
  # When the repo does not exist we raise this error so no data is cached.
  raise Rugged::ReferenceError unless raw_repository

  raw_repository.root_ref
end
cache_method :root_ref
# Returns the result of `refs_directory_exists?`.
def exists?
refs_directory_exists?
end
cache_method :exists?
# Returns whether the raw repository reports itself as empty.
def empty?
raw_repository.empty?
end
cache_method :empty?
# The size of this repository in megabytes.
#
# Returns 0.0 when the repository does not exist.
def size
  return 0.0 unless exists?

  raw_repository.size
end
cache_method :size, fallback: 0.0
# Returns the number of commits on the root ref, or 0 when there is no root
# ref.
def commit_count
  return 0 unless root_ref

  raw_repository.commit_count(root_ref)
end
cache_method :commit_count, fallback: 0
# Returns the name of every branch, in the order `branches` yields them.
def branch_names
  branches.map { |branch| branch.name }
end
cache_method :branch_names, fallback: []
# Returns the tag names reported by the raw repository.
def tag_names
raw_repository.tag_names
end
cache_method :tag_names, fallback: []
# Returns the number of entries in `branches`.
def branch_count
branches.size
end
cache_method :branch_count, fallback: 0
# Returns the number of tags known to the raw repository's Rugged handle.
def tag_count
raw_repository.rugged.tags.count
end
cache_method :tag_count, fallback: 0
# Returns the path of the avatar file found on HEAD, or nil when there is
# none.
def avatar
  avatar_file = file_on_head(:avatar)

  avatar_file.path if avatar_file
end
cache_method :avatar
# Returns the README of the HEAD tree, or nil when there is no HEAD tree.
def readme
  head = tree(:head)

  head.readme if head
end
cache_method :readme
# Returns the file on HEAD detected as the :version file, if any.
def version
  file_on_head(:version)
end
cache_method :version
# Returns the file on HEAD detected as the :contributing file, if any.
def contribution_guide
  file_on_head(:contributing)
end
cache_method :contribution_guide
# Returns the file on HEAD detected as the :changelog file, if any.
def changelog
  file_on_head(:changelog)
end
cache_method :changelog
# Returns the file on HEAD detected as the :license file, or nil when HEAD
# does not exist.
def license_blob
  return unless head_exists?

  file_on_head(:license)
end
cache_method :license_blob
# Returns the key of the license Licensee detects at `path`, or nil when HEAD
# or the repository does not exist.
def license_key
  return unless head_exists?
  return unless exists?

  Licensee.license(path).try(:key)
end
cache_method :license_key
# Returns the file on HEAD detected as the :gitignore file, or nil when the
# repository is missing or empty.
def gitignore
  return unless exists? && !empty?

  file_on_head(:gitignore)
end
cache_method :gitignore
# Returns the file on HEAD detected as the :koding file, or nil when HEAD
# does not exist.
def koding_yml
  return unless head_exists?

  file_on_head(:koding)
end
cache_method :koding_yml
# Returns the file on HEAD detected as the :gitlab_ci file, or nil when HEAD
# does not exist.
#
# Rugged::ReferenceError is not rescued here; `cache_method_output` already
# rescues it for methods wrapped with `cache_method`.
def gitlab_ci_yml
  return unless head_exists?

  file_on_head(:gitlab_ci)
end
cache_method :gitlab_ci_yml
# Returns the commit the root ref points at, memoized in @head_commit.
def head_commit
@head_commit ||= commit(self.root_ref)
end
# Returns the Tree of the head commit, memoized in @head_tree, or nil when
# there is no head commit.
def head_tree
  # Guard first: building the Tree needs head_commit.sha.
  return unless head_commit

  @head_tree ||= Tree.new(self, head_commit.sha, nil)
end
def tree(sha = :head, path = nil, recursive: false)
if sha == :head
return unless head_commit
if path.nil?
return head_tree
else
......@@ -779,10 +784,6 @@ def tags
@tags ||= raw_repository.tags
end
# Returns the raw repository's root ref, memoized in @root_ref and stored
# under the :root_ref cache key.
def root_ref
@root_ref ||= cache.fetch(:root_ref) { raw_repository.root_ref }
end
def commit_dir(user, path, message, branch, author_email: nil, author_name: nil)
update_branch_with_hooks(user, branch) do |ref|
options = {
......@@ -1140,28 +1141,55 @@ def copy_gitattributes(ref)
end
end
# Caches the supplied block both in a cache and in an instance variable.
#
# The cache key and instance variable are named the same way as the value of
# the `key` argument.
#
# This method will return `nil` if the corresponding instance variable is also
# set to `nil`. This ensures we don't keep yielding the block when it returns
# `nil`.
#
# key - The name of the key to cache the data in.
# fallback - A value to fall back to in the event of a Git error.
def cache_method_output(key, fallback: nil, &block)
  ivar = cache_instance_variable_name(key)

  # A defined instance variable wins, even when its value is nil.
  return instance_variable_get(ivar) if instance_variable_defined?(ivar)

  begin
    instance_variable_set(ivar, cache.fetch(key, &block))
  rescue Rugged::ReferenceError, Gitlab::Git::Repository::NoRepository
    # if e.g. HEAD or the entire repository doesn't exist we want to
    # gracefully handle this and not cache anything.
    fallback
  end
end
private
# Returns the instance variable name (a Symbol such as :@exists) used to
# memoize `key`. Any `?` or `!` in the key is dropped.
def cache_instance_variable_name(key)
  stripped = key.to_s.delete('?!')

  :"@#{stripped}"
end
# Returns the first blob on the HEAD tree whose name Gitlab::FileDetector
# classifies as `type`, or nil when there is no HEAD tree or no match.
#
# type - The file type (Symbol) to look for, e.g. `:readme`.
def file_on_head(type)
  head = tree(:head)
  return unless head

  head.blobs.find do |file|
    Gitlab::FileDetector.type_of(file.name) == type
  end
end

# Returns true when the repository exists, is not empty and its HEAD is born.
def head_exists?
  exists? && !empty? && !rugged.head_unborn?
end

private

# Returns whether a `refs` entry exists inside the repository path. Always
# false when `path_with_namespace` is not set.
def refs_directory_exists?
  return false unless path_with_namespace

  File.exist?(File.join(path_to_repo, 'refs'))
end

# Returns the RepositoryCache for this repository, memoized in @cache.
def cache
  @cache ||= RepositoryCache.new(path_with_namespace, @project.id)
end
def tags_sorted_by_committed_date
......
......@@ -18,7 +18,9 @@ def initialize(repository, sha, path = '/', recursive: false)
def readme
return @readme if defined?(@readme)
available_readmes = blobs.select(&:readme?)
available_readmes = blobs.select do |blob|
Gitlab::FileDetector.type_of(blob.name) == :readme
end
previewable_readmes = available_readmes.select do |blob|
previewable?(blob.name)
......
......@@ -18,7 +18,7 @@ class GitPushService < BaseService
#
def execute
@project.repository.after_create if @project.empty_repo?
@project.repository.after_push_commit(branch_name, params[:newrev])
@project.repository.after_push_commit(branch_name)
if push_remove_branch?
@project.repository.after_remove_branch
......@@ -51,12 +51,32 @@ def execute
execute_related_hooks
perform_housekeeping
update_caches
end
def update_gitattributes
@project.repository.copy_gitattributes(params[:ref])