Commit e94cd6fd authored by Nick Thomas's avatar Nick Thomas

Add markdown cache columns to the database, but don't use them yet

This commit adds a number of _html columns and, with the exception of Note,
starts updating them whenever the content of their partner fields changes.

Note has a collision with the note_html attr_accessor; that will be fixed later

A background worker for clearing these cache columns is also introduced - use
`rake cache:clear` to set it off. You can clear the database or Redis caches
separately by running `rake cache:clear:db` or `rake cache:clear:redis`,
respectively.
parent 4a90e25f
......@@ -51,17 +51,15 @@ module GitlabMarkdownHelper
context[:project] ||= @project
html = Banzai.render(text, context)
banzai_postprocess(html, context)
end
context.merge!(
current_user: (current_user if defined?(current_user)),
def markdown_field(object, field)
object = object.for_display if object.respond_to?(:for_display)
return "" unless object.present?
# RelativeLinkFilter
requested_path: @path,
project_wiki: @project_wiki,
ref: @ref
)
Banzai.post_process(html, context)
html = Banzai.render_field(object, field)
banzai_postprocess(html, object.banzai_render_context(field))
end
def asciidoc(text)
......@@ -196,4 +194,18 @@ module GitlabMarkdownHelper
icon(options[:icon])
end
end
# Calls Banzai.post_process with some common context options
def banzai_postprocess(html, context)
context.merge!(
current_user: (current_user if defined?(current_user)),
# RelativeLinkFilter
requested_path: @path,
project_wiki: @project_wiki,
ref: @ref
)
Banzai.post_process(html, context)
end
end
class AbuseReport < ActiveRecord::Base
include CacheMarkdownField
cache_markdown_field :message, pipeline: :single_line
belongs_to :reporter, class_name: 'User'
belongs_to :user
......@@ -7,6 +11,9 @@ class AbuseReport < ActiveRecord::Base
validates :message, presence: true
validates :user_id, uniqueness: { message: 'has already been reported' }
# For CacheMarkdownField
alias_method :author, :reporter
def remove_user(deleted_by:)
user.block
DeleteUserWorker.perform_async(deleted_by.id, user.id, delete_solo_owned_groups: true)
......
class Appearance < ActiveRecord::Base
include CacheMarkdownField
cache_markdown_field :description
validates :title, presence: true
validates :description, presence: true
validates :logo, file_size: { maximum: 1.megabyte }
......
class ApplicationSetting < ActiveRecord::Base
include CacheMarkdownField
include TokenAuthenticatable
add_authentication_token_field :runners_registration_token
add_authentication_token_field :health_check_access_token
......@@ -17,6 +19,11 @@ class ApplicationSetting < ActiveRecord::Base
serialize :domain_whitelist, Array
serialize :domain_blacklist, Array
cache_markdown_field :sign_in_text
cache_markdown_field :help_page_text
cache_markdown_field :shared_runners_text, pipeline: :plain_markdown
cache_markdown_field :after_sign_up_text
attr_accessor :domain_whitelist_raw, :domain_blacklist_raw
validates :session_expire_delay,
......
class BroadcastMessage < ActiveRecord::Base
include CacheMarkdownField
include Sortable
cache_markdown_field :message, pipeline: :broadcast_message
validates :message, presence: true
validates :starts_at, presence: true
validates :ends_at, presence: true
......
# This module takes care of updating cache columns for Markdown-containing
# fields. Use like this in the body of your class:
#
# include CacheMarkdownField
# cache_markdown_field :foo
# cache_markdown_field :bar
# cache_markdown_field :baz, pipeline: :single_line
#
# Corresponding foo_html, bar_html and baz_html fields should exist.
module CacheMarkdownField
# Knows about the relationship between markdown and html field names, and
# stores the rendering contexts for the latter
class FieldData
extend Forwardable
def initialize
@data = {}
end
def_delegators :@data, :[], :[]=
def_delegator :@data, :keys, :markdown_fields
def html_field(markdown_field)
"#{markdown_field}_html"
end
def html_fields
markdown_fields.map {|field| html_field(field) }
end
end
# Dynamic registries don't really work in Rails as it's not guaranteed that
# every class will be loaded, so hardcode the list.
CACHING_CLASSES = %w[
AbuseReport
Appearance
ApplicationSetting
BroadcastMessage
Issue
Label
MergeRequest
Milestone
Namespace
Note
Project
Release
Snippet
]
def self.caching_classes
CACHING_CLASSES.map(&:constantize)
end
extend ActiveSupport::Concern
included do
cattr_reader :cached_markdown_fields do
FieldData.new
end
# Returns the default Banzai render context for the cached markdown field.
def banzai_render_context(field)
raise ArgumentError.new("Unknown field: #{field.inspect}") unless
cached_markdown_fields.markdown_fields.include?(field)
# Always include a project key, or Banzai complains
project = self.project if self.respond_to?(:project)
context = cached_markdown_fields[field].merge(project: project)
# Banzai is less strict about authors, so don't always have an author key
context[:author] = self.author if self.respond_to?(:author)
context
end
# Allow callers to look up the cache field name, rather than hardcoding it
def markdown_cache_field_for(field)
raise ArgumentError.new("Unknown field: #{field}") unless
cached_markdown_fields.markdown_fields.include?(field)
cached_markdown_fields.html_field(field)
end
# Always exclude _html fields from attributes (including serialization).
# They contain unredacted HTML, which would be a security issue
alias_method :attributes_before_markdown_cache, :attributes
def attributes
attrs = attributes_before_markdown_cache
cached_markdown_fields.html_fields.each do |field|
attrs.delete(field)
end
attrs
end
end
class_methods do
private
# Specify that a field is markdown. Its rendered output will be cached in
# a corresponding _html field. Any custom rendering options may be provided
# as a context.
def cache_markdown_field(markdown_field, context = {})
raise "Add #{self} to CacheMarkdownField::CACHING_CLASSES" unless
CacheMarkdownField::CACHING_CLASSES.include?(self.to_s)
cached_markdown_fields[markdown_field] = context
html_field = cached_markdown_fields.html_field(markdown_field)
cache_method = "#{markdown_field}_cache_refresh".to_sym
invalidation_method = "#{html_field}_invalidated?".to_sym
define_method(cache_method) do
html = Banzai::Renderer.cacheless_render_field(self, markdown_field)
__send__("#{html_field}=", html)
true
end
# The HTML becomes invalid if any dependent fields change. For now, assume
# author and project invalidate the cache in all circumstances.
define_method(invalidation_method) do
changed_fields = changed_attributes.keys
invalidations = changed_fields & [markdown_field.to_s, "author", "project"]
!invalidations.empty?
end
before_save cache_method, if: invalidation_method
end
end
end
......@@ -6,6 +6,7 @@
#
module Issuable
extend ActiveSupport::Concern
include CacheMarkdownField
include Participable
include Mentionable
include Subscribable
......@@ -13,6 +14,9 @@ module Issuable
include Awardable
included do
cache_markdown_field :title, pipeline: :single_line
cache_markdown_field :description
belongs_to :author, class_name: "User"
belongs_to :assignee, class_name: "User"
belongs_to :updated_by, class_name: "User"
......
......@@ -4,6 +4,10 @@ class GlobalLabel
delegate :color, :description, to: :@first_label
def for_display
@first_label
end
def self.build_collection(labels)
labels = labels.group_by(&:title)
......
......@@ -4,6 +4,10 @@ class GlobalMilestone
attr_accessor :title, :milestones
alias_attribute :name, :title
def for_display
@first_milestone
end
def self.build_collection(milestones)
milestones = milestones.group_by(&:title)
......@@ -17,6 +21,7 @@ class GlobalMilestone
@title = title
@name = title
@milestones = milestones
@first_milestone = milestones.find {|m| m.description.present? } || milestones.first
end
def safe_title
......
class Label < ActiveRecord::Base
include CacheMarkdownField
include Referable
include Subscribable
......@@ -8,6 +9,8 @@ class Label < ActiveRecord::Base
None = LabelStruct.new('No Label', 'No Label')
Any = LabelStruct.new('Any Label', '')
cache_markdown_field :description, pipeline: :single_line
DEFAULT_COLOR = '#428BCA'
default_value_for :color, DEFAULT_COLOR
......
......@@ -6,12 +6,16 @@ class Milestone < ActiveRecord::Base
Any = MilestoneStruct.new('Any Milestone', '', -1)
Upcoming = MilestoneStruct.new('Upcoming', '#upcoming', -2)
include CacheMarkdownField
include InternalId
include Sortable
include Referable
include StripAttribute
include Milestoneish
cache_markdown_field :title, pipeline: :single_line
cache_markdown_field :description
belongs_to :project
has_many :issues
has_many :labels, -> { distinct.reorder('labels.title') }, through: :issues
......
class Namespace < ActiveRecord::Base
acts_as_paranoid
include CacheMarkdownField
include Sortable
include Gitlab::ShellAdapter
cache_markdown_field :description, pipeline: :description
has_many :projects, dependent: :destroy
belongs_to :owner, class_name: "User"
......
......@@ -6,6 +6,7 @@ class Project < ActiveRecord::Base
include Gitlab::VisibilityLevel
include Gitlab::CurrentSettings
include AccessRequestable
include CacheMarkdownField
include Referable
include Sortable
include AfterCommitQueue
......@@ -17,6 +18,8 @@ class Project < ActiveRecord::Base
UNKNOWN_IMPORT_URL = 'http://unknown.git'
cache_markdown_field :description, pipeline: :description
delegate :feature_available?, :builds_enabled?, :wiki_enabled?, :merge_requests_enabled?, to: :project_feature, allow_nil: true
default_value_for :archived, false
......
class Release < ActiveRecord::Base
include CacheMarkdownField
cache_markdown_field :description
belongs_to :project
validates :description, :project, :tag, presence: true
......
class Snippet < ActiveRecord::Base
include Gitlab::VisibilityLevel
include Linguist::BlobHelper
include CacheMarkdownField
include Participable
include Referable
include Sortable
include Awardable
cache_markdown_field :title, pipeline: :single_line
cache_markdown_field :content
# If file_name changes, it invalidates content
alias_method :default_content_html_invalidator, :content_html_invalidated?
def content_html_invalidated?
default_content_html_invalidator || file_name_changed?
end
default_value_for :visibility_level, Snippet::PRIVATE
belongs_to :author, class_name: 'User'
......
# This worker clears all cache fields in the database, working in batches.
class ClearDatabaseCacheWorker
include Sidekiq::Worker
BATCH_SIZE = 1000
def perform
CacheMarkdownField.caching_classes.each do |kls|
fields = kls.cached_markdown_fields.html_fields
clear_cache_fields = fields.each_with_object({}) do |field, memo|
memo[field] = nil
end
Rails.logger.debug("Clearing Markdown cache for #{kls}: #{fields.inspect}")
kls.unscoped.in_batches(of: BATCH_SIZE) do |relation|
relation.update_all(clear_cache_fields)
end
end
nil
end
end
# Port ActiveRecord::Relation#in_batches from ActiveRecord 5.
# https://github.com/rails/rails/blob/ac027338e4a165273607dccee49a3d38bc836794/activerecord/lib/active_record/relation/batches.rb#L184
# TODO: this can be removed once we're using AR5.
raise "Vendored ActiveRecord 5 code! Delete #{__FILE__}!" if ActiveRecord::VERSION::MAJOR >= 5
module ActiveRecord
module Batches
# Differences from upstream: enumerator support was removed, and custom
# order/limit clauses are ignored without a warning.
def in_batches(of: 1000, start: nil, finish: nil, load: false)
raise "Must provide a block" unless block_given?
relation = self.reorder(batch_order).limit(of)
relation = relation.where(arel_table[primary_key].gteq(start)) if start
relation = relation.where(arel_table[primary_key].lteq(finish)) if finish
batch_relation = relation
loop do
if load
records = batch_relation.records
ids = records.map(&:id)
yielded_relation = self.where(primary_key => ids)
yielded_relation.load_records(records)
else
ids = batch_relation.pluck(primary_key)
yielded_relation = self.where(primary_key => ids)
end
break if ids.empty?
primary_key_offset = ids.last
raise ArgumentError.new("Primary key not included in the custom select clause") unless primary_key_offset
yield yielded_relation
break if ids.length < of
batch_relation = relation.where(arel_table[primary_key].gt(primary_key_offset))
end
end
end
end
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class AddMarkdownCacheColumns < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false
COLUMNS = {
abuse_reports: [:message],
appearances: [:description],
application_settings: [
:sign_in_text,
:help_page_text,
:shared_runners_text,
:after_sign_up_text
],
broadcast_messages: [:message],
issues: [:title, :description],
labels: [:description],
merge_requests: [:title, :description],
milestones: [:title, :description],
namespaces: [:description],
notes: [:note],
projects: [:description],
releases: [:description],
snippets: [:title, :content],
}
def change
COLUMNS.each do |table, columns|
columns.each do |column|
add_column table, "#{column}_html", :text
end
end
end
end
......@@ -23,6 +23,7 @@ ActiveRecord::Schema.define(version: 20160926145521) do
t.text "message"
t.datetime "created_at"
t.datetime "updated_at"
t.text "message_html"
end
create_table "appearances", force: :cascade do |t|
......@@ -30,8 +31,9 @@ ActiveRecord::Schema.define(version: 20160926145521) do
t.text "description"
t.string "header_logo"
t.string "logo"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.text "description_html"
end
create_table "application_settings", force: :cascade do |t|
......@@ -92,6 +94,10 @@ ActiveRecord::Schema.define(version: 20160926145521) do
t.text "domain_blacklist"
t.boolean "koding_enabled"
t.string "koding_url"
t.text "sign_in_text_html"
t.text "help_page_text_html"
t.text "shared_runners_text_html"
t.text "after_sign_up_text_html"
end
create_table "audit_events", force: :cascade do |t|
......@@ -128,13 +134,14 @@ ActiveRecord::Schema.define(version: 20160926145521) do
add_index "boards", ["project_id"], name: "index_boards_on_project_id", using: :btree
create_table "broadcast_messages", force: :cascade do |t|
t.text "message", null: false
t.text "message", null: false
t.datetime "starts_at"
t.datetime "ends_at"
t.datetime "created_at"
t.datetime "updated_at"
t.string "color"
t.string "font"
t.text "message_html"
end
create_table "ci_application_settings", force: :cascade do |t|
......@@ -457,18 +464,20 @@ ActiveRecord::Schema.define(version: 20160926145521) do
t.integer "project_id"
t.datetime "created_at"
t.datetime "updated_at"
t.integer "position", default: 0
t.integer "position", default: 0
t.string "branch_name"
t.text "description"
t.integer "milestone_id"
t.string "state"
t.integer "iid"
t.integer "updated_by_id"
t.boolean "confidential", default: false
t.boolean "confidential", default: false
t.datetime "deleted_at"
t.date "due_date"
t.integer "moved_to_id"
t.integer "lock_version"
t.text "title_html"
t.text "description_html"
end
add_index "issues", ["assignee_id"], name: "index_issues_on_assignee_id", using: :btree
......@@ -514,9 +523,10 @@ ActiveRecord::Schema.define(version: 20160926145521) do
t.integer "project_id"
t.datetime "created_at"
t.datetime "updated_at"
t.boolean "template", default: false
t.boolean "template", default: false
t.string "description"
t.integer "priority"
t.text "description_html"
end
add_index "labels", ["priority"], name: "index_labels_on_priority", using: :btree
......@@ -632,6 +642,8 @@ ActiveRecord::Schema.define(version: 20160926145521) do
t.datetime "deleted_at"
t.string "in_progress_merge_commit_sha"
t.integer "lock_version"
t.text "title_html"
t.text "description_html"
end
add_index "merge_requests", ["assignee_id"], name: "index_merge_requests_on_assignee_id", using: :btree
......@@ -658,14 +670,16 @@ ActiveRecord::Schema.define(version: 20160926145521) do
add_index "merge_requests_closing_issues", ["merge_request_id"], name: "index_merge_requests_closing_issues_on_merge_request_id", using: :btree
create_table "milestones", force: :cascade do |t|
t.string "title", null: false
t.integer "project_id", null: false
t.string "title", null: false
t.integer "project_id", null: false
t.text "description"
t.date "due_date"
t.datetime "created_at"
t.datetime "updated_at"
t.string "state"
t.integer "iid"
t.text "title_html"
t.text "description_html"
end
add_index "milestones", ["description"], name: "index_milestones_on_description_trigram", using: :gin, opclasses: {"description"=>"gin_trgm_ops"}
......@@ -689,6 +703,7 @@ ActiveRecord::Schema.define(version: 20160926145521) do
t.boolean "request_access_enabled", default: true, null: false
t.datetime "deleted_at"
t.boolean "lfs_enabled"
t.text "description_html"
end
add_index "namespaces", ["created_at"], name: "index_namespaces_on_created_at", using: :btree
......@@ -721,6 +736,7 @@ ActiveRecord::Schema.define(version: 20160926145521) do
t.integer "resolved_by_id"
t.string "discussion_id"
t.string "original_discussion_id"
t.text "note_html"
end
add_index "notes", ["author_id"], name: "index_notes_on_author_id", using: :btree
......@@ -872,6 +888,7 @@ ActiveRecord::Schema.define(version: 20160926145521) do
t.boolean "request_access_enabled", default: true, null: false
t.boolean "has_external_wiki"
t.boolean "lfs_enabled"
t.text "description_html"
end
add_index "projects", ["ci_id"], name: "index_projects_on_ci_id", using: :btree
......@@ -922,6 +939,7 @@ ActiveRecord::Schema.define(version: 20160926145521) do
t.integer "project_id"
t.datetime "created_at"
t.datetime "updated_at"
t.text "description_html"
end
add_index "releases", ["project_id", "tag"], name: "index_releases_on_project_id_and_tag", using: :btree
......@@ -976,6 +994,8 @@ ActiveRecord::Schema.define(version: 20160926145521) do
t.string "file_name"
t.string "type"
t.integer "visibility_level", default: 0, null: false
t.text "title_html"
t.text "content_html"
end
add_index "snippets", ["author_id"], name: "index_snippets_on_author_id", using: :btree
......
......@@ -3,6 +3,10 @@ module Banzai
Renderer.render(text, context)
end
def self.render_field(object, field)
Renderer.render_field(object, field)
end
def self.cache_collection_render(texts_and_contexts)
Renderer.cache_collection_render(texts_and_contexts)
end
......
......@@ -31,6 +31,34 @@ module Banzai
end
end
# Convert a Markdown-containing field on an object into an HTML-safe String
# of HTML. This method is analogous to calling render(object.field), but it
# can cache the rendered HTML in the object, rather than Redis.
#
# The context to use is learned from the passed-in object by calling
# #banzai_render_context(field), and cannot be changed. Use #render, passing
# it the field text, if a custom rendering is needed. The generated context
# is returned along with the HTML.
def render_field(object, field)
html_field = object.markdown_cache_field_for(field)
html = object.__send__(html_field)
return html if html.present?
html = cacheless_render_field(object, field)
object.update_column(html_field, html) unless object.new_record? || object.destroyed?
html
end
# Same as +render_field+, but without consulting or updating the cache field
def cacheless_render_field(object, field)
text = object.__send__(field)
context = object.banzai_render_context(field)
cacheless_render(text, context)
end
# Perform multiple render from an Array of Markdown String into an
# Array of HTML-safe String of HTML.
#
......
namespace :cache do
CLEAR_BATCH_SIZE = 1000 # There seems to be no speedup when pushing beyond 1,000
REDIS_SCAN_START_STOP = '0' # Magic value, see http://redis.io/commands/scan
namespace :clear do
REDIS_CLEAR_BATCH_SIZE = 1000 # There seems to be no speedup when pushing beyond 1,000
REDIS_SCAN_START_STOP = '0' # Magic value, see http://redis.io/commands/scan
desc "GitLab | Clear redis cache"
task :clear => :environment do
Gitlab::Redis.with do |redis|
cursor = REDIS_SCAN_START_STOP
loop do
cursor, keys = redis.scan(
cursor,
match: "#{Gitlab::Redis::CACHE_NAMESPACE}*",
count: CLEAR_BATCH_SIZE
)
redis.del(*keys) if keys.any?
break if cursor == REDIS_SCAN_START_STOP
desc "GitLab | Clear redis cache"
task redis: :environment do
Gitlab::Redis.with do |redis|
cursor = REDIS_SCAN_START_STOP
loop do
cursor, keys = redis.scan(
cursor,
match: "#{Gitlab::Redis::CACHE_NAMESPACE}*",
count: REDIS_CLEAR_BATCH_SIZE
)
redis.del(*keys) if keys.any?
break if cursor == REDIS_SCAN_START_STOP
end
end
end
desc "GitLab | Clear database cache (in the background)"
task db: :environment do
ClearDatabaseCacheWorker.perform_async
end
task all: [:db, :redis]
end
task clear: 'cache:clear:all'
end
......@@ -9,6 +9,9 @@ FactoryGirl.define do
namespace
creator
# Behaves differently to nil due to cache_has_external_issue_tracker
has_external_issue_tracker false
trait :public do
visibility_level Gitlab::VisibilityLevel::PUBLIC