Commit 812c7a85 authored by Dmitriy Zaporozhets's avatar Dmitriy Zaporozhets
Browse files

Merge branch 'rs-more-pipeline-filters' into 'master'

More HTML::Pipeline filters

The big part of this MR is a feature that is intended to test the entire Markdown-parsing process from beginning to end. See `spec/support/markdown_feature.rb` and `spec/features/markdown_spec.rb`.

One big thing this MR fixes is not being able to type a `<` or `>` anywhere. It now gets properly escaped.

This MR also adds three more custom HTML::Pipeline filters:

### AutolinkFilter

Similar to the built-in Autolink filter in that it still uses Rinku for standard http and ftp links, but then does some further processing to allow auto-linking of any URI scheme. See internal issue https://dev.gitlab.org/gitlab/gitlabhq/issues/2239

### SanitizationFilter

Created a simple custom SanitizationFilter that sub-classes the default one and adds our custom whitelisting.

### TableOfContentsFilter

Adds the anchor links to each header. This removes some processing from our Redcarpet renderer.

Closes #800, #1015, #1528, #1549

Closes GitHub [8535](https://github.com/gitlabhq/gitlabhq/issues/8535)

See merge request !584
parents 83bba1f8 99fcf2e6
......@@ -35,7 +35,12 @@ pre {
/* Link to current header. */
h1, h2, h3, h4, h5, h6 {
position: relative;
&:hover > :last-child {
a.anchor {
display: none;
}
&:hover > a.anchor {
$size: 16px;
position: absolute;
right: 100%;
......
......@@ -34,10 +34,8 @@ def markdown(text, options={})
# see https://github.com/vmg/redcarpet#darling-i-packed-you-a-couple-renderers-for-lunch
rend = Redcarpet::Render::GitlabHTML.new(self, user_color_scheme_class, {
with_toc_data: true,
safe_links_only: true,
# Handled further down the line by HTML::Pipeline::SanitizationFilter
escape_html: false
# Handled further down the line by Gitlab::Markdown::SanitizationFilter
escape_html: false
}.merge(options))
# see https://github.com/vmg/redcarpet#and-its-like-really-simple-to-use
......@@ -45,7 +43,6 @@ def markdown(text, options={})
no_intra_emphasis: true,
tables: true,
fenced_code_blocks: true,
autolink: true,
strikethrough: true,
lax_spacing: true,
space_after_headers: true,
......
......@@ -43,17 +43,6 @@ def url_for_issue(issue_iid, project = @project, options = {})
end
end
def title_for_issue(issue_iid, project = @project)
return '' if project.nil?
if project.default_issues_tracker?
issue = project.issues.where(iid: issue_iid).first
return issue.title if issue
end
''
end
def issue_timestamp(issue)
# Shows the created at time and the updated at time if different
ts = "#{time_ago_with_tooltip(issue.created_at, 'bottom', 'note_created_ago')}"
......@@ -110,5 +99,5 @@ def issue_to_atom(xml, issue)
end
# Required for Gitlab::Markdown::IssueReferenceFilter
module_function :url_for_issue, :title_for_issue
module_function :url_for_issue
end
......@@ -15,6 +15,10 @@ def iid
@issue_identifier.to_s
end
def title
"External Issue #{self}"
end
def ==(other)
other.is_a?(self.class) && (to_s == other.to_s)
end
......
......@@ -329,14 +329,18 @@ def project_id
self.id
end
def issue_exists?(issue_id)
def get_issue(issue_id)
if default_issues_tracker?
self.issues.where(iid: issue_id).first.present?
issues.find_by(iid: issue_id)
else
true
ExternalIssue.new(issue_id, self)
end
end
def issue_exists?(issue_id)
get_issue(issue_id)
end
def default_issue_tracker
gitlab_issue_tracker_service || create_gitlab_issue_tracker_service
end
......@@ -350,11 +354,7 @@ def issues_tracker
end
def default_issues_tracker?
if external_issue_tracker
false
else
true
end
!external_issue_tracker
end
def external_issues_trackers
......
......@@ -2,8 +2,12 @@ module SharedMarkdown
include Spinach::DSL
def header_should_have_correct_id_and_link(level, text, id, parent = ".wiki")
find(:css, "#{parent} h#{level}##{id}").text.should == text
find(:css, "#{parent} h#{level}##{id} > :last-child")[:href].should =~ /##{id}$/
node = find("#{parent} h#{level} a##{id}")
node[:href].should == "##{id}"
# Work around a weird Capybara behavior where calling `parent` on a node
# returns the whole document, not the node's actual parent element
find(:xpath, "#{node.path}/..").text.should == text
end
def create_taskable(type, title)
......
......@@ -3,33 +3,10 @@
module Gitlab
# Custom parser for GitLab-flavored Markdown
#
# It replaces references in the text with links to the appropriate items in
# GitLab.
#
# Supported reference formats are:
# * @foo for team members
# * #123 for issues
# * JIRA-123 for Jira issues
# * !123 for merge requests
# * $123 for snippets
# * 1c002d for specific commit
# * 1c002d...35cfb2 for commit ranges (comparisons)
#
# It also parses Emoji codes to insert images. See
# http://www.emoji-cheat-sheet.com/ for a list of the supported icons.
#
# Examples
#
# >> gfm("Hey @david, can you fix this?")
# => "Hey <a href="/u/david">@david</a>, can you fix this?"
#
# >> gfm("Commit 35d5f7c closes #1234")
# => "Commit <a href="/gitlab/commits/35d5f7c">35d5f7c</a> closes <a href="/gitlab/issues/1234">#1234</a>"
#
# >> gfm(":trollface:")
# => "<img alt=\":trollface:\" class=\"emoji\" src=\"/images/trollface.png" title=\":trollface:\" />
# See the files in `lib/gitlab/markdown/` for specific processing information.
module Markdown
# Provide autoload paths for filters to prevent a circular dependency error
autoload :AutolinkFilter, 'gitlab/markdown/autolink_filter'
autoload :CommitRangeReferenceFilter, 'gitlab/markdown/commit_range_reference_filter'
autoload :CommitReferenceFilter, 'gitlab/markdown/commit_reference_filter'
autoload :EmojiFilter, 'gitlab/markdown/emoji_filter'
......@@ -37,7 +14,9 @@ module Markdown
autoload :IssueReferenceFilter, 'gitlab/markdown/issue_reference_filter'
autoload :LabelReferenceFilter, 'gitlab/markdown/label_reference_filter'
autoload :MergeRequestReferenceFilter, 'gitlab/markdown/merge_request_reference_filter'
autoload :SanitizationFilter, 'gitlab/markdown/sanitization_filter'
autoload :SnippetReferenceFilter, 'gitlab/markdown/snippet_reference_filter'
autoload :TableOfContentsFilter, 'gitlab/markdown/table_of_contents_filter'
autoload :UserReferenceFilter, 'gitlab/markdown/user_reference_filter'
# Public: Parse the provided text with GitLab-Flavored Markdown
......@@ -74,13 +53,13 @@ def gfm_with_options(text, options = {}, project = @project, html_options = {})
pipeline = HTML::Pipeline.new(filters)
context = {
# SanitizationFilter
whitelist: sanitization_whitelist,
# EmojiFilter
asset_root: Gitlab.config.gitlab.url,
asset_host: Gitlab::Application.config.asset_host,
# TableOfContentsFilter
no_header_anchors: options[:no_header_anchors],
# ReferenceFilter
current_user: current_user,
only_path: options[:reference_only_path],
......@@ -111,12 +90,14 @@ def gfm_with_options(text, options = {}, project = @project, html_options = {})
# SanitizationFilter should come first so that all generated reference HTML
# goes through untouched.
#
# See https://gitlab.com/gitlab-org/html-pipeline-gitlab for more filters
# See https://github.com/jch/html-pipeline#filters for more filters.
def filters
[
HTML::Pipeline::SanitizationFilter,
Gitlab::Markdown::SanitizationFilter,
Gitlab::Markdown::EmojiFilter,
Gitlab::Markdown::TableOfContentsFilter,
Gitlab::Markdown::AutolinkFilter,
Gitlab::Markdown::UserReferenceFilter,
Gitlab::Markdown::IssueReferenceFilter,
......@@ -125,36 +106,10 @@ def filters
Gitlab::Markdown::SnippetReferenceFilter,
Gitlab::Markdown::CommitRangeReferenceFilter,
Gitlab::Markdown::CommitReferenceFilter,
Gitlab::Markdown::LabelReferenceFilter,
Gitlab::Markdown::LabelReferenceFilter
]
end
# Customize the SanitizationFilter whitelist
#
# - Allow `class` and `id` attributes on all elements
# - Allow `span` elements
# - Remove `rel` attributes from `a` elements
# - Remove `a` nodes with `javascript:` in the `href` attribute
def sanitization_whitelist
whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST
whitelist[:attributes][:all].push('class', 'id')
whitelist[:elements].push('span')
fix_anchors = lambda do |env|
name, node = env[:node_name], env[:node]
if name == 'a'
node.remove_attribute('rel')
if node['href'] && node['href'].match('javascript:')
node.remove_attribute('href')
end
end
end
whitelist[:transformers].push(fix_anchors)
whitelist
end
# Turn list items that start with "[ ]" into HTML checkbox inputs.
def parse_tasks(text)
li_tag = '<li class="task-list-item">'
......
require 'html/pipeline/filter'
require 'uri'
module Gitlab
module Markdown
# HTML Filter for auto-linking URLs in HTML.
#
# Based on HTML::Pipeline::AutolinkFilter
#
# Context options:
# :autolink - Boolean, skips all processing done by this filter when false
# :link_attr - Hash of attributes for the generated links
#
class AutolinkFilter < HTML::Pipeline::Filter
include ActionView::Helpers::TagHelper
# Pattern to match text that should be autolinked.
#
# A URI scheme begins with a letter and may contain letters, numbers,
# plus, period and hyphen. Schemes are case-insensitive but we're being
# picky here and allowing only lowercase for autolinks.
#
# See http://en.wikipedia.org/wiki/URI_scheme
#
# The negative lookbehind ensures that users can paste a URL followed by a
# period or comma for punctuation without those characters being included
# in the generated link.
#
# Rubular: http://rubular.com/r/cxjPyZc7Sb
LINK_PATTERN = %r{([a-z][a-z0-9\+\.-]+://\S+)(?<!,|\.)}
# Text matching LINK_PATTERN inside these elements will not be linked
IGNORE_PARENTS = %w(a code kbd pre script style).to_set
def call
return doc if context[:autolink] == false
rinku_parse
text_parse
end
private
# Run the text through Rinku as a first pass
#
# This will quickly autolink http(s) and ftp links.
#
# `@doc` will be re-parsed with the HTML String from Rinku.
def rinku_parse
# Convert the options from a Hash to a String that Rinku expects
options = tag_options(link_options)
# NOTE: We don't parse email links because it will erroneously match
# external Commit and CommitRange references.
#
# The final argument tells Rinku to link short URLs that don't include a
# period (e.g., http://localhost:3000/)
rinku = Rinku.auto_link(html, :urls, options, IGNORE_PARENTS.to_a, 1)
# Rinku returns a String, so parse it back to a Nokogiri::XML::Document
# for further processing.
@doc = parse_html(rinku)
end
# Autolinks any text matching LINK_PATTERN that Rinku didn't already
# replace
def text_parse
search_text_nodes(doc).each do |node|
content = node.to_html
next if has_ancestor?(node, IGNORE_PARENTS)
next unless content.match(LINK_PATTERN)
# If Rinku didn't link this, there's probably a good reason, so we'll
# skip it too
next if content.start_with?(*%w(http https ftp))
html = autolink_filter(content)
next if html == content
node.replace(html)
end
doc
end
def autolink_filter(text)
text.gsub(LINK_PATTERN) do |match|
options = link_options.merge(href: match)
content_tag(:a, match, options)
end
end
def link_options
@link_options ||= context[:link_attr] || {}
end
end
end
end
......@@ -44,21 +44,20 @@ def call
# Returns a String with `#123` references replaced with links. All links
# have `gfm` and `gfm-issue` class names attached for styling.
def issue_link_filter(text)
self.class.references_in(text) do |match, issue, project_ref|
self.class.references_in(text) do |match, id, project_ref|
project = self.project_from_ref(project_ref)
if project && project.issue_exists?(issue)
# FIXME (rspeicher): Law of Demeter
push_result(:issue, project.issues.where(iid: issue).first)
if project && issue = project.get_issue(id)
push_result(:issue, issue)
url = url_for_issue(issue, project, only_path: context[:only_path])
url = url_for_issue(id, project, only_path: context[:only_path])
title = escape_once("Issue: #{title_for_issue(issue, project)}")
title = escape_once("Issue: #{issue.title}")
klass = reference_class(:issue)
%(<a href="#{url}"
title="#{title}"
class="#{klass}">#{project_ref}##{issue}</a>)
class="#{klass}">#{project_ref}##{id}</a>)
else
match
end
......@@ -68,10 +67,6 @@ def issue_link_filter(text)
def url_for_issue(*args)
IssuesHelper.url_for_issue(*args)
end
def title_for_issue(*args)
IssuesHelper.title_for_issue(*args)
end
end
end
end
......@@ -64,7 +64,6 @@ def merge_request_link_filter(text)
end
end
# TODO (rspeicher): Cleanup
def url_for_merge_request(mr, project)
h = Rails.application.routes.url_helpers
h.namespace_project_merge_request_url(project.namespace, project, mr,
......
require 'html/pipeline/filter'
require 'html/pipeline/sanitization_filter'
module Gitlab
module Markdown
# Sanitize HTML
#
# Extends HTML::Pipeline::SanitizationFilter with a custom whitelist.
class SanitizationFilter < HTML::Pipeline::SanitizationFilter
def whitelist
whitelist = HTML::Pipeline::SanitizationFilter::WHITELIST
# Allow `class` and `id` on all elements
whitelist[:attributes][:all].push('class', 'id')
# Allow table alignment
whitelist[:attributes]['th'] = %w(style)
whitelist[:attributes]['td'] = %w(style)
# Allow span elements
whitelist[:elements].push('span')
# Remove `rel` attribute from `a` elements
whitelist[:transformers].push(remove_rel)
whitelist
end
def remove_rel
lambda do |env|
if env[:node_name] == 'a'
env[:node].remove_attribute('rel')
end
end
end
end
end
end
require 'html/pipeline/filter'
module Gitlab
module Markdown
# HTML filter that adds an anchor child element to all Headers in a
# document, so that they can be linked to.
#
# Generates the Table of Contents with links to each header. See Results.
#
# Based on HTML::Pipeline::TableOfContentsFilter.
#
# Context options:
# :no_header_anchors - Skips all processing done by this filter.
#
# Results:
# :toc - String containing Table of Contents data as a `ul` element with
# `li` child elements.
class TableOfContentsFilter < HTML::Pipeline::Filter
PUNCTUATION_REGEXP = /[^\p{Word}\- ]/u
def call
return doc if context[:no_header_anchors]
result[:toc] = ""
headers = Hash.new(0)
doc.css('h1, h2, h3, h4, h5, h6').each do |node|
text = node.text
id = text.downcase
id.gsub!(PUNCTUATION_REGEXP, '') # remove punctuation
id.gsub!(' ', '-') # replace spaces with dash
id.squeeze!(' -') # replace multiple spaces or dashes with one
uniq = (headers[id] > 0) ? "-#{headers[id]}" : ''
headers[id] += 1
if header_content = node.children.first
href = "#{id}#{uniq}"
push_toc(href, text)
header_content.add_previous_sibling(anchor_tag(href))
end
end
result[:toc] = %Q{<ul class="section-nav">\n#{result[:toc]}</ul>} unless result[:toc].empty?
doc
end
private
def anchor_tag(href)
%Q{<a id="#{href}" class="anchor" href="##{href}" aria-hidden="true"></a>}
end
def push_toc(href, text)
result[:toc] << %Q{<li><a href="##{href}">#{text}</a></li>\n}
end
end
end
end
......@@ -40,7 +40,7 @@ def self.type_css_class_by_id(id)
end
# Convenience method to get a space-separated String of all the theme
# classes that mighty be applied to the `body` element
# classes that might be applied to the `body` element
#
# Returns a String
def self.body_classes
......
class Redcarpet::Render::GitlabHTML < Redcarpet::Render::HTML
require 'active_support/core_ext/string/output_safety'
class Redcarpet::Render::GitlabHTML < Redcarpet::Render::HTML
attr_reader :template
alias_method :h, :template
......@@ -8,24 +9,12 @@ def initialize(template, color_scheme, options = {})
@color_scheme = color_scheme
@project = @template.instance_variable_get("@project")
@options = options.dup
super options
end
def preprocess(full_document)
# Redcarpet doesn't allow SMB links when `safe_links_only` is enabled.
# FTP links are allowed, so we trick Redcarpet.
full_document.gsub("smb://", "ftp://smb:")
super(options)
end
# If project has issue number 39, apostrophe will be linked in
# regular text to the issue as Redcarpet will convert apostrophe to
# #39;
# We replace apostrophe with right single quote before Redcarpet
# does the processing and put the apostrophe back in postprocessing.
# This only influences regular text, code blocks are untouched.
def normal_text(text)
return text unless text.present?
text.gsub("'", "&rsquo;")
ERB::Util.html_escape_once(text)
end
# Stolen from Rugments::Plugins::Redcarpet as this module is not required
......@@ -37,7 +26,7 @@ def block_code(code, language)
# so we assume you're not using leading spaces that aren't tabs,
# and just replace them here.
if lexer.tag == 'make'
code.gsub! /^ /, "\t"
code.gsub!(/^ /, "\t")
end
formatter = Rugments::Formatters::HTML.new(
......@@ -46,27 +35,11 @@ def block_code(code, language)
formatter.format(lexer.lex(code))
end
def link(link, title, content)
h.link_to_gfm(content, link, title: title)
end
def header(text, level)
if @options[:no_header_anchors]
"<h#{level}>#{text}</h#{level}>"
else
id = ActionController::Base.helpers.strip_tags(h.gfm(text)).downcase() \
.gsub(/[^a-z0-9_-]/, '-').gsub(/-+/, '-').gsub(/^-/, '').gsub(/-$/, '')
"<h#{level} id=\"#{id}\">#{text}<a href=\"\##{id}\"></a></h#{level}>"
end
end
def postprocess(full_document)
full_document.gsub!("ftp://smb:", "smb://")
full_document.gsub!("&rsquo;", "'")
unless @template.instance_variable_get("@project_wiki") || @project.nil?
full_document = h.create_relative_links(full_document)
end
h.gfm_with_options(full_document, @options)
end
end
......@@ -94,10 +94,26 @@
'new_issue_url' => 'http://redmine/projects/project_name_in_redmine/issues/new'
}
)
end
after :create do |project|
project.issues_tracker = 'redmine'
project.issues_tracker_id = 'project_name_in_redmine'
end
end
factory :jira_project, parent: :project do
after :create do |project|
project.create_jira_service(
active: true,
properties: {
'title' => 'JIRA tracker',
'project_url' => 'http://jira.example/issues/?jql=project=A',
'issues_url' => 'http://jira.example/browse/:id',
'new_issue_url' => 'http://jira.example/secure/CreateIssue.jspa'
}
)
project.issues_tracker = 'jira'
project.issues_tracker_id = 'project_name_in_jira'
end
end
end
require 'spec_helper'
require 'erb'
# This feature spec is intended to be a comprehensive exercising of all of
# GitLab's non-standard Markdown parsing and the integration thereof.
#
# These tests should be very high-level. Anything low-level belongs in the specs
# for the corresponding HTML::Pipeline filter or helper method.