# autolink_filter.rb
require 'cgi'
require 'html/pipeline/filter'
require 'uri'

module Gitlab
  module Markdown
    # HTML Filter for auto-linking URLs in HTML.
    #
    # Based on HTML::Pipeline::AutolinkFilter
    #
    # Linking happens in two passes: Rinku handles http(s) and ftp URLs first,
    # then any remaining text matching SCHEME_PATTERN is linked by this class.
    #
    # Context options:
    #   :autolink  - Boolean, skips all processing done by this filter when false
    #   :link_attr - Hash of attributes for the generated links
    #
    class AutolinkFilter < HTML::Pipeline::Filter
      include ActionView::Helpers::TagHelper

      # Pattern to match text that should be autolinked.
      #
      # A URI scheme begins with a letter and may contain letters, numbers,
      # plus, period and hyphen. Schemes are case-insensitive but we're being
      # picky here and allowing only lowercase for autolinks.
      #
      # See http://en.wikipedia.org/wiki/URI_scheme
      #
      # The negative lookbehind ensures that users can paste a URL followed by a
      # period or comma for punctuation without those characters being included
      # in the generated link.
      #
      # Rubular: http://rubular.com/r/cxjPyZc7Sb
      #
      # NOTE(review): the scheme itself is not restricted, so text such as
      # `javascript://...` also matches. Confirm that unsafe schemes are
      # filtered elsewhere in the pipeline.
      SCHEME_PATTERN = %r{([a-z][a-z0-9\+\.-]+://\S+)(?<!,|\.)}

      # Text matching SCHEME_PATTERN inside these elements will not be linked
      IGNORE_PARENTS = %w(a code kbd pre script style).to_set.freeze

      # Text nodes starting with one of these prefixes were already seen by
      # Rinku and are left untouched by the second pass.
      RINKU_PREFIXES = %w(http https ftp).freeze

      # Runs both autolinking passes.
      #
      # Returns the document. It is returned untouched when the `:autolink`
      # context option is explicitly `false`; a missing (nil) option leaves
      # autolinking enabled, hence the strict comparison.
      def call
        return doc if context[:autolink] == false

        rinku_parse
        text_parse
      end

      private

      # Run the text through Rinku as a first pass
      #
      # This will quickly autolink http(s) and ftp links.
      #
      # `@doc` will be re-parsed with the HTML String from Rinku.
      def rinku_parse
        # Convert the options from a Hash to a String that Rinku expects
        attributes = tag_options(link_options)

        # NOTE: We don't parse email links because it will erroneously match
        # external Commit and CommitRange references.
        linked_html = Rinku.auto_link(html, :urls, attributes, IGNORE_PARENTS.to_a)

        # Rinku returns a String, so parse it back into a document for further
        # processing.
        @doc = parse_html(linked_html)
      end

      # Autolinks any text matching SCHEME_PATTERN that Rinku didn't already
      # replace
      #
      # Returns the (possibly modified) document.
      def text_parse
        search_text_nodes(doc).each do |node|
          # Check ancestry first so ignored nodes are never serialized.
          next if has_ancestor?(node, IGNORE_PARENTS)

          # Work on the serialized (HTML-escaped) form so that the text around
          # a link stays escaped when the node is replaced below.
          content = node.to_html
          next unless content =~ SCHEME_PATTERN

          # If Rinku didn't link this, there's probably a good reason, so we'll
          # skip it too
          next if content.start_with?(*RINKU_PREFIXES)

          linked = autolink_filter(content)
          next if linked == content

          node.replace(linked)
        end

        doc
      end

      # Wraps every SCHEME_PATTERN match in `text` in an anchor element.
      #
      # text - String of already HTML-escaped markup
      #
      # Returns a String of HTML.
      def autolink_filter(text)
        text.gsub(SCHEME_PATTERN) do |match|
          # `match` is already HTML-escaped and `content_tag` escapes both the
          # content and the attribute values again. Decode it first so that
          # e.g. `&` in a query string is emitted as `&amp;`, not `&amp;amp;`.
          url = CGI.unescapeHTML(match)

          content_tag(:a, url, link_options.merge(href: url))
        end
      end

      # Attributes applied to every generated link, taken from the
      # `:link_attr` context option (empty Hash when not given).
      def link_options
        @link_options ||= context[:link_attr] || {}
      end
    end
  end
end