require 'spec_helper'

# Specs for Banzai::Filter::SanitizationFilter.
#
# Every example feeds an HTML fragment through `filter` (presumably supplied
# by FilterSpecHelper -- confirm against the helper) and checks either the
# serialized HTML or individual nodes/attributes of the returned document.
describe Banzai::Filter::SanitizationFilter do
  include FilterSpecHelper

  # Expectations that are written against the baseline whitelist.
  describe 'default whitelist' do
    it 'sanitizes tags that are not whitelisted' do
      act = %q{<textarea>no inputs</textarea> and <blink>no blinks</blink>}
      exp = 'no inputs and no blinks'
      expect(filter(act).to_html).to eq exp
    end

    it 'sanitizes tag attributes' do
      act = %q{<a href="http://example.com/bar.html" onclick="bar">Text</a>}
      exp = %q{<a href="http://example.com/bar.html">Text</a>}
      expect(filter(act).to_html).to eq exp
    end

    it 'sanitizes javascript in attributes' do
      act = %q(<a href="javascript:alert('foo')">Text</a>)
      exp = '<a>Text</a>'
      expect(filter(act).to_html).to eq exp
    end

    it 'sanitizes mixed-cased javascript in attributes' do
      act = %q(<a href="javaScript:alert('foo')">Text</a>)
      exp = '<a>Text</a>'
      expect(filter(act).to_html).to eq exp
    end

    it 'allows whitelisted HTML tags from the user' do
      exp = act = "<dl>\n<dt>Term</dt>\n<dd>Definition</dd>\n</dl>"
      expect(filter(act).to_html).to eq exp
    end

    it 'sanitizes `class` attribute on any element' do
      act = %q{<strong class="foo">Strong</strong>}
      expect(filter(act).to_html).to eq %q{<strong>Strong</strong>}
    end

    it 'sanitizes `id` attribute on any element' do
      act = %q{<em id="foo">Emphasis</em>}
      expect(filter(act).to_html).to eq %q{<em>Emphasis</em>}
    end
  end

  # Expectations for the filter's own additions on top of the baseline.
  describe 'custom whitelist' do
    it 'customizes the whitelist only once' do
      instance = described_class.new('Foo')

      # Baseline transformer count after the first access.
      control_count = instance.whitelist[:transformers].size

      # Repeated access must not keep appending transformers.
      3.times { instance.whitelist }

      expect(instance.whitelist[:transformers].size).to eq control_count
    end

    it 'sanitizes `class` attribute from all elements' do
      act = %q{<pre class="code highlight white c"><code>&lt;span class="k"&gt;def&lt;/span&gt;</code></pre>}
      exp = %q{<pre><code>&lt;span class="k"&gt;def&lt;/span&gt;</code></pre>}
      expect(filter(act).to_html).to eq exp
    end

    it 'sanitizes `class` attribute from non-highlight spans' do
      act = %q{<span class="k">def</span>}
      expect(filter(act).to_html).to eq %q{<span>def</span>}
    end

    it 'allows `text-align` property in `style` attribute on table elements' do
      html = <<~HTML
        <table>
          <tr><th style="text-align: center">Head</th></tr>
          <tr><td style="text-align: right">Body</td></tr>
        </table>
      HTML

      doc = filter(html)

      expect(doc.at_css('th')['style']).to eq 'text-align: center'
      expect(doc.at_css('td')['style']).to eq 'text-align: right'
    end

    it 'disallows other properties in `style` attribute on table elements' do
      html = <<~HTML
        <table>
          <tr><th style="text-align: foo">Head</th></tr>
          <tr><td style="position: fixed; height: 50px; width: 50px; background: red; z-index: 999; font-size: 36px; text-align: center">Body</td></tr>
        </table>
      HTML

      doc = filter(html)

      # An invalid `text-align` value removes the attribute entirely, while a
      # valid one survives with every other property stripped.
      expect(doc.at_css('th')['style']).to be_nil
      expect(doc.at_css('td')['style']).to eq 'text-align: center'
    end

    it 'disallows `text-align` property in `style` attribute on other elements' do
      html = <<~HTML
        <div style="text-align: center">Text</div>
      HTML

      doc = filter(html)

      expect(doc.at_css('div')['style']).to be_nil
    end

    it 'allows `span` elements' do
      exp = act = %q{<span>Hello</span>}
      expect(filter(act).to_html).to eq exp
    end

    it 'allows `abbr` elements' do
      exp = act = %q{<abbr title="HyperText Markup Language">HTML</abbr>}
      expect(filter(act).to_html).to eq exp
    end

    it 'disallows the `name` attribute globally, allows on `a`' do
      html = <<~HTML
        <img name="getElementById" src="">
        <span name="foo" class="bar">Hi</span>
        <a name="foo" class="bar">Bye</a>
      HTML

      doc = filter(html)

      expect(doc.at_css('img')).not_to have_attribute('name')
      expect(doc.at_css('span')).not_to have_attribute('name')
      expect(doc.at_css('a')).to have_attribute('name')
    end

    it 'allows `summary` elements' do
      exp = act = '<summary>summary line</summary>'
      expect(filter(act).to_html).to eq exp
    end

    it 'allows `details` elements' do
      exp = act = '<details>long text goes here</details>'
      expect(filter(act).to_html).to eq exp
    end

    it 'allows `data-math-style` attribute on `code` and `pre` elements' do
      # `<<-` (not `<<~`) is deliberate: the leading whitespace is part of both
      # strings and has to match exactly for the `eq` comparison below.
      html = <<-HTML
      <pre class="code" data-math-style="inline">something</pre>
      <code class="code" data-math-style="inline">something</code>
      <div class="code" data-math-style="inline">something</div>
      HTML

      output = <<-HTML
      <pre data-math-style="inline">something</pre>
      <code data-math-style="inline">something</code>
      <div>something</div>
      HTML

      expect(filter(html).to_html).to eq(output)
    end

    it 'removes `rel` attribute from `a` elements' do
      act = %q{<a href="#" rel="nofollow">Link</a>}
      exp = %q{<a href="#">Link</a>}

      expect(filter(act).to_html).to eq exp
    end

    # Adapted from the Sanitize test suite: http://git.io/vczrM
    #
    # Maps an example description to the raw `input` fragment and the exact
    # serialized `output` expected after filtering. One example is generated
    # per entry by the `protocols.each` loop below.
    protocols = {
      'protocol-based JS injection: simple, no spaces' => {
        input:  '<a href="javascript:alert(\'XSS\');">foo</a>',
        output: '<a>foo</a>'
      },

      'protocol-based JS injection: simple, spaces before' => {
        input:  '<a href="javascript    :alert(\'XSS\');">foo</a>',
        output: '<a>foo</a>'
      },

      'protocol-based JS injection: simple, spaces after' => {
        input:  '<a href="javascript:    alert(\'XSS\');">foo</a>',
        output: '<a>foo</a>'
      },

      'protocol-based JS injection: simple, spaces before and after' => {
        input:  '<a href="javascript    :   alert(\'XSS\');">foo</a>',
        output: '<a>foo</a>'
      },

      'protocol-based JS injection: preceding colon' => {
        input:  '<a href=":javascript:alert(\'XSS\');">foo</a>',
        output: '<a>foo</a>'
      },

      'protocol-based JS injection: UTF-8 encoding' => {
        input:  '<a href="javascript&#58;">foo</a>',
        output: '<a>foo</a>'
      },

      'protocol-based JS injection: long UTF-8 encoding' => {
        input:  '<a href="javascript&#0058;">foo</a>',
        output: '<a>foo</a>'
      },

      'protocol-based JS injection: long UTF-8 encoding without semicolons' => {
        input:  '<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>foo</a>',
        output: '<a>foo</a>'
      },

      'protocol-based JS injection: hex encoding' => {
        input:  '<a href="javascript&#x3A;">foo</a>',
        output: '<a>foo</a>'
      },

      'protocol-based JS injection: long hex encoding' => {
        input:  '<a href="javascript&#x003A;">foo</a>',
        output: '<a>foo</a>'
      },

      'protocol-based JS injection: hex encoding without semicolons' => {
        input:  '<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>foo</a>',
        output: '<a>foo</a>'
      },

      # The expectation records the href being cut at the NUL byte and the
      # link text being dropped.
      'protocol-based JS injection: null char' => {
        input:  "<a href=java\0script:alert(\"XSS\")>foo</a>",
        output: '<a href="java"></a>'
      },

      # Single-quoted on purpose: `\s` must stay a literal backslash + "s".
      'protocol-based JS injection: invalid URL char' => {
        input:  '<img src=java\script:alert("XSS")>',
        output: '<img>'
      },

      'protocol-based JS injection: Unicode' => {
        input:  %Q(<a href="\u0001java\u0003script:alert('XSS')">foo</a>),
        output: '<a>foo</a>'
      },

      # The expectation keeps a valueless `href` attribute here rather than
      # removing the attribute.
      'protocol-based JS injection: spaces and entities' => {
        input:  '<a href=" &#14;  javascript:alert(\'XSS\');">foo</a>',
        output: '<a href>foo</a>'
      },

      'protocol whitespace' => {
        input:  '<a href=" http://example.com/"></a>',
        output: '<a href="http://example.com/"></a>'
      }
    }

    protocols.each do |name, data|
      it "disallows #{name}" do
        doc = filter(data[:input])

        expect(doc.to_html).to eq data[:output]
      end
    end

    it 'disallows data links' do
      input = '<a href="data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4K">XSS</a>'
      output = filter(input)

      expect(output.to_html).to eq '<a>XSS</a>'
    end

    it 'disallows vbscript links' do
      input = '<a href="vbscript:alert(document.domain)">XSS</a>'
      output = filter(input)

      expect(output.to_html).to eq '<a>XSS</a>'
    end

    it 'disallows invalid URIs' do
      # Make URI parsing fail for this href; the example then checks that the
      # href is removed when parsing raises.
      expect(Addressable::URI).to receive(:parse).with('foo://example.com')
        .and_raise(Addressable::URI::InvalidURIError)

      input = '<a href="foo://example.com">Foo</a>'
      output = filter(input)

      expect(output.to_html).to eq '<a>Foo</a>'
    end

    it 'allows non-standard anchor schemes' do
      exp = %q{<a href="irc://irc.freenode.net/git">IRC</a>}
      act = filter(exp)

      expect(act.to_html).to eq exp
    end

    it 'allows relative links' do
      exp = %q{<a href="foo/bar.md">foo/bar.md</a>}
      act = filter(exp)

      expect(act.to_html).to eq exp
    end
  end
end