GitLab wurde erfolgreich aktualisiert. Durch regelmäßige Updates bleibt das THM GitLab sicher. Danke für Ihre Geduld.

Commit e564fe97 authored by Bob Van Landuyt's avatar Bob Van Landuyt Committed by Robert Speicher

Rename `Gitlab::Git::EncodingHelper` to `Gitlab::EncodingHelper`

parent 8d131eb8
class MergeRequestDiff < ActiveRecord::Base
include Sortable
include Importable
include Gitlab::Git::EncodingHelper
include Gitlab::EncodingHelper
# Prevent store of diff if commits amount more then 500
COMMITS_SAFE_SIZE = 100
......
......@@ -6,7 +6,7 @@
# Values are checked for formatting and exclusion from a list of illegal path
# names.
class DynamicPathValidator < ActiveModel::EachValidator
extend Gitlab::Git::EncodingHelper
extend Gitlab::EncodingHelper
class << self
def valid_user_path?(path)
......
module Gitlab
module EncodingHelper
extend self
# This threshold is carefully tweaked to prevent usage of encodings detected
# by CharlockHolmes with low confidence. If CharlockHolmes confidence is low,
# we're better off sticking with utf8 encoding.
# Reason: git diff can return strings with invalid utf8 byte sequences if it
# truncates a diff in the middle of a multibyte character. In this case
# CharlockHolmes will try to guess the encoding and will likely suggest an
# obscure encoding with low confidence.
# There is a lot more info with this merge request:
# https://gitlab.com/gitlab-org/gitlab_git/merge_requests/77#note_4754193
ENCODING_CONFIDENCE_THRESHOLD = 40
def encode!(message)
return nil unless message.respond_to? :force_encoding
# if message is utf-8 encoding, just return it
message.force_encoding("UTF-8")
return message if message.valid_encoding?
# return message if message type is binary
detect = CharlockHolmes::EncodingDetector.detect(message)
return message.force_encoding("BINARY") if detect && detect[:type] == :binary
# force detected encoding if we have sufficient confidence.
if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD
message.force_encoding(detect[:encoding])
end
# encode and clean the bad chars
message.replace clean(message)
rescue
encoding = detect ? detect[:encoding] : "unknown"
"--broken encoding: #{encoding}"
end
def encode_utf8(message)
detect = CharlockHolmes::EncodingDetector.detect(message)
if detect
begin
CharlockHolmes::Converter.convert(message, detect[:encoding], 'UTF-8')
rescue ArgumentError => e
Rails.logger.warn("Ignoring error converting #{detect[:encoding]} into UTF8: #{e.message}")
''
end
else
clean(message)
end
end
private
def clean(message)
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
.encode("UTF-8")
.gsub("\0".encode("UTF-8"), "")
end
end
end
module Gitlab
module Git
class Blame
include Gitlab::Git::EncodingHelper
include Gitlab::EncodingHelper
attr_reader :lines, :blames
......
......@@ -2,7 +2,7 @@ module Gitlab
module Git
class Blob
include Linguist::BlobHelper
include Gitlab::Git::EncodingHelper
include Gitlab::EncodingHelper
# This number is the maximum amount of data that we want to display to
# the user. We load as much as we can for encoding detection
......
......@@ -2,7 +2,7 @@
module Gitlab
module Git
class Commit
include Gitlab::Git::EncodingHelper
include Gitlab::EncodingHelper
attr_accessor :raw_commit, :head, :refs
......
......@@ -3,7 +3,7 @@ module Gitlab
module Git
class Diff
TimeoutError = Class.new(StandardError)
include Gitlab::Git::EncodingHelper
include Gitlab::EncodingHelper
# Diff properties
attr_accessor :old_path, :new_path, :a_mode, :b_mode, :diff
......
module Gitlab
module Git
module EncodingHelper
extend self
# This threshold is carefully tweaked to prevent usage of encodings detected
# by CharlockHolmes with low confidence. If CharlockHolmes confidence is low,
# we're better off sticking with utf8 encoding.
# Reason: git diff can return strings with invalid utf8 byte sequences if it
# truncates a diff in the middle of a multibyte character. In this case
# CharlockHolmes will try to guess the encoding and will likely suggest an
# obscure encoding with low confidence.
# There is a lot more info with this merge request:
# https://gitlab.com/gitlab-org/gitlab_git/merge_requests/77#note_4754193
ENCODING_CONFIDENCE_THRESHOLD = 40
def encode!(message)
return nil unless message.respond_to? :force_encoding
# if message is utf-8 encoding, just return it
message.force_encoding("UTF-8")
return message if message.valid_encoding?
# return message if message type is binary
detect = CharlockHolmes::EncodingDetector.detect(message)
return message.force_encoding("BINARY") if detect && detect[:type] == :binary
# force detected encoding if we have sufficient confidence.
if detect && detect[:encoding] && detect[:confidence] > ENCODING_CONFIDENCE_THRESHOLD
message.force_encoding(detect[:encoding])
end
# encode and clean the bad chars
message.replace clean(message)
rescue
encoding = detect ? detect[:encoding] : "unknown"
"--broken encoding: #{encoding}"
end
def encode_utf8(message)
detect = CharlockHolmes::EncodingDetector.detect(message)
if detect
begin
CharlockHolmes::Converter.convert(message, detect[:encoding], 'UTF-8')
rescue ArgumentError => e
Rails.logger.warn("Ignoring error converting #{detect[:encoding]} into UTF8: #{e.message}")
''
end
else
clean(message)
end
end
private
def clean(message)
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
.encode("UTF-8")
.gsub("\0".encode("UTF-8"), "")
end
end
end
end
module Gitlab
module Git
class Ref
include Gitlab::Git::EncodingHelper
include Gitlab::EncodingHelper
# Branch or tag name
# without "refs/tags|heads" prefix
......
module Gitlab
module Git
class Tree
include Gitlab::Git::EncodingHelper
include Gitlab::EncodingHelper
attr_accessor :id, :root_id, :name, :path, :type,
:mode, :commit_id, :submodule_url
......
require "spec_helper"
describe Gitlab::Git::EncodingHelper do
let(:ext_class) { Class.new { extend Gitlab::Git::EncodingHelper } }
describe Gitlab::EncodingHelper do
let(:ext_class) { Class.new { extend Gitlab::EncodingHelper } }
let(:binary_string) { File.read(Rails.root + "spec/fixtures/dk.png") }
describe '#encode!' do
......
require "spec_helper"
describe Gitlab::Git::Repository, seed_helper: true do
include Gitlab::Git::EncodingHelper
include Gitlab::EncodingHelper
let(:repository) { Gitlab::Git::Repository.new('default', TEST_REPO_PATH) }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment