merge_request_diff.rb 10.1 KB
Newer Older
1
class MergeRequestDiff < ActiveRecord::Base
2
  include Sortable
  include Importable
  include Gitlab::EncodingHelper

  # Commit-count threshold above which the diff was not meant to be stored.
  # NOTE(review): the constant is not referenced elsewhere in this class, and
  # the previous comment claimed a limit of 500 while the value is 100 —
  # confirm external users before changing or removing it.
  COMMITS_SAFE_SIZE = 100

  # Valid types of serialized diffs allowed by Gitlab::Git::Diff
  VALID_CLASSES = [Hash, Rugged::Patch, Rugged::Diff::Delta].freeze

  belongs_to :merge_request
  has_many :merge_request_diff_files, -> { order(:merge_request_diff_id, :relative_order) }

  serialize :st_commits # rubocop:disable Cop/ActiverecordSerialize
  serialize :st_diffs # rubocop:disable Cop/ActiverecordSerialize

  state_machine :state, initial: :empty do
    state :collected
    state :overflow
    # Deprecated states: these are no longer used but these values may still occur
    # in the database.
    state :timeout
    state :overflow_commits_safe_size
    state :overflow_diff_files_limit
    state :overflow_diff_lines_limit
  end

  scope :viewable, -> { without_state(:empty) }

  # All diff information is collected from repository after object is created.
  # It allows you to override variables like head_commit_sha before getting diff.
  after_create :save_git_content, unless: :importing?
34

Douwe Maan's avatar
Douwe Maan committed
35
  # Looks up the diff whose start, head and base commit SHAs all match the
  # given refs object (anything responding to #start_sha, #head_sha and
  # #base_sha).
  def self.find_by_diff_refs(diff_refs)
    find_by(
      start_commit_sha: diff_refs.start_sha,
      head_commit_sha: diff_refs.head_sha,
      base_commit_sha: diff_refs.base_sha
    )
  end

39 40 41 42
  # Selects every column except the serialized `st_diffs` one.
  def self.select_without_diff
    wanted_columns = column_names.reject { |column| column == 'st_diffs' }
    select(wanted_columns)
  end

43 44 45 46
  # Reads the serialized commits column, substituting an empty array when
  # nothing has been stored so callers can always enumerate the result.
  def st_commits
    stored_commits = super
    stored_commits || []
  end

47 48 49
  # Collect information about commits and diff from repository
  # and save it to the database as serialized data
  def save_git_content
    # Order matters: the SHAs are resolved first because the comparison used
    # by #save_commits reads them, and #save_diffs reads the reloaded commits.
    ensure_commits_sha
    save_commits
    reload_commits
    save_diffs
    keep_around_commits
  end

  # Fetches the merge request ref, then fills in any missing start, head and
  # base SHAs (values that are already set are kept) and saves the record.
  def ensure_commits_sha
    merge_request.fetch_ref

    self.start_commit_sha ||= merge_request.target_branch_sha
    self.head_commit_sha  ||= merge_request.source_branch_sha
    # Resolved last: #find_base_sha needs both SHAs assigned above.
    self.base_commit_sha  ||= find_base_sha

    save
  end

  # Override head_commit_sha to keep compatibility with merge request diff
  # created before version 8.4 that does not store head_commit_sha in separate db field.
  def head_commit_sha
    stored_sha = super
    return stored_sha unless persisted? && stored_sha.nil?

    # Persisted record without a stored head SHA: fall back to #last_commit.
    last_commit.try(:sha)
  end

75 76 77 78 79 80 81
  # This method falls back to the repository's target branch SHA
  # when start_commit_sha is nil. It's necessary for old merge request diffs
  # created before version 8.4 to work.
  def safe_start_commit_sha
    stored_sha = start_commit_sha
    return stored_sha if stored_sha

    merge_request.target_branch_sha
  end

82
  # Returns the stored `real_size` when it is present, otherwise the size of
  # the raw diffs loaded with default options.
  def size
    real_size.presence || raw_diffs.size
  end

86
  # Returns the diffs for this version, memoized per options hash.
  #
  # options - Hash of diff options. With :ignore_whitespace_change the diff is
  #           recomputed through Gitlab::Git::Compare; otherwise it is loaded
  #           via #load_diffs.
  def raw_diffs(options = {})
    if options[:ignore_whitespace_change]
      # Keyed by options (like @raw_diffs below) so that a later call with,
      # for example, different :paths is not answered from the first call's
      # cached result.
      @diffs_no_whitespace ||= {}
      @diffs_no_whitespace[options] ||=
        Gitlab::Git::Compare.new(
          repository.raw_repository,
          safe_start_commit_sha,
          head_commit_sha
        ).diffs(options)
    else
      @raw_diffs ||= {}
      @raw_diffs[options] ||= load_diffs(options)
    end
  end

99
  # Commit objects built from the serialized `st_commits`, memoized until
  # #reload_commits clears them.
  def commits
    @commits ||= load_commits(st_commits)
  end

103 104 105 106 107
  # Discards the memoized commit list and returns a freshly built one.
  def reload_commits
    @commits = nil # forget the cached list so #commits rebuilds it

    commits
  end

108 109 110 111
  # First entry of #commits, or nil when there are none.
  # NOTE(review): #save_commits stores the compared commits reversed, so this
  # is presumably the most recent commit — confirm against Compare#commits.
  def last_commit
    commit_list = commits
    commit_list.first
  end

112 113 114 115
  # Last entry of #commits, or nil when there are none.
  # NOTE(review): #save_commits stores the compared commits reversed, so this
  # is presumably the earliest commit — confirm against Compare#commits.
  def first_commit
    commit_list = commits
    commit_list.last
  end

116
  # Commit for `base_commit_sha` looked up in the project, or nil when no
  # base SHA is recorded.
  def base_commit
    sha = base_commit_sha
    project.commit(sha) if sha
  end

122
  # Commit for `start_commit_sha` looked up in the project, or nil when no
  # start SHA is recorded.
  def start_commit
    sha = start_commit_sha
    project.commit(sha) if sha
  end

  # Commit for #head_commit_sha looked up in the project, or nil when no head
  # SHA can be determined.
  def head_commit
    sha = head_commit_sha
    project.commit(sha) if sha
  end

134
  # Values stored under the :id key of each serialized `st_commits` entry.
  def commits_sha
    st_commits.map { |commit| commit[:id] }
  end

Douwe Maan's avatar
Douwe Maan committed
138
  # Copies the base, start and head SHAs from the given refs object.
  # Passing nil clears all three.
  def diff_refs=(new_diff_refs)
    self.base_commit_sha = new_diff_refs&.base_sha
    self.start_commit_sha = new_diff_refs&.start_sha
    self.head_commit_sha = new_diff_refs&.head_sha
  end

144
  # Builds DiffRefs from the stored SHAs. Returns nil when neither the start
  # nor the base SHA is recorded.
  def diff_refs
    return unless start_commit_sha || base_commit_sha

    Gitlab::Diff::DiffRefs.new(
      base_sha:  base_commit_sha,
      start_sha: start_commit_sha,
      head_sha:  head_commit_sha
    )
  end

154 155
  # MRs created before 8.4 don't store their true diff refs (start and base),
  # but we need to get a commit SHA for the "View file @ ..." link by a file,
  # so we use an approximation of the diff refs if we can't get the actual one.
  #
  # These will not be the actual diff refs if the target branch was merged into
  # the source branch after the merge request was created, but it is good enough
  # for the specific purpose of linking to a commit.
  #
  # It is not good enough for highlighting diffs, so we can't simply pass
  # these as `diff_refs`.
  def fallback_diff_refs
    actual_refs = diff_refs
    return actual_refs if actual_refs

    # Prefer the parent of #first_commit; use #first_commit itself when it has
    # no parent. Stays nil when there are no commits at all.
    first = first_commit
    likely_base_commit_sha = (first&.parent || first)&.sha

    Gitlab::Diff::DiffRefs.new(
      base_sha:  likely_base_commit_sha,
      start_sha: safe_start_commit_sha,
      head_sha:  head_commit_sha
    )
  end

177 178 179 180
  # True only when base, head and start SHAs are all stored on this record.
  def diff_refs_by_sha?
    return false unless base_commit_sha?
    return false unless head_commit_sha?

    start_commit_sha?
  end

181 182 183 184 185 186 187
  # Wraps this record in a MergeRequestDiff file collection built with the
  # given diff options.
  def diffs(diff_options = nil)
    collection_class = Gitlab::Diff::FileCollection::MergeRequestDiff
    collection_class.new(self, diff_options: diff_options)
  end

  # The target project of the merge request this diff belongs to.
  def project
    parent_request = merge_request
    parent_request.target_project
  end
188

189 190
  # Memoized Gitlab::Git::Compare between #safe_start_commit_sha and
  # #head_commit_sha in the raw repository of #repository.
  def compare
    @compare ||=
      Gitlab::Git::Compare.new(
        repository.raw_repository,
        safe_start_commit_sha,
        head_commit_sha
      )
  end

198 199 200 201
  # True when this record equals the merge request's current
  # `merge_request_diff`.
  def latest?
    current_diff = merge_request.merge_request_diff
    self == current_diff
  end

202
  # Compares this version's head commit against the given SHA within the
  # target project.
  def compare_with(sha)
    # When comparing merge request versions we want the diff A..B instead of
    # A...B, so that squashes and rebases of the commits between versions are
    # handled. That is why `straight: true` is passed here.
    CompareService.new(project, head_commit_sha).execute(project, sha, straight: true)
  end

209 210 211 212
  # Number of entries in the serialized `st_commits`.
  def commits_count
    stored_commits = st_commits
    stored_commits.count
  end

213 214 215 216 217 218 219 220 221 222
  # Returns `st_diffs` with every encodable value passed through #encode_utf8.
  # The stored hashes are updated in place. Returns [] when `st_diffs` is blank.
  def utf8_st_diffs
    stored_diffs = st_diffs
    return [] if stored_diffs.blank?

    stored_diffs.map do |diff|
      diff.each do |key, value|
        # Only values that carry an encoding (e.g. strings) are converted.
        next unless value.respond_to?(:encoding)

        diff[key] = encode_utf8(value)
      end
    end
  end

223 224
  private

225 226 227 228 229 230 231 232 233
  # Old GitLab implementations may have generated diffs as ["--broken-diff"].
  # Avoid an error 500 by ignoring bad elements. See:
  # https://gitlab.com/gitlab-org/gitlab-ce/issues/20776
  def valid_raw_diff?(raw)
    # Non-enumerable input can never be a valid diff list; otherwise one
    # element of an allowed class is enough.
    raw.respond_to?(:each) && raw.any? { |element| VALID_CLASSES.include?(element.class) }
  end

234 235 236 237 238 239 240 241
  # Converts each commit into a plain hash via #to_hash.
  def dump_commits(commits)
    commits.map { |commit| commit.to_hash }
  end

  # Rebuilds Commit objects from an array of serialized commit hashes, each
  # bound to the merge request's source project.
  def load_commits(array)
    array.map do |commit_hash|
      raw_commit = Gitlab::Git::Commit.new(commit_hash)
      Commit.new(raw_commit, merge_request.source_project)
    end
  end

242
  # Load all commits related to the current merge request diff from the repo
  # and save them as an array of hashes in the st_commits db field
  def save_commits
    new_attributes = {}
    compared_commits = compare.commits

    if compared_commits.present?
      # Stored in the reverse of the order returned by the comparison.
      decorated = Commit.decorate(compared_commits, merge_request.source_project).reverse
      new_attributes[:st_commits] = dump_commits(decorated)
    end

    # No-op when there was nothing to store (see #update_columns_serialized).
    update_columns_serialized(new_attributes)
  end

257 258 259 260 261 262
  # Bulk-inserts one `merge_request_diff_files` row per diff, tagging each
  # with this record's id and its zero-based position in the collection.
  def create_merge_request_diff_files(diffs)
    rows = diffs.map.with_index do |diff, position|
      diff.to_hash.merge(
        merge_request_diff_id: id,
        relative_order: position
      )
    end

    Gitlab::Database.bulk_insert('merge_request_diff_files', rows)
  end

268 269
  # Builds a DiffCollection from the stored diffs, optionally restricted to
  # the given :paths (matched against each diff's old or new path). Returns
  # an empty collection when nothing is stored.
  def load_diffs(options)
    stored = diffs_from_database
    return Gitlab::Git::DiffCollection.new([]) unless stored

    paths = options[:paths]
    if paths
      stored = stored.select do |diff|
        paths.include?(diff[:old_path]) || paths.include?(diff[:new_path])
      end
    end

    Gitlab::Git::DiffCollection.new(stored, options)
  end

  # Memoized source of the stored diffs: the legacy `st_diffs` value when it
  # is present and valid, otherwise the `merge_request_diff_files` rows as
  # indifferent-access hashes. nil when `st_diffs` is present but invalid, or
  # when neither source has data.
  def diffs_from_database
    # defined? (rather than ||=) so that a nil result is memoized as well.
    return @diffs_from_database if defined?(@diffs_from_database)

    @diffs_from_database =
      if st_diffs.present?
        st_diffs if valid_raw_diff?(st_diffs)
      elsif merge_request_diff_files.present?
        merge_request_diff_files
          .as_json(only: Gitlab::Git::Diff::SERIALIZE_KEYS)
          .map(&:with_indifferent_access)
      end
  end

297
  # Load diffs between branches related to the current merge request diff from
  # the repo, store them as merge_request_diff_files rows and record the state
  def save_diffs
    new_attributes = {}

    if commits.empty?
      new_attributes[:state] = :empty
    else
      diff_collection = compare.diffs(Commit.max_diff_options)
      new_attributes[:real_size] = diff_collection.real_size

      if diff_collection.any?
        new_attributes[:state] = :collected

        create_merge_request_diff_files(diff_collection)
      end

      # Set our state to 'overflow' to make the #empty? and #collected?
      # methods (generated by StateMachine) return false.
      #
      # This assignment has to come at the end of the method so the 'overflow'
      # state does not get overridden by 'collected'.
      new_attributes[:state] = :overflow if diff_collection.overflow?
    end

    update_columns_serialized(new_attributes)
  end

  # Repository of #project (the merge request's target project).
  def repository
    project.repository
  end
328

329 330
  # SHA of the merge base commit between the head and start SHAs. Returns nil
  # when either SHA is missing or the project returns no merge base commit.
  def find_base_sha
    return unless head_commit_sha && start_commit_sha

    project.merge_base_commit(head_commit_sha, start_commit_sha).try(:sha)
  end

335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356
  #
  # #save or #update_attributes providing changes on serialized attributes do a lot of
  # serialization and deserialization calls resulting in bad performance.
  # Using #update_columns solves the problem with just one YAML.dump per serialized attribute that we provide.
  # As a tradeoff we need to reload the current instance to properly manage time objects on those serialized
  # attributes. So to keep the same behaviour as the attribute assignment we reload the instance.
  # The difference is in the usage of
  # #write_attribute= (#update_attributes) and #raw_write_attribute= (#update_columns)
  #
  # Ex:
  #
  #   new_attributes[:st_commits].first.slice(:committed_date)
  #   => {:committed_date=>2014-02-27 11:01:38 +0200}
  #   YAML.load(YAML.dump(new_attributes[:st_commits].first.slice(:committed_date)))
  #   => {:committed_date=>2014-02-27 10:01:38 +0100}
  #
  def update_columns_serialized(new_attributes)
    return if new_attributes.none?

    stamped_attributes = new_attributes.merge(updated_at: current_time_from_proper_timezone)
    update_columns(stamped_attributes)
    reload
  end
357

358
  # Asks both the target repository and the source project's repository to
  # keep the start, head and base commits around.
  def keep_around_commits
    shas = [start_commit_sha, head_commit_sha, base_commit_sha]

    [repository, merge_request.source_project.repository].each do |repo|
      shas.each { |sha| repo.keep_around(sha) }
    end
  end
365
end