merge_request_diff.rb 12.5 KB
Newer Older
1 2
# frozen_string_literal: true

3
class MergeRequestDiff < ActiveRecord::Base
4
  include Sortable
5
  include Importable
6
  include ManualInverseAssociation
7
  include IgnorableColumn
8
  include EachBatch
9
  include Gitlab::Utils::StrongMemoize
10
  include ObjectStorage::BackgroundMove
11

12
  # Don't display more than 100 commits at once
  COMMITS_SAFE_SIZE = 100

  belongs_to(:merge_request)

  manual_inverse_association(:merge_request, :merge_request_diff)

  # Files and commits are both loaded ordered by diff id, then relative order.
  has_many(
    :merge_request_diff_files,
    -> { order(:merge_request_diff_id, :relative_order) },
    inverse_of: :merge_request_diff
  )

  has_many(
    :merge_request_diff_commits,
    -> { order(:merge_request_diff_id, :relative_order) }
  )

  # All three SHA columns go through the `sha` validator.
  validates(:base_commit_sha, :head_commit_sha, :start_commit_sha, sha: true)

27
  # A diff starts out as :empty; the `clean` event moves it from any state
  # to :without_files.
  state_machine(:state, initial: :empty) do
    event(:clean) { transition(any => :without_files) }

    state(:collected)
    state(:overflow)

    # Diff files have been deleted by the system
    state(:without_files)

    # Deprecated states: these are no longer used but these values may still
    # occur in the database.
    state(:timeout)
    state(:overflow_commits_safe_size)
    state(:overflow_diff_files_limit)
    state(:overflow_diff_lines_limit)
  end

44
  # Diffs that are neither :without_files nor :empty.
  scope(:with_files, -> { without_states(:without_files, :empty) })

  # Diffs in any state other than :empty.
  scope(:viewable, -> { without_state(:empty) })

  # Diffs having a commit row with the given SHA; any existing ordering is
  # cleared.
  scope(:by_commit_sha, lambda { |sha|
    joins(:merge_request_diff_commits)
      .where(merge_request_diff_commits: { sha: sha })
      .reorder(nil)
  })

  # The 100 diffs with the highest ids, highest first.
  scope(:recent, -> { order(id: :desc).limit(100) })

52 53
  mount_uploader(:external_diff, ExternalDiffUploader)

  # The diff content is gathered from the repository only after the record
  # has been created, which leaves room to override values such as
  # head_commit_sha beforehand. Skipped while importing.
  after_create(:save_git_content, unless: :importing?)

  # Keep external_diff_store in sync whenever the external diff changes.
  after_save(:update_external_diff_store, if: :external_diff_changed?)

Douwe Maan's avatar
Douwe Maan committed
60
  # Looks up the diff whose stored start, head and base commit SHAs all match
  # the given diff refs.
  #
  # diff_refs - object responding to start_sha, head_sha and base_sha
  #
  # Returns whatever `find_by` returns for those conditions.
  def self.find_by_diff_refs(diff_refs)
    conditions = {
      start_commit_sha: diff_refs.start_sha,
      head_commit_sha: diff_refs.head_sha,
      base_commit_sha: diff_refs.base_sha
    }

    find_by(conditions)
  end

64 65 66 67
  # A diff is viewable when it is in the collected, without_files or overflow
  # state.
  def viewable?
    return true if collected?
    return true if without_files?

    overflow?
  end

68 69 70
  # Collect information about commits and diff from repository
  # and save it to the database as serialized data
  def save_git_content
    # Point the merge request at this diff, unless it already references a
    # diff whose id is not lower than ours (a missing reference counts as 0).
    MergeRequest
      .where('id = ? AND COALESCE(latest_merge_request_diff_id, 0) < ?', merge_request_id, id)
      .update_all(latest_merge_request_diff_id: id)

    # The steps below run in a fixed order: SHAs first, then commits and
    # diffs, then persist, and finally keep the referenced commits around.
    ensure_commit_shas
    save_commits
    save_diffs
    save
    keep_around_commits
  end

82
  # Fills in any commit SHA that is not set yet: start and head from the
  # merge request's target and source branch SHAs, base from find_base_sha.
  # Values that are already present are left untouched.
  def ensure_commit_shas
    self.start_commit_sha ||= merge_request.target_branch_sha
    self.head_commit_sha ||= merge_request.source_branch_sha
    # Comes last: find_base_sha needs both of the SHAs assigned above.
    self.base_commit_sha ||= find_base_sha
  end

  # Override head_commit_sha to keep compatibility with merge request diff
  # created before version 8.4 that does not store head_commit_sha in separate db field.
  def head_commit_sha
    stored_sha = super

    # Only a persisted record without a stored value falls back to the SHA
    # of its last commit.
    return stored_sha unless persisted? && stored_sha.nil?

    last_commit_sha
  end

98 99 100 101 102 103 104
  # This method relies on the repository branch SHA when start_commit_sha
  # is nil. This is necessary for old merge request diffs created before
  # version 8.4 to work.
  def safe_start_commit_sha
    stored_sha = start_commit_sha
    return stored_sha if stored_sha

    merge_request.target_branch_sha
  end

105
  # Number of diffs: the recorded real_size when one is present, otherwise
  # the size of the raw diffs.
  def size
    recorded_size = real_size.presence
    return recorded_size if recorded_size

    raw_diffs.size
  end

109
  # Returns the raw diffs for this merge request diff, memoized per options.
  #
  # options - Hash of diff options. When :ignore_whitespace_change is truthy
  #           the diffs come from `compare`; otherwise they are read through
  #           `load_diffs`.
  #
  # Both branches cache per distinct options hash, so a later call with
  # different options (for example other :paths) is never served a result
  # that was computed for an earlier call.
  def raw_diffs(options = {})
    if options[:ignore_whitespace_change]
      @diffs_no_whitespace ||= {}
      @diffs_no_whitespace[options] ||= compare.diffs(options)
    else
      @raw_diffs ||= {}
      @raw_diffs[options] ||= load_diffs(options)
    end
  end

118
  # Commits of this diff, loaded through load_commits on first use and
  # memoized afterwards.
  def commits
    return @commits if @commits

    @commits = load_commits
  end

122 123
  # SHA at the front of commit_shas, or nil when there are none.
  def last_commit_sha
    shas = commit_shas
    shas.first
  end

126 127 128 129
  # Commit at the end of the commits collection.
  def first_commit
    loaded_commits = commits
    loaded_commits.last
  end

130
  # Commit the project resolves for base_commit_sha, or nil when no base SHA
  # is set.
  def base_commit
    sha = base_commit_sha
    project.commit_by(oid: sha) if sha
  end

136
  # Commit the project resolves for start_commit_sha, or nil when no start
  # SHA is set.
  def start_commit
    sha = start_commit_sha
    project.commit_by(oid: sha) if sha
  end

  # Commit the project resolves for head_commit_sha, or nil when no head SHA
  # is available.
  def head_commit
    sha = head_commit_sha
    project.commit_by(oid: sha) if sha
  end

148
  # SHAs of the associated merge_request_diff_commits, in association order.
  def commit_shas
    merge_request_diff_commits.map { |diff_commit| diff_commit.sha }
  end

152
  # Commit rows of this diff whose SHA is among `shas`.
  #
  # shas - SHA or collection of SHAs; a blank value yields an empty
  #        MergeRequestDiffCommit relation.
  def commits_by_shas(shas)
    if shas.present?
      merge_request_diff_commits.where(sha: shas)
    else
      MergeRequestDiffCommit.none
    end
  end

Douwe Maan's avatar
Douwe Maan committed
158
  # Copies the base, start and head SHAs from the given diff refs onto this
  # record. Passing nil clears all three.
  #
  # new_diff_refs - object responding to base_sha, start_sha and head_sha,
  #                 or nil
  def diff_refs=(new_diff_refs)
    base_sha, start_sha, head_sha =
      unless new_diff_refs.nil?
        [new_diff_refs.base_sha, new_diff_refs.start_sha, new_diff_refs.head_sha]
      end

    self.base_commit_sha = base_sha
    self.start_commit_sha = start_sha
    self.head_commit_sha = head_sha
  end

164
  # Builds DiffRefs from the stored base, start and head SHAs. Returns nil
  # when neither a start nor a base SHA is stored.
  def diff_refs
    return if !start_commit_sha && !base_commit_sha

    refs = {
      base_sha: base_commit_sha,
      start_sha: start_commit_sha,
      head_sha: head_commit_sha
    }

    Gitlab::Diff::DiffRefs.new(**refs)
  end

174 175
  # MRs created before 8.4 don't store their true diff refs (start and base),
  # but we need to get a commit SHA for the "View file @ ..." link by a file,
176 177
  # so we use an approximation of the diff refs if we can't get the actual one.
  #
178 179 180
  # These will not be the actual diff refs if the target branch was merged into
  # the source branch after the merge request was created, but it is good enough
  # for the specific purpose of linking to a commit.
181
  #
182 183 184
  # It is not good enough for highlighting diffs, so we can't simply pass
  # these as `diff_refs`.
  def fallback_diff_refs
    actual_refs = diff_refs
    return actual_refs if actual_refs

    # Approximate the base with the parent of the first commit, or with the
    # first commit itself when it has no parent.
    oldest_commit = first_commit
    base_candidate = oldest_commit&.parent || oldest_commit

    Gitlab::Diff::DiffRefs.new(
      base_sha: base_candidate&.sha,
      start_sha: safe_start_commit_sha,
      head_sha: head_commit_sha
    )
  end

197 198 199 200
  # True only when the base, head and start commit SHAs are all present.
  def diff_refs_by_sha?
    return false unless base_commit_sha?
    return false unless head_commit_sha?

    start_commit_sha?
  end

201
  # Diffs for this merge request diff.
  #
  # diff_options - options passed through to the underlying diff source
  #
  # When the diff files were cleaned by the system (state without_files) and
  # a comparison can be built from the diff refs, the diffs are fetched from
  # the repository. We don't keep these for storage overload purposes.
  # See https://gitlab.com/gitlab-org/gitlab-ce/issues/37639
  # In every other case the persisted collection is returned.
  def diffs(diff_options = nil)
    comparison = diff_refs&.compare_in(project) if without_files?
    return diffs_collection(diff_options) unless comparison

    comparison.diffs(diff_options)
  end

  # Should always return the DB persisted diffs collection
  # (e.g. Gitlab::Diff::FileCollection::MergeRequestDiff).
  # It's useful when trying to invalidate old caches through
  # FileCollection::MergeRequestDiff#clear_cache!
  def diffs_collection(diff_options = nil)
    collection_class = Gitlab::Diff::FileCollection::MergeRequestDiff
    collection_class.new(self, diff_options: diff_options)
  end

  # The project this diff belongs to: the merge request's target project.
  def project
    owning_merge_request = merge_request
    owning_merge_request.target_project
  end
223

224 225
  # Memoized Gitlab::Git::Compare between the (safe) start SHA and the head
  # SHA on the raw repository.
  def compare
    return @compare if @compare

    raw_repository = repository.raw_repository
    @compare = Gitlab::Git::Compare.new(raw_repository, safe_start_commit_sha, head_commit_sha)
  end

233
  # True when the merge request's latest_merge_request_diff_id is this
  # diff's id.
  def latest?
    latest_id = merge_request.latest_merge_request_diff_id
    id == latest_id
  end

237
  # rubocop: disable CodeReuse/ServiceClass
238
  def compare_with(sha)
    # Versions of a merge request are compared with a straight diff (A..B)
    # rather than A...B, so that squashes and rebases between versions are
    # handled. That is why straight is always true here.
    service = CompareService.new(project, head_commit_sha)
    service.execute(project, sha, straight: true)
  end
244
  # rubocop: enable CodeReuse/ServiceClass
245

246 247 248 249 250 251
  # Unique list of every new and old path among the diff files, memoized
  # under :modified_paths.
  def modified_paths
    strong_memoize(:modified_paths) do
      path_pairs = merge_request_diff_files.pluck(:new_path, :old_path)
      path_pairs.flatten.uniq
    end
  end

252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
  # CarrierWave defines `write_uploader` dynamically on this class, so `super`
  # does not work from an override. Alias the CarrierWave method so that the
  # `write_uploader` override defined further down can still call it.
  alias_method :carrierwave_write_uploader, :write_uploader

  # The `external_diff`, `external_diff_store`, and `stored_externally`
  # columns were introduced in GitLab 11.8, but some background migration specs
  # use factories that rely on current code with an old schema. Without these
  # `has_attribute?` guards, they fail with a `MissingAttributeError`.
  #
  # For more details, see: https://gitlab.com/gitlab-org/gitlab-ce/issues/44990

  # Delegates to the aliased CarrierWave implementation, but only when the
  # record actually has the given column. Returns nil otherwise.
  def write_uploader(column, identifier)
    return unless has_attribute?(column)

    carrierwave_write_uploader(column, identifier)
  end

  # Writes the uploader's object_store into the external_diff_store column,
  # provided the record has that column.
  def update_external_diff_store
    return unless has_attribute?(:external_diff_store)

    update_column(:external_diff_store, external_diff.object_store)
  end

  # Defers to the inherited implementation when the external_diff column
  # exists; returns nil otherwise.
  def external_diff_changed?
    return unless has_attribute?(:external_diff)

    super
  end

  # Defers to the inherited reader when the stored_externally column exists;
  # returns nil otherwise. Also available as stored_externally?.
  def stored_externally
    return unless has_attribute?(:stored_externally)

    super
  end
  alias_method :stored_externally?, :stored_externally

  # If enabled, yields the external file containing the diff. Otherwise, yields
  # nil. This method is not thread-safe, but it *is* re-entrant, which allows
  # multiple merge_request_diff_files to load their data efficiently
  def opening_external_diff
    if !stored_externally?
      yield(nil)
    elsif @external_diff_file
      # Nested call: hand out the file that an outer call already opened.
      yield(@external_diff_file)
    else
      external_diff.open do |opened_file|
        begin
          @external_diff_file = opened_file

          yield(opened_file)
        ensure
          # Forget the handle once the outermost call is done with it.
          @external_diff_file = nil
        end
      end
    end
  end

297 298
  private

299 300 301 302 303
  # Decides whether a diff text must be base64-encoded before being stored:
  # true when it contains a NUL byte, or when it is a binary-encoded string
  # with non-ASCII content.
  def encode_in_base64?(diff_text)
    return true if diff_text.include?("\0")

    diff_text.encoding == Encoding::BINARY && !diff_text.ascii_only?
  end

304
  # Builds one row per diff and bulk-inserts them into
  # merge_request_diff_files. The external variant of the row builder is used
  # when the record has an external_diff column and external diffs are
  # enabled in the configuration.
  def create_merge_request_diff_files(diffs)
    use_external_storage =
      has_attribute?(:external_diff) && Gitlab.config.external_diffs.enabled

    rows =
      if use_external_storage
        build_external_merge_request_diff_files(diffs)
      else
        build_merge_request_diff_files(diffs)
      end

    # Faster inserts
    Gitlab::Database.bulk_insert('merge_request_diff_files', rows)
  end

  # Builds the rows for the diff files, moves their diff content into a
  # temporary file, assigns that file as external_diff and flags the record
  # as stored externally. Returns the rows.
  def build_external_merge_request_diff_files(diffs)
    tempfile = nil

    build_merge_request_diff_files(diffs).tap do |rows|
      tempfile = build_external_diff_tempfile(rows)

      self.external_diff = tempfile
      self.stored_externally = true
    end
  ensure
    # Remove the temporary file whether or not the steps above succeeded.
    tempfile&.unlink
  end

  # Writes the :diff content of every row, back to back, into a temporary
  # file named after external_diff.filename.
  #
  # rows - Array of Hashes, each with a :diff String. Every row is mutated:
  #        :diff is removed and :external_diff_offset / :external_diff_size
  #        are set to where its content sits in the file.
  #
  # Offsets and sizes are measured in bytes (String#bytesize), because that
  # is what `file.write` emits. Counting characters instead would make the
  # recorded positions wrong as soon as any diff contains a multibyte
  # character.
  #
  # Returns the Tempfile, already closed by the Tempfile.open block.
  def build_external_diff_tempfile(rows)
    Tempfile.open(external_diff.filename) do |file|
      offset = 0

      rows.each do |row|
        data = row.delete(:diff)
        length = data.bytesize

        row[:external_diff_offset] = offset
        row[:external_diff_size] = length

        file.write(data)

        offset += length
      end

      file
    end
  end

  # Turns each diff into a Hash row for merge_request_diff_files, tagged
  # with this diff's id and its position in the collection.
  #
  # diffs - enumerable of objects responding to to_hash
  #
  # Returns an Array of Hashes.
  def build_merge_request_diff_files(diffs)
    diffs.each_with_index.map do |diff, position|
      row = diff.to_hash.merge(
        binary: false,
        merge_request_diff_id: self.id,
        relative_order: position
      )

      # Compatibility with old diffs created with Psych: content that cannot
      # be stored as plain text is base64-encoded and flagged as binary.
      content = row[:diff]

      if encode_in_base64?(content)
        row[:binary] = true
        row[:diff] = [content].pack('m0')
      end

      row
    end
  end

364
  # Loads the persisted diff files into a Gitlab::Git::DiffCollection.
  #
  # options - Hash passed on to the collection; when :paths is given, only
  #           files whose old or new path is among them are loaded.
  def load_diffs(options)
    # Ensure all diff files operate on the same external diff file instance
    # if present. This reduces file open/close overhead.
    opening_external_diff do
      requested_paths = options[:paths]
      files = merge_request_diff_files

      if requested_paths
        files = files.where('old_path IN (?) OR new_path IN (?)', requested_paths, requested_paths)
      end

      Gitlab::Git::DiffCollection.new(files.map(&:to_hash), options)
    end
  end

378
  # Builds Commit objects from the stored commit rows and wraps them in a
  # CommitCollection for the merge request's source project and branch.
  def load_commits
    commit_objects = merge_request_diff_commits.map do |diff_commit|
      Commit.from_hash(diff_commit.to_hash, project)
    end

    CommitCollection.new(
      merge_request.source_project,
      commit_objects,
      merge_request.source_branch
    )
  end

385
  # Collects the diffs from the comparison, stores the diff files and assigns
  # the resulting state (and real_size) to this record without saving it.
  def save_diffs
    # Nothing to compare: the diff is simply empty.
    return assign_attributes(state: :empty) if compare.commits.size.zero?

    collected_diffs = compare.diffs(Commit.max_diff_options)
    attributes = { real_size: collected_diffs.real_size }

    if collected_diffs.any?
      attributes[:state] = :collected

      create_merge_request_diff_files(collected_diffs)
    end

    # Set our state to 'overflow' to make the #empty? and #collected?
    # methods (generated by StateMachine) return false.
    #
    # This has to happen after the 'collected' assignment above so that the
    # 'overflow' state is not overridden by it.
    attributes[:state] = :overflow if collected_diffs.overflow?

    assign_attributes(attributes)
  end

  # Bulk-creates the commit rows for this diff from the comparison's commits
  # (in reversed order) and records how many there are in commits_count.
  def save_commits
    reversed_commits = compare.commits.reverse
    MergeRequestDiffCommit.create_bulk(self.id, reversed_commits)

    # merge_request_diff_commits.reload is the preferred way to reload the
    # associated objects, but for some reason it returns a cached result in
    # this case. Requesting an uncached reload works around that.
    reloaded = self.class.uncached { merge_request_diff_commits.reload }
    self.commits_count = reloaded.size
  end

  # Repository of the project this diff belongs to.
  def repository
    owning_project = project
    owning_project.repository
  end
424

425 426
  # SHA of the merge base between the head and start commits, or nil when
  # either SHA is missing or no merge base commit is found.
  def find_base_sha
    return if !head_commit_sha || !start_commit_sha

    merge_base = project.merge_base_commit(head_commit_sha, start_commit_sha)
    merge_base.try(:sha)
  end

431
  # Asks the target repository and the source project's repository (once, if
  # they are the same) to keep the start, head and base commits around.
  def keep_around_commits
    shas = [start_commit_sha, head_commit_sha, base_commit_sha]
    repositories = [repository, merge_request.source_project.repository].uniq

    repositories.each { |repo| repo.keep_around(*shas) }
  end
436
end