merge_request_diff.rb 7.27 KB
Newer Older
1
class MergeRequestDiff < ActiveRecord::Base
2
  include Sortable
3
  include Importable
James Lopez's avatar
James Lopez committed
4
  include EncodingHelper
5

6
  # Prevent storing the diff if the number of commits exceeds this limit
7
  COMMITS_SAFE_SIZE = 100
8 9 10 11 12

  belongs_to :merge_request

  # States of the stored diff, tracked on the +state+ attribute.
  # A new record starts out as :empty.
  state_machine :state, initial: :empty do
    state :collected
    state :overflow
    # Deprecated states: these are no longer used but these values may still occur
    # in the database.
    state :timeout
    state :overflow_commits_safe_size
    state :overflow_diff_files_limit
    state :overflow_diff_lines_limit
  end

  serialize :st_commits
  serialize :st_diffs

25 26
  # All diff information is collected from repository after object is created.
  # It allows you to override variables like head_commit_sha before getting diff.
27
  after_create :save_git_content, unless: :importing?
28

29 30 31 32
  # Selects every column of the table except the serialized +st_diffs+ one.
  def self.select_without_diff
    columns_to_load = column_names.reject { |column| column == 'st_diffs' }
    select(columns_to_load)
  end

33 34 35
  # Collect information about commits and diffs from the repository
  # and save it to the database as serialized data.
  #
  # The steps run in a fixed order: the commit SHAs are settled first, then the
  # commits are stored and re-read, then the diffs are stored, and finally the
  # referenced commits are handed to the repository's keep_around.
  # Returns whatever #keep_around_commits returns.
  def save_git_content
    ensure_commits_sha
    save_commits
    reload_commits
    save_diffs
    keep_around_commits
  end

  # Fills in whichever of the start/head/base commit SHAs are still unset and
  # saves the record. Values that are already set are left untouched.
  #
  # - start: the merge request's target branch SHA
  # - head:  the merge request's source branch SHA
  # - base:  the result of #find_base_sha (which needs start and head, so it goes last)
  #
  # Returns the result of #save.
  def ensure_commits_sha
    self.start_commit_sha ||= merge_request.target_branch_sha
    self.head_commit_sha  ||= merge_request.source_branch_sha
    self.base_commit_sha  ||= find_base_sha
    save
  end

  # Override head_commit_sha to keep compatibility with merge request diffs
  # created before version 8.4, which do not store head_commit_sha in a separate db field.
  #
  # For a persisted record whose stored value is nil, the SHA of #last_commit is
  # returned instead (nil when there is no last commit). In every other case the
  # stored value is returned as is.
  def head_commit_sha
    if persisted? && super.nil?
      last_commit.try(:sha)
    else
      super
    end
  end

60 61 62 63 64 65 66 67 68 69 70
  # Returns the stored start_commit_sha, falling back to the merge request's
  # target branch SHA when nothing is stored. The fallback is necessary for
  # merge request diffs created before version 8.4 to keep working.
  def safe_start_commit_sha
    stored_sha = start_commit_sha
    return stored_sha if stored_sha

    merge_request.target_branch_sha
  end

  # Returns head_commit_sha, falling back to the merge request's source branch
  # SHA when head_commit_sha is nil. The fallback is necessary for merge request
  # diffs created before version 8.4 to keep working.
  def safe_head_commit_sha
    head_commit_sha || merge_request.source_branch_sha
  end

74
  # Size of the diff: the stored real_size when it is present, otherwise the
  # number of entries in #raw_diffs.
  def size
    real_size.presence || raw_diffs.size
  end

78
  # Returns the diffs of this merge request diff.
  #
  # options - Hash of diff options.
  #
  # When options[:ignore_whitespace_change] is set, the diffs are computed from
  # the repository by comparing the safe start and head commit SHAs. Otherwise
  # they are built from the serialized st_diffs through #load_diffs.
  #
  # Both variants are memoized per options hash, so calls that differ in any
  # option (for example :paths) never receive a result computed for other options.
  def raw_diffs(options = {})
    if options[:ignore_whitespace_change]
      @diffs_no_whitespace ||= {}
      @diffs_no_whitespace[options] ||=
        Gitlab::Git::Compare.new(
          repository.raw_repository,
          safe_start_commit_sha,
          safe_head_commit_sha
        ).diffs(options)
    else
      @raw_diffs ||= {}
      @raw_diffs[options] ||= load_diffs(st_diffs, options)
    end
  end

95 96 97 98
  # Commits built from the serialized st_commits (an empty list when nothing
  # is stored). The result is memoized; see #reload_commits to rebuild it.
  def commits
    return @commits if @commits

    serialized = st_commits
    serialized = [] unless serialized
    @commits = load_commits(serialized)
  end

99 100 101 102 103
  # Throws away the memoized commit list and returns a freshly built one.
  def reload_commits
    @commits = nil # forget what #commits memoized so the next call rebuilds it
    commits
  end

104 105 106 107
  # The first element of #commits, or nil when there are no commits.
  def last_commit
    stored_commits = commits
    stored_commits.first
  end

108 109 110 111
  # The last element of #commits, or nil when there are no commits.
  def first_commit
    stored_commits = commits
    stored_commits.last
  end

112
  # Looks up the commit for base_commit_sha in the project.
  # Returns nil when no base_commit_sha is set.
  def base_commit
    return unless base_commit_sha

    project.commit(base_commit_sha)
  end

118
  # Looks up the commit for start_commit_sha in the project.
  # Returns nil when no start_commit_sha is set.
  def start_commit
    return unless start_commit_sha

    project.commit(start_commit_sha)
  end

  # Looks up the commit for head_commit_sha in the project.
  # Returns nil when head_commit_sha is nil.
  def head_commit
    return unless head_commit_sha

    project.commit(head_commit_sha)
  end

130
  # Builds a Gitlab::Diff::DiffRefs from the base, start and head commit SHAs.
  # Returns nil when neither start_commit_sha nor base_commit_sha is set.
  def diff_refs
    return unless start_commit_sha || base_commit_sha

    Gitlab::Diff::DiffRefs.new(
      base_sha:  base_commit_sha,
      start_sha: start_commit_sha,
      head_sha:  head_commit_sha
    )
  end

140 141 142 143
  # Whether the base, head and start commit SHAs are all set on this record.
  def diff_refs_by_sha?
    base_commit_sha? &&
      head_commit_sha? &&
      start_commit_sha?
  end

144 145 146 147 148 149 150
  # Wraps this record in a Gitlab::Diff::FileCollection::MergeRequestDiff.
  #
  # diff_options - passed through unchanged as the collection's diff_options (default: nil).
  def diffs(diff_options = nil)
    collection_class = Gitlab::Diff::FileCollection::MergeRequestDiff
    collection_class.new(self, diff_options: diff_options)
  end

  # The project this diff belongs to: the merge request's target project.
  def project
    owning_request = merge_request
    owning_request.target_project
  end
151

152 153 154 155 156
  # Memoized Gitlab::Git::Compare between the safe start and head commit SHAs
  # in the project's raw repository.
  #
  # The merge request's ref is fetched right before the comparison is built
  # for the first time; later calls reuse the memoized object without fetching.
  def compare
    @compare ||=
      begin
        # Update ref for merge request
        merge_request.fetch_ref

        Gitlab::Git::Compare.new(
          repository.raw_repository,
          safe_start_commit_sha,
          safe_head_commit_sha
        )
      end
  end

166 167
  private

168 169 170 171 172 173 174 175
  # Converts each commit to a hash (via #to_hash) so the list can be serialized.
  def dump_commits(commits)
    commits.map { |commit| commit.to_hash }
  end

  # Rebuilds Commit objects from an array of serialized commit hashes.
  # Every commit is attached to the merge request's source project.
  def load_commits(array)
    array.map do |commit_hash|
      raw_commit = Gitlab::Git::Commit.new(commit_hash)
      Commit.new(raw_commit, merge_request.source_project)
    end
  end

176
  # Load all commits related to the current merge request diff from the repo
  # and save them as an array of hashes in the st_commits db field.
  #
  # The commits returned by the comparison are decorated for the merge request's
  # source project and stored in reverse order. When the comparison yields no
  # commits, no attribute is passed on to #update_columns_serialized.
  def save_commits
    new_attributes = {}

    compared_commits = compare.commits

    if compared_commits.present?
      decorated = Commit.decorate(compared_commits, merge_request.source_project).reverse
      new_attributes[:st_commits] = dump_commits(decorated)
    end

    update_columns_serialized(new_attributes)
  end

191 192 193 194 195 196 197 198
  # Converts each diff to a hash (via #to_hash) so the list can be serialized.
  # Returns nil when the argument does not respond to #map.
  def dump_diffs(diffs)
    return unless diffs.respond_to?(:map)

    diffs.map { |diff| diff.to_hash }
  end

  # Builds a Gitlab::Git::DiffCollection from serialized diffs.
  #
  # raw     - the stored diffs; anything that does not respond to #each
  #           (for example nil) yields an empty collection.
  # options - Hash handed to the collection. When options[:paths] is given, only
  #           diffs whose :old_path or :new_path is included in it are kept.
  def load_diffs(raw, options)
    return Gitlab::Git::DiffCollection.new([]) unless raw.respond_to?(:each)

    paths = options[:paths]

    if paths
      raw = raw.select do |diff|
        paths.include?(diff[:old_path]) || paths.include?(diff[:new_path])
      end
    end

    Gitlab::Git::DiffCollection.new(raw, options)
  end

211
  # Load diffs between the branches related to the current merge request diff from
  # the repo and save them as an array of hashes in the st_diffs db field.
  #
  # Without commits the state is set to :empty and an empty list is stored.
  # Otherwise the diffs are computed with Commit.max_diff_options, real_size is
  # recorded and the state is derived from the resulting collection.
  def save_diffs
    new_attributes = {}
    new_diffs = []

    if commits.empty?
      new_attributes[:state] = :empty
    else
      diff_collection = compare.diffs(Commit.max_diff_options)

      if diff_collection.overflow?
        # Set our state to 'overflow' to make the #empty? and #collected?
        # methods (generated by StateMachine) return false.
        new_attributes[:state] = :overflow
      end

      new_attributes[:real_size] = diff_collection.real_size

      # NOTE(review): this assignment comes after the overflow one, so a collection
      # that overflowed but still contains diffs ends up :collected - confirm this
      # is intended.
      if diff_collection.any?
        new_diffs = dump_diffs(diff_collection)
        new_attributes[:state] = :collected
      end
    end

    new_attributes[:st_diffs] = new_diffs
    update_columns_serialized(new_attributes)
  end

  # The repository of #project.
  def repository
    project.repository
  end
243

244 245
  # SHA of the merge base of the head and start commits, as reported by the project.
  # Returns nil when either SHA is missing or when no merge base commit is found.
  def find_base_sha
    return unless head_commit_sha && start_commit_sha

    project.merge_base_commit(head_commit_sha, start_commit_sha).try(:sha)
  end

James Lopez's avatar
James Lopez committed
250 251 252 253 254 255 256 257
  # Returns the stored diffs with every value that responds to #encoding
  # run through encode_utf8.
  #
  # The stored diff hashes are updated in place and returned. When nothing
  # usable is stored (st_diffs is nil or otherwise not a collection) an empty
  # array is returned instead of raising.
  def utf8_st_diffs
    return [] unless st_diffs.respond_to?(:map)

    st_diffs.map do |diff|
      diff.each do |key, value|
        diff[key] = encode_utf8(value) if value.respond_to?(:encoding)
      end
    end
  end

258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
  #
  # Persists the given attributes through #update_columns (stamping updated_at as
  # well) and then reloads the record. Does nothing when no attributes are given.
  #
  # #save or #update_attributes with changes on serialized attributes trigger a lot of
  # serialization and deserialization calls, which results in bad performance.
  # #update_columns avoids that with just one YAML.dump per serialized attribute provided.
  # The tradeoff is that the current instance has to be reloaded to properly manage time
  # objects in those serialized attributes; reloading keeps the behaviour the same as
  # with attribute assignment. The difference lies in the usage of
  # #write_attribute= (#update_attributes) versus #raw_write_attribute= (#update_columns).
  #
  # Ex:
  #
  #   new_attributes[:st_commits].first.slice(:committed_date)
  #   => {:committed_date=>2014-02-27 11:01:38 +0200}
  #   YAML.load(YAML.dump(new_attributes[:st_commits].first.slice(:committed_date)))
  #   => {:committed_date=>2014-02-27 10:01:38 +0100}
  #
  def update_columns_serialized(new_attributes)
    return if new_attributes.none?

    stamped_attributes = new_attributes.merge(updated_at: current_time_from_proper_timezone)
    update_columns(stamped_attributes)
    reload
  end
280

281
  # Passes the start, head and base commit SHAs, in that order, to the
  # repository's keep_around. Returns the result of the last call.
  def keep_around_commits
    repository.keep_around(start_commit_sha)
    repository.keep_around(head_commit_sha)
    repository.keep_around(base_commit_sha)
  end
286
end