merge_request_diff.rb 6.52 KB
Newer Older
1
class MergeRequestDiff < ActiveRecord::Base
2
  include Sortable
  include Importable
  include EncodingHelper

  # Commit-count threshold related to storing diffs.
  # NOTE(review): the earlier comment spoke of 500 commits while the value is
  # 100, and the constant is not referenced anywhere in this file; confirm the
  # intended limit with the code that reads it.
  COMMITS_SAFE_SIZE = 100

  belongs_to :merge_request

  state_machine :state, initial: :empty do
    state :collected
    state :overflow
    # Deprecated states: these are no longer used but these values may still
    # occur in the database.
    state :timeout
    state :overflow_commits_safe_size
    state :overflow_diff_files_limit
    state :overflow_diff_lines_limit
  end

  # Commits and diffs are kept as serialized data in these two columns
  # (written by save_commits and save_diffs).
  serialize :st_commits
  serialize :st_diffs

  # For compatibility with old MergeRequestDiff records which
  # do not store the commit SHAs in the database.
  after_initialize :ensure_commits_sha, if: :persisted?

  # All diff information is collected from the repository after the object is
  # created. This allows variables like head_commit_sha to be overridden
  # before the diff is fetched.
  after_create :save_git_content, unless: :importing?
32

33 34 35 36
  # Builds a query that selects every column except the serialized
  # `st_diffs` one.
  def self.select_without_diff
    lightweight_columns = column_names - ['st_diffs']
    select(lightweight_columns)
  end

37 38 39
  # Collects commit and diff information from the repository and saves it to
  # the database as serialized data.
  #
  # The steps run in a fixed order: the commit SHAs are resolved, the commits
  # are stored and then re-read, the diffs are stored, and finally
  # keep_around_commits is invoked.
  def save_git_content
    ensure_commits_sha
    save_commits
    reload_commits
    save_diffs
    keep_around_commits
  end

  # Fills in whichever of the start/head/base commit SHAs is not set yet.
  # Values that are already present are left untouched.
  #
  # * start: the merge request's target branch SHA
  # * head:  the SHA of last_commit, or the merge request's source branch SHA
  #          when there is no last commit
  # * base:  the result of find_base_sha
  def ensure_commits_sha
    self.start_commit_sha ||= merge_request.target_branch_sha
    self.head_commit_sha  ||= last_commit.try(:sha) || merge_request.source_branch_sha
    self.base_commit_sha  ||= find_base_sha
  end

53 54
  # Returns the stored real_size when it is present, otherwise the size of
  # the loaded diffs.
  def size
    real_size.presence || diffs.size
  end

57 58
  # Returns the diffs of this merge request diff.
  #
  # options - Hash of diff options. When :ignore_whitespace_change is set the
  #           diff is computed by Gitlab::Git::Compare between the start and
  #           head commit SHAs; otherwise it is built from the stored
  #           st_diffs via load_diffs.
  #
  # Results are memoized per options hash in both branches, so calls that
  # differ in any option (for example :paths) never share a cached result.
  def diffs(options = {})
    if options[:ignore_whitespace_change]
      @diffs_no_whitespace ||= {}
      @diffs_no_whitespace[options] ||=
        begin
          compare = Gitlab::Git::Compare.new(
            repository.raw_repository,
            start_commit_sha,
            head_commit_sha
          )
          compare.diffs(options)
        end
    else
      @diffs ||= {}
      @diffs[options] ||= load_diffs(st_diffs, options)
    end
  end

74 75 76 77
  # Returns the commits built from the serialized st_commits data (an empty
  # list when nothing is stored). The result is memoized in @commits.
  def commits
    return @commits if @commits

    @commits = load_commits(st_commits || [])
  end

78 79 80 81 82
  # Discards the memoized commit list and loads it again through #commits.
  #
  # Returns whatever #commits returns after the reload.
  def reload_commits
    @commits = nil
    commits
  end

83 84 85 86
  # Returns the first entry of #commits (nil when there are none).
  # NOTE(review): presumably the most recent commit, since save_commits
  # stores the list reversed; confirm the order compare.commits returns.
  def last_commit
    loaded_commits = commits
    loaded_commits.first
  end

87 88 89 90
  # Returns the final entry of #commits (nil when there are none).
  # NOTE(review): presumably the oldest commit, mirroring #last_commit;
  # confirm the order compare.commits returns.
  def first_commit
    loaded_commits = commits
    loaded_commits.last
  end

91
  # Looks up the commit for base_commit_sha in the project.
  # Returns nil when no base_commit_sha is set.
  def base_commit
    return unless base_commit_sha

    project.commit(base_commit_sha)
  end

97
  # Looks up the commit for start_commit_sha in the project.
  # Returns nil when no start_commit_sha is set.
  def start_commit
    return unless start_commit_sha

    project.commit(start_commit_sha)
  end

  # Looks up the commit for head_commit_sha in the project.
  # Falls back to #last_commit when no head_commit_sha is set.
  def head_commit
    return last_commit unless head_commit_sha

    project.commit(head_commit_sha)
  end

109 110 111 112 113 114 115 116 117 118
  # Builds a Gitlab::Diff::DiffRefs from the stored base, start and head
  # commit SHAs.
  #
  # Returns nil when neither the start commit nor the base commit can be
  # found.
  def diff_refs
    reference_commit = start_commit || base_commit
    return unless reference_commit

    base_sha  = base_commit_sha
    start_sha = start_commit_sha
    head_sha  = head_commit_sha

    Gitlab::Diff::DiffRefs.new(base_sha: base_sha, start_sha: start_sha, head_sha: head_sha)
  end

119
  private
120

121 122 123 124 125
  # Returns a memoized Gitlab::Git::Compare between start_commit_sha and
  # head_commit_sha on the raw repository.
  #
  # The merge request ref is fetched once, right before the comparison object
  # is first built.
  def compare
    @compare ||=
      begin
        # Update ref for merge request
        merge_request.fetch_ref

        Gitlab::Git::Compare.new(
          repository.raw_repository,
          start_commit_sha,
          head_commit_sha
        )
      end
  end

135 136 137 138 139 140 141 142
  # Converts each commit into its hash form (via #to_hash) so the list can be
  # stored in the serialized st_commits column.
  def dump_commits(commits)
    commits.map do |commit|
      commit.to_hash
    end
  end

  # Rebuilds Commit objects from stored commit data: every element is wrapped
  # in a Gitlab::Git::Commit and tied to the merge request's source project.
  def load_commits(array)
    array.map do |hash|
      raw_commit = Gitlab::Git::Commit.new(hash)
      Commit.new(raw_commit, merge_request.source_project)
    end
  end

143
  # Loads all commits related to the current merge request diff from the repo
  # and saves them as an array of hashes in the st_commits db field.
  #
  # When the comparison yields no commits, nothing is written
  # (update_columns_serialized receives an empty hash).
  def save_commits
    new_attributes = {}
    raw_commits = compare.commits

    if raw_commits.present?
      # The decorated list is reversed before it is stored.
      decorated = Commit.decorate(raw_commits, merge_request.source_project).reverse
      new_attributes[:st_commits] = dump_commits(decorated)
    end

    update_columns_serialized(new_attributes)
  end

158 159 160 161 162 163 164 165
  # Converts each diff into its hash form (via #to_hash) for storage.
  # Returns nil when the argument cannot be mapped over.
  def dump_diffs(diffs)
    return unless diffs.respond_to?(:map)

    diffs.map { |diff| diff.to_hash }
  end

  # Wraps stored diff hashes in a Gitlab::Git::DiffCollection.
  #
  # raw     - the stored diff data; anything that does not respond to #each
  #           yields an empty collection.
  # options - Hash of diff options, passed on to the collection. When :paths
  #           is given, only diffs whose :old_path or :new_path is listed are
  #           kept.
  def load_diffs(raw, options)
    return Gitlab::Git::DiffCollection.new([]) unless raw.respond_to?(:each)

    paths = options[:paths]
    if paths
      raw = raw.select do |diff|
        paths.include?(diff[:old_path]) || paths.include?(diff[:new_path])
      end
    end

    Gitlab::Git::DiffCollection.new(raw, options)
  end

178
  # Loads the diffs between the branches related to the current merge request
  # diff from the repo and saves them as an array of hashes in the st_diffs db
  # field, together with the resulting state and real_size.
  #
  # With no commits the state becomes :empty and st_diffs an empty array.
  def save_diffs
    new_attributes = {}
    new_diffs = []

    if commits.size.zero?
      new_attributes[:state] = :empty
    else
      diff_collection = compare.diffs(Commit.max_diff_options)

      # Set our state to 'overflow' to make the #empty? and #collected?
      # methods (generated by StateMachine) return false.
      new_attributes[:state] = :overflow if diff_collection.overflow?

      new_attributes[:real_size] = diff_collection.real_size

      if diff_collection.any?
        new_diffs = dump_diffs(diff_collection)
        # NOTE(review): for a collection that overflowed but still has
        # entries this replaces the :overflow state assigned above; confirm
        # that is intended.
        new_attributes[:state] = :collected
      end
    end

    new_attributes[:st_diffs] = new_diffs
    update_columns_serialized(new_attributes)
  end

207 208
  # The project used for commit lookups here: the merge request's target
  # project.
  def project
    merge_request.target_project
  end

  # The repository of #project.
  def repository
    project.repository
  end
214

215 216
  # Returns the SHA of the merge base between the head and start commits.
  #
  # Returns nil when either SHA is missing, when no merge base is found, or
  # when the lookup raises Rugged::OdbError.
  def find_base_sha
    return unless head_commit_sha && start_commit_sha

    project.merge_base_commit(head_commit_sha, start_commit_sha).try(:sha)
  rescue Rugged::OdbError
    # In case head or start commit does not exist in the repository any more
    nil
  end

James Lopez's avatar
James Lopez committed
224 225 226 227 228 229 230 231
  # Returns the stored diffs with every value that responds to #encoding
  # passed through encode_utf8. The stored hashes are updated in place.
  #
  # Returns an empty array when there are no stored diffs to map over (for
  # example when st_diffs is nil) instead of raising NoMethodError.
  def utf8_st_diffs
    return [] unless st_diffs.respond_to?(:map)

    st_diffs.map do |diff|
      diff.each do |k, v|
        diff[k] = encode_utf8(v) if v.respond_to?(:encoding)
      end
    end
  end

232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
  #
  # Writes the given attributes with #update_columns and reloads the record.
  #
  # #save or #update_attributes with changes on serialized attributes perform
  # many serialization and deserialization calls, which results in bad
  # performance. #update_columns needs just one YAML.dump per serialized
  # attribute that we provide.
  # The tradeoff is that the current instance has to be reloaded to properly
  # manage time objects inside those serialized attributes, which keeps the
  # behaviour the same as with attribute assignment.
  # The difference is in the usage of
  # #write_attribute= (#update_attributes) and #raw_write_attribute= (#update_columns)
  #
  # Ex:
  #
  #   new_attributes[:st_commits].first.slice(:committed_date)
  #   => {:committed_date=>2014-02-27 11:01:38 +0200}
  #   YAML.load(YAML.dump(new_attributes[:st_commits].first.slice(:committed_date)))
  #   => {:committed_date=>2014-02-27 10:01:38 +0100}
  #
  # Does nothing (and returns nil) when new_attributes is empty; updated_at
  # is set alongside the given attributes otherwise.
  #
  def update_columns_serialized(new_attributes)
    return if new_attributes.empty?

    timestamped_attributes = new_attributes.merge(updated_at: current_time_from_proper_timezone)
    update_columns(timestamped_attributes)
    reload
  end
254

255
  # Calls repository.keep_around for the start, head and base commit SHAs,
  # in that order.
  def keep_around_commits
    repository.keep_around(start_commit_sha)
    repository.keep_around(head_commit_sha)
    repository.keep_around(base_commit_sha)
  end
260
end