# merge_request_diff.rb
# Stores the commits and diffs of a merge request as serialized columns
# (st_commits / st_diffs), together with the start, head and base commit SHAs
# the comparison was made between.
class MergeRequestDiff < ActiveRecord::Base
  include Sortable
  include Importable
  include EncodingHelper

  # Prevent storing the diff if the number of commits exceeds this limit.
  # NOTE(review): the previous comment said 500 while the value is 100, and the
  # constant is not referenced inside this class - confirm against its callers.
  COMMITS_SAFE_SIZE = 100

  belongs_to :merge_request

  state_machine :state, initial: :empty do
    state :collected
    state :overflow
    # Deprecated states: these are no longer used but these values may still occur
    # in the database.
    state :timeout
    state :overflow_commits_safe_size
    state :overflow_diff_files_limit
    state :overflow_diff_lines_limit
  end

  serialize :st_commits
  serialize :st_diffs

  # For compatibility with old MergeRequestDiff which
  # does not store those variables in database
  after_initialize :ensure_commits_sha, if: :persisted?

  # All diff information is collected from repository after object is created.
  # It allows you to override variables like head_commit_sha before getting diff.
  after_create :save_git_content, unless: :importing?

  # Collect information about commits and diff from repository
  # and save it to the database as serialized data.
  #
  # The order matters: the SHAs must be known before comparing, and the
  # commits are reloaded before the diffs are saved because #save_diffs
  # inspects #commits.
  def save_git_content
    ensure_commits_sha
    save_commits
    reload_commits
    save_diffs
    keep_around_commits
  end

  # Fills in any of the start/head/base commit SHAs that are not set yet.
  # Values that are already present are left untouched.
  def ensure_commits_sha
    self.start_commit_sha ||= merge_request.target_branch_sha
    self.head_commit_sha  ||= last_commit.try(:sha) || merge_request.source_branch_sha
    self.base_commit_sha  ||= find_base_sha
  end

  # Returns the stored real_size when present, otherwise the size of the
  # loaded diff collection.
  def size
    real_size.presence || diffs.size
  end

  # Returns the diffs for the given options.
  #
  # With options[:ignore_whitespace_change] the diffs are computed from the
  # repository (between start_commit_sha and head_commit_sha); otherwise they
  # are loaded from the serialized st_diffs column.
  #
  # Results are memoized per options hash in both branches, so two calls with
  # different options (for example different :paths) never share a result.
  def diffs(options = {})
    if options[:ignore_whitespace_change]
      @diffs_no_whitespace ||= {}
      @diffs_no_whitespace[options] ||=
        begin
          # Built directly rather than through the private #compare helper,
          # which additionally calls merge_request.fetch_ref.
          whitespace_compare = Gitlab::Git::Compare.new(
            repository.raw_repository,
            start_commit_sha,
            head_commit_sha
          )
          whitespace_compare.diffs(options)
        end
    else
      @diffs ||= {}
      @diffs[options] ||= load_diffs(st_diffs, options)
    end
  end

  # Commits rebuilt from the serialized st_commits column (memoized).
  def commits
    @commits ||= load_commits(st_commits || [])
  end

  # Drops the memoized commits and rebuilds them from st_commits.
  def reload_commits
    @commits = nil
    commits
  end

  # First element of #commits.
  # NOTE(review): presumably the most recent commit, since #save_commits
  # stores the compared commits in reverse order - confirm.
  def last_commit
    commits.first
  end

  # Last element of #commits.
  def first_commit
    commits.last
  end

  # Commit for base_commit_sha looked up in the target project, or nil when
  # no base SHA is set.
  def base_commit
    return unless base_commit_sha

    project.commit(base_commit_sha)
  end

  # Commit for start_commit_sha looked up in the target project, or nil when
  # no start SHA is set.
  def start_commit
    return unless start_commit_sha

    project.commit(start_commit_sha)
  end

  # Commit for head_commit_sha looked up in the target project; falls back to
  # #last_commit when no head SHA is set.
  def head_commit
    return last_commit unless head_commit_sha

    project.commit(head_commit_sha)
  end

  # Builds the DiffRefs for this diff, or returns nil when neither the start
  # nor the base commit can be resolved.
  def diff_refs
    return unless start_commit || base_commit

    Gitlab::Diff::DiffRefs.new(
      base_sha:  base_commit_sha,
      start_sha: start_commit_sha,
      head_sha:  head_commit_sha
    )
  end

  private

  # Memoized comparison between start_commit_sha and head_commit_sha.
  # The merge request ref is fetched before the comparison is built.
  def compare
    @compare ||=
      begin
        # Update ref for merge request
        merge_request.fetch_ref

        Gitlab::Git::Compare.new(
          repository.raw_repository,
          start_commit_sha,
          head_commit_sha
        )
      end
  end

  # Converts commits into an array of hashes suitable for serialization.
  def dump_commits(commits)
    commits.map(&:to_hash)
  end

  # Rebuilds Commit objects (bound to the merge request's source project)
  # from an array of serialized hashes.
  def load_commits(array)
    array.map { |hash| Commit.new(Gitlab::Git::Commit.new(hash), merge_request.source_project) }
  end

  # Load all commits related to current merge request diff from repo
  # and save it as array of hashes in st_commits db field.
  # Nothing is written when the comparison yields no commits.
  def save_commits
    new_attributes = {}

    commits = compare.commits

    if commits.present?
      commits = Commit.decorate(commits, merge_request.source_project).reverse
      new_attributes[:st_commits] = dump_commits(commits)
    end

    update_columns_serialized(new_attributes)
  end

  # Converts a diff collection into an array of hashes; returns nil when the
  # argument does not respond to #map.
  def dump_diffs(diffs)
    if diffs.respond_to?(:map)
      diffs.map(&:to_hash)
    end
  end

  # Wraps serialized diffs in a DiffCollection. When options[:paths] is given,
  # only diffs whose old or new path is listed are kept. Anything that is not
  # enumerable (e.g. nil) yields an empty collection.
  def load_diffs(raw, options)
    if raw.respond_to?(:each)
      if paths = options[:paths]
        raw = raw.select do |diff|
          paths.include?(diff[:old_path]) || paths.include?(diff[:new_path])
        end
      end

      Gitlab::Git::DiffCollection.new(raw, options)
    else
      Gitlab::Git::DiffCollection.new([])
    end
  end

  # Load diffs between branches related to current merge request diff from repo
  # and save it as array of hashes in st_diffs db field
  def save_diffs
    new_attributes = {}
    new_diffs = []

    if commits.size.zero?
      new_attributes[:state] = :empty
    else
      diff_collection = compare.diffs(Commit.max_diff_options)

      if diff_collection.overflow?
        # Set our state to 'overflow' to make the #empty? and #collected?
        # methods (generated by StateMachine) return false.
        new_attributes[:state] = :overflow
      end

      new_attributes[:real_size] = diff_collection.real_size

      # NOTE(review): when the collection overflowed but still contains diffs,
      # this replaces the :overflow state set above with :collected - confirm
      # that this is intended.
      if diff_collection.any?
        new_diffs = dump_diffs(diff_collection)
        new_attributes[:state] = :collected
      end
    end

    new_attributes[:st_diffs] = new_diffs
    update_columns_serialized(new_attributes)
  end

  # The merge request's target project.
  def project
    merge_request.target_project
  end

  # Repository of the target project.
  def repository
    project.repository
  end

  # SHA of the merge base between the head and start commits, or nil when
  # either SHA is missing or no merge base is found.
  def find_base_sha
    return unless head_commit_sha && start_commit_sha

    project.merge_base_commit(head_commit_sha, start_commit_sha).try(:sha)
  rescue Rugged::OdbError
    # In case head or start commit does not exist in the repository any more
    nil
  end

  # Returns st_diffs with every value that responds to #encoding passed
  # through encode_utf8. The stored hashes are updated in place.
  # Returns an empty array when st_diffs is nil.
  def utf8_st_diffs
    (st_diffs || []).map do |diff|
      diff.each do |k, v|
        diff[k] = encode_utf8(v) if v.respond_to?(:encoding)
      end
    end
  end

  #
  # #save or #update_attributes providing changes on serialized attributes do a lot of
  # serialization and deserialization calls resulting in bad performance.
  # Using #update_columns solves the problem with just one YAML.dump per serialized attribute that we provide.
  # As a tradeoff we need to reload the current instance to properly manage time objects on those serialized
  # attributes. So to keep the same behaviour as the attribute assignment we reload the instance.
  # The difference is in the usage of
  # #write_attribute= (#update_attributes) and #raw_write_attribute= (#update_columns)
  #
  # Ex:
  #
  #   new_attributes[:st_commits].first.slice(:committed_date)
  #   => {:committed_date=>2014-02-27 11:01:38 +0200}
  #   YAML.load(YAML.dump(new_attributes[:st_commits].first.slice(:committed_date)))
  #   => {:committed_date=>2014-02-27 10:01:38 +0100}
  #
  # Does nothing when new_attributes is empty.
  def update_columns_serialized(new_attributes)
    return unless new_attributes.any?

    update_columns(new_attributes.merge(updated_at: current_time_from_proper_timezone))
    reload
  end

  # Asks the repository to keep the start, head and base commits around.
  def keep_around_commits
    repository.keep_around(start_commit_sha)
    repository.keep_around(head_commit_sha)
    repository.keep_around(base_commit_sha)
  end
end