# merge_request_diff.rb
class MergeRequestDiff < ActiveRecord::Base
2
  include Sortable
  include Importable
  include EncodingHelper

  # Commit-count threshold related to storing a diff.
  # NOTE(review): an earlier wording of this comment mentioned 500 commits
  # while the value is 100, and the constant is not referenced in the visible
  # part of this file -- confirm which limit is intended.
  COMMITS_SAFE_SIZE = 100

  belongs_to :merge_request

  state_machine :state, initial: :empty do
    state :collected
    state :overflow
    # Deprecated states: these are no longer used but these values may still occur
    # in the database.
    state :timeout
    state :overflow_commits_safe_size
    state :overflow_diff_files_limit
    state :overflow_diff_lines_limit
  end

  # Commits and diffs are kept in serialized columns.
  serialize :st_commits
  serialize :st_diffs

  validates :start_commit_sha, presence: true, unless: :importing?
  validates :head_commit_sha,  presence: true, unless: :importing?

  # Every callback below is skipped while the record is being imported.
  after_initialize :set_diff_range, unless: :importing?
  after_create :save_git_content,   unless: :importing?
  after_save :keep_around_commits,  unless: :importing?
31

32
  # Fills in any missing commit SHAs that describe the range of this diff.
  # Values that are already set are never overwritten (`||=`).
  #
  # * start_commit_sha - merge_request.target_branch_sha
  # * head_commit_sha  - for a persisted record, the SHA of last_commit
  #   (workaround for old MergeRequestDiff objects that do not have
  #   head_commit_sha in the database); for a new record,
  #   merge_request.source_branch_sha
  # * base_commit_sha  - whatever find_base_sha returns
  def set_diff_range
    self.start_commit_sha ||= merge_request.target_branch_sha
    self.head_commit_sha ||=
      if persisted?
        # `try` keeps an old record without any stored commit from raising
        # here; head_commit_sha simply stays nil in that case.
        last_commit.try(:sha)
      else
        merge_request.source_branch_sha
      end
    self.base_commit_sha ||= find_base_sha
  end

46 47 48 49 50
  # Collect information about commits and diff from repository
  # and save it to the database as serialized data.
  # Commits are saved before diffs.
  def save_git_content
    save_commits
    save_diffs
  end

53 54
  # Size of this diff: the stored real_size when it is present,
  # otherwise the size of the loaded diffs.
  def size
    real_size.presence || diffs.size
  end

57 58 59 60
  # Returns the diffs of this merge request diff.
  #
  # options - Hash of diff options. Keys read here:
  #           :ignore_whitespace_change - when truthy, the diffs are computed
  #             by a Gitlab::Git::Compare between start_commit_sha and
  #             head_commit_sha instead of being loaded from st_diffs.
  #           The whole hash is forwarded to whichever loader is used.
  #
  # Results are memoized per options hash in both branches, so a call with
  # different options (e.g. other :paths) is never answered from a result that
  # was computed for an earlier set of options.
  def diffs(options = {})
    if options[:ignore_whitespace_change]
      @diffs_no_whitespace ||= {}
      @diffs_no_whitespace[options] ||= begin
        whitespace_compare = Gitlab::Git::Compare.new(
          repository.raw_repository,
          start_commit_sha,
          head_commit_sha
        )
        whitespace_compare.diffs(options)
      end
    else
      @diffs ||= {}
      @diffs[options] ||= load_diffs(st_diffs, options)
    end
  end

73 74 75 76 77 78 79 80
  # Commit objects of this diff, built by load_commits from the serialized
  # st_commits (an empty list is used when nothing is stored). Memoized.
  def commits
    return @commits if @commits

    stored = st_commits || []
    @commits = load_commits(stored)
  end

  # The commit at the front of #commits, or nil when there are none.
  # NOTE(review): presumably the newest commit, given the name -- confirm the
  # order in which commits are stored.
  def last_commit
    commits[0]
  end

81 82 83 84
  # The commit at the back of #commits, or nil when there are none.
  # NOTE(review): presumably the oldest commit, given the name -- confirm the
  # order in which commits are stored.
  def first_commit
    commits[-1]
  end

85
  # Commit found through project.commit for base_commit_sha,
  # or nil when no base SHA is set.
  def base_commit
    return unless base_commit_sha

    project.commit(base_commit_sha)
  end

91
  # Commit found through project.commit for start_commit_sha,
  # or nil when no start SHA is set.
  def start_commit
    return unless start_commit_sha

    project.commit(start_commit_sha)
  end

  # Commit found through project.commit for head_commit_sha.
  # Falls back to last_commit when no head SHA is set.
  def head_commit
    return last_commit unless head_commit_sha

    project.commit(head_commit_sha)
  end

103 104 105 106 107 108 109 110 111 112
  # Gitlab::Diff::DiffRefs built from the base, start and head SHAs of this
  # diff. Returns nil when neither the start commit nor the base commit
  # can be found.
  def diff_refs
    if start_commit || base_commit
      Gitlab::Diff::DiffRefs.new(
        base_sha:  base_commit_sha,
        start_sha: start_commit_sha,
        head_sha:  head_commit_sha
      )
    end
  end

113
  private
114

115 116 117 118 119
  # Memoized Gitlab::Git::Compare between start_commit_sha and head_commit_sha
  # on the raw repository. merge_request.fetch_ref is called once, right
  # before the comparison object is built.
  def compare
    @compare ||=
      begin
        # Update ref for merge request
        merge_request.fetch_ref

        Gitlab::Git::Compare.new(
          repository.raw_repository,
          start_commit_sha,
          head_commit_sha
        )
      end
  end

129 130 131 132 133 134 135 136
  # Converts each commit into a hash (via #to_hash), ready to be serialized.
  def dump_commits(commits)
    commits.map { |commit| commit.to_hash }
  end

  # Wraps every stored commit entry in a Gitlab::Git::Commit and then in a
  # Commit bound to the merge request's source project.
  def load_commits(array)
    array.map do |attrs|
      raw_commit = Gitlab::Git::Commit.new(attrs)
      Commit.new(raw_commit, merge_request.source_project)
    end
  end

137
  # Load all commits related to current merge request diff from repo
  # and save it as array of hashes in st_commits db field.
  #
  # The compared commits are decorated for the merge request's source project
  # and stored in reversed order. Nothing is written when the comparison
  # yields no commits.
  def save_commits
    compared_commits = compare.commits
    return unless compared_commits.present?

    decorated = Commit.decorate(compared_commits, merge_request.source_project).reverse
    update_columns_serialized(st_commits: dump_commits(decorated))
  end

152 153 154 155 156 157 158 159
  # Converts each diff into a hash (via #to_hash), ready to be serialized.
  # Returns nil when the argument cannot be mapped over.
  def dump_diffs(diffs)
    return unless diffs.respond_to?(:map)

    diffs.map { |diff| diff.to_hash }
  end

  # Builds a Gitlab::Git::DiffCollection from stored diff hashes.
  #
  # raw     - the stored diffs; a value that does not respond to #each
  #           (e.g. nil) produces an empty collection built without options.
  # options - Hash of diff options, forwarded to the collection. When :paths
  #           is present, only diffs whose :old_path or :new_path is listed
  #           are kept.
  def load_diffs(raw, options)
    return Gitlab::Git::DiffCollection.new([]) unless raw.respond_to?(:each)

    paths = options[:paths]
    selected =
      if paths
        raw.select do |diff|
          paths.include?(diff[:old_path]) || paths.include?(diff[:new_path])
        end
      else
        raw
      end

    Gitlab::Git::DiffCollection.new(selected, options)
  end

172
  # Load diffs between branches related to current merge request diff from repo
  # and save it as array of hashes in st_diffs db field.
  #
  # Attributes written:
  # * no commits                 -> state :empty, st_diffs []
  # * otherwise real_size is taken from the diff collection, and
  #   - collection overflowed    -> state :overflow
  #   - collection has any diffs -> state :collected and the dumped diffs
  #     NOTE(review): this assignment comes last, so it replaces an :overflow
  #     state set just before it -- confirm that is intended.
  #
  # keep_around_commits is called after the attributes are written.
  def save_diffs
    new_attributes = {}
    new_diffs = []

    if commits.empty?
      new_attributes[:state] = :empty
    else
      diff_collection = compare.diffs(Commit.max_diff_options)

      if diff_collection.overflow?
        # Set our state to 'overflow' to make the #empty? and #collected?
        # methods (generated by StateMachine) return false.
        new_attributes[:state] = :overflow
      end

      new_attributes[:real_size] = diff_collection.real_size

      if diff_collection.any?
        new_diffs = dump_diffs(diff_collection)
        new_attributes[:state] = :collected
      end
    end

    new_attributes[:st_diffs] = new_diffs

    update_columns_serialized(new_attributes)

    keep_around_commits
  end

204 205
  # The target project of the merge request this diff belongs to.
  def project
    merge_request.target_project
  end

  # The repository of #project.
  def repository
    project.repository
  end
211

212 213
  # SHA of the merge base of head_commit_sha and start_commit_sha as reported
  # by project.merge_base_commit. Returns nil when either SHA is missing or
  # when no merge base commit is returned.
  def find_base_sha
    return unless head_commit_sha && start_commit_sha

    project.merge_base_commit(head_commit_sha, start_commit_sha).try(:sha)
  end

James Lopez's avatar
James Lopez committed
218 219 220 221 222 223 224 225
  # Returns the stored diffs with every value that carries an encoding passed
  # through encode_utf8. The stored hashes are updated in place and are the
  # same objects that are returned.
  #
  # Returns an empty array when st_diffs holds nothing that can be mapped
  # over (e.g. nil), instead of raising.
  def utf8_st_diffs
    return [] unless st_diffs.respond_to?(:map)

    st_diffs.map do |diff|
      diff.each do |k, v|
        diff[k] = encode_utf8(v) if v.respond_to?(:encoding)
      end
    end
  end

226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
  #
  # Writes the given attributes (plus updated_at) with a single
  # #update_columns call and then reloads the record. Does nothing and
  # returns nil when no attributes are given.
  #
  # Why not #save or #update_attributes: changes on serialized attributes go
  # through a lot of serialization and deserialization calls there, which
  # performs badly. #update_columns needs just one YAML.dump per serialized
  # attribute that we provide.
  #
  # Why reload: #update_columns uses #raw_write_attribute= rather than
  # #write_attribute=, so time objects inside the serialized attributes are
  # not handled the way attribute assignment handles them. Reloading the
  # instance keeps the behaviour the same as with attribute assignment.
  #
  # Ex:
  #
  #   new_attributes[:st_commits].first.slice(:committed_date)
  #   => {:committed_date=>2014-02-27 11:01:38 +0200}
  #   YAML.load(YAML.dump(new_attributes[:st_commits].first.slice(:committed_date)))
  #   => {:committed_date=>2014-02-27 10:01:38 +0100}
  #
  def update_columns_serialized(new_attributes)
    if new_attributes.any?
      timestamped = new_attributes.merge(updated_at: current_time_from_proper_timezone)
      update_columns(timestamped)
      reload
    end
  end
248

249
  # Asks the repository to keep around the start, head and base commits
  # of this diff, in that order.
  def keep_around_commits
    repository.keep_around(start_commit_sha)
    repository.keep_around(head_commit_sha)
    repository.keep_around(base_commit_sha)
  end
254
end