Commit e1099f97 authored by Dmitriy Zaporozhets
Browse files

Merge branch 'google-code-import-performance' into 'master'

Decrease memory use and increase performance of Google Code importer.

Addresses private issue https://dev.gitlab.org/gitlab/gitlabhq/issues/2241.

See merge request !536
parents ea939d46 38982136
......@@ -54,6 +54,11 @@ def create_user_map
render "new_user_map" and return
end
# This is the default, so let's not save it into the database.
user_map.reject! do |key, value|
value == Gitlab::GoogleCodeImport::Client.mask_email(key)
end
session[:google_code_user_map] = user_map
flash[:notice] = "The user map has been saved. Continue by selecting the projects you want to import."
......
......@@ -27,7 +27,6 @@
# import_type :string(255)
# import_source :string(255)
# avatar :string(255)
# import_data :text
#
require 'carrierwave/orm/activerecord'
......@@ -51,8 +50,6 @@ class Project < ActiveRecord::Base
default_value_for :wall_enabled, false
default_value_for :snippets_enabled, gitlab_config_features.snippets
serialize :import_data, JSON
# set last_activity_at to the same as created_at
after_create :set_last_activity_at
def set_last_activity_at
......@@ -117,6 +114,8 @@ def set_last_activity_at
has_many :users_star_projects, dependent: :destroy
has_many :starrers, through: :users_star_projects, source: :user
has_one :import_data, dependent: :destroy, class_name: "ProjectImportData"
delegate :name, to: :owner, allow_nil: true, prefix: true
delegate :members, to: :team, prefix: true
......@@ -267,8 +266,7 @@ def add_import_job
end
def clear_import_data
self.import_data = nil
self.save
self.import_data.destroy if self.import_data
end
def import?
......
# == Schema Information
#
# Table name: project_import_data
#
# id :integer not null, primary key
# project_id :integer
# data :text
#
require 'carrierwave/orm/activerecord'
require 'file_size_validator'
# Import payload attached to a single project, kept in its own table
# (see the `has_one :import_data` association on Project).
class ProjectImportData < ActiveRecord::Base
  # `data` lives in a text column and is (de)serialized as JSON.
  serialize :data, JSON

  # Every record points at the project it was created for and is
  # invalid without one.
  belongs_to :project
  validates :project, presence: true
end
# Creates the `project_import_data` table that backs ProjectImportData.
class CreateProjectImportData < ActiveRecord::Migration
  def change
    create_table :project_import_data do |t|
      # `belongs_to` is an alias of `references`; it adds the `project_id` column.
      t.belongs_to :project
      # Serialized import payload (the model declares it as JSON).
      t.text :data
    end
  end
end
# Drops the legacy `projects.import_data` column; Project now reaches its
# import payload through the `has_one :import_data` association instead.
class RemoveImportDataFromProject < ActiveRecord::Migration
  def change
    # The column type is ignored when removing, but it is what lets Rails
    # reverse this `change` (it re-adds the column as `text` on rollback).
    # Without it the migration raises ActiveRecord::IrreversibleMigration.
    # Rolling back restores the column only, not the values it held.
    remove_column :projects, :import_data, :text
  end
end
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20150413192223) do
ActiveRecord::Schema.define(version: 20150417122318) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -323,6 +323,11 @@
add_index "oauth_applications", ["owner_id", "owner_type"], name: "index_oauth_applications_on_owner_id_and_owner_type", using: :btree
add_index "oauth_applications", ["uid"], name: "index_oauth_applications_on_uid", unique: true, using: :btree
create_table "project_import_data", force: true do |t|
t.integer "project_id"
t.text "data"
end
create_table "projects", force: true do |t|
t.string "name"
t.string "path"
......@@ -348,7 +353,6 @@
t.integer "star_count", default: 0, null: false
t.string "import_type"
t.string "import_source"
t.text "import_data"
end
add_index "projects", ["created_at", "id"], name: "index_projects_on_created_at_and_id", using: :btree
......
......@@ -5,7 +5,10 @@ class Importer
def initialize(project)
@project = project
@repo = GoogleCodeImport::Repository.new(project.import_data["repo"])
import_data = project.import_data.try(:data)
repo_data = import_data["repo"] if import_data
@repo = GoogleCodeImport::Repository.new(repo_data)
@closed_statuses = []
@known_labels = Set.new
......@@ -27,9 +30,10 @@ def execute
def user_map
@user_map ||= begin
user_map = Hash.new { |hash, user| hash[user] = Client.mask_email(user) }
user_map = Hash.new { |hash, user| Client.mask_email(user) }
stored_user_map = project.import_data["user_map"]
import_data = project.import_data.try(:data)
stored_user_map = import_data["user_map"] if import_data
user_map.update(stored_user_map) if stored_user_map
user_map
......@@ -58,24 +62,7 @@ def import_labels
def import_issues
return unless repo.issues
last_id = 0
deleted_issues = []
repo.issues.each do |raw_issue|
while raw_issue["id"] > last_id + 1
last_id += 1
issue = project.issues.create!(
title: "Deleted issue",
description: "*This issue has been deleted*",
author_id: project.creator_id,
state: "closed"
)
deleted_issues << issue
end
last_id = raw_issue["id"]
while raw_issue = repo.issues.shift
author = user_map[raw_issue["author"]["name"]]
date = DateTime.parse(raw_issue["published"]).to_formatted_s(:long)
......@@ -112,7 +99,8 @@ def import_issues
end
end
issue = project.issues.create!(
issue = Issue.create!(
project_id: project.id,
title: raw_issue["title"],
description: body,
author_id: project.creator_id,
......@@ -121,39 +109,46 @@ def import_issues
)
issue.add_labels_by_names(labels)
if issue.iid != raw_issue["id"]
issue.update_attribute(:iid, raw_issue["id"])
end
import_issue_comments(issue, comments)
end
deleted_issues.each(&:destroy!)
end
def import_issue_comments(issue, comments)
comments.each do |raw_comment|
next if raw_comment.has_key?("deletedBy")
content = format_content(raw_comment["content"])
updates = format_updates(raw_comment["updates"])
attachments = format_attachments(issue.iid, raw_comment["id"], raw_comment["attachments"])
next if content.blank? && updates.blank? && attachments.blank?
author = user_map[raw_comment["author"]["name"]]
date = DateTime.parse(raw_comment["published"]).to_formatted_s(:long)
body = format_issue_comment_body(
raw_comment["id"],
author,
date,
content,
updates,
attachments
)
Note.transaction do
while raw_comment = comments.shift
next if raw_comment.has_key?("deletedBy")
content = format_content(raw_comment["content"])
updates = format_updates(raw_comment["updates"])
attachments = format_attachments(issue.iid, raw_comment["id"], raw_comment["attachments"])
next if content.blank? && updates.blank? && attachments.blank?
author = user_map[raw_comment["author"]["name"]]
date = DateTime.parse(raw_comment["published"]).to_formatted_s(:long)
body = format_issue_comment_body(
raw_comment["id"],
author,
date,
content,
updates,
attachments
)
issue.notes.create!(
project_id: project.id,
author_id: project.creator_id,
note: body
)
# Needs to match order of `comment_columns` below.
Note.create!(
project_id: project.id,
noteable_type: "Issue",
noteable_id: issue.id,
author_id: project.creator_id,
note: body
)
end
end
end
......@@ -232,7 +227,7 @@ def escape_for_markdown(s)
def create_label(name)
color = nice_label_color(name)
project.labels.create!(name: name, color: color)
Label.create!(project_id: project.id, name: name, color: color)
end
def format_content(raw_content)
......
......@@ -11,12 +11,7 @@ def initialize(repo, namespace, current_user, user_map = nil)
end
def execute
import_data = {
"repo" => repo.raw_data,
"user_map" => user_map
}
@project = Project.new(
project = ::Projects::CreateService.new(current_user,
name: repo.name,
path: repo.name,
description: repo.summary,
......@@ -25,21 +20,17 @@ def execute
visibility_level: Gitlab::VisibilityLevel::PUBLIC,
import_type: "google_code",
import_source: repo.name,
import_url: repo.import_url,
import_data: import_data
)
import_url: repo.import_url
).execute
if @project.save!
@project.reload
if @project.import_failed?
@project.import_retry
else
@project.import_start
end
end
import_data = project.create_import_data(
data: {
"repo" => repo.raw_data,
"user_map" => user_map
}
)
@project
project
end
end
end
......
......@@ -12,9 +12,13 @@
}
}
}
let(:project) { create(:project, import_data: import_data) }
let(:project) { create(:project) }
subject { described_class.new(project) }
before do
project.create_import_data(data: import_data)
end
describe "#execute" do
it "imports status labels" do
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment