diff --git a/barkeep_server.rb b/barkeep_server.rb
index d2176e7e..a001fb61 100644
--- a/barkeep_server.rb
+++ b/barkeep_server.rb
@@ -247,6 +247,7 @@ def ensure_required_params(*required_params)
         permission = User.find(:permission => "admin").nil? ? "admin" : "normal"
         User.new(:email => email, :name => email, :permission => permission).save
       end
+      link_author_to_user(email)
       redirect session[:login_started_url] || "/"
     end
   end
@@ -559,6 +560,19 @@ def get_openid_login_redirect(openid_provider_url)
     end
   end
 
+  # If the email matches an entry in both the authors table and the users table, then create
+  # a link between them by updating the "user_id" field in the authors table.
+  def link_author_to_user(email)
+    author = Author.first(:email => email)
+    if author && author.user_id.nil?
+      user = User.first(:email => email)
+      if user
+        author.user_id = user.id
+        author.save
+      end
+    end
+  end
+
   def create_comment(repo_name, sha, filename, line_number_string, text)
     commit = MetaRepo.instance.db_commit(repo_name, sha)
     raise "No such commit." unless commit
diff --git a/lib/models.rb b/lib/models.rb
index 09adbc3c..52b8c35e 100644
--- a/lib/models.rb
+++ b/lib/models.rb
@@ -11,6 +11,7 @@
 # Auto-populate "created_at" and "updated_at" fields.
 Sequel::Model.plugin :timestamps
 
+require "models/author"
 require "models/git_repo"
 require "models/git_branch"
 require "models/user"
diff --git a/migrations/20130116081012_populate_authors_table.rb b/migrations/20130116081012_populate_authors_table.rb
new file mode 100644
--- /dev/null
+++ b/migrations/20130116081012_populate_authors_table.rb
@@ -0,0 +1,54 @@
+require "bundler/setup"
+require "pathological"
+require "migrations/migration_helper.rb"
+require "grit"
+
+# This migration populates the "authors" table by fetching all the commits from all the repos
+# and storing the unique commit authors in the "authors" table. If it finds an identical email
+# address in the "users" table then it also adds the "user_id" to the authors table entry.
+
+Sequel.migration do
+  up do
+    # This is the number of commits that we fetch at a time. It is a local variable rather than a
+    # top-level constant so that it isn't defined globally for whatever else runs in this process.
+    page_size = 100
+
+    repos = DB[:git_repos].all
+
+    # Find all the unique authors
+    authors = Hash.new { |hash, key| hash[key] = {} }
+    repos.each do |repo|
+      grit_repo = Grit::Repo.new(repo[:path])
+      total = 0
+      loop do
+        commits = grit_repo.commits("master", page_size, total)
+        commits.each { |commit| authors[commit.author.email][:name] = commit.author.name }
+        total += commits.length
+        # A page that isn't full means that we've reached the end of the history.
+        break unless commits.length == page_size
+      end
+      puts "Processed #{total} commits from repo #{repo[:path]}."
+    end
+
+    # Find matching users (by email) in the "users" table.
+    authors.keys.each do |email|
+      user = DB[:users].first(:email => email)
+      authors[email][:user_id] = user[:id] if user
+    end
+    puts "Found #{authors.length} unique authors."
+
+    # Fill in the "authors" table.
+    num_inserts = 0
+    authors.each do |key, value|
+      row = DB[:authors].first(:email => key)
+      next if row
+      DB[:authors].insert(:email => key, :name => value[:name], :user_id => value[:user_id])
+      num_inserts += 1
+    end
+    puts "Inserted #{num_inserts} new authors."
+  end
+
+  # We don't need to remove the author entries.
+  down do
+  end
+end
diff --git a/migrations/20130116151408_add_author_id_to_commits.rb b/migrations/20130116151408_add_author_id_to_commits.rb
new file mode 100644
index 00000000..263f1730
--- /dev/null
+++ b/migrations/20130116151408_add_author_id_to_commits.rb
@@ -0,0 +1,11 @@
+require "bundler/setup"
+require "pathological"
+require "migrations/migration_helper.rb"
+
+Sequel.migration do
+  change do
+    alter_table :commits do
+      add_foreign_key :author_id, :authors, :key => :id
+    end
+  end
+end
diff --git a/migrations/20130116152425_populate_author_id_field.rb b/migrations/20130116152425_populate_author_id_field.rb
new file mode 100644
--- /dev/null
+++ b/migrations/20130116152425_populate_author_id_field.rb
@@ -0,0 +1,54 @@
+require "bundler/setup"
+require "pathological"
+require "migrations/migration_helper.rb"
+require "grit"
+
+# This migration populates the "author_id" field in the "commits" table.
+
+Sequel.migration do
+  up do
+    # Fetch all the authors (expected to be small, say less than 1000) so that we don't have to do
+    # an extra SQL query for every commit to check if the author email exists. Then create a mapping
+    # from email -> author.id
+    rows = DB[:authors].all
+    authors = {}
+    rows.each do |row|
+      authors[row[:email]] = row[:id]
+    end
+
+    # Fetch all the git repos (also small, less than 100) and create a mapping from
+    # git_repo_id -> Grit::Repo
+    repos = {}
+    DB[:git_repos].each { |row| repos[row[:id]] = Grit::Repo.new(row[:path]) }
+
+    total_updates = 0
+    new_authors = 0
+    # Only the columns used below are selected, so that we don't load whole commit rows into memory.
+    commits = DB[:commits].filter(:author_id => nil).select(:id, :git_repo_id, :sha).all
+    commits.each do |row|
+      commit = repos[row[:git_repo_id]].commit(row[:sha])
+      next unless commit
+      email = commit.author.email
+      author_id = authors[email]
+      # If the author is not in our db, then add it.
+      if author_id.nil?
+        # Check if the same email exists in the users table.
+        user = DB[:users].first(:email => email)
+        user_id = user[:id] if user
+        # The insert returns the id of the new row. Add it to the hash so that later commits by the
+        # same author reuse it instead of inserting a duplicate.
+        author_id = DB[:authors].insert(:email => email, :name => commit.author.name, :user_id => user_id)
+        authors[email] = author_id
+        new_authors += 1
+      end
+      total_updates += 1
+      DB[:commits].filter(:id => row[:id]).update(:author_id => author_id)
+    end
+    puts "New authors: #{new_authors}"
+    puts "Updated commits: #{total_updates}"
+  end
+
+  # We don't need to undo this.
+  down do
+  end
+end
diff --git a/models/author.rb b/models/author.rb
new file mode 100644
--- /dev/null
+++ b/models/author.rb
@@ -0,0 +1,6 @@
+class Author < Sequel::Model
+  # This is really one_to_one, but Sequel requires the table containing the foreign key to be many_to_one.
+  many_to_one :user
+  # An author can have many commits; the "commits" table holds the "author_id" foreign key.
+  one_to_many :commits
+end
diff --git a/models/commit.rb b/models/commit.rb
--- a/models/commit.rb
+++ b/models/commit.rb
@@ -12,6 +12,9 @@ class Commit < Sequel::Model
   one_to_many :comments
   many_to_one :approved_by_user, :class => User
 
+  # Many commits can belong to the same author; the "author_id" foreign key lives in this table.
+  many_to_one :author
+
   add_association_dependencies :comments => :destroy, :commit_files => :destroy
 
   add_filter(:message) { |message| StringFilter.escape_html(message) }
diff --git a/models/user.rb b/models/user.rb
index 37ae46e6..6ade5607 100644
--- a/models/user.rb
+++ b/models/user.rb
@@ -12,6 +12,7 @@ class User < Sequel::Model
 
   one_to_many :saved_searches, :order => [:user_order.desc]
   one_to_many :comments
+  one_to_one :author
 
   ONE_YEAR = 365
 
diff --git a/resque_jobs/db_commit_ingest.rb b/resque_jobs/db_commit_ingest.rb
--- a/resque_jobs/db_commit_ingest.rb
+++ b/resque_jobs/db_commit_ingest.rb
@@ -44,6 +44,7 @@ def self.perform(repo_name, remote_name)
 
       page_of_rows_to_insert = commits.map do |commit|
         next if existing_shas.include?(commit.sha)
+        author_id = insert_author_if_new(commit)
 
         {
           :git_repo_id => db_repo.id,
@@ -52,6 +53,7 @@ def self.perform(repo_name, remote_name)
           # NOTE(caleb): For some reason, the commit object you get from a remote returns nil for #date (but
           # it does have #authored_date and #committed_date. Bug?
           :date => commit.authored_date,
+          :author_id => author_id,
         }
       end
       page_of_rows_to_insert.compact!
@@ -78,4 +80,18 @@ def self.perform(repo_name, remote_name)
       Resque.enqueue(GenerateTaggedDiffs, repo_name, row[:sha])
     end
   end
+
+  # Given a new commit, insert the author into the authors table if the author does not exist.
+  # In any case, returns the author id.
+  def self.insert_author_if_new(commit)
+    email = commit.author.email
+    author = Author.first(:email => email)
+    return author.id if author
+
+    user = User.first(:email => email)
+    user_id = user.id if user
+    # The insert returns the primary key of the new row, so there is no need to query for the
+    # author again.
+    Author.insert(:email => email, :name => commit.author.name, :user_id => user_id)
+  end
 end