Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/diaspora/diaspora.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/script
diff options
context:
space:
mode:
authorRaphael Sofaer <raphael@joindiaspora.com>2011-03-14 19:38:54 +0300
committerRaphael Sofaer <raphael@joindiaspora.com>2011-03-14 21:39:31 +0300
commit4b4654a4084d797c732e69207e4c60c3beb2264d (patch)
tree06c1271b05e0aaaacb0bc7e31e59e2096c8fff9c /script
parentadbd23ec5031d7fc2bfbf0b4c4924aeb54130d66 (diff)
Changing message to text, partway done, deleted data conversion
Diffstat (limited to 'script')
-rw-r--r--script/sanitize_database.rb177
1 files changed, 0 insertions, 177 deletions
diff --git a/script/sanitize_database.rb b/script/sanitize_database.rb
deleted file mode 100644
index 27efdf1a6..000000000
--- a/script/sanitize_database.rb
+++ /dev/null
@@ -1,177 +0,0 @@
-@start_time = Time.now.to_i
-@count = 0
-
-def sanitize_user(user)
- log "Sanitizing user #{@count += 1}: #{user.username}"
- people = Person.all(:owner_id => user.id)
- log "#{user.username} has #{people.count} person objects."
-
- people.sort_by {|person| contact_count(person)}
-
- keep_person = people.last
- dumb_people = people[0..(people.count)]
- d_p_ids = dumb_people.map{|p| "ObjectId('#{p.id.to_s}')"}
- d_p_ids_json = "[#{d_p_ids.join(',')}]"
-
- ["posts", "comments", "contacts"].each do |table_name|
- eval_string = <<-JS
- db.#{table_name}.find({ "person_id" : {"$in" : #{d_p_ids_json}}}).forEach(function(document){
- db.#{table_name}.update({"_id" : document["_id"]}, {"$set" : { "person_id" : ObjectId("#{keep_person.id.to_s}")}});
- });
- JS
- MongoMapper.database.eval eval_string
- end
-
- ['from_id', 'to_id'].each do |key|
- eval_string = <<-JS
- db.requests.find({ "#{key}" : {"$in" : #{d_p_ids_json}}}).forEach(function(document){
- db.requests.update({"_id" : document["_id"]}, {"$set" : { "#{key}" : ObjectId("#{keep_person.id.to_s}")}});
- });
- JS
- MongoMapper.database.eval eval_string
- end
-
- "Ids for user #{user.username} set to one person"
-
- dumb_people.each{|dumb| dumb.delete}
- if user.serialized_private_key
- keep_person.serialized_public_key = OpenSSL::PKey::RSA.new(user.serialized_private_key).public_key
- keep_person.save
- else
- log "#{user.username} HAS NO ENCRYPTION KEY"
- end
-end
-
-def log string
- time_diff = Time.now.to_i - @start_time
- puts "#{time_diff}s; #{string}"
-end
-
-def contact_count person
- @contact_counts ||= {}
- return @contact_counts[person.id] if @contact_counts[person.id]
- query_result = @contacts_for_people_collection.find("_id" => person.id).first
-
- if query_result
- @contact_counts[person.id] = query_result["value"]
- else
- @contact_counts[person.id] = 0
- end
-
- @contact_counts[person.id]
-end
-def get_user_ids
- cmd = BSON::OrderedHash.new
- cmd["mapreduce"] = "people"
- cmd["map"] = 'function(){ emit(this["owner_id"], 1)};'
- cmd["reduce"] = 'function(key, vals) {' +
- 'var sum=0;' +
- 'for(var i in vals) sum += vals[i];' +
- 'return sum;' +
- '};'
- result = MongoMapper.database.command(cmd)
- collection = MongoMapper.database.collection(result["result"])
- collection.find("value" => {"$gte" => 2}).map{|r| r["_id"]}
-end
-
-def contacts_for_people_collection
- cmd = BSON::OrderedHash.new
- cmd["mapreduce"] = "contacts"
- cmd["map"] = 'function(){ emit(this["person_id"], 1)};'
- cmd["reduce"] = 'function(key, vals) {' +
- 'var sum=0;' +
- 'for(var i in vals) sum += vals[i];' +
- 'return sum;' +
- '};'
- result = MongoMapper.database.command(cmd)
- MongoMapper.database.collection(result["result"])
-end
-
-user_ids = get_user_ids
-
-@contacts_for_people_collection = contacts_for_people_collection
-users = User.where(:id.in => user_ids).all
-log "#{users.size} Users retreived."
-users.each{ |user| sanitize_user(user) }
-
-log "Eliminating local people with no corresponding user."
-
-MongoMapper.database.eval <<-MOREJS
-db.people.find().forEach(
- function(doc){
- if(doc["owner_id"] != null && db.users.count({"_id" : doc["owner_id"]}) == 0){
- db.people.remove({"_id" : doc["_id"]});
- }
- }
-);
-MOREJS
-
-def dup_user_emails
- cmd = BSON::OrderedHash.new
- cmd["mapreduce"] = "users"
- cmd["map"] = 'function(){ emit(this["email"], 1)};'
- cmd["reduce"] = 'function(key, vals) {' +
- 'var sum=0;' +
- 'for(var i in vals) sum += vals[i];' +
- 'return sum;' +
- '};'
- result = MongoMapper.database.command(cmd)
- coll = MongoMapper.database.collection(result["result"])
- user_emails = coll.find("value" => {"$gte" => 2}).map{|r| r["_id"]}
-end
-
-emails = dup_user_emails
-log "Eliminating #{emails.count} users with duplicate emails"
-
-users_coll = MongoMapper.database.collection("users")
-users_coll.remove("email" => {"$in" => emails})
-
-def dup_requests
- cmd = BSON::OrderedHash.new
- cmd["mapreduce"] = "requests"
- cmd["map"] = 'function(){ emit(this["from_id"].toString() + "," + this["to_id"].toString(), {"array" : [this["_id"]], "count" : 1 })};'
- cmd["reduce"] = 'function(key, vals) {' +
- 'var result = {"array" : [], "count" : 0};' +
- 'for(var i in vals){' +
- 'result["array"] = result["array"].concat(vals[i]["array"]);' +
- 'result["count"] += vals[i]["count"];' +
- '}' +
- 'return result;' +
- '};'
- result = MongoMapper.database.command(cmd)
- coll = MongoMapper.database.collection(result["result"])
- #FIND WHERE "array" size is greater than 1
- coll.find({"value.count" => {"$gte" => 2}}).map{|r| r["value"]["array"]}
-end
-non_unique_requests = dup_requests
-non_unique_requests.each{|request_id_array| request_id_array.pop}
-non_unique_requests.flatten!
-
-log "Eliminating #{non_unique_requests.length} duplicate requests"
-req_coll = MongoMapper.database.collection("requests")
-req_coll.remove("_id" => {"$in" => non_unique_requests})
-
-def dup_contacts
- cmd = BSON::OrderedHash.new
- cmd["mapreduce"] = "contacts"
- cmd["map"] = 'function(){ emit(this["person_id"].toString() + "," + this["user_id"].toString(), {"array" : [this["_id"]], "count" : 1 })};'
- cmd["reduce"] = 'function(key, vals) {' +
- 'var result = {"array" : [], "count" : 0};' +
- 'for(var i in vals){' +
- 'result["array"] = result["array"].concat(vals[i]["array"]);' +
- 'result["count"] += vals[i]["count"];' +
- '}' +
- 'return result;' +
- '};'
- result = MongoMapper.database.command(cmd)
- coll = MongoMapper.database.collection(result["result"])
- #FIND WHERE "array" size is greater than 1
- coll.find({"value.count" => {"$gte" => 2}}).map{|r| r["value"]["array"]}
-end
-non_unique_contacts = dup_contacts
-non_unique_contacts.each{|contact_id_array| contact_id_array.pop}
-non_unique_contacts.flatten!
-
-log "Eliminating #{non_unique_contacts.length} duplicate contacts"
-req_coll = MongoMapper.database.collection("contacts")
-req_coll.remove("_id" => {"$in" => non_unique_contacts})