    Add migration which converts database and LDAP UIDs to lowercase · 0b33c1e3
    Daniel Gerhardt authored
    ARSnova Backend now converts all user IDs for database user accounts to
    lowercase. This migration converts existing accounts for compatibility.
    Documents mapped to LDAP UIDs are also reassigned to lowercase UIDs
    since most LDAP configurations use case-insensitive comparison for UIDs.
    
    This change was made to prevent duplicate account creation.
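
    A minimal sketch of the failure mode (hypothetical UIDs, not taken
    from a real directory): two logins that differ only in capitalization
    match the same LDAP entry, but form two distinct keys in a
    case-sensitive store; normalizing with lower() collapses them into a
    single account.

        # "JDoe" and "jdoe" are hypothetical UIDs; LDAP matches both.
        accounts = {}
        for uid in ["JDoe", "jdoe"]:
            accounts.setdefault(uid.lower(), {"username": uid.lower()})
        assert len(accounts) == 1  # one account after normalization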
migrations.py
import couchconnection
import json
import re

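# arsnova_connection reads the CouchDB settings from arsnova.properties and
# returns the database name together with a connection wrapper.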
(db, conn) = couchconnection.arsnova_connection("/etc/arsnova/arsnova.properties")

migrations_document_id = "arsnova_migrations"
db_url = "/" + db
migrations_url = db_url + "/" + migrations_document_id

def bump(next_version):
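    # Read the migrations document, set its version to next_version,
    # and write it back; returns the raw response body.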
    conn.request("GET", migrations_url)
    res = conn.getresponse()
    migration = json.loads(res.read())
    migration["version"] = next_version
    res = conn.json_put(migrations_url, json.dumps(migration))
    return res.read()

def migrate(migration):
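    # Apply all pending schema migrations in order. Each block below handles
    # the step from one version to the next and then bumps the stored version.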
    global db_url, migrations_url
    all_docs_url = db_url + "/_all_docs"
    bulk_url = db_url + "/_bulk_docs"
    cleanup_url = db_url + "/_view_cleanup"
    current_version = migration["version"]

    # Changes to 'skill_question' and 'skill_question_answer':
    #   added 'questionVariant' field, defaulting to 'lecture' value
    if current_version == 0:
        def question_migration():
            questions = "{ \"map\": \"function(doc) { if (doc.type == 'skill_question') emit(doc._id, doc); }\" }"
            answers = "{ \"map\": \"function(doc) { if (doc.type == 'skill_question_answer') emit(doc._id, doc); }\" }"

            # We are doing three steps:
            #   1) Load all documents we are going to migrate in bulk
            #   2) Each document that is not migrated yet is changed
            #   3) Update all changed documents in bulk
            #
            # Because the documents could change in the database while
            # we perform any of these steps, we will get an error for
            # those documents. To solve this we repeat all steps until
            # no more errors occur.
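            # CouchDB's _bulk_docs endpoint reports such conflicts per
            # document as {"error": "conflict"} entries in the response
            # instead of failing the whole request.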
            def migrate_with_temp_view(temp_view):
                while True:
                    res = conn.temp_view(db_url, temp_view)
                    doc = json.loads(res.read())
                    ds = []
                    for col in doc["rows"]:
                        val = col["value"]
                        if "questionVariant" not in val:
                            ds.append(val)
                    for d in ds:
                        d["questionVariant"] = "lecture"
                    res = conn.json_post(bulk_url, json.dumps({"docs":ds}))
                    result_docs = json.loads(res.read())
                    errors = []
                    for result in result_docs:
                        if "error" in result:
                            errors.append(result)
                    if not errors:
                        # All documents were migrated.
                        # jump out of loop and exit this function
                        break
            print "Migrating all Question documents..."
            migrate_with_temp_view(questions)
            print "Migrating all Answer documents..."
            migrate_with_temp_view(answers)

        # skill_question
        question_migration()
        # bump database version
        current_version = 1
        print bump(current_version)

    if current_version == 1:
        print "Deleting obsolete food vote design document..."
        if not conn.delete(db_url + "/_design/food_vote"):
            print "Food vote design document not found"
        # bump database version
        current_version = 2
        print bump(current_version)

    if current_version == 2:
        print "Deleting obsolete user ranking, understanding, and admin design documents..."
        if not conn.delete(db_url + "/_design/user_ranking"):
            print "User ranking design document not found"
        if not conn.delete(db_url + "/_design/understanding"):
            print "Understanding design document not found"
        if not conn.delete(db_url + "/_design/admin"):
            print "Admin design document not found"
        # bump database version
        current_version = 3
        print bump(current_version)

    if current_version == 3:
        def add_variant_to_freetext_abstention_answers():
            answers = "{ \"map\": \"function(doc) { if (doc.type == 'skill_question_answer' && typeof doc.questionVariant === 'undefined' && doc.abstention == true) emit(doc._id, doc.questionId); }\" }"

            # get all bug-affected answer documents
            res = conn.temp_view_with_params(db_url, "?include_docs=true", answers)
            doc = json.loads(res.read())
            questions = []
            answers = []
            for col in doc["rows"]:
                questions.append(col["value"])
                answers.append(col["doc"])
            # bulk fetch all (unique) question documents of which we found problematic answers
            res = conn.json_post(all_docs_url + "?include_docs=true", json.dumps({"keys":list(set(questions))}))
            result_docs = json.loads(res.read())
            # we need to find the variant of each question so that we can put it into the answer document
            questions = []
            for result in result_docs["rows"]:
                questions.append(result["doc"])
            for answer in answers:
                for question in questions:
                    if answer["questionId"] == question["_id"]:
                        answer["questionVariant"] = question["questionVariant"]
            # bulk update the answers
            res = conn.json_post(bulk_url, json.dumps({"docs":answers}))
            result_docs = json.loads(res.read())
            print result_docs

        print "Fixing freetext answers (abstentions) with missing question variant (#13313)..."
        add_variant_to_freetext_abstention_answers()
        # bump database version
        current_version = 4
        print bump(current_version)

    if current_version == 4:
        print "Deleting obsolete learning_progress design documents..."
        if not conn.delete(db_url + "/_design/learning_progress_course_answers"):
            print "course_answers design document not found"
        if not conn.delete(db_url + "/_design/learning_progress_maximum_value"):
            print "maximum_value design document not found"
        if not conn.delete(db_url + "/_design/learning_progress_user_values"):
            print "learning_progress_user_values design document not found"
        # bump database version
        current_version = 5
        print bump(current_version)

    if current_version == 5:
        print "Deleting misspelled 'statistic' design document..."
        if not conn.delete(db_url + "/_design/statistic"):
            print "'statistic' design document not found"
        # bump database version
        current_version = 6
        print bump(current_version)

    if current_version == 6:
        print "Transforming pre-picture-answer freetext questions into text only questions (#15613)..."
        def add_text_answer_to_freetext_questions():
            old_freetext_qs = "{ \"map\": \"function(doc) { if (doc.type == 'skill_question' && doc.questionType == 'freetext' && typeof doc.textAnswerEnabled === 'undefined') emit(doc._id); }\" }"

            # get all bug-affected documents
            res = conn.temp_view_with_params(db_url, "?include_docs=true", old_freetext_qs)
            doc = json.loads(res.read())
            questions = []
            for result in doc["rows"]:
                questions.append(result["doc"])
            # add missing properties
            for question in questions:
                question["imageQuestion"] = False
                question["textAnswerEnabled"] = True
            # bulk update the documents
            res = conn.json_post(bulk_url, json.dumps({"docs":questions}))
            result_docs = json.loads(res.read())
            print result_docs

        add_text_answer_to_freetext_questions()
        # bump database version
        current_version = 7
        print bump(current_version)

    if current_version == 7:
        print "Transforming session documents to new learning progress options format (#15617)..."
        def change_learning_progress_property_on_session():
            sessions = "{ \"map\": \"function(doc) { if (doc.type == 'session' && doc.learningProgressType) emit(doc._id); }\" }"

            res = conn.temp_view_with_params(db_url, "?include_docs=true", sessions)
            doc = json.loads(res.read())
            sessions = []
            for result in doc["rows"]:
                sessions.append(result["doc"])
            # change property 'learningProgressType' to 'learningProgressOptions'
            for session in sessions:
                currentProgressType = session.pop("learningProgressType", "questions")
                progressOptions = { "type": currentProgressType, "questionVariant": "" }
                session["learningProgressOptions"] = progressOptions
            # bulk update sessions
            res = conn.json_post(bulk_url, json.dumps({"docs":sessions}))
            result_docs = json.loads(res.read())
            print result_docs

        change_learning_progress_property_on_session()
        # bump database version
        current_version = 8
        print bump(current_version)

    if current_version == 8:
        print "Migrating DB and LDAP user IDs to lowercase..."
        conn.request("GET", db_url + "/_design/user/_view/all")
        res = conn.getresponse()
        doc = json.loads(res.read())
        affected_users = {}
        unaffected_users = []
        bulk_docs = []

        # Look for user documents where user ID is not in lowercase
        #   1) Delete document if account has not been activated
        #   2) Lock account if a lowercase version already exists
        #   3) Convert user ID to lowercase if only one capitalization exists
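        # Example (hypothetical UIDs): documents keyed "JDoe" and "JDOE" are
        # both collected under affected_users["jdoe"]; an existing "jdoe"
        # account is recorded in unaffected_users as a migration target.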
        for user_doc in doc["rows"]:
            if user_doc["key"] != user_doc["key"].lower():
                # create a list of user documents since there might be multiple
                # items for different capitalizations
                affected_users.setdefault(user_doc["key"].lower(), []).append(user_doc["value"])
            else:
                unaffected_users.append(user_doc["key"])
        for uid, users in affected_users.iteritems():
            migration_targets = []
            for user in users:
                if "activationKey" in user:
                    print "User %s has not been activated. Deleting document %s..." % (user["username"], user["_id"])
                    conn.delete(db_url + "/" + user["_id"])
                elif uid in unaffected_users:
                    print "Migration target exists. Locking duplicate user %s (document %s)..." % (user["username"], user["_id"])
                    user["locked"] = True
                    bulk_docs.append(user)
                else:
                    migration_targets.append(user)
            if len(migration_targets) > 1:
                print "Cannot migrate some users automatically. Conflicting duplicate users found:"
                for user in migration_targets:
                    print "Locking user %s (document %s)..." % (user["username"], user["_id"])
                    user["locked"] = True
                    bulk_docs.append(user)
            elif migration_targets:
                # exactly one candidate remains; migrate it to the lowercase UID
                user = migration_targets[0]
                print "Migrating user %s (document %s)..." % (user["username"], user["_id"])
                user["username"] = uid
                bulk_docs.append(user)

        # Look for data where assigned user's ID is not in lowercase
        #   1) Migrate if user ID was affected by previous migration step
        #   2) Exclude Facebook and Google account IDs
        #   3) Exclude guest account IDs
        #   4) Migrate all remaining IDs (LDAP)
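        # The map function below is parameterized with the document type and
        # the name of the property holding the user ID; guest IDs (prefix
        # 'Guest') are already filtered out inside the view.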
        def reassign_data(doc_type, user_prop):
            print "Reassigning %s data to migrated users..." % doc_type
            migration_view = "{ \"map\": \"function(doc) { function check(doc, type, uid) { return doc.type === type && uid !== uid.toLowerCase() && uid.indexOf('Guest') !== 0; } if (check(doc, '%s', doc.%s)) { emit(doc._id, doc); }}\" }" % (doc_type, user_prop)
            res = conn.temp_view(db_url, migration_view)
            doc = json.loads(res.read())
            print "Documents: %d" % len(doc["rows"])
            for affected_doc in doc["rows"]:
                val = affected_doc["value"]
                print affected_doc["id"], val[user_prop]
                # exclude Facebook and Google accounts from migration (might be
                # redundant)
                if (not re.match("https?:", val[user_prop]) and not "@" in val[user_prop]) or val[user_prop].lower() in affected_users:
                    val[user_prop] = val[user_prop].lower()
                    bulk_docs.append(val)
                else:
                    print "Skipped %s (Facebook/Google account)" % val[user_prop]

        reassign_data("session", "creator")
        reassign_data("interposed_question", "creator")
        reassign_data("skill_question_answer", "user")
        reassign_data("logged_in", "user")
        reassign_data("motdlist", "username")

        # bulk update users and assignments
        res = conn.json_post(bulk_url, json.dumps({"docs": bulk_docs}))
        if res:
            res.read()
            # bump database version
            current_version = 9
            print bump(current_version)

    if current_version == 9:
        # Next migration goes here
        pass

    conn.json_post(cleanup_url)

conn.request("GET", migrations_url)
res = conn.getresponse()
mig = res.read()
if res.status == 404:
    res = conn.json_post(db_url, json.dumps({"_id":migrations_document_id, "version":0}))
    res.read()
    migrate({"version":0})
else:
    migrate(json.loads(mig))