"""Data migration: add ``created`` and ``origin`` columns to ``users``.

Originally added as ``migrations/20240327_add_created_table.py``.
"""
import csv
from datetime import datetime

import dataset

import rabbit_config as rcfg


def _parse_last_update(value):
    """Return ``value`` as a ``datetime``.

    ``None`` and ready-made ``datetime`` objects are passed through
    unchanged; anything else is handed to ``datetime.fromisoformat``.
    """
    if value is None or isinstance(value, datetime):
        return value
    return datetime.fromisoformat(value)


def upgrade():
    """Populate the new ``created`` and ``origin`` columns of ``users``.

    Steps:
      1. Read every ``username`` / ``last_update`` pair from the ``users``
         table of ``user_reg.db`` (located under ``rcfg.db_path``).
      2. For users listed in ``account-registration-history.csv`` take
         ``created`` from the second CSV field and ``origin`` from the
         third one.
      3. For every other user fall back to ``last_update`` as ``created``
         and record ``"last_update"`` as the ``origin``.
      4. Create the two columns and write the values back, matching rows
         on ``username``.

    Raises:
        OSError: if the CSV file cannot be opened.
        ValueError: if a timestamp is not in a format accepted by
            ``datetime.fromisoformat``.
    """
    db_path = rcfg.db_path
    db_name = "user_reg.db"
    csv_filename = "./migrations/account-registration-history.csv"

    db = dataset.connect(f"sqlite:///{db_path}/{db_name}")
    try:
        table = db["users"]

        # Index the existing rows by username so CSV rows can be matched
        # with a single dictionary lookup.
        users = {
            record["username"]: dict(record)
            for record in db.query("SELECT username, last_update FROM users")
        }

        # newline="" is what the csv module asks for so that embedded
        # newlines inside quoted fields are handled correctly.
        with open(csv_filename, newline="") as f:
            reader = csv.reader(f, delimiter=",")
            # Skip the header; the default keeps an empty file from
            # raising StopIteration.
            next(reader, None)
            for row in reader:
                # Blank or truncated lines carry no usable data.
                if len(row) < 3:
                    continue
                username, created_raw, origin = row[:3]
                user = users.get(username)
                if user is None:
                    continue
                # The last three characters of the CSV timestamp are
                # dropped before parsing -- presumably a suffix that
                # fromisoformat cannot handle; TODO confirm against the
                # CSV export format.
                user["created"] = datetime.fromisoformat(created_raw[:-3])
                user["origin"] = origin

        for user in users.values():
            # last_update is only needed as a fallback; it must not be
            # written back to the database.
            last_update = user.pop("last_update", None)
            if "created" not in user:
                user["created"] = _parse_last_update(last_update)
                user["origin"] = "last_update"

        # Create new columns
        table.create_column("created", db.types.datetime)
        table.create_column("origin", db.types.text)

        # Update database in chunks, matching rows on username
        table.update_many(list(users.values()), ["username"], ensure=True)
    finally:
        # Release the connection even when the migration fails half-way.
        db.close()


if __name__ == "__main__":
    upgrade()
-- GitLab