Skip to content
Snippets Groups Projects
20240327_add_created_table.py 1.71 KiB
Newer Older
import csv
import dataset
from datetime import datetime
import rabbit_config as rcfg


def fromisoformat(date_string: str):
    regex = re.compile(
        r"(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})\.(\d{1,6})"
    )
    mo = re.search(regex, date_string)
    return datetime(*[int(m) for m in mo.groups()])


def upgrade():
    db_path = rcfg.db_path
    db_name = "user_reg.db"
    csv_filename = "./migrations/account-registration-history.csv"

    db = dataset.connect(f"sqlite:///{db_path}/{db_name}")

    # Get all username and last_update
    usernames = db.query("SELECT username, last_update FROM users")
    users = {}
    for u in usernames:
        users[u["username"]] = {
            "username": u["username"],
            "date": fromisoformat(u["last_update"]),
            "source": "account-web",
        }

    # Get created from csv file
    with open(csv_filename) as f:
        reader = csv.reader(f, delimiter=",")
        # Skip header
        next(reader)
        for row in reader:
            if row[0] in users:
                users[row[0]]["date"] = fromisoformat(row[1])
                users[row[0]]["source"] = row[2]
            else:
                users[row[0]] = {
                    "username": row[0],
                    "date": fromisoformat(row[1]),
                    "source": row[2],
                }
    data_types = dict(
        username=db.types.text, date=db.types.datetime, source=db.types.text
    )
Bo-Chun Chen's avatar
Bo-Chun Chen committed
    # Sort the entries by date
    sorted_users = sorted(users.values(), key=lambda d: d["date"])

    # Insert into database in chunks
    table = db["user_created"]
Bo-Chun Chen's avatar
Bo-Chun Chen committed
    table.insert_many(sorted_users, ensure=True, types=data_types)


if __name__ == "__main__":
    upgrade()