diff --git a/pickle-list-policy-data.ipynb b/pickle-list-policy-data.ipynb index 40518a5632153228c6140b6a63ee093ddf89b95f..9dee8d7c77dbca736bfc433b8e174373af918316 100644 --- a/pickle-list-policy-data.ipynb +++ b/pickle-list-policy-data.ipynb @@ -59,15 +59,54 @@ "cell_type": "code", "execution_count": null, "id": "932707e6", - "metadata": {}, + "metadata": { + "tags": [ + "parameters" + ] + }, "outputs": [], "source": [ - "dirname=\"\" # directory to fine files to pickle\n", + "dirname=\"data/list-20191520.list.gather-info.d\" # directory to fine files to pickle\n", "glob_pattern = \"*.gz\" # file name glob pattern to match, can be file name for individual file\n", "line_regex_filter = \".*\" # regex to match lines of interest in file\n", + "\n", + "verbose = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "833be559", + "metadata": {}, + "outputs": [], + "source": [ + "pickledir=f\"{dirname}/pickles\"" + ] + }, + { + "cell_type": "markdown", + "id": "47ea1d93", + "metadata": {}, + "source": [ + "dirname=\"data/list-17404604.list.gather-info.d/\" # directory to fine files to pickle\n", + "glob_pattern = \"list-*.gz\" # file name glob pattern to match, can be file name for individual file\n", + "line_regex_filter = \".*\" # regex to match lines of interest in file\n", + "pickledir=f\"{dirname}/pickles\"\n", + "\n", + "verbose = True" + ] + }, + { + "cell_type": "markdown", + "id": "07ef745a", + "metadata": {}, + "source": [ + "dirname=\"data/list-16144464.list.gather-info.d/\" # directory to fine files to pickle\n", + "glob_pattern = \"list-*\" # file name glob pattern to match, can be file name for individual file\n", + "line_regex_filter = \".*\" # regex to match lines of interest in file\n", "pickledir=f\"{dirname}/pickles\"\n", "\n", - "verbose = False" + "verbose = True" ] }, { @@ -185,11 +224,11 @@ " ## Write the pickled data\n", "\n", " # only create dir if there is data to pickle\n", - " if (len(parsedfiles) and not os.path.isdir(pickledir)):\n", + " if (not os.path.isdir(pickledir)):\n", " os.mkdir(pickledir)\n", "\n", - " if (verbose): print(f\"pickling: {file}\")\n", - " parsedfiles[file].to_pickle(f\"{pickledir}/{file}\")" + " if (verbose): print(f\"pickling: {filename}\")\n", + " df.to_pickle(f\"{pickledir}/{filename}\")" ] } ],