From be11fd51099d8350e30c2a944f39e972de48674a Mon Sep 17 00:00:00 2001 From: John-Paul Robinson <jpr@uab.edu> Date: Thu, 25 Aug 2022 13:58:52 -0500 Subject: [PATCH] Fix csv processing to address parser errors Limit field splitting to one equals sign to separate just the field name and avoid splitting values with equals signs embedded. Add explicit unix line terminator to avoid line breaks on carriage-returns that may be embedded in field values. --- scratch-log-explorations.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scratch-log-explorations.ipynb b/scratch-log-explorations.ipynb index 3d95773..b0509f3 100644 --- a/scratch-log-explorations.ipynb +++ b/scratch-log-explorations.ipynb @@ -86,7 +86,7 @@ " '''\n", " split each name=value field on = and return the value\n", " '''\n", - " return x.split(\"=\")[1]" + " return x.split(\"=\", 1)[1]" ] }, { @@ -116,6 +116,7 @@ "outputs": [], "source": [ "df = pd.read_csv(file,\n", + " lineterminator='\\n',\n", " sep=\"|\", header=0, \n", " #on_bad_lines=\"warn\", \n", " index_col=False,\n", -- GitLab