diff --git a/misc-scripts/cv-compare.R b/misc-scripts/cv-compare.R
index 3a66a89aee943a0d87fa5160cb1fb7a6d5ac5900..bda60135d102c2a800ea84ad8f5cb260fc4981f2 100644
--- a/misc-scripts/cv-compare.R
+++ b/misc-scripts/cv-compare.R
@@ -3,44 +3,58 @@
 # the GBDT error metrics, pick out the relevant columns from lm.df, and combine
 # them into a single cross validation matrix. From there, graphs comparing MAE
 # and RMSE across areas, smoothing kernels, and model types can be created
+library(sciplot)
+library(tidyverse)
 
-lmpath <- "/data/user/mdefende/Projects/ctmodel-ml/model-py/cv-out/linear-models/"
-lm.df <- list.files(path = lmpath, pattern = "*.csv", full.names = TRUE) %>%
-  sapply(read_csv, simplify=FALSE,n_max = 5) %>%
+
+lm.1200.path <- "/data/user/mdefende/Projects/ctmodel-ml/results/HCP1200/lm/cv/out"
+lm.1200 <- list.files(path = lm.1200.path, pattern = "*.csv", full.names = TRUE) %>%
+  sapply(read_csv, simplify=FALSE,n_max = 10) %>%
   bind_rows() %>%
-  mutate(region = as.factor(region),
-         sm = factor(sm,levels = c(0,2,5,10), labels = c(0,2,5,10))) %>%
-  rename(area = 'region') %>%
+  mutate(area = as.factor(area),
+         sm = factor(sm,levels = c(0,2,5,10), labels = c(0,2,5,10)),
+         method = 'lm') %>%
   mutate_if(is.numeric, abs) %>%
   mutate_at(vars(contains('mse')), sqrt) %>%
   rename('test_rmse' = 'test_mse', 'train_rmse' = 'train_mse') %>%
   group_by(area,sm) %>%
-  summarize_if(is.numeric, funs(mean, se)) %>%
+  summarize_if(is.numeric, list(mean = mean, se = se)) %>%
   ungroup() %>%
-  mutate(method = 'lm')
-
+  mutate(method = 'lm-1200')
 
-GBDTpath <- "/data/user/mdefende/Projects/ctmodel-ml/model-py/cv-out/best-GBDT"
-gbdt.df <- list.files(path = GBDTpath, pattern = "*.csv", full.names = TRUE) %>%
+gbdt.1200.path <- "/data/user/mdefende/Projects/ctmodel-ml/results/HCP1200/gbdt/cv/out/best-gbdt"
+gbdt.1200 <- list.files(path = gbdt.1200.path, pattern = "*.csv", full.names = TRUE) %>%
   sapply(read_csv, simplify=FALSE) %>%
   bind_rows() %>%
   mutate(area = as.factor(area),
-         sm = factor(sm,levels = c(0,2,5,10), labels = c(0,2,5,10)),
-         method = 'gbdt')
+         sm = factor(sm,levels = c(0,2,5,10), ordered = TRUE),
+         method = 'gbdt-1200')
+
+gbdt.786.path <- "/data/user/mdefende/Projects/ctmodel-ml/results/HCP786/gbdt/cv/out/best-gbdt"
+gbdt.786 <- list.files(path = gbdt.786.path, pattern = "*.csv", full.names = TRUE) %>%
+  sapply(read_csv, simplify=FALSE) %>%
+  bind_rows() %>%
+  mutate(area = str_replace_all(area,'_','-'),
+         area = as.factor(area),
+         sm = factor(sm,levels = c(0,2,5,10), ordered = TRUE),
+         method = 'gbdt-786')
 
 
-cv.df <- lm.df %>%
-  select(method,area,sm,test_mae_mean,test_rmse_mean) %>%
+cv.df <- lm.1200 %>%
+  dplyr::select(method,area,sm,test_mae_mean,test_rmse_mean) %>%
   rename(test_mae = test_mae_mean, test_rmse = test_rmse_mean) %>%
-  bind_rows(.,select(gbdt.df,-contains('train'))) %>%
-  mutate(method = factor(method))
+  bind_rows(.,dplyr::select(gbdt.1200,area,sm,contains('test'),method)) %>%
+  bind_rows(.,dplyr::select(gbdt.786,area,sm,contains('test'),method)) %>%
+  mutate(method = factor(method),
+         area = factor(area)) %>%
+  arrange(method,area,sm)
 
 
 # Variable to relabel facets
 method_labels <- c(BA45 = "Brodmann Area 45", 
                    dACC = "dorsal Anterior Cingulate Cortex", 
-                   V1_withRet = "V1 with Retinotopic Factors", 
-                   V1_noRet = "V1 with no Retinotopic Factors")
+                   V1-withRet = "V1 with Retinotopic Factors", 
+                   V1-noRet = "V1 with no Retinotopic Factors")
 
 
 cv.df %>%
@@ -48,7 +62,7 @@ cv.df %>%
     geom_point(size = 3) +
     geom_line(aes(group = method)) +
     facet_wrap(~area, labeller = labeller(area = method_labels)) +
-    scale_color_discrete(name = "Model Type", labels = c("GBDT", "Linear")) +
+    scale_color_discrete(name = "Model Type") +
     theme_bw() +
     ggtitle("Linear Model and GBDT Root Mean Square Error") +
     ylab("RMSE (mm)") +
diff --git a/model-py/batch/create_GBDT_cv_jobs.m b/model-py/batch/create_GBDT_cv_jobs.m
index 174086b0962a52f1509389f8f891c19fbdbfdf98..5a89247e39ddeffa0a3efa14f85713ba6e776b63 100644
--- a/model-py/batch/create_GBDT_cv_jobs.m
+++ b/model-py/batch/create_GBDT_cv_jobs.m
@@ -1,12 +1,25 @@
-datafile = '/data/user/mdefende/Projects/ctmodel-ml/data/HCP1200-dACC.csv';
+% Add project to path
+addpath(genpath('/data/user/mdefende/Projects/ctmodel-ml'))
+
+% initial parameters
+projdir = '/data/user/mdefende/Projects/ctmodel-ml';
+dataset = 'HCP1200';
 area = 'dACC';
-eta = [0.01,0.05,0.1:0.1:0.5];
-max_depth = 3:10;
-nboost = 1000:1000:15000;
-sm = 5;
+do_cv = 0;
+make_model = 1;
+
+if make_model
+    jobdir = fullfile(projdir,'results',dataset,'gbdt','models','jobs',area);
+else
+    jobdir = fullfile(projdir,'results',dataset,'gbdt','cv','jobs',area);
+end
+
+eta = [0.01,0.05,0.1,0.2,0.3];
+max_depth = 6:10;
+nboost = 15000;
+sm = [0,2,10];
 
 A = combvec(eta,max_depth,nboost,sm)';
-jobdir = ['/data/user/mdefende/Projects/ctmodel-ml/Results/HCP1200/cv-jobs/' area];
 if ~exist(jobdir,'dir')
     mkdir(jobdir)
 end
@@ -14,9 +27,9 @@ end
 for ii = 1:size(A,1)
     jobname = [area '_eta' num2str(A(ii,1)) '_depth' num2str(A(ii,2)) '_nboost' num2str(A(ii,3)) '_sm' num2str(A(ii,4)) '.sh'];
     
-    write_cv_script(jobname,jobdir,area,A(ii,1),A(ii,2),A(ii,3),A(ii,4),0,datafile)
+    write_cv_script(jobname,jobdir,projdir,area,A(ii,1),A(ii,2),A(ii,3),A(ii,4),do_cv,make_model,dataset)
     
-    if ~exist(['/data/user/mdefende/Projects/ctmodel-ml/Results/HCP1200/cv-out/' area '/' jobname(1:end-3) '.csv'],'file')
+    if ~exist(fullfile(projdir,'results',dataset,'gbdt','cv','out',area,[jobname(1:end-3) '.csv']),'file')
         system(['sbatch ' fullfile(jobdir,jobname)]);
         %disp(jobname)
     end
diff --git a/model-py/batch/create_GBDT_cv_jobs.m~ b/model-py/batch/create_GBDT_cv_jobs.m~
new file mode 100644
index 0000000000000000000000000000000000000000..bf445cf86e14260b7e8e2b2b778d7758fd9096ab
--- /dev/null
+++ b/model-py/batch/create_GBDT_cv_jobs.m~
@@ -0,0 +1,30 @@
+% Add project to path
+addpath(genpath('/data/user/mdefende/Projects/ctmodel-ml'))
+
+% initial parameters
+projdir = '/data/user/mdefende/Projects/ctmodel-ml';
+dataset = 'HCP1200';
+area = 'V1-withRet';
+make_model = 1;
+
+eta = [0.01,0.05,0.1,0.2,0.3];
+max_depth = 6:10;
+nboost = 15000;
+sm = [0,2,10];
+
+A = combvec(eta,max_depth,nboost,sm)';
+jobdir = fullfile(projdir,'results',dataset,'cv','jobs');
+if ~exist(jobdir,'dir')
+    mkdir(jobdir)
+end
+
+for ii = 1:size(A,1)
+    jobname = [area '_eta' num2str(A(ii,1)) '_depth' num2str(A(ii,2)) '_nboost' num2str(A(ii,3)) '_sm' num2str(A(ii,4)) '.sh'];
+    
+    write_cv_script(jobname,jobdir,projdir,area,A(ii,1),A(ii,2),A(ii,3),A(ii,4),0,datafile)
+    
+    if ~exist(fullfile(projdir,'results',dataset,'gbdt','cv','out',area,[jobname(1:end-3) '.csv']),'file')
+        system(['sbatch ' fullfile(jobdir,jobname)]);
+        %disp(jobname)
+    end
+end
\ No newline at end of file
diff --git a/model-py/batch/create_lm_model_jobs.m b/model-py/batch/create_lm_model_jobs.m
index b954908857f05a142c13d696567d06ceee3f3c8e..80c0d7bc9f45142fc6e98f3a90353fa403cef8e9 100644
--- a/model-py/batch/create_lm_model_jobs.m
+++ b/model-py/batch/create_lm_model_jobs.m
@@ -15,8 +15,8 @@ for ii = 1:length(areas)
         fprintf(fid,'#!/bin/bash\n');
         fprintf(fid,'#SBATCH --partition=express\n');
         fprintf(fid,'#SBATCH --time=1:00:00\n');
-        fprintf(fid,'#SBATCH --ntasks=1\n');
-        fprintf(fid,'#SBATCH --mem-per-cpu=150G\n');
+        fprintf(fid,'#SBATCH -ks=1\n');
+        fprintf(fid,'#SBATCH --mem-per-cpu=200G\n');
         fprintf(fid,['#SBATCH --output=' fullfile(jobdir,jobname(1:end-3)) '.txt\n\n']);
         
         fprintf(fid,'module load Anaconda3/5.3.0\n');
diff --git a/model-py/batch/create_model_jobs.m b/model-py/batch/create_model_jobs.m
index d28eb3703a892272e9395096b8f85b305860bc10..4de631cfc045d552b2c93c49264a22ea13e46edc 100644
--- a/model-py/batch/create_model_jobs.m
+++ b/model-py/batch/create_model_jobs.m
@@ -4,27 +4,22 @@ addpath(genpath('/data/user/mdefende/Projects/ctmodel-ml'))
 % initial parameters
 projdir = '/data/user/mdefende/Projects/ctmodel-ml';
 dataset = 'HCP1200';
-area = 'V1-noRet';
+area = 'all';
+do_cv = 0;
 make_model = 1;
 
 % Say where the cv outputs are
-cv_out = fullfile(projdir,'results',dataset,'cv','out');
+cv_out = fullfile(projdir,'results',dataset,'gbdt','cv','out','best-gbdt.csv');
+T = readtable(cv_out);
 
-% What cv parameter set are we going to keep
-eta = round([0.01,0.05,0.1:0.1:0.5],2); % There are problems with 0.3 when created by the : operator
-max_depth = 3:10;
-nboost = 1000:1000:15000;
-sm = 5;
-
-A = check_cv_new(fullfile(cv_out,area),eta,max_depth,nboost,sm);
-
-% Set the different smoothing kernels
-sm = [0,2,5,10];
+if ~strcmp(area,'all')
+    T(~strcmp(T.area,area),:) = [];
+end
 
-jobdir = fullfile(projdir,'results',dataset,'models','jobs');
+jobdir = fullfile(projdir,'results',dataset,'gbdt','models','jobs');
 
-for ii = 1:length(sm)
-    jobname = [area '_eta' num2str(A(2)) '_depth' num2str(A(3)) '_nboost' num2str(A(5) + 1) '_sm' num2str(sm(ii)) '.sh'];
-    write_cv_script(jobname,jobdir,projdir,area,A(2),A(3),A(5)+1,sm(ii),make_model,dataset)
+for ii = 1:height(T)
+    jobname = [T.area{ii} '_eta' num2str(T.eta(ii)) '_depth' num2str(T.depth(ii)) '_nboost' num2str(T.cv_index(ii) + 1) '_sm' num2str(T.sm(ii)) '.sh'];
+    write_cv_script(jobname,jobdir,projdir,T.area{ii},T.eta(ii),T.depth(ii),T.cv_index(ii)+1,T.sm(ii),do_cv,make_model,dataset)
     system(['sbatch ' fullfile(jobdir,jobname)]);
 end
diff --git a/model-py/batch/create_model_jobs.m~ b/model-py/batch/create_model_jobs.m~
new file mode 100644
index 0000000000000000000000000000000000000000..4421c77c6d3c89d142d8c4f65eb06b169acef289
--- /dev/null
+++ b/model-py/batch/create_model_jobs.m~
@@ -0,0 +1,24 @@
+% Add project to path
+addpath(genpath('/data/user/mdefende/Projects/ctmodel-ml'))
+
+% initial parameters
+projdir = '/data/user/mdefende/Projects/ctmodel-ml';
+dataset = 'HCP1200';
+area = 'V1-noRet';
+do_cv = 0;
+make_model = 1;
+
+% Say where the cv outputs are
+cv_out = fullfile(projdir,'results',dataset,'gbdt','cv','out','best-gbdt.csv');
+T = readtable(cv_out);
+
+% Set the different smoothing kernels
+sm = [0,2,5,10];
+
+jobdir = fullfile(projdir,'results',dataset,'models','jobs');
+
+for ii = 1:length(sm)
+    jobname = [area '_eta' num2str(A(2)) '_depth' num2str(A(3)) '_nboost' num2str(A(5) + 1) '_sm' num2str(sm(ii)) '.sh'];
+    write_cv_script(jobname,jobdir,projdir,area,A(2),A(3),A(5)+1,sm(ii),make_model,dataset)
+    system(['sbatch ' fullfile(jobdir,jobname)]);
+end
diff --git a/model-py/batch/write_cv_script.m b/model-py/batch/write_cv_script.m
index 69cea3da8fa5ecf36455fe177c73d7b86bbc3739..8cf0528c022a03dfd491eb1bdc3f09f2623678de 100644
--- a/model-py/batch/write_cv_script.m
+++ b/model-py/batch/write_cv_script.m
@@ -1,4 +1,4 @@
-function write_cv_script(jobname,jobdir,projdir,area,eta,maxdepth,nboost,sm,create_model,dataset)
+function write_cv_script(jobname,jobdir,projdir,area,eta,maxdepth,nboost,sm,do_cv,create_model,dataset)
 fid = fopen(fullfile(jobdir,jobname),'w');
 fprintf(fid,'#!/bin/bash\n');
 fprintf(fid,'#SBATCH --partition=pascalnodes\n');
@@ -14,10 +14,12 @@ fprintf(fid,'source activate ctmodel-ml\n\n');
 
 fprintf(fid,'cd /data/user/mdefende/Projects/ctmodel-ml/model-py\n\n');
 
-fprintf(fid,['python gbdt-cv.py ' num2str(eta) ' ' num2str(maxdepth) ' ' num2str(nboost) ' ' num2str(sm) ' ' area ' ' dataset  ' ' projdir '\n']);
-
+if do_cv
+    fprintf(fid,['python3.6 gbdt-cv.py ' num2str(eta) ' ' num2str(maxdepth) ' ' num2str(nboost) ' ' num2str(sm) ' ' area ' ' dataset  ' ' projdir '\n']);
+end
+    
 if create_model
-    fprintf(fid,['python create-GBDT-model.py ' num2str(eta) ' ' num2str(maxdepth) ' ' num2str(nboost) ' ' num2str(sm) ' ' area ' ' dataset  ' ' projdir '\n']);
+    fprintf(fid,['python3.6 create-GBDT-model.py ' num2str(eta) ' ' num2str(maxdepth) ' ' num2str(nboost) ' ' num2str(sm) ' ' area ' ' dataset  ' ' projdir '\n']);
 end
 
 fclose(fid);
diff --git a/model-py/gbdt-cv.py b/model-py/gbdt-cv.py
index d7f6429abdfa8af96ef9a08c7ab197b581a7a118..379a59de33e83db44fc1412bc963db80cb755f1a 100644
--- a/model-py/gbdt-cv.py
+++ b/model-py/gbdt-cv.py
@@ -71,7 +71,9 @@ cv_results = xgb.cv(params = param, dtrain = dfull, num_boost_round = nboost_val
 idx = cv_results['test-mae-mean'].idxmin()
 
 # Write out the cv results to a csv file
-cv_dict = {'eta': eta_val,
+cv_dict = {'area': area,
+           'sm': sm,
+           'eta': eta_val,
            'depth': max_depth_val,
            'nboost': nboost_val,
            'cv_index': idx,
diff --git a/model-py/lm-betas.py b/model-py/lm-betas.py
index f4babe867c05908d6679dfa3e73b2f552e95d574..efe3831db3a9e7719f5dc7fd6141cd8323f713e5 100644
--- a/model-py/lm-betas.py
+++ b/model-py/lm-betas.py
@@ -86,7 +86,7 @@ del df_pred
 df_x = calculate_vif_(df_x,df_y)
 
 # combine back into single dataframe and apply z-transform
-df_z = pd.concat([df_x,df_y]).apply(stats.zscore)
+df_z = pd.concat([df_x,df_y],axis = 1).apply(stats.zscore, axis = 0)
 
 # create the formula 
 features = '(' + '+'.join(df_z.drop(df_z.filter(like = 'Thick'),axis = 1).columns) + ') ** 3'
@@ -100,7 +100,7 @@ results = mod.fit()
 print(results.summary())
 
 # grab the p-value and beta values for each of the terms
-pvals = reg.pvalues
+pvals = results.pvalues
 pvals.name = 'p.value'
 
 coefs = results.params
diff --git a/model-py/notebooks/Untitled.ipynb b/model-py/notebooks/Untitled.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..ae5f35c2676fad98f626b2c05f81f3af65a82d22
--- /dev/null
+++ b/model-py/notebooks/Untitled.ipynb
@@ -0,0 +1,391 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "3.6.6 |Anaconda, Inc.| (default, Jun 28 2018, 17:14:51) \n",
+      "[GCC 7.2.0]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# basic libraries to load\n",
+    "import os\n",
+    "import sys\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "# import model generation and analysis\n",
+    "from patsy import dmatrices\n",
+    "from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
+    "import statsmodels.api as sm\n",
+    "import statsmodels.formula.api as smf\n",
+    "from scipy import stats\n",
+    "\n",
+    "# print the current python version\n",
+    "print(sys.version)\n",
+    "\n",
+    "# Define a function to repeatedly calculate variance inflation factor and drop \n",
+    "def calculate_vif_(df_x, df_y, thresh=10.0):\n",
+    "    # Set up the while loop. While we have something to drop, do ...    \n",
+    "    dropped = True\n",
+    "    while dropped:\n",
+    "        # get a list of the column names\n",
+    "        variables = list(df_x.columns)\n",
+    "\n",
+    "        # concatenate the column names into a string separated by '+'\n",
+    "        features = '+'.join(variables)\n",
+    "        \n",
+    "        # re-concatenate the dataframes for creating the design matrices\n",
+    "        df_tmp = pd.concat([df_x,df_y],axis = 1)\n",
+    "      \n",
+    "        y, X = dmatrices(df_y.columns[0] + ' ~ ' + features, df_tmp, return_type='dataframe')\n",
+    "        vif = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]\n",
+    "        vif = vif[1:len(vif)]\n",
+    "        maxloc = vif.index(max(vif))\n",
+    "\n",
+    "        thresh = 10        \n",
+    "        if max(vif) > thresh:\n",
+    "            print('dropping \\'' + variables[maxloc] +\n",
+    "                  '\\' at index: ' + str(maxloc) + ' with VIF:' + str(vif[maxloc]))\n",
+    "            df_x = df_x.drop(variables[maxloc],axis=1)\n",
+    "        else:\n",
+    "            dropped = False\n",
+    "\n",
+    "    print('Remaining variables:')\n",
+    "    print(df_x.columns)\n",
+    "    return df_x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import parameters from command line\n",
+    "projdir = '/data/user/mdefende/Projects/ctmodel-ml'\n",
+    "dataset = 'HCP1200'\n",
+    "area = 'BA45'\n",
+    "sm = '0'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# First load data\n",
+    "if 'V1' in area:\n",
+    "    data_file = projdir + '/data/' + dataset + '-V1.csv'\n",
+    "else:\n",
+    "    data_file = projdir + '/data/' + dataset + '-' + area + '.csv'\n",
+    "\n",
+    "df = pd.read_csv(data_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[Index(['Age', 'Sulc0', 'CurvK0', 'CurvH0', 'PialCurvK0', 'PialCurvH0', 'Area0',\n",
+      "       'MidArea0', 'PialArea0', 'LGI0'],\n",
+      "      dtype='object'), Index(['Thick0'], dtype='object')]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Do initial feature selection by the smoothing kernel\n",
+    "df_pred = df.filter(like=sm)\n",
+    "df_pred = df_pred[df_pred.columns.drop(list(df_pred.filter(like='Thick')))]\n",
+    "df_pred = df_pred[df_pred.columns.drop(list(df_pred.filter(like='Sulc')))]\n",
+    "\n",
+    "# If sm == 0, remove all columns with 10. Else remove column with name Sulc + sm\n",
+    "if sm == '0':\n",
+    "    df_pred = df_pred[df_pred.columns.drop(list(df_pred.filter(regex='10')))]\n",
+    "\n",
+    "# Grab the rest of the features (Age, Ecc, Pol, and unsmoothed Sulc) and concatenate them with what remains from df_pred. Only grab Ecc and Pol if withRet is designated in the area. Then grab the correct thickness column\n",
+    "if 'withRet' in area:\n",
+    "    df_x = pd.concat([df[['Age','Ecc','Pol','Sulc0']],df_pred], axis = 1)\n",
+    "else:\n",
+    "    df_x = pd.concat([df[['Age','Sulc0']],df_pred], axis = 1)\n",
+    "\n",
+    "df_y = df[['Thick' + sm]]\n",
+    "\n",
+    "print([df_x.columns,df_y.columns])\n",
+    "\n",
+    "# remove df_pred from memory\n",
+    "del df_pred"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "dropping 'MidArea0' at index: 7 with VIF:272945431961848.25\n",
+      "Remaining variables:\n",
+      "Index(['Age', 'Sulc0', 'CurvK0', 'CurvH0', 'PialCurvK0', 'PialCurvH0', 'Area0',\n",
+      "       'PialArea0', 'LGI0'],\n",
+      "      dtype='object')\n"
+     ]
+    }
+   ],
+   "source": [
+    "# calculate vif repeatedly removing features with a score above 10\n",
+    "df_x = calculate_vif_(df_x,df_y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# combine back into single dataframe and apply z-transform\n",
+    "df_z = pd.concat([df_x,df_y],axis = 1).apply(stats.zscore, axis = 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                            OLS Regression Results                            \n",
+      "==============================================================================\n",
+      "Dep. Variable:                 Thick0   R-squared:                       0.411\n",
+      "Model:                            OLS   Adj. R-squared:                  0.411\n",
+      "Method:                 Least Squares   F-statistic:                 4.700e+04\n",
+      "Date:                Thu, 12 Dec 2019   Prob (F-statistic):               0.00\n",
+      "Time:                        17:23:52   Log-Likelihood:            -1.0049e+07\n",
+      "No. Observations:             8702966   AIC:                         2.010e+07\n",
+      "Df Residuals:                 8702836   BIC:                         2.010e+07\n",
+      "Df Model:                         129                                         \n",
+      "Covariance Type:            nonrobust                                         \n",
+      "===================================================================================================\n",
+      "                                      coef    std err          t      P>|t|      [0.025      0.975]\n",
+      "---------------------------------------------------------------------------------------------------\n",
+      "Intercept                          -0.3263      0.001   -612.789      0.000      -0.327      -0.325\n",
+      "Age                                -0.0375      0.000    -79.024      0.000      -0.038      -0.037\n",
+      "Sulc0                              -0.1811      0.001   -293.381      0.000      -0.182      -0.180\n",
+      "CurvK0                             -0.1594      0.001   -171.857      0.000      -0.161      -0.158\n",
+      "CurvH0                              0.1093      0.001    132.603      0.000       0.108       0.111\n",
+      "PialCurvK0                         -0.2774      0.001   -268.438      0.000      -0.279      -0.275\n",
+      "PialCurvH0                         -0.9025      0.001   -901.832      0.000      -0.904      -0.900\n",
+      "Area0                              -0.3285      0.001   -503.063      0.000      -0.330      -0.327\n",
+      "PialArea0                           0.2825      0.001    363.814      0.000       0.281       0.284\n",
+      "LGI0                                0.1225      0.000    257.511      0.000       0.122       0.123\n",
+      "Age:Sulc0                           0.0057      0.000     13.697      0.000       0.005       0.007\n",
+      "Age:CurvK0                          0.0065      0.001     10.001      0.000       0.005       0.008\n",
+      "Age:CurvH0                         -0.0094      0.001    -14.927      0.000      -0.011      -0.008\n",
+      "Age:PialCurvK0                      0.0049      0.001      5.913      0.000       0.003       0.006\n",
+      "Age:PialCurvH0                      0.0098      0.001     13.275      0.000       0.008       0.011\n",
+      "Age:Area0                          -0.0025      0.001     -4.705      0.000      -0.004      -0.001\n",
+      "Age:PialArea0                       0.0075      0.001     11.351      0.000       0.006       0.009\n",
+      "Age:LGI0                            0.0048      0.000     18.364      0.000       0.004       0.005\n",
+      "Sulc0:CurvK0                       -0.0456      0.001    -55.149      0.000      -0.047      -0.044\n",
+      "Sulc0:CurvH0                       -0.1505      0.001   -199.646      0.000      -0.152      -0.149\n",
+      "Sulc0:PialCurvK0                    0.0103      0.001     10.468      0.000       0.008       0.012\n",
+      "Sulc0:PialCurvH0                   -0.1050      0.001   -105.627      0.000      -0.107      -0.103\n",
+      "Sulc0:Area0                         0.1350      0.001    246.087      0.000       0.134       0.136\n",
+      "Sulc0:PialArea0                    -0.1482      0.001   -238.676      0.000      -0.149      -0.147\n",
+      "Sulc0:LGI0                          0.1658      0.000    406.482      0.000       0.165       0.167\n",
+      "CurvK0:CurvH0                      -0.0150      0.000    -44.477      0.000      -0.016      -0.014\n",
+      "CurvK0:PialCurvK0                   0.0035      0.001      4.064      0.000       0.002       0.005\n",
+      "CurvK0:PialCurvH0                  -0.1066      0.001    -91.949      0.000      -0.109      -0.104\n",
+      "CurvK0:Area0                       -0.0776      0.001    -86.855      0.000      -0.079      -0.076\n",
+      "CurvK0:PialArea0                   -0.0078      0.001     -7.268      0.000      -0.010      -0.006\n",
+      "CurvK0:LGI0                         0.0266      0.001     40.871      0.000       0.025       0.028\n",
+      "CurvH0:PialCurvK0                   0.0742      0.001     68.673      0.000       0.072       0.076\n",
+      "CurvH0:PialCurvH0                   0.1708      0.001    149.031      0.000       0.169       0.173\n",
+      "CurvH0:Area0                        0.2686      0.001    344.325      0.000       0.267       0.270\n",
+      "CurvH0:PialArea0                   -0.2880      0.001   -311.514      0.000      -0.290      -0.286\n",
+      "CurvH0:LGI0                        -0.0515      0.001    -81.852      0.000      -0.053      -0.050\n",
+      "PialCurvK0:PialCurvH0               0.0442      0.000    118.913      0.000       0.043       0.045\n",
+      "PialCurvK0:Area0                   -0.0769      0.001    -75.549      0.000      -0.079      -0.075\n",
+      "PialCurvK0:PialArea0               -0.0187      0.001    -16.214      0.000      -0.021      -0.016\n",
+      "PialCurvK0:LGI0                     0.0114      0.001     14.062      0.000       0.010       0.013\n",
+      "PialCurvH0:Area0                    0.0604      0.001     59.040      0.000       0.058       0.062\n",
+      "PialCurvH0:PialArea0               -0.3898      0.001   -323.395      0.000      -0.392      -0.387\n",
+      "PialCurvH0:LGI0                     0.0321      0.001     43.407      0.000       0.031       0.034\n",
+      "Area0:PialArea0                     0.0084      0.000     24.263      0.000       0.008       0.009\n",
+      "Area0:LGI0                         -0.0329      0.001    -62.457      0.000      -0.034      -0.032\n",
+      "PialArea0:LGI0                      0.0755      0.001    115.907      0.000       0.074       0.077\n",
+      "Age:Sulc0:CurvK0                   -0.0024      0.000     -5.566      0.000      -0.003      -0.002\n",
+      "Age:Sulc0:CurvH0                   -0.0016      0.000     -3.502      0.000      -0.003      -0.001\n",
+      "Age:Sulc0:PialCurvK0             8.591e-05      0.000      0.272      0.786      -0.001       0.001\n",
+      "Age:Sulc0:PialCurvH0                0.0007      0.000      1.866      0.062   -3.33e-05       0.001\n",
+      "Age:Sulc0:Area0                    -0.0005      0.000     -1.096      0.273      -0.001       0.000\n",
+      "Age:Sulc0:PialArea0                 0.0049      0.000     10.699      0.000       0.004       0.006\n",
+      "Age:Sulc0:LGI0                     -0.0002      0.000     -0.506      0.613      -0.001       0.001\n",
+      "Age:CurvK0:CurvH0                   0.0007      0.000      4.978      0.000       0.000       0.001\n",
+      "Age:CurvK0:PialCurvK0            5.093e-05      0.000      0.164      0.870      -0.001       0.001\n",
+      "Age:CurvK0:PialCurvH0               0.0013      0.000      2.578      0.010       0.000       0.002\n",
+      "Age:CurvK0:Area0                    0.0054      0.001      8.538      0.000       0.004       0.007\n",
+      "Age:CurvK0:PialArea0               -0.0033      0.001     -6.312      0.000      -0.004      -0.002\n",
+      "Age:CurvK0:LGI0                    -0.0012      0.000     -4.331      0.000      -0.002      -0.001\n",
+      "Age:CurvH0:PialCurvK0              -0.0031      0.000     -6.480      0.000      -0.004      -0.002\n",
+      "Age:CurvH0:PialCurvH0              -0.0053      0.001     -8.327      0.000      -0.007      -0.004\n",
+      "Age:CurvH0:Area0                   -0.0034      0.001     -5.349      0.000      -0.005      -0.002\n",
+      "Age:CurvH0:PialArea0               -0.0023      0.001     -4.130      0.000      -0.003      -0.001\n",
+      "Age:CurvH0:LGI0                    -0.0003      0.000     -1.043      0.297      -0.001       0.000\n",
+      "Age:PialCurvK0:PialCurvH0        6.689e-05   8.98e-05      0.745      0.456      -0.000       0.000\n",
+      "Age:PialCurvK0:Area0               -0.0003      0.000     -0.840      0.401      -0.001       0.000\n",
+      "Age:PialCurvK0:PialArea0            0.0029      0.001      3.363      0.001       0.001       0.005\n",
+      "Age:PialCurvK0:LGI0                -0.0012      0.000     -4.195      0.000      -0.002      -0.001\n",
+      "Age:PialCurvH0:Area0                0.0012      0.000      3.313      0.001       0.000       0.002\n",
+      "Age:PialCurvH0:PialArea0            0.0046      0.001      5.996      0.000       0.003       0.006\n",
+      "Age:PialCurvH0:LGI0                -0.0006      0.000     -1.950      0.051      -0.001    2.88e-06\n",
+      "Age:Area0:PialArea0                -0.0007      0.000     -2.251      0.024      -0.001   -8.42e-05\n",
+      "Age:Area0:LGI0                      0.0001      0.000      0.259      0.796      -0.001       0.001\n",
+      "Age:PialArea0:LGI0                 -0.0003      0.000     -0.601      0.548      -0.001       0.001\n",
+      "Sulc0:CurvK0:CurvH0                 0.0023      0.000     11.980      0.000       0.002       0.003\n",
+      "Sulc0:CurvK0:PialCurvK0            -0.0115      0.000    -30.406      0.000      -0.012      -0.011\n",
+      "Sulc0:CurvK0:PialCurvH0            -0.0057      0.001     -9.359      0.000      -0.007      -0.004\n",
+      "Sulc0:CurvK0:Area0                 -0.0373      0.001    -51.817      0.000      -0.039      -0.036\n",
+      "Sulc0:CurvK0:PialArea0              0.0070      0.001     10.570      0.000       0.006       0.008\n",
+      "Sulc0:CurvK0:LGI0                  -0.0022      0.000     -5.451      0.000      -0.003      -0.001\n",
+      "Sulc0:CurvH0:PialCurvK0             0.0024      0.001      4.439      0.000       0.001       0.003\n",
+      "Sulc0:CurvH0:PialCurvH0             0.0109      0.001     14.784      0.000       0.009       0.012\n",
+      "Sulc0:CurvH0:Area0                  0.0047      0.001      7.863      0.000       0.004       0.006\n",
+      "Sulc0:CurvH0:PialArea0             -0.0736      0.001   -135.096      0.000      -0.075      -0.072\n",
+      "Sulc0:CurvH0:LGI0                  -0.0025      0.000     -5.507      0.000      -0.003      -0.002\n",
+      "Sulc0:PialCurvK0:PialCurvH0        -0.0010      0.000     -8.762      0.000      -0.001      -0.001\n",
+      "Sulc0:PialCurvK0:Area0              0.0184      0.000     52.024      0.000       0.018       0.019\n",
+      "Sulc0:PialCurvK0:PialArea0         -0.0250      0.001    -28.204      0.000      -0.027      -0.023\n",
+      "Sulc0:PialCurvK0:LGI0              -0.0013      0.000     -4.001      0.000      -0.002      -0.001\n",
+      "Sulc0:PialCurvH0:Area0             -0.0330      0.000    -87.655      0.000      -0.034      -0.032\n",
+      "Sulc0:PialCurvH0:PialArea0         -0.0710      0.001    -90.453      0.000      -0.073      -0.069\n",
+      "Sulc0:PialCurvH0:LGI0              -0.0054      0.000    -14.313      0.000      -0.006      -0.005\n",
+      "Sulc0:Area0:PialArea0               0.0180      0.000     48.006      0.000       0.017       0.019\n",
+      "Sulc0:Area0:LGI0                   -0.0144      0.000    -33.923      0.000      -0.015      -0.014\n",
+      "Sulc0:PialArea0:LGI0                0.0466      0.000    102.277      0.000       0.046       0.048\n",
+      "CurvK0:CurvH0:PialCurvK0            0.0011      0.000      7.882      0.000       0.001       0.001\n",
+      "CurvK0:CurvH0:PialCurvH0           -0.0031      0.000    -13.965      0.000      -0.004      -0.003\n",
+      "CurvK0:CurvH0:Area0                -0.0019      0.000     -5.947      0.000      -0.003      -0.001\n",
+      "CurvK0:CurvH0:PialArea0            -0.0102      0.000    -38.435      0.000      -0.011      -0.010\n",
+      "CurvK0:CurvH0:LGI0                  0.0025      0.000     17.905      0.000       0.002       0.003\n",
+      "CurvK0:PialCurvK0:PialCurvH0        0.0031      0.000     26.367      0.000       0.003       0.003\n",
+      "CurvK0:PialCurvK0:Area0            -0.0140      0.001    -24.563      0.000      -0.015      -0.013\n",
+      "CurvK0:PialCurvK0:PialArea0         0.0190      0.001     23.023      0.000       0.017       0.021\n",
+      "CurvK0:PialCurvK0:LGI0              0.0002      0.000      0.613      0.540      -0.000       0.001\n",
+      "CurvK0:PialCurvH0:Area0            -0.0727      0.001    -90.603      0.000      -0.074      -0.071\n",
+      "CurvK0:PialCurvH0:PialArea0         0.0208      0.001     18.001      0.000       0.019       0.023\n",
+      "CurvK0:PialCurvH0:LGI0              0.0020      0.000      4.071      0.000       0.001       0.003\n",
+      "CurvK0:Area0:PialArea0              0.0009      0.000      2.971      0.003       0.000       0.002\n",
+      "CurvK0:Area0:LGI0                   0.0196      0.001     30.804      0.000       0.018       0.021\n",
+      "CurvK0:PialArea0:LGI0              -0.0139      0.001    -27.324      0.000      -0.015      -0.013\n",
+      "CurvH0:PialCurvK0:PialCurvH0        0.0022      0.000     12.089      0.000       0.002       0.003\n",
+      "CurvH0:PialCurvK0:Area0             0.0700      0.001     86.041      0.000       0.068       0.072\n",
+      "CurvH0:PialCurvK0:PialArea0        -0.0563      0.001    -50.653      0.000      -0.058      -0.054\n",
+      "CurvH0:PialCurvK0:LGI0             -0.0018      0.000     -3.730      0.000      -0.003      -0.001\n",
+      "CurvH0:PialCurvH0:Area0             0.1860      0.001    194.524      0.000       0.184       0.188\n",
+      "CurvH0:PialCurvH0:PialArea0        -0.1857      0.001   -153.565      0.000      -0.188      -0.183\n",
+      "CurvH0:PialCurvH0:LGI0             -0.0097      0.001    -15.230      0.000      -0.011      -0.008\n",
+      "CurvH0:Area0:PialArea0             -0.0288      0.000    -82.492      0.000      -0.030      -0.028\n",
+      "CurvH0:Area0:LGI0                  -0.0198      0.001    -31.639      0.000      -0.021      -0.019\n",
+      "CurvH0:PialArea0:LGI0               0.0086      0.001     15.554      0.000       0.008       0.010\n",
+      "PialCurvK0:PialCurvH0:Area0        -0.0006      0.000     -5.802      0.000      -0.001      -0.000\n",
+      "PialCurvK0:PialCurvH0:PialArea0     0.0383      0.000    108.995      0.000       0.038       0.039\n",
+      "PialCurvK0:PialCurvH0:LGI0      -6.878e-08   9.79e-05     -0.001      0.999      -0.000       0.000\n",
+      "PialCurvK0:Area0:PialArea0          0.0160      0.001     22.272      0.000       0.015       0.017\n",
+      "PialCurvK0:Area0:LGI0              -0.0048      0.000    -13.681      0.000      -0.005      -0.004\n",
+      "PialCurvK0:PialArea0:LGI0           0.0103      0.001     11.868      0.000       0.009       0.012\n",
+      "PialCurvH0:Area0:PialArea0          0.0863      0.001    140.913      0.000       0.085       0.087\n",
+      "PialCurvH0:Area0:LGI0               0.0190      0.000     52.148      0.000       0.018       0.020\n",
+      "PialCurvH0:PialArea0:LGI0           0.0059      0.001      7.812      0.000       0.004       0.007\n",
+      "Area0:PialArea0:LGI0               -0.0058      0.000    -20.366      0.000      -0.006      -0.005\n",
+      "==============================================================================\n",
+      "Omnibus:                   446101.536   Durbin-Watson:                   0.612\n",
+      "Prob(Omnibus):                  0.000   Jarque-Bera (JB):           754981.664\n",
+      "Skew:                           0.422   Prob(JB):                         0.00\n",
+      "Kurtosis:                       4.171   Cond. No.                         86.8\n",
+      "==============================================================================\n",
+      "\n",
+      "Warnings:\n",
+      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# create the formula \n",
+    "features = '(' + '+'.join(df_z.drop(df_z.filter(like = 'Thick'),axis = 1).columns) + ') ** 3'\n",
+    "formula = 'Thick' + sm + ' ~ ' + features\n",
+    "\n",
+    "# create the model\n",
+    "mod = smf.ols(formula=formula, data=df_z)\n",
+    "\n",
+    "results = mod.fit()\n",
+    "\n",
+    "print(results.summary())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# grab the p-value and beta values for each of the terms\n",
+    "pvals = results.pvalues\n",
+    "pvals.name = 'p.value'\n",
+    "\n",
+    "coefs = results.params\n",
+    "coefs.name = 'norm.slope'\n",
+    "\n",
+    "# concatenate the coefficients and p-values, and write to a csv\n",
+    "df_out = pd.concat([coefs,pvals],axis = 1)\n",
+    "df_out.index.name = 'term'\n",
+    "df_out.reset_index(inplace=True)\n",
+    "\n",
+    "outdir = projdir + '/results/' + dataset + '/lm/models'\n",
+    "if not os.path.isdir(outdir):\n",
+    "    os.mkdir(outdir)\n",
+    "\n",
+    "filename = outdir + '/coefs/' + area + '-sm' + sm + '-lm-betas.csv'\n",
+    "df_out.to_csv(filename)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:ctmodel-ml]",
+   "language": "python",
+   "name": "conda-env-ctmodel-ml-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}