Skip to content
Snippets Groups Projects
Commit e70def72 authored by Matthew K Defenderfer's avatar Matthew K Defenderfer
Browse files

Set up basic file structure and added current ipynbs

parent f8749ea8
No related branches found
No related tags found
No related merge requests found
name: ctmodel-ml
channels:
- conda-forge
- anaconda
- defaults
dependencies:
- _tflow_1100_select=0.0.1=gpu
- absl-py=0.4.1=py36_0
- astor=0.7.1=py36_0
- bleach=1.5.0=py36_0
- cudatoolkit=9.0=h13b8566_0
- cudnn=7.1.2=cuda9.0_0
- cupti=9.0.176=0
- gast=0.2.0=py36_0
- grpcio=1.12.1=py36hdbcaa40_0
- hdf5=1.10.2=hba1933b_1
- html5lib=0.9999999=py36_0
- libprotobuf=3.6.0=hdbcaa40_0
- markdown=2.6.11=py36_0
- pyyaml=3.13=py36h14c3975_0
- termcolor=1.1.0=py36_1
- werkzeug=0.14.1=py36_0
- yaml=0.1.7=h96e3832_1
- nibabel=2.3.0=pyh24bf2e0_1
- pydicom=1.1.0=py_0
- appdirs=1.4.3=py36h28b3542_0
- asn1crypto=0.24.0=py36_0
- attrs=18.2.0=py36h28b3542_0
- automat=0.7.0=py36_0
- backcall=0.1.0=py36_0
- blas=1.0=mkl
- ca-certificates=2018.03.07=0
- certifi=2018.10.15=py36_0
- cffi=1.11.5=py36he75722e_1
- constantly=15.1.0=py36h28b3542_0
- cryptography=2.3.1=py36hc365091_0
- cycler=0.10.0=py36_0
- cython=0.28.5=py36hf484d3e_0
- dbus=1.13.2=h714fa37_1
- decorator=4.3.0=py36_0
- entrypoints=0.2.3=py36_2
- expat=2.2.6=he6710b0_0
- fontconfig=2.13.0=h9420a91_0
- freetype=2.9.1=h8a8886c_1
- future=0.16.0=py36_0
- glib=2.56.2=hd408876_0
- gmp=6.1.2=h6c8ec71_1
- gst-plugins-base=1.14.0=hbbd80ab_1
- gstreamer=1.14.0=hb453b48_1
- h5py=2.8.0=py36h989c5e5_3
- hyperlink=18.0.0=py36_0
- icu=58.2=h9c2bf20_1
- idna=2.7=py36_0
- incremental=17.5.0=py36_0
- intel-openmp=2019.0=118
- ipykernel=4.9.0=py36_0
- ipython=6.5.0=py36_0
- ipython_genutils=0.2.0=py36_0
- jedi=0.12.1=py36_0
- jinja2=2.10=py36_0
- jpeg=9b=h024ee3a_2
- jsonschema=2.6.0=py36_0
- jupyter_client=5.2.3=py36_0
- jupyter_core=4.4.0=py36_0
- keras-applications=1.0.4=py36_1
- keras-base=2.2.2=py36_0
- keras-preprocessing=1.0.2=py36_1
- kiwisolver=1.0.1=py36hf484d3e_0
- libedit=3.1.20170329=h6b74fdf_2
- libffi=3.2.1=hd88cf55_4
- libgcc-ng=8.2.0=hdf63c60_1
- libgfortran-ng=7.3.0=hdf63c60_0
- libpng=1.6.34=hb9fc6fc_0
- libsodium=1.0.16=h1bed415_0
- libstdcxx-ng=8.2.0=hdf63c60_1
- libuuid=1.0.3=h1bed415_2
- libxcb=1.13=h1bed415_1
- libxml2=2.9.8=h26e45fe_1
- markupsafe=1.0=py36h14c3975_1
- matplotlib=2.2.3=py36hb69df0a_0
- mistune=0.8.3=py36h14c3975_1
- mkl=2019.0=118
- mkl_fft=1.0.4=py36h4414c95_1
- mkl_random=1.0.1=py36h4414c95_1
- nbconvert=5.3.1=py36_0
- nbformat=4.4.0=py36_0
- ncurses=6.1=hf484d3e_0
- notebook=5.6.0=py36_0
- numpy=1.14.5=py36h1b885b7_4
- numpy-base=1.14.5=py36hdbf6ddf_4
- openssl=1.0.2p=h14c3975_0
- pandas=0.23.4=py36h04863e7_0
- pandoc=2.2.3.2=0
- pandocfilters=1.4.2=py36_1
- parso=0.3.1=py36_0
- patsy=0.5.0=py36_0
- pcre=8.42=h439df22_0
- pexpect=4.6.0=py36_0
- pickleshare=0.7.4=py36_0
- pip=10.0.1=py36_0
- prometheus_client=0.3.1=py36h28b3542_0
- prompt_toolkit=1.0.15=py36_0
- protobuf=3.6.0=py36hf484d3e_0
- ptyprocess=0.6.0=py36_0
- pyasn1=0.4.4=py36h28b3542_0
- pyasn1-modules=0.2.2=py36_0
- pycparser=2.18=py36_1
- pydot=1.2.4=py36_0
- pygments=2.2.0=py36_0
- pyopenssl=18.0.0=py36_0
- pyparsing=2.2.0=py36_1
- pyqt=5.9.2=py36h05f1152_2
- python=3.6.6=hc3d631a_0
- python-dateutil=2.7.3=py36_0
- pytz=2018.5=py36_0
- pyzmq=17.1.2=py36h14c3975_0
- qt=5.9.6=h8703b6f_2
- readline=7.0=h7b6447c_5
- scikit-learn=0.20.0=py36h4989274_1
- scipy=1.1.0=py36hd20e5f9_0
- seaborn=0.9.0=py36_0
- send2trash=1.5.0=py36_0
- service_identity=17.0.0=py36h28b3542_0
- setuptools=39.1.0=py36_0
- simplegeneric=0.8.1=py36_2
- sip=4.19.8=py36hf484d3e_0
- six=1.11.0=py36_1
- sqlite=3.24.0=h84994c4_0
- statsmodels=0.9.0=py36h035aef0_0
- tensorboard=1.10.0=py36hf484d3e_0
- tensorflow=1.10.0=gpu_py36h97a2126_0
- tensorflow-base=1.10.0=gpu_py36h6ecc378_0
- tensorflow-gpu=1.10.0=hf154084_0
- terminado=0.8.1=py36_1
- testpath=0.3.1=py36_0
- tk=8.6.8=hbc83047_0
- tornado=5.1=py36h14c3975_0
- traitlets=4.3.2=py36_0
- twisted=18.7.0=py36h14c3975_1
- wcwidth=0.1.7=py36_0
- webencodings=0.5.1=py36_1
- wheel=0.31.1=py36_0
- xz=5.2.4=h14c3975_4
- zeromq=4.2.5=hf484d3e_1
- zlib=1.2.11=ha838bed_2
- zope=1.0=py36_1
- zope.interface=4.5.0=py36h14c3975_0
- pip:
  - blinker==1.4
  - chardet==3.0.4
  - cloudpickle==0.5.6
  - configparser==3.5.0
  - dask==0.19.1
  - kaggle==1.4.7.1
  - keras==2.2.2
  - keras-vis==0.4.1
  - networkx==2.2rc1
  - packaging==17.1
  - pillow==5.2.0
  - pyhamcrest==1.9.0
  - python-slugify==1.2.6
  - pywavelets==1.0.0
  - requests==2.19.1
  - scikit-image==0.14.0
  - simpleitk==1.1.0
  - toolz==0.9.0
  - tqdm==4.26.0
  - unidecode==1.0.22
  - urllib3==1.22
  - xgboost==0.90
This diff is collapsed.
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn import tree
```
%% Cell type:code id: tags:
``` python
# Load the Boston housing dataset through scikit-learn's dataset helpers.
from sklearn.datasets import load_boston

boston = load_boston()
```
%% Cell type:code id: tags:
``` python
# Show the dimensions (rows, columns) of the feature matrix.
print(boston.data.shape)
```
%% Output
(506, 13)
%% Cell type:code id: tags:
``` python
# Dump the raw feature matrix (NumPy abbreviates the middle rows/columns).
print(boston.data)
```
%% Output
[[6.3200e-03 1.8000e+01 2.3100e+00 ... 1.5300e+01 3.9690e+02 4.9800e+00]
[2.7310e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9690e+02 9.1400e+00]
[2.7290e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9283e+02 4.0300e+00]
...
[6.0760e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 5.6400e+00]
[1.0959e-01 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9345e+02 6.4800e+00]
[4.7410e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 7.8800e+00]]
%% Cell type:code id: tags:
``` python
# Input features: one labelled column per entry in boston.feature_names.
df_x = pd.DataFrame(data=boston.data, columns=boston.feature_names)
# Output feature (the value to predict), kept as a single-column DataFrame.
df_y = pd.DataFrame(data=boston.target)
```
%% Cell type:code id: tags:
``` python
# Print summary statistics (count, mean, std, min, quartiles, max) for every feature column.
df_x.describe()
```
%% Output
CRIM ZN INDUS CHAS NOX RM \
count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000
mean 3.613524 11.363636 11.136779 0.069170 0.554695 6.284634
std 8.601545 23.322453 6.860353 0.253994 0.115878 0.702617
min 0.006320 0.000000 0.460000 0.000000 0.385000 3.561000
25% 0.082045 0.000000 5.190000 0.000000 0.449000 5.885500
50% 0.256510 0.000000 9.690000 0.000000 0.538000 6.208500
75% 3.677083 12.500000 18.100000 0.000000 0.624000 6.623500
max 88.976200 100.000000 27.740000 1.000000 0.871000 8.780000
AGE DIS RAD TAX PTRATIO B \
count 506.000000 506.000000 506.000000 506.000000 506.000000 506.000000
mean 68.574901 3.795043 9.549407 408.237154 18.455534 356.674032
std 28.148861 2.105710 8.707259 168.537116 2.164946 91.294864
min 2.900000 1.129600 1.000000 187.000000 12.600000 0.320000
25% 45.025000 2.100175 4.000000 279.000000 17.400000 375.377500
50% 77.500000 3.207450 5.000000 330.000000 19.050000 391.440000
75% 94.075000 5.188425 24.000000 666.000000 20.200000 396.225000
max 100.000000 12.126500 24.000000 711.000000 22.000000 396.900000
LSTAT
count 506.000000
mean 12.653063
std 7.141062
min 1.730000
25% 6.950000
50% 11.360000
75% 16.955000
max 37.970000
%% Cell type:code id: tags:
``` python
# Pick the regressor to train: keep exactly one `reg = ...` line uncommented.
# Linear models:
# reg = linear_model.LinearRegression()
# reg = linear_model.Ridge(alpha=0.9)
reg = linear_model.Lasso(alpha=0.1)
# reg = linear_model.Lars()
# Non-linear model:
# reg = tree.DecisionTreeRegressor(max_depth=10)
```
%% Cell type:code id: tags:
``` python
# Hold out 20% of the rows for testing (test_size=0.2); the remaining 80% train the model.
#   x_train / y_train -> the 80% used for training
#   x_test  / y_test  -> the 20% used for testing
# random_state is fixed to 4 so the same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    df_x,
    df_y,
    test_size=0.2,
    random_state=4,
)
```
%% Cell type:code id: tags:
``` python
# Train the selected model on the training portion of the data.
reg.fit(x_train, y_train)
```
%% Output
Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
normalize=False, positive=False, precompute=False, random_state=None,
selection='cyclic', tol=0.0001, warm_start=False)
%% Cell type:code id: tags:
``` python
# For linear models, uncomment the following line to inspect the coefficients of your fitted model.
#reg.coef_
```
%% Cell type:code id: tags:
``` python
# Predict the target for the held-out 20% of the data (the test set).
a = reg.predict(x_test)
```
%% Cell type:code id: tags:
``` python
# Force the predictions into an explicit (n, 1) column so they line up with
# the single-column y_test in the error calculation.
# reshape(-1, 1) is used instead of assigning to a.shape: NumPy discourages
# in-place shape assignment, and reshape also works on non-contiguous arrays.
a = a.reshape(-1, 1)
a
```
%% Output
array([[11.35769564],
[26.63065774],
[17.07212795],
[14.88066872],
[36.41257162],
[24.9585628 ],
[31.94678858],
[18.71968836],
[18.03333259],
[24.31205853],
[29.37517697],
[28.21096667],
[19.31608322],
[29.77081668],
[22.02956911],
[15.80101535],
[21.40010518],
[11.55888929],
[10.03696639],
[14.21676695],
[ 5.93013576],
[20.67875375],
[20.28901268],
[22.045776 ],
[16.91359062],
[20.01181348],
[14.60702376],
[14.47106462],
[19.94873173],
[16.80168678],
[14.47686108],
[23.94606474],
[35.12159987],
[22.18768349],
[17.36984591],
[19.82812892],
[30.64690326],
[35.83418403],
[24.01776652],
[24.25497155],
[36.65259504],
[31.76859732],
[19.93445419],
[31.94878121],
[30.55626307],
[24.85315173],
[40.25718892],
[17.35967841],
[20.58594129],
[23.65915748],
[33.33055041],
[25.46122166],
[18.25223929],
[27.45084254],
[13.61083007],
[22.98211928],
[24.36098849],
[33.24773708],
[17.77844029],
[34.20142858],
[16.18855141],
[20.46046193],
[31.34454514],
[14.83719596],
[39.59888611],
[28.30333095],
[29.56328051],
[ 9.50186015],
[18.44151744],
[21.6004166 ],
[23.21248274],
[22.98510649],
[23.36802977],
[27.76775011],
[16.2541867 ],
[23.87249049],
[16.730009 ],
[25.39057665],
[14.1120706 ],
[19.35830505],
[22.16813444],
[19.3767848 ],
[28.29171375],
[20.01027133],
[30.10784765],
[23.22987591],
[30.21376239],
[19.89171106],
[21.10474733],
[37.4997143 ],
[31.49817385],
[41.24683197],
[18.88741907],
[37.34892447],
[20.22051301],
[23.61804532],
[23.95396337],
[22.14526984],
[12.45469347],
[21.69669994],
[ 9.7580847 ],
[25.09790195]])
%% Cell type:code id: tags:
``` python
# Mean squared error of the predictions on the test set (the lower the error, the better your model).
squared_errors = (a - y_test) ** 2
np.mean(squared_errors)
```
%% Output
0 26.452889
dtype: float64
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment