In [1]:
import matplotlib
%matplotlib inline
In [2]:
import pandas as pd
import numpy as np
import pylab as pl
In [3]:
from sklearn.datasets import load_iris
# Load the iris dataset and tabulate it: one column per measured feature,
# plus a 'species' column holding the integer class label of each row.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(
    species=iris.target
)
In [6]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
In [5]:
# Instantiate each classifier exactly once and register those same instances
# in `clfs`, so that `svm_clf` / `neighbors_clf` refer to the fitted models
# after the loop (previously they were unused, unfitted duplicates).
svm_clf = SVC()
neighbors_clf = KNeighborsClassifier()
clfs = [
    ("svc", svm_clf),
    ("KNN", neighbors_clf),
]

# Fit and predict on the same DataFrame view: fitting on a DataFrame and then
# predicting on the bare ndarray makes scikit-learn warn about missing
# feature names.
iris_features = df[iris.feature_names]
for name, clf in clfs:
    clf.fit(iris_features, df.species)
    print(name, clf.predict(iris_features))
    print("*" * 80)
In [7]:
# Fit a random forest on the iris features; the fixed seed makes the forest
# (and therefore the table below) identical on every Restart & Run All.
clf = RandomForestClassifier(random_state=0)
clf.fit(df[iris.feature_names], df.species)

# Predict once on the training rows and reuse the result (the original
# called predict twice and threw the first result away).
predicted_species = clf.predict(df[iris.feature_names])

# Confusion table: rows are the true labels, columns the predicted labels.
pd.crosstab(df.species, predicted_species)
Out[7]:
In [8]:
from sklearn import tree
# Decision tree restricted to leaves of at least 10 samples.
# max_features="sqrt" is the explicit spelling of what "auto" meant for
# classifiers; "auto" was deprecated and later removed from scikit-learn.
# Because a random subset of features is considered at each split, the seed
# is fixed so the exported tree is reproducible.
clf = tree.DecisionTreeClassifier(
    max_features="sqrt",
    min_samples_leaf=10,
    random_state=0,
)
clf.fit(df[iris.feature_names], df.species)
Out[8]:
In [10]:
# `sklearn.externals.six` has been removed from scikit-learn; the standard
# library provides the same StringIO class, so the name stays available.
from io import StringIO

# Write the fitted tree in Graphviz DOT format. export_graphviz writes to
# the handle and returns None when `out_file` is given, so its result is
# deliberately not assigned (the original rebound `f` to None inside its
# own `with` block).
with open("irisdotfile.dot", "w") as f:
    tree.export_graphviz(clf, out_file=f)
Use http://www.webgraphviz.com/ to view the file created in the previous step.
In [12]:
# Import from the public `IPython.display` namespace rather than the
# internal `IPython.core.display` module.
from IPython.display import Image

# Remote image rendered inline at a fixed width of 700 pixels.
image_url = "https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgq0SHTZpK8BUtTniRJUARv8nK65U2ypmO29Dn_mg3qq2spfZdUWQjjMdK2saHuvxrDJo8DgX5ijr8bXf2NtEFU0dmNOqXYM_L7skaYaru7DFe9m5mY3e-CgQtwWgcCQjXUijJtDmqbwn0/s1600/drop_shadows_background.png"
Image(url=image_url, width=700)
Out[12]:
In [13]:
# Load the Boston housing dataset object; later cells read `boston.data`,
# `boston.feature_names` and `boston.target` from it.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed
# in 1.2 — confirm the environment pins an older version, or swap in another
# loader that exposes the same three attributes.
from sklearn.datasets import load_boston
boston = load_boston()
In [17]:
# Wrap the raw Boston feature matrix in a DataFrame; no column names are
# supplied here, so pandas falls back to its default column labels.
df = pd.DataFrame(data=boston.data)

# Preview the first five rows.
df.head(n=5)
Out[17]:
In [19]:
import re
def camel_to_snake(column_name):
    """
    Convert a camelCase (or PascalCase) string to snake_case.

    Works in two passes: first an underscore is inserted in front of every
    capitalised word (an uppercase letter followed by lowercase letters),
    then between any lowercase letter or digit and an uppercase letter that
    directly follows it. The result is lower-cased.

    Example:
        print(camel_to_snake("javaLovesCamelCase"))
        > java_loves_camel_case

    See Also:
        http://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-camel-case
    """
    capitalised_word = re.compile('(.)([A-Z][a-z]+)')
    lower_then_upper = re.compile('([a-z0-9])([A-Z])')

    # Pass 1: break before capitalised words ("HTTPResponse" -> "HTTP_Response").
    with_word_breaks = capitalised_word.sub(r'\1_\2', column_name)
    # Pass 2: break remaining lower/digit -> upper transitions.
    fully_split = lower_then_upper.sub(r'\1_\2', with_word_breaks)
    return fully_split.lower()
# Rebuild the Boston DataFrame with readable, snake_case column names
# derived from the dataset's own feature names.
df = pd.DataFrame(
    boston.data,
    columns=[camel_to_snake(col) for col in boston.feature_names],
)

# add in prices
df['price'] = boston.target

# Fail loudly if the dataset does not have the expected 506 rows, instead
# of printing a boolean that is easy to overlook.
assert len(df) == 506, "expected 506 rows, got %d" % len(df)

df.head()
Out[19]:
In [20]:
from sklearn.linear_model import LinearRegression
# Regress price on three predictors with a linear model. The fitted
# estimator is the cell's last expression so the notebook displays it.
features = ['age', 'lstat', 'tax']
lm = LinearRegression()
lm.fit(X=df.loc[:, features], y=df['price'])
Out[20]:
In [21]:
# Actual prices (x) against the model's predictions for the same rows (y).
pl.scatter(df.price, lm.predict(df[features]))

# Line of perfect fit, y = x: points on it are predicted exactly.
straight_line = np.arange(0, 60)
pl.plot(straight_line, straight_line)

# Label both axes so the figure can be read on its own.
pl.xlabel("Actual price")
pl.ylabel("Predicted price")
pl.title("Fitted Values")
Out[21]:
In [ ]:
No comments:
Post a Comment