Scikit-Learn-Homework / sklearn_train_bostonHousing.py
ewdlop's picture
Upload 5 files (#1)
9b316cc verified
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
import matplotlib.pyplot as plt
import numpy as np
# Load the Boston housing dataset and pull out its features (`data`) and
# regression targets (`target`).
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the installed scikit-learn version still provides it.
boston = load_boston()
x, y = boston.data, boston.target
# print(x.shape)
# print(y.shape)
# print(x)
# plt.figure(figsize=(4,3))
# plt.hist(y)
# plt.xlabel('price($1000s)')
# plt.ylabel('count')
# plt.tight_layout()
# plt.show()
# for index, feature_name in enumerate(boston.feature_names):
# plt.figure(figsize=(4,3))
# plt.scatter(x[:, index], y)
# plt.ylabel('Price', size=15)
# plt.xlabel(feature_name, size=15)
# plt.tight_layout()
# plt.show()
# Hold out 20% of the samples as a test set; the fixed random_state makes the
# split reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)
# linear = LinearRegression()
# linear.fit(x_train, y_train)
# linear_predicted = linear.predict(x_test)
# plt.figure(figsize =(4,3))
# plt.suptitle('Linear Regression')
# plt.scatter(y_test,linear_predicted)
# plt.plot([0,50],[0,50], '--k')
# plt.axis('tight')
# plt.xlabel('True price($1000s)')
# plt.ylabel('Predicted price($1000s)')
# plt.tight_layout()
# plt.show()
# print("Linear RMS: %r " % np.sqrt(np.mean((linear_predicted - y_test) ** 2)))
# print("Linear intercept: ")
# print(linear.intercept_)
# print("Linear coefficient: ")
# print(linear.coef_)
# neigh = KNeighborsRegressor(n_neighbors=2)
# neigh.fit(x_train, y_train)
# neigh_predicted = neigh.predict(x_test)
# plt.figure(figsize=(4,3))
# plt.suptitle('KNN')
# plt.scatter(y_test, neigh_predicted)
# plt.plot([0,50],[0,50],'--k')
# plt.axis('tight')
# plt.xlabel('True price ($1000s)')
# plt.ylabel('Predicted price ($1000s)')
# plt.tight_layout()
# plt.show()
# print("KNN RMS: %r " % np.sqrt(np.mean((neigh_predicted - y_test) ** 2)))
# from sklearn import tree
# tree = tree.DecisionTreeRegressor()
# tree.fit(x_train, y_train)
# print('Decision Tree Feature Importance: ')
# print(tree.feature_importances_)
# tree_predicted = tree.predict(x_test)
# plt.figure(figsize=(4, 3))
# plt.suptitle('Decision Tree')
# plt.scatter(y_test, tree_predicted)
# plt.plot([0,50],[0,50], '--k')
# plt.axis('tight')
# plt.xlabel('True price ($1000s)')
# plt.ylabel('Predicted price ($1000s)')
# plt.tight_layout()
# plt.show()
# print("Decision Tree RMS: %r " % np.sqrt(np.mean((tree_predicted - y_test) ** 2)))
# from sklearn.ensemble import RandomForestRegressor
# forest = RandomForestRegressor(max_depth=2, random_state=0)
# forest.fit(x_train, y_train)
# print('Random Forest Feature Importance')
# print(forest.feature_importances_)
# forest_predicted = forest.predict(x_test)
# plt.figure(figsize=(4,3))
# plt.suptitle('Random Forest')
# plt.scatter(y_test, forest_predicted)
# plt.plot([0,50],[0,50],'--k')
# plt.axis('tight')
# plt.xlabel('True price ($1000s)')
# plt.ylabel('Predicted price ($1000s)')
# plt.tight_layout()
# plt.show()
# print("Forest RMS: %r " % np.sqrt(np.mean((forest_predicted - y_test) ** 2)))
from sklearn import datasets
from sklearn.model_selection import cross_val_score
import numpy as np
# Compare two classifiers on the scikit-learn digits dataset using 10-fold
# cross-validation and print the mean of each model's per-fold scores.
from sklearn.linear_model import Perceptron
from sklearn.neighbors import KNeighborsClassifier

digits = datasets.load_digits()
# NOTE: this rebinds x and y, which were assigned from the Boston data earlier
# in the script.
x = digits.data
y = digits.target

perceptron_model = Perceptron(tol=1e-3, random_state=0)
Perceptron_scores = cross_val_score(perceptron_model, x, y, cv=10)
print('Perceptron avg performance: ')
# Average the array of fold scores; the estimator object itself is not numeric.
print(np.mean(Perceptron_scores))

neigh = KNeighborsClassifier(n_neighbors=3)
neigh_scores = cross_val_score(neigh, x, y, cv=10)
print('KNN avg performance: ')
# Same fix as above: report the mean of the scores, not of the estimator.
print(np.mean(neigh_scores))
# The same pattern applies for decision tree and random forest.