Scikit-Learn-Homework / sklearn_train_bostonHousing.py

Upload 5 files (#1)

9b316cc verified 3 months ago

3.75 kB

	from sklearn.datasets import load_boston
	from sklearn.model_selection import train_test_split
	from sklearn.linear_model import LinearRegression
	from sklearn.neighbors import KNeighborsRegressor
	import matplotlib.pyplot as plt
	import numpy as np
	# Load the Boston housing dataset and unpack it into the feature
	# matrix (x) and the target values (y) used by the regression experiments.
	# NOTE(review): load_boston was removed in scikit-learn 1.2 — confirm the
	# installed scikit-learn version still provides it.
	boston = load_boston()
	x, y = boston.data, boston.target
	# print(x.shape)
	# print(y.shape)
	# print(x)
	# plt.figure(figsize=(4,3))
	# plt.hist(y)
	# plt.xlabel('price($1000s)')
	# plt.ylabel('count')
	# plt.tight_layout()
	# plt.show()
	# for index, feature_name in enumerate(boston.feature_names):
	# plt.figure(figsize=(4,3))
	# plt.scatter(x[:, index], y)
	# plt.ylabel('Price', size=15)
	# plt.xlabel(feature_name, size=15)
	# plt.tight_layout()
	# plt.show()

	# Hold out 20% of the samples as a test set; the fixed random_state keeps
	# the split identical from run to run.
	x_train, x_test, y_train, y_test = train_test_split(
	    x,
	    y,
	    test_size=0.2,
	    random_state=0,
	)
	# linear = LinearRegression()
	# linear.fit(x_train, y_train)
	# linear_predicted = linear.predict(x_test)
	# plt.figure(figsize =(4,3))
	# plt.suptitle('Linear Regression')
	# plt.scatter(y_test,linear_predicted)
	# plt.plot([0,50],[0,50], '--k')
	# plt.axis('tight')
	# plt.xlabel('True price($1000s)')
	# plt.ylabel('Predicted price($1000s)')
	# plt.tight_layout()
	# plt.show()
	# print("Linear RMS: %r " % np.sqrt(np.mean((linear_predicted - y_test) ** 2)))
	# print("Linear intercept: ")
	# print(linear.intercept_)
	# print("Linear coefficient: ")
	# print(linear.coef_)

	# neigh = KNeighborsRegressor(n_neighbors=2)
	# neigh.fit(x_train, y_train)
	# neigh_predicted = neigh.predict(x_test)
	# plt.figure(figsize=(4,3))
	# plt.suptitle('KNN')
	# plt.scatter(y_test, neigh_predicted)
	# plt.plot([0,50],[0,50],'--k')
	# plt.axis('tight')
	# plt.xlabel('True price ($1000s)')
	# plt.ylabel('Predicted price ($1000s)')
	# plt.tight_layout()
	# plt.show()
	# print("KNN RMS: %r " % np.sqrt(np.mean((neigh_predicted - y_test) ** 2)))

	# from sklearn import tree
	# tree = tree.DecisionTreeRegressor()
	# tree.fit(x_train, y_train)
	# print('Decision Tree Feature Importance: ')
	# print(tree.feature_importances_)
	# tree_predicted = tree.predict(x_test)
	# plt.figure(figsize=(4, 3))
	# plt.suptitle('Decision Tree')
	# plt.scatter(y_test, tree_predicted)
	# plt.plot([0,50],[0,50], '--k')
	# plt.axis('tight')
	# plt.xlabel('True price ($1000s)')
	# plt.ylabel('Predicted price ($1000s)')
	# plt.tight_layout()
	# plt.show()
	# print("Decision Tree RMS: %r " % np.sqrt(np.mean((tree_predicted - y_test) ** 2)))

	# from sklearn.ensemble import RandomForestRegressor
	# forest = RandomForestRegressor(max_depth=2, random_state=0)
	# forest.fit(x_train, y_train)
	# print('Random Forest Feature Importance')
	# print(forest.feature_importances_)
	# forest_predicted = forest.predict(x_test)
	# plt.figure(figsize=(4,3))
	# plt.suptitle('Random Forest')
	# plt.scatter(y_test, forest_predicted)
	# plt.plot([0,50],[0,50],'--k')
	# plt.axis('tight')
	# plt.xlabel('True price ($1000s)')
	# plt.ylabel('Predicted price ($1000s)')
	# plt.tight_layout()
	# plt.show()
	# print("Forest RMS: %r " % np.sqrt(np.mean((forest_predicted - y_test) ** 2)))


	from sklearn import datasets
	from sklearn.model_selection import cross_val_score
	import numpy as np
	# Switch to the digits dataset for the classification experiments:
	# x holds the feature rows and y the corresponding labels.
	# Note that this rebinds the module-level x and y.
	digits = datasets.load_digits()
	x, y = digits.data, digits.target

	from sklearn.linear_model import Perceptron
	# Score a Perceptron classifier on (x, y) with 10-fold cross-validation.
	perceptron_model = Perceptron(tol=1e-3, random_state=0)
	Perceptron_scores = cross_val_score(perceptron_model, x, y, cv=10)
	print('Perceptron avg performance: ')
	# Average the per-fold scores. The estimator object itself is not numeric,
	# so it must not be passed to np.mean.
	print(np.mean(Perceptron_scores))

	from sklearn.neighbors import KNeighborsClassifier
	# Score a 3-nearest-neighbours classifier on (x, y) with 10-fold
	# cross-validation.
	neigh = KNeighborsClassifier(n_neighbors=3)
	neigh_scores = cross_val_score(neigh, x, y, cv=10)
	print('KNN avg performance: ')
	# Average the per-fold scores, not the classifier object.
	print(np.mean(neigh_scores))

	# The same cross-validation procedure applies to the decision tree and random forest classifiers.