#Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

#Getting the dataset
# The CSV is read from a mounted Google Drive folder.
csv_path = '/content/drive/MyDrive/11th Grade/Advanced Topics Comp. Sci./Machine Learning/Data/BostonHousing.csv'
housing_data = pd.read_csv(csv_path)

# Preview the first rows (only rendered when this is the last expression of a notebook cell)
housing_data.head()

#Data preprocessing
# Count the missing values per column before modelling
missing_per_column = housing_data.isnull().sum()
print("Missing values in each column:")
print(missing_per_column)
print()

Missing values in each column:
crim       0
zn         0
indus      0
chas       0
nox        0
rm         0
age        0
dis        0
rad        0
tax        0
ptratio    0
b          0
lstat      0
medv       0
dtype: int64

#Assign feature and label vectors
# Every column except the target "medv" is used as a feature.
X = housing_data.drop("medv", axis = "columns")
y = housing_data['medv']

#Split data
# Hold out 20% of the rows for testing. The fixed random_state makes the split
# deterministic, so the fitted coefficients and the evaluation metrics are the
# same on every run instead of changing with each random shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

#Create and fit the model
# Ordinary least-squares linear regression trained on the training split only.
model = LinearRegression()
model.fit(X_train, y_train)

LinearRegression()

LinearRegression()

#Create predictions
# Predict the target for the held-out test rows
y_pred = model.predict(X_test)

#Model evaluations
# Show the fitted parameters next to the feature names they belong to
print('Coefficients', model.coef_)
print(X_train.columns)
print('Intercept', model.intercept_)
# Report each test-set metric rounded to two decimals
for label, metric in (('MSE', mean_squared_error), ('R^2', r2_score)):
    print(f'{label}: {metric(y_test, y_pred):.2f}')

Coefficients [-1.06372562e-01  4.60727522e-02  2.77431050e-02  4.42728626e+00
 -1.39081525e+01  3.72044846e+00 -2.50053593e-03 -1.24722721e+00
  2.92025350e-01 -1.23478432e-02 -8.80824406e-01  6.53167503e-03
 -5.45683176e-01]
Index(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax',
       'ptratio', 'b', 'lstat'],
      dtype='object')
Intercept 33.97648697262077
MSE: 27.70
R^2: 0.70

#Create a scatter plot of the predicted values vs. actual values
# Each red triangle is one test row: x is the true target value, y is the
# model's prediction for it.
plt.scatter(y_test, y_pred, alpha = 0.8, marker = "^", color = "red")
plt.xlabel("Actual values")
plt.ylabel("Predicted values")
# Title typo fixed ("Predicited" -> "Predicted")
plt.title("Actual vs Predicted Values")
plt.show()

	crim	zn	indus	nox	rm	age	dis	rad	tax	ptratio	b	lstat	medv
0	0.00632	18.0	2.31	0.538	6.575	65.2	4.0900	1	296	15.3	396.90	4.98	24.0
1	0.02731	0.0	7.07	0.469	6.421	78.9	4.9671	2	242	17.8	396.90	9.14	21.6
2	0.02729	0.0	7.07	0.469	7.185	61.1	4.9671	2	242	17.8	392.83	4.03	34.7
3	0.03237	0.0	2.18	0.458	6.998	45.8	6.0622	3	222	18.7	394.63	2.94	33.4
4	0.06905	0.0	2.18	0.458	7.147	54.2	6.0622	3	222	18.7	396.90	5.33	36.2