In [ ]:
#Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
In [ ]:
#Getting the dataset
housing_data = pd.read_csv('/content/drive/MyDrive/11th Grade/Advanced Topics Comp. Sci./Machine Learning/Data/BostonHousing.csv')
In [ ]:
housing_data.head()
Out[ ]:
crim zn indus chas nox rm age dis rad tax ptratio b lstat medv
0 0.00632 18.0 2.31 0 0.538 6.575 65.2 4.0900 1 296 15.3 396.90 4.98 24.0
1 0.02731 0.0 7.07 0 0.469 6.421 78.9 4.9671 2 242 17.8 396.90 9.14 21.6
2 0.02729 0.0 7.07 0 0.469 7.185 61.1 4.9671 2 242 17.8 392.83 4.03 34.7
3 0.03237 0.0 2.18 0 0.458 6.998 45.8 6.0622 3 222 18.7 394.63 2.94 33.4
4 0.06905 0.0 2.18 0 0.458 7.147 54.2 6.0622 3 222 18.7 396.90 5.33 36.2
In [ ]:
#Data preprocessing
# Check for missing values
print("Missing values in each column:")
print(housing_data.isnull().sum())
print()
Missing values in each column:
crim       0
zn         0
indus      0
chas       0
nox        0
rm         0
age        0
dis        0
rad        0
tax        0
ptratio    0
b          0
lstat      0
medv       0
dtype: int64

In [ ]:
#Assign feature and label vectors
X = housing_data.drop("medv", axis = "columns")
y = housing_data['medv']
In [ ]:
#Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
In [ ]:
#Create and fit the model
model = LinearRegression()
model.fit(X_train, y_train)
Out[ ]:
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [ ]:
#Create predictions
y_pred = model.predict(X_test)
In [ ]:
#Model evaluations
print('Coefficients', model.coef_)
print(X_train.columns)
print('Intercept', model.intercept_)
print('MSE: %.2f' % mean_squared_error(y_test, y_pred))
print('R^2: %.2f' % r2_score(y_test, y_pred))
Coefficients [-1.06372562e-01  4.60727522e-02  2.77431050e-02  4.42728626e+00
 -1.39081525e+01  3.72044846e+00 -2.50053593e-03 -1.24722721e+00
  2.92025350e-01 -1.23478432e-02 -8.80824406e-01  6.53167503e-03
 -5.45683176e-01]
Index(['crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis', 'rad', 'tax',
       'ptratio', 'b', 'lstat'],
      dtype='object')
Intercept 33.97648697262077
MSE: 27.70
R^2: 0.70
In [ ]:
#Create a scatter plot of the predicted values vs. actual values
plt.scatter(y_test, y_pred, alpha = 0.8, marker = "^", color = "red")
plt.xlabel("Actual values")
plt.ylabel("Predicted values")
plt.title("Actual vs Predicited Values")
plt.show()
No description has been provided for this image