In [ ]:
#Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
In [ ]:
#Getting the dataset
energy_data = pd.read_excel('/content/drive/MyDrive/11th Grade/Advanced Topics Comp. Sci./Machine Learning/Data/EnergyEfficiency.xlsx')
In [ ]:
energy_data.head()
Out[ ]:
X1 X2 X3 X4 X5 X6 X7 X8 Y1 Y2
0 0.98 514.5 294.0 110.25 7.0 2 0.0 0 15.55 21.33
1 0.98 514.5 294.0 110.25 7.0 3 0.0 0 15.55 21.33
2 0.98 514.5 294.0 110.25 7.0 4 0.0 0 15.55 21.33
3 0.98 514.5 294.0 110.25 7.0 5 0.0 0 15.55 21.33
4 0.90 563.5 318.5 122.50 7.0 2 0.0 0 20.84 28.28
In [ ]:
#Data preprocessing
# Check for missing values
print("Missing values in each column:")
print(energy_data.isnull().sum())
print()
Missing values in each column:
X1    0
X2    0
X3    0
X4    0
X5    0
X6    0
X7    0
X8    0
Y1    0
Y2    0
dtype: int64

In [ ]:
#Assign feature and label vectors
X1 = energy_data.drop("Y1", axis = "columns")
y1 = energy_data['Y1']

X2 = energy_data.drop("Y2", axis = "columns")
y2 = energy_data['Y2']
In [ ]:
#Split data
X1_train, X1_test, y1_train, y1_test = train_test_split(X1, y1, test_size=0.2)
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.2)
In [ ]:
#Create and fit the model
model_Y1 = LinearRegression()
model_Y1.fit(X1_train, y1_train)

model_Y2 = LinearRegression()
model_Y2.fit(X2_train, y2_train)
Out[ ]:
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [ ]:
# Create predictions
Y1_pred = model_Y1.predict(X1_test)
Y2_pred = model_Y2.predict(X2_test)
In [ ]:
#Model evaluations
print(X1_train.columns)
print('Intercept', model_Y1.intercept_)
print('MSE: %.2f' % mean_squared_error(y1_test, Y1_pred))
print('R^2: %.2f' % r2_score(y1_test, Y1_pred))

print(X2_train.columns)
print('Intercept', model_Y2.intercept_)
print('MSE: %.2f' % mean_squared_error(y2_test, Y2_pred))
print('R^2: %.2f' % r2_score(y2_test, Y2_pred))
Index(['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'Y2'], dtype='object')
Intercept 12.201925582622632
MSE: 3.47
R^2: 0.96
Index(['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'Y1'], dtype='object')
Intercept 20.81564233848573
MSE: 3.43
R^2: 0.96
In [ ]:
#Create a scatter plot of the predicted values vs. actual values
plt.scatter(y1_test, Y1_pred, alpha = 0.8, marker = "^", color = "red")
plt.xlabel("Actual values")
plt.ylabel("Predicted values")
plt.title("Actual vs Predicited Values for Heating Loads")
plt.show()

plt.scatter(y2_test, Y2_pred, alpha = 0.8, marker = "^", color = "blue")
plt.xlabel("Actual values")
plt.ylabel("Predicted values")
plt.title("Actual vs Predicited Values for Cooling Loads")
plt.show()
No description has been provided for this image
No description has been provided for this image