# Pgr-7
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
import seaborn as sns
import warnings
# Suppress every warning for the rest of the run. Note that this also hides
# deprecation and numerical warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

# --- Part 1: multiple linear regression on the Boston housing CSV ---
# Load the data; MEDV is the target and every other column is a feature.
df_boston = pd.read_csv("Boston housing dataset.csv")
print("Columns in dataset:", df_boston.columns.tolist())
X = df_boston.drop("MEDV", axis=1)
y = df_boston["MEDV"]

# Position of RM inside the feature frame X. Looking it up on X itself
# (instead of "position in df_boston minus one") stays correct wherever
# MEDV sits in the CSV; the old offset pointed at the column before RM
# whenever MEDV came after RM.
rm_index = X.columns.get_loc("RM")

# Split first, then impute: the column means used to fill missing values are
# learned from the training rows only, so no information from the test rows
# leaks into the fitted model. The split itself is unchanged (same size and
# random_state).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(imputer.fit_transform(X_train), columns=X.columns, index=X_train.index)
X_test = pd.DataFrame(imputer.transform(X_test), columns=X.columns, index=X_test.index)

# Fit ordinary least squares on all features and predict the held-out rows.
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
y_pred = lr_model.predict(X_test)

# Evaluate on the test split.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print("\nLinear Regression (Boston Housing CSV):")
print(f'Mean Squared Error: {mse:.2f}')
print(f'Root Mean Squared Error: {rmse:.2f}')
print(f'R-squared: {r2:.2f}')

# Actual vs. predicted MEDV against RM. Predictions come from a model fitted
# on every feature, so they do not form a curve in RM alone; they are drawn
# as points rather than joined by a line through unsorted x values.
plt.scatter(X_test.iloc[:, rm_index], y_test, color='blue', label='Actual')
plt.scatter(X_test.iloc[:, rm_index], y_pred, color='red', label='Predicted', alpha=0.6)
plt.title("Linear Regression: Boston Housing (RM vs MEDV)")
plt.xlabel("Average Number of Rooms (RM)")
plt.ylabel("Median House Value (MEDV)")
plt.legend()
plt.show()
# --- Part 2: degree-3 polynomial regression on the seaborn 'mpg' dataset ---
# Load the dataset, then discard every row that contains a missing value.
df_auto = sns.load_dataset('mpg')
df_auto = df_auto.dropna()

# Target is mpg; the two predictors are horsepower and weight.
y_poly = df_auto['mpg']
X_poly = df_auto[['horsepower', 'weight']]

# Hold out 20% of the rows for evaluation (fixed seed for repeatability).
X_train_poly, X_test_poly, y_train_poly, y_test_poly = train_test_split(
    X_poly,
    y_poly,
    test_size=0.2,
    random_state=42,
)

# Learn the polynomial expansion from the training rows, then apply the same
# expansion to both splits.
poly = PolynomialFeatures(degree=3)
poly.fit(X_train_poly)
X_train_poly_transformed = poly.transform(X_train_poly)
X_test_poly_transformed = poly.transform(X_test_poly)

# Linear regression on the expanded features, then predict the test rows.
poly_model = LinearRegression().fit(X_train_poly_transformed, y_train_poly)
y_pred_poly = poly_model.predict(X_test_poly_transformed)

# Test-split metrics.
mse_poly = mean_squared_error(y_test_poly, y_pred_poly)
r2_poly = r2_score(y_test_poly, y_pred_poly)
rmse_poly = np.sqrt(mse_poly)

print("\nPolynomial Regression (Auto MPG):")
print(f'Mean Squared Error: {mse_poly:.2f}')
print(f'Root Mean Squared Error: {rmse_poly:.2f}')
print(f'R-squared: {r2_poly:.2f}')

# Actual and predicted mpg plotted against the test rows' horsepower.
horsepower_test = X_test_poly['horsepower']
plt.scatter(horsepower_test, y_test_poly, color='blue', label='Actual')
plt.scatter(horsepower_test, y_pred_poly, color='red', label='Predicted', alpha=0.6)
plt.title("Polynomial Regression: Auto MPG (Horsepower vs MPG)")
plt.xlabel("Horsepower")
plt.ylabel("Miles per Gallon (MPG)")
plt.legend()
plt.show()
# Comments
# Post a Comment