# Pgr-7

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")


# --- Part 1: multiple linear regression on the Boston housing data ---
# The CSV is read from the current working directory.
df_boston = pd.read_csv("Boston housing dataset.csv")

print("Columns in dataset:", df_boston.columns.tolist())

# Every column except the target is used as a predictor; MEDV is the target.
X = df_boston.drop("MEDV", axis=1)
y = df_boston["MEDV"]

# Position of RM inside the feature frame. Looking it up on X (rather than
# on df_boston with a manual "- 1" offset) stays correct wherever MEDV
# happens to sit in the CSV.
rm_index = X.columns.get_loc("RM")

# Split BEFORE imputing so the test rows do not contribute to the column
# means used to fill missing values (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

imputer = SimpleImputer(strategy='mean')
# The imputer returns plain arrays; rebuild DataFrames with the original
# column names and row labels so positional/label lookups keep working.
X_train = pd.DataFrame(imputer.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(imputer.transform(X_test), columns=X_test.columns, index=X_test.index)

lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
y_pred = lr_model.predict(X_test)

# Evaluate on the held-out 20 % of rows.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("\nLinear Regression (Boston Housing CSV):")
print(f'Mean Squared Error: {mse:.2f}')
print(f'Root Mean Squared Error: {rmse:.2f}')
print(f'R-squared: {r2:.2f}')

# The model uses all features, so its predictions are not a single curve in
# RM; and the test rows are not sorted by RM. Both series are therefore
# drawn as scatter points rather than joining predictions with a line.
rm_test = X_test.iloc[:, rm_index]
plt.scatter(rm_test, y_test, color='blue', label='Actual')
plt.scatter(rm_test, y_pred, color='red', label='Predicted', alpha=0.6)
plt.title("Linear Regression: Boston Housing (RM vs MEDV)")
plt.xlabel("Average Number of Rooms (RM)")
plt.ylabel("Median House Value (MEDV)")
plt.legend()
plt.show()

# --- Part 2: polynomial regression on seaborn's 'mpg' dataset ---
# Rows containing any missing value are discarded up front.
df_auto = sns.load_dataset('mpg').dropna()

# Target: fuel economy. Predictors: engine horsepower and vehicle weight.
y_poly = df_auto['mpg']
X_poly = df_auto[['horsepower', 'weight']]

# Hold out 20 % of the rows for evaluation (fixed seed for repeatability).
X_train_poly, X_test_poly, y_train_poly, y_test_poly = train_test_split(
    X_poly,
    y_poly,
    test_size=0.2,
    random_state=42,
)

# Degree-3 feature expansion: fitted on the training rows, then the same
# fitted transformer is applied to the test rows.
poly = PolynomialFeatures(degree=3)
X_train_poly_transformed = poly.fit_transform(X_train_poly)
X_test_poly_transformed = poly.transform(X_test_poly)

# Ordinary least squares on the expanded features (fit() returns the model).
poly_model = LinearRegression().fit(X_train_poly_transformed, y_train_poly)
y_pred_poly = poly_model.predict(X_test_poly_transformed)

# Held-out error metrics.
mse_poly = mean_squared_error(y_test_poly, y_pred_poly)
rmse_poly = np.sqrt(mse_poly)
r2_poly = r2_score(y_test_poly, y_pred_poly)

print("\nPolynomial Regression (Auto MPG):")
print(f'Mean Squared Error: {mse_poly:.2f}')
print(f'Root Mean Squared Error: {rmse_poly:.2f}')
print(f'R-squared: {r2_poly:.2f}')

# Actual vs. predicted MPG, both plotted against test-set horsepower.
horsepower_test = X_test_poly['horsepower']
plt.scatter(horsepower_test, y_test_poly, color='blue', label='Actual')
plt.scatter(horsepower_test, y_pred_poly, color='red', label='Predicted', alpha=0.6)
plt.title("Polynomial Regression: Auto MPG (Horsepower vs MPG)")
plt.xlabel("Horsepower")
plt.ylabel("Miles per Gallon (MPG)")
plt.legend()
plt.show()

# Comments
#
# Popular posts from this blog
#
# Pgr-1
#
# Pgr-4
#
# Pgr-5