Posts

Showing posts from April, 2025

Pgr-5

# NOTE(review): Pgr-5 — a k-Nearest-Neighbors demo (random 1-D data, first 50 points
# labeled by the rule x <= 0.5 -> Class1 else Class2, remaining 50 classified by
# `knn_classifier` for several k values). The blog export flattened this program onto
# one line and TRUNCATED it (the text ends with "Remaining 50 poi..."), so the tail of
# the script is missing. Recover the original source before reformatting or editing;
# do not reconstruct the missing part by guesswork.
import numpy as np import matplotlib.pyplot as plt from collections import Counter data = np.random.rand(100) labels = ["Class1" if x <= 0.5 else "Class2" for x in data[:50]] def euclidean_distance(x1, x2):  return abs(x1 - x2) def knn_classifier(train_data, train_labels, test_point, k):  distances = [(euclidean_distance(test_point, train_data[i]), train_labels[i]) for i in   range(len(train_data))]  distances.sort(key=lambda x: x[0])  k_nearest_neighbors = distances[:k]  k_nearest_labels = [label for _, label in k_nearest_neighbors]  return Counter(k_nearest_labels).most_common(1)[0][0]  train_data = data[:50]  train_labels = labels  test_data = data[50:]  k_values = [1, 2, 3, 4, 5, 20, 30]  print("--- k-Nearest Neighbors Classification ---")  print("Training dataset: First 50 points labeled based on the rule (x <= 0.5 -> Class1, x > 0.5 ->  Class2)")  print("Testing dataset: Remaining 50 poi...

Pgr-4

import pandas as pd


def find_s_algorithm(file_path):
    """Run the Find-S algorithm over a CSV training set.

    The CSV is read with pandas; every column except the last is treated as an
    attribute and the last column as the class label. The most specific
    hypothesis consistent with all positive ('Yes') examples is built by
    starting from the first positive row and generalizing each attribute to
    '?' whenever a later positive row disagrees.

    Parameters:
        file_path: path to the training-data CSV file.

    Returns:
        A list of attribute values / '?' wildcards, or None when the data
        contains no positive example (the original code raised NameError in
        that case).
    """
    data = pd.read_csv(file_path)
    print("Training data:")
    print(data)

    attributes = data.columns[:-1]
    class_label = data.columns[-1]

    # Single pass: the first positive row seeds the hypothesis, every later
    # positive row generalizes it. (The original made two passes and crashed
    # with NameError when no 'Yes' row existed.)
    hypothesis = None
    for _, row in data.iterrows():
        if row[class_label] != 'Yes':
            continue  # Find-S ignores negative examples
        if hypothesis is None:
            hypothesis = list(row[attributes])
        else:
            for i, value in enumerate(row[attributes]):
                if hypothesis[i] != value:
                    hypothesis[i] = '?'  # Generalize hypothesis
    return hypothesis


if __name__ == "__main__":
    # Guarded so importing this module does not require the CSV to exist.
    file_path = 'training_data.csv'  # Provide the file path
    hypothesis = find_s_algorithm(file_path)
    print("\nThe final hypothesis is:", hypothesis)

Pgr-3

# NOTE(review): Pgr-3 — PCA of the Iris dataset (standardize, project to 2 components,
# scatter-plot per class with explained-variance percentages on the axis labels).
# The blog export flattened this program onto one line and TRUNCATED it after
# plt.legend() (the trailing "..." — presumably at least a plt.show() is missing, but
# that is unconfirmed). Recover the original source before reformatting or editing;
# do not reconstruct the missing part by guesswork.
import numpy as np import pandas as pd import matplotlib.pyplot as plt from sklearn.datasets import load_iris from sklearn.preprocessing import StandardScaler from sklearn.decomposition import PCA iris = load_iris() df = pd.DataFrame(data=iris.data, columns=iris.feature_names) df['target'] = iris.target df['target_names'] = pd.Categorical.from_codes(iris.target, iris.target_names) X = df[iris.feature_names] X_scaled = StandardScaler().fit_transform(X) pca = PCA(n_components=2) X_pca = pca.fit_transform(X_scaled) plt.figure(figsize=(10, 8)) for target, target_name in zip(sorted(df['target'].unique()), sorted(df['target_names'].unique())):  mask = df['target'] == target  plt.scatter(X_pca[mask, 0], X_pca[mask, 1], label=target_name, alpha=0.8) plt.xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.2%} variance)') plt.ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.2%} variance)') plt.title('PCA of Iris Dataset') plt.legend() ...

Pgr-2

# Pgr-2: visualize feature relationships in the California Housing dataset —
# a correlation-matrix heatmap followed by a full pair plot.
#
# The blog export had mangled the import lines into invalid syntax
# ("importseaborn assns", "from sklearn.datasetsimport ...") and flattened the
# whole script onto one line; both are repaired here. Statement order and every
# runtime string are unchanged.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Load the dataset into a DataFrame; the regression target becomes a 'target' column.
california_housing = fetch_california_housing()
df = pd.DataFrame(california_housing.data, columns=california_housing.feature_names)
df['target'] = california_housing.target

# Pairwise Pearson correlations of all numeric columns, rendered as an
# annotated heatmap.
correlation_matrix = df.corr()
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f', cbar=True)
plt.title("Correlation Matrix Heatmap")
plt.show()

# Pair plot of every feature against every other (slow for this many columns;
# alpha keeps the dense scatter readable).
sns.pairplot(df, height=2.5, plot_kws={'alpha': 0.7})
plt.suptitle("Pair Plot of California Housing Dataset", y=1.02)
plt.show()

Pgr-1

# NOTE(review): Pgr-1 — exploratory analysis of the California Housing dataset
# (print description and head, histogram grid, per-feature box plots, then an
# outlier summary). The blog export flattened this program onto one line and
# TRUNCATED it mid-statement ("df.select_dtypes(include=['float64', ..."), so the
# outlier-detection section is incomplete. Also note: plot_boxplots uses `sns`,
# which is never imported in the visible text — confirm against the original
# source. Recover the original before reformatting or editing; do not
# reconstruct the missing part by guesswork.
import pandas as pd from sklearn.datasets import fetch_california_housing california_housing = fetch_california_housing() print(california_housing.DESCR) df = pd.DataFrame(california_housing.data, columns=california_housing.feature_names) df['target'] = california_housing.target print("First 5 rows of the dataset:") print(df.head()) # Display the first 5 rows def plot_histograms(df):  df.hist(bins=30, figsize=(12, 10))  plt.suptitle("Histograms of Numerical Features", fontsize=16)  plt.show() def plot_boxplots(df):  plt.figure(figsize=(12, 10))  for i, feature in enumerate(df.columns):   plt.subplot(3, 4, i+1) # Adjust the number of rows and columns accordingly   sns.boxplot(df[feature])   plt.title(f'Box Plot of {feature}')  plt.tight_layout()  plt.show() import matplotlib.pyplot as plt plot_histograms(df) plot_boxplots(df) print("Outliers Detection:") outliers_summary = {} numerical_features = df.select_dtypes(include=['float64', ...