import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

house_data = pd.read_csv("data.csv")
house_data.head()

X = pd.DataFrame({'intercept' : np.ones(house_data.shape[0]), 'bedrooms' : house_data['bedrooms'], 
                  'bathrooms' : house_data['bathrooms']})
X = X.to_numpy()

def gradient_descent(y, X, beta, n, alpha, iterations):
    for i in range(0, 100):
        gradient =  X.T @ (X @ beta - y)/ n
        beta = beta - alpha * gradient
        
    return beta

y = house_data['price']
y = y.to_numpy()
beta = np.ones(X.shape[1])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)
n = X_train.shape[0]

beta = gradient_descent(y_train, X_train, beta, n, 0.1, 100)

print(beta)

[ -1639.4363617   22469.94994039 220044.25999952]

def mean_squared_error(y, X, beta, n):
    mse = ((X @ beta - y).T @ (X @ beta - y)) / n
    return mse

def gradient_descent(y, X, beta, n, alpha, iterations, tolerance=1e-5, check_every=100):
    prev_mse = mean_squared_error(y, X, beta, n)
    
    for i in range(iterations):
        gradient = (1 / n) * X.T @ (X @ beta - y)
        beta = beta - alpha * gradient

        # Check MSE change every check_every steps
        if i % check_every == 0:
            current_mse = mean_squared_error(y, X, beta, n)
            improvement = abs(prev_mse - current_mse)

            print(f"Iteration {i}: MSE = {current_mse:.4f}, MSE decreased by = {improvement:.6f}")

            if improvement < tolerance:
                print(f"Early stopping at iteration {i} — MSE improvement below threshold ({tolerance})")
                return beta

            prev_mse = current_mse  # Update for next check

    print(f"Completed full {iterations} iterations. Final MSE: {mean_squared_error(y, X, beta, n):.4f}")
    return beta
beta = gradient_descent(y,X,beta, n, 0.01, 100000)

Iteration 0: MSE = 423321595661.6596, MSE decreased by = 1573807.330444
Iteration 100: MSE = 423314630111.8835, MSE decreased by = 6965549.776062
Iteration 200: MSE = 423312349750.8951, MSE decreased by = 2280360.988403
Iteration 300: MSE = 423311256831.1976, MSE decreased by = 1092919.697571
Iteration 400: MSE = 423310651966.1788, MSE decreased by = 604865.018738
Iteration 500: MSE = 423310263571.7040, MSE decreased by = 388394.474854
Iteration 600: MSE = 423309983438.6240, MSE decreased by = 280133.080017
Iteration 700: MSE = 423309766207.7997, MSE decreased by = 217230.824219
Iteration 800: MSE = 423309591078.8568, MSE decreased by = 175128.942993
Iteration 900: MSE = 423309447161.5002, MSE decreased by = 143917.356506
Iteration 1000: MSE = 423309327819.0140, MSE decreased by = 119342.486267
Iteration 1100: MSE = 423309228440.5083, MSE decreased by = 99378.505676
Iteration 1200: MSE = 423309145527.7907, MSE decreased by = 82912.717590
Iteration 1300: MSE = 423309076292.2950, MSE decreased by = 69235.495667
Iteration 1400: MSE = 423309018454.8802, MSE decreased by = 57837.414795
Iteration 1500: MSE = 423308970130.3876, MSE decreased by = 48324.492676
Iteration 1600: MSE = 423308929750.8499, MSE decreased by = 40379.537659
Iteration 1700: MSE = 423308896008.7933, MSE decreased by = 33742.056580
Iteration 1800: MSE = 423308867812.6891, MSE decreased by = 28196.104187
Iteration 1900: MSE = 423308844250.8060, MSE decreased by = 23561.883179
Iteration 2000: MSE = 423308824561.4093, MSE decreased by = 19689.396667
Iteration 2100: MSE = 423308808108.0151, MSE decreased by = 16453.394226
Iteration 2200: MSE = 423308794358.7682, MSE decreased by = 13749.246887
Iteration 2300: MSE = 423308782869.2329, MSE decreased by = 11489.535278
Iteration 2400: MSE = 423308773268.0205, MSE decreased by = 9601.212402
Iteration 2500: MSE = 423308765244.7816, MSE decreased by = 8023.238892
Iteration 2600: MSE = 423308758540.1738, MSE decreased by = 6704.607849
Iteration 2700: MSE = 423308752937.4780, MSE decreased by = 5602.695801
Iteration 2800: MSE = 423308748255.5933, MSE decreased by = 4681.884705
Iteration 2900: MSE = 423308744343.1832, MSE decreased by = 3912.410095
Iteration 3000: MSE = 423308741073.7833, MSE decreased by = 3269.399902
Iteration 3100: MSE = 423308738341.7140, MSE decreased by = 2732.069275
Iteration 3200: MSE = 423308736058.6641, MSE decreased by = 2283.049866
Iteration 3300: MSE = 423308734150.8368, MSE decreased by = 1907.827332
Iteration 3400: MSE = 423308732556.5638, MSE decreased by = 1594.273010
Iteration 3500: MSE = 423308731224.3117, MSE decreased by = 1332.252075
Iteration 3600: MSE = 423308730111.0172, MSE decreased by = 1113.294495
Iteration 3700: MSE = 423308729180.6943, MSE decreased by = 930.322937
Iteration 3800: MSE = 423308728403.2712, MSE decreased by = 777.423035
Iteration 3900: MSE = 423308727753.6189, MSE decreased by = 649.652344
Iteration 4000: MSE = 423308727210.7377, MSE decreased by = 542.881165
Iteration 4100: MSE = 423308726757.0798, MSE decreased by = 453.657898
Iteration 4200: MSE = 423308726377.9813, MSE decreased by = 379.098511
Iteration 4300: MSE = 423308726061.1883, MSE decreased by = 316.793030
Iteration 4400: MSE = 423308725796.4605, MSE decreased by = 264.727783
Iteration 4500: MSE = 423308725575.2412, MSE decreased by = 221.219299
Iteration 4600: MSE = 423308725390.3795, MSE decreased by = 184.861755
Iteration 4700: MSE = 423308725235.9003, MSE decreased by = 154.479126
Iteration 4800: MSE = 423308725106.8097, MSE decreased by = 129.090637
Iteration 4900: MSE = 423308724998.9356, MSE decreased by = 107.874084
Iteration 5000: MSE = 423308724908.7906, MSE decreased by = 90.145020
Iteration 5100: MSE = 423308724833.4611, MSE decreased by = 75.329468
Iteration 5200: MSE = 423308724770.5121, MSE decreased by = 62.948975
Iteration 5300: MSE = 423308724717.9090, MSE decreased by = 52.603149
Iteration 5400: MSE = 423308724673.9510, MSE decreased by = 43.958008
Iteration 5500: MSE = 423308724637.2178, MSE decreased by = 36.733215
Iteration 5600: MSE = 423308724606.5217, MSE decreased by = 30.696106
Iteration 5700: MSE = 423308724580.8705, MSE decreased by = 25.651123
Iteration 5800: MSE = 423308724559.4351, MSE decreased by = 21.435425
Iteration 5900: MSE = 423308724541.5227, MSE decreased by = 17.912415
Iteration 6000: MSE = 423308724526.5543, MSE decreased by = 14.968445
Iteration 6100: MSE = 423308724514.0458, MSE decreased by = 12.508423
Iteration 6200: MSE = 423308724503.5931, MSE decreased by = 10.452698
Iteration 6300: MSE = 423308724494.8585, MSE decreased by = 8.734680
Iteration 6400: MSE = 423308724487.5594, MSE decreased by = 7.299011
Iteration 6500: MSE = 423308724481.4598, MSE decreased by = 6.099609
Iteration 6600: MSE = 423308724476.3629, MSE decreased by = 5.096985
Iteration 6700: MSE = 423308724472.1033, MSE decreased by = 4.259521
Iteration 6800: MSE = 423308724468.5439, MSE decreased by = 3.559387
Iteration 6900: MSE = 423308724465.5698, MSE decreased by = 2.974182
Iteration 7000: MSE = 423308724463.0842, MSE decreased by = 2.485535
Iteration 7100: MSE = 423308724461.0071, MSE decreased by = 2.077087
Iteration 7200: MSE = 423308724459.2716, MSE decreased by = 1.735535
Iteration 7300: MSE = 423308724457.8210, MSE decreased by = 1.450562
Iteration 7400: MSE = 423308724456.6091, MSE decreased by = 1.211914
Iteration 7500: MSE = 423308724455.5964, MSE decreased by = 1.012756
Iteration 7600: MSE = 423308724454.7500, MSE decreased by = 0.846375
Iteration 7700: MSE = 423308724454.0427, MSE decreased by = 0.707275
Iteration 7800: MSE = 423308724453.4517, MSE decreased by = 0.591003
Iteration 7900: MSE = 423308724452.9578, MSE decreased by = 0.493958
Iteration 8000: MSE = 423308724452.5451, MSE decreased by = 0.412659
Iteration 8100: MSE = 423308724452.2001, MSE decreased by = 0.344971
Iteration 8200: MSE = 423308724451.9119, MSE decreased by = 0.288208
Iteration 8300: MSE = 423308724451.6711, MSE decreased by = 0.240784
Iteration 8400: MSE = 423308724451.4699, MSE decreased by = 0.201233
Iteration 8500: MSE = 423308724451.3018, MSE decreased by = 0.168152
Iteration 8600: MSE = 423308724451.1613, MSE decreased by = 0.140503
Iteration 8700: MSE = 423308724451.0437, MSE decreased by = 0.117554
Iteration 8800: MSE = 423308724450.9455, MSE decreased by = 0.098206
Iteration 8900: MSE = 423308724450.8636, MSE decreased by = 0.081909
Iteration 9000: MSE = 423308724450.7951, MSE decreased by = 0.068481
Iteration 9100: MSE = 423308724450.7377, MSE decreased by = 0.057373
Iteration 9200: MSE = 423308724450.6898, MSE decreased by = 0.047913
Iteration 9300: MSE = 423308724450.6500, MSE decreased by = 0.039856
Iteration 9400: MSE = 423308724450.6165, MSE decreased by = 0.033508
Iteration 9500: MSE = 423308724450.5886, MSE decreased by = 0.027893
Iteration 9600: MSE = 423308724450.5653, MSE decreased by = 0.023254
Iteration 9700: MSE = 423308724450.5457, MSE decreased by = 0.019653
Iteration 9800: MSE = 423308724450.5294, MSE decreased by = 0.016235
Iteration 9900: MSE = 423308724450.5159, MSE decreased by = 0.013550
Iteration 10000: MSE = 423308724450.5045, MSE decreased by = 0.011414
Iteration 10100: MSE = 423308724450.4949, MSE decreased by = 0.009583
Iteration 10200: MSE = 423308724450.4870, MSE decreased by = 0.007874
Iteration 10300: MSE = 423308724450.4803, MSE decreased by = 0.006653
Iteration 10400: MSE = 423308724450.4748, MSE decreased by = 0.005554
Iteration 10500: MSE = 423308724450.4702, MSE decreased by = 0.004639
Iteration 10600: MSE = 423308724450.4662, MSE decreased by = 0.003906
Iteration 10700: MSE = 423308724450.4630, MSE decreased by = 0.003235
Iteration 10800: MSE = 423308724450.4603, MSE decreased by = 0.002686
Iteration 10900: MSE = 423308724450.4581, MSE decreased by = 0.002258
Iteration 11000: MSE = 423308724450.4562, MSE decreased by = 0.001892
Iteration 11100: MSE = 423308724450.4546, MSE decreased by = 0.001587
Iteration 11200: MSE = 423308724450.4532, MSE decreased by = 0.001343
Iteration 11300: MSE = 423308724450.4523, MSE decreased by = 0.000916
Iteration 11400: MSE = 423308724450.4514, MSE decreased by = 0.000977
Iteration 11500: MSE = 423308724450.4504, MSE decreased by = 0.000916
Iteration 11600: MSE = 423308724450.4499, MSE decreased by = 0.000549
Iteration 11700: MSE = 423308724450.4494, MSE decreased by = 0.000488
Iteration 11800: MSE = 423308724450.4490, MSE decreased by = 0.000427
Iteration 11900: MSE = 423308724450.4484, MSE decreased by = 0.000549
Iteration 12000: MSE = 423308724450.4482, MSE decreased by = 0.000244
Iteration 12100: MSE = 423308724450.4479, MSE decreased by = 0.000244
Iteration 12200: MSE = 423308724450.4477, MSE decreased by = 0.000244
Iteration 12300: MSE = 423308724450.4476, MSE decreased by = 0.000122
Iteration 12400: MSE = 423308724450.4473, MSE decreased by = 0.000244
Iteration 12500: MSE = 423308724450.4473, MSE decreased by = 0.000061
Iteration 12600: MSE = 423308724450.4471, MSE decreased by = 0.000183
Iteration 12700: MSE = 423308724450.4471, MSE decreased by = 0.000000
Early stopping at iteration 12700 — MSE improvement below threshold (1e-05)

print(beta)
predictions = X_test @ beta

[  4391.97937421  19231.34338782 223141.57667692]

print(predictions)

[619939.95122996 602617.24383637 787296.13373765 ... 658402.6380056
 471815.11211009 658402.6380056 ]

mean_absolute_error(y_test, predictions)

230680.8267999549

	date	price	bedrooms	bathrooms	sqft_living	sqft_lot	floors	view	condition	sqft_above	sqft_basement	yr_built	yr_renovated	street	city	statezip	country
0	2014-05-02 00:00:00	313000.0	3.0	1.50	1340	7912	1.5	0	3	1340	0	1955	2005	18810 Densmore Ave N	Shoreline	WA 98133	USA
1	2014-05-02 00:00:00	2384000.0	5.0	2.50	3650	9050	2.0	4	5	3370	280	1921	0	709 W Blaine St	Seattle	WA 98119	USA
2	2014-05-02 00:00:00	342000.0	3.0	2.00	1930	11947	1.0	0	4	1930	0	1966	0	26206-26214 143rd Ave SE	Kent	WA 98042	USA
3	2014-05-02 00:00:00	420000.0	3.0	2.25	2000	8030	1.0	0	4	1000	1000	1963	0	857 170th Pl NE	Bellevue	WA 98008	USA
4	2014-05-02 00:00:00	550000.0	4.0	2.50	1940	10500	1.0	0	4	1140	800	1976	1992	9105 170th Ave NE	Redmond	WA 98052	USA