AI Cho Mọi Người

AI Cho Mọi Người

Vectorization

 

 

 

Trong bài này, chúng ta sẽ cài đặt bài toán dự đoán giá nhà dùng linear regression bằng hai cách: cài đặt dùng chỉ mục index và dùng những hàm trong numpy đã hỗ trợ kỹ thuật vectorization. Sau đó, chúng ta sẽ quan sát tốc độ của hai loại cài đặt.

 

1. Dự đoán giá nhà theo diện tích

Chúng ta sẽ sử dụng dữ liệu dự đoán giá nhà theo diện tích nhà, được hiển thị ở bảng sau

3.87, 5.38
2.29, 3.45
5.17, 6.94
4.93, 7.1
4, 5.47
3.97, 5.31
6.77, 8.89
2.48, 4.08
2.85, 4.59
6.43, 8.52
6.99, 8.63
2.74, 4.29
2.07, 3.34
3.64, 5.48
5.66, 7.4
1.85, 3.3
4.73, 6.19
4.55, 6.46
6.4, 8.65
2.14, 3.35

 

Code để đọc và xử lý data

import numpy as np
from numpy import genfromtxt
import matplotlib.pyplot as plt

# Read the comma-separated sample file into a 2-D array.
data = genfromtxt('my_house_price_prediction.csv', delimiter=',')

# m: number of rows (samples); n: number of columns in the file.
m, n = data.shape[0], data.shape[1]

# Column 0 is used as the feature, column 1 as the target.
x, y = data[:, 0], data[:, 1]
# Design matrix: a leading column of ones (bias term) next to the feature.
X = np.c_[np.ones(m), x]

# Scatter plot of the raw samples.
plt.scatter(x, y)
plt.xlabel('Diện tích nhà (x 100$m^2$)')
plt.ylabel('Giá nhà (chục lượng vàng)')
plt.show()

 

Cài theo cách dùng chỉ mục index

# No vectorization - batch gradient descent

# Random starting parameters; also captured below as the default value of
# loss_function's ``theta`` argument.
theta = np.random.randn(n)


# loss function
def loss_function(theta = theta, x=X, y=y, m=m, n=n):
    """Return the mean squared error of the linear model, using index loops.

    Args:
        theta: model parameters, indexed as ``theta[j]`` for ``j < n``.
        x: design matrix, indexed as ``x[i, j]`` (sample ``i``, column ``j``).
        y: target values, indexed as ``y[i]`` for ``i < m``.
        m: number of samples to iterate over.
        n: number of parameters / columns of ``x`` to iterate over.

    Returns:
        ``(1/m) * sum_i (sum_j theta[j] * x[i, j] - y[i]) ** 2``.

    Note:
        The default values are bound once, when this ``def`` statement is
        executed; rebinding the module-level names later does not change them.
    """
    loss = 0
    for i in range(m):
        # Prediction for sample i: dot product of theta with row i.
        hypo_i = 0
        for j in range(n):
            # Index the ``x`` argument, not the module-level ``X``, so that a
            # matrix passed by the caller is actually the one being evaluated.
            hypo_i += theta[j]*x[i,j]
        loss_i = (hypo_i - y[i])**2
        loss += loss_i
    loss = (1/m)*loss

    return loss


# Training: batch gradient descent written with explicit index loops.
learning_rate = 0.01
epoches = 50
theta = np.ones(n)
loss_list = []

for itr in range(epoches):
    # One partial derivative per parameter, all evaluated at the current theta.
    partials = []
    for k in range(n):
        acc = 0

        for i in range(m):
            # Forward pass: prediction for sample i.
            pred = 0
            for j in range(n):
                pred += theta[j] * X[i, j]

            # Contribution of sample i to d(loss)/d(theta[k]).
            acc += (pred - y[i]) * X[i, k]

        partials.append((2/m) * acc)

    # Update every parameter at once, then record the loss for this epoch.
    theta = theta - learning_rate * np.array(partials)
    loss_list.append(loss_function(theta))


# Loss value after each epoch.
plt.plot(np.arange(epoches), loss_list)
plt.xlabel('epoch')
plt.ylabel('Giá trị loss')

 

Giá trị loss qua các vòng lặp

 

 

Cài theo phương pháp vectorization

# Vectorized implementation

# Starting parameters drawn with np.random.randn (one value per column of X).
theta = np.random.randn(n)


def cost(theta, X=X, y=y, m=m):
    """Return the mean squared error of ``np.dot(X, theta)`` against ``y``.

    Args:
        theta: parameter vector.
        X: design matrix (defaults to the module-level ``X`` at definition time).
        y: target vector (defaults to the module-level ``y`` at definition time).
        m: divisor of the squared-error sum (defaults to the module-level ``m``).

    Returns:
        ``(1/m)`` times the dot product of the residual vector with itself.
    """
    residual = np.dot(X, theta) - y
    return (1/m) * np.dot(residual, residual)

# Hyperparameters for the vectorized batch gradient descent run.
learning_rate = 0.01
epoches = 500
cost_list = []

# NOTE(review): theta is not reset in this block, so the run starts from the
# np.random.randn draw made earlier in the file and differs between executions.
for epoch in range(epoches):
    # Residuals of the current predictions.
    residual = X.dot(theta) - y
    # Gradient of the mean squared error with respect to theta.
    gradients = (2/m) * X.T.dot(residual)

    theta = theta - learning_rate * gradients
    cost_list.append(cost(theta))

# Loss value after each epoch.
plt.plot(np.arange(epoches), cost_list)
plt.xlabel('epoch')
plt.ylabel('Giá trị loss')

 

Giá trị loss qua các vòng lặp

 

2. Dự đoán giá nhà Boston

Dữ liệu nhà Boston được mô tả ở bảng sau

crim, zn, indus, chas, nox, rm, age, dis, rad, tax, ptratio, black, lstat, medv
0.00632182.3100.5386.57565.24.09129615.3396.94.9824
0.0273107.0700.4696.42178.94.9671224217.8396.99.1421.6
0.0323702.1800.4586.99845.86.0622322218.7394.632.9433.4
0.0690502.1800.4587.14754.26.0622322218.7396.95.3336.2
0.0882912.57.8700.5246.01266.65.5605531115.2395.612.4322.9
0.2248912.57.8700.5246.37794.36.3467531115.2392.5220.4515
0.1174712.57.8700.5246.00982.96.2267531115.2396.913.2718.9
0.0937812.57.8700.5245.889395.4509531115.2390.515.7121.7
0.6297608.1400.5385.94961.84.7075430721396.98.2620.4
0.6379608.1400.5386.09684.54.4619430721380.0210.2618.2
0.6273908.1400.5385.83456.54.4986430721395.628.4719.9
1.0539308.1400.5385.93529.34.4986430721386.856.5823.1
0.8027108.1400.5385.45636.63.7965430721288.9911.6920.2
1.2517908.1400.5385.5798.13.7979430721376.5721.0213.6
0.8520408.1400.5385.96589.24.0123430721392.5313.8319.6
1.2324708.1400.5386.14291.73.9769430721396.918.7215.2
0.9884308.1400.5385.8131004.0952430721394.5419.8814.5
0.9557708.1400.5386.04788.84.4534430721306.3817.2814.8
1.1308108.1400.5385.71394.14.233430721360.1722.612.7
1.3547208.1400.5386.0721004.175430721376.7313.0414.5
1.6128208.1400.5386.09696.93.7598430721248.3120.3413.5
0.1750505.9600.4995.96630.23.8473527919.2393.4310.1324.7
0.02763752.9500.4286.59521.85.4011325218.3395.634.3230.8
0.03359752.9500.4287.02415.85.4011325218.3395.621.9834.9
0.141506.9100.4486.1696.65.7209323317.9383.375.8125.3
0.1593606.9100.4486.2116.55.7209323317.9394.467.4424.7
0.1226906.9100.4486.069405.7209323317.9389.399.5521.2
0.1714206.9100.4485.68233.85.1004323317.9396.910.2119.3
0.1883606.9100.4485.78633.35.1004323317.9396.914.1520
0.2292706.9100.4486.0385.55.6894323317.9392.7418.816.6
0.2197706.9100.4485.602626.0877323317.9396.916.219.4
0.08873215.6400.4395.96345.76.8147424316.8395.5613.4519.7
0.04337215.6400.4396.115636.8147424316.8393.979.4320.5
0.04981215.6400.4395.99821.46.8147424316.8396.98.4323.4
0.013675400.415.88847.67.3197346921.1396.914.818.9
0.01311901.2200.4037.24921.98.6966522617.9395.934.8135.4
0.02055850.7400.416.38335.79.1876231317.3396.95.7724.7
0.014321001.3200.4116.81640.58.3248525615.1392.93.9531.6
0.15445255.1300.4536.14529.27.8148828419.7390.686.8623.3
0.14932255.1300.4535.74166.27.2254828419.7395.1113.1518.7
0.17171255.1300.4535.96693.46.8185828419.7378.0814.4416
0.1265255.1300.4536.76243.47.9809828419.7395.589.525
0.0195117.51.3800.41617.10459.59.2229321618.6393.248.0533
0.03584803.3700.3986.2917.86.6115433716.1396.94.6723.5
0.04379803.3700.3985.78731.16.6115433716.1396.910.2419.4
0.0578912.56.0700.4095.87821.46.498434518.9396.218.122
0.1355412.56.0700.4095.59436.86.498434518.9396.913.0917.4
0.08826010.8100.4136.4176.65.2873430519.2383.736.7224.2
0.09164010.8100.4136.0657.85.2873430519.2390.915.5222.8
0.19539010.8100.4136.2456.25.2873430519.2377.177.5423.4
0.07896012.8300.4376.27364.2515539818.7394.926.7824.1
0.09512012.8300.4376.286454.5026539818.7383.238.9421.4
0.10153012.8300.4376.27974.54.0522539818.7373.6611.9720
0.08707012.8300.4376.1445.84.0905539818.7386.9610.2720.8
0.04113254.8600.4266.72733.55.4007428119396.95.2928
0.04462254.8600.4266.61970.45.4007428119395.637.2223.9
0.03551254.8600.4266.16746.75.4007428119390.647.5122.9
0.0505904.4900.4496.389484.7794324718.5396.99.6223.9
0.0573504.4900.4496.6356.14.4377324718.5392.36.5326.6
0.0518804.4900.4496.01545.14.4272324718.5395.9912.8622.5
0.0715104.4900.4496.12156.83.7476324718.5395.158.4422.2
0.056603.4100.4897.00786.33.4217227017.8396.95.523.6
0.0530203.4100.4897.07963.13.4145227017.8396.065.728.7
0.0468403.4100.4896.41766.13.0923227017.8392.188.8122.6
0.028752815.0400.4646.21128.93.6659427018.2396.336.2125
0.042942815.0400.4646.24977.33.615427018.2396.910.5920.6
0.1150402.8900.4456.16369.63.4952227618391.8311.3421.4
0.1486608.5600.526.72779.92.7778538420.9394.769.4227.5
0.1143208.5600.526.78171.32.8561538420.9395.587.6726.5
0.2287608.5600.526.40585.42.7147538420.970.810.6318.6
0.2116108.5600.526.13787.42.7147538420.9394.4713.4419.3
0.171208.5600.525.83691.92.211538420.9395.6718.6619.5
0.1311708.5600.526.12785.22.1224538420.9387.6914.0920.4
0.1280208.5600.526.47497.12.4329538420.9395.2412.2719.8
0.2636308.5600.526.22991.22.5451538420.9391.2315.5519.4
0.10084010.0100.5476.71581.62.6775643217.8395.5910.1622.8
0.14231010.0100.5476.25484.22.2565643217.8388.7410.4518.5
0.13158010.0100.5476.17672.52.7301643217.8393.312.0421.2
0.15098010.0100.5476.02182.62.7474643217.8394.5110.319.2
0.13058010.0100.5475.87273.12.4775643217.8338.6315.3720.4
0.14476010.0100.5475.73165.22.7592643217.8391.513.6119.3
0.06899025.6500.5815.8769.72.2577218819.1389.1514.3722
0.07165025.6500.5816.00484.12.1974218819.1377.6714.2720.3
0.09299025.6500.5815.96192.92.0869218819.1378.0917.9320.5
0.15038025.6500.5815.856971.9444218819.1370.3125.4117.3
0.09849025.6500.5815.87995.82.0063218819.1379.3817.5818.8
0.38735025.6500.5815.61395.61.7572218819.1359.2927.2615.7
0.25915021.8900.6245.693961.7883443721.2392.1117.1916.2
0.32543021.8900.6246.43198.81.8125443721.2396.915.3918
1.19294021.8900.6246.32697.72.271443721.2396.912.2619.6
0.32982021.8900.6245.82295.42.4699443721.2388.6915.0318.4
0.97617021.8900.6245.75798.42.346443721.2262.7617.3115.6
0.32264021.8900.6245.94293.51.9669443721.2378.2516.917.4
0.35233021.8900.6246.45498.41.8498443721.2394.0814.5917.1
0.2498021.8900.6245.85798.21.6686443721.2392.0421.3213.3
0.54452021.8900.6246.15197.91.6687443721.2396.918.4617.8
1.62864021.8900.6245.0191001.4394443721.2396.934.4114.4
3.32105019.5810.8715.4031001.3216540314.7396.926.8213.4
2.37934019.5800.8716.131001.4191540314.7172.9127.813.8
2.36862019.5800.8714.92695.71.4608540314.7391.7129.5314.6
2.33099019.5800.8715.18693.81.5296540314.7356.9928.3217.8
2.73397019.5800.8715.59794.91.5257540314.7351.8521.4515.4
1.6566019.5800.8716.12297.31.618540314.7372.814.121.5
2.14918019.5800.8715.70998.51.6232540314.7261.9515.7919.4
1.41385019.5810.8716.129961.7494540314.7321.0215.1217
2.44668019.5800.8715.272941.7364540314.788.6316.1413.1
1.34284019.5800.6056.0661001.7573540314.7353.896.4324.3
1.42502019.5800.8716.511001.7659540314.7364.317.3923.3
1.27346019.5810.6056.2592.61.7984540314.7338.925.527
1.46336019.5800.6057.48990.81.9709540314.7374.431.7350
1.51902019.5810.6058.37593.92.162540314.7388.453.3250
2.24236019.5800.6055.85491.82.422540314.7395.1111.6422.7
2.924019.5800.6056.101932.2834540314.7240.169.8125
2.01019019.5800.6057.92996.22.0459540314.7369.33.750
1.80028019.5800.6055.87779.22.4259540314.7227.6112.1423.8
2.44953019.5800.6056.40295.22.2625540314.7330.0411.3222.3
1.20742019.5800.6055.87594.62.4259540314.7292.2914.4317.4
2.3139019.5800.6055.8897.32.3887540314.7348.1312.0319.1
0.1391404.0500.515.57288.52.5961529616.6396.914.6923.1
0.0917804.0500.516.41684.12.6463529616.6395.59.0423.6
0.0844704.0500.515.85968.72.7019529616.6393.239.6422.6
0.0666404.0500.516.54633.13.1323529616.6390.965.3329.4
0.0702204.0500.516.0247.23.5549529616.6393.2310.1123.2
0.0542504.0500.516.31573.43.3175529616.6395.66.2924.6
0.0664204.0500.516.8674.42.9153529616.6391.276.9229.9
0.057802.4600.4886.9858.42.829319317.8396.95.0437.2
0.0658802.4600.4887.76583.32.741319317.8395.567.5639.8
0.0688802.4600.4886.14462.22.5979319317.8396.99.4536.2
0.0910302.4600.4887.15592.22.7006319317.8394.124.8237.9
0.1000802.4600.4886.56395.62.847319317.8396.95.6832.5
0.0560202.4600.4887.83153.63.1992319317.8392.634.4550
0.07875453.4400.4376.78241.13.7886539815.2393.876.6832
0.0837453.4400.4377.18538.94.5667539815.2396.95.3934.9
0.09068453.4400.4376.95121.56.4798539815.2377.685.137
0.06911453.4400.4376.73930.86.4798539815.2389.714.6930.5
0.08664453.4400.4377.17826.36.4798539815.2390.492.8736.4
0.02187602.9300.4016.89.96.2196126515.6393.375.0331.1
0.01439602.9300.4016.60418.86.2196126515.6376.74.3829.1
0.04666801.5200.4047.10736.67.309232912.6354.318.6130.3
0.01778951.4700.4037.13513.97.6534340217384.34.4532.9
0.0344582.52.0300.4156.16238.46.27234814.7393.777.4324.1
0.0351952.6800.41617.85333.25.118422414.7392.783.8148.5
0.02009952.6800.41618.03431.95.118422414.7390.552.8850
0.13642010.5900.4895.89122.33.9454427718.6396.910.8722.6
0.22969010.5900.4896.32652.54.3549427718.6394.8710.9724.4
0.13587010.5910.4896.06459.14.2392427718.6381.3214.6624.4
0.37578010.5910.4895.40488.63.665427718.6395.2423.9819.3
0.14052010.5900.4896.37532.33.9454427718.6385.819.3828.1
0.28955010.5900.4895.4129.83.5875427718.6348.9329.5523.7
0.0456013.8910.555.888563.1121527616.4392.813.5123.3
0.4077106.210.5076.16491.33.048830717.4395.2421.4621.7
0.6235606.210.5076.87977.73.2721830717.4390.399.9327.5
0.614706.200.5076.61880.83.2721830717.4396.97.630.1
0.3153306.200.5048.26678.32.8944830717.4385.054.1444.8
0.5269306.200.5048.725832.8944830717.43824.6350
0.3821406.200.5048.0486.53.2157830717.4387.383.1337.6
0.4123806.200.5047.16379.93.2157830717.4372.086.3631.6
0.4417806.200.5046.55221.43.3751830717.4380.343.7631.5
0.53706.200.5045.98168.13.6715830717.4378.3511.6524.3
0.5752906.200.5078.33773.33.8384830717.4385.912.4741.7
0.3314706.200.5078.24770.43.6519830717.4378.953.9548.3
0.4479106.210.5076.72666.53.6519830717.4360.28.0529
0.3304506.200.5076.08661.53.6519830717.4376.7510.8824
0.5205806.210.5076.63176.54.148830717.4388.459.5425.1
0.11329304.9300.4286.89754.36.3361630016.6391.2511.3822
0.1029304.9300.4286.35852.97.0355630016.6372.7511.2222.2
0.12757304.9300.4286.3937.87.0355630016.6374.715.1923.7
0.20608225.8600.4315.59376.57.9549733019.1372.4912.517.6
0.33983225.8600.4316.10834.98.0555733019.1390.189.1624.3
0.16439225.8600.4316.43349.17.8265733019.1374.719.5224.5
0.19073225.8600.4316.71817.57.8265733019.1393.746.5626.2
0.1403225.8600.4316.487137.3967733019.1396.285.924.4
0.21409225.8600.4316.4388.97.3967733019.1377.073.5924.8
0.36894225.8600.4318.2598.48.9067733019.1396.93.5442.8
0.54011203.9700.6477.20381.82.1121526413392.89.5933.8
0.53412203.9700.6477.5289.42.1398526413388.377.2643.1
0.52014203.9700.6478.39891.52.2885526413386.865.9148.8
0.82526203.9700.6477.32794.52.0788526413393.4211.2531
0.55007203.9700.6477.20691.61.9301526413387.898.136.5
0.76162203.9700.6475.5662.81.9865526413392.410.4522.8
0.7857203.9700.6477.01484.62.1329526413384.0714.7930.7
0.5405203.9700.5757.4752.62.872526413390.33.1643.5
0.16211206.9600.4646.2416.34.429322318.6396.96.5925.2
0.1146206.9600.4646.53858.73.9175322318.6394.967.7324.4
0.22188206.9610.4647.69151.84.3665322318.6390.776.5835.2
0.05644406.4110.4476.75832.94.0776425417.6396.93.5332.4
0.21038203.3300.44296.81232.24.1007521614.9396.94.8535.1
0.03705203.3300.44296.96837.25.2447521614.9392.234.5935.4
0.06129203.3310.44297.64549.75.2119521614.9377.073.0146
0.01501901.2110.4017.92324.85.885119813.6395.523.1650
0.00906902.9700.47.08820.87.3073128515.3394.727.8532.2
0.01096552.2500.3896.45331.97.3073130015.3394.728.2322
0.01965801.7600.3856.2331.59.0892124118.2341.612.9320.1
0.045952.55.3200.4056.31545.67.3172629316.6396.97.622.3
0.03502804.9500.4116.86127.95.1167424519.2396.93.3328.5
0.03615804.9500.4116.6323.45.1167424519.2396.94.727.9
0.08265013.9200.4376.12718.45.5027428916396.98.5823.9
0.05372013.9200.4376.549515.9604428916392.857.3927.1
0.14103013.9200.4375.79586.32428916396.915.8420.3
0.03537346.0900.4336.5940.45.4917732916.1395.759.522
0.09266346.0900.4336.49518.45.4917732916.1383.618.6726.4
0.1346.0900.4336.98217.75.4917732916.1390.434.8633.1
0.05515332.1800.4727.23641.14.022722218.4393.686.9336.1
0.05479332.1800.4726.61658.13.37722218.4393.368.9328.4
0.07503332.1800.4727.4271.93.0992722218.4396.96.4733.4
0.4929809.900.5446.63582.53.3175430418.4396.94.5422.8
0.349409.900.5445.97276.73.1025430418.4396.249.9720.3
2.6354809.900.5444.97337.82.5194430418.4350.4512.6416.1
0.7904109.900.5446.12252.82.6403430418.4396.95.9822.1
0.2616909.900.5446.02390.42.834430418.4396.311.7219.4
0.2535609.900.5445.70577.73.945430418.4396.4211.516.2
0.3182709.900.5445.91483.23.9986430418.4390.718.3317.8
0.2452209.900.5445.78271.74.0317430418.4396.915.9419.8
0.4020209.900.5446.38267.23.5325430418.4395.2110.3623.1
0.167607.3800.4936.42652.34.5404528719.6396.97.223.8
0.3410907.3800.4936.41540.14.7211528719.6396.96.1225
0.1918607.3800.4936.43114.75.4159528719.6393.685.0824.6
0.2410307.3800.4936.08343.75.4159528719.6396.912.7922.2
0.0661703.2400.465.86825.85.2146443016.9382.449.9719.3
0.0454403.2400.466.14432.25.8736443016.9368.579.0919.8
0.0508305.1900.5156.31638.16.4584522420.2389.715.6822.2
0.0373805.1900.5156.3138.56.4584522420.2389.46.7520.7
0.0342705.1900.5155.86946.35.2311522420.2396.99.819.5
0.0330605.1900.5156.05937.34.8122522420.2396.148.5120.6
0.0549705.1900.5155.98545.44.8122522420.2396.99.7419
0.0615105.1900.5155.96858.54.8122522420.2396.99.2918.7
0.01301351.5200.4427.24149.37.0379128415.5394.745.4932.7
0.0249801.8900.5186.5459.76.2669142215.9389.968.6516.5
0.02543553.7800.4846.69656.45.7321537017.6396.97.1823.9
0.03049553.7800.4846.87428.16.4654537017.6387.974.6131.2
0.0187854.1500.4296.51627.78.5353435117.9392.436.3623.1
0.01501802.0100.4356.63529.78.344428017390.945.9924.5
0.02899401.2500.4296.93934.58.7921133519.7389.855.8926.6
0.07244601.6900.4115.88418.510.7103441118.3392.337.7918.6
8.98296018.110.776.21297.42.12222466620.2377.7317.617.8
3.8497018.110.776.395912.50522466620.2391.3413.2721.7
5.20177018.110.776.12783.42.72272466620.2395.4311.4822.7
4.26131018.100.776.11281.32.50912466620.2390.7412.6722.6
4.54192018.100.776.398882.51822466620.2374.567.7925
3.67822018.100.775.36296.22.10362466620.2380.7910.1920.8
4.55587018.100.7183.56187.91.61322466620.2354.77.1227.5
3.69695018.100.7184.96391.41.75232466620.2316.031421.9
13.5222018.100.6313.8631001.51062466620.2131.4213.3323.1
4.89822018.100.6314.971001.33252466620.2375.523.2650
6.53876018.110.6317.01697.51.20242466620.2392.052.9650
9.2323018.100.6316.2161001.16912466620.2366.159.5350
8.26725018.110.6685.87589.61.12962466620.2347.888.8850
11.1081018.100.6684.9061001.17422466620.2396.934.7713.8
18.4982018.100.6684.1381001.1372466620.2396.937.9713.8
15.288018.100.6716.64993.31.34492466620.2363.0223.2413.9
9.82349018.100.6716.79498.81.3582466620.2396.921.2413.3
9.18702018.100.75.5361001.58042466620.2396.923.611.3
7.99248018.100.75.521001.53312466620.2396.924.5612.3
20.0849018.100.74.36891.21.43952466620.2285.8330.638.8
24.3938018.100.74.6521001.46722466620.2396.928.2810.5
22.5971018.100.7589.51.51842466620.2396.931.997.4
8.15174018.100.75.3998.91.72812466620.2396.920.8511.5
5.29305018.100.76.05182.52.16782466620.2378.3818.7623.2
11.5779018.100.75.036971.772466620.2396.925.689.7
13.3598018.100.6935.88794.71.78212466620.2396.916.3512.7
5.87205018.100.6936.405961.67682466620.2396.919.3712.5
38.3518018.100.6935.4531001.48962466620.2396.930.595
25.0461018.100.6935.9871001.58882466620.2396.926.775.6
14.2362018.100.6936.3431001.57412466620.2396.920.327.2
24.8017018.100.6935.349961.70282466620.2396.919.778.3
11.9511018.100.6595.6081001.28522466620.2332.0912.1327.9
7.40389018.100.5975.61797.91.45472466620.2314.6426.417.2
28.6558018.100.5975.1551001.58942466620.2210.9720.0816.3
45.7461018.100.6934.5191001.65822466620.288.2736.987
18.0846018.100.6796.4341001.83472466620.227.2529.057.2
25.9406018.100.6795.30489.11.64752466620.2127.3626.6410.4
73.5341018.100.6795.9571001.80262466620.216.4520.628.8
11.8123018.100.7186.82476.51.7942466620.248.4522.748.4
8.79212018.100.5845.56570.62.06352466620.23.6517.1611.7
15.8603018.100.6795.89695.41.90962466620.27.6824.398.3
37.6619018.100.6796.20278.71.86292466620.218.8214.5210.9
7.36711018.100.6796.19378.11.93562466620.296.7321.5211
9.33889018.100.6796.3895.61.96822466620.260.7224.089.5
10.0623018.100.5846.83394.32.08822466620.281.3319.6914.1
6.44405018.100.5846.42574.82.20042466620.297.9512.0316.1
5.58107018.100.7136.43687.92.31582466620.2100.1916.2214.3
13.9134018.100.7136.208952.22222466620.2100.6315.1711.7
15.1772018.100.746.1521001.91422466620.29.3226.458.7
9.39063018.100.745.62793.91.81722466620.2396.922.8812.8
22.0511018.100.745.81892.41.86622466620.2391.4522.1110.5
9.72418018.100.746.40697.22.06512466620.2385.9619.5217.1
5.66637018.100.746.2191002.00482466620.2395.6916.5918.4
9.96654018.100.746.4851001.97842466620.2386.7318.8515.4
12.8023018.100.745.85496.61.89562466620.2240.5223.7910.8
10.6718018.100.746.45994.81.98792466620.243.0623.9811.8
9.92485018.100.746.25196.62.1982466620.2388.5216.4412.6
9.32909018.100.7136.18598.72.26162466620.2396.918.1314.1
5.44114018.100.7136.65598.22.35522466620.2355.2917.7315.2
5.09017018.100.7136.29791.82.36822466620.2385.0917.2716.1
8.24809018.100.7137.39399.32.45272466620.2375.8716.7417.8
4.75237018.100.7136.52586.52.43582466620.250.9218.1314.1
8.20058018.100.7135.93680.32.77922466620.23.516.9413.5
7.75223018.100.7136.30183.72.78312466620.2272.2116.2314.9
6.80117018.100.7136.08184.42.71752466620.2396.914.720
4.81213018.100.7136.701902.59752466620.2255.2316.4216.4
3.69311018.100.7136.37688.42.56712466620.2391.4314.6517.7
6.65492018.100.7136.317832.73442466620.2396.913.9919.5
5.82115018.100.7136.51389.92.80162466620.2393.8210.2920.2
7.83932018.100.6556.20965.42.96342466620.2396.913.2221.4
3.1636018.100.6555.75948.23.06652466620.2334.414.1319.9
3.77498018.100.6555.95284.72.87152466620.222.0117.1519
4.42228018.100.5846.00394.52.54032466620.2331.2921.3219.1
15.5757018.100.585.926712.90842466620.2368.7418.1319.1
13.0751018.100.585.71356.72.82372466620.2396.914.7620.1
4.03841018.100.5326.22990.73.09932466620.2395.3312.8719.6
3.56868018.100.586.437752.89652466620.2393.3714.3623.2
8.05579018.100.5845.42795.42.42982466620.2352.5818.1413.8
4.87141018.100.6146.48493.62.30532466620.2396.2118.6816.7
15.0234018.100.6145.30497.32.10072466620.2349.4824.9112
10.233018.100.6146.18596.72.17052466620.2379.718.0314.6
14.3337018.100.6146.229881.95122466620.2383.3213.1121.4
5.82401018.100.5326.24264.73.42422466620.2396.910.7423
5.70818018.100.5326.7574.93.33172466620.2393.077.7423.7
2.81838018.100.5325.76240.34.09832466620.2392.9210.4221.8
2.37857018.100.5835.87141.93.7242466620.2370.7313.3420.6
5.69175018.100.5836.11479.83.54592466620.2392.6814.9819.1
4.83567018.100.5835.90553.23.15232466620.2388.2211.4520.6
0.15086027.7400.6095.45492.71.8209471120.1395.0918.0615.2
0.20746027.7400.6095.093981.8226471120.1318.4329.688.1
0.10574027.7400.6095.98398.81.8681471120.1390.1118.0713.6
0.11132027.7400.6095.98383.52.1099471120.1396.913.3520.1
0.1733109.6900.5855.707542.3817639119.2396.912.0121.8
0.2683809.6900.5855.79470.62.8927639119.2396.914.118.3
0.1778309.6900.5855.56973.52.3999639119.2395.7715.117.5
0.06263011.9300.5736.59369.12.4786127321391.999.6722.4
0.04527011.9300.5736.1276.72.2875127321396.99.0820.6
0.06076011.9300.5736.976912.1675127321396.95.6423.9
0.04741011.9300.5736.0380.82.505127321396.97.8811.9

 

Code để đọc và xử lý data. Với dữ liệu nhà Boston, chúng ta cần chuẩn hóa dữ liệu vì các cột dữ liệu có đơn vị khác nhau và phân bố giá trị khác nhau. (Sau khi làm xong ví dụ này, các bạn hãy chạy thử chương trình khi data không được chuẩn hóa, rồi quan sát và so sánh kết quả với chương trình gốc.)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the Boston housing table; the code below expects it to contain a 'medv' column.
data = pd.read_csv('BostonHousing.csv')

def normal(x):
    """Mean-normalize a 1-D collection of numbers.

    Each value ``v`` is mapped to ``(v - mean) / (max - min)``.

    Args:
        x: a 1-D sequence of numbers (a list, a numpy array or a pandas
            column all work, since the input is converted with ``np.asarray``).

    Returns:
        A list with one normalized value per input element. When every
        element is equal (``max == min``) the result is all zeros instead of
        the NaN values a division by zero would produce.

    Raises:
        ValueError: if ``x`` is empty.
    """
    values = np.asarray(x, dtype=float)
    span = values.max() - values.min()

    if span == 0:
        # Constant column: it carries no information, and dividing by a zero
        # range would fill the column with NaN and poison the training.
        return list(np.zeros(values.shape))

    # Whole-array arithmetic replaces the per-element Python loop.
    return list((values - values.mean()) / span)

# Normalize every column (the 'medv' target included) on a copy, leaving the
# DataFrame that was loaded untouched.
df = data.copy().apply(normal, axis=0)

# Feature table: every column except the target, with a constant column of
# ones inserted in front to act as the bias term.
Xd = df.drop(columns=['medv'])
Xd.insert(0, 'X0', 1)

# Plain numpy arrays for the training code.
X = Xd.values
y = df['medv'].values

# m: number of samples; n: number of parameters (bias + feature columns).
m = len(df.index)
n = X.shape[1]
theta = np.ones(n)

 

Cài theo cách dùng chỉ mục index

# No vectorization - batch gradient descent

# cost function
def cost_loop(theta = theta, x=X, y=y, m=m, n=n):
    """Return the mean squared error of the linear model, using index loops.

    Args:
        theta: model parameters, indexed as ``theta[j]`` for ``j < n``.
        x: design matrix, indexed as ``x[i, j]`` (sample ``i``, column ``j``).
        y: target values, indexed as ``y[i]`` for ``i < m``.
        m: number of samples to iterate over.
        n: number of parameters / columns of ``x`` to iterate over.

    Returns:
        ``(1/m) * sum_i (sum_j theta[j] * x[i, j] - y[i]) ** 2``.

    Note:
        The default values are bound once, when this ``def`` statement is
        executed; rebinding the module-level names later does not change them.
    """
    cost = 0
    for i in range(m):
        # Prediction for sample i: dot product of theta with row i.
        hypo_i = 0
        for j in range(n):
            # Index the ``x`` argument, not the module-level ``X``, so that a
            # matrix passed by the caller is actually the one being evaluated.
            hypo_i += theta[j]*x[i,j]
        cost_i = (hypo_i - y[i])**2
        cost += cost_i
    cost = (1/m)*cost

    return cost

# Training: batch gradient descent with explicit index loops, 500 epochs.
learning_rate = 0.01
theta = np.ones(n)
cost_list = []

for itr in range(500):
    # Partial derivative for every parameter, evaluated at the current theta.
    partials = []
    for k in range(n):
        acc = 0
        for i in range(m):
            # Prediction for sample i.
            pred = 0
            for j in range(n):
                pred += theta[j] * X[i, j]
            # Contribution of sample i to d(cost)/d(theta[k]).
            acc += (pred - y[i]) * X[i, k]

        partials.append((2/m) * acc)

    # Simultaneous update of all parameters, then log the cost of this epoch.
    theta = theta - learning_rate * np.array(partials)
    cost_list.append(cost_loop(theta))


# One recorded cost per epoch, so the x-axis follows the length of the list.
plt.plot(np.arange(len(cost_list)), cost_list)
plt.xlabel('epoch')
plt.ylabel('Giá trị loss')

 

Giá trị loss qua các vòng lặp

 

Cài theo phương pháp vectorization

# Vectorized implementation

# Start from a parameter vector of ones.
theta = np.ones(n)


def cost(theta, X=X, y=y, m=m):
    """Return the mean squared error of ``np.dot(X, theta)`` against ``y``.

    Args:
        theta: parameter vector.
        X: design matrix (defaults to the module-level ``X`` at definition time).
        y: target vector (defaults to the module-level ``y`` at definition time).
        m: divisor of the squared-error sum (defaults to the module-level ``m``).

    Returns:
        ``(1/m)`` times the dot product of the residual vector with itself.
    """
    errors = np.dot(X, theta) - y
    return (1/m) * np.dot(errors, errors)

# Hyperparameters and starting point for the vectorized run (500 epochs).
learning_rate = 0.01
theta = np.ones(n)
cost_list = []

for epoch in range(500):
    # Residuals of the current predictions.
    residual = X.dot(theta) - y
    # Gradient of the mean squared error with respect to theta.
    gradients = (2/m) * X.T.dot(residual)
    theta = theta - learning_rate * gradients
    cost_list.append(cost(theta))

# One recorded cost per epoch, so the x-axis follows the length of the list.
plt.plot(np.arange(len(cost_list)), cost_list)
plt.xlabel('epoch')
plt.ylabel('Giá trị loss')

 

Giá trị loss qua các vòng lặp

 

Ở ví dụ này, khi chạy hai đoạn code để huấn luyện mô hình, chúng ta dễ dàng nhận ra cách cài đặt theo vectorization nhanh hơn rất nhiều so với cách dùng chỉ mục index thông thường.

Thật may mắn là chúng ta không cần phải là siêu sao coding để cài được những chương trình dùng kỹ thuật vectorization. Các thư viện phổ biến hiện nay đã làm những phần việc này. Khi các bạn muốn xây dựng một hàm hay thực hiện một tác vụ nào đó, các bạn nên dùng những hàm có sẵn trong thư viện (nếu có) vì code chúng ta cài thường tệ hơn code trong các thư viện ^^.