In [1]:
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
# Load the home-price dataset from the CSV next to this notebook.
df = pd.read_csv("homeprice.csv")
In [2]:
# Preview the first three rows of the loaded frame.
df.head(3)
Out[2]:
area bedrooms age price
0 2600 3.0 20 550000
1 3000 4.0 15 565000
2 3200 NaN 18 610000
In [3]:
import math
# Imputation value for missing bedroom counts: the column mean (pandas skips
# NaN entries when averaging), rounded down to a whole number.
miss = math.floor(df["bedrooms"].mean())
miss
Out[3]:
3
In [5]:
# Replace missing bedroom counts with the imputation value `miss`, then
# preview the first three rows to confirm the gap has been filled.
df["bedrooms"] = df["bedrooms"].fillna(miss)
df.head(3)
Out[5]:
area bedrooms age price
0 2600 3.0 20 550000
1 3000 4.0 15 565000
2 3200 3.0 18 610000
In [6]:
# Fit an ordinary least-squares model of price on area, bedrooms and age.
# `fit` returns the estimator itself, so leaving it as the last expression
# displays the fitted model.
reg = linear_model.LinearRegression()
reg.fit(X=df[["area", "bedrooms", "age"]], y=df["price"])
Out[6]:
LinearRegression()
In [7]:
# Predict the price for a single home: area=160, bedrooms=1, age=1.
# The query is wrapped in a DataFrame with the same column names used in
# fit(), so scikit-learn matches features by name instead of warning that the
# input "does not have valid feature names".
# NOTE(review): area=160 is far below the areas shown in the data preview
# (2600-3200) and a later cell queries 1600 -- possibly a typo; confirm.
reg.predict(pd.DataFrame([[160, 1, 1]], columns=["area", "bedrooms", "age"]))
Out[7]:
array([372835.])
In [8]:
# Show the fitted coefficients; scikit-learn orders them like the feature
# columns passed to fit() (area, bedrooms, age).
reg.coef_
Out[8]:
array([   137.25, -26025.  ,  -6825.  ])
In [9]:
# Show the fitted model's intercept term.
reg.intercept_
Out[9]:
383725.0
In [10]:
# Predict the price for a single home: area=1600, bedrooms=1, age=1.
# The query is wrapped in a DataFrame with the same column names used in
# fit(), so scikit-learn matches features by name instead of warning that the
# input "does not have valid feature names".
reg.predict(pd.DataFrame([[1600, 1, 1]], columns=["area", "bedrooms", "age"]))
Out[10]:
array([570475.])
In [11]:
# Predict the price for a single home: area=2890, bedrooms=1, age=1.
# The query is wrapped in a DataFrame with the same column names used in
# fit(), so scikit-learn matches features by name instead of warning that the
# input "does not have valid feature names".
reg.predict(pd.DataFrame([[2890, 1, 1]], columns=["area", "bedrooms", "age"]))
Out[11]:
array([747527.5])
In [ ]: