# scikit-learn package

------ Simple Regression ------
# Simple regression: Y = aX + b
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
plt.style.use('ggplot')
plt.rcParams['font.family']='SimHei' # SimHei, a CJK font, so Chinese labels display correctly

df1=pd.read_csv("2012MLB.csv",encoding="big5")
df1.head()      # preview the first few rows
df1.describe()  # summary statistics for each numeric column

# Split into training and test data
x=df1[['R']]
y=df1[['HR']]

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=20170816) # 30% test / 70% training data; fixed seed for reproducibility
x_test  # inspect the held-out samples
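
A quick sanity check of the split (a minimal sketch, not in the original post):

# Confirm the 70/30 proportions produced by train_test_split
print(len(x_train), len(x_test))   # row counts of the two parts
print(len(x_test) / len(df1))      # should be close to 0.3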


# Simple regression: Y = aX + b

from sklearn import linear_model


# Create a LinearRegression object

regr=linear_model.LinearRegression()

# Train the model with fit()
regr.fit(x_train,y_train)   

r_squared = regr.score(x_train, y_train)

print('Coefficient a in y = ax + b:', regr.coef_)
print('Intercept b in y = ax + b:', regr.intercept_)
print('R-squared:', r_squared)
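
The score above is computed on the training data. As a hedged sketch (using the same regr object, nothing new assumed), the held-out split can be scored the same way to check generalization:

# R-squared on the unseen 30% test split; a much lower value than
# the training score would hint at overfitting
print('Test R-squared:', regr.score(x_test, y_test))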


plt.scatter(x_test,y_test,color='blue',marker='x')  # marker='x' just sets the point shape
plt.plot(x_test,regr.predict(x_test),color='green')
plt.xlabel('R')
plt.ylabel('HR')
plt.show()

# Prediction
print('Predicted HR for R=600:', regr.predict([[600]]))  # predict() expects a 2-D array
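
Several values of R can be scored in one call, since predict() takes one row per sample; a minimal sketch with hypothetical inputs:

new_r = np.array([[500], [600], [700]])  # hypothetical R values, one per row
print(regr.predict(new_r))               # one HR estimate per row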
 

[Figure 回歸.PNG: scatter of R vs HR with the fitted regression line]

------ Multiple Regression ------

# Multiple regression: y = aX1 + bX2 + c
# Split into training and test data
x=df1[['R','H']]
y=df1[['HR']]

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3)


# Standardization: keeps training from being dominated by whichever variable has the larger scale
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()

sc.fit(x_train)  # learn mean and std from the training data only

x_train_nor=sc.transform(x_train)
x_test_nor=sc.transform(x_test)  # apply the training-set statistics to the test set

x_train_nor[0:10]  # inspect the first ten standardized rows
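
As a quick check (not in the original post), each standardized training column should now have mean close to 0 and standard deviation close to 1:

# Verify the effect of StandardScaler on the training data;
# x_test_nor reuses the training mean/std, so its stats are only approximate
print(x_train_nor.mean(axis=0))
print(x_train_nor.std(axis=0))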

# Train the model
regr=linear_model.LinearRegression()

# Fit on the standardized training data
regr.fit(x_train_nor,y_train)

r_squared = regr.score(x_train_nor, y_train)

print('Coefficients a, b in y = aX1 + bX2 + c:', regr.coef_)
print('Intercept c in y = aX1 + bX2 + c:', regr.intercept_)
print('R-squared:', r_squared)
 

output:

Coefficients a, b in y = aX1 + bX2 + c: [[ 30.26147953 -21.58287086]]
Intercept c in y = aX1 + bX2 + c: [ 151.47619048]
R-squared: 0.543692252781
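
Because this model was fit on standardized inputs, a new sample must pass through the same fitted scaler before predict(). A minimal sketch with hypothetical values R=600 and H=1400:

new_x = pd.DataFrame({'R':[600],'H':[1400]})  # hypothetical R and H values
new_x_nor = sc.transform(new_x)               # reuse the scaler fitted on x_train
print('Predicted HR:', regr.predict(new_x_nor))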

