# scikit-learn package example (scikit-learn 套件)
# ------ Simple regression (簡單回歸) ------
# Simple linear regression: Y = aX + b
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# BUG FIX: sklearn.cross_validation was deprecated in 0.18 and removed in
# 0.20; train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

plt.style.use('ggplot')
plt.rcParams['font.family'] = 'SimHei'  # SimHei font so CJK axis labels render
df1 = pd.read_csv("2012MLB.csv", encoding="big5")  # Big5-encoded 2012 MLB stats
df1.head()       # no-op in a script; previews rows in a notebook
df1.describe()

# Split into training / test data
x = df1[['R']]   # single feature column (presumably runs — confirm with data)
y = df1[['HR']]  # target column (presumably home runs)
# 30% test / 70% train; fixed random_state (seed) for reproducibility
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=20170816)
x_test
# Simple regression: Y = aX + b
from sklearn import datasets, linear_model

# Create the linear-regression estimator
regr = linear_model.LinearRegression()
# Train the model with fit() on the training split
regr.fit(x_train, y_train)
r_squared = regr.score(x_train, y_train)  # R^2 on the training data
print('y=ax+b的 a係數:', regr.coef_)
print('y=ax+b的 b截距:', regr.intercept_)
print('R平方', r_squared)

# Scatter the held-out test points; marker='x' is purely cosmetic
plt.scatter(x_test, y_test, color='blue', marker='x')
# Overlay the fitted regression line over the test feature range
plt.plot(x_test, regr.predict(x_test), color='green')
plt.xlabel('R')
plt.ylabel('HR')
plt.show()

# Prediction.
# BUG FIX: predict() requires a 2-D array of shape (n_samples, n_features);
# passing the bare scalar 600 raises ValueError on modern scikit-learn.
print('預測R=600 HR為:', regr.predict([[600]]))
# ------ Multiple regression (多變項回歸) ------
# Multiple regression: y = a*X1 + b*X2 + c
# Split into training / test data (two feature columns now)
x = df1[['R', 'H']]
y = df1[['HR']]
# FIX: the original split had no random_state, so the printed coefficients
# and R^2 changed on every run; seed it like the first split for
# reproducible, consistent output.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=20170816)

# Standardize: keeps one feature from dominating training purely by scale
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(x_train)                     # learn mean/std on the training split only
x_train_nor = sc.transform(x_train)
x_test_nor = sc.transform(x_test)   # apply the SAME training statistics to test
x_train_nor[0:10]                   # no-op in a script; previews rows in a notebook

# Train the model on the standardized features
regr = linear_model.LinearRegression()
regr.fit(x_train_nor, y_train)
r_squared = regr.score(x_train_nor, y_train)  # R^2 on the training data
print('y=aX1+bX2+c的 a、b係數:', regr.coef_)
print('y=aX1+bX2+c的 c截距:', regr.intercept_)
print('R平方:', r_squared)
# output:
# (留言列表 — blog navigation artifact from the scraped page, not code)