问耕 栗子 发自 麦蒿寺
量子位 出品 | 公众号 QbitAI
import numpy as np
import pandas as pd

# Step 1/2: load the raw table. Every column except the last one is used as
# a feature; the last column is the target.
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
# The target is addressed as "the last column" instead of the hard-coded
# index 3, so it can never overlap the feature slice above. For a
# four-column file this selects exactly the same column as before.
Y = dataset.iloc[:, -1].values
from sklearn.impute import SimpleImputer

# Fill missing values in feature columns 1 and 2 with the column mean.
#
# `sklearn.preprocessing.Imputer` was removed in scikit-learn 0.22;
# `SimpleImputer` is its replacement. It always works column-wise, so the
# old `axis=0` argument is gone, and the missing-value marker must be the
# real `np.nan` rather than the string "NaN".
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
imputer = imputer.fit(X[:, 1:3])
X[:, 1:3] = imputer.transform(X[:, 1:3])
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Encode the categorical feature in column 0 as integer labels.
labelencoder_X = LabelEncoder()
X[:, 0] = labelencoder_X.fit_transform(X[:, 0])

# One-hot encode column 0 and put the dummy columns in front of the
# remaining features, which is the layout the legacy
# `OneHotEncoder(categorical_features=[0])` call produced. That argument was
# removed in scikit-learn 0.22, so the column is selected explicitly here.
onehotencoder = OneHotEncoder()
encoded_first_column = onehotencoder.fit_transform(X[:, [0]]).toarray()
# NOTE(review): assumes every column after the first is numeric at this
# point (the imputation step has already run) - confirm for other datasets.
X = np.hstack([encoded_first_column, X[:, 1:].astype(float)])

# Encode the target labels as integers.
labelencoder_Y = LabelEncoder()
Y = labelencoder_Y.fit_transform(Y)
# `sklearn.cross_validation` was removed in scikit-learn 0.20; the splitter
# now lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler is fitted on the training set only.
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
# Reuse the training-set mean and variance for the test set. Calling
# `fit_transform` here would re-fit the scaler on the test data, putting the
# two sets on different scales and leaking test statistics.
X_test = sc_X.transform(X_test)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Simple linear regression, step 1: load the data. The first column is the
# single feature (kept 2-D via the `:1` slice) and the second column is the
# target.
dataset = pd.read_csv('studentscores.csv')
X = dataset.iloc[:, :1].values
Y = dataset.iloc[:, 1].values

# `sklearn.cross_validation` was removed in scikit-learn 0.20; the splitter
# now lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for testing. The literal 0.25 replaces
# `1/4`, which evaluates to 0 under Python 2 integer division.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=0
)
from sklearn.linear_model import LinearRegression

# Step 2: fit an ordinary least-squares line to the training data.
regressor = LinearRegression()
regressor = regressor.fit(X_train, Y_train)

# Step 3: predict the target for the held-out test features.
Y_pred = regressor.predict(X_test)
# Step 4a: training set - observed points in red, fitted line in blue.
plt.figure()
plt.scatter(X_train, Y_train, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')

# Step 4b: test set - observed points in red, model predictions in blue.
# A separate figure keeps the two charts from being drawn on the same axes.
plt.figure()
plt.scatter(X_test, Y_test, color='red')
plt.plot(X_test, regressor.predict(X_test), color='blue')

# Without an explicit show() a plain script exits without displaying anything.
plt.show()
欢迎光临 智客公社 (http://bbs.cnaiplus.com/) | Powered by Discuz! X3.4 |