四季花海
# The following is your tested and fully working code:
import pandas as pd
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import make_scorer, accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

data_train = pd.read_csv(r"train.csv")
data_test = pd.read_csv(r"test.csv")

# Keep only the identifier, the two team columns and (for training) the target.
columns = ['Id', 'HomeTeam', 'AwayTeam', 'Full_Time_Home_Goals']
col = ['Id', 'HomeTeam', 'AwayTeam']

data_test = data_test[col]
data_train = data_train[columns]

# Rows with any missing value in the selected columns are discarded.
data_train = data_train.dropna()
data_test = data_test.dropna()

# The target is treated as a class label, so it must be integral.
data_train['Full_Time_Home_Goals'] = data_train['Full_Time_Home_Goals'].astype(int)


def encode_features(df_train, df_test):
    """Label-encode the team columns of both frames with shared encoders.

    One ``LabelEncoder`` per column is fitted on the concatenation of the
    train and test values, so a given team name maps to the same integer in
    both frames and no test-only team is unseen at transform time.

    Both frames are modified in place and also returned.
    """
    features = ['HomeTeam', 'AwayTeam']
    df_combined = pd.concat([df_train[features], df_test[features]])

    for feature in features:
        le = preprocessing.LabelEncoder()
        le = le.fit(df_combined[feature])
        df_train[feature] = le.transform(df_train[feature])
        df_test[feature] = le.transform(df_test[feature])
    return df_train, df_test


data_train, data_test = encode_features(data_train, data_test)
print(data_train.head())
print(data_test.head())

# X_all would contain all columns required for prediction and y_all would
# have that one column we want to predict.
# NOTE(review): only the target is dropped, so 'Id' stays in X_all as a
# feature (and data_test passed to predict() below carries it too) - confirm
# that this is intended.
y_all = data_train['Full_Time_Home_Goals']
X_all = data_train.drop(['Full_Time_Home_Goals'], axis=1)

num_test = 0.20  # 80-20 split
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=num_test, random_state=23
)

# Using Random Forest and using parameters that we defined.
clf = RandomForestClassifier()

parameters = {
    'n_estimators': [4, 6, 9],
    # 'auto' is intentionally omitted: for RandomForestClassifier it was an
    # alias of 'sqrt', was deprecated in scikit-learn 1.1 and removed in 1.3,
    # so it either duplicated a candidate or made those fits fail.
    'max_features': ['log2', 'sqrt'],
    'criterion': ['entropy', 'gini'],
    'max_depth': [2, 3, 5, 10],
    'min_samples_split': [2, 3, 5],
    'min_samples_leaf': [1, 5, 8],
}

acc_scorer = make_scorer(accuracy_score)

grid_obj = GridSearchCV(clf, parameters, scoring=acc_scorer)
grid_obj = grid_obj.fit(X_train, y_train)

# Take the best parameter combination found by the search and fit it on the
# training split before scoring on the held-out split.
clf = grid_obj.best_estimator_
clf.fit(X_train, y_train)

predictions = clf.predict(X_test)
print(accuracy_score(y_test, predictions))

# Predict on the real test set and pair each prediction with its row Id.
ids = data_test['Id']
predictions = clf.predict(data_test)

df_preds = pd.DataFrame({"id": ids, "predictions": predictions})
df_preds  # bare expression: displays the frame when run as a notebook cell

# Output recorded from the original run:
#
#    Id  HomeTeam  AwayTeam  Full_Time_Home_Goals
# 0   1        55       440                     3
# 1   2       158       493                     2
# 2   3       178       745                     1
# 3   4       185       410                     1
# 4   5       249        57                     2
#        Id  HomeTeam  AwayTeam
# 0  190748       284        54
# 1  190749       124       441
# 2  190750       446        57
# 3  190751       185       637
# 4  190752       749       482
# 0.33213786556261704
#
#          id  predictions
# 0    190748            1
# 1    190749            1
# 2    190750            1
# 3    190751            1
# 4    190752            1
# ...     ...          ...
# 375  191123            1
# 376  191124            1
# 377  191125            1
# 378  191126            1
# 379  191127            1
#
# 380 rows × 2 columns