"""Grid-search a logistic-regression classifier on train.csv / test.csv."""

import numpy as np
import pandas as pd

# Columns whose missing values are replaced by the constant 0.
ZERO_FILL_COLUMNS = ('V4', 'V29')
# Columns whose missing values are replaced by the training-set mean.
MEAN_FILL_COLUMNS = ('V22', 'V27', 'V37')
# Name of the label column that is split off from the features.
TARGET_COLUMN = 'Class'


def load_data(train_path: str = 'train.csv', test_path: str = 'test.csv'):
    """Read the training and test CSV files.

    Args:
        train_path: Path of the training CSV (defaults to ``'train.csv'``).
        test_path: Path of the test CSV (defaults to ``'test.csv'``).

    Returns:
        A ``(df_train, df_test)`` tuple of DataFrames.
    """
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)
    return df_train, df_test


def preprocess_data(df_train: pd.DataFrame, df_test: pd.DataFrame):
    """Impute missing values and split both frames into features and labels.

    Columns in ``ZERO_FILL_COLUMNS`` get 0 for missing entries; columns in
    ``MEAN_FILL_COLUMNS`` get the mean of the *training* column. The same
    training statistic is applied to the test frame so both sets go through
    an identical transformation and no test-set statistic is used.

    Both input frames are modified in place (the imputed columns are
    reassigned on them).

    Args:
        df_train: Training frame containing the feature columns and 'Class'.
        df_test: Test frame with the same columns.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` with fresh 0..n-1 indexes.

    Raises:
        KeyError: If an expected column is absent from either frame.
    """
    fill_values = {column: 0 for column in ZERO_FILL_COLUMNS}
    for column in MEAN_FILL_COLUMNS:
        fill_values[column] = df_train[column].mean()

    for column, value in fill_values.items():
        # Assign the result back instead of `df[col].fillna(..., inplace=True)`:
        # the latter operates on a temporary under pandas copy-on-write and
        # would leave the frame unchanged.
        df_train[column] = df_train[column].fillna(value)
        df_test[column] = df_test[column].fillna(value)

    X_train = df_train.drop(TARGET_COLUMN, axis=1).reset_index(drop=True)
    y_train = df_train[TARGET_COLUMN].reset_index(drop=True)
    X_test = df_test.drop(TARGET_COLUMN, axis=1).reset_index(drop=True)
    y_test = df_test[TARGET_COLUMN].reset_index(drop=True)
    return X_train, y_train, X_test, y_test


def build_param_grid() -> list:
    """Return the hyper-parameter search space for ``LogisticRegression``.

    The space is a list of sub-grids so that each penalty is only paired
    with solvers that support it; a single flat grid would spend most of
    its fits on combinations that raise and score NaN.

    Returns:
        A list of dicts suitable for ``GridSearchCV(param_grid=...)``.
    """
    shared = {
        # Seven values 1e-3 .. 1e3. Keyword `num` avoids accidentally
        # passing extra positionals as `endpoint` / `base`.
        'C': np.logspace(-3, 3, num=7),
        'max_iter': [50, 100, 1000, 2500, 5000],
        'class_weight': ['balanced', None],
        'tol': [1e-4, 1e-3, 1e-2, 1e-1, 1],
    }
    return [
        {**shared, 'solver': ['liblinear', 'saga'], 'penalty': ['l1', 'l2']},
        {**shared, 'solver': ['lbfgs', 'newton-cg'], 'penalty': ['l2']},
        {
            **shared,
            'solver': ['saga'],
            'penalty': ['elasticnet'],
            # elasticnet requires an explicit l1_ratio; 0 and 1 would just
            # duplicate the pure l2 / l1 candidates above.
            'l1_ratio': [0.25, 0.5, 0.75],
        },
    ]


def predict() -> None:
    """Run the grid search and print the test score and best parameters."""
    # Imported here rather than at module level so the data helpers above
    # can be imported without scikit-learn being installed.
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import GridSearchCV

    df_train, df_test = load_data()
    X_train, y_train, X_test, y_test = preprocess_data(df_train, df_test)

    model = GridSearchCV(
        estimator=LogisticRegression(),
        param_grid=build_param_grid(),
        cv=5,
        n_jobs=-1,
        verbose=1,
    )
    model.fit(X_train, y_train)

    print(model.score(X_test, y_test))
    print('Best parameters:', model.best_params_)


if __name__ == "__main__":
    predict()