import numpy as np
import pandas as pd


def load_data():
    df_train = pd.read_csv('train.csv')
    df_test = pd.read_csv('test.csv')
    return df_train, df_test


def preprocess_data(df_train, df_test):
    # V4: impute missing values with 0
    df_train['V4'].fillna(0, inplace=True)
    df_test['V4'].fillna(0, inplace=True)
    # V22: impute missing values with the column mean
    df_train['V22'].fillna(df_train['V22'].mean(), inplace=True)
    df_test['V22'].fillna(df_test['V22'].mean(), inplace=True)
    # V27: impute missing values with the column mean
    df_train['V27'].fillna(df_train['V27'].mean(), inplace=True)
    df_test['V27'].fillna(df_test['V27'].mean(), inplace=True)
    # V29: impute missing values with 0
    df_train['V29'].fillna(0, inplace=True)
    df_test['V29'].fillna(0, inplace=True)
    # V37: impute missing values with the column mean
    df_train['V37'].fillna(df_train['V37'].mean(), inplace=True)
    df_test['V37'].fillna(df_test['V37'].mean(), inplace=True)

    # Split features and the 'Class' target for both sets
    X_train = df_train.drop('Class', axis=1).reset_index(drop=True)
    y_train = df_train['Class'].reset_index(drop=True)
    X_test = df_test.drop('Class', axis=1).reset_index(drop=True)
    y_test = df_test['Class'].reset_index(drop=True)
    return X_train, y_train, X_test, y_test


def predict():
    # Search space for the gradient boosting classifier
    hyper_params = {
        "n_estimators": [50, 100, 200, 300, 400, 500, 600, 700],
        "learning_rate": [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5],
        "max_depth": [1, 3, 4, 5, 6, 7, 8, 9, 10],
        "min_samples_split": [2, 5, 10, 15, 100],
        "tol": [1e-4, 1e-3, 1e-2, 1e-1, 1e-0],
        "validation_fraction": [0.1, 0.2, 0.3, 0.4, 0.5],
        "min_samples_leaf": [1, 2, 5, 10],
        "subsample": [0.6, 0.7, 0.8, 0.9, 1.0],
        # "auto" was removed in recent scikit-learn; it was equivalent to "sqrt"
        "max_features": ["sqrt", "log2", None],
    }

    df_train, df_test = load_data()
    X_train, y_train, X_test, y_test = preprocess_data(df_train, df_test)

    # Model: randomized search over the gradient boosting hyperparameters,
    # scored with ROC AUC under 5-fold cross-validation
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.model_selection import RandomizedSearchCV

    model = RandomizedSearchCV(
        GradientBoostingClassifier(),
        hyper_params,
        n_iter=100000,
        scoring="roc_auc",
        n_jobs=-1,
        cv=5,
        verbose=1,
    )
    model.fit(X_train, y_train)
    # Test-set score uses the search's scoring metric (ROC AUC)
    print(model.score(X_test, y_test))
    print('Best parameters:', model.best_params_)


if __name__ == "__main__":
    predict()


#### Logistic Regression
# Best parameters: {'C': 20.0, 'class_weight': None, 'max_iter': 50, 'penalty': 'l2', 'solver': 'lbfgs', 'tol': 0.0001}
# Accuracy: 0.8660287081339713
# Hyperparams tuned:
# hyper_params = {
#     'C': np.logspace(-3, 3, 7, 10, 20),
#     'penalty': ['l1', 'l2', 'elasticnet'],
#     'solver': ['liblinear', 'saga', 'lbfgs', 'newton-cg'],
#     'max_iter': [50, 100, 1000, 2500, 5000],
#     'class_weight': ['balanced', None],
#     'tol': [1e-4, 1e-3, 1e-2, 1e-1, 1],
# }
######################
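
# --- Logistic regression baseline (illustrative sketch) ----------------------
# The function below is a hedged sketch, not part of the original run: it shows
# one way the logistic-regression result recorded above could be reproduced with
# the same load/preprocess helpers. The explicit C grid, the n_iter budget, and
# the restriction to penalty='l2' (so every listed solver is compatible) are
# assumptions made for this example; `tune_logistic_regression` is an
# illustrative name, not a function from the original script.
def tune_logistic_regression():
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import RandomizedSearchCV

    df_train, df_test = load_data()
    X_train, y_train, X_test, y_test = preprocess_data(df_train, df_test)

    lr_params = {
        # Assumed grid; includes the recorded best value C=20.0
        'C': [0.001, 0.01, 0.1, 1.0, 10.0, 20.0, 100.0, 1000.0],
        'penalty': ['l2'],  # all solvers below support l2
        'solver': ['liblinear', 'saga', 'lbfgs', 'newton-cg'],
        'max_iter': [50, 100, 1000, 2500, 5000],
        'class_weight': ['balanced', None],
        'tol': [1e-4, 1e-3, 1e-2, 1e-1, 1],
    }
    search = RandomizedSearchCV(
        LogisticRegression(),
        lr_params,
        n_iter=200,  # assumed budget, well below the full grid size
        scoring='accuracy',  # the metric recorded above is accuracy
        n_jobs=-1,
        cv=5,
        verbose=1,
    )
    search.fit(X_train, y_train)
    print('Accuracy:', search.score(X_test, y_test))
    print('Best parameters:', search.best_params_)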