diff --git a/a2/code/hyper_param.py b/a2/code/hyper_param.py
index 0c96de9b..d82d127a 100644
--- a/a2/code/hyper_param.py
+++ b/a2/code/hyper_param.py
@@ -34,12 +34,17 @@ def preprocess_data(df_train, df_test):
 
 def predict():
     hyper_params = {
-        'C': np.logspace(-3, 3, 7, 10, 20),
-        'penalty': ['l1', 'l2', 'elasticnet'],
-        'solver': ['liblinear', 'saga', 'lbfgs', 'newton-cg'],
-        'max_iter': [50, 100, 1000, 2500, 5000],
-        'class_weight': ['balanced', None],
-        'tol': [1e-4, 1e-3, 1e-2, 1e-1, 1],
+        "n_estimators": [50, 100, 200, 300, 400, 500, 600, 700],
+        "learning_rate": [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5],
+        "max_depth": [1, 3, 4, 5, 6, 7, 8, 9, 10],
+        "min_samples_split": [2, 5, 10, 15, 100],
+        "ccp_alpha": [0.0, 0.001, 0.005, 0.01, 0.05],
+        "loss": ["deviance", "exponential"],
+        "tol": [1e-4, 1e-3, 1e-2, 1e-1, 1e-0],
+        "validation_fraction": [0.1, 0.2, 0.3, 0.4, 0.5],
+        "min_samples_leaf": [1, 2, 5, 10],
+        "subsample": [0.6, 0.7, 0.8, 0.9, 1.0],
+        "max_features": ["auto", "sqrt", "log2"],
     }
 
     df_train, df_test = load_data()
@@ -47,11 +52,11 @@ def predict():
     X_train, y_train, X_test, y_test = preprocess_data(df_train, df_test)
 
     # Model
-    from sklearn.linear_model import LogisticRegression
+    from sklearn.ensemble import GradientBoostingClassifier
     from sklearn.model_selection import GridSearchCV
 
     model = GridSearchCV(
-        estimator=LogisticRegression(),
+        estimator=GradientBoostingClassifier(),
         param_grid=hyper_params,
         cv=5,
         n_jobs=-1,
@@ -67,3 +72,17 @@ def predict():
 
 if __name__ == "__main__":
     predict()
+
+#### Logistic Regression
+# Best parameters: {'C': 20.0, 'class_weight': None, 'max_iter': 50, 'penalty': 'l2', 'solver': 'lbfgs', 'tol': 0.0001}
+# Accuracy: 0.8660287081339713
+# Hyperparams tuned:
+# hyper_params = {
+#     'C': np.logspace(-3, 3, 7, 10, 20),
+#     'penalty': ['l1', 'l2', 'elasticnet'],
+#     'solver': ['liblinear', 'saga', 'lbfgs', 'newton-cg'],
+#     'max_iter': [50, 100, 1000, 2500, 5000],
+#     'class_weight': ['balanced', None],
+#     'tol': [1e-4, 1e-3, 1e-2, 1e-1, 1],
+# }
+######################
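
For scale, the new grid enumerates roughly 3.8e7 parameter combinations (8 x 7 x 9 x 5 x 5 x 2 x 5 x 5 x 4 x 5 x 3), so an exhaustive GridSearchCV with cv=5 would attempt on the order of 1.9e8 model fits. A minimal sketch of the usual workaround, RandomizedSearchCV, which samples a fixed budget of candidates from the same search space; it assumes the hyper_params dict and the X_train/y_train/X_test/y_test arrays from predict() above, and the n_iter budget and random_state seed are illustrative choices, not values taken from the diff:

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV

# Sample a fixed number of candidate settings from the grid
# instead of exhausting it.
model = RandomizedSearchCV(
    estimator=GradientBoostingClassifier(),
    param_distributions=hyper_params,  # same search space as the grid above
    n_iter=100,        # illustrative budget, not from the diff
    cv=5,
    n_jobs=-1,
    random_state=42,   # illustrative seed for reproducible sampling
)
model.fit(X_train, y_train)
print(model.best_params_)
print(model.score(X_test, y_test))

One version caveat: recent scikit-learn releases renamed loss="deviance" to "log_loss" and dropped max_features="auto" for GradientBoostingClassifier, so the grid as written appears to target an older release.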