70 lines
2.0 KiB
Python
70 lines
2.0 KiB
Python
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
|
||
|
def load_data(train_path='train.csv', test_path='test.csv'):
    """Read the training and test sets from CSV.

    Args:
        train_path: Location of the training CSV; anything accepted by
            ``pandas.read_csv`` (path string, path object, or file-like
            object). Defaults to ``'train.csv'`` in the working directory.
        test_path: Location of the test CSV, same accepted types.
            Defaults to ``'test.csv'`` in the working directory.

    Returns:
        A ``(df_train, df_test)`` tuple of DataFrames, in that order.
    """
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)
    return df_train, df_test
|
||
|
|
||
|
|
||
|
def preprocess_data(df_train, df_test):
    """Impute missing values and split both frames into features and labels.

    Columns ``V4`` and ``V29`` have their missing values replaced by 0.
    Columns ``V22``, ``V27`` and ``V37`` have their missing values replaced
    by the column mean computed on the *training* frame; the same training
    means are applied to the test frame so that no statistic derived from
    the test set leaks into preprocessing.

    Both input frames are modified in place (the imputed columns are
    written back), matching the intent of the original ``inplace=True``
    calls.

    Args:
        df_train: Training DataFrame containing the ``V*`` columns listed
            above and a ``Class`` label column.
        df_test: Test DataFrame with the same columns.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` where each ``X_*`` is the
        corresponding frame without ``Class`` and each ``y_*`` is its
        ``Class`` column, all with a fresh 0..n-1 index.
    """
    zero_filled = ('V4', 'V29')
    mean_filled = ('V22', 'V27', 'V37')

    # Replacement value per column; means come from the training data only.
    fill_values = {column: 0 for column in zero_filled}
    for column in mean_filled:
        fill_values[column] = df_train[column].mean()

    # Assign the filled column back instead of calling
    # ``frame[column].fillna(..., inplace=True)``: the chained in-place form
    # operates on a temporary under pandas Copy-on-Write and leaves the
    # frame untouched.
    for frame in (df_train, df_test):
        for column, value in fill_values.items():
            frame[column] = frame[column].fillna(value)

    X_train = df_train.drop('Class', axis=1).reset_index(drop=True)
    y_train = df_train['Class'].reset_index(drop=True)
    X_test = df_test.drop('Class', axis=1).reset_index(drop=True)
    y_test = df_test['Class'].reset_index(drop=True)

    return X_train, y_train, X_test, y_test
|
||
|
|
||
|
|
||
|
def predict():
    """Grid-search a logistic regression and report its test-set score.

    Loads the data with ``load_data()``, imputes and splits it with
    ``preprocess_data()``, runs a 5-fold ``GridSearchCV`` over
    ``LogisticRegression`` hyper-parameters on the training split, then
    prints the fitted search's score on the test split followed by the
    best parameter combination found.

    Returns:
        None. Results are printed to standard output.
    """
    # Kept function-local, as in the original layout of this script.
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import GridSearchCV

    # Settings searched for every penalty/solver pairing.
    shared_params = {
        # Seven base-10 log-spaced values, 1e-3 .. 1e3. The previous call
        # ``np.logspace(-3, 3, 7, 10, 20)`` passed 10 as ``endpoint`` and
        # 20 as ``base``, silently producing powers of 20 instead.
        'C': np.logspace(-3, 3, num=7),
        'max_iter': [50, 100, 1000, 2500, 5000],
        'class_weight': ['balanced', None],
        'tol': [1e-4, 1e-3, 1e-2, 1e-1, 1],
    }

    # One sub-grid per group of compatible penalty/solver pairs. A single
    # crossed grid would also schedule combinations LogisticRegression
    # rejects (e.g. liblinear + elasticnet, lbfgs + l1), each of which only
    # burns a failed fit per fold.
    hyper_params = [
        dict(shared_params,
             penalty=['l1', 'l2'],
             solver=['liblinear', 'saga']),
        dict(shared_params,
             penalty=['l2'],
             solver=['lbfgs', 'newton-cg']),
        # elasticnet is only available with saga and needs an explicit
        # l1_ratio; 0.5 is an even L1/L2 mix. Add more ratios to widen
        # the search.
        dict(shared_params,
             penalty=['elasticnet'],
             solver=['saga'],
             l1_ratio=[0.5]),
    ]

    df_train, df_test = load_data()

    X_train, y_train, X_test, y_test = preprocess_data(df_train, df_test)

    # Model
    model = GridSearchCV(
        estimator=LogisticRegression(),
        param_grid=hyper_params,
        cv=5,
        n_jobs=-1,
        verbose=1,
    )

    model.fit(X_train, y_train)
    print(model.score(X_test, y_test))

    print('Best parameters:', model.best_params_)
|
||
|
|
||
|
|
||
|
|
||
|
# Script entry point: run the full load / preprocess / grid-search pipeline
# only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    predict()
|