main
Gasper Spagnolo 2022-09-05 20:22:54 +02:00
parent 0ca4a677c4
commit 01f2fc36b9
1 changed files with 8 additions and 1 deletions

View File

@ -6,7 +6,9 @@ import calendar
#patch_sklearn()
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
# Improving dataset and modelling: https://medium.com/@maryamuzakariya/project-predict-stock-prices-using-random-forest-regression-model-in-python-fbe4edf01664
rf_reg_grid = {
'bootstrap': [True],
'max_depth': [5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
@ -114,6 +116,11 @@ def split_data(df, split_value):
X_train, y_train = df_train.drop("price", axis=1), df_train["price"]
X_test, y_test = df_val.drop("price", axis = 1), df_val["price"]
# Standardize features by removing the mean and scaling to unit variance.
# The scaler is fitted on the training features only and the same fitted
# transform is then applied to the test features.
# The results are bound to X_train / X_test (capital X): those are the names
# created by the split above and the ones this function returns, so the
# scaled arrays are what the caller actually receives.
scale = StandardScaler()
X_train = scale.fit_transform(X_train)
X_test = scale.transform(X_test)
return X_train, X_test, y_train, y_test
def find_best_hyperparameters_and_train(X_train, y_train):
@ -150,7 +157,7 @@ def main():
# Prepare data for modelling
df = process_data(df=df)
# Split data into train and test sets
X_train, X_test, y_train, y_test = split_data(df=df, split_value=0.99999)
X_train, X_test, y_train, y_test = split_data(df=df, split_value=0.99)
# Model the data
model = find_best_hyperparameters_and_train(