From 0ca4a677c46b4762c4cd90b7cf38e995b0249e60 Mon Sep 17 00:00:00 2001 From: Gasper Spagnolo Date: Mon, 5 Sep 2022 15:46:48 +0200 Subject: [PATCH] Lets go --- main.py | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/main.py b/main.py index 3097557..b008d77 100644 --- a/main.py +++ b/main.py @@ -1,9 +1,11 @@ import pandas as pd -from sklearn.model_selection import GridSearchCV -from sklearn.ensemble import RandomForestRegressor import time from datetime import datetime, timezone, timedelta import calendar +#from sklearnex import patch_sklearn # broken :( +#patch_sklearn() +from sklearn.model_selection import GridSearchCV +from sklearn.ensemble import RandomForestRegressor rf_reg_grid = { 'bootstrap': [True], @@ -17,6 +19,9 @@ rf_reg_grid = { best_params = { 'bootstrap': [True], 'max_depth': [90], 'max_features': ['auto'], 'min_samples_leaf': [4], 'min_samples_split': [10], 'n_estimators': [100] } + +report_file = '/results/report.txt' + def get_data_and_store_csv(symbol, interval, start, end = None, limit=1000): """ start and end must be isoformat YYYY-MM-DD @@ -74,8 +79,8 @@ def get_stored_data(symbol, interval, start, end): start = calendar.timegm(datetime.fromisoformat(start).timetuple()) * 1000 end = calendar.timegm(datetime.fromisoformat(end).timetuple()) * 1000 df = pd.read_csv( - #f'/crypto_prediction/data/{symbol}--interval-{interval}--start-{start}--end-{end}.csv', - f'data/{symbol}--interval-{interval}--start-{start}--end-{end}.csv', + f'/crypto_prediction/data/{symbol}--interval-{interval}--start-{start}--end-{end}.csv', + #'data/{symbol}--interval-{interval}--start-{start}--end-{end}.csv', parse_dates=['Opentime'] ) return df @@ -113,10 +118,11 @@ def split_data(df, split_value): def find_best_hyperparameters_and_train(X_train, y_train): global rf_reg_grid + global report_file gs_rf_reg = GridSearchCV( RandomForestRegressor(), - param_grid=best_params, + param_grid=rf_reg_grid, cv=5, refit=True, n_jobs=-1 @@ -124,19 +130,27 @@ def find_best_hyperparameters_and_train(X_train, y_train): gs_rf_reg.fit(X_train, y_train) - print("Best hyperparameters: ", gs_rf_reg.best_params_) + report(report_file, f'Best hyperparams:\n{gs_rf_reg.best_params_}') return gs_rf_reg +def report(f_name, report_data): + with open(f_name, 'a') as f: + f.write(report_data) + f.write('\n') + f.flush() + f.close() + + def main(): - global models + global report_file # Read data into panda dataframe #df = get_data_and_store_csv('XMRUSDT', '1m', '2020-01-01', '2022-09-04') df = get_stored_data('XMRUSDT', '1m', '2020-01-01', '2022-09-04') # Prepare data for moddeling df = process_data(df=df) # Split data into train and test sets - X_train, X_test, y_train, y_test = split_data(df=df, split_value=0.9999) + X_train, X_test, y_train, y_test = split_data(df=df, split_value=0.99999) # Model the data model = find_best_hyperparameters_and_train( @@ -144,15 +158,18 @@ def main(): y_train=y_train ) - # Score our modello - print("Test data:\n", X_test) - print("Prediction results:\n", model.predict(X_test)) - print("Correct values:\n", y_test) - print('Model scored:\n', model.score(X_test, y_test)) + # Do a report + report(report_file, f'Test data:\n{X_test}') + report(report_file, f'Prediction results:\n{model.predict(X_test)}') + report(report_file, f'Correct values:\n{y_test}') + report(report_file, f'Model scored:\n{model.score(X_test, y_test)}') if __name__ == "__main__": start_time = time.time() + prettified_time = datetime.now().strftime("%H:%M:%S") + report(report_file, f"\n Starting! {prettified_time}") main() end_time = time.time() - start_time print(f'\n\nCompleted...\t\t\t\t\t{end_time // 3600} h {(end_time // 60) % 60} m {int(end_time % 60)}s') + report(report_file, f'\n\nCompleted...\t\t\t\t\t{end_time // 3600} h {(end_time // 60) % 60} m {int(end_time % 60)}s')