main
Gasper Spagnolo 2022-09-05 15:46:48 +02:00
parent 802d88b9d7
commit 0ca4a677c4
1 changed files with 30 additions and 13 deletions

43
main.py
View File

@ -1,9 +1,11 @@
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
import time
from datetime import datetime, timezone, timedelta
import calendar
#from sklearnex import patch_sklearn # broken :(
#patch_sklearn()
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
rf_reg_grid = {
'bootstrap': [True],
@ -17,6 +19,9 @@ rf_reg_grid = {
# Single-candidate parameter grid: every value is a one-element list, so a
# grid search over it evaluates exactly one parameter combination.
best_params = {
    'bootstrap': [True],
    'max_depth': [90],
    # 1.0 means "consider all features", which is what 'auto' meant for
    # RandomForestRegressor. 'auto' was deprecated in scikit-learn 1.1 and
    # is rejected from 1.3 onward, while 1.0 is accepted by old and new
    # releases alike.
    'max_features': [1.0],
    'min_samples_leaf': [4],
    'min_samples_split': [10],
    'n_estimators': [100],
}
# Path of the text file that report() appends to.
report_file = '/results/report.txt'
def get_data_and_store_csv(symbol, interval, start, end = None, limit=1000):
"""
start and end must be isoformat YYYY-MM-DD
@ -74,8 +79,8 @@ def get_stored_data(symbol, interval, start, end):
start = calendar.timegm(datetime.fromisoformat(start).timetuple()) * 1000
end = calendar.timegm(datetime.fromisoformat(end).timetuple()) * 1000
df = pd.read_csv(
#f'/crypto_prediction/data/{symbol}--interval-{interval}--start-{start}--end-{end}.csv',
f'data/{symbol}--interval-{interval}--start-{start}--end-{end}.csv',
f'/crypto_prediction/data/{symbol}--interval-{interval}--start-{start}--end-{end}.csv',
#'data/{symbol}--interval-{interval}--start-{start}--end-{end}.csv',
parse_dates=['Opentime']
)
return df
@ -113,10 +118,11 @@ def split_data(df, split_value):
def find_best_hyperparameters_and_train(X_train, y_train):
global rf_reg_grid
global report_file
gs_rf_reg = GridSearchCV(
RandomForestRegressor(),
param_grid=best_params,
param_grid=rf_reg_grid,
cv=5,
refit=True,
n_jobs=-1
@ -124,19 +130,27 @@ def find_best_hyperparameters_and_train(X_train, y_train):
gs_rf_reg.fit(X_train, y_train)
print("Best hyperparameters: ", gs_rf_reg.best_params_)
report(report_file, f'Best hyperparams:\n{gs_rf_reg.best_params_}')
return gs_rf_reg
def report(f_name, report_data):
    """Append ``report_data`` followed by a newline to the file ``f_name``.

    The file is opened in append mode, so it is created when missing and
    any existing content is kept.

    Args:
        f_name: Path of the file to append to.
        report_data: Text to write; must be a ``str``.
    """
    # Leaving the ``with`` block flushes and closes the handle, so no
    # explicit flush()/close() calls are needed.
    with open(f_name, 'a') as f:
        f.write(report_data)
        f.write('\n')
def main():
global models
global report_file
# Read data into panda dataframe
#df = get_data_and_store_csv('XMRUSDT', '1m', '2020-01-01', '2022-09-04')
df = get_stored_data('XMRUSDT', '1m', '2020-01-01', '2022-09-04')
# Prepare data for modeling
df = process_data(df=df)
# Split data into train and test sets
X_train, X_test, y_train, y_test = split_data(df=df, split_value=0.9999)
X_train, X_test, y_train, y_test = split_data(df=df, split_value=0.99999)
# Model the data
model = find_best_hyperparameters_and_train(
@ -144,15 +158,18 @@ def main():
y_train=y_train
)
# Score our model
print("Test data:\n", X_test)
print("Prediction results:\n", model.predict(X_test))
print("Correct values:\n", y_test)
print('Model scored:\n', model.score(X_test, y_test))
# Do a report
report(report_file, f'Test data:\n{X_test}')
report(report_file, f'Prediction results:\n{model.predict(X_test)}')
report(report_file, f'Correct values:\n{y_test}')
report(report_file, f'Model scored:\n{model.score(X_test, y_test)}')
if __name__ == "__main__":
    # Script entry point: log the start time, run main(), then print and
    # log how long the run took.
    start_time = time.time()
    prettified_time = datetime.now().strftime("%H:%M:%S")
    report(report_file, f"\n Starting! {prettified_time}")
    main()
    # Work in whole seconds so hours and minutes are rendered as integers
    # ("0 h 1 m 5s") rather than floats ("0.0 h 1.0 m 5s").
    elapsed_seconds = int(time.time() - start_time)
    hours, remainder = divmod(elapsed_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    # Build the summary once so the console and the report file agree.
    summary = f'\n\nCompleted...\t\t\t\t\t{hours} h {minutes} m {seconds}s'
    print(summary)
    report(report_file, summary)