Lets go
parent
802d88b9d7
commit
0ca4a677c4
43
main.py
43
main.py
|
@ -1,9 +1,11 @@
|
|||
import pandas as pd
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
import time
|
||||
from datetime import datetime, timezone, timedelta
|
||||
import calendar
|
||||
#from sklearnex import patch_sklearn # broken :(
|
||||
#patch_sklearn()
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.ensemble import RandomForestRegressor
|
||||
|
||||
rf_reg_grid = {
|
||||
'bootstrap': [True],
|
||||
|
@ -17,6 +19,9 @@ rf_reg_grid = {
|
|||
best_params = {
|
||||
'bootstrap': [True], 'max_depth': [90], 'max_features': ['auto'], 'min_samples_leaf': [4], 'min_samples_split': [10], 'n_estimators': [100]
|
||||
}
|
||||
|
||||
report_file = '/results/report.txt'
|
||||
|
||||
def get_data_and_store_csv(symbol, interval, start, end = None, limit=1000):
|
||||
"""
|
||||
start and end must be isoformat YYYY-MM-DD
|
||||
|
@ -74,8 +79,8 @@ def get_stored_data(symbol, interval, start, end):
|
|||
start = calendar.timegm(datetime.fromisoformat(start).timetuple()) * 1000
|
||||
end = calendar.timegm(datetime.fromisoformat(end).timetuple()) * 1000
|
||||
df = pd.read_csv(
|
||||
#f'/crypto_prediction/data/{symbol}--interval-{interval}--start-{start}--end-{end}.csv',
|
||||
f'data/{symbol}--interval-{interval}--start-{start}--end-{end}.csv',
|
||||
f'/crypto_prediction/data/{symbol}--interval-{interval}--start-{start}--end-{end}.csv',
|
||||
#'data/{symbol}--interval-{interval}--start-{start}--end-{end}.csv',
|
||||
parse_dates=['Opentime']
|
||||
)
|
||||
return df
|
||||
|
@ -113,10 +118,11 @@ def split_data(df, split_value):
|
|||
|
||||
def find_best_hyperparameters_and_train(X_train, y_train):
|
||||
global rf_reg_grid
|
||||
global report_file
|
||||
|
||||
gs_rf_reg = GridSearchCV(
|
||||
RandomForestRegressor(),
|
||||
param_grid=best_params,
|
||||
param_grid=rf_reg_grid,
|
||||
cv=5,
|
||||
refit=True,
|
||||
n_jobs=-1
|
||||
|
@ -124,19 +130,27 @@ def find_best_hyperparameters_and_train(X_train, y_train):
|
|||
|
||||
gs_rf_reg.fit(X_train, y_train)
|
||||
|
||||
print("Best hyperparameters: ", gs_rf_reg.best_params_)
|
||||
report(report_file, f'Best hyperparams:\n{gs_rf_reg.best_params_}')
|
||||
|
||||
return gs_rf_reg
|
||||
|
||||
def report(f_name, report_data):
    """Append ``report_data`` and a trailing newline to the file ``f_name``.

    The file is opened in append mode, so it is created when missing and
    previously written entries are preserved.

    Args:
        f_name: Path of the report file to append to.
        report_data: Text to write; a newline is added after it.
    """
    # Leaving the ``with`` block flushes and closes the file, so explicit
    # flush()/close() calls are unnecessary.
    with open(f_name, 'a') as f:
        f.write(report_data)
        f.write('\n')
|
||||
|
||||
|
||||
def main():
|
||||
global models
|
||||
global report_file
|
||||
# Read data into panda dataframe
|
||||
#df = get_data_and_store_csv('XMRUSDT', '1m', '2020-01-01', '2022-09-04')
|
||||
df = get_stored_data('XMRUSDT', '1m', '2020-01-01', '2022-09-04')
|
||||
# Prepare data for modeling
|
||||
df = process_data(df=df)
|
||||
# Split data into train and test sets
|
||||
X_train, X_test, y_train, y_test = split_data(df=df, split_value=0.9999)
|
||||
X_train, X_test, y_train, y_test = split_data(df=df, split_value=0.99999)
|
||||
|
||||
# Model the data
|
||||
model = find_best_hyperparameters_and_train(
|
||||
|
@ -144,15 +158,18 @@ def main():
|
|||
y_train=y_train
|
||||
)
|
||||
|
||||
# Score our model
|
||||
print("Test data:\n", X_test)
|
||||
print("Prediction results:\n", model.predict(X_test))
|
||||
print("Correct values:\n", y_test)
|
||||
print('Model scored:\n', model.score(X_test, y_test))
|
||||
# Do a report
|
||||
report(report_file, f'Test data:\n{X_test}')
|
||||
report(report_file, f'Prediction results:\n{model.predict(X_test)}')
|
||||
report(report_file, f'Correct values:\n{y_test}')
|
||||
report(report_file, f'Model scored:\n{model.score(X_test, y_test)}')
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: log the start time, run the pipeline, then print and
    # append the elapsed wall-clock time to the report file.
    start_time = time.time()
    prettified_time = datetime.now().strftime("%H:%M:%S")
    report(report_file, f"\n Starting! {prettified_time}")
    main()
    end_time = time.time() - start_time
    # divmod on whole seconds keeps hours/minutes as ints, so the summary
    # reads "0 h 1 m 5s" instead of "0.0 h 1.0 m 5s".
    hours, remainder = divmod(int(end_time), 3600)
    minutes, seconds = divmod(remainder, 60)
    # Build the summary once so the console and the report stay identical.
    summary = f'\n\nCompleted...\t\t\t\t\t{hours} h {minutes} m {seconds}s'
    print(summary)
    report(report_file, summary)
|
||||
|
|
Loading…
Reference in New Issue