diff --git a/.gitignore b/.gitignore index 3722fbe..2a82c41 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ data/*.csv data.csv prepared.png model.h5 +*.csv # C extensions *.so diff --git a/README.md b/README.md index 4982378..c94b6ec 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,6 @@ ![stockpredictor ai logo](https://user-images.githubusercontent.com/53996451/224323224-3ec1cd20-747c-42ad-9fb1-ba6e0ecb358b.png) -> Please note that this is still in the testing phase, so if you encounter any errors, please report them. - --- # Content Table @@ -71,27 +69,27 @@ To evaluate the trained model, execute the following: python eval.py ``` -After running this command, the accuracy, rewards, mean squared error (MSE), and root mean squared error (RMSE) will be plotted. +After running this command, the root mean squared error (RMSE), mean R-squared and Total Rewards will be plotted. --- ## 5. Fine Tuning the LSTM RL Model -Fine tuning the LSTM RL model will take more time as it will train until it reaches a specific R2 threshold. By default, the R2 threshold is set to 2.85 (An R2 average of 0.95). However, if you want to set a different R2 threshold, open the fine_tune.py file and change the accuracy_threshold variable to your desired target accuracy. +Fine tuning the LSTM RL model will take more time as it will train until it reaches a specific R2 threshold. By default, the R2 threshold is set to 2.982 (An R2 average of 0.994). However, if you want to set a different R2 threshold, open the fine_tune.py file and change the reward_threshold variable to your desired target value. To fine tune the model, execute the following: ``` python fine_tune.py ``` -It's advisable to stay hydrated with some water while waiting for the fine-tuning process to finish, as it may take some time. Once the fine-tuning is complete, attempt to run the "finetune.py" script until it loops only three times. 
If it fails to continue, keep running it until it has looped three times at least once. +It's advisable to stay hydrated with some water while waiting for the fine-tuning process to finish, as it may take some time. Once the fine-tuning is complete, attempt to run the "fine_tune.py" script until it loops only three times. If it doesn't loop three times, keep rerunning it until it has looped three times at least once. After fine-tuning is complete, it is recommended to re-evaluate the model. --- -## 6. Utilizing the Model for Stock Market Prediction -After completing the previous steps, you can use the model to predict the stock market for as many days as you want. The script will prompt you to enter the number of days to predict, and after plotting the predictions, it will ask you to rate the predictions on a scale of 1 to 10. The model will receive a reward based on your rating, which will help it improve its future predictions. +## 6. Utilizing the Model for Stock Market Prediction +Once the previous steps have been completed, the model can be utilized to forecast the stock market for the next 30 days beyond the latest date in the data. The predictions will be shown in the command line and saved as a CSV file. 
To use the model for prediction, run the following command: diff --git a/fine_tune.py b/fine_tune.py index ec976a3..ad94468 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -43,7 +43,7 @@ def create_sequences(data, timesteps): model = load_model('model.h5') # Define reward threshold -reward_threshold = 2.85 +reward_threshold = 2.982 # Initialize rewards rewards = [] diff --git a/predict.py b/predict.py index 9849716..ad62bf1 100644 --- a/predict.py +++ b/predict.py @@ -4,10 +4,8 @@ import pandas as pd import numpy as np from sklearn.preprocessing import MinMaxScaler -import tensorflow as tf from tensorflow.keras.models import load_model - -print("TensorFlow version:", tf.__version__) +import matplotlib.pyplot as plt # Load data data = pd.read_csv("data.csv") @@ -19,25 +17,34 @@ # Define time steps timesteps = 100 -# Extract the last 60 days of data -last_60_days = data_norm[-60:] +# Create sequences of timesteps +def create_sequences(data, timesteps): + X = [] + for i in range(timesteps, len(data)): + X.append(data[i-timesteps:i]) + return np.array(X) -# Create a sequence of input data to predict the next 30 days -X_test = np.array([last_60_days[i-timesteps:i] for i in range(timesteps, len(last_60_days)+1)]) +X_data = create_sequences(data_norm, timesteps) # Load model -tf.config.run_functions_eagerly(True) model = load_model('model.h5') +model.summary() + +num_predictions = 365 + +# Make predictions for next num_predictions days +X_pred = X_data[-num_predictions:].reshape((num_predictions, timesteps, X_data.shape[2])) +y_pred = model.predict(X_pred)[:, 0] -# Evaluate model -y_pred = model.predict(X_test) +# Inverse transform predictions +y_pred = scaler.inverse_transform(np.hstack([np.zeros((len(y_pred), 17)), np.array(y_pred).reshape(-1, 1)]))[:, -1] -# Inverse transform the predicted values -y_pred_inv = scaler.inverse_transform(np.hstack((X_test[:, -1, :-1], y_pred.reshape(-1, 1)))) +# Generate date index for predictions +last_date = data['Date'].iloc[-1] +index 
= pd.date_range(last_date, periods=num_predictions, freq='D', tz='UTC').tz_localize(None) -# Get the predicted prices for the next 30 days -predicted_prices = y_pred_inv[:, -1] +# Save predictions in a CSV file +predictions = pd.DataFrame({'Date': index, 'Predicted Close': y_pred}) +predictions.to_csv('predictions.csv', index=False) -# Print the predicted prices -for i, price in enumerate(predicted_prices): - print(f"Day {i+1}: ${price:.2f}") +print(predictions) diff --git a/train.py b/train.py index 3f2ac78..98c7bd9 100644 --- a/train.py +++ b/train.py @@ -6,7 +6,7 @@ from sklearn.preprocessing import MinMaxScaler import tensorflow as tf from tensorflow.keras.models import Sequential, load_model -from tensorflow.keras.layers import LSTM, Dense, Dropout +from tensorflow.keras.layers import LSTM, Dense, Dropout, Lambda from tensorflow.keras.callbacks import Callback from sklearn.metrics import accuracy_score from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping @@ -16,7 +16,8 @@ # Define reward function def get_reward(y_true, y_pred): mse = np.mean((y_true - y_pred)**2) - reward = 1 / (1 + mse) # Reward is inversely proportional to the MSE + acc = np.mean(y_true / y_pred) + reward = (acc - mse) return reward # Load data @@ -50,11 +51,13 @@ def create_sequences(data, timesteps): # Build model model = Sequential() -model.add(LSTM(units=50, return_sequences=True, input_shape=(timesteps, X_train.shape[2]))) +model.add(LSTM(units=300, return_sequences=True, input_shape=(timesteps, X_train.shape[2]))) model.add(Dropout(0.2)) -model.add(LSTM(units=50, return_sequences=True)) +model.add(LSTM(units=200, return_sequences=True)) model.add(Dropout(0.2)) -model.add(LSTM(units=50)) +model.add(LSTM(units=130, return_sequences=True)) +model.add(Dropout(0.2)) +model.add(LSTM(units=100)) model.add(Dropout(0.2)) model.add(Dense(units=1))