-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathMCForecastTools.py
172 lines (133 loc) · 7.08 KB
/
MCForecastTools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# Import libraries and dependencies
import numpy as np
import pandas as pd
import os
import alpaca_trade_api as tradeapi
import datetime as dt
import pytz
class MCSimulation:
    """
    A Python class for running Monte Carlo simulation on portfolio price data.

    ...

    Attributes
    ----------
    portfolio_data : pandas.DataFrame
        portfolio dataframe (two-level columns: ticker, field), including a
        "daily_return" field per ticker
    weights: list(float)
        portfolio investment breakdown
    nSim: int
        number of samples in simulation
    nTrading: int
        number of trading days to simulate
    simulated_return : pandas.DataFrame
        Simulated data from Monte Carlo (an empty string until the
        simulation has been run)
    confidence_interval : pandas.Series
        the 95% confidence intervals for simulated final cumulative returns
        (only set once the simulation has been run)
    """

    def __init__(self, portfolio_data, weights="", num_simulation=1000, num_trading_days=252):
        """
        Constructs all the necessary attributes for the MCSimulation object.

        Parameters
        ----------
        portfolio_data: pandas.DataFrame
            DataFrame containing stock price information from Alpaca API.
            Columns must be a two-level MultiIndex of (ticker, field) with a
            'close' field for every ticker.
        weights: list(float)
            A list fractions representing percentage of total investment per stock.
            DEFAULT: Equal distribution (selected by "" or None)
        num_simulation: int
            Number of simulation samples. DEFAULT: 1000 simulation samples
        num_trading_days: int
            Number of trading days to simulate. DEFAULT: 252 days (1 year of business days)

        Raises
        ------
        TypeError
            If portfolio_data is not a pandas DataFrame.
        AttributeError
            If the supplied weights do not sum to one (within 0.01).
        """
        # Check to make sure that all attributes are set
        if not isinstance(portfolio_data, pd.DataFrame):
            raise TypeError("portfolio_data must be a Pandas DataFrame")

        tickers = portfolio_data.columns.get_level_values(0).unique()

        # Set weights if empty, otherwise make sure sum of weights equals one.
        # The sentinel is tested with isinstance so that array-like weights are
        # never compared element-wise against the empty string.
        if weights is None or (isinstance(weights, str) and weights == ""):
            num_stocks = len(tickers)
            weights = [1.0 / num_stocks for _ in range(num_stocks)]
        else:
            # Reject over-allocation as well as under-allocation.
            if not 0.99 <= round(sum(weights), 2) <= 1.01:
                raise AttributeError("Sum of portfolio weights must equal one.")

        # Calculate daily return if not within dataframe
        if "daily_return" not in portfolio_data.columns.get_level_values(1).unique():
            close_df = portfolio_data.xs('close', level=1, axis=1).pct_change()
            # Label each return column from the ticker it was actually computed
            # for, rather than assuming the cross-section order matches `tickers`.
            column_names = [(ticker, "daily_return") for ticker in close_df.columns]
            close_df.columns = pd.MultiIndex.from_tuples(column_names)
            portfolio_data = portfolio_data.merge(
                close_df, left_index=True, right_index=True
            ).reindex(columns=tickers, level=0)

        # Set class attributes
        self.portfolio_data = portfolio_data
        self.weights = weights
        self.nSim = num_simulation
        self.nTrading = num_trading_days
        self.simulated_return = ""

    def calc_cumulative_return(self):
        """
        Calculates the cumulative return of a stock over time using a Monte Carlo simulation (Brownian motion with drift).

        Prints a progress message every tenth simulation, stores the result in
        `simulated_return` and the 2.5% / 97.5% quantiles of the final row in
        `confidence_interval`.

        Returns
        -------
        pandas.DataFrame
            One column per simulation, one row per simulated day (plus the
            starting row, which is always 1.0).
        """
        # Get closing prices of each stock
        last_prices = self.portfolio_data.xs('close', level=1, axis=1)[-1:].values.tolist()[0]

        # Calculate the mean and standard deviation of daily returns for each stock
        daily_returns = self.portfolio_data.xs('daily_return', level=1, axis=1)
        mean_returns = daily_returns.mean().tolist()
        std_returns = daily_returns.std().tolist()

        num_stocks = len(last_prices)

        # Collect every run first and build the DataFrame once at the end;
        # inserting one column per run fragments the frame.
        simulated_runs = {}

        # Run the simulation of projecting stock prices 'nSim' number of times
        for n in range(self.nSim):
            if n % 10 == 0:
                print(f"Running Monte Carlo simulation number {n}.")

            # One column of simulated prices per stock, starting at the last close
            price_paths = np.empty((self.nTrading + 1, num_stocks))
            for s, (price, mean, std) in enumerate(zip(last_prices, mean_returns, std_returns)):
                # Draw all daily returns for this stock at once (same draw order
                # as a per-day loop) and compound them from the last price.
                growth = 1 + np.random.normal(mean, std, size=self.nTrading)
                price_paths[:, s] = np.cumprod(np.concatenate(([price], growth)))

            # Calculate the daily returns of simulated prices
            sim_df = pd.DataFrame(price_paths).pct_change()

            # Use the `dot` function with the weights to multiply weights with each column's simulated daily returns
            sim_df = sim_df.dot(self.weights)

            # Calculate the normalized, cumulative return series
            simulated_runs[n] = (1 + sim_df.fillna(0)).cumprod()

        portfolio_cumulative_returns = pd.DataFrame(simulated_runs)

        # Set attribute to use in plotting
        self.simulated_return = portfolio_cumulative_returns

        # Calculate 95% confidence intervals for final cumulative returns
        self.confidence_interval = portfolio_cumulative_returns.iloc[-1, :].quantile(q=[0.025, 0.975])

        return portfolio_cumulative_returns

    def plot_simulation(self):
        """
        Visualizes the simulated stock trajectories using calc_cumulative_return method.

        Runs the simulation first if it has not been run yet. Returns whatever
        the pandas `plot` call returns.
        """
        # Check to make sure that simulation has run previously.
        if not isinstance(self.simulated_return, pd.DataFrame):
            self.calc_cumulative_return()

        # Use Pandas plot function to plot the return data
        plot_title = f"{self.nSim} Simulations of Cumulative Portfolio Return Trajectories Over the Next {self.nTrading} Trading Days."
        return self.simulated_return.plot(legend=None, title=plot_title)

    def plot_distribution(self):
        """
        Visualizes the distribution of cumulative returns simulated using calc_cumulative_return method.

        Runs the simulation first if it has not been run yet. Returns the
        object produced by the pandas histogram plot, with a red vertical line
        at each bound of the 95% confidence interval.
        """
        # Check to make sure that simulation has run previously.
        if not isinstance(self.simulated_return, pd.DataFrame):
            self.calc_cumulative_return()

        # Use the `plot` function to create a probability distribution histogram of simulated ending prices
        # with markings for a 95% confidence interval
        plot_title = f"Distribution of Final Cumuluative Returns Across All {self.nSim} Simulations"
        ax = self.simulated_return.iloc[-1, :].plot(kind='hist', bins=10, density=True, title=plot_title)
        ax.axvline(self.confidence_interval.iloc[0], color='r')
        ax.axvline(self.confidence_interval.iloc[1], color='r')
        return ax

    def summarize_cumulative_return(self):
        """
        Calculate final summary statistics for Monte Carlo simulated stock data.

        Runs the simulation first if it has not been run yet.

        Returns
        -------
        pandas.Series
            The `describe()` statistics of the final simulated cumulative
            returns followed by "95% CI Lower" and "95% CI Upper".
        """
        # Check to make sure that simulation has run previously.
        if not isinstance(self.simulated_return, pd.DataFrame):
            self.calc_cumulative_return()

        metrics = self.simulated_return.iloc[-1].describe()

        # Relabel a copy so the stored confidence interval keeps its quantile index.
        ci_series = self.confidence_interval.copy()
        ci_series.index = ["95% CI Lower", "95% CI Upper"]

        # Series.append was removed in pandas 2.0; concat is the supported form.
        return pd.concat([metrics, ci_series])