From 7eda0a414c976811558609364d4fd0a439f4ec3c Mon Sep 17 00:00:00 2001 From: Jamie McCorriston Date: Tue, 18 Feb 2020 10:36:14 -0500 Subject: [PATCH 1/4] Performance improvements and other changes. --- alphalens/performance.py | 213 +++++------------------------------ alphalens/plotting.py | 26 +++-- alphalens/tears.py | 64 +++++++---- alphalens/utils.py | 232 +++++++++++++++++++++++++++++++++++++-- setup.py | 1 + 5 files changed, 309 insertions(+), 227 deletions(-) diff --git a/alphalens/performance.py b/alphalens/performance.py index 8f616e10..d70f0a79 100644 --- a/alphalens/performance.py +++ b/alphalens/performance.py @@ -17,6 +17,7 @@ import numpy as np import warnings +import empyrical as ep from pandas.tseries.offsets import BDay from scipy import stats from statsmodels.regression.linear_model import OLS @@ -329,169 +330,27 @@ def factor_alpha_beta(factor_data, return alpha_beta -def cumulative_returns(returns, period, freq=None): +def cumulative_returns(returns): """ - Builds cumulative returns from 'period' returns. This function simulates - the cumulative effect that a series of gains or losses (the 'returns') - have on an original amount of capital over a period of time. - - if F is the frequency at which returns are computed (e.g. 1 day if - 'returns' contains daily values) and N is the period for which the retuns - are computed (e.g. returns after 1 day, 5 hours or 3 days) then: - - if N <= F the cumulative retuns are trivially computed as Compound Return - - if N > F (e.g. F 1 day, and N is 3 days) then the returns overlap and the - cumulative returns are computed building and averaging N interleaved sub - portfolios (started at subsequent periods 1,2,..,N) each one rebalancing - every N periods. This correspond to an algorithm which trades the factor - every single time it is computed, which is statistically more robust and - with a lower volatity compared to an algorithm that trades the factor - every N periods and whose returns depend on the specific starting day of - trading. - - Also note that when the factor is not computed at a specific frequency, for - exaple a factor representing a random event, it is not efficient to create - multiples sub-portfolios as it is not certain when the factor will be - traded and this would result in an underleveraged portfolio. In this case - the simulated portfolio is fully invested whenever an event happens and if - a subsequent event occur while the portfolio is still invested in a - previous event then the portfolio is rebalanced and split equally among the - active events. + Computes cumulative returns from simple daily returns. Parameters ---------- returns: pd.Series - pd.Series containing factor 'period' forward returns, the index - contains timestamps at which the trades are computed and the values - correspond to returns after 'period' time - period: pandas.Timedelta or string - Length of period for which the returns are computed (1 day, 2 mins, - 3 hours etc). It can be a Timedelta or a string in the format accepted - by Timedelta constructor ('1 days', '1D', '30m', '3h', '1D1h', etc) - freq : pandas DateOffset, optional - Used to specify a particular trading calendar. If not present - returns.index.freq will be used + pd.Series containing daily factor returns (i.e. '1D' returns). 
Returns ------- Cumulative returns series : pd.Series Example: - 2015-07-16 09:30:00 -0.012143 - 2015-07-16 12:30:00 0.012546 - 2015-07-17 09:30:00 0.045350 - 2015-07-17 12:30:00 0.065897 - 2015-07-20 09:30:00 0.030957 + 2015-01-05 1.001310 + 2015-01-06 1.000805 + 2015-01-07 1.001092 + 2015-01-08 0.999200 """ - if not isinstance(period, pd.Timedelta): - period = pd.Timedelta(period) + return ep.cum_returns(returns, starting_value=1) - if freq is None: - freq = returns.index.freq - - if freq is None: - freq = BDay() - warnings.warn("'freq' not set, using business day calendar", - UserWarning) - - # - # returns index contains factor computation timestamps, then add returns - # timestamps too (factor timestamps + period) and save them to 'full_idx' - # Cumulative returns will use 'full_idx' index,because we want a cumulative - # returns value for each entry in 'full_idx' - # - trades_idx = returns.index.copy() - returns_idx = utils.add_custom_calendar_timedelta(trades_idx, period, freq) - full_idx = trades_idx.union(returns_idx) - - # - # Build N sub_returns from the single returns Series. Each sub_retuns - # stream will contain non-overlapping returns. - # In the next step we'll compute the portfolio returns averaging the - # returns happening on those overlapping returns streams - # - sub_returns = [] - while len(trades_idx) > 0: - - # - # select non-overlapping returns starting with first timestamp in index - # - sub_index = [] - next = trades_idx.min() - while next <= trades_idx.max(): - sub_index.append(next) - next = utils.add_custom_calendar_timedelta(next, period, freq) - # make sure to fetch the next available entry after 'period' - try: - i = trades_idx.get_loc(next, method='bfill') - next = trades_idx[i] - except KeyError: - break - - sub_index = pd.DatetimeIndex(sub_index, tz=full_idx.tz) - subret = returns[sub_index] - - # make the index to have all entries in 'full_idx' - subret = subret.reindex(full_idx) - - # - # compute intermediate returns values for each index in subret that are - # in between the timestaps at which the factors are computed and the - # timestamps at which the 'period' returns actually happen - # - for pret_idx in reversed(sub_index): - - pret = subret[pret_idx] - - # get all timestamps between factor computation and period returns - pret_end_idx = \ - utils.add_custom_calendar_timedelta(pret_idx, period, freq) - slice = subret[(subret.index > pret_idx) & ( - subret.index <= pret_end_idx)].index - - if pd.isnull(pret): - continue - - def rate_of_returns(ret, period): - return ((np.nansum(ret) + 1)**(1. / period)) - 1 - - # compute intermediate 'period' returns values, note that this also - # moves the final 'period' returns value from trading timestamp to - # trading timestamp + 'period' - for slice_idx in slice: - sub_period = utils.diff_custom_calendar_timedeltas( - pret_idx, slice_idx, freq) - subret[slice_idx] = rate_of_returns(pret, period / sub_period) - - subret[pret_idx] = np.nan - - # transform returns as percentage change from previous value - subret[slice[1:]] = (subret[slice] + 1).pct_change()[slice[1:]] - - sub_returns.append(subret) - trades_idx = trades_idx.difference(sub_index) - - # - # Compute portfolio cumulative returns averaging the returns happening on - # overlapping returns streams. 
- # - sub_portfolios = pd.concat(sub_returns, axis=1) - portfolio = pd.Series(index=sub_portfolios.index) - - for i, (index, row) in enumerate(sub_portfolios.iterrows()): - - # check the active portfolios, count() returns non-nans elements - active_subfolios = row.count() - - # fill forward portfolio value - portfolio.iloc[i] = portfolio.iloc[i - 1] if i > 0 else 1. - - if active_subfolios <= 0: - continue - - # current portfolio is the average of active sub_portfolios - portfolio.iloc[i] *= (row + 1).mean(skipna=True) - - return portfolio def positions(weights, period, freq=None): @@ -709,7 +568,7 @@ def compute_mean_returns_spread(mean_returns, def quantile_turnover(quantile_factor, quantile, period=1): """ - Computes the proportion of names in a factor quantile that were + Computes the daily proportion of names in a factor quantile that were not in that quantile in the previous period. Parameters @@ -718,10 +577,8 @@ def quantile_turnover(quantile_factor, quantile, period=1): DataFrame with date, asset and factor quantile. quantile : int Quantile on which to perform turnover analysis. - period: string or int, optional - Period over which to calculate the turnover. If it is a string it must - follow pandas.Timedelta constructor format (e.g. '1 days', '1D', '30m', - '3h', '1D1h', etc). + period: int, optional + Number of days over which to calculate the turnover. Returns ------- quant_turnover : pd.Series @@ -732,14 +589,7 @@ def quantile_turnover(quantile_factor, quantile, period=1): quant_name_sets = quant_names.groupby(level=['date']).apply( lambda x: set(x.index.get_level_values('asset'))) - if isinstance(period, int): - name_shifted = quant_name_sets.shift(period) - else: - shifted_idx = utils.add_custom_calendar_timedelta( - quant_name_sets.index, -pd.Timedelta(period), - quantile_factor.index.levels[0].freq) - name_shifted = quant_name_sets.reindex(shifted_idx) - name_shifted.index = quant_name_sets.index + name_shifted = quant_name_sets.shift(period) new_names = (quant_name_sets - name_shifted).dropna() quant_turnover = new_names.apply( @@ -765,10 +615,8 @@ def factor_rank_autocorrelation(factor_data, period=1): each period, the factor quantile/bin that factor value belongs to, and (optionally) the group the asset belongs to. - See full explanation in utils.get_clean_factor_and_forward_returns - period: string or int, optional - Period over which to calculate the turnover. If it is a string it must - follow pandas.Timedelta constructor format (e.g. '1 days', '1D', '30m', - '3h', '1D1h', etc). + period: int, optional + Number of days over which to calculate the turnover. 
Returns ------- autocorr : pd.Series @@ -785,14 +633,7 @@ def factor_rank_autocorrelation(factor_data, period=1): columns='asset', values='factor') - if isinstance(period, int): - asset_shifted = asset_factor_rank.shift(period) - else: - shifted_idx = utils.add_custom_calendar_timedelta( - asset_factor_rank.index, -pd.Timedelta(period), - factor_data.index.levels[0].freq) - asset_shifted = asset_factor_rank.reindex(shifted_idx) - asset_shifted.index = asset_factor_rank.index + asset_shifted = asset_factor_rank.shift(period) autocorr = asset_factor_rank.corrwith(asset_shifted, axis=1) autocorr.name = period @@ -800,7 +641,7 @@ def factor_rank_autocorrelation(factor_data, period=1): def common_start_returns(factor, - prices, + returns, before, after, cumulative=False, @@ -845,10 +686,8 @@ def common_start_returns(factor, index: -before to after """ - if cumulative: - returns = prices - else: - returns = prices.pct_change(axis=0) + if not cumulative: + returns = returns.apply(cumulative_returns, axis=0) all_returns = [] @@ -893,7 +732,7 @@ def common_start_returns(factor, def average_cumulative_return_by_quantile(factor_data, - prices, + returns, periods_before=10, periods_after=15, demeaned=True, @@ -952,16 +791,18 @@ def average_cumulative_return_by_quantile(factor_data, --------------------------------------------------- """ - def cumulative_return(q_fact, demean_by): - return common_start_returns(q_fact, prices, + def cumulative_return_around_event(q_fact, demean_by): + return common_start_returns(q_fact, returns, periods_before, periods_after, True, True, demean_by) def average_cumulative_return(q_fact, demean_by): - q_returns = cumulative_return(q_fact, demean_by) - return pd.DataFrame({'mean': q_returns.mean(axis=1), - 'std': q_returns.std(axis=1)}).T + q_returns = cumulative_return_around_event(q_fact, demean_by) + q_returns.replace([np.inf, -np.inf], np.nan, inplace=True) + + return pd.DataFrame({'mean': q_returns.mean(skipna=True, axis=1), + 'std': q_returns.std(skipna=True, axis=1)}).T if by_group: # diff --git a/alphalens/plotting.py b/alphalens/plotting.py index 96cf7a04..0c8e3f66 100644 --- a/alphalens/plotting.py +++ b/alphalens/plotting.py @@ -152,11 +152,11 @@ def plot_turnover_table(autocorrelation_data, quantile_turnover): for period in sorted(quantile_turnover.keys()): for quantile, p_data in quantile_turnover[period].iteritems(): turnover_table.loc["Quantile {} Mean Turnover ".format(quantile), - "{}".format(period)] = p_data.mean() + "{}D".format(period)] = p_data.mean() auto_corr = pd.DataFrame() for period, p_data in autocorrelation_data.iteritems(): auto_corr.loc["Mean Factor Rank Autocorrelation", - "{}".format(period)] = p_data.mean() + "{}D".format(period)] = p_data.mean() print("Turnover Analysis") utils.print_table(turnover_table.apply(lambda x: x.round(3))) @@ -607,7 +607,7 @@ def plot_factor_rank_auto_correlation(factor_autocorrelation, if ax is None: f, ax = plt.subplots(1, 1, figsize=(18, 6)) - factor_autocorrelation.plot(title='{} Period Factor Rank Autocorrelation' + factor_autocorrelation.plot(title='{}D Period Factor Rank Autocorrelation' .format(period), ax=ax) ax.set(ylabel='Autocorrelation Coefficient', xlabel='') ax.axhline(0.0, linestyle='-', color='black', lw=1) @@ -646,7 +646,7 @@ def plot_top_bottom_quantile_turnover(quantile_turnover, period=1, ax=None): turnover = pd.DataFrame() turnover['top quantile turnover'] = quantile_turnover[max_quantile] turnover['bottom quantile turnover'] = quantile_turnover[min_quantile] - turnover.plot(title='{} 
Period Top and Bottom Quantile Turnover' + turnover.plot(title='{}D Period Top and Bottom Quantile Turnover' .format(period), ax=ax, alpha=0.6, lw=0.8) ax.set(ylabel='Proportion Of Names New To Quantile', xlabel="") @@ -711,7 +711,11 @@ def plot_monthly_ic_heatmap(mean_monthly_ic, ax=None): return ax -def plot_cumulative_returns(factor_returns, period, freq, title=None, ax=None): +def plot_cumulative_returns(factor_returns, + period, + freq=None, + title=None, + ax=None): """ Plots the cumulative returns of the returns series passed in. @@ -720,7 +724,7 @@ def plot_cumulative_returns(factor_returns, period, freq, title=None, ax=None): factor_returns : pd.Series Period wise returns of dollar neutral portfolio weighted by factor value. - period: pandas.Timedelta or string + period : pandas.Timedelta or string Length of period for which the returns are computed (e.g. 1 day) if 'period' is a string it must follow pandas.Timedelta constructor format (e.g. '1 days', '1D', '30m', '3h', '1D1h', etc) @@ -742,7 +746,7 @@ def plot_cumulative_returns(factor_returns, period, freq, title=None, ax=None): if ax is None: f, ax = plt.subplots(1, 1, figsize=(18, 6)) - factor_returns = perf.cumulative_returns(factor_returns, period, freq) + factor_returns = perf.cumulative_returns(factor_returns) factor_returns.plot(ax=ax, lw=3, color='forestgreen', alpha=0.6) ax.set(ylabel='Cumulative Returns', @@ -756,7 +760,7 @@ def plot_cumulative_returns(factor_returns, period, freq, title=None, ax=None): def plot_cumulative_returns_by_quantile(quantile_returns, period, - freq, + freq=None, ax=None): """ Plots the cumulative returns of various factor quantiles. @@ -765,7 +769,7 @@ def plot_cumulative_returns_by_quantile(quantile_returns, ---------- quantile_returns : pd.DataFrame Returns by factor quantile - period: pandas.Timedelta or string + period : pandas.Timedelta or string Length of period for which the returns are computed (e.g. 1 day) if 'period' is a string it must follow pandas.Timedelta constructor format (e.g. '1 days', '1D', '30m', '3h', '1D1h', etc) @@ -787,7 +791,9 @@ def plot_cumulative_returns_by_quantile(quantile_returns, ret_wide = quantile_returns.unstack('factor_quantile') - cum_ret = ret_wide.apply(perf.cumulative_returns, period=period, freq=freq) + cum_ret = ret_wide.apply(perf.cumulative_returns) + + cum_ret = cum_ret.loc[:, ::-1] # we want negative quantiles as 'red' cum_ret.plot(lw=2, ax=ax, cmap=cm.coolwarm) diff --git a/alphalens/tears.py b/alphalens/tears.py index 57094f19..7b8bae64 100644 --- a/alphalens/tears.py +++ b/alphalens/tears.py @@ -191,6 +191,7 @@ def create_returns_tear_sheet(factor_data, If True, display graphs separately for each group. """ + factor_returns = perf.factor_returns(factor_data, long_short, group_neutral) @@ -257,28 +258,28 @@ def create_returns_tear_sheet(factor_data, UserWarning ) - for p in factor_returns: - + # Compute cumulative returns from daily simple returns, if '1D' + # returns are provided. 
+ if '1D' in factor_returns: title = ('Factor Weighted ' + ('Group Neutral ' if group_neutral else '') + ('Long/Short ' if long_short else '') - + "Portfolio Cumulative Return ({} Period)".format(p)) + + 'Portfolio Cumulative Return (1D Period)') plotting.plot_cumulative_returns( - factor_returns[p], - period=p, - freq=trading_calendar, + factor_returns['1D'], + period='1D', title=title, ax=gf.next_row() ) plotting.plot_cumulative_returns_by_quantile( - mean_quant_ret_bydate[p], - period=p, - freq=trading_calendar, + mean_quant_ret_bydate['1D'], + period='1D', ax=gf.next_row() ) + ax_mean_quantile_returns_spread_ts = [gf.next_row() for x in range(fr_cols)] plotting.plot_mean_quantile_returns_spread_time_series( @@ -404,8 +405,15 @@ def create_turnover_tear_sheet(factor_data, turnover_periods=None): """ if turnover_periods is None: - turnover_periods = utils.get_forward_returns_columns( - factor_data.columns) + input_periods = utils.get_forward_returns_columns( + factor_data.columns, + require_exact_day_multiple=True, + ) + turnover_periods = list( + map( + (lambda x: pd.Timedelta(x).days), input_periods.get_values() + ) + ) quantile_factor = factor_data['factor_quantile'] @@ -481,6 +489,7 @@ def create_full_tear_sheet(factor_data, If True, display graphs separately for each group. """ + plotting.plot_quantile_statistics_table(factor_data) create_returns_tear_sheet(factor_data, long_short, @@ -496,7 +505,7 @@ def create_full_tear_sheet(factor_data, @plotting.customize def create_event_returns_tear_sheet(factor_data, - prices, + returns, avgretplot=(5, 15), long_short=True, group_neutral=False, @@ -537,7 +546,7 @@ def create_event_returns_tear_sheet(factor_data, avg_cumulative_returns = \ perf.average_cumulative_return_by_quantile( factor_data, - prices, + returns, periods_before=before, periods_after=after, demeaned=long_short, @@ -575,7 +584,7 @@ def create_event_returns_tear_sheet(factor_data, avg_cumret_by_group = \ perf.average_cumulative_return_by_quantile( factor_data, - prices, + returns, periods_before=before, periods_after=after, demeaned=long_short, @@ -597,7 +606,7 @@ def create_event_returns_tear_sheet(factor_data, @plotting.customize def create_event_study_tear_sheet(factor_data, - prices=None, + returns, avgretplot=(5, 15), rate_of_ret=True, n_bars=50): @@ -637,10 +646,10 @@ def create_event_study_tear_sheet(factor_data, plt.show() gf.close() - if prices is not None and avgretplot is not None: + if returns is not None and avgretplot is not None: create_event_returns_tear_sheet(factor_data=factor_data, - prices=prices, + returns=returns, avgretplot=avgretplot, long_short=long_short, group_neutral=False, @@ -648,8 +657,8 @@ def create_event_study_tear_sheet(factor_data, by_group=False) factor_returns = perf.factor_returns(factor_data, - demeaned=False, - equal_weight=True) + demeaned=False, + equal_weight=True) mean_quant_ret, std_quantile = \ perf.mean_return_by_quantile(factor_data, @@ -673,7 +682,7 @@ def create_event_study_tear_sheet(factor_data, fr_cols = len(factor_returns.columns) vertical_sections = 2 + fr_cols * 1 - gf = GridFigure(rows=vertical_sections, cols=1) + gf = GridFigure(rows=vertical_sections + 1, cols=1) plotting.plot_quantile_returns_bar(mean_quant_ret, by_group=False, @@ -692,13 +701,20 @@ def create_event_study_tear_sheet(factor_data, UserWarning ) - for p in factor_returns: + if '1D' in factor_returns: plotting.plot_cumulative_returns( - factor_returns[p], - period=p, + factor_returns['1D'], + period='1D', freq=trading_calendar, - ax=gf.next_row() + 
ax=gf.next_row(), + ) + + plotting.plot_cumulative_returns( + factor_returns['1D'], + period='1D', + freq=trading_calendar, + ax=gf.next_row(), ) plt.show() diff --git a/alphalens/utils.py b/alphalens/utils.py index 9421a0a0..ad0de1fa 100644 --- a/alphalens/utils.py +++ b/alphalens/utils.py @@ -16,6 +16,7 @@ import pandas as pd import numpy as np import re +import warnings from IPython.display import display from pandas.tseries.offsets import CustomBusinessDay, Day, BusinessDay @@ -342,6 +343,37 @@ def compute_forward_returns(factor, return df +def backshift_returns_series(series, N): + """Shift a multi-indexed series backwards by N observations in the first level. + + This can be used to convert backward-looking returns into a forward-returns series. + """ + ix = series.index + dates, sids = ix.levels + date_labels, sid_labels = map(np.array, ix.labels) + + # Output date labels will contain the all but the last N dates. + new_dates = dates[:-N] + + # Output data will remove the first M rows, where M is the index of the + # last record with one of the first N dates. + cutoff = date_labels.searchsorted(N) + new_date_labels = date_labels[cutoff:] - N + new_sid_labels = sid_labels[cutoff:] + new_values = series.values[cutoff:] + + assert new_date_labels[0] == 0 + + new_index = pd.MultiIndex( + levels=[new_dates, sids], + labels=[new_date_labels, new_sid_labels], + sortorder=1, + names=ix.names, + ) + + return pd.Series(data=new_values, index=new_index) + + def demean_forward_returns(factor_data, grouper=None): """ Convert forward returns to returns relative to mean @@ -558,6 +590,7 @@ def get_clean_factor(factor, factor_copy = factor.copy() factor_copy.index = factor_copy.index.rename(['date', 'asset']) + factor_copy = factor_copy[np.isfinite(factor_copy)] merged_data = forward_returns.copy() merged_data['factor'] = factor_copy @@ -639,7 +672,8 @@ def get_clean_factor_and_forward_returns(factor, groupby_labels=None, max_loss=0.35, zero_aware=False, - cumulative_returns=True): + cumulative_returns=True, + is_returns=False): """ Formats the factor data, pricing data, and group mappings into a DataFrame that contains aligned MultiIndex indices of timestamp and asset. The @@ -785,9 +819,14 @@ def get_clean_factor_and_forward_returns(factor, -------------------------------------------------------- """ - forward_returns = compute_forward_returns(factor, prices, periods, - filter_zscore, - cumulative_returns) + if not is_returns: + forward_returns = compute_forward_returns(factor, prices, periods, + filter_zscore, + cumulative_returns) + else: + forward_returns = prices + forward_returns.index.levels[0].name = "date" + forward_returns.index.levels[1].name = "asset" factor_data = get_clean_factor(factor, forward_returns, groupby=groupby, groupby_labels=groupby_labels, @@ -852,12 +891,26 @@ def std_conversion(period_std, base_period): return period_std / np.sqrt(conversion_factor) -def get_forward_returns_columns(columns): +def get_forward_returns_columns(columns, require_exact_day_multiple=False): """ Utility that detects and returns the columns that are forward returns """ - pattern = re.compile(r"^(\d+([Dhms]|ms|us|ns))+$", re.IGNORECASE) - valid_columns = [(pattern.match(col) is not None) for col in columns] + + # If exact day multiples are required in the forward return periods, + # drop all other columns (e.g. drop 3D12h). 
+ if require_exact_day_multiple: + pattern = re.compile(r"^(\d+([D]))+$", re.IGNORECASE) + valid_columns = [(pattern.match(col) is not None) for col in columns] + + if sum(valid_columns) < len(valid_columns): + warnings.warn( + "Skipping return periods that aren't exact multiples" \ + + " of days." + ) + else: + pattern = re.compile(r"^(\d+([Dhms]|ms|us|ns]))+$", re.IGNORECASE) + valid_columns = [(pattern.match(col) is not None) for col in columns] + return columns[valid_columns] @@ -963,3 +1016,168 @@ def diff_custom_calendar_timedeltas(start, end, freq): timediff = end - start delta_days = timediff.components.days - actual_days return timediff - pd.Timedelta(days=delta_days) + +def subportfolio_cumulative_returns(returns, period, freq=None): + """ + Builds cumulative returns from 'period' returns. This function simulates + the cumulative effect that a series of gains or losses (the 'returns') + have on an original amount of capital over a period of time. + + if F is the frequency at which returns are computed (e.g. 1 day if + 'returns' contains daily values) and N is the period for which the retuns + are computed (e.g. returns after 1 day, 5 hours or 3 days) then: + - if N <= F the cumulative retuns are trivially computed as Compound Return + - if N > F (e.g. F 1 day, and N is 3 days) then the returns overlap and the + cumulative returns are computed building and averaging N interleaved sub + portfolios (started at subsequent periods 1,2,..,N) each one rebalancing + every N periods. This correspond to an algorithm which trades the factor + every single time it is computed, which is statistically more robust and + with a lower volatity compared to an algorithm that trades the factor + every N periods and whose returns depend on the specific starting day of + trading. + + Also note that when the factor is not computed at a specific frequency, for + exaple a factor representing a random event, it is not efficient to create + multiples sub-portfolios as it is not certain when the factor will be + traded and this would result in an underleveraged portfolio. In this case + the simulated portfolio is fully invested whenever an event happens and if + a subsequent event occur while the portfolio is still invested in a + previous event then the portfolio is rebalanced and split equally among the + active events. + + Parameters + ---------- + returns: pd.Series + pd.Series containing factor 'period' forward returns, the index + contains timestamps at which the trades are computed and the values + correspond to returns after 'period' time + period: pandas.Timedelta or string + Length of period for which the returns are computed (1 day, 2 mins, + 3 hours etc). It can be a Timedelta or a string in the format accepted + by Timedelta constructor ('1 days', '1D', '30m', '3h', '1D1h', etc) + freq : pandas DateOffset, optional + Used to specify a particular trading calendar. 
If not present + returns.index.freq will be used + + Returns + ------- + Cumulative returns series : pd.Series + Example: + 2015-07-16 09:30:00 -0.012143 + 2015-07-16 12:30:00 0.012546 + 2015-07-17 09:30:00 0.045350 + 2015-07-17 12:30:00 0.065897 + 2015-07-20 09:30:00 0.030957 + """ + + if not isinstance(period, pd.Timedelta): + period = pd.Timedelta(period) + + if freq is None: + freq = returns.index.freq + + if freq is None: + freq = BDay() + warnings.warn("'freq' not set, using business day calendar", + UserWarning) + + # + # returns index contains factor computation timestamps, then add returns + # timestamps too (factor timestamps + period) and save them to 'full_idx' + # Cumulative returns will use 'full_idx' index,because we want a cumulative + # returns value for each entry in 'full_idx' + # + trades_idx = returns.index.copy() + returns_idx = utils.add_custom_calendar_timedelta(trades_idx, period, freq) + full_idx = trades_idx.union(returns_idx) + + # + # Build N sub_returns from the single returns Series. Each sub_retuns + # stream will contain non-overlapping returns. + # In the next step we'll compute the portfolio returns averaging the + # returns happening on those overlapping returns streams + # + sub_returns = [] + print(returns.shape) + while len(trades_idx) > 0: + + # + # select non-overlapping returns starting with first timestamp in index + # + sub_index = [] + next = trades_idx.min() + while next <= trades_idx.max(): + sub_index.append(next) + next = utils.add_custom_calendar_timedelta(next, period, freq) + # make sure to fetch the next available entry after 'period' + try: + i = trades_idx.get_loc(next, method='bfill') + next = trades_idx[i] + except KeyError: + break + + sub_index = pd.DatetimeIndex(sub_index, tz=full_idx.tz) + subret = returns[sub_index] + + # make the index to have all entries in 'full_idx' + subret = subret.reindex(full_idx) + + # + # compute intermediate returns values for each index in subret that are + # in between the timestaps at which the factors are computed and the + # timestamps at which the 'period' returns actually happen + # + for pret_idx in reversed(sub_index): + + pret = subret[pret_idx] + + # get all timestamps between factor computation and period returns + pret_end_idx = \ + utils.add_custom_calendar_timedelta(pret_idx, period, freq) + slice = subret[(subret.index > pret_idx) & ( + subret.index <= pret_end_idx)].index + + if pd.isnull(pret): + continue + + def rate_of_returns(ret, period): + return ((np.nansum(ret) + 1)**(1. / period)) - 1 + + # compute intermediate 'period' returns values, note that this also + # moves the final 'period' returns value from trading timestamp to + # trading timestamp + 'period' + for slice_idx in slice: + sub_period = utils.diff_custom_calendar_timedeltas( + pret_idx, slice_idx, freq) + subret[slice_idx] = rate_of_returns(pret, period / sub_period) + + subret[pret_idx] = np.nan + + # transform returns as percentage change from previous value + subret[slice[1:]] = (subret[slice] + 1).pct_change()[slice[1:]] + + sub_returns.append(subret) + trades_idx = trades_idx.difference(sub_index) + + # + # Compute portfolio cumulative returns averaging the returns happening on + # overlapping returns streams. 
+ # + sub_portfolios = pd.concat(sub_returns, axis=1) + portfolio = pd.Series(index=sub_portfolios.index) + + for i, (index, row) in enumerate(sub_portfolios.iterrows()): + + # check the active portfolios, count() returns non-nans elements + active_subfolios = row.count() + + # fill forward portfolio value + portfolio.iloc[i] = portfolio.iloc[i - 1] if i > 0 else 1. + + if active_subfolios <= 0: + continue + + # current portfolio is the average of active sub_portfolios + portfolio.iloc[i] *= (row + 1).mean(skipna=True) + + return portfolio diff --git a/setup.py b/setup.py index 3546dac5..a67d3ffd 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,7 @@ 'seaborn>=0.6.0', 'statsmodels>=0.6.1', 'IPython>=3.2.3', + 'empyrical==0.5.0', ] extra_reqs = { From d074b39751b414c26bc0772be68c745c8fd14c71 Mon Sep 17 00:00:00 2001 From: Jamie McCorriston Date: Fri, 28 Feb 2020 10:54:16 -0500 Subject: [PATCH 2/4] Addressed Luca's feedback. --- alphalens/performance.py | 2 +- alphalens/tears.py | 16 ---- alphalens/utils.py | 165 --------------------------------------- 3 files changed, 1 insertion(+), 182 deletions(-) diff --git a/alphalens/performance.py b/alphalens/performance.py index d70f0a79..a9fa4eba 100644 --- a/alphalens/performance.py +++ b/alphalens/performance.py @@ -568,7 +568,7 @@ def compute_mean_returns_spread(mean_returns, def quantile_turnover(quantile_factor, quantile, period=1): """ - Computes the daily proportion of names in a factor quantile that were + Computes the proportion of names in a factor quantile that were not in that quantile in the previous period. Parameters diff --git a/alphalens/tears.py b/alphalens/tears.py index 7b8bae64..a806c4eb 100644 --- a/alphalens/tears.py +++ b/alphalens/tears.py @@ -701,21 +701,5 @@ def create_event_study_tear_sheet(factor_data, UserWarning ) - - if '1D' in factor_returns: - plotting.plot_cumulative_returns( - factor_returns['1D'], - period='1D', - freq=trading_calendar, - ax=gf.next_row(), - ) - - plotting.plot_cumulative_returns( - factor_returns['1D'], - period='1D', - freq=trading_calendar, - ax=gf.next_row(), - ) - plt.show() gf.close() diff --git a/alphalens/utils.py b/alphalens/utils.py index ad0de1fa..6322e4f0 100644 --- a/alphalens/utils.py +++ b/alphalens/utils.py @@ -1016,168 +1016,3 @@ def diff_custom_calendar_timedeltas(start, end, freq): timediff = end - start delta_days = timediff.components.days - actual_days return timediff - pd.Timedelta(days=delta_days) - -def subportfolio_cumulative_returns(returns, period, freq=None): - """ - Builds cumulative returns from 'period' returns. This function simulates - the cumulative effect that a series of gains or losses (the 'returns') - have on an original amount of capital over a period of time. - - if F is the frequency at which returns are computed (e.g. 1 day if - 'returns' contains daily values) and N is the period for which the retuns - are computed (e.g. returns after 1 day, 5 hours or 3 days) then: - - if N <= F the cumulative retuns are trivially computed as Compound Return - - if N > F (e.g. F 1 day, and N is 3 days) then the returns overlap and the - cumulative returns are computed building and averaging N interleaved sub - portfolios (started at subsequent periods 1,2,..,N) each one rebalancing - every N periods. 
This correspond to an algorithm which trades the factor - every single time it is computed, which is statistically more robust and - with a lower volatity compared to an algorithm that trades the factor - every N periods and whose returns depend on the specific starting day of - trading. - - Also note that when the factor is not computed at a specific frequency, for - exaple a factor representing a random event, it is not efficient to create - multiples sub-portfolios as it is not certain when the factor will be - traded and this would result in an underleveraged portfolio. In this case - the simulated portfolio is fully invested whenever an event happens and if - a subsequent event occur while the portfolio is still invested in a - previous event then the portfolio is rebalanced and split equally among the - active events. - - Parameters - ---------- - returns: pd.Series - pd.Series containing factor 'period' forward returns, the index - contains timestamps at which the trades are computed and the values - correspond to returns after 'period' time - period: pandas.Timedelta or string - Length of period for which the returns are computed (1 day, 2 mins, - 3 hours etc). It can be a Timedelta or a string in the format accepted - by Timedelta constructor ('1 days', '1D', '30m', '3h', '1D1h', etc) - freq : pandas DateOffset, optional - Used to specify a particular trading calendar. If not present - returns.index.freq will be used - - Returns - ------- - Cumulative returns series : pd.Series - Example: - 2015-07-16 09:30:00 -0.012143 - 2015-07-16 12:30:00 0.012546 - 2015-07-17 09:30:00 0.045350 - 2015-07-17 12:30:00 0.065897 - 2015-07-20 09:30:00 0.030957 - """ - - if not isinstance(period, pd.Timedelta): - period = pd.Timedelta(period) - - if freq is None: - freq = returns.index.freq - - if freq is None: - freq = BDay() - warnings.warn("'freq' not set, using business day calendar", - UserWarning) - - # - # returns index contains factor computation timestamps, then add returns - # timestamps too (factor timestamps + period) and save them to 'full_idx' - # Cumulative returns will use 'full_idx' index,because we want a cumulative - # returns value for each entry in 'full_idx' - # - trades_idx = returns.index.copy() - returns_idx = utils.add_custom_calendar_timedelta(trades_idx, period, freq) - full_idx = trades_idx.union(returns_idx) - - # - # Build N sub_returns from the single returns Series. Each sub_retuns - # stream will contain non-overlapping returns. 
- # In the next step we'll compute the portfolio returns averaging the - # returns happening on those overlapping returns streams - # - sub_returns = [] - print(returns.shape) - while len(trades_idx) > 0: - - # - # select non-overlapping returns starting with first timestamp in index - # - sub_index = [] - next = trades_idx.min() - while next <= trades_idx.max(): - sub_index.append(next) - next = utils.add_custom_calendar_timedelta(next, period, freq) - # make sure to fetch the next available entry after 'period' - try: - i = trades_idx.get_loc(next, method='bfill') - next = trades_idx[i] - except KeyError: - break - - sub_index = pd.DatetimeIndex(sub_index, tz=full_idx.tz) - subret = returns[sub_index] - - # make the index to have all entries in 'full_idx' - subret = subret.reindex(full_idx) - - # - # compute intermediate returns values for each index in subret that are - # in between the timestaps at which the factors are computed and the - # timestamps at which the 'period' returns actually happen - # - for pret_idx in reversed(sub_index): - - pret = subret[pret_idx] - - # get all timestamps between factor computation and period returns - pret_end_idx = \ - utils.add_custom_calendar_timedelta(pret_idx, period, freq) - slice = subret[(subret.index > pret_idx) & ( - subret.index <= pret_end_idx)].index - - if pd.isnull(pret): - continue - - def rate_of_returns(ret, period): - return ((np.nansum(ret) + 1)**(1. / period)) - 1 - - # compute intermediate 'period' returns values, note that this also - # moves the final 'period' returns value from trading timestamp to - # trading timestamp + 'period' - for slice_idx in slice: - sub_period = utils.diff_custom_calendar_timedeltas( - pret_idx, slice_idx, freq) - subret[slice_idx] = rate_of_returns(pret, period / sub_period) - - subret[pret_idx] = np.nan - - # transform returns as percentage change from previous value - subret[slice[1:]] = (subret[slice] + 1).pct_change()[slice[1:]] - - sub_returns.append(subret) - trades_idx = trades_idx.difference(sub_index) - - # - # Compute portfolio cumulative returns averaging the returns happening on - # overlapping returns streams. - # - sub_portfolios = pd.concat(sub_returns, axis=1) - portfolio = pd.Series(index=sub_portfolios.index) - - for i, (index, row) in enumerate(sub_portfolios.iterrows()): - - # check the active portfolios, count() returns non-nans elements - active_subfolios = row.count() - - # fill forward portfolio value - portfolio.iloc[i] = portfolio.iloc[i - 1] if i > 0 else 1. 
- - if active_subfolios <= 0: - continue - - # current portfolio is the average of active sub_portfolios - portfolio.iloc[i] *= (row + 1).mean(skipna=True) - - return portfolio From 96a1c7e0d27958c3cfc34c073e7f5dd1c64a8261 Mon Sep 17 00:00:00 2001 From: Gerry Manoim Date: Tue, 10 Mar 2020 14:08:02 -0400 Subject: [PATCH 3/4] Style --- alphalens/performance.py | 7 +- alphalens/plotting.py | 7 +- alphalens/tears.py | 517 ++++++++++++++++++++------------------- alphalens/utils.py | 12 +- 4 files changed, 284 insertions(+), 259 deletions(-) diff --git a/alphalens/performance.py b/alphalens/performance.py index a9fa4eba..b8465b84 100644 --- a/alphalens/performance.py +++ b/alphalens/performance.py @@ -346,13 +346,12 @@ def cumulative_returns(returns): 2015-01-05 1.001310 2015-01-06 1.000805 2015-01-07 1.001092 - 2015-01-08 0.999200 + 2015-01-08 0.999200 """ return ep.cum_returns(returns, starting_value=1) - def positions(weights, period, freq=None): """ Builds net position values time series, the portfolio percentage invested @@ -845,7 +844,9 @@ def average_cumulative_return(q_fact, demean_by): all_returns = [] for group, g_data in factor_data.groupby('group'): g_fq = g_data['factor_quantile'] - avgcumret = g_fq.groupby(g_fq).apply(cumulative_return, g_fq) + avgcumret = g_fq.groupby(g_fq).apply( + cumulative_return_around_event, g_fq + ) all_returns.append(avgcumret) q_returns = pd.concat(all_returns, axis=1) q_returns = pd.DataFrame({'mean': q_returns.mean(axis=1), diff --git a/alphalens/plotting.py b/alphalens/plotting.py index 0c8e3f66..5c2ec548 100644 --- a/alphalens/plotting.py +++ b/alphalens/plotting.py @@ -711,10 +711,10 @@ def plot_monthly_ic_heatmap(mean_monthly_ic, ax=None): return ax -def plot_cumulative_returns(factor_returns, +def plot_cumulative_returns(factor_returns, period, - freq=None, - title=None, + freq=None, + title=None, ax=None): """ Plots the cumulative returns of the returns series passed in. @@ -793,7 +793,6 @@ def plot_cumulative_returns_by_quantile(quantile_returns, cum_ret = ret_wide.apply(perf.cumulative_returns) - cum_ret = cum_ret.loc[:, ::-1] # we want negative quantiles as 'red' cum_ret.plot(lw=2, ax=ax, cmap=cm.coolwarm) diff --git a/alphalens/tears.py b/alphalens/tears.py index a806c4eb..5b0f2a99 100644 --- a/alphalens/tears.py +++ b/alphalens/tears.py @@ -59,9 +59,9 @@ def close(self): @plotting.customize -def create_summary_tear_sheet(factor_data, - long_short=True, - group_neutral=False): +def create_summary_tear_sheet( + factor_data, long_short=True, group_neutral=False +): """ Creates a small summary tear sheet with returns, information, and turnover analysis. 
@@ -83,43 +83,45 @@ def create_summary_tear_sheet(factor_data, """ # Returns Analysis - mean_quant_ret, std_quantile = \ - perf.mean_return_by_quantile(factor_data, - by_group=False, - demeaned=long_short, - group_adjust=group_neutral) - - mean_quant_rateret = \ - mean_quant_ret.apply(utils.rate_of_return, axis=0, - base_period=mean_quant_ret.columns[0]) - - mean_quant_ret_bydate, std_quant_daily = \ - perf.mean_return_by_quantile(factor_data, - by_date=True, - by_group=False, - demeaned=long_short, - group_adjust=group_neutral) + mean_quant_ret, std_quantile = perf.mean_return_by_quantile( + factor_data, + by_group=False, + demeaned=long_short, + group_adjust=group_neutral, + ) + + mean_quant_rateret = mean_quant_ret.apply( + utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0] + ) + + mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile( + factor_data, + by_date=True, + by_group=False, + demeaned=long_short, + group_adjust=group_neutral, + ) mean_quant_rateret_bydate = mean_quant_ret_bydate.apply( utils.rate_of_return, axis=0, - base_period=mean_quant_ret_bydate.columns[0] + base_period=mean_quant_ret_bydate.columns[0], ) compstd_quant_daily = std_quant_daily.apply( - utils.std_conversion, axis=0, - base_period=std_quant_daily.columns[0] + utils.std_conversion, axis=0, base_period=std_quant_daily.columns[0] ) - alpha_beta = perf.factor_alpha_beta(factor_data, - demeaned=long_short, - group_adjust=group_neutral) + alpha_beta = perf.factor_alpha_beta( + factor_data, demeaned=long_short, group_adjust=group_neutral + ) mean_ret_spread_quant, std_spread_quant = perf.compute_mean_returns_spread( mean_quant_rateret_bydate, - factor_data['factor_quantile'].max(), - factor_data['factor_quantile'].min(), - std_err=compstd_quant_daily) + factor_data["factor_quantile"].max(), + factor_data["factor_quantile"].min(), + std_err=compstd_quant_daily, + ) periods = utils.get_forward_returns_columns(factor_data.columns) @@ -129,31 +131,42 @@ def create_summary_tear_sheet(factor_data, plotting.plot_quantile_statistics_table(factor_data) - plotting.plot_returns_table(alpha_beta, - mean_quant_rateret, - mean_ret_spread_quant) + plotting.plot_returns_table( + alpha_beta, mean_quant_rateret, mean_ret_spread_quant + ) - plotting.plot_quantile_returns_bar(mean_quant_rateret, - by_group=False, - ylim_percentiles=None, - ax=gf.next_row()) + plotting.plot_quantile_returns_bar( + mean_quant_rateret, + by_group=False, + ylim_percentiles=None, + ax=gf.next_row(), + ) # Information Analysis ic = perf.factor_information_coefficient(factor_data) plotting.plot_information_table(ic) # Turnover Analysis - quantile_factor = factor_data['factor_quantile'] + quantile_factor = factor_data["factor_quantile"] - quantile_turnover = \ - {p: pd.concat([perf.quantile_turnover(quantile_factor, q, p) - for q in range(1, int(quantile_factor.max()) + 1)], - axis=1) - for p in periods} + quantile_turnover = { + p: pd.concat( + [ + perf.quantile_turnover(quantile_factor, q, p) + for q in range(1, int(quantile_factor.max()) + 1) + ], + axis=1, + ) + for p in periods + } autocorrelation = pd.concat( - [perf.factor_rank_autocorrelation(factor_data, period) for period in - periods], axis=1) + [ + perf.factor_rank_autocorrelation(factor_data, period) + for period in periods + ], + axis=1, + ) plotting.plot_turnover_table(autocorrelation, quantile_turnover) @@ -162,10 +175,9 @@ def create_summary_tear_sheet(factor_data, @plotting.customize -def create_returns_tear_sheet(factor_data, - long_short=True, - 
group_neutral=False, - by_group=False): +def create_returns_tear_sheet( + factor_data, long_short=True, group_neutral=False, by_group=False +): """ Creates a tear sheet for returns analysis of a factor. @@ -191,140 +203,150 @@ def create_returns_tear_sheet(factor_data, If True, display graphs separately for each group. """ + factor_returns = perf.factor_returns( + factor_data, long_short, group_neutral + ) - factor_returns = perf.factor_returns(factor_data, - long_short, - group_neutral) - - mean_quant_ret, std_quantile = \ - perf.mean_return_by_quantile(factor_data, - by_group=False, - demeaned=long_short, - group_adjust=group_neutral) + mean_quant_ret, std_quantile = perf.mean_return_by_quantile( + factor_data, + by_group=False, + demeaned=long_short, + group_adjust=group_neutral, + ) - mean_quant_rateret = \ - mean_quant_ret.apply(utils.rate_of_return, axis=0, - base_period=mean_quant_ret.columns[0]) + mean_quant_rateret = mean_quant_ret.apply( + utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0] + ) - mean_quant_ret_bydate, std_quant_daily = \ - perf.mean_return_by_quantile(factor_data, - by_date=True, - by_group=False, - demeaned=long_short, - group_adjust=group_neutral) + mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile( + factor_data, + by_date=True, + by_group=False, + demeaned=long_short, + group_adjust=group_neutral, + ) mean_quant_rateret_bydate = mean_quant_ret_bydate.apply( - utils.rate_of_return, axis=0, - base_period=mean_quant_ret_bydate.columns[0] + utils.rate_of_return, + axis=0, + base_period=mean_quant_ret_bydate.columns[0], ) - compstd_quant_daily = \ - std_quant_daily.apply(utils.std_conversion, axis=0, - base_period=std_quant_daily.columns[0]) + compstd_quant_daily = std_quant_daily.apply( + utils.std_conversion, axis=0, base_period=std_quant_daily.columns[0] + ) - alpha_beta = perf.factor_alpha_beta(factor_data, - factor_returns, - long_short, - group_neutral) + alpha_beta = perf.factor_alpha_beta( + factor_data, factor_returns, long_short, group_neutral + ) - mean_ret_spread_quant, std_spread_quant = \ - perf.compute_mean_returns_spread(mean_quant_rateret_bydate, - factor_data['factor_quantile'].max(), - factor_data['factor_quantile'].min(), - std_err=compstd_quant_daily) + mean_ret_spread_quant, std_spread_quant = perf.compute_mean_returns_spread( + mean_quant_rateret_bydate, + factor_data["factor_quantile"].max(), + factor_data["factor_quantile"].min(), + std_err=compstd_quant_daily, + ) fr_cols = len(factor_returns.columns) vertical_sections = 2 + fr_cols * 3 gf = GridFigure(rows=vertical_sections, cols=1) - plotting.plot_returns_table(alpha_beta, - mean_quant_rateret, - mean_ret_spread_quant) + plotting.plot_returns_table( + alpha_beta, mean_quant_rateret, mean_ret_spread_quant + ) - plotting.plot_quantile_returns_bar(mean_quant_rateret, - by_group=False, - ylim_percentiles=None, - ax=gf.next_row()) + plotting.plot_quantile_returns_bar( + mean_quant_rateret, + by_group=False, + ylim_percentiles=None, + ax=gf.next_row(), + ) - plotting.plot_quantile_returns_violin(mean_quant_rateret_bydate, - ylim_percentiles=(1, 99), - ax=gf.next_row()) + plotting.plot_quantile_returns_violin( + mean_quant_rateret_bydate, ylim_percentiles=(1, 99), ax=gf.next_row() + ) trading_calendar = factor_data.index.levels[0].freq if trading_calendar is None: trading_calendar = pd.tseries.offsets.BDay() warnings.warn( "'freq' not set in factor_data index: assuming business day", - UserWarning + UserWarning, ) # Compute cumulative returns from daily 
simple returns, if '1D' # returns are provided. - if '1D' in factor_returns: - title = ('Factor Weighted ' - + ('Group Neutral ' if group_neutral else '') - + ('Long/Short ' if long_short else '') - + 'Portfolio Cumulative Return (1D Period)') + if "1D" in factor_returns: + title = ( + "Factor Weighted " + + ("Group Neutral " if group_neutral else "") + + ("Long/Short " if long_short else "") + + "Portfolio Cumulative Return (1D Period)" + ) plotting.plot_cumulative_returns( - factor_returns['1D'], - period='1D', - title=title, - ax=gf.next_row() + factor_returns["1D"], period="1D", title=title, ax=gf.next_row() ) plotting.plot_cumulative_returns_by_quantile( - mean_quant_ret_bydate['1D'], - period='1D', - ax=gf.next_row() + mean_quant_ret_bydate["1D"], period="1D", ax=gf.next_row() ) - - ax_mean_quantile_returns_spread_ts = [gf.next_row() - for x in range(fr_cols)] + ax_mean_quantile_returns_spread_ts = [ + gf.next_row() for x in range(fr_cols) + ] plotting.plot_mean_quantile_returns_spread_time_series( mean_ret_spread_quant, std_err=std_spread_quant, bandwidth=0.5, - ax=ax_mean_quantile_returns_spread_ts + ax=ax_mean_quantile_returns_spread_ts, ) plt.show() gf.close() if by_group: - mean_return_quantile_group, mean_return_quantile_group_std_err = \ - perf.mean_return_by_quantile(factor_data, - by_date=False, - by_group=True, - demeaned=long_short, - group_adjust=group_neutral) + ( + mean_return_quantile_group, + mean_return_quantile_group_std_err, + ) = perf.mean_return_by_quantile( + factor_data, + by_date=False, + by_group=True, + demeaned=long_short, + group_adjust=group_neutral, + ) mean_quant_rateret_group = mean_return_quantile_group.apply( - utils.rate_of_return, axis=0, - base_period=mean_return_quantile_group.columns[0] + utils.rate_of_return, + axis=0, + base_period=mean_return_quantile_group.columns[0], ) - num_groups = len(mean_quant_rateret_group.index - .get_level_values('group').unique()) + num_groups = len( + mean_quant_rateret_group.index.get_level_values("group").unique() + ) vertical_sections = 1 + (((num_groups - 1) // 2) + 1) gf = GridFigure(rows=vertical_sections, cols=2) - ax_quantile_returns_bar_by_group = [gf.next_cell() - for _ in range(num_groups)] - plotting.plot_quantile_returns_bar(mean_quant_rateret_group, - by_group=True, - ylim_percentiles=(5, 95), - ax=ax_quantile_returns_bar_by_group) + ax_quantile_returns_bar_by_group = [ + gf.next_cell() for _ in range(num_groups) + ] + plotting.plot_quantile_returns_bar( + mean_quant_rateret_group, + by_group=True, + ylim_percentiles=(5, 95), + ax=ax_quantile_returns_bar_by_group, + ) plt.show() gf.close() @plotting.customize -def create_information_tear_sheet(factor_data, - group_neutral=False, - by_group=False): +def create_information_tear_sheet( + factor_data, group_neutral=False, by_group=False +): """ Creates a tear sheet for information analysis of a factor. 
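
The cumulative-return plots above are drawn only when a '1D' forward-returns column is present, and the compounding itself is delegated to empyrical. A minimal sketch of the computation the simplified perf.cumulative_returns() is expected to perform, assuming the empyrical==0.5.0 pin added in setup.py (illustrative only, not part of the patch):

    # Illustrative sketch only -- not part of the patch.
    import pandas as pd
    import empyrical as ep

    daily_returns = pd.Series(
        [0.0010, -0.0020, 0.0005],
        index=pd.date_range("2015-01-05", periods=3, freq="B"),
    )

    # ep.cum_returns compounds simple returns: (1 + r).cumprod() * starting_value
    curve = ep.cum_returns(daily_returns, starting_value=1)
    assert abs(curve.iloc[-1] - 1.0010 * 0.9980 * 1.0005) < 1e-9

Longer horizons such as '5D' no longer feed these plots in this revision; only the '1D' series is compounded and charted.
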
@@ -348,7 +370,7 @@ def create_information_tear_sheet(factor_data, columns_wide = 2 fr_cols = len(ic.columns) - rows_when_wide = (((fr_cols - 1) // columns_wide) + 1) + rows_when_wide = ((fr_cols - 1) // columns_wide) + 1 vertical_sections = fr_cols + 3 * rows_when_wide + 2 * fr_cols gf = GridFigure(rows=vertical_sections, cols=columns_wide) @@ -361,20 +383,21 @@ def create_information_tear_sheet(factor_data, if not by_group: - mean_monthly_ic = \ - perf.mean_information_coefficient(factor_data, - group_adjust=group_neutral, - by_group=False, - by_time="M") + mean_monthly_ic = perf.mean_information_coefficient( + factor_data, + group_adjust=group_neutral, + by_group=False, + by_time="M", + ) ax_monthly_ic_heatmap = [gf.next_cell() for x in range(fr_cols)] - plotting.plot_monthly_ic_heatmap(mean_monthly_ic, - ax=ax_monthly_ic_heatmap) + plotting.plot_monthly_ic_heatmap( + mean_monthly_ic, ax=ax_monthly_ic_heatmap + ) if by_group: - mean_group_ic = \ - perf.mean_information_coefficient(factor_data, - group_adjust=group_neutral, - by_group=True) + mean_group_ic = perf.mean_information_coefficient( + factor_data, group_adjust=group_neutral, by_group=True + ) plotting.plot_ic_by_group(mean_group_ic, ax=gf.next_row()) @@ -406,16 +429,13 @@ def create_turnover_tear_sheet(factor_data, turnover_periods=None): if turnover_periods is None: input_periods = utils.get_forward_returns_columns( - factor_data.columns, - require_exact_day_multiple=True, + factor_data.columns, require_exact_day_multiple=True, ) turnover_periods = list( - map( - (lambda x: pd.Timedelta(x).days), input_periods.get_values() - ) + map((lambda x: pd.Timedelta(x).days), input_periods.get_values()) ) - quantile_factor = factor_data['factor_quantile'] + quantile_factor = factor_data["factor_quantile"] quantile_turnover = { p: pd.concat( @@ -429,40 +449,43 @@ def create_turnover_tear_sheet(factor_data, turnover_periods=None): } autocorrelation = pd.concat( - [perf.factor_rank_autocorrelation(factor_data, period) for period in - turnover_periods], axis=1) + [ + perf.factor_rank_autocorrelation(factor_data, period) + for period in turnover_periods + ], + axis=1, + ) plotting.plot_turnover_table(autocorrelation, quantile_turnover) fr_cols = len(turnover_periods) columns_wide = 1 - rows_when_wide = (((fr_cols - 1) // 1) + 1) + rows_when_wide = ((fr_cols - 1) // 1) + 1 vertical_sections = fr_cols + 3 * rows_when_wide + 2 * fr_cols gf = GridFigure(rows=vertical_sections, cols=columns_wide) for period in turnover_periods: if quantile_turnover[period].isnull().all().all(): continue - plotting.plot_top_bottom_quantile_turnover(quantile_turnover[period], - period=period, - ax=gf.next_row()) + plotting.plot_top_bottom_quantile_turnover( + quantile_turnover[period], period=period, ax=gf.next_row() + ) for period in autocorrelation: if autocorrelation[period].isnull().all(): continue - plotting.plot_factor_rank_auto_correlation(autocorrelation[period], - period=period, - ax=gf.next_row()) + plotting.plot_factor_rank_auto_correlation( + autocorrelation[period], period=period, ax=gf.next_row() + ) plt.show() gf.close() @plotting.customize -def create_full_tear_sheet(factor_data, - long_short=True, - group_neutral=False, - by_group=False): +def create_full_tear_sheet( + factor_data, long_short=True, group_neutral=False, by_group=False +): """ Creates a full tear sheet for analysis and evaluating single return predicting (alpha) factor. @@ -489,28 +512,26 @@ def create_full_tear_sheet(factor_data, If True, display graphs separately for each group. 
""" - plotting.plot_quantile_statistics_table(factor_data) - create_returns_tear_sheet(factor_data, - long_short, - group_neutral, - by_group, - set_context=False) - create_information_tear_sheet(factor_data, - group_neutral, - by_group, - set_context=False) + create_returns_tear_sheet( + factor_data, long_short, group_neutral, by_group, set_context=False + ) + create_information_tear_sheet( + factor_data, group_neutral, by_group, set_context=False + ) create_turnover_tear_sheet(factor_data, set_context=False) @plotting.customize -def create_event_returns_tear_sheet(factor_data, - returns, - avgretplot=(5, 15), - long_short=True, - group_neutral=False, - std_bar=True, - by_group=False): +def create_event_returns_tear_sheet( + factor_data, + returns, + avgretplot=(5, 15), + long_short=True, + group_neutral=False, + std_bar=True, + by_group=False, +): """ Creates a tear sheet to view the average cumulative returns for a factor within a window (pre and post event). @@ -543,73 +564,76 @@ def create_event_returns_tear_sheet(factor_data, before, after = avgretplot - avg_cumulative_returns = \ - perf.average_cumulative_return_by_quantile( - factor_data, - returns, - periods_before=before, - periods_after=after, - demeaned=long_short, - group_adjust=group_neutral) + avg_cumulative_returns = perf.average_cumulative_return_by_quantile( + factor_data, + returns, + periods_before=before, + periods_after=after, + demeaned=long_short, + group_adjust=group_neutral, + ) - num_quantiles = int(factor_data['factor_quantile'].max()) + num_quantiles = int(factor_data["factor_quantile"].max()) vertical_sections = 1 if std_bar: - vertical_sections += (((num_quantiles - 1) // 2) + 1) + vertical_sections += ((num_quantiles - 1) // 2) + 1 cols = 2 if num_quantiles != 1 else 1 gf = GridFigure(rows=vertical_sections, cols=cols) - plotting.plot_quantile_average_cumulative_return(avg_cumulative_returns, - by_quantile=False, - std_bar=False, - ax=gf.next_row()) + plotting.plot_quantile_average_cumulative_return( + avg_cumulative_returns, + by_quantile=False, + std_bar=False, + ax=gf.next_row(), + ) if std_bar: - ax_avg_cumulative_returns_by_q = [gf.next_cell() - for _ in range(num_quantiles)] + ax_avg_cumulative_returns_by_q = [ + gf.next_cell() for _ in range(num_quantiles) + ] plotting.plot_quantile_average_cumulative_return( avg_cumulative_returns, by_quantile=True, std_bar=True, - ax=ax_avg_cumulative_returns_by_q) + ax=ax_avg_cumulative_returns_by_q, + ) plt.show() gf.close() if by_group: - groups = factor_data['group'].unique() + groups = factor_data["group"].unique() num_groups = len(groups) vertical_sections = ((num_groups - 1) // 2) + 1 gf = GridFigure(rows=vertical_sections, cols=2) - avg_cumret_by_group = \ - perf.average_cumulative_return_by_quantile( - factor_data, - returns, - periods_before=before, - periods_after=after, - demeaned=long_short, - group_adjust=group_neutral, - by_group=True) - - for group, avg_cumret in avg_cumret_by_group.groupby(level='group'): - avg_cumret.index = avg_cumret.index.droplevel('group') + avg_cumret_by_group = perf.average_cumulative_return_by_quantile( + factor_data, + returns, + periods_before=before, + periods_after=after, + demeaned=long_short, + group_adjust=group_neutral, + by_group=True, + ) + + for group, avg_cumret in avg_cumret_by_group.groupby(level="group"): + avg_cumret.index = avg_cumret.index.droplevel("group") plotting.plot_quantile_average_cumulative_return( avg_cumret, by_quantile=False, std_bar=False, title=group, - ax=gf.next_cell()) + ax=gf.next_cell(), 
+ ) plt.show() gf.close() @plotting.customize -def create_event_study_tear_sheet(factor_data, - returns, - avgretplot=(5, 15), - rate_of_ret=True, - n_bars=50): +def create_event_study_tear_sheet( + factor_data, returns, avgretplot=(5, 15), rate_of_ret=True, n_bars=50 +): """ Creates an event study tear sheet for analysis of a specific event. @@ -640,65 +664,64 @@ def create_event_study_tear_sheet(factor_data, plotting.plot_quantile_statistics_table(factor_data) gf = GridFigure(rows=1, cols=1) - plotting.plot_events_distribution(events=factor_data['factor'], - num_bars=n_bars, - ax=gf.next_row()) + plotting.plot_events_distribution( + events=factor_data["factor"], num_bars=n_bars, ax=gf.next_row() + ) plt.show() gf.close() if returns is not None and avgretplot is not None: - create_event_returns_tear_sheet(factor_data=factor_data, - returns=returns, - avgretplot=avgretplot, - long_short=long_short, - group_neutral=False, - std_bar=True, - by_group=False) - - factor_returns = perf.factor_returns(factor_data, - demeaned=False, - equal_weight=True) - - mean_quant_ret, std_quantile = \ - perf.mean_return_by_quantile(factor_data, - by_group=False, - demeaned=long_short) + create_event_returns_tear_sheet( + factor_data=factor_data, + returns=returns, + avgretplot=avgretplot, + long_short=long_short, + group_neutral=False, + std_bar=True, + by_group=False, + ) + + factor_returns = perf.factor_returns( + factor_data, demeaned=False, equal_weight=True + ) + + mean_quant_ret, std_quantile = perf.mean_return_by_quantile( + factor_data, by_group=False, demeaned=long_short + ) if rate_of_ret: - mean_quant_ret = \ - mean_quant_ret.apply(utils.rate_of_return, axis=0, - base_period=mean_quant_ret.columns[0]) - - mean_quant_ret_bydate, std_quant_daily = \ - perf.mean_return_by_quantile(factor_data, - by_date=True, - by_group=False, - demeaned=long_short) + mean_quant_ret = mean_quant_ret.apply( + utils.rate_of_return, axis=0, base_period=mean_quant_ret.columns[0] + ) + + mean_quant_ret_bydate, std_quant_daily = perf.mean_return_by_quantile( + factor_data, by_date=True, by_group=False, demeaned=long_short + ) if rate_of_ret: mean_quant_ret_bydate = mean_quant_ret_bydate.apply( - utils.rate_of_return, axis=0, - base_period=mean_quant_ret_bydate.columns[0] + utils.rate_of_return, + axis=0, + base_period=mean_quant_ret_bydate.columns[0], ) fr_cols = len(factor_returns.columns) vertical_sections = 2 + fr_cols * 1 gf = GridFigure(rows=vertical_sections + 1, cols=1) - plotting.plot_quantile_returns_bar(mean_quant_ret, - by_group=False, - ylim_percentiles=None, - ax=gf.next_row()) + plotting.plot_quantile_returns_bar( + mean_quant_ret, by_group=False, ylim_percentiles=None, ax=gf.next_row() + ) - plotting.plot_quantile_returns_violin(mean_quant_ret_bydate, - ylim_percentiles=(1, 99), - ax=gf.next_row()) + plotting.plot_quantile_returns_violin( + mean_quant_ret_bydate, ylim_percentiles=(1, 99), ax=gf.next_row() + ) trading_calendar = factor_data.index.levels[0].freq if trading_calendar is None: trading_calendar = pd.tseries.offsets.BDay() warnings.warn( "'freq' not set in factor_data index: assuming business day", - UserWarning + UserWarning, ) plt.show() diff --git a/alphalens/utils.py b/alphalens/utils.py index 6322e4f0..1a343904 100644 --- a/alphalens/utils.py +++ b/alphalens/utils.py @@ -344,9 +344,11 @@ def compute_forward_returns(factor, def backshift_returns_series(series, N): - """Shift a multi-indexed series backwards by N observations in the first level. 
- - This can be used to convert backward-looking returns into a forward-returns series. + """Shift a multi-indexed series backwards by N observations in + the first level. + + This can be used to convert backward-looking returns into a + forward-returns series. """ ix = series.index dates, sids = ix.levels @@ -901,10 +903,10 @@ def get_forward_returns_columns(columns, require_exact_day_multiple=False): if require_exact_day_multiple: pattern = re.compile(r"^(\d+([D]))+$", re.IGNORECASE) valid_columns = [(pattern.match(col) is not None) for col in columns] - + if sum(valid_columns) < len(valid_columns): warnings.warn( - "Skipping return periods that aren't exact multiples" \ + "Skipping return periods that aren't exact multiples" + " of days." ) else: From 32cfed87e826ff62f00cf1194c77bc67c52b541b Mon Sep 17 00:00:00 2001 From: David Michalowicz Date: Mon, 20 Apr 2020 13:03:06 -0400 Subject: [PATCH 4/4] MAINT: Test fixes + miscellaneous cleanup --- .../examples/tear_sheet_walk_through.ipynb | 2 +- alphalens/performance.py | 45 +- alphalens/plotting.py | 2 +- alphalens/tears.py | 52 +- alphalens/tests/test_performance.py | 554 ++++++++---------- alphalens/tests/test_tears.py | 24 +- alphalens/utils.py | 41 +- setup.py | 2 +- 8 files changed, 358 insertions(+), 364 deletions(-) diff --git a/alphalens/examples/tear_sheet_walk_through.ipynb b/alphalens/examples/tear_sheet_walk_through.ipynb index 8b86405a..318f09df 100644 --- a/alphalens/examples/tear_sheet_walk_through.ipynb +++ b/alphalens/examples/tear_sheet_walk_through.ipynb @@ -1674,7 +1674,7 @@ "outputs": [], "source": [ "quantile_factor = factor_data['factor_quantile']\n", - "turnover_period = '1D'" + "turnover_period = 1" ] }, { diff --git a/alphalens/performance.py b/alphalens/performance.py index b8465b84..1b51c02c 100644 --- a/alphalens/performance.py +++ b/alphalens/performance.py @@ -578,6 +578,7 @@ def quantile_turnover(quantile_factor, quantile, period=1): Quantile on which to perform turnover analysis. period: int, optional Number of days over which to calculate the turnover. + Returns ------- quant_turnover : pd.Series @@ -616,14 +617,13 @@ def factor_rank_autocorrelation(factor_data, period=1): - See full explanation in utils.get_clean_factor_and_forward_returns period: int, optional Number of days over which to calculate the turnover. + Returns ------- autocorr : pd.Series Rolling 1 period (defined by time_rule) autocorrelation of factor values. - """ - grouper = [factor_data.index.get_level_values('date')] ranks = factor_data.groupby(grouper)['factor'].rank() @@ -658,17 +658,18 @@ def common_start_returns(factor, factor : pd.DataFrame DataFrame with at least date and equity as index, the columns are irrelevant - prices : pd.DataFrame - A wide form Pandas DataFrame indexed by date with assets - in the columns. Pricing data should span the factor - analysis time period plus/minus an additional buffer window - corresponding to after/before period parameters. + returns : pd.DataFrame + A wide form Pandas DataFrame indexed by date with assets in the + columns. Returns data should span the factor analysis time period + plus/minus an additional buffer window corresponding to after/before + period parameters. before: How many returns to load before factor date after: How many returns to load after factor date cumulative: bool, optional - Return cumulative returns + Whether or not the given returns are cumulative. If False the given + returns are assumed to be daily. 
mean_by_date: bool, optional If True, compute mean returns for each date and return that instead of a return series for each asset @@ -684,7 +685,6 @@ def common_start_returns(factor, Dataframe containing returns series for each factor aligned to the same index: -before to after """ - if not cumulative: returns = returns.apply(cumulative_returns, axis=0) @@ -714,9 +714,6 @@ def common_start_returns(factor, series.index = range(starting_index - day_zero_index, ending_index - day_zero_index) - if cumulative: - series = (series / series.loc[0, :]) - 1 - if demean_by is not None: mean = series.loc[:, demean_equities].mean(axis=1) series = series.loc[:, equities] @@ -749,11 +746,11 @@ def average_cumulative_return_by_quantile(factor_data, each period, the factor quantile/bin that factor value belongs to, and (optionally) the group the asset belongs to. - See full explanation in utils.get_clean_factor_and_forward_returns - prices : pd.DataFrame - A wide form Pandas DataFrame indexed by date with assets - in the columns. Pricing data should span the factor - analysis time period plus/minus an additional buffer window - corresponding to periods_after/periods_before parameters. + returns : pd.DataFrame + A wide form Pandas DataFrame indexed by date with assets in the + columns. Returns data should span the factor analysis time period + plus/minus an additional buffer window corresponding to periods_after/ + periods_before parameters. periods_before : int, optional How many periods before factor to plot periods_after : int, optional @@ -765,6 +762,7 @@ def average_cumulative_return_by_quantile(factor_data, neutral portfolio) by_group : bool If True, compute cumulative returns separately for each group + Returns ------- cumulative returns and std deviation : pd.DataFrame @@ -791,10 +789,15 @@ def average_cumulative_return_by_quantile(factor_data, """ def cumulative_return_around_event(q_fact, demean_by): - return common_start_returns(q_fact, returns, - periods_before, - periods_after, - True, True, demean_by) + return common_start_returns( + q_fact, + returns, + periods_before, + periods_after, + cumulative=True, + mean_by_date=True, + demean_by=demean_by, + ) def average_cumulative_return(q_fact, demean_by): q_returns = cumulative_return_around_event(q_fact, demean_by) diff --git a/alphalens/plotting.py b/alphalens/plotting.py index 5c2ec548..4cecd6b6 100644 --- a/alphalens/plotting.py +++ b/alphalens/plotting.py @@ -629,7 +629,7 @@ def plot_top_bottom_quantile_turnover(quantile_turnover, period=1, ax=None): quantile_turnover: pd.Dataframe Quantile turnover (each DataFrame column a quantile). period: int, optional - Period over which to calculate the turnover + Period over which to calculate the turnover. ax : matplotlib.Axes, optional Axes upon which to plot. 
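
The performance.py and plotting.py hunks above standardize turnover arguments on plain integer day counts (matching the notebook's `turnover_period = 1`) and document the event-study helpers as taking a wide daily `returns` frame rather than `prices`. A minimal sketch of the integer-period convention follows, assuming this patch is applied; the dates, tickers, and quantile labels are invented purely for illustration and are not part of the patch:

    import pandas as pd

    from alphalens.performance import quantile_turnover

    # Toy factor-quantile series with a (date, asset) MultiIndex;
    # the values are illustrative only.
    dates = pd.date_range("2015-01-05", periods=4, freq="B")
    assets = ["A", "B", "C", "D"]
    quantile_labels = [
        [1, 1, 2, 2],
        [1, 2, 1, 2],
        [2, 1, 2, 1],
        [1, 1, 2, 2],
    ]
    quantile_factor = (
        pd.DataFrame(quantile_labels, index=dates, columns=assets).stack()
    )
    quantile_factor.index.names = ["date", "asset"]

    # 'period' is now an integer number of days rather than a timedelta
    # string such as '1D'.
    top_quantile_turnover = quantile_turnover(
        quantile_factor, quantile=2, period=1
    )
    print(top_quantile_turnover)

The same integer-day convention applies to `factor_rank_autocorrelation(factor_data, period=1)`, whose docstring is updated in the same hunk.
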
diff --git a/alphalens/tears.py b/alphalens/tears.py index 5b0f2a99..5616dd1e 100644 --- a/alphalens/tears.py +++ b/alphalens/tears.py @@ -124,6 +124,7 @@ def create_summary_tear_sheet( ) periods = utils.get_forward_returns_columns(factor_data.columns) + periods = list(map(lambda p: pd.Timedelta(p).days, periods)) fr_cols = len(periods) vertical_sections = 2 + fr_cols * 3 @@ -430,9 +431,11 @@ def create_turnover_tear_sheet(factor_data, turnover_periods=None): if turnover_periods is None: input_periods = utils.get_forward_returns_columns( factor_data.columns, require_exact_day_multiple=True, - ) - turnover_periods = list( - map((lambda x: pd.Timedelta(x).days), input_periods.get_values()) + ).get_values() + turnover_periods = utils.timedelta_strings_to_integers(input_periods) + else: + turnover_periods = utils.timedelta_strings_to_integers( + turnover_periods, ) quantile_factor = factor_data["factor_quantile"] @@ -483,9 +486,10 @@ def create_turnover_tear_sheet(factor_data, turnover_periods=None): @plotting.customize -def create_full_tear_sheet( - factor_data, long_short=True, group_neutral=False, by_group=False -): +def create_full_tear_sheet(factor_data, + long_short=True, + group_neutral=False, + by_group=False): """ Creates a full tear sheet for analysis and evaluating single return predicting (alpha) factor. @@ -523,15 +527,13 @@ def create_full_tear_sheet( @plotting.customize -def create_event_returns_tear_sheet( - factor_data, - returns, - avgretplot=(5, 15), - long_short=True, - group_neutral=False, - std_bar=True, - by_group=False, -): +def create_event_returns_tear_sheet(factor_data, + returns, + avgretplot=(5, 15), + long_short=True, + group_neutral=False, + std_bar=True, + by_group=False): """ Creates a tear sheet to view the average cumulative returns for a factor within a window (pre and post event). @@ -544,9 +546,9 @@ def create_event_returns_tear_sheet( quantile/bin that factor value belongs to and (optionally) the group the asset belongs to. - See full explanation in utils.get_clean_factor_and_forward_returns - prices : pd.DataFrame - A DataFrame indexed by date with assets in the columns containing the - pricing data. + returns : pd.DataFrame + A DataFrame indexed by date with assets in the columns containing daily + returns. - See full explanation in utils.get_clean_factor_and_forward_returns avgretplot: tuple (int, int) - (before, after) If not None, plot quantile average cumulative returns @@ -631,9 +633,11 @@ def create_event_returns_tear_sheet( @plotting.customize -def create_event_study_tear_sheet( - factor_data, returns, avgretplot=(5, 15), rate_of_ret=True, n_bars=50 -): +def create_event_study_tear_sheet(factor_data, + returns, + avgretplot=(5, 15), + rate_of_ret=True, + n_bars=50): """ Creates an event study tear sheet for analysis of a specific event. @@ -644,9 +648,9 @@ def create_event_study_tear_sheet( containing the values for a single event, forward returns for each period, the factor quantile/bin that factor value belongs to, and (optionally) the group the asset belongs to. - prices : pd.DataFrame, required only if 'avgretplot' is provided - A DataFrame indexed by date with assets in the columns containing the - pricing data. + returns : pd.DataFrame, required only if 'avgretplot' is provided + A DataFrame indexed by date with assets in the columns containing daily + returns. 
- See full explanation in utils.get_clean_factor_and_forward_returns avgretplot: tuple (int, int) - (before, after), optional If not None, plot event style average cumulative returns within a diff --git a/alphalens/tests/test_performance.py b/alphalens/tests/test_performance.py index aa1d2f2c..0d244854 100644 --- a/alphalens/tests/test_performance.py +++ b/alphalens/tests/test_performance.py @@ -266,85 +266,85 @@ def test_mean_return_by_quantile(self, [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1B', 4.0, '1D', + '1B', 4.0, 1, [nan, 1.0, 1.0, 0.0]), ([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1D', 4.0, '1D', + '1D', 4.0, 1, [nan, 1.0, 1.0, 0.0]), ([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1B', 4.0, '2D', + '1B', 4.0, 2, [nan, nan, 0.0, 1.0]), ([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1D', 4.0, '2D', + '1D', 4.0, 2, [nan, nan, 0.0, 1.0]), ([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1B', 4.0, '3D', + '1B', 4.0, 3, [nan, nan, nan, 0.0]), ([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1D', 4.0, '3D', + '1D', 4.0, 3, [nan, nan, nan, 0.0]), ([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1B', 3.0, '1D', + '1B', 3.0, 1, [nan, 0.0, 0.0, 0.0]), ([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1D', 3.0, '1D', + '1D', 3.0, 1, [nan, 0.0, 0.0, 0.0]), ([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1B', 3.0, '2D', + '1B', 3.0, 2, [nan, nan, 0.0, 0.0]), ([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1D', 3.0, '2D', + '1D', 3.0, 2, [nan, nan, 0.0, 0.0]), ([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1B', 3.0, '3D', + '1B', 3.0, 3, [nan, nan, nan, 0.0]), ([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1D', 3.0, '3D', + '1D', 3.0, 3, [nan, nan, nan, 0.0]), ([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0]], - '1B', 2.0, '1D', + '1B', 2.0, 1, [nan, 1.0, 1.0, 1.0]), ([[1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0]], - '1D', 2.0, '1D', + '1D', 2.0, 1, [nan, 1.0, 1.0, 1.0]), ([[1.0, 2.0, 3.0, 4.0], [1.0, 3.0, 2.0, 4.0], @@ -358,7 +358,7 @@ def test_mean_return_by_quantile(self, [1.0, 3.0, 2.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 3.0, 2.0, 4.0]], - '1B', 3.0, '4D', + '1B', 3.0, 4, [nan, nan, nan, nan, 0., 0., 0., 0., 0., 0., 0., 0.]), @@ -374,7 +374,7 @@ def test_mean_return_by_quantile(self, [1.0, 3.0, 2.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 3.0, 2.0, 4.0]], - '1D', 3.0, '4D', + '1D', 3.0, 4, [nan, nan, nan, nan, 0., 0., 0., 0., 0., 0., 0., 0.]), @@ -390,7 +390,7 @@ def test_mean_return_by_quantile(self, [1.0, 3.0, 2.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1B', 3.0, '10D', + '1B', 3.0, 10, [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 0., 1.]), @@ -406,7 +406,7 @@ def test_mean_return_by_quantile(self, [1.0, 3.0, 2.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1D', 3.0, '10D', + '1D', 3.0, 10, [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 0., 1.]) @@ -612,130 +612,67 @@ def test_factor_alpha_beta(self, fwd_return_vals, alpha, beta): assert_frame_equal(ab, 
expected) - @parameterized.expand([([1.0, 0.5, 1.0, 0.5, 0.5], - '1D', '1D', - [1.0, 2.0, 3.0, 6.0, 9.0, 13.50]), - ([1.0, 0.5, 1.0, 0.5, 0.5], - '1D', '45m', - [1., 2., 2., 3., 3.0, 6.0, 6.0, 9.0, 9.0, 13.50]), - ([0.1, 0.1, 0.1, 0.1, 0.1], - '1D', '1D', - [1.0, 1.1, 1.21, 1.331, 1.4641, 1.61051]), - ([-0.1, -0.1, -0.1, -0.1, -0.1], - '1D', '1D', - [1.0, 0.9, 0.81, 0.729, 0.6561, 0.59049]), - ([1.0, 0.5, 1.0, 0.5, 0.5], - '1B', '1D', - [1.0, 2.0, 3.0, 6.0, 9.0, 13.50]), - ([1.0, 0.5, 1.0, 0.5, 0.5], - '1B', '45m', - [1., 2., 2., 3., 3.0, 6.0, 6.0, 9.0, 9.0, 13.50]), - ([0.1, 0.1, 0.1, 0.1, 0.1], - '1B', '1D', - [1.0, 1.1, 1.21, 1.331, 1.4641, 1.61051]), - ([-0.1, -0.1, -0.1, -0.1, -0.1], - '1B', '1D', - [1.0, 0.9, 0.81, 0.729, 0.6561, 0.59049]), - ([1.0, 0.5, 1.0, 0.5, 0.5], - '1CD', '1D', - [1.0, 2.0, 3.0, 6.0, 9.0, 13.50]), - ([1.0, 0.5, 1.0, 0.5, 0.5], - '1CD', '45m', - [1., 2., 2., 3., 3.0, 6.0, 6.0, 9.0, 9.0, 13.50]), - ([0.1, 0.1, 0.1, 0.1, 0.1], - '1CD', '1D', - [1.0, 1.1, 1.21, 1.331, 1.4641, 1.61051]), - ([-0.1, -0.1, -0.1, -0.1, -0.1], - '1CD', '1D', - [1.0, 0.9, 0.81, 0.729, 0.6561, 0.59049]), - ([1.0, nan, 0.5, nan, 1.0, nan, 0.5, nan, 0.5], - '20S', '20s', - [1.0, 2., 2., 3., 3.0, 6.0, 6.0, 9.0, 9.0, 13.50]), - ([0.1, 0, 0.1, 0, 0.1, 0, 0.1, 0, 0.1], - '10T', '10m', - [1.0, 1.1, 1.1, 1.21, 1.21, 1.331, 1.331, 1.4641, - 1.4641, 1.61051]), - ([3.0, 0.0, 0.0], - '1H', '2h', - [1.0, 2.0, 3.0, 3.0, 3.0]), - ([1.0, 1.0, 1.0, 1.0, 1.0], - '1H', '2h', - [1.0, 1.4142, 2.0, 2.8284, 4.0, 5.6568, 8.0]), - ([0.1, 0.1, 0.1, 0.1, 0.1], - '1H', '2h', - [1.0, 1.0488, 1.1, 1.15368, 1.21, 1.26905, 1.331]), - ([-0.1, -0.1, -0.1, -0.1, -0.1], - '1T', '2m', - [1.0, 0.94868, 0.9, 0.8538, 0.81, 0.76843, 0.729]), - ([-0.75, -0.75, -0.75, -0.75, -0.75], - '1D', '2D', - [1., 0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625]), - ([-0.75, -0.75, -0.75, -0.75, -0.75], - '1B', '2D', - [1., 0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625]), - ([3.0, 3.0, 3.0, 3.0, 3.0], - '1D', '2D', - [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]), - ([3.0, 3.0, 3.0, 3.0, 3.0], - '1B', '2D', - [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]), - ([3.0, 3.0, 3.0, 3.0, 3.0], - '1CD', '2D', - [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]), - ([3.0, -0.75, 3.0, -0.75, 3.0], - '1H', '2h', - [1.0, 2.0, 2.5, 3.125, 3.90625, 4.88281, 9.76562]), - ([3.0, -0.75], - '1D', '2D', - [1.0, 2.0, 2.5, 1.25]), - ([3.0, -0.75], - '1B', '2D', - [1.0, 2.0, 2.5, 1.25]), - ([7.0, -0.875, 7.0, -0.875, 7.0], - '1D', '3D', - [1.0, 2.0, 2.5, 3.75, 3.75, 5.625, 7.03125, - 14.0625]), - ([7.0, -0.875, 7.0, -0.875, 7.0], - '1B', '3D', - [1.0, 2.0, 2.5, 3.75, 3.75, 5.625, 7.03125, - 14.0625]), - ([7.0, -0.875, 7.0, -0.875, 7.0], - '1CD', '3D', - [1.0, 2.0, 2.5, 3.75, 3.75, 5.625, 7.03125, - 14.0625]), - ([7.0, -0.875, nan, 7.0, -0.875], - '1D', '3D', - [1.0, 2.0, 2.5, 3.125, 3.90625, 4.88281, 6.10351, - 3.05175]), - ([7.0, -0.875, nan, 7.0, -0.875], - '1B', '3D', - [1.0, 2.0, 2.5, 3.125, 3.90625, 4.88281, 6.10351, - 3.05175]), - ([7.0, nan, nan, -0.875, 7.0, nan, nan, nan, 7.0, - nan, -0.875], - '1H', '3h', - [1.0, 2.0, 4.0, 8.0, 4.0, 5.0, 6.25, 12.5, 12.5, - 25., 50., 62.5, 31.25, 15.625]), - ([15., nan, nan, -0.9375, 15., nan, nan, nan, 15.], - '1D', '4D', - [1.0, 2.0, 4.0, 8.0, 10.0, 12.5, 15.625, 19.53125, - 39.0625, 78.125, 156.25, 312.5, 625.0]), - ([15., nan, nan, -0.9375, 15., nan, nan, nan, 15.], - '1B', '4D', - [1.0, 2.0, 4.0, 8.0, 10.0, 12.5, 15.625, 19.53125, - 39.0625, 78.125, 156.25, 312.5, 625.0]), - ([15.0, -0.9375, 15.0, -0.9375], - '1D', '4D', - [1.0, 2.0, 2.5, 3.75, 
4.6875, 4.6875, 5.85937, - 2.92968]), - ([15.0, -0.9375, 15.0, -0.9375], - '1B', '4D', - [1.0, 2.0, 2.5, 3.75, 4.6875, 4.6875, 5.85937, - 2.92968]), - ]) - def test_cumulative_returns(self, returns, ret_freq, period_len, + @parameterized.expand([ + ( + [1.0, 0.5, 1.0, 0.5, 0.5], + '1D', + '1D', + [2.0, 3.0, 6.0, 9.0, 13.50], + ), + ( + [0.1, 0.1, 0.1, 0.1, 0.1], + '1D', + '1D', + [1.1, 1.21, 1.331, 1.4641, 1.61051], + ), + ( + [-0.1, -0.1, -0.1, -0.1, -0.1], + '1D', + '1D', + [0.9, 0.81, 0.729, 0.6561, 0.59049], + ), + ( + [1.0, 0.5, 1.0, 0.5, 0.5], + '1B', + '1D', + [2.0, 3.0, 6.0, 9.0, 13.50], + ), + ( + [0.1, 0.1, 0.1, 0.1, 0.1], + '1B', + '1D', + [1.1, 1.21, 1.331, 1.4641, 1.61051], + ), + ( + [-0.1, -0.1, -0.1, -0.1, -0.1], + '1B', + '1D', + [0.9, 0.81, 0.729, 0.6561, 0.59049], + ), + ( + [1.0, 0.5, 1.0, 0.5, 0.5], + '1CD', + '1D', + [2.0, 3.0, 6.0, 9.0, 13.50], + ), + ( + [0.1, 0.1, 0.1, 0.1, 0.1], + '1CD', + '1D', + [1.1, 1.21, 1.331, 1.4641, 1.61051], + ), + ( + [-0.1, -0.1, -0.1, -0.1, -0.1], + '1CD', + '1D', + [0.9, 0.81, 0.729, 0.6561, 0.59049], + ), + ]) + def test_cumulative_returns(self, + returns, + ret_freq, + period_len, expected_vals): - if 'CD' in ret_freq: ret_freq_class = CDay(weekmask='Tue Wed Thu Fri Sun') ret_freq = ret_freq_class @@ -748,7 +685,7 @@ def test_cumulative_returns(self, returns, ret_freq, period_len, index = date_range('1/1/1999', periods=len(returns), freq=ret_freq) returns = Series(returns, index=index) - cum_ret = cumulative_returns(returns, period_len, ret_freq_class) + cum_ret = cumulative_returns(returns) expected = Series(expected_vals, index=cum_ret.index) @@ -758,25 +695,25 @@ def test_cumulative_returns(self, returns, ret_freq, period_len, [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1B', '1D', + '1B', 1, [nan, 1.0, 1.0, 1.0]), ([[1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0], [1.0, 2.0, 3.0, 4.0]], - '1D', '1D', + '1D', 1, [nan, 1.0, 1.0, 1.0]), ([[4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0]], - '1B', '1D', + '1B', 1, [nan, -1.0, -1.0, -1.0]), ([[4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0], [4.0, 3.0, 2.0, 1.0], [1.0, 2.0, 3.0, 4.0]], - '1D', '1D', + '1D', 1, [nan, -1.0, -1.0, -1.0]), ([[1.0, 2.0, 3.0, 4.0], [2.0, 1.0, 4.0, 3.0], @@ -790,7 +727,7 @@ def test_cumulative_returns(self, returns, ret_freq, period_len, [2.0, 1.0, 4.0, 3.0], [2.0, 1.0, 4.0, 3.0], [4.0, 3.0, 2.0, 1.0]], - '1B', '3D', + '1B', 3, [nan, nan, nan, 1.0, 1.0, 1.0, 0.6, -0.6, -1.0, 1.0, -0.6, -1.0]), @@ -806,7 +743,7 @@ def test_cumulative_returns(self, returns, ret_freq, period_len, [2.0, 1.0, 4.0, 3.0], [2.0, 1.0, 4.0, 3.0], [4.0, 3.0, 2.0, 1.0]], - '1D', '3D', + '1D', 3, [nan, nan, nan, 1.0, 1.0, 1.0, 0.6, -0.6, -1.0, 1.0, -0.6, -1.0]) @@ -835,70 +772,75 @@ def test_factor_rank_autocorrelation(self, assert_series_equal(fa, expected) - @parameterized.expand([(2, 3, False, False, - [[0.075, 0.241868], [0.075, 0.241868], - [0.075, 0.241868], [0.075, 0.241868], - [0.075, 0.241868], [0.075, 0.241868]]), - (3, 2, False, True, - [[0.0, 0.241868], [0.0, 0.241868], - [0.0, 0.241868], [0.0, 0.241868], - [0.0, 0.241868], [0.0, 0.241868]]), - (3, 5, True, False, - [[0.075, 0.0], [0.075, 0.0], [0.075, 0.0], - [0.075, 0.0], [0.075, 0.0], [0.075, 0.0], - [0.075, 0.0], [0.075, 0.0], [0.075, 0.0]]), - (1, 4, True, True, - [[0., 0.], [0., 0.], [0., 0.], - [0., 0.], [0., 0.], [0., 0.]]), - (6, 6, False, False, - [[0.075, 0.243614], [0.075, 0.242861], - [0.075, 0.242301], [0.075, 0.241868], - [0.075, 
0.241868], [0.075, 0.241868], - [0.075, 0.241868], [0.075, 0.241868], - [0.075, 0.241868], [0.075, 0.241868], - [0.075, 0.241868], [0.075, 0.242301], - [0.075, 0.242861]]), - (6, 6, False, True, - [[0.0, 0.243614], [0.0, 0.242861], [0.0, 0.242301], - [0.0, 0.241868], [0.0, 0.241868], [0.0, 0.241868], - [0.0, 0.241868], [0.0, 0.241868], [0.0, 0.241868], - [0.0, 0.241868], [0.0, 0.241868], [0.0, 0.242301], - [0.0, 0.242861]]), - (6, 6, True, False, - [[0.075, 0.0], [0.075, 0.0], [0.075, 0.0], - [0.075, 0.0], [0.075, 0.0], [0.075, 0.0], - [0.075, 0.0], [0.075, 0.0], [0.075, 0.0], - [0.075, 0.0], [0.075, 0.0], [0.075, 0.0], - [0.075, 0.0]]), - (6, 6, True, True, - [[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.], - [0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.], - [0., 0.], [0., 0.], [0., 0.]]), - ]) - def test_common_start_returns(self, before, after, mean_by_date, demeaned, + @parameterized.expand([ + ( + 2, 3, False, False, + [[4.93048307, 8.68843922], [6.60404312, 12.22369139], + [8.92068367, 17.1794088], [12.1275523, 24.12861778], + [16.5694159, 33.8740100], [22.7273233, 47.53995233]], + ), + ( + 3, 2, False, True, + [[0.0, 5.63219176], [0.0, 7.96515233], + [0.0, 11.2420646], [0.0, 15.8458720], + [0.0, 22.3134160], [0.0, 31.3970961]], + ), + ( + 3, 5, True, False, + [[3.7228318, 2.6210478], [4.9304831, 3.6296796], [6.6040431, 5.0193734], # noqa + [8.9206837, 6.9404046], [12.127552, 9.6023405], [16.569416, 13.297652], # noqa + [22.727323, 18.434747], [31.272682, 25.584180], [34.358565, 25.497254]], # noqa + ), + ( + 1, 4, True, True, + [[0., 0.], [0., 0.], [0., 0.], + [0., 0.], [0., 0.], [0., 0.]], + ), + ( + 6, 6, False, False, + [[2.02679565, 2.38468223], [2.38769454, 3.22602748], + [2.85413029, 4.36044469], [3.72283181, 6.16462715], + [4.93048307, 8.68843922], [6.60404312, 12.2236914], + [8.92068367, 17.1794088], [12.1275523, 24.1286178], + [16.5694159, 33.8740100], [22.7273233, 47.5399523], + [31.2726821, 66.7013483], [34.3585654, 70.1828776], + [37.9964585, 74.3294620]], + ), + ( + 6, 6, False, True, + [[0.0, 2.20770299], [0.0, 2.95942924], [0.0, 3.97022414], + [0.0, 5.63219176], [0.0, 7.96515233], [0.0, 11.2420646], + [0.0, 15.8458720], [0.0, 22.3134160], [0.0, 31.3970962], + [0.0, 44.1512888], [0.0, 62.0533954], [0.0, 65.8668371], + [0.0, 70.4306483]], + ), + ( + 6, 6, True, False, + [[2.0267957, 0.9562173], [2.3876945, 1.3511898], [2.8541303, 1.8856194], # noqa + [3.7228318, 2.6210478], [4.9304831, 3.6296796], [6.6040431, 5.0193734], # noqa + [8.9206837, 6.9404046], [12.127552, 9.6023405], [16.569416, 13.297652], # noqa + [22.727323, 18.434747], [31.272682, 25.584180], [34.358565, 25.497254], # noqa + [37.996459, 25.198051]], + ), + ( + 6, 6, True, True, + [[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.], + [0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.], + [0., 0.], [0., 0.], [0., 0.]], + ), + ]) + def test_common_start_returns(self, + before, + after, + mean_by_date, + demeaned, expected_vals): dr = date_range(start='2015-1-17', end='2015-2-2') dr.name = 'date' tickers = ['A', 'B', 'C', 'D'] r1, r2, r3, r4 = (1.20, 1.40, 0.90, 0.80) - prices = DataFrame(index=dr, columns=tickers, - data=[[r1**1, r2**1, r3**1, r4**1], - [r1**2, r2**2, r3**2, r4**2], - [r1**3, r2**3, r3**3, r4**3], - [r1**4, r2**4, r3**4, r4**4], - [r1**5, r2**5, r3**5, r4**5], - [r1**6, r2**6, r3**6, r4**6], - [r1**7, r2**7, r3**7, r4**7], - [r1**8, r2**8, r3**8, r4**8], - [r1**9, r2**9, r3**9, r4**9], - [r1**10, r2**10, r3**10, r4**10], - [r1**11, r2**11, r3**11, r4**11], - [r1**12, r2**12, r3**12, r4**12], 
- [r1**13, r2**13, r3**13, r4**13], - [r1**14, r2**14, r3**14, r4**14], - [r1**15, r2**15, r3**15, r4**15], - [r1**16, r2**16, r3**16, r4**16], - [r1**17, r2**17, r3**17, r4**17]]) + data = [[r1**i, r2**i, r3**i, r4**i] for i in range(1, 18)] + returns = DataFrame(data=data, index=dr, columns=tickers) dr2 = date_range(start='2015-1-21', end='2015-1-29') factor = DataFrame(index=dr2, columns=tickers, data=[[3, 4, 2, 1], @@ -915,76 +857,89 @@ def test_common_start_returns(self, before, after, mean_by_date, demeaned, cmrt = common_start_returns( factor, - prices, + returns, before, after, - False, - mean_by_date, - factor if demeaned else None) + cumulative=True, + mean_by_date=mean_by_date, + demean_by=factor if demeaned else None, + ) cmrt = DataFrame({'mean': cmrt.mean(axis=1), 'std': cmrt.std(axis=1)}) expected = DataFrame(index=range(-before, after + 1), columns=['mean', 'std'], data=expected_vals) assert_frame_equal(cmrt, expected) - @parameterized.expand([(1, 2, False, 4, - [[1.00, 0.0, -0.50, -0.75], - [0.0, 0.0, 0.0, 0.0], - [0.00, 0.00, 0.00, 0.00], - [0.0, 0.0, 0.0, 0.0], - [-0.20, 0.0, 0.25, 0.5625], - [0.0, 0.0, 0.0, 0.0], - [-0.3333333, 0.0, 0.50, 1.25], - [0.0, 0.0, 0.0, 0.0]]), - (1, 2, True, 4, - [[0.8833333, 0.0, -0.5625, -1.015625], - [0.0, 0.0, 0.0, 0.0], - [-0.1166667, 0.0, -0.0625, -0.265625], - [0.0, 0.0, 0.0, 0.0], - [-0.3166667, 0.0, 0.1875, 0.296875], - [0.0, 0.0, 0.0, 0.0], - [-0.4500000, 0.0, 0.4375, 0.984375], - [0.0, 0.0, 0.0, 0.0]]), - (3, 0, False, 4, - [[7.0, 3.0, 1.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - [-0.488, -0.36, -0.2, 0.0], - [0.0, 0.0, 0.0, 0.0], - [-0.703704, -0.55555555, -0.333333333, 0.0], - [0.0, 0.0, 0.0, 0.0]]), - (0, 3, True, 4, - [[0.0, -0.5625, -1.015625, -1.488281], - [0.0, 0.0, 0.0, 0.0], - [0.0, -0.0625, -0.265625, -0.613281], - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.1875, 0.296875, 0.339844], - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.4375, 0.984375, 1.761719], - [0.0, 0.0, 0.0, 0.0]]), - (3, 3, False, 2, - [[3.5, 1.5, 0.5, 0.0, -0.25, -0.375, -0.4375], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [-0.595852, -0.457778, -0.266667, 0.0, 0.375, - 0.90625, 1.664062], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]), - (3, 3, True, 2, - [[2.047926, 0.978888, 0.383333, 0.0, -0.3125, - -0.640625, -1.050781], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], - [-2.047926, -0.978888, -0.383333, 0.0, 0.3125, - 0.640625, 1.050781], - [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]), - ]) - def test_average_cumulative_return_by_quantile(self, before, after, - demeaned, quantiles, + @parameterized.expand([ + ( + 1, 2, False, 4, + [[0.00512695, 0.00256348, 0.00128174, 6.40869e-4], + [0.00579185, 0.00289592, 0.00144796, 7.23981e-4], + [1.00000000, 1.00000000, 1.00000000, 1.00000000], + [0.00000000, 0.00000000, 0.00000000, 0.00000000], + [7.15814531, 8.94768164, 11.1846020, 13.9807526], + [2.93784787, 3.67230984, 4.59038730, 5.73798413], + [39.4519043, 59.1778564, 88.7667847, 133.150177], + [28.3717330, 42.5575995, 63.8363992, 95.7545989]], + ), + ( + 1, 2, True, 4, + [[-11.898667, -17.279462, -25.236885, -37.032252], + [7.82587034, 11.5529583, 17.0996881, 25.3636472], + [-10.903794, -16.282025, -24.238167, -36.032893], + [7.82140124, 11.5507268, 17.0985737, 25.3630906], + [-4.7456488, -8.3343438, -14.053565, -23.052140], + [4.91184665, 7.91180853, 12.5481552, 19.6734224], + [27.5481102, 41.8958311, 63.5286176, 96.1172844], + [20.5510133, 31.0075980, 46.7385910, 70.3923129]], + ), + ( + 3, 0, False, 4, + [[7.0, 3.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 
0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [-0.488, -0.36, -0.2, 0.0], + [0.0, 0.0, 0.0, 0.0], + [-0.703704, -0.55555555, -0.333333333, 0.0], + [0.0, 0.0, 0.0, 0.0]], + ), + ( + 0, 3, True, 4, + [[-17.279462, -25.236885, -37.032252, -54.550061], + [11.5529583, 17.0996881, 25.3636472, 37.6887906], + [-16.282025, -24.238167, -36.032893, -53.550382], + [11.5507268, 17.0985737, 25.3630906, 37.6885125], + [-8.3343438, -14.053565, -23.052140, -37.074441], + [7.91180853, 12.5481552, 19.6734224, 30.5748605], + [41.8958311, 63.5286176, 96.1172844, 145.174884], + [31.0075980, 46.7385910, 70.3923129, 105.944230]]), + ( + 3, 3, False, 2, + [[0.5102539, 0.50512695, 0.50256348, 0.50128174, 0.50064087, 0.50032043, 0.50016022], # noqa + [0.0115837, 0.00579185, 0.00289592, 1.44796e-3, 7.23981e-4, 3.61990e-4, 1.80995e-4], # noqa + [11.057696, 16.0138929, 23.3050248, 34.0627690, 49.9756934, 73.5654648, 108.600603], # noqa + [7.2389454, 10.6247239, 15.6450367, 23.1025693, 34.1977045, 50.7264595, 75.3771641]], # noqa + ), + ( + 3, 3, True, 2, + [[-5.273721, -7.754383, -11.40123, -16.78074, -24.73753, -36.53257, -54.05022], # noqa + [3.6239580, 5.3146000, 7.8236356, 11.551843, 17.099131, 25.363369, 37.688652], # noqa + [5.2737212, 7.7543830, 11.401231, 16.780744, 24.737526, 36.532572, 54.050221], # noqa + [3.6239580, 5.3146000, 7.8236356, 11.551843, 17.099131, 25.363369, 37.688652]], # noqa + ), + ]) + def test_average_cumulative_return_by_quantile(self, + before, + after, + demeaned, + quantiles, expected_vals): dr = date_range(start='2015-1-15', end='2015-2-1') dr.name = 'date' tickers = ['A', 'B', 'C', 'D'] r1, r2, r3, r4 = (1.25, 1.50, 1.00, 0.50) data = [[r1**i, r2**i, r3**i, r4**i] for i in range(1, 19)] - prices = DataFrame(index=dr, columns=tickers, data=data) + returns = DataFrame(index=dr, columns=tickers, data=data) dr2 = date_range(start='2015-1-21', end='2015-1-26') dr2.name = 'date' factor = DataFrame( @@ -997,11 +952,11 @@ def test_average_cumulative_return_by_quantile(self, before, after, [3, 4, 2, 1]]).stack() factor_data = get_clean_factor_and_forward_returns( - factor, prices, quantiles=quantiles, periods=range( + factor, returns, quantiles=quantiles, periods=range( 0, after + 1), filter_zscore=False) avgrt = average_cumulative_return_by_quantile( - factor_data, prices, before, after, demeaned) + factor_data, returns, before, after, demeaned) arrays = [] for q in range(1, quantiles + 1): arrays.append((q, 'mean')) @@ -1011,35 +966,44 @@ def test_average_cumulative_return_by_quantile(self, before, after, index=index, columns=range(-before, after + 1), data=expected_vals) assert_frame_equal(avgrt, expected) - @parameterized.expand([(0, 2, False, 4, - [[0.0, -0.50, -0.75], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [0.0, 0.25, 0.5625], - [0.0, 0.0, 0.0], - [0.0, 0.50, 1.25], - [0.0, 0.0, 0.0]]), - (0, 3, True, 4, - [[0.0, -0.5625, -1.015625, -1.488281], - [0.0, 0.0, 0.0, 0.0], - [0.0, -0.0625, -0.265625, -0.613281], - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.1875, 0.296875, 0.339844], - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.4375, 0.984375, 1.761719], - [0.0, 0.0, 0.0, 0.0]]), - (0, 3, False, 2, - [[0.0, -0.25, -0.375, -0.4375], - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.375, 0.90625, 1.664062], - [0.0, 0.0, 0.0, 0.0]]), - (0, 3, True, 2, - [[0.0, -0.3125, -0.640625, -1.050781], - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.3125, 0.640625, 1.050781], - [0.0, 0.0, 0.0, 0.0]]), - ]) + @parameterized.expand([ + ( + 0, 2, False, 4, + [[0.0292969, 0.0146484, 7.32422e-3], + [0.0241851, 0.0120926, 6.04628e-3], + [1.0000000, 
1.0000000, 1.00000000], + [0.0000000, 0.0000000, 0.00000000], + [3.5190582, 4.3988228, 5.49852848], + [1.0046375, 1.2557969, 1.56974616], + [10.283203, 15.424805, 23.1372070], + [5.2278892, 7.8418338, 11.7627508]], + ), + ( + 0, 3, True, 4, + [[-3.6785927, -5.1949205, -7.4034407, -10.641996], + [1.57386873, 2.28176590, 3.33616491, 4.90228915], + [-2.7078896, -4.2095690, -6.4107649, -9.6456583], + [1.55205002, 2.27087143, 3.33072273, 4.89956999], + [-0.1888313, -0.8107462, -1.9122365, -3.7724977], + [0.55371389, 1.02143924, 1.76795263, 2.94536298], + [6.57531357, 10.2152357, 15.7264421, 24.0601522], + [3.67596914, 5.57112656, 8.43221341, 12.7447568]], + ), + ( + 0, 3, False, 2, + [[0.51464844, 0.50732422, 0.50366211, 0.50183105], + [0.01209256, 0.00604628, 0.00302314, 0.00151157], + [6.90113068, 9.91181374, 14.3178678, 20.7894856], + [3.11499629, 4.54718783, 6.66416616, 9.80049950]], + ), + ( + 0, 3, True, 2, + [[-3.1932411, -4.7022448, -6.9071028, -10.143827], + [1.56295067, 2.27631715, 3.33344356, 4.90092953], + [3.19324112, 4.70224476, 6.90710282, 10.1438273], + [1.56295067, 2.27631715, 3.33344356, 4.90092953]], + ), + ]) def test_average_cumulative_return_by_quantile_2(self, before, after, demeaned, quantiles, expected_vals): diff --git a/alphalens/tests/test_tears.py b/alphalens/tests/test_tears.py index 5c09a10e..99f09887 100644 --- a/alphalens/tests/test_tears.py +++ b/alphalens/tests/test_tears.py @@ -136,8 +136,8 @@ class TearsTestCase(TestCase): event_factor = DataFrame(index=factor_index, columns=tickers, data=event_data).stack() - all_prices = [prices, bprices, intraday_prices] - all_factors = [factor, bfactor, intraday_factor] + all_prices = [prices, bprices] + all_factors = [factor, bfactor] all_events = [event_factor, bevent_factor] def __localize_prices_and_factor(self, prices, factor, tz): @@ -186,10 +186,12 @@ def test_create_information_tear_sheet( create_information_tear_sheet( factor_data, group_neutral=False, by_group=False) - @parameterized.expand([(2, (2, 3, 6), None, 20), - (4, (1, 2, 3, 7), None, None), - (2, (2, 3, 6), ['1D', '2D'], 20), - (4, (1, 2, 3, 7), ['1D'], None)]) + @parameterized.expand([ + (2, (2, 3, 6), None, 20), + (4, (1, 2, 3, 7), None, None), + (2, (2, 3, 6), ['1D', '2D'], 20), + (4, (1, 2, 3, 7), ['1D'], None), + ]) def test_create_turnover_tear_sheet( self, quantiles, @@ -230,10 +232,12 @@ def test_create_summary_tear_sheet( create_summary_tear_sheet( factor_data, long_short=False, group_neutral=False) - @parameterized.expand([(2, (1, 5, 10), None, None), - (3, (2, 4, 6), 20, 'US/Eastern'), - (4, (1, 8), 20, None), - (4, (1, 2, 3, 7), None, 'US/Eastern')]) + @parameterized.expand([ + (2, (1, 5, 10), None, None), + (3, (2, 4, 6), 20, 'US/Eastern'), + (4, (1, 8), 20, None), + (4, (1, 2, 3, 7), None, 'US/Eastern'), + ]) def test_create_full_tear_sheet( self, quantiles, diff --git a/alphalens/utils.py b/alphalens/utils.py index 1a343904..5b44cf4c 100644 --- a/alphalens/utils.py +++ b/alphalens/utils.py @@ -674,8 +674,7 @@ def get_clean_factor_and_forward_returns(factor, groupby_labels=None, max_loss=0.35, zero_aware=False, - cumulative_returns=True, - is_returns=False): + cumulative_returns=True): """ Formats the factor data, pricing data, and group mappings into a DataFrame that contains aligned MultiIndex indices of timestamp and asset. 
The @@ -819,16 +818,19 @@ def get_clean_factor_and_forward_returns(factor, -------------------------------------------------------- | LULU |-0.03| 0.05|-0.009| 2.7 | G1 | 2 -------------------------------------------------------- - """ - if not is_returns: - forward_returns = compute_forward_returns(factor, prices, periods, - filter_zscore, - cumulative_returns) - else: - forward_returns = prices - forward_returns.index.levels[0].name = "date" - forward_returns.index.levels[1].name = "asset" + See Also + -------- + utils.get_clean_factor + For use when forward returns are already available. + """ + forward_returns = compute_forward_returns( + factor, + prices, + periods, + filter_zscore, + cumulative_returns, + ) factor_data = get_clean_factor(factor, forward_returns, groupby=groupby, groupby_labels=groupby_labels, @@ -949,6 +951,23 @@ def timedelta_to_string(timedelta): return format +def timedelta_strings_to_integers(sequence): + """ + Converts pandas string representations of timedeltas into integers of days. + + Parameters + ---------- + sequence : iterable + List or array of timedelta string representations, e.g. ['1D', '5D']. + + Returns + ------- + sequence : list + Integer days corresponding to the input sequence, e.g. [1, 5]. + """ + return list(map(lambda x: pd.Timedelta(x).days, sequence)) + + def add_custom_calendar_timedelta(input, timedelta, freq): """ Add timedelta to 'input' taking into consideration custom frequency, which diff --git a/setup.py b/setup.py index a67d3ffd..bad182e8 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ 'seaborn>=0.6.0', 'statsmodels>=0.6.1', 'IPython>=3.2.3', - 'empyrical==0.5.0', + 'empyrical>=0.5.0', ] extra_reqs = {