Source code for hftbacktest.stats.stats

import inspect
from abc import ABC, abstractmethod
from typing import Any, List, Type, Mapping, Literal

import numpy as np
import polars as pl
from numpy.typing import NDArray

from .metrics import (
    Metric,
    SR,
    Sortino,
    Ret,
    MaxDrawdown,
    DailyTradingValue,
    ReturnOverMDD,
    ReturnOverTrade,
    MaxPositionValue, DailyNumberOfTrades
)
from .utils import resample, monthly, daily, hourly


def compute_metrics(
        df: pl.DataFrame,
        metrics: List[Metric | Type[Metric]],
        kwargs: Mapping[str, Any]
) -> Mapping[str, Any]:
    context = {
        'start': df['timestamp'][0],
        'end': df['timestamp'][-1],
    }

    for metric in metrics:
        if isinstance(metric, type):
            sig = inspect.signature(metric.__init__)
            valid_kwargs = {k: v for k, v in kwargs.items() if k in sig.parameters}
            metric = metric(**valid_kwargs)

        ret = metric.compute(df, context)

        for key, value in ret.items():
            context[key] = value

    return context


[docs] class Stats: """ **Example** .. code-block:: python import numpy as np from hftbacktest.stats import LinearAssetRecord asset0_record = np.load('backtest_result.npz')['0'] stats = ( LinearAssetRecord(asset0_record) .resample('10s') .monthly() .stats(book_size=100000) ) stats.summary() stats.plot() """ def __init__(self, entire: pl.DataFrame, splits: List[Mapping[str, Any]], kwargs: Mapping[str, Any]): self.entire = entire self.splits = splits self.kwargs = kwargs
[docs] def summary(self, pretty: bool = False): """ Displays the statistics summary. Args: pretty: Returns the statistics in a pretty-printed format. """ df = pl.DataFrame(self.splits) return df
[docs] def plot(self, price_as_ret: bool = False, backend: Literal['matplotlib', 'holoviews'] = 'matplotlib'): """ Plots the equity curves and positions over time along with the price chart. Args: price_as_ret: Plots the price chart in cumulative returns if set to `True`; otherwise, it plots the price chart in raw price terms. backend: Specifies which plotting library is used to plot the charts. The default is 'matplotlib'. """ if backend == 'matplotlib': self.plot_matplotlib(price_as_ret) elif backend == 'holoviews': return self.plot_holoviews(price_as_ret) else: raise ValueError(f'{backend} is unsupported')
def plot_holoviews(self, price_as_ret: bool = False): import holoviews as hv entire_df = self.entire kwargs = self.kwargs equity = entire_df['equity_wo_fee'] - entire_df['fee'] equity_wo_fee = entire_df['equity_wo_fee'] book_size = kwargs.get('book_size') if book_size is not None: if price_as_ret: equity_plt = hv.Overlay([ hv.Curve( (entire_df['timestamp'], equity / book_size * 100), label='Equity', vdims=['Cumulative Returns (%)'] ), hv.Curve( (entire_df['timestamp'], equity_wo_fee / book_size * 100), label='Equity w/o fee', vdims=['Cumulative Returns (%)'] ), hv.Curve( (entire_df['timestamp'], (entire_df['price'] / entire_df['price'][0] - 1.0) * 100), label='Price', vdims=['Cumulative Returns (%)'] ).opts(alpha=0.2, color='black') ]) else: equity_plt = hv.Overlay([ hv.Curve( (entire_df['timestamp'], equity / book_size * 100), label='Equity', vdims=['Cumulative Returns (%)'] ), hv.Curve( (entire_df['timestamp'], equity_wo_fee / book_size * 100), label='Equity w/o fee', vdims=['Cumulative Returns (%)'] ) ]) * hv.Curve( (entire_df['timestamp'], entire_df['price']), label='Price', vdims=['Price'] ).opts(xlabel='timestamp', alpha=0.2, color='black') else: equity_plt = hv.Overlay([ hv.Curve( (entire_df['timestamp'], equity), label='Equity', vdims=['Equity'] ), hv.Curve( (entire_df['timestamp'], equity_wo_fee), label='Equity w/o fee', vdims=['Equity'] ) ]) * hv.Curve( (entire_df['timestamp'], entire_df['price']), label='Price', vdims=['Price'] ).opts(xlabel='timestamp', alpha=0.2, color='black') px_plt = hv.Curve( (entire_df['timestamp'], entire_df['price']), label='Price', vdims=['Price'] ).opts(xlabel='timestamp', alpha=0.2, color='black') pos_plt = hv.Curve( (entire_df['timestamp'], entire_df['position']), label='Position', vdims=['Position (Qty)'] ) plt1 = equity_plt.opts(yformatter='$%.2f') plt1.opts(multi_y=True, width=1000, height=400, legend_position='right', show_grid=True) plt2 = pos_plt.opts(yformatter='$%d') * px_plt plt2.opts(multi_y=True, width=1000, height=400, legend_position='right', show_grid=True) return (plt1.relabel('Equity') + plt2.relabel('Position')).cols(1) def plot_matplotlib(self, price_as_ret: bool = False): from matplotlib import pyplot as plt fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) fig.subplots_adjust(hspace=0) fig.set_size_inches(10, 6) entire_df = self.entire kwargs = self.kwargs equity = entire_df['equity_wo_fee'] - entire_df['fee'] equity_wo_fee = entire_df['equity_wo_fee'] book_size = kwargs.get('book_size') if book_size is not None: if price_as_ret: ax1.plot(entire_df['timestamp'], equity / book_size * 100) ax1.plot(entire_df['timestamp'], equity_wo_fee / book_size * 100) ax1.plot(entire_df['timestamp'], (entire_df['price'] / entire_df['price'][0] - 1.0) * 100, 'black', alpha=0.2) ax1.set_ylabel('Cumulative Returns (%)') ax1.legend(['Equity', 'Equity w/o fee', 'Price']) else: ax1.plot(entire_df['timestamp'], equity / book_size * 100) ax1.plot(entire_df['timestamp'], equity_wo_fee / book_size * 100) ax1_ = ax1.twinx() ax1_.plot(entire_df['timestamp'], entire_df['price'], 'black', alpha=0.2) ax1.set_ylabel('Cumulative Returns (%)') ax1_.set_ylabel('Price') ax1.legend(['Equity', 'Equity w/o fee']) ax1_.legend(['Price']) else: ax1.plot(entire_df['timestamp'], equity) ax1.plot(entire_df['timestamp'], equity_wo_fee) ax1_ = ax1.twinx() ax1_.plot(entire_df['timestamp'], entire_df['price'], 'black', alpha=0.2) ax1.set_ylabel('Equity') ax1_.set_ylabel('Price') ax1.legend(['Equity', 'Equity w/o fee', 'Price']) ax1_.legend(['Price']) ax1.grid() ax2.plot(entire_df['timestamp'], entire_df['position']) ax2_ = ax2.twinx() ax2_.plot(entire_df['timestamp'], entire_df['price'], 'black', alpha=0.2) ax2.set_ylabel('Position (Qty)') ax2_.set_ylabel('Price') ax2.legend(['Position']) ax2_.legend(['Price']) ax2.grid()
class Record(ABC): DEFAULT_METRICS = ( SR, Sortino, Ret, MaxDrawdown, DailyNumberOfTrades, DailyTradingValue, ReturnOverMDD, ReturnOverTrade, MaxPositionValue ) def __init__(self, data: NDArray | pl.DataFrame): self._contract_size = 1.0 self._time_unit = 'ns' self._frequency = '10s' self._partition = None if isinstance(data, np.ndarray): self.df = pl.DataFrame(data) elif isinstance(data, pl.DataFrame): self.df = data else: raise ValueError def contract_size(self, contract_size: float) -> 'Self': """ Sets the contract size. The default value is `1.0`. Args: contract_size: The asset's contract size. """ self._contract_size = contract_size return self def time_unit(self, time_unit: str) -> 'Self': """ Sets the time unit for converting timestamps in the records to datetime. The default value is `ns`. Args: time_unit: The unit of time of the timesteps since epoch time. This internally uses `Polars`, please see `polars.from_epoch <https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.from_epoch.html>`_ for more details. """ self._time_unit = time_unit return self def resample(self, frequency: str) -> 'Self': """ Sets the resampling frequency for downsampling the record. This could affect the calculation of the metrics related to the sampling interval. Additionally, it reduces the time required for computing the metrics and plotting the charts. The default value is `10s`. Args: frequency: Interval of the window. This internally uses `Polars`, please see `polars.DataFrame.group_by_dynamic <https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.group_by_dynamic.html>`_ for more details. """ self._frequency = frequency return self def monthly(self) -> 'Self': """ Generates monthly statistics. """ self._partition = 'monthly' return self def daily(self) -> 'Self': """ Generates daily statistics. """ self._partition = 'daily' return self @abstractmethod def prepare(self): raise NotImplementedError def stats( self, metrics: List[Metric | Type[Metric]] | None = None, **kwargs: Any ) -> Stats: """ **Examples** .. code-block:: python stats = record.stats([SR('SR365', trading_days_per_year=365), AnnualRet(trading_days_per_year=365)] Args: metrics: The metrics specified in this list will be computed for the record. Each metric should be a class derived from the `Metric` class. If the class type, instead of an instance, is specified, an instance of the class will be constructed with the provided ``kwargs``. The default value is a list of :class:`SR <metrics.SR>`, :class:`Sortino <metrics.Sortino>`, :class:`Ret <metrics.Ret>`, :class:`MaxDrawdown <metrics.MaxDrawdown>`, :class:`DailyNumberOfTrades <metrics.DailyNumberOfTrades>`, :class:`DailyTradingValue <metrics.DailyTradingValue>`, :class:`ReturnOverMDD <metrics.ReturnOverMDD>`, :class:`ReturnOverTrade <metrics.rTrade>`, and :class:`MaxPositionValue <metrics.MaxPositionValue>`. kwargs: Keyword arguments that will be used to construct the `Metric` instance. Returns: The statistics for the specified metrics of the record. """ if metrics is None: metrics = Record.DEFAULT_METRICS if not isinstance(self.df['timestamp'].dtype, pl.Datetime): self.df = self.df.with_columns( pl.from_epoch('timestamp', time_unit=self._time_unit) ) if 'num_trades_' not in self.df: if 'num_trades' not in self.df: # This may not reflect the exact value since information could be lost between recording intervals. num_trades = self.df['position'].diff().fill_null(0).abs() num_trades = num_trades.set(num_trades > 0, 1) self.df = self.df.with_columns( num_trades.alias('num_trades_') ) else: self.df = self.df.with_columns( pl.col('num_trades').diff().fill_null(0).alias('num_trades_') ) if 'trading_volume_' not in self.df: if 'trading_volume' not in self.df: # This may not reflect the exact value since information could be lost between recording intervals. self.df = self.df.with_columns( pl.col('position').diff().fill_null(0).abs().alias('trading_volume_') ) else: self.df = self.df.with_columns( pl.col('trading_volume').diff().fill_null(0).alias('trading_volume_') ) # Prepares the asset type-specific data by computing it from the state records. self.prepare() if self._frequency is not None: # The DataFrame should be sorted by timestamp, even though it won't be resampled. self.df = self.df.set_sorted('timestamp') self.df = resample(self.df, self._frequency) if self._partition == 'monthly': splits = monthly(self.df) elif self._partition == 'daily': splits = daily(self.df) elif self._partition == 'hourly': splits = hourly(self.df) else: splits = [] stats = [compute_metrics(df, metrics, kwargs) for df in splits] # For the entire period. stats.append(compute_metrics(self.df, metrics, kwargs)) return Stats(self.df, stats, kwargs)
[docs] class LinearAssetRecord(Record): def prepare(self): if 'equity_wo_fee' not in self.df: self.df = self.df.with_columns( ( pl.col('balance') + pl.col('position') * pl.col('price') * self._contract_size ).alias('equity_wo_fee') ) if 'trading_value_' not in self.df: if 'trading_value' not in self.df: # This may not reflect the exact value since information could be lost between recording intervals. self.df = self.df.with_columns( ( pl.col('position').diff().fill_null(0) * pl.col('price') * self._contract_size ).alias('trading_value_') ) else: self.df = self.df.with_columns( pl.col('trading_value').diff().fill_null(0).alias('trading_value_') )
[docs] class InverseAssetRecord(Record): def prepare(self): if 'equity_wo_fee' not in self.df: self.df = self.df.with_columns( ( -pl.col('balance') - pl.col('position') / pl.col('price') * self._contract_size ).alias('equity_wo_fee') ) if 'trading_value_' not in self.df: if 'trading_value' not in self.df: # This may not reflect the exact value since information could be lost between recording intervals. self.df = self.df.with_columns( ( (pl.col('position').diff().fill_null(0) / pl.col('price')) * self._contract_size ).alias('trading_value_') ) else: self.df = self.df.with_columns( pl.col('trading_value').diff().fill_null(0).alias('trading_value_') )