注文遅延データ

より現実的なバックテスト結果を得るためには、レイテンシを考慮することが重要です。したがって、フィードデータと注文データの両方をタイムスタンプ付きで収集し、注文遅延を測定する必要があります。最良のアプローチは、自分自身の注文遅延を収集することです。ライブ取引に基づいて注文遅延を収集するか、約定しない価格の注文を定期的に発注し、記録を取ったうえでそれらをキャンセルすることで注文遅延を収集できます。ただし、そうした実測データを入手できない場合や、目標とするレイテンシを設定したい場合は、人工的に注文遅延を生成する必要があります。このレイテンシは、フィードレイテンシ、取引量、イベントの数などの要因に基づいてモデル化できます。このガイドでは、調整のための乗数とオフセットを使用して、フィードレイテンシから注文遅延を生成する簡単な方法を示します。

まず、フィードデータを読み込みます。

[ ]:

import numpy as np

# Open the npz archive and pull out the array stored under its "data" key.
feed_archive = np.load("btcusdt_20200201.npz")
data = feed_archive["data"]
data

array([(3758096386, 1580515202342000000, 1580515202497052000, 9364.51, 1.197, 0, 0, 0.),
       (3758096386, 1580515202342000000, 1580515202497346000, 9365.67, 0.02 , 0, 0, 0.),
       (3758096386, 1580515202342000000, 1580515202497352000, 9365.86, 0.01 , 0, 0, 0.),
       ...,
       (3489660929, 1580601599836000000, 1580601599962961000, 9351.47, 3.914, 0, 0, 0.),
       (3489660929, 1580601599836000000, 1580601599963461000, 9397.78, 0.1  , 0, 0, 0.),
       (3489660929, 1580601599848000000, 1580601599973647000, 9348.14, 3.98 , 0, 0, 0.)],
      dtype=[('ev', '<i8'), ('exch_ts', '<i8'), ('local_ts', '<i8'), ('px', '<f8'), ('qty', '<f8'), ('order_id', '<u8'), ('ival', '<i8'), ('fval', '<f8')])

操作を簡単にするために、DataFrameに変換します。

[2]:

import polars as pl

# Build a polars DataFrame from the structured array loaded above.
df = pl.DataFrame(data)
df

[2]:

shape: (27_532_602, 8)

ev	exch_ts	local_ts	px	qty	order_id	ival	fval
i64	i64	i64	f64	f64	u64	i64	f64
3758096386	1580515202342000000	1580515202497052000	9364.51	1.197	0	0	0.0
3758096386	1580515202342000000	1580515202497346000	9365.67	0.02	0	0	0.0
3758096386	1580515202342000000	1580515202497352000	9365.86	0.01	0	0	0.0
3758096386	1580515202342000000	1580515202497357000	9366.36	0.002	0	0	0.0
3758096386	1580515202342000000	1580515202497363000	9366.36	0.003	0	0	0.0
…	…	…	…	…	…	…	…
3489660929	1580601599812000000	1580601599944404000	9397.79	0.0	0	0	0.0
3489660929	1580601599826000000	1580601599952176000	9354.8	4.07	0	0	0.0
3489660929	1580601599836000000	1580601599962961000	9351.47	3.914	0	0	0.0
3489660929	1580601599836000000	1580601599963461000	9397.78	0.1	0	0	0.0
3489660929	1580601599848000000	1580601599973647000	9348.14	3.98	0	0	0.0

有効な取引所タイムスタンプと有効なローカルタイムスタンプの両方を持つイベントのみを選択して、フィードレイテンシを取得します。

[ ]:

from hftbacktest import EXCH_EVENT, LOCAL_EVENT

# Keep only rows whose "ev" flags have both the EXCH_EVENT and LOCAL_EVENT
# bits set, i.e. events with both timestamps valid.
ev_flags = pl.col("ev")
has_exch_ts = (ev_flags & EXCH_EVENT) == EXCH_EVENT
has_local_ts = (ev_flags & LOCAL_EVENT) == LOCAL_EVENT

df = df.filter(has_exch_ts & has_local_ts)

約1秒間隔にリサンプリングして行数を減らします。

[ ]:

# Window the rows on a copy of local_ts using an integer step of
# 1_000_000_000 (about one second per the accompanying text) and keep the
# last exchange/local timestamp pair seen in each window.
keyed = df.with_columns(pl.col("local_ts").alias("ts"))
windows = keyed.group_by_dynamic("ts", every="1000000000i")
last_per_window = windows.agg(
    pl.col("exch_ts").last(),
    pl.col("local_ts").last(),
)
# The helper key column is only needed for windowing.
df = last_per_window.drop("ts")

df

shape: (86_394, 2)

exch_ts	local_ts
i64	i64
1580515202843000000	1580515202979365000
1580515203551000000	1580515203943566000
1580515203789000000	1580515204875639000
1580515204127000000	1580515205962135000
1580515204738000000	1580515206983780000
…	…
1580601595869000000	1580601595997115000
1580601596865000000	1580601596994060000
1580601597864000000	1580601597987786000
1580601598870000000	1580601598997068000
1580601599848000000	1580601599973647000

構造化されたNumPy配列に戻します。

[5]:

# Convert the resampled DataFrame back to a structured NumPy array so the
# fields can be accessed by name (exch_ts, local_ts).
data = df.to_numpy(structured=True)
data

[5]:

array([(1580515202843000000, 1580515202979365000),
       (1580515203551000000, 1580515203943566000),
       (1580515203789000000, 1580515204875639000), ...,
       (1580601597864000000, 1580601597987786000),
       (1580601598870000000, 1580601598997068000),
       (1580601599848000000, 1580601599973647000)],
      dtype=[('exch_ts', '<i8'), ('local_ts', '<i8')])

注文遅延を生成します。注文遅延は、注文リクエストが取引所のマッチングエンジンに到達するまでのレイテンシと、応答がローカルに戻るまでのレイテンシの2つのコンポーネントで構成されます。注文遅延はフィードレイテンシと同じではなく、フィードレイテンシに比例する必要はありません。ただし、簡単のために、注文遅延をフィードレイテンシに比例するようにモデル化し、乗数とオフセットを使用します。

[ ]:

# Multiplier/offset applied to the feed latency for the request leg
# (local -> exchange) and the response leg (exchange -> local).
mul_entry, offset_entry = 4, 0
mul_resp, offset_resp = 3, 0

latency_dtype = [
    ("req_ts", "i8"),
    ("exch_ts", "i8"),
    ("resp_ts", "i8"),
    ("_padding", "i8"),
]
order_latency = np.zeros(len(data), dtype=latency_dtype)

for i, row in enumerate(data):
    feed_latency = row["local_ts"] - row["exch_ts"]

    # The request timestamp is the local receipt time of the feed event.
    req_ts = row["local_ts"]
    order_exch_ts = req_ts + (mul_entry * feed_latency + offset_entry)
    resp_ts = order_exch_ts + (mul_resp * feed_latency + offset_resp)

    # The last field is the unused "_padding" slot.
    order_latency[i] = (req_ts, order_exch_ts, resp_ts, 0)

order_latency

array([(1580515202979365000, 1580515203524825000, 1580515203933920000, 0),
       (1580515203943566000, 1580515205513830000, 1580515206691528000, 0),
       (1580515204875639000, 1580515209222195000, 1580515212482112000, 0),
       ...,
       (1580601597987786000, 1580601598482930000, 1580601598854288000, 0),
       (1580601598997068000, 1580601599505340000, 1580601599886544000, 0),
       (1580601599973647000, 1580601600476235000, 1580601600853176000, 0)],
      dtype=[('req_ts', '<i8'), ('exch_ts', '<i8'), ('resp_ts', '<i8'), ('_padding', '<i8')])

[7]:

# Wrap the generated latency records in a DataFrame for inspection.
df_order_latency = pl.DataFrame(order_latency)
df_order_latency

[7]:

shape: (86_394, 4)

req_ts	exch_ts	resp_ts	_padding
i64	i64	i64	i64
1580515202979365000	1580515203524825000	1580515203933920000	0
1580515203943566000	1580515205513830000	1580515206691528000	0
1580515204875639000	1580515209222195000	1580515212482112000	0
1580515205962135000	1580515213302675000	1580515218808080000	0
1580515206983780000	1580515215966900000	1580515222704240000	0
…	…	…	…
1580601595997115000	1580601596509575000	1580601596893920000	0
1580601596994060000	1580601597510300000	1580601597897480000	0
1580601597987786000	1580601598482930000	1580601598854288000	0
1580601598997068000	1580601599505340000	1580601599886544000	0
1580601599973647000	1580601600476235000	1580601600853176000	0

レイテンシに無効な値（負の値やゼロ）がないか確認します。

[ ]:

# Request leg: time from sending the request until it reaches the exchange.
order_entry_latency = df_order_latency["exch_ts"] - df_order_latency["req_ts"]
# Response leg: time from the exchange timestamp until the response arrives.
order_resp_latency = df_order_latency["resp_ts"] - df_order_latency["exch_ts"]

[9]:

# Count rows whose entry latency is zero or negative.
(order_entry_latency <= 0).sum()

[9]:

[10]:

# Count rows whose response latency is zero or negative.
(order_resp_latency <= 0).sum()

[10]:

ここでは、速度を上げるためにnjitを使用し、プロセス全体を関数にまとめます。

[ ]:

import numpy as np
import polars as pl
from numba import njit

from hftbacktest import EXCH_EVENT, LOCAL_EVENT

from hftbacktest import EXCH_EVENT, LOCAL_EVENT

from hftbacktest import EXCH_EVENT, LOCAL_EVENT

from hftbacktest import EXCH_EVENT, LOCAL_EVENT

from hftbacktest import EXCH_EVENT, LOCAL_EVENT

from hftbacktest import EXCH_EVENT, LOCAL_EVENT

from hftbacktest import EXCH_EVENT, LOCAL_EVENT

from hftbacktest import EXCH_EVENT, LOCAL_EVENT

from hftbacktest import EXCH_EVENT, LOCAL_EVENT

from hftbacktest import EXCH_EVENT, LOCAL_EVENT

from hftbacktest import EXCH_EVENT, LOCAL_EVENT

from hftbacktest import EXCH_EVENT, LOCAL_EVENT


@njit
def generate_order_latency_nb(
    data, order_latency, mul_entry, offset_entry, mul_resp, offset_resp
):
    """Fill ``order_latency`` in place from the feed latency of each row.

    For row ``i`` the feed latency is ``data[i].local_ts - data[i].exch_ts``.
    The request timestamp is ``local_ts``; the exchange timestamp is the
    request timestamp plus ``mul_entry * feed_latency + offset_entry``; the
    response timestamp is the exchange timestamp plus
    ``mul_resp * feed_latency + offset_resp``.

    Args:
        data: Record array exposing ``exch_ts`` and ``local_ts`` fields.
        order_latency: Record array exposing ``req_ts``, ``exch_ts`` and
            ``resp_ts`` fields; rows ``0..len(data)-1`` are overwritten.
        mul_entry: Multiplier applied to the feed latency for the request leg.
        offset_entry: Constant added to the request-leg latency.
        mul_resp: Multiplier applied to the feed latency for the response leg.
        offset_resp: Constant added to the response-leg latency.
    """
    n_rows = len(data)
    for i in range(n_rows):
        row = data[i]
        feed_latency = row.local_ts - row.exch_ts

        # The request is stamped with the local receipt time of the feed event.
        req_ts = row.local_ts
        order_exch_ts = req_ts + (mul_entry * feed_latency + offset_entry)
        resp_ts = order_exch_ts + (mul_resp * feed_latency + offset_resp)

        order_latency[i].req_ts = req_ts
        order_latency[i].exch_ts = order_exch_ts
        order_latency[i].resp_ts = resp_ts


def generate_order_latency(
    feed_file, output_file=None, mul_entry=1, offset_entry=0, mul_resp=1, offset_resp=0
):
    """Generate order-latency records from a feed-data file.

    Loads the ``"data"`` array from ``feed_file``, keeps only events whose
    ``ev`` flags contain both ``EXCH_EVENT`` and ``LOCAL_EVENT``, reduces them
    to the last ``(exch_ts, local_ts)`` pair per ``1000000000``-unit window of
    ``local_ts``, and derives request/exchange/response timestamps from each
    pair via ``generate_order_latency_nb``.

    Args:
        feed_file: Path passed to ``np.load``; must contain a ``"data"`` entry.
        output_file: If not ``None``, the result is also written there with
            ``np.savez_compressed`` under the key ``data``.
        mul_entry: Multiplier applied to the feed latency for the request leg.
        offset_entry: Constant added to the request-leg latency.
        mul_resp: Multiplier applied to the feed latency for the response leg.
        offset_resp: Constant added to the response-leg latency.

    Returns:
        Structured array with ``req_ts``, ``exch_ts``, ``resp_ts`` and
        ``_padding`` fields (all ``i8``), one row per sampled window.
    """
    feed = pl.DataFrame(np.load(feed_file)["data"])

    # Predicates selecting rows that carry both event flags.
    has_exch_ts = (pl.col("ev") & EXCH_EVENT) == EXCH_EVENT
    has_local_ts = (pl.col("ev") & LOCAL_EVENT) == LOCAL_EVENT

    sampled = (
        feed.filter(has_exch_ts & has_local_ts)
        .with_columns(pl.col("local_ts").alias("ts"))
        .group_by_dynamic("ts", every="1000000000i")
        .agg(pl.col("exch_ts").last(), pl.col("local_ts").last())
        .drop("ts")
    )
    samples = sampled.to_numpy(structured=True)

    latency_dtype = [
        ("req_ts", "i8"),
        ("exch_ts", "i8"),
        ("resp_ts", "i8"),
        ("_padding", "i8"),
    ]
    order_latency = np.zeros(len(samples), dtype=latency_dtype)

    # Fills order_latency in place; the "_padding" field stays zero.
    generate_order_latency_nb(
        samples, order_latency, mul_entry, offset_entry, mul_resp, offset_resp
    )

    if output_file is not None:
        np.savez_compressed(output_file, data=order_latency)

    return order_latency

[ ]:

# Generate order latencies from the feed file with a 4x entry multiplier and a
# 3x response multiplier (offsets left at their default of 0), and also save
# the result to the given compressed npz file.
order_latency = generate_order_latency(
    "btcusdt_20200201.npz",
    output_file="feed_latency_20200201.npz",
    mul_entry=4,
    mul_resp=3,
)