VeighNa量化社区
你的开源社区量化交易平台
Member
avatar
加入于:
帖子: 19
声望: 18

很多时候,需要用CSV文件直接回测

先看用法:

from vnpy.app.cta_strategy.csv_backtesting import CsvBacktestingEngine, OptimizationSetting
from vnpy.app.cta_strategy.base import BacktestingMode
from vnpy.app.cta_strategy.strategies.atr_rsi_strategy import ( 
    AtrRsiStrategy,
)
from datetime import datetime

engine = CsvBacktestingEngine()
engine.set_parameters(
    vt_symbol="IF88.CFFEX",
    interval="1m",
    start=datetime(2016, 1, 1),
    end=datetime(2019, 4, 30),
    rate=0.3/10000,
    slippage=0.2,
    size=300,
    pricetick=0.2,
    capital=1_000_000,
)
engine.add_strategy(AtrRsiStrategy, {})

engine.load_data("data.csv", names = [
    "datetime",
    "open_price",
    "high_price",
    "low_price",
    "close_price",
    "volume",
    "open_interest",
])

engine.run_backtesting()
df = engine.calculate_result()
engine.calculate_statistics()
engine.show_chart()

将下列代码命名为csv_backtesting.py保存到 cta_strategy 目录下并且与backtesting.py 同一目录

import pandas as pd
from vnpy.app.cta_strategy.backtesting import *


def generate_bar_from_row(row, symbol, exchange):
    """
    Generate bar from row.
    """
    return BarData(
        symbol=symbol,
        exchange=Exchange(exchange),
        interval=Interval.MINUTE,
        open_price=row["open"],
        high_price=row["high"],
        low_price=row["low"],
        close_price=row["close"],
        open_interest=row["open_interest"] or 0,
        volume=row["volume"],
        datetime=row.name.to_pydatetime(),
        gateway_name="DB",
    )


def generate_tick_from_row(row, symbol, exchange):
    """
    Generate tick from row.
    """
    return TickData(
        symbol=symbol,
        exchange=Exchange(exchange),
        datetime=row["datetime"],
        name=row["name"],
        volume=row["volume"],
        open_interest=row["open_interest"],
        last_price=row["last_price"],
        last_volume=row["last_volume"],
        limit_up=row["limit_up"],
        limit_down=row["limit_down"],
        open_price=row["open_price"],
        high_price=row["high_price"],
        low_price=row["low_price"],
        pre_close=row["pre_close"],
        bid_price_1=row["bid_price_1"],
        bid_price_2=row["bid_price_2"],
        bid_price_3=row["bid_price_3"],
        bid_price_4=row["bid_price_4"],
        bid_price_5=row["bid_price_5"],
        ask_price_1=row["ask_price_1"],
        ask_price_2=row["ask_price_2"],
        ask_price_3=row["ask_price_3"],
        ask_price_4=row["ask_price_4"],
        ask_price_5=row["ask_price_5"],
        bid_volume_1=row["bid_volume_1"],
        bid_volume_2=row["bid_volume_2"],
        bid_volume_3=row["bid_volume_3"],
        bid_volume_4=row["bid_volume_4"],
        bid_volume_5=row["bid_volume_5"],
        ask_volume_1=row["ask_volume_1"],
        ask_volume_2=row["ask_volume_2"],
        ask_volume_3=row["ask_volume_3"],
        ask_volume_4=row["ask_volume_4"],
        ask_volume_5=row["ask_volume_5"],
        gateway_name="DB",
    )


class CsvBacktestingEngine(BacktestingEngine):
    def __init__(self):
        super().__init__()

    def load_data(
        self,
        filename: str,
        names: list = [
            "datetime",
            "open_price",
            "high_price",
            "low_price",
            "close_price",
            "volume",
            "open_interest",
        ],
        compression: any = None,
        parse_dates: bool = True,
        skiprows: int = 1,
    ):
        """
        Load Bar Names: [
            "datetime",
            "open_price",
            "high_price",
            "low_price",
            "close_price",
            "volume",
            "open_interest",
        ]

        Load Tick Names: [
            "datetime",
            "name",
            "volume",
            "open_interest",
            "last_price",
            "last_volume",
            "limit_up",
            "limit_down",
            "open_price",
            "high_price",
            "low_price",
            "pre_close",
            "bid_price_1",
            "bid_price_2",
            "bid_price_3",
            "bid_price_4",
            "bid_price_5",
            "ask_price_1",
            "ask_price_2",
            "ask_price_3",
            "ask_price_4",
            "ask_price_5",
            "bid_volume_1",
            "bid_volume_2",
            "bid_volume_3",
            "bid_volume_4",
            "bid_volume_5",
            "ask_volume_1",
            "ask_volume_2",
            "ask_volume_3",
            "ask_volume_4",
            "ask_volume_5",
        ]
        """
        self.output("开始加载历史数据")

        if not self.end:
            self.end = datetime.now()

        if self.start >= self.end:
            self.output("起始日期必须小于结束日期")
            return

        self.history_data.clear()  # Clear previously loaded history data

        # Load 30 days of data each time and allow for progress update
        progress_delta = timedelta(days=30)
        total_delta = self.end - self.start
        interval_delta = INTERVAL_DELTA_MAP[self.interval]

        start = self.start
        end = self.start + progress_delta
        progress = 0

        while start < self.end:
            end = min(end, self.end)  # Make sure end time stays within set range

            df = pd.read_csv(filename, compression, parse_dates, skiprows, names,)
            # Generate
            symbol, exchange = self.vt_symbol.split(".")
            data = []
            if df is not None and not df.empty:
                for ix, row in df.iterrows():
                    if row["datetime"] > self.start and row["datetime"] < self.end:
                        if self.mode == BacktestingMode.BAR:
                            data.append(generate_bar_from_row(row, symbol, exchange))
                        else:
                            data.append(generate_tick_from_row(row, symbol, exchange))
            else:
                self.output("Csv file has no Data!")
                return

            self.history_data.extend(data)

            progress += progress_delta / total_delta
            progress = min(progress, 1)
            progress_bar = "#" * int(progress * 10)
            self.output(f"加载进度:{progress_bar} [{progress:.0%}]")

            start = end + interval_delta
            end += progress_delta + interval_delta

        self.output(f"历史数据加载完成,数据量:{len(self.history_data)}")
Member
avatar
加入于:
帖子: 2
声望: 0

请大神帮我看看,这个是什么原因造成的。
description
数据格式是这样的。
description

Member
avatar
加入于:
帖子: 41
声望: 3

小白请教下data.csv文件要放在那个文件夹内?
data.csv文件是tick数据?

Member
avatar
加入于:
帖子: 7
声望: 0

wesley wrote:

请大神帮我看看,这个是什么原因造成的。
description
数据格式是这样的。
description
你这代码是什么软件写的?

Member
加入于:
帖子: 50
声望: 2

你们加载不上去 多半是 数据 有问题

Member
avatar
加入于:
帖子: 38
声望: 1

好像载入数据有点问题,这个数据可以正常csv_loader进vnpy,

engine.load_data("D:\Tmp\rb_hot_1min.txt", names = [
"datetime",
"open_price",
"high_price",
"low_price",
"close_price",
"volume",

#"open_interest",

])

engine.run_backtesting()

df = engine.calculate_result()

engine.calculate_statistics()

engine.show_chart()


TypeError Traceback (most recent call last)

<ipython-input-22-2da52defcc9d> in <module>
5 "low_price",
6 "close_price",
----> 7 "volume"
8 #"open_interest",
9 ])

d:\vnstudio\lib\site-packages\vnpy\app\cta_strategy\csv_backtesting.py in load_data(self, filename, names, compression, parse_dates, skiprows)
154 end = min(end, self.end) # Make sure end time stays within set range
155
--> 156 df = pd.read_csv(filename, compression, parse_dates, skiprows, names,)
157 # Generate
158 symbol, exchange = self.vt_symbol.split(".")

d:\vnstudio\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
700 skip_blank_lines=skip_blank_lines)
701
--> 702 return _read(filepath_or_buffer, kwds)
703
704 parser_f.name = name

d:\vnstudio\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
427
428 # Create the parser.
--> 429 parser = TextFileReader(filepath_or_buffer, **kwds)
430
431 if chunksize or iterator:

d:\vnstudio\lib\site-packages\pandas\io\parsers.py in init(self, f, engine, **kwds)
888 # might mutate self.engine
889 self.engine = self._check_file_or_buffer(f, engine)
--> 890 self.options, self.engine = self._clean_options(options, engine)
891
892 if 'has_index_names' in kwds:

d:\vnstudio\lib\site-packages\pandas\io\parsers.py in _clean_options(self, options, engine)
976 " sep=None with delim_whitespace=False")
977 engine = 'python'
--> 978 elif sep is not None and len(sep) > 1:
979 if engine == 'c' and sep == r'\s+':
980 result['delim_whitespace'] = True

TypeError: object of type 'bool' has no len()

数据格式是这样的
"Datetime","Open","High","Low","Close","Volume"
2013/4/15 09:01:00,3800,3800,3785,3791,113892
2013/4/15 09:02:00,3791,3792,3783,3788,56304
2013/4/15 09:03:00,3788,3791,3781,3783,48004
2013/4/15 09:04:00,3783,3795,3783,3794,42608
2013/4/15 09:05:00,3793,3793,3788,3788,27592

Member
加入于:
帖子: 50
声望: 2

eastpeace wrote:

好像载入数据有点问题,这个数据可以正常csv_loader进vnpy,

engine.load_data("D:\Tmp\rb_hot_1min.txt", names = [
"datetime",
"open_price",
"high_price",
"low_price",
"close_price",
"volume",

#"open_interest",

])

engine.run_backtesting()

df = engine.calculate_result()

engine.calculate_statistics()

engine.show_chart()


TypeError Traceback (most recent call last)

<ipython-input-22-2da52defcc9d> in <module>
5 "low_price",
6 "close_price",
----> 7 "volume"
8 #"open_interest",
9 ])

d:\vnstudio\lib\site-packages\vnpy\app\cta_strategy\csv_backtesting.py in load_data(self, filename, names, compression, parse_dates, skiprows)
154 end = min(end, self.end) # Make sure end time stays within set range
155
--> 156 df = pd.read_csv(filename, compression, parse_dates, skiprows, names,)
157 # Generate
158 symbol, exchange = self.vt_symbol.split(".")

d:\vnstudio\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
700 skip_blank_lines=skip_blank_lines)
701
--> 702 return _read(filepath_or_buffer, kwds)
703
704 parser_f.name = name

d:\vnstudio\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
427
428 # Create the parser.
--> 429 parser = TextFileReader(filepath_or_buffer, **kwds)
430
431 if chunksize or iterator:

d:\vnstudio\lib\site-packages\pandas\io\parsers.py in init(self, f, engine, **kwds)
888 # might mutate self.engine
889 self.engine = self._check_file_or_buffer(f, engine)
--> 890 self.options, self.engine = self._clean_options(options, engine)
891
892 if 'has_index_names' in kwds:

d:\vnstudio\lib\site-packages\pandas\io\parsers.py in _clean_options(self, options, engine)
976 " sep=None with delim_whitespace=False")
977 engine = 'python'
--> 978 elif sep is not None and len(sep) > 1:
979 if engine == 'c' and sep == r'\s+':
980 result['delim_whitespace'] = True

TypeError: object of type 'bool' has no len()

数据格式是这样的
"Datetime","Open","High","Low","Close","Volume"
2013/4/15 09:01:00,3800,3800,3785,3791,113892
2013/4/15 09:02:00,3791,3792,3783,3788,56304
2013/4/15 09:03:00,3788,3791,3781,3783,48004
2013/4/15 09:04:00,3783,3795,3783,3794,42608
2013/4/15 09:05:00,3793,3793,3788,3788,27592

我跟你遇到一样的问题 。我感觉应该还是 表头的问题 。希望大佬能解答一下

Member
avatar
加入于:
帖子: 3
声望: 2

同样遇到了这个问题,load_data函数无法读取csv文件,但是在外面读取没有问题。很费解。

Member
avatar
加入于:
帖子: 50
声望: 1

请教一下,运行时,十几M的1min csv文件加载非常慢,一分钟加载进度才增加了3%,这个有可能是什么原因?

description

Member
avatar
加入于:
帖子: 50
声望: 1

文件格式如图

description

Member
avatar
加入于:
帖子: 12
声望: 0

晴空 wrote:

请教一下,运行时,十几M的1min csv文件加载非常慢,一分钟加载进度才增加了3%,这个有可能是什么原因?

description
换64g 内存

Member
avatar
加入于:
帖子: 103
声望: 7

欢乐马1618 wrote:

小白请教下data.csv文件要放在那个文件夹内?
data.csv文件是tick数据?
需要导入到vnpy数据库中

Member
avatar
加入于:
帖子: 44
声望: 0

好慢

Member
avatar
加入于:
帖子: 16
声望: 0

这种方法太慢了,

Member
avatar
加入于:
帖子: 16
声望: 0

代码冗余。while循环里除了最后一个for循环,其他的提到外面会加速不少

Member
avatar
加入于:
帖子: 19
声望: 18

湘水量化 wrote:

这种方法太慢了,

也可以存成Pickle,应该是最快的方式

© 2015-2022 上海韦纳软件科技有限公司
备案服务号:沪ICP备18006526号

沪公网安备 31011502017034号

【用户协议】
【隐私政策】
【免责条款】