# encoding: UTF-8
#
# Provenance: recovered from vn.py patch 6a82e579f20c12c794c798643f53ab0fd17b0f5b
# ("[Add]增加数据清洗脚本", i.e. "add data cleaning script", 2017-10-10), which
# creates examples/DataRecording/runDataCleaning.py.
#
# The same patch also edits examples/DataRecording/DR_setting.json:
#   * "tick" gains   ["rb1801", "CTP"], ["m1801", "CTP"], ["TA801", "CTP"]
#   * "bar"  drops   ["BTC_CNY_SPOT", "OKCOIN"], ["LTC_CNY_SPOT", "OKCOIN"],
#                    ["ETH_CNY_SPOT", "OKCOIN"]
#            and gains the same three CTP entries as "tick".

import json
from datetime import datetime, timedelta, time

from pymongo import MongoClient

from vnpy.trader.app.ctaStrategy.ctaBase import MINUTE_DB_NAME, TICK_DB_NAME


# Trading session boundaries; commodity futures are used as the example here
MORNING_START = time(9, 0)
MORNING_REST = time(10, 15)
MORNING_RESTART = time(10, 30)
MORNING_END = time(11, 30)
AFTERNOON_START = time(13, 30)
AFTERNOON_END = time(15, 0)
NIGHT_START = time(21, 0)
NIGHT_END = time(2, 30)


#----------------------------------------------------------------------
def _isTradingTime(dt):
    """Return True when the time-of-day `dt` lies inside a trading session."""
    return ((MORNING_START <= dt < MORNING_REST) or
            (MORNING_RESTART <= dt < MORNING_END) or
            (AFTERNOON_START <= dt < AFTERNOON_END) or
            # The night session runs from NIGHT_START past midnight to
            # NIGHT_END, so it is expressed as two open-ended comparisons.
            (dt >= NIGHT_START) or
            (dt < NIGHT_END))


#----------------------------------------------------------------------
def cleanData(dbName, collectionName, start):
    """
    Delete records that fall outside the trading sessions.

    Connects to the MongoDB server at localhost:27017, scans every document
    in `dbName`.`collectionName` whose 'datetime' field is >= `start`, and
    deletes those whose time of day is not inside any session defined by the
    module-level constants. Progress is reported on stdout.

    dbName: name of the database to clean.
    collectionName: name of the collection to clean.
    start: datetime lower bound (inclusive) of the records to inspect.
    """
    print(u'\n清洗数据库:%s, 集合:%s, 起始日:%s' %(dbName, collectionName, start))

    mc = MongoClient('localhost', 27017)
    try:
        cl = mc[dbName][collectionName]
        flt = {'datetime':{'$gte':start}}      # only look at data from start onwards

        for data in cl.find(flt):
            # Time-of-day part of the record's timestamp. The original stored
            # this in a local named `time` but tested `dt`, which raised
            # NameError on the first record.
            dt = data['datetime'].time()

            # Inside trading hours: valid data, nothing to clean
            if _isTradingTime(dt):
                continue

            print(u'删除无效数据,时间戳:%s' %data['datetime'])
            # Address the document by its primary key instead of matching on
            # every field of the document.
            cl.delete_one({'_id': data['_id']})
    finally:
        # Release the connection even if the scan fails half-way
        mc.close()

    print(u'清洗完成,数据库:%s, 集合:%s' %(dbName, collectionName))


#----------------------------------------------------------------------
def runDataCleaning():
    """
    Clean the last 10 days of recorded data for every configured symbol.

    Reads "DR_setting.json" from the current working directory and calls
    cleanData() for the first element (the symbol) of every entry in its
    'tick' list (against TICK_DB_NAME) and its 'bar' list (against
    MINUTE_DB_NAME).
    """
    print(u'开始数据清洗工作')

    # Load the recorder configuration
    with open("DR_setting.json") as f:
        setting = json.load(f)

    # Clean from midnight ten days ago. datetime.replace() returns a new
    # object, so the result has to be assigned back; the original discarded it
    # and the window started at the current time of day instead of 00:00.
    start = datetime.now() - timedelta(days=10)
    start = start.replace(hour=0, minute=0, second=0, microsecond=0)

    for entry in setting['tick']:
        cleanData(TICK_DB_NAME, entry[0], start)

    for entry in setting['bar']:
        cleanData(MINUTE_DB_NAME, entry[0], start)

    print(u'数据清洗工作完成')


if __name__ == '__main__':
    runDataCleaning()