Merge pull request #2168 from 1122455801/data_analysis20191031

[Add] Data analysis
This commit is contained in:
vn.py 2019-11-01 10:54:20 +08:00 committed by GitHub
commit 020a35b4fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 494 additions and 0 deletions

View File

@ -0,0 +1,137 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from vnpy.trader.constant import Exchange,Interval\n",
"from data_analysis import DataAnalysis\n",
"from datetime import datetime\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"herramiento = DataAnalysis()\n",
"herramiento.load_history( \n",
" symbol=\"XBTUSD\", \n",
" exchange=Exchange.BITMEX, \n",
" interval=Interval.MINUTE, \n",
" start=datetime(2019, 9, 1), \n",
" end=datetime(2019, 10, 30),\n",
" rate = 8/10000,\n",
" index_3to1 = [\"ATR\",\"CCI\"],\n",
" index_1to1 = [\"STDDEV\",\"SMA\"],\n",
" index_2to2 = [\"AROON\"],\n",
" index_2to1 = [\"AROONOSC\"],\n",
" index_4to1 = [\"BOP\"],\n",
" window_index=30,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"data = herramiento.base_analysis()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"herramiento.show_chart(data[:1500], boll_wide=2.8)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# 多时间周期分析\n",
"intervals = [\"5min\",\"15min\",\"30min\",\"1h\",\"2h\",\"4h\"]\n",
"herramiento.multi_time_frame_analysis(intervals=intervals)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,357 @@
from datetime import datetime
import warnings
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import adfuller as ADF
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import talib
from vnpy.trader.constant import Exchange, Interval
from vnpy.trader.database import database_manager
warnings.filterwarnings("ignore")
class DataAnalysis:
    """
    Exploratory statistics and technical-indicator calculation on bar data.

    Typical workflow: call ``load_history()`` to read bars from the database
    into ``self.orignal``, then ``base_analysis()`` to run the analysis steps
    and attach derived columns, and optionally ``multi_time_frame_analysis()``
    and ``show_chart()``.

    The attribute name ``orignal`` (sic) is kept unchanged so that existing
    callers reading it keep working.
    """

    def __init__(self):
        """Create an analyser with default settings and no data loaded."""
        self.symbol = ""
        self.exchange = None
        self.interval = None
        self.start = None
        self.end = None
        self.rate = 0.0
        self.window_volatility = 20
        self.window_index = 20
        self.orignal = pd.DataFrame()
        self.index_1to1 = []
        self.index_2to2 = []
        self.index_3to1 = []
        self.index_2to1 = []
        self.index_4to1 = []
        self.intervals = []
        self.results = {}

    @staticmethod
    def _to_float_array(series):
        """Return the values of ``series`` as a float64 array (talib needs doubles)."""
        return np.asarray(series, dtype=float)

    def _resolve_frame(self, df):
        """Return ``df``, or the loaded history when ``df`` is None."""
        return self.orignal if df is None else df

    def load_history(
        self,
        symbol: str,
        exchange: Exchange,
        interval: Interval,
        start: datetime,
        end: datetime,
        rate: float = 0.0,
        index_1to1: list = None,
        index_2to2: list = None,
        index_3to1: list = None,
        index_2to1: list = None,
        index_4to1: list = None,
        window_index: int = 20,
        window_volatility: int = 20,
    ):
        """
        Load bars from the database into ``self.orignal`` and store settings.

        Args:
            symbol, exchange, interval, start, end: forwarded to
                ``database_manager.load_bar_data``.
            rate: multiplied by the close price to get the per-bar fixed
                cost used in the relative-volatility step.
            index_1to1: talib function names called as ``f(close, window)``.
            index_2to2: talib function names called as ``f(high, low, window)``
                and returning two arrays.
            index_3to1: talib function names called as
                ``f(high, low, close, window)``.
            index_2to1: talib function names called as ``f(high, low, window)``.
            index_4to1: talib function names called as
                ``f(open, high, low, close)``.
            window_index: window passed to the indicator functions.
            window_volatility: window passed to ATR in the volatility step.
        """
        output("开始加载历史数据")

        self.symbol = symbol
        self.exchange = exchange
        self.interval = interval
        self.start = start
        self.end = end

        self.window_volatility = window_volatility
        self.window_index = window_index
        self.rate = rate

        # Normalise omitted lists to empty lists so later loops never see None.
        self.index_1to1 = list(index_1to1) if index_1to1 else []
        self.index_2to2 = list(index_2to2) if index_2to2 else []
        self.index_3to1 = list(index_3to1) if index_3to1 else []
        self.index_2to1 = list(index_2to1) if index_2to1 else []
        self.index_4to1 = list(index_4to1) if index_4to1 else []

        # Load history data from database
        bars = database_manager.load_bar_data(
            symbol=symbol,
            exchange=exchange,
            interval=interval,
            start=start,
            end=end,
        )

        output(f"历史数据加载完成,数据量:{len(bars)}")

        # Build a fresh DataFrame on every load. Writing columns into the
        # previous frame would fail when the new bar count differs from the
        # old one and would keep derived columns from an earlier analysis.
        self.orignal = pd.DataFrame(
            {
                "open": [bar.open_price for bar in bars],
                "high": [bar.high_price for bar in bars],
                "low": [bar.low_price for bar in bars],
                "close": [bar.close_price for bar in bars],
                "volume": [bar.volume for bar in bars],
            },
            index=[bar.datetime for bar in bars],
            columns=["open", "high", "low", "close", "volume"],
        )

    def base_analysis(self, df: DataFrame = None):
        """
        Run the full analysis pipeline on ``df`` (default: loaded history).

        Plots the close price, runs the randomness, stationarity and
        autocorrelation checks, then the relative-volatility, growth and
        indicator steps. Derived columns are written into ``df`` in place.

        Returns:
            The same DataFrame with the derived columns added, or None when
            there is no data to analyse.
        """
        df = self._resolve_frame(df)

        if df is None or df.empty:
            output("数据为空,请输入数据")
            return None

        close_price = df["close"]

        output("第一步:画出行情图,检查数据断点")

        close_price.plot(figsize=(20, 8), title="close_price")
        plt.show()

        random_test(close_price)
        stability_test(close_price)
        autocorrelation_test(close_price)

        self.relative_volatility_analysis(df)
        self.growth_analysis(df)

        self.calculate_index(df)

        return df

    def relative_volatility_analysis(self, df: DataFrame = None):
        """
        Relative volatility: ATR minus the fixed cost (close * rate).

        Adds the columns ``volatility``, ``fixed_cost`` and ``relative_vol``
        to ``df``, plots the series and its histogram and prints summary
        statistics.
        """
        df = self._resolve_frame(df)

        output("第五步:相对波动率分析")
        df["volatility"] = talib.ATR(
            self._to_float_array(df["high"]),
            self._to_float_array(df["low"]),
            self._to_float_array(df["close"]),
            self.window_volatility
        )

        df["fixed_cost"] = df["close"] * self.rate
        df["relative_vol"] = df["volatility"] - df["fixed_cost"]

        df["relative_vol"].plot(figsize=(20, 6), title="relative volatility")
        plt.show()

        df["relative_vol"].hist(bins=200, figsize=(20, 6), grid=False)
        plt.show()

        statitstic_info(df["relative_vol"])

    def growth_analysis(self, df: DataFrame = None):
        """
        Percentage change of the close price from bar to bar.

        Adds the columns ``pre_close`` and ``g%`` to ``df``, plots the series
        and its histogram and prints summary statistics.
        """
        df = self._resolve_frame(df)

        output("第六步:变化率分析")

        # The first bar has no predecessor. Leaving it as NaN (instead of 0)
        # avoids a spurious +100% sample that would stretch the histogram
        # range and distort mean / skew / kurtosis; pandas skips NaN there.
        df["pre_close"] = df["close"].shift(1)

        # NOTE(review): the change is expressed relative to the current
        # close, not the previous close - kept as in the original.
        df["g%"] = 100 * (df["close"] - df["pre_close"]) / df["close"]

        df["g%"].plot(figsize=(20, 6), title="growth", ylim=(-5, 5))
        plt.show()

        df["g%"].hist(bins=200, figsize=(20, 6), grid=False)
        plt.show()

        statitstic_info(df["g%"])

    def calculate_index(self, df: DataFrame = None):
        """
        Compute the configured talib indicators and add them to ``df``.

        Each configured name is looked up on the ``talib`` module. Results
        are stored under the indicator name, except for the two-output group
        which is stored as ``<name>_UP`` / ``<name>_DOWN``.

        Returns:
            The DataFrame with the indicator columns added.
        """
        df = self._resolve_frame(df)

        output("第七步:计算相关技术指标,返回DataFrame\n")

        window = self.window_index

        if self.index_1to1:
            close = self._to_float_array(df["close"])
            for name in self.index_1to1:
                df[name] = getattr(talib, name)(close, window)

        if self.index_3to1:
            high = self._to_float_array(df["high"])
            low = self._to_float_array(df["low"])
            close = self._to_float_array(df["close"])
            for name in self.index_3to1:
                df[name] = getattr(talib, name)(high, low, close, window)

        if self.index_2to2:
            high = self._to_float_array(df["high"])
            low = self._to_float_array(df["low"])
            for name in self.index_2to2:
                # The first returned array is stored as "_DOWN" and the
                # second as "_UP", as in the original code.
                result_down, result_up = getattr(talib, name)(high, low, window)
                df[name + "_UP"] = result_up
                df[name + "_DOWN"] = result_down

        if self.index_2to1:
            high = self._to_float_array(df["high"])
            low = self._to_float_array(df["low"])
            for name in self.index_2to1:
                df[name] = getattr(talib, name)(high, low, window)

        if self.index_4to1:
            open_ = self._to_float_array(df["open"])
            high = self._to_float_array(df["high"])
            low = self._to_float_array(df["low"])
            close = self._to_float_array(df["close"])
            for name in self.index_4to1:
                df[name] = getattr(talib, name)(open_, high, low, close)

        return df

    def multi_time_frame_analysis(self, intervals: list = None, df: DataFrame = None):
        """
        Resample the bars to each interval and run ``base_analysis`` on it.

        Args:
            intervals: pandas resample rules, e.g. ``["5min", "1h"]``.
            df: source bars with a datetime index (default: loaded history).

        The analysed frame of every interval is stored in
        ``self.results[interval]``.
        """
        if not intervals:
            output("请输入K线合成周期")
            return

        df = self._resolve_frame(df)

        if df is None or df.empty:
            output("请先加载数据")
            return

        self.intervals = list(intervals)

        for interval in intervals:
            output("------------------------------------------------")
            output(f"合成{interval}周期K先并开始数据分析")

            # resample(rule, how=...) was removed from pandas; calling the
            # aggregation on the resampler works on old and new versions.
            data = pd.DataFrame()
            data["open"] = df["open"].resample(interval).first()
            data["high"] = df["high"].resample(interval).max()
            data["low"] = df["low"].resample(interval).min()
            data["close"] = df["close"].resample(interval).last()
            data["volume"] = df["volume"].resample(interval).sum()

            # Bins without any source bar have NaN prices; drop them so they
            # do not poison the statistical tests and indicators.
            data = data.dropna(subset=["open", "high", "low", "close"])

            result = self.base_analysis(data)
            self.results[interval] = result

    def show_chart(self, data, boll_wide):
        """
        Plot close price with Bollinger bands and band-crossing signals.

        ``data`` must contain the columns ``close``, ``SMA``, ``STDDEV`` and
        ``ATR``. The columns ``boll_up`` and ``boll_down`` are written into
        ``data``.

        Args:
            data: DataFrame produced by ``base_analysis``.
            boll_wide: band width as a multiple of ``STDDEV``.
        """
        data["boll_up"] = data["SMA"] + data["STDDEV"] * boll_wide
        data["boll_down"] = data["SMA"] - data["STDDEV"] * boll_wide

        # Vectorised crossing detection. A comparison involving NaN is False,
        # which matches the behaviour of the element-wise loop it replaces.
        close_values = np.asarray(data["close"], dtype=float)
        upper = np.asarray(data["boll_up"], dtype=float)
        lower = np.asarray(data["boll_down"], dtype=float)

        cross_up = (close_values[1:] > upper[1:]) & (close_values[:-1] < upper[:-1])
        cross_down = (
            (close_values[1:] < lower[1:])
            & (close_values[:-1] > lower[:-1])
            & ~cross_up  # an up signal takes precedence on the same bar
        )

        # +1 because the masks start at the second bar.
        up_signal = (np.flatnonzero(cross_up) + 1).tolist()
        down_signal = (np.flatnonzero(cross_down) + 1).tolist()

        plt.figure(figsize=(20, 8))
        close = data["close"]
        plt.plot(close, lw=1)
        plt.plot(close, '^', markersize=5, color='r', label='UP signal', markevery=up_signal)
        plt.plot(close, 'v', markersize=5, color='g', label='DOWN signal', markevery=down_signal)
        plt.plot(data["boll_up"], lw=0.5, color="r")
        plt.plot(data["boll_down"], lw=0.5, color="g")
        plt.legend()
        plt.show()

        data["ATR"].plot(figsize=(20, 3), title="ATR")
        plt.show()
def random_test(close_price):
    """
    Ljung-Box white-noise test on the close price at lag 1.

    Prints whether the series is purely random (p-value >= 0.05) or not,
    followed by the raw test result.
    """
    acorr_result = acorr_ljungbox(close_price, lags=1)

    # Newer statsmodels returns a DataFrame with an "lb_pvalue" column;
    # older versions return a (statistic, p-value) tuple of arrays.
    if hasattr(acorr_result, "columns"):
        p_value = float(acorr_result["lb_pvalue"].iloc[0])
    else:
        p_value = float(np.ravel(acorr_result[1])[0])

    if p_value < 0.05:
        output("第二步:随机性检验:非纯随机性")
    else:
        output("第二步:随机性检验:纯随机性")
    output(f"白噪声检验结果:{acorr_result}\n")
def stability_test(close_price):
    """
    Augmented Dickey-Fuller unit-root test on the close price.

    The series is reported as non-stationary when the ADF test statistic is
    greater than the 10% critical value, i.e. the unit-root null hypothesis
    cannot be rejected at that level.
    """
    statitstic = ADF(close_price)

    # adfuller returns (test statistic, p-value, used lag, nobs,
    # critical values, ...). The critical value must be compared with the
    # test statistic at index 0, not with the p-value at index 1.
    t_s = statitstic[0]
    t_c = statitstic[4]["10%"]

    if t_s > t_c:
        output("第三步:平稳性检验:存在单位根,时间序列不平稳")
    else:
        output("第三步:平稳性检验:不存在单位根,时间序列平稳")
    output(f"ADF检验结果:{statitstic}\n")
def autocorrelation_test(close_price, lags: int = 60):
    """
    Plot the autocorrelation and partial autocorrelation of the close price.

    Args:
        close_price: price series to analyse.
        lags: number of lags shown in both plots (default 60).
    """
    output("第四步:画出自相关性图,观察自相关特性")

    plot_acf(close_price, lags=lags)
    plt.show()

    # Both figures are displayed through plt.show(); an extra Figure.show()
    # is redundant and only triggers a warning on non-GUI backends.
    plot_pacf(close_price, lags=lags)
    plt.show()
def statitstic_info(df):
    """
    Print mean, median, skewness and kurtosis of ``df``, rounded to 4 digits.

    ``df`` only needs to provide ``mean()``, ``median()``, ``skew()`` and
    ``kurt()``. Skewness and kurtosis are each labelled by their sign.
    """

    def classify(value, zero_label, positive_label, negative_label):
        # Anything that is neither zero nor positive gets the negative label.
        if value == 0:
            return zero_label
        if value > 0:
            return positive_label
        return negative_label

    mean = round(df.mean(), 4)
    median = round(df.median(), 4)
    output(f"样本平均数:{mean}, 中位数: {median}")

    skew = round(df.skew(), 4)
    kurt = round(df.kurt(), 4)

    skew_attribute = classify(skew, "对称分布", "分布偏左", "分布偏右")
    kurt_attribute = classify(kurt, "正态分布", "分布陡峭", "分布平缓")

    output(f"偏度为:{skew},属于{skew_attribute};峰度为:{kurt},属于{kurt_attribute}\n")
def output(msg):
    """
    Print ``msg`` prefixed with the current local time and a tab.
    """
    line = f"{datetime.now()}\t{msg}"
    print(line)