vnpy/tutorial/performance/Python Performance.ipynb
2017-05-05 23:24:39 +08:00

512 lines
16 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# 这个测试目标在于仿造一个类似于实盘中,不断有新的数据推送过来,\n",
"# 然后需要计算移动平均线数值,这么一个比较常见的任务。\n",
"\n",
"from __future__ import division\n",
"import time\n",
"import random\n",
"\n",
"# 生成测试用的数据\n",
"data = []\n",
"data_length = 100000 # 总数据量\n",
"ma_length = 500 # 移动均线的窗口\n",
"test_times = 10 # 测试次数\n",
"\n",
"for i in range(data_length):\n",
" data.append(random.randint(1, 100))"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时:1.16959998608秒\n",
"单个数据点耗时:11.7547737294微秒\n",
"最后10个移动平均值 [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
]
}
],
"source": [
"# 计算500期的移动均线并将结果保存到一个列表里返回\n",
"def ma_basic(data, ma_length):\n",
" \n",
" # 用于保存均线输出结果的列表\n",
" ma = []\n",
" \n",
" # 计算均线用的数据窗口\n",
" data_window = data[:ma_length]\n",
" \n",
" # 测试用数据(去除了之前初始化用的部分)\n",
" test_data = data[ma_length:]\n",
" \n",
" # 模拟实盘不断收到新数据推送的情景,遍历历史数据计算均线\n",
" for new_tick in test_data:\n",
" # 移除最老的数据点并增加最新的数据点\n",
" data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" \n",
" # 遍历求均线\n",
" sum_tick = 0\n",
" for tick in data_window:\n",
" sum_tick += tick\n",
" ma.append(sum_tick/ma_length)\n",
" \n",
" # 返回数据\n",
" return ma\n",
"\n",
"# 运行测试\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_basic(data, ma_length)\n",
"\n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
" \n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时:2.11879999638秒\n",
"单个数据点耗时:21.2944723254微秒\n",
"最后10个移动平均值 [49.804000000000002, 49.832000000000001, 49.799999999999997, 49.899999999999999, 49.892000000000003, 49.887999999999998, 49.927999999999997, 50.052, 50.106000000000002, 49.981999999999999]\n"
]
}
],
"source": [
"# 改用numpy(首先是一种常见的错误用法)\n",
"import numpy as np\n",
"\n",
"def ma_numpy_wrong(data, ma_length):\n",
" ma = []\n",
" data_window = data[:ma_length]\n",
" test_data = data[ma_length:]\n",
" \n",
" for new_tick in test_data:\n",
" data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" \n",
" # 使用numpy求均线,注意这里本质上每次循环\n",
" # 都在创建一个新的numpy数组对象,开销很大\n",
" data_array = np.array(data_window)\n",
" ma.append(data_array.mean())\n",
" \n",
" return ma\n",
"\n",
"# 运行测试\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_numpy_wrong(data, ma_length)\n",
" \n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
" \n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时:0.614300012589秒\n",
"单个数据点耗时:6.17386947325微秒\n",
"最后10个移动平均值 [49.804000000000002, 49.832000000000001, 49.799999999999997, 49.899999999999999, 49.892000000000003, 49.887999999999998, 49.927999999999997, 50.052, 50.106000000000002, 49.981999999999999]\n"
]
}
],
"source": [
"# numpy的正确用法\n",
"def ma_numpy_right(data, ma_length):\n",
" ma = []\n",
" \n",
" # 用numpy数组来缓存计算窗口内的数据\n",
" data_window = np.array(data[:ma_length])\n",
" \n",
" test_data = data[ma_length:]\n",
" \n",
" for new_tick in test_data:\n",
" # 使用numpy数组的底层数据偏移来实现数据更新\n",
" data_window[0:ma_length-1] = data_window[1:ma_length]\n",
" data_window[-1] = new_tick\n",
" ma.append(data_window.mean())\n",
" \n",
" return ma\n",
"\n",
"# 运行测试\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_numpy_right(data, ma_length)\n",
" \n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
" \n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时:0.043700003624秒\n",
"单个数据点耗时:0.439196016321微秒\n",
"最后10个移动平均值 [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
]
}
],
"source": [
"# 使用numba加速,ma_numba函数和ma_basic完全一样\n",
"import numba\n",
"\n",
"@numba.jit\n",
"def ma_numba(data, ma_length):\n",
" ma = []\n",
" data_window = data[:ma_length]\n",
" test_data = data[ma_length:]\n",
" \n",
" for new_tick in test_data:\n",
" data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" sum_tick = 0\n",
" for tick in data_window:\n",
" sum_tick += tick\n",
" ma.append(sum_tick/ma_length)\n",
"\n",
" return ma\n",
"\n",
"# 运行测试\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_numba(data, ma_length)\n",
"\n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
" \n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时:0.0348000049591秒\n",
"单个数据点耗时:0.349748793559微秒\n",
"最后10个移动平均值 [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
]
}
],
"source": [
"# 将均线计算改写为高速算法\n",
"def ma_online(data, ma_length):\n",
" ma = []\n",
" data_window = data[:ma_length]\n",
" test_data = data[ma_length:]\n",
" \n",
" # 缓存的窗口内数据求和结果\n",
" sum_buffer = 0\n",
" \n",
" for new_tick in test_data:\n",
" old_tick = data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" \n",
" # 如果缓存结果为空,则先通过遍历求第一次结果\n",
" if not sum_buffer:\n",
" sum_tick = 0\n",
" for tick in data_window:\n",
" sum_tick += tick\n",
" ma.append(sum_tick/ma_length)\n",
" \n",
" # 将求和结果缓存下来\n",
" sum_buffer = sum_tick\n",
" else:\n",
" # 这里的算法将计算复杂度从O(n)降低到了O(1)\n",
" sum_buffer = sum_buffer - old_tick + new_tick\n",
" ma.append(sum_buffer/ma_length)\n",
" \n",
" return ma\n",
"\n",
"# 运行测试\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_online(data, ma_length)\n",
" \n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
"\n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时:0.0290000200272秒\n",
"单个数据点耗时:0.29145748771微秒\n",
"最后10个移动平均值 [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
]
}
],
"source": [
"# 高速算法和numba结合,ma_online_numba函数和ma_online完全一样\n",
"@numba.jit\n",
"def ma_online_numba(data, ma_length):\n",
" ma = []\n",
" data_window = data[:ma_length]\n",
" test_data = data[ma_length:]\n",
" \n",
" sum_buffer = 0\n",
" \n",
" for new_tick in test_data:\n",
" old_tick = data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" \n",
" if not sum_buffer:\n",
" sum_tick = 0\n",
" for tick in data_window:\n",
" sum_tick += tick\n",
" ma.append(sum_tick/ma_length)\n",
" sum_buffer = sum_tick\n",
" else:\n",
" sum_buffer = sum_buffer - old_tick + new_tick\n",
" ma.append(sum_buffer/ma_length)\n",
"\n",
" return ma\n",
"\n",
"# 运行测试\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_online_numba(data, ma_length)\n",
" \n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
"\n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"\"\"\"\n",
"# 基础的cython加速\n",
"def ma_cython(data, ma_length):\n",
" ma = []\n",
" data_window = data[:ma_length]\n",
" test_data = data[ma_length:]\n",
" \n",
" for new_tick in test_data:\n",
" data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" \n",
" sum_tick = 0\n",
" for tick in data_window:\n",
" sum_tick += tick\n",
" ma.append(sum_tick/ma_length)\n",
" \n",
" return ma\n",
" \n",
"\n",
"# cython和高速算法\n",
"def ma_cython_online(data, ma_length):\n",
" # 静态声明变量\n",
" cdef int sum_buffer, sum_tick, old_tick, new_tick\n",
"\n",
" ma = []\n",
" data_window = data[:ma_length]\n",
" test_data = data[ma_length:]\n",
" sum_buffer = 0\n",
" \n",
" for new_tick in test_data:\n",
" old_tick = data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" \n",
" if not sum_buffer:\n",
" sum_tick = 0\n",
" for tick in data_window:\n",
" sum_tick += tick\n",
" ma.append(sum_tick/ma_length)\n",
" \n",
" sum_buffer = sum_tick\n",
" else:\n",
" sum_buffer = sum_buffer - old_tick + new_tick\n",
" ma.append(sum_buffer/ma_length)\n",
" \n",
" return ma\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时:0.600800013542秒\n",
"单个数据点耗时:6.03819109088微秒\n",
"最后10个移动平均值 [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
]
}
],
"source": [
"# 基础cython加速\n",
"from test import ma_cython\n",
"\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_cython(data, ma_length)\n",
" \n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
"\n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时:0.00980000495911秒\n",
"单个数据点耗时:0.0984925121518微秒\n",
"最后10个移动平均值 [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
]
}
],
"source": [
"# 高速算法和cython结合\n",
"from test import ma_cython_online\n",
"\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_cython_online(data, ma_length)\n",
"\n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
"\n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}