增加一篇关于Python性能提升的文章和相关代码

This commit is contained in:
chenxy123 2016-12-04 21:59:45 +08:00
parent 7b5e7c6ab0
commit 03e4f73139
6 changed files with 587 additions and 0 deletions

5
vn.how/README.md Normal file
View File

@ -0,0 +1,5 @@
# vn.py项目的实战应用指南
本文件夹下的内容主要是围绕vn.py在实际交易中的一系列具体应用包括说明文档和代码例子。
* performance《百倍加速Python量化策略的算法性能提升指南》

View File

@ -0,0 +1,511 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# 这个测试目标在于仿造一个类似于实盘中,不断有新的数据推送过来,\n",
"# 然后需要计算移动平均线数值,这么一个比较常见的任务。\n",
"\n",
"from __future__ import division\n",
"import time\n",
"import random\n",
"\n",
"# 生成测试用的数据\n",
"data = []\n",
"data_length = 100000 # 总数据量\n",
"ma_length = 500 # 移动均线的窗口\n",
"test_times = 10 # 测试次数\n",
"\n",
"for i in range(data_length):\n",
" data.append(random.randint(1, 100))"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时1.16959998608秒\n",
"单个数据点耗时11.7547737294微秒\n",
"最后10个移动平均值 [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
]
}
],
"source": [
"# 计算500期的移动均线并将结果保存到一个列表里返回\n",
"def ma_basic(data, ma_length):\n",
" \n",
" # 用于保存均线输出结果的列表\n",
" ma = []\n",
" \n",
" # 计算均线用的数据窗口\n",
" data_window = data[:ma_length]\n",
" \n",
" # 测试用数据(去除了之前初始化用的部分)\n",
" test_data = data[ma_length:]\n",
" \n",
" # 模拟实盘不断收到新数据推送的情景,遍历历史数据计算均线\n",
" for new_tick in test_data:\n",
" # 移除最老的数据点并增加最新的数据点\n",
" data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" \n",
" # 遍历求均线\n",
" sum_tick = 0\n",
" for tick in data_window:\n",
" sum_tick += tick\n",
" ma.append(sum_tick/ma_length)\n",
" \n",
" # 返回数据\n",
" return ma\n",
"\n",
"# 运行测试\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_basic(data, ma_length)\n",
"\n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
" \n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时2.11879999638秒\n",
"单个数据点耗时21.2944723254微秒\n",
"最后10个移动平均值 [49.804000000000002, 49.832000000000001, 49.799999999999997, 49.899999999999999, 49.892000000000003, 49.887999999999998, 49.927999999999997, 50.052, 50.106000000000002, 49.981999999999999]\n"
]
}
],
"source": [
"# 改用numpy首先是一种常见的错误用法\n",
"import numpy as np\n",
"\n",
"def ma_numpy_wrong(data, ma_length):\n",
" ma = []\n",
" data_window = data[:ma_length]\n",
" test_data = data[ma_length:]\n",
" \n",
" for new_tick in test_data:\n",
" data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" \n",
" # 使用numpy求均线注意这里本质上每次循环\n",
" # 都在创建一个新的numpy数组对象开销很大\n",
" data_array = np.array(data_window)\n",
" ma.append(data_array.mean())\n",
" \n",
" return ma\n",
"\n",
"# 运行测试\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_numpy_wrong(data, ma_length)\n",
" \n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
" \n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时0.614300012589秒\n",
"单个数据点耗时6.17386947325微秒\n",
"最后10个移动平均值 [49.804000000000002, 49.832000000000001, 49.799999999999997, 49.899999999999999, 49.892000000000003, 49.887999999999998, 49.927999999999997, 50.052, 50.106000000000002, 49.981999999999999]\n"
]
}
],
"source": [
"# numpy的正确用法\n",
"def ma_numpy_right(data, ma_length):\n",
" ma = []\n",
" \n",
" # 用numpy数组来缓存计算窗口内的数据\n",
" data_window = np.array(data[:ma_length])\n",
" \n",
" test_data = data[ma_length:]\n",
" \n",
" for new_tick in test_data:\n",
" # 使用numpy数组的底层数据偏移来实现数据更新\n",
" data_window[0:ma_length-1] = data_window[1:ma_length]\n",
" data_window[-1] = new_tick\n",
" ma.append(data_window.mean())\n",
" \n",
" return ma\n",
"\n",
"# 运行测试\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_numpy_right(data, ma_length)\n",
" \n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
" \n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时0.043700003624秒\n",
"单个数据点耗时0.439196016321微秒\n",
"最后10个移动平均值 [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
]
}
],
"source": [
"# 使用numba加速ma_numba函数和ma_basic完全一样\n",
"import numba\n",
"\n",
"@numba.jit\n",
"def ma_numba(data, ma_length):\n",
" ma = []\n",
" data_window = data[:ma_length]\n",
" test_data = data[ma_length:]\n",
" \n",
" for new_tick in test_data:\n",
" data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" sum_tick = 0\n",
" for tick in data_window:\n",
" sum_tick += tick\n",
" ma.append(sum_tick/ma_length)\n",
"\n",
" return ma\n",
"\n",
"# 运行测试\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_numba(data, ma_length)\n",
"\n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
" \n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时0.0348000049591秒\n",
"单个数据点耗时0.349748793559微秒\n",
"最后10个移动平均值 [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
]
}
],
"source": [
"# 将均线计算改写为高速算法\n",
"def ma_online(data, ma_length):\n",
" ma = []\n",
" data_window = data[:ma_length]\n",
" test_data = data[ma_length:]\n",
" \n",
" # 缓存的窗口内数据求和结果\n",
" sum_buffer = 0\n",
" \n",
" for new_tick in test_data:\n",
" old_tick = data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" \n",
" # 如果缓存结果为空,则先通过遍历求第一次结果\n",
" if not sum_buffer:\n",
" sum_tick = 0\n",
" for tick in data_window:\n",
" sum_tick += tick\n",
" ma.append(sum_tick/ma_length)\n",
" \n",
" # 将求和结果缓存下来\n",
" sum_buffer = sum_tick\n",
" else:\n",
" # 这里的算法将计算复杂度从O(n)降低到了O(1)\n",
" sum_buffer = sum_buffer - old_tick + new_tick\n",
" ma.append(sum_buffer/ma_length)\n",
" \n",
" return ma\n",
"\n",
"# 运行测试\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_online(data, ma_length)\n",
" \n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
"\n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时0.0290000200272秒\n",
"单个数据点耗时0.29145748771微秒\n",
"最后10个移动平均值 [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
]
}
],
"source": [
"# 高速算法和numba结合ma_online_numba函数和ma_online完全一样\n",
"@numba.jit\n",
"def ma_online_numba(data, ma_length):\n",
" ma = []\n",
" data_window = data[:ma_length]\n",
" test_data = data[ma_length:]\n",
" \n",
" sum_buffer = 0\n",
" \n",
" for new_tick in test_data:\n",
" old_tick = data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" \n",
" if not sum_buffer:\n",
" sum_tick = 0\n",
" for tick in data_window:\n",
" sum_tick += tick\n",
" ma.append(sum_tick/ma_length)\n",
" sum_buffer = sum_tick\n",
" else:\n",
" sum_buffer = sum_buffer - old_tick + new_tick\n",
" ma.append(sum_buffer/ma_length)\n",
"\n",
" return ma\n",
"\n",
"# 运行测试\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_online_numba(data, ma_length)\n",
" \n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
"\n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"\"\"\"\n",
"# 基础的cython加速\n",
"def ma_cython(data, ma_length):\n",
" ma = []\n",
" data_window = data[:ma_length]\n",
" test_data = data[ma_length:]\n",
" \n",
" for new_tick in test_data:\n",
" data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" \n",
" sum_tick = 0\n",
" for tick in data_window:\n",
" sum_tick += tick\n",
" ma.append(sum_tick/ma_length)\n",
" \n",
" return ma\n",
" \n",
"\n",
"# cython和高速算法\n",
"def ma_cython_online(data, ma_length):\n",
" # 静态声明变量\n",
" cdef int sum_buffer, sum_tick, old_tick, new_tick\n",
"\n",
" ma = []\n",
" data_window = data[:ma_length]\n",
" test_data = data[ma_length:]\n",
" sum_buffer = 0\n",
" \n",
" for new_tick in test_data:\n",
" old_tick = data_window.pop(0)\n",
" data_window.append(new_tick)\n",
" \n",
" if not sum_buffer:\n",
" sum_tick = 0\n",
" for tick in data_window:\n",
" sum_tick += tick\n",
" ma.append(sum_tick/ma_length)\n",
" \n",
" sum_buffer = sum_tick\n",
" else:\n",
" sum_buffer = sum_buffer - old_tick + new_tick\n",
" ma.append(sum_buffer/ma_length)\n",
" \n",
" return ma\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时0.600800013542秒\n",
"单个数据点耗时6.03819109088微秒\n",
"最后10个移动平均值 [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
]
}
],
"source": [
"# 基础cython加速\n",
"from test import ma_cython\n",
"\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_cython(data, ma_length)\n",
" \n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
"\n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"单次耗时0.00980000495911秒\n",
"单个数据点耗时0.0984925121518微秒\n",
"最后10个移动平均值 [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
]
}
],
"source": [
"# 高速算法和cython结合\n",
"from test import ma_cython_online\n",
"\n",
"start = time.time()\n",
"\n",
"for i in range(test_times):\n",
" result = ma_cython_online(data, ma_length)\n",
"\n",
"time_per_test = (time.time()-start)/test_times\n",
"time_per_point = time_per_test/(data_length - ma_length)\n",
"\n",
"print u'单次耗时:%s秒' %time_per_test\n",
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
"print u'最后10个移动平均值', result[-10:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,16 @@
# 使用说明
### 使用步骤
1. 在当前文件夹下打开cmd窗口
2. 输入ipython notebook运行
3. 打开Python Performance笔记本使用Shift+回车逐个Cell运行
### 编译Cython
打开cmd输入运行
> python test_setup.py build_ext --inplace
### 文件说明
* Python Performance.ipynbJupyter Notebook笔记本
* test.pyxCython模块的源代码
* test_setup.py编译test.pyx所需的配置文件
* test.pyd编译好的Cython模块可以在Python里直接import

BIN
vn.how/performance/test.pyd Normal file

Binary file not shown.

View File

@ -0,0 +1,48 @@
#encoding:utf-8
from __future__ import division
# Baseline moving average: plain Python code that is merely compiled by Cython.
def ma_cython(data, ma_length):
    """Return the rolling means of ``data`` over a ``ma_length``-point window.

    The first ``ma_length`` items only prime the window; one mean is emitted
    for every item after them, so the result holds
    ``max(len(data) - ma_length, 0)`` values.

    This is the untyped baseline variant: the whole window is summed again
    for every incoming item. ``ma_cython_online`` in this module is the
    incremental counterpart.

    :param data: sequence of ticks; it must slice into an object that offers
        ``pop`` and ``append`` (a list does, and its slice is a copy, so the
        caller's list is left untouched).
    :param ma_length: number of points in the averaging window.
    :return: list of window means, oldest first.
    """
    averages = []
    window = data[:ma_length]

    # Replay everything after the priming segment as if it arrived tick by tick.
    for incoming in data[ma_length:]:
        # Slide the window: drop the oldest point, admit the newest one.
        window.pop(0)
        window.append(incoming)

        # Sum the window from scratch.
        running = 0
        for value in window:
            running += value

        # True division relies on the module-level
        # `from __future__ import division` when built for Python 2.
        averages.append(running / ma_length)

    return averages
# Cython combined with the incremental ("online") algorithm.
def ma_cython_online(data, ma_length):
    """Return the rolling means of ``data`` using an O(1) update per tick.

    The first ``ma_length`` items only prime the window; one mean is emitted
    for every item after them. Instead of re-summing the window for each new
    tick, a running sum is kept and adjusted by ``- oldest + newest``.

    The running sum is seeded once, before the loop. Using "sum == 0" as an
    "uninitialised" marker would misfire whenever a window legitimately sums
    to zero and silently fall back to a full re-summation for that tick.

    :param data: sequence of ticks; it must slice into an object that offers
        ``pop`` and ``append`` (e.g. a list).
    :param ma_length: number of points in the averaging window.
    :return: list of window means, oldest first.
    """
    # Static C declarations: ticks are coerced to C ``int`` on assignment.
    # NOTE(review): this assumes integer ticks whose window sum fits in a
    # C int -- confirm against the real data feed before reusing elsewhere.
    cdef int sum_buffer, old_tick, new_tick

    ma = []
    data_window = data[:ma_length]
    test_data = data[ma_length:]

    # Seed the running sum with the priming window exactly once.
    sum_buffer = 0
    for tick in data_window:
        sum_buffer += tick

    for new_tick in test_data:
        # Slide the window and keep the evicted value for the update below.
        old_tick = data_window.pop(0)
        data_window.append(new_tick)

        # Incremental update: O(1) per tick regardless of the window size.
        sum_buffer = sum_buffer - old_tick + new_tick

        # True division relies on the module-level
        # `from __future__ import division` when built for Python 2.
        ma.append(sum_buffer/ma_length)

    return ma

View File

@ -0,0 +1,7 @@
from distutils.core import setup
from Cython.Build import cythonize
# Translate test.pyx into extension-module definitions first, then hand them
# to distutils. Run this script with the arguments `build_ext --inplace` to
# drop the compiled module next to the source.
extensions = cythonize("test.pyx")

setup(
    name='cython test',
    ext_modules=extensions,
)