diff --git a/vn.how/README.md b/vn.how/README.md new file mode 100644 index 00000000..fc9365b2 --- /dev/null +++ b/vn.how/README.md @@ -0,0 +1,5 @@ +# vn.py项目的实战应用指南 + +本文件夹下的内容主要是围绕vn.py在实际交易中的一系列具体应用,包括说明文档和代码例子。 + +* performance:《百倍加速!Python量化策略的算法性能提升指南》 \ No newline at end of file diff --git a/vn.how/performance/Python Performance.ipynb b/vn.how/performance/Python Performance.ipynb new file mode 100644 index 00000000..ec3e8152 --- /dev/null +++ b/vn.how/performance/Python Performance.ipynb @@ -0,0 +1,511 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# 这个测试目标在于仿造一个类似于实盘中,不断有新的数据推送过来,\n", + "# 然后需要计算移动平均线数值,这么一个比较常见的任务。\n", + "\n", + "from __future__ import division\n", + "import time\n", + "import random\n", + "\n", + "# 生成测试用的数据\n", + "data = []\n", + "data_length = 100000 # 总数据量\n", + "ma_length = 500 # 移动均线的窗口\n", + "test_times = 10 # 测试次数\n", + "\n", + "for i in range(data_length):\n", + " data.append(random.randint(1, 100))" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "单次耗时:1.16959998608秒\n", + "单个数据点耗时:11.7547737294微秒\n", + "最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n" + ] + } + ], + "source": [ + "# 计算500期的移动均线,并将结果保存到一个列表里返回\n", + "def ma_basic(data, ma_length):\n", + " \n", + " # 用于保存均线输出结果的列表\n", + " ma = []\n", + " \n", + " # 计算均线用的数据窗口\n", + " data_window = data[:ma_length]\n", + " \n", + " # 测试用数据(去除了之前初始化用的部分)\n", + " test_data = data[ma_length:]\n", + " \n", + " # 模拟实盘不断收到新数据推送的情景,遍历历史数据计算均线\n", + " for new_tick in test_data:\n", + " # 移除最老的数据点并增加最新的数据点\n", + " data_window.pop(0)\n", + " data_window.append(new_tick)\n", + " \n", + " # 遍历求均线\n", + " sum_tick = 0\n", + " for tick in data_window:\n", + " sum_tick += tick\n", + " ma.append(sum_tick/ma_length)\n", + " \n", + " # 返回数据\n", + " return ma\n", + "\n", + "# 运行测试\n", + "start = time.time()\n", + "\n", + "for i in range(test_times):\n", + " result = ma_basic(data, ma_length)\n", + "\n", + "time_per_test = (time.time()-start)/test_times\n", + "time_per_point = time_per_test/(data_length - ma_length)\n", + " \n", + "print u'单次耗时:%s秒' %time_per_test\n", + "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", + "print u'最后10个移动平均值:', result[-10:]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "单次耗时:2.11879999638秒\n", + "单个数据点耗时:21.2944723254微秒\n", + "最后10个移动平均值: [49.804000000000002, 49.832000000000001, 49.799999999999997, 49.899999999999999, 49.892000000000003, 49.887999999999998, 49.927999999999997, 50.052, 50.106000000000002, 49.981999999999999]\n" + ] + } + ], + "source": [ + "# 改用numpy(首先是一种常见的错误用法)\n", + "import numpy as np\n", + "\n", + "def ma_numpy_wrong(data, ma_length):\n", + " ma = []\n", + " data_window = data[:ma_length]\n", + " test_data = data[ma_length:]\n", + " \n", + " for new_tick in test_data:\n", + " data_window.pop(0)\n", + " data_window.append(new_tick)\n", + " \n", + " # 使用numpy求均线,注意这里本质上每次循环\n", + " # 都在创建一个新的numpy数组对象,开销很大\n", + " data_array = np.array(data_window)\n", + " ma.append(data_array.mean())\n", + " \n", + " return ma\n", + "\n", + "# 运行测试\n", + "start = time.time()\n", + "\n", + "for i in range(test_times):\n", + " result = ma_numpy_wrong(data, ma_length)\n", + " \n", + "time_per_test = (time.time()-start)/test_times\n", + "time_per_point = time_per_test/(data_length - ma_length)\n", + " \n", + "print u'单次耗时:%s秒' %time_per_test\n", + "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", + "print u'最后10个移动平均值:', result[-10:]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "单次耗时:0.614300012589秒\n", + "单个数据点耗时:6.17386947325微秒\n", + "最后10个移动平均值: [49.804000000000002, 49.832000000000001, 49.799999999999997, 49.899999999999999, 49.892000000000003, 49.887999999999998, 49.927999999999997, 50.052, 50.106000000000002, 49.981999999999999]\n" + ] + } + ], + "source": [ + "# numpy的正确用法\n", + "def ma_numpy_right(data, ma_length):\n", + " ma = []\n", + " \n", + " # 用numpy数组来缓存计算窗口内的数据\n", + " data_window = np.array(data[:ma_length])\n", + " \n", + " test_data = data[ma_length:]\n", + " \n", + " for new_tick in test_data:\n", + " # 使用numpy数组的底层数据偏移来实现数据更新\n", + " data_window[0:ma_length-1] = data_window[1:ma_length]\n", + " data_window[-1] = new_tick\n", + " ma.append(data_window.mean())\n", + " \n", + " return ma\n", + "\n", + "# 运行测试\n", + "start = time.time()\n", + "\n", + "for i in range(test_times):\n", + " result = ma_numpy_right(data, ma_length)\n", + " \n", + "time_per_test = (time.time()-start)/test_times\n", + "time_per_point = time_per_test/(data_length - ma_length)\n", + " \n", + "print u'单次耗时:%s秒' %time_per_test\n", + "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", + "print u'最后10个移动平均值:', result[-10:]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "单次耗时:0.043700003624秒\n", + "单个数据点耗时:0.439196016321微秒\n", + "最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n" + ] + } + ], + "source": [ + "# 使用numba加速,ma_numba函数和ma_basic完全一样\n", + "import numba\n", + "\n", + "@numba.jit\n", + "def ma_numba(data, ma_length):\n", + " ma = []\n", + " data_window = data[:ma_length]\n", + " test_data = data[ma_length:]\n", + " \n", + " for new_tick in test_data:\n", + " data_window.pop(0)\n", + " data_window.append(new_tick)\n", + " sum_tick = 0\n", + " for tick in data_window:\n", + " sum_tick += tick\n", + " ma.append(sum_tick/ma_length)\n", + "\n", + " return ma\n", + "\n", + "# 运行测试\n", + "start = time.time()\n", + "\n", + "for i in range(test_times):\n", + " result = ma_numba(data, ma_length)\n", + "\n", + "time_per_test = (time.time()-start)/test_times\n", + "time_per_point = time_per_test/(data_length - ma_length)\n", + " \n", + "print u'单次耗时:%s秒' %time_per_test\n", + "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", + "print u'最后10个移动平均值:', result[-10:]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "单次耗时:0.0348000049591秒\n", + "单个数据点耗时:0.349748793559微秒\n", + "最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n" + ] + } + ], + "source": [ + "# 将均线计算改写为高速算法\n", + "def ma_online(data, ma_length):\n", + " ma = []\n", + " data_window = data[:ma_length]\n", + " test_data = data[ma_length:]\n", + " \n", + " # 缓存的窗口内数据求和结果\n", + " sum_buffer = 0\n", + " \n", + " for new_tick in test_data:\n", + " old_tick = data_window.pop(0)\n", + " data_window.append(new_tick)\n", + " \n", + " # 如果缓存结果为空,则先通过遍历求第一次结果\n", + " if not sum_buffer:\n", + " sum_tick = 0\n", + " for tick in data_window:\n", + " sum_tick += tick\n", + " ma.append(sum_tick/ma_length)\n", + " \n", + " # 将求和结果缓存下来\n", + " sum_buffer = sum_tick\n", + " else:\n", + " # 这里的算法将计算复杂度从O(n)降低到了O(1)\n", + " sum_buffer = sum_buffer - old_tick + new_tick\n", + " ma.append(sum_buffer/ma_length)\n", + " \n", + " return ma\n", + "\n", + "# 运行测试\n", + "start = time.time()\n", + "\n", + "for i in range(test_times):\n", + " result = ma_online(data, ma_length)\n", + " \n", + "time_per_test = (time.time()-start)/test_times\n", + "time_per_point = time_per_test/(data_length - ma_length)\n", + "\n", + "print u'单次耗时:%s秒' %time_per_test\n", + "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", + "print u'最后10个移动平均值:', result[-10:]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "单次耗时:0.0290000200272秒\n", + "单个数据点耗时:0.29145748771微秒\n", + "最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n" + ] + } + ], + "source": [ + "# 高速算法和numba结合,ma_online_numba函数和ma_online完全一样\n", + "@numba.jit\n", + "def ma_online_numba(data, ma_length):\n", + " ma = []\n", + " data_window = data[:ma_length]\n", + " test_data = data[ma_length:]\n", + " \n", + " sum_buffer = 0\n", + " \n", + " for new_tick in test_data:\n", + " old_tick = data_window.pop(0)\n", + " data_window.append(new_tick)\n", + " \n", + " if not sum_buffer:\n", + " sum_tick = 0\n", + " for tick in data_window:\n", + " sum_tick += tick\n", + " ma.append(sum_tick/ma_length)\n", + " sum_buffer = sum_tick\n", + " else:\n", + " sum_buffer = sum_buffer - old_tick + new_tick\n", + " ma.append(sum_buffer/ma_length)\n", + "\n", + " return ma\n", + "\n", + "# 运行测试\n", + "start = time.time()\n", + "\n", + "for i in range(test_times):\n", + " result = ma_online_numba(data, ma_length)\n", + " \n", + "time_per_test = (time.time()-start)/test_times\n", + "time_per_point = time_per_test/(data_length - ma_length)\n", + "\n", + "print u'单次耗时:%s秒' %time_per_test\n", + "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", + "print u'最后10个移动平均值:', result[-10:]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "# 基础的cython加速\n", + "def ma_cython(data, ma_length):\n", + " ma = []\n", + " data_window = data[:ma_length]\n", + " test_data = data[ma_length:]\n", + " \n", + " for new_tick in test_data:\n", + " data_window.pop(0)\n", + " data_window.append(new_tick)\n", + " \n", + " sum_tick = 0\n", + " for tick in data_window:\n", + " sum_tick += tick\n", + " ma.append(sum_tick/ma_length)\n", + " \n", + " return ma\n", + " \n", + "\n", + "# cython和高速算法\n", + "def ma_cython_online(data, ma_length):\n", + " # 静态声明变量\n", + " cdef int sum_buffer, sum_tick, old_tick, new_tick\n", + "\n", + " ma = []\n", + " data_window = data[:ma_length]\n", + " test_data = data[ma_length:]\n", + " sum_buffer = 0\n", + " \n", + " for new_tick in test_data:\n", + " old_tick = data_window.pop(0)\n", + " data_window.append(new_tick)\n", + " \n", + " if not sum_buffer:\n", + " sum_tick = 0\n", + " for tick in data_window:\n", + " sum_tick += tick\n", + " ma.append(sum_tick/ma_length)\n", + " \n", + " sum_buffer = sum_tick\n", + " else:\n", + " sum_buffer = sum_buffer - old_tick + new_tick\n", + " ma.append(sum_buffer/ma_length)\n", + " \n", + " return ma\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "单次耗时:0.600800013542秒\n", + "单个数据点耗时:6.03819109088微秒\n", + "最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n" + ] + } + ], + "source": [ + "# 基础cython加速\n", + "from test import ma_cython\n", + "\n", + "start = time.time()\n", + "\n", + "for i in range(test_times):\n", + " result = ma_cython(data, ma_length)\n", + " \n", + "time_per_test = (time.time()-start)/test_times\n", + "time_per_point = time_per_test/(data_length - ma_length)\n", + "\n", + "print u'单次耗时:%s秒' %time_per_test\n", + "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", + "print u'最后10个移动平均值:', result[-10:]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "单次耗时:0.00980000495911秒\n", + "单个数据点耗时:0.0984925121518微秒\n", + "最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n" + ] + } + ], + "source": [ + "# 高速算法和cython结合\n", + "from test import ma_cython_online\n", + "\n", + "start = time.time()\n", + "\n", + "for i in range(test_times):\n", + " result = ma_cython_online(data, ma_length)\n", + "\n", + "time_per_test = (time.time()-start)/test_times\n", + "time_per_point = time_per_test/(data_length - ma_length)\n", + "\n", + "print u'单次耗时:%s秒' %time_per_test\n", + "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", + "print u'最后10个移动平均值:', result[-10:]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/vn.how/performance/README.md b/vn.how/performance/README.md new file mode 100644 index 00000000..3b1edff7 --- /dev/null +++ b/vn.how/performance/README.md @@ -0,0 +1,16 @@ +# 使用说明 + +### 使用步骤 +1. 在当前文件夹下打开cmd窗口 +2. 输入ipython notebook运行 +3. 打开Python Performance笔记本,使用Shift+回车逐个Cell运行 + +### 编译Cython +打开cmd,输入运行: +> python setup.py build_ext --inplace + +### 文件说明 +* Python Performance.ipynb:Jupyter Notebook笔记本 +* test.pyx:Cython模块的源代码 +* test_setup.py:编译test.pyx所需的配置文件 +* test.pyd:编译好的Cython模块,可以在Python里直接import diff --git a/vn.how/performance/test.pyd b/vn.how/performance/test.pyd new file mode 100644 index 00000000..5888c475 Binary files /dev/null and b/vn.how/performance/test.pyd differ diff --git a/vn.how/performance/test.pyx b/vn.how/performance/test.pyx new file mode 100644 index 00000000..b5c0a11c --- /dev/null +++ b/vn.how/performance/test.pyx @@ -0,0 +1,48 @@ +#encoding:utf-8 + +from __future__ import division + +# 基础的cython加速 +def ma_cython(data, ma_length): + ma = [] + data_window = data[:ma_length] + test_data = data[ma_length:] + + for new_tick in test_data: + data_window.pop(0) + data_window.append(new_tick) + + sum_tick = 0 + for tick in data_window: + sum_tick += tick + ma.append(sum_tick/ma_length) + + return ma + + +# cython和高速算法 +def ma_cython_online(data, ma_length): + # 静态声明变量 + cdef int sum_buffer, sum_tick, old_tick, new_tick + + ma = [] + data_window = data[:ma_length] + test_data = data[ma_length:] + sum_buffer = 0 + + for new_tick in test_data: + old_tick = data_window.pop(0) + data_window.append(new_tick) + + if not sum_buffer: + sum_tick = 0 + for tick in data_window: + sum_tick += tick + ma.append(sum_tick/ma_length) + + sum_buffer = sum_tick + else: + sum_buffer = sum_buffer - old_tick + new_tick + ma.append(sum_buffer/ma_length) + + return ma \ No newline at end of file diff --git a/vn.how/performance/test_setup.py b/vn.how/performance/test_setup.py new file mode 100644 index 00000000..e0b2737f --- /dev/null +++ b/vn.how/performance/test_setup.py @@ -0,0 +1,7 @@ +from distutils.core import setup +from Cython.Build import cythonize + +setup( + name = 'cython test', + ext_modules = cythonize("test.pyx"), +) \ No newline at end of file