vnpy/vn.how/performance/Python Performance.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 这个测试目标在于仿造一个类似于实盘中，不断有新的数据推送过来，\n",
    "# 然后需要计算移动平均线数值，这么一个比较常见的任务。\n",
    "\n",
    "from __future__ import division\n",
    "import time\n",
    "import random\n",
    "\n",
    "# 生成测试用的数据\n",
    "data = []\n",
    "data_length = 100000    # 总数据量\n",
    "ma_length = 500         # 移动均线的窗口\n",
    "test_times = 10         # 测试次数\n",
    "\n",
    "for i in range(data_length):\n",
    "    data.append(random.randint(1, 100))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "单次耗时：1.16959998608秒\n",
      "单个数据点耗时：11.7547737294微秒\n",
      "最后10个移动平均值： [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
     ]
    }
   ],
   "source": [
    "# 计算500期的移动均线，并将结果保存到一个列表里返回\n",
    "def ma_basic(data, ma_length):\n",
    "    \n",
    "    # 用于保存均线输出结果的列表\n",
    "    ma = []\n",
    "    \n",
    "    # 计算均线用的数据窗口\n",
    "    data_window = data[:ma_length]\n",
    "    \n",
    "    # 测试用数据（去除了之前初始化用的部分）\n",
    "    test_data = data[ma_length:]\n",
    "    \n",
    "    # 模拟实盘不断收到新数据推送的情景，遍历历史数据计算均线\n",
    "    for new_tick in test_data:\n",
    "        # 移除最老的数据点并增加最新的数据点\n",
    "        data_window.pop(0)\n",
    "        data_window.append(new_tick)\n",
    "        \n",
    "        # 遍历求均线\n",
    "        sum_tick = 0\n",
    "        for tick in data_window:\n",
    "            sum_tick += tick\n",
    "        ma.append(sum_tick/ma_length)\n",
    "        \n",
    "    # 返回数据\n",
    "    return ma\n",
    "\n",
    "# Benchmark: time test_times full passes of ma_basic over data.\n",
    "start = time.time()\n",
    "\n",
    "for i in range(test_times):\n",
    "    result = ma_basic(data, ma_length)\n",
    "\n",
    "# Mean wall-clock seconds per pass.\n",
    "time_per_test = (time.time()-start)/test_times\n",
    "# Each pass yields data_length - ma_length averages, so this is seconds per average.\n",
    "time_per_point = time_per_test/(data_length - ma_length)\n",
    "    \n",
    "# Python 2 print statements. Labels: seconds per pass, microseconds per data\n",
    "# point (hence the *1000000), and the last 10 moving averages.\n",
    "print u'单次耗时：%s秒' %time_per_test\n",
    "print u'单个数据点耗时：%s微秒' %(time_per_point*1000000)\n",
    "print u'最后10个移动平均值：', result[-10:]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "单次耗时：2.11879999638秒\n",
      "单个数据点耗时：21.2944723254微秒\n",
      "最后10个移动平均值： [49.804000000000002, 49.832000000000001, 49.799999999999997, 49.899999999999999, 49.892000000000003, 49.887999999999998, 49.927999999999997, 50.052, 50.106000000000002, 49.981999999999999]\n"
     ]
    }
   ],
   "source": [
    "# 改用numpy（首先是一种常见的错误用法）\n",
    "import numpy as np\n",
    "\n",
    "def ma_numpy_wrong(data, ma_length):\n",
    "    ma = []\n",
    "    data_window = data[:ma_length]\n",
    "    test_data = data[ma_length:]\n",
    "    \n",
    "    for new_tick in test_data:\n",
    "        data_window.pop(0)\n",
    "        data_window.append(new_tick)\n",
    "        \n",
    "        # 使用numpy求均线，注意这里本质上每次循环\n",
    "        # 都在创建一个新的numpy数组对象，开销很大\n",
    "        data_array = np.array(data_window)\n",
    "        ma.append(data_array.mean())\n",
    "        \n",
    "    return ma\n",
    "\n",
    "# Benchmark: time test_times full passes of ma_numpy_wrong over data.\n",
    "start = time.time()\n",
    "\n",
    "for i in range(test_times):\n",
    "    result = ma_numpy_wrong(data, ma_length)\n",
    "    \n",
    "# Mean wall-clock seconds per pass.\n",
    "time_per_test = (time.time()-start)/test_times\n",
    "# Each pass yields data_length - ma_length averages, so this is seconds per average.\n",
    "time_per_point = time_per_test/(data_length - ma_length)\n",
    "    \n",
    "# Python 2 print statements. Labels: seconds per pass, microseconds per data\n",
    "# point (hence the *1000000), and the last 10 moving averages.\n",
    "print u'单次耗时：%s秒' %time_per_test\n",
    "print u'单个数据点耗时：%s微秒' %(time_per_point*1000000)\n",
    "print u'最后10个移动平均值：', result[-10:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "单次耗时：0.614300012589秒\n",
      "单个数据点耗时：6.17386947325微秒\n",
      "最后10个移动平均值： [49.804000000000002, 49.832000000000001, 49.799999999999997, 49.899999999999999, 49.892000000000003, 49.887999999999998, 49.927999999999997, 50.052, 50.106000000000002, 49.981999999999999]\n"
     ]
    }
   ],
   "source": [
    "# numpy的正确用法\n",
    "def ma_numpy_right(data, ma_length):\n",
    "    ma = []\n",
    "    \n",
    "    # 用numpy数组来缓存计算窗口内的数据\n",
    "    data_window = np.array(data[:ma_length])\n",
    "    \n",
    "    test_data = data[ma_length:]\n",
    "    \n",
    "    for new_tick in test_data:\n",
    "        # 使用numpy数组的底层数据偏移来实现数据更新\n",
    "        data_window[0:ma_length-1] = data_window[1:ma_length]\n",
    "        data_window[-1] = new_tick\n",
    "        ma.append(data_window.mean())\n",
    "        \n",
    "    return ma\n",
    "\n",
    "# Benchmark: time test_times full passes of ma_numpy_right over data.\n",
    "start = time.time()\n",
    "\n",
    "for i in range(test_times):\n",
    "    result = ma_numpy_right(data, ma_length)\n",
    "    \n",
    "# Mean wall-clock seconds per pass.\n",
    "time_per_test = (time.time()-start)/test_times\n",
    "# Each pass yields data_length - ma_length averages, so this is seconds per average.\n",
    "time_per_point = time_per_test/(data_length - ma_length)\n",
    "    \n",
    "# Python 2 print statements. Labels: seconds per pass, microseconds per data\n",
    "# point (hence the *1000000), and the last 10 moving averages.\n",
    "print u'单次耗时：%s秒' %time_per_test\n",
    "print u'单个数据点耗时：%s微秒' %(time_per_point*1000000)\n",
    "print u'最后10个移动平均值：', result[-10:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "单次耗时：0.043700003624秒\n",
      "单个数据点耗时：0.439196016321微秒\n",
      "最后10个移动平均值： [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
     ]
    }
   ],
   "source": [
    "# 使用numba加速，ma_numba函数和ma_basic完全一样\n",
    "import numba\n",
    "\n",
    "# NOTE(review): whether numba compiles this in nopython mode or falls back to\n",
    "# object mode depends on the installed numba version - confirm before relying\n",
    "# on the measured speed-up.\n",
    "@numba.jit\n",
    "def ma_numba(data, ma_length):\n",
    "    \"\"\"Sliding-window moving average wrapped in numba.jit.\n",
    "\n",
    "    The first ma_length items of data seed the window; one average is\n",
    "    appended for every later item after it replaces the oldest value.\n",
    "\n",
    "    Args:\n",
    "        data: sequence of numeric ticks; only slices of it are modified here.\n",
    "        ma_length: window size, also used as the divisor.\n",
    "\n",
    "    Returns:\n",
    "        List of averages, one per item of data[ma_length:].\n",
    "    \"\"\"\n",
    "    ma = []\n",
    "    data_window = data[:ma_length]\n",
    "    test_data = data[ma_length:]\n",
    "    \n",
    "    for new_tick in test_data:\n",
    "        # Slide the window: drop the oldest tick, append the newest.\n",
    "        data_window.pop(0)\n",
    "        data_window.append(new_tick)\n",
    "        # Re-add the whole window on every tick (same approach as the baseline).\n",
    "        sum_tick = 0\n",
    "        for tick in data_window:\n",
    "            sum_tick += tick\n",
    "        ma.append(sum_tick/ma_length)\n",
    "\n",
    "    return ma\n",
    "\n",
    "# Benchmark: time test_times full passes of ma_numba over data.\n",
    "# NOTE(review): the first call presumably also pays numba's compilation cost,\n",
    "# which would be averaged into the figures below - confirm.\n",
    "start = time.time()\n",
    "\n",
    "for i in range(test_times):\n",
    "    result = ma_numba(data, ma_length)\n",
    "\n",
    "# Mean wall-clock seconds per pass.\n",
    "time_per_test = (time.time()-start)/test_times\n",
    "# Each pass yields data_length - ma_length averages, so this is seconds per average.\n",
    "time_per_point = time_per_test/(data_length - ma_length)\n",
    "    \n",
    "# Python 2 print statements. Labels: seconds per pass, microseconds per data\n",
    "# point (hence the *1000000), and the last 10 moving averages.\n",
    "print u'单次耗时：%s秒' %time_per_test\n",
    "print u'单个数据点耗时：%s微秒' %(time_per_point*1000000)\n",
    "print u'最后10个移动平均值：', result[-10:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "单次耗时：0.0348000049591秒\n",
      "单个数据点耗时：0.349748793559微秒\n",
      "最后10个移动平均值： [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
     ]
    }
   ],
   "source": [
    "# 将均线计算改写为高速算法\n",
    "def ma_online(data, ma_length):\n",
    "    ma = []\n",
    "    data_window = data[:ma_length]\n",
    "    test_data = data[ma_length:]\n",
    "    \n",
    "    # 缓存的窗口内数据求和结果\n",
    "    sum_buffer = 0\n",
    "    \n",
    "    for new_tick in test_data:\n",
    "        old_tick = data_window.pop(0)\n",
    "        data_window.append(new_tick)\n",
    "            \n",
    "        # 如果缓存结果为空，则先通过遍历求第一次结果\n",
    "        if not sum_buffer:\n",
    "            sum_tick = 0\n",
    "            for tick in data_window:\n",
    "                sum_tick += tick\n",
    "            ma.append(sum_tick/ma_length)\n",
    "            \n",
    "            # 将求和结果缓存下来\n",
    "            sum_buffer = sum_tick\n",
    "        else:\n",
    "            # 这里的算法将计算复杂度从O(n)降低到了O(1)\n",
    "            sum_buffer = sum_buffer - old_tick + new_tick\n",
    "            ma.append(sum_buffer/ma_length)\n",
    "        \n",
    "    return ma\n",
    "\n",
    "# Benchmark: time test_times full passes of ma_online over data.\n",
    "start = time.time()\n",
    "\n",
    "for i in range(test_times):\n",
    "    result = ma_online(data, ma_length)\n",
    "    \n",
    "# Mean wall-clock seconds per pass.\n",
    "time_per_test = (time.time()-start)/test_times\n",
    "# Each pass yields data_length - ma_length averages, so this is seconds per average.\n",
    "time_per_point = time_per_test/(data_length - ma_length)\n",
    "\n",
    "# Python 2 print statements. Labels: seconds per pass, microseconds per data\n",
    "# point (hence the *1000000), and the last 10 moving averages.\n",
    "print u'单次耗时：%s秒' %time_per_test\n",
    "print u'单个数据点耗时：%s微秒' %(time_per_point*1000000)\n",
    "print u'最后10个移动平均值：', result[-10:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "单次耗时：0.0290000200272秒\n",
      "单个数据点耗时：0.29145748771微秒\n",
      "最后10个移动平均值： [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
     ]
    }
   ],
   "source": [
    "# 高速算法和numba结合，ma_online_numba函数和ma_online完全一样\n",
    "# NOTE(review): whether numba compiles this in nopython mode or falls back to\n",
    "# object mode depends on the installed numba version - confirm before relying\n",
    "# on the measured speed-up.\n",
    "@numba.jit\n",
    "def ma_online_numba(data, ma_length):\n",
    "    \"\"\"Running-sum moving average wrapped in numba.jit.\n",
    "\n",
    "    The first ma_length items of data seed the window; one average is\n",
    "    appended for every later item. The window sum is computed by iteration\n",
    "    whenever sum_buffer is falsy and updated incrementally otherwise.\n",
    "\n",
    "    Args:\n",
    "        data: sequence of numeric ticks; only slices of it are modified here.\n",
    "        ma_length: window size, also used as the divisor.\n",
    "\n",
    "    Returns:\n",
    "        List of averages, one per item of data[ma_length:].\n",
    "    \"\"\"\n",
    "    ma = []\n",
    "    data_window = data[:ma_length]\n",
    "    test_data = data[ma_length:]\n",
    "    \n",
    "    # Cached sum of the current window; 0 doubles as the 'not computed yet' marker.\n",
    "    sum_buffer = 0\n",
    "    \n",
    "    for new_tick in test_data:\n",
    "        old_tick = data_window.pop(0)\n",
    "        data_window.append(new_tick)\n",
    "            \n",
    "        # NOTE(review): this branch runs on the first tick and again whenever the\n",
    "        # window sum is exactly 0, because the test is on falsiness. Results stay\n",
    "        # correct, but such ticks pay for a full re-sum.\n",
    "        if not sum_buffer:\n",
    "            sum_tick = 0\n",
    "            for tick in data_window:\n",
    "                sum_tick += tick\n",
    "            ma.append(sum_tick/ma_length)\n",
    "            sum_buffer = sum_tick\n",
    "        else:\n",
    "            # Incremental update: remove the evicted tick, add the new one.\n",
    "            sum_buffer = sum_buffer - old_tick + new_tick\n",
    "            ma.append(sum_buffer/ma_length)\n",
    "\n",
    "    return ma\n",
    "\n",
    "# Benchmark: time test_times full passes of ma_online_numba over data.\n",
    "# NOTE(review): the first call presumably also pays numba's compilation cost,\n",
    "# which would be averaged into the figures below - confirm.\n",
    "start = time.time()\n",
    "\n",
    "for i in range(test_times):\n",
    "    result = ma_online_numba(data, ma_length)\n",
    "    \n",
    "# Mean wall-clock seconds per pass.\n",
    "time_per_test = (time.time()-start)/test_times\n",
    "# Each pass yields data_length - ma_length averages, so this is seconds per average.\n",
    "time_per_point = time_per_test/(data_length - ma_length)\n",
    "\n",
    "# Python 2 print statements. Labels: seconds per pass, microseconds per data\n",
    "# point (hence the *1000000), and the last 10 moving averages.\n",
    "print u'单次耗时：%s秒' %time_per_test\n",
    "print u'单个数据点耗时：%s微秒' %(time_per_point*1000000)\n",
    "print u'最后10个移动平均值：', result[-10:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "# 基础的cython加速\n",
    "def ma_cython(data, ma_length):\n",
    "    ma = []\n",
    "    data_window = data[:ma_length]\n",
    "    test_data = data[ma_length:]\n",
    "    \n",
    "    for new_tick in test_data:\n",
    "        data_window.pop(0)\n",
    "        data_window.append(new_tick)\n",
    "        \n",
    "        sum_tick = 0\n",
    "        for tick in data_window:\n",
    "            sum_tick += tick\n",
    "        ma.append(sum_tick/ma_length)\n",
    "    \n",
    "    return ma\n",
    "    \n",
    "\n",
    "# cython和高速算法\n",
    "def ma_cython_online(data, ma_length):\n",
    "    # 静态声明变量\n",
    "    cdef int sum_buffer, sum_tick, old_tick, new_tick\n",
    "\n",
    "    ma = []\n",
    "    data_window = data[:ma_length]\n",
    "    test_data = data[ma_length:]\n",
    "    sum_buffer = 0\n",
    "    \n",
    "    for new_tick in test_data:\n",
    "        old_tick = data_window.pop(0)\n",
    "        data_window.append(new_tick)\n",
    "            \n",
    "        if not sum_buffer:\n",
    "            sum_tick = 0\n",
    "            for tick in data_window:\n",
    "                sum_tick += tick\n",
    "            ma.append(sum_tick/ma_length)\n",
    "            \n",
    "            sum_buffer = sum_tick\n",
    "        else:\n",
    "            sum_buffer = sum_buffer - old_tick + new_tick\n",
    "            ma.append(sum_buffer/ma_length)\n",
    "        \n",
    "    return ma\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "单次耗时：0.600800013542秒\n",
      "单个数据点耗时：6.03819109088微秒\n",
      "最后10个移动平均值： [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
     ]
    }
   ],
   "source": [
    "# Basic Cython speed-up: benchmark ma_cython imported from the local 'test' module.\n",
    "# NOTE(review): 'test' is presumably the compiled extension built from the Cython\n",
    "# source quoted in the string cell above - confirm. The name also matches the\n",
    "# standard library's 'test' package, so which one is imported depends on sys.path.\n",
    "from test import ma_cython\n",
    "\n",
    "start = time.time()\n",
    "\n",
    "for i in range(test_times):\n",
    "    result = ma_cython(data, ma_length)\n",
    "    \n",
    "# Mean wall-clock seconds per pass.\n",
    "time_per_test = (time.time()-start)/test_times\n",
    "# Seconds per data point, taking data_length - ma_length points per pass.\n",
    "time_per_point = time_per_test/(data_length - ma_length)\n",
    "\n",
    "# Python 2 print statements. Labels: seconds per pass, microseconds per data\n",
    "# point (hence the *1000000), and the last 10 moving averages.\n",
    "print u'单次耗时：%s秒' %time_per_test\n",
    "print u'单个数据点耗时：%s微秒' %(time_per_point*1000000)\n",
    "print u'最后10个移动平均值：', result[-10:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "单次耗时：0.00980000495911秒\n",
      "单个数据点耗时：0.0984925121518微秒\n",
      "最后10个移动平均值： [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
     ]
    }
   ],
   "source": [
    "# Running-sum algorithm combined with Cython: benchmark ma_cython_online from\n",
    "# the local 'test' module.\n",
    "# NOTE(review): 'test' is presumably the compiled extension built from the Cython\n",
    "# source quoted in the string cell above - confirm. The name also matches the\n",
    "# standard library's 'test' package, so which one is imported depends on sys.path.\n",
    "from test import ma_cython_online\n",
    "\n",
    "start = time.time()\n",
    "\n",
    "for i in range(test_times):\n",
    "    result = ma_cython_online(data, ma_length)\n",
    "\n",
    "# Mean wall-clock seconds per pass.\n",
    "time_per_test = (time.time()-start)/test_times\n",
    "# Seconds per data point, taking data_length - ma_length points per pass.\n",
    "time_per_point = time_per_test/(data_length - ma_length)\n",
    "\n",
    "# Python 2 print statements. Labels: seconds per pass, microseconds per data\n",
    "# point (hence the *1000000), and the last 10 moving averages.\n",
    "print u'单次耗时：%s秒' %time_per_test\n",
    "print u'单个数据点耗时：%s微秒' %(time_per_point*1000000)\n",
    "print u'最后10个移动平均值：', result[-10:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}