{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# 这个测试目标在于仿造一个类似于实盘中,不断有新的数据推送过来,\n", "# 然后需要计算移动平均线数值,这么一个比较常见的任务。\n", "\n", "from __future__ import division\n", "import time\n", "import random\n", "\n", "# 生成测试用的数据\n", "data = []\n", "data_length = 100000 # 总数据量\n", "ma_length = 500 # 移动均线的窗口\n", "test_times = 10 # 测试次数\n", "\n", "for i in range(data_length):\n", " data.append(random.randint(1, 100))" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "单次耗时:1.16959998608秒\n", "单个数据点耗时:11.7547737294微秒\n", "最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n" ] } ], "source": [ "# 计算500期的移动均线,并将结果保存到一个列表里返回\n", "def ma_basic(data, ma_length):\n", " \n", " # 用于保存均线输出结果的列表\n", " ma = []\n", " \n", " # 计算均线用的数据窗口\n", " data_window = data[:ma_length]\n", " \n", " # 测试用数据(去除了之前初始化用的部分)\n", " test_data = data[ma_length:]\n", " \n", " # 模拟实盘不断收到新数据推送的情景,遍历历史数据计算均线\n", " for new_tick in test_data:\n", " # 移除最老的数据点并增加最新的数据点\n", " data_window.pop(0)\n", " data_window.append(new_tick)\n", " \n", " # 遍历求均线\n", " sum_tick = 0\n", " for tick in data_window:\n", " sum_tick += tick\n", " ma.append(sum_tick/ma_length)\n", " \n", " # 返回数据\n", " return ma\n", "\n", "# 运行测试\n", "start = time.time()\n", "\n", "for i in range(test_times):\n", " result = ma_basic(data, ma_length)\n", "\n", "time_per_test = (time.time()-start)/test_times\n", "time_per_point = time_per_test/(data_length - ma_length)\n", " \n", "print u'单次耗时:%s秒' %time_per_test\n", "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", "print u'最后10个移动平均值:', result[-10:]\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "单次耗时:2.11879999638秒\n", "单个数据点耗时:21.2944723254微秒\n", "最后10个移动平均值: [49.804000000000002, 49.832000000000001, 49.799999999999997, 49.899999999999999, 49.892000000000003, 49.887999999999998, 49.927999999999997, 50.052, 50.106000000000002, 49.981999999999999]\n" ] } ], "source": [ "# 改用numpy(首先是一种常见的错误用法)\n", "import numpy as np\n", "\n", "def ma_numpy_wrong(data, ma_length):\n", " ma = []\n", " data_window = data[:ma_length]\n", " test_data = data[ma_length:]\n", " \n", " for new_tick in test_data:\n", " data_window.pop(0)\n", " data_window.append(new_tick)\n", " \n", " # 使用numpy求均线,注意这里本质上每次循环\n", " # 都在创建一个新的numpy数组对象,开销很大\n", " data_array = np.array(data_window)\n", " ma.append(data_array.mean())\n", " \n", " return ma\n", "\n", "# 运行测试\n", "start = time.time()\n", "\n", "for i in range(test_times):\n", " result = ma_numpy_wrong(data, ma_length)\n", " \n", "time_per_test = (time.time()-start)/test_times\n", "time_per_point = time_per_test/(data_length - ma_length)\n", " \n", "print u'单次耗时:%s秒' %time_per_test\n", "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", "print u'最后10个移动平均值:', result[-10:]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "单次耗时:0.614300012589秒\n", "单个数据点耗时:6.17386947325微秒\n", "最后10个移动平均值: [49.804000000000002, 49.832000000000001, 49.799999999999997, 49.899999999999999, 49.892000000000003, 49.887999999999998, 49.927999999999997, 50.052, 50.106000000000002, 49.981999999999999]\n" ] } ], "source": [ "# numpy的正确用法\n", "def ma_numpy_right(data, ma_length):\n", " ma = []\n", " \n", " # 用numpy数组来缓存计算窗口内的数据\n", " data_window = np.array(data[:ma_length])\n", " \n", " test_data = data[ma_length:]\n", " \n", " for new_tick in test_data:\n", " # 使用numpy数组的底层数据偏移来实现数据更新\n", " data_window[0:ma_length-1] = data_window[1:ma_length]\n", " data_window[-1] = new_tick\n", " ma.append(data_window.mean())\n", " \n", " return ma\n", "\n", "# 运行测试\n", "start = time.time()\n", "\n", "for i in range(test_times):\n", " result = ma_numpy_right(data, ma_length)\n", " \n", "time_per_test = (time.time()-start)/test_times\n", "time_per_point = time_per_test/(data_length - ma_length)\n", " \n", "print u'单次耗时:%s秒' %time_per_test\n", "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", "print u'最后10个移动平均值:', result[-10:]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "单次耗时:0.043700003624秒\n", "单个数据点耗时:0.439196016321微秒\n", "最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n" ] } ], "source": [ "# 使用numba加速,ma_numba函数和ma_basic完全一样\n", "import numba\n", "\n", "@numba.jit\n", "def ma_numba(data, ma_length):\n", " ma = []\n", " data_window = data[:ma_length]\n", " test_data = data[ma_length:]\n", " \n", " for new_tick in test_data:\n", " data_window.pop(0)\n", " data_window.append(new_tick)\n", " sum_tick = 0\n", " for tick in data_window:\n", " sum_tick += tick\n", " ma.append(sum_tick/ma_length)\n", "\n", " return ma\n", "\n", "# 运行测试\n", "start = time.time()\n", "\n", "for i in range(test_times):\n", " result = ma_numba(data, ma_length)\n", "\n", "time_per_test = (time.time()-start)/test_times\n", "time_per_point = time_per_test/(data_length - ma_length)\n", " \n", "print u'单次耗时:%s秒' %time_per_test\n", "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", "print u'最后10个移动平均值:', result[-10:]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "单次耗时:0.0348000049591秒\n", "单个数据点耗时:0.349748793559微秒\n", "最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n" ] } ], "source": [ "# 将均线计算改写为高速算法\n", "def ma_online(data, ma_length):\n", " ma = []\n", " data_window = data[:ma_length]\n", " test_data = data[ma_length:]\n", " \n", " # 缓存的窗口内数据求和结果\n", " sum_buffer = 0\n", " \n", " for new_tick in test_data:\n", " old_tick = data_window.pop(0)\n", " data_window.append(new_tick)\n", " \n", " # 如果缓存结果为空,则先通过遍历求第一次结果\n", " if not sum_buffer:\n", " sum_tick = 0\n", " for tick in data_window:\n", " sum_tick += tick\n", " ma.append(sum_tick/ma_length)\n", " \n", " # 将求和结果缓存下来\n", " sum_buffer = sum_tick\n", " else:\n", " # 这里的算法将计算复杂度从O(n)降低到了O(1)\n", " sum_buffer = sum_buffer - old_tick + new_tick\n", " ma.append(sum_buffer/ma_length)\n", " \n", " return ma\n", "\n", "# 运行测试\n", "start = time.time()\n", "\n", "for i in range(test_times):\n", " result = ma_online(data, ma_length)\n", " \n", "time_per_test = (time.time()-start)/test_times\n", "time_per_point = time_per_test/(data_length - ma_length)\n", "\n", "print u'单次耗时:%s秒' %time_per_test\n", "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", "print u'最后10个移动平均值:', result[-10:]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "单次耗时:0.0290000200272秒\n", "单个数据点耗时:0.29145748771微秒\n", "最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n" ] } ], "source": [ "# 高速算法和numba结合,ma_online_numba函数和ma_online完全一样\n", "@numba.jit\n", "def ma_online_numba(data, ma_length):\n", " ma = []\n", " data_window = data[:ma_length]\n", " test_data = data[ma_length:]\n", " \n", " sum_buffer = 0\n", " \n", " for new_tick in test_data:\n", " old_tick = data_window.pop(0)\n", " data_window.append(new_tick)\n", " \n", " if not sum_buffer:\n", " sum_tick = 0\n", " for tick in data_window:\n", " sum_tick += tick\n", " ma.append(sum_tick/ma_length)\n", " sum_buffer = sum_tick\n", " else:\n", " sum_buffer = sum_buffer - old_tick + new_tick\n", " ma.append(sum_buffer/ma_length)\n", "\n", " return ma\n", "\n", "# 运行测试\n", "start = time.time()\n", "\n", "for i in range(test_times):\n", " result = ma_online_numba(data, ma_length)\n", " \n", "time_per_test = (time.time()-start)/test_times\n", "time_per_point = time_per_test/(data_length - ma_length)\n", "\n", "print u'单次耗时:%s秒' %time_per_test\n", "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", "print u'最后10个移动平均值:', result[-10:]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "\"\"\"\n", "# 基础的cython加速\n", "def ma_cython(data, ma_length):\n", " ma = []\n", " data_window = data[:ma_length]\n", " test_data = data[ma_length:]\n", " \n", " for new_tick in test_data:\n", " data_window.pop(0)\n", " data_window.append(new_tick)\n", " \n", " sum_tick = 0\n", " for tick in data_window:\n", " sum_tick += tick\n", " ma.append(sum_tick/ma_length)\n", " \n", " return ma\n", " \n", "\n", "# cython和高速算法\n", "def ma_cython_online(data, ma_length):\n", " # 静态声明变量\n", " cdef int sum_buffer, sum_tick, old_tick, new_tick\n", "\n", " ma = []\n", " data_window = data[:ma_length]\n", " test_data = data[ma_length:]\n", " sum_buffer = 0\n", " \n", " for new_tick in test_data:\n", " old_tick = data_window.pop(0)\n", " data_window.append(new_tick)\n", " \n", " if not sum_buffer:\n", " sum_tick = 0\n", " for tick in data_window:\n", " sum_tick += tick\n", " ma.append(sum_tick/ma_length)\n", " \n", " sum_buffer = sum_tick\n", " else:\n", " sum_buffer = sum_buffer - old_tick + new_tick\n", " ma.append(sum_buffer/ma_length)\n", " \n", " return ma\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "单次耗时:0.600800013542秒\n", "单个数据点耗时:6.03819109088微秒\n", "最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n" ] } ], "source": [ "# 基础cython加速\n", "from test import ma_cython\n", "\n", "start = time.time()\n", "\n", "for i in range(test_times):\n", " result = ma_cython(data, ma_length)\n", " \n", "time_per_test = (time.time()-start)/test_times\n", "time_per_point = time_per_test/(data_length - ma_length)\n", "\n", "print u'单次耗时:%s秒' %time_per_test\n", "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", "print u'最后10个移动平均值:', result[-10:]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "单次耗时:0.00980000495911秒\n", "单个数据点耗时:0.0984925121518微秒\n", "最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n" ] } ], "source": [ "# 高速算法和cython结合\n", "from test import ma_cython_online\n", "\n", "start = time.time()\n", "\n", "for i in range(test_times):\n", " result = ma_cython_online(data, ma_length)\n", "\n", "time_per_test = (time.time()-start)/test_times\n", "time_per_point = time_per_test/(data_length - ma_length)\n", "\n", "print u'单次耗时:%s秒' %time_per_test\n", "print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n", "print u'最后10个移动平均值:', result[-10:]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 0 }