512 lines
16 KiB
Plaintext
512 lines
16 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 这个测试目标在于仿造一个类似于实盘中,不断有新的数据推送过来,\n",
|
||
"# 然后需要计算移动平均线数值,这么一个比较常见的任务。\n",
|
||
"\n",
|
||
"from __future__ import division\n",
|
||
"import time\n",
|
||
"import random\n",
|
||
"\n",
|
||
"# 生成测试用的数据\n",
|
||
"data = []\n",
|
||
"data_length = 100000 # 总数据量\n",
|
||
"ma_length = 500 # 移动均线的窗口\n",
|
||
"test_times = 10 # 测试次数\n",
|
||
"\n",
|
||
"for i in range(data_length):\n",
|
||
" data.append(random.randint(1, 100))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"单次耗时:1.16959998608秒\n",
|
||
"单个数据点耗时:11.7547737294微秒\n",
|
||
"最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# 计算500期的移动均线,并将结果保存到一个列表里返回\n",
|
||
"def ma_basic(data, ma_length):\n",
|
||
" \n",
|
||
" # 用于保存均线输出结果的列表\n",
|
||
" ma = []\n",
|
||
" \n",
|
||
" # 计算均线用的数据窗口\n",
|
||
" data_window = data[:ma_length]\n",
|
||
" \n",
|
||
" # 测试用数据(去除了之前初始化用的部分)\n",
|
||
" test_data = data[ma_length:]\n",
|
||
" \n",
|
||
" # 模拟实盘不断收到新数据推送的情景,遍历历史数据计算均线\n",
|
||
" for new_tick in test_data:\n",
|
||
" # 移除最老的数据点并增加最新的数据点\n",
|
||
" data_window.pop(0)\n",
|
||
" data_window.append(new_tick)\n",
|
||
" \n",
|
||
" # 遍历求均线\n",
|
||
" sum_tick = 0\n",
|
||
" for tick in data_window:\n",
|
||
" sum_tick += tick\n",
|
||
" ma.append(sum_tick/ma_length)\n",
|
||
" \n",
|
||
" # 返回数据\n",
|
||
" return ma\n",
|
||
"\n",
|
||
"# 运行测试\n",
|
||
"start = time.time()\n",
|
||
"\n",
|
||
"for i in range(test_times):\n",
|
||
" result = ma_basic(data, ma_length)\n",
|
||
"\n",
|
||
"time_per_test = (time.time()-start)/test_times\n",
|
||
"time_per_point = time_per_test/(data_length - ma_length)\n",
|
||
" \n",
|
||
"print u'单次耗时:%s秒' %time_per_test\n",
|
||
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
|
||
"print u'最后10个移动平均值:', result[-10:]\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"单次耗时:2.11879999638秒\n",
|
||
"单个数据点耗时:21.2944723254微秒\n",
|
||
"最后10个移动平均值: [49.804000000000002, 49.832000000000001, 49.799999999999997, 49.899999999999999, 49.892000000000003, 49.887999999999998, 49.927999999999997, 50.052, 50.106000000000002, 49.981999999999999]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# 改用numpy(首先是一种常见的错误用法)\n",
|
||
"import numpy as np\n",
|
||
"\n",
|
||
"def ma_numpy_wrong(data, ma_length):\n",
|
||
" ma = []\n",
|
||
" data_window = data[:ma_length]\n",
|
||
" test_data = data[ma_length:]\n",
|
||
" \n",
|
||
" for new_tick in test_data:\n",
|
||
" data_window.pop(0)\n",
|
||
" data_window.append(new_tick)\n",
|
||
" \n",
|
||
" # 使用numpy求均线,注意这里本质上每次循环\n",
|
||
" # 都在创建一个新的numpy数组对象,开销很大\n",
|
||
" data_array = np.array(data_window)\n",
|
||
" ma.append(data_array.mean())\n",
|
||
" \n",
|
||
" return ma\n",
|
||
"\n",
|
||
"# 运行测试\n",
|
||
"start = time.time()\n",
|
||
"\n",
|
||
"for i in range(test_times):\n",
|
||
" result = ma_numpy_wrong(data, ma_length)\n",
|
||
" \n",
|
||
"time_per_test = (time.time()-start)/test_times\n",
|
||
"time_per_point = time_per_test/(data_length - ma_length)\n",
|
||
" \n",
|
||
"print u'单次耗时:%s秒' %time_per_test\n",
|
||
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
|
||
"print u'最后10个移动平均值:', result[-10:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"单次耗时:0.614300012589秒\n",
|
||
"单个数据点耗时:6.17386947325微秒\n",
|
||
"最后10个移动平均值: [49.804000000000002, 49.832000000000001, 49.799999999999997, 49.899999999999999, 49.892000000000003, 49.887999999999998, 49.927999999999997, 50.052, 50.106000000000002, 49.981999999999999]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# numpy的正确用法\n",
|
||
"def ma_numpy_right(data, ma_length):\n",
|
||
" ma = []\n",
|
||
" \n",
|
||
" # 用numpy数组来缓存计算窗口内的数据\n",
|
||
" data_window = np.array(data[:ma_length])\n",
|
||
" \n",
|
||
" test_data = data[ma_length:]\n",
|
||
" \n",
|
||
" for new_tick in test_data:\n",
|
||
" # 使用numpy数组的底层数据偏移来实现数据更新\n",
|
||
" data_window[0:ma_length-1] = data_window[1:ma_length]\n",
|
||
" data_window[-1] = new_tick\n",
|
||
" ma.append(data_window.mean())\n",
|
||
" \n",
|
||
" return ma\n",
|
||
"\n",
|
||
"# 运行测试\n",
|
||
"start = time.time()\n",
|
||
"\n",
|
||
"for i in range(test_times):\n",
|
||
" result = ma_numpy_right(data, ma_length)\n",
|
||
" \n",
|
||
"time_per_test = (time.time()-start)/test_times\n",
|
||
"time_per_point = time_per_test/(data_length - ma_length)\n",
|
||
" \n",
|
||
"print u'单次耗时:%s秒' %time_per_test\n",
|
||
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
|
||
"print u'最后10个移动平均值:', result[-10:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"单次耗时:0.043700003624秒\n",
|
||
"单个数据点耗时:0.439196016321微秒\n",
|
||
"最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# 使用numba加速,ma_numba函数和ma_basic完全一样\n",
|
||
"import numba\n",
|
||
"\n",
|
||
"@numba.jit\n",
|
||
"def ma_numba(data, ma_length):\n",
|
||
" ma = []\n",
|
||
" data_window = data[:ma_length]\n",
|
||
" test_data = data[ma_length:]\n",
|
||
" \n",
|
||
" for new_tick in test_data:\n",
|
||
" data_window.pop(0)\n",
|
||
" data_window.append(new_tick)\n",
|
||
" sum_tick = 0\n",
|
||
" for tick in data_window:\n",
|
||
" sum_tick += tick\n",
|
||
" ma.append(sum_tick/ma_length)\n",
|
||
"\n",
|
||
" return ma\n",
|
||
"\n",
|
||
"# 运行测试\n",
|
||
"start = time.time()\n",
|
||
"\n",
|
||
"for i in range(test_times):\n",
|
||
" result = ma_numba(data, ma_length)\n",
|
||
"\n",
|
||
"time_per_test = (time.time()-start)/test_times\n",
|
||
"time_per_point = time_per_test/(data_length - ma_length)\n",
|
||
" \n",
|
||
"print u'单次耗时:%s秒' %time_per_test\n",
|
||
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
|
||
"print u'最后10个移动平均值:', result[-10:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"单次耗时:0.0348000049591秒\n",
|
||
"单个数据点耗时:0.349748793559微秒\n",
|
||
"最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# 将均线计算改写为高速算法\n",
|
||
"def ma_online(data, ma_length):\n",
|
||
" ma = []\n",
|
||
" data_window = data[:ma_length]\n",
|
||
" test_data = data[ma_length:]\n",
|
||
" \n",
|
||
" # 缓存的窗口内数据求和结果\n",
|
||
" sum_buffer = 0\n",
|
||
" \n",
|
||
" for new_tick in test_data:\n",
|
||
" old_tick = data_window.pop(0)\n",
|
||
" data_window.append(new_tick)\n",
|
||
" \n",
|
||
" # 如果缓存结果为空,则先通过遍历求第一次结果\n",
|
||
" if not sum_buffer:\n",
|
||
" sum_tick = 0\n",
|
||
" for tick in data_window:\n",
|
||
" sum_tick += tick\n",
|
||
" ma.append(sum_tick/ma_length)\n",
|
||
" \n",
|
||
" # 将求和结果缓存下来\n",
|
||
" sum_buffer = sum_tick\n",
|
||
" else:\n",
|
||
" # 这里的算法将计算复杂度从O(n)降低到了O(1)\n",
|
||
" sum_buffer = sum_buffer - old_tick + new_tick\n",
|
||
" ma.append(sum_buffer/ma_length)\n",
|
||
" \n",
|
||
" return ma\n",
|
||
"\n",
|
||
"# 运行测试\n",
|
||
"start = time.time()\n",
|
||
"\n",
|
||
"for i in range(test_times):\n",
|
||
" result = ma_online(data, ma_length)\n",
|
||
" \n",
|
||
"time_per_test = (time.time()-start)/test_times\n",
|
||
"time_per_point = time_per_test/(data_length - ma_length)\n",
|
||
"\n",
|
||
"print u'单次耗时:%s秒' %time_per_test\n",
|
||
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
|
||
"print u'最后10个移动平均值:', result[-10:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"单次耗时:0.0290000200272秒\n",
|
||
"单个数据点耗时:0.29145748771微秒\n",
|
||
"最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# 高速算法和numba结合,ma_online_numba函数和ma_online完全一样\n",
|
||
"@numba.jit\n",
|
||
"def ma_online_numba(data, ma_length):\n",
|
||
" ma = []\n",
|
||
" data_window = data[:ma_length]\n",
|
||
" test_data = data[ma_length:]\n",
|
||
" \n",
|
||
" sum_buffer = 0\n",
|
||
" \n",
|
||
" for new_tick in test_data:\n",
|
||
" old_tick = data_window.pop(0)\n",
|
||
" data_window.append(new_tick)\n",
|
||
" \n",
|
||
" if not sum_buffer:\n",
|
||
" sum_tick = 0\n",
|
||
" for tick in data_window:\n",
|
||
" sum_tick += tick\n",
|
||
" ma.append(sum_tick/ma_length)\n",
|
||
" sum_buffer = sum_tick\n",
|
||
" else:\n",
|
||
" sum_buffer = sum_buffer - old_tick + new_tick\n",
|
||
" ma.append(sum_buffer/ma_length)\n",
|
||
"\n",
|
||
" return ma\n",
|
||
"\n",
|
||
"# 运行测试\n",
|
||
"start = time.time()\n",
|
||
"\n",
|
||
"for i in range(test_times):\n",
|
||
" result = ma_online_numba(data, ma_length)\n",
|
||
" \n",
|
||
"time_per_test = (time.time()-start)/test_times\n",
|
||
"time_per_point = time_per_test/(data_length - ma_length)\n",
|
||
"\n",
|
||
"print u'单次耗时:%s秒' %time_per_test\n",
|
||
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
|
||
"print u'最后10个移动平均值:', result[-10:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"\"\"\"\n",
|
||
"# 基础的cython加速\n",
|
||
"def ma_cython(data, ma_length):\n",
|
||
" ma = []\n",
|
||
" data_window = data[:ma_length]\n",
|
||
" test_data = data[ma_length:]\n",
|
||
" \n",
|
||
" for new_tick in test_data:\n",
|
||
" data_window.pop(0)\n",
|
||
" data_window.append(new_tick)\n",
|
||
" \n",
|
||
" sum_tick = 0\n",
|
||
" for tick in data_window:\n",
|
||
" sum_tick += tick\n",
|
||
" ma.append(sum_tick/ma_length)\n",
|
||
" \n",
|
||
" return ma\n",
|
||
" \n",
|
||
"\n",
|
||
"# cython和高速算法\n",
|
||
"def ma_cython_online(data, ma_length):\n",
|
||
" # 静态声明变量\n",
|
||
" cdef int sum_buffer, sum_tick, old_tick, new_tick\n",
|
||
"\n",
|
||
" ma = []\n",
|
||
" data_window = data[:ma_length]\n",
|
||
" test_data = data[ma_length:]\n",
|
||
" sum_buffer = 0\n",
|
||
" \n",
|
||
" for new_tick in test_data:\n",
|
||
" old_tick = data_window.pop(0)\n",
|
||
" data_window.append(new_tick)\n",
|
||
" \n",
|
||
" if not sum_buffer:\n",
|
||
" sum_tick = 0\n",
|
||
" for tick in data_window:\n",
|
||
" sum_tick += tick\n",
|
||
" ma.append(sum_tick/ma_length)\n",
|
||
" \n",
|
||
" sum_buffer = sum_tick\n",
|
||
" else:\n",
|
||
" sum_buffer = sum_buffer - old_tick + new_tick\n",
|
||
" ma.append(sum_buffer/ma_length)\n",
|
||
" \n",
|
||
" return ma\n",
|
||
"\"\"\""
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"单次耗时:0.600800013542秒\n",
|
||
"单个数据点耗时:6.03819109088微秒\n",
|
||
"最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# 基础cython加速\n",
|
||
"from test import ma_cython\n",
|
||
"\n",
|
||
"start = time.time()\n",
|
||
"\n",
|
||
"for i in range(test_times):\n",
|
||
" result = ma_cython(data, ma_length)\n",
|
||
" \n",
|
||
"time_per_test = (time.time()-start)/test_times\n",
|
||
"time_per_point = time_per_test/(data_length - ma_length)\n",
|
||
"\n",
|
||
"print u'单次耗时:%s秒' %time_per_test\n",
|
||
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
|
||
"print u'最后10个移动平均值:', result[-10:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"单次耗时:0.00980000495911秒\n",
|
||
"单个数据点耗时:0.0984925121518微秒\n",
|
||
"最后10个移动平均值: [49.804, 49.832, 49.8, 49.9, 49.892, 49.888, 49.928, 50.052, 50.106, 49.982]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# 高速算法和cython结合\n",
|
||
"from test import ma_cython_online\n",
|
||
"\n",
|
||
"start = time.time()\n",
|
||
"\n",
|
||
"for i in range(test_times):\n",
|
||
" result = ma_cython_online(data, ma_length)\n",
|
||
"\n",
|
||
"time_per_test = (time.time()-start)/test_times\n",
|
||
"time_per_point = time_per_test/(data_length - ma_length)\n",
|
||
"\n",
|
||
"print u'单次耗时:%s秒' %time_per_test\n",
|
||
"print u'单个数据点耗时:%s微秒' %(time_per_point*1000000)\n",
|
||
"print u'最后10个移动平均值:', result[-10:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 2",
|
||
"language": "python",
|
||
"name": "python2"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 2
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython2",
|
||
"version": "2.7.11"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 0
|
||
}
|