#include "stdafx.h"
#include <stdio.h>
typedef struct Result
{
Result():low(0), high(0), sum(0){}
Result(int lowVal, int highVal, int sumVal) : low(lowVal)
, high(highVal), sum(sumVal) {}
int low;
int high;
int sum;
}Result;
Result findMaxCrossArray(int* arr, int low, int mid, int high)
{
int crossleft = 0;
int crossright = 0;
int crossSum = 0;
int leftSum = -0xffff;
int sum = 0;
for (int i = mid; i >= low; i--)
{
sum += arr[i];
if (sum > leftSum)
{
leftSum = sum;
crossleft = i;
}
}
int rightsum = -0xffff;
sum = 0;
for (int j = mid + 1; j <= high; j++)
{
sum += arr[j];
if (sum > leftSum)
{
rightsum = sum;
crossright = j;
}
}
return Result(crossleft, crossright, leftSum + rightsum);
}
Result findMaxMumSubArray(int* a, int low, int high)
{
printf("called findMaxMumSubArray>> Low:%d, High:%d\n", low, high);
Result* pResult = &Result();
if (low == high)
{
pResult = &Result(low, high, a[low]);
printf("oneItem------>low:%d, high:%d, sum:%d\n", pResult->low, pResult->high, pResult->sum);
}
else
{
int mid = (low + high) / 2;
Result leftResult = findMaxMumSubArray(a, low, mid);
printf("left------>low:%d, high:%d, sum:%d\n", leftResult.low, leftResult.high, leftResult.sum);
Result rightResult = findMaxMumSubArray(a, mid + 1, high);
printf("right------>low:%d, high:%d, sum:%d\n", rightResult.low, rightResult.high, rightResult.sum);
Result crossResult = findMaxCrossArray(a, low, mid, high);
printf("cross------>low:%d, high:%d, sum:%d\n", crossResult.low, crossResult.high, crossResult.sum);
if (leftResult.sum >= rightResult.sum && leftResult.sum >= crossResult.sum)
{
pResult = &leftResult;
}
else if (rightResult.sum >= leftResult.sum && rightResult.sum >= crossResult.sum)
{
pResult = &rightResult;
}
else
pResult = &crossResult;
}
printf(">>>>>>>>>result------>low:%d, high:%d, sum:%d\n", pResult->low, pResult->high, pResult->sum);
return *pResult;
}
int _tmain(int argc, _TCHAR* argv[])
{
int arr[] = { 13, -3, -25, 20, -3, -16, -23, 18, 20, -7, 12, -5, -22, 15, -4, 7 };
Result r = findMaxMumSubArray(arr, 0, 15);
printf("low:%d, high:%d, sum:%d\n", r.low, r.high, r.sum);
getchar();
return 0;
}
-----------------------------------------------------------------------------
输出:
-------------------------------------------------------------------------------
called findMaxMumSubArray>> Low:0, High:15
called findMaxMumSubArray>> Low:0, High:7
called findMaxMumSubArray>> Low:0, High:3
called findMaxMumSubArray>> Low:0, High:1
called findMaxMumSubArray>> Low:0, High:0
oneItem------>low:0, high:0, sum:13
>>>>>>>>>result------>low:0, high:0, sum:13
left------>low:0, high:0, sum:13
called findMaxMumSubArray>> Low:1, High:1
oneItem------>low:1, high:1, sum:-3
>>>>>>>>>result------>low:1, high:1, sum:-3
right------>low:1, high:1, sum:-3
cross------>low:0, high:0, sum:-65522
>>>>>>>>>result------>low:0, high:0, sum:13
left------>low:0, high:0, sum:13
called findMaxMumSubArray>> Low:2, High:3
called findMaxMumSubArray>> Low:2, High:2
oneItem------>low:2, high:2, sum:-25
>>>>>>>>>result------>low:2, high:2, sum:-25
left------>low:2, high:2, sum:-25
called findMaxMumSubArray>> Low:3, High:3
oneItem------>low:3, high:3, sum:20
>>>>>>>>>result------>low:3, high:3, sum:20
right------>low:3, high:3, sum:20
cross------>low:2, high:3, sum:-5
>>>>>>>>>result------>low:3, high:3, sum:20
right------>low:3, high:3, sum:20
cross------>low:0, high:0, sum:-65525
>>>>>>>>>result------>low:3, high:3, sum:20
left------>low:3, high:3, sum:20
called findMaxMumSubArray>> Low:4, High:7
called findMaxMumSubArray>> Low:4, High:5
called findMaxMumSubArray>> Low:4, High:4
oneItem------>low:4, high:4, sum:-3
>>>>>>>>>result------>low:4, high:4, sum:-3
left------>low:4, high:4, sum:-3
called findMaxMumSubArray>> Low:5, High:5
oneItem------>low:5, high:5, sum:-16
>>>>>>>>>result------>low:5, high:5, sum:-16
right------>low:5, high:5, sum:-16
cross------>low:4, high:0, sum:-65538
>>>>>>>>>result------>low:4, high:4, sum:-3
left------>low:4, high:4, sum:-3
called findMaxMumSubArray>> Low:6, High:7
called findMaxMumSubArray>> Low:6, High:6
oneItem------>low:6, high:6, sum:-23
>>>>>>>>>result------>low:6, high:6, sum:-23
left------>low:6, high:6, sum:-23
called findMaxMumSubArray>> Low:7, High:7
oneItem------>low:7, high:7, sum:18
>>>>>>>>>result------>low:7, high:7, sum:18
right------>low:7, high:7, sum:18
cross------>low:6, high:7, sum:-5
>>>>>>>>>result------>low:7, high:7, sum:18
right------>low:7, high:7, sum:18
cross------>low:5, high:7, sum:-21
>>>>>>>>>result------>low:7, high:7, sum:18
right------>low:7, high:7, sum:18
cross------>low:3, high:0, sum:-65515
>>>>>>>>>result------>low:3, high:3, sum:20
left------>low:3, high:3, sum:20
called findMaxMumSubArray>> Low:8, High:15
called findMaxMumSubArray>> Low:8, High:11
called findMaxMumSubArray>> Low:8, High:9
called findMaxMumSubArray>> Low:8, High:8
oneItem------>low:8, high:8, sum:20
>>>>>>>>>result------>low:8, high:8, sum:20
left------>low:8, high:8, sum:20
called findMaxMumSubArray>> Low:9, High:9
oneItem------>low:9, high:9, sum:-7
>>>>>>>>>result------>low:9, high:9, sum:-7
right------>low:9, high:9, sum:-7
cross------>low:8, high:0, sum:-65515
>>>>>>>>>result------>low:8, high:8, sum:20
left------>low:8, high:8, sum:20
called findMaxMumSubArray>> Low:10, High:11
called findMaxMumSubArray>> Low:10, High:10
oneItem------>low:10, high:10, sum:12
>>>>>>>>>result------>low:10, high:10, sum:12
left------>low:10, high:10, sum:12
called findMaxMumSubArray>> Low:11, High:11
oneItem------>low:11, high:11, sum:-5
>>>>>>>>>result------>low:11, high:11, sum:-5
right------>low:11, high:11, sum:-5
cross------>low:10, high:0, sum:-65523
>>>>>>>>>result------>low:10, high:10, sum:12
right------>low:10, high:10, sum:12
cross------>low:8, high:0, sum:-65522
>>>>>>>>>result------>low:8, high:8, sum:20
left------>low:8, high:8, sum:20
called findMaxMumSubArray>> Low:12, High:15
called findMaxMumSubArray>> Low:12, High:13
called findMaxMumSubArray>> Low:12, High:12
oneItem------>low:12, high:12, sum:-22
>>>>>>>>>result------>low:12, high:12, sum:-22
left------>low:12, high:12, sum:-22
called findMaxMumSubArray>> Low:13, High:13
oneItem------>low:13, high:13, sum:15
>>>>>>>>>result------>low:13, high:13, sum:15
right------>low:13, high:13, sum:15
cross------>low:12, high:13, sum:-7
>>>>>>>>>result------>low:13, high:13, sum:15
left------>low:13, high:13, sum:15
called findMaxMumSubArray>> Low:14, High:15
called findMaxMumSubArray>> Low:14, High:14
oneItem------>low:14, high:14, sum:-4
>>>>>>>>>result------>low:14, high:14, sum:-4
left------>low:14, high:14, sum:-4
called findMaxMumSubArray>> Low:15, High:15
oneItem------>low:15, high:15, sum:7
>>>>>>>>>result------>low:15, high:15, sum:7
right------>low:15, high:15, sum:7
cross------>low:14, high:15, sum:3
>>>>>>>>>result------>low:15, high:15, sum:7
right------>low:15, high:15, sum:7
cross------>low:13, high:0, sum:-65520
>>>>>>>>>result------>low:13, high:13, sum:15
right------>low:13, high:13, sum:15
cross------>low:8, high:0, sum:-65515
>>>>>>>>>result------>low:8, high:8, sum:20
right------>low:8, high:8, sum:20
cross------>low:7, high:11, sum:38
>>>>>>>>>result------>low:7, high:11, sum:38
low:7, high:11, sum:38
-----------------------------------------------------------------------------------------
问题描述:
给定一只股票在某段时间内的历史价格变化曲线,找出一个能够实现收益最大化的时间段。
理解:
为找出最大化的收益,需要考虑的是在买进和卖出时的价格变化幅度,因此从该股票的每日变化幅度来考虑问题比较合适。由此,可以将上述问题稍作变形:给定一只股票在某段时间内的每日变化幅度,找出一个合适的买进和卖出时间,以实现收益最大化。因此,将输入数据转换如下,并试图在整个时间段中找到一个累加和最大的子区间,亦即最大子数组。
天 | 0 | 1 | 2 | 3 | 4 |
价格 | 10 | 11 | 7 | 10 | 6 |
变化 | 1 | -4 | 3 | -4 |
暴力求解方法:
首先能够想到的是在一个给定数组(区间)中,其子数组(子区间)的个数是C(2,n),很容易就能遍历完所有子数组从而找出最大的那个,其最坏情况渐进时间复杂度是Θ(n2)。假设每日变化幅度保存在数组A中(A的下标从1到n),A.length表示A的元素个数,最终结果以元组形式返回;给出伪码如下:
BRUTE_FORCE(A)
i = 1
sum = -infinity
for i <= A.length, inc by 1
j = i
last_sum = 0
for j <= A.length, inc by 1
last_sum += A[j]
if last_sum > sum
sum = last_sum
start = i
end = j
return (start, end, sum)
分治求解方法:
上述方法的渐进时间复杂度差强人意。类比于归并排序,有时采用分治策略能够获得更好的时间复杂度。分治策略通常包含分解成子问题、解决子问题、合并子问题。由此可以推出大致的解决思路:首先依然假设数据输入如上一个方法那样,然后考虑将A[1...n]拆分为规模大致相同的两个子数组left[1...mid]和right[mid+1...n],其中mid=(1+n)/2向下取整,那么可以肯定,最大子数组要么在这两个子数组中,要么横跨这两个子数组,因此可以分别求解这三种情况,取其中最大的子数组并返回即可。
对于left/right子数组可递归求解,而对于横跨两个子数组的情况,如果能够使得该情况下的求解时间复杂度为O(n),那么应该能让整体的最坏时间复杂度低于Θ(n2)。如果仅仅是通过遍历所有包含A[mid]和A[mid+1]的子数组来找最大子数组,那么很显然仅求解该情况就需要Θ(n2)的时间。可以推断横跨两个子数组的最大子数组,必须由两个分别在left/right中的子数组组成,这两个子数组在分别包含了A[mid]和A[mid+1]的所有子数组中是最大的;因为如果存在一个不满足上述条件的最大子数组,那么总可以用上述方法找到一个更大的子数组。
根据上述思路,很容易推知求解横跨两个子数组的情况只需要O(n)的时间。由此给出伪码如下:
(1)子过程:找出横跨两个子数组的最大子数组
FIND_CROSSING_MAX_SUBARRAY(A, low, mid, high)
left_sum = -infinity
sum = 0
i = mid
for i >= low, dec by 1
sum += A[i]
if sum > left_sum
left_sum = sum
left_index = i
right_sum = -infinity
sum = 0
i = mid + 1
for i <= high, inc by 1
sum += A[i]
if sum > right_sum
right_sum = sum
right_index = i
return (left_index, right_index, left_sum+right_sum)
(2)主过程:分治法找出最大子数组
FIND_MAX_SUBARRAY(A, low, high)
if low == high
return (low, high, A[low])
else
mid = down_trunc((low + high) / 2)
(left_start, left_end, left_sum) =
FIND_MAX_SUBARRAY(A, low, mid)
(right_start, right_end, right_sum) =
FIND_MAX_SUBARRAY(A, mid+1, high)
(cross_start, cross_end, cross_sum) =
FIND_CROSSING_MAX_SUBARRAY(A, low, mid, high)
if left_sum > right_sum and left_sum > cross_sum
return (left_start, left_end, left_sum)
else if right_sum > left_sum and right_sum > cross_sum
return (right_start, right_end, right_sum)
else
return (cross_start, cross_end, cross_sum)
可以看出上述算法渐进时间复杂度为Θ(nlg(n))。
缩减问题规模的方法:
在查找过程中,是否可以根据现有的信息,来缩减需要排查的子数组个数,进而获得更好的时间复杂度呢?一个思路是不再重复检查以前累加过的元素,即从左至右累加元素,保存其中的最大子数组,如果在加入一个元素后累加和为负数,则从该元素的后一个元素重新累加,直至整个数组遍历完毕。该思路有效的前提是证明以下几个假设:
- 可以将最大子数组来源分为三种:已经遍历完的数组部分、未遍历的数组部分以及跨越这两部分的子数组
- 可以假设当从左至右累加直至累加和为负,所得的最大子数组是当前已遍历完的数组部分中最大的
- 可以假设当累加和为负时,潜在的最大子数组不可能从该元素或该元素左边的元素开始
假设1不证自明。
假设从A[1]累加到A[i]时第一次遇到其累加和为负(1<=i<=n),那么A[i]一定为负,且A[1]+...+A[i-1]>=0。当i<=2时,显然此时假设2成立。当i>2时,可以认为在A[1]...A[i]中,所有子数组可分为三种:从A[1]开始向右拓展、从A[i]开始向左拓展以及不包含A[1]和A[i]的中间子数组;显然从A[i]向左拓展的不可能是最大子数组,而如果不包含A[1]和A[i]的中间子数组是最大子数组,那么可以使该中间子数组加上其左边的部分构成一个新的子数组,而且该子数组总是大于等于这个中间子数组,因为其左边部分总是大于等于0,所以该情况下假设2也得证。综合来看假设2是成立的。
对于假设3,显然潜在的最大子数组不可能从A[i]开始,因为A[i]<0。当潜在的最大子数组从A[i]的左边开始时,假设其从A[j]开始(1<=j<i)。显然j不能等于1,因为A[1]+...+A[i]<0;当j>1时,A[j]+...+A[i]一定是负数,因为A[1]+...+A[j-1]一定大于等于0而A[1]+...+A[i]一定为负。所以综合来看,从A[i]或者A[i]的左边寻找潜在的子数组是没有意义的。
伪码如下,时间复杂度为Θ(n)。对于全部是负数的情况,特殊处理即可,不影响时间复杂度。
LINEAR_SEARCH_MAX_SUBARRAY(A)
sum = -infinity
start = 0
end = 0
cur_sum = 0
cur_start_index = 1
i = 1
for i <= A.length, inc by 1
cur_sum += A[i]
if cur_sum < 0
cur_sum = 0
cur_start_index = i + 1
else
if sum < cur_sum
sum = cur_sum
start = cur_start_index
end = i
return (start, end, sum)
#include "stdafx.h"
#include <stdio.h>
typedef struct Result
{
Result():low(0), high(0), sum(0){}
Result(int lowVal, int highVal, int sumVal) : low(lowVal)
, high(highVal), sum(sumVal) {}
int low;
int high;
int sum;
}Result;
Result findMaxCrossArray(int* arr, int low, int mid, int high)
{
int crossleft = 0;
int crossright = 0;
int crossSum = 0;
int leftSum = -0xffff;
int sum = 0;
for (int i = mid; i >= low; i--)
{
sum += arr[i];
if (sum > leftSum)
{
leftSum = sum;
crossleft = i;
}
}
int rightsum = -0xffff;
sum = 0;
for (int j = mid + 1; j <= high; j++)
{
sum += arr[j];
if (sum > leftSum)
{
rightsum = sum;
crossright = j;
}
}
return Result(crossleft, crossright, leftSum + rightsum);
}
Result findMaxMumSubArray(int* a, int low, int high)
{
printf("called findMaxMumSubArray>> Low:%d, High:%d\n", low, high);
Result* pResult = &Result();
if (low == high)
{
pResult = &Result(low, high, a[low]);
printf("oneItem------>low:%d, high:%d, sum:%d\n", pResult->low, pResult->high, pResult->sum);
}
else
{
int mid = (low + high) / 2;
Result leftResult = findMaxMumSubArray(a, low, mid);
printf("left------>low:%d, high:%d, sum:%d\n", leftResult.low, leftResult.high, leftResult.sum);
Result rightResult = findMaxMumSubArray(a, mid + 1, high);
printf("right------>low:%d, high:%d, sum:%d\n", rightResult.low, rightResult.high, rightResult.sum);
Result crossResult = findMaxCrossArray(a, low, mid, high);
printf("cross------>low:%d, high:%d, sum:%d\n", crossResult.low, crossResult.high, crossResult.sum);
if (leftResult.sum >= rightResult.sum && leftResult.sum >= crossResult.sum)
{
pResult = &leftResult;
}
else if (rightResult.sum >= leftResult.sum && rightResult.sum >= crossResult.sum)
{
pResult = &rightResult;
}
else
pResult = &crossResult;
}
printf(">>>>>>>>>result------>low:%d, high:%d, sum:%d\n", pResult->low, pResult->high, pResult->sum);
return *pResult;
}
int _tmain(int argc, _TCHAR* argv[])
{
int arr[] = { 13, -3, -25, 20, -3, -16, -23, 18, 20, -7, 12, -5, -22, 15, -4, 7 };
Result r = findMaxMumSubArray(arr, 0, 15);
printf("low:%d, high:%d, sum:%d\n", r.low, r.high, r.sum);
getchar();
return 0;
}
-----------------------------------------------------------------------------
输出:
-------------------------------------------------------------------------------
called findMaxMumSubArray>> Low:0, High:15
called findMaxMumSubArray>> Low:0, High:7
called findMaxMumSubArray>> Low:0, High:3
called findMaxMumSubArray>> Low:0, High:1
called findMaxMumSubArray>> Low:0, High:0
oneItem------>low:0, high:0, sum:13
>>>>>>>>>result------>low:0, high:0, sum:13
left------>low:0, high:0, sum:13
called findMaxMumSubArray>> Low:1, High:1
oneItem------>low:1, high:1, sum:-3
>>>>>>>>>result------>low:1, high:1, sum:-3
right------>low:1, high:1, sum:-3
cross------>low:0, high:0, sum:-65522
>>>>>>>>>result------>low:0, high:0, sum:13
left------>low:0, high:0, sum:13
called findMaxMumSubArray>> Low:2, High:3
called findMaxMumSubArray>> Low:2, High:2
oneItem------>low:2, high:2, sum:-25
>>>>>>>>>result------>low:2, high:2, sum:-25
left------>low:2, high:2, sum:-25
called findMaxMumSubArray>> Low:3, High:3
oneItem------>low:3, high:3, sum:20
>>>>>>>>>result------>low:3, high:3, sum:20
right------>low:3, high:3, sum:20
cross------>low:2, high:3, sum:-5
>>>>>>>>>result------>low:3, high:3, sum:20
right------>low:3, high:3, sum:20
cross------>low:0, high:0, sum:-65525
>>>>>>>>>result------>low:3, high:3, sum:20
left------>low:3, high:3, sum:20
called findMaxMumSubArray>> Low:4, High:7
called findMaxMumSubArray>> Low:4, High:5
called findMaxMumSubArray>> Low:4, High:4
oneItem------>low:4, high:4, sum:-3
>>>>>>>>>result------>low:4, high:4, sum:-3
left------>low:4, high:4, sum:-3
called findMaxMumSubArray>> Low:5, High:5
oneItem------>low:5, high:5, sum:-16
>>>>>>>>>result------>low:5, high:5, sum:-16
right------>low:5, high:5, sum:-16
cross------>low:4, high:0, sum:-65538
>>>>>>>>>result------>low:4, high:4, sum:-3
left------>low:4, high:4, sum:-3
called findMaxMumSubArray>> Low:6, High:7
called findMaxMumSubArray>> Low:6, High:6
oneItem------>low:6, high:6, sum:-23
>>>>>>>>>result------>low:6, high:6, sum:-23
left------>low:6, high:6, sum:-23
called findMaxMumSubArray>> Low:7, High:7
oneItem------>low:7, high:7, sum:18
>>>>>>>>>result------>low:7, high:7, sum:18
right------>low:7, high:7, sum:18
cross------>low:6, high:7, sum:-5
>>>>>>>>>result------>low:7, high:7, sum:18
right------>low:7, high:7, sum:18
cross------>low:5, high:7, sum:-21
>>>>>>>>>result------>low:7, high:7, sum:18
right------>low:7, high:7, sum:18
cross------>low:3, high:0, sum:-65515
>>>>>>>>>result------>low:3, high:3, sum:20
left------>low:3, high:3, sum:20
called findMaxMumSubArray>> Low:8, High:15
called findMaxMumSubArray>> Low:8, High:11
called findMaxMumSubArray>> Low:8, High:9
called findMaxMumSubArray>> Low:8, High:8
oneItem------>low:8, high:8, sum:20
>>>>>>>>>result------>low:8, high:8, sum:20
left------>low:8, high:8, sum:20
called findMaxMumSubArray>> Low:9, High:9
oneItem------>low:9, high:9, sum:-7
>>>>>>>>>result------>low:9, high:9, sum:-7
right------>low:9, high:9, sum:-7
cross------>low:8, high:0, sum:-65515
>>>>>>>>>result------>low:8, high:8, sum:20
left------>low:8, high:8, sum:20
called findMaxMumSubArray>> Low:10, High:11
called findMaxMumSubArray>> Low:10, High:10
oneItem------>low:10, high:10, sum:12
>>>>>>>>>result------>low:10, high:10, sum:12
left------>low:10, high:10, sum:12
called findMaxMumSubArray>> Low:11, High:11
oneItem------>low:11, high:11, sum:-5
>>>>>>>>>result------>low:11, high:11, sum:-5
right------>low:11, high:11, sum:-5
cross------>low:10, high:0, sum:-65523
>>>>>>>>>result------>low:10, high:10, sum:12
right------>low:10, high:10, sum:12
cross------>low:8, high:0, sum:-65522
>>>>>>>>>result------>low:8, high:8, sum:20
left------>low:8, high:8, sum:20
called findMaxMumSubArray>> Low:12, High:15
called findMaxMumSubArray>> Low:12, High:13
called findMaxMumSubArray>> Low:12, High:12
oneItem------>low:12, high:12, sum:-22
>>>>>>>>>result------>low:12, high:12, sum:-22
left------>low:12, high:12, sum:-22
called findMaxMumSubArray>> Low:13, High:13
oneItem------>low:13, high:13, sum:15
>>>>>>>>>result------>low:13, high:13, sum:15
right------>low:13, high:13, sum:15
cross------>low:12, high:13, sum:-7
>>>>>>>>>result------>low:13, high:13, sum:15
left------>low:13, high:13, sum:15
called findMaxMumSubArray>> Low:14, High:15
called findMaxMumSubArray>> Low:14, High:14
oneItem------>low:14, high:14, sum:-4
>>>>>>>>>result------>low:14, high:14, sum:-4
left------>low:14, high:14, sum:-4
called findMaxMumSubArray>> Low:15, High:15
oneItem------>low:15, high:15, sum:7
>>>>>>>>>result------>low:15, high:15, sum:7
right------>low:15, high:15, sum:7
cross------>low:14, high:15, sum:3
>>>>>>>>>result------>low:15, high:15, sum:7
right------>low:15, high:15, sum:7
cross------>low:13, high:0, sum:-65520
>>>>>>>>>result------>low:13, high:13, sum:15
right------>low:13, high:13, sum:15
cross------>low:8, high:0, sum:-65515
>>>>>>>>>result------>low:8, high:8, sum:20
right------>low:8, high:8, sum:20
cross------>low:7, high:11, sum:38
>>>>>>>>>result------>low:7, high:11, sum:38
low:7, high:11, sum:38
-----------------------------------------------------------------------------------------
问题描述:
给定一只股票在某段时间内的历史价格变化曲线,找出一个能够实现收益最大化的时间段。
理解:
为找出最大化的收益,需要考虑的是在买进和卖出时的价格变化幅度,因此从该股票的每日变化幅度来考虑问题比较合适。由此,可以将上述问题稍作变形:给定一只股票在某段时间内的每日变化幅度,找出一个合适的买进和卖出时间,以实现收益最大化。因此,将输入数据转换如下,并试图在整个时间段中找到一个累加和最大的子区间,亦即最大子数组。
天 | 0 | 1 | 2 | 3 | 4 |
价格 | 10 | 11 | 7 | 10 | 6 |
变化 | 1 | -4 | 3 | -4 |
暴力求解方法:
首先能够想到的是在一个给定数组(区间)中,其子数组(子区间)的个数是C(2,n),很容易就能遍历完所有子数组从而找出最大的那个,其最坏情况渐进时间复杂度是Θ(n2)。假设每日变化幅度保存在数组A中(A的下标从1到n),A.length表示A的元素个数,最终结果以元组形式返回;给出伪码如下:
BRUTE_FORCE(A)
i = 1
sum = -infinity
for i <= A.length, inc by 1
j = i
last_sum = 0
for j <= A.length, inc by 1
last_sum += A[j]
if last_sum > sum
sum = last_sum
start = i
end = j
return (start, end, sum)
分治求解方法:
上述方法的渐进时间复杂度差强人意。类比于归并排序,有时采用分治策略能够获得更好的时间复杂度。分治策略通常包含分解成子问题、解决子问题、合并子问题。由此可以推出大致的解决思路:首先依然假设数据输入如上一个方法那样,然后考虑将A[1...n]拆分为规模大致相同的两个子数组left[1...mid]和right[mid+1...n],其中mid=(1+n)/2向下取整,那么可以肯定,最大子数组要么在这两个子数组中,要么横跨这两个子数组,因此可以分别求解这三种情况,取其中最大的子数组并返回即可。
对于left/right子数组可递归求解,而对于横跨两个子数组的情况,如果能够使得该情况下的求解时间复杂度为O(n),那么应该能让整体的最坏时间复杂度低于Θ(n2)。如果仅仅是通过遍历所有包含A[mid]和A[mid+1]的子数组来找最大子数组,那么很显然仅求解该情况就需要Θ(n2)的时间。可以推断横跨两个子数组的最大子数组,必须由两个分别在left/right中的子数组组成,这两个子数组在分别包含了A[mid]和A[mid+1]的所有子数组中是最大的;因为如果存在一个不满足上述条件的最大子数组,那么总可以用上述方法找到一个更大的子数组。
根据上述思路,很容易推知求解横跨两个子数组的情况只需要O(n)的时间。由此给出伪码如下:
(1)子过程:找出横跨两个子数组的最大子数组
FIND_CROSSING_MAX_SUBARRAY(A, low, mid, high)
left_sum = -infinity
sum = 0
i = mid
for i >= low, dec by 1
sum += A[i]
if sum > left_sum
left_sum = sum
left_index = i
right_sum = -infinity
sum = 0
i = mid + 1
for i <= high, inc by 1
sum += A[i]
if sum > right_sum
right_sum = sum
right_index = i
return (left_index, right_index, left_sum+right_sum)
(2)主过程:分治法找出最大子数组
FIND_MAX_SUBARRAY(A, low, high)
if low == high
return (low, high, A[low])
else
mid = down_trunc((low + high) / 2)
(left_start, left_end, left_sum) =
FIND_MAX_SUBARRAY(A, low, mid)
(right_start, right_end, right_sum) =
FIND_MAX_SUBARRAY(A, mid+1, high)
(cross_start, cross_end, cross_sum) =
FIND_CROSSING_MAX_SUBARRAY(A, low, mid, high)
if left_sum > right_sum and left_sum > cross_sum
return (left_start, left_end, left_sum)
else if right_sum > left_sum and right_sum > cross_sum
return (right_start, right_end, right_sum)
else
return (cross_start, cross_end, cross_sum)
可以看出上述算法渐进时间复杂度为Θ(nlg(n))。
缩减问题规模的方法:
在查找过程中,是否可以根据现有的信息,来缩减需要排查的子数组个数,进而获得更好的时间复杂度呢?一个思路是不再重复检查以前累加过的元素,即从左至右累加元素,保存其中的最大子数组,如果在加入一个元素后累加和为负数,则从该元素的后一个元素重新累加,直至整个数组遍历完毕。该思路有效的前提是证明以下几个假设:
- 可以将最大子数组来源分为三种:已经遍历完的数组部分、未遍历的数组部分以及跨越这两部分的子数组
- 可以假设当从左至右累加直至累加和为负,所得的最大子数组是当前已遍历完的数组部分中最大的
- 可以假设当累加和为负时,潜在的最大子数组不可能从该元素或该元素左边的元素开始
假设1不证自明。
假设从A[1]累加到A[i]时第一次遇到其累加和为负(1<=i<=n),那么A[i]一定为负,且A[1]+...+A[i-1]>=0。当i<=2时,显然此时假设2成立。当i>2时,可以认为在A[1]...A[i]中,所有子数组可分为三种:从A[1]开始向右拓展、从A[i]开始向左拓展以及不包含A[1]和A[i]的中间子数组;显然从A[i]向左拓展的不可能是最大子数组,而如果不包含A[1]和A[i]的中间子数组是最大子数组,那么可以使该中间子数组加上其左边的部分构成一个新的子数组,而且该子数组总是大于等于这个中间子数组,因为其左边部分总是大于等于0,所以该情况下假设2也得证。综合来看假设2是成立的。
对于假设3,显然潜在的最大子数组不可能从A[i]开始,因为A[i]<0。当潜在的最大子数组从A[i]的左边开始时,假设其从A[j]开始(1<=j<i)。显然j不能等于1,因为A[1]+...+A[i]<0;当j>1时,A[j]+...+A[i]一定是负数,因为A[1]+...+A[j-1]一定大于等于0而A[1]+...+A[i]一定为负。所以综合来看,从A[i]或者A[i]的左边寻找潜在的子数组是没有意义的。
伪码如下,时间复杂度为Θ(n)。对于全部是负数的情况,特殊处理即可,不影响时间复杂度。
LINEAR_SEARCH_MAX_SUBARRAY(A)
sum = -infinity
start = 0
end = 0
cur_sum = 0
cur_start_index = 1
i = 1
for i <= A.length, inc by 1
cur_sum += A[i]
if cur_sum < 0
cur_sum = 0
cur_start_index = i + 1
else
if sum < cur_sum
sum = cur_sum
start = cur_start_index
end = i
return (start, end, sum)