实验中的问题

在merge_sort中递归地调用pthread_create创建线程对子数组排序,这导致实际运行中创建线程的总次数为O(n)(其中n为输入的数字个数;递归树共有约2n个节点,线程数随递归深度呈指数增长),这显然是得不偿失的:使用超出CPU核心线程数的线程数量进行排序并不能提高速度,反而会因为创建线程和上下文切换而降低性能。实际测试也证明了这一点,在对下文中的1Mints.txt进行排序时可发现,能使该程序正常运行的输入数据量在13000左右,而且排序速度不及单线程排序,因此本实验的代码是有问题的。

以下为我的做法,由于时间比较仓促,代码中仍有许多不足之处,仅作抛砖引玉之用,如有疏漏,还请指出。

关于多路归并,可参考陈硕《Linux多线程服务端编程》12.8.3节“用{make, push, pop}_heap()实现多路归并”,P549。

实验的思路很简单:

1、将原始数组分为N段,使用N个线程将各个分段分别排序

2、将各个分段归并得到最终的排序结果

多线程即体现在使用多个线程分别排序输入的各个分段。

首先,写出排序函数,这里使用归并排序,代码如下:

mergeSort.h
#ifndef MERGESORT_H_
#define MERGESORT_H_

/*
 * Public interface of the merge sort module.
 *
 * The merge step is a file-local (static) helper of the implementation file
 * and is deliberately not declared here: a static prototype in a header gives
 * every including translation unit a declared-but-undefined function.
 */

/*
 * Sort nums[lo..hi] (both bounds inclusive) into ascending order.
 *
 * nums  array holding the values to sort; sorted in place
 * lo    index of the first element of the range
 * hi    index of the last element of the range
 * temp  scratch array, indexed with the same lo..hi range as nums
 */
extern void mergeSort(int nums[], int lo, int hi, int temp[]);

#endif
mergeSort.c
#include "mergeSort.h"
/*
 * Merge the adjacent sorted runs nums[lo..mid] and nums[mid+1..hi]
 * (inclusive bounds) into a single sorted run stored back in nums[lo..hi].
 * temp[lo..hi] receives a copy of the range and is read while nums is
 * being overwritten.
 */
static void merge(int nums[], int lo, int mid, int hi, int temp[])
{
    int left = lo;        /* next unread element of the left run  */
    int right = mid + 1;  /* next unread element of the right run */
    int out;

    /* Snapshot the whole range so the merge can write into nums directly. */
    for (out = lo; out <= hi; ++out) {
        temp[out] = nums[out];
    }

    out = lo;
    while (out <= hi) {
        int take_left;

        if (left > mid) {
            take_left = 0;                          /* left run exhausted  */
        } else if (right > hi) {
            take_left = 1;                          /* right run exhausted */
        } else {
            take_left = temp[left] < temp[right];   /* ties go to the right run */
        }

        if (take_left) {
            nums[out] = temp[left];
            ++left;
        } else {
            nums[out] = temp[right];
            ++right;
        }
        ++out;
    }
}
/*
 * Recursive top-down merge sort of nums[lo..hi] (inclusive bounds).
 * Ranges with fewer than two elements are left untouched; otherwise both
 * halves are sorted recursively and then combined with merge().
 * temp is passed through to merge() as its scratch array.
 */
void mergeSort(int nums[], int lo, int hi, int temp[])
{
    if (lo < hi) {
        /* lo + half-span instead of (lo + hi) / 2 keeps the sum from overflowing. */
        const int half_span = (hi - lo) / 2;
        const int mid = lo + half_span;

        mergeSort(nums, lo, mid, temp);
        mergeSort(nums, mid + 1, hi, temp);
        merge(nums, lo, mid, hi, temp);
    }
}

然后是主程序,读入输入并创建线程分段排序(这里以4线程为例)seg保存每个分段的起始与终止下标

mergeN()将分段排序后的数组归并并输出

程序使用barrier等待排序线程完毕

思路

关键在于如何将多路输入归并得到最终输出,这里采用了小顶堆的方案,即堆顶元素总是最小的。将每个节点与分段关联起来,建立最小堆,每次取出堆顶的元素将其写入输出,如果堆顶元素所在的分段仍有数据可读,则将该分段的下一个元素加入堆中并重新使堆有序化,再取出最小元素,如此往复,直到堆为空。

堆节点示意图

java多线程 自增序列_java线程实现排序

其中,num为具体的数值,segNo用来指示处于哪个分段,index用于指示处于在分段中的位置

多路归并示意图

java多线程 自增序列_sed_02

java多线程 自增序列_#include_03

multi_thread_sort.c
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>
#include "mergeSort.h"
#include "minHeap.h"
/* Number of integers read from the input file and sorted (1M). */
#define NUMS_SIZE 1000000
/* Number of sorting threads; the input is split into this many segments. */
#define THREAD_NUM 4
/* Index range of one segment of the input array; both bounds are inclusive. */
struct interval_tag
{
    int begin; /* index of the first element of the segment */
    int end;   /* index of the last element of the segment  */
};
typedef struct interval_tag interval_t;
/* Barrier on which main and every sorting thread wait once sorting is done. */
pthread_barrier_t b;
/* Input values; each sorting thread sorts its own segment of this array in place. */
int nums[NUMS_SIZE];
/* Scratch buffer handed to mergeSort; mergeN later writes the merged output here. */
int temp[NUMS_SIZE];
/* Inclusive [begin, end] index range of the segment assigned to each thread. */
interval_t seg[THREAD_NUM];
/*
 * Report errmsg through perror() (which appends the text for the current
 * errno) and terminate the process with exit status -1.
 */
void err_exit(const char *errmsg)
{
    const int failure_status = -1;

    perror(errmsg);
    exit(failure_status);
}
void mergeN()
{
heap_node_t *minHeap[THREAD_NUM];
int heap_size = THREAD_NUM;
for (int i = 0; i < heap_size; i++)
{
minHeap[i] = (heap_node_t *)malloc(sizeof(heap_node_t));
minHeap[i]->num = nums[seg[i].begin];
minHeap[i]->segNo = i;
minHeap[i]->index = seg[i].begin;
}
make_heap(minHeap, minHeap + heap_size);//[ )
int j = 0;
while(heap_size)
{
pop_heap(minHeap, minHeap + heap_size);
temp[j++] = minHeap[heap_size - 1]->num;
if (++minHeap[heap_size - 1]->index <= seg[minHeap[heap_size - 1]->segNo].end)
{
minHeap[heap_size - 1]->num = nums[minHeap[heap_size - 1]->index];
push_heap(minHeap, minHeap + heap_size);
}
else
heap_size--;
}
}
/*
 * Thread start routine: sorts one segment of nums.
 * arg points to the interval_t holding the inclusive bounds of the segment.
 * After sorting, the thread waits on barrier b and then exits with NULL.
 */
void *sort_thread_func(void *arg)
{
    const interval_t *range = arg;
    const int first = range->begin;
    const int last = range->end;

    mergeSort(nums, first, last, temp);
    pthread_barrier_wait(&b);
    pthread_exit(NULL);
}
/*
 * Read NUMS_SIZE integers from 1Mints.txt, sort THREAD_NUM segments of them
 * in parallel, merge the sorted segments with mergeN() and print the
 * elapsed wall-clock time of the sort + merge phase.
 *
 * Returns 0 on success; any failure terminates the process with status -1.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    FILE *fp = fopen("1Mints.txt", "r");
    if (fp == NULL)
        err_exit("fopen error\n");
    for (int i = 0; i < NUMS_SIZE; i++) {
        /* A short or malformed file would otherwise be sorted silently. */
        if (fscanf(fp, "%d", &nums[i]) != 1) {
            fprintf(stderr, "1Mints.txt: expected %d integers, read only %d\n",
                    NUMS_SIZE, i);
            exit(-1);
        }
    }
    fclose(fp);

    /* seg holds the inclusive [begin, end] indices of every segment;
     * the last segment absorbs the remainder of the division. */
    for (int i = 0; i < THREAD_NUM; i++) {
        seg[i].begin = i * (NUMS_SIZE / THREAD_NUM);
        seg[i].end = (i + 1 < THREAD_NUM) ? (i + 1) * (NUMS_SIZE / THREAD_NUM) - 1
                                          : NUMS_SIZE - 1;
    }

    pthread_t tids[THREAD_NUM];
    struct timeval start, end;
    int err;

    gettimeofday(&start, NULL);

    /* THREAD_NUM workers plus the main thread wait on the barrier. */
    err = pthread_barrier_init(&b, NULL, THREAD_NUM + 1);
    if (err != 0) {
        errno = err; /* pthread functions return the error, they do not set errno */
        err_exit("pthread_barrier_init error\n");
    }
    for (int i = 0; i < THREAD_NUM; i++) {
        err = pthread_create(&tids[i], NULL, sort_thread_func, (void *)&seg[i]);
        if (err != 0) {
            errno = err;
            err_exit("pthread_create error\n");
        }
    }
    pthread_barrier_wait(&b); /* all segments are sorted past this point */
    mergeN();

    gettimeofday(&end, NULL);

    /* Reclaim the worker threads; only then is the barrier safe to destroy. */
    for (int i = 0; i < THREAD_NUM; i++)
        pthread_join(tids[i], NULL);
    pthread_barrier_destroy(&b);

    /* Widen before multiplying so the microsecond count cannot overflow. */
    long long startusec = (long long)start.tv_sec * 1000000 + start.tv_usec;
    long long endusec = (long long)end.tv_sec * 1000000 + end.tv_usec;
    double elapsed = (double)(endusec - startusec) / 1000000.0;
    printf("sort took %.4f seconds\n", elapsed);

    /* The merged, fully sorted output is in temp[0..NUMS_SIZE-1]. */
    return 0;
}

### 以下封装了一些堆的操作,这里使用小顶堆

+ `make_heap`,建立堆

+ `push_heap`,将序列的尾元素加入堆中,并使堆整体有序化

+ `pop_heap`,将堆顶元素移出堆,并使剩下部分有序化

+ minHeap.h
```c
#ifndef MINHEAP_H_
#define MINHEAP_H_

/*
 * Min-heap over an array of heap_node_t pointers, ordered by the num field.
 * All functions take a half-open range [begin, end) of node pointers.
 *
 * The sink/swim helpers are file-local (static) to the implementation file
 * and are intentionally not declared in this public header.
 */

/* One heap entry: a value together with where it came from. */
typedef struct heap_node_tag
{
    int num;   /* the value used as the heap key */
    int segNo; /* number of the segment the value belongs to */
    int index; /* position of the value within its segment's source array */
} heap_node_t;

/* Rearrange [begin, end) so that it satisfies the min-heap property. */
extern void make_heap(heap_node_t **begin, heap_node_t **end);

/* Insert the last element of [begin, end) into the heap formed by the
 * elements before it. */
extern void push_heap(heap_node_t **begin, heap_node_t **end);

/* Move the smallest element to the last slot of [begin, end) and restore
 * the heap property on the remaining [begin, end - 1). */
extern void pop_heap(heap_node_t **begin, heap_node_t **end);

#endif
```
minHeap.c
#include "minHeap.h"
//leftChild = 2 * i + 1, rightChild = 2 * i + 2;
//parent = (i - 1) / 2;
/*
 * Sift the node at index i down the heap stored in [begin, end) until
 * neither child has a smaller num. Children of slot k are at 2k+1 and 2k+2.
 * The moving node is held aside and written once, into its final slot.
 */
static void min_heap_sink(heap_node_t **begin, heap_node_t **end, int i)
{
    heap_node_t *moving = begin[i];
    int hole = i;

    while (2 * hole + 1 < end - begin) {
        int smallest = 2 * hole + 1;
        const int right = smallest + 1;

        /* Pick the smaller of the two children (left wins ties). */
        if (right < end - begin && begin[smallest]->num > begin[right]->num) {
            smallest = right;
        }
        if (!(moving->num > begin[smallest]->num)) {
            break; /* heap order already holds at this slot */
        }
        begin[hole] = begin[smallest];
        hole = smallest;
    }
    begin[hole] = moving;
}
/*
 * Sift the node at index i up towards the root while its num is smaller
 * than its parent's. The parent of slot k is at (k - 1) / 2.
 * end is not read by this function.
 */
static void min_heap_swim(heap_node_t **begin, heap_node_t **end, int i)
{
    heap_node_t *rising = begin[i];
    int slot = i;

    (void)end;

    while (slot > 0) {
        const int parent = (slot - 1) / 2;

        if (!(rising->num < begin[parent]->num)) {
            break; /* parent is not larger: final position found */
        }
        begin[slot] = begin[parent];
        slot = parent;
    }
    begin[slot] = rising;
}
/*
 * Turn [begin, end) into a min-heap by sinking every node that has at
 * least one child, from the last such node back to the root.
 */
void make_heap(heap_node_t **begin, heap_node_t **end)
{
    int next = (end - begin) / 2; /* one past the last node that has a child */

    while (next-- > 0) {
        min_heap_sink(begin, end, next);
    }
}
/*
 * Insert the last element of [begin, end) into the min-heap formed by
 * [begin, end - 1) by sifting it up.
 * An empty range is a no-op; without the guard the helper would be asked
 * to read the slot before begin.
 */
void push_heap(heap_node_t **begin, heap_node_t **end)
{
    if (end - begin < 1) {
        return;
    }
    min_heap_swim(begin, end, (int)(end - begin - 1));
}
/*
 * Remove the minimum from the heap in [begin, end): the root is swapped
 * with the last element, so the minimum ends up at end - 1, and the heap
 * property is restored on [begin, end - 1).
 * Ranges with fewer than two elements need no work; returning early also
 * keeps an empty range from touching the slot before begin.
 */
void pop_heap(heap_node_t **begin, heap_node_t **end)
{
    if (end - begin < 2) {
        return;
    }

    heap_node_t *top = *begin;
    *begin = *(end - 1);
    *(end - 1) = top;
    min_heap_sink(begin, end - 1, 0);
}

另给出单线程排序代码

single_thread_sort.c
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>
#include "mergeSort.h"
#define NUMS_SIZE 13000 // number of integers read from the input file and sorted
/* Input values; mergeSort sorts this array in place. */
int nums[NUMS_SIZE];
/* Scratch buffer handed to mergeSort. */
int temp[NUMS_SIZE];
/*
 * Report errmsg through perror() (which appends the text for the current
 * errno) and terminate the process with exit status -1.
 */
void err_exit(const char *errmsg)
{
    const int failure_status = -1;

    perror(errmsg);
    exit(failure_status);
}
/*
 * Single-threaded baseline: read NUMS_SIZE integers from data.txt, sort
 * them with mergeSort() and print the elapsed wall-clock time of the sort.
 *
 * Returns 0 on success; any failure terminates the process with status -1.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    FILE *fp = fopen("data.txt", "r");
    if (fp == NULL)
        err_exit("fopen error\n");
    for (int i = 0; i < NUMS_SIZE; i++) {
        /* A short or malformed file would otherwise be sorted silently. */
        if (fscanf(fp, "%d", &nums[i]) != 1) {
            fprintf(stderr, "data.txt: expected %d integers, read only %d\n",
                    NUMS_SIZE, i);
            exit(-1);
        }
    }
    fclose(fp);

    struct timeval start, end;
    gettimeofday(&start, NULL);
    mergeSort(nums, 0, NUMS_SIZE - 1, temp);
    gettimeofday(&end, NULL);

    /* Widen before multiplying so the microsecond count cannot overflow. */
    long long startusec = (long long)start.tv_sec * 1000000 + start.tv_usec;
    long long endusec = (long long)end.tv_sec * 1000000 + end.tv_usec;
    double elapsed = (double)(endusec - startusec) / 1000000.0;
    printf("sort took %.4f seconds\n", elapsed);

    /* The sorted output is in nums[0..NUMS_SIZE-1]; temp is only scratch. */
    return 0;
}