众所周知,著名的8大排序算法相信大家都看过,但我唯独对归并排序情有独钟。因为这个算法可以轻松而愉快地进行并行排序,而且归并排序是稳定的。当数据量达到一定级别的时候,无论多么优秀的算法,都会消耗一定的时间。所以,这个时候就必须利用多线程去加速计算。但多线程又是一个极其难以控制的东西,因为必须将主体分为绝对互不影响的N部分,分别计算后再统一合并;恰恰归并排序就有这个性质,并且还是均分的。归并的思想是什么呢?是分治。就是说把一个大的个体分成N部分,划分后再分别解决,可以使问题变得简单,或者得到更快的速度。
废话不多说,先看看测试。我的测试是对2600W左右的数据进行排序(跑亿级的数据,2个线程估计还是得费点时间)。为了简单,我仅仅做了个例程,设计上只开2个线程和单线程对比一下,看看有没有问题,实际应用中这种设计肯定是不行的。即使算上创建线程等额外耗时,两者基本上也有2倍左右的差距。
// Two-thread merge sort benchmark (Win32).
//
// The array is split into two halves, each half is merge-sorted on its own
// thread, and the two sorted halves are merged on the main thread. The same
// amount of fresh random data is then sorted on a single thread so the two
// timings can be compared.
//
// The main thread joins the workers by waiting on their thread handles. All
// merging, timing and output happen in main(), never in a signal handler.

#include <Windows.h>
#include <process.h>

#include <csignal>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <map>
#include <string>

using namespace std;

// Completion flags, one per worker. A worker stores 1 in its slot after it
// has finished sorting its range; main() reads the flags only after it has
// waited for both workers.
int threadIdentity[2];

// Merges the sorted ranges src[start..mid] and src[mid+1..end] (all bounds
// inclusive) into one sorted range src[start..end].
//
// tmp is scratch space and must be valid for indices start..end; only that
// index range of tmp is written, so callers working on disjoint ranges may
// share one tmp buffer.
void Merge(int* src, int start, int mid, int end, int* tmp)
{
    int left = start;
    int right = mid + 1;
    int out = start;

    while (left <= mid && right <= end)
    {
        // "<=" takes the left-hand element on ties, which keeps equal
        // elements in their original relative order (stable merge).
        if (src[left] <= src[right])
        {
            tmp[out++] = src[left++];
        }
        else
        {
            tmp[out++] = src[right++];
        }
    }
    // At most one of the two inputs still has elements; copy the remainder.
    while (left <= mid)
    {
        tmp[out++] = src[left++];
    }
    while (right <= end)
    {
        tmp[out++] = src[right++];
    }
    // Copy the merged run back into the source array.
    for (int i = start; i <= end; ++i)
    {
        src[i] = tmp[i];
    }
}

// Recursively sorts src[start..end] (inclusive bounds) in ascending order,
// using tmp as scratch space for the same index range. Does nothing when the
// range holds fewer than two elements.
void MergeSort(int* src, int start, int end, int* tmp)
{
    if (start < end)
    {
        // Written as start + (end - start) / 2 so that the midpoint cannot
        // overflow int when start + end would.
        int mid = start + (end - start) / 2;
        MergeSort(src, start, mid, tmp);
        MergeSort(src, mid + 1, end, tmp);
        Merge(src, start, mid, end, tmp);
    }
}

// Debug helper: writes src[start..end] (inclusive) to stdout, separated by
// spaces and followed by a blank line.
void print(int* src, int start, int end)
{
    for (int i = start; i <= end; ++i)
    {
        cout << src[i] << ' ';
    }
    cout << endl << endl;
}

// Work description handed to one worker thread.
struct MultiArith_
{
    int identity;  // index of this worker's slot in threadIdentity
    int* src;      // array being sorted
    int* tmp;      // scratch buffer, indexed like src
    int start;     // first index of the worker's range (inclusive)
    int end;       // last index of the worker's range (inclusive)
};

// Thread entry point for _beginthreadex. arg points to a MultiArith_ that
// must stay alive until the thread has finished. Sorts the described range
// and then marks the worker as finished in threadIdentity.
unsigned __stdcall multiArith(void* arg)
{
    MultiArith_* pma = static_cast<MultiArith_*>(arg);
    MergeSort(pma->src, pma->start, pma->end, pma->tmp);
    threadIdentity[pma->identity] = 1;
    return 0;
}

// Milliseconds elapsed on clock() since the reading `since`.
static long long elapsedMs(clock_t since)
{
    return static_cast<long long>(clock() - since) * 1000 / CLOCKS_PER_SEC;
}

// Fills data[0..count-1] with values from rand().
static void fillRandom(int* data, int count)
{
    for (int i = 0; i < count; ++i)
    {
        data[i] = rand();
    }
}

int main()
{
    srand(static_cast<unsigned>(time(0)));

    const int size = 1024 / 4 * 1024 * 1024 / 10;  // 26,843,545 elements
    int* a = static_cast<int*>(malloc(sizeof(int) * size));
    int* tmp = static_cast<int*>(malloc(sizeof(int) * size));
    if (a == NULL || tmp == NULL)
    {
        cerr << "Failed to allocate the sort buffers" << endl;
        free(a);
        free(tmp);
        return 1;
    }

    const int mid = (size - 1) / 2;
    fillRandom(a, size);
    // print(a, 0, size - 1);

    // The two workers get disjoint index ranges, so they can share both the
    // data array and the scratch buffer without touching the same elements.
    MultiArith_ ma[2];
    ma[0].src = a;
    ma[0].tmp = tmp;
    ma[0].start = 0;
    ma[0].end = mid;
    ma[0].identity = 0;
    ma[1].src = a;
    ma[1].tmp = tmp;
    ma[1].start = mid + 1;
    ma[1].end = size - 1;
    ma[1].identity = 1;
    threadIdentity[0] = 0;
    threadIdentity[1] = 0;

    // The timed section includes thread creation, as in the comparison the
    // benchmark is meant to show.
    clock_t t1 = clock();

    HANDLE workers[2];
    int started = 0;
    for (int n = 0; n < 2; ++n)
    {
        uintptr_t h = _beginthreadex(0, 0, multiArith, &ma[n], 0, 0);
        if (h == 0)
        {
            // No thread could be created: sort this half on the calling
            // thread so the run still produces a correct result.
            multiArith(&ma[n]);
        }
        else
        {
            workers[started++] = reinterpret_cast<HANDLE>(h);
        }
    }

    if (started > 0 &&
        WaitForMultipleObjects(static_cast<DWORD>(started), workers, TRUE,
                               INFINITE) == WAIT_FAILED)
    {
        // The workers may still be using the buffers, so they are not freed
        // here; process exit reclaims them.
        cerr << "Waiting for the worker threads failed" << endl;
        return 1;
    }
    for (int n = 0; n < started; ++n)
    {
        CloseHandle(workers[n]);
    }

    if (threadIdentity[0] != 1 || threadIdentity[1] != 1)
    {
        cerr << "A worker thread did not finish sorting" << endl;
        free(a);
        free(tmp);
        return 1;
    }

    // Both halves are sorted; combine them into the final result.
    Merge(a, 0, mid, size - 1, tmp);
    // print(a, 0, size - 1);
    cout << "Finished!2 Thread Use Time(ms):" << elapsedMs(t1) << endl;

    // Single-threaded reference run on fresh random data of the same size.
    fillRandom(a, size);
    t1 = clock();
    MergeSort(a, 0, size - 1, tmp);
    cout << "Finished!Use Time(ms):" << elapsedMs(t1) << endl;

    free(a);
    free(tmp);
    return 0;
}