当参加排序的数据量太大,内存不足以一次全部存放时,需要使用外排序。外排序可以采用插入排序的思想,也可以采用归并排序的思想。
下面是自己实现的归并排序思想的外排序,虽然基本做到了正确排序,且对内存的占用可以控制,但时间效率略低。
代码示例
GenerateRandomNumber.h文件,生成随机数序列。
#pragma once
#include <random>
#include <time.h>
#include <fstream>
#include <iostream>
using namespace std;
/**
 * Generates `num` pseudo-random non-negative integers (one per line).
 *
 * Every value is echoed to standard output; when `fileName` is non-empty the
 * same values are also written to that file (created/truncated).
 *
 * @param num      How many numbers to produce. Zero or a negative count
 *                 produces nothing and still succeeds.
 * @param fileName Destination file, or "" to print to standard output only.
 * @return true on success; false when the output file cannot be created or
 *         a write to it fails (a message is printed to standard output).
 */
bool GenerateRandom(int num = 100, std::string fileName = "")
{
    const bool writeToFile = !fileName.empty();
    std::ofstream outfile;
    if (writeToFile)
    {
        outfile.open(fileName);
        if (!outfile)
        {
            std::cout << "随机数输出文件创建失败!" << std::endl;
            return false;
        }
    }

    // Seed from the wall clock so successive runs yield different sequences.
    srand(static_cast<unsigned int>(time(nullptr)));

    // `num > 0` (instead of `num--`) keeps a negative count from looping
    // until the counter wraps around.
    for (; num > 0; --num)
    {
        const int value = rand();
        // '\n' rather than endl: avoid flushing the stream once per number.
        if (writeToFile)
            outfile << value << '\n';
        std::cout << value << '\n';
    }
    std::cout.flush();

    if (writeToFile)
    {
        outfile.close();
        // close() flushes the buffer; a failure here means data was lost.
        if (!outfile)
        {
            std::cout << "随机数输出文件写入失败!" << std::endl;
            return false;
        }
    }
    return true;
}
DiskSort.h文件,磁盘排序的本体。
#pragma once
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <queue>
#include <cstdio>
using namespace std;
#define MemoryLimit 1000 // Size in bytes of the in-memory buffer; DiskSort divides it by sizeof(int32_t) to get the number of values held per chunk.
// Bookkeeping record for one temporary file taking part in the merge.
struct File
{
    std::string filepath; // location of the temporary file on disk
    int currentValue;     // value most recently read from the file
    int fileIndex;        // sequence number given to the file when it was created
    int totalCnt;         // how many values the file holds
    int currentPos;       // how many values have been read from it so far

    // Deliberately reversed comparison: std::priority_queue<File> keeps the
    // "largest" element on top, so ordering by descending currentValue makes
    // the queue hand out the smallest currentValue first.
    bool operator<(const File& other) const
    {
        return other.currentValue < currentValue;
    }
};
/**
 * Writes every element of `num` to the file at `path`, one value per line.
 * The file is created, or truncated if it already exists.
 *
 * @param num  Values to write, in order. Taken by const reference so the
 *             caller's buffer is not copied on every call.
 * @param path Destination file path (not modified).
 *
 * A failure to open the file is not reported; in that case nothing is written.
 */
void PrintFile(const std::vector<int>& num, const std::string& path)
{
    std::ofstream out(path);
    for (const int value : num)
        out << value << '\n'; // '\n' instead of endl: no flush per line
    // The ofstream destructor flushes and closes the file.
}
// Opens file.filepath and reads its first (file.currentPos + 1) lines into
// num_str one after another, so that on return num_str holds the result of
// the last read, i.e. the line at zero-based index file.currentPos.
// Each read stores at most 99 characters plus the terminator, so num_str
// must point to a buffer of at least 100 bytes.
void seek_to_line(File file, char* num_str)
{
    ifstream reader(file.filepath);
    for (int remaining = file.currentPos + 1; remaining != 0; --remaining)
        reader.getline(num_str, 100);
    // reader is closed by its destructor.
}
string DiskSort(string infile_path)
{
int total=0, cnt = 0;
auto found = infile_path.find_last_of('\\');
string outfile_path = infile_path.substr(0, found + 1) + "sorted.txt";
uint32_t maxCnt = MemoryLimit / sizeof(int32_t);
vector<int32_t> memN(maxCnt);//存放读入内存的数据
vector<File> tmpfiles;
ifstream infile; ofstream outfile;
try
{
infile.open(infile_path);
if (!infile)
throw "输入文件打开失败!";
char num_str[100];
int num_cnt = 0, file_cnt = 0;
string tmpfile_path = infile_path.substr(0, found + 1) + "tmpout";
while (infile.getline(num_str, 100))
{
memN[num_cnt++] = stoi(num_str);
if (num_cnt == maxCnt)//达到内存限制
{
total += num_cnt;
sort(memN.begin(), memN.begin() + num_cnt);
string tmp = tmpfile_path + to_string(file_cnt) + ".txt";
PrintFile(memN, tmp);
tmpfiles.push_back({ tmp,0,file_cnt++,num_cnt,0 });
num_cnt = 0;
}
}
infile.close();
if (num_cnt > 0)
{
total += num_cnt;
sort(memN.begin(), memN.begin() + num_cnt);
string tmp = tmpfile_path + to_string(file_cnt);
PrintFile(memN, tmp);
tmpfiles.push_back({ tmp,0,file_cnt++,num_cnt,0 });
}
priority_queue<File> pq;
ofstream outfile;
outfile.open(outfile_path);
for (auto f : tmpfiles)
{
seek_to_line(f, num_str);
if (strcmp(num_str, "") != 0)
{
f.currentValue = stoi(num_str);
++f.currentPos;
pq.push(f);
}
}
int prev_value = INT_MIN;
bool sorted = true;
while (pq.size() > 0)
{
++cnt;
auto f = pq.top(); pq.pop();
sorted = prev_value <= f.currentValue;
prev_value = f.currentValue;
outfile << f.currentValue << endl;
cout << f.currentValue << endl;
if (f.currentPos == f.totalCnt)//该文件已读取完
{
remove(f.filepath.c_str());
continue;
}
seek_to_line(f, num_str);
if (strcmp(num_str, "") != 0)
{
f.currentValue = stoi(num_str);
++f.currentPos;
pq.push(f);
}
}
outfile.close();
sorted = cnt == total;
cout << "排序完成" << endl;
cout << "共读取:" << total << ";" << "被排序:" << cnt << endl;
cout << "正确性:";
if (sorted) cout << "正确" << endl;
else cout << "错误" << endl;
}
catch (const char* msg)
{
cout << msg << endl;
}
return outfile_path;
}
main文件
#include <iostream>
#include <ctime>
#include <string>
#include "../DiskSort/GenerateRandomNumber.h"
#include "../DiskSort/DiskSort.h"
using namespace std;
// Entry point: generates a file of random numbers, sorts it with DiskSort
// and prints the processor time consumed by the sort in seconds.
int main()
{
    const int cnt = 1000000;
    const std::string filepath = "C:\\WorkSpace\\CPP\\DiskSort\\DiskSort\\randomlist.txt";

    if (!GenerateRandom(cnt, filepath))
    {
        std::cout << "生成失败!" << std::endl;
        return 0;
    }
    std::cout << "生成成功!" << std::endl;

    // clock() returns clock_t (processor time), not time_t, so keep the
    // measurements in the matching type.
    const std::clock_t start = std::clock();
    DiskSort(filepath);
    const std::clock_t end = std::clock();
    std::cout << static_cast<double>(end - start) / CLOCKS_PER_SEC << "s" << std::endl;
    return 0;
}