# 一亿个数中选出最大的1000个数

Published On 三月 20, 2016

category algorithm | tags sort

CONTENTS

O(Nlog(N/M))

## 2. 测试结果

#### 随机数保存在内存中

1亿的数组占用的存储空间很大，那到底有多大呢？容量为1x10^8 的int型数组占用的内存空间为4x10^8 个字节约等于400Mb，不能以普通数组的形式保存在栈上，但可以保存在堆上，也就是malloc或者new。

#### 比较结果

Heap 3.241
Quick 1.164
partial_sort 5.879

## 4. 最后附上C++代码

#include<iostream>
#include <time.h>
#include<random>
#include<algorithm>
using namespace std;

#define N 100000000
#define M 1000
void Heap_FindMaxM(int A[], vector<int>& vec);
void Insert_FindMaxM(int A[], vector<int>& vec);
void Quick_FindMaxM(int A[], int p, int r, vector<int>& vec);
bool comp(int i, int j) { return (i > j); }
int main()
{
int* A = new int[N + 1];
uniform_int_distribution<unsigned> u(0, 1000000000);//生成0~10亿均匀的随机无符号整数
default_random_engine e;
for (size_t i = 1; i <= N; i++)
A[i] = u(e);
cout << "Random numbers generated!" << endl;
vector<int> vec1, vec2, vec3;
clock_t t = clock();
Heap_FindMaxM(A, vec1);
t = clock() - t;
cout << (double)t / CLOCKS_PER_SEC << " seconds！" << endl;

t = clock();
Insert_FindMaxM(A, vec2);
t = clock() - t;
cout << (double)t / CLOCKS_PER_SEC << " seconds！" << endl;

t = clock();
Quick_FindMaxM(A, 1, N, vec3);
t = clock() - t;
cout << (double)t / CLOCKS_PER_SEC << " seconds！" << endl;

//t = clock();
//partial_sort(A+1, A+M+1, A+N+1, comp);
//t = clock() - t;
//for (int i = 1; i <= M; i++)
//  vec3.push_back(A[i]);
//cout << "partial_sort takes"<<(double)t / CLOCKS_PER_SEC << " seconds！" << endl;

sort(vec1.begin(), vec1.end());
sort(vec3.begin(), vec3.end());
if (vec1 == vec2&&vec2 == vec3)
cout << "Right!" << endl;
delete[] A;
return 0;
}

/************Heap************/
void Heaplify(int H[], int i)
{
int j = 2 * i;//j为左孩子的序号
int temp = H[i];
while (j <= M)
{
if (j<M&&H[j]>H[j + 1])
j++;
if (temp<H[j])
break;
H[j / 2] = H[j];//把孩子节点移到父节点的位置
j *= 2;
}
H[j / 2] = temp;
}
void Heap_FindMaxM(int A[], vector<int>& vec)
{
int i, H[M + 1];
for (i = 1; i <= M; i++)
{
H[i] = A[i];
}
for (i = M / 2; i >= 1; i--)//建堆
Heaplify(H, i);
for (i = M + 1; i <= N; i++)
{
if (A[i]>H[1])
H[1] = A[i];//弹出堆顶，换入新的数
Heaplify(H, 1);//调整堆
}

int count = 0;
for (i = 1; i <= M; i++)
vec.push_back(H[i]);
cout << "Heap finished in ";
}

typedef struct node
{
int data;
{
LinkList p, q, r = NULL;
p->data = data;

{
}
else
{
while (q != NULL&&data >= q->data)
{
r = q;
}
}
}
void Insert_FindMaxM(int A[], vector<int>& vec)
{
int i;
for (i = 1; i <= M; i++)
{
}
while (++i <= N)
{
{
free(p);
}
}
//销毁线性链表
int count = 0;
while (p != NULL)
{
count++;
free(p);
}
if (count != M)
cerr << "Error in linklist_findmaxM" << endl;
cout << "Linklist finished in ";
}

/************Quick************/
/*

*/
int partition(int A[], int p, int r)
{
int i = p, j = r + 1, temp;
while (1)
{
while (A[++i] >= A[p]);
while (A[--j] < A[p]);
if (i < j)
{
temp = A[i];
A[i] = A[j];
A[j] = temp;
}
else
{
temp = A[p];
A[p] = A[j];
A[j] = temp;
return j;
}

}
}
void Quick_FindMaxM(int A[], int p, int r, vector<int>& vec)
{
if (p >= r)
return;
int q = partition(A, p, r);
if (q == M)
{
cout << "QUICK finished in ";
for (int i = 1; i <= M; i++)
vec.push_back(A[i]);
}
else if (q > M)
Quick_FindMaxM(A, p, q - 1, vec);
else
Quick_FindMaxM(A, q + 1, r, vec);
}