最近点对问题的复杂度为O(n)的解法
在算法导论中给出的方法:分治过程中同时进行归并排序的分治法,它是在分治求最短距离的时候同时对左子集和右子集进行归并,最终复杂度为T(n) = 2T(n/2) + O(n) = O(nlogn)
然而,按照《数据结构与算法》黑皮书280页的描述,取d=min(d1,d2),那么在mid_x±d区间中平均有 $O(\sqrt{n})$ 个点
那么我们直接对这个区间中的点进行快速排序,再遍历排序后的点计算每一个点和最近6个点的距离并更新min就可以。
那么快速排序需要的时间为 $O(\sqrt{n}\log\sqrt{n})$,而遍历时间小于 $\sqrt{n}\log\sqrt{n}$,则总的合并时间为 $O(\sqrt{n}\log\sqrt{n})$
所以,$T(n) = 2T(n/2) + O(\sqrt{n}\log\sqrt{n})$
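根据主定理:$a=2$,$b=2$,$n^{\log_b a}=n$,而 $f(n)=O(\sqrt{n}\log\sqrt{n})=O(n^{1-\varepsilon})$(例如取 $\varepsilon=1/4$),属于主定理的第一种情形,
可以得时间复杂度T(n)为O(n)(这里不包含开始时按x坐标预排序所需的O(nlogn))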
此方法我们称为有内部快速排序的分治法
然而它也有缺陷,在最坏情况下,它的复杂度可以达到O(n²)
相对于算法导论中正常的无内部排序归并分治法,上面这个方法是快很多的,缺点就是不稳定,而无内部排序归并分治法比较稳定,两者各有优势
下面是代码实现和10万到100万的实测数据
其中:
force_get_min是蛮力法,平均复杂度O(n²)
dc_merge_get_min是算法导论中给出的无内部排序归并分治法,平均复杂度O(nlogn)
dc_sort_get_min则是上面介绍的有内部排序分治法,平均复杂度O(n)
#include <iostream>
#include <vector>
#include <algorithm>
#include <string>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <windows.h>
using namespace std;
// A point in the plane. Both coordinates start at zero.
struct Point
{
    double x{0.0};  // horizontal coordinate
    double y{0.0};  // vertical coordinate
};
/*
 * Brute-force closest pair.
 *
 * Examines every unordered pair among the first n points of P and returns the
 * smallest Euclidean distance (the square root is taken once, at the end).
 *
 * P  array holding at least n points
 * n  number of points to consider
 *
 * Returns INFINITY when n < 2, because no pair exists in that case.
 */
double force_get_min(struct Point *P, int n)
{
    // Guard: the seed below reads P[0] and P[1].
    if (n < 2)
        return INFINITY;

    // Seed the running minimum (a squared distance) with the first pair.
    double best = (P[0].x - P[1].x) * (P[0].x - P[1].x)
                + (P[0].y - P[1].y) * (P[0].y - P[1].y);

    // No `register` specifier here: it was removed from the language in C++17.
    for (int i = 0; i < n - 1; i++)
    {
        for (int k = i + 1; k < n; k++)
        {
            const double dx = P[k].x - P[i].x;
            const double dy = P[k].y - P[i].y;
            const double sq = dx * dx + dy * dy;
            if (best > sq)
                best = sq;
        }
    }
    return sqrt(best);
}
void sort_x_point(struct Point *P, int left, int right)
{
Point temp;
if (right - left>0)
{
int i = left, j = right, flag = 0;
while (i<j)
{
if (P[i].x>P[j].x)
{
temp = P[i];
P[i] = P[j];
P[j] = temp;
if (flag == 0)
{
i++;
flag = 1;
}
else
{
j--;
flag = 0;
}
}
else
{
if (flag == 0)
{
j--;
}
else
{
i++;
}
}
}
sort_x_point(P, left, i - 1);
sort_x_point(P, i + 1, right);
}
}
void sort_y_point(struct Point *P, int left, int right)
{
Point temp;
if (right - left>0)
{
int i = left, j = right, flag = 0;
while (i<j)
{
if (P[i].y>P[j].y)
{
temp = P[i];
P[i] = P[j];
P[j] = temp;
if (flag == 0)
{
i++;
flag = 1;
}
else
{
j--;
flag = 0;
}
}
else
{
if (flag == 0)
{
j--;
}
else
{
i++;
}
}
}
sort_y_point(P, left, i - 1);
sort_y_point(P, i + 1, right);
}
}
/*
 * Returns the SQUARED Euclidean distance between P and Q.
 * No square root is taken; callers that need the real distance apply sqrt.
 */
double get_distance(struct Point P, struct Point Q)
{
    const double dx = P.x - Q.x;
    const double dy = P.y - Q.y;
    return dx * dx + dy * dy;
}
/*
 * Divide-and-conquer closest pair that sorts the right half of the middle
 * strip by y at every combine step.
 *
 * P      points; P[left..right] must be sorted by ascending x on entry
 * left   first index of the range (inclusive)
 * right  last index of the range (inclusive)
 *
 * Returns the SQUARED distance of the closest pair in P[left..right], or the
 * sentinel 1e15 when the range holds fewer than two points.
 * Side effect: points inside P[left..right] are reordered.
 */
double dc_sort_get_min(struct Point *P, int left, int right)
{
    // Fewer than two points: no pair. This also stops the recursion for an
    // empty or inverted range, which would otherwise never reach a base case.
    if (left >= right)
        return 1000000000000000;
    if (right - left == 1)
        return get_distance(P[left], P[left + 1]);
    if (right - left == 2)
    {
        const double d01 = get_distance(P[left], P[left + 1]);
        const double d12 = get_distance(P[left + 1], P[left + 2]);
        const double d02 = get_distance(P[left], P[left + 2]);
        const double best = d01 < d12 ? d01 : d12;
        return best < d02 ? best : d02;
    }

    const int mid = left + (right - left) / 2;
    // Read the dividing x BEFORE recursing: the recursive calls reorder their
    // sub-ranges, so P[mid] may hold a different point afterwards.
    const double mid_x = P[mid].x;
    const int loc_i = mid + 1;  // first index of the right half

    const double left_min = dc_sort_get_min(P, left, loc_i - 1);
    const double right_min = dc_sort_get_min(P, loc_i, right);
    double mmin = left_min < right_min ? left_min : right_min;

    // mmin is a squared distance, while the strip tests below compare plain
    // coordinate differences, so they need the linear distance d.
    double d = sqrt(mmin);

    // Skip leading left-half points that are at least d to the left of mid_x.
    int left_i;
    for (left_i = left; left_i < loc_i; left_i++)
    {
        if (mid_x - P[left_i].x < d)
            break;
    }
    // Skip trailing right-half points that are at least d to the right of mid_x.
    int right_i;
    for (right_i = right; right_i >= loc_i; right_i--)
    {
        if (P[right_i].x - mid_x < d)
            break;
    }

    // Order the right-side candidates by y so the inner scan can stop early.
    sort_y_point(P, loc_i, right_i);

    for (int i = left_i; i < loc_i; i++)
    {
        for (int k = loc_i; k <= right_i; k++)
        {
            if (P[i].y - P[k].y > d)
                continue;   // P[k] is still too far below P[i]
            if (P[k].y - P[i].y > d)
                break;      // P[k] and everything after it is too far above
            const double cand = get_distance(P[k], P[i]);
            if (cand < mmin)
            {
                mmin = cand;
                d = sqrt(mmin);  // keep the linear bound in step with mmin
            }
        }
    }
    return mmin;
}
/*
 * Divide-and-conquer closest pair that merge-sorts the points by y while the
 * recursion unwinds.
 *
 * left, right  inclusive index range to process
 * px           points sorted by ascending x; only read
 * py           output: on return py[left..right] holds the same points sorted
 *              by ascending y (not written when left > right)
 * ptemp        scratch buffer, indexed with the same indices as py
 * check        scratch buffer for the strip candidates; needs room for
 *              right - left + 1 points
 *
 * Returns the SQUARED distance of the closest pair in px[left..right], or the
 * sentinel 1e9 when the range holds fewer than two points.
 */
double dc_merge_get_min(int left, int right, struct Point *px, struct Point *py,struct Point *ptemp,struct Point *check){
    if (left > right)
        return 1000000000;
    if (left == right)
    {
        py[left] = px[left];
        return 1000000000;
    }
    if (right - left == 1)
    {
        // Two points: place them in y order, return their squared distance.
        if (px[left].y > px[right].y)
        {
            py[left] = px[right];
            py[right] = px[left];
        }
        else
        {
            py[left] = px[left];
            py[right] = px[right];
        }
        return get_distance(px[left], px[right]);
    }
    if (right - left == 2)
    {
        // Three points: sort by y by hand, then take the best of three pairs.
        int larger;  // index in px of the larger-y point among the first two
        if (px[left].y < px[left + 1].y)
        {
            py[left] = px[left];
            larger = left + 1;
        }
        else
        {
            py[left] = px[left + 1];
            larger = left;
        }
        if (px[left + 2].y < py[left].y)
        {
            // The third point is the lowest of all three.
            py[left + 1] = py[left];
            py[left + 2] = px[larger];
            py[left] = px[left + 2];
        }
        else if (px[larger].y < px[left + 2].y)
        {
            py[left + 1] = px[larger];
            py[left + 2] = px[left + 2];
        }
        else
        {
            py[left + 1] = px[left + 2];
            py[left + 2] = px[larger];
        }
        const double d01 = get_distance(px[left], px[left + 1]);
        const double d02 = get_distance(px[left], px[right]);
        const double d12 = get_distance(px[left + 1], px[right]);
        const double best = d01 < d02 ? d01 : d02;
        return best < d12 ? best : d12;
    }

    const int center = left + (right - left) / 2;
    const double a = dc_merge_get_min(left, center, px, py, ptemp, check);
    const double b = dc_merge_get_min(center + 1, right, px, py, ptemp, check);
    double min_distance = a > b ? b : a;
    const double midx = px[center].x;

    // Merge the two y-sorted halves of py into ptemp.
    int lo = left;
    int hi = center + 1;
    int out = left;
    while (lo <= center && hi <= right)
    {
        if (py[lo].y <= py[hi].y)
            ptemp[out++] = py[lo++];
        else
            ptemp[out++] = py[hi++];
    }
    while (lo <= center)
        ptemp[out++] = py[lo++];
    while (hi <= right)
        ptemp[out++] = py[hi++];

    // min_distance is squared; the strip must be bounded by the linear
    // distance d, otherwise it is too narrow for d < 1 and too wide for d > 1.
    double d = sqrt(min_distance);

    // Copy the merged run back and collect, in ascending y, the points that
    // lie strictly within d of the dividing line.
    int checki = 0;
    for (int i = left; i <= right; i++)
    {
        py[i] = ptemp[i];
        if (py[i].x > midx - d && py[i].x < midx + d)
            check[checki++] = py[i];
    }

    // Compare each candidate with the following ones until the y gap alone
    // reaches d; pairs beyond that cannot be closer than the current best.
    for (int i = 0; i < checki; i++)
    {
        for (int j = i + 1; j < checki && check[j].y - check[i].y < d; j++)
        {
            const double cand = get_distance(check[i], check[j]);
            if (cand < min_distance)
            {
                min_distance = cand;
                d = sqrt(min_distance);
            }
        }
    }
    return min_distance;
}
int main()
{
int num = 10;
int mod = 100000;
int *a = new int[num];
for (int i = 1; i <= num; i++)
a[i - 1] = i*mod;
struct Point *A = new struct Point[num * mod + 1];
struct Point *B = new struct Point[num * mod + 1];
struct Point *C = new struct Point[num * mod + 1];
struct Point *Check = new struct Point[num * mod + 1];
struct Point *TEMP = new struct Point[num * mod + 1];
double fenzhi_get = 0, dc_get_m = 0;
double time = 0, total_time = 0;
long long t1, t2, tt1=0,tt2=0;
for (int k = 0; k<num; k++)
{
cout << "数据规模为:" << a[k] << endl;
tt1 = 0;
tt2 = 0;
for (int b = 0; b < 20; b++)
{
for (int i = 0; i < a[k]; i++)
{
A[i].x = rand() % 10000000000000000;
A[i].y = rand() % 10000000000000000;
B[i].x = A[i].x;
B[i].y = A[i].y;
}
t1 = clock();
sort_x_point(B, 0, a[k] - 1);
fenzhi_get = sqrt(dc_merge_get_min(0, a[k] - 1, B, C, TEMP, Check));
t2 = clock();
tt1 += t2 - t1;
t1 = clock();
sort_x_point(A, 0, a[k] - 1);
dc_get_m = sqrt(dc_sort_get_min(A, 0, a[k] - 1));
t2 = clock();
tt2 += t2 - t1;
}
cout << "无内部排序分治20次平均时间:" << tt1 / 20<< endl;
cout << "有内部排序分治20次平均时间:" << tt2 / 20 <<endl;
}
delete[] A;
delete[] B;
delete[] C;
delete[] Check;
delete[] TEMP;
cin >> t1;
return 0;
}
10万到100万规模的实测数据:
以上还是有一些考虑不足的地方,欢迎交流