LeetCode #149 Max Points on a Line

（Week 3 算法作业）

文章目录

题目
分析

12ms算法

其他优秀算法

8ms算法

sample①
sample②

4ms算法

sample①
sample②

结语

题目

Max Points on a Line

Given n points on a 2D plane, find the maximum number of points that lie on the same straight line.

Example 1:

Input: [[1,1],[2,2],[3,3]]
Output: 3
Explanation:
^
|
|        o
|     o
|  o  
+------------->
0  1  2  3  4

Example 2:

Input: [[1,1],[3,2],[5,3],[4,1],[2,3],[1,4]]
Output: 4
Explanation:
^
|
|  o
|     o        o
|        o
|  o        o
+------------------->
0  1  2  3  4  5  6

Difficulty: Hard

分析

要求出在同一条直线上最多的点数。

有一些需要注意的特殊情况：

1. 输入中可能有多个坐标相同的点，它们应当被看作不同的点。

如：

Input: [[0,0],[1,1],[0,0]]
Expected Output: 3

Input: [[1,1],[1,1],[1,1]]
Expected Output: 3

2. 输入中可能有很接近的两条直线。

Input: [[0,0],[94911151,94911150],[94911152,94911151]]
Expected Output: 2

如果直接用 double 除法去算这三个点两两相连产生的直线的斜率和截距，就会得出错误的输出：3。原因是斜率 94911150/94911151 与 94911151/94911152 只相差约 $1.1\times10^{-16}$，已经到了 double 的精度极限，两者可能被判为相等。

所以，需要使用分数来表示直线的斜率和截距（如果你需要记录它们的话）。

12ms算法

算法思路：

使用自定义的结构体 Line来表示直线，使用 map<Line, int>来记录每条直线上点对的个数。
对每个点 points[i]，从 j=i+1开始向后扫描 points[j]，设输入有 n 个点，第1个点要往后扫描 (n-1) 个点，第2个点要往后扫描 (n-2) 个点，以此类推，共扫描 $n(n-1)/2$ 个点对。
每扫描一个点对，就检测这个点对的连线。在 map里将这条线对应的值增加1，即有新的一个点对在这条线上。
如果某个点对中的两个点相同，所有经过这个点的直线在 map中的值都要增加1。
最后，找出map中映射值最大的一个键，即穿过点数最多的线。假设这条线穿过 k 个点，键的映射值为 m ，那么这条直线上会有 $k(k-1)/2$ 个点对，有 $k(k-1)/2=m$ ，可算得 $k=(\sqrt{8m+1}+1)/2$ 。

/**
 * Definition for a point.
 * struct Point {
 *     int x;
 *     int y;
 *     Point() : x(0), y(0) {}
 *     Point(int a, int b) : x(a), y(b) {}
 * };
 */

// (first, second) pair of ints; struct Line stores its slope and its intercept
// as values of this type.
using paint = pair<int, int>;

// Greatest common divisor of x and y, computed with the Euclidean algorithm.
//
// The result is signed rather than normalised to be positive: `%` takes the
// sign of its left operand, so measure(-x, -y) == -measure(x, y). Dividing
// both arguments by the result therefore reduces (x, y) and (-x, -y) to the
// same pair, which is what the callers in this file do.
//
// Returns x when y == 0 (in particular measure(0, 0) == 0, so callers must not
// divide by the result in that case).
int measure(int x, int y)
{
    if (y == 0)
    {
        return x;   // x % 0 would be undefined behaviour
    }
    int z = y;
    while (x % y != 0)
    {
        z = x % y;
        x = y;
        y = z;
    }
    return z;
}

// A straight line stored as two fractions, each a (numerator, denominator) pair:
// `a` is the slope and `b` is the y-intercept.
// A vertical line x = c has neither; it is encoded as a = (c, 0), b = (1, 0),
// i.e. with both denominators equal to zero.
struct Line {
    paint a;    // slope; (c, 0) for the vertical line x = c
    paint b;    // y-intercept; (1, 0) for a vertical line

    Line() : a(paint(0, 0)), b(paint(0, 0)) {}
    Line(int m, int n, int x, int y) : a(paint(m, n)), b(paint(x, y)) {}

    // Lexicographic order on (a, b), so that Line can be used as a map key.
    bool operator < (const Line &l2) const {
        return a < l2.a || (a == l2.a && b < l2.b);
    }

    // Returns the y-coordinate of the point on this line whose x-coordinate is x.
    // When a denominator is zero (vertical or default-constructed line) there is
    // no such value; NaN is returned, which compares unequal to every number.
    double get(int x) const {
        if (a.second == 0 || b.second == 0) {
            const double zero = 0.0;
            return zero / zero;   // NaN; avoids the integer division by zero below
        }
        // a.first/a.second * x + b.first/b.second, brought onto the common
        // denominator a.second * b.second / gcd. long long keeps the products
        // from overflowing int.
        const long long gcd = measure(a.second, b.second);
        const long long aa = b.second / gcd;
        const long long bb = a.second / gcd;
        const long long m = static_cast<long long>(a.first) * x * aa + b.first * bb;
        const long long n = a.second * aa;
        return static_cast<double>(m) / static_cast<double>(n);
    }
};


class Solution {
public:
    int maxPoints(vector<Point>& points) {
	
    	if(points.size() < 3) return points.size();
    	
        map<Line, int> lines;   // 键为直线，映射值为在这条直线上的点对的个数
        
        for(int i = 0; i < points.size() - 1; i++){
        	int same = 0;   // 记录扫描points[j]时遇到的与points[i]相同的点
        	set<int*> s;    // 记录points[i]与points[j]的连线在map中的映射值的指针
        	for(int j = i + 1; j < points.size(); j++){
        		int deltax = points[i].x - points[j].x;
        		int deltay = points[i].y - points[j].y;
        		
        		if(deltax == 0 && deltay == 0){ // 两个点相同
        			same++;
				}
				else if(deltax == 0 && deltay != 0){    // 两个点在同一竖线上
					int x = points[i].x;
        			int& num = lines[Line(x, 0, 1, 0)];
					num += 1;
        			s.insert(&num);
				}
				else{
        			int gcd = measure(deltay, deltax);
        			int a1 = deltay / gcd;
        			int a2 = deltax / gcd;
        			
        			int m = points[i].y * deltax - deltay * points[i].x;
        			int gcd2 = measure(m, deltax);
        			int b1 = m / gcd2;
        			int b2 = deltax / gcd2;
        			
        			int& num = lines[Line(a1, a2, b1, b2)];
					num += 1;
        			s.insert(&num);
				}
			}
			// 有same个与points[i]相同的点，相当于还有same个点对在经过这个点的直线上
			// 需要将直线的映射值增加相应的数量
			for(int* num : s){  
				*num += same;
			}
			// 如果集合s为空，说明扫描到的points[j]全都与points[i]相同
			// 需要扫描lines，看是否有经过该点的直线
			if(s.empty()){
				for(auto& l : lines){
					if(l.first.get(points[i].x) == points[i].y){
						l.second += same;
					}
				}
			}
		}
		
		// 输入的所有点全都相同的情况
		if(lines.size() == 0) return points.size();
		
		// 找出最大的点对数
		int max = 1;
		map<Line, int>::iterator iter = lines.begin();
		while(iter != lines.end()){
			if(iter->second > max) max = iter->second;
			iter++;
		}
		
		// 由点对数量算出点数
		max = (sqrt(8 * max + 1) + 1) / 2;
		
		return max;
		
    }
};

算法主体共扫描 $n(n-1)/2$ 个点对，每次 map 操作为 $O(\log n)$，因此复杂度为 $O(n^2 \log n)$ ；当输入中存在重复点时，还需要额外扫描 map 中的直线。

其他优秀算法

8ms算法

sample①

算法思路：

对每个点对，遍历points，看有几个点和这个点对在同一直线上。
遍历每个点对，进行上述操作，找出最大的点数。

/**
 * Definition for a point.
 * struct Point {
 *     int x;
 *     int y;
 *     Point() : x(0), y(0) {}
 *     Point(int a, int b) : x(a), y(b) {}
 * };
 */
class Solution {
public:
    int maxPoints(vector<Point>& points) {
            int res = 0 ;
    for( int i = 0 ; i < points.size() ; i ++){ //first point.
        int duplicate = 1 ; 
        for( int j = i + 1 ; j < points.size() ; j++){ //second point.
            
            int count = 0 ;
            
            long long x1 = points[i].x , y1 = points[i].y ;  //use long type to avoid overflow.
            long long x2 = points[j].x , y2 = points[j].y ; 
            
            if( x1 == x2 && y1 == y2 ){ //if two points are duplicated.
                duplicate ++ ;
                continue ;
            }
            
            for( int k = 0 ; k < points.size() ; k++){ //find the third point.
                int x3 = points[k].x , y3 = points[k].y ;
                
                if( x1 * y2 + x2 * y3 + x3 * y1 - x3 * y2 - x2 * y1 - x1 * y3 == 0 ) //uses determinant multiplication.
                    count ++ ;
            }
            res = max(res , count) ; 
        }
        res = max(res , duplicate) ;
    }
    return res ;   
    }
};

虽然算法复杂度为 $O(n^3)$ ，但是操作比较简单，不用排除重复的直线；且没有使用有复杂操作的STL，用时反而较短。思路也很简捷。

sample②

算法思路：

对每个 points[i] ，建立一个 unordered_map<int, unordered_map<int, int>> 类型的 line ，line[x][y] 记录了与 points[i] 有斜率为 y/x 的连线的 points[j] 的个数。因为过一个点的两条不同直线必有不同的斜率，所以可以确定这些 points[j] 都在同一条直线上。
对每个 points[i] ，只需从 j = i + 1 开始扫描 points[j] ，而不必从 j = 0 开始。原因是：设点数最多的那条直线上下标最小的点为 points[i] ，则该直线上其余的点下标都大于 i ，在这一轮循环中会被全部统计到，这一轮得到的 count + overlap + 1 就是最终答案；其他轮次即使漏掉了下标更小的点，得到的值也不会超过这个答案，所以不影响结果。
每次循环时都把计算出的点数与上一轮循环确定的 maxcnt 或 count 比较，层层循环最终确定 maxcnt 。

// For each anchor point, bucket the later points by their reduced direction
// (dx, dy) from the anchor; the fullest bucket is the best line through it.
class Solution {
    // Euclidean gcd; the smaller argument is used as the first divisor.
    int gcd (int x, int y) {
        int lo = x;
        int hi = y;
        if (lo > hi) {
            lo = y;
            hi = x;
        }
        while (lo > 0) {
            const int rem = hi % lo;
            hi = lo;
            lo = rem;
        }
        return hi;
    }

public:
    // Returns the largest number of points lying on one straight line.
    int maxPoints(vector<Point>& points) {
        const int n = points.size();
        if (n <= 2) return n;

        int maxcnt = 0;
        for (int i = 0; i < n; i++) {
            int overlap = 0;   // later points coinciding with points[i]
            int count = 0;     // size of the fullest direction bucket so far
            unordered_map<int, unordered_map<int, int>> line;   // line[dx][dy] -> hits

            for (int j = i + 1; j < n; j++) {
                int dx = points[j].x - points[i].x;
                int dy = points[j].y - points[i].y;

                if (dx == 0 && dy == 0) {
                    overlap++;
                    continue;
                }

                // Canonical sign: dx >= 0, and dy > 0 when dx == 0.
                if (dx < 0) {
                    dx = -dx;
                    dy = -dy;
                } else if (dx == 0) {
                    dy = std::abs(dy);
                }

                const int dvs = gcd(dx, std::abs(dy));
                if (dvs != 0) {
                    dx /= dvs;
                    dy /= dvs;
                }

                int& hits = line[dx][dy];
                ++hits;
                count = std::max(count, hits);
            }

            // +1 for points[i] itself.
            maxcnt = std::max(maxcnt, count + overlap + 1);
        }
        return maxcnt;
    }
};

算法复杂度为 $O(n^2)$ 。

4ms算法

sample①

算法思路：

类似于上面的8ms算法的sample②。

但是因为没有采用嵌套的 unordered_map 结构，而是使用 pair<int, int> 来表示斜率，所以比前一个算法更快。

class Solution {
public:
    int maxPoints(vector<Point>& points) {
        int result = 0;
        for (size_t i = 0; i < points.size(); ++i) {
            Point pt1 = points[i];
            std::unordered_map<std::pair<int, int>, int, pair_hash> slope_map;
            int same_count = 0, v_count = 0, h_count = 0;
            for (size_t j = i + 1; j < points.size(); ++j) {
                Point pt2 = points[j];
                if (pt2.x == pt1.x && pt2.y == pt1.y) {
                ++same_count;
                } else if (pt2.x == pt1.x) {
                  ++v_count;
                } else if (pt2.y == pt1.y) {
                  ++h_count;
                }  else {
                  int dx = pt2.x - pt1.x;
                  int dy = pt2.y - pt1.y;
                  int gcd = __gcd(dx, dy);
                  std::pair<int, int> key(dx / gcd, dy / gcd);
                  ++slope_map[key];
                }
            }
            int max_slope_count = 0;
            for (auto it = slope_map.begin(); it != slope_map.end(); ++it) {
                max_slope_count = std::max(max_slope_count, it->second);
            }
            max_slope_count = std::max(max_slope_count, v_count);
            max_slope_count = std::max(max_slope_count, h_count);
            result = std::max(result, max_slope_count + 1 + same_count);
        }
        return result;
    }
  
private:
    struct pair_hash {
        template<typename U, typename V> 
        size_t operator() (const std::pair<U, V> & key_) const {
            size_t result = std::hash<U>()(key_.first);
            result += result * 31 + std::hash<V>()(key_.second);
            return result;
        }
    };
};

算法复杂度为 $O(n^2)$ 。

sample②

算法思路：

在总体思路上，也类似于上面的8ms算法的sample②和4ms算法的sample①，只是细节处有所不同。

// Algorithm: every point in turn acts as the base point and gets its own map.
// Each later point defines a line through the base point; that line's counter in
// the map is incremented.
// Notes:
// 1. The counters do not include the base point itself, and later points equal to
//    the base point are counted separately; both are added when computing the result.
// 2. Storing the slope as a double is not precise enough, so it is kept as a pair
//    of ints reduced by their gcd. Two Slope values describe the same direction
//    exactly when their x and y members are both equal.
// 3. The map comparator must be a strict weak ordering: it returns true only for
//    "strictly less" and false for equal keys. Slopes are therefore ordered
//    lexicographically on (x, y).
/**
 * Definition for a point.
 * struct Point {
 *     int x;
 *     int y;
 *     Point() : x(0), y(0) {}
 *     Point(int a, int b) : x(a), y(b) {}
 * };
 */
class Solution {
public:
    // Euclidean gcd. The result is signed: `%` takes the sign of its left operand,
    // so gcd(-a, -b) == -gcd(a, b), and (a, b) and (-a, -b) reduce to the same pair.
    static int gcd (int a, int b)
    {
        if (b == 0)
            return a;
        else
            return gcd(b, a % b);
    }

    // Direction of a line, stored as the pair (ix, iy) divided by their gcd.
    struct Slope
    {
        Slope() : y(0), x(0) {}
        Slope(int ix, int iy) : y(0), x(0)
        {
            const int g = gcd(ix, iy);
            if (g != 0)   // g == 0 only for (0, 0); avoid dividing by zero
            {
                y = iy / g;
                x = ix / g;
            }
        }
        int y;
        int x;
    };

    // Strict weak ordering on Slope: lexicographic on (x, y).
    // Comparing cross products (a.y * b.x against b.y * a.x) is not transitive
    // when the members can be negative, and the int product can overflow, so the
    // members are compared directly.
    struct MyCmp
    {
        bool operator()(const Slope &a, const Slope &b) const
        {
            if (a.x != b.x)
            {
                return a.x < b.x;
            }
            return a.y < b.y;
        }
    };

    // Returns the largest number of points lying on one straight line.
    int maxPoints(vector<Point>& points) {
        const int n = static_cast<int>(points.size());
        if (n < 3)
            return n;

        int res = 0;
        for (int i = 0; i < n; i++)
        {
            map<Slope, int, MyCmp> lines;
            int duplicated = 1; // 1 is point i itself; incremented for each later duplicate of it
            for (int j = i + 1; j < n; j++)
            {
                if (points[i].x == points[j].x && points[i].y == points[j].y)
                {
                    duplicated++;   // a duplicate defines no slope
                    continue;
                }
                const Slope slope(points[i].y - points[j].y, points[i].x - points[j].x);
                lines[slope]++;
            }

            // Covers the case where every later point is a duplicate of point i.
            res = max(res, duplicated);
            for (const auto& entry : lines)
            {
                res = max(res, entry.second + duplicated);
            }
        }
        return res;
    }
};

结语

比较一下12ms和8ms②、4ms①②的算法，可以看出，前者在循环之外使用 map 来记录所有的直线，在循环结束后再寻找max points，就需要考虑截距的问题，而且占用空间也比较多；后者在 points[i] 循环内使用 map 或 unordered_map ，每循环一次更新一次max points，就没有前者的这些问题。

这次做的并不算好，仅仅beats不到一半的提交。

但通过研读多个sample算法、比较思路，从别人的不同的角度去看问题，我也很有收获。