数据离散化的两种方法

为什么要离散化？

现在给出这样的一个问题，有1e5个数，每个数的范围在-1e18至1e18之间，有1e5次询问，每次有一个k，询问第k大的数字出现的次数。
当然，第一想法是用map计数。但是掐指一算复杂度：计数之后，每次询问都要遍历一遍map才能找到第k大的数，单次询问是O(n)，1e5次询问总共约1e10次操作，显然过不去。
这时候我们注意到，对于这个问题，我们关心的只是数据之间的大小关系，而不是具体的数值，所以只需要把每个数映射为它在全体数据中从小到大的排名即可。例如对于一组数据 3, 100001357, -256, 378, 4, 1，将其处理成 3, 6, 1, 5, 4, 2 之后，值域被压缩到 1 到 n 之间，就可以直接把离散化后的值当作数组下标来计数和查询。

样例输入

6
3 100001357 -256 378 4 1

样例输出

3 6 1 5 4 2

方法一：

通过结构体记录每个数的值和它在原数组中的位置，先按值从小到大排序，再用Rank数组记录结果：以排序后第i个元素的原位置id为下标，以i为它的值，即Rank[id] = i。

#include<bits/stdc++.h>
// Turns off iostream/stdio synchronisation and unties cin/cout.
// Not invoked by the code shown here, which reads with getchar() and writes with printf().
#define fio ios::sync_with_stdio(false);cin.tie(0);cout.tie(0);
// Expand to the argument triples (rt<<1, l, mid) and (rt<<1|1, mid+1, r).
// They rely on rt, l, mid and r being defined wherever the macro is used.
// NOTE(review): presumably segment-tree child arguments; unused in the code shown here.
#define lson rt<<1, l, mid
#define rson rt<<1|1, mid+1, r
// Lowest set bit of x. The argument and the whole expansion are parenthesised
// so the macro stays correct inside larger expressions (lowbit(x) + 1) and
// with compound arguments (lowbit(a | b)).
#define lowbit(x) ((x) & (-(x)))
using namespace std;
// Short aliases for common integer and pair types.
typedef long long ll;
typedef pair<ll, int> pli;
typedef pair<int, int> pii;
typedef pair<ll, ll> pll;
typedef pair<double, double> pdd;
// Fast integer reader built on getchar().
// Skips every character that is not a decimal digit, remembering whether a
// '-' was seen on the way, then accumulates the following run of digits into
// res and applies the sign.
//   res: output; always overwritten. Left at 0 when the input ends before
//        any digit is found.
// The character that terminates the digit run is consumed as well.
template<class T>
void read(T &res) {
  int sign = 1; res = 0;
  // getchar() returns int; keeping it in an int lets EOF be told apart from
  // real characters, so exhausted input ends the scan instead of looping forever.
  int c = getchar();
  while(c != EOF && (c < '0' || c > '9')) { if(c == '-') sign = -1; c = getchar(); }
  while(c >= '0' && c <= '9') { res = res * 10 + (c - '0'); c = getchar(); }
  res *= sign;
}
// Capacity of the global arrays: 1e5 elements plus a little slack.
const int N = 1e5+5;
// One input element: its value and its position in the input.
// val is 64-bit so that values beyond the int range are stored without truncation.
struct xx {
  long long val;
  int id;
  xx(long long vv = 0, int ii = 0) : val(vv), id(ii) {}
  // Orders by value; equal values are ordered by input position so that
  // std::sort (which is not stable) yields a deterministic order.
  bool operator < (const xx &c) const {
    if(val != c.val) return val < c.val;
    return id < c.id;
  }
}a[N];
// Number of elements read from the input.
int n;
// Rank[p] is the rank computed for the element at input position p.
int Rank[N];
// Method 1 driver: reads n followed by n integers, sorts the values together
// with their input positions, and prints each value's rank in input order.
// Ranks start at 1; equal values receive the same rank.
int main() {
  read(n);
  for(int i = 1; i <= n; ++i) {
    read(a[i].val); a[i].id = i;
  }
  sort(a+1, a+n+1);
  // Walk the sorted array and advance the rank only when the value changes,
  // so duplicates share one rank instead of getting an arbitrary run of ranks.
  int cur = 0;
  for(int i = 1; i <= n; ++i) {
    if(i == 1 || a[i].val != a[i-1].val) ++cur;
    Rank[a[i].id] = cur;
  }
  for(int i = 1; i <= n; ++i) {
    printf("%d ", Rank[i]);
  }
  return 0;
}

方法二：

用一个拷贝数组，对其排序并去重，然后用每个数在去重后数组中的位置作为它离散化后的值；查找位置时可以用二分（lower_bound），单次查询O(log n)。

#include<bits/stdc++.h>
// Turns off iostream/stdio synchronisation and unties cin/cout.
// Not invoked by the code shown here, which reads with getchar() and writes with printf().
#define fio ios::sync_with_stdio(false);cin.tie(0);cout.tie(0);
// Expand to the argument triples (rt<<1, l, mid) and (rt<<1|1, mid+1, r).
// They rely on rt, l, mid and r being defined wherever the macro is used.
// NOTE(review): presumably segment-tree child arguments; unused in the code shown here.
#define lson rt<<1, l, mid
#define rson rt<<1|1, mid+1, r
using namespace std;
// Short aliases for common integer and pair types.
typedef long long ll;
typedef pair<ll, int> pli;
typedef pair<int, int> pii;
typedef pair<ll, ll> pll;
// Reads one decimal integer from stdin via getchar() into res.
// Leading non-digit characters are skipped; if any of them is '-', the result
// is negated. The digit run that follows is accumulated into res, and the
// character ending that run is consumed.
//   res: output; always overwritten, and left at 0 if the input runs out
//        before a digit appears.
template<class T>
void read(T &res) {
  res = 0;
  int sign = 1;
  // Hold the getchar() result in an int: a char cannot reliably represent
  // EOF, and without the EOF check this loop never ends on exhausted input.
  int ch = getchar();
  while(ch != EOF && (ch < '0' || ch > '9')) {
    if(ch == '-') sign = -1;
    ch = getchar();
  }
  while(ch >= '0' && ch <= '9') {
    res = res * 10 + (ch - '0');
    ch = getchar();
  }
  res *= sign;
}
// Capacity of the global arrays: 1e5 elements plus a little slack.
const int N = 1e5+5;
// n: length of the original array; cnt: number of distinct values after deduplication.
int n, cnt;
// a: the input values, overwritten in place by their ranks.
// tmp: a copy of a that gets sorted and deduplicated.
// Both are 64-bit so that inputs beyond the int range are stored without truncation.
long long a[N], tmp[N];
// Method 2 driver: reads n followed by n integers and prints, in input order,
// the 1-based position of each value within the sorted list of distinct values.
int main(){
  read(n);
  for(int i = 1; i <= n; ++i) {
    read(a[i]); tmp[i] = a[i];
  }
  sort(tmp+1, tmp+n+1);
  // unique() returns the end of the deduplicated prefix; subtracting the
  // 1-based start gives the count of distinct values.
  cnt = unique(tmp+1, tmp+n+1)-tmp-1;
  for(int i = 1; i<= n; ++i) {
    // Binary search over tmp[1..cnt]; the pointer offset from tmp is the 1-based rank.
    a[i] = lower_bound(tmp+1, tmp+cnt+1, a[i])-tmp;
  }
  for(int i = 1; i <= n; ++i) {
    // a now holds long long ranks, so the format must be %lld.
    printf("%lld ", a[i]);
  }
  return 0;
}

cornivores

发布了28 篇原创文章 · 获赞 14 · 访问量 2964

私信关注