13.3 IntSet 整数集合
13.3.1 ADT(C#)
-
用接口封装类,相当于只开放类的部分功能
eg:
/// <summary>
/// Operations exposed by a set of <typeparamref name="T"/> elements.
/// Wrapping a class in this interface exposes only this subset of its members.
/// </summary>
/// <typeparam name="T">Element type stored in the set.</typeparam>
public interface ISet_my<T>
{
    /// <summary>Returns the storage coordinate of <paramref name="element"/>.</summary>
    uint[] GetCoordinate(T element);

    /// <summary>Adds <paramref name="element"/> to the set and returns its offset.</summary>
    uint[] Add(T element);

    /// <summary>Removes <paramref name="element"/> from the set.</summary>
    uint[] Remove(T element);

    //bool IsMember(T element);

    /// <summary>
    /// Gets the set as a string of binary digits (still a text string, not a real bit stream).
    /// </summary>
    string GetBinString();

    /// <summary>Prints the binary string and returns it.</summary>
    string PrintBinString();

    /// <summary>Gets the set as a string listing its integer elements.</summary>
    string GetIntString();

    /// <summary>Prints the integer string and returns it.</summary>
    string PrintIntString();

    /// <summary>Union of this set and <paramref name="ISet_2"/>.</summary>
    ISet_my<T> Union(ISet_my<T> ISet_2);

    /// <summary>Intersection of this set and <paramref name="ISet_2"/>.</summary>
    ISet_my<T> Intersect(ISet_my<T> ISet_2);

    /// <summary>Difference of this set and <paramref name="ISet_2"/>.</summary>
    ISet_my<T> DiffSet(ISet_my<T> ISet_2);

    /// <summary>Complement of this set.</summary>
    ISet_my<T> Complement();
}
13.3.2 核心思想
IntSet (整数集合) 的核心思想就是压缩集合存储更多数据,比如:
用一个int32空间 存32个int
比起用32个int32空间 存32个int,IntSet的开销瞬间就小了是不是!
13.3.2 实现原理
-
bitMap(位图) 和 GetCoordinate()
public class IntSet_my : ISet_my <uint>
{
//members
private readonly uint[] _bitMap; //the bit stream held in _bitMap represents the contents of the IntSet
public uint _bitSize { get; } //how many binary digits the bit stream of _bitMap occupies
//constructor
/// <summary>
/// Creates an empty set sized from <paramref name="bitSize_0"/>.
/// </summary>
/// <param name="bitSize_0">Bit size of the set; stored in <c>_bitSize</c>.</param>
public IntSet_my(uint bitSize_0)
{
    _bitSize = bitSize_0;

    // Each uint word holds 32 membership bits; the "+ 1" guarantees at least one word.
    // A newly allocated array is already all zeros, so the set starts empty
    // without an explicit clearing loop.
    _bitMap = new uint[_bitSize / 32 + 1];
}
//get ( index_bitMap, index_bit )
/// <summary>
/// Maps an element to its position inside the bit map.
/// </summary>
/// <param name="e">Element value; values greater than <c>_bitSize</c> are rejected.</param>
/// <returns>
/// A two-element array: [0] is the index of the word in <c>_bitMap</c>,
/// [1] is the bit index (0-31) inside that word.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="e"/> is greater than <c>_bitSize</c>.
/// </exception>
public uint[] GetCoordinate(uint e)
{
    // Validate before allocating; note that e == _bitSize is accepted.
    if (e > _bitSize)
    {
        throw new ArgumentOutOfRangeException(
            nameof(e), e, "Element is larger than the bit size of this set.");
    }

    return new uint[] { e / 32, e % 32 };
}
- Add()和Remove()
//add int to IntSet
/// <summary>
/// Adds <paramref name="e"/> to the set by setting its bit.
/// </summary>
/// <param name="e">Element to add; range-checked by <c>GetCoordinate</c>.</param>
/// <returns>The coordinate (word index, bit index) of the element.</returns>
public uint[] Add(uint e)
{
    uint[] coordinate = GetCoordinate(e);

    // Integer shift instead of floating-point Math.Pow to build the single-bit mask.
    uint mask = 1u << (int)coordinate[1];
    _bitMap[coordinate[0]] |= mask;

    return coordinate;
}
//remove int from IntSet
/// <summary>
/// Removes <paramref name="e"/> from the set by clearing its bit.
/// </summary>
/// <param name="e">Element to remove; range-checked by <c>GetCoordinate</c>.</param>
/// <returns>The coordinate (word index, bit index) of the element.</returns>
public uint[] Remove(uint e)
{
    uint[] coordinate = GetCoordinate(e);

    // Clear exactly one bit: AND with the complement of the single-bit mask.
    // (Complementing the bit index itself would clear unrelated bits.)
    uint mask = 1u << (int)coordinate[1];
    _bitMap[coordinate[0]] &= ~mask;

    return coordinate;
}
- GetBinString()获取二进制字符串和 GetIntString 获取整数字符串
/// <summary>
/// Renders the bit map as a string of '0'/'1' characters.
/// </summary>
/// <returns>
/// One 32-character group per word, with the highest-index word first,
/// so the rightmost character corresponds to element 0.
/// </returns>
public string GetBinString()
{
    int wordCount = _bitMap.Length;
    string[] groups = new string[wordCount];

    for (int word = 0; word < wordCount; word++)
    {
        // Base-2 text of the word, left-padded with zeros to a fixed width of 32.
        string digits = Convert.ToString(_bitMap[word], 2).PadLeft(32, '0');

        // Word 0 goes to the far right of the final string.
        groups[wordCount - 1 - word] = digits;
    }

    return string.Concat(groups);
}
/// <summary>
/// Writes the binary string to the console, starting from its rightmost
/// character, in groups of four characters separated by a space.
/// </summary>
/// <returns>The unmodified string produced by <c>GetBinString</c>.</returns>
public string PrintBinString()
{
    string bits = GetBinString();
    Console.WriteLine("\n从右往左, 第0位为表示0的二进制位\n");

    int position = bits.Length - 1;
    while (position >= 0)
    {
        // Emit four characters, then a separating blank.
        for (int n = 0; n < 4; n++)
        {
            Console.Write(bits[position]);
            position--;
        }
        Console.Write(" ");
    }

    Console.Write("\n");
    return bits;
}
/// <summary>
/// Lists the elements contained in the set as text.
/// </summary>
/// <returns>
/// Each contained element in ascending order, every one preceded by a single space;
/// an empty string when no bit is set.
/// </returns>
public string GetIntString()
{
    string bits = GetBinString();
    string listed = string.Empty;
    int lastIndex = bits.Length - 1;

    // Element "value" is stored "value" characters in from the right end.
    for (int value = 0; value <= lastIndex; value++)
    {
        if (bits[lastIndex - value] != '1')
        {
            continue;
        }
        listed = listed + " " + Convert.ToString(value);
    }

    return listed;
}
/// <summary>
/// Writes the element list to the console.
/// </summary>
/// <returns>The unmodified string produced by <c>GetIntString</c>.</returns>
public string PrintIntString()
{
    string elements = GetIntString();
    string line = string.Format("\nIntSet中元素升序排列: {0} \n", elements);
    Console.Write(line);
    return elements;
}
- Union(), Intersect(), Differ()
- 进行交、并、补运算的单位是bitMap[] !
(虽然根本上是 计算机中的bit ) - 用接口封装!
//Intersect
/// <summary>
/// Builds the intersection of this set and <paramref name="IntSet_2"/>.
/// </summary>
/// <param name="IntSet_2">The other operand.</param>
/// <returns>A new set whose bit size is the smaller of the two operands' bit sizes.</returns>
public IntSet_my Intersect(IntSet_my IntSet_2)
{
    uint smallerSize = Math.Min(_bitSize, IntSet_2._bitSize);
    IntSet_my result = new IntSet_my(smallerSize);

    // Work word by word: AND-ing two uint words intersects 32 elements at once.
    uint[] target = result._bitMap;
    for (int word = 0; word < target.Length; word++)
    {
        target[word] = _bitMap[word] & IntSet_2._bitMap[word];
    }

    return result;
}
//Differ
/// <summary>
/// Builds the difference of this set and <paramref name="IntSet_2"/>:
/// the elements of this set that are not in <paramref name="IntSet_2"/>.
/// </summary>
/// <param name="IntSet_2">The set whose elements are removed.</param>
/// <returns>A new set with the same bit size as this set.</returns>
public IntSet_my Differ(IntSet_my IntSet_2)
{
    IntSet_my result = new IntSet_my(_bitSize);
    int sharedWords = Math.Min(_bitMap.Length, IntSet_2._bitMap.Length);

    // Words present in both maps: keep the bits of this set that the other set lacks.
    for (int word = 0; word < sharedWords; word++)
    {
        result._bitMap[word] = _bitMap[word] & ~IntSet_2._bitMap[word];
    }

    // Words only this set has: the other set has nothing there to subtract,
    // so these elements all belong to the difference.
    for (int word = sharedWords; word < _bitMap.Length; word++)
    {
        result._bitMap[word] = _bitMap[word];
    }

    return result;
}
//Complement
/// <summary>
/// Builds the complement of this set relative to the values the set accepts
/// (0 through <c>_bitSize</c>, the range allowed by <c>GetCoordinate</c>).
/// </summary>
/// <returns>A new set with the same bit size as this set.</returns>
public IntSet_my Complement()
{
    IntSet_my result = new IntSet_my(_bitSize);

    for (int word = 0; word < _bitMap.Length; word++)
    {
        result._bitMap[word] = ~_bitMap[word];
    }

    // The last word usually has padding bits above _bitSize. Inverting sets them,
    // which would add elements the set cannot accept, so mask them off again.
    // The array length is _bitSize / 32 + 1, so the last word holds
    // bits 0 .. (_bitSize % 32).
    uint usedBits = _bitSize % 32 + 1;
    if (usedBits < 32)
    {
        uint validMask = (1u << (int)usedBits) - 1;
        result._bitMap[_bitMap.Length - 1] &= validMask;
    }

    return result;
}
//接口
/// <summary>
/// Interface entry point for union; forwards to the class's own <c>Union</c> method.
/// </summary>
/// <param name="ISet_2">The other operand; must be an <c>IntSet_my</c>.</param>
/// <exception cref="ArgumentException">
/// <paramref name="ISet_2"/> is null or is not an <c>IntSet_my</c>.
/// </exception>
ISet_my<uint> ISet_my<uint>.Union(ISet_my<uint> ISet_2)
{
    // "as" yields null for both a null argument and a foreign implementation;
    // fail here with a clear message instead of a NullReferenceException later.
    IntSet_my other = ISet_2 as IntSet_my;
    if (other == null)
    {
        throw new ArgumentException("Operand must be a non-null IntSet_my.", nameof(ISet_2));
    }

    return Union(other);
}
/// <summary>
/// Interface entry point for intersection; forwards to the public <c>Intersect</c> method.
/// </summary>
/// <param name="ISet_2">The other operand; must be an <c>IntSet_my</c>.</param>
/// <exception cref="ArgumentException">
/// <paramref name="ISet_2"/> is null or is not an <c>IntSet_my</c>.
/// </exception>
ISet_my<uint> ISet_my<uint>.Intersect(ISet_my<uint> ISet_2)
{
    // "as" yields null for both a null argument and a foreign implementation;
    // fail here with a clear message instead of a NullReferenceException later.
    IntSet_my other = ISet_2 as IntSet_my;
    if (other == null)
    {
        throw new ArgumentException("Operand must be a non-null IntSet_my.", nameof(ISet_2));
    }

    return Intersect(other);
}
/// <summary>
/// Interface entry point for set difference; forwards to the public <c>Differ</c> method.
/// </summary>
/// <param name="ISet_2">The set to subtract; must be an <c>IntSet_my</c>.</param>
/// <exception cref="ArgumentException">
/// <paramref name="ISet_2"/> is null or is not an <c>IntSet_my</c>.
/// </exception>
ISet_my<uint> ISet_my<uint>.DiffSet(ISet_my<uint> ISet_2)
{
    // "as" yields null for both a null argument and a foreign implementation;
    // fail here with a clear message instead of a NullReferenceException later.
    IntSet_my other = ISet_2 as IntSet_my;
    if (other == null)
    {
        throw new ArgumentException("Operand must be a non-null IntSet_my.", nameof(ISet_2));
    }

    return Differ(other);
}
/// <summary>
/// Interface entry point for complement; forwards to the public <c>Complement</c> method.
/// </summary>
ISet_my<uint> ISet_my<uint>.Complement() => Complement();
13.3.3 遇到的问题及教训
-
Convert方法不了解
在10进制int转二进制int时一直想着要转成计算机bit,但不知道要用什么方法实现
比如:
Convert.ToBase64CharArray( Byte[] inArray, Int32 offsetIn, Int32 length, Char[] outArray, Int32 offsetOut)
- inArray 为要转化的 8-bit unsigned integer 数组
- offsetIn 为 inArray转化部分的起始索引
- length 为 inArray转化部分的长度
- outArray 为要输出的字符数组
- offsetOut 为要输出的字符数组的起始索引
ToBase64CharArray 只接受 byte[](8 位无符号整数数组),而 bitMap 的存储单位是 uint(32 位无符号整数),所以不能直接用它把 bitMap 转成二进制串。
这个好像只能靠积累了…
-
设计时,print功能和getValue功能要分开
-
交集差集补集合都是以int32 数组元素为单位计算
-
比较字符串中的单个字符时,要写 string[i] == '1'(char 字面量),而不是 string[i] == "1" 或者 string[i] == 1 !!!
-
在VS里,int 如何转化加入string
- 不能暴力转char ,eg: (char) j, 这样得到的是编码值为 j 的字符,而不是数字 j 的文本形式,输出会乱码
- 只有转成Unicode的string,eg: Convert.ToString(j)
13.3.4 仍需要解决的问题
- 泛型接口具体使用方法???
- Unicode, ASCII, Utf-8 编码区别