C#常用集合的实现原理(List)

以下讨论仅针对泛型集合。

List 顺序线性表

List 集合类是顺序线性表

Add操作的复杂度是O(1)或O(n):List的容量是动态扩容的,在不需要扩容时,Add操作是O(1);而在需要扩容时,会先把已存在的元素拷贝到新数组中,再添加新的元素,此时的Add操作是O(n)。由于每次扩容时容量翻倍,均摊下来Add操作仍然是O(1)。

       /// <summary>
       /// Appends <paramref name="item"/> after the last stored element, asking
       /// EnsureCapacity for room first when the backing array is already full.
       /// </summary>
       /// <param name="item">The element to append.</param>
       public void Add(T item) {
            // Grow only when every slot of the backing array is in use.
            if (_items.Length == _size) {
                EnsureCapacity(_size + 1);
            }

            // Claim the next free slot (bumping the count) and store into it.
            int slot = _size++;
            _items[slot] = item;

            // Record that the list was mutated.
            _version++;
        }

        
       /// <summary>
       /// Grows the backing array, via the Capacity setter, so that it can hold at
       /// least <paramref name="min"/> elements. Does nothing when it is already
       /// large enough.
       /// </summary>
       /// <param name="min">The minimum number of slots required.</param>
       private void EnsureCapacity(int min) {
            if (_items.Length >= min) {
                return;
            }

            // Start from the default capacity for an empty array, otherwise double.
            int grown;
            if (_items.Length == 0) {
                grown = _defaultCapacity;
            }
            else {
                grown = _items.Length * 2;
            }

            // Clamp to the maximum array length. Thanks to the (uint) cast this also
            // catches a doubled length that overflowed into a negative int.
            if ((uint)grown > Array.MaxArrayLength) {
                grown = Array.MaxArrayLength;
            }

            // Never end up below what the caller asked for.
            if (grown < min) {
                grown = min;
            }

            Capacity = grown;
        }

注意在触发扩容时调用的是他的Capacity属性,找到它的定义如下:

       /// <summary>
       /// Gets or sets the length of the backing array. Setting a different value
       /// allocates a new array of that length and copies the stored elements into
       /// it; setting zero switches to the shared empty array.
       /// </summary>
       public int Capacity {
            get {
                Contract.Ensures(Contract.Result<int>() >= 0);
                return _items.Length;
            }
            set {
                // The new capacity must be able to hold the elements already stored.
                if (value < _size) {
                    ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.value, ExceptionResource.ArgumentOutOfRange_SmallCapacity);
                }
                Contract.EndContractBlock();

                // Same length as now: nothing to reallocate.
                if (value == _items.Length) {
                    return;
                }

                // Non-positive capacity: fall back to the shared empty array.
                if (value <= 0) {
                    _items = _emptyArray;
                    return;
                }

                // Allocate the replacement array and carry over the live elements.
                T[] resized = new T[value];
                if (_size > 0) {
                    Array.Copy(_items, 0, resized, 0, _size);
                }
                _items = resized;
            }
        }

扩容时会分配一个新数组,并通过Array.Copy把已有元素拷贝过去,旧数组随后成为垃圾,等待GC回收(Array.Copy本身并不会触发GC)。所以在声明List时,如果已知元素的大致数量,应尽量通过构造函数指定一个不小于该数量的初始容量,以避免多次扩容带来的内存分配与拷贝开销;这个容量并不需要是4的倍数。

而对于Contains方法,其是按照线性检索的,其复杂度是O(n)。

        /// <summary>
        /// Scans the stored elements from front to back and reports whether any of
        /// them equals <paramref name="item"/>.
        /// </summary>
        /// <param name="item">The value to look for; may be null.</param>
        /// <returns>true when a matching element is found, otherwise false.</returns>
        public bool Contains(T item) {
            if ((Object) item != null) {
                // Non-null target: compare through the default equality comparer.
                EqualityComparer<T> equality = EqualityComparer<T>.Default;
                int pos = 0;
                while (pos < _size) {
                    if (equality.Equals(_items[pos], item)) {
                        return true;
                    }
                    pos++;
                }
                return false;
            }

            // Null target: a match is any stored element that is itself null.
            for (int slot = 0; slot < _size; slot++) {
                if ((Object) _items[slot] == null) {
                    return true;
                }
            }
            return false;
        }

而BinarySearch方法是按二分查找实现的,其复杂度是O(lg n)(前提是列表中的元素已经按同一个比较器排好序)。

       /// <summary>
       /// Validates the requested range and then delegates to Array.BinarySearch
       /// over the backing array.
       /// </summary>
       /// <param name="index">First position of the range to search; must not be negative.</param>
       /// <param name="count">Number of elements in the range; must not be negative.</param>
       /// <param name="item">The value to locate.</param>
       /// <param name="comparer">Comparer handed through to Array.BinarySearch.</param>
       /// <returns>Whatever Array.BinarySearch returns for the range.</returns>
       public int BinarySearch(int index, int count, T item, IComparer<T> comparer) {
            if (index < 0) {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.index, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
            }
            if (count < 0) {
                ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
            }
            // The range [index, index + count) has to fit inside the stored elements.
            if (count > _size - index) {
                ThrowHelper.ThrowArgumentException(ExceptionResource.Argument_InvalidOffLen);
            }
            Contract.Ensures(Contract.Result<int>() <= index + count);
            Contract.EndContractBlock();

            int found = Array.BinarySearch<T>(_items, index, count, item, comparer);
            return found;
        }
   /// <summary>
   /// Binary search over array[index .. index + length - 1] using
   /// <paramref name="comparer"/> to order elements against <paramref name="value"/>.
   /// </summary>
   /// <returns>
   /// The position of an element that compares equal to value, or the bitwise
   /// complement of the position where the search ended when none does.
   /// </returns>
   internal static int InternalBinarySearch(T[] array, int index, int length, T value, IComparer<T> comparer)
        {
            Contract.Requires(array != null, "Check the arguments in the caller!");
            Contract.Requires(index >= 0 && length >= 0 && (array.Length - index >= length), "Check the arguments in the caller!");

            int low = index;
            int high = index + length - 1;

            while (low <= high)
            {
                // Midpoint computed from the width so that low + high is never formed.
                int mid = low + ((high - low) >> 1);
                int cmp = comparer.Compare(array[mid], value);

                if (cmp == 0)
                {
                    return mid;
                }
                else if (cmp < 0)
                {
                    // array[mid] sorts before value: continue in the upper half.
                    low = mid + 1;
                }
                else
                {
                    // array[mid] sorts after value: continue in the lower half.
                    high = mid - 1;
                }
            }

            // No match: encode the final lower bound as a negative number.
            return ~low;
        }

      /// <summary>
      /// Looks up <paramref name="item"/> with IndexOf and, when a non-negative
      /// position comes back, removes the element at that position.
      /// </summary>
      /// <param name="item">The value to remove.</param>
      /// <returns>true when an element was removed, false when IndexOf found none.</returns>
      public bool Remove(T item) {
            int position = IndexOf(item);
            if (position < 0) {
                return false;
            }

            RemoveAt(position);
            return true;
        }

       /// <summary>
       /// Removes the element at <paramref name="index"/> by shifting every later
       /// element one slot towards the front and clearing the vacated last slot.
       /// </summary>
       /// <param name="index">Position of the element to remove; must be in [0, _size).</param>
       public void RemoveAt(int index) {
            // One unsigned comparison rejects both negative and too-large indices.
            if ((uint)index >= (uint)_size) {
                ThrowHelper.ThrowArgumentOutOfRangeException();
            }
            Contract.EndContractBlock();

            int last = _size - 1;
            _size = last;

            // Number of elements that sit behind the removed one.
            int tail = last - index;
            if (tail > 0) {
                Array.Copy(_items, index + 1, _items, index, tail);
            }

            // Reset the slot that is no longer part of the list.
            _items[last] = default(T);
            _version++;
        }

如果是Remove方法,需要先通过IndexOf顺序查找到指定元素的下标,再调用RemoveAt方法进行删除操作;本质上是通过数组拷贝把后面的元素整体前移一位,覆盖掉要删除的元素,因此复杂度是O(n)。

主要通过内部的索引器语法进行修改操作。

       /// <summary>
       /// Reads or overwrites the element stored at <paramref name="index"/>.
       /// Both accessors reject any index outside [0, _size).
       /// </summary>
       public T this[int index] {
            get {
                // Casting to uint folds the "negative" and "too large" checks into one.
                if ((uint)_size <= (uint) index) {
                    ThrowHelper.ThrowArgumentOutOfRangeException();
                }
                Contract.EndContractBlock();

                T stored = _items[index];
                return stored;
            }

            set {
                // Same single-comparison range check as the getter.
                if ((uint)_size <= (uint) index) {
                    ThrowHelper.ThrowArgumentOutOfRangeException();
                }
                Contract.EndContractBlock();

                _items[index] = value;
                // Overwriting an element counts as a mutation.
                _version++;
            }
        }

排序

        /// <summary>
        /// Sorts array[index .. index + length - 1]. When no comparer (or the default
        /// comparer) is supplied, TrySZSort is attempted first; if it does not handle
        /// the array, the work is passed to an array sort helper.
        /// </summary>
        /// <param name="array">The array to sort; must not be null.</param>
        /// <param name="index">First position of the range; must not be negative.</param>
        /// <param name="length">Number of elements in the range; must not be negative.</param>
        /// <param name="comparer">Comparer to order elements with, or null.</param>
        public static void Sort<T>(T[] array, int index, int length, System.Collections.Generic.IComparer<T> comparer) {
            if (array == null) {
                throw new ArgumentNullException("array");
            }
            if (index < 0 || length < 0) {
                throw new ArgumentOutOfRangeException((length < 0 ? "length" : "index"), Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
            }
            if (array.Length - index < length) {
                throw new ArgumentException(Environment.GetResourceString("Argument_InvalidOffLen"));
            }
            Contract.EndContractBlock();

            // Ranges of zero or one element are already sorted.
            if (length <= 1) {
                return;
            }

            // Fast path: with default ordering, let TrySZSort handle the array if it can.
            if ((comparer == null || comparer == Comparer<T>.Default)
                && TrySZSort(array, null, index, index + length - 1)) {
                return;
            }

#if FEATURE_LEGACYNETCF
            if (CompatibilitySwitches.IsAppEarlierThanWindowsPhone8) {
                MangoArraySortHelper<T>.Default.Sort(array, index, length, comparer);
            }
            else {
                ArraySortHelper<T>.Default.Sort(array, index, length, comparer);
            }
#else
            ArraySortHelper<T>.Default.Sort(array, index, length, comparer);
#endif
        }

如果comparer为空或者是默认比较器(Comparer<T>.Default),那么会先尝试调用TrySZSort方法。

这是一个extern方法,说明它是由CLR直接实现的,我们无法得知它的具体算法或是意图。从注释中可以得知,这个做法的目的是提高性能(明白注释的优势了吧)。每次使用IComparer<T>的Compare方法进行比较的时候相当于是一次虚方法的调用,CLR需要计算它的偏移量,也无法将其内联。这个细节相对于直接进行int的大小比较来说,也是有较大开销的。使用TrySZSort这种外部方法进行排序,有助于提高在特定情况下的执行效率。

因此,我们应该可以有足够信心来推断出TrySZSort的作用。TrySZSort方法的作用是对一些可以直接进行比较的原生类型(如int等)进行排序,如果它发现自己无法支持数组中元素的类型,那么就返回false,否则便排序后并返回true。

否则,只有在特定情况下(编译时定义了FEATURE_LEGACYNETCF,且应用早于Windows Phone 8,即IsAppEarlierThanWindowsPhone8为真)调用的是MangoArraySortHelper.Default的Sort方法,这里不做讨论。

其他都调用的是ArraySortHelper.Default的Sort方法。

主要分两种情况:第一种,T实现了IComparable<T>接口,此时使用GenericArraySortHelper<T>;第二种,T没有实现这个接口,此时使用ArraySortHelper<T>。

        /// <summary>
        /// Picks the sort helper for T and stores it in defaultArraySortHelper:
        /// a GenericArraySortHelper instantiated over T when T is assignable to
        /// IComparable&lt;T&gt;, otherwise a plain ArraySortHelper&lt;T&gt;.
        /// </summary>
        /// <returns>The value of defaultArraySortHelper after the assignment.</returns>
        private static IArraySortHelper<T> CreateArraySortHelper()
        {
            bool comparableToItself = typeof(IComparable<T>).IsAssignableFrom(typeof(T));

            if (!comparableToItself)
            {
                defaultArraySortHelper = new ArraySortHelper<T>();
            }
            else
            {
                // GenericArraySortHelper<string> only serves as a handle to the generic
                // type; it is re-instantiated over T before the object is allocated.
                Type[] typeArguments = new Type[] { typeof(T) };
                defaultArraySortHelper = (IArraySortHelper<T>)RuntimeTypeHandle.Allocate(typeof(GenericArraySortHelper<string>).TypeHandle.Instantiate(typeArguments));
            }

            return defaultArraySortHelper;
        }

如果是第一种情况,T实现了IComparable<T>接口,那么排序方法(GenericArraySortHelper<T>.Sort)如下:

  /// <summary>
  /// Sorts keys[index .. index + length - 1]. When no comparer (or the default
  /// comparer) is given, the comparer-free sort overloads of this class are used;
  /// otherwise the comparer-based overloads on ArraySortHelper&lt;T&gt; are used.
  /// In both cases the algorithm is IntrospectiveSort or DepthLimitedQuickSort,
  /// selected by build flag and compatibility switch.
  /// </summary>
  /// <param name="keys">The array to sort; asserted non-null.</param>
  /// <param name="index">First position of the range to sort.</param>
  /// <param name="length">Number of elements in the range.</param>
  /// <param name="comparer">Comparer to order elements with, or null.</param>
  /// <exception cref="InvalidOperationException">
  /// Wraps any exception other than IndexOutOfRangeException thrown while sorting.
  /// </exception>
  public void Sort(T[] keys, int index, int length, IComparer<T> comparer)
        {
            Contract.Assert(keys != null, "Check the arguments in the caller!");
            Contract.Assert(index >= 0 && length >= 0 && (keys.Length - index >= length), "Check the arguments in the caller!");
 
            try
            {
                // NOTE: the opening "if (...) {" below exists once per preprocessor arm;
                // exactly one of the two is compiled, and it pairs with the "}" before "else".
#if FEATURE_LEGACYNETCF
                // Pre-Apollo Windows Phone call the overload that sorts the keys, not values this achieves the same result
                if (comparer == null && CompatibilitySwitches.IsAppEarlierThanWindowsPhone8)
                    comparer = Comparer<T>.Default;
 
                if (comparer == null || (comparer == Comparer<T>.Default && !CompatibilitySwitches.IsAppEarlierThanWindowsPhone8)) {
#else
                if (comparer == null || comparer == Comparer<T>.Default) {
#endif
 
#if FEATURE_CORECLR
                    // Since QuickSort and IntrospectiveSort produce different sorting sequence for equal keys the upgrade 
                    // to IntrospectiveSort was quirked. However since the phone builds always shipped with the new sort aka 
                    // IntrospectiveSort and we would want to continue using this sort moving forward CoreCLR always uses the new sort.
 
                    IntrospectiveSort(keys, index, length);
#else
                    // call the faster version of our sort algorithm if the user doesn't provide a comparer
                    if (BinaryCompatibility.TargetsAtLeast_Desktop_V4_5)
                    {
                        IntrospectiveSort(keys, index, length);
                    }
                    else
                    {
                        // Legacy path: the quicksort takes an inclusive right bound, hence length + index - 1.
                        DepthLimitedQuickSort(keys, index, length + index - 1, IntrospectiveSortUtilities.QuickSortDepthThreshold);
                    }
#endif
                }
                else
                {
                    // A custom comparer was supplied: use the comparer-based sorts on ArraySortHelper<T>.
#if FEATURE_CORECLR
                    // Since QuickSort and IntrospectiveSort produce different sorting sequence for equal keys the upgrade 
                    // to IntrospectiveSort was quirked. However since the phone builds always shipped with the new sort aka 
                    // IntrospectiveSort and we would want to continue using this sort moving forward CoreCLR always uses the new sort.
 
                    ArraySortHelper<T>.IntrospectiveSort(keys, index, length, comparer);
#else
                    if (BinaryCompatibility.TargetsAtLeast_Desktop_V4_5)
                    {
                        ArraySortHelper<T>.IntrospectiveSort(keys, index, length, comparer);
                    }
                    else
                    {
                        ArraySortHelper<T>.DepthLimitedQuickSort(keys, index, length + index - 1, comparer, IntrospectiveSortUtilities.QuickSortDepthThreshold);
                    }
#endif
                }
            }
            catch (IndexOutOfRangeException)
            {
                // An out-of-range access during sorting is handed to the bad-comparer helper.
                IntrospectiveSortUtilities.ThrowOrIgnoreBadComparer(comparer);
            }
            catch (Exception e)
            {
                // Any other failure is wrapped, keeping the original exception as the inner exception.
                throw new InvalidOperationException(Environment.GetResourceString("InvalidOperation_IComparerFailed"), e);
            }
        }

如果是第二种情况,T没有实现IComparable<T>接口,那么排序方法(ArraySortHelper<T>.Sort)如下:

  /// <summary>
  /// Sorts keys[index .. index + length - 1] with <paramref name="comparer"/>,
  /// substituting Comparer&lt;T&gt;.Default when none is supplied. The algorithm is
  /// IntrospectiveSort or DepthLimitedQuickSort, selected by build flag and
  /// compatibility switch.
  /// </summary>
  /// <param name="keys">The array to sort; asserted non-null.</param>
  /// <param name="index">First position of the range to sort.</param>
  /// <param name="length">Number of elements in the range.</param>
  /// <param name="comparer">Comparer to order elements with, or null for the default.</param>
  /// <exception cref="InvalidOperationException">
  /// Wraps any exception other than IndexOutOfRangeException thrown while sorting.
  /// </exception>
  public void Sort(T[] keys, int index, int length, IComparer<T> comparer)
        {
            Contract.Assert(keys != null, "Check the arguments in the caller!");
            Contract.Assert(index >= 0 && length >= 0 && (keys.Length - index >= length), "Check the arguments in the caller!");

            // The try block is here to detect bogus IComparers (or their underlying
            // IComparables, etc).
            try
            {
                // Fall back to the default comparer when the caller passed none.
                comparer = comparer ?? Comparer<T>.Default;

#if FEATURE_CORECLR
                // QuickSort and IntrospectiveSort order equal keys differently, so the
                // switch to IntrospectiveSort was quirked on desktop. Phone builds always
                // shipped with IntrospectiveSort, so CoreCLR always uses it.
                IntrospectiveSort(keys, index, length, comparer);
#else
                if (!BinaryCompatibility.TargetsAtLeast_Desktop_V4_5)
                {
                    // Legacy path: the quicksort takes an inclusive right bound.
                    DepthLimitedQuickSort(keys, index, length + index - 1, comparer, IntrospectiveSortUtilities.QuickSortDepthThreshold);
                }
                else
                {
                    IntrospectiveSort(keys, index, length, comparer);
                }
#endif
            }
            catch (IndexOutOfRangeException)
            {
                // An out-of-range access during sorting is handed to the bad-comparer helper.
                IntrospectiveSortUtilities.ThrowOrIgnoreBadComparer(comparer);
            }
            catch (Exception e)
            {
                // Any other failure is wrapped, keeping the original as the inner exception.
                throw new InvalidOperationException(Environment.GetResourceString("InvalidOperation_IComparerFailed"), e);
            }
        }

总体来看,主要排序决策如下:

如果目标 .NET Framework 版本至少是 4.5(或者运行在 CoreCLR 上,即定义了 FEATURE_CORECLR),则执行内省排序 IntrospectiveSort

否则都执行深度限制快速排序DepthLimitedQuickSort, 深度限制是32

先来看IntrospectiveSort:

        /// <summary>
        /// Entry point of the introspective sort: sorts keys[left .. left + length - 1]
        /// by calling IntroSort with a depth limit of 2 * FloorLog2(keys.Length).
        /// Ranges shorter than two elements are left untouched.
        /// </summary>
        /// <param name="keys">The array to sort.</param>
        /// <param name="left">First position of the range.</param>
        /// <param name="length">Number of elements in the range.</param>
        /// <param name="comparer">Comparer used to order the elements.</param>
        internal static void IntrospectiveSort(T[] keys, int left, int length, IComparer<T> comparer)
        {
            Contract.Requires(keys != null);
            Contract.Requires(comparer != null);
            Contract.Requires(left >= 0);
            Contract.Requires(length >= 0);
            Contract.Requires(length <= keys.Length);
            Contract.Requires(length + left <= keys.Length);

            if (length >= 2)
            {
                // IntroSort works on an inclusive [lo, hi] range.
                int lastIndex = length + left - 1;
                int maxDepth = 2 * IntrospectiveSortUtilities.FloorLog2(keys.Length);
                IntroSort(keys, left, lastIndex, maxDepth, comparer);
            }
        }
 
        /// <summary>
        /// Sorts keys[lo .. hi] (both inclusive). Large partitions are split around a
        /// pivot; the upper part is sorted recursively and the lower part by the next
        /// loop iteration. Small partitions are finished with swaps or InsertionSort,
        /// and Heapsort takes over once <paramref name="depthLimit"/> reaches zero.
        /// </summary>
        private static void IntroSort(T[] keys, int lo, int hi, int depthLimit, IComparer<T> comparer)
        {
            Contract.Requires(keys != null);
            Contract.Requires(comparer != null);
            Contract.Requires(lo >= 0);
            Contract.Requires(hi < keys.Length);

            while (hi > lo)
            {
                int span = hi - lo + 1;

                if (span > IntrospectiveSortUtilities.IntrosortSizeThreshold)
                {
                    // Depth budget used up: let Heapsort finish this partition.
                    if (depthLimit == 0)
                    {
                        Heapsort(keys, lo, hi, comparer);
                        return;
                    }
                    depthLimit--;

                    int pivot = PickPivotAndPartition(keys, lo, hi, comparer);
                    // The pivot is already in place, so both remaining parts exclude it.
                    IntroSort(keys, pivot + 1, hi, depthLimit, comparer);
                    hi = pivot - 1;
                    continue;
                }

                // Small partition: finish it directly and stop.
                switch (span)
                {
                    case 1:
                        return;

                    case 2:
                        SwapIfGreater(keys, comparer, lo, hi);
                        return;

                    case 3:
                        SwapIfGreater(keys, comparer, lo, hi - 1);
                        SwapIfGreater(keys, comparer, lo, hi);
                        SwapIfGreater(keys, comparer, hi - 1, hi);
                        return;

                    default:
                        InsertionSort(keys, lo, hi, comparer);
                        return;
                }
            }
        }

它的基本思路是这样的:

  1. 一般情况下,使用前后双指针快排,以枢轴元素为界划分出两段,再递归排序。
  2. 如果排序区间的长度不超过一定的阈值(16),快排的递归开销相对而言会比较大,于是改用插入排序(长度为 2 或 3 时直接用几次比较交换完成)。
  3. 如果检测到递归层数过大(2 * IntrospectiveSortUtilities.FloorLog2(keys.Length)),可以认为快排出了严重的划分不均问题。为了避免爆栈问题,或者快排再次划分不均导致复杂度退化为 O(n^2),于是使用堆排序。

IntroSort 相对快排的改进的精髓就在这个堆排序上。在十大基本排序算法中,排除掉需要使用辅助空间的,剩下的算法中,堆排序是唯一一个最差的时间复杂度控制在 O(n log n) 的。而快排如果划分不当的话,最差复杂度会退化为 O(n ^ 2)。

换句话说,对于再怎么刁难的序列,堆排序的时间都不会太差,它可以在 IntroSort 中起到一个“兜底”的作用。

既然堆排序的时间复杂度,最好最坏平均三种情况下都是 O(n log n),干脆只用纯的堆排序排不就好了?干嘛用快排这种会陷入到 O(n ^ 2) 的算法?

这里就牵涉到数据结构的理论里经常忽略的常数了。堆排序的常数太大了,而且是所有 O(n log n) 级的基本排序中,常数最大的一个。对于快排、归并、堆排,大家的用时虽然都是 O(n log n) 级的,但是堆排序所花的时间能达到快排的 1.6 ~ 2 倍。所以堆排序并不能取代快排的地位,只能作为快排划分退化后的保障手段,使得总体的情况“不至于太差”。

(堆排常数大是因为它和其他排序相比,空间访问连续性很差,它在访问堆中的父子节点的时候是跳着访问的,极易造成访存失效)。

但是,有破绽!

如果我们故意构造一个能让快排每次划分都很衰的序列。。。

那 IntroSort 会经常性地陷入堆排。详解如下:

自己写的排序会比C#自带的排序快吗? - 知乎

再看DepthLimitedQuickSort:

       /// <summary>
       /// Quicksort over keys[left .. right] (both inclusive) that hands the current
       /// range to Heapsort once <paramref name="depthLimit"/> has been counted down
       /// to zero. The smaller part of each partition is sorted by a recursive call,
       /// the larger part by the next iteration of the outer loop.
       /// </summary>
       /// <param name="keys">The array to sort.</param>
       /// <param name="left">Inclusive lower bound of the range.</param>
       /// <param name="right">Inclusive upper bound of the range.</param>
       /// <param name="comparer">Comparer used to order the elements.</param>
       /// <param name="depthLimit">Remaining number of partitioning levels before Heapsort is used.</param>
       internal static void DepthLimitedQuickSort(T[] keys, int left, int right, IComparer<T> comparer, int depthLimit)
        {
            do
            {
                // Depth budget exhausted: finish this range with Heapsort.
                if (depthLimit == 0)
                {
                    Heapsort(keys, left, right, comparer);
                    return;
                }
 
                int i = left;
                int j = right;
 
                // pre-sort the low, middle (pivot), and high values in place.
                // this improves performance in the face of already sorted data, or 
                // data that is made up of multiple sorted runs appended together.
                int middle = i + ((j - i) >> 1);
                SwapIfGreater(keys, comparer, i, middle);  // swap the low with the mid point
                SwapIfGreater(keys, comparer, i, j);   // swap the low with the high
                SwapIfGreater(keys, comparer, middle, j); // swap the middle with the high
 
                // The pivot value is the element now sitting at the middle position.
                T x = keys[middle];
                do
                {
                    // Advance i past elements that sort before the pivot and move j
                    // back past elements that sort after it.
                    while (comparer.Compare(keys[i], x) < 0) i++;
                    while (comparer.Compare(x, keys[j]) < 0) j--;
                    Contract.Assert(i >= left && j <= right, "(i>=left && j<=right)  Sort failed - Is your IComparer bogus?");
                    if (i > j) break;
                    if (i < j)
                    {
                        // Exchange the out-of-place pair.
                        T key = keys[i];
                        keys[i] = keys[j];
                        keys[j] = key;
                    }
                    i++;
                    j--;
                } while (i <= j);
 
                // The next iteration of the while loop is to "recursively" sort the larger half of the array and the
                // following calls recursively sort the smaller half.  So we subtract one from depthLimit here so
                // both sorts see the new value.
                depthLimit--;
 
                if (j - left <= right - i)
                {
                    // Left part is the smaller (or equal) one: recurse into it, loop on the right part.
                    if (left < j) DepthLimitedQuickSort(keys, left, j, comparer, depthLimit);
                    left = i;
                }
                else
                {
                    // Right part is the smaller one: recurse into it, loop on the left part.
                    if (i < right) DepthLimitedQuickSort(keys, i, right, comparer, depthLimit);
                    right = j;
                }
            } while (left < right);
        }

这是一个深度限制为 32 的快速排序:每向下划分一层,depthLimit 减 1,减到 0 时对当前区间改用堆排序(Heapsort)兜底。在划分均匀的理想情况下,32 层划分足以覆盖约 2^32 个元素。与之前的算法相比,主要区别在于:

快排的划分深度最多为32层,超过之后改用堆排序;

枢轴的选取上:先取中点 int middle = i + ((j - i) >> 1),再对首、中、尾三个位置的元素做预排序(三数取中),主要是为了在已经相对有序的数据面前提高性能。

Guess you like

Origin blog.csdn.net/dmk17771552304/article/details/120687129