Python3.5源码分析-List概述

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_33339479/article/details/81807096

Python3源码分析

本文环境python3.5.2。
参考书籍<<Python源码剖析>>
python官网

Python3的List对象

list对象是一个变长对象,在运行时动态调整其所维护的内存和元素,并且支持插入删除等操作,list的定义如下;

#define PyObject_VAR_HEAD      PyVarObject ob_base;
#define Py_INVALID_SIZE (Py_ssize_t)-1

/* Nothing is actually declared to be a PyObject, but every pointer to
 * a Python object can be cast to a PyObject*.  This is inheritance built
 * by hand.  Similarly every pointer to a variable-size Python object can,
 * in addition, be cast to PyVarObject*.
 */
typedef struct _object {
    _PyObject_HEAD_EXTRA
    Py_ssize_t ob_refcnt;                                       // 引用计数值
    struct _typeobject *ob_type;                                // 基本的类型 type
} PyObject;

typedef struct {
    PyObject ob_base;                                           // 对象基本信息
    Py_ssize_t ob_size; /* Number of items in variable part */  // 变长对象的元素个数
} PyVarObject;
...
typedef struct {
    PyObject_VAR_HEAD                                           // 变量头部信息
    /* Vector of pointers to list elements.  list[0] is ob_item[0], etc. */
    PyObject **ob_item;                                         // 元素指向具体指

    /* ob_item contains space for 'allocated' elements.  The number
     * currently in use is ob_size.
     * Invariants:
     *     0 <= ob_size <= allocated
     *     len(list) == ob_size
     *     ob_item == NULL implies ob_size == allocated == 0
     * list.sort() temporarily sets allocated to -1 to detect mutations.
     *
     * Items must normally not be NULL, except during construction when
     * the list is not yet visible outside the function that builds it.
     */
    Py_ssize_t allocated;                                       // 当前空间
} PyListObject;

其中,ob_size就是list实际拥有的元素数量,allocated指的是实际申请的内存空间,比如列表A实际申请了5元素的空间,但是此时A只包含了2个元素,则ob_size为2,allocated为5。

List的创建和初始化

在新建List的时候会调用BUILD_LIST指令新建List,BUILD_LIST对应的字节码指令是,

    TARGET(BUILD_LIST) {
        PyObject *list =  PyList_New(oparg);        // 生成新的列表调用,传入初始化时传入的参数值
        if (list == NULL)
            goto error;
        while (--oparg >= 0) {
            PyObject *item = POP();
            PyList_SET_ITEM(list, oparg, item);     // 将初始化的列表值设置到列表中
        }
        PUSH(list);                                 // 压入list
        DISPATCH();                                 // 执行下一条指令
    }

此时查看PyList_New函数,

PyObject *
PyList_New(Py_ssize_t size)
{
    PyListObject *op;
    size_t nbytes;
#ifdef SHOW_ALLOC_COUNT
    static int initialized = 0;
    if (!initialized) {
        Py_AtExit(show_alloc);
        initialized = 1;
    }
#endif

    if (size < 0) {                                             // 如果传入的大小小于0则报错
        PyErr_BadInternalCall();
        return NULL;
    }
    /* Check for overflow without an actual overflow,
     *  which can cause compiler to optimise out */
    if ((size_t)size > PY_SIZE_MAX / sizeof(PyObject *))        // 计算申请的大小是否超过数组最大的索引值,PY_SIZE_MAX如果是64位则是最大的有符号int64值
        return PyErr_NoMemory();
    nbytes = size * sizeof(PyObject *);                         // 需要申请的内存字节大小
    if (numfree) {                                              // 缓冲池可用
        numfree--;
        op = free_list[numfree];                                // 获取缓冲的对象
        _Py_NewReference((PyObject *)op);
#ifdef SHOW_ALLOC_COUNT
        count_reuse++;
#endif
    } else {
        op = PyObject_GC_New(PyListObject, &PyList_Type);       // 生成新的对象
        if (op == NULL)
            return NULL;
#ifdef SHOW_ALLOC_COUNT
        count_alloc++;
#endif
    }
    if (size <= 0)                                              // 传入小于等于0
        op->ob_item = NULL;                                     // 则list实例的ob_item为NULL
    else {
        op->ob_item = (PyObject **) PyMem_MALLOC(nbytes);       // 申请nbytes字节大小的内存,并将申请内存的首地址设置到ob_item
        if (op->ob_item == NULL) {                              // 如果为空则报错
            Py_DECREF(op);
            return PyErr_NoMemory();
        }
        memset(op->ob_item, 0, nbytes);                         // 将对应nbytes字节的数据设置为0
    }
    Py_SIZE(op) = size;                                         // 设置ob->size 为size
    op->allocated = size;                                       // 设置allocated值
    _PyObject_GC_TRACK(op);                                     // 加入GC列表
    return (PyObject *) op;
}

在生成新的list的时候会先检查缓冲池是否拥有,默认会维护80个PyListObject对象;

#ifndef PyList_MAXFREELIST
#define PyList_MAXFREELIST 80
#endif
static PyListObject *free_list[PyList_MAXFREELIST];
static int numfree = 0;

在调用list_dealloc函数释放列表对象时,会检查缓冲池是否已经满了,如果没有满则添加到缓冲池中,

static void
list_dealloc(PyListObject *op)
{
    Py_ssize_t i;
    PyObject_GC_UnTrack(op);
    Py_TRASHCAN_SAFE_BEGIN(op)
    if (op->ob_item != NULL) {
        /* Do it backwards, for Christian Tismer.
           There's a simple test case where somehow this reduces
           thrashing when a *very* large list is created and
           immediately deleted. */
        i = Py_SIZE(op);
        while (--i >= 0) {
            Py_XDECREF(op->ob_item[i]);                         // 减少列表对象的引用计数
        }
        PyMem_FREE(op->ob_item);                                // 释放ob_item申请的内存空间
    }
    if (numfree < PyList_MAXFREELIST && PyList_CheckExact(op))  // 如果缓冲池未满
        free_list[numfree++] = op;                              // 添加到缓冲池列表中
    else
        Py_TYPE(op)->tp_free((PyObject *)op);                   // 大于缓冲池大小则直接释放内存
    Py_TRASHCAN_SAFE_END(op)
}

当缓冲池没有可用list的时候,则新生成一个list,调用PyObject_GC_New(PyListObject, &PyList_Type)生成一个;

#define PyObject_GC_New(type, typeobj) \
                ( (type *) _PyObject_GC_New(typeobj) )      // 调用_PyObject_GC_New生成

...


PyObject *
_PyObject_GC_New(PyTypeObject *tp)
{
    PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp));     // 申请内存空间
    if (op != NULL)
        op = PyObject_INIT(op, tp);                             // 初始化op
    return op;                                                  // 返回
}
...
#define PyObject_INIT(op, typeobj) \
    ( Py_TYPE(op) = (typeobj), _Py_NewReference((PyObject *)(op)), (op) )    // 设置ob_type,增加引用计数

...

#define Py_TYPE(ob)             (((PyObject*)(ob))->ob_type)    // 设置ob_type类型

此时新生成的list对象就基本初始化完成。

当调用给list设值时,会尝试如下字节码;

 14           0 LOAD_CONST               0 (1)
              3 LOAD_CONST               1 (2)
              6 BUILD_LIST               2
              9 STORE_NAME               0 (a)

 15          12 LOAD_CONST               2 (10)
             15 LOAD_NAME                0 (a)
             18 LOAD_CONST               0 (1)
             21 STORE_SUBSCR

其中,a初始化为包含1,2两个元素的列表,然后设置a[1]=10,此时调用STORE_SUBSCR来设置,

    TARGET(STORE_SUBSCR) {
        PyObject *sub = TOP();                      // 获取需要设置的key
        PyObject *container = SECOND();             // 获取list实例
        PyObject *v = THIRD();                      // 获取待设置的值
        int err;
        STACKADJ(-3);                               // 栈移动三个值
        /* v[w] = u */
        err = PyObject_SetItem(container, sub, v);  // 设置值
        Py_DECREF(v);
        Py_DECREF(container);
        Py_DECREF(sub);
        if (err != 0)
            goto error;
        DISPATCH();
    }

此时调用了PyObject_SetItem函数来将值设置到列表中,

扫描二维码关注公众号,回复: 2931825 查看本文章
int
PyObject_SetItem(PyObject *o, PyObject *key, PyObject *value)
{
    PyMappingMethods *m;

    if (o == NULL || key == NULL || value == NULL) {                    // 检查传入参数
        null_error();
        return -1;
    }
    m = o->ob_type->tp_as_mapping;                                      // 获取类型的tp_as_mapping方法
    if (m && m->mp_ass_subscript)
        return m->mp_ass_subscript(o, key, value);                      // 调用字典的设置方法

    if (o->ob_type->tp_as_sequence) {                                   // 调用tp_as_sequence方法
        if (PyIndex_Check(key)) {                                       // 检查key
            Py_ssize_t key_value;
            key_value = PyNumber_AsSsize_t(key, PyExc_IndexError);
            if (key_value == -1 && PyErr_Occurred())
                return -1;
            return PySequence_SetItem(o, key_value, value);             // 调用设置值
        }
        else if (o->ob_type->tp_as_sequence->sq_ass_item) {
            type_error("sequence index must be "
                       "integer, not '%.200s'", key);
            return -1;
        }
    }

    type_error("'%.200s' object does not support item assignment", o);
    return -1;
}

由于此时,传入的list对象,此时的PyList_Type对应的tp_as_mapping为list_as_mapping;

static PyMappingMethods list_as_mapping = {
    (lenfunc)list_length,
    (binaryfunc)list_subscript,
    (objobjargproc)list_ass_subscript
};

此时就是调用了list_ass_subscript方法来处理设值,

static int
list_ass_subscript(PyListObject* self, PyObject* item, PyObject* value)
{
    if (PyIndex_Check(item)) {                                          // item索引值检查
        Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);      // 检查值是否合法,不合法返回PyExc_IndexError
        if (i == -1 && PyErr_Occurred())                                // 如果检查不合法则返回错误
            return -1;
        if (i < 0)                                                      // 如果小于0, 则转换成正值索引
            i += PyList_GET_SIZE(self);
        return list_ass_item(self, i, value);                           // 设值值
    }
    ...
}

此时先检查索引值是否合法,然后将负值索引转换成正值索引,然后调用list_ass_item处理;

static int
list_ass_item(PyListObject *a, Py_ssize_t i, PyObject *v)
{
    PyObject *old_value;
    if (i < 0 || i >= Py_SIZE(a)) {                             // 检查索引值是否超出范围
        PyErr_SetString(PyExc_IndexError,
                        "list assignment index out of range");       
        return -1;
    }
    if (v == NULL)
        return list_ass_slice(a, i, i+1, v);                    // 如果v为NULL则删除对应i索引处的值
    Py_INCREF(v);
    old_value = a->ob_item[i];                                  // 获取旧值
    a->ob_item[i] = v;                                          // 设值新值
    Py_DECREF(old_value);
    return 0;
}

至此一个List的设置过程就完成了。

List的添加和删除元素

a本来是[1,2]的列表,调用a.insert(5,105)时,对应的字节码如下;

 18          45 LOAD_NAME                0 (a)
             48 LOAD_ATTR                3 (insert)
             51 LOAD_CONST               3 (5)
             54 LOAD_CONST               4 (105)
             57 CALL_FUNCTION            2 (2 positional, 0 keyword pair)

此时就会调用listinsert函数,

static PyObject *
listinsert(PyListObject *self, PyObject *args)
{
    Py_ssize_t i;
    PyObject *v;
    if (!PyArg_ParseTuple(args, "nO:insert", &i, &v))           // 解析参数和列表
        return NULL;
    if (ins1(self, i, v) == 0)                                  // 插入数据
        Py_RETURN_NONE;
    return NULL;
}

此时解析了相关参数为tuple后调用ins1插入数据,

static int
ins1(PyListObject *self, Py_ssize_t where, PyObject *v)
{
    Py_ssize_t i, n = Py_SIZE(self);                // 获取当前的列表大小
    PyObject **items;
    if (v == NULL) {                                // 检查是否为空
        PyErr_BadInternalCall();
        return -1;
    }
    if (n == PY_SSIZE_T_MAX) {                      // 检查是否已经是列表允许的最大值了
        PyErr_SetString(PyExc_OverflowError,
            "cannot add more objects to list");
        return -1;
    }

    if (list_resize(self, n+1) == -1)               // 重新扩招self大小
        return -1;

    if (where < 0) {                                // 如果索引为负值,则转换成正值
        where += n;                                 // 计算出正值索引
        if (where < 0)
            where = 0;                              // 如果转换后还是负值则置0
    }
    if (where > n)                              
        where = n;                                  // 如果where大于最大值则置为最大值n
    items = self->ob_item;
    for (i = n; --i >= where; )                     // 将大于where索引值的元素移动后一位
        items[i+1] = items[i]; 
    Py_INCREF(v);   
    items[where] = v;                               // 设置where为传入的v
    return 0;
}

首先会调整列表的大小,调用list_resize函数,

static int
list_resize(PyListObject *self, Py_ssize_t newsize)
{
    PyObject **items;
    size_t new_allocated;
    Py_ssize_t allocated = self->allocated;                     // 获取当前的申请内存的大小

    /* Bypass realloc() when a previous overallocation is large enough
       to accommodate the newsize.  If the newsize falls lower than half
       the allocated size, then proceed with the realloc() to shrink the list.
    */ 
    if (allocated >= newsize && newsize >= (allocated >> 1)) {  // 如果已经申请的大小大于newsize并且newsize>=已有空间的一半
        assert(self->ob_item != NULL || newsize == 0);
        Py_SIZE(self) = newsize;                                // 设置ob_size为newsize
        return 0;
    }

    /* This over-allocates proportional to the list size, making room
     * for additional growth.  The over-allocation is mild, but is
     * enough to give linear-time amortized behavior over a long
     * sequence of appends() in the presence of a poorly-performing
     * system realloc().
     * The growth pattern is:  0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
     */
    new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6);     // 否则就调整申请空间大小

    /* check for integer overflow */
    if (new_allocated > PY_SIZE_MAX - newsize) {                // 如果新申请的大小最大的大小
        PyErr_NoMemory();                                       // 则报错
        return -1;
    } else {
        new_allocated += newsize;                               // 新增newsize
    }

    if (newsize == 0)
        new_allocated = 0;
    items = self->ob_item;
    if (new_allocated <= (PY_SIZE_MAX / sizeof(PyObject *)))    // 检查新申请的大小是否在允许范围内
        PyMem_RESIZE(items, PyObject *, new_allocated);         // 重新调整ob_item的申请空间
    else
        items = NULL;
    if (items == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    self->ob_item = items;                                      // 重置item
    Py_SIZE(self) = newsize;                                    // 重置size 为newsize
    self->allocated = new_allocated;                            // 重置申请的空间大小
    return 0;
}

在调整过列表大小后,列表会判断如果传入的负值索引比列表元素还大则设置在0位,如果转换的正值索引比列表长度还大则设置为最后一位,并将大于索引值的元素,往后移动一位。此时就是list的插入过程。

当列表a=[1,2,10,105],调用a.remove(10)时,对应的字节码如下;

 20          71 LOAD_NAME                0 (a)
             74 LOAD_ATTR                4 (remove)
             77 LOAD_CONST               2 (10)
             80 CALL_FUNCTION            1 (1 positional, 0 keyword pair)
             83 POP_TOP

此时调用了list的remove属性,对应调用了listremove方法,

static PyObject *
listremove(PyListObject *self, PyObject *v)
{
    Py_ssize_t i;

    for (i = 0; i < Py_SIZE(self); i++) {                       // 循环遍历lise
        int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ);     // 比较列表中的值与传入值v进行比较
        if (cmp > 0) {                                          // 如果找到
            if (list_ass_slice(self, i, i+1,
                               (PyObject *)NULL) == 0)          // 设置该位为NULL
                Py_RETURN_NONE;
            return NULL;
        }
        else if (cmp < 0)                                       
            return NULL;
    }
    PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");     // 如果没有找到则返回该错误
    return NULL;
}

其中调用了PyObject_RichCompareBool函数,进行比较;

int
PyObject_RichCompareBool(PyObject *v, PyObject *w, int op)
{
    PyObject *res;
    int ok;

    /* Quick result when objects are the same.
       Guarantees that identity implies equality. */
    if (v == w) {                               // 快速检查,如果两个相同则返回
        if (op == Py_EQ)
            return 1;
        else if (op == Py_NE)
            return 0;
    }

    res = PyObject_RichCompare(v, w, op);       // 比较v,w
    if (res == NULL)
        return -1;
    if (PyBool_Check(res))
        ok = (res == Py_True);
    else
        ok = PyObject_IsTrue(res);
    Py_DECREF(res);
    return ok;                                  // 返回结果
}

继续调用了PyObject_RichCompare函数进行比较,

PyObject *
PyObject_RichCompare(PyObject *v, PyObject *w, int op)
{
    ...
    res = do_richcompare(v, w, op);                     // 进行比较
    ...
}

查看do_richcompare函数比较可知,

/* Perform a rich comparison, raising TypeError when the requested comparison
   operator is not supported. */
static PyObject *
do_richcompare(PyObject *v, PyObject *w, int op)
{
    richcmpfunc f;
    PyObject *res;
    int checked_reverse_op = 0;

    if (v->ob_type != w->ob_type &&
        PyType_IsSubtype(w->ob_type, v->ob_type) &&
        (f = w->ob_type->tp_richcompare) != NULL) {     // 检查v,w的两个类型是否相同,检查是否w,v是否是子类型,检查w对应的tp_richcompare是否为空
        checked_reverse_op = 1;                         
        res = (*f)(w, v, _Py_SwappedOp[op]);            // 直接调用w的tp_richcompare方法进行比较
        if (res != Py_NotImplemented)
            return res;                                 // 返回结果
        Py_DECREF(res);
    }
    if ((f = v->ob_type->tp_richcompare) != NULL) {     // 检查v是否有tp_richcompare方法
        res = (*f)(v, w, op);                           // 调用v的tp_richcompare进行比较
        if (res != Py_NotImplemented)                   
            return res;                                 // 返回结果    
        Py_DECREF(res);
    }
    if (!checked_reverse_op && (f = w->ob_type->tp_richcompare) != NULL) { // 如果checked_reverse_op为0,w的tp_richcompare不为空
        res = (*f)(w, v, _Py_SwappedOp[op]);            // 调用w的tp_richcompare方法进行比较
        if (res != Py_NotImplemented)
            return res;                                 // 返回结果
        Py_DECREF(res);
    }
    /* If neither object implements it, provide a sensible default
       for == and !=, but raise an exception for ordering. */
    switch (op) {                                       // 如果都没有实现该方法则使用简易的默认方法
    case Py_EQ:
        res = (v == w) ? Py_True : Py_False;            // 判断是否相等
        break;
    case Py_NE:
        res = (v != w) ? Py_True : Py_False;            // 判断是否相等
        break;
    default:
        /* XXX Special-case None so it doesn't show as NoneType() */
        PyErr_Format(PyExc_TypeError,
                     "unorderable types: %.100s() %s %.100s()",
                     v->ob_type->tp_name,
                     opstrings[op],
                     w->ob_type->tp_name);
        return NULL;
    }
    Py_INCREF(res);
    return res;                                         // 返回结果
}

由于此时列表a中的元素是long型的元素,则调用了long_richcompare方法进行两个值之间的比较,

static PyObject *
long_richcompare(PyObject *self, PyObject *other, int op)
{
    int result;
    PyObject *v;
    CHECK_BINOP(self, other);
    if (self == other)                                                      // 如果两者相等
        result = 0;                                                         // 直接返回
    else
        result = long_compare((PyLongObject*)self, (PyLongObject*)other);   // 比较两个long型
    /* Convert the return value to a Boolean */
    switch (op) {                                                           // 根据op 返回不同的结果
    case Py_EQ:
        v = TEST_COND(result == 0);
        break;
    case Py_NE:
        v = TEST_COND(result != 0);
        break;
    case Py_LE:
        v = TEST_COND(result <= 0);
        break;
    case Py_GE:
        v = TEST_COND(result >= 0);
        break;
    case Py_LT:
        v = TEST_COND(result == -1);
        break;
    case Py_GT:
        v = TEST_COND(result == 1);
        break;
    default:
        PyErr_BadArgument();
        return NULL;
    }
    Py_INCREF(v);
    return v;
}

其中又调用了long_compare函数进行比较,

static int
long_compare(PyLongObject *a, PyLongObject *b)
{
    Py_ssize_t sign;

    if (Py_SIZE(a) != Py_SIZE(b)) {             // 比较大小是否相同
        sign = Py_SIZE(a) - Py_SIZE(b);         // 获取两个值的差值
    }
    else {
        Py_ssize_t i = Py_ABS(Py_SIZE(a));      // 获取a值大小
        while (--i >= 0 && a->ob_digit[i] == b->ob_digit[i])
            ;
        if (i < 0)
            sign = 0;
        else {
            sign = (sdigit)a->ob_digit[i] - (sdigit)b->ob_digit[i];
            if (Py_SIZE(a) < 0)
                sign = -sign;
        }
    }
    return sign < 0 ? -1 : sign > 0 ? 1 : 0;    // 根据sign的处理结果返回
}

至此就比较出来了两个值的大小,如果相同则会继续执行listremove
中的list_ass_slice函数,将对应索引位置的值置空,该函数具体的执行流程大家可自行阅读,至此一个列表中的元素的删除过程已经分析完成。

总结

列表的初始化和插入数据和删除数据基本上的流程分析完成,其中还有很多其他的细节在流程中没有涉及到,列表在元素增长或者减少的过程中,都会涉及到都申请得到内存进行相关操作,此时的大量细节还需要大家自行查看。

猜你喜欢

转载自blog.csdn.net/qq_33339479/article/details/81807096