Use python to implement basic data structures [04/4]

Introduction

        If you need this knowledge in an interview but don't have it, the result is frustrating and may well be a rejection. Whether you spend a few days cramming or keep learning in spare moments, time invested in data structures is worthwhile. So what data structures does Python have? Lists, dictionaries, sets, and... stacks? Does Python have a stack? This series of articles fills in the pieces of the puzzle in detail.


Chapter 13: Binary Tree

Binary tree: a tree in which each node has at most two child nodes (a left child and a right child).

class _BinTreeNode:
    """Binary-tree node: a payload plus links to the left and right children."""

    def __init__(self, data):
        self.data = data
        # A freshly created node is a leaf: neither child is attached yet.
        self.left, self.right = None, None


# Three depth-first traversals
def preorderTrav(subtree):
    """Pre-order (root-first) traversal.

    Prints the data of ``subtree``, then traverses its left subtree and
    finally its right subtree.  ``None`` is treated as an empty tree.
    """
    if subtree is None:
        return
    print(subtree.data)
    for child in (subtree.left, subtree.right):
        preorderTrav(child)


def inorderTrav(subtree):
    """In-order (root-in-the-middle) traversal.

    Traverses the left subtree, prints the data of ``subtree``, then
    traverses the right subtree.  ``None`` is treated as an empty tree.
    """
    if subtree is not None:
        # Recurse with inorderTrav itself so that every level of the tree is
        # visited in-order, not just the topmost one.
        inorderTrav(subtree.left)
        print(subtree.data)
        inorderTrav(subtree.right)


def postorderTrav(subtree):
    """Post-order (root-last) traversal.

    Traverses the left subtree, then the right subtree, and prints the data
    of ``subtree`` last.  ``None`` is treated as an empty tree.
    """
    if subtree is not None:
        # Both children are visited post-order before the node itself.
        postorderTrav(subtree.left)
        postorderTrav(subtree.right)
        print(subtree.data)


# Breadth-first traversal: visit the tree level by level, using a queue
def breadthFirstTrav(bintree):
    """Breadth-first (level-order) traversal.

    Prints the data of every node level by level, left to right within a
    level.  An empty tree (``bintree is None``) prints nothing.
    """
    # deque is a plain FIFO; queue.Queue adds locking that is not needed here.
    from collections import deque

    if bintree is None:    # nothing to visit; avoids dereferencing None below
        return
    pending = deque([bintree])
    while pending:
        node = pending.popleft()
        print(node.data)
        # Children are queued left before right so each level prints in order.
        if node.left is not None:
            pending.append(node.left)
        if node.right is not None:
            pending.append(node.right)


class _ExpTreeNode:
    """Node of an expression tree: one token plus links to two subtrees."""

    __slots__ = ('element', 'left', 'right')

    def __init__(self, data):
        self.element = data    # operator for inner nodes, operand for leaves
        self.left = None
        self.right = None

    def __repr__(self):
        return '<_ExpTreeNode: {} {} {}>'.format(
            self.element, self.left, self.right)


from queue import Queue


class ExpressionTree:
    r"""Expression tree for fully parenthesized infix expressions.

    Operators are stored in inner nodes and operands in leaf nodes, e.g.
    ``((9+3)*(8-4))`` is stored as::

            *
           / \
          +   -
         / \ / \
        9  3 8  4

    Every token is a single character: a digit, a one-letter variable name,
    one of the operators ``+ - * / %`` or a parenthesis.  Whitespace between
    tokens is ignored.

    Operations:
        ExpressionTree(expStr): build the tree from the expression string.
        evaluate(varDict): evaluate the expression and return the numeric
            result; ``varDict`` maps variable names to their values.
        str(tree): return the fully parenthesized string form of the tree.

    Usage:
        vars = {'a': 5, 'b': 12}
        expTree = ExpressionTree("(a/(b-3))")
        print('The result = ', expTree.evaluate(vars))
    """

    # Supported binary operators, built once for the class instead of on
    # every _computeOp call.
    _OPERATORS = {
        '+': lambda left, right: left + right,    # or operator.add
        '-': lambda left, right: left - right,
        '*': lambda left, right: left * right,
        '/': lambda left, right: left / right,
        '%': lambda left, right: left % right,
    }

    def __init__(self, expStr):
        """Build the tree from ``expStr``.

        Raises:
            ValueError: if ``expStr`` ends prematurely or a closing
                parenthesis is missing.
        """
        self._expTree = None
        self._buildTree(expStr)

    def evaluate(self, varDict):
        """Return the value of the expression, looking variables up in ``varDict``."""
        return self._evalTree(self._expTree, varDict)

    def __str__(self):
        return self._buildString(self._expTree)

    def _buildString(self, treeNode):
        """Return the parenthesized string for the subtree rooted at ``treeNode``.

        An opening parenthesis is emitted before an operator's subtrees are
        traversed and a closing one afterwards; leaves are emitted as-is.
        """
        if treeNode.left is None and treeNode.right is None:
            # A leaf is an operand and is returned directly.
            return str(treeNode.element)
        return '({}{}{})'.format(
            self._buildString(treeNode.left),
            treeNode.element,
            self._buildString(treeNode.right))

    def _evalTree(self, subtree, varDict):
        """Recursively evaluate the subtree rooted at ``subtree``."""
        if subtree.left is None and subtree.right is None:
            # Leaf: either a single decimal digit or a variable name.
            if '0' <= subtree.element <= '9':
                return int(subtree.element)
            assert subtree.element in varDict, 'invalid variable.'
            return varDict[subtree.element]
        # Inner node: evaluate both operands, then apply the operator.
        lvalue = self._evalTree(subtree.left, varDict)
        rvalue = self._evalTree(subtree.right, varDict)
        return self._computeOp(lvalue, subtree.element, rvalue)

    def _computeOp(self, left, op, right):
        """Apply operator ``op`` to the two operands.

        Raises:
            KeyError: if ``op`` is not one of ``+ - * / %``.
        """
        assert op
        return self._OPERATORS[op](left, right)

    @staticmethod
    def _nextToken(expQ):
        """Pop the next token, failing loudly instead of blocking on an empty queue."""
        if expQ.empty():
            # Queue.get() would block forever here because nothing will
            # ever put another token into the queue.
            raise ValueError('malformed expression: unexpected end of input')
        return expQ.get()

    def _buildTree(self, expStr):
        """Tokenize ``expStr`` character by character and build the tree."""
        expQ = Queue()
        for token in expStr:
            if not token.isspace():    # allow "(a / (b - 3))"
                expQ.put(token)
        self._expTree = _ExpTreeNode(None)    # create the root node
        self._recBuildTree(self._expTree, expQ)

    def _recBuildTree(self, curNode, expQ):
        """Fill ``curNode`` (and its descendants) from the tokens in ``expQ``."""
        token = self._nextToken(expQ)
        if token == '(':
            curNode.left = _ExpTreeNode(None)
            self._recBuildTree(curNode.left, expQ)

            # The next token is the operator: + - * / %
            curNode.element = self._nextToken(expQ)
            curNode.right = _ExpTreeNode(None)
            self._recBuildTree(curNode.right, expQ)

            # The sub-expression must be closed by ')'; consume it.
            closing = self._nextToken(expQ)
            if closing != ')':
                raise ValueError(
                    "malformed expression: expected ')' but found {!r}".format(closing))
        else:
            # Operand: kept as the raw character; _evalTree converts digits.
            curNode.element = token


# Demo: build the tree for ((2*7)+8), print its string form, then evaluate it.
# The expression contains no variables, so the mapping passed to evaluate()
# is not consulted here.
# NOTE(review): the name `vars` shadows the built-in vars() at module level.
vars = {'a': 5, 'b': 12}
expTree = ExpressionTree("((2*7)+8)")
print(expTree)
print('The result = ', expTree.evaluate(vars))

Heap: One of the most direct applications of binary trees is implementing heaps. A heap is a complete binary tree. In a max-heap, the value of every non-leaf node is greater than or equal to the values of its children; in a min-heap, it is less than or equal to them. Python has a built-in heapq module for heap operations; for example, heap sort can be implemented with heapq:

# Use python's built-in heapq to implement heap sort 
def heapsort(iterable):
    """Return a new list with the items of ``iterable`` in ascending order.

    The items are copied into a list, turned into a min-heap and then popped
    one by one, smallest first.  ``iterable`` itself is not modified.
    """
    from heapq import heapify, heappop

    h = list(iterable)
    heapify(h)    # one bottom-up build instead of one heappush per item
    return [heappop(h) for _ in range(len(h))]

However, a heap is usually not implemented with linked nodes but with an array, which is more efficient. Why can an array be used? Because a heap is a complete binary tree, the relationship between array indices can represent the relationship between nodes. This is explained in the docstring of MaxHeap.

class MaxHeap:
    """Fixed-capacity max-heap stored in a flat array.

    A heap is a complete binary tree.  In a max-heap every non-leaf node is
    greater than or equal to its children (in a min-heap: less than or equal).
    Two properties are maintained on every change:

    * order property: a parent is never smaller than its children;
    * shape property: the tree is a complete binary tree.

    Insertion appends the new value at the end and restores the order
    property with a sift-up.  Extraction returns the root, moves the
    bottom-rightmost value to the root and restores the order property with
    a sift-down.

    The nodes are numbered from the root, top to bottom and left to right,
    and stored at that index.  For the node at index ``i``:

        parent = (i - 1) // 2
        left   = 2 * i + 1
        right  = 2 * i + 2

    Storing the heap in an array avoids per-node objects and link handling.
    """

    def __init__(self, maxSize):
        # A pre-sized list stands in for the fixed-size Array ADT of
        # Chapter 2, which is not defined in this file.
        self._elements = [None] * maxSize
        self._count = 0    # number of slots currently in use

    def __len__(self):
        return self._count

    def capacity(self):
        """Return the maximum number of values the heap can hold."""
        return len(self._elements)

    def add(self, value):
        """Insert ``value``.

        Raises:
            AssertionError: if the heap is already full.
        """
        assert self._count < self.capacity(), 'can not add to full heap'
        self._elements[self._count] = value
        self._count += 1
        self._siftUp(self._count - 1)
        # Debugging aid kept from the original: re-verifies the whole heap
        # after every add, which costs a full pass over the stored values.
        self.assert_keep_heap()

    def extract(self):
        """Remove and return the largest value.

        Raises:
            AssertionError: if the heap is empty.
        """
        assert self._count > 0, 'can not extract from an empty heap'
        value = self._elements[0]    # save root value
        self._count -= 1
        last = self._count
        # Move the bottom-rightmost value to the root, then sift it down.
        self._elements[0] = self._elements[last]
        self._elements[last] = None    # drop the stale reference
        self._siftDown(0)
        self.assert_keep_heap()
        return value

    def _siftUp(self, ndx):
        """Move the value at ``ndx`` up until its parent is not smaller."""
        while ndx > 0:
            parent = (ndx - 1) // 2
            if not self._elements[ndx] > self._elements[parent]:
                break
            self._elements[ndx], self._elements[parent] = (
                self._elements[parent], self._elements[ndx])
            ndx = parent

    def _siftDown(self, ndx):
        """Move the value at ``ndx`` down until no child is larger."""
        while True:
            left = 2 * ndx + 1
            right = 2 * ndx + 2
            # Find the largest of the node and those children that exist;
            # each child index is bounds-checked before it is read.
            largest = ndx
            if left < self._count and self._elements[left] > self._elements[largest]:
                largest = left
            if right < self._count and self._elements[right] > self._elements[largest]:
                largest = right
            if largest == ndx:
                return
            self._elements[ndx], self._elements[largest] = (
                self._elements[largest], self._elements[ndx])
            ndx = largest

    def __repr__(self):
        # Only the slots in use are shown; unused slots hold no heap data.
        return ' '.join(map(str, self._elements[:self._count]))

    def assert_keep_heap(self):
        """Verify that the max-heap order property holds for every stored value.

        Raises:
            AssertionError: if some value is larger than its parent.
        """
        for child in range(1, self._count):
            parent = (child - 1) // 2
            assert self._elements[parent] >= self._elements[child]

def test_MaxHeap(): 
    """Unit test for the max-heap: fill it to capacity, then drain it."""
    size = 10
    heap = MaxHeap(size)
    for value in range(size):
        heap.add(value)
        heap.assert_keep_heap()
    # The values were added from small to large, so every extract must
    # return the largest value still stored: size-1, size-2, ..., 0.
    for expected in range(size - 1, -1, -1):
        assert heap.extract() == expected

# Run the MaxHeap unit test as soon as this module is executed.
test_MaxHeap() 

def simpleHeapSort(theSeq):
    """Sort ``theSeq`` in place into ascending order and return it.

    All items are pushed into a MaxHeap sized to hold them; the heap is then
    drained, writing each extracted value back into ``theSeq``.  An empty
    sequence is returned unchanged.
    """
    if not theSeq:
        return theSeq
    _len = len(theSeq)
    heap = MaxHeap(_len)
    for item in theSeq:
        heap.add(item)
    # Each extract yields the largest remaining value, so fill from the back.
    for i in reversed(range(_len)):
        theSeq[i] = heap.extract()
    return theSeq


def test_simpleHeapSort(): 
    """Check simpleHeapSort on the empty list and on 1000 random lists."""
    from random import randint

    def _is_sorted(seq):
        # True when no element is greater than its right-hand neighbour.
        return all(seq[k] <= seq[k + 1] for k in range(len(seq) - 1))

    assert simpleHeapSort([]) == []
    for _ in range(1000):
        to_sort = [randint(0, 100) for _ in range(randint(1, 100))]
        # The sort is in place, so the list itself is checked afterwards.
        simpleHeapSort(to_sort)
        assert _is_sorted(to_sort)


# Run the heap-sort unit test as soon as this module is executed.
test_simpleHeapSort()

Chapter 14: Search Trees

Binary search tree (BST) property: for each internal node V, 1. all keys smaller than V.key are stored in the left subtree of V; 2. all keys greater than V.key are stored in the right subtree of V. An in-order traversal of a BST therefore yields the keys in ascending order.

class _BSTMapNode:
    """Storage node of the BST map: one key/value pair plus two child links."""

    __slots__ = ('key', 'value', 'left', 'right')

    def __init__(self, key, value):
        self.key = key
        self.value = value
        self.left = None
        self.right = None

    def __repr__(self):
        return '<{}:{}> left:{}, right:{}'.format(
            self.key, self.value, self.left, self.right)

    __str__ = __repr__


class BSTMap:
    """Map ADT implemented with a binary search tree (BST).

    Every tree node stores a key together with its value.  For each node V:

    1. all keys smaller than V.key are stored in the left subtree of V;
    2. all keys greater than V.key are stored in the right subtree of V.

    An in-order traversal of the tree therefore yields the keys in ascending
    order.  Keys are only ever compared with ``<`` and ``>``.
    """

    def __init__(self):
        self._root = None
        self._size = 0
        self._rval = None    # value of the entry removed by the latest remove()

    def __len__(self):
        return self._size

    def __iter__(self):
        return _BSTMapIterator(self._root, self._size)

    def __contains__(self, key):
        return self._bstSearch(self._root, key) is not None

    def valueOf(self, key):
        """Return the value stored under ``key``.

        Raises:
            AssertionError: if ``key`` is not in the map.
        """
        node = self._bstSearch(self._root, key)
        assert node is not None, 'Invalid map key.'
        return node.value

    def _bstSearch(self, subtree, target):
        """Return the node holding ``target`` below ``subtree``, or None.

        Written as a loop so that a degenerate (list-like) tree cannot
        exhaust the recursion limit.
        """
        node = subtree
        while node is not None:
            if target < node.key:
                node = node.left
            elif target > node.key:
                node = node.right
            else:
                return node    # a reference to the node, not a copy
        return None    # reached the bottom of the tree, or the tree is empty

    def _bstMinumum(self, subtree):
        """Return the node with the smallest key below ``subtree``.

        Follows the left links down to the leftmost node; returns None for
        an empty subtree.
        """
        if subtree is None:
            return None
        node = subtree
        while node.left is not None:
            node = node.left
        return node

    def add(self, key, value):
        """Add ``key`` or replace its value.

        Takes time proportional to the height of the tree, which is O(N) in
        the worst case.  Returns True if a new entry was inserted and False
        if an existing key's value was replaced.
        """
        node = self._bstSearch(self._root, key)
        if node is not None:    # key already exists: update its value
            node.value = value
            return False
        self._root = self._bstInsert(self._root, key, value)
        self._size += 1
        return True

    def _bstInsert(self, subtree, key, value):
        """Insert a new leaf for ``key`` below ``subtree``; return the subtree root.

        New nodes are always attached at the bottom of the tree.  A key that
        is already present leaves the tree unchanged; ``add`` checks for
        duplicates before calling this method.
        """
        if subtree is None:
            return _BSTMapNode(key, value)
        node = subtree
        while True:
            if key < node.key:
                if node.left is None:
                    node.left = _BSTMapNode(key, value)
                    break
                node = node.left
            elif key > node.key:
                if node.right is None:
                    node.right = _BSTMapNode(key, value)
                    break
                node = node.right
            else:
                break
        return subtree

    def remove(self, key):
        """Remove ``key`` and return the value that was stored under it.

        Takes time proportional to the height of the tree, which is O(N) in
        the worst case.  The node to delete falls into one of three cases:

        1. Leaf node: the parent's link to the node is set to None.
        2. Node with one child: the parent's link is redirected to that child.
        3. Node with two children:
            (1) find the node N to delete and its successor S (the next node
                in an in-order traversal, i.e. the smallest node of N's
                right subtree);
            (2) copy the key and value of S into N;
            (3) unlink S from the right subtree of N.

        Raises:
            AssertionError: if ``key`` is not in the map.
        """
        assert key in self, 'invalid map key'
        self._root = self._bstRemove(self._root, key)
        self._size -= 1
        return self._rval

    def _bstRemove(self, subtree, target):
        """Remove ``target`` below ``subtree`` and return the new subtree root.

        The removed entry's value is stored in ``self._rval``.  If ``target``
        is not found the subtree is returned unchanged.
        """
        # Search for the node while remembering its parent.
        parent, node = None, subtree
        while node is not None:
            if target < node.key:
                parent, node = node, node.left
            elif target > node.key:
                parent, node = node, node.right
            else:
                break
        if node is None:
            return subtree

        # Record the value before the node is overwritten by its successor,
        # so that remove() reports the value of the key actually removed.
        self._rval = node.value

        if node.left is not None and node.right is not None:
            # Two children: locate the successor and its parent.
            succParent, succ = node, node.right
            while succ.left is not None:
                succParent, succ = succ, succ.left
            node.key = succ.key
            node.value = succ.value
            # The successor has no left child; splice it out of the tree.
            if succParent.left is succ:
                succParent.left = succ.right
            else:
                succParent.right = succ.right
            return subtree

        # Leaf or single child: the (possibly None) child replaces the node.
        replacement = node.left if node.left is not None else node.right
        if parent is None:
            return replacement
        if parent.left is node:
            parent.left = replacement
        else:
            parent.right = replacement
        return subtree

    def __repr__(self):
        return '->'.join([str(key) for key in self])

    def assert_keep_bst_property(self, subtree):
        """Verify that the tree below ``subtree`` satisfies the BST property.

        Every key must lie strictly between the bounds imposed by all of its
        ancestors, not merely be ordered relative to its direct children.

        Raises:
            AssertionError: if some key violates its bounds.
        """
        unbounded = object()    # sentinel meaning "no bound on this side"
        pending = [(subtree, unbounded, unbounded)]
        while pending:
            node, low, high = pending.pop()
            if node is None:
                continue
            assert low is unbounded or node.key > low
            assert high is unbounded or node.key < high
            pending.append((node.left, low, node.key))
            pending.append((node.right, node.key, high))


class _BSTMapIterator:
    """Iterator over the keys of a binary search tree in ascending order.

    The keys are collected up front by an in-order traversal; iteration then
    walks the collected keys.
    """

    def __init__(self, root, size):
        # A pre-sized list stands in for the fixed-size Array ADT of
        # Chapter 2, which is not defined in this file.
        self._theKeys = [None] * size
        self._curItem = 0    # write position while collecting the keys
        self._bstTraversal(root)
        self._curItem = 0    # reset: now the read position for __next__

    def __iter__(self):
        return self

    def __next__(self):
        if self._curItem < len(self._theKeys):
            key = self._theKeys[self._curItem]
            self._curItem += 1
            return key
        else:
            raise StopIteration

    def _bstTraversal(self, subtree):
        """Store the keys below ``subtree`` into ``_theKeys`` in in-order.

        Uses an explicit stack rather than recursion so that a degenerate
        (list-like) tree cannot exhaust the recursion limit.
        """
        stack = []
        node = subtree
        while stack or node is not None:
            # Descend as far left as possible, remembering the path.
            while node is not None:
                stack.append(node)
                node = node.left
            node = stack.pop()
            self._theKeys[self._curItem] = node.key
            self._curItem += 1
            node = node.right


def test_BSTMap():
    """Smoke test: insert a few keys and check size, membership and values."""
    keys = [60, 25, 100, 35, 17, 80]
    bst = BSTMap()
    for key in keys:
        # add() requires a value as well; each key doubles as its own value.
        bst.add(key, key)
    assert len(bst) == len(keys)
    for key in keys:
        assert key in bst
        assert bst.valueOf(key) == key

def test_HashMap(): 
    """Map test originally written for the hash-based map, run against BSTMap.

    Exercises add, valueOf, remove, len and membership, and prints the map
    before and after each removal in the final loop.
    """
    bst = BSTMap()
    assert len(bst) == 0

    # Single entry: add, look up, remove.
    bst.add('a', 'a')
    assert bst.valueOf('a') == 'a'
    assert len(bst) == 1
    removed = bst.remove('a')
    assert removed == 'a'
    assert len(bst) == 0

    # Two entries, removed in reverse order of insertion.
    bst.add('a', 'a')
    bst.add('b', 'b')
    assert len(bst) == 2
    assert bst.valueOf('b') == 'b'
    removed = bst.remove('b')
    assert removed == 'b'
    assert len(bst) == 1
    bst.remove('a')
    assert len(bst) == 0

    # Ten entries keyed by the decimal string of their value.
    total = 10
    for number in range(total):
        bst.add(str(number), number)
    assert len(bst) == total
    for number in range(total):
        assert str(number) in bst
    for number in range(total):
        print(len(bst))
        print('bef', bst)
        removed = bst.remove(str(number))
        assert removed == number
        print('aft', bst)
        print(len(bst))
    assert len(bst) == 0

# Run the map unit test (against BSTMap) as soon as this module is executed.
test_HashMap()

Guess you like

Origin blog.csdn.net/gongdiwudu/article/details/118111542
Recommended