Introduction
If an interview calls for this knowledge and you don't have it, the experience will be frustrating and may lead to rejection. Whether you spend a few days cramming or study in spare moments over a longer period, data structures are worth the effort. So what data structures are there in Python? Lists, dictionaries, sets, and... stacks? Does Python have a stack? This series of articles fills in the puzzle piece by piece.
Chapter 13: Binary Tree
Binary tree: a tree in which each node has at most two child nodes (a left child and a right child).
class _BinTreeNode:
    """Storage node of a binary tree: a payload plus left/right child links."""

    def __init__(self, data):
        self.data = data
        self.left = None
        self.right = None


# Three depth-first traversals
def preorderTrav(subtree):
    """Pre-order traversal: print the root, then the left and right subtrees."""
    if subtree is not None:
        print(subtree.data)
        preorderTrav(subtree.left)
        preorderTrav(subtree.right)


def inorderTrav(subtree):
    """In-order traversal: print the left subtree, the root, then the right."""
    if subtree is not None:
        # Recurse into inorderTrav itself; delegating to preorderTrav would
        # print the subtrees in pre-order.
        inorderTrav(subtree.left)
        print(subtree.data)
        inorderTrav(subtree.right)


def postorderTrav(subtree):
    """Post-order traversal: print the left and right subtrees, then the root."""
    if subtree is not None:
        postorderTrav(subtree.left)
        postorderTrav(subtree.right)
        print(subtree.data)


# Breadth-first traversal: visit the tree level by level, using a queue
def breadthFirstTrav(bintree):
    """Print the node payloads level by level, left to right.

    An empty tree (``None``) prints nothing.
    """
    from queue import Queue  # py3

    if bintree is None:
        return
    q = Queue()
    q.put(bintree)
    while not q.empty():
        node = q.get()
        print(node.data)
        if node.left is not None:
            q.put(node.left)
        if node.right is not None:
            q.put(node.right)


class _ExpTreeNode:
    """Node of an expression tree: an operator or operand plus child links."""

    __slots__ = ('element', 'left', 'right')

    def __init__(self, data):
        self.element = data
        self.left = None
        self.right = None

    def __repr__(self):
        return '<_ExpTreeNode: {} {} {}>'.format(
            self.element, self.left, self.right)


from queue import Queue


class ExpressionTree:
    r"""Expression tree for fully parenthesized binary expressions.

    Operators are stored in the inner nodes and operands in the leaf nodes.
    Every token is a single character, so operands are one digit or a
    one-letter variable name.

                *
               / \
              +   -
             / \ / \
            9  3 8  4

        (9+3) * (8-4)

    Expression Tree Abstract Data Type (binary operators only):

        ExpressionTree(expStr): build the tree from the expression string
        evaluate(varDict): evaluate the expression and return the result
        str(tree): return a string representation of the expression

    Usage:
        vars = {'a': 5, 'b': 12}
        expTree = ExpressionTree("(a/(b-3))")
        print('The result = ', expTree.evaluate(vars))
    """

    # Supported binary operators (operator.add etc. would work as well).
    _OPS = {
        '+': lambda left, right: left + right,
        '-': lambda left, right: left - right,
        '*': lambda left, right: left * right,
        '/': lambda left, right: left / right,
        '%': lambda left, right: left % right,
    }

    def __init__(self, expStr):
        self._expTree = None
        self._buildTree(expStr)

    def evaluate(self, varDict):
        """Evaluate the expression, looking variable operands up in varDict."""
        return self._evalTree(self._expTree, varDict)

    def __str__(self):
        return self._buildString(self._expTree)

    def _buildString(self, treeNode):
        """Return the expression of the subtree rooted at treeNode.

        A leaf is an operand and is returned as is; an inner node is wrapped
        in parentheses around ``left operator right``.
        """
        if treeNode.left is None and treeNode.right is None:
            return str(treeNode.element)  # leaf node: operand, no parentheses
        return '({}{}{})'.format(
            self._buildString(treeNode.left),
            treeNode.element,
            self._buildString(treeNode.right),
        )

    def _evalTree(self, subtree, varDict):
        """Recursively evaluate the subtree and return its value."""
        # A leaf node is an operand and is returned directly.
        if subtree.left is None and subtree.right is None:
            # Single-digit literal?
            if '0' <= subtree.element <= '9':
                return int(subtree.element)
            # Otherwise the operand is a variable.
            assert subtree.element in varDict, 'invalid variable.'
            return varDict[subtree.element]
        # An operator: evaluate both subexpressions first.
        lvalue = self._evalTree(subtree.left, varDict)
        rvalue = self._evalTree(subtree.right, varDict)
        print(subtree.element)
        return self._computeOp(lvalue, subtree.element, rvalue)

    def _computeOp(self, left, op, right):
        """Apply the binary operator op to left and right.

        Raises KeyError for an operator that is not in the table.
        """
        assert op
        return self._OPS[op](left, right)

    def _buildTree(self, expStr):
        """Queue every character of expStr as a token and build the tree."""
        expQ = Queue()
        for token in expStr:
            expQ.put(token)
        self._expTree = _ExpTreeNode(None)  # create the root node
        self._recBuildTree(self._expTree, expQ)

    def _recBuildTree(self, curNode, expQ):
        """Fill curNode from the token queue by recursive descent.

        Tokens are fetched with get_nowait(), so a truncated or empty
        expression raises queue.Empty instead of blocking forever on get().
        """
        token = expQ.get_nowait()
        if token == '(':
            curNode.left = _ExpTreeNode(None)
            self._recBuildTree(curNode.left, expQ)
            # The next token is an operator: + - * / %
            curNode.element = expQ.get_nowait()
            curNode.right = _ExpTreeNode(None)
            self._recBuildTree(curNode.right, expQ)
            # The next token is ')'; remove it.
            expQ.get_nowait()
        else:
            # The token is an operand (digit or variable name); it is stored
            # as a string and converted when the tree is evaluated.
            curNode.element = token


vars = {'a': 5, 'b': 12}
expTree = ExpressionTree("((2*7)+8)")
print(expTree)
print('The result = ', expTree.evaluate(vars))
Heap: One of the most direct applications of binary trees is implementing heaps. A heap is a complete binary tree. In a max-heap, the value of every non-leaf node is greater than or equal to the values of its children; in a min-heap, the value of every non-leaf node is less than or equal to the values of its children. Python has a built-in heapq module that provides heap operations; for example, heap sort can be implemented with it:
# Use Python's built-in heapq to implement heap sort
def heapsort(iterable):
    """Return the items of iterable as a new list in ascending order.

    Every item is pushed onto a min-heap and then popped off again; heappop
    always returns the smallest remaining item.
    """
    from heapq import heappush, heappop

    h = []
    for value in iterable:
        heappush(h, value)
    return [heappop(h) for _ in range(len(h))]
However, a heap is generally not implemented with linked nodes but with an array, which is more efficient. Why can it be implemented with an array? Because a heap is a complete binary tree, the relationship between array subscripts can be used to represent the relationship between nodes. This is explained in the docstring of MaxHeap.
class MaxHeap:
    """Fixed-capacity max-heap stored in an array.

    A heap is a complete binary tree. In a max-heap the value of every
    non-leaf node is greater than or equal to the values of its children; in
    a min-heap it is less than or equal. A heap has two properties, the order
    property and the shape property (a complete binary tree), and both are
    maintained on every insertion and extraction.

    Insertion: the value is appended at the next free slot (which keeps the
    tree complete) and a sift-up restores the order property.

    Extraction: the root value is returned; the bottom rightmost node is
    copied to the root and a sift-down restores the order property.

    The heap is stored in an array. Numbering the nodes from the root, top to
    bottom and left to right, the complete-tree shape gives for a node i:

        parent = (i - 1) // 2
        left   = 2 * i + 1
        right  = 2 * i + 2

    Using an array avoids allocating tree nodes and avoids pointer
    manipulation, which also makes debugging easier.
    """

    def __init__(self, maxSize):
        self._elements = Array(maxSize)  # Array ADT implemented in Chapter 2
        self._count = 0

    def __len__(self):
        return self._count

    def capacity(self):
        """Return the maximum number of values the heap can hold."""
        return len(self._elements)

    def add(self, value):
        """Insert value; raises AssertionError when the heap is full."""
        assert self._count < self.capacity(), 'can not add to full heap'
        self._elements[self._count] = value
        self._count += 1
        self._siftUp(self._count - 1)
        self.assert_keep_heap()  # every add must keep the heap property

    def extract(self):
        """Remove and return the largest value.

        Raises AssertionError when the heap is empty.
        """
        assert self._count > 0, 'can not extract from an empty heap'
        value = self._elements[0]  # save root value
        self._count -= 1
        # Move the bottom rightmost node to the root, then sift it down.
        self._elements[0] = self._elements[self._count]
        self._siftDown(0)
        self.assert_keep_heap()
        return value

    def _siftUp(self, ndx):
        """Move the value at ndx up while it is larger than its parent."""
        while ndx > 0:
            parent = (ndx - 1) // 2
            if not self._elements[ndx] > self._elements[parent]:
                break
            self._elements[ndx], self._elements[parent] = (
                self._elements[parent], self._elements[ndx])
            ndx = parent

    def _siftDown(self, ndx):
        """Move the value at ndx down while a child is larger than it.

        Only indices below self._count are read: slots at or beyond the
        count hold stale values of extracted items and must not take part
        in the comparison.
        """
        while True:
            left = 2 * ndx + 1
            right = 2 * ndx + 2
            # Determine which of the node and its live children is largest.
            largest = ndx
            if (left < self._count
                    and self._elements[left] > self._elements[largest]):
                largest = left
            if (right < self._count
                    and self._elements[right] > self._elements[largest]):
                largest = right
            if largest == ndx:
                return
            self._elements[ndx], self._elements[largest] = (
                self._elements[largest], self._elements[ndx])
            ndx = largest

    def __repr__(self):
        # Show only the live part of the array, not unused or stale slots.
        return ' '.join(str(self._elements[i]) for i in range(self._count))

    def assert_keep_heap(self):
        """Verify that the max-heap order property holds.

        Called after each add or extract. Every node except the root is
        compared with its parent, which also covers nodes that have only a
        left child.
        """
        for child in range(1, self._count):
            parent = (child - 1) // 2
            assert self._elements[parent] >= self._elements[child]


def test_MaxHeap():
    """Unit test case for the max-heap implementation."""
    _len = 10
    h = MaxHeap(_len)
    for i in range(_len):
        h.add(i)
        h.assert_keep_heap()
    for i in range(_len):
        # Values were added from small to large; make sure the largest
        # remaining number comes out every time.
        assert h.extract() == _len - i - 1


test_MaxHeap()


def simpleHeapSort(theSeq):
    """Sort theSeq in place in ascending order using MaxHeap.

    The sequence itself is modified and also returned.
    """
    if not theSeq:
        return theSeq
    _len = len(theSeq)
    heap = MaxHeap(_len)
    for item in theSeq:
        heap.add(item)
    # The largest value comes out first, so fill the sequence from the back.
    for i in reversed(range(_len)):
        theSeq[i] = heap.extract()
    return theSeq


def test_simpleHeapSort():
    """Check the heap sort against sorted() on random inputs."""
    from random import randint

    assert simpleHeapSort([]) == []
    for _ in range(1000):
        _len = randint(1, 100)
        to_sort = [randint(0, 100) for _ in range(_len)]
        expected = sorted(to_sort)
        simpleHeapSort(to_sort)  # in-place sort: the list itself is changed
        assert to_sort == expected


test_simpleHeapSort()
Chapter 14: Search Trees
Binary search tree (BST) property: for each internal node V, 1. all keys smaller than V.key are stored in the left subtree of V; 2. all keys greater than V.key are stored in the right subtree of V. Performing an in-order traversal on a BST therefore yields the keys in ascending order.
class _BSTMapNode:
    """Node of the search tree: a key/value pair plus child links."""

    __slots__ = ('key', 'value', 'left', 'right')

    def __init__(self, key, value):
        self.key = key
        self.value = value
        self.left = None
        self.right = None

    def __repr__(self):
        return '<{}:{}> left:{}, right:{}'.format(
            self.key, self.value, self.left, self.right)

    __str__ = __repr__


class BSTMap:
    """Map ADT implemented with a binary search tree (BST).

    Tree nodes hold a key and its payload. This replaces the Map ADT that
    was previously implemented with a hash table.

    Properties: for each internal node V,
      1. all keys less than V.key are stored in the left subtree of V;
      2. all keys greater than V.key are stored in the right subtree of V.
    An in-order traversal of the BST yields the keys in ascending order.
    """

    def __init__(self):
        self._root = None
        self._size = 0
        self._rval = None  # holds the return value of remove

    def __len__(self):
        return self._size

    def __iter__(self):
        return _BSTMapIterator(self._root, self._size)

    def __contains__(self, key):
        return self._bstSearch(self._root, key) is not None

    def valueOf(self, key):
        """Return the value stored under key.

        Raises AssertionError when key is not in the map.
        """
        node = self._bstSearch(self._root, key)
        assert node is not None, 'Invalid map key.'
        return node.value

    def _bstSearch(self, subtree, target):
        """Return the node whose key equals target, or None if absent."""
        if subtree is None:
            # Recursion exit: reached the bottom of the tree without finding
            # the key, or the tree is empty.
            return None
        elif target < subtree.key:
            return self._bstSearch(subtree.left, target)
        elif target > subtree.key:
            return self._bstSearch(subtree.right, target)
        return subtree  # return a reference to the node

    def _bstMinumum(self, subtree):
        """Return the node with the smallest key in subtree.

        Follows the left links down to the leftmost node; returns None for
        an empty subtree.
        """
        if subtree is None:
            return None
        elif subtree.left is None:
            return subtree
        else:
            return self._bstMinumum(subtree.left)

    def add(self, key, value):
        """Add a key or replace the value of an existing key.

        Returns True when a new entry was inserted and False when an
        existing value was replaced. The cost is proportional to the height
        of the tree, which is O(N) for a degenerate tree.
        """
        node = self._bstSearch(self._root, key)
        if node is not None:  # key already exists: update the value
            node.value = value
            return False
        # insert a new entry
        self._root = self._bstInsert(self._root, key, value)
        self._size += 1
        return True

    def _bstInsert(self, subtree, key, value):
        """Insert a new node and return the (possibly new) subtree root.

        New nodes are always inserted as leaves of the tree.
        """
        if subtree is None:
            subtree = _BSTMapNode(key, value)
        elif key < subtree.key:
            subtree.left = self._bstInsert(subtree.left, key, value)
        elif key > subtree.key:
            subtree.right = self._bstInsert(subtree.right, key, value)
        # There is deliberately no else branch: add() has already checked
        # for a duplicate key before calling this method.
        return subtree

    def remove(self, key):
        """Remove key from the map and return its value.

        Raises AssertionError when key is not in the map. The cost is
        proportional to the height of the tree (O(N) worst case).

        There are three kinds of node to delete:
          1. A leaf node: set the parent's link to the node to None.
          2. A node with one child: the parent's link is redirected to that
             child.
          3. A node with two children:
             (1) find the node N to delete and its successor S (the next
                 node in an in-order traversal);
             (2) copy the key and value of S into N;
             (3) delete S from the right subtree of N (S is the smallest
                 node of that subtree).
        """
        assert key in self, 'invalid map key'
        self._root = self._bstRemove(self._root, key)
        self._size -= 1
        return self._rval

    def _bstRemove(self, subtree, target):
        """Remove target from subtree and return the new subtree root.

        The value of the removed entry is left in self._rval.
        """
        # search for the item in the tree
        if subtree is None:
            return subtree
        elif target < subtree.key:
            subtree.left = self._bstRemove(subtree.left, target)
            return subtree
        elif target > subtree.key:
            subtree.right = self._bstRemove(subtree.right, target)
            return subtree

        # found the node containing the item
        removed_value = subtree.value
        self._rval = removed_value
        if subtree.left is None and subtree.right is None:  # leaf node
            return None
        elif subtree.left is None or subtree.right is None:  # one child
            if subtree.left is not None:
                return subtree.left
            return subtree.right

        # two children: replace this entry with its in-order successor
        successor = self._bstMinumum(subtree.right)
        subtree.key = successor.key
        subtree.value = successor.value
        subtree.right = self._bstRemove(subtree.right, successor.key)
        # Removing the successor overwrote self._rval with the successor's
        # value; restore the value of the entry that was actually removed.
        self._rval = removed_value
        return subtree

    def __repr__(self):
        return '->'.join([str(i) for i in self])

    def assert_keep_bst_property(self, subtree):
        """Verify that subtree satisfies the BST ordering on its keys.

        Written to check that add and remove keep the BST property. Every
        key is checked against the bounds imposed by all of its ancestors,
        not only against its direct parent.
        """
        def check(node, low, high):
            if node is None:
                return
            assert low is None or node.key > low
            assert high is None or node.key < high
            check(node.left, low, node.key)
            check(node.right, node.key, high)

        check(subtree, None, None)


class _BSTMapIterator:
    """Iterator over the keys of a BSTMap in ascending order."""

    def __init__(self, root, size):
        self._theKeys = Array(size)  # Array ADT defined outside this section
        self._curItem = 0  # used as the write index during the traversal
        self._bstTraversal(root)
        self._curItem = 0  # reset: now the read index for __next__

    def __iter__(self):
        return self

    def __next__(self):
        if self._curItem < len(self._theKeys):
            key = self._theKeys[self._curItem]
            self._curItem += 1
            return key
        else:
            raise StopIteration

    def _bstTraversal(self, subtree):
        """Store the keys of subtree into the key array in in-order."""
        if subtree is not None:
            self._bstTraversal(subtree.left)
            self._theKeys[self._curItem] = subtree.key
            self._curItem += 1
            self._bstTraversal(subtree.right)


def test_BSTMap():
    """Build a small tree and check that the BST property holds."""
    l = [60, 25, 100, 35, 17, 80]
    bst = BSTMap()
    for i in l:
        bst.add(i, i)  # add() needs both a key and a value
    bst.assert_keep_bst_property(bst._root)


def test_HashMap():
    """Map test reused from the hash-based map, now run against BSTMap."""
    # h = HashMap()
    h = BSTMap()
    assert len(h) == 0
    h.add('a', 'a')
    assert h.valueOf('a') == 'a'
    assert len(h) == 1

    a_v = h.remove('a')
    assert a_v == 'a'
    assert len(h) == 0

    h.add('a', 'a')
    h.add('b', 'b')
    assert len(h) == 2
    assert h.valueOf('b') == 'b'
    b_v = h.remove('b')
    assert b_v == 'b'
    assert len(h) == 1
    h.remove('a')
    assert len(h) == 0

    _len = 10
    for i in range(_len):
        h.add(str(i), i)
    assert len(h) == _len
    for i in range(_len):
        assert str(i) in h
    for i in range(_len):
        print(len(h))
        print('bef', h)
        removed = h.remove(str(i))
        assert removed == i
        print('aft', h)
        print(len(h))
    assert len(h) == 0


test_HashMap()