Introduction
If an interview calls for this knowledge and you do not have it, the experience will be frustrating and may get you rejected. Whether you spend a few days cramming or keep learning in spare moments, working on data structures is time well spent. So what data structures does Python have? Lists, dictionaries, sets, and... stacks? Does Python have a stack? This series of articles fills in the pieces of the puzzle one by one.
Chapter 5: Searching and Sorting
Sorting and searching are the most basic and most frequent operations. Python provides the built-in `in` operator and the `bisect` module (binary search) for searching, and the built-in `sorted` function for sorting. Binary search and quicksort are also frequently tested in interviews. This chapter covers basic sorting and searching.
def binary_search(sorted_seq, val):
    """Return the leftmost insertion index of ``val`` in ``sorted_seq``.

    Behaves like ``bisect.bisect_left`` from the standard library: the
    result is the first index ``i`` such that ``sorted_seq[i] >= val``, or
    ``len(sorted_seq)`` when every element is smaller than ``val``.  When
    ``val`` occurs several times, the index of its first occurrence is
    returned.

    ``sorted_seq`` must already be sorted in ascending order.
    """
    low = 0
    high = len(sorted_seq)  # exclusive upper bound of the search window
    while low < high:
        mid = (low + high) // 2
        if sorted_seq[mid] < val:
            low = mid + 1
        else:
            # sorted_seq[mid] >= val: mid may be the answer, keep it in range.
            high = mid
    return low


def bubble_sort(seq):
    """Sort ``seq`` in place in ascending order using bubble sort.

    Performs at most n(n-1)/2 = (n^2 - n)/2 comparisons, i.e. O(n^2).
    Returns ``None``; the input sequence is mutated.
    """
    n = len(seq)
    for i in range(n - 1):
        swapped = False
        # After each round the largest remaining element has bubbled to the
        # end, so the last ``i`` positions are already final and are skipped.
        for j in range(n - 1 - i):
            if seq[j] > seq[j + 1]:
                seq[j], seq[j + 1] = seq[j + 1], seq[j]
                swapped = True
        if not swapped:
            # No swap in a full round means the sequence is already sorted.
            break


def select_sort(seq):
    """Sort ``seq`` in place in ascending order using selection sort.

    Can be seen as an improvement on bubble sort: each round finds the
    smallest remaining element, so at most one swap is needed per round.
    Returns ``None``; the input sequence is mutated.
    """
    n = len(seq)
    for i in range(n - 1):
        min_idx = i  # assume the i-th element is the smallest
        for j in range(i + 1, n):
            if seq[j] < seq[min_idx]:  # found a smaller element
                min_idx = j
        if min_idx != i:
            seq[i], seq[min_idx] = seq[min_idx], seq[i]


def insertion_sort(seq):
    """Sort ``seq`` in place in ascending order using insertion sort.

    Each round takes the next element and inserts it into the already
    sorted prefix; initially that prefix holds only the first element.
    Returns ``None``; the input sequence is mutated.
    """
    n = len(seq)
    for i in range(1, n):
        value = seq[i]  # the value to be positioned
        # Find where ``value`` fits in the sorted prefix seq[:i], shifting
        # larger items one slot to the right along the way.
        pos = i
        while pos > 0 and value < seq[pos - 1]:
            seq[pos] = seq[pos - 1]
            pos -= 1
        seq[pos] = value


def merge_sorted_list(listA, listB):
    """Merge two ascending sorted lists into a new ascending sorted list.

    The merge is stable: when elements compare equal, those from ``listA``
    come before those from ``listB``.  Neither input is modified.
    """
    new_list = []
    a = b = 0
    len_a, len_b = len(listA), len(listB)
    while a < len_a and b < len_b:
        # ``<=`` (rather than ``<``) keeps listA's element first on ties.
        if listA[a] <= listB[b]:
            new_list.append(listA[a])
            a += 1
        else:
            new_list.append(listB[b])
            b += 1
    # At most one of the two inputs still has elements left.
    new_list.extend(listA[a:])
    new_list.extend(listB[b:])
    return new_list
Chapter 6: Linked Structure
The list is the most commonly used data structure, but inserting or removing elements in the middle of a list is inefficient. In those cases a linked list is a better fit. The trade-off is that accessing an element takes O(n) time on average.
# Singly linked list implementation.
# NOTE: the misspelled public names ``travsersal`` and ``unorderdSearch``
# are kept as-is so existing callers keep working.
class ListNode:
    """A node of a singly linked list holding ``data`` and a ``next`` link."""

    def __init__(self, data):
        self.data = data
        self.next = None  # following node, or None at the end of the list


def travsersal(head, callback):
    """Call ``callback(node.data)`` for every node, from ``head`` to the end."""
    curNode = head
    while curNode is not None:
        callback(curNode.data)
        curNode = curNode.next


def unorderdSearch(head, target):
    """Return True if some node in the list starting at ``head`` holds ``target``."""
    curNode = head
    while curNode is not None and curNode.data != target:
        curNode = curNode.next
    return curNode is not None


def prepend(head, item):
    """Prepend ``item`` to the unsorted linked list and return the new head.

    Rebinding the ``head`` parameter is invisible to the caller, so the new
    head node is returned; use it as ``head = prepend(head, item)``.
    """
    newNode = ListNode(item)
    newNode.next = head
    return newNode


def remove(head, target):
    """Remove the first node holding ``target`` and return the list's head.

    The returned head differs from ``head`` only when the removed node was
    the first one.  If ``target`` is not present the list is left unchanged.
    Use it as ``head = remove(head, target)``.
    """
    predNode = None
    curNode = head
    # Look for the target, remembering the predecessor for unlinking.
    while curNode is not None and curNode.data != target:
        predNode = curNode
        curNode = curNode.next
    if curNode is not None:
        if curNode is head:
            head = curNode.next
        else:
            predNode.next = curNode.next
    return head
Chapter 7: Stacks
The stack is another data structure commonly used in computing. A stack is last-in, first-out (LIFO): think of stacking plates in a bucket. The plate put in first ends up at the bottom, and the plate put in last is the first one taken out.
class ListStack:
    """Stack ADT backed by a Python list.

    Operations:
        ListStack()  -- create an empty stack
        isEmpty()    -- True when the stack holds no items
        len(stack)   -- number of items
        push(item)   -- put ``item`` on top
        pop()        -- remove and return the top item (stack must not be empty)
        peek()       -- return the top item without removing it
                        (stack must not be empty)
    """

    def __init__(self):
        self._items = []

    def isEmpty(self):
        return len(self) == 0

    def __len__(self):
        return len(self._items)

    def peek(self):
        assert not self.isEmpty(), 'can not peek at an empty stack'
        return self._items[-1]

    def pop(self):
        assert not self.isEmpty(), 'can not pop from an empty stack'
        return self._items.pop()

    def push(self, item):
        self._items.append(item)


class _StackNode:
    """Private node type of ``LinkedStack``: an item plus a link to the node below."""

    def __init__(self, item, link):
        self.item = item
        self.next = link


class LinkedStack:
    """Stack ADT backed by a singly linked list.

    The list-based stack is simpler, but when many pushes are involved an
    individual push can degrade to O(n) (the list has to grow); the linked
    list keeps push and pop O(1) even in the worst case.

    Offers the same operations as ``ListStack``.
    """

    def __init__(self):
        self._top = None  # top node: a _StackNode, or None when empty
        self._size = 0    # number of items on the stack

    def isEmpty(self):
        return self._top is None

    def __len__(self):
        return self._size

    def peek(self):
        assert not self.isEmpty(), 'can not peek at an empty stack'
        return self._top.item

    def pop(self):
        assert not self.isEmpty(), 'can not pop from an empty stack'
        node = self._top
        self._top = self._top.next  # unlink the old top node
        self._size -= 1
        return node.item

    def push(self, item):
        # The new node links to the previous top and becomes the new top.
        self._top = _StackNode(item, self._top)
        self._size += 1

    # Backward-compatible alias: this method used to be exposed as ``_push``.
    _push = push


# ``Stack`` keeps referring to the linked-list implementation, which is the
# definition that was last bound to that name.
Stack = LinkedStack
Chapter 8: Queues
Queues are also a commonly used data structure, for example for passing messages: Celery can use the list type provided by Redis as its message queue. In this chapter we implement queues and priority queues using a list and a linked list.
from collections import namedtuple

try:
    # Fixed-size Array ADT implemented in the Array chapter of this series.
    from array import Array
except ImportError:
    # The standard-library ``array`` module has no ``Array`` name, so when
    # the project's own module is not the one being imported, fall back to a
    # minimal fixed-size array that offers the operations used below.
    class Array:
        """Minimal fixed-size array supporting ``len`` and item get/set."""

        def __init__(self, size):
            self._items = [None] * size

        def __len__(self):
            return len(self._items)

        def __getitem__(self, index):
            return self._items[index]

        def __setitem__(self, index, value):
            self._items[index] = value


class ListQueue:
    """Queue ADT backed by a Python list.

    Simple, but ``dequeue`` removes from the front of the list and is
    therefore O(n).

    Operations:
        ListQueue()    -- create an empty queue
        isEmpty()      -- True when the queue holds no items
        len(queue)     -- number of items
        enqueue(item)  -- add ``item`` at the back
        dequeue()      -- remove and return the front item
                          (queue must not be empty)
    """

    def __init__(self):
        self._qList = []

    def isEmpty(self):
        return len(self) == 0

    def __len__(self):
        return len(self._qList)

    def enqueue(self, item):
        self._qList.append(item)

    # Backward-compatible alias for the original misspelled method name.
    enquee = enqueue

    def dequeue(self):
        assert not self.isEmpty()
        return self._qList.pop(0)


class ArrayQueue:
    """Queue ADT backed by a circular array with front and back indices.

    ``enqueue`` and ``dequeue`` are both O(1).  The drawback is that the
    capacity is fixed at ``maxSize`` when the queue is created.
    """

    def __init__(self, maxSize):
        self._count = 0
        self._front = 0
        # Starts one slot "before" index 0 so the first enqueue lands on 0.
        self._back = maxSize - 1
        self._qArray = Array(maxSize)

    def isEmpty(self):
        return self._count == 0

    def isFull(self):
        return self._count == len(self._qArray)

    def __len__(self):
        return self._count

    def enqueue(self, item):
        assert not self.isFull()
        maxSize = len(self._qArray)
        self._back = (self._back + 1) % maxSize  # advance the back index
        self._qArray[self._back] = item
        self._count += 1

    def dequeue(self):
        assert not self.isEmpty()
        item = self._qArray[self._front]
        maxSize = len(self._qArray)
        self._front = (self._front + 1) % maxSize  # advance the front index
        self._count -= 1
        return item


class _QueueNode:
    """Private node type of ``LinkedQueue``: an item plus a link to the next node."""

    def __init__(self, item):
        self.item = item
        self.next = None  # following node, or None for the tail


class LinkedQueue:
    """Queue ADT backed by a singly linked list with head and tail references.

    Unlike the circular array, the linked list has no fixed maximum size.
    ``enqueue`` and ``dequeue`` are both O(1).
    """

    def __init__(self):
        self._qhead = None
        self._qtail = None
        self._qsize = 0

    def isEmpty(self):
        return self._qhead is None

    def __len__(self):
        return self._qsize

    def enqueue(self, item):
        # Create a new node and make it the tail of the list.
        node = _QueueNode(item)
        if self.isEmpty():
            self._qhead = node
        else:
            self._qtail.next = node
        self._qtail = node
        self._qsize += 1

    def dequeue(self):
        assert not self.isEmpty(), 'Can not dequeue from an empty queue '
        node = self._qhead
        if self._qhead is self._qtail:
            # Removing the only node also empties the tail reference.
            self._qtail = None
        self._qhead = self._qhead.next  # move the head forward
        self._qsize -= 1
        return node.item


# ``Queue`` keeps referring to the linked-list implementation, which is the
# definition that was last bound to that name.
Queue = LinkedQueue


class UnboundedPriorityQueue:
    """Priority queue ADT with unlimited priority values.

    Every item is enqueued with a priority ``p``.  A smaller number means a
    higher priority and is dequeued first; items with the same priority
    leave in FIFO order.

    There are two flavours of priority queue:
        - bounded:   priorities are limited to the range [0, numLevels)
        - unbounded: priorities are not limited (this class)

    Operations:
        isEmpty()
        len(queue)
        enqueue(item, priority)
        dequeue()  -- remove and return the item with the highest priority

    Two implementation strategies exist:
        1. Always append at the end and search for the highest priority on
           dequeue, which makes dequeue O(n).
        2. Keep the entries ordered, searching for the insertion position on
           every enqueue, so the highest-priority entry is always at a
           known position.
    This class uses strategy 1 with a built-in list.
    """

    _PriorityQEntry = namedtuple('_PriorityQEntry', 'item, priority')

    def __init__(self):
        self._qlist = []

    def isEmpty(self):
        return len(self) == 0

    def __len__(self):
        return len(self._qlist)

    def enqueue(self, item, priority):
        entry = UnboundedPriorityQueue._PriorityQEntry(item, priority)
        self._qlist.append(entry)

    def dequeue(self):
        assert not self.isEmpty(), 'can not deque from an empty queue'
        # min() returns the first minimal index on ties, which gives FIFO
        # order among entries of equal priority.
        best = min(range(len(self._qlist)),
                   key=lambda idx: self._qlist[idx].priority)
        return self._qlist.pop(best).item

    # Backward-compatible alias for the original method name.
    deque = dequeue


class BoundedPriorityQueue:
    """Priority queue ADT with priorities limited to [0, numLevels).

    The unbounded queue has to scan all entries on dequeue, which is O(n).
    Here one FIFO queue is kept per priority level, trading space for time:
    enqueue goes straight to the slot of its priority, and dequeue takes
    from the first non-empty level.  A smaller number means a higher
    priority and is dequeued first.  For example::

        qLevels
        [0] -> ["white"]
        [1]
        [2] -> ["black", "green"]
        [3] -> ["purple", "yellow"]

    Implemented as an array of queues, each of which is a linked-list queue.
    """

    def __init__(self, numLevels):
        self._qSize = 0
        self._qLevels = Array(numLevels)
        for i in range(numLevels):
            self._qLevels[i] = LinkedQueue()

    def isEmpty(self):
        return len(self) == 0

    def __len__(self):
        return self._qSize

    def enqueue(self, item, priority):
        assert 0 <= priority < len(self._qLevels), 'invalid priority'
        # Go straight to the slot for this priority and enqueue there.
        self._qLevels[priority].enqueue(item)
        self._qSize += 1

    def dequeue(self):
        assert not self.isEmpty(), 'can not deque from an empty queue'
        i = 0
        p = len(self._qLevels)
        # Skip empty levels to find the first non-empty queue.
        while i < p and self._qLevels[i].isEmpty():
            i += 1
        self._qSize -= 1
        return self._qLevels[i].dequeue()

    # Backward-compatible alias for the original method name.
    deque = dequeue


# Constructor name used in the priority-queue ADT description.
BPriorityQueue = BoundedPriorityQueue