Original: https://blog.csdn.net/xiaofei2017/article/details/80924800
#encoding=utf-8
'''
Multi-threaded Kafka consumer with manually managed offsets.

One thread is started per partition.  Each thread resumes from the offset
stored in the database, polls batches of records and, once a batch has been
handled, stores the next offset in the database and commits it to Kafka.

@author: sea
'''
import os
import sys
import threading
from collections import OrderedDict

from kafka import KafkaConsumer, TopicPartition, OffsetAndMetadata

# NOTE(review): get_kafka / update_offset (and the commented-out save_to_db)
# are presumably provided by these wildcard imports -- confirm.
from consumers.db_util import *
from consumers.json_dispose import *

threads = []
# col_dic, sql_dic = get()


class MyThread(threading.Thread):
    '''Worker thread that runs ``Consumer`` for one topic partition.'''

    def __init__(self, thread_name, topic, partition):
        threading.Thread.__init__(self)
        self.thread_name = thread_name
        # self.keyName = keyName
        self.partition = partition
        self.topic = topic

    def run(self):
        # self.name is the name assigned by threading.Thread itself; the
        # caller-supplied thread_name is only forwarded to Consumer().
        print("Starting " + self.name)
        Consumer(self.thread_name, self.topic, self.partition)

    def stop(self):
        # sys.exit() raises SystemExit in whichever thread calls stop().
        sys.exit()


def Consumer(thread_name, topic, partition):
    '''Consume one partition of ``topic`` forever, committing offsets manually.

    Args:
        thread_name: label used as the Kafka ``client_id`` and in log output.
        topic: name of the topic to read.
        partition: partition number assigned to this consumer.

    The start offset is read from ``get_kafka(topic, partition)['offset']``.
    After every non-empty poll the offset following the last polled record is
    written with ``update_offset`` and committed to Kafka.  The whole process
    is terminated with ``os._exit(1)`` when persisting the batch or the offset
    fails.  This function does not return.
    '''
    broker_list = '172.16.90.63:6667, 172.16.90.58:6667, 172.16.90.59:6667'

    # KafkaConsumer settings relevant here:
    #   fetch_min_bytes (int)      - minimum amount of data the server should
    #       return for a fetch request; otherwise it waits (up to
    #       fetch_max_wait_ms) for more data to accumulate.
    #   fetch_max_wait_ms (int)    - maximum time in milliseconds the server
    #       blocks before answering a fetch request when fetch_min_bytes is
    #       not immediately available.
    #   fetch_max_bytes (int)      - maximum amount of data the server should
    #       return for a fetch request.  Not an absolute maximum: if the first
    #       message in the first non-empty partition is larger, it is still
    #       returned so the consumer can make progress.  Requires
    #       Kafka >= 0.10.1.0.  Default: 52428800 (50 MB).
    #   enable_auto_commit (bool)  - if True, offsets are committed
    #       periodically in the background.  Default: True.
    #   max_poll_records (int)     - maximum number of records returned by a
    #       single poll() call.  Default: 500.
    #   max_poll_interval_ms (int) - maximum delay between poll() calls when
    #       using group management; if exceeded the consumer is considered
    #       failed and the group rebalances.  Default: 300000.
    consumer = KafkaConsumer(
        bootstrap_servers=broker_list,
        group_id="xiaofesi",
        client_id=thread_name,
        enable_auto_commit=False,
        fetch_min_bytes=1024 * 1024,  # 1 MB
        # fetch_max_bytes=1024 * 1024 * 1024 * 10,
        fetch_max_wait_ms=60000,  # 60 s
        request_timeout_ms=305000,
        # consumer_timeout_ms=1,
        # max_poll_records=5000,
        # max_poll_interval_ms=60000  (author's note: no such parameter in
        #                              the client version used)
    )

    # The offset stored in the database is already "last consumed offset + 1",
    # i.e. the position at which consumption has to resume.
    dic = get_kafka(topic, partition)
    tp = TopicPartition(topic, partition)
    print(thread_name, tp, dic['offset'])

    # Manually assign exactly one partition to this consumer (one thread per
    # partition) and move to the stored start position.
    consumer.assign([tp])
    consumer.seek(tp, dic['offset'])
    print("The program runs for the first time \t thread: ", thread_name,
          " Zoning: ", partition, " Offset: ", dic['offset'],
          " \t start spending ... ")

    num = 0  # number of completed poll rounds
    while True:
        args = OrderedDict()  # payload for save_to_db once it is enabled
        msg = consumer.poll(timeout_ms=60000)
        end_offset = consumer.end_offsets([tp])[tp]
        print('Saved offset ', consumer.committed(tp),
              ' the latest offset, ', end_offset)

        if msg:
            print(" thread: ", thread_name, " Zoning: ", partition,
                  " maximum offset: ", end_offset,
                  " presence or absence of data, ", len(msg))
            lines = 0
            next_offset = None
            for data in msg.values():
                for record in data:
                    lines += 1
                    # Resume point is the offset after the last record seen.
                    next_offset = record.offset + 1
                    # SECURITY: eval() executes whatever expression the
                    # message contains.  Only acceptable while every producer
                    # is trusted; prefer json.loads()/ast.literal_eval() if
                    # the payload format allows it.
                    line = eval(record.value.decode('utf-8'))
                    # do something with `line`

            # Number of messages this thread received in this batch.
            print(thread_name, " Lines ", lines)

            # Save the data to the database.
            # is_succeed = save_to_db(args, thread_name)
            is_succeed = True
            if is_succeed:
                # Persist and commit the position actually consumed.  Using
                # the log-end offset here would skip every record appended
                # after (or not included in) this poll.
                is_succeed1 = update_offset(topic, partition, next_offset)
                # Manual commit format:
                # {TopicPartition: OffsetAndMetadata(offset_num, None)}
                consumer.commit(
                    offsets={tp: OffsetAndMetadata(next_offset, None)})
                print(thread_name, " to DB SUSS ", num + 1)
                if is_succeed1 == 0:
                    # sys.exit() would end only this thread and leave the
                    # other consumers and the main program running, so the
                    # whole process is terminated instead.
                    os._exit(1)
            else:
                os._exit(1)
        else:
            print(thread_name, ' no data ')

        num += 1
        print(thread_name, "第", num, "次")


if __name__ == '__main__':
    try:
        # One consumer thread per partition of topic "test".
        for index in range(3):
            threads.append(MyThread("Thread-%d" % index, "test", index))
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        print("exit program with 0")
    except Exception as exc:
        print("Error: failed to run consumer program")
        print(repr(exc))