Kafka with Python: multithreaded consumers with manual offset commits

Original: https://blog.csdn.net/xiaofei2017/article/details/80924800

 

#encoding=utf-8
'''
@author: sea
'''
import threading
 
import os
import sys
from kafka import KafkaConsumer, TopicPartition, OffsetAndMetadata
 
from consumers.db_util import *
from consumers.json_dispose import *
from collections import OrderedDict
 
 
# Worker threads created by the __main__ block, which starts and then joins
# each of them.
threads = []
# col_dic, sql_dic = get()
 
 
class MyThread(threading.Thread):
    """Worker thread that runs one ``Consumer`` loop for a single partition.

    Args:
        thread_name: Label for this worker. It is used as the thread's
            ``name`` and passed on to ``Consumer``.
        topic: Kafka topic handed to ``Consumer``.
        partition: Partition number handed to ``Consumer``.
    """

    def __init__(self, thread_name, topic, partition):
        # Forward the name to Thread so that self.name (printed in run())
        # matches thread_name instead of an auto-generated "Thread-N".
        threading.Thread.__init__(self, name=thread_name)
        self.thread_name = thread_name
        self.partition = partition
        self.topic = topic

    def run(self):
        """Announce the thread and run the consumer loop for its partition."""
        print("Starting " + self.name)
        Consumer(self.thread_name, self.topic, self.partition)

    def stop(self):
        """Raise ``SystemExit`` in the calling thread.

        NOTE: ``sys.exit()`` only unwinds the thread that calls it; calling
        this from another thread does not interrupt a running ``run()``.
        """
        sys.exit()
 
 
def Consumer(thread_name, topic, partition):
    """Consume one partition of ``topic`` forever with manual offset commits.

    The start offset is read with ``get_kafka(topic, partition)`` (its result
    is indexed with ``'offset'``), the consumer is assigned to exactly that
    partition and positioned there. After every non-empty batch the next
    offset to consume is stored with ``update_offset`` and committed to Kafka.

    Args:
        thread_name: Label used as the Kafka ``client_id`` and in log output.
        topic: Topic to read from.
        partition: Partition number of ``topic`` to read from.

    This function does not return. It terminates the whole process with
    ``os._exit(1)`` when persisting a batch or its offset fails.
    """
    broker_list = '172.16.90.63:6667, 172.16.90.58:6667, 172.16.90.59:6667'
    # Consumer options worth knowing about:
    #   fetch_min_bytes (int): minimum amount of data the server should
    #       return for a fetch request; with less available it waits.
    #   fetch_max_wait_ms (int): maximum time (ms) the server blocks before
    #       answering a fetch when fetch_min_bytes is not yet satisfied.
    #   fetch_max_bytes (int): maximum amount of data the server should
    #       return for a fetch request. Not an absolute maximum: a larger
    #       first message is still returned so the consumer can progress.
    #       Requires Kafka >= 0.10.1.0. Default: 52428800 (50 MB).
    #   enable_auto_commit (bool): if True, offsets are committed
    #       periodically in the background. Default: True.
    #   max_poll_records (int): maximum number of records returned by a
    #       single poll() call. Default: 500.
    #   max_poll_interval_ms (int): maximum delay between poll() calls when
    #       using group management before the consumer is considered failed
    #       and the group rebalances. Default: 300000.
    consumer = KafkaConsumer(
        bootstrap_servers=broker_list,
        group_id="xiaofesi",
        client_id=thread_name,
        enable_auto_commit=False,
        fetch_min_bytes=1024 * 1024,  # 1 MB
        # fetch_max_bytes=1024 * 1024 * 1024 * 10,
        fetch_max_wait_ms=60000,  # 60 s
        request_timeout_ms=305000,
        # consumer_timeout_ms=1,
        # max_poll_records=5000,
        # max_poll_interval_ms=60000,
    )

    # The stored offset is "last consumed offset + 1", i.e. the position at
    # which consumption has to resume.
    dic = get_kafka(topic, partition)
    tp = TopicPartition(topic, partition)
    print(thread_name, tp, dic['offset'])

    # Pin this consumer to its own partition (one consumer thread per
    # partition) and move it to the stored start position.
    consumer.assign([tp])
    consumer.seek(tp, dic['offset'])
    print("The program runs for the first time\tthread:", thread_name,
          "partition:", partition, "offset:", dic['offset'],
          "\tstart consuming...")

    num = 0  # number of poll rounds completed by this consumer
    while True:
        args = OrderedDict()  # placeholder for the data handed to save_to_db
        msg = consumer.poll(timeout_ms=60000)
        end_offset = consumer.end_offsets([tp])[tp]
        print('Saved offset', consumer.committed(tp),
              'the latest offset,', end_offset)
        if msg:
            print("thread:", thread_name, "partition:", partition,
                  "maximum offset:", end_offset,
                  "partitions with data:", len(msg))
            lines = 0
            for records in msg.values():
                for record in records:
                    lines += 1
                    # NOTE(security): eval() executes arbitrary expressions
                    # from the message payload. Only safe if every producer
                    # is trusted; consider json.loads / ast.literal_eval.
                    payload = eval(record.value.decode('utf-8'))
                    # do something with payload
            # Number of messages in this batch for this thread.
            print(thread_name, "Lines", lines)

            # Persist the batch here, e.g.:
            #   is_succeed = save_to_db(args, thread_name)
            is_succeed = True
            if is_succeed:
                # Store and commit the consumer's own position (offset of
                # the next record to fetch). Using end_offset here would mark
                # records that poll() has not returned yet as consumed.
                next_offset = consumer.position(tp)
                is_succeed1 = update_offset(topic, partition, next_offset)
                # Manual commit format:
                #   {TopicPartition: OffsetAndMetadata(offset, metadata)}
                consumer.commit(
                    offsets={tp: OffsetAndMetadata(next_offset, None)})
                print(thread_name, "to DB SUSS", num + 1)
                if is_succeed1 == 0:
                    # sys.exit() would end only this thread while the other
                    # consumer threads keep running; os._exit() terminates
                    # the whole process.
                    os._exit(1)
            else:
                os._exit(1)
        else:
            print(thread_name, 'no data')
        num += 1
        print(thread_name, "", num, "")
 
 
if __name__ == '__main__':
    # Start one consumer thread per partition (0-2) of topic "test" and
    # wait for all of them to finish.
    try:
        for partition_id in range(3):
            threads.append(
                MyThread("Thread-%d" % partition_id, "test", partition_id))

        for t in threads:
            t.start()

        for t in threads:
            t.join()

        print("exit program with 0")
    except Exception as exc:
        # Catch Exception rather than everything, so Ctrl-C and SystemExit
        # still propagate, and show what actually went wrong.
        print("Error: failed to run consumer program:", exc)

 

Guess you like

Origin www.cnblogs.com/lshan/p/11647485.html
Recommended