pykafka基本生产消费常用api

pykafka基本生产消费常用api

生产者

  • 案例
#coding=utf-8

import time
from pykafka import KafkaClient


class KafkaTest(object):
    """Demonstrate the commonly used pykafka producer APIs.

    Each public method is a self-contained demo that prints what it observes
    (partitions, offsets, delivery reports) to stdout and returns ``None``.
    """

    def __init__(self, host="192.168.237.129:9092"):
        """Create a client for the broker(s) at ``host``.

        :param host: ``"host:port"`` string handed to ``KafkaClient(hosts=...)``.
        """
        self.host = host
        self.client = KafkaClient(hosts=self.host)

    def producer_partition(self):
        """Show how the topic offsets change when one message is produced.

        Prints the topic's partitions, the earliest available offsets, and the
        latest available offsets before and after a synchronous produce.

        :return: None
        """
        # NOTE(review): this demo opens its own connection to a hard-coded
        # broker/ZooKeeper pair and topic instead of using ``self.client``
        # (the original kept ``self.client.topics["test_topic".encode()]``
        # as a commented-out alternative).
        client = KafkaClient(hosts='192.168.66.194:9092', zookeeper_hosts='192.168.66.194:2181')
        topic = client.topics[b'PD_Topic']
        partitions = topic.partitions
        print(u"查看所有分区 {}".format(partitions))

        earliest_offset = topic.earliest_available_offsets()
        print(u"获取最早可用的offset {}".format(earliest_offset))

        # Offsets before producing.
        last_offset = topic.latest_available_offsets()
        print(u"最近可用offset {}".format(last_offset))

        # Produce synchronously; leaving the ``with`` block stops the
        # producer so its worker threads do not outlive the demo.
        with topic.get_producer(sync=True) as p:
            p.produce(str(time.time()).encode())

        # Offsets after producing.
        last_offset = topic.latest_available_offsets()
        print(u"最近可用offset {}".format(last_offset))

    def producer_designated_partition(self):
        """Write a message to a specific partition.

        To control which partition a message lands in, a partitioner function
        is supplied when the producer is created and a ``partition_key`` is
        passed when the message is produced.

        :return: None
        """

        def assign_partition(pid, key):
            """Pick the target partition; this demo always uses the first one.

            :param pid: the list of candidate partitions
            :param key: the ``partition_key`` given to ``produce``
            :return: the first entry of ``pid``
            """
            print("为消息分配partition {} {}".format(pid, key))
            return pid[0]

        topic = self.client.topics["test_topic".encode()]
        with topic.get_producer(sync=True, partitioner=assign_partition) as p:
            p.produce(str(time.time()).encode(), partition_key=b"partition_key_0")

    def async_produce_message(self):
        """Produce asynchronously and time how long delivery takes.

        With ``sync=False`` the message is pushed onto a queue; a background
        thread sends the queued messages once the queue reaches a size
        threshold (``min_queued_messages``) or a linger period (``linger_ms``,
        5 s by default according to the original author's note) has passed.
        The method polls the latest offset once per second until it changes
        and prints the elapsed time.

        :return: None
        """
        # NOTE(review): hard-coded connection and topic, bypassing
        # ``self.client`` (the original kept
        # ``self.client.topics["kafka_test".encode()]`` commented out).
        client = KafkaClient(hosts='192.168.66.194:9092', zookeeper_hosts='192.168.66.194:2181')
        topic = client.topics[b'PD_Topic']
        last_offset = topic.latest_available_offsets()
        print("最近的偏移量 offset {}".format(last_offset))

        # Remember the starting offset of partition 0.
        old_offset = last_offset[0].offset[0]

        # The polling loop stays inside the ``with`` block so the producer is
        # only stopped after the message has been observed on the broker.
        with topic.get_producer(sync=False, partitioner=lambda pid, key: pid[0]) as p:
            p.produce(str(time.time()).encode())
            s_time = time.time()
            while True:
                last_offset = topic.latest_available_offsets()
                print("最近可用offset {}".format(last_offset))
                if last_offset[0].offset[0] != old_offset:
                    e_time = time.time()
                    print('cost time {}'.format(e_time - s_time))
                    break
                time.sleep(1)

    def get_produce_message_report(self):
        """Produce asynchronously and wait for the delivery report.

        Per the original author's note, the report only becomes available
        after the default 5 s wait. Prints the latest offsets before and after,
        plus the time spent waiting for the report.

        :return: None
        """
        # NOTE(review): hard-coded connection and topic, bypassing
        # ``self.client`` (the original kept
        # ``self.client.topics["kafka_test".encode()]`` commented out).
        client = KafkaClient(hosts='192.168.66.194:9092', zookeeper_hosts='192.168.66.194:2181')
        topic = client.topics[b'PD_Topic']
        last_offset = topic.latest_available_offsets()
        print("最近的偏移量 offset {}".format(last_offset))

        with topic.get_producer(sync=False, delivery_reports=True,
                                partitioner=lambda pid, key: pid[0]) as p:
            p.produce(str(time.time()).encode())
            s_time = time.time()
            delivery_report = p.get_delivery_report()
            e_time = time.time()
            print('等待{}s, 递交报告{}'.format(e_time - s_time, delivery_report))
            last_offset = topic.latest_available_offsets()
            print("最近的偏移量 offset {}".format(last_offset))


if __name__ == '__main__':
    kafka_ins = KafkaTest()
    # Alternative demos:
    # kafka_ins.producer_partition()
    # kafka_ins.producer_designated_partition()
    # kafka_ins.async_produce_message()
    kafka_ins.get_produce_message_report()
  • 注意要点: 多进程使用pykafka时如果共享同一个client,会造成只有一个进程能够正常写入数据;如果使用了delivery_reports(包括同步生产),会导致子进程彻底阻塞、不可用

消费者

  • pykafka消费者分为simple和balanced两种 
    1. simple适用于需要消费指定分区、且不需要自动重分配分区的场景(分区由使用者自行指定)
    2. 需要自动分配分区(rebalance)时则选择balanced
  • 案例
#coding=utf-8

from pykafka import KafkaClient


class KafkaTest(object):
    """Demonstrate the pykafka simple and balanced consumer APIs.

    Each public method is a self-contained demo that prints what it observes
    to stdout.
    """

    def __init__(self, host="192.168.237.129:9092"):
        """Create a client for the broker(s) at ``host``.

        :param host: ``"host:port"`` string handed to ``KafkaClient(hosts=...)``.
        """
        self.host = host
        self.client = KafkaClient(hosts=self.host)

    def simple_consumer(self, offset=0):
        """Consume three messages from one explicitly chosen partition.

        :param offset: value passed to ``reset_offsets`` for the chosen
            partition before consuming.
        :return: None
        """
        topic = self.client.topics["kafka_test".encode()]
        partitions = topic.partitions
        last_offset = topic.latest_available_offsets()
        print("最近可用offset {}".format(last_offset))

        # Restrict the consumer to a single partition (key 0).
        target = partitions[0]
        consumer = topic.get_simple_consumer(b"simple_consumer_group", partitions=[target])
        try:
            # Offsets currently held by this consumer, per partition.
            print("当前消费者分区offset情况{}".format(consumer.held_offsets))
            consumer.reset_offsets([(target, offset)])

            for _ in range(3):
                msg = consumer.consume()
                print("消费 :{}".format(msg.value.decode()))

            # Held offsets after the three messages have been consumed.
            print("当前消费者分区offset情况{}".format(consumer.held_offsets))
        finally:
            # Stop the consumer even if a consume/print step fails.
            consumer.stop()

    def balance_consumer(self, offset=0):
        """Consume from the topic forever with a balanced consumer.

        :param offset: unused by this demo; kept so existing callers keep
            working.
        :return: does not return normally (loops until an exception or
            interrupt).
        """
        topic = self.client.topics["kafka_test".encode()]
        # Per the original author's note: managed=True uses the newer
        # rebalancing method that does not need ZooKeeper, while
        # managed=False rebalances through ZooKeeper.
        consumer = topic.get_balanced_consumer(b"consumer_group_balanced2", managed=True)
        try:
            print("分区 {}".format(topic.partitions))
            print("最早可用offset {}".format(topic.earliest_available_offsets()))
            print("最近可用offset {}".format(topic.latest_available_offsets()))
            print("当前消费者分区offset情况{}".format(consumer.held_offsets))
            while True:
                msg = consumer.consume()
                held = consumer.held_offsets
                print("{}, 当前消费者分区offset情况{}".format(msg.value.decode(), held))
        finally:
            # Reached on Ctrl-C or any error: stop the consumer on the way out.
            consumer.stop()

if __name__ == "__main__":
    # Script entry point for the consumer demo. The balanced consumer runs
    # by default; call ``kafka_ins.simple_consumer()`` instead to try the
    # single-partition demo.
    kafka_ins = KafkaTest()
    kafka_ins.balance_consumer()

猜你喜欢

转载自blog.csdn.net/qq_37279279/article/details/80264904