The Kafka version I am using is 0.7.2.

The JDK version is 1.6.0_20.

The official example at http://kafka.apache.org/07/quickstart.html is not complete; the code below is my completed version, which compiles and runs.

Producer Code


import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.javaapi.producer.ProducerData;
import kafka.producer.ProducerConfig;

public class ProducerSample {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("zk.connect", "127.0.0.1:2181");
        props.put("serializer.class", "kafka.serializer.StringEncoder");

        ProducerConfig config = new ProducerConfig(props);
        Producer<String, String> producer = new Producer<String, String>(config);
        ProducerData<String, String> data = new ProducerData<String, String>("test-topic", "test-message2");
        producer.send(data);
        producer.close();
    }
}



Consumer Code



import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.Message;
import kafka.message.MessageAndMetadata;

public class ConsumerSample {

    public static void main(String[] args) {
        // specify some consumer properties
        Properties props = new Properties();
        props.put("zk.connect", "localhost:2181");
        props.put("zk.connectiontimeout.ms", "1000000");
        props.put("groupid", "test_group");

        // create the connection to the cluster
        ConsumerConfig consumerConfig = new ConsumerConfig(props);
        ConsumerConnector consumerConnector = Consumer.createJavaConsumerConnector(consumerConfig);

        // create 4 partitions of the stream for topic "test-topic", to allow 4 threads to consume
        Map<String, Integer> map = new HashMap<String, Integer>();
        map.put("test-topic", 4);
        Map<String, List<KafkaStream<Message>>> topicMessageStreams =
                consumerConnector.createMessageStreams(map);
        List<KafkaStream<Message>> streams = topicMessageStreams.get("test-topic");

        // create a pool of 4 threads to consume from each of the partitions
        ExecutorService executor = Executors.newFixedThreadPool(4);

        // consume the messages in the threads
        for (final KafkaStream<Message> stream : streams) {
            executor.submit(new Runnable() {
                public void run() {
                    for (MessageAndMetadata msgAndMetadata : stream) {
                        // process message (msgAndMetadata.message())
                        System.out.println("topic: " + msgAndMetadata.topic());
                        Message message = (Message) msgAndMetadata.message();
                        ByteBuffer buffer = message.payload();
                        byte[] bytes = new byte[message.payloadSize()];
                        buffer.get(bytes);
                        String tmp = new String(bytes);
                        System.out.println("message content: " + tmp);
                    }
                }
            });
        }
    }
}


After starting zookeeper and the kafka server, run the Producer code and then the Consumer code.


Running ProducerSample:


[screenshot: console output of ProducerSample]

Running ConsumerSample:


[screenshot: console output of ConsumerSample]

Since I am not very familiar with Java multithreading, I made a small change to the official Consumer code, as shown below:


import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.Message;
import kafka.message.MessageAndMetadata;

public class ConsumerSample2 {

    public static void main(String[] args) {
        // specify some consumer properties
        Properties props = new Properties();
        props.put("zk.connect", "localhost:2181");
        props.put("zk.connectiontimeout.ms", "1000000");
        props.put("groupid", "test_group");

        // create the connection to the cluster
        ConsumerConfig consumerConfig = new ConsumerConfig(props);
        ConsumerConnector consumerConnector = Consumer.createJavaConsumerConnector(consumerConfig);

        // a single stream for topic "test-topic", consumed on the main thread
        Map<String, Integer> map = new HashMap<String, Integer>();
        map.put("test-topic", 1);
        Map<String, List<KafkaStream<Message>>> topicMessageStreams =
                consumerConnector.createMessageStreams(map);
        List<KafkaStream<Message>> streams = topicMessageStreams.get("test-topic");

        for (KafkaStream<Message> stream : streams) {
            for (MessageAndMetadata msgAndMetadata : stream) {
                // process message (msgAndMetadata.message())
                System.out.println("topic: " + msgAndMetadata.topic());
                Message message = (Message) msgAndMetadata.message();
                ByteBuffer buffer = message.payload();
                byte[] bytes = new byte[message.payloadSize()];
                buffer.get(bytes);
                String tmp = new String(bytes);
                System.out.println("message content: " + tmp);
            }
        }
    }
}


I then sent another "test-message2" message from the Producer side, and the Consumer received two messages, as shown below:



[screenshot: console output of ConsumerSample2 showing the two messages]

Kafka is worth using in the right scenarios, such as distributed log collection or system monitoring. Deploying it involves a zookeeper environment and a kafka environment, plus some configuration work. The following walks through how to use kafka.

    We build the zk ensemble from 3 zookeeper instances and the kafka cluster from 2 kafka brokers.

    Kafka is version 0.8 and zookeeper is version 3.4.5.

 

1. Zookeeper Cluster Setup

    We have 3 zk instances: zk-0, zk-1, and zk-2. If you just want to test, a single zk instance is enough.

    1) zk-0

    Adjust the configuration file:

clientPort=2181
server.0=127.0.0.1:2888:3888
server.1=127.0.0.1:2889:3889
server.2=127.0.0.1:2890:3890
## only the settings above need to change; keep the defaults for everything else

    Start zookeeper:

./zkServer.sh start
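    One detail the snippet above does not show, but which ZooKeeper requires in a multi-instance ensemble: each instance's dataDir must contain a myid file whose content matches that instance's server.N number. A minimal sketch, with illustrative dataDir paths:

# zoo.cfg for zk-0 would also carry, e.g.: dataDir=/tmp/zk-0/data
echo 0 > /tmp/zk-0/data/myid    # zk-1 gets "1", zk-2 gets "2"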

    2) zk-1

    Adjust the configuration file (all other settings are the same as in zk-0):


clientPort=2182
## only the setting above needs to change; keep the defaults for everything else

    Start zookeeper:

 

./zkServer.sh start

    3) zk-2

    Adjust the configuration file (all other settings are the same as in zk-0):

clientPort=2183
## only the setting above needs to change; keep the defaults for everything else

    Start zookeeper:

 

./zkServer.sh start
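    Once all three instances are running, each one's role can be checked with the bundled status command:

./zkServer.sh status    # reports "Mode: leader" on one instance and "Mode: follower" on the other two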

  

2. Kafka Cluster Setup

    Because the broker configuration file refers to the zookeeper settings above, we show the broker configuration first. We use 2 kafka brokers, kafka-0 and kafka-1, to build the cluster.

    1) kafka-0

    Edit the configuration file under the config directory:

broker.id=0
port=9092
num.network.threads=2
num.io.threads=2
socket.send.buffer.bytes=1048576
socket.receive.buffer.bytes=1048576
socket.request.max.bytes=104857600
log.dir=./logs
num.partitions=2
log.flush.interval.messages=10000
log.flush.interval.ms=1000
log.retention.hours=168
#log.retention.bytes=1073741824
log.segment.bytes=536870912
## replication: the number of copies of each topic's partitions kept across the
## kafka cluster; raise above 1 to improve the cluster's fault tolerance
default.replication.factor=1
log.cleanup.interval.mins=10
zookeeper.connect=127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183
zookeeper.connection.timeout.ms=1000000

    Because kafka is written in Scala, the Scala-related environment must be prepared before it can be run:

> cd kafka-0
> ./sbt update
> ./sbt package
> ./sbt assembly-package-dependency

    The last command may fail with an exception; this can be ignored for now. Start the kafka broker:

> JMX_PORT=9997 bin/kafka-server-start.sh config/server.properties &

    Since the zookeeper ensemble is already running normally, there is no need to have kafka launch zookeeper itself. If you deploy multiple kafka brokers on one machine, you need to give each of them a distinct JMX_PORT.

    2) kafka-1

broker.id=1
port=9093
## all other settings are the same as in kafka-0

    Then run the same build commands as for kafka-0 and start this broker:

> JMX_PORT=9998 bin/kafka-server-start.sh config/server.properties &


    You can check each topic's partition/replica distribution and liveness with:

> bin/kafka-list-topic.sh --zookeeper localhost:2181
topic: my-replicated-topic  partition: 0    leader: 2   replicas: 1,2,0 isr: 2
topic: test partition: 0    leader: 0   replicas: 0 isr: 0
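    For reference, a topic like the my-replicated-topic shown above can be created ahead of time with the 0.8.0 admin script; a sketch (the replica and partition counts are illustrative):

> bin/kafka-create-topic.sh --zookeeper localhost:2181 --replica 2 --partition 1 --topic my-replicated-topic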

    At this point the environment is ready, so let's move on to the programming examples. [See also the detailed configuration parameter reference.]

 

3. Project Preparation

    The project is built with maven. The kafka Java client is, frankly, a mess, and setting up the build runs into plenty of trouble. The pom.xml below is a suggested starting point; the versions of the individual dependencies must be kept consistent with one another. If the kafka client version does not match the kafka server version, you will see many errors such as "broker id not exists", because the client-server protocol changed when kafka went from 0.7 to 0.8 (whose artifacts are named by Scala version, e.g. kafka_2.8.2).

<dependencies>
    <dependency>
        <groupId>log4j</groupId>
        <artifactId>log4j</artifactId>
        <version>1.2.14</version>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka_2.8.2</artifactId>
        <version>0.8.0</version>
        <exclusions>
            <exclusion>
                <groupId>log4j</groupId>
                <artifactId>log4j</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>2.8.2</version>
    </dependency>
    <dependency>
        <groupId>com.yammer.metrics</groupId>
        <artifactId>metrics-core</artifactId>
        <version>2.2.0</version>
    </dependency>
    <dependency>
        <groupId>com.101tec</groupId>
        <artifactId>zkclient</artifactId>
        <version>0.3</version>
    </dependency>
</dependencies>

 

4. Producer Code

    1) producer.properties: this file goes in the /resources directory

#partitioner.class=
## the broker list may be a subset of the kafka servers, because the producer
## only needs to fetch metadata from a broker; listing all brokers is still recommended
metadata.broker.list=127.0.0.1:9092,127.0.0.1:9093
## sync or async; async is recommended
producer.type=sync
compression.codec=0
serializer.class=kafka.serializer.StringEncoder
## effective only when producer.type=async
#batch.num.messages=100
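    For reference, a sketch of what the async settings might look like when enabled (the values are illustrative, not tuned recommendations):

#producer.type=async
## flush a batch once it reaches this many messages...
#batch.num.messages=200
## ...or once it has been buffered for this many ms
#queue.buffering.max.ms=5000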


    2) LogProducer.java sample code

package com.test.kafka;  // package name is illustrative; adjust to your project

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

public class LogProducer {

    private Producer<String, String> inner;

    public LogProducer() throws Exception {
        Properties properties = new Properties();
        properties.load(ClassLoader.getSystemResourceAsStream("producer.properties"));
        ProducerConfig config = new ProducerConfig(properties);
        inner = new Producer<String, String>(config);
    }

    public void send(String topicName, String message) {
        if (topicName == null || message == null) {
            return;
        }
        // if the topic has multiple partitions, use new KeyedMessage<K, V>(topicName, key, value)
        KeyedMessage<String, String> km = new KeyedMessage<String, String>(topicName, message);
        inner.send(km);
    }

    public void send(String topicName, Collection<String> messages) {
        if (topicName == null || messages == null) {
            return;
        }
        if (messages.isEmpty()) {
            return;
        }
        List<KeyedMessage<String, String>> kms = new ArrayList<KeyedMessage<String, String>>();
        for (String entry : messages) {
            KeyedMessage<String, String> km = new KeyedMessage<String, String>(topicName, entry);
            kms.add(km);
        }
        inner.send(kms);
    }

    public void close() {
        inner.close();
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        LogProducer producer = null;
        try {
            producer = new LogProducer();
            int i = 0;
            while (true) {
                producer.send("test-topic", "this is a sample" + i);
                i++;
                Thread.sleep(2000);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (producer != null) {
                producer.close();
            }
        }
    }
}
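    The comment in send() above mentions the keyed form of KeyedMessage. As a minimal sketch of how that might be used (the class name, topic, and key scheme are illustrative assumptions, not part of the original article): with a key present, the producer hashes the key to choose a partition, so all messages sharing a key stay ordered on one partition.

package com.test.kafka;  // illustrative, matching the samples above

import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

public class KeyedProducerSample {

    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.load(ClassLoader.getSystemResourceAsStream("producer.properties"));
        // the key is a String here, so the same StringEncoder can serialize it
        props.put("key.serializer.class", "kafka.serializer.StringEncoder");
        Producer<String, String> producer = new Producer<String, String>(new ProducerConfig(props));
        try {
            for (int i = 0; i < 10; i++) {
                String key = "user-" + (i % 2);  // two keys, so at most two partitions are used
                producer.send(new KeyedMessage<String, String>("test-topic", key, "event-" + i));
            }
        } finally {
            producer.close();
        }
    }
}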

 

5. Consumer Code

    1) consumer.properties: this file goes in the /resources directory

zookeeper.connect=127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183
## timeout in ms for connecting to zookeeper
zookeeper.connectiontimeout.ms=1000000
#consumer group id
group.id=test-group
#consumer timeout
#consumer.timeout.ms=5000
auto.commit.enable=true
auto.commit.interval.ms=60000

    2) LogConsumer.java sample code

package com.test.kafka;  // package name is illustrative; adjust to your project

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

public class LogConsumer {

    private ConsumerConfig config;
    private String topic;
    private int partitionsNum;
    private MessageExecutor executor;
    private ConsumerConnector connector;
    private ExecutorService threadPool;

    public LogConsumer(String topic, int partitionsNum, MessageExecutor executor) throws Exception {
        Properties properties = new Properties();
        properties.load(ClassLoader.getSystemResourceAsStream("consumer.properties"));
        config = new ConsumerConfig(properties);
        this.topic = topic;
        this.partitionsNum = partitionsNum;
        this.executor = executor;
    }

    public void start() throws Exception {
        connector = Consumer.createJavaConsumerConnector(config);
        Map<String, Integer> topics = new HashMap<String, Integer>();
        topics.put(topic, partitionsNum);
        Map<String, List<KafkaStream<byte[], byte[]>>> streams = connector.createMessageStreams(topics);
        List<KafkaStream<byte[], byte[]>> partitions = streams.get(topic);
        threadPool = Executors.newFixedThreadPool(partitionsNum);
        for (KafkaStream<byte[], byte[]> partition : partitions) {
            threadPool.execute(new MessageRunner(partition));
        }
    }

    public void close() {
        try {
            threadPool.shutdownNow();
        } catch (Exception e) {
            //
        } finally {
            connector.shutdown();
        }
    }

    class MessageRunner implements Runnable {
        private KafkaStream<byte[], byte[]> partition;

        MessageRunner(KafkaStream<byte[], byte[]> partition) {
            this.partition = partition;
        }

        public void run() {
            ConsumerIterator<byte[], byte[]> it = partition.iterator();
            while (it.hasNext()) {
                // connector.commitOffsets();  // commit offsets manually when auto.commit.enable=false
                MessageAndMetadata<byte[], byte[]> item = it.next();
                System.out.println("partition:" + item.partition());
                System.out.println("offset:" + item.offset());
                executor.execute(new String(item.message()));  // UTF-8; beware of encoding exceptions
            }
        }
    }

    interface MessageExecutor {

        public void execute(String message);
    }

    /**
     * @param args
     */
    public static void main(String[] args) {
        LogConsumer consumer = null;
        try {
            MessageExecutor executor = new MessageExecutor() {

                public void execute(String message) {
                    System.out.println(message);
                }
            };
            consumer = new LogConsumer("test-topic", 2, executor);
            consumer.start();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // if(consumer != null){
            //     consumer.close();
            // }
        }
    }
}

    A word of caution: the LogConsumer class above does very little error handling; in particular, you must think about what happens when MessageExecutor.execute() throws an exception. A sketch of one way to guard against this follows.
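    As a minimal sketch (assuming the LogConsumer above), MessageRunner.run() can catch per-message failures so that one bad message does not kill the thread consuming the partition:

public void run() {
    ConsumerIterator<byte[], byte[]> it = partition.iterator();
    while (it.hasNext()) {
        MessageAndMetadata<byte[], byte[]> item = it.next();
        try {
            executor.execute(new String(item.message(), "UTF-8"));
        } catch (Exception e) {
            // an exception escaping here would terminate this MessageRunner and
            // silently stop consumption of this partition; log it and move on
            e.printStackTrace();
        }
    }
}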

    When testing, start the consumer first and the producer second, so that you can watch the newest messages arrive in real time.

Reposted from: http://kafka.apache.org/downloads.html