To use Kafka you first need to install ZooKeeper and Kafka. The ZooKeeper installation is covered in a separate article; installing Kafka itself is simple, you only need to extract the archive, so it is not covered here. If you get stuck, just Google/Baidu it!

Enough talk; let's look at the code:

1. Producer (message producer)

package com.kafka.test.test1;
import java.util.Properties; 
import kafka.javaapi.producer.Producer; 
import kafka.producer.KeyedMessage; 
import kafka.producer.ProducerConfig; 
import kafka.serializer.StringEncoder;
@SuppressWarnings("deprecation")
public class KafkaProducer {
	 public static void testProducer() {
	        Properties props = new Properties();
	        // List of Kafka broker instances
	        props.put("metadata.broker.list", "127.0.0.1:9092,127.0.0.1:9093,127.0.0.1:9094");
	        props.put("serializer.class", StringEncoder.class.getName());
	        props.put("key.serializer.class", "kafka.serializer.StringEncoder");
	        // Algorithm used to pick the target partition: either rely on the default
	        // or provide a custom kafka.producer.Partitioner implementation
	        props.put("partitioner.class", "com.kafka.test.test1.JavaKafkaProducerPartitioner");
	        // Allowed values are 0, 1 and -1, see
	        // http://kafka.apache.org/08/configuration.html
	        // 0: the broker sends no response regardless of whether the write succeeded;
	        //    on error it closes the connection, which makes the producer refresh its metadata
	        // 1: the broker responds as soon as the leader has written the message;
	        //    data may be lost if the leader fails before the followers replicate it
	        // -1: the broker responds only after all ISR replicas have received the message;
	        //    this is the strongest guarantee
	        props.put("request.required.acks", "1");
	        // Compression codec: 0 = none, 1 = gzip, 2 = snappy
	        props.put("compression.codec","2");
	        // Topics whose messages should be compressed
	        props.put("compressed.topics","test8");
	        // Send mode: "sync" or "async"; async is much faster.
	        // In a rough personal test the two were not even in the same league; try it yourself if interested.
	        props.put("producer.type", "async");
	        // The following settings only apply in async mode:
	        // Maximum time (ms) to buffer messages before sending a batch
	        props.put("queue.buffering.max.ms", "5000");
	        // Maximum number of messages to buffer
	        props.put("queue.buffering.max.messages", "2000");
	        // Number of messages sent per batch in async mode (default 200)
	        props.put("batch.num.messages", "500");
	        // How long to block when the buffer is full; -1 means block indefinitely
	        props.put("queue.enqueue.timeout.ms", "-1");
	        // Producer configuration
	        ProducerConfig config = new ProducerConfig(props);
	        // Create the producer from the configuration
	        Producer<String, String> producer = new Producer<String, String>(config);
	        // Messages
	        int i=0;
	        long  begin = System.currentTimeMillis();
	        while(true) {
	        	 StringBuffer msg = new StringBuffer("hello world 8 hello kafka test");
	        	 msg.append(i);
	 	        // Create the message for topic "test8". No key is given here, so the producer
	 	        // picks the partition itself; the custom partitioner is only consulted when a key is present.
	 	        KeyedMessage<String, String> data = new KeyedMessage<String, String>("test8", msg.toString());
	 	        //发送消息
	 	        producer.send(data);
	 	        i++;
	 	        if(i >=320000)
	 	        	break;
	        }
	        long  end = System.currentTimeMillis();
	        long total =(end-begin)/1000;
	        System.out.println("耗时:"+total);
	        // Release resources
	        producer.close();
	    }
	    public static void main(String[] args) {
	        testProducer();
	    }
}
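
As noted in the comment above, the KeyedMessage is created without a key, so the custom partitioner is never actually exercised. The class below is a minimal sketch (the class name KeyedProducerSketch and the message contents are made up for illustration) that sends keyed messages of the form key_N so that the JavaKafkaProducerPartitioner shown in the next section gets used; the broker list and topic are the same as above.

package com.kafka.test.test1;

import java.util.Properties;

import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;

// Minimal sketch: send keyed messages so the custom partitioner is actually used.
// Assumes brokers on 127.0.0.1:9092-9094 and the topic "test8", as in the example above.
@SuppressWarnings("deprecation")
public class KeyedProducerSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("metadata.broker.list", "127.0.0.1:9092,127.0.0.1:9093,127.0.0.1:9094");
        props.put("serializer.class", "kafka.serializer.StringEncoder");
        props.put("key.serializer.class", "kafka.serializer.StringEncoder");
        props.put("partitioner.class", "com.kafka.test.test1.JavaKafkaProducerPartitioner");
        props.put("request.required.acks", "1");

        Producer<String, String> producer = new Producer<String, String>(new ProducerConfig(props));
        for (int i = 0; i < 10; i++) {
            // The key "key_<i>" is parsed by JavaKafkaProducerPartitioner to pick partition i % numPartitions
            KeyedMessage<String, String> data =
                    new KeyedMessage<String, String>("test8", "key_" + i, "hello kafka with key " + i);
            producer.send(data);
        }
        producer.close();
    }
}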

2. Partitioner (the algorithm the producer uses to choose a topic partition)

package com.kafka.test.test1;

import kafka.producer.Partitioner;
import kafka.utils.VerifiableProperties;

/**
 * Lets the producer decide, with its own algorithm, which partition a message is sent to.
 *
 * @author Administrator
 */
public class JavaKafkaProducerPartitioner implements Partitioner {
    /**
     * No-argument constructor
     */
    public JavaKafkaProducerPartitioner() {
        this(new VerifiableProperties());
    }

    /**
     * Required constructor; Kafka instantiates the partitioner reflectively and
     * passes in a VerifiableProperties instance.
     *
     * @param properties context properties
     */
    public JavaKafkaProducerPartitioner(VerifiableProperties properties) {
        // nothing to do
    }

    @Override
    public int partition(Object key, int numPartitions) {
        // Assumes keys of the form "key_N": strip the prefix and map N onto a partition
        int num = Integer.valueOf(((String) key).replaceAll("key_", "").trim());
        return num % numPartitions;
    }
    }
}
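
This partitioner assumes every key looks like key_N; any other key will cause a NumberFormatException, and a null key a NullPointerException. If the key format cannot be guaranteed, a hash-based fallback is safer. The class below is a sketch of that idea (the class name HashKafkaProducerPartitioner is made up for illustration):

package com.kafka.test.test1;

import kafka.producer.Partitioner;
import kafka.utils.VerifiableProperties;

// Sketch of a more defensive partitioner: falls back to hashing when the key
// does not follow the "key_N" convention.
public class HashKafkaProducerPartitioner implements Partitioner {

    public HashKafkaProducerPartitioner(VerifiableProperties properties) {
        // nothing to do
    }

    @Override
    public int partition(Object key, int numPartitions) {
        if (key == null) {
            return 0;
        }
        String k = key.toString();
        if (k.startsWith("key_")) {
            try {
                return Integer.parseInt(k.substring("key_".length()).trim()) % numPartitions;
            } catch (NumberFormatException ignore) {
                // fall through to hashing
            }
        }
        // Math.abs(Integer.MIN_VALUE) is negative, so mask the sign bit instead
        return (k.hashCode() & 0x7fffffff) % numPartitions;
    }
}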

3. Consumer (message consumer)

package com.kafka.test.test1;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
public class Consumers {
	private final ConsumerConnector consumer;
	private final String topic;
	private ExecutorService executor; // the actual message handling runs on a thread pool
	public Consumers(String a_zookeeper, String a_groupId, String a_topic) {
		this.consumer = kafka.consumer.Consumer
				.createJavaConsumerConnector(createConsumerConfig(a_zookeeper,
						a_groupId));
		this.topic = a_topic;
	}
	private static ConsumerConfig createConsumerConfig(String a_zookeeper,
			String a_groupId) {
		Properties props = new Properties();
		props.put("zookeeper.connect", a_zookeeper); // ZooKeeper cluster address
		props.put("group.id", a_groupId); // consumer group
		props.put("zookeeper.session.timeout.ms", "1000"); // session expires if no heartbeat arrives within this many ms
		props.put("zookeeper.sync.time.ms", "1000"); // how far (ms) a ZooKeeper follower may lag behind the leader
		props.put("auto.commit.interval.ms", "1000"); // interval (ms) at which offsets are auto-committed to ZooKeeper
		props.put("auto.offset.reset", "smallest"); // where to start when there is no committed offset; smallest = oldest available
		props.put("serializer.class", "kafka.serializer.StringEncoder");
		props.put("queued.max.message.chunks", "50"); // max number of message chunks buffered on the consumer (default 10)
		props.put("rebalance.max.retries", "5"); // max number of retries during a consumer rebalance
		props.put("fetch.min.bytes", "6553600"); // minimum amount of data (bytes) returned per fetch; larger values use more consumer memory
		props.put("fetch.wait.max.ms", "5000"); // max time (ms) the server blocks when too little data is available; afterwards whatever is there is sent
		return new ConsumerConfig(props);
	}
	public void shutdown() {
		if (consumer != null)
			consumer.shutdown();
		if (executor != null)
			executor.shutdown();
	}
	public void run(int a_numThreads) {
		// Create concurrent consumers:
		// the map says which topic to read and with how many threads
		Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
		topicCountMap.put(topic, Integer.valueOf(a_numThreads));
		// Create the streams
		Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer
				.createMessageStreams(topicCountMap);
		List<KafkaStream<byte[], byte[]>> streams = consumerMap.get(topic);
		System.out.println("streams.size = " + streams.size());
		executor = Executors.newFixedThreadPool(a_numThreads);
		int threadNumber = 0;
		for (final KafkaStream<byte[], byte[]> stream : streams) {
			executor.submit(new ConsumerTest(stream, threadNumber));
			threadNumber++;
		}
	}
	public class ConsumerTest implements Runnable {
		private KafkaStream<byte[], byte[]> m_stream;
		private int m_threadNumber;
		public ConsumerTest(KafkaStream<byte[], byte[]> a_stream, int a_threadNumber) {
			m_threadNumber = a_threadNumber;
			m_stream = a_stream;
		}
		public void run() {
			// One KafkaStream per thread. A stream delivers a sequence of messages
			// (fetched in chunks whose size the consumer can configure), and the
			// iterator walks over those messages.
			ConsumerIterator<byte[], byte[]> it = m_stream.iterator();
			while (it.hasNext()) {
				byte[] bb = it.next().message();
				try {
					String str = new String(bb, "utf-8");
					System.out.println("Message: " + str);
				} catch (UnsupportedEncodingException e) {
					e.printStackTrace();
				}
			}
			System.out.println("Shutting down Thread: " + m_threadNumber);
		}
	}
	public static void main(String[] args) {
		String zooKeeper = "127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183";
		String groupId = "group2";
		String topic = "test8";
		int threads = 3;
		Consumers example = new Consumers(zooKeeper, groupId, topic);
		example.run(threads);
	}
}
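
As written, main starts the consumer threads and never stops them, so the process keeps consuming until it is killed. If you want the example to stop cleanly, one option is a small driver like the sketch below (the class name ConsumerShutdownSketch and the 30-second window are made up; it reuses the Consumers class above):

package com.kafka.test.test1;

// Sketch of a bounded run: start the Consumers example, let it consume for a
// while, then shut down the connector and the thread pool.
public class ConsumerShutdownSketch {
    public static void main(String[] args) {
        Consumers example = new Consumers(
                "127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183", "group2", "test8");
        example.run(3);
        try {
            Thread.sleep(30000); // consume for 30 seconds
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
        }
        // shutdown() closes the ConsumerConnector, which makes hasNext() return false
        // in each ConsumerTest thread, and then stops the executor
        example.shutdown();
    }
}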

server.properties configuration file. Not every setting is shown here, because many of them are left at their defaults; this is just a quick look. Adjust the values according to the configuration reference covered earlier and your own business requirements.

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# see kafka.server.KafkaConfig for additional details and defaults

############################# Server Basics #############################

# The id of the broker. This must be set to a unique integer for each broker.
broker.id=0
port=9092
# host.name is required when running several brokers as a cluster
host.name=127.0.0.1
# Switch to enable topic deletion or not, default value is false
#delete.topic.enable=true

############################# Socket Server Settings #############################

# The address the socket server listens on. It will get the value returned from 
# java.net.InetAddress.getCanonicalHostName() if not configured.
#   FORMAT:
#     listeners = listener_name://host_name:port
#   EXAMPLE:
#     listeners = PLAINTEXT://your.host.name:9092
#listeners=PLAINTEXT://:9094

# Hostname and port the broker will advertise to producers and consumers. If not set, 
# it uses the value for "listeners" if configured.  Otherwise, it will use the value
# returned from java.net.InetAddress.getCanonicalHostName().
#advertised.listeners=PLAINTEXT://your.host.name:9094

# Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details
#listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL

# The number of threads handling network requests
num.network.threads=3

# The number of threads doing disk I/O
num.io.threads=8

# The send buffer (SO_SNDBUF) used by the socket server
socket.send.buffer.bytes=102400

# The receive buffer (SO_RCVBUF) used by the socket server
socket.receive.buffer.bytes=102400

# The maximum size of a request that the socket server will accept (protection against OOM)
socket.request.max.bytes=104857600


############################# Log Basics #############################

# A comma separated list of directories under which to store log files
#log.dirs=/tmp/kafka-logs
# Use forward slashes (or doubled backslashes) on Windows: this file is parsed as a Java
# properties file, so single backslashes are treated as escape characters and dropped.
log.dirs=D:/kafka/kafka_2.10/log

# The default number of log partitions per topic. More partitions allow greater
# parallelism for consumption, but this will also result in more files across
# the brokers.
num.partitions=3

# The number of threads per data directory to be used for log recovery at startup and flushing at shutdown.
# This value is recommended to be increased for installations with data dirs located in RAID array.
num.recovery.threads.per.data.dir=1

############################# Log Flush Policy #############################

# Messages are immediately written to the filesystem but by default we only fsync() to sync
# the OS cache lazily. The following configurations control the flush of data to disk.
# There are a few important trade-offs here:
#    1. Durability: Unflushed data may be lost if you are not using replication.
#    2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush.
#    3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks.
# The settings below allow one to configure the flush policy to flush data after a period of time or
# every N messages (or both). This can be done globally and overridden on a per-topic basis.

# The number of messages to accept before forcing a flush of data to disk
#log.flush.interval.messages=10000

# The maximum amount of time a message can sit in a log before we force a flush
#log.flush.interval.ms=1000

############################# Log Retention Policy #############################

# The following configurations control the disposal of log segments. The policy can
# be set to delete segments after a period of time, or after a given size has accumulated.
# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens
# from the end of the log.

# The minimum age of a log file to be eligible for deletion due to age
log.retention.hours=168

# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
# segments don't drop below log.retention.bytes. Functions independently of log.retention.hours.
#log.retention.bytes=1073741824

# The maximum size of a log segment file. When this size is reached a new log segment will be created.
log.segment.bytes=1073741824

# The interval at which log segments are checked to see if they can be deleted according
# to the retention policies
log.retention.check.interval.ms=300000

############################# Zookeeper #############################

# Zookeeper connection string (see zookeeper docs for details).
# This is a comma separated host:port pairs, each corresponding to a zk
# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
# You can also append an optional chroot string to the urls to specify the
# root directory for all kafka znodes.
#zookeeper.connect=127.0.0.1:2181
zookeeper.connect=127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183
# Timeout in ms for connecting to zookeeper
zookeeper.connection.timeout.ms=10000
# number of fetcher threads used to replicate partition data from leaders
num.replica.fetchers=2

A brief note:

This example runs everything locally as a pseudo-cluster: three ZooKeeper instances on one machine, and three Kafka brokers started on the same machine. The installation itself is not covered here; see the link given at the beginning of the article.
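
For reference, in such a local pseudo-cluster the other two brokers only need a handful of overrides in their own copies of server.properties. The file names and log directories below are examples only; the ports match the metadata.broker.list used by the producer (9093 and 9094):

# server-1.properties (second broker) - example overrides only
broker.id=1
port=9093
host.name=127.0.0.1
log.dirs=D:/kafka/kafka_2.10/log-1

# server-2.properties (third broker) - example overrides only
broker.id=2
port=9094
host.name=127.0.0.1
log.dirs=D:/kafka/kafka_2.10/log-2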