1. 编写Schema
1.1 student.proto
1. package protobuf;
2. option java_package = "com.topsec.trd";
3. option java_outer_classname = "StudentProto";
4. message Student {
5. 1;
6. 2;
7. 3;
8. 4;
9. 5;
10.
11. }
1.2 student.thrift
1. namespace java com.topsec.trd
2. struct StudentThrift {
3. 1: string name,
4. 2: i32 age,
5. 3: i32 sex,
6. 4: optional string aliass,
7. 5: list<string> interest
8. }
1.3 student.avsc
1. {"namespace": "com.topsec.trd",
2. "type": "record",
3. "name": "StudentAvro",
4. "fields": [
5. "name": "name", "type": "string"},
6. "name": "alias", "type": ["string", "null"]},
7. "name": "age", "type": ["int", "null"]},
8. "name": "sex", "type": ["string", "null"]},
9. "name": "interet", "type": {"type": "array", "items": "string"}}
10. ]
11. }
2. 生成bean
生成protobuf bean
protoc --java_out=. student.proto
生成thrift bean
thrift-0.10.0.exe -gen java student.thrift
生成avro bean
java -jar lib/avro-tools-1.7.7.jar compile schema src/main/resource/student.avsc src/main/java
3. 编写测试代码
1. public class ProtoBuf {
2. private final static int TIMES = 10000000;
3. public static void main(String[] args) throws IOException {
4. long start = System.currentTimeMillis();
5. for(int i = 0; i < TIMES; i++) {
6. deserialize();
7. }
8. long end = System.currentTimeMillis();
9. "ProtoBuf total time \t" + (end -start));
10. }
11.
12. public static byte [] serializeAsBytes() {
13. return makeStudent().build().toByteArray();
14. }
15.
16. public static InputStream serializeAsStream() throws IOException {
17. new ByteArrayOutputStream();
18. makeStudent().build().writeDelimitedTo(baos);
19. return new ByteArrayInputStream(baos.toByteArray());
20. }
21.
22. private static Builder makeStudent() {
23. StudentProto.Student.Builder student = StudentProto.Student.newBuilder();
24. "小明");
25. 0);
26. 18);
27. new ArrayList<String>();
28. "吃饭");
29. "睡觉");
30. "打豆豆");
31. student.addAllInterest(interests);
32. return student;
33. }
34.
35. public static void deserialize() throws IOException {
36. byte [] bytes = serializeAsBytes();
37. StudentProto.Student student = StudentProto.Student.parseFrom(bytes);
38. System.out.println(student.getName());
39. }
40. }
1. public class Thrift {
2. private final static int TIMES = 10000000;
3. private final static TSerializer SERIALIZER = new TSerializer(new TBinaryProtocol.Factory());
4. private final static TDeserializer DESERIALIZER = new TDeserializer(new TBinaryProtocol.Factory());
5.
6. public static void main(String[] args) throws TException {
7. long start = System.currentTimeMillis();
8. for(int i = 0; i < TIMES; i++) {
9. deserialize();
10. }
11. long end = System.currentTimeMillis();
12. "Thrift total time \t" + (end -start));
13. }
14.
15. public static byte [] serialize() throws TException {
16. new StudentThrift();
17. "小明");
18. 0);
19. 18);
20. new ArrayList<String>();
21. "吃饭");
22. "睡觉");
23. "打豆豆");
24. stu.setInterest(interests);
25.
26. return SERIALIZER.serialize(stu);
27. }
28.
29. public static void deserialize() throws TException {
30. byte [] bytes = serialize();
31. new StudentThrift();
32. DESERIALIZER.deserialize(stu, bytes);
33. // System.out.println(stu);
34. }
35.
36. }
1. public class AVRO {
2. private final static int TIMES = 10000000;
3. public static void main(String[] args) throws IOException {
4. long start = System.currentTimeMillis();
5. deserializeAsBytes();
6. long end = System.currentTimeMillis();
7. "Avro total time \t" + (end -start));
8. }
9.
10. public static byte [] serializeAsBytes() throws IOException {
11. new StudentAvro();
12. "小明");
13. "女");
14. 18);
15. new ArrayList<CharSequence>();
16. "吃饭");
17. "睡觉");
18. "打豆豆");
19. student.setInteret(interests);
20. new ByteArrayOutputStream();
21. new SpecificDatumWriter<StudentAvro>(StudentAvro.class);
22. new DataFileWriter<StudentAvro>(userDatumWriter);
23. dataFileWriter.create(student.getSchema(), baos);
24. dataFileWriter.append(student);
25. dataFileWriter.close();
26. return baos.toByteArray();
27. }
28.
29. public static byte [] serializeAsBytes(int times) throws IOException {
30. new ByteArrayOutputStream();
31. new SpecificDatumWriter<StudentAvro>(StudentAvro.class);
32. new DataFileWriter<StudentAvro>(userDatumWriter);
33. new StudentAvro().getSchema(), baos);
34. for(int i = 0; i < times; i++) {
35. new StudentAvro();
36. "小明");
37. "女");
38. 18);
39. new ArrayList<CharSequence>();
40. "吃饭");
41. "睡觉");
42. "打豆豆");
43. student.setInteret(interests);
44.
45. dataFileWriter.append(student);
46. }
47. dataFileWriter.close();
48. return baos.toByteArray();
49. }
50.
51. public static void deserializeAsBytes() throws IOException {
52. new SeekableByteArrayInput(serializeAsBytes(TIMES));
53. new SpecificDatumReader<StudentAvro>(StudentAvro.class);
54. new DataFileReader<StudentAvro>(sbai, datumReader);
55. null;
56. while (dataFileReader.hasNext()) {
57. user = dataFileReader.next(user);
58. System.out.println(user.getName());
59. }
60. dataFileReader.close();
61. }
62.
63. }
4. 测试结果
private final static int TIMES = 100000;
ProtoBuf total time
282
Thrift total time
229
Avro total time
694
private final static int TIMES = 1000000;
ProtoBuf total time
988
Thrift total time
1248
Avro total time
2079
private final static int TIMES = 10000000;
ProtoBuf total time
7368
Thrift total time
10675
Avro total time
15025
4.1 小结
项/技术 | avro | thrift | protobuf |
速度 | 慢 | 中等 | 快 |
序列化到1个stream | 是 | 否 | 否 |
5.工程下载地址
(包含测试代码及三种schema和生成bean的工具等)
源代码下载地址-点击下载
6.protobuf分析
6.1 protobuf特点
(a)占用空间小
一条消息数据,用protobuf序列化后的大小是json的10分之一,xml格式的20分之一,是二进制序列化的10分之一(极端情况下,会大于等于直接序列化),总体看来ProtoBuf的优势还是很明显的。
(b)解析速度快
解析速度快,主要归功于protobuf对message没有动态解析,没有了动态解析的处理,序列化速度自然快了。就比如xml,获取文件之后,还需要解析标签、节点、字段,每一个都需要遍历,而protobuf不需要,直接将field装入流。
(c)兼容性好
fieldNumber 为每个field定义一个编号,其一保证不重复,其二保证其在流中的位置。如若当前数据流中有某个字段,而解析方没有相关的解析代码,解析方会直接skip掉这个field,而且读数据的position也会后移,保证后续读取不出问题。
参考文章:
http://www.jianshu.com/p/ec39f79c0412
https://www.ibm.com/developerworks/cn/linux/l-cn-gpb
6.2 字节流分析
1. private StudentProto.Student request(int age) {
2. StudentProto.Student.Builder builder = StudentProto.Student.newBuilder();
3. "小明");
4. 0);
5. builder.setAge(age);
6. return builder.build();
7. }
1. static final int TAG_TYPE_BITS = 3;
2. /** Makes a tag value given a field number and wire type. */
3. static int makeTag(final int fieldNumber, final int wireType) {
4. return (fieldNumber << TAG_TYPE_BITS) | wireType;
5. }
字符串存储:len+value,len是字符串的长度
分析字节流: