When sending data to DataHub, it is recommended to use Producer. The benefit is that you do not need to specify a shardId, so business code does not have to change when DataHub shards are added or removed.
Also note that DataHub shardIds only ever increase; old shards are not reused and can only be deactivated.

<dependency>
    <groupId>com.aliyun.datahub</groupId>
    <artifactId>aliyun-sdk-datahub</artifactId>
    <version>2.18.0-public</version>
</dependency>
<dependency>
    <groupId>com.aliyun.datahub</groupId>
    <artifactId>datahub-client-library</artifactId>
    <version>1.1.12-public</version>
</dependency>


import com.aliyun.datahub.client.exception.AuthorizationFailureException;
import com.aliyun.datahub.client.exception.DatahubClientException;
import com.aliyun.datahub.client.exception.InvalidParameterException;
import com.aliyun.datahub.client.exception.MalformedRecordException;
import com.aliyun.datahub.client.exception.NoPermissionException;
import com.aliyun.datahub.client.exception.ShardNotFoundException;
import com.aliyun.datahub.client.model.Field;
import com.aliyun.datahub.client.model.FieldType;
import com.aliyun.datahub.client.model.RecordEntry;
import com.aliyun.datahub.client.model.RecordSchema;
import com.aliyun.datahub.client.model.TupleRecordData;
import com.aliyun.datahub.clientlibrary.config.ProducerConfig;
import com.aliyun.datahub.clientlibrary.producer.Producer;
import com.aliyun.datahub.client.exception.ResourceNotFoundException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;

public class DatahubWriter {
    private static final Logger LOG = LoggerFactory.getLogger(DatahubWriter.class);

    private static void sleep(long milliSeconds) {
        try {
            TimeUnit.MILLISECONDS.sleep(milliSeconds);
        } catch (InterruptedException e) {
            // TODO: handle the interruption as appropriate for your application
        }
    }

    private static List<RecordEntry> genRecords(RecordSchema schema) {
        List<RecordEntry> recordEntries = new ArrayList<>();
        for (int cnt = 0; cnt < 10; ++cnt) {
            RecordEntry entry = new RecordEntry();
            entry.addAttribute("key1", "value1");
            entry.addAttribute("key2", "value2");

            TupleRecordData data = new TupleRecordData(schema);
            data.setField("field1", "testValue");
            data.setField("field2", 1);

            entry.setRecordData(data);
            recordEntries.add(entry);
        }
        return recordEntries;
    }

    private static void sendRecords(Producer producer, List<RecordEntry> recordEntries) {
        int maxRetry = 3;
        while (true) {
            try {
                // Let the producer pick a shard automatically
                producer.send(recordEntries, maxRetry);

                // Or write to a specific shard, e.g. shard "0"
                // producer.send(recordEntries, "0", maxRetry);
                LOG.info("send records: {}", recordEntries.size());
                break;
            } catch (MalformedRecordException e) {
                // Record format is invalid; ignore or rethrow depending on your business needs
                LOG.error("write fail", e);
                throw e;
            } catch (InvalidParameterException |
                    AuthorizationFailureException |
                    NoPermissionException e) {
                // Invalid request parameters, bad signature, or missing permission
                LOG.error("write fail", e);
                throw e;
            } catch (ShardNotFoundException e) {
                // Shard does not exist; if you are not writing to a specific shard, this can be ignored
                LOG.error("write fail", e);
                sleep(1000);
            } catch (ResourceNotFoundException e) {
                // Project, topic, or shard does not exist
                LOG.error("write fail", e);
                throw e;
            } catch (DatahubClientException e) {
                // Base exception class (network problems, etc.); retrying is an option
                LOG.error("write fail", e);
                sleep(1000);
            }
        }
    }

    public static void main(String[] args) {
        // The endpoint below uses the China (Hangzhou) region as an example; use the endpoint for your actual region
        String endpoint = "http://dh-cn-hangzhou.aliyuncs.com";
        String accessId = "<YourAccessKeyId>";
        String accessKey = "<YourAccessKeySecret>";
        String projectName = "<YourProjectName>";
        String topicName = "<YourTopicName>";

        RecordSchema schema = new RecordSchema();
        schema.addField(new Field("field1", FieldType.STRING));
        schema.addField(new Field("field2", FieldType.BIGINT));

        ProducerConfig config = new ProducerConfig(endpoint, accessId, accessKey);
        Producer producer = new Producer(projectName, topicName, config);

        // Control the loop according to your scenario
        boolean stop = false;
        try {
            while (!stop) {
                List<RecordEntry> recordEntries = genRecords(schema);
                sendRecords(producer, recordEntries);
            }
        } finally {
            // Make sure resources are released
            producer.close();
        }
    }
}

The RecordSchema in the example above can also be fetched dynamically through the DatahubClient:
RecordSchema recordSchema = datahubClient.getTopic(projectName, topicName).getRecordSchema();

How to initialize the DatahubClient:

// https://help.aliyun.com/document_detail/158841.html
// The endpoint below uses the China (Hangzhou) region as an example; use the endpoint for your actual region
String endpoint = "http://dh-cn-hangzhou.aliyuncs.com";
String accessId = "<YourAccessKeyId>";
String accessKey = "<YourAccessKeySecret>";
// Create a DatahubClient instance
DatahubClient datahubClient = DatahubClientBuilder.newBuilder()
        .setDatahubConfig(
                new DatahubConfig(endpoint,
                        // Whether to enable binary transfer; supported by server version 2.12 and later
                        new AliyunAccount(accessId, accessKey), true))
        // If this fails on a private-cloud (Apsara Stack) deployment, try setting the flag above to false
        // HttpConfig is optional; defaults are used if it is not set
        .setHttpConfig(new HttpConfig()
                .setCompressType(HttpConfig.CompressType.LZ4) // LZ4 compression is recommended for reading and writing data
                .setConnTimeout(10000))
        .build();
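
Putting the two pieces together, here is a minimal sketch (reusing the placeholder endpoint/credentials and the genRecords helper from the examples above) that fetches the schema from the service instead of hard-coding it and then writes with the Producer:

// Fetch the topic's RecordSchema from the service instead of declaring it by hand
RecordSchema schema = datahubClient.getTopic(projectName, topicName).getRecordSchema();

// Build the Producer exactly as in the earlier example
ProducerConfig config = new ProducerConfig(endpoint, accessId, accessKey);
Producer producer = new Producer(projectName, topicName, config);
try {
    // genRecords is the helper from the DatahubWriter example above
    List<RecordEntry> recordEntries = genRecords(schema);
    producer.send(recordEntries, 3);
} finally {
    producer.close();
}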

Errors you may encounter:

[{"errorcode":"LimitExceeded","index":1,"message":"The limit of throughput rate is exceeded."},{"errorcode":"LimitExceeded","index":2,"message":"The limit of throughput rate is exceeded."}],"requestId":"202203101111111111111"}

The error means the relevant limits were exceeded.
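
One straightforward way to react to this error is to back off and retry. The sketch below is an assumption-level example: it checks the exception message for the "LimitExceeded" code shown above (adjust the check to however your SDK version exposes the error code) and reuses the sleep helper from the earlier example.

// Back off and retry when the throughput limit is hit.
// Matching on the message text is an assumption; your SDK version may expose a dedicated error code instead.
int attempts = 0;
while (true) {
    try {
        producer.send(recordEntries, 3);
        break;
    } catch (DatahubClientException e) {
        if (attempts < 5 && e.getMessage() != null && e.getMessage().contains("LimitExceeded")) {
            attempts++;
            sleep(1000L * attempts);   // wait a little longer after each failed attempt
        } else {
            throw e;
        }
    }
}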

Metric descriptions and where to view them:

The web console provides a Metric feature: on the Metric page you can view near-real-time, topic-level traffic information. The metrics currently provided are:

QPS: read/write requests per second
RPS: read/write records per second
Throughput: read/write throughput per second (in KB)
Latency: latency per read/write request (in microseconds)

https://help.aliyun.com/document_detail/158786.html

Description of the relevant limits (exceeding them raises an error):


https://help.aliyun.com/document_detail/47441.html

The following three limits are all based on data size, just measured at different granularities:
Single String length: applies to a single field
HTTP BodySize: applies to a single write request
Throughput limit: the combined size of all requests at a given point in time. If it is exceeded, the error is:

The limit of throughput rate is exceeded.
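
If a single request is too large, one option is to split the record list into smaller batches before sending, so each request stays under the body-size limit and the per-second volume is spread out. The batch size of 500 below is an arbitrary illustrative value, not a documented limit.

// Split a large list of records into smaller batches; 500 per batch is only an example value
int batchSize = 500;
for (int start = 0; start < recordEntries.size(); start += batchSize) {
    int end = Math.min(start + batchSize, recordEntries.size());
    producer.send(recordEntries.subList(start, end), 3);
}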

How RPS is counted when records are submitted in batches:

List<RecordEntry> recordEntries
producer.send(recordEntries, maxRetry);

When calling the batch interface, each entry in the List counts as one record. From the API's point of view there is only one call, but the RPS can still be very high: for example, if the List contains 10,000 entries, the RPS is 10,000.

In the SDK, the batch API sends the pending records to DataHub as a single request, and DataHub processes them one by one after receiving it.
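
In other words, RPS counts records rather than API calls. A small illustrative sketch (the record count and the once-per-second pacing are assumptions for illustration only):

// Suppose recordEntries holds 10,000 records and send() is called once per second:
// QPS = 1 request/second, but RPS = 10,000 records/second, which can trigger the throughput limit.
while (true) {
    producer.send(recordEntries, 3);   // one API call, recordEntries.size() records counted
    sleep(1000);                       // roughly one call per second
}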