package com.kyexpress.plugin.utils;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.kudu.ColumnSchema;
import org.apache.kudu.Type;
import org.apache.kudu.client.Insert;
import org.apache.kudu.client.KuduClient;
import org.apache.kudu.client.KuduPredicate;
import org.apache.kudu.client.KuduScanner;
import org.apache.kudu.client.KuduSession;
import org.apache.kudu.client.KuduTable;
import org.apache.kudu.client.PartialRow;
import org.apache.kudu.client.RowResult;
import org.apache.kudu.client.RowResultIterator;
import org.apache.kudu.client.SessionConfiguration;
import org.apache.kudu.client.Update;
import org.apache.kudu.client.Upsert;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.Maps;
import com.kyexpress.dsp.dataflow.plugin.core.collector.TaskCollector;
import com.kyexpress.dsp.dataflow.plugin.core.collector.communication.Communication;
import com.kyexpress.dsp.dataflow.plugin.core.collector.communication.CommunicationTool;
import com.kyexpress.dsp.dataflow.plugin.core.common.constant.CoreConstant;
import com.kyexpress.plugin.model.SinkTaskInfo; public class KuduUtils {
/** Logger shared by all instances of this class. */
private static final Logger logger = LoggerFactory.getLogger(KuduUtils.class);
/** Kudu client; assigned in the constructor and closed by {@code close()}. */
private KuduClient client;
/** Session that every write operation in this class is applied to and flushed through. */
private KuduSession kuduSession;
/** Value written to the {@code dataflow_flag} column for inserted rows. */
private static final String INSERT = "I";
/** Value written to {@code dataflow_flag} for updated rows; also compared against the {@code type} argument. */
private static final String UPDATE = "U";
/** Value written to {@code dataflow_flag} for rows deleted on the main topic. */
private static final String DELETE = "D";
/** Name of the column that stores the I/U/D marker. */
private static final String DATAFLOW_FLAG = "dataflow_flag";
/** Name of the column that stores the time the row was written by this class. */
private static final String DATAFLOW_TIME = "dataflow_time";
/** Number of applied operations after which the session is flushed. */
private static final int BATCH_SIZE = 100;
/** Source JSON field holding the operation time; its text is parsed as a long and passed to {@code new Timestamp(long)}. */
private static final String OPERATE_TIME = "ts";
/** Name of the column that stores the hash of the change-tracked field values. */
private static final String HASH_DIFF = "hash_diff";
/** Name of the column that stores the start of a zipper row's validity. */
private static final String START_DATE = "start_date";
/** Name of the column that stores the end of a zipper row's validity. */
private static final String END_DATE = "end_date";
/** {@code end_date} text given to newly inserted zipper rows; rows carrying it are the ones looked up when closing history. */
private static final String FINAL_END_DATE = "1970-01-01 00:00:00";
/** Formatter/parser for {@code yyyy-MM-dd HH:mm:ss} text; used to parse {@code FINAL_END_DATE}. */
private static final FastDateFormat df = FastDateFormat.getInstance("yyyy-MM-dd HH:mm:ss");

/**
 * Builds a Kudu client for the given address string and opens an initial session.
 *
 * @param kuduUrl address string handed to {@code KuduClient.KuduClientBuilder}
 */
public KuduUtils(String kuduUrl) {
    try {
        client = new KuduClient.KuduClientBuilder(kuduUrl).build();
        kuduSession = client.newSession();
    } catch (Exception e) {
        logger.error("初始化kudu连接失败", e);
        throw e;
    }
}

/**
 * Closes the current session and then the client. Failures are logged, never thrown,
 * and a failure to close the session no longer prevents the client from being closed.
 */
public void close() {
    closeCurrentSession();
    if (client != null) {
        try {
            client.close();
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }
    }
}

/**
 * Closes the session currently held in {@code kuduSession}, if any, logging any failure.
 * Closing a Kudu session flushes whatever operations are still buffered in it.
 */
private void closeCurrentSession() {
    if (kuduSession == null) {
        return;
    }
    try {
        kuduSession.close();
    } catch (Exception e) {
        logger.error(e.getMessage(), e);
    } finally {
        kuduSession = null;
    }
}

/**
 * Opens a table and prepares a fresh manual-flush session for writing to it.
 *
 * <p>The previous session is closed before it is replaced; without that, every call
 * left one more open session registered on the client.
 *
 * @param tableName name of the Kudu table to open
 * @return the opened table
 * @throws Exception if the table cannot be opened or the session cannot be created/configured
 */
private KuduTable getKuduTable(String tableName) throws Exception {
    try {
        // Open the table first so a failure here leaves the existing session untouched.
        KuduTable kuduTable = client.openTable(tableName);
        closeCurrentSession();
        // All writes must go through a KuduSession.
        kuduSession = client.newSession();
        // Operations are buffered until flush() is called explicitly.
        kuduSession.setFlushMode(SessionConfiguration.FlushMode.MANUAL_FLUSH);
        kuduSession.setMutationBufferSpace(1000);
        return kuduTable;
    } catch (Exception e) {
        logger.error(e.getMessage(), e);
        throw e;
    }
}
/**
 * Upserts a batch of records into the target table, tagging each row with an I/U flag
 * and the current time.
 *
 * <p>Any exception is caught: the whole batch is handed to {@code taskCollector} as a
 * dirty record and the error is logged; nothing is rethrown.
 * NOTE(review): batches flushed before a failure stay written, and the failed-record
 * counter is not increased here — confirm that is intended.
 *
 * @param insertDatas   records to write
 * @param sinkTaskInfo  task configuration (target table, field mappings, key settings)
 * @param flag          {@code true} for the main topic, {@code false} for a secondary topic
 * @param type          {@code "U"} marks the rows as updates; anything else (including
 *                      {@code null}) marks them as inserts
 * @param communication counter sink; receives the number of records written
 * @param taskCollector receives the batch as a dirty record on failure
 * @throws Exception declared for callers; failures are handled internally
 */
public void addFullData(List<ObjectNode> insertDatas, SinkTaskInfo sinkTaskInfo, boolean flag, String type,
        Communication communication, TaskCollector taskCollector) throws Exception {
    try {
        KuduTable kuduTable = getKuduTable(sinkTaskInfo.getTargetTableName());
        Map<String, Type> typeMap = getTypeMap(kuduTable);
        // A secondary topic needs its own key mapped onto the main topic's business key.
        if (!flag) {
            getAddKeyToMap(sinkTaskInfo, flag);
        }
        // source field -> target column
        Map<String, String> itemKeyMap = sinkTaskInfo.getItemKeyMap();
        // target column -> how its value is obtained
        Map<String, String> toColumnValueMap = sinkTaskInfo.getToColumnValueMap();
        Timestamp time = new Timestamp(System.currentTimeMillis());
        // Constant-first comparison: a null type falls back to INSERT instead of throwing.
        String dataflowFlag = UPDATE.equals(type) ? UPDATE : INSERT;
        int pending = 0;
        for (ObjectNode objectNode : insertDatas) {
            Upsert upsert = kuduTable.newUpsert();
            PartialRow row = upsert.getRow();
            fillRow(row, typeMap, objectNode, itemKeyMap, toColumnValueMap);
            row.addString(DATAFLOW_FLAG, dataflowFlag);
            row.addTimestamp(DATAFLOW_TIME, time);
            kuduSession.apply(upsert);
            if (++pending == BATCH_SIZE) {
                kuduSession.flush();
                pending = 0;
            }
        }
        kuduSession.flush();
        communication.increaseCounter(CommunicationTool.WRITE_SUCCEED_RECORDS, insertDatas.size());
    } catch (Exception e) {
        // String.valueOf keeps this handler from throwing when the list itself is null.
        taskCollector.collectDirtyRecord(String.valueOf(insertDatas), e, e.getMessage());
        logger.error("批量处理kudu数据失败,失败原因:", e);
    }
}
/**
 * Writes "delete" upserts for a batch of records.
 *
 * <p>For the main topic each record is stripped down to its business key and the row is
 * flagged with {@code "D"}. For a secondary topic every field of the record is set to
 * JSON null so the mapped target columns are cleared. In both cases the source business
 * key is put back before the row is filled. The input nodes are modified in place.
 *
 * <p>Any exception is caught, the batch is reported as a dirty record and the error logged.
 *
 * @param deleteDatas   records to process
 * @param sinkTaskInfo  task configuration
 * @param flag          {@code true} for the main topic, {@code false} for a secondary topic
 * @param communication counter sink; receives the number of records written
 * @param taskCollector receives the batch as a dirty record on failure
 * @throws Exception declared for callers; failures are handled internally
 */
public void deleteFullData(List<ObjectNode> deleteDatas, SinkTaskInfo sinkTaskInfo, boolean flag,
        Communication communication, TaskCollector taskCollector) throws Exception {
    try {
        KuduTable table = getKuduTable(sinkTaskInfo.getTargetTableName());
        Map<String, Type> columnTypes = getTypeMap(table);
        if (!flag) {
            getAddKeyToMap(sinkTaskInfo, flag);
        }
        // target column -> how its value is obtained
        Map<String, String> valueModes = sinkTaskInfo.getToColumnValueMap();
        // source field -> target column
        Map<String, String> fieldMapping = sinkTaskInfo.getItemKeyMap();
        Timestamp now = new Timestamp(System.currentTimeMillis());
        String targetKey = sinkTaskInfo.getBusinessPrimaryKey();

        // Find the source field that is mapped onto the business primary key.
        String sourceKey = "";
        for (Map.Entry<String, String> mapping : fieldMapping.entrySet()) {
            if (mapping.getValue().equals(targetKey)) {
                sourceKey = mapping.getKey();
                break;
            }
        }

        int sinceFlush = 0;
        for (ObjectNode record : deleteDatas) {
            Upsert upsert = table.newUpsert();
            PartialRow row = upsert.getRow();
            // Remember the key value before the record is wiped below.
            JsonNode keyValue = record.get(sourceKey);
            if (!flag) {
                // Secondary topic: null out every field so the target columns are cleared.
                Iterator<String> names = record.fieldNames();
                while (names.hasNext()) {
                    record.set(names.next(), null);
                }
            } else {
                // Main topic: keep only the key and mark the row as deleted.
                record.removeAll();
                row.addString(DATAFLOW_FLAG, DELETE);
            }
            // Restore the source business key; it identifies the target row.
            record.set(sourceKey, keyValue);
            fillRow(row, columnTypes, record, fieldMapping, valueModes);
            row.addTimestamp(DATAFLOW_TIME, now);
            kuduSession.apply(upsert);
            sinceFlush++;
            if (sinceFlush == BATCH_SIZE) {
                kuduSession.flush();
                sinceFlush = 0;
            }
        }
        kuduSession.flush();
        communication.increaseCounter(CommunicationTool.WRITE_SUCCEED_RECORDS, deleteDatas.size());
    } catch (Exception e) {
        taskCollector.collectDirtyRecord(deleteDatas.toString(), e, e.getMessage());
        logger.error("批量处理kudu数据失败,失败原因:", e);
    }
}

/**
 * For a secondary topic, registers its key column as a source of the business primary
 * key and records the value type of that key. Does nothing for the main topic.
 *
 * @param sinkTaskInfo task configuration whose mapping maps are modified in place
 * @param flag         {@code true} for the main topic, {@code false} for a secondary topic
 */
private void getAddKeyToMap(SinkTaskInfo sinkTaskInfo, boolean flag) {
    if (flag) {
        return;
    }
    Map<String, String> fieldMapping = sinkTaskInfo.getItemKeyMap();
    fieldMapping.put(sinkTaskInfo.getColumnKey(), sinkTaskInfo.getBusinessPrimaryKey());
    Map<String, String> valueModes = sinkTaskInfo.getToColumnValueMap();
    valueModes.put(sinkTaskInfo.getBusinessPrimaryKey(), sinkTaskInfo.getBusinessPrimaryKeyValueType());
}
/**
 * Inserts a batch of records into a zipper (history) table.
 *
 * <p>Each row gets a physical key equal to the hash code of
 * {@code businessKeyValue + "_" + ts}, a {@code hash_diff} over the change-tracked
 * fields, {@code start_date = ts} and the open {@code end_date}. If anything throws,
 * the error is logged and the whole list is re-processed record by record through
 * {@code insertZipperData}.
 *
 * @param insertDatas   records to insert
 * @param sinkTaskInfo  task configuration
 * @param type          passed through unchanged to the per-record fallback
 * @param communication counter sink; receives the number of records written
 * @param taskCollector passed to the per-record fallback for dirty-record reporting
 * @throws Exception if the per-record fallback throws
 */
public void batchInsertZipperData(List<ObjectNode> insertDatas, SinkTaskInfo sinkTaskInfo, String type,
        Communication communication, TaskCollector taskCollector) throws Exception {
    try {
        KuduTable kuduTable = getKuduTable(sinkTaskInfo.getTargetTableName());
        Map<String, Type> typeMap = getTypeMap(kuduTable);
        String physicalKey = getPhysicalKuduKey(kuduTable);
        // source field -> target column
        Map<String, String> itemKeyMap = sinkTaskInfo.getItemKeyMap();
        // target column -> how its value is obtained
        Map<String, String> toColumnValueMap = sinkTaskInfo.getToColumnValueMap();
        Map<String, Boolean> isChangeMap = sinkTaskInfo.getIsChangeMap();

        // Source field mapped onto the business primary key; entries without a target are skipped.
        String targetBusinessKey = sinkTaskInfo.getBusinessPrimaryKey();
        String busPrimaryKey = "";
        for (Map.Entry<String, String> mapping : itemKeyMap.entrySet()) {
            String target = mapping.getValue();
            if (target != null && target.equals(targetBusinessKey)) {
                busPrimaryKey = mapping.getKey();
                break;
            }
        }

        // The open end date is the same for every row, so parse it once.
        Timestamp openEndDate = new Timestamp(df.parse(FINAL_END_DATE).getTime());
        int pending = 0;
        for (ObjectNode objectNode : insertDatas) {
            Insert insert = kuduTable.newInsert();
            PartialRow row = insert.getRow();
            fillRow(row, typeMap, objectNode, itemKeyMap, toColumnValueMap);
            // Missing key value -> empty string; missing ts -> current time in millis.
            String busPrimaryKeyValue = JsonNodeTools.asText(objectNode, busPrimaryKey, null);
            if (busPrimaryKeyValue == null) {
                busPrimaryKeyValue = "";
            }
            String ts = JsonNodeTools.asText(objectNode, OPERATE_TIME, null);
            if (ts == null) {
                ts = String.valueOf(System.currentTimeMillis());
            }
            row.addInt(physicalKey, busPrimaryKeyValue.concat("_").concat(ts).hashCode());
            row.addInt(HASH_DIFF, getHashDiffByIsChangeField(objectNode, isChangeMap));
            row.addTimestamp(START_DATE, new Timestamp(Long.parseLong(ts)));
            row.addTimestamp(END_DATE, openEndDate);
            kuduSession.apply(insert);
            if (++pending == BATCH_SIZE) {
                kuduSession.flush();
                pending = 0;
            }
        }
        kuduSession.flush();
        communication.increaseCounter(CommunicationTool.WRITE_SUCCEED_RECORDS, insertDatas.size());
    } catch (Exception e) {
        logger.error("批量处理kudu数据失败,失败原因:", e);
        logger.info("批量处理kudu数据失败,开始逐条进行处理");
        insertZipperData(insertDatas, sinkTaskInfo, type, communication, taskCollector);
    }
}
/**
 * Inserts zipper-table records one at a time, flushing after every record.
 *
 * <p>A record that fails is counted as failed and reported as a dirty record; the loop
 * then continues with the next one. A failure during setup (opening the table, reading
 * the configuration) reports the whole list as dirty and is logged.
 *
 * @param insertDatas   records to insert
 * @param sinkTaskInfo  task configuration
 * @param type          not used by this method
 * @param communication counter sink for per-record success/failure counts
 * @param taskCollector receives dirty records
 * @throws Exception declared for callers; failures are handled internally
 */
private void insertZipperData(List<ObjectNode> insertDatas, SinkTaskInfo sinkTaskInfo, String type,
        Communication communication, TaskCollector taskCollector) throws Exception {
    try {
        KuduTable kuduTable = getKuduTable(sinkTaskInfo.getTargetTableName());
        Map<String, Type> typeMap = getTypeMap(kuduTable);
        String physicalKey = getPhysicalKuduKey(kuduTable);
        // source field -> target column
        Map<String, String> itemKeyMap = sinkTaskInfo.getItemKeyMap();
        // target column -> how its value is obtained
        Map<String, String> toColumnValueMap = sinkTaskInfo.getToColumnValueMap();
        Map<String, Boolean> isChangeMap = sinkTaskInfo.getIsChangeMap();

        // Source field mapped onto the business primary key; entries without a target are skipped.
        String targetBusinessKey = sinkTaskInfo.getBusinessPrimaryKey();
        String busPrimaryKey = "";
        for (Map.Entry<String, String> mapping : itemKeyMap.entrySet()) {
            String target = mapping.getValue();
            if (target != null && target.equals(targetBusinessKey)) {
                busPrimaryKey = mapping.getKey();
                break;
            }
        }

        // The open end date is the same for every row, so parse it once.
        Timestamp openEndDate = new Timestamp(df.parse(FINAL_END_DATE).getTime());
        for (ObjectNode objectNode : insertDatas) {
            try {
                Insert insert = kuduTable.newInsert();
                PartialRow row = insert.getRow();
                fillRow(row, typeMap, objectNode, itemKeyMap, toColumnValueMap);
                // Missing key value -> empty string; missing ts -> current time in millis.
                String busPrimaryKeyValue = JsonNodeTools.asText(objectNode, busPrimaryKey, null);
                if (busPrimaryKeyValue == null) {
                    busPrimaryKeyValue = "";
                }
                String ts = JsonNodeTools.asText(objectNode, OPERATE_TIME, null);
                if (ts == null) {
                    ts = String.valueOf(System.currentTimeMillis());
                }
                // The zipper table's key is the hash code of business key + "_" + timestamp.
                row.addInt(physicalKey, busPrimaryKeyValue.concat("_").concat(ts).hashCode());
                row.addInt(HASH_DIFF, getHashDiffByIsChangeField(objectNode, isChangeMap));
                row.addTimestamp(START_DATE, new Timestamp(Long.parseLong(ts)));
                row.addTimestamp(END_DATE, openEndDate);
                kuduSession.apply(insert);
                kuduSession.flush();
                communication.increaseCounter(CommunicationTool.WRITE_SUCCEED_RECORDS, 1);
            } catch (Exception e) {
                communication.increaseCounter(CommunicationTool.WRITE_FAILED_RECORDS, 1);
                taskCollector.collectDirtyRecord(String.valueOf(objectNode), e, e.getMessage());
            }
        }
    } catch (Exception e) {
        // String.valueOf keeps this handler from throwing when the list itself is null.
        taskCollector.collectDirtyRecord(String.valueOf(insertDatas), e, e.getMessage());
        logger.error("批量处理kudu数据失败,失败原因:", e);
    }
}
/**
 * Applies a batch of update events to a zipper (history) table.
 *
 * <p>Each event carries the previous values under {@code CoreConstant.KAFKA_OLD} and the
 * current record under {@code CoreConstant.KAFKA_DATA}. If any field present in the old
 * values is change-tracked ({@code isChangeMap}), the open history row is closed with
 * {@code end_date = ts} and a new row is inserted. Otherwise the existing rows for the
 * business key are updated in place, limited to the columns that appear in the old values.
 *
 * <p>If anything throws, the error is logged and the whole list is re-processed record
 * by record through {@code updateZipperData}.
 *
 * @param updateDatas   update events
 * @param sinkTaskInfo  task configuration
 * @param type          passed through unchanged to the per-record fallback
 * @param communication counter sink; receives the number of events processed
 * @param taskCollector passed to the per-record fallback for dirty-record reporting
 * @throws Exception if the per-record fallback throws
 */
public void batchUpdateZipperData(List<ObjectNode> updateDatas, SinkTaskInfo sinkTaskInfo, String type,
        Communication communication, TaskCollector taskCollector) throws Exception {
    try {
        KuduTable kuduTable = getKuduTable(sinkTaskInfo.getTargetTableName());
        Map<String, Type> typeMap = getTypeMap(kuduTable);
        String physicalKey = getPhysicalKuduKey(kuduTable);
        // source field -> target column
        Map<String, String> itemKeyMap = sinkTaskInfo.getItemKeyMap();
        // target column -> how its value is obtained
        Map<String, String> toColumnValueMap = sinkTaskInfo.getToColumnValueMap();
        Map<String, Boolean> isChangeMap = sinkTaskInfo.getIsChangeMap();

        // Source field mapped onto the business primary key; entries without a target are skipped.
        String targetBusPrimaryKey = sinkTaskInfo.getBusinessPrimaryKey();
        String sourceBusPrimaryKey = "";
        for (Map.Entry<String, String> mapping : itemKeyMap.entrySet()) {
            String target = mapping.getValue();
            if (target != null && target.equals(targetBusPrimaryKey)) {
                sourceBusPrimaryKey = mapping.getKey();
                break;
            }
        }

        // The open end date is the same for every row, so parse it once.
        Timestamp openEndDate = new Timestamp(df.parse(FINAL_END_DATE).getTime());
        int pending = 0;
        for (ObjectNode node : updateDatas) {
            ObjectNode oldObj = (ObjectNode) node.get(CoreConstant.KAFKA_OLD);
            ObjectNode objectNode = (ObjectNode) node.get(CoreConstant.KAFKA_DATA);

            // Mapping restricted to the fields that changed in THIS event. It is rebuilt per
            // event so an in-place history update never touches columns that only changed
            // in earlier events of the batch.
            Map<String, String> keyMap = new HashMap<>();
            boolean trackedFieldChanged = false;
            Iterator<String> oldKeys = oldObj.fieldNames();
            while (oldKeys.hasNext()) {
                String key = oldKeys.next();
                String target = itemKeyMap.get(key);
                if (target != null) {
                    keyMap.put(key, target);
                }
                // Fields absent from isChangeMap count as not change-tracked.
                if (Boolean.TRUE.equals(isChangeMap.get(key))) {
                    trackedFieldChanged = true;
                }
            }

            // Missing key value -> empty string; missing ts -> current time in millis.
            String sourceBusPrimaryKeyValue = JsonNodeTools.asText(objectNode, sourceBusPrimaryKey, null);
            if (sourceBusPrimaryKeyValue == null) {
                sourceBusPrimaryKeyValue = "";
            }
            String ts = JsonNodeTools.asText(objectNode, OPERATE_TIME, null);
            if (ts == null) {
                ts = String.valueOf(System.currentTimeMillis());
            }

            if (trackedFieldChanged) {
                // Insert the new version and close the currently open history row.
                Insert insert = kuduTable.newInsert();
                PartialRow row = insert.getRow();
                fillRow(row, typeMap, objectNode, itemKeyMap, toColumnValueMap);
                row.addInt(physicalKey, sourceBusPrimaryKeyValue.concat("_").concat(ts).hashCode());
                row.addInt(HASH_DIFF, getHashDiffByIsChangeField(objectNode, isChangeMap));
                row.addTimestamp(START_DATE, new Timestamp(Long.parseLong(ts)));
                row.addTimestamp(END_DATE, openEndDate);
                Update update = updateHistoryEndDate(kuduTable, physicalKey, targetBusPrimaryKey,
                        sourceBusPrimaryKey, ts, typeMap.get(targetBusPrimaryKey), objectNode);
                kuduSession.apply(update);
                kuduSession.apply(insert);
                if (++pending == BATCH_SIZE) {
                    kuduSession.flush();
                    pending = 0;
                }
            } else {
                // Only untracked fields changed: rewrite them on the existing history rows.
                updateHistoryData(kuduTable, physicalKey, targetBusPrimaryKey, sourceBusPrimaryKey, typeMap, keyMap,
                        objectNode, toColumnValueMap);
            }
        }
        kuduSession.flush();
        communication.increaseCounter(CommunicationTool.WRITE_SUCCEED_RECORDS, updateDatas.size());
    } catch (Exception e) {
        logger.error("批量处理kudu数据失败,失败原因:", e);
        logger.error("update批量处理kudu数据失败,进行单条处理");
        updateZipperData(updateDatas, sinkTaskInfo, type, communication, taskCollector);
    }
}

/**
 * Applies zipper-table update events one record at a time, flushing after each record.
 *
 * @param updateDatas   update events
 * @param sinkTaskInfo  task configuration
 * @param type          not used by this method
 * @param communication counter sink for per-record success/failure counts
 * @param taskCollector receives dirty records
 * @throws Exception declared for callers
 */
public void updateZipperData(List<ObjectNode> updateDatas, SinkTaskInfo sinkTaskInfo, String type,
        Communication communication, TaskCollector taskCollector) throws Exception {
    // Per event: if a change-tracked field appears in the old values, the open history row
    // is closed and a new row inserted; otherwise existing rows are updated in place.
    // A failing event is counted and reported as dirty, and the loop continues.
    try {
        KuduTable kuduTable = getKuduTable(sinkTaskInfo.getTargetTableName());
        Map<String, Type> typeMap = getTypeMap(kuduTable);
        String physicalKey = getPhysicalKuduKey(kuduTable);
        // source field -> target column
        Map<String, String> itemKeyMap = sinkTaskInfo.getItemKeyMap();
        // target column -> how its value is obtained
        Map<String, String> toColumnValueMap = sinkTaskInfo.getToColumnValueMap();
        Map<String, Boolean> isChangeMap = sinkTaskInfo.getIsChangeMap();

        // Source field mapped onto the business primary key; entries without a target are skipped.
        String targetBusPrimaryKey = sinkTaskInfo.getBusinessPrimaryKey();
        String sourceBusPrimaryKey = "";
        for (Map.Entry<String, String> mapping : itemKeyMap.entrySet()) {
            String target = mapping.getValue();
            if (target != null && target.equals(targetBusPrimaryKey)) {
                sourceBusPrimaryKey = mapping.getKey();
                break;
            }
        }

        // The open end date is the same for every row, so parse it once.
        Timestamp openEndDate = new Timestamp(df.parse(FINAL_END_DATE).getTime());
        for (ObjectNode node : updateDatas) {
            ObjectNode oldObj = (ObjectNode) node.get(CoreConstant.KAFKA_OLD);
            ObjectNode objectNode = (ObjectNode) node.get(CoreConstant.KAFKA_DATA);
            try {
                // Mapping restricted to the fields that changed in THIS event; rebuilt per
                // event so an in-place update never touches columns from earlier events.
                Map<String, String> keyMap = new HashMap<>();
                boolean trackedFieldChanged = false;
                Iterator<String> oldKeys = oldObj.fieldNames();
                while (oldKeys.hasNext()) {
                    String key = oldKeys.next();
                    String target = itemKeyMap.get(key);
                    if (target != null) {
                        keyMap.put(key, target);
                    }
                    // Fields absent from isChangeMap count as not change-tracked.
                    if (Boolean.TRUE.equals(isChangeMap.get(key))) {
                        trackedFieldChanged = true;
                    }
                }

                // Missing key value -> empty string; missing ts -> current time in millis.
                String sourceBusPrimaryKeyValue = JsonNodeTools.asText(objectNode, sourceBusPrimaryKey, null);
                if (sourceBusPrimaryKeyValue == null) {
                    sourceBusPrimaryKeyValue = "";
                }
                String ts = JsonNodeTools.asText(objectNode, OPERATE_TIME, null);
                if (ts == null) {
                    ts = String.valueOf(System.currentTimeMillis());
                }

                if (trackedFieldChanged) {
                    // Insert the new version and close the currently open history row.
                    Insert insert = kuduTable.newInsert();
                    PartialRow row = insert.getRow();
                    fillRow(row, typeMap, objectNode, itemKeyMap, toColumnValueMap);
                    row.addInt(physicalKey, sourceBusPrimaryKeyValue.concat("_").concat(ts).hashCode());
                    row.addInt(HASH_DIFF, getHashDiffByIsChangeField(objectNode, isChangeMap));
                    row.addTimestamp(START_DATE, new Timestamp(Long.parseLong(ts)));
                    row.addTimestamp(END_DATE, openEndDate);
                    Update update = updateHistoryEndDate(kuduTable, physicalKey, targetBusPrimaryKey,
                            sourceBusPrimaryKey, ts, typeMap.get(targetBusPrimaryKey), objectNode);
                    kuduSession.apply(update);
                    kuduSession.apply(insert);
                } else {
                    // Only untracked fields changed: rewrite them on the existing history rows.
                    updateHistoryData(kuduTable, physicalKey, targetBusPrimaryKey, sourceBusPrimaryKey, typeMap,
                            keyMap, objectNode, toColumnValueMap);
                }
                kuduSession.flush();
                communication.increaseCounter(CommunicationTool.WRITE_SUCCEED_RECORDS, 1);
            } catch (Exception e) {
                communication.increaseCounter(CommunicationTool.WRITE_FAILED_RECORDS, 1);
                // Report the data node when present, else the whole event; never dereference
                // a null node here, which would abort the remaining events.
                taskCollector.collectDirtyRecord(String.valueOf(objectNode != null ? objectNode : node), e,
                        e.getMessage());
            }
        }
    } catch (Exception e) {
        taskCollector.collectDirtyRecord(String.valueOf(updateDatas), e, e.getMessage());
        logger.error("单条处理kudu数据方式失败,记录失败数据,失败原因:", e);
    }
}

/**
 * Rewrites the given columns on every existing row that matches the record's business
 * key; used when only fields that are not change-tracked were modified.
 *
 * @param kuduTable           target table
 * @param physicalKey         name of the table's key column
 * @param targetBusPrimaryKey target column holding the business primary key
 * @param sourceBusPrimaryKey source field holding the business primary key
 * @param typeMap             column name -> Kudu type
 * @param keyMap              source field -> target column, limited to the fields to rewrite
 * @param objectNode          current record
 * @param toColumnValueMap    target column -> how its value is obtained
 * @throws Exception if scanning or writing fails
 */
private void updateHistoryData(KuduTable kuduTable, String physicalKey, String targetBusPrimaryKey,
        String sourceBusPrimaryKey, Map<String, Type> typeMap, Map<String, String> keyMap, ObjectNode objectNode,
        Map<String, String> toColumnValueMap) throws Exception {
    // Scans the rows whose business key equals the record's key (projecting only the
    // physical key) and applies one Update per row, filled from objectNode via keyMap.
    List<String> projection = new ArrayList<String>();
    projection.add(physicalKey);
    KuduScanner.KuduScannerBuilder builder = client.newScannerBuilder(kuduTable)
            .setProjectedColumnNames(projection);
    // Restrict the scan to the record's business key.
    addBuilderParmert(builder, typeMap.get(targetBusPrimaryKey), objectNode, kuduTable, sourceBusPrimaryKey,
            targetBusPrimaryKey);
    KuduScanner scanner = builder.build();
    try {
        int pending = 0;
        while (scanner.hasMoreRows()) {
            RowResultIterator rowResults = scanner.nextRows();
            while (rowResults.hasNext()) {
                RowResult result = rowResults.next();
                Update update = kuduTable.newUpdate();
                PartialRow row = update.getRow();
                row.addInt(physicalKey, result.getInt(physicalKey));
                fillRow(row, typeMap, objectNode, keyMap, toColumnValueMap);
                kuduSession.apply(update);
                // Check after every applied row: checking once per scanner batch could step
                // over multiples of BATCH_SIZE and let the buffer grow without a flush.
                if (++pending == BATCH_SIZE) {
                    kuduSession.flush();
                    pending = 0;
                }
            }
        }
        kuduSession.flush();
    } finally {
        // Release the scanner even when a write fails midway.
        try {
            scanner.close();
        } catch (Exception closeError) {
            logger.warn("Failed to close Kudu scanner", closeError);
        }
    }
}

/**
 * Builds the {@code Update} that closes the currently open history row of a business key
 * by setting its {@code end_date} to {@code ts}. The returned operation is not applied here.
 *
 * @param kuduTable           target table
 * @param physicalKey         name of the table's key column
 * @param targetBusPrimaryKey target column holding the business primary key
 * @param sourceBusPrimaryKey source field holding the business primary key
 * @param ts                  new end date as epoch-millisecond text
 * @param type                Kudu type of the business key column
 * @param objectNode          current record, source of the business key value
 * @return the update operation, keyed by the physical key of the open row
 * @throws Exception if scanning fails or {@code ts} is not a valid long
 */
private Update updateHistoryEndDate(KuduTable kuduTable, String physicalKey, String targetBusPrimaryKey,
        String sourceBusPrimaryKey, String ts, Type type, ObjectNode objectNode) throws Exception {
    // Looks up the row for this business key whose end_date still equals FINAL_END_DATE and
    // returns an Update that sets its end_date to ts. The caller applies the operation.
    List<String> projection = new ArrayList<String>();
    projection.add(physicalKey);
    KuduScanner.KuduScannerBuilder builder = client.newScannerBuilder(kuduTable)
            .setProjectedColumnNames(projection);
    // Conditions: business key equals the record's key AND end_date is the open sentinel.
    addBuilderParmert(builder, type, objectNode, kuduTable, sourceBusPrimaryKey, targetBusPrimaryKey);
    builder.addPredicate(KuduPredicate.newComparisonPredicate(kuduTable.getSchema().getColumn(END_DATE),
            KuduPredicate.ComparisonOp.EQUAL, new Timestamp(df.parse(FINAL_END_DATE).getTime())));
    KuduScanner scanner = builder.build();
    // NOTE(review): when no row matches, the key stays 0 and the returned Update targets
    // key 0; when several match, only the last one scanned is closed. Callers apply the
    // result unconditionally, so this contract is kept as is.
    int openRowKey = 0;
    try {
        while (scanner.hasMoreRows()) {
            RowResultIterator rowResults = scanner.nextRows();
            while (rowResults.hasNext()) {
                openRowKey = rowResults.next().getInt(physicalKey);
            }
        }
    } finally {
        // Release the scanner even when the scan fails midway.
        try {
            scanner.close();
        } catch (Exception closeError) {
            logger.warn("Failed to close Kudu scanner", closeError);
        }
    }
    Update update = kuduTable.newUpdate();
    PartialRow row = update.getRow();
    row.addInt(physicalKey, openRowKey);
    row.addTimestamp(END_DATE, new Timestamp(Long.parseLong(ts)));
    return update;
}
/**
 * Adds a "business key column == record's key value" predicate to a scanner builder.
 *
 * <p>The value is always read from the record under {@code sourceBusPrimaryKey} and
 * converted according to the column's Kudu type.
 *
 * @param builder             scanner builder to restrict
 * @param type                Kudu type of the business key column
 * @param objectNode          record supplying the key value
 * @param kuduTable           table whose schema supplies the column definition
 * @param sourceBusPrimaryKey source field holding the business key value
 * @param targetBusPrimaryKey target column to compare against
 * @throws IllegalArgumentException if the type is unknown/unsupported or the record has no
 *                                  key value; without a predicate the scan would cover
 *                                  the whole table
 */
private void addBuilderParmert(KuduScanner.KuduScannerBuilder builder, Type type, ObjectNode objectNode,
        KuduTable kuduTable, String sourceBusPrimaryKey, String targetBusPrimaryKey) {
    if (type == null) {
        throw new IllegalArgumentException(
                "No Kudu type known for business key column '" + targetBusPrimaryKey + "'");
    }
    ColumnSchema keyColumn = kuduTable.getSchema().getColumn(targetBusPrimaryKey);
    KuduPredicate.ComparisonOp equal = KuduPredicate.ComparisonOp.EQUAL;
    KuduPredicate predicate;
    switch (type) {
    case INT8:
    case INT16:
    case INT32:
    case INT64:
    case FLOAT:
    case DOUBLE:
    case DECIMAL:
        Number number = JsonNodeTools.asNumber(objectNode, sourceBusPrimaryKey, null);
        requireKeyValue(number, sourceBusPrimaryKey);
        predicate = numericKeyPredicate(keyColumn, type, number);
        break;
    case STRING:
        String text = JsonNodeTools.asText(objectNode, sourceBusPrimaryKey, null);
        requireKeyValue(text, sourceBusPrimaryKey);
        predicate = KuduPredicate.newComparisonPredicate(keyColumn, equal, text);
        break;
    case UNIXTIME_MICROS:
        // Read from the source field like every other branch (the target column name is
        // not a field of the source record).
        Timestamp time = JsonNodeTools.asTimestamp(objectNode, sourceBusPrimaryKey, null);
        requireKeyValue(time, sourceBusPrimaryKey);
        predicate = KuduPredicate.newComparisonPredicate(keyColumn, equal, time);
        break;
    default:
        throw new IllegalArgumentException("Unsupported business key type " + type + " for column '"
                + targetBusPrimaryKey + "'");
    }
    builder.addPredicate(predicate);
}

/** Fails with a descriptive message when the record carries no value for the business key. */
private void requireKeyValue(Object value, String sourceBusPrimaryKey) {
    if (value == null) {
        throw new IllegalArgumentException(
                "Record has no value for business key field '" + sourceBusPrimaryKey + "'");
    }
}

/** Builds the equality predicate for a numeric key, converting the value to the column's type. */
private KuduPredicate numericKeyPredicate(ColumnSchema keyColumn, Type type, Number value) {
    KuduPredicate.ComparisonOp equal = KuduPredicate.ComparisonOp.EQUAL;
    switch (type) {
    case INT8:
        return KuduPredicate.newComparisonPredicate(keyColumn, equal, value.byteValue());
    case INT16:
        return KuduPredicate.newComparisonPredicate(keyColumn, equal, value.shortValue());
    case INT32:
        return KuduPredicate.newComparisonPredicate(keyColumn, equal, value.intValue());
    case INT64:
        return KuduPredicate.newComparisonPredicate(keyColumn, equal, value.longValue());
    case FLOAT:
        return KuduPredicate.newComparisonPredicate(keyColumn, equal, value.floatValue());
    case DOUBLE:
        return KuduPredicate.newComparisonPredicate(keyColumn, equal, value.doubleValue());
    case DECIMAL:
        return KuduPredicate.newComparisonPredicate(keyColumn, equal, toBigDecimal(value));
    default:
        throw new IllegalArgumentException("Not a numeric Kudu type: " + type);
    }
}

/**
 * Computes the {@code hash_diff} value: the hash code of the change-tracked field values
 * joined with {@code "_"}, in the iteration order of {@code isChangeMap}.
 *
 * <p>A field missing from the record contributes the text {@code "null"}. Entries whose
 * flag is {@code null} are treated as not tracked.
 *
 * @param objectNode  record supplying the values
 * @param isChangeMap source field -> whether the field is change-tracked
 * @return the hash code, or 0 when no field is tracked
 */
private int getHashDiffByIsChangeField(ObjectNode objectNode, Map<String, Boolean> isChangeMap) {
    StringBuilder joined = new StringBuilder();
    for (Map.Entry<String, Boolean> entry : isChangeMap.entrySet()) {
        if (Boolean.TRUE.equals(entry.getValue())) {
            joined.append(JsonNodeTools.asText(objectNode, entry.getKey(), null));
            joined.append("_");
        }
    }
    if (joined.length() == 0) {
        return 0;
    }
    // Drop the trailing separator before hashing.
    return joined.substring(0, joined.length() - 1).hashCode();
}
/**
 * Handles delete events for a zipper (history) table: for every record, the open history
 * row of its business key gets {@code end_date} set to the record's {@code ts}.
 *
 * <p>Any exception is caught, the batch is reported as a dirty record and the error logged.
 *
 * @param deleteDatas   delete events
 * @param sinkTaskInfo  task configuration
 * @param communication counter sink; receives the number of records processed
 * @param taskCollector receives the batch as a dirty record on failure
 * @throws Exception declared for callers; failures are handled internally
 */
public void deleteZipperData(List<ObjectNode> deleteDatas, SinkTaskInfo sinkTaskInfo, Communication communication,
        TaskCollector taskCollector) throws Exception {
    try {
        KuduTable kuduTable = getKuduTable(sinkTaskInfo.getTargetTableName());
        String physicalKey = getPhysicalKuduKey(kuduTable);
        // source field -> target column
        Map<String, String> itemKeyMap = sinkTaskInfo.getItemKeyMap();
        Map<String, Type> typeMap = getTypeMap(kuduTable);

        // Source field mapped onto the business primary key; entries without a target are skipped.
        String targetBusPrimaryKey = sinkTaskInfo.getBusinessPrimaryKey();
        String sourceBusPrimaryKey = "";
        for (Map.Entry<String, String> mapping : itemKeyMap.entrySet()) {
            String target = mapping.getValue();
            if (target != null && target.equals(targetBusPrimaryKey)) {
                sourceBusPrimaryKey = mapping.getKey();
                break;
            }
        }

        int pending = 0;
        for (ObjectNode objectNode : deleteDatas) {
            // Missing ts -> current time in millis.
            String ts = JsonNodeTools.asText(objectNode, OPERATE_TIME, null);
            if (ts == null) {
                ts = String.valueOf(System.currentTimeMillis());
            }
            Update update = updateHistoryEndDate(kuduTable, physicalKey, targetBusPrimaryKey, sourceBusPrimaryKey,
                    ts, typeMap.get(targetBusPrimaryKey), objectNode);
            kuduSession.apply(update);
            if (++pending == BATCH_SIZE) {
                kuduSession.flush();
                pending = 0;
            }
        }
        kuduSession.flush();
        communication.increaseCounter(CommunicationTool.WRITE_SUCCEED_RECORDS, deleteDatas.size());
    } catch (Exception e) {
        // String.valueOf keeps this handler from throwing when the list itself is null.
        taskCollector.collectDirtyRecord(String.valueOf(deleteDatas), e, e.getMessage());
        logger.error("批量处理kudu数据失败,失败原因:", e);
    }
}

/**
 * Maps every column name of the table to its Kudu type.
 *
 * @param table table whose schema is read
 * @return column name -> type
 */
private Map<String, Type> getTypeMap(KuduTable table) {
    Map<String, Type> typeMap = new HashMap<>();
    for (ColumnSchema columnSchema : table.getSchema().getColumns()) {
        typeMap.put(columnSchema.getName(), columnSchema.getType());
    }
    return typeMap;
}

/**
 * Returns the name of the first key column in the table's schema.
 *
 * @param table table whose schema is read
 * @return the key column's name, or {@code null} if no column is marked as key
 */
private String getPhysicalKuduKey(KuduTable table) {
    for (ColumnSchema column : table.getSchema().getColumns()) {
        if (column.isKey()) {
            return column.getName();
        }
    }
    return null;
}

/**
 * Copies the record's fields into a Kudu row.
 *
 * <p>For each field of {@code objectNode}: the target column is looked up in
 * {@code itemKeyMap} (unmapped fields are skipped). If the column's value type is set and
 * is not {@code CoreConstant.TARGET_COLUMN_VALUE_TYPE_SOURCE}, the current time is written.
 * Otherwise the field value is converted according to the column's Kudu type; a missing
 * value becomes NULL. Columns of unknown or unhandled types are skipped.
 *
 * @param row              row being populated
 * @param typeMap          column name -> Kudu type
 * @param objectNode       source record
 * @param itemKeyMap       source field -> target column
 * @param toColumnValueMap target column -> how its value is obtained
 */
private void fillRow(PartialRow row, Map<String, Type> typeMap, ObjectNode objectNode,
        Map<String, String> itemKeyMap, Map<String, String> toColumnValueMap) {
    Iterator<String> fieldNames = objectNode.fieldNames();
    while (fieldNames.hasNext()) {
        String fieldName = fieldNames.next();
        String targetName = itemKeyMap.get(fieldName);
        // No mapping for this field: nothing to write.
        if (StringUtils.isBlank(targetName)) {
            continue;
        }
        String valueType = toColumnValueMap.get(targetName);
        // A column without a configured value type is treated as "copy the source value"
        // instead of failing the whole row. NOTE(review): confirm this default.
        boolean useCurrentTime = valueType != null
                && !valueType.equals(CoreConstant.TARGET_COLUMN_VALUE_TYPE_SOURCE);
        if (useCurrentTime) {
            // Any non-source value type means "write the current time"; the field's own
            // value is not needed, so it is not parsed.
            row.addTimestamp(targetName, new Timestamp(System.currentTimeMillis()));
            continue;
        }
        Type type = typeMap.get(targetName);
        if (type == null) {
            continue;
        }
        switch (type) {
        case INT8:
        case INT16:
        case INT32:
        case INT64:
        case FLOAT:
        case DOUBLE:
        case DECIMAL:
            addNumber(row, targetName, type, JsonNodeTools.asNumber(objectNode, fieldName, null));
            break;
        case STRING:
            String text = JsonNodeTools.asText(objectNode, fieldName, null);
            if (text == null) {
                row.setNull(targetName);
            } else {
                row.addString(targetName, text);
            }
            break;
        case UNIXTIME_MICROS:
            Timestamp time = JsonNodeTools.asTimestamp(objectNode, fieldName, null);
            if (time == null) {
                row.setNull(targetName);
            } else {
                row.addTimestamp(targetName, time);
            }
            break;
        default:
            // Other Kudu types are not handled; the column is left untouched.
            break;
        }
    }
}

/**
 * Writes a numeric value to the target column using the setter that matches the column
 * type; a {@code null} value sets the column to NULL.
 */
private void addNumber(PartialRow row, String targetName, Type type, Number value) {
    if (value == null) {
        row.setNull(targetName);
        return;
    }
    switch (type) {
    case INT8:
        row.addByte(targetName, value.byteValue());
        break;
    case INT16:
        row.addShort(targetName, value.shortValue());
        break;
    case INT32:
        row.addInt(targetName, value.intValue());
        break;
    case INT64:
        row.addLong(targetName, value.longValue());
        break;
    case FLOAT:
        row.addFloat(targetName, value.floatValue());
        break;
    case DOUBLE:
        // Must address the target column; the source field name is not a column name.
        row.addDouble(targetName, value.doubleValue());
        break;
    case DECIMAL:
        row.addDecimal(targetName, toBigDecimal(value));
        break;
    default:
        break;
    }
}

/**
 * Converts a number to {@code BigDecimal}: {@code BigDecimal}/{@code BigInteger} are
 * converted directly, values whose double and long views are equal go through the long
 * view, everything else through the double view.
 *
 * @param number value to convert; must not be {@code null}
 * @return the value as a {@code BigDecimal}
 */
private BigDecimal toBigDecimal(Number number) {
    if (number instanceof BigDecimal) {
        return (BigDecimal) number;
    }
    if (number instanceof BigInteger) {
        return new BigDecimal((BigInteger) number);
    }
    long asLong = number.longValue();
    double asDouble = number.doubleValue();
    return asDouble == asLong ? BigDecimal.valueOf(asLong) : BigDecimal.valueOf(asDouble);
}
}
java da重连 java连接kudu
转载本文章为转载内容,我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题,欢迎原作者联系我们进行内容更正或删除文章。
提问和评论都可以,用心的回复会被更多人看到
评论
发布评论
相关文章
-
java 实现区域连 java连接kudu
概述不是给纯小白看的,也就不谈概念这种废话了,直接上代码。。。使用一个叫KuduPlus的小工具辅助测试。Maven依赖
java 实现区域连 java 大数据 数据库 Kudu -
java rabitmq自动重连 java连接rabbitmq
由于最近公司业务需要,对RabbitMQ做了一些研究 引入依赖Jar包 <dependency> <groupId>com.rabbitmq</groupId> <artifactId>amqp-client</artifactId> <version>4.2.0</version>…
java rabitmq自动重连 Rabbit 消息队列 rabbit-client java