为什么需要TTL
- 状态不需要一直存储(否则状态会无限增长)
- 状态有效期有时间限制,超过时间需要重置状态(业务上)
TTL设置
// Build the TTL configuration that will later be attached to a state descriptor.
StateTtlConfig ttlConfig = StateTtlConfig
// Time-to-live of the state: 10 seconds
.newBuilder(Time.seconds(10))
// When the state's timestamp is refreshed: on creation and on every write
.setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
// How to treat state that has expired but has not been cleaned up yet: never return it
.setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
// Cleanup strategy for expired entries: drop them when a full snapshot is taken
.cleanupFullSnapshot()
.build();
当然,上方仅是状态过期的配置,还未与具体的状态绑定。要让该配置生效,需要在对应状态的状态描述器(StateDescriptor)上调用 enableTimeToLive 开启 TTL
ex:
// Descriptor for the MapState named "locationState" (String keys, Location values)
MapStateDescriptor<String, Location> mapStateDescriptor = new MapStateDescriptor<>("locationState",
TypeInformation.of(String.class),
TypeInformation.of(Location.class));
// Enable time-to-live on this state using the TTL configuration built above
mapStateDescriptor.enableTimeToLive(ttlConfig);
// Obtain the state handle from the runtime context
this.locationState = getRuntimeContext().getMapState(mapStateDescriptor);
TTL设置分析
TTL作用域
目前(Flink 1.12)状态的 TTL 只支持基于处理时间(Processing Time)来计算
TTL过期时间:
通过 newBuilder(Time.seconds(1)) 设置(Time 类位于 org.apache.flink.api.common.time 包下),表示状态的过期时间
一旦设置了 TTL,那么如果状态上次更新的时间戳 + TTL 小于等于当前时间(即当前时间已经超过了"时间戳 + TTL"),则表明状态过期了。
状态时间戳更新时机
UpdateType
通过
setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
设置
表示状态时间戳的更新的时机(延长状态有效期),是一个 Enum 对象。
如果设置为 Disabled,则表明禁用 TTL,状态不会过期;
如果设置为 OnCreateAndWrite,则表明当状态创建或每次写入时都会更新时间戳;
如果设置为 OnReadAndWrite,在状态创建、写入、读取均会更新状态的时间戳。
过期状态处理策略(是否返回给用户)
StateVisibility
通过
setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
设置
表示对已过期但还未被清理掉的状态如何处理
如果设置为 ReturnExpiredIfNotCleanedUp,那么即使这个状态的时间戳表明它已经过期了,但是只要还未被真正清理掉,就会被返回给调用方;(即即使状态过期了,仍会把过期的状态返回给用户)
如果设置为 NeverReturnExpired,那么一旦这个状态过期了,那么永远不会被返回给调用方,只会返回空状态,避免了过期状态带来的干扰。(过期的状态不会返回给用户)
过期状态清理策略
CleanupStrategies
.cleanupIncrementally() 增量清理
.cleanupFullSnapshot() 全量快照时清理(执行完整快照时过滤掉已过期的状态)
表示过期对象的清理策略
当设置为 FULL_STATE_SCAN_SNAPSHOT 时,对应的是 EmptyCleanupStrategy 类,表示对过期状态不做主动清理,当执行完整快照(Snapshot / Checkpoint)时,会生成一个较小的状态文件,但本地状态并不会减小。唯有当作业重启并从上一个快照点恢复后,本地状态才会实际减小,因此可能仍然不能解决内存压力的问题。
为了应对这个问题,Flink 还提供了增量清理的枚举值,分别是针对 Heap StateBackend 的 INCREMENTAL_CLEANUP(对应 IncrementalCleanupStrategy 类)
以及对 RocksDB StateBackend 有效的 ROCKSDB_COMPACTION_FILTER(对应 RocksdbCompactFilterCleanupStrategy 类)
对于增量清理功能,Flink 可以被配置为每读取若干条记录就执行一次清理操作,而且可以指定每次要清理多少条失效记录;对于 RocksDB 的状态清理,则是通过 JNI 来调用 C++ 语言编写的 FlinkCompactionFilter 来实现,底层是通过 RocksDB 提供的后台 Compaction 操作来实现对失效状态过滤的。
DEMO
package com.leilei;
import cn.hutool.core.util.RandomUtil;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.WindowedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.streaming.api.functions.windowing.RichWindowFunction;
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow;
import org.apache.flink.util.Collector;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.Random;
/**
* @author lei
* @version 1.0
* @desc flink 有状态计算 状态过期设置(ttl)
* @date 2021-03-19 15:39
*/
public class Flink_State_3_TTL {
/**
 * Entry point of the demo job.
 *
 * <p>Runs in streaming mode with parallelism 4: reads {@link Location} records from
 * {@link LocationSource}, keys them by vehicle id, puts every single record into its own
 * count window and prints whatever {@link SpeedAlarmWindow} emits.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the job fails to execute
 */
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
    environment.setRuntimeMode(RuntimeExecutionMode.STREAMING);
    environment.setParallelism(4);

    DataStreamSource<Location> locations = environment.addSource(new LocationSource());

    // A count window of size 1 fires for each incoming record of a vehicle.
    WindowedStream<Location, Integer, GlobalWindow> perVehicle =
            locations.keyBy(Location::getVehicleId).countWindow(1);

    perVehicle.apply(new SpeedAlarmWindow()).print();

    environment.execute("state-ttl");
}
/**
 * Window function that reports overspeed episodes, remembering the last overspeed record
 * of a vehicle in a TTL-enabled {@link MapState}.
 *
 * <p>For every record in the window:
 * <ul>
 *   <li>no stored record and the vehicle is speeding: store it and emit "overspeed started";</li>
 *   <li>stored record and the vehicle is still speeding: overwrite it and emit "still speeding"
 *       together with the previous record;</li>
 *   <li>stored record and the vehicle is no longer speeding: remove it and emit "overspeed ended";</li>
 *   <li>no stored record and the vehicle is not speeding: emit nothing.</li>
 * </ul>
 *
 * <p>The state is configured in {@link #open(Configuration)} with a 10 second TTL, so a stored
 * record that has not been rewritten within that time is no longer returned and the next
 * speeding record is treated as the start of a new episode.
 */
public static class SpeedAlarmWindow extends RichWindowFunction<Location, String, Integer, GlobalWindow> {
    /** Last overspeed record, keyed by the vehicle id rendered as a string; assigned in open(). */
    MapState<String, Location> locationState;

    /**
     * Evaluates every record of the fired window against the stored overspeed state.
     *
     * @param integer      the window key (the vehicle id used in keyBy)
     * @param window       the window that fired
     * @param locationList the records contained in the window
     * @param out          collector receiving the alarm messages
     * @throws Exception if accessing the state fails
     */
    @Override
    public void apply(Integer integer, GlobalWindow window, Iterable<Location> locationList, Collector<String> out) throws Exception {
        for (Location location : locationList) {
            final String key = location.getVehicleId().toString();
            final Location preLocation = locationState.get(key);
            final boolean overspeed = location.getGpsSpeed() > location.getLimitSpeed();

            if (preLocation == null) {
                if (overspeed) {
                    locationState.put(key, location);
                    out.collect(location.toString() + "超速开始");
                }
                // Deliberately no early return here: the remaining records of the window
                // must still be processed (an early return would silently drop them).
            } else if (overspeed) {
                // Rewriting the entry also refreshes its TTL timestamp (OnCreateAndWrite).
                locationState.put(key, location);
                out.collect(location.toString() + "持续超速中" + ">> " +
                        "上一条超速数据为:" + "\n" + preLocation.toString());
            } else {
                locationState.remove(key);
                out.collect(location.toString() + "超速结束");
            }
        }
    }

    /**
     * Creates the TTL-enabled map state used by {@link #apply}.
     *
     * @param parameters the configuration passed in by the runtime
     * @throws Exception if the parent initialisation or the state lookup fails
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        StateTtlConfig ttlConfig = StateTtlConfig
                // Time-to-live of the state: 10 seconds
                .newBuilder(Time.seconds(10))
                // Refresh the timestamp on creation and on every write
                .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                // Never hand out state that has expired but is not cleaned up yet
                .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
                // Drop expired entries when a full snapshot is taken
                .cleanupFullSnapshot()
                .build();
        // Descriptor for the MapState named "locationState" (String keys, Location values)
        MapStateDescriptor<String, Location> mapStateDescriptor = new MapStateDescriptor<>("locationState",
                TypeInformation.of(String.class),
                TypeInformation.of(Location.class));
        // Attach the TTL configuration to the state
        mapStateDescriptor.enableTimeToLive(ttlConfig);
        this.locationState = getRuntimeContext().getMapState(mapStateDescriptor);
    }
}
/**
 * Demo source that emits a random {@link Location} for vehicle 1 or 2 and then sleeps for a
 * random number of seconds before emitting the next one, until {@link #cancel()} is called.
 */
public static class LocationSource implements SourceFunction<Location> {
    /**
     * Loop guard for {@link #run}. It is volatile because cancel() is invoked from a different
     * thread than run(), and a primitive so it can never be null.
     */
    volatile boolean flag = true;

    /**
     * Emits randomly generated records until the source is cancelled.
     *
     * @param ctx context used to emit records
     * @throws Exception if the emitting thread is interrupted while sleeping
     */
    @Override
    public void run(SourceContext<Location> ctx) throws Exception {
        Random random = new Random();
        while (flag) {
            // Vehicle id is 1 or 2
            int vehicleId = random.nextInt(2) + 1;
            Location location = Location.builder()
                    .vehicleId(vehicleId)
                    .plate("川A000" + vehicleId)
                    .color("绿")
                    // Current date as a yyyyMMdd integer
                    .date(Integer.parseInt(LocalDate.now().format(DateTimeFormatter.BASIC_ISO_DATE)))
                    .gpsSpeed(RandomUtil.randomInt(88, 100))
                    .limitSpeed(RandomUtil.randomInt(88, 95))
                    .devTime(System.currentTimeMillis())
                    .build();
            ctx.collect(location);
            // Pause a random number of seconds drawn from randomInt(5, 15) between records
            Thread.sleep(RandomUtil.randomInt(5, 15) * 1000L);
        }
    }

    /** Asks the emit loop in {@link #run} to stop after its current iteration. */
    @Override
    public void cancel() {
        flag = false;
    }
}
}
/**
 * Vehicle position/speed record flowing through the demo job and stored in the TTL state.
 *
 * <p>Constructors, accessors and the builder are generated by the Lombok annotations below;
 * the field order defines the parameter order of the generated all-args constructor.
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
@Builder
public class Location {
// Vehicle identifier; the demo job keys the stream by this value
private Integer vehicleId;
// License plate of the vehicle
private String plate;
// Color attribute of the record (the demo source always sets "绿") — presumably the plate color; confirm
private String color;
// Date as an integer; the demo source fills it with the current date in yyyyMMdd form
private Integer date;
// Speed reported for the vehicle (unit not stated in this file)
private Integer gpsSpeed;
// Speed limit the reported speed is compared against (unit not stated in this file)
private Integer limitSpeed;
// Timestamp in epoch milliseconds; the demo source uses System.currentTimeMillis()
private Long devTime;
}