缓存工具类

Flink 缓存维表_scala

 

  •  
package com.duo.utils
import com.alibaba.fastjson.JSONObject
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
/**
 * Helpers that load the result of a MySQL query into in-memory collections.
 *
 * Author z
 * Date 2021-04-05 13:02:57
 */
object CacheUtil {

  /**
   * Caches the rows returned by `sql` in a map.
   *
   * The map key of a row is built by concatenating `column=value-` for every
   * column listed in `key`, in order (so the key keeps a trailing `-`).
   *
   * @param sql query to run through `MySqlUtil.query`
   * @param key names of the columns that make up the map key
   * @return one entry per distinct key; a later row replaces an earlier row with the same key
   */
  def cacheMap(sql: String, key: ListBuffer[String]): mutable.Map[String, JSONObject] = {
    val cache = mutable.Map[String, JSONObject]()
    for (row <- MySqlUtil.query(sql)) {
      val rowKey = key.map(column => s"$column=${row.getString(column)}-").mkString
      cache(rowKey) = row
    }
    cache
  }

  /**
   * Caches the rows returned by `sql` in a list.
   *
   * @param sql query to run through `MySqlUtil.query`
   * @return the buffer returned by the query, or a fresh empty buffer when the query has no rows
   */
  def cacheList(sql: String): ListBuffer[JSONObject] = {
    val rows = MySqlUtil.query(sql)
    if (rows.isEmpty) new ListBuffer[JSONObject]() else rows
  }

  /**
   * Looks up rows of `tableName` by key and caches them in a map.
   *
   * The map key is `key.mkString("-")`, i.e. the tuples rendered as `(id,1)-(name,1)`.
   * It does not depend on the row, so when the lookup returns several rows only the
   * last one is kept.
   *
   * @param tableName table to query through `MySqlUtil.queryByKey`
   * @param key       (column, value) pairs to look up, e.g. id=1 name=1
   * @return a map with at most one entry
   */
  def cacheMapByKey(tableName: String, key: ListBuffer[(String, String)]): mutable.Map[String, JSONObject] = {
    val cache = mutable.Map[String, JSONObject]()
    // Fetch the matching rows from MySQL.
    val rows = MySqlUtil.queryByKey(tableName, key)
    rows.foreach { row =>
      cache(key.mkString("-")) = row
    }
    cache
  }

  /** Manual check: prints every row of the `student` table. */
  def main(args: Array[String]): Unit = {
    val students = cacheList("select * from student")
    students.foreach(student => println(student))
  }
}

 

在主程序中缓存维表

Flink 缓存维表_scala

 

将整个 MySQL 维表缓存在内存中，并定时刷新维表。

  •  
package com.duo.mytest
import java.util.{Timer, TimerTask}
import com.alibaba.fastjson.JSONObject
import com.duo.model.StudentTask
import com.duo.myfunc.{TableProcessFunc, TableProcessFuncMain}
import com.duo.mytest.FlinkAsyncIO.StuToJson
import com.duo.utils.CacheUtil
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.bridge.scala._
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
/**
 * Demo job that widens a small stream of student tasks with rows of the MySQL
 * `student` table. The table is loaded into memory before the job graph is built
 * and handed to the join functions through their constructor.
 *
 * Author z
 * Date 2021-04-04 09:45:40
 */
object FlinkCacheDimJoinMain {

  /** Query used to load the dimension table. */
  private val DimSql = "select * from student"

  /** Time between two reloads of the dimension table, in milliseconds. */
  private val RefreshIntervalMs = 1000L * 10

  // Written by the refresh timer thread and read by the main thread.
  @volatile private var list: ListBuffer[JSONObject] = new ListBuffer[JSONObject]()

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val settings = EnvironmentSettings
      .newInstance()
      .useBlinkPlanner()
      .inStreamingMode()
      .build()
    // Not used by the pipeline below; kept so the table environment is still set up as before.
    val tableEnv = StreamTableEnvironment.create(env, settings)

    // Load the dimension table synchronously. The join functions capture the buffer
    // that `list` references at construction time, so it has to be populated before
    // they are created; waiting for the timer's first run was a race.
    list = CacheUtil.cacheList(DimSql)

    // Daemon timer, so it cannot keep the JVM alive once the job has finished.
    val timer = new Timer("dim-cache-refresh", true)
    timer.schedule(
      new TimerTask {
        override def run(): Unit = {
          list = CacheUtil.cacheList(DimSql)
        }
      },
      // Delay before the first reload (the initial load already happened above).
      RefreshIntervalMs,
      // Interval between reloads.
      RefreshIntervalMs)
    // NOTE(review): a reload only replaces the `list` field. The join functions below
    // keep the buffer they were constructed with, so running operators presumably
    // never see refreshed rows; move the reload into the function's open() if they
    // must — confirm.

    val sourceDS = env.fromElements(
      StuToJson(StudentTask(1, "吃饭", 1, System.currentTimeMillis())),
      StuToJson(StudentTask(2, "看电影", 1, System.currentTimeMillis())),
      StuToJson(StudentTask(3, "约会", 3, System.currentTimeMillis())),
      StuToJson(StudentTask(4, "逛街", 5, System.currentTimeMillis())),
      StuToJson(StudentTask(5, "吃饭", 6, System.currentTimeMillis())),
      StuToJson(StudentTask(6, "写代码", 7, System.currentTimeMillis())),
      StuToJson(StudentTask(7, "看女朋友", 8, System.currentTimeMillis())),
      StuToJson(StudentTask(8, "吃饭", 7, System.currentTimeMillis()))
    )

    val wide_stu_tag = new OutputTag[JSONObject]("wide_stu_tag")

    val dimDS1 = sourceDS.process(studentJoin(wide_stu_tag, list))
    val outputDS = dimDS1.getSideOutput(wide_stu_tag)

    // Records that found no dimension row in the first pass are joined once more.
    // NOTE(review): this pass is given the side-output tag too, and nothing reads
    // that side output, so records that still do not match appear to be dropped —
    // confirm that this is intended.
    val dimDS2 = outputDS.process(studentJoin(wide_stu_tag, list))

    val ds = dimDS1.union(dimDS2).flatMap(x => x)
    ds.print()

    try env.execute()
    finally timer.cancel()
  }

  /**
   * Builds the join function shared by both passes.
   *
   * @param tag side output that receives records without a matching student row
   * @param dim cached rows of the `student` table
   */
  private def studentJoin(tag: OutputTag[JSONObject], dim: ListBuffer[JSONObject]): TableProcessFuncMain =
    new TableProcessFuncMain(
      tag,
      DimSql,
      // (dimension column, field name in the widened record)
      ListBuffer(
        ("id", "stu_Id"),
        ("name", "stu_name"),
        ("age", "stu_age")),
      dim
    ) {

      /** Lookup condition taken from the stream record, e.g. id=10. */
      override def getKey(i: JSONObject): ListBuffer[(String, String)] =
        ListBuffer(("id", i.getString("stuId")))

      /**
       * Produces one widened copy of `i` per matching dimension row. Without a
       * match, `i` itself is returned with every dimension field set to the
       * string "null".
       */
      override def join(i: JSONObject,
                        dimListJSON: ListBuffer[JSONObject],
                        dimFields: ListBuffer[(String, String)]): ListBuffer[JSONObject] = {
        val hasMatch = dimListJSON != null && dimListJSON.nonEmpty && dimFields.nonEmpty
        if (hasMatch) {
          dimListJSON.map { dimRow =>
            // Copy the record so every dimension row yields an independent result.
            val wide = i.clone().asInstanceOf[JSONObject]
            dimFields.foreach { case (dimColumn, target) =>
              wide.put(target, dimRow.getString(dimColumn))
            }
            wide
          }
        } else {
          dimFields.foreach { case (_, target) => i.put(target, "null") }
          ListBuffer(i)
        }
      }
    }
}

 

Process处理类

  •  
package com.duo.myfunc
import java.util.{Timer, TimerTask}
import com.alibaba.fastjson.JSONObject
import com.duo.mytrait.DimProcessTrait
import com.duo.utils.MySqlUtil
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.OutputTag
import org.apache.flink.util.Collector
import scala.collection.mutable
import scala.collection.mutable.ListBuffer

/**
 * Process function that joins every stream record against a dimension table that
 * the caller loaded up front and passed in through the constructor.
 *
 * Subclasses supply `getKey` (the lookup condition for a record) and `join` (how a
 * record is combined with its matching dimension rows).
 *
 * Author z
 * Date 2021-03-24 18:59:53
 */
abstract class TableProcessFuncMain extends ProcessFunction[JSONObject, ListBuffer[JSONObject]] with DimProcessTrait {
  // Stored but not read anywhere in this class.
  private var sqlStr: String = _
  private var fields = new ListBuffer[(String, String)]()
  private var listDim: ListBuffer[JSONObject] = new ListBuffer[JSONObject]()
  private var wide_stu_tag: OutputTag[JSONObject] = _

  /**
   * @param wide_stu_tag side output for records without a match; when null such
   *                     records are emitted on the main output instead
   * @param sql          query the dimension rows were loaded with
   * @param fields       field list handed to `join` unchanged
   * @param listDim      cached dimension rows to match against
   */
  def this(wide_stu_tag: OutputTag[JSONObject],
           sql: String,
           fields: ListBuffer[(String, String)],
           listDim: ListBuffer[JSONObject]) = {
    this()
    this.wide_stu_tag = wide_stu_tag
    this.sqlStr = sql
    this.fields = fields
    this.listDim = listDim
  }

  /**
   * Emits `join(i, matches, fields)` when dimension rows match the key of `i`.
   * A record without a match goes to the side output when a tag was configured,
   * otherwise it is still passed to `join` with an empty match list.
   */
  override def processElement(
      i: JSONObject,
      context: ProcessFunction[JSONObject, ListBuffer[JSONObject]]#Context,
      collector: Collector[ListBuffer[JSONObject]]): Unit = {
    val matches: ListBuffer[JSONObject] =
      if (listDim == null || listDim.isEmpty) {
        ListBuffer[JSONObject]()
      } else {
        val key = getKey(i)
        // A row matches only when EVERY (column, value) pair of the key matches.
        // The previous loop overwrote its flag on each pair, so only the last pair
        // decided. An empty key matches nothing, as before.
        listDim.filter { row =>
          key.nonEmpty && key.forall { case (column, expected) => row.getString(column) == expected }
        }
      }

    if (matches.nonEmpty) {
      collector.collect(join(i, matches, this.fields))
    } else if (wide_stu_tag != null) {
      // No Thread.sleep here: listDim never changes after construction, so waiting
      // cannot turn a miss into a hit and only stalls the operator.
      context.output(wide_stu_tag, i)
    } else {
      // Emit the unmatched record. This branch used to re-check matches.nonEmpty,
      // which is always false here, so the record was silently dropped.
      // NOTE(review): assumes `join` copes with an empty match list — confirm
      // against the DimProcessTrait implementations.
      collector.collect(join(i, matches, this.fields))
    }
  }
}

 

验证结果:

Flink 缓存维表_flink_03

 

在Process中缓存维表

Flink 缓存维表_scala

 

  •  
package com.duo.myfunc
import java.util.{Timer, TimerTask}
import com.alibaba.fastjson.JSONObject
import com.duo.mytrait.DimProcessTrait
import com.duo.utils.MySqlUtil
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.OutputTag
import org.apache.flink.util.Collector
import scala.collection.mutable
import scala.collection.mutable.ListBuffer

/**
 * Process function that keeps its own in-memory copy of a MySQL dimension table,
 * reloads it on a timer and joins every stream record against it.
 *
 * Subclasses supply `join` (how a record is combined with its matching dimension
 * rows).
 *
 * Author z
 * Date 2021-03-24 18:59:53
 */
abstract class TableProcessFunc extends ProcessFunction[JSONObject, ListBuffer[JSONObject]] with DimProcessTrait {
  private var sql: String = _
  private var fields = new ListBuffer[(String, String)]()
  // Immutable snapshot of the dimension table, keyed by the value of column `key`.
  // The timer thread publishes a new snapshot while processElement reads the
  // current one, so the field is volatile and the map itself is never mutated.
  @volatile private var map: Map[String, JSONObject] = Map.empty[String, JSONObject]
  private var wide_stu_tag: OutputTag[JSONObject] = _
  private var key = "id"
  // Created in open() and cancelled in close().
  @transient private var timer: Timer = _

  /**
   * @param wide_stu_tag side output for records without a match; when null such
   *                     records are emitted on the main output instead
   * @param sql          query that loads the dimension table
   * @param key          name of the field used as cache key; read from the dimension
   *                     rows when caching and from the stream record when looking up
   * @param fields       field list handed to `join` unchanged
   */
  def this(wide_stu_tag: OutputTag[JSONObject],
           sql: String,
           key: String,
           fields: ListBuffer[(String, String)]) = {
    this()
    this.wide_stu_tag = wide_stu_tag
    this.sql = sql
    this.fields = fields
    this.key = key
  }

  /**
   * Loads the cache once and then schedules periodic reloads.
   *
   * @param parameters not used
   */
  override def open(parameters: Configuration): Unit = {
    // Populate the cache before the first record arrives.
    cacheData()
    // Daemon thread, so an abandoned timer cannot keep the JVM alive.
    timer = new Timer("dim-cache-refresh", true)
    timer.schedule(
      new TimerTask {
        override def run(): Unit = {
          // An exception escaping run() would terminate the timer thread and stop
          // all further refreshes, so a failed reload keeps the previous snapshot.
          try cacheData()
          catch {
            case scala.util.control.NonFatal(e) =>
              System.err.println(s"Dimension cache refresh failed, keeping previous snapshot: $e")
          }
        }
      },
      // Delay before the first reload.
      1000,
      // Interval between reloads.
      1000 * 10)
  }

  /** Stops the refresh timer so its thread does not outlive this function instance. */
  override def close(): Unit = {
    if (timer != null) {
      timer.cancel()
      timer = null
    }
    super.close()
  }

  /**
   * Reloads the dimension table. The cache is replaced only when the query returns
   * rows; an empty result leaves the previous snapshot in place.
   */
  def cacheData(): Unit = {
    if (sql != null) {
      val rows = MySqlUtil.query(sql)
      if (rows.nonEmpty) {
        // Build a fresh snapshot instead of adding to the old map, so rows that
        // disappeared from the table also disappear from the cache.
        val snapshot = rows.map(row => row.getString(this.key) -> row).toMap
        map = snapshot
        snapshot.foreach(x => println("map缓存》》》》》》》》》》》》》 " + x))
      }
    }
  }

  /**
   * Emits `join(i, matches, fields)` when the cache holds a row for the key of `i`.
   * A record without a match goes to the side output when a tag was configured,
   * otherwise it is still passed to `join` with an empty match list.
   */
  override def processElement(
      i: JSONObject,
      context: ProcessFunction[JSONObject, ListBuffer[JSONObject]]#Context,
      collector: Collector[ListBuffer[JSONObject]]): Unit = {
    // Read the volatile field once so the lookup uses a single snapshot.
    val snapshot = map
    val matches = ListBuffer[JSONObject]()
    matches ++= Option(i.getString(this.key)).flatMap(snapshot.get).toList

    if (matches.nonEmpty) {
      collector.collect(join(i, matches, this.fields))
    } else if (wide_stu_tag != null) {
      // No Thread.sleep here: blocking the operator thread delays every following
      // record, and the result of the lookup above has already been decided.
      context.output(wide_stu_tag, i)
    } else {
      // Emit the unmatched record. This branch used to sleep and then re-check
      // matches.nonEmpty, which is always false here, so the record was dropped.
      // NOTE(review): assumes `join` copes with an empty match list — confirm
      // against the DimProcessTrait implementations.
      collector.collect(join(i, matches, this.fields))
    }
  }
}

 

Flink 缓存维表_scala_05