部分情况下,saveAsNewAPIHadoopDataset 不能用。
大坑:org.apache.hadoop.mapred 和 org.apache.hadoop.mapreduce 两个包很容易混淆。
package com.xiaomi.mishell.statusbar

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions

/** Demo job that writes a few hard-coded "id,name,age" records into an HBase
  * table using `saveAsHadoopDataset` with a `JobConf`.
  *
  * The output format and the `JobConf` both come from the `mapred` packages
  * (`org.apache.hadoop.hbase.mapred` / `org.apache.hadoop.mapred`); they must
  * not be mixed with their `mapreduce` counterparts.
  */
object SparkWriteHbaseTest {

  /** Entry point: builds a local SparkContext, turns each sample record into
    * an HBase `Put` and saves the resulting pair RDD to the table "table1".
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("HBaseTest").setMaster("local")
    val sc = new SparkContext(sparkConf)

    // Stop the context even when configuration or the save fails, so a failed
    // run does not leave the SparkContext open.
    try {
      val conf = HBaseConfiguration.create()
      conf.set("hbase.zookeeper.quorum", "10.38.161.138")
      conf.set("hbase.zookeeper.property.clientPort", "2181")

      val tablename = "table1"

      // Initialise the JobConf. TableOutputFormat must be the one from the
      // org.apache.hadoop.hbase.mapred package!
      val jobConf = new JobConf(conf)
      jobConf.setOutputFormat(classOf[TableOutputFormat])
      jobConf.set(TableOutputFormat.OUTPUT_TABLE, tablename)

      val indataRDD = sc.makeRDD(Array("1,jack,15", "2,Lily,16", "3,mike,16"))

      val rdd = indataRDD.map(_.split(',')).map { arr =>
        // One Put object is one row; the row key is given in the constructor.
        // Every inserted value must be converted with
        // org.apache.hadoop.hbase.util.Bytes.toBytes.
        // Put.add takes three arguments: column family, column qualifier, value.
        // NOTE(review): the row key is the 4-byte encoding of the Int id, not
        // the bytes of its string form - confirm this is what readers expect.
        val put = new Put(Bytes.toBytes(arr(0).toInt))
        put.add(Bytes.toBytes("group1"), Bytes.toBytes("col1"), Bytes.toBytes(arr(1)))
        // The pair's key is an empty ImmutableBytesWritable; the row key
        // travels inside the Put itself.
        (new ImmutableBytesWritable, put)
      }

      rdd.saveAsHadoopDataset(jobConf)
    } finally {
      sc.stop()
    }
  }
}