最近在项目里面需要从kafka推送的数据读取数据,spark streaming处理,由于推过来的是Json字符串,需要转换成dataFrame做进一步处理,但是Json字符串字段很多,而且还不固定;我想转换代码如下:

val NewsDF = sqlContext.createDataFrame(NewsRdd,classOf[News])

但是怎么吧json字符串转换成JavaBean呢。这么多的字段,有没有偷懒的方法呢?
jackson提供了很好的方法,曾经尝试使用scala中的ObjectMapper,但是我的json是嵌套的复杂类型,出现了一个错误,在网上没有找到很好的方法,无奈放弃了。

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonProcessingException;
import org.codehaus.jackson.map.DeserializationConfig;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.SerializationConfig;
import org.codehaus.jackson.map.annotate.JsonSerialize;
import org.codehaus.jackson.map.util.JSONPObject;
import org.codehaus.jackson.type.JavaType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


public class JsonMapper {
    private static Logger logger = LoggerFactory.getLogger(JsonMapper.class);
    private ObjectMapper mapper;

    public JsonMapper(JsonSerialize.Inclusion inclusion) {
        mapper = new ObjectMapper();
        //设置输出时包含属性的风格
        mapper.setSerializationInclusion(inclusion);
        //设置输入时忽略在JSON字符串中存在但Java对象实际没有的属性
        mapper.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false);
        //禁止使用int代表Enum的order()來反序列化Enum,非常危險
        mapper.configure(DeserializationConfig.Feature.FAIL_ON_NUMBERS_FOR_ENUMS, true);
        mapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true);
    }
    /**
     * 创建输出全部属性到Json字符串的Mapper.
     */
     public static JsonMapper buildNormalMapper() {
        return new JsonMapper(JsonSerialize.Inclusion.ALWAYS);
      }
     /**
     * 创建只输出非空属性到Json字符串的Mapper.
     */
     public static JsonMapper buildNonNullMapper() {
        return new JsonMapper(JsonSerialize.Inclusion.NON_NULL);
     }
    /**
      * 创建只输出初始值被改变的属性到Json字符串的Mapper.
     * */
    public static JsonMapper buildNonDefaultMapper() {
        return new JsonMapper(JsonSerialize.Inclusion.NON_DEFAULT);
    }
    /**
     * 创建只输出非Null且非Empty(如List.isEmpty)的属性到Json字符串的Mapper.
     */
     public static JsonMapper buildNonEmptyMapper() {
        return new JsonMapper(JsonSerialize.Inclusion.NON_EMPTY);
     }
     /**
     * 如果对象为Null, 返回"null".
     * 如果集合为空集合, 返回"[]".
     */
     public String toJson(Object object) {
        try {
              return mapper.writeValueAsString(object);
            } catch (IOException e) {
              throw NestedException.wrap(e);
            }
        }
     /**
     * 如果JSON字符串为Null或"null"字符串, 返回Null.
     * 如果JSON字符串为"[]", 返回空集合.
     *
     * 如需读取集合如List/Map, 且不是List<String>这种简单类型时,先使用函數constructParametricType构造类型.
     * @see #constructParametricType(Class, Class...)
     */
     public <T> T fromJson(String jsonString, Class<T> clazz) {
        if (StringUtils.isEmpty(jsonString)) {
              return null;
        }
        try {
              return mapper.readValue(jsonString, clazz);
            } catch (IOException e) {
              throw NestedException.wrap(e);
            }
     }

     /**
     * 如果JSON字符串为Null或"null"字符串, 返回Null.
     * 如果JSON字符串为"[]", 返回空集合.
     *
     * 如需读取集合如List/Map, 且不是List<String>这种简单类型时,先使用函數constructParametricType构造类型.
     * @see #constructParametricType(Class, Class...)
     */
     @SuppressWarnings("unchecked")
    public <T> T fromJson(String jsonString, JavaType javaType) {
        if (StringUtils.isEmpty(jsonString)) {
              return null;
            }
        try {
              return (T) mapper.readValue(jsonString, javaType);
            } catch (IOException e) {
              throw NestedException.wrap(e);
            }
    }
     @SuppressWarnings("unchecked")
    public <T> T fromJson(String jsonString, Class<?> parametrized, Class<?>... parameterClasses) {
        return (T) this.fromJson(jsonString, constructParametricType(parametrized, parameterClasses));
     }
     @SuppressWarnings("unchecked")
    public <T> List<T> fromJsonToList(String jsonString, Class<T> classMeta){
        return (List<T>) this.fromJson(jsonString,constructParametricType(List.class, classMeta));
     }
    @SuppressWarnings("unchecked")
    public <T> T fromJson(JsonNode node, Class<?> parametrized, Class<?>... parameterClasses) {
        JavaType javaType = constructParametricType(parametrized, parameterClasses);
        try {
            return (T) mapper.readValue(node, javaType);
        } catch (IOException e) {
              throw NestedException.wrap(e);
        }
    }

    @SuppressWarnings("unchecked")
    public <T> T pathAtRoot(String json, String path, Class<?> parametrized, Class<?>... parameterClasses){
        JsonNode rootNode = parseNode(json);
        JsonNode node = rootNode.path(path);
        return (T) fromJson(node, parametrized, parameterClasses);
    }

    @SuppressWarnings("unchecked")
    public <T> T pathAtRoot(String json, String path, Class<T> clazz){
        JsonNode rootNode = parseNode(json);
        JsonNode node = rootNode.path(path);
        return (T) fromJson(node, clazz);
    }
     /**
     * 構造泛型的Type如List<MyBean>, 则调用constructParametricType(ArrayList.class,MyBean.class)
     *             Map<String,MyBean>则调用(HashMap.class,String.class, MyBean.class)
     */
     public JavaType constructParametricType(Class<?> parametrized, Class<?>... parameterClasses) {
        return mapper.getTypeFactory().constructParametricType(parametrized, parameterClasses);
     }
    /**
       * 當JSON裡只含有Bean的部分屬性時,更新一個已存在Bean,只覆蓋該部分的屬性.
     */
    @SuppressWarnings("unchecked")
    public <T> T update(T object, String jsonString) {
       try {
              return (T) mapper.readerForUpdating(object).readValue(jsonString);
            } catch (JsonProcessingException e) {
              logger.warn("update json string:" + jsonString + " to object:" + object + " error.", e);
            } catch (IOException e) {
              logger.warn("update json string:" + jsonString + " to object:" + object + " error.", e);
            }
        return null;
    }
    /**
     * 輸出JSONP格式數據.
     */
    public String toJsonP(String functionName, Object object) {
    return toJson(new JSONPObject(functionName, object));
    }
    /**
     * 設定是否使用Enum的toString函數來讀寫Enum,
     * 為False時時使用Enum的name()函數來讀寫Enum, 默認為False.
     * 注意本函數一定要在Mapper創建後, 所有的讀寫動作之前調用.
     */
    public void setEnumUseToString(boolean value) {
          mapper.configure(SerializationConfig.Feature.WRITE_ENUMS_USING_TO_STRING, value);
          mapper.configure(DeserializationConfig.Feature.READ_ENUMS_USING_TO_STRING, value);
    }
    /**
     * 取出Mapper做进一步的设置或使用其他序列化API.
     */
    public ObjectMapper getMapper() {
        return mapper;
    }
    public JsonNode parseNode(String json){
       try {
              return mapper.readValue(json, JsonNode.class);
            } catch (IOException e) {
              throw NestedException.wrap(e);
            }
    }
    /**
     * 输出全部属性
     * @param object
     * @return
     */
    public static String toNormalJson(Object object){
       return new JsonMapper(JsonSerialize.Inclusion.ALWAYS).toJson(object);
    }

    /**
     * 输出非空属性
     * @param object
     * @return
     */
    public static String toNonNullJson(Object object){
       return new JsonMapper(JsonSerialize.Inclusion.NON_NULL).toJson(object);
    }

     /**
      * 输出初始值被改变部分的属性
      * @param object
      * @return
      */
     public static String toNonDefaultJson(Object object){
        return new JsonMapper(JsonSerialize.Inclusion.NON_DEFAULT).toJson(object);
     }

     /**
     * 输出非Null且非Empty(如List.isEmpty)的属性
     * @param object
     * @return
     */
     public static String toNonEmptyJson(Object object){
        return new JsonMapper(JsonSerialize.Inclusion.NON_EMPTY).toJson(object);
     }
     public void setDateFormat(String dateFormat){
         mapper.setDateFormat(new SimpleDateFormat(dateFormat));
     }
     public static String toLogJson(Object object){
        JsonMapper jsonMapper = new JsonMapper(JsonSerialize.Inclusion.NON_EMPTY);
        jsonMapper.setDateFormat("yyyy-MM-dd HH:mm:ss");
        return jsonMapper.toJson(object);
     }

该方法提供了很好的扩展性,也支持复杂类型的特殊情况,包括内嵌的List,或者内嵌的Json对象等都支持。很强大,能让我偷懒~~~~

val news = rdd.map(_._2).map( x => JsonMapper.buildNormalMapper.fromJson(x.trim, classOf[News]))
  val sqlContext = new SQLContext(sc)
  val newsDF = sqlContext.createDataFrame(news,classOf[News])

这样子就很舒服,一个一个的去new,然后去set很多的值,不存在的。这样子你只需要边缘OB一波就行啦。