Python2未知编码字符串转为utf-8编码
Python 的编码问题总是非常烦人,一不小心就会出现乱码。本函数采用的方法是:依次用 try 尝试以几种常见编码对字符串解码,如果解码成功,就说明字符串是相应的编码,然后再把解码结果编码为 utf-8。代码如下:
# -*- coding: utf-8 -*-
def to_utf8(data):
    """Best-effort conversion of a byte string of unknown encoding to UTF-8.

    The candidate encodings are tried in a fixed order: UTF-8, then GBK,
    then UTF-16-LE.  The first one that decodes ``data`` without error is
    assumed to be the real encoding.  This is a heuristic, not a detector:
    a byte string that happens to be valid in an earlier candidate is
    never tested against the later ones.

    ASCII needs no separate attempt, because every ASCII byte string is
    also valid UTF-8.

    :param data: byte string (``str`` on Python 2, ``bytes`` on Python 3)
        or ``bytearray`` in an unknown encoding.
    :returns: ``data`` itself when it is already valid UTF-8; otherwise
        the UTF-8 encoding of the text obtained from the first fallback
        encoding that decodes successfully.  If nothing decodes, or if
        ``data`` is not a byte string at all, ``data`` is returned
        unchanged.
    """
    # Anything that is not a byte string (text objects, None, ...) cannot
    # be decoded here; hand it back untouched instead of relying on a
    # catch-all ``except`` to hide the resulting error.
    if not isinstance(data, (bytes, bytearray)):
        return data

    # Already valid UTF-8: return the input as-is, no re-encoding needed.
    try:
        data.decode("utf-8")
    except UnicodeError:
        pass
    else:
        return data

    # Fallbacks, in priority order.  Only Unicode errors mean "wrong
    # guess"; anything else (e.g. KeyboardInterrupt) must propagate.
    for encoding in ("gbk", "utf-16-le"):
        try:
            return data.decode(encoding).encode("utf-8")
        except UnicodeError:
            continue

    # None of the candidate encodings worked: return the input unchanged.
    return data