1. 如果不知道文本的编码,可以使用 chardet 库进行检测

# 测试文本编码
import chardet

# Path of the text file whose encoding is unknown.
filename = "./all_data.txt"

# Read the raw bytes without decoding; the detector works on bytes.
with open(filename, mode="rb") as raw_file:
    data = raw_file.read()

# Print chardet's guess for the encoding of the file content.
print(chardet.detect(data))

2. 转码:转码途中用 gb2312 解码报错,百度后改用覆盖范围更大的 gb18030

# 将文本转为utf-8编码
import codecs 
import chardet
# Input and output files.
filename_in = "all_data.txt"
filename_out = "all_data_utf8.txt"
# Source and target encodings.
encode_in = "gb18030"  # gb2312 raised an error here; gb18030 covers a larger character set
encode_out = "utf-8"
# Transcode the file.
# newline="" turns off newline translation on both the read and the write, so
# the original line endings are copied unchanged. A codecs.open() read keeps
# "\r\n" as-is, and a default text-mode write then expands "\n" to the platform
# line separator, which turns "\r\n" into "\r\r\n" on Windows.
with open(filename_in, mode="r", encoding=encode_in, newline="") as fi:
    data = fi.read()
with open(filename_out, mode="w", encoding=encode_out, newline="") as fo:
    fo.write(data)
# Check the result by running encoding detection on the bytes just written.
with open(filename_out, "rb") as f:
    data = f.read()
encodeing_type = chardet.detect(data)
print(encodeing_type)