JPG有损压缩知识点整理(待完善)
查阅资料:jpeg的压缩算法包括以下5个步骤:
- 图像分割成8*8的小块,压缩过程是每个小块单独处理的
- 颜色空间转换RGB->YCbCr
- 离散余弦变换DCT
- 数据量化
- Huffman编码
代码已经复现,但是从步骤2开始就已经不是模型可以推理的格式了,步骤2-5是在遍历步骤1中每个8*8的小块中进行的,输出的结果是str类型,所以不能推理。
python代码实现
import cv2
import hashlib
import numpy as np
import matplotlib.pyplot as plt
import base64
'''
jpeg压缩函数
data:要压缩的灰度图像数据流
quality_scale控制压缩质量(1-99),默认为50,值越小图像约清晰
return:得到压缩后的图像数据,为FFD9开头的jpeg格式字符串
'''
def compress(img_data,quality_scale=50):
#获取图像数据流宽高
h,w=img_data.shape
#标准亮度量化表
Qy=np.array([[16,11,10,16,24,40,51,61],
[12,12,14,19,26,58,60,55],
[14,13,16,24,40,57,69,56],
[14,17,22,29,51,87,80,62],
[18,22,37,56,68,109,103,77],
[24,35,55,64,81,104,113,92],
[49,64,78,87,103,121,120,101],
[72,92,95,98,112,100,103,99]],dtype=np.uint8)
#根据压缩质量重新计算量化表
if quality_scale<=0:
quality_scale=1
elif quality_scale>=100:
quality_scale=99
for i in range(64):
tmp=int((Qy[int(i/8)][i%8]*quality_scale+50)/100)
if tmp<=0:
tmp=1
elif tmp>255:
tmp=255
Qy[int(i/8)][i%8]=tmp
#Z字型
ZigZag =[
0, 1, 5, 6,14,15,27,28,
2, 4, 7,13,16,26,29,42,
3, 8,12,17,25,30,41,43,
9,11,18,24,31,40,44,53,
10,19,23,32,39,45,52,54,
20,22,33,38,46,51,55,60,
21,34,37,47,50,56,59,61,
35,36,48,49,57,58,62,63]
#DC哈夫曼编码表
standard_dc_nrcodes=[0, 0, 7, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
standard_dc_values=[4, 5, 3, 2, 6, 1, 0, 7, 8, 9, 10, 11]
pos_in_table = 0;
code_value = 0;
dc_huffman_table=[0]*16
for i in range(1,9):
for j in range(1,standard_dc_nrcodes[i-1]+1):
dc_huffman_table[standard_dc_values[pos_in_table]]=bin(code_value)[2:].rjust(i,'0')
# ac_huffman_table[standard_ac_values[pos_in_table]].length=k
pos_in_table+=1
code_value+=1
code_value <<=1
#AC哈夫曼编码表
standard_ac_nrcodes=[0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d]
standard_ac_values=[0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa]
pos_in_table = 0;
code_value = 0;
ac_huffman_table=[0]*256
for i in range(1,17):
for j in range(1,standard_ac_nrcodes[i-1]+1):
ac_huffman_table[standard_ac_values[pos_in_table]]=bin(code_value)[2:].rjust(i,'0')
pos_in_table+=1
code_value+=1
code_value <<=1
#转成float类型
img_data=img_data.astype(np.float64)
#存储最后的哈夫曼编码
result=''
#分成8*8的块
for i in range(h//8):
for j in range(w//8):
block=img_data[i*8:(i+1)*8,j*8:(j+1)*8]
block=cv2.dct(block)
#数据量化
block[:]=np.round(block/Qy)
#把量化后的二维矩阵转成一维数组
arr=[0]*64
notnull_num=0
for k in range(64):
tmp=int(block[int(k/8)][k%8])
arr[ZigZag[k]]=tmp;
#统计arr数组中有多少个非0元素
if tmp!=0:
notnull_num+=1
#RLE编码
#标识连续0的个数
time=0
for k in range(64):
if arr[k]==0 and time<15:
time+=1
else:
#BIT编码
#处理括号中第二个数
data=arr[k]
data2=bin(np.abs(data))[2:]
data1=len(data2)
if data<0:
data2=bin(np.abs(data)^(2**data1-1))[2:].rjust(data1,'0')
if data==0:
data1=0
if arr[k]==0:
data2=''
#哈夫曼编码,序列化
#直流
if k==0:
result+=dc_huffman_table[time*16+data1]
else:
result+=ac_huffman_table[time*16+data1]
result+=data2
time=0
#判断是否要添加EOB
if int(arr[k])!=0:
notnull_num-=1
#AC系数没有非空
if notnull_num==0 and k<63:
#添加EOB
result+='1010'
break
#补足为8的整数倍,以便编码成16进制数据
if len(result)%8!=0:
result=result.ljust(len(result)+8-len(result)%8,'0')
result=hex(int(result,2))[2:]
res=''
#添加jpeg文件头
#SOI(文件头),共89个字节
res+='FFD8'
#APP0图像识别信息
res+='FFE000104A46494600010100000100010000'
#DQT定义量化表
res+='FFDB004300'
#64字节的量化表
for i in range(64):
res+=hex(Qy[int(i/8)][i%8])[2:].rjust(2,'0')
#SOF0图像基本信息,13个字节
res+='FFC0000B08'
res+=hex(h)[2:].rjust(4,'0')
res+=hex(w)[2:].rjust(4,'0')
res+='01012200'
#DHT定义huffman表,33个字节+183
res+='FFC4001F0000010501010101010100000000000000'
for i in standard_dc_values:
res+=hex(i)[2:].rjust(2,'0')
res+='FFC400B5100002010303020403050504040000017D'
for i in standard_ac_values:
res+=hex(i)[2:].rjust(2,'0')
#SOS扫描行开始,10个字节
res+='FFDA0008010100003F00'
#压缩的图像数据(一个个扫描行),数据存放顺序是从左到右、从上到下
res+=result
#EOI文件尾0
res+='FFD9'
return res, result
'''
jpeg解压缩
img:解压缩的jpeg灰度图像文件
return:返回解压缩后的图像原数据,为多维数组形式
'''
def decompress(img):
#jpeg解码的所有参数都是从编码后的jpeg文件中读取的
with open(img,'rb') as f:
img_data=f.read()
res=''
for i in img_data:
res+=hex(i)[2:].rjust(2,'0').upper()
ZigZag =[
0, 1, 5, 6,14,15,27,28,
2, 4, 7,13,16,26,29,42,
3, 8,12,17,25,30,41,43,
9,11,18,24,31,40,44,53,
10,19,23,32,39,45,52,54,
20,22,33,38,46,51,55,60,
21,34,37,47,50,56,59,61,
35,36,48,49,57,58,62,63]
#获取亮度量化表
Qy=np.zeros((8,8))
for i in range(64):
Qy[int(i/8)][i%8]=int(res[50+i*2:52+i*2],16)
#获取SOF0图像基本信息,图像的宽高
h=int(res[188:192],16)
w=int(res[192:196],16)
#获取DHT定义huffman表
standard_dc_values=res[246:270]
standard_dc_nrcodes=[0, 0, 7, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
standard_ac_values=res[312:636]
standard_ac_nrcodes=[0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d]
#生成dc哈夫曼表
pos_in_table = 0;
code_value = 0;
reverse_dc_huffman_table={}
for i in range(1,9):
for j in range(1,standard_dc_nrcodes[i-1]+1):
reverse_dc_huffman_table[bin(code_value)[2:].rjust(i,'0')]=standard_dc_values[pos_in_table*2:pos_in_table*2+2]
pos_in_table+=1
code_value+=1
code_value <<=1
#生成ac哈夫曼表
pos_in_table = 0;
code_value = 0;
reverse_ac_huffman_table={}
for i in range(1,17):
for j in range(1,standard_ac_nrcodes[i-1]+1):
reverse_ac_huffman_table[bin(code_value)[2:].rjust(i,'0')]=standard_ac_values[pos_in_table*2:pos_in_table*2+2]
pos_in_table+=1
code_value+=1
code_value <<=1
#获取压缩的图像数据
result=res[656:-4]
#得到哈夫曼编码后的01字符串
result=bin(int(result,16))[2:]
img_data=np.zeros((h,w))
pos=0
for j in range(h//8):
for k in range(w//8):
#逆dc哈夫曼编码
#正向最大匹配
arr=[0]
#计算EOB块中0的个数
num=0
for i in range(8,2,-1):
tmp=reverse_dc_huffman_table.get(result[pos:pos+i])
#匹配成功
if(tmp):
time=0
data1=int(tmp[1],16)
pos+=i
data2=result[pos:pos+data1]
data=int(data2,2)
arr[0]=data
pos+=data1
num+=1
break
#逆ac哈夫曼编码
while(num<64):
#AC系数编码长度是从16bits到2bits
for i in range(16,1,-1):
tmp=reverse_ac_huffman_table.get(result[pos:pos+i])
if(tmp):
pos+=i
if(tmp=='00'):
arr+=([0]*(64-num))
num=64
break
time=int(tmp[0],16)
data1=int(tmp[1],16)
data2=result[pos:pos+data1]
pos+=data1
#data2为空,赋值为0,应对(15,0)这种情况
data2=data2 if data2 else '0'
if data2[0]=='0':
#负数
data=-int(data2,2)^(2**data1-1)
else:
data=int(data2,2)
num+=time+1
#time个0
arr+=([0]*time)
#非零值或最后一个单元0
arr.append(data)
break
#逆ZigZag扫描,得到block量化块
block=np.zeros((8,8))
for i in range(64):
block[int(i/8)][i%8]=arr[ZigZag[i]]
#逆量化
block=block*Qy
#逆DCT变换
block=cv2.idct(block)
img_data[j*8:(j+1)*8,k*8:(k+1)*8]=block
return img_data
def main():
#原始图像路径,灰度图像
img_path='./1.bmp'
#读取原始图像,cv2.imread()默认是用color模式读取的,保持原样读取要加上第二个参数-1,即CV_LOAD_IMAGE_GRAYSCALE
#得到图像原数据流
img_data=cv2.imread(img_path,0)
print(img_data.shape)
# 得到压缩后图像数据
img_compress, data_result=compress(img_data,50)
print(type(data_result))
# #存储压缩后的图像
# img_compress_path='./img_compress.jpg'
# with open(img_compress_path,'wb') as f:
# f.write(base64.b16decode(img_compress.upper()))
# #jpeg图像解压缩测试
# img_decompress=decompress(img_compress_path)
# #结果展示
# # plt.rcParams['font.sans-serif'] = ['SimHei'] # 中文乱码
# #子图1,原始图像
# plt.subplot(121)
# #imshow()对图像进行处理,画出图像,show()进行图像显示
# plt.imshow(img_data,cmap=plt.cm.gray)
# plt.title('原始图像')
# #不显示坐标轴
# plt.axis('off')
# #子图2,jpeg压缩后解码图像
# plt.subplot(122)
# plt.imshow(img_decompress,cmap=plt.cm.gray)
# plt.title('jpeg图像')
# plt.axis('off')
# plt.show()
if __name__ == '__main__':
main()