八邻域算法:八邻域就是检查某个像素点周围的 8 个像素点。如果这 8 个点中值为 255(白色)的个数大于某个阈值,则判定该点为噪点;阈值可以根据实际情况调整。
from PIL import Image
import tesserocr
def book_clear(image, threshold):
    """Convert an image to grayscale, binarize it, and OCR the result.

    Gray levels below ``threshold`` map to 0 in the lookup table and all
    other levels map to 1; the table is applied with ``point`` to produce
    a mode "1" image. The binarized image is saved to "img1.png", passed
    to ``tesserocr.image_to_text``, and the recognized text is printed.

    Args:
        image: Image object providing ``convert`` (a PIL image in the
            visible caller).
        threshold: Gray-level cutoff used to build the lookup table.

    Returns:
        The binarized image.
    """
    grayscale = image.convert("L")
    # One table entry per possible 8-bit gray level.
    lookup = [0 if level < threshold else 1 for level in range(256)]
    binary = grayscale.point(lookup, "1")
    binary.save("img1.png")
    recognized = tesserocr.image_to_text(binary)
    print('灰度二值化之后:' + recognized)
    return binary
def depoint(img2):
    """Denoise a binarized image with an eight-neighborhood vote.

    For every pixel that is not on the image border, count how many of its
    eight surrounding pixels have a value above 245. When more than six of
    them do, the pixel itself is set to 255. The image is modified in
    place, so pixels rewritten earlier in the scan are seen by later
    neighbor checks. The result is saved to "img2.png", passed to
    ``tesserocr.image_to_text``, and the recognized text is printed.

    Args:
        img2: Binarized image whose pixel access yields single integer
            values (the visible caller passes the image written by
            ``book_clear``).

    Returns:
        The same image object, after denoising.
    """
    pixels = img2.load()
    width, height = img2.size
    print(width, height)

    # Offsets (dx, dy) of the eight pixels surrounding a center pixel.
    neighbor_offsets = (
        (0, -1), (0, 1), (-1, 0), (1, 0),
        (-1, -1), (-1, 1), (1, -1), (1, 1),
    )

    # Border pixels are skipped because they lack a full neighborhood.
    for y in range(1, height - 1):
        for x in range(1, width - 1):
            white_neighbors = sum(
                1
                for dx, dy in neighbor_offsets
                if pixels[x + dx, y + dy] > 245
            )
            if white_neighbors > 6:  # controls how strict the vote is
                pixels[x, y] = 255

    img2.save("img2.png")
    result = tesserocr.image_to_text(img2)
    print('八领域降噪之后:' + result)
    # Return the denoised image; the original returned the unrelated
    # module-level name ``img`` instead of the image processed here.
    return img2
# Demo pipeline: binarize "check.png", then denoise the binarized result.
img = Image.open("check.png")
book_clear(img, 60)  # grayscale conversion + binarization; writes "img1.png"
img2 = Image.open("img1.png")
eight_img = depoint(img2)
# Author's note: the eight-neighborhood denoising works fairly well, but
# tesserocr's recognition rate is still very low even after the noise is
# removed. Next steps to explore: character segmentation, rotation-aware
# recognition, and deep-learning-based recognition.
















