八邻域算法:对每个像素点检查其周围的8个相邻像素。如果这8个点中值为255(白色)的个数大于某个阈值,则判定该点为噪点,阈值可以根据实际情况调整。

from PIL import Image
import tesserocr


def book_clear(image, threshold):
    """Convert an image to grayscale, binarize it, and run OCR on the result.

    The binarized image is written to ``img1.png`` and the recognized text
    is printed.

    Args:
        image: Source image; must provide ``convert`` (presumably a PIL
            image -- confirm against the caller).
        threshold: Gray levels below this value map to 0 in the lookup
            table; all other levels map to 1.

    Returns:
        The binarized (mode ``"1"``) image.
    """
    gray = image.convert("L")
    # One lookup entry per possible 8-bit gray level.
    lookup = [0 if level < threshold else 1 for level in range(256)]
    binary = gray.point(lookup, "1")
    binary.save("img1.png")
    text = tesserocr.image_to_text(binary)
    print('灰度二值化之后:' + text)
    return binary


def depoint(img2, max_white_neighbors=6):
    """Denoise a binarized image using each pixel's 8-neighborhood.

    For every interior pixel, count how many of its eight neighbors have a
    value above 245 (treated as white). When that count exceeds
    ``max_white_neighbors`` the pixel itself is set to 255. The outermost
    rows and columns are never modified because they lack a full
    neighborhood.

    Pixels are rewritten in place while scanning, so a pixel whitened
    earlier in the scan counts as white for the neighbors visited after it.

    The denoised image is written to ``img2.png`` and the OCR text of the
    result is printed.

    Args:
        img2: Binarized image to clean up; must provide ``load``, ``size``
            and ``save`` (presumably a PIL image -- confirm against the
            caller). It is modified in place.
        max_white_neighbors: A pixel is whitened when more than this many
            of its eight neighbors are white. Defaults to 6.

    Returns:
        The same image object that was passed in, after denoising.
    """
    pixdata = img2.load()
    w, h = img2.size
    print(w, h)
    # Offsets (dx, dy) of the eight surrounding pixels.
    neighbor_offsets = (
        (-1, -1), (0, -1), (1, -1),
        (-1, 0), (1, 0),
        (-1, 1), (0, 1), (1, 1),
    )
    for y in range(1, h - 1):
        for x in range(1, w - 1):
            count = sum(
                1
                for dx, dy in neighbor_offsets
                if pixdata[x + dx, y + dy] > 245
            )
            if count > max_white_neighbors:
                pixdata[x, y] = 255
    img2.save("img2.png")
    result = tesserocr.image_to_text(img2)
    print('八领域降噪之后:' + result)
    # Return the image that was actually denoised; the previous code returned
    # an unrelated module-level name instead of this function's argument.
    return img2


# Driver: binarize check.png, then denoise the binarized result.
img = Image.open("check.png")
book_clear(img,60)  # grayscale + binarization with threshold 60; saves img1.png
# Re-open the binarized image that book_clear wrote to disk.
img2 = Image.open("img1.png")
# 8-neighborhood denoising; the result is also saved by depoint as img2.png.
eight_img = depoint(img2)

八邻域降噪的效果还是不错的,但是发现tesserocr的识别能力还是太差,去掉噪点之后识别率依然很低。后面再继续深入研究字符分割、旋转校正识别,以及基于深度学习的识别。