# OrionVision/utils/ocr.py

import cv2
import pytesseract
import numpy as np
from pytesseract import Output

# img_source = cv2.VideoCapture(0)


def get_grayscale(image):
    """Return `image` converted with OpenCV's BGR-to-grayscale color code."""
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray_image


def thresholding(image):
    """Binarize `image` with OpenCV's binary threshold combined with Otsu's method.

    Only the thresholded image is returned; the threshold value that
    `cv2.threshold` also reports is discarded.
    """
    mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, binary_image = cv2.threshold(image, 0, 255, mode)
    return binary_image


def opening(image):
    """Apply a morphological opening to `image` using a 5x5 all-ones kernel."""
    structuring_element = np.ones((5, 5), dtype=np.uint8)
    opened_image = cv2.morphologyEx(image, cv2.MORPH_OPEN, structuring_element)
    return opened_image


def canny(image):
    """Run Canny edge detection on `image` with thresholds 100 and 200."""
    low_threshold, high_threshold = 100, 200
    edge_map = cv2.Canny(image, low_threshold, high_threshold)
    return edge_map


def test_performance():
    """Visually compare OCR results across preprocessing variants of './img.png'.

    Loads './img.png', derives grayscale, thresholded, opened and Canny
    variants, runs `pytesseract.image_to_data` on each one, and draws a green
    box plus the recognized text for every non-empty word whose confidence is
    above 60. Each annotated image is shown in a window named 'img' and the
    function waits for a key press before moving on to the next variant.

    Raises:
        FileNotFoundError: if './img.png' is missing or cannot be decoded.
    """
    img_source = cv2.imread('./img.png')
    # cv2.imread signals failure by returning None instead of raising.
    if img_source is None:
        raise FileNotFoundError("cannot read image './img.png' (missing or unreadable)")

    gray = get_grayscale(img_source)
    thresh = thresholding(gray)
    # The locals must NOT be called `opening` / `canny`: assigning to those
    # names inside this function would make them local variables and the
    # calls below would fail with UnboundLocalError.
    opened = opening(gray)
    edges = canny(gray)

    for img in (img_source, gray, thresh, opened, edges):
        d = pytesseract.image_to_data(img, output_type=Output.DICT)
        print(d.keys())

        # Single-channel images are expanded to 3 channels so the green
        # annotations are drawn in color.
        if len(img.shape) == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)

        boxes = zip(d['text'], d['conf'], d['left'], d['top'], d['width'], d['height'])
        for text, conf, x, y, w, h in boxes:
            # NOTE(review): the confidence may presumably arrive as an int, a
            # float or a numeric string depending on the pytesseract version;
            # going through float() first accepts all of them while keeping
            # the original integer comparison.
            if int(float(conf)) <= 60:
                continue
            # Don't show empty / whitespace-only text.
            if not text or not text.strip():
                continue
            img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
            img = cv2.putText(img, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 3)

        cv2.imshow('img', img)
        cv2.waitKey(0)

# print(pytesseract.image_to_data(img_source, output_type=Output.DICT)['text'])

def identy_char(img_path=None, img_source=None):
    '''
    @description: Recognize a single character in an image with Tesseract
                  (run with '--psm 10', i.e. single-character mode).
    @param {img_path} Path of an image file, loaded with cv2.imread.
    @param {img_source} An already-loaded image to run recognition on.
    @return {ident_char} The first recognized text entry that is exactly one
                         character long, or None when there is no such entry.
    @raise {ValueError} If img_path and img_source are both None or both given.
    @raise {FileNotFoundError} If img_path cannot be read as an image.
    '''

    # Exactly one of the two inputs must be supplied. Identity checks against
    # None are used on purpose: calling `.any()` on the default None raises
    # AttributeError, and `.any()` on an array yields a bool that never
    # equals None, so a value-based test cannot distinguish the cases.
    if (img_path is None) == (img_source is None):
        raise ValueError('img_path and img_source cannot be None OR Exist at the same time')

    if img_path is not None:
        source_img = cv2.imread(img_path)
        # cv2.imread returns None (instead of raising) when loading fails.
        if source_img is None:
            raise FileNotFoundError(f'cannot read image file (missing or unreadable): {img_path}')
    else:
        source_img = img_source

    d = pytesseract.image_to_data(source_img, output_type=Output.DICT, config='--psm 10')  # --psm 10: single char
    print(d['text'])
    for text in d['text']:
        # The first one-character entry is the result.
        if len(text) == 1:
            return text
    return None

if __name__ == "__main__":
    # Manual check: load the sample image and print the recognized character.
    # identy_char('./img.png')
    # print(identy_char('./coffee-ocr.jpg'))
    sample_image = cv2.imread('./img.png')
    recognized = identy_char(img_source=sample_image)
    print(recognized)