This commit is contained in:
raiots 2023-05-30 22:10:33 +08:00
parent 91b2eb58f9
commit f80c02e923
2 changed files with 30 additions and 24 deletions

BIN
coffee-ocr.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 140 KiB

54
ocr.py
View File

@@ -3,7 +3,8 @@ import pytesseract
 import numpy as np
 from pytesseract import Output

-img_source = cv2.imread('images/coffee.jpg')
+# img_source = cv2.VideoCapture(0)
+img_source = cv2.imread('./coffee-ocr.jpg')


 def get_grayscale(image):
@@ -23,26 +24,31 @@ def canny(image):
     return cv2.Canny(image, 100, 200)


-gray = get_grayscale(img_source)
-thresh = thresholding(gray)
-opening = opening(gray)
-canny = canny(gray)
-
-for img in [img_source, gray, thresh, opening, canny]:
-    d = pytesseract.image_to_data(img, output_type=Output.DICT)
-    n_boxes = len(d['text'])
-
-    # back to RGB
-    if len(img.shape) == 2:
-        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
-
-    for i in range(n_boxes):
-        if int(d['conf'][i]) > 60:
-            (text, x, y, w, h) = (d['text'][i], d['left'][i], d['top'][i], d['width'][i], d['height'][i])
-            # don't show empty text
-            if text and text.strip() != "":
-                img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
-                img = cv2.putText(img, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 3)
-
-    cv2.imshow('img', img)
-    cv2.waitKey(0)
+def test_performance():
+    gray = get_grayscale(img_source)
+    thresh = thresholding(gray)
+    opening = opening(gray)
+    canny = canny(gray)
+
+    for img in [img_source, gray, thresh, opening, canny]:
+        d = pytesseract.image_to_data(img, output_type=Output.DICT)
+        print(d.keys())
+
+        n_boxes = len(d['text'])
+
+        # back to RGB
+        if len(img.shape) == 2:
+            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+
+        for i in range(n_boxes):
+            if int(d['conf'][i]) > 60:
+                (text, x, y, w, h) = (d['text'][i], d['left'][i], d['top'][i], d['width'][i], d['height'][i])
+                # don't show empty text
+                if text and text.strip() != "":
+                    img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
+                    img = cv2.putText(img, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 3)
+
+        cv2.imshow('img', img)
+        cv2.waitKey(0)
+
+print(pytesseract.image_to_data(img_source, output_type=Output.DICT)['text'])