Skip to content

Commit

Permalink
create ocr-data
Browse files Browse the repository at this point in the history
  • Loading branch information
Christine-DK committed Jun 13, 2023
1 parent 60e3167 commit 746520f
Show file tree
Hide file tree
Showing 5 changed files with 224 additions and 0 deletions.
Empty file added ocr-data/DDI-100.ipynb
Empty file.
49 changes: 49 additions & 0 deletions ocr-data/scripts/examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import cv2
from generator import Generator
from utils import draw_word_boxes, combine_masks


def test_doc():
    """
    Shows a random document, its masks and the document with word boxes drawn.
    Blocks until a key is pressed in one of the OpenCV windows.
    """
    # The dataset root has to be passed by keyword: the first positional
    # parameter of Generator is pickle_path, not dataset_path.
    gen = Generator(dataset_path="../data/pdf_dataset")
    img, masks, data = gen.get_doc()
    cv2.imshow("image", cv2.resize(img, (0, 0), fx=0.2, fy=0.2))
    # get_doc returns a list of masks, so every mask gets its own window.
    for idx, mask in enumerate(masks):
        cv2.imshow(f"mask {idx}", cv2.resize(mask, (0, 0), fx=0.2, fy=0.2))
    # draw_word_boxes works in place, so img now carries the boxes.
    draw_word_boxes(img, data, word_color=0)
    cv2.imshow("image with boxes", cv2.resize(img, (0, 0), fx=0.2, fy=0.2))
    cv2.waitKey()


def test_str():
    """
    Shows a random word image, prints its text and then shows the same image
    with the character delimiters drawn as vertical lines.
    Blocks until a key is pressed in one of the OpenCV windows.
    """
    # The dataset root has to be passed by keyword: the first positional
    # parameter of Generator is pickle_path, not dataset_path.
    gen = Generator(dataset_path="../data/pdf_dataset")
    img, data, delimiters = gen.get_string()
    cv2.imshow("image", cv2.resize(img, (0, 0), fx=2, fy=2))
    print(data)
    # Draw over the full image height instead of assuming it is 32 pixels.
    img_height = img.shape[0]
    for delim in delimiters:
        cv2.line(img, (delim, 0), (delim, img_height), color=0, thickness=2)
    cv2.imshow("image with delims", cv2.resize(img, (0, 0), fx=2, fy=2))
    cv2.waitKey()


def test_char():
    """
    Shows a random single character image and prints the character.
    Blocks until a key is pressed in the OpenCV window.
    """
    # The dataset root has to be passed by keyword: the first positional
    # parameter of Generator is pickle_path, not dataset_path.
    gen = Generator(dataset_path="../data/pdf_dataset")
    img, data = gen.get_char()
    cv2.imshow("image", cv2.resize(img, (0, 0), fx=2, fy=2))
    print(data)
    cv2.waitKey()


def test_mask():
    """
    Shows the masks of two random documents combined into one colored image.
    Blocks until a key is pressed in the OpenCV window.
    """
    # The dataset root has to be passed by keyword: the first positional
    # parameter of Generator is pickle_path, not dataset_path.
    gen = Generator(dataset_path="../data/pdf_dataset")
    _, masks1, _ = gen.get_doc()
    _, masks2, _ = gen.get_doc()
    # get_doc returns a list of masks per document; the first mask of each
    # document is used for the comparison.
    mask1 = masks1[0]
    # combine_masks requires equal shapes, so the second mask is resized
    # to the first one (cv2.resize takes the size as (width, height)).
    mask2 = cv2.resize(masks2[0], (mask1.shape[1], mask1.shape[0]))
    img = combine_masks(mask1, mask2)
    cv2.imshow("image", cv2.resize(img, (0, 0), fx=.2, fy=.2))
    cv2.waitKey()


if __name__ == "__main__":
    # Run every demo in turn; each one blocks until a key is pressed.
    for demo in (test_doc, test_str, test_char, test_mask):
        demo()
77 changes: 77 additions & 0 deletions ocr-data/scripts/generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import cv2
import numpy as np
import pickle

from glob import glob
from pathlib import Path
from random import choice

from .utils import get_image_from_box


class Generator:
    """
    Random sampler over a generated dataset: whole documents, single words
    or single characters.
    """

    def __init__(self, pickle_path=None, dataset_path=None, book_paths=None):
        """
        Class Generator implements simple random sample choice.
        One of the arguments must be specified.
        If several are given, dataset_path replaces book_paths and the
        paths loaded from pickle_path replace everything found by globbing.
        :param pickle_path: str - path to pickle file with image paths
        :param dataset_path: str - path to full dataset
        :param book_paths: list - paths to book directories.
        :raises ValueError: if no argument is specified
        """
        if dataset_path is None and book_paths is None and pickle_path is None:
            raise ValueError("At least one argument must be specified")
        if dataset_path is not None:
            # Every entry directly under the dataset root is treated as a book.
            book_paths = glob(f"{dataset_path}/*")
        if book_paths is not None:
            self.paths = []
            for book in book_paths:
                self.paths += glob(f"{book}/gen_imgs/*")
        if pickle_path is not None:
            # NOTE(security): unpickling can execute arbitrary code; only
            # load pickle files that come from a trusted source.
            with open(pickle_path, "rb") as f:
                self.paths = pickle.load(f)

    def get_doc(self):
        """
        Returns random document sample from dataset.
        The boxes are read from ``gen_boxes/<stem>.pickle`` and the masks from
        ``gen_masks/<stem>_*`` next to the ``gen_imgs`` directory of the image.
        :return: (img, masks, data) - grayscale image, list of grayscale
                 masks sorted by path, unpickled box data
        """
        img_path = Path(choice(self.paths))
        book_dir = img_path.parent.parent
        boxes_path = book_dir.joinpath('gen_boxes').joinpath(img_path.stem + ".pickle")
        mask_paths = sorted(book_dir.joinpath('gen_masks').glob(img_path.stem + '_*'))
        # NOTE(security): unpickling can execute arbitrary code; the dataset
        # has to come from a trusted source.
        with open(boxes_path, "rb") as f:
            data = pickle.load(f)
        # Flag 0 makes OpenCV load the files as single channel grayscale.
        img = cv2.imread(str(img_path), 0)
        masks = [cv2.imread(str(mask_path), 0) for mask_path in mask_paths]
        return img, masks, data

    def get_string(self):
        """
        Returns sample with random single word string from dataset.
        :return: (img, str, list) - img of word, string representation, list of char x axis delimiters
        """
        img, _, words = self.get_doc()
        word = choice(words)
        cut_img, delimiters = get_image_from_box(img, word)
        # Moves the char boxes relative to the minimum corner of the word box.
        # NOTE(review): this happens in place on freshly loaded data that is
        # not returned, so it looks like it has no visible effect - confirm.
        shift = np.min(word['box'], axis=0)
        for char in word['chars']:
            char['box'] -= shift
        return cut_img, word['text'], delimiters

    def get_char(self):
        """
        Returns sample with random single char string from dataset.
        :return: (img, str) - img with letter and letter
        """
        img, _, words = self.get_doc()
        word = choice(words)
        char = choice(word['chars'])
        # get_image_from_box reads data['box'] and data['chars'] and returns
        # (image, delimiters), so the char box is wrapped into a dict without
        # nested chars and only the image is kept.
        cut_img, _ = get_image_from_box(img, {'box': char['box'], 'chars': []})
        return cut_img, char['text']
68 changes: 68 additions & 0 deletions ocr-data/scripts/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import cv2
import numpy as np


def draw_word_boxes(img, word_box_list, word_color=(255, 0, 0), letter_color=None, fill=False):
    """
    Draws boxes on the image. Inplace
    :param img: np.array - image
    :param word_box_list: list - boxes; each item has a 'box' array and,
                          when letter_color is given, a 'chars' list of
                          items with their own 'box'
    :param word_color: tuple or int or None - boundary color for words, None skips the word boxes
    :param letter_color: tuple or list or None - boundary color for letters, None skips the letter boxes
    :param fill: bool - whether to fill the boxes with color or just draw a quadrilateral
    """
    def draw_box(box, color):
        # Corners are reordered to 0, 1, 3, 2 so they form a closed outline
        # and the two coordinate columns are swapped for OpenCV (presumably
        # boxes are stored as (row, col) - confirm). OpenCV needs int32 points.
        contour = np.asarray(box)[[0, 1, 3, 2], ::-1].reshape((-1, 1, 2)).astype(np.int32)
        if fill:
            # polylines can only draw outlines; filling needs fillPoly.
            cv2.fillPoly(img, [contour], color)
        else:
            cv2.polylines(img, [contour], True, color, thickness=2)

    for word in word_box_list:
        if word_color is not None:
            draw_box(word['box'], word_color)

        if letter_color is not None:
            for char in word['chars']:
                draw_box(char['box'], letter_color)


def get_image_from_box(image, data, height=32):
    """
    Cuts the box of a word out of the image with a perspective transform and
    computes the x positions that separate neighbouring chars in the cut.
    :param image: numpy.ndarray: image
    :param data: dict: word data with a 'box' array of four corners and a
                 'chars' list whose items have their own 'box'
    :param height: int: height of the result image
    :return: (np.ndarray, list): cut image, list of char x axis delimiters
    """
    quad = data['box']
    # The edge between corners 0 and 1 is mapped onto the requested height;
    # the edge between corners 1 and 2 then gives the width at the same scale.
    edge_01 = np.sqrt((quad[0, 1] - quad[1, 1])**2 + (quad[0, 0] - quad[1, 0])**2)
    scale = edge_01 / height
    edge_12 = np.sqrt((quad[1, 1] - quad[2, 1])**2 + (quad[1, 0] - quad[2, 0])**2)
    w = int(edge_12 / scale)

    # Both point sets get their coordinate columns swapped for OpenCV
    # (presumably boxes are stored as (row, col) - confirm).
    src_pts = np.float32(quad)[:, ::-1]
    src_pts = src_pts[[1, 0, 3, 2]]
    dst_pts = np.float32([[0, 0], [height, 0], [0, w], [height, w]])[:, ::-1]
    transform = cv2.getPerspectiveTransform(src_pts, dst_pts)
    result_img = cv2.warpPerspective(image, transform, (w, height))

    origin = quad[0][1]
    span = quad[2][1] - origin

    def to_cut_x(coord):
        # Rescales a source coordinate to the width of the cut image.
        return (coord - origin) / span * w

    chars = data['chars']
    # A delimiter is the middle between corner 3 of a char and corner 0 of
    # the char that follows it.
    delimiters = [
        int((to_cut_x(char['box'][3][1]) + to_cut_x(next_char['box'][0][1])) / 2)
        for char, next_char in zip(chars, chars[1:])
    ]
    return result_img, delimiters


def combine_masks(true_mask, predicted_mask):
    """
    Combines true and predicted masks into one image for convenient comparison.
    Channel 1 holds the inverted true mask, channel 2 the inverted predicted
    mask and channel 0 stays zero.
    :param true_mask: 2D np.ndarray - gray image with true mask
    :param predicted_mask: 2D np.ndarray - gray image with predicted mask
    :return: 3D np.ndarray - colored uint8 image with both masks
    :raises ValueError: if the shapes differ or the masks are not 2D
    """
    if true_mask.shape != predicted_mask.shape:
        raise ValueError("Shapes do not match")
    if true_mask.ndim != 2:
        raise ValueError("Masks should be greyscaled")

    # uint8 keeps the 0..255 values displayable: cv2.imshow multiplies float
    # images by 255, which would saturate a float image in this range.
    img = np.zeros(true_mask.shape + (3,), dtype=np.uint8)
    img[:, :, 2] = 255 - predicted_mask
    img[:, :, 1] = 255 - true_mask
    return img
30 changes: 30 additions & 0 deletions ocr-data/scripts/visualization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import cv2
import random
import pickle

from glob import glob
from pathlib import Path
from utils import draw_word_boxes
from generator import Generator


def random_show(gen):
    """
    Shows a random document from the generator: the image, its masks and the
    image with word boxes drawn. Waits up to one second for a key press.
    :param gen: Generator - source of the random document
    """
    img, masks, data = gen.get_doc()
    cv2.imshow("image", cv2.resize(img, (0, 0), fx=0.2, fy=0.2))
    # get_doc returns a list of masks, so every mask gets its own window.
    for idx, mask in enumerate(masks):
        cv2.imshow(f"mask {idx}", cv2.resize(mask, (0, 0), fx=0.2, fy=0.2))

    # draw_word_boxes works in place, so img now carries the boxes.
    draw_word_boxes(img, data, word_color=0)
    cv2.imshow("image with boxes", cv2.resize(img, (0, 0), fx=0.2, fy=0.2))
    cv2.waitKey(1000)


def show_dataset(dataset_path):
    """
    Shows random documents of the dataset one after another, without end.
    :param dataset_path: str - path to full dataset
    """
    # The path has to be passed by keyword: the first positional parameter
    # of Generator is pickle_path, not dataset_path.
    gen = Generator(dataset_path=dataset_path)
    while True:
        random_show(gen)


if __name__ == "__main__":
    # Dataset location used when the module is run as a script.
    dataset_path = "../data/pdf_dataset"
    show_dataset(dataset_path=dataset_path)

0 comments on commit 746520f

Please sign in to comment.