-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpreprocessing.m
149 lines (110 loc) · 4.11 KB
/
preprocessing.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
% PREPROCESSING FOR OCR
% AUTHOR: ALEKSANDAR VUCENOVIC, 01635282
% Preprocessing steps for the OCR are done in this file. The steps are:
% image upscaling, for better letter and digit quality; image
% straightening, so that template matching does not have to rotate the
% templates around the image; bounding-box extraction; and finally image
% segmentation, to isolate each letter and digit for the template matching.
% return: a struct array containing all the blobs found in the image
% usage: call preprocessing to get a struct array containing all image
% blobs, which are potential characters
function patches = preprocessing(img)
% PREPROCESSING  Cut an image into fixed-size character candidate patches.
%   patches = preprocessing(img) upscales img by a factor of 3, converts it
%   to grayscale when it has three channels, binarizes it with an adaptive
%   threshold, straightens it, and crops every blob that survives line
%   removal and box slicing.
%
%   Returns a 1-by-N struct array with the single field "image", each entry
%   holding one crop resized to 42-by-24, or [] when no box is left.

    % only the third size output is needed, to detect three-channel input
    [~, ~, numChannels] = size(img);

    % upscale first, then reduce to grayscale and apply adaptive threshold
    img = imresize(img, 3);
    if numChannels == 3
        img = rgb2gray(im2double(img));
    end
    img = imbinarize(img, 'adaptive', ...
        'ForegroundPolarity', 'dark', 'Sensitivity', 0.45);

    % straighten: rotate by the negated estimated angle, then clear
    % everything that touches the border of the inverted image
    skew = calcRotationAngle(img);
    img = imrotate(img, -skew, 'bicubic');
    inverted = 1 - img;
    img = 1 - imclearborder(inverted);

    % outline the blobs, thicken the outlines and fill them
    outlines = edge(img, 'prewitt');
    thickOutlines = imdilate(outlines, strel('square', 2));
    blobMask = imfill(thickOutlines, 'holes');

    % bounding boxes and centroids of all blobs; drop line-shaped boxes
    % and split boxes that are wider than tall
    regions = regionprops(logical(blobMask), 'BoundingBox', 'Centroid');
    charBoxes = sliceBoxes(deleteLines(regions));

    if numel(charBoxes) > 1
        % order the boxes: bin the centroid y values into 3 bins, then
        % sort by bin first and by centroid x second
        % source:
        % https://stackoverflow.com/questions/43076798/how-to-control-the-order-of-detected-objects-by-regionprops-in-matlab
        centers = vertcat(charBoxes.Centroid);
        [~, ~, rowBin] = histcounts(centers(:, 2), 3);
        [~, readingOrder] = sortrows([rowBin, centers(:, 1)]);
        charBoxes = charBoxes(readingOrder);
    end

    % crop the image with each bounding box rectangle and normalise the
    % size; per the original note the intended order is the first three
    % letters of a label, then the 3 digit code, then the author
    patches = [];
    for idx = 1:numel(charBoxes)
        crop = imcrop(img, charBoxes(idx).BoundingBox);
        entry = struct("image", imresize(crop, [42, 24]));
        patches = [patches, entry]; %#ok<AGROW>
    end
end
function angle = calcRotationAngle(image)
% CALCROTATIONANGLE  Estimate a rotation angle from the Hough transform.
%   angle = calcRotationAngle(image) returns an angle in degrees that is
%   wrapped into the range [-45, 45).
%   usage: calculate the angle and rotate the image using 'bicubic' method
%   author: aleksandar vucenovic, 01635282
%
%   Raises an error when image is not a matrix.

    % precondition: anything with more than two dimensions is rejected
    if ~ismatrix(image)
        error('The image must be binarized!')
    end

    % number of 0.1-degree theta steps that make up 90 degrees
    halfSpan = 900;

    % prewitt edge map fed into the hough transform, theta from -90 to
    % 89.9 degrees in 0.1-degree steps
    edgeMap = edge(image, 'prewitt');
    [accumulator, thetas, ~] = hough(edgeMap, 'Theta', -90:0.1:89.9);

    % variance of every theta column; the first and the last halfSpan
    % columns are added so that directions 90 degrees apart share one score
    columnVar = var(accumulator);
    folded = columnVar(1:halfSpan) + columnVar(end - halfSpan + 1:end);

    % strongest folded column -> negated theta, wrapped into [-45, 45)
    [~, peak] = max(folded);
    angle = mod(45 - thetas(peak), 90) - 45;
end
function box_sliced = sliceBoxes(box_corrected)
% SLICEBOXES  Split boxes that are wider than tall into two equal halves.
%   author: anand eichner & aleksandar vucenovic
%
%   The characters are monospaced and a single character is expected to be
%   narrower than it is tall, so a box whose width exceeds its height is
%   treated as two characters side by side and cut at half its width.
%
%   box_corrected: struct array with at least the fields BoundingBox
%                  ([x y width height]) and Centroid ([x y]).
%   box_sliced:    column struct array; boxes that are not wider than tall
%                  are passed through unchanged, wide boxes are replaced
%                  by a left and a right half. [] when the input is empty.
%
%   Both halves start as copies of the original box, so any additional
%   fields are carried over unchanged and every element has the same set
%   of fields. Building the halves from scratch would make the
%   concatenation fail as soon as the input has a field other than
%   BoundingBox and Centroid.

    box_sliced = [];
    for i = 1:numel(box_corrected)
        b = box_corrected(i);
        coord = b.BoundingBox;

        if coord(3) > coord(4)
            halfWidth = coord(3) / 2;

            % left half: same origin, half the width, centroid x at 1/4
            left = b;
            left.BoundingBox = [coord(1), coord(2), halfWidth, coord(4)];
            left.Centroid(1) = coord(1) + halfWidth / 2;

            % right half: origin shifted by half the width, centroid x at 3/4
            right = b;
            right.BoundingBox = [coord(1) + halfWidth, coord(2), halfWidth, coord(4)];
            right.Centroid(1) = coord(1) + halfWidth / 2 * 3;

            box_sliced = [box_sliced; left; right]; %#ok<AGROW>
        else
            box_sliced = [box_sliced; b]; %#ok<AGROW>
        end
    end
end
function box_corrected = deleteLines(box)
% DELETELINES  Drop line-shaped boxes so only boxes around chars remain.
%   author: aleksandar vucenovic
%
%   A box is kept when its width (BoundingBox(3)) is less than ten times
%   its height (BoundingBox(4)). The survivors are returned as a column
%   struct array in their original order; [] is returned when none is kept.

    count = length(box);

    % first pass: mark every box that is not line-shaped
    isChar = false(count, 1);
    for k = 1:count
        extent = box(k).BoundingBox(3:4);   % [width, height]
        isChar(k) = extent(1) < (extent(2) * 10);
    end

    % second pass: select the marked boxes in one go
    box_corrected = [];
    if any(isChar)
        kept = box(isChar);
        box_corrected = kept(:);
    end
end