-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgen_more.py
116 lines (101 loc) · 3.87 KB
/
gen_more.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Author: Ankush Gupta
# Date: 2015
"""
Entry-point for generating synthetic text images, as described in:
@InProceedings{Gupta16,
author = "Gupta, A. and Vedaldi, A. and Zisserman, A.",
title = "Synthetic Data for Text Localisation in Natural Images",
booktitle = "IEEE Conference on Computer Vision and Pattern Recognition",
year = "2016",
}
"""
import numpy as np
import h5py
import os, sys, traceback
import os.path as osp
from synthgen import *
from common import *
import wget, tarfile
## Define some configuration variables:
NUM_IMG = -1  # no. of images to use for generation (-1 to use all available)
INSTANCE_PER_IMAGE = 1  # no. of times to use the same image
SECS_PER_IMG = 5  # max time per image in seconds
# directory holding the data (also handed to RendererV3 in main()):
DATA_PATH = 'data'
# input h5 file; main() reads its 'image', 'depth' and 'seg' groups:
DB_FNAME = osp.join(DATA_PATH,'dset_8000.h5')
# output h5 file into which main() writes the generated instances:
OUT_FILE = 'results/zangwen/SynthText_8000.h5'
def add_res_to_db(imgname,res,db):
    """
    Write every generated instance in `res` into the 'data' group of `db`.

    The i-th instance of image `imgname` becomes the dataset
    "<imgname>_<i>", holding the rendered image. Its character and word
    bounding-boxes ('charBB', 'wordBB') and its text ('txt', stored as
    variable-length strings) are attached as attributes of that dataset.
    """
    for idx, instance in enumerate(res):
        key = "%s_%d" % (imgname, idx)
        dset = db['data'].create_dataset(key, data=instance['img'])
        dset.attrs['charBB'] = instance['charBB']
        dset.attrs['wordBB'] = instance['wordBB']
        # the text needs an explicit variable-length string dtype:
        txt_dtype = h5py.special_dtype(vlen=str)
        dset.attrs.create('txt', instance['txt'], dtype=txt_dtype)
def _load_sample(db, imname):
    """
    Read image `imname` with its pre-computed depth and segmentation
    from `db`, and resize image and segmentation to the depth map's size.

    Returns the tuple (img, depth, seg, area, label), i.e. the leading
    arguments of the `RendererV3.render_text` call made in `main`.
    """
    # get the image:
    img = Image.fromarray(db['image'][imname][:])
    # get the pre-computed depth:
    #  there are 2 estimates of depth (represented as 2 "channels")
    #  here we are using the second one (in some cases it might be
    #  useful to use the other one):
    depth = db['depth'][imname][:].T
    depth = depth[:, :, 1]
    # get segmentation:
    seg = db['seg'][imname][:].astype('float32')
    area = db['seg'][imname].attrs['area']
    label = db['seg'][imname].attrs['label']

    # re-size uniformly (the first two dims of depth are reversed
    # to form the size passed to resize):
    sz = depth.shape[:2][::-1]
    # LANCZOS is the filter formerly named ANTIALIAS; the old name
    # was removed in Pillow 10.
    img = np.array(img.resize(sz, Image.LANCZOS))
    seg = np.array(Image.fromarray(seg).resize(sz, Image.NEAREST))
    return img, depth, seg, area, label


def main(viz=False):
    """
    Render synthetic text onto the images stored in DB_FNAME and write
    the results to OUT_FILE (which is overwritten).

    At most NUM_IMG images are processed, in sorted name order (all of
    them when NUM_IMG is negative). An image whose processing raises is
    reported with a traceback and skipped.

    viz : passed through to the renderer; when true, the user is also
          prompted after every image and may quit by entering 'q'.
    """
    # open databases:
    print(colorize(Color.BLUE, 'getting data..', bold=True))
    with h5py.File(DB_FNAME, 'r') as db:
        print(colorize(Color.BLUE, '\t-> done', bold=True))

        # h5py does not create missing parent directories:
        out_dir = osp.dirname(OUT_FILE)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        # open the output h5 file; the `with` blocks guarantee that both
        # files are closed (and the output flushed) even on Ctrl-C:
        with h5py.File(OUT_FILE, 'w') as out_db:
            out_db.create_group('/data')
            print(colorize(Color.GREEN, 'Storing the output in: '+OUT_FILE, bold=True))

            # get the names of the image files in the dataset:
            imnames = sorted(db['image'].keys())
            N = len(imnames)
            # local copy, so that the module-level setting is not modified:
            num_img = N if NUM_IMG < 0 else NUM_IMG
            start_idx, end_idx = 0, min(num_img, N)

            RV3 = RendererV3(DATA_PATH, max_time=SECS_PER_IMG)
            for i in range(start_idx, end_idx):
                imname = imnames[i]
                try:
                    img, depth, seg, area, label = _load_sample(db, imname)

                    print(colorize(Color.RED, '%d of %d'%(i, end_idx-1), bold=True))
                    res = RV3.render_text(img, depth, seg, area, label,
                                          ninstance=INSTANCE_PER_IMAGE, viz=viz)
                    if len(res) > 0:
                        # non-empty : successful in placing text:
                        add_res_to_db(imname, res, out_db)
                    # visualize the output:
                    if viz:
                        if 'q' in input(colorize(Color.RED,'continue? (enter to continue, q to exit): ',True)):
                            break
                except Exception:
                    # best-effort: report the failure and move on to the
                    # next image. KeyboardInterrupt / SystemExit are not
                    # caught, so the run can still be interrupted.
                    traceback.print_exc()
                    print(colorize(Color.GREEN, '>>>> CONTINUING....', bold=True))
if __name__ == '__main__':
    # command-line entry point: the only option is the --viz switch,
    # which is off unless given.
    import argparse

    cli = argparse.ArgumentParser(
        description='Genereate Synthetic Scene-Text Images',
    )
    cli.add_argument(
        '--viz',
        action='store_true',
        help='flag for turning on visualizations',
    )
    options = cli.parse_args()
    main(viz=options.viz)