Skip to content

Commit

Permalink
keyword search and correction in folder streaming
Browse files Browse the repository at this point in the history
  • Loading branch information
AnantShankhdhar committed Mar 14, 2021
1 parent d48c300 commit cf4092b
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 3 deletions.
2 changes: 1 addition & 1 deletion deep_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def text_from_recording(self):
# path - filepath to the directory containing all the audio files
# at the moment we only support .wav extension
def folder_stream(self, path):
wav_files = glob.glob(path+'*.wav')
wav_files = glob.glob(path+'/*.wav')
arr = []
for i in wav_files:
x = self.text_from_file(i, stream=True)
Expand Down
70 changes: 70 additions & 0 deletions keyword_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import gensim.downloader as api
from audio_gen import user_audio
from text_gen import text_gen

# Pretrained word vectors used as the default embedding for keyword_search.
# Loaded once, at import time, through gensim's downloader API.
word_vectors = api.load("glove-wiki-gigaword-100")
import os
import glob


class keyword_search:
    """Detect a keyword, or a word close to it in embedding space, in text.

    The keyword is expanded with its ``topn`` nearest neighbours from a
    word-vector model, and a text "contains" the keyword when any of its
    whitespace-separated tokens equals the keyword or one of those
    neighbours. Audio inputs are first transcribed through ``text_gen``.
    """

    def __init__(self, vectors=None, topn=10):
        """Store the embedding model and the neighbour count.

        Args:
            vectors: object exposing ``most_similar(key, topn=...)`` that
                returns ``(word, score)`` pairs. ``None`` selects the
                module-level ``word_vectors``.
            topn: number of nearest neighbours added to the keyword.
        """
        # Resolve the default lazily so the class definition itself does not
        # depend on the module-level model being bound at definition time.
        self.vectors = word_vectors if vectors is None else vectors
        self.topn = topn

    def from_text(self, text, keyword):
        """Return True if ``text`` contains ``keyword`` or a similar word.

        Matching is case-insensitive and token based: ``text`` is lowercased
        and split on whitespace.

        Args:
            text: the text to search.
            keyword: the word to look for.

        Returns:
            bool: True when at least one token matches.
        """
        # The text is lowercased below, so the keyword has to be lowercased
        # too; otherwise a capitalised keyword could never match itself.
        keyword = keyword.lower()
        try:
            neighbours = self.vectors.most_similar(keyword, topn=self.topn)
        except KeyError:
            # Keyword is not in the embedding vocabulary: fall back to an
            # exact-match search instead of aborting the caller.
            neighbours = []

        candidates = {keyword}
        candidates.update(pair[0].lower() for pair in neighbours)
        return any(token in candidates for token in text.lower().split())

    def from_audio(self, keyword, model=None):
        """Search the text produced by ``text_gen(model).text_from_recording()``.

        Args:
            keyword: the word to look for.
            model: forwarded unchanged to ``text_gen``.

        Returns:
            bool: result of ``from_text`` on the transcript.
        """
        # NOTE(review): presumably this records from the microphone; confirm
        # against text_gen.
        recording = text_gen(model)
        text = recording.text_from_recording()
        return self.from_text(text, keyword)

    def from_file(self, filename, keyword, model=None):
        """Search the transcript of one audio file for ``keyword``.

        Args:
            filename: path handed to ``text_gen(model).text_from_file``.
            keyword: the word to look for.
            model: forwarded unchanged to ``text_gen``.

        Returns:
            bool: result of ``from_text`` on the transcript.
        """
        recording = text_gen(model)
        text = recording.text_from_file(filename)
        return self.from_text(text, keyword)

    def folder_stream(self, keyword, path, model=None):
        """Check every ``.wav`` file directly inside ``path`` for ``keyword``.

        The matching file paths are written to ``results.txt`` in the current
        working directory, one per line, and also returned.

        Args:
            keyword: the word to look for.
            path: directory containing the ``.wav`` files.
            model: forwarded unchanged to ``from_file``.

        Returns:
            list: paths of the files whose transcript matched.
        """
        # os.path.join avoids globbing the filesystem root when path is ''
        # and accepts path-like objects as well as strings.
        wav_files = glob.glob(os.path.join(path, '*.wav'))
        print(wav_files)

        # NOTE(review): from_file builds a new text_gen per file; reusing one
        # instance may be cheaper if text_gen is stateless -- confirm.
        matches = [
            wav for wav in wav_files
            if self.from_file(wav, keyword, model=model)
        ]

        # The with-statement closes the file; no explicit close() is needed.
        with open('results.txt', 'w') as file:
            file.writelines(wav + ' \n' for wav in matches)

        return matches






4 changes: 2 additions & 2 deletions wav2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def text_from_recording(self):
# path - filepath to the directory containing all the audio files
# at the moment we only support .wav extension
def folder_stream(self, path):
wav_files = glob.glob(path+'*.wav')
wav_files = glob.glob(path+'/*.wav')
arr = []
for i in wav_files:
x = self.text_from_file(i, stream=True)
Expand All @@ -51,4 +51,4 @@ def folder_stream(self, path):
file.write("".join(line)+' \n')
file.close()

return arr
return arr

0 comments on commit cf4092b

Please sign in to comment.