From cf4092bb515c78915d30bb8f89d8a0c14e89fc42 Mon Sep 17 00:00:00 2001
From: Anant Shankhdhar
Date: Sun, 14 Mar 2021 17:20:45 +0530
Subject: [PATCH] keyword search and correction in folder streaming

---
NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch; confirm the context lines against
the target files before applying. keyword_search.py was revised in review,
so the blob id on its index line no longer matches the content.

 deep_speech.py    |  2 +-
 keyword_search.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++
 wav2vec.py        |  4 ++--
 3 files changed, 62 insertions(+), 3 deletions(-)
 create mode 100644 keyword_search.py

diff --git a/deep_speech.py b/deep_speech.py
index 605439f..8a69b17 100644
--- a/deep_speech.py
+++ b/deep_speech.py
@@ -52,7 +52,7 @@ def text_from_recording(self):
     # path - filepath to the directory containing all the audio files
     # at the moment we only support .wav extension
     def folder_stream(self, path):
-        wav_files = glob.glob(path+'*.wav')
+        wav_files = glob.glob(path+'/*.wav')
         arr = []
         for i in wav_files:
             x = self.text_from_file(i, stream=True)
diff --git a/keyword_search.py b/keyword_search.py
new file mode 100644
index 0000000..4428b3e
--- /dev/null
+++ b/keyword_search.py
@@ -0,0 +1,59 @@
+import glob
+import os
+
+import gensim.downloader as api
+
+from audio_gen import user_audio
+from text_gen import text_gen
+
+# Pre-trained GloVe vectors, loaded once at import time.
+word_vectors = api.load("glove-wiki-gigaword-100")
+
+
+class keyword_search:
+    """Look for a keyword or its nearest neighbours in text or audio."""
+
+    def __init__(self, vectors=word_vectors, topn=10):
+        # vectors must provide most_similar(key, topn=...); topn is the
+        # number of neighbours accepted as stand-ins for the keyword.
+        self.vectors = vectors
+        self.topn = topn
+
+    def from_text(self, text, keyword):
+        """Return True if keyword or one of its topn neighbours is in text.
+
+        Matching is case-insensitive on whitespace-separated tokens.
+        """
+        candidates = {keyword.lower()}
+        try:
+            neighbours = self.vectors.most_similar(keyword, topn=self.topn)
+        except KeyError:
+            # Keyword is not in the vocabulary: only an exact match counts.
+            neighbours = []
+        candidates.update(hit[0].lower() for hit in neighbours)
+        return any(token in candidates for token in text.lower().split())
+
+    def from_audio(self, keyword, model=None):
+        """Search the text from text_gen(model).text_from_recording()."""
+        text = text_gen(model).text_from_recording()
+        return self.from_text(text, keyword)
+
+    def from_file(self, filename, keyword, model=None):
+        """Search the text from text_gen(model).text_from_file(filename)."""
+        text = text_gen(model).text_from_file(filename)
+        return self.from_text(text, keyword)
+
+    def folder_stream(self, keyword, path, model=None):
+        """Return the .wav files directly under path that match keyword.
+
+        The matching paths are also written to results.txt, one per line.
+        """
+        wav_files = glob.glob(os.path.join(path, '*.wav'))
+        print(wav_files)
+        arr = [f for f in wav_files if self.from_file(f, keyword, model=model)]
+
+        with open('results.txt', 'w') as file:
+            for line in arr:
+                file.write(line + ' \n')
+
+        return arr
diff --git a/wav2vec.py b/wav2vec.py
index 1bdb0df..5a51df6 100644
--- a/wav2vec.py
+++ b/wav2vec.py
@@ -40,7 +40,7 @@ def text_from_recording(self):
     # path - filepath to the directory containing all the audio files
     # at the moment we only support .wav extension
     def folder_stream(self, path):
-        wav_files = glob.glob(path+'*.wav')
+        wav_files = glob.glob(path+'/*.wav')
         arr = []
         for i in wav_files:
             x = self.text_from_file(i, stream=True)
@@ -51,4 +51,4 @@ def folder_stream(self, path):
                 file.write("".join(line)+' \n')
             file.close()
 
-        return arr
\ No newline at end of file
+        return arr