From cf4092bb515c78915d30bb8f89d8a0c14e89fc42 Mon Sep 17 00:00:00 2001
From: Anant Shankhdhar <anantshankhdhar0808@gmail.com>
Date: Sun, 14 Mar 2021 17:20:45 +0530
Subject: [PATCH] Add keyword search and fix the glob pattern in folder streaming

---
 deep_speech.py    |  2 +-
 keyword_search.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++
 wav2vec.py        |  4 +--
 3 files changed, 73 insertions(+), 3 deletions(-)
 create mode 100644 keyword_search.py

diff --git a/deep_speech.py b/deep_speech.py
index 605439f..8a69b17 100644
--- a/deep_speech.py
+++ b/deep_speech.py
@@ -52,7 +52,7 @@ def text_from_recording(self):
 	# path - filepath to the directory containing all the audio files
 	# at the moment we only support .wav extension	
 	def folder_stream(self, path):
-		wav_files = glob.glob(path+'*.wav')
+		wav_files = glob.glob(path+'/*.wav')
 		arr = []
 		for i in wav_files:
 			x = self.text_from_file(i, stream=True)
diff --git a/keyword_search.py b/keyword_search.py
new file mode 100644
index 0000000..4428b3e
--- /dev/null
+++ b/keyword_search.py
@@ -0,0 +1,70 @@
+import gensim.downloader as api
+from audio_gen import user_audio
+from text_gen import text_gen
+
+word_vectors = api.load("glove-wiki-gigaword-100")
+import os
+import glob
+
+
+class keyword_search:
+	"""Detect a keyword, or a word semantically close to it, in text or audio.
+
+	vectors must expose most_similar(word, topn=...) returning (word, score)
+	pairs; topn is how many nearest neighbours also count as a match.
+	"""
+
+	def __init__(self,vectors=word_vectors,topn=10):
+		self.vectors = vectors
+		self.topn = topn
+
+	def _candidates(self,keyword):
+		"""Return the lower-cased keyword together with its nearest neighbours.
+
+		The result is a set, so membership tests against tokens are cheap.
+		"""
+		# from_text lower-cases the text, so the keyword must be folded too
+		keyword = keyword.lower()
+		try:
+			similar = self.vectors.most_similar(keyword,topn=self.topn)
+		except KeyError:
+			# keyword missing from the vocabulary: only an exact match counts
+			similar = []
+		return {keyword} | {word for word, _score in similar}
+
+	def from_text(self,text,keyword):
+		"""Return True if keyword or a similar word is a token of text.
+
+		text is lower-cased and split on whitespace, so punctuation attached
+		to a word prevents that word from matching.
+		"""
+		return not self._candidates(keyword).isdisjoint(text.lower().split())
+
+	def from_audio(self,keyword,model=None):
+		"""Search the text returned by text_gen(model).text_from_recording().
+
+		Returns True if keyword or a similar word occurs in that text.
+		"""
+		text = text_gen(model).text_from_recording()
+		return self.from_text(text,keyword)
+
+	def from_file(self,filename,keyword,model=None):
+		"""Search the text returned by text_gen(model).text_from_file(filename).
+
+		Returns True if keyword or a similar word occurs in that text.
+		"""
+		text = text_gen(model).text_from_file(filename)
+		return self.from_text(text,keyword)
+
+	def folder_stream(self,keyword,path,model=None):
+		"""Run from_file on every .wav file directly inside the folder path.
+
+		The paths of the files that match are written to results.txt in the
+		current working directory, one per line, and returned as a list.
+		"""
+		wav_files = glob.glob(os.path.join(path,'*.wav'))
+		arr = [f for f in wav_files if self.from_file(f,keyword,model=model)]
+		# the with-block closes the file, so no explicit close() is needed
+		with open('results.txt', 'w') as file:
+			file.writelines(name+' \n' for name in arr)
+		return arr
diff --git a/wav2vec.py b/wav2vec.py
index 1bdb0df..5a51df6 100644
--- a/wav2vec.py
+++ b/wav2vec.py
@@ -40,7 +40,7 @@ def text_from_recording(self):
     # path - filepath to the directory containing all the audio files
     # at the moment we only support .wav extension  
     def folder_stream(self, path):
-        wav_files = glob.glob(path+'*.wav')
+        wav_files = glob.glob(path+'/*.wav')
         arr = []
         for i in wav_files:
             x = self.text_from_file(i, stream=True)
@@ -51,4 +51,4 @@ def folder_stream(self, path):
                 file.write("".join(line)+' \n')
             file.close()
 
-        return arr 
\ No newline at end of file
+        return arr