keyword search and correction in folder streaming

AchintyaX · Mar 14, 2021 · cf4092b · cf4092b
1 parent d48c300
commit cf4092b
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 3 deletions.
diff --git a/deep_speech.py b/deep_speech.py
@@ -52,7 +52,7 @@ def text_from_recording(self):
 	# path - filepath to the directory containing all the audio files
 	# at the moment we only support .wav extension	
 	def folder_stream(self, path):
-		wav_files = glob.glob(path+'*.wav')
+		wav_files = glob.glob(path+'/*.wav')
 		arr = []
 		for i in wav_files:
 			x = self.text_from_file(i, stream=True)

diff --git a/keyword_search.py b/keyword_search.py
@@ -0,0 +1,70 @@
+import gensim.downloader as api
+from audio_gen import user_audio
+from text_gen import text_gen
+
+word_vectors = api.load("glove-wiki-gigaword-100")
+import os
+import glob
+
+
+class keyword_search:
+	"""Detect a keyword, or a word close to it in embedding space, in text or audio.
+
+	`vectors` must provide `most_similar(word, topn=...)`; `topn` is how many
+	nearest neighbours are accepted as a match in addition to the keyword itself.
+	"""
+
+	def __init__(self,vectors=word_vectors,topn=10):
+		self.vectors = vectors
+		self.topn = topn
+
+	def from_text(self,text,keyword):
+		"""Return True if `keyword` or one of its `topn` most similar words occurs in `text`.
+
+		`text` is lower-cased and split on whitespace, so the keyword is lower-cased
+		too; otherwise a capitalised keyword could never match its own occurrence.
+		Errors raised by `vectors.most_similar` (e.g. an unknown word) propagate.
+		"""
+		keyword = keyword.lower()
+		neighbours = self.vectors.most_similar(keyword,topn=self.topn)
+		# each result is indexed as (word, ...); only the word is needed
+		words = [keyword] + [r[0] for r in neighbours]
+		tokens = set(text.lower().split())
+		return any(word in tokens for word in words)
+
+	def from_audio(self,keyword,model=None):
+		"""Transcribe via `text_gen(model).text_from_recording()` and search the result."""
+		recording = text_gen(model)
+		text = recording.text_from_recording()
+		return self.from_text(text,keyword)
+
+	def from_file(self,filename,keyword,model=None):
+		"""Transcribe the audio file `filename` and return True if the keyword is found.
+
+		Returns a bool, exactly as `from_text` does for the transcript.
+		"""
+		recording = text_gen(model)
+		text = recording.text_from_file(filename)
+		return self.from_text(text,keyword)
+
+	def folder_stream(self,keyword,path,model=None):
+		"""Search every .wav file directly inside `path` for the keyword.
+
+		The matching file paths are written to 'results.txt' in the current
+		working directory (overwriting it), one per line, and returned as a list.
+		"""
+		# os.path.join copes with `path` given with or without a trailing separator
+		wav_files = glob.glob(os.path.join(path,'*.wav'))
+		arr = [f for f in wav_files if self.from_file(f,keyword,model=model)]
+
+		# the `with` block closes the file; no explicit close() is needed
+		with open('results.txt', 'w') as file:
+			for line in arr:
+				file.write(line+' \n')
+		return arr
+
+
+
+
+
+
diff --git a/wav2vec.py b/wav2vec.py
@@ -40,7 +40,7 @@ def text_from_recording(self):
     # path - filepath to the directory containing all the audio files
     # at the moment we only support .wav extension  
     def folder_stream(self, path):
-        wav_files = glob.glob(path+'*.wav')
+        wav_files = glob.glob(path+'/*.wav')
         arr = []
         for i in wav_files:
             x = self.text_from_file(i, stream=True)
@@ -51,4 +51,4 @@ def folder_stream(self, path):
                 file.write("".join(line)+' \n')
             file.close()
 
-        return arr 
+        return arr