From fdd1b6f1a98913c489076c836c25640f4fa29dad Mon Sep 17 00:00:00 2001 From: chihangc Date: Thu, 21 Nov 2024 14:47:08 +0800 Subject: [PATCH] fix: add support for leading zeros in transcription when spoken by user - Ensures that leading zeros are included in the transcription when explicitly voiced. --- whisper/normalizers/english.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/whisper/normalizers/english.py b/whisper/normalizers/english.py index 4932042bc..1d5777958 100644 --- a/whisper/normalizers/english.py +++ b/whisper/normalizers/english.py @@ -208,6 +208,10 @@ def output(result: Union[str, int]): prefix = current[0] if has_prefix else prefix if f.denominator == 1: value = f.numerator # store integers as int + # count the number of leading zeros and add back all leading zeros if they were removed + leading_zeros = len(current_without_prefix) - len(current_without_prefix.lstrip('0')) + if leading_zeros > 0 and value != 0: + value = "0" * leading_zeros + str(value) else: value = current_without_prefix elif current not in self.words: