diff --git a/whisper/decoding.py b/whisper/decoding.py index 49485d009..5aa7f9df2 100644 --- a/whisper/decoding.py +++ b/whisper/decoding.py @@ -514,7 +514,7 @@ class DecodingTask: def __init__(self, model: "Whisper", options: DecodingOptions): self.model = model - language = options.language or "en" + language = options.language or "default" tokenizer = get_tokenizer( model.is_multilingual, num_languages=model.num_languages, diff --git a/whisper/tokenizer.py b/whisper/tokenizer.py index 2af837570..501b7a945 100644 --- a/whisper/tokenizer.py +++ b/whisper/tokenizer.py @@ -381,7 +381,7 @@ def get_tokenizer( if multilingual: encoding_name = "multilingual" - language = language or "en" + language = language or "default" task = task or "transcribe" else: encoding_name = "gpt2"