Upload processor

Files changed (5) hide show

README.md CHANGED Viewed

@@ -2,9 +2,9 @@
 language:
 - ar
 license: apache-2.0
-base_model: tarteel-ai/whisper-base-ar-quran
 tags:
 - generated_from_trainer
 datasets:
 - zolfa
 metrics:
@@ -13,16 +13,16 @@ model-index:
 - name: Whisper-raghadomar
   results:
   - task:
-      name: Automatic Speech Recognition
       type: automatic-speech-recognition
     dataset:
       name: Zolfa Dataset
       type: zolfa
       args: 'config: ar, split: test'
     metrics:
-    - name: Wer
-      type: wer
       value: 6.896551724137931
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You

 language:
 - ar
 license: apache-2.0
 tags:
 - generated_from_trainer
+base_model: tarteel-ai/whisper-base-ar-quran
 datasets:
 - zolfa
 metrics:
 - name: Whisper-raghadomar
   results:
   - task:
       type: automatic-speech-recognition
+      name: Automatic Speech Recognition
     dataset:
       name: Zolfa Dataset
       type: zolfa
       args: 'config: ar, split: test'
     metrics:
+    - type: wer
       value: 6.896551724137931
+      name: Wer
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You

added_tokens.json CHANGED Viewed

@@ -17,7 +17,6 @@
   "<|da|>": 50285,
   "<|de|>": 50261,
   "<|el|>": 50281,
-  "<|endoftext|>": 50257,
   "<|en|>": 50259,
   "<|es|>": 50262,
   "<|et|>": 50307,
@@ -30,6 +29,7 @@
   "<|gu|>": 50333,
   "<|haw|>": 50352,
   "<|ha|>": 50354,
   "<|hi|>": 50276,
   "<|hr|>": 50291,
   "<|ht|>": 50339,
@@ -38,7 +38,6 @@
   "<|id|>": 50275,
   "<|is|>": 50311,
   "<|it|>": 50274,
-  "<|iw|>": 50279,
   "<|ja|>": 50266,
   "<|jw|>": 50356,
   "<|ka|>": 50329,

   "<|da|>": 50285,
   "<|de|>": 50261,
   "<|el|>": 50281,
   "<|en|>": 50259,
   "<|es|>": 50262,
   "<|et|>": 50307,
   "<|gu|>": 50333,
   "<|haw|>": 50352,
   "<|ha|>": 50354,
+  "<|he|>": 50279,
   "<|hi|>": 50276,
   "<|hr|>": 50291,
   "<|ht|>": 50339,
   "<|id|>": 50275,
   "<|is|>": 50311,
   "<|it|>": 50274,
   "<|ja|>": 50266,
   "<|jw|>": 50356,
   "<|ka|>": 50329,

special_tokens_map.json CHANGED Viewed

@@ -22,7 +22,7 @@
     "<|hi|>",
     "<|fi|>",
     "<|vi|>",
-    "<|iw|>",
     "<|uk|>",
     "<|el|>",
     "<|ms|>",
@@ -130,7 +130,7 @@
     "single_word": false
   },
   "unk_token": {
-    "content": "",
     "lstrip": false,
     "normalized": true,
     "rstrip": false,

     "<|hi|>",
     "<|fi|>",
     "<|vi|>",
+    "<|he|>",
     "<|uk|>",
     "<|el|>",
     "<|ms|>",
     "single_word": false
   },
   "unk_token": {
+    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": true,
     "rstrip": false,

tokenizer_config.json CHANGED Viewed

@@ -179,7 +179,7 @@
       "special": true
     },
     "50279": {
-      "content": "<|iw|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -882,7 +882,7 @@
     "<|hi|>",
     "<|fi|>",
     "<|vi|>",
-    "<|iw|>",
     "<|uk|>",
     "<|el|>",
     "<|ms|>",
@@ -972,10 +972,10 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "errors": "replace",
-  "model_max_length": 448,
   "pad_token": "<|endoftext|>",
   "processor_class": "WhisperProcessor",
   "return_attention_mask": false,
   "tokenizer_class": "WhisperTokenizer",
-  "unk_token": ""
 }

       "special": true
     },
     "50279": {
+      "content": "<|he|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
     "<|hi|>",
     "<|fi|>",
     "<|vi|>",
+    "<|he|>",
     "<|uk|>",
     "<|el|>",
     "<|ms|>",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "errors": "replace",
+  "model_max_length": 1024,
   "pad_token": "<|endoftext|>",
   "processor_class": "WhisperProcessor",
   "return_attention_mask": false,
   "tokenizer_class": "WhisperTokenizer",
+  "unk_token": "<|endoftext|>"
 }

vocab.json CHANGED Viewed

@@ -314,6 +314,7 @@
   ";;": 35746,
   "<": 27,
   "</": 3433,
   "=": 28,
   "=\"": 13114,
   "=\"#": 34106,

   ";;": 35746,
   "<": 27,
   "</": 3433,
+  "<|endoftext|>": 50257,
   "=": 28,
   "=\"": 13114,
   "=\"#": 34106,