Jan Biermeyer committed on
Commit
aa413f7
·
1 Parent(s): c8a749d

still fixing

Browse files
Files changed (1) hide show
  1. rag/model_loader.py +13 -4
rag/model_loader.py CHANGED
@@ -180,11 +180,17 @@ def load_enhanced_model_m2max() -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
180
  cache_dir = os.getenv("HF_HOME") or os.getenv("TRANSFORMERS_CACHE") or "/workspace/.cache/huggingface" if os.getenv("WORKSPACE") else ".cache/huggingface"
181
 
182
  # For LoRA models, try loading tokenizer from LoRA directory first, then base model
 
183
  tokenizer = None
184
  if model_path and (model_path / "tokenizer.json").exists():
185
  try:
186
  logger.info(f"📝 Loading tokenizer from LoRA directory: {model_path}")
187
- tokenizer = AutoTokenizer.from_pretrained(str(model_path), cache_dir=cache_dir, trust_remote_code=True)
 
 
 
 
 
188
  except Exception as e:
189
  logger.warning(f"⚠️ Could not load tokenizer from LoRA dir: {e}, using base model")
190
 
@@ -193,7 +199,8 @@ def load_enhanced_model_m2max() -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
193
  base_model_name,
194
  cache_dir=cache_dir,
195
  padding_side='left', # Required for decoder-only models
196
- trust_remote_code=True
 
197
  )
198
 
199
  if tokenizer.pad_token is None:
@@ -297,7 +304,8 @@ def load_enhanced_model_m2max() -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
297
  base_model_name,
298
  cache_dir=cache_dir,
299
  padding_side='left',
300
- trust_remote_code=True
 
301
  )
302
 
303
  if tokenizer.pad_token is None:
@@ -380,7 +388,8 @@ def load_enhanced_model_m2max() -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
380
  base_model_name,
381
  cache_dir=cache_dir,
382
  padding_side='left',
383
- trust_remote_code=True
 
384
  )
385
 
386
  if tokenizer.pad_token is None:
 
180
  cache_dir = os.getenv("HF_HOME") or os.getenv("TRANSFORMERS_CACHE") or "/workspace/.cache/huggingface" if os.getenv("WORKSPACE") else ".cache/huggingface"
181
 
182
  # For LoRA models, try loading tokenizer from LoRA directory first, then base model
183
+ # Use slow tokenizer (use_fast=False) which requires sentencepiece for Llama/Mistral models
184
  tokenizer = None
185
  if model_path and (model_path / "tokenizer.json").exists():
186
  try:
187
  logger.info(f"📝 Loading tokenizer from LoRA directory: {model_path}")
188
+ tokenizer = AutoTokenizer.from_pretrained(
189
+ str(model_path),
190
+ cache_dir=cache_dir,
191
+ trust_remote_code=True,
192
+ use_fast=False # Use slow tokenizer with sentencepiece
193
+ )
194
  except Exception as e:
195
  logger.warning(f"⚠️ Could not load tokenizer from LoRA dir: {e}, using base model")
196
 
 
199
  base_model_name,
200
  cache_dir=cache_dir,
201
  padding_side='left', # Required for decoder-only models
202
+ trust_remote_code=True,
203
+ use_fast=False # Use slow tokenizer with sentencepiece
204
  )
205
 
206
  if tokenizer.pad_token is None:
 
304
  base_model_name,
305
  cache_dir=cache_dir,
306
  padding_side='left',
307
+ trust_remote_code=True,
308
+ use_fast=False # Use slow tokenizer with sentencepiece
309
  )
310
 
311
  if tokenizer.pad_token is None:
 
388
  base_model_name,
389
  cache_dir=cache_dir,
390
  padding_side='left',
391
+ trust_remote_code=True,
392
+ use_fast=False # Use slow tokenizer with sentencepiece
393
  )
394
 
395
  if tokenizer.pad_token is None: