Jan Biermeyer committed

Commit aa413f7 · Parent(s): c8a749d

still fixing
Files changed: rag/model_loader.py (+13 -4)

rag/model_loader.py  CHANGED

@@ -180,11 +180,17 @@ def load_enhanced_model_m2max() -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
     cache_dir = os.getenv("HF_HOME") or os.getenv("TRANSFORMERS_CACHE") or "/workspace/.cache/huggingface" if os.getenv("WORKSPACE") else ".cache/huggingface"
 
     # For LoRA models, try loading tokenizer from LoRA directory first, then base model
+    # Use slow tokenizer (use_fast=False) which requires sentencepiece for Llama/Mistral models
     tokenizer = None
     if model_path and (model_path / "tokenizer.json").exists():
         try:
             logger.info(f"📝 Loading tokenizer from LoRA directory: {model_path}")
-            tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer = AutoTokenizer.from_pretrained(
+                str(model_path),
+                cache_dir=cache_dir,
+                trust_remote_code=True,
+                use_fast=False  # Use slow tokenizer with sentencepiece
+            )
         except Exception as e:
             logger.warning(f"⚠️  Could not load tokenizer from LoRA dir: {e}, using base model")
 
@@ -193,7 +199,8 @@ def load_enhanced_model_m2max() -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
             base_model_name,
             cache_dir=cache_dir,
             padding_side='left',  # Required for decoder-only models
-            trust_remote_code=True
+            trust_remote_code=True,
+            use_fast=False  # Use slow tokenizer with sentencepiece
         )
 
     if tokenizer.pad_token is None:
@@ -297,7 +304,8 @@ def load_enhanced_model_m2max() -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
         base_model_name,
         cache_dir=cache_dir,
         padding_side='left',
-        trust_remote_code=True
+        trust_remote_code=True,
+        use_fast=False  # Use slow tokenizer with sentencepiece
     )
 
     if tokenizer.pad_token is None:
@@ -380,7 +388,8 @@ def load_enhanced_model_m2max() -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
             base_model_name,
             cache_dir=cache_dir,
             padding_side='left',
-            trust_remote_code=True
+            trust_remote_code=True,
+            use_fast=False  # Use slow tokenizer with sentencepiece
         )
 
     if tokenizer.pad_token is None:
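For reference, the pattern the commit converges on reads roughly as below once all hunks are applied. This is a minimal sketch, not the file itself: the load_tokenizer wrapper, its signature, and the eos_token assignment in the last branch are assumptions added for illustration; the from_pretrained arguments, the LoRA-directory check, and the log messages come from the diff.

    # Minimal sketch of the tokenizer fallback implemented by this commit.
    # Assumptions (not in the diff): the load_tokenizer wrapper and its
    # signature, and the eos_token assignment at the end.
    # Requires: transformers and sentencepiece (needed once use_fast=False).
    import logging
    from pathlib import Path
    from typing import Optional

    from transformers import AutoTokenizer

    logger = logging.getLogger(__name__)


    def load_tokenizer(model_path: Optional[Path], base_model_name: str, cache_dir: str):
        tokenizer = None
        # Prefer the tokenizer saved alongside the LoRA adapter, if one exists.
        if model_path and (model_path / "tokenizer.json").exists():
            try:
                logger.info(f"Loading tokenizer from LoRA directory: {model_path}")
                tokenizer = AutoTokenizer.from_pretrained(
                    str(model_path),
                    cache_dir=cache_dir,
                    trust_remote_code=True,
                    use_fast=False,  # slow (SentencePiece) tokenizer, as in the commit
                )
            except Exception as e:
                logger.warning(f"Could not load tokenizer from LoRA dir: {e}, using base model")
        if tokenizer is None:
            # Fall back to the base model's tokenizer.
            tokenizer = AutoTokenizer.from_pretrained(
                base_model_name,
                cache_dir=cache_dir,
                padding_side="left",  # required for decoder-only models
                trust_remote_code=True,
                use_fast=False,
            )
        if tokenizer.pad_token is None:
            # Assumed handling; the diff shows only the `if`, not its body.
            tokenizer.pad_token = tokenizer.eos_token
        return tokenizer

With use_fast=False, AutoTokenizer returns the slow Python tokenizer, which for Llama/Mistral-style models is backed by SentencePiece, so the sentencepiece package must be installed or from_pretrained will raise, which is presumably what the commit's "requires sentencepiece" comment is about.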