sooh098
/

kanana-ko-rag

@@ -1,176 +0,0 @@
-import gc
-import json
-import os
-import chromadb
-import torch
-from peft import PeftModel
-from tqdm import tqdm
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-from transformers import AutoModel, AutoModelForSequenceClassification
-# ======== 사용자 설정 ======== #
-base_model = "K-intelligence/Midm-2.0-Base-Instruct"
-lora_ckpt = "/home/sooh5090/axolotl/output/midm-finetuneb/checkpoint-1845"
-fp16_model_path = "/home/sooh5090/axolotl/output/spell-merged-fp16"
-test_file = "../data/json/korean_language_rag_V1.0_test.json"
-output_file = "../output/test_predictions.json"
-max_new_tokens = 512
-# ======== GPU 디바이스 분리 ======== #
-device_llm = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")  # LLM
-device_rag = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")  # 임베딩+리랭커
-# ======== 1. LoRA 병합 ======== #
-print("🔄 LoRA 병합 중...")
-tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
-base = AutoModelForCausalLM.from_pretrained(
-    base_model,
-    device_map={"": device_llm},
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    trust_remote_code=True
-)
-model = PeftModel.from_pretrained(base, lora_ckpt)
-model = model.merge_and_unload()
-os.makedirs(fp16_model_path, exist_ok=True)
-model.save_pretrained(fp16_model_path)
-tokenizer.save_pretrained(fp16_model_path)
-print(f"✅ FP16 모델 저장 완료: {fp16_model_path}")
-# ======== 2. 4bit 로드 ======== #
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16
-)
-model = AutoModelForCausalLM.from_pretrained(
-    fp16_model_path,
-    quantization_config=bnb_config,
-    device_map={"": device_llm},
-    trust_remote_code=True
-)
-model.eval()
-print("✅ 4bit 모델 로드 완료 (GPU 3번)")
-# ======== 3. RAG 검색기/리랭커 설정 (GPU 2번) ======== #
-embed_model_id = "dragonkue/snowflake-arctic-embed-l-v2.0-ko"
-embed_tokenizer = AutoTokenizer.from_pretrained(embed_model_id)
-embed_model = AutoModel.from_pretrained(embed_model_id).to(device_rag).eval()
-reranker_id = "dragonkue/bge-reranker-v2-m3-ko"
-reranker_tokenizer = AutoTokenizer.from_pretrained(reranker_id)
-reranker_model = AutoModelForSequenceClassification.from_pretrained(reranker_id).to(device_rag).eval()
-client = chromadb.PersistentClient(path="../grammar_db")
-collection = client.get_collection(name="korean_grammar_rules", embedding_function=None)
-# ======== 4. 임베딩/리랭킹 함수 ======== #
-def embed_query(text, chunk_size=512):
-    tokens = embed_tokenizer(text, add_special_tokens=False)["input_ids"]
-    chunks = [tokens[i:i + chunk_size] for i in range(0, len(tokens), chunk_size)]
-    embeddings = []
-    for chunk in chunks:
-        inputs = torch.tensor([embed_tokenizer.build_inputs_with_special_tokens(chunk)]).to(device_rag)
-        with torch.no_grad():
-            output = embed_model(input_ids=inputs).last_hidden_state
-            valid_token_count = (inputs != embed_tokenizer.pad_token_id).sum(dim=1, keepdim=True)
-            chunk_emb = output.sum(dim=1) / valid_token_count
-        embeddings.append(chunk_emb.cpu())
-    return torch.stack(embeddings).mean(dim=0).squeeze(0).tolist()
-def rerank(query, docs):
-    pairs = [(query, doc) for doc in docs]
-    inputs = reranker_tokenizer(pairs, return_tensors="pt", padding=True, truncation=True, max_length=512).to(device_rag)
-    with torch.no_grad():
-        scores = reranker_model(**inputs).logits.squeeze(-1)
-    ranked = sorted(zip(docs, scores.tolist()), key=lambda x: x[1], reverse=True)
-    return ranked[0][0]
-def retrieve_context(query_text, top_k=3):
-    query_vec = embed_query(query_text)
-    results = collection.query(query_embeddings=[query_vec], n_results=top_k)
-    docs = results["documents"][0]
-    metas = results["metadatas"][0]
-    best_doc = rerank(query_text, docs)
-    best_idx = docs.index(best_doc)
-    title = metas[best_idx]["title"]
-    return f"[{title}]\n{best_doc.strip()}"
-# ======== 5. Instruction 템플릿 ======== #
-INSTRUCTION_TEMPLATES = {
-    "교정형": """당신은 한국어 어문 규범(맞춤법, 띄어쓰기, 표준어, 문장부호, 외래어 표기법 등)에 따라 문장을 교정하고 그 이유를 설명하는 AI입니다.
-[문제 유형: 교정형]
-- 주어진 문장이 어문 규범에 맞는지 판단하십시오.
-- 틀린 경우 올바른 형태로 고친 뒤, “~가 옳다. {이유}” 형식으로 답하십시오.
-- 문제 문장은 다시 출력하지 마십시오.""",
-    "선택형": """당신은 한국어 어문 규범(맞춤법, 띄어쓰기, 표준어, 문장부호, 외래어 표기법 등)에 따라 문장에서 올바른 표현을 선택하고 그 이유를 설명하는 AI입니다.
-[문제 유형: 선택형]
-- 주어진 보기 중에서 올바른 표현을 선택하십시오.
-- 정답은 “~가 옳다. {이유}” 형식으로 작성하십시오.
-- 문제 문장은 ��시 출력하지 마십시오."""
-}
-# ======== 6. 테스트 데이터 로드 ======== #
-with open(test_file, "r", encoding="utf-8") as f:
-    test_data = json.load(f)
-# ======== 7. 예측 ======== #
-predictions = []
-for sample in tqdm(test_data, desc="🔍 Test 예측 중"):
-    q_type = sample.get("input", {}).get("question_type")
-    question = sample.get("input", {}).get("question", "").strip()
-    # RAG 검색 (GPU 2번)
-    retrieved = retrieve_context(question)
-    # 프롬프트 구성
-    instruction = INSTRUCTION_TEMPLATES.get(q_type, INSTRUCTION_TEMPLATES["교정형"])
-    input_text = f"[참고 규범]\n{retrieved}\n\n질문: {question}\n답변:"
-    prompt = (
-        "<|begin_of_text|>\n"
-        f"<|start_header_id|>system<|end_header_id|>\n{instruction}\n"
-        "<|eot_id|>\n"
-        f"<|start_header_id|>user<|end_header_id|>\n{input_text}\n"
-        "<|eot_id|>\n"
-        "<|start_header_id|>assistant<|end_header_id|>\n"
-    )
-    # LLM (GPU 3번)
-    inputs = tokenizer(prompt, return_tensors="pt").to(device_llm)
-    inputs.pop("token_type_ids", None)
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=max_new_tokens,
-            do_sample=False,
-            temperature=0.01,
-            eos_token_id=tokenizer.eos_token_id,
-            pad_token_id=tokenizer.pad_token_id
-        )
-    decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
-    if "<|start_header_id|>assistant<|end_header_id|>\n" in decoded:
-        prediction = decoded.split("<|start_header_id|>assistant<|end_header_id|>\n")[-1].split("<|end_of_text|>")[0].strip()
-    else:
-        prediction = decoded.strip()
-    print("\n=============================")
-    print(f"📝 질문: {question}")
-    print(f"📚 검색 컨텍스트:\n{retrieved}")
-    print(f"🤖 모델 답변: {prediction}")
-    print("=============================\n")
-    predictions.append({
-        "id": sample.get("id", ""),
-        "input": sample.get("input", {}),
-        "output": {"answer": prediction}
-    })
-# ======== 8. 결과 저장 ======== #
-os.makedirs(os.path.dirname(output_file), exist_ok=True)
-with open(output_file, "w", encoding="utf-8") as f:
-    json.dump(predictions, f, ensure_ascii=False, indent=2)
-print(f"\n📄 테스트 결과 저장 완료: {output_file}")