ko-freshqa-leaderboard / src /submission_tracker.py
jisubae
fix: HF login error and future warning
cd13f52
raw
history blame
12.6 kB
"""
์‚ฌ์šฉ์ž ์ œ์ถœ ์ถ”์  ๋ชจ๋“ˆ
๋กœ๊ทธ์ธํ•œ ์‚ฌ์šฉ์ž์˜ user_id๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ํ•˜๋ฃจ 3๋ฒˆ ์ œํ•œ ๊ธฐ๋Šฅ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.
์ œ์ถœ ์ •๋ณด๋Š” ๋ณ„๋„์˜ HuggingFace repository์—์„œ ๊ด€๋ฆฌ๋ฉ๋‹ˆ๋‹ค.
"""
import os
import json
import pandas as pd
import tempfile
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from huggingface_hub import hf_hub_download, login, HfApi
import pytz
from src.utils import file_lock, get_current_date_str, get_current_datetime_str
# Korea time zone (Asia/Seoul) used for timezone-aware datetimes in this module.
KOREA_TZ = pytz.timezone('Asia/Seoul')
class SubmissionTracker:
    """Track per-user submissions in a JSON file kept in a HuggingFace dataset repo.

    The stored structure, as built by ``record_submission``, is
    ``{user_id: {date_str: [record, ...]}}`` where each record is a dict with
    the keys ``timestamp``, ``submitter_name``, ``file_name``, ``success``,
    ``error_message``, ``submit_model`` and ``submit_description``.

    ``self.submissions`` is an in-memory copy of that structure; it is
    refreshed from the repository whenever a record is written.
    """

    # Maximum number of *successful* submissions a user may make per day.
    DAILY_LIMIT = 3

    def __init__(
        self,
        filename: str = "user_submissions.json",
    ):
        """Read configuration from the environment, log in, and load records.

        Args:
            filename: Name of the submission-record file inside the repository.

        Raises:
            ValueError: If ``SUBMISSION_TRACKER_REPO_ID`` or ``HF_TOKEN`` is
                unset or empty.
            Exception: Whatever ``huggingface_hub.login`` raises is re-raised
                after being printed.
        """
        # Configuration comes from environment variables.
        self.repo_id = os.getenv("SUBMISSION_TRACKER_REPO_ID")
        self.admin_token = os.getenv("HF_TOKEN")
        self.filename = filename
        if not self.repo_id:
            raise ValueError(
                "SUBMISSION_TRACKER_REPO_ID ํ™˜๊ฒฝ๋ณ€์ˆ˜๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. "
            )
        if not self.admin_token:
            raise ValueError(
                "HuggingFace Admin ํ† ํฐ์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. "
                "HF_TOKEN ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ์„ค์ •ํ•˜์„ธ์š”."
            )
        # Initialise the HuggingFace API client.
        self.api = HfApi()
        try:
            # Log in with the admin token (used for dataset read/write).
            login(token=self.admin_token)
        except Exception as e:
            print(f"โŒ HuggingFace ๋กœ๊ทธ์ธ ์‹คํŒจ: {e}")
            raise
        # Load the existing submission records.
        self.submissions: Dict = self.load_submissions()

    @staticmethod
    def _count_successful(records: List[Dict]) -> int:
        """Return how many records have a truthy ``"success"`` value."""
        return sum(1 for record in records if record.get("success", False))

    def _lock_file_path(self) -> str:
        """Return the path of the lock file guarding writes for this repo."""
        lock_name = f'{self.repo_id.replace("/", "_")}.lock'
        return os.path.join(tempfile.gettempdir(), lock_name)

    def _fetch_submissions(self) -> Dict:
        """Download and parse the record file, failing loudly on real errors.

        Returns:
            The parsed records, or ``{}`` only when the file does not exist in
            the repository yet.

        Raises:
            Exception: Any download or JSON error other than "file not found".
            ValueError: If the file's top-level JSON value is not an object.
        """
        # Function-scope import keeps the module's import block untouched.
        # NOTE(review): older huggingface_hub releases are believed to expose
        # this class only from huggingface_hub.utils - hence the fallback.
        try:
            from huggingface_hub.errors import EntryNotFoundError
        except ImportError:
            from huggingface_hub.utils import EntryNotFoundError

        # Download into a throw-away directory; it is removed on exit.
        with tempfile.TemporaryDirectory() as temp_dir:
            try:
                file_path = hf_hub_download(
                    repo_id=self.repo_id,
                    filename=self.filename,
                    local_dir=temp_dir,
                    repo_type="dataset",
                    token=self.admin_token,
                )
            except EntryNotFoundError:
                # No record file yet: a genuinely empty history.
                return {}
            with open(file_path, "r", encoding="utf-8") as f:
                submissions = json.load(f)
        if not isinstance(submissions, dict):
            raise ValueError(
                f"Submission file {self.filename!r} must contain a JSON object, "
                f"got {type(submissions).__name__}"
            )
        return submissions

    def load_submissions(self) -> Dict:
        """Load the submission records from the HuggingFace repository.

        Best-effort: any failure is printed and an empty dict is returned, so
        this method never raises.
        """
        try:
            return self._fetch_submissions()
        except Exception as e:
            print(f"โš ๏ธ ์ œ์ถœ ๊ธฐ๋ก ๋กœ๋“œ ์‹คํŒจ (์ƒˆ๋กœ ์‹œ์ž‘): {e}")
            return {}

    def get_today_submissions(self, user_id: str) -> List[Dict]:
        """Return the cached records the user has for today's date key.

        Returns an empty list for a falsy ``user_id`` or when nothing is stored.
        """
        if not user_id:
            return []
        today = get_current_date_str()
        user_submissions = self.submissions.get(user_id, {})
        return user_submissions.get(today, [])

    def can_submit(
        self,
        user_id: str,
        submissions_data: Optional[Dict] = None,
    ) -> Tuple[bool, str, int]:
        """Check whether the user is still allowed to submit today.

        Only records with a truthy ``"success"`` count against the limit.

        Args:
            user_id: Unique ID of the logged-in user (e.g. HF account ID).
            submissions_data: Records to check against (for tests, or for the
                re-check done while holding the lock). ``None`` means
                ``self.submissions``.

        Returns:
            ``(True, message, remaining)``. This method never returns ``False``
            in the first slot; refusal is signalled by raising.

        Raises:
            ValueError: If ``user_id`` is falsy.
            Exception: If the user already has ``DAILY_LIMIT`` successful
                submissions today.
        """
        if not user_id:
            raise ValueError("โŒ HuggingFace ๋กœ๊ทธ์ธ ์ƒํƒœ์—์„œ๋งŒ ์ œ์ถœ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. ๋กœ๊ทธ์ธ ์ •๋ณด๋ฅผ ํ™•์ธํ•ด์ฃผ์„ธ์š”.")
        data = submissions_data if submissions_data is not None else self.submissions
        today = get_current_date_str()
        today_submissions = data.get(user_id, {}).get(today, [])
        successful_count = self._count_successful(today_submissions)
        if successful_count >= self.DAILY_LIMIT:
            raise Exception("โŒ ์˜ค๋Š˜ ์ œ์ถœ ํ•œ๋„๋ฅผ ์ดˆ๊ณผํ–ˆ์Šต๋‹ˆ๋‹ค. ๋‚ด์ผ ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”.")
        remaining = self.DAILY_LIMIT - successful_count
        return True, f"โœ… ์ œ์ถœ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. (์˜ค๋Š˜ {successful_count}/{self.DAILY_LIMIT}ํšŒ ์‚ฌ์šฉ, {remaining}ํšŒ ๋‚จ์Œ)", remaining

    def record_submission(
        self,
        user_id: str,
        submitter_name: str,
        file_name: str,
        success: bool,
        error_message: Optional[str] = None,
        submit_model: Optional[str] = None,
        submit_description: Optional[str] = None,
    ) -> bool:
        """Append one submission record and upload the file, under a file lock.

        The latest records are re-downloaded inside the lock and the daily
        limit is re-checked against them before anything is written.

        Args:
            user_id: Unique ID of the logged-in user (e.g. HF account ID).
            submitter_name: Stored in the record as ``submitter_name``.
            file_name: Stored in the record as ``file_name``.
            success: Stored as ``success``; only truthy values count against
                the daily limit.
            error_message: Stored in the record as ``error_message``.
            submit_model: Stored in the record as ``submit_model``.
            submit_description: Stored in the record as ``submit_description``.

        Returns:
            ``True`` if the record was appended and uploaded. ``False`` if the
            limit check failed, the latest records could not be loaded, or the
            upload failed.

        Raises:
            ValueError: If ``user_id`` is falsy.
        """
        if not user_id:
            raise ValueError("โŒ HuggingFace ๋กœ๊ทธ์ธ ์ƒํƒœ์—์„œ๋งŒ ์ œ์ถœ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค. ๋กœ๊ทธ์ธ ์ •๋ณด๋ฅผ ํ™•์ธํ•ด์ฃผ์„ธ์š”.")
        # Hold the lock for the whole read-check-append-upload sequence.
        with file_lock(self._lock_file_path()):
            try:
                # Strict load: a failed download must abort here. Treating it
                # as an empty history would overwrite every stored record on
                # upload and would let the user bypass the daily limit.
                latest_submissions = self._fetch_submissions()
                # Re-check the limit against the freshly loaded data.
                try:
                    self.can_submit(
                        user_id=user_id,
                        submissions_data=latest_submissions,
                    )
                except Exception as e:
                    print(f"์ œ์ถœ ์ œํ•œ ์ดˆ๊ณผ: {e}")
                    # Refresh the cache only; nothing is written.
                    self.submissions = latest_submissions
                    return False
                # Build and append the new record.
                current_datetime = get_current_datetime_str()
                today = get_current_date_str()
                day_records = latest_submissions.setdefault(user_id, {}).setdefault(today, [])
                day_records.append(
                    {
                        "timestamp": current_datetime,
                        "submitter_name": submitter_name,
                        "file_name": file_name,
                        "success": success,
                        "error_message": error_message,
                        "submit_model": submit_model,
                        "submit_description": submit_description,
                    }
                )
                saved = self._save_submissions_internal(latest_submissions)
                if saved:
                    # Update the cache only once the record is persisted, so
                    # memory never holds a record the repository lacks.
                    self.submissions = latest_submissions
                return saved
            except Exception as e:
                print(f"โŒ ์ œ์ถœ ๊ธฐ๋ก ์ถ”๊ฐ€ ์‹คํŒจ: {e}")
                return False

    def _save_submissions_internal(self, submissions_data: Dict) -> bool:
        """Upload ``submissions_data`` as the record file (lock already held).

        Returns:
            ``True`` if the upload succeeded, ``False`` otherwise. Errors are
            printed, never raised.
        """
        temp_file_path: Optional[str] = None
        try:
            # delete=False because the file is uploaded by path after closing.
            with tempfile.NamedTemporaryFile(
                mode="w",
                encoding="utf-8",
                suffix=".json",
                delete=False,
            ) as temp_file:
                # Remember the path first so cleanup runs even if dump fails.
                temp_file_path = temp_file.name
                json.dump(submissions_data, temp_file, ensure_ascii=False, indent=2)
            # Upload the file to the HuggingFace repository.
            self.api.upload_file(
                path_or_fileobj=temp_file_path,
                path_in_repo=self.filename,
                repo_id=self.repo_id,
                repo_type="dataset",
                token=self.admin_token,
                commit_message=(
                    "Update submission records - "
                    f"{datetime.now(KOREA_TZ).strftime('%Y-%m-%d %H:%M:%S')}"
                ),
            )
            return True
        except Exception as e:
            print(f"โŒ ์ œ์ถœ ๊ธฐ๋ก ์ €์žฅ ์‹คํŒจ: {e}")
            return False
        finally:
            # Always remove the temp file, including when the upload raised.
            if temp_file_path is not None:
                try:
                    os.unlink(temp_file_path)
                except OSError:
                    pass

    def get_user_submission_history(self, user_id: str, days: int = 7) -> Dict:
        """Return the user's cached records for the last ``days`` calendar days.

        Today (in ``KOREA_TZ``) counts as the first day. Only dates that have
        stored records appear in the result, keyed by ``YYYY-MM-DD``.
        """
        if not user_id or user_id not in self.submissions:
            return {}
        user_submissions = self.submissions[user_id]
        today = datetime.now(KOREA_TZ).date()
        history: Dict[str, List[Dict]] = {}
        for i in range(days):
            check_date = today - pd.Timedelta(days=i)
            date_str = check_date.strftime("%Y-%m-%d")
            if date_str in user_submissions:
                history[date_str] = user_submissions[date_str]
        return history

    def get_submission_stats(self, user_id: str) -> Dict:
        """Return today's and the last 7 days' submission statistics.

        Returns:
            ``{}`` for a falsy ``user_id``; otherwise a dict with
            ``today_count`` (all of today's records), ``today_remaining``
            (based on successful ones only), ``week_total``,
            ``week_successful``, ``week_failed`` and ``history``.
        """
        if not user_id:
            return {}
        today_submissions = self.get_today_submissions(user_id)
        successful_today_count = self._count_successful(today_submissions)
        history = self.get_user_submission_history(user_id, 7)
        # Aggregate over the 7-day history.
        total_submissions = sum(len(day_submissions) for day_submissions in history.values())
        successful_submissions = sum(
            self._count_successful(day_submissions) for day_submissions in history.values()
        )
        failed_submissions = total_submissions - successful_submissions
        return {
            "today_count": len(today_submissions),
            "today_remaining": max(0, self.DAILY_LIMIT - successful_today_count),
            "week_total": total_submissions,
            "week_successful": successful_submissions,
            "week_failed": failed_submissions,
            "history": history,
        }

    def cleanup_old_records(self, days_to_keep: int = 30) -> int:
        """Delete per-day record lists older than ``days_to_keep`` days (locked).

        Date keys are compared as strings against a ``YYYY-MM-DD`` cutoff, so
        this assumes the stored keys use that same format.

        Returns:
            The number of per-user date entries removed and persisted. ``0``
            if nothing was old enough, the records could not be loaded, or the
            upload failed.
        """
        # Hold the lock for the whole read-prune-upload sequence.
        with file_lock(self._lock_file_path()):
            try:
                # Strict load: never prune (and re-upload) on top of a failed
                # download, which would wipe the stored history.
                latest_submissions = self._fetch_submissions()
                cutoff_date = datetime.now(KOREA_TZ) - pd.Timedelta(days=days_to_keep)
                cutoff_str = cutoff_date.strftime("%Y-%m-%d")
                cleaned_count = 0
                # Iterate over key snapshots because entries are deleted in-loop.
                for uid in list(latest_submissions):
                    user_submissions = latest_submissions[uid]
                    for date_str in list(user_submissions):
                        if date_str < cutoff_str:
                            del user_submissions[date_str]
                            cleaned_count += 1
                    # Drop users left with no records at all.
                    if not user_submissions:
                        del latest_submissions[uid]
                if cleaned_count == 0:
                    # Nothing to persist; just refresh the cache.
                    self.submissions = latest_submissions
                    return 0
                if self._save_submissions_internal(latest_submissions):
                    self.submissions = latest_submissions
                    print(f"๐Ÿงน {cleaned_count}๊ฐœ์˜ ์˜ค๋ž˜๋œ ์ œ์ถœ ๊ธฐ๋ก์„ ์ •๋ฆฌํ–ˆ์Šต๋‹ˆ๋‹ค.")
                    return cleaned_count
                # Upload failed: the repository still holds the old records, so
                # leave the cache untouched and report nothing as cleaned.
                print(f"โš ๏ธ {cleaned_count}๊ฐœ์˜ ์˜ค๋ž˜๋œ ์ œ์ถœ ๊ธฐ๋ก์„ ์ •๋ฆฌํ–ˆ์ง€๋งŒ ์ €์žฅ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")
                return 0
            except Exception as e:
                print(f"โŒ ์˜ค๋ž˜๋œ ๊ธฐ๋ก ์ •๋ฆฌ ์‹คํŒจ: {e}")
                return 0
def get_submission_tracker() -> Optional[SubmissionTracker]:
    """Create a SubmissionTracker, or return None if construction fails.

    Any exception raised while constructing the tracker is printed and
    swallowed, so callers must handle a ``None`` result.
    """
    tracker = None
    try:
        tracker = SubmissionTracker()
    except Exception as e:
        print(f"โŒ SubmissionTracker ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}")
    return tracker