""" translator.py 腾讯云 (批量接口+动态分包+多账号轮询) + 百度翻译 API 封装 ⚠️ 需在 HF 空间的 “Variables” 页设置以下环境变量 ------------------------------------------------------------------ TENCENT_CREDENTIALS_JSON 形如: [ {"secret_id": "AKIDxxxx", "secret_key": "yyyy"}, {"secret_id": "AKIDaaaa", "secret_key": "bbbb"} ] TENCENT_SECRET_ID (兼容旧配置) 单个 SecretId TENCENT_SECRET_KEY (兼容旧配置) 单个 SecretKey ------------------------------------------------------------------ BAIDU_CREDENTIALS_JSON 形如: [ {"app_id": "xxxx", "secret_key": "yyyy"} ] ------------------------------------------------------------------ """ import hashlib, hmac, json, os, random, time from datetime import datetime from typing import List, Sequence, Optional, Dict, Any import requests _tencent_creds_list = json.loads(os.environ.get("TENCENT_CREDENTIALS_JSON", "[]")) # 兼容旧的单账号配置 _legacy_id = os.environ.get("TENCENT_SECRET_ID") _legacy_key = os.environ.get("TENCENT_SECRET_KEY") if _legacy_id and _legacy_key: if not any(c.get("secret_id") == _legacy_id for c in _tencent_creds_list): _tencent_creds_list.append({"secret_id": _legacy_id, "secret_key": _legacy_key}) TENCENT_TRANSLATE_URL = os.environ.get("TENCENT_TRANSLATE_URL", "https://tmt.tencentcloudapi.com") _tencent_idx: int = 0 BAIDU_TRANSLATE_URL = os.environ.get("BAIDU_TRANSLATE_URL", "https://fanyi-api.baidu.com/api/trans/vip/translate") _baidu_creds_list = json.loads(os.environ.get("BAIDU_CREDENTIALS_JSON", "[]")) # 全局索引,用于轮询 _baidu_idx: int = 0 # 腾讯云翻译逻辑 (批量接口 TextTranslateBatch) def _sign(key: bytes, msg: str) -> bytes: return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest() def _tc3_signature(secret_key: str, date: str, service: str, string_to_sign: str) -> str: secret_date = _sign(("TC3" + secret_key).encode(), date) secret_service = _sign(secret_date, service) secret_signing = _sign(secret_service, "tc3_request") return hmac.new(secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest() def _call_tencent_batch_once(cred: Dict[str, str], text_list: List[str], src: str, tgt: str) -> List[str]: """ 调用腾讯云 TextTranslateBatch 接口 """ secret_id = cred["secret_id"] secret_key = cred["secret_key"] service = "tmt" host = "tmt.tencentcloudapi.com" action = "TextTranslateBatch" version = "2018-03-21" region = "ap-beijing" ts = int(time.time()) date = datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d") algorithm = "TC3-HMAC-SHA256" payload = { "SourceTextList": text_list, "Source": src, "Target": tgt, "ProjectId": 0, } payload_str = json.dumps(payload, ensure_ascii=False) canonical_request = "\n".join([ "POST", "/", "", f"content-type:application/json; charset=utf-8\nhost:{host}\nx-tc-action:{action.lower()}\n", "content-type;host;x-tc-action", hashlib.sha256(payload_str.encode()).hexdigest(), ]) credential_scope = f"{date}/{service}/tc3_request" string_to_sign = "\n".join([ algorithm, str(ts), credential_scope, hashlib.sha256(canonical_request.encode()).hexdigest(), ]) signature = _tc3_signature(secret_key, date, service, string_to_sign) authorization = ( f"{algorithm} Credential={secret_id}/{credential_scope}, " f"SignedHeaders=content-type;host;x-tc-action, Signature={signature}" ) headers = { "Authorization": authorization, "Content-Type": "application/json; charset=utf-8", "Host": host, "X-TC-Action": action, "X-TC-Timestamp": str(ts), "X-TC-Version": version, "X-TC-Region": region, } resp = requests.post(TENCENT_TRANSLATE_URL, headers=headers, data=payload_str, timeout=8) resp.raise_for_status() data = resp.json() if "Response" in data and "Error" in data["Response"]: err_code = data["Response"]["Error"].get("Code", "") err_msg = data["Response"]["Error"].get("Message", "") raise Exception(f"Tencent Biz Error: {err_code} - {err_msg}") return data["Response"]["TargetTextList"] def _translate_with_tencent_pool(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]: """ 腾讯云入口: 1. 动态分包:同时考虑字符数限制 (<6000) 和 条数限制。 - 累计字符数 < 5000 (安全阈值) - 单批次条数 < 50 (安全阈值) 2. 账号轮询 (Polling):每组请求如果失败,会自动换号重试。 """ global _tencent_idx, _tencent_creds_list if not _tencent_creds_list: return None # 配置安全阈值 MAX_CHARS_PER_BATCH = 5000 # 官方限制 6000,留 1000 buffer MAX_ITEMS_PER_BATCH = 50 # 避免单次数组过大 chunks = [] current_chunk = [] current_char_count = 0 for text in texts: text_len = len(text) # 检查加入当前文本是否会超限 if current_chunk and ( (current_char_count + text_len > MAX_CHARS_PER_BATCH) or (len(current_chunk) >= MAX_ITEMS_PER_BATCH) ): # 结算当前块 chunks.append(current_chunk) current_chunk = [] current_char_count = 0 current_chunk.append(text) current_char_count += text_len # 处理剩余的最后一块 if current_chunk: chunks.append(current_chunk) all_results = [] for chunk in chunks: chunk_success = False attempts = len(_tencent_creds_list) for _ in range(attempts): cred = _tencent_creds_list[_tencent_idx] _tencent_idx = (_tencent_idx + 1) % len(_tencent_creds_list) try: res = _call_tencent_batch_once(cred, list(chunk), src, tgt) all_results.extend(res) chunk_success = True break # 成功则跳出重试 except Exception as e: safe_id = cred['secret_id'][:4] + "****" print(f"[translator] Tencent ID {safe_id} failed on batch: {e}. Switching...") continue if not chunk_success: print("[translator] All Tencent credentials failed for a batch. Falling back to Baidu.") return None # 只要有一个分片失败,整体降级,保证一致性 return all_results # 百度翻译逻辑 def _translate_with_baidu_pool(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]: global _baidu_idx, _baidu_creds_list if not _baidu_creds_list: return None cred = _baidu_creds_list[_baidu_idx] _baidu_idx = (_baidu_idx + 1) % len(_baidu_creds_list) app_id, secret_key = cred["app_id"], cred["secret_key"] salt = random.randint(32768, 65536) query = "\n".join(texts) sign = hashlib.md5((app_id + query + str(salt) + secret_key).encode()).hexdigest() params = { "q": query, "from": src, "to": tgt, "appid": app_id, "salt": salt, "sign": sign, } try: resp = requests.get(BAIDU_TRANSLATE_URL, params=params, timeout=8) resp.raise_for_status() data = resp.json() if "error_code" in data: raise Exception(f"Baidu Biz Error: {data['error_code']} - {data.get('error_msg')}") return [item["dst"] for item in data["trans_result"]] except Exception as e: print(f"[translator] Baidu API error → {e}") return None # 对外统一入口 def translate_texts(texts: Sequence[str], src_lang: str = "auto", tgt_lang: str = "zh") -> List[str]: """ 逻辑: 1. 尝试腾讯云 (批量接口 + 多账号轮询) 2. 失败降级到百度云 3. 还失败返回原文 """ if not texts: return [] # 1. 优先尝试腾讯云 out = _translate_with_tencent_pool(texts, src_lang, tgt_lang) # 2. 失败降级到百度 if out is None: out = _translate_with_baidu_pool(texts, src_lang, tgt_lang) return out or list(texts)