File size: 8,454 Bytes
b073a7b
 
329cf76
 
b073a7b
329cf76
b073a7b
329cf76
 
 
 
 
 
 
 
 
b073a7b
 
 
96c8569
 
329cf76
96c8569
efcb14c
0535e28
329cf76
 
 
 
 
 
 
 
 
7e19594
0599754
329cf76
 
 
20f1d29
329cf76
b073a7b
329cf76
b073a7b
96c8569
329cf76
 
96c8569
 
 
 
b073a7b
 
 
96c8569
 
329cf76
 
 
 
 
 
 
b073a7b
 
329cf76
b073a7b
 
 
 
 
 
 
329cf76
b073a7b
 
 
 
329cf76
20f1d29
b073a7b
 
 
 
 
 
 
 
 
 
329cf76
 
b073a7b
 
 
 
329cf76
b073a7b
 
329cf76
b073a7b
 
20f1d29
b073a7b
 
 
 
 
 
 
20f1d29
96c8569
329cf76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0535e28
500dfe6
329cf76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b073a7b
329cf76
20f1d29
329cf76
b073a7b
329cf76
b073a7b
 
 
 
96c8569
7e19594
96c8569
 
329cf76
 
b073a7b
96c8569
20f1d29
96c8569
 
329cf76
 
7e19594
 
b073a7b
 
329cf76
 
 
 
b073a7b
96c8569
 
 
329cf76
 
 
 
b073a7b
329cf76
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
"""
translator.py
腾讯云 (批量接口+动态分包+多账号轮询) + 百度翻译 API 封装
⚠️ 需在 HF 空间的 “Variables” 页设置以下环境变量
------------------------------------------------------------------
TENCENT_CREDENTIALS_JSON  形如:
[
  {"secret_id": "AKIDxxxx", "secret_key": "yyyy"},
  {"secret_id": "AKIDaaaa", "secret_key": "bbbb"}
]
TENCENT_SECRET_ID         (兼容旧配置) 单个 SecretId
TENCENT_SECRET_KEY        (兼容旧配置) 单个 SecretKey
------------------------------------------------------------------
BAIDU_CREDENTIALS_JSON    形如:
[
  {"app_id": "xxxx", "secret_key": "yyyy"}
]
------------------------------------------------------------------
"""
import hashlib, hmac, json, os, random, time
from datetime import datetime
from typing import List, Sequence, Optional, Dict, Any

import requests

def _load_credentials(env_name: str) -> List[Dict[str, Any]]:
    """Read a JSON array of credential objects from the environment variable ``env_name``.

    A missing or blank variable yields an empty list. Invalid JSON, or JSON that
    is not an array, is reported on stdout and also yields an empty list, so a
    configuration typo disables that backend instead of crashing the import.
    Array entries that are not JSON objects are dropped.
    """
    raw = os.environ.get(env_name, "")
    if not raw.strip():
        return []
    try:
        parsed = json.loads(raw)
    except ValueError as e:
        # json.JSONDecodeError subclasses ValueError; its message carries only
        # the error position, not the (secret) document content.
        print(f"[translator] Ignoring {env_name}: invalid JSON ({e})")
        return []
    if not isinstance(parsed, list):
        print(f"[translator] Ignoring {env_name}: expected a JSON array of objects")
        return []
    return [entry for entry in parsed if isinstance(entry, dict)]


_tencent_creds_list = _load_credentials("TENCENT_CREDENTIALS_JSON")

# Backward compatibility with the old single-account configuration: append the
# legacy pair unless the same secret_id is already present in the JSON list.
_legacy_id = os.environ.get("TENCENT_SECRET_ID")
_legacy_key = os.environ.get("TENCENT_SECRET_KEY")
if _legacy_id and _legacy_key:
    if not any(c.get("secret_id") == _legacy_id for c in _tencent_creds_list):
        _tencent_creds_list.append({"secret_id": _legacy_id, "secret_key": _legacy_key})

TENCENT_TRANSLATE_URL = os.environ.get("TENCENT_TRANSLATE_URL", "https://tmt.tencentcloudapi.com")

# Global round-robin index into _tencent_creds_list.
_tencent_idx: int = 0


BAIDU_TRANSLATE_URL = os.environ.get("BAIDU_TRANSLATE_URL", "https://fanyi-api.baidu.com/api/trans/vip/translate")
_baidu_creds_list = _load_credentials("BAIDU_CREDENTIALS_JSON")

# Global round-robin index into _baidu_creds_list.
_baidu_idx: int = 0


# 腾讯云翻译逻辑 (批量接口 TextTranslateBatch)
def _sign(key: bytes, msg: str) -> bytes:
    """Return the raw HMAC-SHA256 digest of ``msg`` (UTF-8 encoded) keyed with ``key``."""
    mac = hmac.new(key, digestmod=hashlib.sha256)
    mac.update(msg.encode("utf-8"))
    return mac.digest()

def _tc3_signature(secret_key: str, date: str, service: str, string_to_sign: str) -> str:
    """Compute the hex signature for a TC3-HMAC-SHA256 request.

    The signing key starts as ``"TC3" + secret_key`` and is re-keyed by
    HMAC-SHA256 over ``date``, then ``service``, then the literal
    ``"tc3_request"``. The final key signs ``string_to_sign`` and the hex digest
    is returned.
    """
    derived_key = ("TC3" + secret_key).encode()
    for scope_part in (date, service, "tc3_request"):
        derived_key = _sign(derived_key, scope_part)
    final_mac = hmac.new(derived_key, string_to_sign.encode("utf-8"), hashlib.sha256)
    return final_mac.hexdigest()

def _call_tencent_batch_once(cred: Dict[str, str], text_list: List[str], src: str, tgt: str) -> List[str]:
    """Send one signed TextTranslateBatch request to Tencent Cloud.

    Args:
        cred: Account mapping with the keys ``"secret_id"`` and ``"secret_key"``.
        text_list: Source strings, sent as ``SourceTextList``.
        src: Source language code, sent as ``Source``.
        tgt: Target language code, sent as ``Target``.

    Returns:
        The ``TargetTextList`` of the response, one entry per input string.

    Raises:
        KeyError: If ``cred`` lacks ``secret_id`` or ``secret_key``.
        requests.RequestException: On transport failures or a non-2xx status.
        RuntimeError: If the API reports a business error, or the response has
            no ``TargetTextList`` matching the input length.
    """
    secret_id = cred["secret_id"]
    secret_key = cred["secret_key"]

    service = "tmt"
    host = "tmt.tencentcloudapi.com"
    action = "TextTranslateBatch"
    version = "2018-03-21"
    region = "ap-beijing"
    algorithm = "TC3-HMAC-SHA256"
    content_type = "application/json; charset=utf-8"
    signed_headers = "content-type;host;x-tc-action"

    ts = int(time.time())
    # UTC calendar date of the timestamp; it is part of the credential scope.
    date = time.strftime("%Y-%m-%d", time.gmtime(ts))

    payload = {
        "SourceTextList": text_list,
        "Source": src,
        "Target": tgt,
        "ProjectId": 0,
    }
    # Serialize once to UTF-8 bytes and use the same bytes for both the hash
    # and the HTTP body. Passing a str body leaves the encoding to the HTTP
    # stack, which may use latin-1 and fail on non-Latin source text.
    payload_bytes = json.dumps(payload, ensure_ascii=False).encode("utf-8")

    canonical_headers = (
        f"content-type:{content_type}\n"
        f"host:{host}\n"
        f"x-tc-action:{action.lower()}\n"
    )
    canonical_request = "\n".join([
        "POST",                                   # HTTP method
        "/",                                      # canonical URI
        "",                                       # canonical query string (none)
        canonical_headers,
        signed_headers,
        hashlib.sha256(payload_bytes).hexdigest(),
    ])

    credential_scope = f"{date}/{service}/tc3_request"
    string_to_sign = "\n".join([
        algorithm,
        str(ts),
        credential_scope,
        hashlib.sha256(canonical_request.encode("utf-8")).hexdigest(),
    ])

    signature = _tc3_signature(secret_key, date, service, string_to_sign)

    authorization = (
        f"{algorithm} Credential={secret_id}/{credential_scope}, "
        f"SignedHeaders={signed_headers}, Signature={signature}"
    )
    headers = {
        "Authorization": authorization,
        "Content-Type": content_type,
        "Host": host,
        "X-TC-Action": action,
        "X-TC-Timestamp": str(ts),
        "X-TC-Version": version,
        "X-TC-Region": region,
    }

    resp = requests.post(TENCENT_TRANSLATE_URL, headers=headers, data=payload_bytes, timeout=8)
    resp.raise_for_status()
    data = resp.json()

    response = data.get("Response") if isinstance(data, dict) else None
    if not isinstance(response, dict):
        raise RuntimeError("Tencent Biz Error: malformed response (no 'Response' object)")

    error = response.get("Error")
    if error is not None:
        err_code = error.get("Code", "") if isinstance(error, dict) else ""
        err_msg = error.get("Message", "") if isinstance(error, dict) else str(error)
        raise RuntimeError(f"Tencent Biz Error: {err_code} - {err_msg}")

    translated = response.get("TargetTextList")
    # A short or missing list would silently misalign translations with their
    # source strings, so treat it as a failure.
    if not isinstance(translated, list) or len(translated) != len(text_list):
        raise RuntimeError(
            "Tencent Biz Error: TargetTextList missing or does not match the "
            f"{len(text_list)} submitted texts"
        )
    return translated


def _translate_with_tencent_pool(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
    """Translate ``texts`` via Tencent Cloud with batching and account rotation.

    1. Dynamic batching: ``texts`` is split, in order, into batches bounded by
       both a character budget and an item count.
    2. Account rotation: each batch is attempted with successive credentials
       from the global round-robin pool until one succeeds.

    Args:
        texts: Source strings to translate.
        src: Source language code passed to the API.
        tgt: Target language code passed to the API.

    Returns:
        The translated strings in input order, or ``None`` when no Tencent
        credentials are configured or any batch failed with every credential.
        A single failed batch fails the whole call so the caller never
        receives a partially translated list.
    """
    global _tencent_idx

    if not _tencent_creds_list:
        return None

    # Safety thresholds. Per the original author's note the official limit is
    # 6000 characters per request; 5000 leaves a 1000-character buffer.
    MAX_CHARS_PER_BATCH = 5000
    MAX_ITEMS_PER_BATCH = 50    # keep a single request's array small

    chunks: List[List[str]] = []
    current_chunk: List[str] = []
    current_char_count = 0

    for text in texts:
        text_len = len(text)

        # Close the current batch if adding this text would exceed a limit.
        # A lone text longer than the budget still goes out as its own batch.
        if current_chunk and (
            current_char_count + text_len > MAX_CHARS_PER_BATCH
            or len(current_chunk) >= MAX_ITEMS_PER_BATCH
        ):
            chunks.append(current_chunk)
            current_chunk = []
            current_char_count = 0

        current_chunk.append(text)
        current_char_count += text_len

    # Flush the trailing batch.
    if current_chunk:
        chunks.append(current_chunk)

    all_results: List[str] = []

    for chunk in chunks:
        # Try every configured credential at most once for this batch.
        for _ in range(len(_tencent_creds_list)):
            cred = _tencent_creds_list[_tencent_idx]
            _tencent_idx = (_tencent_idx + 1) % len(_tencent_creds_list)

            try:
                res = _call_tencent_batch_once(cred, list(chunk), src, tgt)
                all_results.extend(res)
            except Exception as e:
                # Mask the id for logging. Use tolerant lookups: the failure may
                # have been caused by a malformed credential entry, and the
                # handler must not raise while reporting it.
                raw_id = cred.get("secret_id", "") if isinstance(cred, dict) else ""
                safe_id = str(raw_id)[:4] + "****"
                print(f"[translator] Tencent ID {safe_id} failed on batch: {e}. Switching...")
            else:
                break  # batch translated; stop retrying
        else:
            # Every credential failed for this batch.
            print("[translator] All Tencent credentials failed for a batch. Falling back to Baidu.")
            return None

    return all_results


# 百度翻译逻辑
def _translate_with_baidu_pool(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
    """Translate ``texts`` with one Baidu account taken from the round-robin pool.

    All texts are sent in a single request as newline-separated lines of ``q``.
    To keep results aligned with inputs, line breaks inside a text are replaced
    by spaces, and blank texts are not sent but returned unchanged.

    Args:
        texts: Source strings to translate.
        src: Source language code (``from``).
        tgt: Target language code (``to``).

    Returns:
        A list with one entry per input text, or ``None`` when no Baidu
        credentials are configured, the request fails, the API reports an
        error, or the number of results does not match the number of lines sent.
    """
    global _baidu_idx

    if not _baidu_creds_list:
        return None

    results = list(texts)
    # One line per text: embedded line breaks would otherwise be treated as
    # extra lines and shift every following result.
    flattened = [" ".join(str(text).splitlines()) for text in results]
    positions = [i for i, line in enumerate(flattened) if line.strip()]
    if not positions:
        # Nothing translatable; do not spend a request.
        return results

    cred = _baidu_creds_list[_baidu_idx]
    _baidu_idx = (_baidu_idx + 1) % len(_baidu_creds_list)

    try:
        # Inside the try so that a malformed credential entry degrades to None
        # instead of raising. str() tolerates a numeric app_id in the JSON.
        app_id = str(cred["app_id"])
        secret_key = str(cred["secret_key"])
        salt = random.randint(32768, 65536)

        query = "\n".join(flattened[i] for i in positions)
        # The sign is MD5 over appid + q + salt + key, computed on the raw
        # (not URL-encoded) query.
        sign = hashlib.md5((app_id + query + str(salt) + secret_key).encode("utf-8")).hexdigest()

        params = {
            "q": query, "from": src, "to": tgt,
            "appid": app_id, "salt": salt, "sign": sign,
        }
        # Form-encoded POST keeps a long query out of the URL.
        resp = requests.post(BAIDU_TRANSLATE_URL, data=params, timeout=8)
        resp.raise_for_status()
        data = resp.json()
        if "error_code" in data:
            raise Exception(f"Baidu Biz Error: {data['error_code']} - {data.get('error_msg')}")

        translated = [item["dst"] for item in data["trans_result"]]
        if len(translated) != len(positions):
            raise Exception(
                f"Baidu Biz Error: expected {len(positions)} results, got {len(translated)}"
            )
    except Exception as e:
        print(f"[translator] Baidu API error → {e}")
        return None

    # Put each translation back at the position of its source text.
    for index, dst in zip(positions, translated):
        results[index] = dst
    return results


# 对外统一入口
def translate_texts(texts: Sequence[str],
                    src_lang: str = "auto",
                    tgt_lang: str = "zh") -> List[str]:
    """Translate ``texts``, degrading gracefully across providers.

    Order of attempts:
    1. Tencent Cloud (batch API with multi-account rotation).
    2. Baidu, if Tencent returned nothing usable.
    3. The original texts, if Baidu also returned nothing usable.

    Args:
        texts: Source strings; any iterable of strings is accepted.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        A list with exactly one entry per input text: the translations, or the
        untranslated inputs when every provider failed.
    """
    # Materialize once: a generator is always truthy and would be exhausted by
    # the first provider, leaving nothing for the fallbacks.
    originals = list(texts) if texts is not None else []
    if not originals:
        return []

    expected = len(originals)

    def _usable(candidate: Optional[List[str]]) -> bool:
        # A result of the wrong length cannot be paired with the inputs.
        return candidate is not None and len(candidate) == expected

    # 1. Prefer Tencent Cloud.
    out = _translate_with_tencent_pool(originals, src_lang, tgt_lang)

    # 2. Fall back to Baidu.
    if not _usable(out):
        out = _translate_with_baidu_pool(originals, src_lang, tgt_lang)

    # 3. Last resort: return the source texts unchanged.
    if not _usable(out):
        return originals
    return list(out)