IdlecloudX committed on
Commit
329cf76
·
verified ·
1 Parent(s): 57e41f0

Update translator.py

Browse files
Files changed (1) hide show
  1. translator.py +150 -64
translator.py CHANGED
@@ -1,48 +1,50 @@
1
  """
2
  translator.py
3
- 腾讯云 + 百度翻译 API 轮询封装
4
- ⚠️ 需在 HF 空间的 “Variables” 页设置以下环境变量
5
  ------------------------------------------------------------------
6
- TENCENT_SECRET_ID 腾讯云 SecretId
7
- TENCENT_SECRET_KEY 腾讯云 SecretKey
8
- TENCENT_TRANSLATE_URL (可选) 默认 https://tmt.tencentcloudapi.com
9
- BAIDU_TRANSLATE_URL (可选) 默认 https://fanyi-api.baidu.com/api/trans/vip/translate
10
- BAIDU_CREDENTIALS_JSON 形如:
11
  [
12
- {"app_id": "xxxx", "secret_key": "yyyy"},
13
- {"app_id": "aaaa", "secret_key": "bbbb"}
 
 
 
 
 
 
 
14
  ]
15
  ------------------------------------------------------------------
16
  """
17
  import hashlib, hmac, json, os, random, time
18
  from datetime import datetime
19
- from typing import List, Sequence, Optional
20
 
21
  import requests
22
 
23
- # ------------------------------------------------------------------
24
- # 读取环境变量
25
- # ------------------------------------------------------------------
26
- TENCENT_SECRET_ID = os.environ.get("TENCENT_SECRET_ID")
27
- TENCENT_SECRET_KEY = os.environ.get("TENCENT_SECRET_KEY")
 
 
 
 
28
  TENCENT_TRANSLATE_URL = os.environ.get("TENCENT_TRANSLATE_URL", "https://tmt.tencentcloudapi.com")
29
 
 
 
 
30
  BAIDU_TRANSLATE_URL = os.environ.get("BAIDU_TRANSLATE_URL", "https://fanyi-api.baidu.com/api/trans/vip/translate")
31
- BAIDU_CREDENTIALS = json.loads(os.environ.get("BAIDU_CREDENTIALS_JSON", "[]"))
32
 
33
- # 内部轮询索引
34
  _baidu_idx: int = 0
35
- def _next_baidu_cred():
36
- global _baidu_idx
37
- if not BAIDU_CREDENTIALS:
38
- return None
39
- cred = BAIDU_CREDENTIALS[_baidu_idx]
40
- _baidu_idx = (_baidu_idx + 1) % len(BAIDU_CREDENTIALS)
41
- return cred
42
 
43
- # ------------------------------------------------------------------
44
- # 腾讯翻译
45
- # ------------------------------------------------------------------
46
  def _sign(key: bytes, msg: str) -> bytes:
47
  return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
48
 
@@ -52,13 +54,16 @@ def _tc3_signature(secret_key: str, date: str, service: str, string_to_sign: str
52
  secret_signing = _sign(secret_service, "tc3_request")
53
  return hmac.new(secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
54
 
55
- def _translate_with_tencent(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
56
- """优先使用腾讯云翻译。失败返回 None"""
57
- if not (TENCENT_SECRET_ID and TENCENT_SECRET_KEY):
58
- return None # 未配置凭证
 
 
 
59
  service = "tmt"
60
  host = "tmt.tencentcloudapi.com"
61
- action = "TextTranslate"
62
  version = "2018-03-21"
63
  region = "ap-beijing"
64
  ts = int(time.time())
@@ -66,14 +71,14 @@ def _translate_with_tencent(texts: Sequence[str], src="auto", tgt="zh") -> Optio
66
  algorithm = "TC3-HMAC-SHA256"
67
 
68
  payload = {
69
- "SourceText": "\n".join(texts),
70
  "Source": src,
71
  "Target": tgt,
72
  "ProjectId": 0,
73
  }
 
74
  payload_str = json.dumps(payload, ensure_ascii=False)
75
 
76
- # ---------- step‑1 canonical request ----------
77
  canonical_request = "\n".join([
78
  "POST",
79
  "/",
@@ -83,19 +88,16 @@ def _translate_with_tencent(texts: Sequence[str], src="auto", tgt="zh") -> Optio
83
  hashlib.sha256(payload_str.encode()).hexdigest(),
84
  ])
85
 
86
- # ---------- step‑2 string to sign ----------
87
- credential_scope = f"{date}/{service}/tc3_request"
88
- string_to_sign = "\n".join([
89
  algorithm, str(ts), credential_scope,
90
  hashlib.sha256(canonical_request.encode()).hexdigest(),
91
  ])
92
 
93
- # ---------- step‑3 signature ----------
94
- signature = _tc3_signature(TENCENT_SECRET_KEY, date, service, string_to_sign)
95
 
96
- # ---------- step‑4 headers ----------
97
  authorization = (
98
- f"{algorithm} Credential={TENCENT_SECRET_ID}/{credential_scope}, "
99
  f"SignedHeaders=content-type;host;x-tc-action, Signature={signature}"
100
  )
101
  headers = {
@@ -108,27 +110,103 @@ def _translate_with_tencent(texts: Sequence[str], src="auto", tgt="zh") -> Optio
108
  "X-TC-Region": region,
109
  }
110
 
111
- # ---------- request ----------
112
- try:
113
- resp = requests.post(TENCENT_TRANSLATE_URL, headers=headers, data=payload_str, timeout=8)
114
- resp.raise_for_status()
115
- data = resp.json()
116
- return data["Response"]["TargetText"].split("\n")
117
- except Exception as e:
118
- print(f"[translator] Tencent API error {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  return None
120
 
121
- # ------------------------------------------------------------------
122
- # 百度翻译
123
- # ------------------------------------------------------------------
124
- def _translate_with_baidu(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
125
- creds = _next_baidu_cred()
126
- if creds is None:
127
- return None # 未配置凭证
128
- app_id, secret_key = creds["app_id"], creds["secret_key"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  salt = random.randint(32768, 65536)
 
130
  query = "\n".join(texts)
 
131
  sign = hashlib.md5((app_id + query + str(salt) + secret_key).encode()).hexdigest()
 
132
  params = {
133
  "q": query, "from": src, "to": tgt,
134
  "appid": app_id, "salt": salt, "sign": sign,
@@ -137,24 +215,32 @@ def _translate_with_baidu(texts: Sequence[str], src="auto", tgt="zh") -> Optiona
137
  resp = requests.get(BAIDU_TRANSLATE_URL, params=params, timeout=8)
138
  resp.raise_for_status()
139
  data = resp.json()
 
 
140
  return [item["dst"] for item in data["trans_result"]]
141
  except Exception as e:
142
  print(f"[translator] Baidu API error → {e}")
143
  return None
144
 
145
- # ------------------------------------------------------------------
146
- # 对外统一函数
147
- # ------------------------------------------------------------------
148
  def translate_texts(texts: Sequence[str],
149
  src_lang: str = "auto",
150
  tgt_lang: str = "zh") -> List[str]:
151
  """
152
- 优先 Tencent → 失败再 Baidu → 如果都失败,返回原文。
 
 
 
153
  """
154
  if not texts:
155
  return []
156
 
157
- out = _translate_with_tencent(texts, src_lang, tgt_lang)
 
 
 
158
  if out is None:
159
- out = _translate_with_baidu(texts, src_lang, tgt_lang)
160
- return out or list(texts)
 
 
1
  """
2
  translator.py
3
+ 腾讯云 (批量接口+动态分包+多账号轮询) + 百度翻译 API 封装
4
+ ⚠️ 需在 HF 空间的 “Variables” 页设置以下环境变量
5
  ------------------------------------------------------------------
6
+ TENCENT_CREDENTIALS_JSON 形如:
 
 
 
 
7
  [
8
+ {"secret_id": "AKIDxxxx", "secret_key": "yyyy"},
9
+ {"secret_id": "AKIDaaaa", "secret_key": "bbbb"}
10
+ ]
11
+ TENCENT_SECRET_ID (兼容旧配置) 单个 SecretId
12
+ TENCENT_SECRET_KEY (兼容旧配置) 单个 SecretKey
13
+ ------------------------------------------------------------------
14
+ BAIDU_CREDENTIALS_JSON 形如:
15
+ [
16
+ {"app_id": "xxxx", "secret_key": "yyyy"}
17
  ]
18
  ------------------------------------------------------------------
19
  """
20
  import hashlib, hmac, json, os, random, time
21
  from datetime import datetime
22
+ from typing import List, Sequence, Optional, Dict, Any
23
 
24
  import requests
25
 
def _load_credentials(env_var: str, required_keys: Sequence[str]) -> List[Dict[str, str]]:
    """Read a JSON array of credential objects from the environment variable *env_var*.

    Returns only the entries that are JSON objects with a truthy value for
    every key in *required_keys*. A missing/blank variable, invalid JSON, or a
    non-array value yields an empty list instead of raising, so a bad setting
    disables that backend rather than breaking the import of this module.
    Rejected entries are never printed, because they may contain secrets.
    """
    raw = os.environ.get(env_var, "")
    if not raw.strip():
        return []
    try:
        parsed = json.loads(raw)
    except ValueError as exc:  # json.JSONDecodeError is a ValueError subclass
        print(f"[translator] Ignoring {env_var}: invalid JSON ({exc})")
        return []
    if not isinstance(parsed, list):
        print(f"[translator] Ignoring {env_var}: expected a JSON array of objects")
        return []

    valid: List[Dict[str, str]] = []
    for position, entry in enumerate(parsed):
        if isinstance(entry, dict) and all(entry.get(key) for key in required_keys):
            valid.append(entry)
        else:
            print(f"[translator] Ignoring {env_var}[{position}]: "
                  f"missing one of {list(required_keys)}")
    return valid


# Tencent Cloud credential pool, read from TENCENT_CREDENTIALS_JSON.
_tencent_creds_list = _load_credentials("TENCENT_CREDENTIALS_JSON", ("secret_id", "secret_key"))

# Backward compatibility with the old single-account configuration: append the
# legacy pair unless the same secret_id is already present in the pool.
_legacy_id = os.environ.get("TENCENT_SECRET_ID")
_legacy_key = os.environ.get("TENCENT_SECRET_KEY")
if _legacy_id and _legacy_key:
    if not any(c.get("secret_id") == _legacy_id for c in _tencent_creds_list):
        _tencent_creds_list.append({"secret_id": _legacy_id, "secret_key": _legacy_key})

TENCENT_TRANSLATE_URL = os.environ.get("TENCENT_TRANSLATE_URL", "https://tmt.tencentcloudapi.com")

# Index of the next Tencent credential to use (round-robin).
_tencent_idx: int = 0


BAIDU_TRANSLATE_URL = os.environ.get("BAIDU_TRANSLATE_URL", "https://fanyi-api.baidu.com/api/trans/vip/translate")
_baidu_creds_list = _load_credentials("BAIDU_CREDENTIALS_JSON", ("app_id", "secret_key"))

# Index of the next Baidu credential to use (round-robin).
_baidu_idx: int = 0
 
 
 
 
 
 
 
45
 
46
+
47
+ # 腾讯云翻译逻辑 (批量接口 TextTranslateBatch)
 
def _sign(key: bytes, msg: str) -> bytes:
    """Return the raw HMAC-SHA256 digest of *msg* (encoded as UTF-8) under *key*."""
    return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()
50
 
 
54
  secret_signing = _sign(secret_service, "tc3_request")
55
  return hmac.new(secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
56
 
57
+ def _call_tencent_batch_once(cred: Dict[str, str], text_list: List[str], src: str, tgt: str) -> List[str]:
58
+ """
59
+ 调用腾讯云 TextTranslateBatch 接口
60
+ """
61
+ secret_id = cred["secret_id"]
62
+ secret_key = cred["secret_key"]
63
+
64
  service = "tmt"
65
  host = "tmt.tencentcloudapi.com"
66
+ action = "TextTranslateBatch"
67
  version = "2018-03-21"
68
  region = "ap-beijing"
69
  ts = int(time.time())
 
71
  algorithm = "TC3-HMAC-SHA256"
72
 
73
  payload = {
74
+ "SourceTextList": text_list,
75
  "Source": src,
76
  "Target": tgt,
77
  "ProjectId": 0,
78
  }
79
+
80
  payload_str = json.dumps(payload, ensure_ascii=False)
81
 
 
82
  canonical_request = "\n".join([
83
  "POST",
84
  "/",
 
88
  hashlib.sha256(payload_str.encode()).hexdigest(),
89
  ])
90
 
91
+ credential_scope = f"{date}/{service}/tc3_request"
92
+ string_to_sign = "\n".join([
 
93
  algorithm, str(ts), credential_scope,
94
  hashlib.sha256(canonical_request.encode()).hexdigest(),
95
  ])
96
 
97
+ signature = _tc3_signature(secret_key, date, service, string_to_sign)
 
98
 
 
99
  authorization = (
100
+ f"{algorithm} Credential={secret_id}/{credential_scope}, "
101
  f"SignedHeaders=content-type;host;x-tc-action, Signature={signature}"
102
  )
103
  headers = {
 
110
  "X-TC-Region": region,
111
  }
112
 
113
+ resp = requests.post(TENCENT_TRANSLATE_URL, headers=headers, data=payload_str, timeout=8)
114
+ resp.raise_for_status()
115
+ data = resp.json()
116
+
117
+ if "Response" in data and "Error" in data["Response"]:
118
+ err_code = data["Response"]["Error"].get("Code", "")
119
+ err_msg = data["Response"]["Error"].get("Message", "")
120
+ raise Exception(f"Tencent Biz Error: {err_code} - {err_msg}")
121
+
122
+ return data["Response"]["TargetTextList"]
123
+
124
+
def _split_into_batches(texts: Sequence[str], max_chars: int, max_items: int) -> List[List[str]]:
    """Group *texts*, in order, into batches bounded by total characters and item count."""
    batches: List[List[str]] = []
    current: List[str] = []
    current_chars = 0

    for text in texts:
        text_len = len(text)
        # Close the running batch when adding this text would exceed a limit.
        # An empty batch always accepts the text, so a single text longer than
        # max_chars still ends up in a batch of its own.
        if current and (current_chars + text_len > max_chars or len(current) >= max_items):
            batches.append(current)
            current = []
            current_chars = 0
        current.append(text)
        current_chars += text_len

    if current:
        batches.append(current)
    return batches


def _translate_with_tencent_pool(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
    """Translate *texts* through the Tencent batch call, rotating credentials.

    The texts are split into batches of at most 5000 characters in total and
    at most 50 items. Each batch is tried with every configured credential in
    round-robin order until one call succeeds.

    Args:
        texts: Source strings, in the order results are wanted.
        src: Source language code passed to the API.
        tgt: Target language code passed to the API.

    Returns:
        The translations in input order, or ``None`` when no credentials are
        configured or any batch fails with every credential. The whole call
        degrades as soon as one batch fails, so the caller never receives a
        partially translated list.
    """
    global _tencent_idx

    if not _tencent_creds_list:
        return None

    # Safety thresholds. The original author notes an official limit of 6000
    # characters and keeps a 1000-character buffer below it.
    MAX_CHARS_PER_BATCH = 5000
    MAX_ITEMS_PER_BATCH = 50  # keeps a single request's array small

    all_results: List[str] = []

    for chunk in _split_into_batches(texts, MAX_CHARS_PER_BATCH, MAX_ITEMS_PER_BATCH):
        chunk_success = False

        # Give every credential one chance at this batch.
        for _ in range(len(_tencent_creds_list)):
            cred = _tencent_creds_list[_tencent_idx]
            _tencent_idx = (_tencent_idx + 1) % len(_tencent_creds_list)

            try:
                res = _call_tencent_batch_once(cred, list(chunk), src, tgt)
                # A response of the wrong size would shift every later
                # translation onto the wrong source text; treat it as a failure.
                if not isinstance(res, list) or len(res) != len(chunk):
                    raise ValueError(
                        f"expected {len(chunk)} translations, got "
                        f"{len(res) if isinstance(res, list) else type(res).__name__}"
                    )
            except Exception as e:
                # Build the masked id defensively: a malformed credential must
                # not raise a second exception from inside this handler.
                raw_id = cred.get("secret_id", "") if isinstance(cred, dict) else ""
                safe_id = str(raw_id)[:4] + "****"
                print(f"[translator] Tencent ID {safe_id} failed on batch: {e}. Switching...")
                continue

            all_results.extend(res)
            chunk_success = True
            break  # success: stop retrying this batch

        if not chunk_success:
            print("[translator] All Tencent credentials failed for a batch. Falling back to Baidu.")
            return None  # one failed batch degrades the whole call, for consistency

    return all_results
191
+
192
+
193
+ # 百度翻译逻辑
194
+ def _translate_with_baidu_pool(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
195
+ global _baidu_idx, _baidu_creds_list
196
+
197
+ if not _baidu_creds_list:
198
+ return None
199
+
200
+ cred = _baidu_creds_list[_baidu_idx]
201
+ _baidu_idx = (_baidu_idx + 1) % len(_baidu_creds_list)
202
+
203
+ app_id, secret_key = cred["app_id"], cred["secret_key"]
204
  salt = random.randint(32768, 65536)
205
+
206
  query = "\n".join(texts)
207
+
208
  sign = hashlib.md5((app_id + query + str(salt) + secret_key).encode()).hexdigest()
209
+
210
  params = {
211
  "q": query, "from": src, "to": tgt,
212
  "appid": app_id, "salt": salt, "sign": sign,
 
215
  resp = requests.get(BAIDU_TRANSLATE_URL, params=params, timeout=8)
216
  resp.raise_for_status()
217
  data = resp.json()
218
+ if "error_code" in data:
219
+ raise Exception(f"Baidu Biz Error: {data['error_code']} - {data.get('error_msg')}")
220
  return [item["dst"] for item in data["trans_result"]]
221
  except Exception as e:
222
  print(f"[translator] Baidu API error → {e}")
223
  return None
224
 
225
+
226
+ # 对外统一入口
 
def translate_texts(texts: Sequence[str],
                    src_lang: str = "auto",
                    tgt_lang: str = "zh") -> List[str]:
    """Translate *texts*, preferring Tencent and falling back to Baidu.

    Order of attempts:
      1. Tencent Cloud (batch call with credential rotation).
      2. Baidu, if Tencent returned nothing usable.
      3. The original texts, if both backends failed.

    A backend result is only accepted when it holds exactly one item per input
    text. The Baidu path joins the inputs with newlines and returns one item
    per ``trans_result`` entry, so the counts may differ; a mismatched result
    is treated as a failure rather than returned misaligned.

    Args:
        texts: Source strings to translate.
        src_lang: Source language code passed to the backends.
        tgt_lang: Target language code passed to the backends.

    Returns:
        A list with the same length as *texts*: the translations, or a copy of
        the original texts when no backend produced a usable result. An empty
        input returns an empty list without contacting any backend.
    """
    if not texts:
        return []

    expected = len(texts)

    # 1. Try Tencent first.
    out = _translate_with_tencent_pool(texts, src_lang, tgt_lang)

    # 2. Fall back to Baidu when Tencent failed or returned a misaligned list.
    if out is None or len(out) != expected:
        out = _translate_with_baidu_pool(texts, src_lang, tgt_lang)

    # 3. Give up and hand back the originals.
    if out is None or len(out) != expected:
        if out is not None:
            print(f"[translator] Discarding misaligned result: "
                  f"expected {expected} items, got {len(out)}")
        return list(texts)

    return out