perf: using claude auto translate

This commit is contained in:
ibuler
2026-03-03 11:13:27 +08:00
committed by 老广
parent d9be890e89
commit 5c13d95c33
6 changed files with 527 additions and 30 deletions

View File

@@ -0,0 +1,140 @@
name: Auto translate i18n (po/json)
on:
workflow_dispatch:
inputs:
mode:
description: 'Run mode'
required: true
default: 'full'
type: choice
options:
- full
provider:
description: 'Translation provider'
required: true
default: 'claude'
type: choice
options:
- openai
- claude
pull_request:
branches:
- 'dev'
paths:
- 'apps/i18n/**'
types: [opened, synchronize, reopened]
permissions:
contents: write
pull-requests: write
concurrency:
group: i18n-translate-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
translate:
runs-on: ubuntu-latest
steps:
- name: Checkout (PR branch)
if: ${{ github.event_name == 'pull_request' }}
uses: actions/checkout@v4
with:
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}
fetch-depth: 0
- name: Checkout (manual)
if: ${{ github.event_name == 'workflow_dispatch' }}
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install translator dependencies
run: |
python -m pip install --upgrade pip
python -m pip install "openai>=1.29.0" "polib>=1.2.0" "tqdm>=4.66.4" "anthropic>=0.40.0"
- name: Auto translate (PR)
if: ${{ github.event_name == 'pull_request' }}
env:
I18N_PROVIDER: ${{ vars.I18N_PROVIDER || 'claude' }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY || secrets.GPT_API_TOKEN }}
OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
OPENAI_MODEL: ${{ secrets.OPENAI_MODEL }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
ANTHROPIC_MODEL: ${{ secrets.ANTHROPIC_MODEL }}
run: |
python apps/i18n/ci_translate.py \
--mode pr \
--base "${{ github.event.pull_request.base.sha }}" \
--head "${{ github.event.pull_request.head.sha }}" \
--overwrite
- name: Auto translate (manual full)
if: ${{ github.event_name == 'workflow_dispatch' }}
env:
I18N_PROVIDER: ${{ github.event.inputs.provider }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY || secrets.GPT_API_TOKEN }}
OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
OPENAI_MODEL: ${{ secrets.OPENAI_MODEL }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
ANTHROPIC_MODEL: ${{ secrets.ANTHROPIC_MODEL }}
run: |
python apps/i18n/ci_translate.py --mode full
- name: Show changes
run: |
git status --porcelain
git diff --stat
- name: Create patch artifact (fork PR)
if: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == true }}
run: |
git diff > i18n-translations.patch
- name: Upload patch artifact (fork PR)
if: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == true }}
uses: actions/upload-artifact@v4
with:
name: i18n-translations
path: i18n-translations.patch
- name: Commit & push changes (same-repo PR)
if: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == false }}
run: |
if [ -z "$(git status --porcelain)" ]; then
echo "No changes to commit."
exit 0
fi
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add apps/i18n
git commit -m "chore(i18n): auto-translate updated strings"
git push
- name: Create pull request (manual full run)
if: ${{ github.event_name == 'workflow_dispatch' }}
uses: peter-evans/create-pull-request@v6
with:
commit-message: 'chore(i18n): auto-translate all strings'
title: 'chore(i18n): auto-translate all strings'
body: |
Auto-generated i18n translations.
- Triggered by: ${{ github.actor }}
- Provider: ${{ github.event.inputs.provider }}
branch: pr/dev/i18n-auto-translate
base: dev
add-paths: |
apps/i18n/**
labels: |
i18n
automated-pr

View File

@@ -10,6 +10,7 @@ class BaseTranslateManager:
bulk_size = 15
SEPARATOR = "<-SEP->"
LANG_MAPPER = {
'en': 'English',
'ja': 'Japanese',
'zh_Hant': 'Traditional Chinese',
'pt_BR': 'Portuguese (Brazil)',

View File

@@ -32,17 +32,23 @@ class CoreTranslateManager(BaseTranslateManager):
print(f'{RED}File save error: {e}{RED}')
async def run(self):
po_file_path = os.path.join(self._dir, 'zh', 'LC_MESSAGES', 'django.po')
po = polib.pofile(po_file_path)
zh_dict = {entry.msgid: entry.msgstr for entry in po.translated_entries()}
async def process_po(po_name: str):
po_file_path = os.path.join(self._dir, 'zh', 'LC_MESSAGES', po_name)
po = polib.pofile(po_file_path)
zh_dict = {entry.msgid: entry.msgstr for entry in po.translated_entries()}
for file_prefix, target_lang in self.LANG_MAPPER.items():
po_file_path = os.path.join(self._dir, file_prefix, 'LC_MESSAGES', 'django.po')
trans_po = polib.pofile(po_file_path)
need_trans_dict = self.get_need_trans_dict(zh_dict, trans_po)
print(f'{GREEN}Translate: {self.dir_name} {file_prefix} '
f'django.po need to translate {len(need_trans_dict)}{GREEN}\n')
if not need_trans_dict:
continue
translated_dict = await self.bulk_translate(need_trans_dict, target_lang)
self.save_translations_to_po(translated_dict, trans_po)
for file_prefix, target_lang in self.LANG_MAPPER.items():
po_file_path = os.path.join(self._dir, file_prefix, 'LC_MESSAGES', po_name)
trans_po = polib.pofile(po_file_path)
need_trans_dict = self.get_need_trans_dict(zh_dict, trans_po)
print(f'{GREEN}Translate: {self.dir_name} {file_prefix} '
f'{po_name} need to translate {len(need_trans_dict)}{GREEN}\n')
if not need_trans_dict:
continue
translated_dict = await self.bulk_translate(need_trans_dict, target_lang)
self.save_translations_to_po(translated_dict, trans_po)
await process_po('django.po')
djangojs_po = os.path.join(self._dir, 'zh', 'LC_MESSAGES', 'djangojs.po')
if os.path.exists(djangojs_po):
await process_po('djangojs.po')

View File

@@ -1,34 +1,88 @@
import os
from typing import Protocol
from openai import AsyncOpenAI
class Translator(Protocol):
async def translate_text(self, text: str, target_lang: str = "English") -> str | None: ...
_TRANSLATION_SYSTEM_PROMPT = (
"Now I ask you to be the translator. "
"Your goal is to understand the Chinese I provided you and translate it into {target_lang}. "
"Please translate naturally, smoothly and authentically (no translation accent). "
"Do NOT change placeholders or tokens; keep them exactly as-is, including but not limited to: "
"%s, %d, %(name)s, {name}, {}, {{value}}, <tag>...</tag>, URLs, and line breaks. "
"If you found word '动作' please translate it to 'Action', because it's short. "
"If you found word '管理' in menu, you can not translate it, because management is too long in menu."
)
class OpenAITranslate:
def __init__(self, key: str | None = None, base_url: str | None = None):
def __init__(
self,
key: str | None = None,
base_url: str | None = None,
model: str | None = None,
):
key = key or os.getenv("OPENAI_API_KEY")
base_url = base_url or os.getenv("OPENAI_BASE_URL") or None
self.model = model or os.getenv("OPENAI_MODEL") or "gpt-4o-mini"
self.client = AsyncOpenAI(api_key=key, base_url=base_url)
async def translate_text(self, text, target_lang="English") -> str | None:
async def translate_text(self, text: str, target_lang: str = "English") -> str | None:
try:
response = await self.client.chat.completions.create(
messages=[
{
"role": "system",
"content": f"Now I ask you to be the translator. "
f"Your goal is to understand the Chinese "
f"I provided you and translate it into {target_lang}. "
f"Please do not use a translation accent when translating, "
f"but translate naturally, smoothly and authentically, "
f"using beautiful and elegant words. way of expression,"
f"If you found word '动作' please translate it to 'Action', because it's short,"
f"If you found word '管理' in menu, you can not translate it, because management is too long in menu"
,
"content": _TRANSLATION_SYSTEM_PROMPT.format(target_lang=target_lang),
},
{
"role": "user",
"content": text,
},
],
model="gpt-4o-mini",
model=self.model,
)
except Exception as e:
print("Open AI Error: ", e)
return
print("OpenAI Error: ", e)
return None
return response.choices[0].message.content.strip()
class ClaudeTranslate:
    """Translator backend that calls ``messages.create`` on an ``AsyncAnthropic`` client."""

    def __init__(self, key: str | None = None, model: str | None = None):
        """Create the async client.

        Args:
            key: API key; when falsy, ``ANTHROPIC_API_KEY`` is read from the environment.
            model: Model name; when falsy, ``ANTHROPIC_MODEL`` is used, then a built-in default.
        """
        # Imported lazily so the anthropic package is only required when this
        # provider is actually instantiated.
        from anthropic import AsyncAnthropic  # type: ignore

        api_key = key if key else os.getenv("ANTHROPIC_API_KEY")

        chosen_model = model
        if not chosen_model:
            chosen_model = os.getenv("ANTHROPIC_MODEL")
        if not chosen_model:
            # NOTE(review): confirm this default alias is still served by the API.
            chosen_model = "claude-3-5-sonnet-latest"

        self.model = chosen_model
        self.client = AsyncAnthropic(api_key=api_key)

    async def translate_text(self, text: str, target_lang: str = "English") -> str | None:
        """Translate ``text`` into ``target_lang``.

        Returns:
            The stripped concatenation of all text blocks in the reply, or
            ``None`` when the request raised or the reply contained no text.
        """
        try:
            system_prompt = _TRANSLATION_SYSTEM_PROMPT.format(target_lang=target_lang)
            reply = await self.client.messages.create(
                model=self.model,
                max_tokens=4096,
                system=system_prompt,
                messages=[{"role": "user", "content": text}],
            )
        except Exception as exc:
            # Best-effort: report the failure and let the caller see None.
            print("Claude Error: ", exc)
            return None

        # The reply is a list of content blocks; only the "text" ones are kept.
        text_blocks = [
            block.text
            for block in reply.content
            if getattr(block, "type", None) == "text"
        ]
        joined = "".join(text_blocks).strip()
        return joined if joined else None
def build_translator() -> Translator:
    """Instantiate the backend selected by the ``I18N_PROVIDER`` environment variable.

    ``claude`` and ``anthropic`` (case-insensitive) select ``ClaudeTranslate``;
    an unset/empty variable or any other value selects ``OpenAITranslate``.
    """
    configured = os.getenv("I18N_PROVIDER")
    provider_name = configured.lower() if configured else "openai"
    if provider_name not in ("claude", "anthropic"):
        return OpenAITranslate()
    return ClaudeTranslate()

296
apps/i18n/ci_translate.py Normal file
View File

@@ -0,0 +1,296 @@
#!/usr/bin/env python3
import argparse
import asyncio
import json
import os
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
import polib
from _translator.base import BaseTranslateManager
from _translator.utils import build_translator
# Checkout root: this script lives at apps/i18n/ci_translate.py, so the
# third ancestor of the resolved file path is the repository root. Git
# commands are run from here and source paths are joined onto it.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Directory that contains this script (translate.py is loaded from here).
I18N_ROOT = Path(__file__).resolve().parent
def _run_git(args: list[str]) -> str:
    """Run ``git <args>`` from the repository root and return its stdout as text.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    command = ["git"]
    command.extend(args)
    return subprocess.check_output(command, cwd=REPO_ROOT, text=True)
def _git_show_text(rev: str, relpath: str) -> str | None:
    """Return the content of ``relpath`` at revision ``rev``.

    ``None`` is returned when ``git show`` exits with an error.
    """
    object_spec = f"{rev}:{relpath}"
    try:
        content = _run_git(["show", object_spec])
    except subprocess.CalledProcessError:
        content = None
    return content
def _read_json_text(path: Path) -> dict:
if not path.exists():
return {}
return json.loads(path.read_text(encoding="utf-8"))
def _read_json_text_from_git(rev: str, relpath: str) -> dict:
    """Parse ``relpath`` at revision ``rev`` as JSON.

    An empty dict is returned when the file cannot be shown or is empty.
    """
    raw = _git_show_text(rev, relpath)
    return json.loads(raw) if raw else {}
def _read_po_from_text(txt: str) -> polib.POFile:
    """Parse PO content held in a string by round-tripping it through a temp file.

    The temporary ``.po`` file is always removed afterwards; a failure to
    delete it is ignored.
    """
    tmp = tempfile.NamedTemporaryFile("w+", encoding="utf-8", suffix=".po", delete=False)
    with tmp:
        tmp.write(txt)
        tmp.flush()
        tmp_path = tmp.name

    try:
        parsed = polib.pofile(tmp_path)
    finally:
        # delete=False above means cleanup is our responsibility.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
    return parsed
def _read_po_from_git(rev: str, relpath: str) -> polib.POFile | None:
    """Parse the PO file ``relpath`` as it exists at revision ``rev``.

    ``None`` is returned when the file cannot be shown or is empty.
    """
    raw = _git_show_text(rev, relpath)
    if raw:
        return _read_po_from_text(raw)
    return None
def _read_po_from_fs(path: Path) -> polib.POFile:
    """Parse the PO file located at ``path`` on disk."""
    po_location = str(path)
    return polib.pofile(po_location)
def _changed_keys_json(base: dict, head: dict) -> set[str]:
changed: set[str] = set()
for k, v in head.items():
if not k:
continue
if k not in base or base.get(k) != v:
changed.add(k)
return changed
def _po_translated_dict(po: polib.POFile) -> dict[str, str]:
    """Map ``msgid`` to ``msgstr`` for every entry returned by ``po.translated_entries()``."""
    mapping: dict[str, str] = {}
    for entry in po.translated_entries():
        mapping[entry.msgid] = entry.msgstr
    return mapping
def _changed_msgids_po(base_po: polib.POFile | None, head_po: polib.POFile) -> set[str]:
    """Return msgids translated in ``head_po`` that are new or changed versus ``base_po``.

    A falsy ``base_po`` (``None`` or an empty PO file) is treated as having
    no translated entries, so every translated msgid in ``head_po`` counts.
    """
    previous = _po_translated_dict(base_po) if base_po else {}
    current = _po_translated_dict(head_po)
    return {
        msgid
        for msgid, msgstr in current.items()
        if msgid not in previous or previous[msgid] != msgstr
    }
@dataclass(frozen=True)
class WorkItem:
    """One changed Chinese source file whose translations need refreshing."""

    # "json": src_relpath points to a zh.json file.
    # "po":   src_relpath points to zh/LC_MESSAGES/{django.po|djangojs.po}.
    kind: str
    # Path as printed by ``git diff --name-only``; joined onto REPO_ROOT by callers.
    src_relpath: str
class _BulkTranslator(BaseTranslateManager):
    """Bare subclass used only to reuse ``BaseTranslateManager.bulk_translate``."""
async def _translate_json_item(
    translator,
    module_dir: Path,
    zh_relpath: str,
    changed_keys: set[str],
    overwrite: bool,
):
    """Translate the changed keys of a ``zh.json`` into every other language file.

    Args:
        translator: Backend handed to the bulk translator.
        module_dir: Directory holding the per-language ``<lang>.json`` files.
        zh_relpath: Path of the Chinese source JSON, relative to the repo root.
        changed_keys: Keys whose Chinese text is new or modified.
        overwrite: When true, existing target translations are replaced;
            otherwise only keys missing from the target file are translated.
    """
    source = _read_json_text(REPO_ROOT / zh_relpath)
    if not source:
        return

    manager = _BulkTranslator(str(module_dir), translator)
    for prefix, target_lang in manager.LANG_MAPPER.items():
        # Target JSON files are named with the lower-cased language code.
        lang_code = prefix.lower()
        if lang_code == "zh":
            continue

        out_path = module_dir / f"{lang_code}.json"
        existing = _read_json_text(out_path)

        pending: dict[str, str] = {
            key: source[key]
            for key in changed_keys
            if key in source and (overwrite or key not in existing)
        }
        if not pending:
            continue

        existing.update(await manager.bulk_translate(pending, target_lang))
        serialized = json.dumps(existing, ensure_ascii=False, sort_keys=True, indent=4)
        out_path.write_text(serialized + "\n", encoding="utf-8")
async def _translate_po_item(
    translator,
    module_dir: Path,
    po_name: str,
    zh_relpath: str,
    changed_msgids: set[str],
    overwrite: bool,
):
    """Translate changed msgids of a Chinese PO file into every other language.

    Args:
        translator: Backend handed to the bulk translator.
        module_dir: Directory containing the ``<lang>/LC_MESSAGES`` folders.
        po_name: File name of the PO catalogue inside ``LC_MESSAGES``.
        zh_relpath: Path of the Chinese source PO, relative to the repo root.
        changed_msgids: msgids whose Chinese translation is new or modified.
        overwrite: When true, existing target translations are replaced;
            otherwise only empty or fuzzy entries are translated.
    """
    zh_path = REPO_ROOT / zh_relpath
    if not zh_path.exists():
        return

    source = _po_translated_dict(_read_po_from_fs(zh_path))
    if not source:
        return

    manager = _BulkTranslator(str(module_dir), translator)
    for prefix, target_lang in manager.LANG_MAPPER.items():
        if prefix == "zh":
            continue

        target_path = module_dir / prefix / "LC_MESSAGES" / po_name
        if not target_path.exists():
            continue
        target_po = _read_po_from_fs(target_path)

        pending: dict[str, str] = {}
        for msgid in changed_msgids:
            if msgid not in source:
                continue
            entry = target_po.find(msgid)
            if not entry:
                # The msgid is absent from the target catalogue; nothing to fill in.
                continue
            needs_translation = overwrite or not entry.msgstr or "fuzzy" in entry.flags
            if needs_translation:
                pending[msgid] = source[msgid]
        if not pending:
            continue

        results = await manager.bulk_translate(pending, target_lang)
        for msgid, msgstr in results.items():
            entry = target_po.find(msgid)
            if entry:
                # NOTE(review): this clears every flag, not only "fuzzy"
                # (e.g. a python-format flag would be dropped too) - confirm intended.
                entry.flags = []
                entry.previous_msgid = None
                entry.msgstr = msgstr
        target_po.save(str(target_path))
def _discover_work_items_from_diff(base: str, head: str) -> list[WorkItem]:
    """List the Chinese i18n source files that differ between ``base`` and ``head``.

    Only paths under ``apps/i18n/`` are considered: ``zh.json`` files outside
    any ``LC_MESSAGES`` directory become "json" items, and
    ``zh/LC_MESSAGES/django.po`` / ``djangojs.po`` become "po" items.
    Duplicates are dropped while keeping first-seen order.
    """
    po_suffixes = ("/zh/LC_MESSAGES/django.po", "/zh/LC_MESSAGES/djangojs.po")

    # NOTE(review): a two-dot range compares the two tips directly; if the base
    # branch has moved on since the PR branched, its changes show up here too.
    # Consider whether a merge-base (three-dot) comparison is wanted.
    listing = _run_git(["diff", "--name-only", f"{base}..{head}"])

    found: dict[tuple[str, str], WorkItem] = {}
    for path in listing.splitlines():
        if not path.startswith("apps/i18n/"):
            continue
        if path.endswith("/zh.json") and "/LC_MESSAGES/" not in path:
            kind = "json"
        elif path.endswith(po_suffixes):
            kind = "po"
        else:
            continue
        found[(kind, path)] = WorkItem(kind=kind, src_relpath=path)
    return list(found.values())
async def run_pr(base: str, head: str, overwrite: bool):
    """Translate only the i18n strings that changed between ``base`` and ``head``.

    The list of changed files comes from the git diff of the two revisions.
    The "before" content is read from ``base`` via git, while the "after"
    content is read from the working tree on disk.

    Args:
        base: Base git revision.
        head: Head git revision (used for the file listing only).
        overwrite: Forwarded to the per-item translators.
    """
    translator = build_translator()
    work_items = _discover_work_items_from_diff(base, head)
    if not work_items:
        print("No i18n source changes detected; skip.")
        return

    for item in work_items:
        source_path = REPO_ROOT / item.src_relpath

        if item.kind == "json":
            before = _read_json_text_from_git(base, item.src_relpath)
            after = _read_json_text(source_path)
            changed_keys = _changed_keys_json(before, after)
            if changed_keys:
                await _translate_json_item(
                    translator,
                    source_path.parent,
                    item.src_relpath,
                    changed_keys,
                    overwrite=overwrite,
                )
        elif item.kind == "po":
            # .../core/zh/LC_MESSAGES/django.po -> module directory is .../core
            module_dir = source_path.parents[2]
            base_po = _read_po_from_git(base, item.src_relpath)
            head_po = _read_po_from_fs(source_path)
            changed_msgids = _changed_msgids_po(base_po, head_po)
            if changed_msgids:
                await _translate_po_item(
                    translator,
                    module_dir,
                    po_name=source_path.name,
                    zh_relpath=item.src_relpath,
                    changed_msgids=changed_msgids,
                    overwrite=overwrite,
                )
async def run_full():
    """Run the full translation pass implemented by the sibling ``translate.py``.

    The module is loaded by file path under a private name so that it cannot
    be confused with any third-party package called "translate".

    Raises:
        RuntimeError: if an import spec/loader cannot be created for the file.
    """
    import importlib.util

    module_path = I18N_ROOT / "translate.py"
    spec = importlib.util.spec_from_file_location("jumpserver_i18n_translate", module_path)
    loader = spec.loader if spec else None
    if not loader:
        raise RuntimeError(f"Failed to load translate module from {module_path}")

    module = importlib.util.module_from_spec(spec)
    loader.exec_module(module)

    translator = build_translator()
    await module.Translate(translator).run()
def main(argv: Iterable[str] | None = None):
parser = argparse.ArgumentParser(description="JumpServer i18n CI translator")
parser.add_argument("--mode", choices=["full", "pr"], required=True)
parser.add_argument("--base", help="Base git sha (PR only)")
parser.add_argument("--head", help="Head git sha (PR only)", default="HEAD")
parser.add_argument(
"--overwrite",
action=argparse.BooleanOptionalAction,
default=True,
help="Overwrite existing translations for changed source keys/msgids",
)
args = parser.parse_args(list(argv) if argv is not None else None)
if args.mode == "full":
asyncio.run(run_full())
return
if not args.base:
raise SystemExit("--base is required for --mode pr")
asyncio.run(run_pr(args.base, args.head, overwrite=args.overwrite))
if __name__ == "__main__":
main()

View File

@@ -4,7 +4,7 @@ import os
from _translator.const import LOCALE_DIR, RED
from _translator.core import CoreTranslateManager
from _translator.other import OtherTranslateManager
from _translator.utils import OpenAITranslate
from _translator.utils import build_translator
class Translate:
@@ -55,6 +55,6 @@ class Translate:
if __name__ == '__main__':
oai_trans = OpenAITranslate()
manager = Translate(oai_trans)
translator = build_translator()
manager = Translate(translator)
asyncio.run(manager.run())