diff --git a/.github/workflows/i18n-auto-translate.yml b/.github/workflows/i18n-auto-translate.yml new file mode 100644 index 000000000..c5bffccf8 --- /dev/null +++ b/.github/workflows/i18n-auto-translate.yml @@ -0,0 +1,140 @@ +name: Auto translate i18n (po/json) + +on: + workflow_dispatch: + inputs: + mode: + description: 'Run mode' + required: true + default: 'full' + type: choice + options: + - full + provider: + description: 'Translation provider' + required: true + default: 'claude' + type: choice + options: + - openai + - claude + + pull_request: + branches: + - 'dev' + paths: + - 'apps/i18n/**' + types: [opened, synchronize, reopened] + +permissions: + contents: write + pull-requests: write + +concurrency: + group: i18n-translate-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + translate: + runs-on: ubuntu-latest + steps: + - name: Checkout (PR branch) + if: ${{ github.event_name == 'pull_request' }} + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.ref }} + repository: ${{ github.event.pull_request.head.repo.full_name }} + fetch-depth: 0 + + - name: Checkout (manual) + if: ${{ github.event_name == 'workflow_dispatch' }} + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install translator dependencies + run: | + python -m pip install --upgrade pip + python -m pip install "openai>=1.29.0" "polib>=1.2.0" "tqdm>=4.66.4" "anthropic>=0.40.0" + + - name: Auto translate (PR) + if: ${{ github.event_name == 'pull_request' }} + env: + I18N_PROVIDER: ${{ vars.I18N_PROVIDER || 'claude' }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY || secrets.GPT_API_TOKEN }} + OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }} + OPENAI_MODEL: ${{ secrets.OPENAI_MODEL }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + ANTHROPIC_MODEL: ${{ secrets.ANTHROPIC_MODEL }} + run: | + python apps/i18n/ci_translate.py \ + --mode pr \ + --base "${{ github.event.pull_request.base.sha }}" \ + --head "${{ github.event.pull_request.head.sha }}" \ + --overwrite + + - name: Auto translate (manual full) + if: ${{ github.event_name == 'workflow_dispatch' }} + env: + I18N_PROVIDER: ${{ github.event.inputs.provider }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY || secrets.GPT_API_TOKEN }} + OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }} + OPENAI_MODEL: ${{ secrets.OPENAI_MODEL }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + ANTHROPIC_MODEL: ${{ secrets.ANTHROPIC_MODEL }} + run: | + python apps/i18n/ci_translate.py --mode full + + - name: Show changes + run: | + git status --porcelain + git diff --stat + + - name: Create patch artifact (fork PR) + if: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == true }} + run: | + git diff > i18n-translations.patch + + - name: Upload patch artifact (fork PR) + if: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == true }} + uses: actions/upload-artifact@v4 + with: + name: i18n-translations + path: i18n-translations.patch + + - name: Commit & push changes (same-repo PR) + if: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == false }} + run: | + if [ -z "$(git status --porcelain)" ]; then + echo "No changes to commit." + exit 0 + fi + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add apps/i18n + git commit -m "chore(i18n): auto-translate updated strings" + git push + + - name: Create pull request (manual full run) + if: ${{ github.event_name == 'workflow_dispatch' }} + uses: peter-evans/create-pull-request@v6 + with: + commit-message: 'chore(i18n): auto-translate all strings' + title: 'chore(i18n): auto-translate all strings' + body: | + Auto-generated i18n translations. + + - Triggered by: ${{ github.actor }} + - Provider: ${{ github.event.inputs.provider }} + branch: pr/dev/i18n-auto-translate + base: dev + add-paths: | + apps/i18n/** + labels: | + i18n + automated-pr diff --git a/apps/i18n/_translator/base.py b/apps/i18n/_translator/base.py index 3c8017cc8..758af9b3e 100644 --- a/apps/i18n/_translator/base.py +++ b/apps/i18n/_translator/base.py @@ -10,6 +10,7 @@ class BaseTranslateManager: bulk_size = 15 SEPARATOR = "<-SEP->" LANG_MAPPER = { + 'en': 'English', 'ja': 'Japanese', 'zh_Hant': 'Traditional Chinese', 'pt_BR': 'Portuguese (Brazil)', diff --git a/apps/i18n/_translator/core.py b/apps/i18n/_translator/core.py index 6a576de94..cc800b617 100644 --- a/apps/i18n/_translator/core.py +++ b/apps/i18n/_translator/core.py @@ -32,17 +32,23 @@ class CoreTranslateManager(BaseTranslateManager): print(f'{RED}File save error: {e}{RED}') async def run(self): - po_file_path = os.path.join(self._dir, 'zh', 'LC_MESSAGES', 'django.po') - po = polib.pofile(po_file_path) - zh_dict = {entry.msgid: entry.msgstr for entry in po.translated_entries()} + async def process_po(po_name: str): + po_file_path = os.path.join(self._dir, 'zh', 'LC_MESSAGES', po_name) + po = polib.pofile(po_file_path) + zh_dict = {entry.msgid: entry.msgstr for entry in po.translated_entries()} - for file_prefix, target_lang in self.LANG_MAPPER.items(): - po_file_path = os.path.join(self._dir, file_prefix, 'LC_MESSAGES', 'django.po') - trans_po = polib.pofile(po_file_path) - need_trans_dict = self.get_need_trans_dict(zh_dict, trans_po) - print(f'{GREEN}Translate: {self.dir_name} {file_prefix} ' - f'django.po need to translate {len(need_trans_dict)}{GREEN}\n') - if not need_trans_dict: - continue - translated_dict = await self.bulk_translate(need_trans_dict, target_lang) - self.save_translations_to_po(translated_dict, trans_po) + for file_prefix, target_lang in self.LANG_MAPPER.items(): + po_file_path = os.path.join(self._dir, file_prefix, 'LC_MESSAGES', po_name) + trans_po = polib.pofile(po_file_path) + need_trans_dict = self.get_need_trans_dict(zh_dict, trans_po) + print(f'{GREEN}Translate: {self.dir_name} {file_prefix} ' + f'{po_name} need to translate {len(need_trans_dict)}{GREEN}\n') + if not need_trans_dict: + continue + translated_dict = await self.bulk_translate(need_trans_dict, target_lang) + self.save_translations_to_po(translated_dict, trans_po) + + await process_po('django.po') + djangojs_po = os.path.join(self._dir, 'zh', 'LC_MESSAGES', 'djangojs.po') + if os.path.exists(djangojs_po): + await process_po('djangojs.po') diff --git a/apps/i18n/_translator/utils.py b/apps/i18n/_translator/utils.py index f4fcad6f3..52cbe0e19 100644 --- a/apps/i18n/_translator/utils.py +++ b/apps/i18n/_translator/utils.py @@ -1,34 +1,88 @@ +import os +from typing import Protocol + from openai import AsyncOpenAI +class Translator(Protocol): + async def translate_text(self, text: str, target_lang: str = "English") -> str | None: ... + + +_TRANSLATION_SYSTEM_PROMPT = ( + "Now I ask you to be the translator. " + "Your goal is to understand the Chinese I provided you and translate it into {target_lang}. " + "Please translate naturally, smoothly and authentically (no translation accent). " + "Do NOT change placeholders or tokens; keep them exactly as-is, including but not limited to: " + "%s, %d, %(name)s, {name}, {}, {{value}}, ..., URLs, and line breaks. " + "If you found word '动作' please translate it to 'Action', because it's short. " + "If you found word '管理' in menu, you can not translate it, because management is too long in menu." +) + + class OpenAITranslate: - def __init__(self, key: str | None = None, base_url: str | None = None): + def __init__( + self, + key: str | None = None, + base_url: str | None = None, + model: str | None = None, + ): + key = key or os.getenv("OPENAI_API_KEY") + base_url = base_url or os.getenv("OPENAI_BASE_URL") or None + self.model = model or os.getenv("OPENAI_MODEL") or "gpt-4o-mini" self.client = AsyncOpenAI(api_key=key, base_url=base_url) - async def translate_text(self, text, target_lang="English") -> str | None: + async def translate_text(self, text: str, target_lang: str = "English") -> str | None: try: response = await self.client.chat.completions.create( messages=[ { "role": "system", - "content": f"Now I ask you to be the translator. " - f"Your goal is to understand the Chinese " - f"I provided you and translate it into {target_lang}. " - f"Please do not use a translation accent when translating, " - f"but translate naturally, smoothly and authentically, " - f"using beautiful and elegant words. way of expression," - f"If you found word '动作' please translate it to 'Action', because it's short," - f"If you found word '管理' in menu, you can not translate it, because management is too long in menu" - , + "content": _TRANSLATION_SYSTEM_PROMPT.format(target_lang=target_lang), }, { "role": "user", "content": text, }, ], - model="gpt-4o-mini", + model=self.model, ) except Exception as e: - print("Open AI Error: ", e) - return + print("OpenAI Error: ", e) + return None return response.choices[0].message.content.strip() + + +class ClaudeTranslate: + def __init__(self, key: str | None = None, model: str | None = None): + # anthropic is optional at runtime; only required when provider=claude + from anthropic import AsyncAnthropic # type: ignore + + key = key or os.getenv("ANTHROPIC_API_KEY") + self.model = model or os.getenv("ANTHROPIC_MODEL") or "claude-3-5-sonnet-latest" + self.client = AsyncAnthropic(api_key=key) + + async def translate_text(self, text: str, target_lang: str = "English") -> str | None: + try: + msg = await self.client.messages.create( + model=self.model, + max_tokens=4096, + system=_TRANSLATION_SYSTEM_PROMPT.format(target_lang=target_lang), + messages=[{"role": "user", "content": text}], + ) + except Exception as e: + print("Claude Error: ", e) + return None + + # anthropic SDK returns content blocks; we want the concatenated text + parts: list[str] = [] + for block in msg.content: + if getattr(block, "type", None) == "text": + parts.append(block.text) + return "".join(parts).strip() or None + + +def build_translator() -> Translator: + provider = (os.getenv("I18N_PROVIDER") or "openai").lower() + if provider in {"claude", "anthropic"}: + return ClaudeTranslate() + return OpenAITranslate() diff --git a/apps/i18n/ci_translate.py b/apps/i18n/ci_translate.py new file mode 100644 index 000000000..cc8d604ca --- /dev/null +++ b/apps/i18n/ci_translate.py @@ -0,0 +1,296 @@ +#!/usr/bin/env python3 +import argparse +import asyncio +import json +import os +import subprocess +import tempfile +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable + +import polib + +from _translator.base import BaseTranslateManager +from _translator.utils import build_translator + + +REPO_ROOT = Path(__file__).resolve().parents[2] +I18N_ROOT = Path(__file__).resolve().parent + + +def _run_git(args: list[str]) -> str: + out = subprocess.check_output(["git", *args], cwd=REPO_ROOT, text=True) + return out + + +def _git_show_text(rev: str, relpath: str) -> str | None: + try: + return _run_git(["show", f"{rev}:{relpath}"]) + except subprocess.CalledProcessError: + return None + + +def _read_json_text(path: Path) -> dict: + if not path.exists(): + return {} + return json.loads(path.read_text(encoding="utf-8")) + + +def _read_json_text_from_git(rev: str, relpath: str) -> dict: + txt = _git_show_text(rev, relpath) + if not txt: + return {} + return json.loads(txt) + + +def _read_po_from_text(txt: str) -> polib.POFile: + with tempfile.NamedTemporaryFile("w+", encoding="utf-8", suffix=".po", delete=False) as f: + f.write(txt) + f.flush() + name = f.name + try: + return polib.pofile(name) + finally: + try: + os.unlink(name) + except OSError: + pass + + +def _read_po_from_git(rev: str, relpath: str) -> polib.POFile | None: + txt = _git_show_text(rev, relpath) + if not txt: + return None + return _read_po_from_text(txt) + + +def _read_po_from_fs(path: Path) -> polib.POFile: + return polib.pofile(str(path)) + + +def _changed_keys_json(base: dict, head: dict) -> set[str]: + changed: set[str] = set() + for k, v in head.items(): + if not k: + continue + if k not in base or base.get(k) != v: + changed.add(k) + return changed + + +def _po_translated_dict(po: polib.POFile) -> dict[str, str]: + return {e.msgid: e.msgstr for e in po.translated_entries()} + + +def _changed_msgids_po(base_po: polib.POFile | None, head_po: polib.POFile) -> set[str]: + base = _po_translated_dict(base_po) if base_po else {} + head = _po_translated_dict(head_po) + changed: set[str] = set() + for msgid, msgstr in head.items(): + if msgid not in base or base.get(msgid) != msgstr: + changed.add(msgid) + return changed + + +@dataclass(frozen=True) +class WorkItem: + # For json: kind="json", src points to zh.json + # For po: kind="po", src points to zh/LC_MESSAGES/{django.po|djangojs.po} + kind: str + src_relpath: str + + +class _BulkTranslator(BaseTranslateManager): + # Reuse BaseTranslateManager.bulk_translate + pass + + +async def _translate_json_item( + translator, + module_dir: Path, + zh_relpath: str, + changed_keys: set[str], + overwrite: bool, +): + zh_path = REPO_ROOT / zh_relpath + zh_dict = _read_json_text(zh_path) + if not zh_dict: + return + + mgr = _BulkTranslator(str(module_dir), translator) + for file_prefix, target_lang in mgr.LANG_MAPPER.items(): + file_prefix = file_prefix.lower() + if file_prefix == "zh": + continue + + target_path = module_dir / f"{file_prefix}.json" + target_dict = _read_json_text(target_path) + + to_update: dict[str, str] = {} + for k in changed_keys: + if k not in zh_dict: + continue + if overwrite or k not in target_dict: + to_update[k] = zh_dict[k] + + if not to_update: + continue + translated = await mgr.bulk_translate(to_update, target_lang) + target_dict.update(translated) + target_path.write_text( + json.dumps(target_dict, ensure_ascii=False, sort_keys=True, indent=4) + "\n", + encoding="utf-8", + ) + + +async def _translate_po_item( + translator, + module_dir: Path, + po_name: str, + zh_relpath: str, + changed_msgids: set[str], + overwrite: bool, +): + zh_path = REPO_ROOT / zh_relpath + if not zh_path.exists(): + return + zh_po = _read_po_from_fs(zh_path) + zh_dict = _po_translated_dict(zh_po) + if not zh_dict: + return + + mgr = _BulkTranslator(str(module_dir), translator) + for file_prefix, target_lang in mgr.LANG_MAPPER.items(): + if file_prefix == "zh": + continue + + target_path = module_dir / file_prefix / "LC_MESSAGES" / po_name + if not target_path.exists(): + continue + + trans_po = _read_po_from_fs(target_path) + to_update: dict[str, str] = {} + + for msgid in changed_msgids: + if msgid not in zh_dict: + continue + entry = trans_po.find(msgid) + if not entry: + continue + if overwrite or (not entry.msgstr) or ("fuzzy" in entry.flags): + to_update[msgid] = zh_dict[msgid] + + if not to_update: + continue + translated = await mgr.bulk_translate(to_update, target_lang) + for msgid, msgstr in translated.items(): + entry = trans_po.find(msgid) + if not entry: + continue + entry.flags = [] + entry.previous_msgid = None + entry.msgstr = msgstr + trans_po.save(str(target_path)) + + +def _discover_work_items_from_diff(base: str, head: str) -> list[WorkItem]: + changed_files = _run_git(["diff", "--name-only", f"{base}..{head}"]).splitlines() + + items: list[WorkItem] = [] + for p in changed_files: + if not p.startswith("apps/i18n/"): + continue + + # json modules + if p.endswith("/zh.json") and "/LC_MESSAGES/" not in p: + items.append(WorkItem(kind="json", src_relpath=p)) + continue + + # gettext sources + if p.endswith("/zh/LC_MESSAGES/django.po") or p.endswith("/zh/LC_MESSAGES/djangojs.po"): + items.append(WorkItem(kind="po", src_relpath=p)) + continue + + # de-dup + uniq: dict[tuple[str, str], WorkItem] = {(i.kind, i.src_relpath): i for i in items} + return list(uniq.values()) + + +async def run_pr(base: str, head: str, overwrite: bool): + translator = build_translator() + items = _discover_work_items_from_diff(base, head) + if not items: + print("No i18n source changes detected; skip.") + return + + for item in items: + if item.kind == "json": + module_dir = (REPO_ROOT / item.src_relpath).parent + base_dict = _read_json_text_from_git(base, item.src_relpath) + head_dict = _read_json_text(REPO_ROOT / item.src_relpath) + changed = _changed_keys_json(base_dict, head_dict) + if not changed: + continue + await _translate_json_item(translator, module_dir, item.src_relpath, changed, overwrite=overwrite) + elif item.kind == "po": + src_path = REPO_ROOT / item.src_relpath + # .../core/zh/LC_MESSAGES/django.po -> module_dir=.../core + module_dir = src_path.parents[2] + po_name = src_path.name + base_po = _read_po_from_git(base, item.src_relpath) + head_po = _read_po_from_fs(src_path) + changed = _changed_msgids_po(base_po, head_po) + if not changed: + continue + await _translate_po_item( + translator, + module_dir, + po_name=po_name, + zh_relpath=item.src_relpath, + changed_msgids=changed, + overwrite=overwrite, + ) + + +async def run_full(): + # Full run: reuse existing translate logic, but load by file path + # to avoid name conflicts with any third-party "translate" package. + import importlib.util + + translate_path = I18N_ROOT / "translate.py" + spec = importlib.util.spec_from_file_location("jumpserver_i18n_translate", translate_path) + if not spec or not spec.loader: + raise RuntimeError(f"Failed to load translate module from {translate_path}") + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + + translator = build_translator() + manager = mod.Translate(translator) + await manager.run() + + +def main(argv: Iterable[str] | None = None): + parser = argparse.ArgumentParser(description="JumpServer i18n CI translator") + parser.add_argument("--mode", choices=["full", "pr"], required=True) + parser.add_argument("--base", help="Base git sha (PR only)") + parser.add_argument("--head", help="Head git sha (PR only)", default="HEAD") + parser.add_argument( + "--overwrite", + action=argparse.BooleanOptionalAction, + default=True, + help="Overwrite existing translations for changed source keys/msgids", + ) + args = parser.parse_args(list(argv) if argv is not None else None) + + if args.mode == "full": + asyncio.run(run_full()) + return + + if not args.base: + raise SystemExit("--base is required for --mode pr") + asyncio.run(run_pr(args.base, args.head, overwrite=args.overwrite)) + + +if __name__ == "__main__": + main() diff --git a/apps/i18n/translate.py b/apps/i18n/translate.py index 6dfde7cc1..b5a78fbc2 100644 --- a/apps/i18n/translate.py +++ b/apps/i18n/translate.py @@ -4,7 +4,7 @@ import os from _translator.const import LOCALE_DIR, RED from _translator.core import CoreTranslateManager from _translator.other import OtherTranslateManager -from _translator.utils import OpenAITranslate +from _translator.utils import build_translator class Translate: @@ -55,6 +55,6 @@ class Translate: if __name__ == '__main__': - oai_trans = OpenAITranslate() - manager = Translate(oai_trans) + translator = build_translator() + manager = Translate(translator) asyncio.run(manager.run())