mirror of
https://github.com/farcasclaudiu/openclaw.git
synced 2026-06-28 19:01:47 +03:00
Docs i18n: harden doc-mode pipeline
This commit is contained in:
+218
-3
@@ -4,15 +4,38 @@ import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
type docJob struct {
|
||||
index int
|
||||
path string
|
||||
rel string
|
||||
}
|
||||
|
||||
type docResult struct {
|
||||
index int
|
||||
rel string
|
||||
duration time.Duration
|
||||
skipped bool
|
||||
err error
|
||||
}
|
||||
|
||||
func main() {
|
||||
var (
|
||||
targetLang = flag.String("lang", "zh-CN", "target language (e.g., zh-CN)")
|
||||
sourceLang = flag.String("src", "en", "source language")
|
||||
docsRoot = flag.String("docs", "docs", "docs root")
|
||||
tmPath = flag.String("tm", "", "translation memory path")
|
||||
mode = flag.String("mode", "segment", "translation mode (segment|doc)")
|
||||
thinking = flag.String("thinking", "high", "thinking level (low|high)")
|
||||
overwrite = flag.Bool("overwrite", false, "overwrite existing translations")
|
||||
maxFiles = flag.Int("max", 0, "max files to process (0 = all)")
|
||||
parallel = flag.Int("parallel", 1, "parallel workers for doc mode")
|
||||
)
|
||||
flag.Parse()
|
||||
files := flag.Args()
|
||||
@@ -35,7 +58,7 @@ func main() {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
translator, err := NewPiTranslator(*sourceLang, *targetLang, glossary)
|
||||
translator, err := NewPiTranslator(*sourceLang, *targetLang, glossary, *thinking)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
@@ -46,13 +69,205 @@ func main() {
|
||||
fatal(err)
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
if err := processFile(context.Background(), translator, tm, resolvedDocsRoot, file, *sourceLang, *targetLang); err != nil {
|
||||
ordered, err := orderFiles(resolvedDocsRoot, files)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
totalFiles := len(ordered)
|
||||
preSkipped := 0
|
||||
if *mode == "doc" && !*overwrite {
|
||||
filtered, skipped, err := filterDocQueue(resolvedDocsRoot, *targetLang, ordered)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
ordered = filtered
|
||||
preSkipped = skipped
|
||||
}
|
||||
if *maxFiles > 0 && *maxFiles < len(ordered) {
|
||||
ordered = ordered[:*maxFiles]
|
||||
}
|
||||
|
||||
log.SetFlags(log.LstdFlags)
|
||||
start := time.Now()
|
||||
processed := 0
|
||||
skipped := 0
|
||||
|
||||
if *parallel < 1 {
|
||||
*parallel = 1
|
||||
}
|
||||
|
||||
log.Printf("docs-i18n: mode=%s total=%d pending=%d pre_skipped=%d overwrite=%t thinking=%s parallel=%d", *mode, totalFiles, len(ordered), preSkipped, *overwrite, *thinking, *parallel)
|
||||
switch *mode {
|
||||
case "doc":
|
||||
if *parallel > 1 {
|
||||
proc, skip, err := runDocParallel(context.Background(), ordered, resolvedDocsRoot, *sourceLang, *targetLang, *overwrite, *parallel, glossary, *thinking)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
processed += proc
|
||||
skipped += skip
|
||||
} else {
|
||||
proc, skip, err := runDocSequential(context.Background(), ordered, translator, resolvedDocsRoot, *sourceLang, *targetLang, *overwrite)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
processed += proc
|
||||
skipped += skip
|
||||
}
|
||||
case "segment":
|
||||
if *parallel > 1 {
|
||||
fatal(fmt.Errorf("parallel processing is only supported in doc mode"))
|
||||
}
|
||||
proc, err := runSegmentSequential(context.Background(), ordered, translator, tm, resolvedDocsRoot, *sourceLang, *targetLang)
|
||||
if err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
processed += proc
|
||||
default:
|
||||
fatal(fmt.Errorf("unknown mode: %s", *mode))
|
||||
}
|
||||
|
||||
if err := tm.Save(); err != nil {
|
||||
fatal(err)
|
||||
}
|
||||
elapsed := time.Since(start).Round(time.Millisecond)
|
||||
log.Printf("docs-i18n: completed processed=%d skipped=%d elapsed=%s", processed, skipped, elapsed)
|
||||
}
|
||||
|
||||
func runDocSequential(ctx context.Context, ordered []string, translator *PiTranslator, docsRoot, srcLang, tgtLang string, overwrite bool) (int, int, error) {
|
||||
processed := 0
|
||||
skipped := 0
|
||||
for index, file := range ordered {
|
||||
relPath := resolveRelPath(docsRoot, file)
|
||||
log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
|
||||
start := time.Now()
|
||||
skip, err := processFileDoc(ctx, translator, docsRoot, file, srcLang, tgtLang, overwrite)
|
||||
if err != nil {
|
||||
return processed, skipped, err
|
||||
}
|
||||
if skip {
|
||||
skipped++
|
||||
log.Printf("docs-i18n: [%d/%d] skipped %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
|
||||
} else {
|
||||
processed++
|
||||
log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
|
||||
}
|
||||
}
|
||||
return processed, skipped, nil
|
||||
}
|
||||
|
||||
func runDocParallel(ctx context.Context, ordered []string, docsRoot, srcLang, tgtLang string, overwrite bool, parallel int, glossary []GlossaryEntry, thinking string) (int, int, error) {
|
||||
jobs := make(chan docJob)
|
||||
results := make(chan docResult, len(ordered))
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for worker := 0; worker < parallel; worker++ {
|
||||
wg.Add(1)
|
||||
go func(workerID int) {
|
||||
defer wg.Done()
|
||||
translator, err := NewPiTranslator(srcLang, tgtLang, glossary, thinking)
|
||||
if err != nil {
|
||||
results <- docResult{err: err}
|
||||
return
|
||||
}
|
||||
defer translator.Close()
|
||||
for job := range jobs {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
log.Printf("docs-i18n: [w%d %d/%d] start %s", workerID, job.index, len(ordered), job.rel)
|
||||
start := time.Now()
|
||||
skip, err := processFileDoc(ctx, translator, docsRoot, job.path, srcLang, tgtLang, overwrite)
|
||||
results <- docResult{
|
||||
index: job.index,
|
||||
rel: job.rel,
|
||||
duration: time.Since(start),
|
||||
skipped: skip,
|
||||
err: err,
|
||||
}
|
||||
if err != nil {
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
}
|
||||
}(worker + 1)
|
||||
}
|
||||
|
||||
go func() {
|
||||
for index, file := range ordered {
|
||||
jobs <- docJob{index: index + 1, path: file, rel: resolveRelPath(docsRoot, file)}
|
||||
}
|
||||
close(jobs)
|
||||
}()
|
||||
|
||||
processed := 0
|
||||
skipped := 0
|
||||
for i := 0; i < len(ordered); i++ {
|
||||
result := <-results
|
||||
if result.err != nil {
|
||||
wg.Wait()
|
||||
return processed, skipped, result.err
|
||||
}
|
||||
if result.skipped {
|
||||
skipped++
|
||||
log.Printf("docs-i18n: [w* %d/%d] skipped %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
|
||||
} else {
|
||||
processed++
|
||||
log.Printf("docs-i18n: [w* %d/%d] done %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
|
||||
}
|
||||
}
|
||||
wg.Wait()
|
||||
return processed, skipped, nil
|
||||
}
|
||||
|
||||
func runSegmentSequential(ctx context.Context, ordered []string, translator *PiTranslator, tm *TranslationMemory, docsRoot, srcLang, tgtLang string) (int, error) {
|
||||
processed := 0
|
||||
for index, file := range ordered {
|
||||
relPath := resolveRelPath(docsRoot, file)
|
||||
log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
|
||||
start := time.Now()
|
||||
if _, err := processFile(ctx, translator, tm, docsRoot, file, srcLang, tgtLang); err != nil {
|
||||
return processed, err
|
||||
}
|
||||
processed++
|
||||
log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
|
||||
}
|
||||
return processed, nil
|
||||
}
|
||||
|
||||
func resolveRelPath(docsRoot, file string) string {
|
||||
relPath := file
|
||||
if _, rel, err := resolveDocsPath(docsRoot, file); err == nil {
|
||||
relPath = rel
|
||||
}
|
||||
return relPath
|
||||
}
|
||||
|
||||
func filterDocQueue(docsRoot, targetLang string, ordered []string) ([]string, int, error) {
|
||||
pending := make([]string, 0, len(ordered))
|
||||
skipped := 0
|
||||
for _, file := range ordered {
|
||||
absPath, relPath, err := resolveDocsPath(docsRoot, file)
|
||||
if err != nil {
|
||||
return nil, skipped, err
|
||||
}
|
||||
content, err := os.ReadFile(absPath)
|
||||
if err != nil {
|
||||
return nil, skipped, err
|
||||
}
|
||||
sourceHash := hashBytes(content)
|
||||
outputPath := filepath.Join(docsRoot, targetLang, relPath)
|
||||
skip, err := shouldSkipDoc(outputPath, sourceHash)
|
||||
if err != nil {
|
||||
return nil, skipped, err
|
||||
}
|
||||
if skip {
|
||||
skipped++
|
||||
continue
|
||||
}
|
||||
pending = append(pending, file)
|
||||
}
|
||||
return pending, skipped, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user