mirror of
https://github.com/SamyRai/turash.git
synced 2025-12-26 23:01:33 +00:00
285 lines
8.8 KiB
Go
285 lines
8.8 KiB
Go
package service
|
||
|
||
import (
|
||
"bytes"
|
||
"encoding/json"
|
||
"fmt"
|
||
"io"
|
||
"net/http"
|
||
"time"
|
||
)
|
||
|
||
// TranslationService translates text by sending prompts to an Ollama server
// over HTTP.
type TranslationService struct {
	// ollamaURL is the base URL that endpoint paths are appended to;
	// model is the Ollama model name sent with every generate request.
	ollamaURL, model string

	// client performs all HTTP calls made by the service.
	client *http.Client

	// username and password are optional HTTP basic-auth credentials.
	// They are attached to requests only when both are non-empty.
	username, password string
}
|
||
|
||
// TranslationRequest is the JSON shape of a request to translate one text.
//
// NOTE(review): nothing in this file reads or writes this type; presumably it
// is used by callers elsewhere in the package.
type TranslationRequest struct {
	// Text is the content to translate.
	Text string `json:"text"`

	// SourceLang identifies the language Text is written in.
	SourceLang string `json:"source_lang"`

	// TargetLang identifies the language to translate into.
	TargetLang string `json:"target_lang"`
}
|
||
|
||
// TranslationResponse holds the fields decoded from the JSON body that Ollama
// returns for a generate request.
type TranslationResponse struct {
	// Model is the value of the "model" key in the reply.
	Model string `json:"model"`

	// Response is the generated text; Translate returns it as the translation.
	Response string `json:"response"`

	// Done is the "done" flag of the reply; Translate treats false as a failure.
	Done bool `json:"done"`

	// DoneReason is the "done_reason" value; it is included in the error
	// reported when Done is false.
	DoneReason string `json:"done_reason"`
}
|
||
|
||
// Note: OllamaGenerateRequest is defined in ollama_client.go. Translate in this
// file does not use it: it builds its own request payload and calls Ollama
// through the service's own http.Client.
|
||
|
||
// NewTranslationService creates a new translation service
|
||
func NewTranslationService(ollamaURL, model string) *TranslationService {
|
||
return NewTranslationServiceWithAuth(ollamaURL, model, "", "")
|
||
}
|
||
|
||
// NewTranslationServiceWithAuth creates a new translation service with authentication
|
||
func NewTranslationServiceWithAuth(ollamaURL, model, username, password string) *TranslationService {
|
||
if ollamaURL == "" {
|
||
ollamaURL = "http://localhost:11434"
|
||
}
|
||
if model == "" {
|
||
model = "qwen2.5:7b"
|
||
}
|
||
|
||
return &TranslationService{
|
||
ollamaURL: ollamaURL,
|
||
model: model,
|
||
username: username,
|
||
password: password,
|
||
client: &http.Client{
|
||
Timeout: 180 * time.Second, // Increased to 3 minutes for LLM processing
|
||
},
|
||
}
|
||
}
|
||
|
||
// Translate translates text from source language to target language
|
||
func (s *TranslationService) Translate(text, sourceLang, targetLang string) (string, error) {
|
||
if text == "" {
|
||
return "", fmt.Errorf("text cannot be empty")
|
||
}
|
||
|
||
// Build translation prompt
|
||
prompt := s.buildTranslationPrompt(text, sourceLang, targetLang)
|
||
|
||
// Prepare request - using the type from ollama_client.go
|
||
reqBody := struct {
|
||
Model string `json:"model"`
|
||
Prompt string `json:"prompt"`
|
||
Stream bool `json:"stream"`
|
||
}{
|
||
Model: s.model,
|
||
Prompt: prompt,
|
||
Stream: false,
|
||
}
|
||
|
||
jsonData, err := json.Marshal(reqBody)
|
||
if err != nil {
|
||
return "", fmt.Errorf("failed to marshal request: %w", err)
|
||
}
|
||
|
||
// Make request to Ollama
|
||
url := fmt.Sprintf("%s/api/generate", s.ollamaURL)
|
||
req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
|
||
if err != nil {
|
||
return "", fmt.Errorf("failed to create request: %w", err)
|
||
}
|
||
|
||
req.Header.Set("Content-Type", "application/json")
|
||
|
||
// Add basic authentication if credentials are provided
|
||
if s.username != "" && s.password != "" {
|
||
req.SetBasicAuth(s.username, s.password)
|
||
}
|
||
|
||
resp, err := s.client.Do(req)
|
||
if err != nil {
|
||
return "", fmt.Errorf("failed to call Ollama API: %w", err)
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
if resp.StatusCode != http.StatusOK {
|
||
body, _ := io.ReadAll(resp.Body)
|
||
return "", fmt.Errorf("ollama API returned status %d: %s", resp.StatusCode, string(body))
|
||
}
|
||
|
||
// Parse response
|
||
var translationResp TranslationResponse
|
||
if err := json.NewDecoder(resp.Body).Decode(&translationResp); err != nil {
|
||
return "", fmt.Errorf("failed to decode ollama response: %w", err)
|
||
}
|
||
|
||
if !translationResp.Done {
|
||
return "", fmt.Errorf("translation incomplete: %s", translationResp.DoneReason)
|
||
}
|
||
|
||
return translationResp.Response, nil
|
||
}
|
||
|
||
// buildTranslationPrompt creates a translation prompt for the LLM
|
||
func (s *TranslationService) buildTranslationPrompt(text, sourceLang, targetLang string) string {
|
||
sourceLangName := s.getLanguageName(sourceLang)
|
||
targetLangName := s.getLanguageName(targetLang)
|
||
|
||
switch targetLang {
|
||
case "tt":
|
||
return s.buildTatarPrompt(text, sourceLangName)
|
||
case "en":
|
||
return s.buildEnglishPrompt(text, sourceLangName)
|
||
case "ru":
|
||
return s.buildRussianPrompt(text, sourceLangName)
|
||
default:
|
||
// Generic fallback
|
||
return fmt.Sprintf(`Translate the following text from %s to %s.
|
||
Provide only the translation, without any explanations or additional text.
|
||
|
||
Source text (%s):
|
||
%s
|
||
|
||
Translation (%s):`, sourceLangName, targetLangName, sourceLangName, text, targetLangName)
|
||
}
|
||
}
|
||
|
||
// buildTatarPrompt creates a prompt for Tatar translation with Cyrillic script requirements
|
||
func (s *TranslationService) buildTatarPrompt(text, sourceLangName string) string {
|
||
return fmt.Sprintf(`Translate the following text to Tatar language.
|
||
|
||
LANGUAGE DETECTION:
|
||
- The source text is indicated as %s, but if it's actually in a different language, detect the actual language and translate from that language
|
||
- Handle mixed-language text appropriately (e.g., "School № 6" is Russian text with Latin characters)
|
||
|
||
SCRIPT REQUIREMENTS:
|
||
- Use ONLY Cyrillic script (А-Я, а-я, Ё, ё, Ә, ә, Ө, ө, Ү, ү, Җ, җ, Ң, ң, Һ, һ)
|
||
- Do NOT use Latin letters (A-Z, a-z) except for internationally recognized company/brand names
|
||
- Do NOT use Arabic script
|
||
|
||
COMPANY AND BRAND NAMES:
|
||
- Keep internationally recognized company names in their original Latin form (e.g., "S7 Airlines", "Ak Bars Aero", "BMW", "Apple")
|
||
- Translate local company names and organizations to Tatar Cyrillic
|
||
- Keep technical terms and abbreviations in their commonly accepted form
|
||
|
||
LANGUAGE STYLE:
|
||
- Use proper Tatar grammar and syntax
|
||
- Use natural Tatar vocabulary, not word-by-word transliteration
|
||
- Maintain formal register appropriate for historical/cultural documentation
|
||
- Follow Tatar orthography rules
|
||
|
||
Source text (indicated as %s):
|
||
%s
|
||
|
||
Translation (Tatar, Cyrillic script):`, sourceLangName, sourceLangName, text)
|
||
}
|
||
|
||
// buildEnglishPrompt creates a prompt for English translation
|
||
func (s *TranslationService) buildEnglishPrompt(text, sourceLangName string) string {
|
||
return fmt.Sprintf(`Translate the following text to English.
|
||
|
||
LANGUAGE DETECTION:
|
||
- The source text is indicated as %s, but if it's actually in a different language, detect the actual language and translate from that language
|
||
- Handle mixed-language text appropriately
|
||
|
||
LANGUAGE REQUIREMENTS:
|
||
- Use proper English grammar and syntax
|
||
- Use natural, idiomatic English, not literal translation
|
||
- Maintain formal register appropriate for historical/cultural documentation
|
||
- Use British or American English consistently (prefer British for historical contexts)
|
||
|
||
COMPANY AND BRAND NAMES:
|
||
- Keep internationally recognized company names in their original form
|
||
- Translate local company names to English when appropriate
|
||
- Keep technical terms in their standard English form
|
||
|
||
Source text (indicated as %s):
|
||
%s
|
||
|
||
Translation (English):`, sourceLangName, sourceLangName, text)
|
||
}
|
||
|
||
// buildRussianPrompt creates a prompt for Russian translation
|
||
func (s *TranslationService) buildRussianPrompt(text, sourceLangName string) string {
|
||
return fmt.Sprintf(`Translate the following text to Russian.
|
||
|
||
LANGUAGE DETECTION:
|
||
- The source text is indicated as %s, but if it's actually in a different language, detect the actual language and translate from that language
|
||
- Handle mixed-language text appropriately
|
||
|
||
LANGUAGE REQUIREMENTS:
|
||
- Use proper Russian grammar and syntax
|
||
- Use natural Russian vocabulary, not word-by-word transliteration
|
||
- Maintain formal register appropriate for historical/cultural documentation
|
||
- Follow Russian orthography rules
|
||
|
||
COMPANY AND BRAND NAMES:
|
||
- Keep internationally recognized company names in their original form (Latin if commonly used)
|
||
- Translate local company names to Russian when appropriate
|
||
- Use Cyrillic transliteration for foreign names when standard
|
||
|
||
Source text (indicated as %s):
|
||
%s
|
||
|
||
Translation (Russian):`, sourceLangName, sourceLangName, text)
|
||
}
|
||
|
||
// getLanguageName returns the full name of the language
|
||
func (s *TranslationService) getLanguageName(langCode string) string {
|
||
langNames := map[string]string{
|
||
"ru": "Russian",
|
||
"en": "English",
|
||
"tt": "Tatar",
|
||
}
|
||
|
||
if name, ok := langNames[langCode]; ok {
|
||
return name
|
||
}
|
||
return langCode
|
||
}
|
||
|
||
// BatchTranslate translates multiple texts
|
||
func (s *TranslationService) BatchTranslate(texts []string, sourceLang, targetLang string) ([]string, error) {
|
||
results := make([]string, len(texts))
|
||
|
||
for i, text := range texts {
|
||
translated, err := s.Translate(text, sourceLang, targetLang)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to translate text %d: %w", i, err)
|
||
}
|
||
results[i] = translated
|
||
}
|
||
|
||
return results, nil
|
||
}
|
||
|
||
// HealthCheck checks if Ollama service is available
|
||
func (s *TranslationService) HealthCheck() error {
|
||
url := fmt.Sprintf("%s/api/tags", s.ollamaURL)
|
||
req, err := http.NewRequest("GET", url, nil)
|
||
if err != nil {
|
||
return fmt.Errorf("failed to create request: %w", err)
|
||
}
|
||
|
||
// Add basic authentication if credentials are provided
|
||
if s.username != "" && s.password != "" {
|
||
req.SetBasicAuth(s.username, s.password)
|
||
}
|
||
|
||
resp, err := s.client.Do(req)
|
||
if err != nil {
|
||
return fmt.Errorf("ollama service unavailable: %w", err)
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
if resp.StatusCode != http.StatusOK {
|
||
return fmt.Errorf("ollama service returned status %d", resp.StatusCode)
|
||
}
|
||
|
||
return nil
|
||
}
|