tercul-backend/internal/enrich/language_detector.go
Damir Mukimov 4957117cb6 Initial commit: Tercul Go project with comprehensive architecture
- Core Go application with GraphQL API using gqlgen
- Comprehensive data models for literary works, authors, translations
- Repository pattern with caching layer
- Authentication and authorization system
- Linguistics analysis capabilities with multiple adapters
- Vector search integration with Weaviate
- Docker containerization support
- Python data migration and analysis scripts
- Clean architecture with proper separation of concerns
- Production-ready configuration and middleware
- Proper .gitignore excluding vendor/, database files, and build artifacts
2025-08-13 07:42:32 +02:00

57 lines
1.8 KiB
Go

package enrich
import (
	"strings"
	"unicode"
)
// LanguageDetector detects the language of a text.
//
// The type has no fields, so it carries no configuration or state; all of
// the detection logic lives in its Detect method.
type LanguageDetector struct{}
// NewLanguageDetector returns a pointer to a freshly allocated
// LanguageDetector. The detector has no fields, so there is nothing to
// configure before calling Detect.
func NewLanguageDetector() *LanguageDetector {
	return new(LanguageDetector)
}
// Detect guesses the language of text.Body and returns a two-letter language
// code ("en", "es" or "fr"), a confidence value, and an error.
//
// The guess is a placeholder heuristic: the body is lower-cased and, for each
// candidate language, countWords tallies a short list of common words. A
// language wins only when its tally is strictly greater than both others, in
// which case the confidence is 0.7. On any tie (including all-zero tallies)
// the result falls back to "en" with confidence 0.5. The returned error is
// always nil.
//
// A production implementation would delegate to a dedicated library such as
// github.com/pemistahl/lingua-go or to an external detection service.
func (d *LanguageDetector) Detect(text Text) (string, float64, error) {
	body := strings.ToLower(text.Body)

	// Tally the marker words for each candidate language. Note that "la",
	// "en", "de" and "que" appear in both the Spanish and the French list, so
	// those words never separate the two languages from each other.
	en := countWords(body, []string{"the", "and", "is", "in", "to", "of", "that", "for"})
	es := countWords(body, []string{"el", "la", "es", "en", "de", "que", "por", "para"})
	fr := countWords(body, []string{"le", "la", "est", "en", "de", "que", "pour", "dans"})

	// Pick the language whose tally strictly dominates the other two.
	switch {
	case en > es && en > fr:
		return "en", 0.7, nil
	case es > en && es > fr:
		return "es", 0.7, nil
	case fr > en && fr > es:
		return "fr", 0.7, nil
	}

	// No strict winner: default to English with reduced confidence.
	return "en", 0.5, nil
}
// countWords returns the number of whole-word occurrences in text of the
// entries in words.
//
// text is split into tokens at every rune that is neither a letter nor a
// digit, so a word is recognised at the very start or end of the text and
// next to punctuation, newlines or tabs, not only when it sits between two
// space characters. Each token is then compared for exact (case-sensitive)
// equality with every entry of words; callers that want case-insensitive
// matching must normalise text and words beforehand. Consecutive repeats such
// as "the the the" are all counted.
//
// Entries of words are expected to be single words made of letters and
// digits; an entry containing any other character can never equal a token.
// An entry listed more than once is counted once per listing. An empty text
// or an empty word list yields 0.
func countWords(text string, words []string) int {
	if text == "" || len(words) == 0 {
		return 0
	}

	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r)
	}

	count := 0
	for _, token := range strings.FieldsFunc(text, isSeparator) {
		// The word lists are tiny, so a linear scan beats building a set.
		for _, word := range words {
			if token == word {
				count++
			}
		}
	}
	return count
}