mirror of
https://github.com/SamyRai/tercul-backend.git
synced 2025-12-27 05:11:34 +00:00
57 lines
1.7 KiB
Go
57 lines
1.7 KiB
Go
package linguistics
|
|
|
|
import (
|
|
"strings"
|
|
)
|
|
|
|
// languageDetector guesses the language of a piece of text. It carries no
// state, so a single value can be reused for any number of detections.
type languageDetector struct{}

// NewLanguageDetector returns a pointer to a freshly allocated, ready-to-use
// languageDetector.
func NewLanguageDetector() *languageDetector {
	return new(languageDetector)
}
|
|
|
|
// Detect detects the language of a text and returns the language code, confidence, and error
|
|
func (d *languageDetector) DetectLanguage(text string) (string, error) {
|
|
// This is a simplified implementation
|
|
// In a real-world scenario, you would use a library like github.com/pemistahl/lingua-go
|
|
// or call an external API for language detection
|
|
|
|
// For demonstration purposes, we'll use a simple heuristic based on common words
|
|
content := strings.ToLower(text)
|
|
|
|
// Check for English
|
|
englishWords := []string{"the", "and", "is", "in", "to", "of", "that", "for"}
|
|
englishCount := countWords(content, englishWords)
|
|
|
|
// Check for Spanish
|
|
spanishWords := []string{"el", "la", "es", "en", "de", "que", "por", "para"}
|
|
spanishCount := countWords(content, spanishWords)
|
|
|
|
// Check for French
|
|
frenchWords := []string{"le", "la", "est", "en", "de", "que", "pour", "dans"}
|
|
frenchCount := countWords(content, frenchWords)
|
|
|
|
// Determine the most likely language
|
|
if englishCount > spanishCount && englishCount > frenchCount {
|
|
return "en", nil
|
|
} else if spanishCount > englishCount && spanishCount > frenchCount {
|
|
return "es", nil
|
|
} else if frenchCount > englishCount && frenchCount > spanishCount {
|
|
return "fr", nil
|
|
}
|
|
|
|
// Default to English if we can't determine the language
|
|
return "en", nil
|
|
}
|
|
|
|
// countWords reports how many words in text match an entry of words.
//
// text is split on whitespace (spaces, tabs, newlines, ...) and common
// punctuation is stripped from both ends of every token, so a word is
// recognised at the very start or end of the text, next to punctuation, and
// when it is repeated back to back. Matching is exact and case-sensitive;
// callers that want case-insensitive matching must normalise text and words
// beforehand. An entry that appears k times in words contributes k per
// matching token.
func countWords(text string, words []string) int {
	if len(words) == 0 {
		return 0
	}

	// Map each candidate word to the number of times it is listed, giving a
	// constant-time lookup per token while keeping duplicates significant.
	weights := make(map[string]int, len(words))
	for _, w := range words {
		weights[w]++
	}

	// Punctuation that commonly touches a word in running text.
	const punctuation = ".,;:!?\"'()[]{}<>«»¿¡…—–-"

	count := 0
	for _, token := range strings.Fields(text) {
		token = strings.Trim(token, punctuation)
		if token == "" {
			// The token was pure punctuation, e.g. a free-standing dash.
			continue
		}
		count += weights[token]
	}
	return count
}
|