tercul-backend/internal/jobs/linguistics/adapter_lingua.go

35 lines
984 B
Go

package linguistics
import (
lingua "github.com/pemistahl/lingua-go"
"strings"
)
// LinguaLanguageDetector is a language-detection adapter backed by the
// lingua-go library. It is intended to satisfy this package's
// LanguageDetector interface (declared outside this file).
//
// Construct it with NewLinguaLanguageDetector; the zero value holds a nil
// detector and must not be used.
type LinguaLanguageDetector struct {
// detector is the underlying lingua-go detector that DetectLanguage delegates to.
detector lingua.LanguageDetector
}
// NewLinguaLanguageDetector returns a LinguaLanguageDetector whose underlying
// lingua-go detector is built from every language the library offers.
func NewLinguaLanguageDetector() *LinguaLanguageDetector {
	return &LinguaLanguageDetector{
		detector: lingua.NewLanguageDetectorBuilder().
			FromAllLanguages().
			Build(),
	}
}
// DetectLanguage identifies the language of text and returns a lowercase
// identifier for it.
//
// The ISO 639-1 code is preferred. If that renders as an empty string the
// ISO 639-3 code is used, and failing that the language's name. Whichever
// form is chosen is lowercased before being returned.
//
// The boolean result is false, and the returned string empty, when the
// underlying detector reports that it could not determine a language.
func (l *LinguaLanguageDetector) DetectLanguage(text string) (string, bool) {
	lang, ok := l.detector.DetectLanguageOf(text)
	if !ok {
		return "", false
	}

	// lingua-go's ISO code values stringify as their constant names, which
	// are upper case (e.g. "EN", "ENG"). Lowercase them so callers receive
	// the documented lowercase form rather than the library's raw spelling.
	if code := lang.IsoCode639_1().String(); code != "" {
		return strings.ToLower(code), true
	}
	if code := lang.IsoCode639_3().String(); code != "" {
		return strings.ToLower(code), true
	}

	// Last resort: the language's display name, lowercased.
	return strings.ToLower(lang.String()), true
}