tercul-backend/internal/jobs/linguistics/language_detector.go
google-labs-jules[bot] 53aa4d0344
Security Hardening and GraphQL Caching (#69)
* feat: add security middleware, graphql apq, and improved linting

- Add RateLimit, RequestValidation, and CORS middleware.
- Configure middleware chain in API server.
- Implement Redis cache for GraphQL Automatic Persisted Queries.
- Add .golangci.yml and fix linting issues (shadowing, timeouts).

* feat: security, caching and linting config

- Fix .golangci.yml config for govet shadow check
- (Previous changes: Security middleware, GraphQL APQ, Linting fixes)

* fix: resolve remaining lint errors

- Fix unhandled errors in tests (errcheck)
- Define constants for repeated strings (goconst)
- Suppress high complexity warnings with nolint:gocyclo
- Fix integer overflow warnings (gosec)
- Add package comments
- Split long lines (lll)
- Rename Analyse -> Analyze (misspell)
- Fix naked returns and unused params

---------

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
2025-12-01 00:14:22 +01:00

63 lines
1.8 KiB
Go

package linguistics
import (
"strings"
)
// Language codes that DetectLanguage can return.
const (
// LangEN is the code returned for English; it is also the fallback
// DetectLanguage returns when no language has a strictly highest score.
LangEN = "en"
// LangES is the code returned for Spanish.
LangES = "es"
// LangFR is the code returned for French.
LangFR = "fr"
)
// languageDetector guesses the language a piece of text is written in.
// It carries no fields, so every instance behaves the same way.
type languageDetector struct{}

// NewLanguageDetector returns a ready-to-use *languageDetector.
// No configuration is required because the type holds no state.
func NewLanguageDetector() *languageDetector {
	return new(languageDetector)
}
// DetectLanguage guesses the language of text and returns its language code.
//
// This is a deliberately simplified heuristic: text is lower-cased and the
// occurrences of a few common English, Spanish and French words are tallied
// with countWords. A language wins only when its tally is strictly greater
// than both of the others; in every other case (ties, or no matches at all)
// LangEN is returned as the default.
//
// The returned error is always nil in this implementation. A real-world
// implementation would rely on a library such as
// github.com/pemistahl/lingua-go or on an external detection API.
func (d *languageDetector) DetectLanguage(text string) (string, error) {
	lowered := strings.ToLower(text)

	// Tally the marker words of each candidate language.
	en := countWords(lowered, []string{"the", "and", "is", "in", "to", "of", "that", "for"})
	es := countWords(lowered, []string{"el", "la", "es", "en", "de", "que", "por", "para"})
	fr := countWords(lowered, []string{"le", "la", "est", "en", "de", "que", "pour", "dans"})

	switch {
	case es > en && es > fr:
		return LangES, nil
	case fr > en && fr > es:
		return LangFR, nil
	default:
		// Either English has the strictly highest tally or there is a tie;
		// both cases resolve to English.
		return LangEN, nil
	}
}
// countWords returns how many word tokens in text exactly match an entry of
// words.
//
// text is split into tokens on every ASCII character that is not a letter or
// a digit (spaces, tabs, newlines, punctuation) and on a small set of common
// non-ASCII punctuation marks. Working on tokens, rather than searching for
// " word ", means that matches at the very start or end of the text, next to
// punctuation, or repeated back to back are all counted.
//
// Matching is case-sensitive, so callers that want case-insensitive counting
// must normalise text and words beforehand. An entry that appears more than
// once in words contributes once per appearance. Entries that contain a
// separator character never match, because tokens cannot contain one.
func countWords(text string, words []string) int {
	if text == "" || len(words) == 0 {
		return 0
	}

	// Non-ASCII punctuation that should also end a token, so that inputs such
	// as "¿que" or "«le" still yield the bare word.
	const extraSeparators = "¿¡«»…–—“”‘’\u00a0"

	// weight[w] is the number of times w is listed in words; a matching token
	// adds that many to the total.
	weight := make(map[string]int, len(words))
	for _, w := range words {
		weight[w]++
	}

	isSeparator := func(r rune) bool {
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
			return false
		case r < 0x80:
			// Any other ASCII character: whitespace, punctuation, control.
			return true
		default:
			// Non-ASCII runes are kept inside tokens (accented letters etc.)
			// unless they are one of the listed punctuation marks.
			return strings.ContainsRune(extraSeparators, r)
		}
	}

	total := 0
	for _, token := range strings.FieldsFunc(text, isSeparator) {
		total += weight[token]
	}
	return total
}