mirror of
https://github.com/SamyRai/tercul-backend.git
synced 2025-12-27 05:11:34 +00:00
Some checks failed
- Updated database models and repositories to replace uint IDs with UUIDs. - Modified test fixtures to generate and use UUIDs for authors, translations, users, and works. - Adjusted mock implementations to align with the new UUID structure. - Ensured all relevant functions and methods are updated to handle UUIDs correctly. - Added necessary imports for UUID handling in various files.
175 lines
4.5 KiB
Go
175 lines
4.5 KiB
Go
package linguistics
|
|
|
|
import (
|
|
"strings"
|
|
)
|
|
|
|
// Lemmatizer reduces words to their base form (lemma).
// The type has no fields, so it holds no state between calls.
type Lemmatizer struct{}

// NewLemmatizer returns a pointer to a freshly allocated Lemmatizer.
func NewLemmatizer() *Lemmatizer {
	return new(Lemmatizer)
}
|
|
|
|
// Lemma finds the base form (lemma) of a word and returns it
|
|
func (l *Lemmatizer) Lemma(word string, language string) (string, error) {
|
|
// This is a simplified implementation
|
|
// In a real-world scenario, you would use a library like github.com/jdkato/prose
|
|
// or call an external API for lemmatization
|
|
|
|
// Convert to lowercase
|
|
word = strings.ToLower(word)
|
|
|
|
// Handle different languages
|
|
switch language {
|
|
case "en":
|
|
return englishLemma(word), nil
|
|
case "es":
|
|
return spanishLemma(word), nil
|
|
case "fr":
|
|
return frenchLemma(word), nil
|
|
default:
|
|
// Default to English
|
|
return englishLemma(word), nil
|
|
}
|
|
}
|
|
|
|
// englishIrregularLemmas maps irregular English verb forms to their lemma.
// Base forms are listed as well so that the suffix rules in englishLemma never
// get a chance to mangle them. The table is built once and is only ever read.
var englishIrregularLemmas = map[string]string{
	"am":      "be",
	"are":     "be",
	"is":      "be",
	"was":     "be",
	"were":    "be",
	"been":    "be",
	"have":    "have",
	"has":     "have",
	"had":     "have",
	"do":      "do",
	"does":    "do",
	"did":     "do",
	"done":    "do",
	"go":      "go",
	"goes":    "go",
	"went":    "go",
	"gone":    "go",
	"get":     "get",
	"gets":    "get",
	"got":     "get",
	"gotten":  "get",
	"make":    "make",
	"makes":   "make",
	"made":    "make",
	"say":     "say",
	"says":    "say",
	"said":    "say",
	"see":     "see",
	"sees":    "see",
	"saw":     "see",
	"seen":    "see",
	"come":    "come",
	"comes":   "come",
	"came":    "come",
	"take":    "take",
	"takes":   "take",
	"took":    "take",
	"taken":   "take",
	"know":    "know",
	"knows":   "know",
	"knew":    "know",
	"known":   "know",
	"think":   "think",
	"thinks":  "think",
	"thought": "think",
}

// trimDoubledConsonant drops the final letter of stem when the stem ends in
// the same ASCII consonant twice ("runn" -> "run"). Doubled vowels are left
// alone so that stems such as "see" or "free" survive intact.
func trimDoubledConsonant(stem string) string {
	n := len(stem)
	if n < 2 || stem[n-1] != stem[n-2] {
		return stem
	}
	last := stem[n-1]
	if last < 'a' || last > 'z' || strings.IndexByte("aeiou", last) >= 0 {
		return stem
	}
	return stem[:n-1]
}

// englishLemma finds the base form of an English word.
//
// word is matched as-is against lower-case rules, so callers should lower-case
// it first. Rules are applied in this order and the first match wins:
//
//  1. Irregular verb forms are looked up in englishIrregularLemmas.
//  2. Words longer than two bytes that end in "s" but not "ss" are treated as
//     plurals: "ies" becomes "y", otherwise "es" or "s" is removed.
//  3. "ing" and "ed" are removed when the remaining stem contains a vowel
//     (y counts), and a doubled final consonant of the stem is collapsed.
//
// Anything else is returned unchanged. The rules are heuristic: for example
// "houses" still becomes "hous" and "falling" becomes "fal".
//
//nolint:gocyclo // Chain of suffix rules
func englishLemma(word string) string {
	if lemma, ok := englishIrregularLemmas[word]; ok {
		return lemma
	}

	// Plural nouns. A trailing "ss" ("class", "less") is not a plural marker.
	if len(word) > 2 && strings.HasSuffix(word, "s") && !strings.HasSuffix(word, "ss") {
		switch {
		case len(word) > 3 && strings.HasSuffix(word, "ies"):
			return word[:len(word)-3] + "y"
		case strings.HasSuffix(word, "es"):
			return word[:len(word)-2]
		default:
			return word[:len(word)-1]
		}
	}

	// Verb forms. The suffix only counts as an inflection when what is left
	// still looks like a word, which keeps "sing", "thing" or "red" intact.
	for _, suffix := range []string{"ing", "ed"} {
		if !strings.HasSuffix(word, suffix) {
			continue
		}
		stem := word[:len(word)-len(suffix)]
		if !strings.ContainsAny(stem, "aeiouy") {
			break
		}
		return trimDoubledConsonant(stem)
	}

	// No rule applied.
	return word
}
|
|
|
|
// spanishLemma finds the base form of a Spanish word.
//
// This is a deliberately simplified, suffix-based heuristic. The first
// matching rule wins:
//
//   - words ending in "ar", "er" or "ir" are taken to be infinitives and are
//     returned unchanged;
//   - the gerund endings "ando"/"endo" and the participle endings "ado"/"ido"
//     are removed, leaving the stem (not the infinitive);
//   - the plural endings "es" and "s" are removed.
//
// Every stripping rule requires the word to be longer than the suffix, so a
// word that consists solely of a suffix (such as "ido") is returned unchanged
// instead of being reduced to an empty string. All suffixes are ASCII, so the
// byte-based slicing never splits a multi-byte character.
func spanishLemma(word string) string {
	n := len(word)

	switch {
	case strings.HasSuffix(word, "ar"),
		strings.HasSuffix(word, "er"),
		strings.HasSuffix(word, "ir"):
		return word
	case n > 4 && (strings.HasSuffix(word, "ando") || strings.HasSuffix(word, "endo")):
		return word[:n-4]
	case n > 3 && (strings.HasSuffix(word, "ado") || strings.HasSuffix(word, "ido")):
		return word[:n-3]
	case n > 2 && strings.HasSuffix(word, "es"):
		return word[:n-2]
	case n > 1 && strings.HasSuffix(word, "s"):
		return word[:n-1]
	}

	// No rule applied.
	return word
}
|
|
|
|
// frenchLemma finds the base form of a French word.
//
// This is a deliberately simplified, suffix-based heuristic. The first
// matching rule wins:
//
//   - words ending in "er", "ir" or "re" are taken to be infinitives and are
//     returned unchanged;
//   - the endings "ant"/"ent" and the participle endings "é"/"i" are removed,
//     leaving the stem (not the infinitive);
//   - the plural endings "s" and "x" are removed.
//
// Every stripping rule requires the word to be longer than the suffix, so a
// word that consists solely of a suffix is returned unchanged instead of being
// reduced to an empty string.
func frenchLemma(word string) string {
	n := len(word)

	switch {
	case strings.HasSuffix(word, "er"),
		strings.HasSuffix(word, "ir"),
		strings.HasSuffix(word, "re"):
		return word
	case n > 3 && (strings.HasSuffix(word, "ant") || strings.HasSuffix(word, "ent")):
		return word[:n-3]
	case n > len("é") && strings.HasSuffix(word, "é"):
		// "é" occupies two bytes in UTF-8; TrimSuffix removes the whole
		// character, whereas cutting a single byte would leave invalid UTF-8.
		return strings.TrimSuffix(word, "é")
	case n > 1 && strings.HasSuffix(word, "i"):
		return word[:n-1]
	case n > 1 && strings.HasSuffix(word, "s"):
		return word[:n-1]
	case n > 1 && strings.HasSuffix(word, "x"):
		return word[:n-1]
	}

	// No rule applied.
	return word
}
|