package linguistics

import (
	"strings"
)

// englishIrregularVerbs maps inflected forms of common irregular English
// verbs to their lemma. It is built once and only read afterwards, so
// englishLemma does not allocate a new map on every call.
var englishIrregularVerbs = map[string]string{
	"am":      "be",
	"are":     "be",
	"is":      "be",
	"was":     "be",
	"were":    "be",
	"been":    "be",
	"have":    "have",
	"has":     "have",
	"had":     "have",
	"do":      "do",
	"does":    "do",
	"did":     "do",
	"done":    "do",
	"go":      "go",
	"goes":    "go",
	"went":    "go",
	"gone":    "go",
	"get":     "get",
	"gets":    "get",
	"got":     "get",
	"gotten":  "get",
	"make":    "make",
	"makes":   "make",
	"made":    "make",
	"say":     "say",
	"says":    "say",
	"said":    "say",
	"see":     "see",
	"sees":    "see",
	"saw":     "see",
	"seen":    "see",
	"come":    "come",
	"comes":   "come",
	"came":    "come",
	"take":    "take",
	"takes":   "take",
	"took":    "take",
	"taken":   "take",
	"know":    "know",
	"knows":   "know",
	"knew":    "know",
	"known":   "know",
	"think":   "think",
	"thinks":  "think",
	"thought": "think",
}

// Lemmatizer finds the base form (lemma) of words.
type Lemmatizer struct{}

// NewLemmatizer creates a new Lemmatizer.
func NewLemmatizer() *Lemmatizer {
	return &Lemmatizer{}
}

// Lemma returns the base form (lemma) of word for the given language code.
//
// The word is lowercased before any rule is applied. Supported language codes
// are "es" (Spanish) and "fr" (French); "en" and every other value, including
// the empty string, use the English rules.
//
// This is a simplified, suffix-based implementation; a production system would
// use a dedicated NLP library or an external lemmatization service. The
// returned error is currently always nil.
func (l *Lemmatizer) Lemma(word string, language string) (string, error) {
	word = strings.ToLower(word)

	switch language {
	case "es":
		return spanishLemma(word), nil
	case "fr":
		return frenchLemma(word), nil
	default:
		// "en" and any unrecognized language fall back to English.
		return englishLemma(word), nil
	}
}

// englishLemma finds the base form of a lowercase English word using a table
// of irregular verbs followed by crude suffix stripping (plural "-ies", "-es",
// "-s", then "-ing" and "-ed"). Words that match no rule are returned as is.
func englishLemma(word string) string {
	if lemma, ok := englishIrregularVerbs[word]; ok {
		return lemma
	}

	n := len(word)

	// Plural nouns: any word longer than two bytes that ends in "s" is
	// resolved here and never reaches the verb rules below.
	if n > 2 && strings.HasSuffix(word, "s") {
		switch {
		case n > 3 && strings.HasSuffix(word, "ies"):
			return word[:n-3] + "y"
		case strings.HasSuffix(word, "es"):
			return word[:n-2]
		default:
			return word[:n-1]
		}
	}

	// Verb forms: strip the ending, then undo consonant doubling
	// ("running" -> "runn" -> "run").
	switch {
	case n > 3 && strings.HasSuffix(word, "ing"):
		return dropDoubledConsonant(word[:n-3])
	case n > 2 && strings.HasSuffix(word, "ed"):
		return dropDoubledConsonant(word[:n-2])
	}

	return word
}

// dropDoubledConsonant removes the final letter of stem when stem ends in the
// same ASCII consonant twice. Doubled vowels ("see" from "seeing") are left
// untouched, and because only ASCII letters qualify, the cut can never land
// inside a multi-byte UTF-8 sequence.
func dropDoubledConsonant(stem string) string {
	n := len(stem)
	if n < 2 || stem[n-1] != stem[n-2] {
		return stem
	}
	if strings.IndexByte("bcdfghjklmnpqrstvwxyz", stem[n-1]) < 0 {
		return stem
	}
	return stem[:n-1]
}

// spanishLemma finds the base form of a lowercase Spanish word using simple
// suffix rules. Words that already look like infinitives ("-ar", "-er", "-ir")
// are returned unchanged; gerund and participle endings are stripped; finally
// plural "-es" / "-s" endings are removed.
func spanishLemma(word string) string {
	switch {
	case strings.HasSuffix(word, "ar"),
		strings.HasSuffix(word, "er"),
		strings.HasSuffix(word, "ir"):
		return word
	case strings.HasSuffix(word, "ando"),
		strings.HasSuffix(word, "endo"):
		return word[:len(word)-4]
	case strings.HasSuffix(word, "ado"),
		strings.HasSuffix(word, "ido"):
		return word[:len(word)-3]
	case len(word) > 2 && strings.HasSuffix(word, "es"):
		return word[:len(word)-2]
	case len(word) > 1 && strings.HasSuffix(word, "s"):
		return word[:len(word)-1]
	}
	return word
}

// frenchLemma finds the base form of a lowercase French word using simple
// suffix rules. Words that already look like infinitives ("-er", "-ir", "-re")
// are returned unchanged; participle endings ("-ant", "-ent", "-é", "-i") are
// stripped; finally plural "-s" / "-x" endings are removed.
func frenchLemma(word string) string {
	switch {
	case strings.HasSuffix(word, "er"),
		strings.HasSuffix(word, "ir"),
		strings.HasSuffix(word, "re"):
		return word
	case strings.HasSuffix(word, "ant"),
		strings.HasSuffix(word, "ent"):
		return word[:len(word)-3]
	case strings.HasSuffix(word, "é"):
		// "é" occupies two bytes in UTF-8, so trim the whole suffix rather
		// than a single byte; cutting one byte would leave invalid UTF-8.
		return strings.TrimSuffix(word, "é")
	case strings.HasSuffix(word, "i"):
		return word[:len(word)-1]
	case len(word) > 1 && strings.HasSuffix(word, "s"):
		return word[:len(word)-1]
	case len(word) > 1 && strings.HasSuffix(word, "x"):
		return word[:len(word)-1]
	}
	return word
}