package linguistics

import (
	"sort"
	"strings"
	"unicode"
)

// KeywordExtractor extracts keywords from text
type KeywordExtractor struct{}

// NewKeywordExtractor creates a new KeywordExtractor
func NewKeywordExtractor() *KeywordExtractor {
	return &KeywordExtractor{}
}

// Extract extracts keywords from text and returns them sorted by descending relevance
func (e *KeywordExtractor) Extract(text Text) ([]Keyword, error) {
	// This is a simplified, frequency-based implementation.
	// In a real-world scenario, you would use a library like github.com/jdkato/prose
	// or call an external API for keyword extraction.
	content := strings.ToLower(text.Body)

	// Split into words on any rune that is neither a letter nor a digit
	words := strings.FieldsFunc(content, func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsNumber(r)
	})

	// Count word frequencies
	wordFreq := make(map[string]int)
	for _, word := range words {
		if len(word) > 2 { // Skip very short words
			wordFreq[word]++
		}
	}

	// Filter out stop words for the document's language
	for word := range wordFreq {
		if isStopWord(word, text.Language) {
			delete(wordFreq, word)
		}
	}

	// Convert to keywords
	keywords := make([]Keyword, 0, len(wordFreq))
	totalWords := len(words)
	for word, count := range wordFreq {
		// Calculate relevance as the word's relative frequency in the document
		relevance := float64(count) / float64(totalWords)
		// Boost longer words slightly
		relevance *= (1.0 + float64(len(word))/20.0)

		keywords = append(keywords, Keyword{
			Text:      word,
			Relevance: relevance,
		})
	}

	// Sort by descending relevance
	sort.Slice(keywords, func(i, j int) bool {
		return keywords[i].Relevance > keywords[j].Relevance
	})

	// Limit to top keywords
	maxKeywords := 20
	if len(keywords) > maxKeywords {
		keywords = keywords[:maxKeywords]
	}

	return keywords, nil
}
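
// exampleExtractUsage is a minimal usage sketch. It assumes Text can be built
// as a struct literal with the Body and Language fields that Extract reads
// above, and that the package's isStopWord helper recognizes the "en"
// language code; adjust these assumptions to match the actual type definitions.
func exampleExtractUsage() ([]Keyword, error) {
	extractor := NewKeywordExtractor()
	doc := Text{
		Body:     "Go is a statically typed, compiled programming language.",
		Language: "en",
	}
	// Returns at most 20 keywords, sorted by descending relevance.
	return extractor.Extract(doc)
}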