mirror of
https://github.com/SamyRai/tercul-backend.git
synced 2025-12-27 05:11:34 +00:00
Some checks failed
- Updated database models and repositories to replace uint IDs with UUIDs. - Modified test fixtures to generate and use UUIDs for authors, translations, users, and works. - Adjusted mock implementations to align with the new UUID structure. - Ensured all relevant functions and methods are updated to handle UUIDs correctly. - Added necessary imports for UUID handling in various files.
74 lines
1.7 KiB
Go
74 lines
1.7 KiB
Go
package linguistics
|
|
|
|
import (
	"sort"
	"strings"
	"unicode"
	"unicode/utf8"
)
|
|
|
|
// KeywordExtractor pulls keywords out of a piece of text. It carries no
// state of its own; all work happens in its Extract method.
type KeywordExtractor struct{}
|
|
|
|
// NewKeywordExtractor creates a new KeywordExtractor
|
|
func NewKeywordExtractor() *KeywordExtractor {
|
|
return &KeywordExtractor{}
|
|
}
|
|
|
|
// Extract extracts keywords from text and returns them
|
|
func (e *KeywordExtractor) Extract(text Text) ([]Keyword, error) {
|
|
// This is a simplified implementation
|
|
// In a real-world scenario, you would use a library like github.com/jdkato/prose
|
|
// or call an external API for keyword extraction
|
|
|
|
content := strings.ToLower(text.Body)
|
|
|
|
// Split into words
|
|
words := strings.FieldsFunc(content, func(r rune) bool {
|
|
return !unicode.IsLetter(r) && !unicode.IsNumber(r)
|
|
})
|
|
|
|
// Count word frequencies
|
|
wordFreq := make(map[string]int)
|
|
for _, word := range words {
|
|
if len(word) > 2 { // Skip very short words
|
|
wordFreq[word]++
|
|
}
|
|
}
|
|
|
|
// Filter out stop words
|
|
for word := range wordFreq {
|
|
if isStopWord(word, text.Language) {
|
|
delete(wordFreq, word)
|
|
}
|
|
}
|
|
|
|
// Convert to keywords
|
|
keywords := make([]Keyword, 0, len(wordFreq))
|
|
totalWords := len(words)
|
|
for word, count := range wordFreq {
|
|
// Calculate relevance based on frequency
|
|
relevance := float64(count) / float64(totalWords)
|
|
|
|
// Boost longer words slightly
|
|
relevance *= (1.0 + float64(len(word))/20.0)
|
|
|
|
keywords = append(keywords, Keyword{
|
|
Text: word,
|
|
Relevance: relevance,
|
|
})
|
|
}
|
|
|
|
// Sort by relevance
|
|
sort.Slice(keywords, func(i, j int) bool {
|
|
return keywords[i].Relevance > keywords[j].Relevance
|
|
})
|
|
|
|
// Limit to top keywords
|
|
maxKeywords := 20
|
|
if len(keywords) > maxKeywords {
|
|
keywords = keywords[:maxKeywords]
|
|
}
|
|
|
|
return keywords, nil
|
|
}
|