package linguistics

import (
	"sort"
	"strings"
)

// TFIDFKeywordProvider is a lightweight keyword provider that uses local term
// frequencies as a proxy for TF-IDF.
//
// Note: a full TF-IDF requires a corpus. This implementation instead weights
// each term by its frequency within the given text, combined with stopword
// filtering and a minimum length threshold, to approximate keyword relevance
// without keeping any extra state.
type TFIDFKeywordProvider struct{}

// NewTFIDFKeywordProvider returns a new, stateless TFIDFKeywordProvider.
func NewTFIDFKeywordProvider() *TFIDFKeywordProvider {
	return &TFIDFKeywordProvider{}
}

// Extract returns up to ten keywords found in text, ordered by descending
// relevance; keywords with equal relevance are ordered alphabetically so the
// result is deterministic for a given input.
//
// Tokens produced by tokenizeWords are skipped when they are at most two
// bytes long or when isStopWord reports them as stop words for language.
// The remaining tokens are lower-cased and counted. A keyword's relevance is
// its count divided by the total number of tokens in text (including the
// skipped ones).
//
// Extract returns nil, nil when text yields no tokens. The returned error is
// always nil.
func (p *TFIDFKeywordProvider) Extract(text string, language string) ([]Keyword, error) {
	// maxKeywords caps the number of keywords returned.
	const maxKeywords = 10

	tokens := tokenizeWords(text)
	if len(tokens) == 0 {
		return nil, nil
	}

	freq := make(map[string]int, len(tokens))
	for _, t := range tokens {
		// len(t) is a byte length, so the threshold is in bytes, not runes.
		if len(t) <= 2 || isStopWord(t, language) {
			continue
		}
		freq[strings.ToLower(t)]++
	}

	// Every map entry has a count of at least one and len(tokens) > 0 here,
	// so each relevance is strictly positive.
	denom := float64(len(tokens))
	keywords := make([]Keyword, 0, len(freq))
	for w, c := range freq {
		keywords = append(keywords, Keyword{Text: w, Relevance: float64(c) / denom})
	}

	// Map iteration order is random and sort.Slice is not stable, so break
	// relevance ties on the (unique) keyword text to get a total order.
	sort.Slice(keywords, func(i, j int) bool {
		if keywords[i].Relevance != keywords[j].Relevance {
			return keywords[i].Relevance > keywords[j].Relevance
		}
		return keywords[i].Text < keywords[j].Text
	})

	if len(keywords) > maxKeywords {
		keywords = keywords[:maxKeywords]
	}
	return keywords, nil
}