tercul-backend/internal/jobs/linguistics/keyword_extractor_test.go
google-labs-jules[bot] 8797cec718 Refactor: In-progress refactoring to fix build.
This commit includes the following changes:
- Refactored all data repositories in `internal/data/sql/` to use a consistent `sql` package and to align with the new `domain` models.
- Fixed the GraphQL structure by moving the server creation logic from `internal/app` to `cmd/api`, which resolved an import cycle.
- Corrected numerous incorrect import paths for packages like `graph`, `linguistics`, `syncjob`, and the legacy `models` package.
- Resolved several package and function redeclaration errors.
- Removed legacy migration code.
2025-09-05 15:11:30 +00:00

28 lines
856 B
Go

package linguistics
import "testing"
// TestKeywordExtractor_Basic runs the keyword extractor over a short English
// sample and verifies three things: at least one keyword is produced, the
// top-ranked keyword is one of the repeated content words, and every returned
// keyword is a non-stop-word with a strictly positive relevance.
func TestKeywordExtractor_Basic(t *testing.T) {
	extractor := NewKeywordExtractor()
	input := Text{Body: "The quick brown fox jumps over the lazy dog. The quick brown fox!"}

	got, err := extractor.Extract(input)
	if err != nil {
		t.Fatalf("Extract returned error: %v", err)
	}
	if len(got) == 0 {
		t.Fatalf("expected some keywords, got 0")
	}

	// "quick", "brown" and "fox" each occur twice in the sample, so any one
	// of them is an acceptable first result.
	switch got[0].Text {
	case "quick", "brown", "fox":
		// acceptable top keyword
	default:
		t.Errorf("expected a content word as top keyword, got %q", got[0].Text)
	}

	for _, kw := range got {
		// Stop words present in the sample must not appear in the output.
		switch kw.Text {
		case "the", "over":
			t.Errorf("stop word %q should be filtered out", kw.Text)
		}
		if kw.Relevance <= 0 {
			t.Errorf("keyword %q has non-positive relevance", kw.Text)
		}
	}
}