// tercul-backend/internal/jobs/linguistics/text_utils_test.go
//
// 52 lines
// 1.3 KiB
// Go

package linguistics
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestAnalyzeTextBasicStats feeds a short two-paragraph sample to
// analyzeTextBasicStats and checks the four values it returns: word count,
// sentence count, paragraph count, and average word length.
func TestAnalyzeTextBasicStats(t *testing.T) {
	// The sample has a blank line between its two paragraphs and three
	// sentence terminators ("!", ".", "?").
	const sample = "Hello world!\n\nThis is a test. Another sentence?"

	wordCount, sentenceCount, paragraphCount, meanWordLen := analyzeTextBasicStats(sample)

	assert.Equal(t, 8, wordCount)
	assert.Equal(t, 3, sentenceCount)
	assert.Equal(t, 2, paragraphCount)
	// Deliberately loose tolerance: any average in [3.0, 5.0] is accepted.
	assert.InDelta(t, 4.0, meanWordLen, 1.0)
}
// TestSplitTextIntoChunks splits a string of five short period-terminated
// segments with a size argument of 10 and checks only the number of chunks
// produced, not their contents.
func TestSplitTextIntoChunks(t *testing.T) {
	const input = "A. B. C. D. E."

	chunkCount := len(splitTextIntoChunks(input, 10))

	// At least one chunk must come back, and no more chunks than there are
	// segments in the input (i.e. the splitter must not over-provision).
	assert.GreaterOrEqual(t, chunkCount, 1)
	assert.LessOrEqual(t, chunkCount, 5)
}
// TestExtractKeywordsOptimized runs extractKeywordsOptimized on an English
// sample in which "Go" and "efficient" are repeated, and checks that the
// result is non-empty and contains at least one of those two words.
func TestExtractKeywordsOptimized(t *testing.T) {
	const sample = "Go is great. Go is fast, simple, and efficient. Efficient systems love Go."

	keywords := extractKeywordsOptimized(sample, "en")
	assert.NotEmpty(t, keywords)

	// Accept a match at any position in the result, not only the first entry.
	// The comparison is against the lowercase forms "go" and "efficient".
	matched := false
	for _, keyword := range keywords {
		switch keyword.Text {
		case "go", "efficient":
			matched = true
		}
		if matched {
			break
		}
	}
	assert.True(t, matched)
}
// TestEstimateSentimentOptimized scores one clearly positive and one clearly
// negative English sentence with estimateSentimentOptimized and checks only
// the sign of each score: strictly above zero for the positive text and
// strictly below zero for the negative text.
func TestEstimateSentimentOptimized(t *testing.T) {
	const (
		positiveText = "This product is amazing and wonderful, I love it!"
		negativeText = "This is a terrible and horrible failure. I hate it."
	)

	positiveScore := estimateSentimentOptimized(positiveText, "en")
	negativeScore := estimateSentimentOptimized(negativeText, "en")

	assert.Greater(t, positiveScore, 0.0)
	assert.Less(t, negativeScore, 0.0)
}