tercul-backend/linguistics/analyzer.go
Damir Mukimov fa336cacf3
wip
2025-09-01 00:43:59 +02:00

181 lines
4.9 KiB
Go

package linguistics
import (
"context"
"crypto/sha256"
"encoding/hex"
"sync"
"tercul/internal/platform/cache"
"tercul/internal/platform/log"
)
// Analyzer defines the interface for linguistic analysis services.
type Analyzer interface {
	// AnalyzeText performs linguistic analysis on the given text and returns
	// the structured result; language selects the language-specific handling.
	AnalyzeText(ctx context.Context, text string, language string) (*AnalysisResult, error)
	// AnalyzeWork performs linguistic analysis on the work identified by
	// workID. Result storage is the implementation's responsibility.
	AnalyzeWork(ctx context.Context, workID uint) error
}
// BasicAnalyzer implements the Analyzer interface as a thin coordination layer.
// It delegates pure text analysis to TextAnalyzer and work analysis to WorkAnalysisService,
// and only handles caching and orchestration concerns here to preserve SRP/DRY.
type BasicAnalyzer struct {
	textAnalyzer        TextAnalyzer        // pure text-analysis delegate
	workAnalysisService WorkAnalysisService // owns work-level analysis (see AnalyzeWork)
	cache               cache.Cache         // optional external cache; nil means in-memory only
	resultCache         map[string]*AnalysisResult // in-process cache; NOTE(review): unbounded — confirm memory is acceptable
	cacheMutex          sync.RWMutex        // guards resultCache
	concurrency         int                 // worker count for large-text analysis (default 4)
	cacheEnabled        bool                // toggles both caching layers
}
// NewBasicAnalyzer constructs a BasicAnalyzer wired with its collaborators.
// A non-positive concurrency value falls back to a default of 4 workers.
func NewBasicAnalyzer(
	textAnalyzer TextAnalyzer,
	workService WorkAnalysisService,
	redis cache.Cache,
	concurrency int,
	cacheEnabled bool,
) *BasicAnalyzer {
	workers := concurrency
	if workers <= 0 {
		workers = 4
	}
	analyzer := &BasicAnalyzer{
		textAnalyzer:        textAnalyzer,
		workAnalysisService: workService,
		cache:               redis,
		resultCache:         map[string]*AnalysisResult{},
		concurrency:         workers,
		cacheEnabled:        cacheEnabled,
	}
	return analyzer
}
// WithCache sets the external cache and returns the analyzer to allow
// call chaining.
func (a *BasicAnalyzer) WithCache(c cache.Cache) *BasicAnalyzer {
	// Parameter renamed from "cache" to "c": the original name shadowed the
	// imported cache package inside the method body.
	a.cache = c
	return a
}
// WithConcurrency overrides the number of concurrent workers and returns the
// analyzer for chaining. Non-positive values are ignored.
func (a *BasicAnalyzer) WithConcurrency(concurrency int) *BasicAnalyzer {
	if concurrency <= 0 {
		return a
	}
	a.concurrency = concurrency
	return a
}
// EnableCache enables in-memory caching of analysis results.
// NOTE(review): the flag is written without holding cacheMutex while
// AnalyzeText reads it concurrently — confirm callers only toggle this
// during setup, or move the flag under the mutex.
func (a *BasicAnalyzer) EnableCache() {
	a.cacheEnabled = true
}
// DisableCache disables in-memory caching of analysis results.
// NOTE(review): same unsynchronized-flag caveat as EnableCache — confirm
// this is only toggled before concurrent use begins.
func (a *BasicAnalyzer) DisableCache() {
	a.cacheEnabled = false
}
// AnalyzeText performs basic linguistic analysis on the given text.
// When caching is enabled, results are looked up first in the in-process map
// and then in the external cache, keyed by language plus a content hash of
// the text; fresh results are written back to both layers.
//
// Fix: the original computed makeTextCacheKey (a SHA-256 of the full text)
// twice per cache miss — once for the lookup and again for the store. The
// key is now computed exactly once, and the caching paths are factored into
// cachedResult/storeResult helpers.
func (a *BasicAnalyzer) AnalyzeText(ctx context.Context, text string, language string) (*AnalysisResult, error) {
	var cacheKey string
	if a.cacheEnabled {
		cacheKey = makeTextCacheKey(language, text)
		if result, ok := a.cachedResult(ctx, cacheKey, language, len(text)); ok {
			return result, nil
		}
	}

	// Cache miss or caching disabled: perform analysis using the pure TextAnalyzer.
	log.LogDebug("Performing text analysis",
		log.F("language", language),
		log.F("textLength", len(text)))

	var (
		result *AnalysisResult
		err    error
	)
	// Large texts are fanned out across workers; small ones are analyzed inline.
	if len(text) > 10000 && a.concurrency > 1 {
		result, err = a.textAnalyzer.AnalyzeTextConcurrently(ctx, text, language, a.concurrency)
	} else {
		result, err = a.textAnalyzer.AnalyzeText(ctx, text, language)
	}
	if err != nil {
		return nil, err
	}

	if a.cacheEnabled {
		a.storeResult(ctx, cacheKey, language, len(text), result)
	}
	return result, nil
}

// cachedResult returns a previously computed result for key, checking the
// in-process map first and then the external cache. External hits are
// promoted into the in-process map. The second return value reports whether
// a result was found.
func (a *BasicAnalyzer) cachedResult(ctx context.Context, key, language string, textLen int) (*AnalysisResult, bool) {
	a.cacheMutex.RLock()
	result, found := a.resultCache[key]
	a.cacheMutex.RUnlock()
	if found {
		log.LogDebug("In-memory cache hit for text analysis",
			log.F("language", language),
			log.F("textLength", textLen))
		return result, true
	}

	if a.cache == nil {
		return nil, false
	}
	var cached AnalysisResult
	if err := a.cache.Get(ctx, "text_analysis:"+key, &cached); err != nil {
		// Treat any Get error as a miss, as the original code did.
		return nil, false
	}
	log.LogDebug("Redis cache hit for text analysis",
		log.F("language", language),
		log.F("textLength", textLen))
	a.cacheMutex.Lock()
	a.resultCache[key] = &cached
	a.cacheMutex.Unlock()
	return &cached, true
}

// storeResult writes result into the in-process map and, when an external
// cache is configured, into it as well (with no expiry, as before). External
// cache failures are logged but not propagated — caching is best-effort.
func (a *BasicAnalyzer) storeResult(ctx context.Context, key, language string, textLen int, result *AnalysisResult) {
	a.cacheMutex.Lock()
	a.resultCache[key] = result
	a.cacheMutex.Unlock()

	if a.cache == nil {
		return
	}
	if err := a.cache.Set(ctx, "text_analysis:"+key, result, 0); err != nil {
		log.LogWarn("Failed to cache text analysis result",
			log.F("language", language),
			log.F("textLength", textLen),
			log.F("error", err))
	}
}
// AnalyzeWork performs linguistic analysis on a work and stores the results.
// It is a pure pass-through: work-level analysis is owned entirely by the
// WorkAnalysisService; this method exists so BasicAnalyzer satisfies Analyzer.
func (a *BasicAnalyzer) AnalyzeWork(ctx context.Context, workID uint) error {
	// Delegate to the WorkAnalysisService to preserve single ownership
	return a.workAnalysisService.AnalyzeWork(ctx, workID)
}
// Helper functions for text analysis
// min returns the smaller of two integers (ties return either, as they are
// equal). Kept as a named helper; Go 1.21+ offers a builtin equivalent, but
// unseen callers may reference this one.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
// Note: max was unused and has been removed to keep the code minimal and focused
// makeTextCacheKey derives a stable cache key of the form
// "<language>:<sha256-hex-of-text>". Hashing the content keeps keys short
// and avoids collisions or leaking raw text into key space.
func makeTextCacheKey(language, text string) string {
	digest := sha256.Sum256([]byte(text))
	encoded := hex.EncodeToString(digest[:])
	return language + ":" + encoded
}