mirror of
https://github.com/SamyRai/tercul-backend.git
synced 2025-12-27 05:11:34 +00:00
181 lines
4.9 KiB
Go
181 lines
4.9 KiB
Go
package linguistics
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"sync"
|
|
|
|
"tercul/internal/platform/cache"
|
|
"tercul/internal/platform/log"
|
|
)
|
|
|
|
// Analyzer defines the interface for linguistic analysis services.
type Analyzer interface {
	// AnalyzeText performs linguistic analysis on the given text in the
	// given language and returns the structured analysis result.
	AnalyzeText(ctx context.Context, text string, language string) (*AnalysisResult, error)

	// AnalyzeWork performs linguistic analysis on the work identified by
	// workID. No result is returned; implementations are expected to store
	// the analysis themselves (see BasicAnalyzer.AnalyzeWork).
	AnalyzeWork(ctx context.Context, workID uint) error
}
|
|
|
|
// BasicAnalyzer implements the Analyzer interface as a thin coordination layer.
// It delegates pure text analysis to TextAnalyzer and work analysis to WorkAnalysisService,
// and only handles caching and orchestration concerns here to preserve SRP/DRY.
type BasicAnalyzer struct {
	// textAnalyzer performs the actual (pure) text analysis.
	textAnalyzer TextAnalyzer
	// workAnalysisService owns work-level analysis and persistence.
	workAnalysisService WorkAnalysisService
	// cache is an optional distributed cache (named "redis" at the
	// constructor); may be nil, in which case only the in-memory cache is used.
	cache cache.Cache
	// resultCache is a process-local cache of analysis results keyed by
	// makeTextCacheKey output.
	// NOTE(review): it grows without bound — consider an LRU/size cap.
	resultCache map[string]*AnalysisResult
	// cacheMutex guards resultCache.
	// NOTE(review): cacheEnabled is toggled by Enable/DisableCache without
	// this lock while AnalyzeText reads it — a data race; guard or make atomic.
	cacheMutex sync.RWMutex
	// concurrency is the worker count used for long texts (see AnalyzeText).
	concurrency int
	// cacheEnabled toggles both cache layers for AnalyzeText.
	cacheEnabled bool
}
|
|
|
|
// NewBasicAnalyzer creates a new BasicAnalyzer
|
|
func NewBasicAnalyzer(
|
|
textAnalyzer TextAnalyzer,
|
|
workService WorkAnalysisService,
|
|
redis cache.Cache,
|
|
concurrency int,
|
|
cacheEnabled bool,
|
|
) *BasicAnalyzer {
|
|
if concurrency <= 0 {
|
|
concurrency = 4
|
|
}
|
|
return &BasicAnalyzer{
|
|
textAnalyzer: textAnalyzer,
|
|
workAnalysisService: workService,
|
|
cache: redis,
|
|
resultCache: make(map[string]*AnalysisResult),
|
|
concurrency: concurrency,
|
|
cacheEnabled: cacheEnabled,
|
|
}
|
|
}
|
|
|
|
// WithCache adds a cache to the analyzer and returns the receiver so calls
// can be chained builder-style.
// NOTE(review): mutates the analyzer in place; not safe to call concurrently
// with AnalyzeText — intended for configuration at construction time.
func (a *BasicAnalyzer) WithCache(cache cache.Cache) *BasicAnalyzer {
	a.cache = cache
	return a
}
|
|
|
|
// WithConcurrency sets the number of concurrent workers
|
|
func (a *BasicAnalyzer) WithConcurrency(concurrency int) *BasicAnalyzer {
|
|
if concurrency > 0 {
|
|
a.concurrency = concurrency
|
|
}
|
|
return a
|
|
}
|
|
|
|
// EnableCache enables in-memory caching of analysis results
|
|
func (a *BasicAnalyzer) EnableCache() {
|
|
a.cacheEnabled = true
|
|
}
|
|
|
|
// DisableCache disables in-memory caching of analysis results
|
|
func (a *BasicAnalyzer) DisableCache() {
|
|
a.cacheEnabled = false
|
|
}
|
|
|
|
// AnalyzeText performs basic linguistic analysis on the given text
|
|
func (a *BasicAnalyzer) AnalyzeText(ctx context.Context, text string, language string) (*AnalysisResult, error) {
|
|
// Check in-memory cache first if enabled
|
|
if a.cacheEnabled {
|
|
cacheKey := makeTextCacheKey(language, text)
|
|
|
|
// Try to get from in-memory cache
|
|
a.cacheMutex.RLock()
|
|
cachedResult, found := a.resultCache[cacheKey]
|
|
a.cacheMutex.RUnlock()
|
|
|
|
if found {
|
|
log.LogDebug("In-memory cache hit for text analysis",
|
|
log.F("language", language),
|
|
log.F("textLength", len(text)))
|
|
return cachedResult, nil
|
|
}
|
|
|
|
// Try to get from Redis cache if available
|
|
if a.cache != nil {
|
|
var cachedResult AnalysisResult
|
|
err := a.cache.Get(ctx, "text_analysis:"+cacheKey, &cachedResult)
|
|
if err == nil {
|
|
log.LogDebug("Redis cache hit for text analysis",
|
|
log.F("language", language),
|
|
log.F("textLength", len(text)))
|
|
|
|
// Store in in-memory cache too
|
|
a.cacheMutex.Lock()
|
|
a.resultCache[cacheKey] = &cachedResult
|
|
a.cacheMutex.Unlock()
|
|
|
|
return &cachedResult, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// Cache miss or caching disabled, perform analysis using the pure TextAnalyzer
|
|
log.LogDebug("Performing text analysis",
|
|
log.F("language", language),
|
|
log.F("textLength", len(text)))
|
|
|
|
var (
|
|
result *AnalysisResult
|
|
err error
|
|
)
|
|
if len(text) > 10000 && a.concurrency > 1 {
|
|
result, err = a.textAnalyzer.AnalyzeTextConcurrently(ctx, text, language, a.concurrency)
|
|
} else {
|
|
result, err = a.textAnalyzer.AnalyzeText(ctx, text, language)
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Cache the result if caching is enabled
|
|
if a.cacheEnabled {
|
|
cacheKey := makeTextCacheKey(language, text)
|
|
|
|
// Store in in-memory cache
|
|
a.cacheMutex.Lock()
|
|
a.resultCache[cacheKey] = result
|
|
a.cacheMutex.Unlock()
|
|
|
|
// Store in Redis cache if available
|
|
if a.cache != nil {
|
|
if err := a.cache.Set(ctx, "text_analysis:"+cacheKey, result, 0); err != nil {
|
|
log.LogWarn("Failed to cache text analysis result",
|
|
log.F("language", language),
|
|
log.F("textLength", len(text)),
|
|
log.F("error", err))
|
|
}
|
|
}
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// AnalyzeWork performs linguistic analysis on a work and stores the results.
// It is a pure delegation: all work-level analysis and persistence logic is
// owned by the WorkAnalysisService; this method exists so BasicAnalyzer
// satisfies the Analyzer interface.
func (a *BasicAnalyzer) AnalyzeWork(ctx context.Context, workID uint) error {
	// Delegate to the WorkAnalysisService to preserve single ownership
	return a.workAnalysisService.AnalyzeWork(ctx, workID)
}
|
|
|
|
// Helper functions for text analysis
|
|
|
|
// min reports the smaller of two integers.
// NOTE(review): on Go 1.21+ this shadows the built-in min and can be deleted
// once the module's minimum Go version allows it.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
|
|
|
|
// Note: max was unused and has been removed to keep the code minimal and focused
|
|
|
|
// makeTextCacheKey builds a stable cache key for a language/text pair.
// The text is replaced by its SHA-256 digest so the key stays bounded in
// length regardless of text size and distinct texts cannot collide into one
// key (nor leak raw content into the cache key space).
func makeTextCacheKey(language, text string) string {
	digest := sha256.Sum256([]byte(text))
	encoded := hex.EncodeToString(digest[:])
	return language + ":" + encoded
}
|