mirror of
https://github.com/SamyRai/tercul-backend.git
synced 2025-12-27 05:11:34 +00:00
181 lines
4.9 KiB
Go
181 lines
4.9 KiB
Go
package linguistics
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"sync"
|
|
|
|
"tercul/internal/platform/cache"
|
|
"tercul/internal/platform/log"
|
|
)
|
|
|
|
// Analyzer defines the interface for linguistic analysis services.
type Analyzer interface {
	// AnalyzeText performs linguistic analysis on the given text in the
	// given language and returns the structured analysis result.
	AnalyzeText(ctx context.Context, text string, language string) (*AnalysisResult, error)

	// AnalyzeWork performs linguistic analysis on the work identified by
	// workID. No result is returned; implementations are expected to store
	// the analysis themselves (see BasicAnalyzer.AnalyzeWork).
	AnalyzeWork(ctx context.Context, workID uint) error
}
|
|
|
|
// BasicAnalyzer implements the Analyzer interface as a thin coordination layer.
// It delegates pure text analysis to TextAnalyzer and work analysis to WorkAnalysisService,
// and only handles caching and orchestration concerns here to preserve SRP/DRY.
type BasicAnalyzer struct {
	// textAnalyzer performs the actual (pure) text analysis.
	textAnalyzer TextAnalyzer
	// workAnalysisService owns work-level analysis and persistence.
	workAnalysisService WorkAnalysisService
	// cache is an optional distributed cache (named "redis" at the
	// constructor); may be nil, in which case only the in-memory cache is used.
	cache cache.Cache
	// resultCache is a process-local cache of analysis results keyed by
	// makeTextCacheKey output.
	// NOTE(review): it grows without bound — consider an LRU/size cap.
	resultCache map[string]*AnalysisResult
	// cacheMutex guards resultCache.
	// NOTE(review): cacheEnabled is toggled by Enable/DisableCache without
	// this lock while AnalyzeText reads it — a data race; guard or make atomic.
	cacheMutex sync.RWMutex
	// concurrency is the worker count used for long texts (see AnalyzeText).
	concurrency int
	// cacheEnabled toggles both cache layers for AnalyzeText.
	cacheEnabled bool
}
|
|
|
|
// NewBasicAnalyzer creates a new BasicAnalyzer
|
|
func NewBasicAnalyzer(
|
|
textAnalyzer TextAnalyzer,
|
|
workService WorkAnalysisService,
|
|
redis cache.Cache,
|
|
concurrency int,
|
|
cacheEnabled bool,
|
|
) *BasicAnalyzer {
|
|
if concurrency <= 0 {
|
|
concurrency = 4
|
|
}
|
|
return &BasicAnalyzer{
|
|
textAnalyzer: textAnalyzer,
|
|
workAnalysisService: workService,
|
|
cache: redis,
|
|
resultCache: make(map[string]*AnalysisResult),
|
|
concurrency: concurrency,
|
|
cacheEnabled: cacheEnabled,
|
|
}
|
|
}
|
|
|
|
// WithCache adds a cache to the analyzer and returns the receiver so calls
// can be chained builder-style.
// NOTE(review): mutates the analyzer in place; not safe to call concurrently
// with AnalyzeText — intended for configuration at construction time.
func (a *BasicAnalyzer) WithCache(cache cache.Cache) *BasicAnalyzer {
	a.cache = cache
	return a
}
|
|
|
|
// WithConcurrency sets the number of concurrent workers
|
|
func (a *BasicAnalyzer) WithConcurrency(concurrency int) *BasicAnalyzer {
|
|
if concurrency > 0 {
|
|
a.concurrency = concurrency
|
|
}
|
|
return a
|
|
}
|
|
|
|
// EnableCache enables in-memory caching of analysis results
|
|
func (a *BasicAnalyzer) EnableCache() {
|
|
a.cacheEnabled = true
|
|
}
|
|
|
|
// DisableCache disables in-memory caching of analysis results
|
|
func (a *BasicAnalyzer) DisableCache() {
|
|
a.cacheEnabled = false
|
|
}
|
|
|
|
// AnalyzeText performs basic linguistic analysis on the given text
|
|
func (a *BasicAnalyzer) AnalyzeText(ctx context.Context, text string, language string) (*AnalysisResult, error) {
|
|
// Check in-memory cache first if enabled
|
|
if a.cacheEnabled {
|
|
cacheKey := makeTextCacheKey(language, text)
|
|
|
|
// Try to get from in-memory cache
|
|
a.cacheMutex.RLock()
|
|
cachedResult, found := a.resultCache[cacheKey]
|
|
a.cacheMutex.RUnlock()
|
|
|
|
if found {
|
|
log.LogDebug("In-memory cache hit for text analysis",
|
|
log.F("language", language),
|
|
log.F("textLength", len(text)))
|
|
return cachedResult, nil
|
|
}
|
|
|
|
// Try to get from Redis cache if available
|
|
if a.cache != nil {
|
|
var cachedResult AnalysisResult
|
|
err := a.cache.Get(ctx, "text_analysis:"+cacheKey, &cachedResult)
|
|
if err == nil {
|
|
log.LogDebug("Redis cache hit for text analysis",
|
|
log.F("language", language),
|
|
log.F("textLength", len(text)))
|
|
|
|
// Store in in-memory cache too
|
|
a.cacheMutex.Lock()
|
|
a.resultCache[cacheKey] = &cachedResult
|
|
a.cacheMutex.Unlock()
|
|
|
|
return &cachedResult, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// Cache miss or caching disabled, perform analysis using the pure TextAnalyzer
|
|
log.LogDebug("Performing text analysis",
|
|
log.F("language", language),
|
|
log.F("textLength", len(text)))
|
|
|
|
var (
|
|
result *AnalysisResult
|
|
err error
|
|
)
|
|
if len(text) > 10000 && a.concurrency > 1 {
|
|
result, err = a.textAnalyzer.AnalyzeTextConcurrently(ctx, text, language, a.concurrency)
|
|
} else {
|
|
result, err = a.textAnalyzer.AnalyzeText(ctx, text, language)
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Cache the result if caching is enabled
|
|
if a.cacheEnabled {
|
|
cacheKey := makeTextCacheKey(language, text)
|
|
|
|
// Store in in-memory cache
|
|
a.cacheMutex.Lock()
|
|
a.resultCache[cacheKey] = result
|
|
a.cacheMutex.Unlock()
|
|
|
|
// Store in Redis cache if available
|
|
if a.cache != nil {
|
|
if err := a.cache.Set(ctx, "text_analysis:"+cacheKey, result, 0); err != nil {
|
|
log.LogWarn("Failed to cache text analysis result",
|
|
log.F("language", language),
|
|
log.F("textLength", len(text)),
|
|
log.F("error", err))
|
|
}
|
|
}
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// AnalyzeWork performs linguistic analysis on a work and stores the results.
// It is a pure delegation: all work-level analysis and persistence logic is
// owned by the WorkAnalysisService; this method exists so BasicAnalyzer
// satisfies the Analyzer interface.
func (a *BasicAnalyzer) AnalyzeWork(ctx context.Context, workID uint) error {
	// Delegate to the WorkAnalysisService to preserve single ownership
	return a.workAnalysisService.AnalyzeWork(ctx, workID)
}
|
|
|
|
// Helper functions for text analysis
|
|
|
|
// min reports the smaller of two integers.
// NOTE(review): on Go 1.21+ this shadows the built-in min and can be deleted
// once the module's minimum Go version allows it.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
|
|
|
|
// Note: max was unused and has been removed to keep the code minimal and focused
|
|
|
|
// makeTextCacheKey builds a stable cache key for a language/text pair.
// The text is replaced by its SHA-256 digest so the key stays bounded in
// length regardless of text size and distinct texts cannot collide into one
// key (nor leak raw content into the cache key space).
func makeTextCacheKey(language, text string) string {
	digest := sha256.Sum256([]byte(text))
	encoded := hex.EncodeToString(digest[:])
	return language + ":" + encoded
}
|