tercul-backend/internal/jobs/linguistics/work_analysis_service.go
Damir Mukimov d50722dad5
Some checks failed
Test / Integration Tests (push) Successful in 4s
Build / Build Binary (push) Failing after 2m9s
Docker Build / Build Docker Image (push) Failing after 2m32s
Test / Unit Tests (push) Failing after 3m12s
Lint / Go Lint (push) Failing after 1m0s
Refactor ID handling to use UUIDs across the application
- Updated database models and repositories to replace uint IDs with UUIDs.
- Modified test fixtures to generate and use UUIDs for authors, translations, users, and works.
- Adjusted mock implementations to align with the new UUID structure.
- Ensured all relevant functions and methods are updated to handle UUIDs correctly.
- Added necessary imports for UUID handling in various files.
2025-12-27 00:33:34 +01:00

322 lines
9.1 KiB
Go

package linguistics
import (
"context"
"fmt"
"sort"
"tercul/internal/domain"
"tercul/internal/platform/log"
"time"
"github.com/google/uuid"
)
// counter abstracts any repository that can count rows matching a set of
// query options. It is used by the table-count fallback in GetWorkAnalytics
// when no StatsRepo is configured.
type counter interface {
	CountWithOptions(ctx context.Context, options *domain.QueryOptions) (int64, error)
}
// translationLister abstracts listing all translations that belong to a work,
// used to build the popular-translations ranking in GetWorkAnalytics.
type translationLister interface {
	ListByWorkID(ctx context.Context, workID uuid.UUID) ([]domain.Translation, error)
}
// WorkAnalyticsDeps bundles the data-source dependencies used by
// GetWorkAnalytics. StatsRepo is the preferred (canonical) source of
// counters; the counter fields are a fallback consulted only when
// StatsRepo is nil.
type WorkAnalyticsDeps struct {
	StatsRepo        domain.StatsRepository // canonical per-work and per-translation counters
	LikeCounter      counter                // fallback like count, filtered by work_id
	CommentCounter   counter                // fallback comment count, filtered by work_id
	BookmarkCounter  counter                // fallback bookmark count, filtered by work_id
	TranslationCount counter                // fallback translation count, filtered by work_id
	TranslationList  translationLister      // lists a work's translations for popularity ranking
}
// WorkAnalysisService defines the interface for work-specific analysis operations.
type WorkAnalysisService interface {
	// AnalyzeWork performs linguistic analysis on a work and persists the
	// results. Implementations may serve the result from a cache.
	AnalyzeWork(ctx context.Context, workID uuid.UUID) error

	// GetWorkAnalytics retrieves aggregated analytics data for a work
	// (counters, scores, keywords and popular translations).
	GetWorkAnalytics(ctx context.Context, workID uuid.UUID) (*WorkAnalytics, error)
}
// WorkAnalytics contains analytics data for a work.
type WorkAnalytics struct {
	WorkID              uuid.UUID
	ViewCount           int64
	LikeCount           int64
	CommentCount        int64
	BookmarkCount       int64
	TranslationCount    int64
	ReadabilityScore    float64 // 0 when no analysis data has been stored yet
	SentimentScore      float64 // 0 when no analysis data has been stored yet
	TopKeywords         []string
	PopularTranslations []TranslationAnalytics // up to three, ordered by views then likes
}
// TranslationAnalytics contains analytics data for a single translation,
// as embedded in WorkAnalytics.PopularTranslations.
type TranslationAnalytics struct {
	TranslationID uuid.UUID
	Language      string
	ViewCount     int64
	LikeCount     int64
}
// workAnalysisService implements the WorkAnalysisService interface.
type workAnalysisService struct {
	textAnalyzer  TextAnalyzer       // performs the actual linguistic analysis
	analysisCache AnalysisCache      // optional result cache; gated by cacheEnabled and IsEnabled()
	analysisRepo  AnalysisRepository // persistence for work content and analysis results
	deps          WorkAnalyticsDeps  // counter/lister dependencies for GetWorkAnalytics
	concurrency   int                // worker count used for concurrent analysis of large texts
	cacheEnabled  bool               // master switch for using analysisCache
}
// NewWorkAnalysisService constructs a WorkAnalysisService from its
// collaborators. concurrency sets the worker count used when analyzing large
// texts; cacheEnabled toggles use of analysisCache for analysis results.
func NewWorkAnalysisService(
	textAnalyzer TextAnalyzer,
	analysisCache AnalysisCache,
	analysisRepo AnalysisRepository,
	deps WorkAnalyticsDeps,
	concurrency int,
	cacheEnabled bool,
) WorkAnalysisService {
	svc := &workAnalysisService{
		textAnalyzer:  textAnalyzer,
		analysisCache: analysisCache,
		analysisRepo:  analysisRepo,
		deps:          deps,
		concurrency:   concurrency,
		cacheEnabled:  cacheEnabled,
	}
	return svc
}
// AnalyzeWork performs linguistic analysis on a work and stores the results
func (s *workAnalysisService) AnalyzeWork(ctx context.Context, workID uuid.UUID) error {
logger := log.FromContext(ctx).With("workID", workID)
if workID == uuid.Nil {
return fmt.Errorf("invalid work ID")
}
// Check cache first if enabled
if s.cacheEnabled && s.analysisCache.IsEnabled() {
cacheKey := fmt.Sprintf("work_analysis:%d", workID)
if result, err := s.analysisCache.Get(ctx, cacheKey); err == nil {
logger.Info("Cache hit for work analysis")
// Store directly to database
return s.analysisRepo.StoreAnalysisResults(ctx, workID, result)
}
}
// Get work content from database
content, err := s.analysisRepo.GetWorkContent(ctx, workID, "")
if err != nil {
logger.Error(err, "Failed to get work content for analysis")
return fmt.Errorf("failed to get work content: %w", err)
}
// Skip analysis if content is empty
if content == "" {
logger.Warn("Skipping analysis for work with empty content")
return nil
}
// Get work to determine language (via repository to avoid leaking GORM)
work, err := s.analysisRepo.GetWorkByID(ctx, workID)
if err != nil {
logger.Error(err, "Failed to fetch work for analysis")
return fmt.Errorf("failed to fetch work: %w", err)
}
// Analyze the text
start := time.Now()
logger.With("language", work.Language).
With("contentLength", len(content)).
Info("Analyzing work")
var result *AnalysisResult
// Use concurrent processing for large texts
if len(content) > 10000 && s.concurrency > 1 {
result, err = s.textAnalyzer.AnalyzeTextConcurrently(ctx, content, work.Language, s.concurrency)
} else {
result, err = s.textAnalyzer.AnalyzeText(ctx, content, work.Language)
}
if err != nil {
logger.Error(err, "Failed to analyze work text")
return fmt.Errorf("failed to analyze work text: %w", err)
}
// Store results in database
if err := s.analysisRepo.StoreAnalysisResults(ctx, workID, result); err != nil {
logger.Error(err, "Failed to store analysis results")
return fmt.Errorf("failed to store analysis results: %w", err)
}
// Cache the result if caching is enabled
if s.cacheEnabled && s.analysisCache.IsEnabled() {
cacheKey := fmt.Sprintf("work_analysis:%d", workID)
if err := s.analysisCache.Set(ctx, cacheKey, result); err != nil {
logger.Error(err, "Failed to cache work analysis result")
}
}
logger.With("wordCount", result.WordCount).
With("readabilityScore", result.ReadabilityScore).
With("sentiment", result.Sentiment).
With("durationMs", time.Since(start).Milliseconds()).
Info("Successfully analyzed work")
return nil
}
// GetWorkAnalytics retrieves analytics data for a work
func (s *workAnalysisService) GetWorkAnalytics(ctx context.Context, workID uuid.UUID) (*WorkAnalytics, error) {
if workID == uuid.Nil {
return nil, fmt.Errorf("invalid work ID")
}
// Get the work to ensure it exists
work, err := s.analysisRepo.GetWorkByID(ctx, workID)
if err != nil {
return nil, fmt.Errorf("work not found: %w", err)
}
// Get analysis results from database (may not exist yet)
_, readabilityScore, languageAnalysis, err := s.analysisRepo.GetAnalysisData(ctx, workID)
if err != nil {
log.FromContext(ctx).With("workID", workID).With("err", err).Warn("failed to load analysis data")
}
// Extract keywords from JSONB
var keywords []string
if languageAnalysis != nil && languageAnalysis.Analysis != nil {
if keywordsData, ok := languageAnalysis.Analysis["keywords"].([]interface{}); ok {
for _, kw := range keywordsData {
if keywordMap, ok := kw.(map[string]interface{}); ok {
if text, ok := keywordMap["text"].(string); ok {
keywords = append(keywords, text)
}
}
}
}
}
// Compute counters.
// Prefer StatsRepo (canonical counters), and fall back to counting tables when configured.
var (
viewCount int64
likeCount int64
commentCount int64
bookmarkCount int64
translationCount int64
popular []TranslationAnalytics
)
if s.deps.StatsRepo != nil {
if stats, err := s.deps.StatsRepo.GetOrCreateWorkStats(ctx, workID); err == nil && stats != nil {
viewCount = stats.Views
likeCount = stats.Likes
commentCount = stats.Comments
bookmarkCount = stats.Bookmarks
translationCount = stats.TranslationCount
}
}
if s.deps.StatsRepo == nil {
q := &domain.QueryOptions{Where: map[string]interface{}{"work_id": workID}}
if s.deps.LikeCounter != nil {
if c, err := s.deps.LikeCounter.CountWithOptions(ctx, q); err == nil {
likeCount = c
}
}
if s.deps.CommentCounter != nil {
if c, err := s.deps.CommentCounter.CountWithOptions(ctx, q); err == nil {
commentCount = c
}
}
if s.deps.BookmarkCounter != nil {
if c, err := s.deps.BookmarkCounter.CountWithOptions(ctx, q); err == nil {
bookmarkCount = c
}
}
if s.deps.TranslationCount != nil {
if c, err := s.deps.TranslationCount.CountWithOptions(ctx, q); err == nil {
translationCount = c
}
}
}
// Build PopularTranslations using per-translation stats.
if s.deps.TranslationList != nil && s.deps.StatsRepo != nil {
if translations, err := s.deps.TranslationList.ListByWorkID(ctx, workID); err == nil {
for _, t := range translations {
var views, likes int64
if ts, err := s.deps.StatsRepo.GetOrCreateTranslationStats(ctx, t.ID); err == nil && ts != nil {
views = ts.Views
likes = ts.Likes
}
popular = append(popular, TranslationAnalytics{
TranslationID: t.ID,
Language: t.Language,
ViewCount: views,
LikeCount: likes,
})
}
}
}
// Sort popular translations by view count desc, then likes desc
if len(popular) > 1 {
sort.SliceStable(popular, func(i, j int) bool {
if popular[i].ViewCount == popular[j].ViewCount {
return popular[i].LikeCount > popular[j].LikeCount
}
return popular[i].ViewCount > popular[j].ViewCount
})
// limit to top 3
if len(popular) > 3 {
popular = popular[:3]
}
}
return &WorkAnalytics{
WorkID: work.ID,
ViewCount: viewCount,
LikeCount: likeCount,
CommentCount: commentCount,
BookmarkCount: bookmarkCount,
TranslationCount: translationCount,
ReadabilityScore: safeReadabilityScore(readabilityScore),
SentimentScore: safeSentimentScore(languageAnalysis),
TopKeywords: keywords,
PopularTranslations: popular,
}, nil
}
// safeReadabilityScore returns the readability score held by rs, or 0 when
// no readability record exists (rs is nil).
func safeReadabilityScore(rs *domain.ReadabilityScore) float64 {
	if rs != nil {
		return rs.Score
	}
	return 0
}
// safeSentimentScore returns the sentiment stored in la's analysis payload,
// or 0 when no language-analysis record exists (la is nil).
func safeSentimentScore(la *domain.LanguageAnalysis) float64 {
	if la != nil {
		return extractSentimentFromAnalysis(la.Analysis)
	}
	return 0
}
// extractSentimentFromAnalysis extracts the "sentiment" value from the
// Analysis JSONB field. A nil payload, a missing key, or a value that is not
// a float64 all yield 0.0.
func extractSentimentFromAnalysis(analysis domain.JSONB) float64 {
	if analysis == nil {
		return 0.0
	}
	value, present := analysis["sentiment"]
	if !present {
		return 0.0
	}
	sentiment, isFloat := value.(float64)
	if !isFloat {
		return 0.0
	}
	return sentiment
}