tercul-backend/internal/jobs/linguistics/work_analysis_service.go
Damir Mukimov d50722dad5
Some checks failed
Test / Integration Tests (push) Successful in 4s
Build / Build Binary (push) Failing after 2m9s
Docker Build / Build Docker Image (push) Failing after 2m32s
Test / Unit Tests (push) Failing after 3m12s
Lint / Go Lint (push) Failing after 1m0s
Refactor ID handling to use UUIDs across the application
- Updated database models and repositories to replace uint IDs with UUIDs.
- Modified test fixtures to generate and use UUIDs for authors, translations, users, and works.
- Adjusted mock implementations to align with the new UUID structure.
- Ensured all relevant functions and methods are updated to handle UUIDs correctly.
- Added necessary imports for UUID handling in various files.
2025-12-27 00:33:34 +01:00

322 lines
9.1 KiB
Go

package linguistics
import (
"context"
"fmt"
"sort"
"tercul/internal/domain"
"tercul/internal/platform/log"
"time"
"github.com/google/uuid"
)
// counter abstracts any repository that can count rows matching a set of
// query options. It is used by the table-count fallback in GetWorkAnalytics
// when no StatsRepo is configured.
type counter interface {
	CountWithOptions(ctx context.Context, options *domain.QueryOptions) (int64, error)
}
// translationLister abstracts listing all translations that belong to a work,
// used to build the popular-translations ranking in GetWorkAnalytics.
type translationLister interface {
	ListByWorkID(ctx context.Context, workID uuid.UUID) ([]domain.Translation, error)
}
// WorkAnalyticsDeps bundles the data-source dependencies used by
// GetWorkAnalytics. StatsRepo is the preferred (canonical) source of
// counters; the counter fields are a fallback consulted only when
// StatsRepo is nil.
type WorkAnalyticsDeps struct {
	StatsRepo        domain.StatsRepository // canonical per-work and per-translation counters
	LikeCounter      counter                // fallback like count, filtered by work_id
	CommentCounter   counter                // fallback comment count, filtered by work_id
	BookmarkCounter  counter                // fallback bookmark count, filtered by work_id
	TranslationCount counter                // fallback translation count, filtered by work_id
	TranslationList  translationLister      // lists a work's translations for popularity ranking
}
// WorkAnalysisService defines the interface for work-specific analysis operations.
type WorkAnalysisService interface {
	// AnalyzeWork performs linguistic analysis on a work and persists the
	// results. Implementations may serve the result from a cache.
	AnalyzeWork(ctx context.Context, workID uuid.UUID) error

	// GetWorkAnalytics retrieves aggregated analytics data for a work
	// (counters, scores, keywords and popular translations).
	GetWorkAnalytics(ctx context.Context, workID uuid.UUID) (*WorkAnalytics, error)
}
// WorkAnalytics contains analytics data for a work.
type WorkAnalytics struct {
	WorkID              uuid.UUID
	ViewCount           int64
	LikeCount           int64
	CommentCount        int64
	BookmarkCount       int64
	TranslationCount    int64
	ReadabilityScore    float64 // 0 when no analysis data has been stored yet
	SentimentScore      float64 // 0 when no analysis data has been stored yet
	TopKeywords         []string
	PopularTranslations []TranslationAnalytics // up to three, ordered by views then likes
}
// TranslationAnalytics contains analytics data for a single translation,
// as embedded in WorkAnalytics.PopularTranslations.
type TranslationAnalytics struct {
	TranslationID uuid.UUID
	Language      string
	ViewCount     int64
	LikeCount     int64
}
// workAnalysisService implements the WorkAnalysisService interface.
type workAnalysisService struct {
	textAnalyzer  TextAnalyzer       // performs the actual linguistic analysis
	analysisCache AnalysisCache      // optional result cache; gated by cacheEnabled and IsEnabled()
	analysisRepo  AnalysisRepository // persistence for work content and analysis results
	deps          WorkAnalyticsDeps  // counter/lister dependencies for GetWorkAnalytics
	concurrency   int                // worker count used for concurrent analysis of large texts
	cacheEnabled  bool               // master switch for using analysisCache
}
// NewWorkAnalysisService constructs a WorkAnalysisService from its
// collaborators. concurrency sets the worker count used when analyzing large
// texts; cacheEnabled toggles use of analysisCache for analysis results.
func NewWorkAnalysisService(
	textAnalyzer TextAnalyzer,
	analysisCache AnalysisCache,
	analysisRepo AnalysisRepository,
	deps WorkAnalyticsDeps,
	concurrency int,
	cacheEnabled bool,
) WorkAnalysisService {
	svc := &workAnalysisService{
		textAnalyzer:  textAnalyzer,
		analysisCache: analysisCache,
		analysisRepo:  analysisRepo,
		deps:          deps,
		concurrency:   concurrency,
		cacheEnabled:  cacheEnabled,
	}
	return svc
}
// AnalyzeWork performs linguistic analysis on a work and stores the results
func (s *workAnalysisService) AnalyzeWork(ctx context.Context, workID uuid.UUID) error {
logger := log.FromContext(ctx).With("workID", workID)
if workID == uuid.Nil {
return fmt.Errorf("invalid work ID")
}
// Check cache first if enabled
if s.cacheEnabled && s.analysisCache.IsEnabled() {
cacheKey := fmt.Sprintf("work_analysis:%d", workID)
if result, err := s.analysisCache.Get(ctx, cacheKey); err == nil {
logger.Info("Cache hit for work analysis")
// Store directly to database
return s.analysisRepo.StoreAnalysisResults(ctx, workID, result)
}
}
// Get work content from database
content, err := s.analysisRepo.GetWorkContent(ctx, workID, "")
if err != nil {
logger.Error(err, "Failed to get work content for analysis")
return fmt.Errorf("failed to get work content: %w", err)
}
// Skip analysis if content is empty
if content == "" {
logger.Warn("Skipping analysis for work with empty content")
return nil
}
// Get work to determine language (via repository to avoid leaking GORM)
work, err := s.analysisRepo.GetWorkByID(ctx, workID)
if err != nil {
logger.Error(err, "Failed to fetch work for analysis")
return fmt.Errorf("failed to fetch work: %w", err)
}
// Analyze the text
start := time.Now()
logger.With("language", work.Language).
With("contentLength", len(content)).
Info("Analyzing work")
var result *AnalysisResult
// Use concurrent processing for large texts
if len(content) > 10000 && s.concurrency > 1 {
result, err = s.textAnalyzer.AnalyzeTextConcurrently(ctx, content, work.Language, s.concurrency)
} else {
result, err = s.textAnalyzer.AnalyzeText(ctx, content, work.Language)
}
if err != nil {
logger.Error(err, "Failed to analyze work text")
return fmt.Errorf("failed to analyze work text: %w", err)
}
// Store results in database
if err := s.analysisRepo.StoreAnalysisResults(ctx, workID, result); err != nil {
logger.Error(err, "Failed to store analysis results")
return fmt.Errorf("failed to store analysis results: %w", err)
}
// Cache the result if caching is enabled
if s.cacheEnabled && s.analysisCache.IsEnabled() {
cacheKey := fmt.Sprintf("work_analysis:%d", workID)
if err := s.analysisCache.Set(ctx, cacheKey, result); err != nil {
logger.Error(err, "Failed to cache work analysis result")
}
}
logger.With("wordCount", result.WordCount).
With("readabilityScore", result.ReadabilityScore).
With("sentiment", result.Sentiment).
With("durationMs", time.Since(start).Milliseconds()).
Info("Successfully analyzed work")
return nil
}
// GetWorkAnalytics retrieves analytics data for a work
func (s *workAnalysisService) GetWorkAnalytics(ctx context.Context, workID uuid.UUID) (*WorkAnalytics, error) {
if workID == uuid.Nil {
return nil, fmt.Errorf("invalid work ID")
}
// Get the work to ensure it exists
work, err := s.analysisRepo.GetWorkByID(ctx, workID)
if err != nil {
return nil, fmt.Errorf("work not found: %w", err)
}
// Get analysis results from database (may not exist yet)
_, readabilityScore, languageAnalysis, err := s.analysisRepo.GetAnalysisData(ctx, workID)
if err != nil {
log.FromContext(ctx).With("workID", workID).With("err", err).Warn("failed to load analysis data")
}
// Extract keywords from JSONB
var keywords []string
if languageAnalysis != nil && languageAnalysis.Analysis != nil {
if keywordsData, ok := languageAnalysis.Analysis["keywords"].([]interface{}); ok {
for _, kw := range keywordsData {
if keywordMap, ok := kw.(map[string]interface{}); ok {
if text, ok := keywordMap["text"].(string); ok {
keywords = append(keywords, text)
}
}
}
}
}
// Compute counters.
// Prefer StatsRepo (canonical counters), and fall back to counting tables when configured.
var (
viewCount int64
likeCount int64
commentCount int64
bookmarkCount int64
translationCount int64
popular []TranslationAnalytics
)
if s.deps.StatsRepo != nil {
if stats, err := s.deps.StatsRepo.GetOrCreateWorkStats(ctx, workID); err == nil && stats != nil {
viewCount = stats.Views
likeCount = stats.Likes
commentCount = stats.Comments
bookmarkCount = stats.Bookmarks
translationCount = stats.TranslationCount
}
}
if s.deps.StatsRepo == nil {
q := &domain.QueryOptions{Where: map[string]interface{}{"work_id": workID}}
if s.deps.LikeCounter != nil {
if c, err := s.deps.LikeCounter.CountWithOptions(ctx, q); err == nil {
likeCount = c
}
}
if s.deps.CommentCounter != nil {
if c, err := s.deps.CommentCounter.CountWithOptions(ctx, q); err == nil {
commentCount = c
}
}
if s.deps.BookmarkCounter != nil {
if c, err := s.deps.BookmarkCounter.CountWithOptions(ctx, q); err == nil {
bookmarkCount = c
}
}
if s.deps.TranslationCount != nil {
if c, err := s.deps.TranslationCount.CountWithOptions(ctx, q); err == nil {
translationCount = c
}
}
}
// Build PopularTranslations using per-translation stats.
if s.deps.TranslationList != nil && s.deps.StatsRepo != nil {
if translations, err := s.deps.TranslationList.ListByWorkID(ctx, workID); err == nil {
for _, t := range translations {
var views, likes int64
if ts, err := s.deps.StatsRepo.GetOrCreateTranslationStats(ctx, t.ID); err == nil && ts != nil {
views = ts.Views
likes = ts.Likes
}
popular = append(popular, TranslationAnalytics{
TranslationID: t.ID,
Language: t.Language,
ViewCount: views,
LikeCount: likes,
})
}
}
}
// Sort popular translations by view count desc, then likes desc
if len(popular) > 1 {
sort.SliceStable(popular, func(i, j int) bool {
if popular[i].ViewCount == popular[j].ViewCount {
return popular[i].LikeCount > popular[j].LikeCount
}
return popular[i].ViewCount > popular[j].ViewCount
})
// limit to top 3
if len(popular) > 3 {
popular = popular[:3]
}
}
return &WorkAnalytics{
WorkID: work.ID,
ViewCount: viewCount,
LikeCount: likeCount,
CommentCount: commentCount,
BookmarkCount: bookmarkCount,
TranslationCount: translationCount,
ReadabilityScore: safeReadabilityScore(readabilityScore),
SentimentScore: safeSentimentScore(languageAnalysis),
TopKeywords: keywords,
PopularTranslations: popular,
}, nil
}
// safeReadabilityScore returns the readability score held by rs, or 0 when
// no readability record exists (rs is nil).
func safeReadabilityScore(rs *domain.ReadabilityScore) float64 {
	if rs != nil {
		return rs.Score
	}
	return 0
}
// safeSentimentScore returns the sentiment stored in la's analysis payload,
// or 0 when no language-analysis record exists (la is nil).
func safeSentimentScore(la *domain.LanguageAnalysis) float64 {
	if la != nil {
		return extractSentimentFromAnalysis(la.Analysis)
	}
	return 0
}
// extractSentimentFromAnalysis extracts the "sentiment" value from the
// Analysis JSONB field. A nil payload, a missing key, or a value that is not
// a float64 all yield 0.0.
func extractSentimentFromAnalysis(analysis domain.JSONB) float64 {
	if analysis == nil {
		return 0.0
	}
	value, present := analysis["sentiment"]
	if !present {
		return 0.0
	}
	sentiment, isFloat := value.(float64)
	if !isFloat {
		return 0.0
	}
	return sentiment
}