mirror of
https://github.com/SamyRai/tercul-backend.git
synced 2025-12-27 05:11:34 +00:00
Some checks failed
- Updated database models and repositories to replace uint IDs with UUIDs. - Modified test fixtures to generate and use UUIDs for authors, translations, users, and works. - Adjusted mock implementations to align with the new UUID structure. - Ensured all relevant functions and methods are updated to handle UUIDs correctly. - Added necessary imports for UUID handling in various files.
322 lines
9.1 KiB
Go
322 lines
9.1 KiB
Go
package linguistics
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"sort"
|
|
"tercul/internal/domain"
|
|
"tercul/internal/platform/log"
|
|
"time"
|
|
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// counter is a consumer-defined seam over any repository that can count rows
// matching query options (likes, comments, bookmarks, translations).
type counter interface {
	// CountWithOptions returns the number of rows matching options.
	CountWithOptions(ctx context.Context, options *domain.QueryOptions) (int64, error)
}
|
|
|
|
// translationLister is a consumer-defined seam over a repository that can
// list all translations belonging to a work.
type translationLister interface {
	// ListByWorkID returns the translations of the work identified by workID.
	ListByWorkID(ctx context.Context, workID uuid.UUID) ([]domain.Translation, error)
}
|
|
|
|
// WorkAnalyticsDeps bundles the data sources GetWorkAnalytics reads from.
// All fields are optional: StatsRepo is the canonical counter source, and the
// per-table counters are only consulted when StatsRepo is nil (see
// GetWorkAnalytics).
type WorkAnalyticsDeps struct {
	// StatsRepo supplies canonical per-work and per-translation counters.
	StatsRepo domain.StatsRepository
	// LikeCounter counts likes by work_id (fallback when StatsRepo is nil).
	LikeCounter counter
	// CommentCounter counts comments by work_id (fallback).
	CommentCounter counter
	// BookmarkCounter counts bookmarks by work_id (fallback).
	BookmarkCounter counter
	// TranslationCount counts translations by work_id (fallback).
	TranslationCount counter
	// TranslationList lists a work's translations for popularity ranking.
	TranslationList translationLister
}
|
|
|
|
// WorkAnalysisService defines the interface for work-specific analysis operations.
type WorkAnalysisService interface {
	// AnalyzeWork performs linguistic analysis on a work identified by workID
	// and persists the results. It returns an error for the nil UUID, when
	// the work or its content cannot be loaded, or when analysis/storage fails.
	AnalyzeWork(ctx context.Context, workID uuid.UUID) error

	// GetWorkAnalytics retrieves analytics data for a work: engagement
	// counters plus analysis-derived scores and keywords.
	GetWorkAnalytics(ctx context.Context, workID uuid.UUID) (*WorkAnalytics, error)
}
|
|
|
|
// WorkAnalytics contains analytics data for a work: engagement counters plus
// scores and keywords derived from stored linguistic analysis. Counter fields
// are zero when no stats source is configured; score/keyword fields are zero
// or empty when no analysis has been stored yet.
type WorkAnalytics struct {
	WorkID           uuid.UUID
	ViewCount        int64
	LikeCount        int64
	CommentCount     int64
	BookmarkCount    int64
	TranslationCount int64
	ReadabilityScore float64
	SentimentScore   float64
	TopKeywords      []string
	// PopularTranslations holds at most the top 3 translations, ordered by
	// views then likes, descending (see GetWorkAnalytics).
	PopularTranslations []TranslationAnalytics
}
|
|
|
|
// TranslationAnalytics contains analytics data for a single translation,
// used to rank a work's translations by popularity.
type TranslationAnalytics struct {
	TranslationID uuid.UUID
	Language      string
	ViewCount     int64
	LikeCount     int64
}
|
|
|
|
// workAnalysisService implements the WorkAnalysisService interface.
type workAnalysisService struct {
	// textAnalyzer performs the actual linguistic analysis of text.
	textAnalyzer TextAnalyzer
	// analysisCache caches analysis results keyed by work ID.
	analysisCache AnalysisCache
	// analysisRepo loads works/content and stores analysis results.
	analysisRepo AnalysisRepository
	// deps provides stats and counter sources for GetWorkAnalytics.
	deps WorkAnalyticsDeps
	// concurrency is the worker count used for large-text analysis (>1 enables it).
	concurrency int
	// cacheEnabled gates all cache reads/writes (ANDed with analysisCache.IsEnabled).
	cacheEnabled bool
}
|
|
|
|
// NewWorkAnalysisService creates a new WorkAnalysisService
|
|
func NewWorkAnalysisService(
|
|
textAnalyzer TextAnalyzer,
|
|
analysisCache AnalysisCache,
|
|
analysisRepo AnalysisRepository,
|
|
deps WorkAnalyticsDeps,
|
|
concurrency int,
|
|
cacheEnabled bool,
|
|
) WorkAnalysisService {
|
|
return &workAnalysisService{
|
|
textAnalyzer: textAnalyzer,
|
|
analysisCache: analysisCache,
|
|
analysisRepo: analysisRepo,
|
|
deps: deps,
|
|
concurrency: concurrency,
|
|
cacheEnabled: cacheEnabled,
|
|
}
|
|
}
|
|
|
|
// AnalyzeWork performs linguistic analysis on a work and stores the results
|
|
func (s *workAnalysisService) AnalyzeWork(ctx context.Context, workID uuid.UUID) error {
|
|
logger := log.FromContext(ctx).With("workID", workID)
|
|
|
|
if workID == uuid.Nil {
|
|
return fmt.Errorf("invalid work ID")
|
|
}
|
|
|
|
// Check cache first if enabled
|
|
if s.cacheEnabled && s.analysisCache.IsEnabled() {
|
|
cacheKey := fmt.Sprintf("work_analysis:%d", workID)
|
|
|
|
if result, err := s.analysisCache.Get(ctx, cacheKey); err == nil {
|
|
logger.Info("Cache hit for work analysis")
|
|
|
|
// Store directly to database
|
|
return s.analysisRepo.StoreAnalysisResults(ctx, workID, result)
|
|
}
|
|
}
|
|
|
|
// Get work content from database
|
|
content, err := s.analysisRepo.GetWorkContent(ctx, workID, "")
|
|
if err != nil {
|
|
logger.Error(err, "Failed to get work content for analysis")
|
|
return fmt.Errorf("failed to get work content: %w", err)
|
|
}
|
|
|
|
// Skip analysis if content is empty
|
|
if content == "" {
|
|
logger.Warn("Skipping analysis for work with empty content")
|
|
return nil
|
|
}
|
|
|
|
// Get work to determine language (via repository to avoid leaking GORM)
|
|
work, err := s.analysisRepo.GetWorkByID(ctx, workID)
|
|
if err != nil {
|
|
logger.Error(err, "Failed to fetch work for analysis")
|
|
return fmt.Errorf("failed to fetch work: %w", err)
|
|
}
|
|
|
|
// Analyze the text
|
|
start := time.Now()
|
|
logger.With("language", work.Language).
|
|
With("contentLength", len(content)).
|
|
Info("Analyzing work")
|
|
|
|
var result *AnalysisResult
|
|
|
|
// Use concurrent processing for large texts
|
|
if len(content) > 10000 && s.concurrency > 1 {
|
|
result, err = s.textAnalyzer.AnalyzeTextConcurrently(ctx, content, work.Language, s.concurrency)
|
|
} else {
|
|
result, err = s.textAnalyzer.AnalyzeText(ctx, content, work.Language)
|
|
}
|
|
|
|
if err != nil {
|
|
logger.Error(err, "Failed to analyze work text")
|
|
return fmt.Errorf("failed to analyze work text: %w", err)
|
|
}
|
|
|
|
// Store results in database
|
|
if err := s.analysisRepo.StoreAnalysisResults(ctx, workID, result); err != nil {
|
|
logger.Error(err, "Failed to store analysis results")
|
|
return fmt.Errorf("failed to store analysis results: %w", err)
|
|
}
|
|
|
|
// Cache the result if caching is enabled
|
|
if s.cacheEnabled && s.analysisCache.IsEnabled() {
|
|
cacheKey := fmt.Sprintf("work_analysis:%d", workID)
|
|
if err := s.analysisCache.Set(ctx, cacheKey, result); err != nil {
|
|
logger.Error(err, "Failed to cache work analysis result")
|
|
}
|
|
}
|
|
|
|
logger.With("wordCount", result.WordCount).
|
|
With("readabilityScore", result.ReadabilityScore).
|
|
With("sentiment", result.Sentiment).
|
|
With("durationMs", time.Since(start).Milliseconds()).
|
|
Info("Successfully analyzed work")
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetWorkAnalytics retrieves analytics data for a work.
//
// It verifies the work exists, loads stored analysis data (tolerating its
// absence), computes engagement counters — preferring StatsRepo and falling
// back to per-table counters only when StatsRepo is nil — and ranks the
// work's translations by views/likes, keeping the top 3.
func (s *workAnalysisService) GetWorkAnalytics(ctx context.Context, workID uuid.UUID) (*WorkAnalytics, error) {
	if workID == uuid.Nil {
		return nil, fmt.Errorf("invalid work ID")
	}

	// Get the work to ensure it exists
	work, err := s.analysisRepo.GetWorkByID(ctx, workID)
	if err != nil {
		return nil, fmt.Errorf("work not found: %w", err)
	}

	// Get analysis results from database (may not exist yet).
	// A load failure is logged but not fatal: counters are still returned
	// and the analysis-derived fields fall back to zero values.
	_, readabilityScore, languageAnalysis, err := s.analysisRepo.GetAnalysisData(ctx, workID)
	if err != nil {
		log.FromContext(ctx).With("workID", workID).With("err", err).Warn("failed to load analysis data")
	}

	// Extract keywords from JSONB.
	// Expected shape: Analysis["keywords"] is a list of objects each holding
	// a "text" string; entries of any other shape are silently skipped.
	var keywords []string
	if languageAnalysis != nil && languageAnalysis.Analysis != nil {
		if keywordsData, ok := languageAnalysis.Analysis["keywords"].([]interface{}); ok {
			for _, kw := range keywordsData {
				if keywordMap, ok := kw.(map[string]interface{}); ok {
					if text, ok := keywordMap["text"].(string); ok {
						keywords = append(keywords, text)
					}
				}
			}
		}
	}

	// Compute counters.
	// Prefer StatsRepo (canonical counters), and fall back to counting tables when configured.
	var (
		viewCount        int64
		likeCount        int64
		commentCount     int64
		bookmarkCount    int64
		translationCount int64
		popular          []TranslationAnalytics
	)

	// Canonical path: a stats read failure leaves all counters at zero
	// (no fallback is attempted when StatsRepo is configured).
	if s.deps.StatsRepo != nil {
		if stats, err := s.deps.StatsRepo.GetOrCreateWorkStats(ctx, workID); err == nil && stats != nil {
			viewCount = stats.Views
			likeCount = stats.Likes
			commentCount = stats.Comments
			bookmarkCount = stats.Bookmarks
			translationCount = stats.TranslationCount
		}
	}

	// Fallback path: count rows per table, best effort (count errors are
	// ignored, leaving zero). Note there is no view counter dependency, so
	// viewCount always stays 0 without StatsRepo.
	if s.deps.StatsRepo == nil {
		q := &domain.QueryOptions{Where: map[string]interface{}{"work_id": workID}}
		if s.deps.LikeCounter != nil {
			if c, err := s.deps.LikeCounter.CountWithOptions(ctx, q); err == nil {
				likeCount = c
			}
		}
		if s.deps.CommentCounter != nil {
			if c, err := s.deps.CommentCounter.CountWithOptions(ctx, q); err == nil {
				commentCount = c
			}
		}
		if s.deps.BookmarkCounter != nil {
			if c, err := s.deps.BookmarkCounter.CountWithOptions(ctx, q); err == nil {
				bookmarkCount = c
			}
		}
		if s.deps.TranslationCount != nil {
			if c, err := s.deps.TranslationCount.CountWithOptions(ctx, q); err == nil {
				translationCount = c
			}
		}
	}

	// Build PopularTranslations using per-translation stats.
	// Requires both a lister and StatsRepo; translations whose stats fail
	// to load are still included with zero counts.
	if s.deps.TranslationList != nil && s.deps.StatsRepo != nil {
		if translations, err := s.deps.TranslationList.ListByWorkID(ctx, workID); err == nil {
			for _, t := range translations {
				var views, likes int64
				if ts, err := s.deps.StatsRepo.GetOrCreateTranslationStats(ctx, t.ID); err == nil && ts != nil {
					views = ts.Views
					likes = ts.Likes
				}
				popular = append(popular, TranslationAnalytics{
					TranslationID: t.ID,
					Language:      t.Language,
					ViewCount:     views,
					LikeCount:     likes,
				})
			}
		}
	}

	// Sort popular translations by view count desc, then likes desc
	// (stable sort preserves list order for full ties).
	if len(popular) > 1 {
		sort.SliceStable(popular, func(i, j int) bool {
			if popular[i].ViewCount == popular[j].ViewCount {
				return popular[i].LikeCount > popular[j].LikeCount
			}
			return popular[i].ViewCount > popular[j].ViewCount
		})
		// limit to top 3
		if len(popular) > 3 {
			popular = popular[:3]
		}
	}

	return &WorkAnalytics{
		WorkID:              work.ID,
		ViewCount:           viewCount,
		LikeCount:           likeCount,
		CommentCount:        commentCount,
		BookmarkCount:       bookmarkCount,
		TranslationCount:    translationCount,
		ReadabilityScore:    safeReadabilityScore(readabilityScore),
		SentimentScore:      safeSentimentScore(languageAnalysis),
		TopKeywords:         keywords,
		PopularTranslations: popular,
	}, nil
}
|
|
|
|
func safeReadabilityScore(rs *domain.ReadabilityScore) float64 {
|
|
if rs == nil {
|
|
return 0
|
|
}
|
|
return rs.Score
|
|
}
|
|
|
|
func safeSentimentScore(la *domain.LanguageAnalysis) float64 {
|
|
if la == nil {
|
|
return 0
|
|
}
|
|
return extractSentimentFromAnalysis(la.Analysis)
|
|
}
|
|
|
|
// extractSentimentFromAnalysis extracts sentiment from the Analysis JSONB field
|
|
func extractSentimentFromAnalysis(analysis domain.JSONB) float64 {
|
|
if analysis == nil {
|
|
return 0.0
|
|
}
|
|
if sentiment, ok := analysis["sentiment"].(float64); ok {
|
|
return sentiment
|
|
}
|
|
return 0.0
|
|
}
|