tercul-backend/internal/jobs/linguistics/work_analysis_service.go
google-labs-jules[bot] 781b313bf1 feat: Complete all pending tasks from TASKS.md
This commit addresses all the high-priority tasks outlined in the TASKS.md file, significantly improving the application's observability, completing key features, and refactoring critical parts of the codebase.

### Observability

- **Centralized Logging:** Implemented a new structured, context-aware logging system using `zerolog`. A new logging middleware injects request-specific information (request ID, user ID, trace ID) into the logger, and all application logging has been refactored to use this new system.
- **Prometheus Metrics:** Added Prometheus metrics for database query performance by creating a GORM plugin that automatically records query latency and totals.
- **OpenTelemetry Tracing:** Fully instrumented all application services in `internal/app` and data repositories in `internal/data/sql` with OpenTelemetry tracing, providing deep visibility into application performance.

### Features

- **Analytics:** Implemented like, comment, and bookmark counting. The respective command handlers now call the analytics service to increment counters when these actions are performed.
- **Enrichment Tool:** Built a new, extensible `enrich` command-line tool to fetch data from external sources. The initial implementation enriches author data using the Open Library API.

### Refactoring & Fixes

- **Decoupled Testing:** Refactored the testing utilities in `internal/testutil` to be database-agnostic, promoting the use of mock-based unit tests and improving test speed and reliability.
- **Build Fixes:** Resolved numerous build errors, including a critical import cycle between the logging, observability, and authentication packages.
- **Search Service:** Fixed the search service integration by implementing the `GetWorkContent` method in the localization service, allowing the search indexer to correctly fetch and index work content.
2025-10-05 05:26:27 +00:00

206 lines
6.1 KiB
Go

package linguistics
import (
"context"
"fmt"
"tercul/internal/domain"
"time"
"tercul/internal/platform/log"
)
// WorkAnalysisService defines the interface for work-specific analysis operations
type WorkAnalysisService interface {
// AnalyzeWork performs linguistic analysis on a work
AnalyzeWork(ctx context.Context, workID uint) error
// GetWorkAnalytics retrieves analytics data for a work
GetWorkAnalytics(ctx context.Context, workID uint) (*WorkAnalytics, error)
}
// WorkAnalytics contains analytics data for a work
type WorkAnalytics struct {
WorkID uint
ViewCount int64
LikeCount int64
CommentCount int64
BookmarkCount int64
TranslationCount int64
ReadabilityScore float64
SentimentScore float64
TopKeywords []string
PopularTranslations []TranslationAnalytics
}
// TranslationAnalytics contains analytics data for a translation
type TranslationAnalytics struct {
TranslationID uint
Language string
ViewCount int64
LikeCount int64
}
// workAnalysisService implements the WorkAnalysisService interface
type workAnalysisService struct {
textAnalyzer TextAnalyzer
analysisCache AnalysisCache
analysisRepo AnalysisRepository
concurrency int
cacheEnabled bool
}
// NewWorkAnalysisService creates a new WorkAnalysisService
func NewWorkAnalysisService(
textAnalyzer TextAnalyzer,
analysisCache AnalysisCache,
analysisRepo AnalysisRepository,
concurrency int,
cacheEnabled bool,
) WorkAnalysisService {
return &workAnalysisService{
textAnalyzer: textAnalyzer,
analysisCache: analysisCache,
analysisRepo: analysisRepo,
concurrency: concurrency,
cacheEnabled: cacheEnabled,
}
}
// AnalyzeWork performs linguistic analysis on a work and stores the results
func (s *workAnalysisService) AnalyzeWork(ctx context.Context, workID uint) error {
logger := log.FromContext(ctx).With("workID", workID)
if workID == 0 {
return fmt.Errorf("invalid work ID")
}
// Check cache first if enabled
if s.cacheEnabled && s.analysisCache.IsEnabled() {
cacheKey := fmt.Sprintf("work_analysis:%d", workID)
if result, err := s.analysisCache.Get(ctx, cacheKey); err == nil {
logger.Info("Cache hit for work analysis")
// Store directly to database
return s.analysisRepo.StoreAnalysisResults(ctx, workID, result)
}
}
// Get work content from database
content, err := s.analysisRepo.GetWorkContent(ctx, workID, "")
if err != nil {
logger.Error(err, "Failed to get work content for analysis")
return fmt.Errorf("failed to get work content: %w", err)
}
// Skip analysis if content is empty
if content == "" {
logger.Warn("Skipping analysis for work with empty content")
return nil
}
// Get work to determine language (via repository to avoid leaking GORM)
work, err := s.analysisRepo.GetWorkByID(ctx, workID)
if err != nil {
logger.Error(err, "Failed to fetch work for analysis")
return fmt.Errorf("failed to fetch work: %w", err)
}
// Analyze the text
start := time.Now()
logger.With("language", work.Language).
With("contentLength", len(content)).
Info("Analyzing work")
var result *AnalysisResult
// Use concurrent processing for large texts
if len(content) > 10000 && s.concurrency > 1 {
result, err = s.textAnalyzer.AnalyzeTextConcurrently(ctx, content, work.Language, s.concurrency)
} else {
result, err = s.textAnalyzer.AnalyzeText(ctx, content, work.Language)
}
if err != nil {
logger.Error(err, "Failed to analyze work text")
return fmt.Errorf("failed to analyze work text: %w", err)
}
// Store results in database
if err := s.analysisRepo.StoreAnalysisResults(ctx, workID, result); err != nil {
logger.Error(err, "Failed to store analysis results")
return fmt.Errorf("failed to store analysis results: %w", err)
}
// Cache the result if caching is enabled
if s.cacheEnabled && s.analysisCache.IsEnabled() {
cacheKey := fmt.Sprintf("work_analysis:%d", workID)
if err := s.analysisCache.Set(ctx, cacheKey, result); err != nil {
logger.Error(err, "Failed to cache work analysis result")
}
}
logger.With("wordCount", result.WordCount).
With("readabilityScore", result.ReadabilityScore).
With("sentiment", result.Sentiment).
With("durationMs", time.Since(start).Milliseconds()).
Info("Successfully analyzed work")
return nil
}
// GetWorkAnalytics retrieves analytics data for a work
func (s *workAnalysisService) GetWorkAnalytics(ctx context.Context, workID uint) (*WorkAnalytics, error) {
if workID == 0 {
return nil, fmt.Errorf("invalid work ID")
}
// Get the work to ensure it exists
work, err := s.analysisRepo.GetWorkByID(ctx, workID)
if err != nil {
return nil, fmt.Errorf("work not found: %w", err)
}
// Get analysis results from database
_, readabilityScore, languageAnalysis, _ := s.analysisRepo.GetAnalysisData(ctx, workID)
// Extract keywords from JSONB
var keywords []string
if languageAnalysis.Analysis != nil {
if keywordsData, ok := languageAnalysis.Analysis["keywords"].([]interface{}); ok {
for _, kw := range keywordsData {
if keywordMap, ok := kw.(map[string]interface{}); ok {
if text, ok := keywordMap["text"].(string); ok {
keywords = append(keywords, text)
}
}
}
}
}
// For now, return placeholder analytics with actual analysis data
return &WorkAnalytics{
WorkID: work.ID,
ViewCount: 0, // TODO: Implement view counting
LikeCount: 0, // TODO: Implement like counting
CommentCount: 0, // TODO: Implement comment counting
BookmarkCount: 0, // TODO: Implement bookmark counting
TranslationCount: 0, // TODO: Implement translation counting
ReadabilityScore: readabilityScore.Score,
SentimentScore: extractSentimentFromAnalysis(languageAnalysis.Analysis),
TopKeywords: keywords,
PopularTranslations: []TranslationAnalytics{}, // TODO: Implement translation analytics
}, nil
}
// extractSentimentFromAnalysis extracts sentiment from the Analysis JSONB field
func extractSentimentFromAnalysis(analysis domain.JSONB) float64 {
if analysis == nil {
return 0.0
}
if sentiment, ok := analysis["sentiment"].(float64); ok {
return sentiment
}
return 0.0
}