tercul-backend/internal/jobs/linguistics/analysis_repository.go
google-labs-jules[bot] 781b313bf1 feat: Complete all pending tasks from TASKS.md
This commit addresses all the high-priority tasks outlined in the TASKS.md file, significantly improving the application's observability, completing key features, and refactoring critical parts of the codebase.

### Observability

- **Centralized Logging:** Implemented a structured, context-aware logging system built on `zerolog`. A logging middleware injects request-specific fields (request ID, user ID, trace ID) into the logger, and all application logging has been refactored to use it (a minimal sketch of the pattern follows this list).
- **Prometheus Metrics:** Added Prometheus metrics for database query performance by creating a GORM plugin that automatically records query latency and totals.
- **OpenTelemetry Tracing:** Fully instrumented all application services in `internal/app` and data repositories in `internal/data/sql` with OpenTelemetry tracing, providing deep visibility into application performance.
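
A minimal sketch of that middleware pattern, assuming `zerolog` and a private context key. The handler wiring, header name, and fallback logger here are illustrative; the real `tercul/internal/platform/log` wrapper used in the file below exposes its own `With(key, value)` helper:

```go
package log

import (
	"context"
	"net/http"
	"os"

	"github.com/rs/zerolog"
)

type ctxKey struct{}

// FromContext returns the request-scoped logger, falling back to a default.
func FromContext(ctx context.Context) *zerolog.Logger {
	if l, ok := ctx.Value(ctxKey{}).(*zerolog.Logger); ok {
		return l
	}
	l := zerolog.New(os.Stdout).With().Timestamp().Logger()
	return &l
}

// Middleware builds a request-scoped logger and stores it in the context.
func Middleware(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		l := zerolog.New(os.Stdout).With().
			Timestamp().
			Str("request_id", r.Header.Get("X-Request-ID")). // illustrative header
			Logger()
		ctx := context.WithValue(r.Context(), ctxKey{}, &l)
		next.ServeHTTP(w, r.WithContext(ctx))
	})
}
```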

### Features

- **Analytics:** Implemented like, comment, and bookmark counting. The respective command handlers now call the analytics service to increment counters when these actions are performed.
- **Enrichment Tool:** Built a new, extensible `enrich` command-line tool to fetch data from external sources. The initial implementation enriches author data using the Open Library API.
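
A minimal sketch of the kind of lookup the `enrich` tool performs against the Open Library author-search endpoint; the function name and the response fields kept here are illustrative, not the tool's actual code:

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
)

// authorSearch mirrors the fields of interest from the Open Library
// response; the fields kept here are assumptions for the sketch.
type authorSearch struct {
	Docs []struct {
		Key       string `json:"key"`
		Name      string `json:"name"`
		BirthDate string `json:"birth_date"`
	} `json:"docs"`
}

// lookupAuthor queries https://openlibrary.org/search/authors.json.
func lookupAuthor(name string) (*authorSearch, error) {
	resp, err := http.Get("https://openlibrary.org/search/authors.json?q=" + url.QueryEscape(name))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("open library: unexpected status %s", resp.Status)
	}
	var out authorSearch
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return nil, err
	}
	return &out, nil
}

func main() {
	res, err := lookupAuthor("Orhan Pamuk")
	if err == nil && len(res.Docs) > 0 {
		fmt.Println(res.Docs[0].Name, res.Docs[0].Key)
	}
}
```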

### Refactoring & Fixes

- **Decoupled Testing:** Refactored the testing utilities in `internal/testutil` to be database-agnostic, promoting mock-based unit tests and improving test speed and reliability (see the test sketch after this list).
- **Build Fixes:** Resolved numerous build errors, including a critical import cycle between the logging, observability, and authentication packages.
- **Search Service:** Fixed the search service integration by implementing the `GetWorkContent` method in the localization service, allowing the search indexer to correctly fetch and index work content.
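
The `AnalysisRepository` interface defined in the file below lends itself to that mock-based style. A minimal sketch of such a test, assuming only that interface; the mock type and test are illustrative and not part of `internal/testutil`:

```go
package linguistics_test

import (
	"context"
	"fmt"
	"testing"

	"tercul/internal/jobs/linguistics"
)

// mockAnalysisRepo satisfies AnalysisRepository by embedding the interface;
// only the methods a test needs are overridden, the rest panic if called.
type mockAnalysisRepo struct {
	linguistics.AnalysisRepository
	content map[uint]string
}

func (m *mockAnalysisRepo) GetWorkContent(ctx context.Context, workID uint, language string) (string, error) {
	if c, ok := m.content[workID]; ok {
		return c, nil
	}
	return "", fmt.Errorf("no content found for work %d", workID)
}

func TestGetWorkContent_UsesCannedContent(t *testing.T) {
	repo := &mockAnalysisRepo{content: map[uint]string{42: "body text"}}
	got, err := repo.GetWorkContent(context.Background(), 42, "en")
	if err != nil || got != "body text" {
		t.Fatalf("got %q, err %v", got, err)
	}
}
```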
2025-10-05 05:26:27 +00:00

package linguistics

import (
	"context"
	"fmt"

	"gorm.io/gorm"

	"tercul/internal/domain"
	"tercul/internal/domain/work"
	"tercul/internal/platform/log"
)

// AnalysisRepository defines the interface for database operations related to analysis.
type AnalysisRepository interface {
	// StoreAnalysisResults stores analysis results in the database.
	StoreAnalysisResults(ctx context.Context, workID uint, result *AnalysisResult) error
	// GetWorkContent retrieves content for a work from translations.
	GetWorkContent(ctx context.Context, workID uint, language string) (string, error)
	// StoreWorkAnalysis stores work-specific analysis results.
	StoreWorkAnalysis(ctx context.Context, workID uint, textMetadata *domain.TextMetadata,
		readabilityScore *domain.ReadabilityScore, languageAnalysis *domain.LanguageAnalysis) error
	// GetWorkByID fetches a work by ID.
	GetWorkByID(ctx context.Context, workID uint) (*work.Work, error)
	// GetAnalysisData fetches persisted analysis data for a work.
	GetAnalysisData(ctx context.Context, workID uint) (*domain.TextMetadata, *domain.ReadabilityScore, *domain.LanguageAnalysis, error)
}

// GORMAnalysisRepository implements AnalysisRepository using GORM.
type GORMAnalysisRepository struct {
	db *gorm.DB
}

// NewGORMAnalysisRepository creates a new GORMAnalysisRepository.
func NewGORMAnalysisRepository(db *gorm.DB) *GORMAnalysisRepository {
	return &GORMAnalysisRepository{db: db}
}

// StoreAnalysisResults stores analysis results in the database.
func (r *GORMAnalysisRepository) StoreAnalysisResults(ctx context.Context, workID uint, result *AnalysisResult) error {
	logger := log.FromContext(ctx).With("workID", workID)
	if result == nil {
		return fmt.Errorf("analysis result cannot be nil")
	}
	// Determine language from the work record to avoid hardcoded defaults.
	var workRecord work.Work
	if err := r.db.WithContext(ctx).First(&workRecord, workID).Error; err != nil {
		logger.Error(err, "Failed to fetch work for language")
		return fmt.Errorf("failed to fetch work for language: %w", err)
	}
	// Create text metadata.
	textMetadata := &domain.TextMetadata{
		WorkID:                workID,
		Language:              workRecord.Language,
		WordCount:             result.WordCount,
		SentenceCount:         result.SentenceCount,
		ParagraphCount:        result.ParagraphCount,
		AverageWordLength:     result.AvgWordLength,
		AverageSentenceLength: result.AvgSentenceLength,
	}
	// Create readability score.
	readabilityScore := &domain.ReadabilityScore{
		WorkID:   workID,
		Language: workRecord.Language,
		Score:    result.ReadabilityScore,
		Method:   result.ReadabilityMethod,
	}
	// Create language analysis.
	languageAnalysis := &domain.LanguageAnalysis{
		WorkID:   workID,
		Language: workRecord.Language,
		Analysis: domain.JSONB{
			"sentiment": result.Sentiment,
			"keywords":  extractKeywordsAsJSON(result.Keywords),
			"topics":    extractTopicsAsJSON(result.Topics),
		},
	}
	return r.StoreWorkAnalysis(ctx, workID, textMetadata, readabilityScore, languageAnalysis)
}

// GetWorkContent retrieves content for a work from translations.
func (r *GORMAnalysisRepository) GetWorkContent(ctx context.Context, workID uint, language string) (string, error) {
	logger := log.FromContext(ctx).With("workID", workID)
	// First, get the work to determine its language.
	var workRecord work.Work
	if err := r.db.WithContext(ctx).First(&workRecord, workID).Error; err != nil {
		logger.Error(err, "Failed to fetch work for content retrieval")
		return "", fmt.Errorf("failed to fetch work: %w", err)
	}
	// Try to get content from translations in order of preference:
	// 1. The explicitly requested language, if any
	// 2. The original-language translation
	// 3. The work's own language
	// 4. Any available translation
	var translation domain.Translation
	if language != "" {
		if err := r.db.WithContext(ctx).Where("translatable_type = ? AND translatable_id = ? AND language = ?",
			"works", workID, language).First(&translation).Error; err == nil {
			return translation.Content, nil
		}
	}
	// Try the original language.
	if err := r.db.WithContext(ctx).Where("translatable_type = ? AND translatable_id = ? AND is_original_language = ?",
		"works", workID, true).First(&translation).Error; err == nil {
		return translation.Content, nil
	}
	// Try the work's language.
	if err := r.db.WithContext(ctx).Where("translatable_type = ? AND translatable_id = ? AND language = ?",
		"works", workID, workRecord.Language).First(&translation).Error; err == nil {
		return translation.Content, nil
	}
	// Fall back to any available translation.
	if err := r.db.WithContext(ctx).Where("translatable_type = ? AND translatable_id = ?",
		"works", workID).First(&translation).Error; err == nil {
		return translation.Content, nil
	}
	return "", fmt.Errorf("no content found for work %d", workID)
}

// GetWorkByID fetches a work by ID.
func (r *GORMAnalysisRepository) GetWorkByID(ctx context.Context, workID uint) (*work.Work, error) {
	var workRecord work.Work
	if err := r.db.WithContext(ctx).First(&workRecord, workID).Error; err != nil {
		return nil, fmt.Errorf("failed to fetch work: %w", err)
	}
	return &workRecord, nil
}

// GetAnalysisData fetches persisted analysis data for a work. Missing records
// are logged and returned as nil rather than reported as errors, so callers
// can use whichever pieces of the analysis exist.
func (r *GORMAnalysisRepository) GetAnalysisData(ctx context.Context, workID uint) (*domain.TextMetadata, *domain.ReadabilityScore, *domain.LanguageAnalysis, error) {
	logger := log.FromContext(ctx).With("workID", workID)
	textMetadata := &domain.TextMetadata{}
	if err := r.db.WithContext(ctx).Where("work_id = ?", workID).First(textMetadata).Error; err != nil {
		logger.Warn("No text metadata found for work")
		textMetadata = nil
	}
	readabilityScore := &domain.ReadabilityScore{}
	if err := r.db.WithContext(ctx).Where("work_id = ?", workID).First(readabilityScore).Error; err != nil {
		logger.Warn("No readability score found for work")
		readabilityScore = nil
	}
	languageAnalysis := &domain.LanguageAnalysis{}
	if err := r.db.WithContext(ctx).Where("work_id = ?", workID).First(languageAnalysis).Error; err != nil {
		logger.Warn("No language analysis found for work")
		languageAnalysis = nil
	}
	return textMetadata, readabilityScore, languageAnalysis, nil
}

// StoreWorkAnalysis stores work-specific analysis results.
func (r *GORMAnalysisRepository) StoreWorkAnalysis(ctx context.Context, workID uint,
	textMetadata *domain.TextMetadata, readabilityScore *domain.ReadabilityScore,
	languageAnalysis *domain.LanguageAnalysis) error {
	logger := log.FromContext(ctx).With("workID", workID)
	// Use a transaction so all results are stored atomically.
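	// Delete-then-create below acts as a portable upsert: each run replaces
	// the previous rows wholesale, so no stale analysis fields survive a re-run.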
	return r.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
		// Store text metadata.
		if textMetadata != nil {
			if err := tx.Where("work_id = ?", workID).Delete(&domain.TextMetadata{}).Error; err != nil {
				logger.Error(err, "Failed to delete existing text metadata")
				return fmt.Errorf("failed to delete existing text metadata: %w", err)
			}
			if err := tx.Create(textMetadata).Error; err != nil {
				logger.Error(err, "Failed to store text metadata")
				return fmt.Errorf("failed to store text metadata: %w", err)
			}
		}
		// Store readability score.
		if readabilityScore != nil {
			if err := tx.Where("work_id = ?", workID).Delete(&domain.ReadabilityScore{}).Error; err != nil {
				logger.Error(err, "Failed to delete existing readability score")
				return fmt.Errorf("failed to delete existing readability score: %w", err)
			}
			if err := tx.Create(readabilityScore).Error; err != nil {
				logger.Error(err, "Failed to store readability score")
				return fmt.Errorf("failed to store readability score: %w", err)
			}
		}
		// Store language analysis.
		if languageAnalysis != nil {
			if err := tx.Where("work_id = ?", workID).Delete(&domain.LanguageAnalysis{}).Error; err != nil {
				logger.Error(err, "Failed to delete existing language analysis")
				return fmt.Errorf("failed to delete existing language analysis: %w", err)
			}
			if err := tx.Create(languageAnalysis).Error; err != nil {
				logger.Error(err, "Failed to store language analysis")
				return fmt.Errorf("failed to store language analysis: %w", err)
			}
		}
		logger.Info("Successfully stored analysis results")
		return nil
	})
}

// Helper functions for data conversion.

func extractKeywordsAsJSON(keywords []Keyword) domain.JSONB {
	if len(keywords) == 0 {
		return domain.JSONB{}
	}
	keywordData := make([]map[string]interface{}, len(keywords))
	for i, kw := range keywords {
		keywordData[i] = map[string]interface{}{
			"text":      kw.Text,
			"relevance": kw.Relevance,
		}
	}
	return domain.JSONB{"keywords": keywordData}
}

func extractTopicsAsJSON(topics []Topic) domain.JSONB {
	if len(topics) == 0 {
		return domain.JSONB{}
	}
	topicData := make([]map[string]interface{}, len(topics))
	for i, topic := range topics {
		topicData[i] = map[string]interface{}{
			"name":      topic.Name,
			"relevance": topic.Relevance,
		}
	}
	return domain.JSONB{"topics": topicData}
}