tercul-backend/internal/jobs/linguistics/analysis_repository.go
google-labs-jules[bot] caf07df08d feat(analytics): Enhance analytics capabilities
This commit introduces a comprehensive enhancement of the application's analytics features, addressing performance, data modeling, and feature set.

The key changes include:

- **Performance Improvement:** The analytics repository now uses a database "UPSERT" operation to increment counters, reducing two separate database calls (read and write) into a single, more efficient operation.

- **New Metrics:** The `WorkStats` and `TranslationStats` models have been enriched with new, calculated metrics:
  - `ReadingTime`: An estimation of the time required to read the work or translation.
  - `Complexity`: A score representing the linguistic complexity of the text.
  - `Sentiment`: A score indicating the emotional tone of the text.

- **Service Refactoring:** The analytics service has been refactored to support the new metrics. It now includes methods to calculate and update these scores, leveraging the existing linguistics package for text analysis.

- **GraphQL API Expansion:** The new analytics fields (`readingTime`, `complexity`, `sentiment`) have been exposed through the GraphQL API by updating the `WorkStats` and `TranslationStats` types in the schema.

- **Validation and Testing:**
  - GraphQL input validation has been centralized and improved by moving from ad-hoc checks to a consistent validation pattern in the GraphQL layer.
  - The test suite has been significantly improved with the addition of new tests for the analytics service and the data access layer, ensuring the correctness and robustness of the new features. This includes fixing several bugs that were discovered during the development process.
2025-09-07 19:26:51 +00:00

259 lines
8.6 KiB
Go

package linguistics
import (
"context"
"fmt"
"tercul/internal/domain"
"gorm.io/gorm"
"tercul/internal/platform/log"
)
// AnalysisRepository defines the interface for database operations related to
// linguistic analysis: persisting computed metrics for a work and reading
// them (and the work's source content) back.
type AnalysisRepository interface {
	// StoreAnalysisResults stores analysis results in the database,
	// deriving per-record language from the work itself.
	StoreAnalysisResults(ctx context.Context, workID uint, result *AnalysisResult) error
	// GetWorkContent retrieves content for a work from its translations.
	// NOTE(review): the current implementation prefers the original-language
	// translation over the requested `language` — confirm intended priority.
	GetWorkContent(ctx context.Context, workID uint, language string) (string, error)
	// StoreWorkAnalysis stores work-specific analysis results atomically;
	// nil inputs are skipped, non-nil inputs replace existing rows.
	StoreWorkAnalysis(ctx context.Context, workID uint, textMetadata *domain.TextMetadata,
		readabilityScore *domain.ReadabilityScore, languageAnalysis *domain.LanguageAnalysis) error
	// GetWorkByID fetches a work by ID.
	GetWorkByID(ctx context.Context, workID uint) (*domain.Work, error)
	// GetAnalysisData fetches persisted analysis data for a work.
	GetAnalysisData(ctx context.Context, workID uint) (*domain.TextMetadata, *domain.ReadabilityScore, *domain.LanguageAnalysis, error)
}
// GORMAnalysisRepository implements AnalysisRepository using GORM.
type GORMAnalysisRepository struct {
	// db is the shared GORM handle; per-call contexts are attached with WithContext.
	db *gorm.DB
}
// NewGORMAnalysisRepository builds an analysis repository backed by the
// given GORM database handle.
func NewGORMAnalysisRepository(db *gorm.DB) *GORMAnalysisRepository {
	repo := &GORMAnalysisRepository{db: db}
	return repo
}
// StoreAnalysisResults stores analysis results in the database.
//
// The work record is loaded first so its language can be attached to every
// derived record instead of hardcoding a default. The result is then split
// into text metadata, a readability score, and a language-analysis JSONB
// payload, and persisted atomically via StoreWorkAnalysis.
func (r *GORMAnalysisRepository) StoreAnalysisResults(ctx context.Context, workID uint, result *AnalysisResult) error {
	if result == nil {
		return fmt.Errorf("analysis result cannot be nil")
	}

	// Reuse GetWorkByID rather than duplicating the lookup logic inline.
	work, err := r.GetWorkByID(ctx, workID)
	if err != nil {
		log.LogError("Failed to fetch work for language",
			log.F("workID", workID),
			log.F("error", err))
		return fmt.Errorf("failed to fetch work for language: %w", err)
	}

	textMetadata := &domain.TextMetadata{
		WorkID:                workID,
		Language:              work.Language,
		WordCount:             result.WordCount,
		SentenceCount:         result.SentenceCount,
		ParagraphCount:        result.ParagraphCount,
		AverageWordLength:     result.AvgWordLength,
		AverageSentenceLength: result.AvgSentenceLength,
	}

	readabilityScore := &domain.ReadabilityScore{
		WorkID:   workID,
		Language: work.Language,
		Score:    result.ReadabilityScore,
		Method:   result.ReadabilityMethod,
	}

	// NOTE(review): extractKeywordsAsJSON/extractTopicsAsJSON already wrap
	// their payloads under "keywords"/"topics" keys, so the stored JSON is
	// double-nested (e.g. analysis.keywords.keywords). Preserved here for
	// compatibility with existing readers — confirm whether intentional.
	languageAnalysis := &domain.LanguageAnalysis{
		WorkID:   workID,
		Language: work.Language,
		Analysis: domain.JSONB{
			"sentiment": result.Sentiment,
			"keywords":  extractKeywordsAsJSON(result.Keywords),
			"topics":    extractTopicsAsJSON(result.Topics),
		},
	}

	return r.StoreWorkAnalysis(ctx, workID, textMetadata, readabilityScore, languageAnalysis)
}
// GetWorkContent retrieves content for a work from its translations.
//
// Preference order: the original-language translation, the explicitly
// requested language (if non-empty), the work's own language, then any
// available translation. Returns an error when no translation exists.
//
// Fixes: the original implementation ignored both ctx (queries ran without
// WithContext, so cancellation was not honored) and the language parameter.
func (r *GORMAnalysisRepository) GetWorkContent(ctx context.Context, workID uint, language string) (string, error) {
	// Fetch the work so its language can serve as a fallback preference.
	var work domain.Work
	if err := r.db.WithContext(ctx).First(&work, workID).Error; err != nil {
		log.LogError("Failed to fetch work for content retrieval",
			log.F("workID", workID),
			log.F("error", err))
		return "", fmt.Errorf("failed to fetch work: %w", err)
	}

	var translation domain.Translation

	// 1. Original-language translation.
	if err := r.db.WithContext(ctx).Where("translatable_type = ? AND translatable_id = ? AND is_original_language = ?",
		"Work", workID, true).First(&translation).Error; err == nil {
		return translation.Content, nil
	}

	// 2. Explicitly requested language.
	if language != "" {
		if err := r.db.WithContext(ctx).Where("translatable_type = ? AND translatable_id = ? AND language = ?",
			"Work", workID, language).First(&translation).Error; err == nil {
			return translation.Content, nil
		}
	}

	// 3. The work's own language.
	if err := r.db.WithContext(ctx).Where("translatable_type = ? AND translatable_id = ? AND language = ?",
		"Work", workID, work.Language).First(&translation).Error; err == nil {
		return translation.Content, nil
	}

	// 4. Any available translation.
	if err := r.db.WithContext(ctx).Where("translatable_type = ? AND translatable_id = ?",
		"Work", workID).First(&translation).Error; err == nil {
		return translation.Content, nil
	}

	return "", fmt.Errorf("no content found for work %d", workID)
}
// GetWorkByID fetches a work by ID, honoring the caller's context.
func (r *GORMAnalysisRepository) GetWorkByID(ctx context.Context, workID uint) (*domain.Work, error) {
	work := &domain.Work{}
	err := r.db.WithContext(ctx).First(work, workID).Error
	if err != nil {
		return nil, fmt.Errorf("failed to fetch work: %w", err)
	}
	return work, nil
}
// GetAnalysisData fetches persisted analysis data for a work.
//
// Missing text metadata or readability score is tolerated: a warning is
// logged and a pointer to a zero-value struct is returned for that piece.
// A missing language-analysis record, by contrast, fails the whole call.
// NOTE(review): that asymmetry is undocumented — confirm language analysis
// really is mandatory while the other two are optional.
func (r *GORMAnalysisRepository) GetAnalysisData(ctx context.Context, workID uint) (*domain.TextMetadata, *domain.ReadabilityScore, *domain.LanguageAnalysis, error) {
	var metadata domain.TextMetadata
	if err := r.db.WithContext(ctx).Where("work_id = ?", workID).First(&metadata).Error; err != nil {
		log.LogWarn("No text metadata found for work",
			log.F("workID", workID))
	}

	var readability domain.ReadabilityScore
	if err := r.db.WithContext(ctx).Where("work_id = ?", workID).First(&readability).Error; err != nil {
		log.LogWarn("No readability score found for work",
			log.F("workID", workID))
	}

	var analysis domain.LanguageAnalysis
	if err := r.db.WithContext(ctx).Where("work_id = ?", workID).First(&analysis).Error; err != nil {
		log.LogWarn("No language analysis found for work",
			log.F("workID", workID))
		return nil, nil, nil, err
	}

	return &metadata, &readability, &analysis, nil
}
// StoreWorkAnalysis stores work-specific analysis results.
//
// All writes run inside a single transaction so the persisted analysis is
// replaced atomically: for each non-nil input, existing rows for the work
// are deleted and the new record inserted. Nil inputs are skipped, leaving
// whatever is currently stored for that piece untouched.
func (r *GORMAnalysisRepository) StoreWorkAnalysis(ctx context.Context, workID uint,
	textMetadata *domain.TextMetadata, readabilityScore *domain.ReadabilityScore,
	languageAnalysis *domain.LanguageAnalysis) error {
	return r.db.WithContext(ctx).Transaction(func(tx *gorm.DB) error {
		// Nil checks stay at the call sites (not in the helper) because a
		// typed nil pointer boxed into an interface value would no longer
		// compare equal to nil.
		if textMetadata != nil {
			if err := replaceAnalysisRecord(tx, workID, "text metadata", &domain.TextMetadata{}, textMetadata); err != nil {
				return err
			}
		}
		if readabilityScore != nil {
			if err := replaceAnalysisRecord(tx, workID, "readability score", &domain.ReadabilityScore{}, readabilityScore); err != nil {
				return err
			}
		}
		if languageAnalysis != nil {
			if err := replaceAnalysisRecord(tx, workID, "language analysis", &domain.LanguageAnalysis{}, languageAnalysis); err != nil {
				return err
			}
		}
		log.LogInfo("Successfully stored analysis results",
			log.F("workID", workID))
		return nil
	})
}

// replaceAnalysisRecord deletes any existing rows of the given model for the
// work and inserts record in their place, logging and wrapping errors with a
// human-readable label (e.g. "text metadata"). It must run inside the
// caller's transaction so delete+create remain atomic.
func replaceAnalysisRecord(tx *gorm.DB, workID uint, label string, model, record interface{}) error {
	if err := tx.Where("work_id = ?", workID).Delete(model).Error; err != nil {
		log.LogError("Failed to delete existing "+label,
			log.F("workID", workID),
			log.F("error", err))
		return fmt.Errorf("failed to delete existing %s: %w", label, err)
	}
	if err := tx.Create(record).Error; err != nil {
		log.LogError("Failed to store "+label,
			log.F("workID", workID),
			log.F("error", err))
		return fmt.Errorf("failed to store %s: %w", label, err)
	}
	return nil
}
// Helper functions for data conversion

// extractKeywordsAsJSON converts extracted keywords into a JSONB payload of
// the shape {"keywords": [{"text": ..., "relevance": ...}, ...]}. An empty
// input yields an empty JSONB object (no "keywords" key at all).
func extractKeywordsAsJSON(keywords []Keyword) domain.JSONB {
	if len(keywords) == 0 {
		return domain.JSONB{}
	}
	entries := make([]map[string]interface{}, 0, len(keywords))
	for _, kw := range keywords {
		entries = append(entries, map[string]interface{}{
			"text":      kw.Text,
			"relevance": kw.Relevance,
		})
	}
	return domain.JSONB{"keywords": entries}
}
// extractTopicsAsJSON converts detected topics into a JSONB payload of the
// shape {"topics": [{"name": ..., "relevance": ...}, ...]}. An empty input
// yields an empty JSONB object (no "topics" key at all).
func extractTopicsAsJSON(topics []Topic) domain.JSONB {
	if len(topics) == 0 {
		return domain.JSONB{}
	}
	entries := make([]map[string]interface{}, 0, len(topics))
	for _, t := range topics {
		entries = append(entries, map[string]interface{}{
			"name":      t.Name,
			"relevance": t.Relevance,
		})
	}
	return domain.JSONB{"topics": entries}
}