tercul-backend/internal/store/processor.go
Damir Mukimov 4957117cb6 Initial commit: Tercul Go project with comprehensive architecture
- Core Go application with GraphQL API using gqlgen
- Comprehensive data models for literary works, authors, translations
- Repository pattern with caching layer
- Authentication and authorization system
- Linguistics analysis capabilities with multiple adapters
- Vector search integration with Weaviate
- Docker containerization support
- Python data migration and analysis scripts
- Clean architecture with proper separation of concerns
- Production-ready configuration and middleware
- Proper .gitignore excluding vendor/, database files, and build artifacts
2025-08-13 07:42:32 +02:00

120 lines
3.1 KiB
Go

package store
import (
	"context"
	"fmt"
	"log"

	"tercul/internal/enrich"
)
// ProcessWork runs a single work through the full enrichment pipeline
// (language detection, tokenization, POS tagging, lemmatization, phonetic
// encoding, keyword extraction, poetic analysis) and persists the results.
//
// Hard failures (language detection, tokenization, tagging, marking the work
// enriched) abort processing and are returned wrapped with context. Soft
// failures (a single lemma, word upsert, keyword or poetics step) are logged
// and processing continues — the pipeline is deliberately best-effort for
// per-item steps.
func ProcessWork(ctx context.Context, reg *enrich.Registry, db *DB, work Work) error {
	log.Printf("Processing work ID %d", work.ID)

	// Wrap the work body in the shape the enrichment services expect.
	text := enrich.Text{ID: work.ID, Body: work.Body}

	// Detect language — required downstream (lemmatization, MarkEnriched).
	lang, confidence, err := reg.Lang.Detect(text)
	if err != nil {
		return fmt.Errorf("detecting language for work %d: %w", work.ID, err)
	}
	log.Printf("Detected language: %s (confidence: %.2f)", lang, confidence)

	// Tokenize text.
	tokens, err := reg.Tok.Tokenize(text)
	if err != nil {
		return fmt.Errorf("tokenizing work %d: %w", work.ID, err)
	}
	log.Printf("Tokenized text into %d tokens", len(tokens))

	// Tag parts of speech.
	pos, err := reg.Pos.Tag(tokens)
	if err != nil {
		return fmt.Errorf("tagging work %d: %w", work.ID, err)
	}
	log.Printf("Tagged %d tokens with parts of speech", len(pos))

	// Guard against a tagger that returns a different number of tags than
	// tokens — indexing pos[i] below would otherwise panic.
	if len(pos) != len(tokens) {
		return fmt.Errorf("POS tagger returned %d tags for %d tokens in work %d", len(pos), len(tokens), work.ID)
	}

	// Process each token: lemma + phonetic encoding, then persist the word.
	for i, token := range tokens {
		// Stop promptly if the caller cancelled; per-token work can be long.
		if err := ctx.Err(); err != nil {
			return err
		}

		lemma, err := reg.Lem.Lemma(token.Text, lang)
		if err != nil {
			// Best-effort: fall back to the surface form.
			log.Printf("Error getting lemma for token %s: %v", token.Text, err)
			lemma = token.Text
		}

		phonetic := reg.Phon.Encode(token.Text)

		if err := UpsertWord(db, work.ID, token.Text, lemma, pos[i], phonetic); err != nil {
			log.Printf("Error storing word %s: %v", token.Text, err)
		}
	}

	// Extract and save keywords (best-effort).
	keywords, err := reg.Key.Extract(text)
	if err != nil {
		log.Printf("Error extracting keywords: %v", err)
	} else {
		keywordStrings := make([]string, len(keywords))
		for i, kw := range keywords {
			keywordStrings[i] = kw.Text
		}
		if err := SaveKeywords(db, work.ID, keywordStrings); err != nil {
			log.Printf("Error saving keywords: %v", err)
		}
	}

	// Analyze and save poetic structure (best-effort).
	enrichMetrics, err := reg.Poet.Analyse(text)
	if err != nil {
		log.Printf("Error analyzing poetics: %v", err)
	} else {
		// Translate the enrichment result into the store's representation.
		metrics := PoeticMetrics{
			RhymeScheme: enrichMetrics.RhymeScheme,
			MeterType:   enrichMetrics.MeterType,
			StanzaCount: enrichMetrics.StanzaCount,
			LineCount:   enrichMetrics.LineCount,
			Structure:   enrichMetrics.Structure,
		}
		if err := SavePoetics(db, work.ID, metrics); err != nil {
			log.Printf("Error saving poetics: %v", err)
		}
	}

	// Mark the work as enriched. Handle the error once: wrap and return
	// (previously it was both logged and returned).
	if err := MarkEnriched(db, work.ID, lang); err != nil {
		return fmt.Errorf("marking work %d enriched: %w", work.ID, err)
	}

	log.Printf("Successfully processed work ID %d", work.ID)
	return nil
}
// ProcessPendingWorks fetches every pending work and runs each through
// ProcessWork. Per-work failures are logged and do not stop the batch
// (best-effort), but a cancelled context aborts the loop and returns
// ctx.Err() — previously ctx was accepted and never consulted.
func ProcessPendingWorks(ctx context.Context, reg *enrich.Registry, db *DB) error {
	log.Println("Processing pending works...")

	works := ListPendingWorks(db)
	log.Printf("Found %d pending works", len(works))

	for _, work := range works {
		// Respect cancellation between (potentially slow) works.
		if err := ctx.Err(); err != nil {
			return err
		}
		if err := ProcessWork(ctx, reg, db, work); err != nil {
			log.Printf("Error processing work ID %d: %v", work.ID, err)
		}
	}

	log.Println("Finished processing pending works")
	return nil
}