mirror of
https://github.com/SamyRai/tercul-backend.git
synced 2025-12-27 04:01:34 +00:00
- Core Go application with GraphQL API using gqlgen - Comprehensive data models for literary works, authors, translations - Repository pattern with caching layer - Authentication and authorization system - Linguistics analysis capabilities with multiple adapters - Vector search integration with Weaviate - Docker containerization support - Python data migration and analysis scripts - Clean architecture with proper separation of concerns - Production-ready configuration and middleware - Proper .gitignore excluding vendor/, database files, and build artifacts
120 lines
3.1 KiB
Go
120 lines
3.1 KiB
Go
package store
|
|
|
|
import (
	"context"
	"fmt"
	"log"

	"tercul/internal/enrich"
)
|
|
|
|
// ProcessWork processes a work using the enrichment registry and stores the results
|
|
func ProcessWork(ctx context.Context, reg *enrich.Registry, db *DB, work Work) error {
|
|
log.Printf("Processing work ID %d", work.ID)
|
|
|
|
// Create a text object for the enrichment services
|
|
text := enrich.Text{ID: work.ID, Body: work.Body}
|
|
|
|
// Detect language
|
|
lang, confidence, err := reg.Lang.Detect(text)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
log.Printf("Detected language: %s (confidence: %.2f)", lang, confidence)
|
|
|
|
// Tokenize text
|
|
tokens, err := reg.Tok.Tokenize(text)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
log.Printf("Tokenized text into %d tokens", len(tokens))
|
|
|
|
// Tag parts of speech
|
|
pos, err := reg.Pos.Tag(tokens)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
log.Printf("Tagged %d tokens with parts of speech", len(pos))
|
|
|
|
// Process each token
|
|
for i, token := range tokens {
|
|
// Get lemma
|
|
lemma, err := reg.Lem.Lemma(token.Text, lang)
|
|
if err != nil {
|
|
log.Printf("Error getting lemma for token %s: %v", token.Text, err)
|
|
lemma = token.Text // Use the original text as fallback
|
|
}
|
|
|
|
// Get phonetic encoding
|
|
phonetic := reg.Phon.Encode(token.Text)
|
|
|
|
// Store the word
|
|
if err := UpsertWord(db, work.ID, token.Text, lemma, pos[i], phonetic); err != nil {
|
|
log.Printf("Error storing word %s: %v", token.Text, err)
|
|
}
|
|
}
|
|
|
|
// Extract keywords
|
|
keywords, err := reg.Key.Extract(text)
|
|
if err != nil {
|
|
log.Printf("Error extracting keywords: %v", err)
|
|
} else {
|
|
// Convert keywords to strings
|
|
keywordStrings := make([]string, len(keywords))
|
|
for i, kw := range keywords {
|
|
keywordStrings[i] = kw.Text
|
|
}
|
|
|
|
// Save keywords
|
|
if err := SaveKeywords(db, work.ID, keywordStrings); err != nil {
|
|
log.Printf("Error saving keywords: %v", err)
|
|
}
|
|
}
|
|
|
|
// Analyze poetics
|
|
enrichMetrics, err := reg.Poet.Analyse(text)
|
|
if err != nil {
|
|
log.Printf("Error analyzing poetics: %v", err)
|
|
} else {
|
|
// Convert to store.PoeticMetrics
|
|
metrics := PoeticMetrics{
|
|
RhymeScheme: enrichMetrics.RhymeScheme,
|
|
MeterType: enrichMetrics.MeterType,
|
|
StanzaCount: enrichMetrics.StanzaCount,
|
|
LineCount: enrichMetrics.LineCount,
|
|
Structure: enrichMetrics.Structure,
|
|
}
|
|
|
|
// Save poetics
|
|
if err := SavePoetics(db, work.ID, metrics); err != nil {
|
|
log.Printf("Error saving poetics: %v", err)
|
|
}
|
|
}
|
|
|
|
// Mark the work as enriched
|
|
if err := MarkEnriched(db, work.ID, lang); err != nil {
|
|
log.Printf("Error marking work as enriched: %v", err)
|
|
return err
|
|
}
|
|
|
|
log.Printf("Successfully processed work ID %d", work.ID)
|
|
return nil
|
|
}
|
|
|
|
// ProcessPendingWorks processes all pending works
|
|
func ProcessPendingWorks(ctx context.Context, reg *enrich.Registry, db *DB) error {
|
|
log.Println("Processing pending works...")
|
|
|
|
// Get pending works
|
|
works := ListPendingWorks(db)
|
|
log.Printf("Found %d pending works", len(works))
|
|
|
|
// Process each work
|
|
for _, work := range works {
|
|
if err := ProcessWork(ctx, reg, db, work); err != nil {
|
|
log.Printf("Error processing work ID %d: %v", work.ID, err)
|
|
}
|
|
}
|
|
|
|
log.Println("Finished processing pending works")
|
|
return nil
|
|
}
|