tercul-backend/internal/jobs/linguistics/registry.go

55 lines
1.0 KiB
Go

package linguistics
// Registry bundles one instance of each text analysis service so that they
// can be passed around together as a single value.
type Registry struct {
	Lang *LanguageDetector // language detection service
	Tok  *Tokenizer        // tokenization service
	Pos  *POSTagger        // part-of-speech tagging service
	Lem  *Lemmatizer       // lemmatization service
	Phon *PhoneticEncoder  // phonetic encoding service
	Key  *KeywordExtractor // keyword extraction service
	Poet *PoeticAnalyzer   // poetic analysis service
}
// DefaultRegistry allocates a Registry and fills every field with the value
// returned by the matching New* constructor, then returns a pointer to it.
func DefaultRegistry() *Registry {
	reg := new(Registry)

	// Constructors are invoked in field declaration order.
	reg.Lang = NewLanguageDetector()
	reg.Tok = NewTokenizer()
	reg.Pos = NewPOSTagger()
	reg.Lem = NewLemmatizer()
	reg.Phon = NewPhoneticEncoder()
	reg.Key = NewKeywordExtractor()
	reg.Poet = NewPoeticAnalyzer()

	return reg
}
// Text is a piece of text submitted for analysis, paired with a numeric
// identifier.
type Text struct {
	// ID identifies the text. NOTE(review): presumably a database key —
	// confirm against callers.
	ID uint

	// Body is the raw content to analyze.
	Body string
}
// Token is a single token produced from a text, together with the numbers
// that locate it.
type Token struct {
	// Text is the token's string content.
	Text string

	// Position, Offset and Length locate the token in its source text.
	// NOTE(review): the units (token index, bytes or runes) are not
	// defined here — confirm against the tokenizer.
	Position, Offset, Length int
}
// Keyword is a term extracted from a text along with a relevance score.
type Keyword struct {
	// Text is the keyword itself.
	Text string

	// Relevance is the score assigned to the keyword.
	// NOTE(review): the scale and range are not defined here — confirm
	// against the extractor.
	Relevance float64
}
// PoeticMetrics collects the results of a poetic analysis: three descriptive
// strings and two counts.
type PoeticMetrics struct {
	// RhymeScheme and MeterType describe the detected rhyme scheme and
	// meter. NOTE(review): the notation used is not defined here.
	RhymeScheme, MeterType string

	// StanzaCount and LineCount are the number of stanzas and lines.
	StanzaCount, LineCount int

	// Structure is a description of the overall structure.
	// NOTE(review): the set of possible values is not defined here.
	Structure string
}