package linguistics

import (
	"context"
	"sync"
)

// TextAnalyzer defines the interface for pure text analysis operations.
type TextAnalyzer interface {
	// AnalyzeText performs linguistic analysis on the given text.
	AnalyzeText(ctx context.Context, text string, language string) (*AnalysisResult, error)

	// AnalyzeTextConcurrently performs text analysis using concurrent processing.
	AnalyzeTextConcurrently(ctx context.Context, text string, language string, concurrency int) (*AnalysisResult, error)
}

// BasicTextAnalyzer implements the TextAnalyzer interface with simple algorithms.
type BasicTextAnalyzer struct {
	langDetector      LanguageDetector
	sentimentProvider SentimentProvider
	keywordProvider   KeywordProvider
}

// NewBasicTextAnalyzer creates a new BasicTextAnalyzer with no providers
// configured; the With* methods below inject optional providers.
func NewBasicTextAnalyzer() *BasicTextAnalyzer {
	return &BasicTextAnalyzer{}
}

// WithLanguageDetector injects a language detector provider.
func (a *BasicTextAnalyzer) WithLanguageDetector(detector LanguageDetector) *BasicTextAnalyzer {
	a.langDetector = detector
	return a
}

// WithSentimentProvider injects a sentiment provider.
func (a *BasicTextAnalyzer) WithSentimentProvider(provider SentimentProvider) *BasicTextAnalyzer {
	a.sentimentProvider = provider
	return a
}

// WithKeywordProvider injects a keyword provider.
func (a *BasicTextAnalyzer) WithKeywordProvider(provider KeywordProvider) *BasicTextAnalyzer {
	a.keywordProvider = provider
	return a
}

// AnalyzeText performs linguistic analysis on the given text.
func (a *BasicTextAnalyzer) AnalyzeText(ctx context.Context, text string, language string) (*AnalysisResult, error) {
	if text == "" {
		return &AnalysisResult{}, nil
	}

	// Auto-detect language if not provided and a detector exists.
	if language == "" && a.langDetector != nil {
		if detected, ok := a.langDetector.DetectLanguage(text); ok {
			language = detected
		}
	}

	result := &AnalysisResult{
		PartOfSpeechCounts: make(map[string]int),
		Entities:           []Entity{},
		Keywords:           []Keyword{},
		Topics:             []Topic{},
	}

	// Perform a single pass through the text for basic statistics.
	words, sentences, paragraphs, avgWordLength := analyzeTextBasicStats(text)
	result.WordCount = words
	result.SentenceCount = sentences
	result.ParagraphCount = paragraphs
	result.AvgWordLength = avgWordLength

	// Average sentence length, in words per sentence.
	if result.SentenceCount > 0 {
		result.AvgSentenceLength = float64(result.WordCount) / float64(result.SentenceCount)
	}

	// Readability score (simplified Flesch-Kincaid).
	result.ReadabilityScore = calculateReadabilityScore(result.AvgSentenceLength, result.AvgWordLength)
	result.ReadabilityMethod = "Simplified Flesch-Kincaid"

	// Keywords: prefer the injected provider, falling back to the built-in
	// extractor when no provider is set or the provider returns an error.
	if a.keywordProvider != nil {
		if kws, err := a.keywordProvider.Extract(text, language); err == nil {
			result.Keywords = kws
		} else {
			result.Keywords = extractKeywordsOptimized(text, language)
		}
	} else {
		result.Keywords = extractKeywordsOptimized(text, language)
	}

	// Sentiment: same provider-first, built-in-fallback policy as keywords.
	if a.sentimentProvider != nil {
		if score, err := a.sentimentProvider.Score(text, language); err == nil {
			result.Sentiment = score
		} else {
			result.Sentiment = estimateSentimentOptimized(text, language)
		}
	} else {
		result.Sentiment = estimateSentimentOptimized(text, language)
	}

	return result, nil
}
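
// A minimal usage sketch for the sequential path. Hedged assumptions: ctx is
// supplied by the caller, and myDetector is a hypothetical LanguageDetector
// implementation not defined in this package. Passing "" as the language
// triggers auto-detection when a detector is configured.
//
//	analyzer := NewBasicTextAnalyzer().WithLanguageDetector(myDetector)
//	result, err := analyzer.AnalyzeText(ctx, "The quick brown fox jumps over the lazy dog.", "")
//	if err == nil {
//		fmt.Printf("words=%d readability=%.1f\n", result.WordCount, result.ReadabilityScore)
//	}
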
// AnalyzeTextConcurrently performs text analysis using concurrent processing.
func (a *BasicTextAnalyzer) AnalyzeTextConcurrently(ctx context.Context, text string, language string, concurrency int) (*AnalysisResult, error) {
	if text == "" {
		return &AnalysisResult{}, nil
	}
	// Guard against a non-positive concurrency value.
	if concurrency < 1 {
		concurrency = 1
	}

	// Auto-detect language if not provided and a detector exists.
	if language == "" && a.langDetector != nil {
		if detected, ok := a.langDetector.DetectLanguage(text); ok {
			language = detected
		}
	}

	// Split the text into chunks for concurrent processing.
	chunks := splitTextIntoChunks(text, concurrency)
	n := len(chunks)

	// Buffer the result channels by the actual chunk count so no goroutine
	// ever blocks on a send and wg.Wait cannot deadlock.
	wordCountCh := make(chan int, n)
	sentenceCountCh := make(chan int, n)
	paragraphCountCh := make(chan int, n)
	wordLengthSumCh := make(chan float64, n)
	wordLengthCountCh := make(chan int, n)
	keywordsCh := make(chan []Keyword, n)
	sentimentCh := make(chan float64, n)

	// Process each chunk concurrently.
	var wg sync.WaitGroup
	for _, chunk := range chunks {
		wg.Add(1)
		go func(chunkText string) {
			defer wg.Done()

			// Skip the chunk if the context has already been cancelled.
			select {
			case <-ctx.Done():
				return
			default:
			}

			// Basic statistics.
			words, sentences, paragraphs, wordLengthSum, wordCount := analyzeChunkBasicStats(chunkText)
			wordCountCh <- words
			sentenceCountCh <- sentences
			paragraphCountCh <- paragraphs
			wordLengthSumCh <- wordLengthSum
			wordLengthCountCh <- wordCount

			// Keywords (provider if available, built-in fallback otherwise).
			if a.keywordProvider != nil {
				if kws, err := a.keywordProvider.Extract(chunkText, language); err == nil {
					keywordsCh <- kws
				} else {
					keywordsCh <- extractKeywordsOptimized(chunkText, language)
				}
			} else {
				keywordsCh <- extractKeywordsOptimized(chunkText, language)
			}

			// Sentiment (provider if available, built-in fallback otherwise).
			if a.sentimentProvider != nil {
				if score, err := a.sentimentProvider.Score(chunkText, language); err == nil {
					sentimentCh <- score
				} else {
					sentimentCh <- estimateSentimentOptimized(chunkText, language)
				}
			} else {
				sentimentCh <- estimateSentimentOptimized(chunkText, language)
			}
		}(chunk)
	}

	// Wait for all goroutines to complete, then close the channels so the
	// aggregation loops below terminate.
	wg.Wait()

	// Surface cancellation instead of silently returning partial results.
	if err := ctx.Err(); err != nil {
		return nil, err
	}

	close(wordCountCh)
	close(sentenceCountCh)
	close(paragraphCountCh)
	close(wordLengthSumCh)
	close(wordLengthCountCh)
	close(keywordsCh)
	close(sentimentCh)

	// Aggregate results.
	result := &AnalysisResult{
		PartOfSpeechCounts: make(map[string]int),
		Entities:           []Entity{},
		Keywords:           []Keyword{},
		Topics:             []Topic{},
	}

	// Sum up basic statistics.
	for wc := range wordCountCh {
		result.WordCount += wc
	}
	for sc := range sentenceCountCh {
		result.SentenceCount += sc
	}
	for pc := range paragraphCountCh {
		result.ParagraphCount += pc
	}

	// Average word length across all chunks.
	var totalWordLengthSum float64
	var totalWordCount int
	for wls := range wordLengthSumCh {
		totalWordLengthSum += wls
	}
	for wlc := range wordLengthCountCh {
		totalWordCount += wlc
	}
	if totalWordCount > 0 {
		result.AvgWordLength = totalWordLengthSum / float64(totalWordCount)
	}

	// Average sentence length, in words per sentence.
	if result.SentenceCount > 0 {
		result.AvgSentenceLength = float64(result.WordCount) / float64(result.SentenceCount)
	}

	// Readability score (simplified Flesch-Kincaid).
	result.ReadabilityScore = calculateReadabilityScore(result.AvgSentenceLength, result.AvgWordLength)
	result.ReadabilityMethod = "Simplified Flesch-Kincaid"

	// Merge keywords, averaging the relevance of duplicates across chunks.
	keywordSum := make(map[string]float64)
	keywordCount := make(map[string]int)
	for kws := range keywordsCh {
		for _, kw := range kws {
			keywordSum[kw.Text] += kw.Relevance
			keywordCount[kw.Text]++
		}
	}
	for kwText, sum := range keywordSum {
		cnt := keywordCount[kwText]
		if cnt > 0 {
			result.Keywords = append(result.Keywords, Keyword{Text: kwText, Relevance: sum / float64(cnt)})
		}
	}

	// Average sentiment across chunks.
	var totalSentiment float64
	var sentimentCount int
	for s := range sentimentCh {
		totalSentiment += s
		sentimentCount++
	}
	if sentimentCount > 0 {
		result.Sentiment = totalSentiment / float64(sentimentCount)
	}

	return result, nil
}
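
// A usage sketch for the concurrent path. Hedged assumptions: myKeywords and
// mySentiment are hypothetical KeywordProvider/SentimentProvider
// implementations, and longDocument is caller-supplied text; none of these
// are defined in this package. Note that keywords and sentiment are computed
// per chunk and then averaged, so results can differ slightly from running
// the sequential AnalyzeText on the same input.
//
//	analyzer := NewBasicTextAnalyzer().
//		WithKeywordProvider(myKeywords).
//		WithSentimentProvider(mySentiment)
//	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
//	defer cancel()
//	result, err := analyzer.AnalyzeTextConcurrently(ctx, longDocument, "en", 4)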