tercul-backend/internal/jobs/linguistics/phonetic_encoder.go
Damir Mukimov d50722dad5
Some checks failed
Test / Integration Tests (push) Successful in 4s
Build / Build Binary (push) Failing after 2m9s
Docker Build / Build Docker Image (push) Failing after 2m32s
Test / Unit Tests (push) Failing after 3m12s
Lint / Go Lint (push) Failing after 1m0s
Refactor ID handling to use UUIDs across the application
- Updated database models and repositories to replace uint IDs with UUIDs.
- Modified test fixtures to generate and use UUIDs for authors, translations, users, and works.
- Adjusted mock implementations to align with the new UUID structure.
- Ensured all relevant functions and methods are updated to handle UUIDs correctly.
- Added necessary imports for UUID handling in various files.
2025-12-27 00:33:34 +01:00

116 lines
2.4 KiB
Go

package linguistics
import (
"strings"
)
// PhoneticEncoder turns words into phonetic keys. The type has no fields,
// so every instance behaves the same way.
type PhoneticEncoder struct{}

// NewPhoneticEncoder returns a pointer to a freshly allocated PhoneticEncoder.
func NewPhoneticEncoder() *PhoneticEncoder {
	return new(PhoneticEncoder)
}
// Encode returns the four-character American Soundex code for word: its
// first letter followed by three digits, padded on the right with zeros.
//
// The input is upper-cased and every character outside A-Z is ignored, so
// digits, punctuation and whitespace do not influence the result. An input
// that contains no A-Z letter yields the empty string.
//
// Encoding rules:
//   - Consonants map to the digits 1-6; vowels, H, W and Y carry no digit.
//   - Consecutive letters that share a digit are coded once, and the first
//     letter's own digit takes part in that comparison ("Pfister" -> "P236").
//   - A vowel (Y included) between two letters with the same digit splits
//     the run, so the second letter is coded again ("Tymczak" -> "T522").
//   - H and W do not split a run ("Ashcraft" -> "A261").
//
//nolint:gocyclo // Soundex rules need several branches
func (e *PhoneticEncoder) Encode(word string) string {
	const (
		codeLen = 4
		// Soundex digit for each letter A..Z; '0' marks letters without one.
		digits = "01230120022455012623010202"
	)

	code := make([]byte, 0, codeLen)
	// prev holds the digit of the run of same-coded letters currently open,
	// or '0' when no run is open.
	prev := byte('0')

	for _, r := range strings.ToUpper(word) {
		if r < 'A' || r > 'Z' {
			continue
		}
		digit := digits[r-'A']

		if len(code) == 0 {
			// The first letter is kept verbatim, but its digit still opens a
			// run so that an immediately following twin is suppressed.
			code = append(code, byte(r))
			prev = digit
			continue
		}

		switch {
		case r == 'H' || r == 'W':
			// Transparent: neither coded nor allowed to close the open run.
		case digit == '0':
			// A vowel closes the run; the same digit may be emitted again.
			prev = '0'
		case digit != prev:
			code = append(code, digit)
			prev = digit
		}

		if len(code) == codeLen {
			break
		}
	}

	if len(code) == 0 {
		return ""
	}
	return string(code) + strings.Repeat("0", codeLen-len(code))
}
// DoubleMetaphone returns a primary and a secondary phonetic key for word.
//
// Despite the name, this is not the Double Metaphone algorithm. The primary
// key is whatever Encode returns for word. The secondary key is that same
// code with its second byte replaced: bytes below '6' are incremented by one,
// and bytes at '6' or above become '1'. When Encode returns fewer than two
// bytes, both keys are that unchanged value.
func (e *PhoneticEncoder) DoubleMetaphone(word string) (string, string) {
	primary := e.Encode(word)
	if len(primary) <= 1 {
		// Nothing to vary, so the code doubles as its own alternative.
		return primary, primary
	}

	bumped := primary[1] + 1
	if primary[1] >= '6' {
		// Wrap around instead of stepping past the highest digit in use.
		bumped = '1'
	}

	secondary := []byte(primary)
	secondary[1] = bumped
	return primary, string(secondary)
}