tercul-backend/internal/platform/search/schema.go
Damir Mukimov fa336cacf3
wip
2025-09-01 00:43:59 +02:00

529 lines
17 KiB
Go

package search
import (
"context"
"fmt"
"github.com/weaviate/weaviate-go-client/v5/weaviate"
"github.com/weaviate/weaviate/entities/models"
)
// CreateSchema initializes the schema in Weaviate.
//
// The class definitions below reference each other (often cyclically, e.g.
// Work -> Source -> Work). Weaviate refuses to create a class whose
// cross-reference property points at a class that does not exist yet, so the
// schema is applied in two passes:
//
//  1. every class is created with only its primitive properties;
//  2. the cross-reference properties are then added to the created classes.
//
// Errors are reported on stdout and do not abort the run, so that one bad
// class does not prevent the rest of the schema from being applied. The
// success message is printed only when every step succeeded; otherwise a
// summary with the number of failed steps is printed instead.
func CreateSchema(client *weaviate.Client) {
	ctx := context.Background()

	// Define all classes
	classes := []*models.Class{
		{
			Class: "Work",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "content", DataType: []string{"text"}},
				// Relationship properties (references)
				{Name: "source", DataType: []string{"Source"}},
				{Name: "embedding", DataType: []string{"Embedding"}},
				{Name: "copyright", DataType: []string{"Copyright"}},
				{Name: "collection", DataType: []string{"Collection"}},
				{Name: "tag", DataType: []string{"Tag"}},
				{Name: "readabilityScore", DataType: []string{"ReadabilityScore"}},
				{Name: "media", DataType: []string{"Media"}},
				{Name: "writingStyle", DataType: []string{"WritingStyle"}},
				{Name: "emotion", DataType: []string{"Emotion"}},
				{Name: "translation", DataType: []string{"Translation"}},
				{Name: "category", DataType: []string{"Category"}},
				{Name: "topicCluster", DataType: []string{"TopicCluster"}},
				{Name: "mood", DataType: []string{"Mood"}},
				{Name: "concept", DataType: []string{"Concept"}},
				{Name: "linguisticLayer", DataType: []string{"LinguisticLayer"}},
				{Name: "workStats", DataType: []string{"WorkStats"}},
				{Name: "textMetadata", DataType: []string{"TextMetadata"}},
				{Name: "poeticAnalysis", DataType: []string{"PoeticAnalysis"}},
				{Name: "hybridEntityWork", DataType: []string{"HybridEntity_Work"}},
				{Name: "copyrightClaim", DataType: []string{"CopyrightClaim"}},
				{Name: "author", DataType: []string{"Author"}},
			},
			VectorIndexConfig: map[string]interface{}{
				"distance":       "cosine",
				"efConstruction": 128,
				"maxConnections": 64,
			},
		},
		{
			Class: "Translation",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "content", DataType: []string{"text"}},
				// Relationships
				{Name: "work", DataType: []string{"Work"}},
				{Name: "embedding", DataType: []string{"Embedding"}},
				{Name: "translationStats", DataType: []string{"TranslationStats"}},
				{Name: "hybridEntityWork", DataType: []string{"HybridEntity_Work"}},
				{Name: "copyright", DataType: []string{"Copyright"}},
				{Name: "copyrightClaim", DataType: []string{"CopyrightClaim"}},
				{Name: "translator", DataType: []string{"User"}},
			},
			VectorIndexConfig: map[string]interface{}{
				"distance": "cosine",
			},
		},
		{
			Class: "TopicCluster",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "Emotion",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "user", DataType: []string{"User"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "collection", DataType: []string{"Collection"}},
			},
			VectorIndexConfig: map[string]interface{}{"distance": "cosine"},
		},
		{
			Class: "Embedding",
			Properties: []*models.Property{
				{Name: "vector", DataType: []string{"number[]"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "translation", DataType: []string{"Translation"}},
			},
		},
		{
			Class: "Gamification",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "user", DataType: []string{"User"}},
			},
		},
		{
			Class: "Contribution",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "status", DataType: []string{"string"}},
				{Name: "user", DataType: []string{"User"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "translation", DataType: []string{"Translation"}},
			},
		},
		{
			Class: "Stats",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "user", DataType: []string{"User"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "LanguageAnalysis",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "WritingStyle",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "Media",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "author", DataType: []string{"Author"}},
				{Name: "translation", DataType: []string{"Translation"}},
				{Name: "country", DataType: []string{"Country"}},
				{Name: "city", DataType: []string{"City"}},
				{Name: "mediaStats", DataType: []string{"MediaStats"}},
				{Name: "copyright", DataType: []string{"Copyright"}},
				{Name: "copyrightClaim", DataType: []string{"CopyrightClaim"}},
			},
		},
		{
			Class: "Collection",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "description", DataType: []string{"text"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "user", DataType: []string{"User"}},
				{Name: "collectionStats", DataType: []string{"CollectionStats"}},
			},
		},
		{
			Class: "Bookmark",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "user", DataType: []string{"User"}},
			},
		},
		{
			Class: "Word",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "concept", DataType: []string{"Concept"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "Copyright",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "workOwner", DataType: []string{"Author"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "translation", DataType: []string{"Translation"}},
				{Name: "book", DataType: []string{"Book"}},
				{Name: "source", DataType: []string{"Source"}},
			},
		},
		{
			Class: "Admin",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "user", DataType: []string{"User"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "Author",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "biography", DataType: []string{"text"}},
				{Name: "birthDate", DataType: []string{"date"}},
				{Name: "deathDate", DataType: []string{"date"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "book", DataType: []string{"Book"}},
				{Name: "country", DataType: []string{"Country"}},
				{Name: "city", DataType: []string{"City"}},
				{Name: "place", DataType: []string{"Place"}},
				{Name: "address", DataType: []string{"Address"}},
				{Name: "copyrightClaim", DataType: []string{"CopyrightClaim"}},
				{Name: "copyright", DataType: []string{"Copyright"}},
			},
		},
		{
			Class: "Category",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "User",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "email", DataType: []string{"string"}},
				{Name: "role", DataType: []string{"string"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "bookmark", DataType: []string{"Bookmark"}},
				{Name: "translation", DataType: []string{"Translation"}},
				{Name: "collection", DataType: []string{"Collection"}},
				{Name: "like", DataType: []string{"Like"}},
				{Name: "comment", DataType: []string{"Comment"}},
				{Name: "author", DataType: []string{"Author"}},
				{Name: "topicCluster", DataType: []string{"TopicCluster"}},
				{Name: "country", DataType: []string{"Country"}},
				{Name: "city", DataType: []string{"City"}},
				{Name: "userStats", DataType: []string{"UserStats"}},
				{Name: "book", DataType: []string{"Book"}},
				{Name: "media", DataType: []string{"Media"}},
				{Name: "address", DataType: []string{"Address"}},
				{Name: "emotion", DataType: []string{"Emotion"}},
				{Name: "copyrightClaim", DataType: []string{"CopyrightClaim"}},
				{Name: "contribution", DataType: []string{"Contribution"}},
			},
		},
		{
			Class: "Book",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "author", DataType: []string{"Author"}},
				{Name: "bookStats", DataType: []string{"BookStats"}},
				{Name: "copyright", DataType: []string{"Copyright"}},
				{Name: "copyrightClaim", DataType: []string{"CopyrightClaim"}},
			},
		},
		{
			Class: "Source",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "copyright", DataType: []string{"Copyright"}},
				{Name: "copyrightClaim", DataType: []string{"CopyrightClaim"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "Tag",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "Concept",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "word", DataType: []string{"Word"}},
			},
		},
		{
			Class: "Comment",
			Properties: []*models.Property{
				{Name: "text", DataType: []string{"text"}},
				{Name: "user", DataType: []string{"User"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "translation", DataType: []string{"Translation"}},
				{Name: "lineNumber", DataType: []string{"int"}},
				{Name: "parentComment", DataType: []string{"Comment"}},
				{Name: "like", DataType: []string{"Like"}},
			},
		},
		{
			Class: "ReadabilityScore",
			Properties: []*models.Property{
				{Name: "score", DataType: []string{"number"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "Language",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
			},
		},
		{
			Class: "Vote",
			Properties: []*models.Property{
				{Name: "value", DataType: []string{"number"}},
				{Name: "user", DataType: []string{"User"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "translation", DataType: []string{"Translation"}},
				{Name: "comment", DataType: []string{"Comment"}},
			},
		},
		{
			Class: "Edition",
			Properties: []*models.Property{
				{Name: "version", DataType: []string{"text"}},
				{Name: "book", DataType: []string{"Book"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "LinguisticLayer",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "Mood",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "Like",
			Properties: []*models.Property{
				{Name: "user", DataType: []string{"User"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "translation", DataType: []string{"Translation"}},
				{Name: "comment", DataType: []string{"Comment"}},
			},
		},
		{
			Class: "Notification",
			Properties: []*models.Property{
				{Name: "message", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "user", DataType: []string{"User"}},
			},
		},
		{
			Class: "EditorialWorkflow",
			Properties: []*models.Property{
				{Name: "stage", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "translation", DataType: []string{"Translation"}},
				{Name: "user", DataType: []string{"User"}},
			},
		},
		{
			Class: "Monetization",
			Properties: []*models.Property{
				{Name: "amount", DataType: []string{"number"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "translation", DataType: []string{"Translation"}},
				{Name: "book", DataType: []string{"Book"}},
			},
		},
		{
			Class: "Country",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "author", DataType: []string{"Author"}},
				{Name: "user", DataType: []string{"User"}},
			},
		},
		{
			Class: "City",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "country", DataType: []string{"Country"}},
				{Name: "author", DataType: []string{"Author"}},
				{Name: "user", DataType: []string{"User"}},
			},
		},
		{
			Class: "Address",
			Properties: []*models.Property{
				{Name: "street", DataType: []string{"text"}},
				{Name: "city", DataType: []string{"City"}},
				{Name: "country", DataType: []string{"Country"}},
				{Name: "author", DataType: []string{"Author"}},
				{Name: "user", DataType: []string{"User"}},
			},
		},
		{
			Class: "WorkStats",
			Properties: []*models.Property{
				{Name: "views", DataType: []string{"number"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "TranslationStats",
			Properties: []*models.Property{
				{Name: "views", DataType: []string{"number"}},
				{Name: "translation", DataType: []string{"Translation"}},
			},
		},
		{
			Class: "MediaStats",
			Properties: []*models.Property{
				{Name: "views", DataType: []string{"number"}},
				{Name: "media", DataType: []string{"Media"}},
			},
		},
		{
			Class: "UserStats",
			Properties: []*models.Property{
				{Name: "activity", DataType: []string{"number"}},
				{Name: "user", DataType: []string{"User"}},
			},
		},
		{
			Class: "Place",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "city", DataType: []string{"City"}},
				{Name: "country", DataType: []string{"Country"}},
				{Name: "author", DataType: []string{"Author"}},
			},
		},
		{
			Class: "BookStats",
			Properties: []*models.Property{
				{Name: "sales", DataType: []string{"number"}},
				{Name: "book", DataType: []string{"Book"}},
			},
		},
		{
			Class: "CollectionStats",
			Properties: []*models.Property{
				{Name: "items", DataType: []string{"number"}},
				{Name: "collection", DataType: []string{"Collection"}},
			},
		},
		{
			Class: "TextMetadata",
			Properties: []*models.Property{
				{Name: "analysis", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "PoeticAnalysis",
			Properties: []*models.Property{
				{Name: "structure", DataType: []string{"text"}},
				{Name: "language", DataType: []string{"string"}},
				{Name: "work", DataType: []string{"Work"}},
			},
		},
		{
			Class: "HybridEntity_Work",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "translation", DataType: []string{"Translation"}},
			},
		},
		{
			Class: "CopyrightClaim",
			Properties: []*models.Property{
				{Name: "details", DataType: []string{"text"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "translation", DataType: []string{"Translation"}},
				{Name: "book", DataType: []string{"Book"}},
				{Name: "source", DataType: []string{"Source"}},
				{Name: "author", DataType: []string{"Author"}},
				{Name: "user", DataType: []string{"User"}},
			},
		},
		{
			Class: "Contributor",
			Properties: []*models.Property{
				{Name: "name", DataType: []string{"text"}},
				{Name: "user", DataType: []string{"User"}},
				{Name: "work", DataType: []string{"Work"}},
				{Name: "translation", DataType: []string{"Translation"}},
			},
		},
		{
			Class: "Edge",
			Properties: []*models.Property{
				{Name: "sourceTable", DataType: []string{"string"}},
				{Name: "sourceId", DataType: []string{"string"}},
				{Name: "targetTable", DataType: []string{"string"}},
				{Name: "targetId", DataType: []string{"string"}},
				{Name: "relation", DataType: []string{"string"}},
				{Name: "language", DataType: []string{"string"}},
				// NOTE(review): Weaviate's "object" data type normally expects
				// nestedProperties and none are declared here — confirm the
				// target server version accepts this property as written.
				{Name: "extra", DataType: []string{"object"}},
			},
		},
	}

	failures := 0
	created := make(map[string]bool, len(classes))

	// Pass 1: create each class with its primitive properties only, so that
	// no class depends on another one already being present.
	for _, class := range classes {
		// Shallow copy: the full definition in classes must stay intact
		// because pass 2 reads the reference properties from it.
		base := *class
		base.Properties = nil
		for _, prop := range class.Properties {
			if !isCrossReferenceProperty(prop) {
				base.Properties = append(base.Properties, prop)
			}
		}

		if err := client.Schema().ClassCreator().WithClass(&base).Do(ctx); err != nil {
			fmt.Printf("Failed to create class %s: %v\n", class.Class, err)
			failures++
			continue
		}
		created[class.Class] = true
	}

	// Pass 2: now that the target classes exist, attach the cross-references.
	for _, class := range classes {
		if !created[class.Class] {
			// The owning class is missing or was not created by this run;
			// its failure has already been reported above.
			continue
		}
		for _, prop := range class.Properties {
			if !isCrossReferenceProperty(prop) {
				continue
			}
			err := client.Schema().PropertyCreator().
				WithClassName(class.Class).
				WithProperty(prop).
				Do(ctx)
			if err != nil {
				fmt.Printf("Failed to add reference property %s.%s: %v\n", class.Class, prop.Name, err)
				failures++
			}
		}
	}

	if failures > 0 {
		fmt.Printf("Weaviate schema creation finished with %d error(s).\n", failures)
		return
	}
	fmt.Println("Weaviate schema created successfully.")
}

// isCrossReferenceProperty reports whether prop points at another class rather
// than holding a primitive value. It relies on the naming convention used
// throughout this schema: primitive data types ("text", "number[]", "date",
// ...) start with a lowercase letter, class names with an uppercase one.
func isCrossReferenceProperty(prop *models.Property) bool {
	if prop == nil || len(prop.DataType) == 0 || prop.DataType[0] == "" {
		return false
	}
	first := prop.DataType[0][0]
	return first >= 'A' && first <= 'Z'
}