package main import ( "context" "fmt" "math/rand" "strings" "time" ) // runSyntheticMode generates synthetic test data and appends it to the gallery func runSyntheticMode() error { generator := NewSyntheticDataGenerator() // Generate a random number of synthetic models (1-3) numModels := generator.rand.Intn(3) + 1 fmt.Printf("Generating %d synthetic models for testing...\n", numModels) var models []ProcessedModel for i := 0; i < numModels; i++ { model := generator.GenerateProcessedModel() models = append(models, model) fmt.Printf("Generated synthetic model: %s\n", model.ModelID) } // Generate YAML entries and append to gallery/index.yaml fmt.Println("Generating YAML entries for synthetic models...") err := generateYAMLForModels(context.Background(), models) if err != nil { return fmt.Errorf("error generating YAML entries: %w", err) } fmt.Printf("Successfully added %d synthetic models to the gallery for testing!\n", len(models)) return nil } // SyntheticDataGenerator provides methods to generate synthetic test data type SyntheticDataGenerator struct { rand *rand.Rand } // NewSyntheticDataGenerator creates a new synthetic data generator func NewSyntheticDataGenerator() *SyntheticDataGenerator { return &SyntheticDataGenerator{ rand: rand.New(rand.NewSource(time.Now().UnixNano())), } } // GenerateProcessedModelFile creates a synthetic ProcessedModelFile func (g *SyntheticDataGenerator) GenerateProcessedModelFile() ProcessedModelFile { fileTypes := []string{"model", "readme", "other"} fileType := fileTypes[g.rand.Intn(len(fileTypes))] var path string var isReadme bool switch fileType { case "model": path = fmt.Sprintf("model-%s.gguf", g.randomString(8)) isReadme = false case "readme": path = "README.md" isReadme = true default: path = fmt.Sprintf("file-%s.txt", g.randomString(6)) isReadme = false } return ProcessedModelFile{ Path: path, Size: int64(g.rand.Intn(1000000000) + 1000000), // 1MB to 1GB SHA256: g.randomSHA256(), IsReadme: isReadme, FileType: fileType, } } // GenerateProcessedModel creates a synthetic ProcessedModel func (g *SyntheticDataGenerator) GenerateProcessedModel() ProcessedModel { authors := []string{"microsoft", "meta", "google", "openai", "anthropic", "mistralai", "huggingface"} modelNames := []string{"llama", "gpt", "claude", "mistral", "gemma", "phi", "qwen", "codellama"} author := authors[g.rand.Intn(len(authors))] modelName := modelNames[g.rand.Intn(len(modelNames))] modelID := fmt.Sprintf("%s/%s-%s", author, modelName, g.randomString(6)) // Generate files numFiles := g.rand.Intn(5) + 2 // 2-6 files files := make([]ProcessedModelFile, numFiles) // Ensure at least one model file and one readme hasModelFile := false hasReadme := false for i := 0; i < numFiles; i++ { files[i] = g.GenerateProcessedModelFile() if files[i].FileType == "model" { hasModelFile = true } if files[i].FileType == "readme" { hasReadme = true } } // Add required files if missing if !hasModelFile { modelFile := g.GenerateProcessedModelFile() modelFile.FileType = "model" modelFile.Path = fmt.Sprintf("%s-Q4_K_M.gguf", modelName) files = append(files, modelFile) } if !hasReadme { readmeFile := g.GenerateProcessedModelFile() readmeFile.FileType = "readme" readmeFile.Path = "README.md" readmeFile.IsReadme = true files = append(files, readmeFile) } // Find preferred model file var preferredModelFile *ProcessedModelFile for i := range files { if files[i].FileType == "model" { preferredModelFile = &files[i] break } } // Find readme file var readmeFile *ProcessedModelFile for i := range files { if files[i].FileType == "readme" { readmeFile = &files[i] break } } readmeContent := g.generateReadmeContent(modelName, author) return ProcessedModel{ ModelID: modelID, Author: author, Downloads: g.rand.Intn(1000000) + 1000, LastModified: g.randomDate(), Files: files, PreferredModelFile: preferredModelFile, ReadmeFile: readmeFile, ReadmeContent: readmeContent, ReadmeContentPreview: truncateString(readmeContent, 200), QuantizationPreferences: []string{"Q4_K_M", "Q4_K_S", "Q3_K_M", "Q2_K"}, ProcessingError: "", } } // Helper methods for synthetic data generation func (g *SyntheticDataGenerator) randomString(length int) string { const charset = "abcdefghijklmnopqrstuvwxyz0123456789" b := make([]byte, length) for i := range b { b[i] = charset[g.rand.Intn(len(charset))] } return string(b) } func (g *SyntheticDataGenerator) randomSHA256() string { const charset = "0123456789abcdef" b := make([]byte, 64) for i := range b { b[i] = charset[g.rand.Intn(len(charset))] } return string(b) } func (g *SyntheticDataGenerator) randomDate() string { now := time.Now() daysAgo := g.rand.Intn(365) // Random date within last year pastDate := now.AddDate(0, 0, -daysAgo) return pastDate.Format("2006-01-02T15:04:05.000Z") } func (g *SyntheticDataGenerator) generateReadmeContent(modelName, author string) string { templates := []string{ fmt.Sprintf("# %s Model\n\nThis is a %s model developed by %s. It's designed for various natural language processing tasks including text generation, question answering, and conversation.\n\n## Features\n\n- High-quality text generation\n- Efficient inference\n- Multiple quantization options\n- Easy to use with LocalAI\n\n## Usage\n\nUse this model with LocalAI for various AI tasks.", strings.Title(modelName), modelName, author), fmt.Sprintf("# %s\n\nA powerful language model from %s. This model excels at understanding and generating human-like text across multiple domains.\n\n## Capabilities\n\n- Text completion\n- Code generation\n- Creative writing\n- Technical documentation\n\n## Model Details\n\n- Architecture: Transformer-based\n- Training: Large-scale supervised learning\n- Quantization: Available in multiple formats", strings.Title(modelName), author), fmt.Sprintf("# %s Language Model\n\nDeveloped by %s, this model represents state-of-the-art performance in natural language understanding and generation.\n\n## Key Features\n\n- Multilingual support\n- Context-aware responses\n- Efficient memory usage\n- Fast inference speed\n\n## Applications\n\n- Chatbots and virtual assistants\n- Content generation\n- Code completion\n- Educational tools", strings.Title(modelName), author), } return templates[g.rand.Intn(len(templates))] }