From 71a84b91e3fc069ecd2036fdf7a1b6218724becc Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 16 Oct 2025 16:05:17 +0200 Subject: [PATCH] chore(ci): fix gallery agent linting issues Signed-off-by: Ettore Di Giacinto --- .github/gallery-agent/gallery.go | 25 ++-- .github/gallery-agent/go.mod | 1 + .github/gallery-agent/main.go | 12 ++ .github/gallery-agent/testing.go | 190 +++++++++++++++++++++++++++++++ 4 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 .github/gallery-agent/testing.go diff --git a/.github/gallery-agent/gallery.go b/.github/gallery-agent/gallery.go index 07ea725b7..b0050c841 100644 --- a/.github/gallery-agent/gallery.go +++ b/.github/gallery-agent/gallery.go @@ -43,7 +43,18 @@ func generateYAMLEntry(model ProcessedModel, familyAnchor string) string { description = cleanTextContent(description) // Format description for YAML (indent each line and ensure no trailing spaces) - formattedDescription := strings.ReplaceAll(description, "\n", "\n ") + lines := strings.Split(description, "\n") + var formattedLines []string + for _, line := range lines { + if strings.TrimSpace(line) == "" { + // Keep empty lines as empty (no indentation) + formattedLines = append(formattedLines, "") + } else { + // Add indentation to non-empty lines + formattedLines = append(formattedLines, " "+line) + } + } + formattedDescription := strings.Join(formattedLines, "\n") // Remove any trailing spaces from the formatted description formattedDescription = strings.TrimRight(formattedDescription, " \t") yamlTemplate := "" @@ -53,15 +64,14 @@ func generateYAMLEntry(model ProcessedModel, familyAnchor string) string { urls: - https://huggingface.co/%s description: | - %s +%s overrides: parameters: model: %s files: - filename: %s sha256: %s - uri: huggingface://%s/%s -` + uri: huggingface://%s/%s` return fmt.Sprintf(yamlTemplate, familyAnchor, modelName, @@ -79,11 +89,10 @@ func generateYAMLEntry(model ProcessedModel, familyAnchor string) string { urls: - https://huggingface.co/%s description: | - %s +%s overrides: parameters: - model: %s -` + model: %s` return fmt.Sprintf(yamlTemplate, familyAnchor, modelName, @@ -179,7 +188,7 @@ func generateYAMLForModels(ctx context.Context, models []ProcessedModel) error { // Remove trailing whitespace from existing content and join entries without extra newlines existingContent := strings.TrimRight(string(content), " \t\n\r") yamlBlock := strings.Join(yamlEntries, "\n") - newContent := existingContent + "\n" + yamlBlock + newContent := existingContent + "\n" + yamlBlock + "\n" // Write back to file err = os.WriteFile(indexPath, []byte(newContent), 0644) diff --git a/.github/gallery-agent/go.mod b/.github/gallery-agent/go.mod index 7129f5507..250fc7967 100644 --- a/.github/gallery-agent/go.mod +++ b/.github/gallery-agent/go.mod @@ -8,6 +8,7 @@ require ( github.com/onsi/gomega v1.38.2 github.com/sashabaranov/go-openai v1.41.2 github.com/tmc/langchaingo v0.1.13 + gopkg.in/yaml.v3 v3.0.1 ) require ( diff --git a/.github/gallery-agent/main.go b/.github/gallery-agent/main.go index fb1ddce31..780e50f49 100644 --- a/.github/gallery-agent/main.go +++ b/.github/gallery-agent/main.go @@ -45,6 +45,18 @@ type SearchResult struct { } func main() { + // Check for synthetic mode + syntheticMode := os.Getenv("SYNTHETIC_MODE") + if syntheticMode == "true" || syntheticMode == "1" { + fmt.Println("Running in SYNTHETIC MODE - generating random test data") + err := runSyntheticMode() + if err != nil { + fmt.Fprintf(os.Stderr, "Error in synthetic mode: %v\n", err) + os.Exit(1) + } + return + } + // Get configuration from environment variables searchTerm := os.Getenv("SEARCH_TERM") if searchTerm == "" { diff --git a/.github/gallery-agent/testing.go b/.github/gallery-agent/testing.go new file mode 100644 index 000000000..103330db7 --- /dev/null +++ b/.github/gallery-agent/testing.go @@ -0,0 +1,190 @@ +package main + +import ( + "context" + "fmt" + "math/rand" + "strings" + "time" +) + +// runSyntheticMode generates synthetic test data and appends it to the gallery +func runSyntheticMode() error { + generator := NewSyntheticDataGenerator() + + // Generate a random number of synthetic models (1-3) + numModels := generator.rand.Intn(3) + 1 + fmt.Printf("Generating %d synthetic models for testing...\n", numModels) + + var models []ProcessedModel + for i := 0; i < numModels; i++ { + model := generator.GenerateProcessedModel() + models = append(models, model) + fmt.Printf("Generated synthetic model: %s\n", model.ModelID) + } + + // Generate YAML entries and append to gallery/index.yaml + fmt.Println("Generating YAML entries for synthetic models...") + err := generateYAMLForModels(context.Background(), models) + if err != nil { + return fmt.Errorf("error generating YAML entries: %w", err) + } + + fmt.Printf("Successfully added %d synthetic models to the gallery for testing!\n", len(models)) + return nil +} + +// SyntheticDataGenerator provides methods to generate synthetic test data +type SyntheticDataGenerator struct { + rand *rand.Rand +} + +// NewSyntheticDataGenerator creates a new synthetic data generator +func NewSyntheticDataGenerator() *SyntheticDataGenerator { + return &SyntheticDataGenerator{ + rand: rand.New(rand.NewSource(time.Now().UnixNano())), + } +} + +// GenerateProcessedModelFile creates a synthetic ProcessedModelFile +func (g *SyntheticDataGenerator) GenerateProcessedModelFile() ProcessedModelFile { + fileTypes := []string{"model", "readme", "other"} + fileType := fileTypes[g.rand.Intn(len(fileTypes))] + + var path string + var isReadme bool + + switch fileType { + case "model": + path = fmt.Sprintf("model-%s.gguf", g.randomString(8)) + isReadme = false + case "readme": + path = "README.md" + isReadme = true + default: + path = fmt.Sprintf("file-%s.txt", g.randomString(6)) + isReadme = false + } + + return ProcessedModelFile{ + Path: path, + Size: int64(g.rand.Intn(1000000000) + 1000000), // 1MB to 1GB + SHA256: g.randomSHA256(), + IsReadme: isReadme, + FileType: fileType, + } +} + +// GenerateProcessedModel creates a synthetic ProcessedModel +func (g *SyntheticDataGenerator) GenerateProcessedModel() ProcessedModel { + authors := []string{"microsoft", "meta", "google", "openai", "anthropic", "mistralai", "huggingface"} + modelNames := []string{"llama", "gpt", "claude", "mistral", "gemma", "phi", "qwen", "codellama"} + + author := authors[g.rand.Intn(len(authors))] + modelName := modelNames[g.rand.Intn(len(modelNames))] + modelID := fmt.Sprintf("%s/%s-%s", author, modelName, g.randomString(6)) + + // Generate files + numFiles := g.rand.Intn(5) + 2 // 2-6 files + files := make([]ProcessedModelFile, numFiles) + + // Ensure at least one model file and one readme + hasModelFile := false + hasReadme := false + + for i := 0; i < numFiles; i++ { + files[i] = g.GenerateProcessedModelFile() + if files[i].FileType == "model" { + hasModelFile = true + } + if files[i].FileType == "readme" { + hasReadme = true + } + } + + // Add required files if missing + if !hasModelFile { + modelFile := g.GenerateProcessedModelFile() + modelFile.FileType = "model" + modelFile.Path = fmt.Sprintf("%s-Q4_K_M.gguf", modelName) + files = append(files, modelFile) + } + + if !hasReadme { + readmeFile := g.GenerateProcessedModelFile() + readmeFile.FileType = "readme" + readmeFile.Path = "README.md" + readmeFile.IsReadme = true + files = append(files, readmeFile) + } + + // Find preferred model file + var preferredModelFile *ProcessedModelFile + for i := range files { + if files[i].FileType == "model" { + preferredModelFile = &files[i] + break + } + } + + // Find readme file + var readmeFile *ProcessedModelFile + for i := range files { + if files[i].FileType == "readme" { + readmeFile = &files[i] + break + } + } + + readmeContent := g.generateReadmeContent(modelName, author) + + return ProcessedModel{ + ModelID: modelID, + Author: author, + Downloads: g.rand.Intn(1000000) + 1000, + LastModified: g.randomDate(), + Files: files, + PreferredModelFile: preferredModelFile, + ReadmeFile: readmeFile, + ReadmeContent: readmeContent, + ReadmeContentPreview: truncateString(readmeContent, 200), + QuantizationPreferences: []string{"Q4_K_M", "Q4_K_S", "Q3_K_M", "Q2_K"}, + ProcessingError: "", + } +} + +// Helper methods for synthetic data generation +func (g *SyntheticDataGenerator) randomString(length int) string { + const charset = "abcdefghijklmnopqrstuvwxyz0123456789" + b := make([]byte, length) + for i := range b { + b[i] = charset[g.rand.Intn(len(charset))] + } + return string(b) +} + +func (g *SyntheticDataGenerator) randomSHA256() string { + const charset = "0123456789abcdef" + b := make([]byte, 64) + for i := range b { + b[i] = charset[g.rand.Intn(len(charset))] + } + return string(b) +} + +func (g *SyntheticDataGenerator) randomDate() string { + now := time.Now() + daysAgo := g.rand.Intn(365) // Random date within last year + pastDate := now.AddDate(0, 0, -daysAgo) + return pastDate.Format("2006-01-02T15:04:05.000Z") +} + +func (g *SyntheticDataGenerator) generateReadmeContent(modelName, author string) string { + templates := []string{ + fmt.Sprintf("# %s Model\n\nThis is a %s model developed by %s. It's designed for various natural language processing tasks including text generation, question answering, and conversation.\n\n## Features\n\n- High-quality text generation\n- Efficient inference\n- Multiple quantization options\n- Easy to use with LocalAI\n\n## Usage\n\nUse this model with LocalAI for various AI tasks.", strings.Title(modelName), modelName, author), + fmt.Sprintf("# %s\n\nA powerful language model from %s. This model excels at understanding and generating human-like text across multiple domains.\n\n## Capabilities\n\n- Text completion\n- Code generation\n- Creative writing\n- Technical documentation\n\n## Model Details\n\n- Architecture: Transformer-based\n- Training: Large-scale supervised learning\n- Quantization: Available in multiple formats", strings.Title(modelName), author), + fmt.Sprintf("# %s Language Model\n\nDeveloped by %s, this model represents state-of-the-art performance in natural language understanding and generation.\n\n## Key Features\n\n- Multilingual support\n- Context-aware responses\n- Efficient memory usage\n- Fast inference speed\n\n## Applications\n\n- Chatbots and virtual assistants\n- Content generation\n- Code completion\n- Educational tools", strings.Title(modelName), author), + } + + return templates[g.rand.Intn(len(templates))] +}