mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-02-09 05:19:24 -06:00
Merge branch 'main' into json/requireED25519
Signed-off-by: Jason Cameron <git@jasoncameron.dev>
This commit is contained in:
6
.github/workflows/docker-pr.yml
vendored
6
.github/workflows/docker-pr.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
with:
|
||||
path: |
|
||||
/home/linuxbrew/.linuxbrew/Cellar
|
||||
@@ -47,7 +47,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
||||
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
|
||||
with:
|
||||
images: ghcr.io/${{ github.repository }}
|
||||
|
||||
|
||||
10
.github/workflows/docker.yml
vendored
10
.github/workflows/docker.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
with:
|
||||
path: |
|
||||
/home/linuxbrew/.linuxbrew/Cellar
|
||||
@@ -56,7 +56,7 @@ jobs:
|
||||
brew bundle
|
||||
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
@@ -64,7 +64,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
||||
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
|
||||
with:
|
||||
images: ${{ env.IMAGE }}
|
||||
|
||||
@@ -78,7 +78,7 @@ jobs:
|
||||
SLOG_LEVEL: debug
|
||||
|
||||
- name: Generate artifact attestation
|
||||
uses: actions/attest-build-provenance@e8998f949152b193b063cb0ec769d69d929409be # v2.4.0
|
||||
uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3.0.0
|
||||
with:
|
||||
subject-name: ${{ env.IMAGE }}
|
||||
subject-digest: ${{ steps.build.outputs.digest }}
|
||||
|
||||
10
.github/workflows/docs-deploy.yml
vendored
10
.github/workflows/docs-deploy.yml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
||||
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: techarohq
|
||||
@@ -33,7 +33,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
||||
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
|
||||
with:
|
||||
images: ghcr.io/techarohq/anubis/docs
|
||||
tags: |
|
||||
@@ -53,14 +53,14 @@ jobs:
|
||||
push: true
|
||||
|
||||
- name: Apply k8s manifests to limsa lominsa
|
||||
uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3
|
||||
uses: actions-hub/kubectl@af345ed727f0268738e65be48422e463cc67c220 # v1.34.0
|
||||
env:
|
||||
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
||||
with:
|
||||
args: apply -k docs/manifest
|
||||
|
||||
- name: Apply k8s manifests to limsa lominsa
|
||||
uses: actions-hub/kubectl@b5b19eeb6a0ffde16637e398f8b96ef01eb8fdb7 # v1.33.3
|
||||
uses: actions-hub/kubectl@af345ed727f0268738e65be48422e463cc67c220 # v1.34.0
|
||||
env:
|
||||
KUBE_CONFIG: ${{ secrets.LIMSA_LOMINSA_KUBECONFIG }}
|
||||
with:
|
||||
|
||||
4
.github/workflows/docs-test.yml
vendored
4
.github/workflows/docs-test.yml
vendored
@@ -13,7 +13,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -22,7 +22,7 @@ jobs:
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5.7.0
|
||||
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5.8.0
|
||||
with:
|
||||
images: ghcr.io/techarohq/anubis/docs
|
||||
tags: |
|
||||
|
||||
8
.github/workflows/go.yml
vendored
8
.github/workflows/go.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
#runs-on: alrest-techarohq
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -28,7 +28,7 @@ jobs:
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
with:
|
||||
path: |
|
||||
/home/linuxbrew/.linuxbrew/Cellar
|
||||
@@ -49,7 +49,7 @@ jobs:
|
||||
brew bundle
|
||||
|
||||
- name: Setup Golang caches
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
@@ -59,7 +59,7 @@ jobs:
|
||||
${{ runner.os }}-golang-
|
||||
|
||||
- name: Cache playwright binaries
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
id: playwright-cache
|
||||
with:
|
||||
path: |
|
||||
|
||||
6
.github/workflows/package-builds-stable.yml
vendored
6
.github/workflows/package-builds-stable.yml
vendored
@@ -14,7 +14,7 @@ jobs:
|
||||
#runs-on: alrest-techarohq
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
fetch-tags: true
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
with:
|
||||
path: |
|
||||
/home/linuxbrew/.linuxbrew/Cellar
|
||||
@@ -50,7 +50,7 @@ jobs:
|
||||
brew bundle
|
||||
|
||||
- name: Setup Golang caches
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
|
||||
@@ -15,7 +15,7 @@ jobs:
|
||||
#runs-on: alrest-techarohq
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
fetch-tags: true
|
||||
@@ -30,7 +30,7 @@ jobs:
|
||||
uses: Homebrew/actions/setup-homebrew@main
|
||||
|
||||
- name: Setup Homebrew cellar cache
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
with:
|
||||
path: |
|
||||
/home/linuxbrew/.linuxbrew/Cellar
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
brew bundle
|
||||
|
||||
- name: Setup Golang caches
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
|
||||
with:
|
||||
path: |
|
||||
~/.cache/go-build
|
||||
|
||||
2
.github/workflows/smoke-tests.yml
vendored
2
.github/workflows/smoke-tests.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
4
.github/workflows/ssh-ci-runner-cron.yml
vendored
4
.github/workflows/ssh-ci-runner-cron.yml
vendored
@@ -18,13 +18,13 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
- name: Log into registry
|
||||
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
|
||||
uses: docker/login-action@184bdaa0721073962dff0199f1fb9940f07167d1 # v3.5.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
|
||||
2
.github/workflows/ssh-ci.yml
vendored
2
.github/workflows/ssh-ci.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
- ci@ppc64le.techaro.lol
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
fetch-tags: true
|
||||
fetch-depth: 0
|
||||
|
||||
6
.github/workflows/zizmor.yml
vendored
6
.github/workflows/zizmor.yml
vendored
@@ -16,12 +16,12 @@ jobs:
|
||||
security-events: write
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6.4.3
|
||||
uses: astral-sh/setup-uv@4959332f0f014c5280e7eac8b70c90cb574c9f9b # v6.6.0
|
||||
|
||||
- name: Run zizmor 🌈
|
||||
run: uvx zizmor --format sarif . > results.sarif
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Upload SARIF file
|
||||
uses: github/codeql-action/upload-sarif@4e828ff8d448a8a6e532957b1811f387a63867e8 # v3.29.4
|
||||
uses: github/codeql-action/upload-sarif@3c3833e0f8c1c83d449a7478aa59c036a9165498 # v3.29.11
|
||||
with:
|
||||
sarif_file: results.sarif
|
||||
category: zizmor
|
||||
|
||||
@@ -29,7 +29,7 @@ var (
|
||||
)
|
||||
|
||||
type RobotsRule struct {
|
||||
UserAgent string
|
||||
UserAgents []string
|
||||
Disallows []string
|
||||
Allows []string
|
||||
CrawlDelay int
|
||||
@@ -130,10 +130,26 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
func createRuleFromAccumulated(userAgents, disallows, allows []string, crawlDelay int) RobotsRule {
|
||||
rule := RobotsRule{
|
||||
UserAgents: make([]string, len(userAgents)),
|
||||
Disallows: make([]string, len(disallows)),
|
||||
Allows: make([]string, len(allows)),
|
||||
CrawlDelay: crawlDelay,
|
||||
}
|
||||
copy(rule.UserAgents, userAgents)
|
||||
copy(rule.Disallows, disallows)
|
||||
copy(rule.Allows, allows)
|
||||
return rule
|
||||
}
|
||||
|
||||
func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
|
||||
scanner := bufio.NewScanner(input)
|
||||
var rules []RobotsRule
|
||||
var currentRule *RobotsRule
|
||||
var currentUserAgents []string
|
||||
var currentDisallows []string
|
||||
var currentAllows []string
|
||||
var currentCrawlDelay int
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
@@ -154,38 +170,42 @@ func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
|
||||
|
||||
switch directive {
|
||||
case "user-agent":
|
||||
// Start a new rule section
|
||||
if currentRule != nil {
|
||||
rules = append(rules, *currentRule)
|
||||
}
|
||||
currentRule = &RobotsRule{
|
||||
UserAgent: value,
|
||||
Disallows: make([]string, 0),
|
||||
Allows: make([]string, 0),
|
||||
// If we have accumulated rules with directives and encounter a new user-agent,
|
||||
// flush the current rules
|
||||
if len(currentUserAgents) > 0 && (len(currentDisallows) > 0 || len(currentAllows) > 0 || currentCrawlDelay > 0) {
|
||||
rule := createRuleFromAccumulated(currentUserAgents, currentDisallows, currentAllows, currentCrawlDelay)
|
||||
rules = append(rules, rule)
|
||||
// Reset for next group
|
||||
currentUserAgents = nil
|
||||
currentDisallows = nil
|
||||
currentAllows = nil
|
||||
currentCrawlDelay = 0
|
||||
}
|
||||
currentUserAgents = append(currentUserAgents, value)
|
||||
|
||||
case "disallow":
|
||||
if currentRule != nil && value != "" {
|
||||
currentRule.Disallows = append(currentRule.Disallows, value)
|
||||
if len(currentUserAgents) > 0 && value != "" {
|
||||
currentDisallows = append(currentDisallows, value)
|
||||
}
|
||||
|
||||
case "allow":
|
||||
if currentRule != nil && value != "" {
|
||||
currentRule.Allows = append(currentRule.Allows, value)
|
||||
if len(currentUserAgents) > 0 && value != "" {
|
||||
currentAllows = append(currentAllows, value)
|
||||
}
|
||||
|
||||
case "crawl-delay":
|
||||
if currentRule != nil {
|
||||
if len(currentUserAgents) > 0 {
|
||||
if delay, err := parseIntSafe(value); err == nil {
|
||||
currentRule.CrawlDelay = delay
|
||||
currentCrawlDelay = delay
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Don't forget the last rule
|
||||
if currentRule != nil {
|
||||
rules = append(rules, *currentRule)
|
||||
// Don't forget the last group of rules
|
||||
if len(currentUserAgents) > 0 {
|
||||
rule := createRuleFromAccumulated(currentUserAgents, currentDisallows, currentAllows, currentCrawlDelay)
|
||||
rules = append(rules, rule)
|
||||
}
|
||||
|
||||
// Mark blacklisted user agents (those with "Disallow: /")
|
||||
@@ -211,10 +231,11 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
var anubisRules []AnubisRule
|
||||
ruleCounter := 0
|
||||
|
||||
// Process each robots rule individually
|
||||
for _, robotsRule := range robotsRules {
|
||||
userAgent := robotsRule.UserAgent
|
||||
userAgents := robotsRule.UserAgents
|
||||
|
||||
// Handle crawl delay as weight adjustment (do this first before any continues)
|
||||
// Handle crawl delay
|
||||
if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
@@ -223,20 +244,32 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
Weight: &config.Weight{Adjust: *crawlDelay},
|
||||
}
|
||||
|
||||
if userAgent == "*" {
|
||||
if len(userAgents) == 1 && userAgents[0] == "*" {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{"true"}, // Always applies
|
||||
}
|
||||
} else {
|
||||
} else if len(userAgents) == 1 {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgents[0])},
|
||||
}
|
||||
} else {
|
||||
// Multiple user agents - use any block
|
||||
var expressions []string
|
||||
for _, ua := range userAgents {
|
||||
if ua == "*" {
|
||||
expressions = append(expressions, "true")
|
||||
} else {
|
||||
expressions = append(expressions, fmt.Sprintf("userAgent.contains(%q)", ua))
|
||||
}
|
||||
}
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
Any: expressions,
|
||||
}
|
||||
}
|
||||
|
||||
anubisRules = append(anubisRules, rule)
|
||||
}
|
||||
|
||||
// Handle blacklisted user agents (complete deny/challenge)
|
||||
// Handle blacklisted user agents
|
||||
if robotsRule.IsBlacklist {
|
||||
ruleCounter++
|
||||
rule := AnubisRule{
|
||||
@@ -244,21 +277,36 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
Action: *userAgentDeny,
|
||||
}
|
||||
|
||||
if userAgent == "*" {
|
||||
// This would block everything - convert to a weight adjustment instead
|
||||
rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
|
||||
rule.Action = "WEIGH"
|
||||
rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{"true"}, // Always applies
|
||||
if len(userAgents) == 1 {
|
||||
userAgent := userAgents[0]
|
||||
if userAgent == "*" {
|
||||
// This would block everything - convert to a weight adjustment instead
|
||||
rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
|
||||
rule.Action = "WEIGH"
|
||||
rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{"true"}, // Always applies
|
||||
}
|
||||
} else {
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Multiple user agents - use any block
|
||||
var expressions []string
|
||||
for _, ua := range userAgents {
|
||||
if ua == "*" {
|
||||
expressions = append(expressions, "true")
|
||||
} else {
|
||||
expressions = append(expressions, fmt.Sprintf("userAgent.contains(%q)", ua))
|
||||
}
|
||||
}
|
||||
rule.Expression = &config.ExpressionOrList{
|
||||
All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
|
||||
Any: expressions,
|
||||
}
|
||||
}
|
||||
anubisRules = append(anubisRules, rule)
|
||||
continue
|
||||
}
|
||||
|
||||
// Handle specific disallow rules
|
||||
@@ -276,9 +324,33 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
// Build CEL expression
|
||||
var conditions []string
|
||||
|
||||
// Add user agent condition if not wildcard
|
||||
if userAgent != "*" {
|
||||
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgent))
|
||||
// Add user agent conditions
|
||||
if len(userAgents) == 1 && userAgents[0] == "*" {
|
||||
// Wildcard user agent - no user agent condition needed
|
||||
} else if len(userAgents) == 1 {
|
||||
conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgents[0]))
|
||||
} else {
|
||||
// For multiple user agents, we need to use a more complex expression
|
||||
// This is a limitation - we can't easily combine any for user agents with all for path
|
||||
// So we'll create separate rules for each user agent
|
||||
for _, ua := range userAgents {
|
||||
if ua == "*" {
|
||||
continue // Skip wildcard as it's handled separately
|
||||
}
|
||||
ruleCounter++
|
||||
subRule := AnubisRule{
|
||||
Name: fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
|
||||
Action: *baseAction,
|
||||
Expression: &config.ExpressionOrList{
|
||||
All: []string{
|
||||
fmt.Sprintf("userAgent.contains(%q)", ua),
|
||||
buildPathCondition(disallow),
|
||||
},
|
||||
},
|
||||
}
|
||||
anubisRules = append(anubisRules, subRule)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Add path condition
|
||||
@@ -291,7 +363,6 @@ func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
|
||||
|
||||
anubisRules = append(anubisRules, rule)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return anubisRules
|
||||
|
||||
@@ -78,6 +78,12 @@ func TestDataFileConversion(t *testing.T) {
|
||||
expectedFile: "complex.yaml",
|
||||
options: TestOptions{format: "yaml", crawlDelayWeight: 5},
|
||||
},
|
||||
{
|
||||
name: "consecutive_user_agents",
|
||||
robotsFile: "consecutive.robots.txt",
|
||||
expectedFile: "consecutive.yaml",
|
||||
options: TestOptions{format: "yaml", crawlDelayWeight: 3},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
|
||||
6
cmd/robots2policy/testdata/blacklist.yaml
vendored
6
cmd/robots2policy/testdata/blacklist.yaml
vendored
@@ -25,6 +25,6 @@
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-7
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-7
|
||||
|
||||
24
cmd/robots2policy/testdata/complex.yaml
vendored
24
cmd/robots2policy/testdata/complex.yaml
vendored
@@ -20,8 +20,8 @@
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search/")
|
||||
- userAgent.contains("Googlebot")
|
||||
- path.startsWith("/search/")
|
||||
name: robots-txt-policy-disallow-6
|
||||
- action: WEIGH
|
||||
expression: userAgent.contains("Bingbot")
|
||||
@@ -31,14 +31,14 @@
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/search/")
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/search/")
|
||||
name: robots-txt-policy-disallow-8
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/admin/")
|
||||
- userAgent.contains("Bingbot")
|
||||
- path.startsWith("/admin/")
|
||||
name: robots-txt-policy-disallow-9
|
||||
- action: DENY
|
||||
expression: userAgent.contains("BadBot")
|
||||
@@ -54,18 +54,18 @@
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/.*/admin")
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/.*/admin")
|
||||
name: robots-txt-policy-disallow-13
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/temp.*\\.html")
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/temp.*\\.html")
|
||||
name: robots-txt-policy-disallow-14
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/file.\\.log")
|
||||
- userAgent.contains("TestBot")
|
||||
- path.matches("^/file.\\.log")
|
||||
name: robots-txt-policy-disallow-15
|
||||
|
||||
25
cmd/robots2policy/testdata/consecutive.robots.txt
vendored
Normal file
25
cmd/robots2policy/testdata/consecutive.robots.txt
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
# Test consecutive user agents that should be grouped into any: blocks
|
||||
User-agent: *
|
||||
Disallow: /admin
|
||||
Crawl-delay: 10
|
||||
|
||||
# Multiple consecutive user agents - should be grouped
|
||||
User-agent: BadBot
|
||||
User-agent: SpamBot
|
||||
User-agent: EvilBot
|
||||
Disallow: /
|
||||
|
||||
# Single user agent - should be separate
|
||||
User-agent: GoodBot
|
||||
Disallow: /private
|
||||
|
||||
# Multiple consecutive user agents with crawl delay
|
||||
User-agent: SlowBot1
|
||||
User-agent: SlowBot2
|
||||
Crawl-delay: 5
|
||||
|
||||
# Multiple consecutive user agents with specific path
|
||||
User-agent: SearchBot1
|
||||
User-agent: SearchBot2
|
||||
User-agent: SearchBot3
|
||||
Disallow: /search
|
||||
47
cmd/robots2policy/testdata/consecutive.yaml
vendored
Normal file
47
cmd/robots2policy/testdata/consecutive.yaml
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
- action: WEIGH
|
||||
expression: "true"
|
||||
name: robots-txt-policy-crawl-delay-1
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: CHALLENGE
|
||||
expression: path.startsWith("/admin")
|
||||
name: robots-txt-policy-disallow-2
|
||||
- action: DENY
|
||||
expression:
|
||||
any:
|
||||
- userAgent.contains("BadBot")
|
||||
- userAgent.contains("SpamBot")
|
||||
- userAgent.contains("EvilBot")
|
||||
name: robots-txt-policy-blacklist-3
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("GoodBot")
|
||||
- path.startsWith("/private")
|
||||
name: robots-txt-policy-disallow-4
|
||||
- action: WEIGH
|
||||
expression:
|
||||
any:
|
||||
- userAgent.contains("SlowBot1")
|
||||
- userAgent.contains("SlowBot2")
|
||||
name: robots-txt-policy-crawl-delay-5
|
||||
weight:
|
||||
adjust: 3
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("SearchBot1")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-7
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("SearchBot2")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-8
|
||||
- action: CHALLENGE
|
||||
expression:
|
||||
all:
|
||||
- userAgent.contains("SearchBot3")
|
||||
- path.startsWith("/search")
|
||||
name: robots-txt-policy-disallow-9
|
||||
8
cmd/robots2policy/testdata/simple.json
vendored
8
cmd/robots2policy/testdata/simple.json
vendored
@@ -1,12 +1,12 @@
|
||||
[
|
||||
{
|
||||
"action": "CHALLENGE",
|
||||
"expression": "path.startsWith(\"/admin/\")",
|
||||
"name": "robots-txt-policy-disallow-1"
|
||||
"name": "robots-txt-policy-disallow-1",
|
||||
"action": "CHALLENGE"
|
||||
},
|
||||
{
|
||||
"action": "CHALLENGE",
|
||||
"expression": "path.startsWith(\"/private\")",
|
||||
"name": "robots-txt-policy-disallow-2"
|
||||
"name": "robots-txt-policy-disallow-2",
|
||||
"action": "CHALLENGE"
|
||||
}
|
||||
]
|
||||
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
- Document missing environment variables in installation guide: `SLOG_LEVEL`, `COOKIE_PREFIX`, `FORCED_LANGUAGE`, and `TARGET_DISABLE_KEEPALIVE` ([#1086](https://github.com/TecharoHQ/anubis/pull/1086))
|
||||
- Add validation warning when persistent storage is used without setting signing keys
|
||||
- Fixed `robots2policy` to properly group consecutive user agents into `any:` instead of only processing the last one ([#925](https://github.com/TecharoHQ/anubis/pull/925))
|
||||
|
||||
<!-- This changes the project to: -->
|
||||
|
||||
|
||||
Reference in New Issue
Block a user