mirror of
https://github.com/TecharoHQ/anubis.git
synced 2025-12-20 02:59:39 -06:00
* feat: first implementation of honeypot logic This is a bit of an experiment, stick with me. The core idea here is that badly written crawlers are that: badly written. They look for anything that contains `<a href="whatever" />` tags and will blindly use those values to recurse. This takes advantage of that by hiding a link in a `<script>` tag like this: ```html <script type="ignore"><a href="/bots-only">Don't click</a></script> ``` Browsers will ignore it because they have no handler for the "ignore" script type. This current draft is very unoptimized (it takes like 7 seconds to generate a page on my tower), however switching spintax libraries will make this much faster. The hope is to make this pluggable with WebAssembly such that we force administrators to choose a storage method. First we crawl before we walk. The AI involvement in this commit is limited to the spintax in affirmations.txt, spintext.txt, and titles.txt. This generates a bunch of "pseudoprofound bullshit" like the following: > This Restoration to Balance & Alignment > > There's a moment when creators are being called to realize that the work > can't be reduced to results, but about energy. We don't innovate products > by pushing harder, we do it by holding the vision. Because momentum can't > be forced, it unfolds over time when culture are moving in the same > direction. We're being invited into a paradigm shift in how we think > about innovation. [...] This is intended to "look" like normal article text. As this is a first draft, this sucks and will be improved upon. 
Assisted-by: GLM 4.6, ChatGPT, GPT-OSS 120b Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(honeypot/naive): optimize hilariously Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(honeypot/naive): attempt to automatically filter out based on crawling Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(lib): use mazeGen instead of bsGen Signed-off-by: Xe Iaso <me@xeiaso.net> * docs: add honeypot docs Signed-off-by: Xe Iaso <me@xeiaso.net> * chore(test): go mod tidy Signed-off-by: Xe Iaso <me@xeiaso.net> * chore: fix spelling metadata Signed-off-by: Xe Iaso <me@xeiaso.net> * chore: spelling Signed-off-by: Xe Iaso <me@xeiaso.net> --------- Signed-off-by: Xe Iaso <me@xeiaso.net>
56 lines
1.2 KiB
Go
56 lines
1.2 KiB
Go
// Package checker defines the Impl interface for request checkers, along with the Func and List helpers; it lives in its own package to avoid import cycles.
|
|
package checker
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
|
|
"github.com/TecharoHQ/anubis/internal"
|
|
)
|
|
|
|
// Impl is the interface satisfied by every request checker in this package.
type Impl interface {
	// Check inspects the request and returns a boolean verdict together with
	// any error encountered while evaluating it.
	Check(r *http.Request) (ok bool, err error)

	// Hash returns a string digest for the checker. List.Hash combines these
	// per-checker values into a single digest.
	Hash() string
}
|
|
|
|
// Func adapts a plain function with the Check signature so that it can be
// used through the methods defined on it.
type Func func(*http.Request) (bool, error)

// Check calls f with the request and hands back f's results unchanged.
func (f Func) Check(r *http.Request) (bool, error) {
	matched, err := f(r)
	return matched, err
}
|
|
|
|
// Hash returns internal.FastHash applied to the Go-syntax ("%#v") rendering
// of f.
//
// NOTE(review): fmt renders a func value by its address, so the digest is
// presumably tied to the function's code location rather than to anything a
// closure captures, and may differ between builds. Confirm this is acceptable
// for callers that compare hashes.
func (f Func) Hash() string {
	repr := fmt.Sprintf("%#v", f)
	return internal.FastHash(repr)
}
|
|
|
|
// List is a slice of checkers; its Check and Hash methods evaluate and digest the elements in slice order.
type List []Impl
|
|
|
|
// Check runs each checker in the list against the request.
|
|
// It returns true only if *all* checkers return true (AND semantics).
|
|
// If any checker returns an error, the function returns false and the error.
|
|
func (l List) Check(r *http.Request) (bool, error) {
|
|
for _, c := range l {
|
|
ok, err := c.Check(r)
|
|
if err != nil {
|
|
// Propagate the error; overall result is false.
|
|
return false, err
|
|
}
|
|
if !ok {
|
|
// One false means the combined result is false. Short-circuit
|
|
// so we don't waste time.
|
|
return false, err
|
|
}
|
|
}
|
|
// Assume success until a checker says otherwise.
|
|
return true, nil
|
|
}
|
|
|
|
func (l List) Hash() string {
|
|
var sb strings.Builder
|
|
|
|
for _, c := range l {
|
|
fmt.Fprintln(&sb, c.Hash())
|
|
}
|
|
|
|
return internal.FastHash(sb.String())
|
|
}
|