mirror of
https://github.com/TecharoHQ/anubis.git
synced 2025-08-03 09:48:08 -04:00

* feat: add robots2policy CLI utility to convert robots.txt to Anubis challenge policies * feat: add documentation for robots2policy CLI tool * feat: implement crawl delay handling as weight adjustment in Anubis rules * feat: add various robots.txt and YAML configurations for user agent handling and crawl delays * test: add comprehensive tests for robots2policy conversion and parsing * fix: update example URL in usage instructions for robots2policy CLI * Update metadata check-spelling run (pull_request) for json/robots2policycli Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com> on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev> * docs: add crawl delay weight adjustment and deny user agents option to robots2policy CLI * Update cmd/robots2policy/main.go Co-authored-by: Xe Iaso <me@xeiaso.net> Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com> * Update cmd/robots2policy/main.go Co-authored-by: Xe Iaso <me@xeiaso.net> Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com> * fix(robots2policy): use sigs.k8s.io/yaml Signed-off-by: Xe Iaso <me@xeiaso.net> * feat(config): properly marshal bot policy rules Signed-off-by: Xe Iaso <me@xeiaso.net> * chore(yeetfile): expose robots2policy in libexec Signed-off-by: Xe Iaso <me@xeiaso.net> * fix(yeetfile): put robots2policy in $PATH Signed-off-by: Xe Iaso <me@xeiaso.net> * Update metadata check-spelling run (pull_request) for json/robots2policycli Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com> on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev> * style: reorder imports * refactor: use preexisting structs in config * fix: correct flag check in main function * fix: reorder fields in AnubisRule struct for better alignment * style: improve alignment of struct fields in AnubisRule and OGTagCache * Update metadata check-spelling run (pull_request) for json/robots2policycli Signed-off-by: check-spelling-bot 
<check-spelling-bot@users.noreply.github.com> on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev> * fix: add validation for generated Anubis rules from robots.txt * feat: add batch processing for robots.txt files to generate Anubis CEL policies * fix: improve usage message and error handling for input file requirement * refactor: update AnubisRule structure to use ExpressionOrList for improved expression handling * refactor: reorganize policy definitions in YAML files for consistency and clarity * fix: correct indentation in blacklist and complex YAML files for consistency * test: enhance output comparison in robots2policy tests for YAML and JSON formats * Revert "fix: improve usage message and error handling for input file requirement" This reverts commit ddcde1f2a326545d3ef2ec32e5e03f55f4f931a8. * fix: improve usage message and error handling in robots2policy Signed-off-by: Jason Cameron <git@jasoncameron.dev> --------- Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com> Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com> Signed-off-by: Xe Iaso <me@xeiaso.net> Signed-off-by: Jason Cameron <git@jasoncameron.dev> Co-authored-by: Xe Iaso <me@xeiaso.net>
87 lines
3.2 KiB
JavaScript
87 lines
3.2 KiB
JavaScript
// Run the `assets` npm script before any package is built.
$`npm run assets`;

// Build every package format (deb, rpm, tarball) for each listed Go
// architecture. `goarch` is handed to each packager's build() options as-is.
[
  "amd64",
  "arm64",
  "ppc64le",
  "riscv64",
].forEach(goarch => {
  [deb, rpm, tarball].forEach(method => method.build({
    name: "anubis",
    description: "Anubis weighs the souls of incoming HTTP requests and uses a sha256 proof-of-work challenge in order to protect upstream resources from scraper bots.",
    homepage: "https://anubis.techaro.lol",
    license: "MIT",
    goarch,

    // Source path -> file name for the documentation files shipped with
    // the package.
    documentation: {
      "./README.md": "README.md",
      "./LICENSE": "LICENSE",
      "./data/botPolicies.json": "botPolicies.json",
      "./data/botPolicies.yaml": "botPolicies.yaml",
    },

    // Called by the packager with the destination directories to populate.
    build: ({ bin, etc, systemd, doc }) => {
      // Statically linked, stripped binaries stamped with the current git tag.
      $`go build -o ${bin}/anubis -ldflags '-s -w -extldflags "-static" -X "github.com/TecharoHQ/anubis.Version=${git.tag()}"' ./cmd/anubis`;
      $`go build -o ${bin}/anubis-robots2policy -ldflags '-s -w -extldflags "-static" -X "github.com/TecharoHQ/anubis.Version=${git.tag()}"' ./cmd/robots2policy`;

      file.install("./run/anubis@.service", `${systemd}/anubis@.service`);
      file.install("./run/default.env", `${etc}/default.env`);

      // Ship the docs tree, minus the _category_.json files.
      $`mkdir -p ${doc}/docs`;
      $`cp -a docs/docs ${doc}`;
      $`find ${doc} -name _category_.json -delete`;

      // Ship the policy data directories alongside the docs.
      $`mkdir -p ${doc}/data`;
      for (const dir of ["apps", "bots", "clients", "common", "crawlers", "meta"]) {
        $`cp -a data/${dir} ${doc}/data/${dir}`;
      }
    },
  }));
});
|
|
|
|
// NOTE(Xe): Fixes #217. This is a "half baked" tarball that includes the harder
// parts for deterministic distros already done. Distributions like NixOS, Gentoo
// and *BSD ports have a difficult time fitting the square peg of their dependency
// model into the bazaar of round holes that various modern languages use. Needless
// to say, this makes adoption easier.
tarball.build({
  name: "anubis-src-vendor",
  license: "MIT",
  // XXX(Xe): This is needed otherwise go will be very sad.
  platform: yeet.goos,
  goarch: yeet.goarch,

  // Fills the output directory with a source snapshot plus vendored Go modules.
  build(dirs) {
    const checkout = dirs.out;

    // Export the committed tree at HEAD into the output directory.
    $`git archive --format=tar HEAD | tar xC ${checkout}`;
    // Pull the Go dependencies into vendor/ inside that tree.
    $`cd ${checkout} && go mod vendor`;
    // Record the current git tag in a VERSION file next to the sources.
    $`echo ${git.tag()} > ${checkout}/VERSION`;
  },

  // Archive base name: "<name>-<version>".
  mkFilename(pkg) {
    return `${pkg.name}-${pkg.version}`;
  },
});
|
|
|
|
// Source tarball with vendored Go modules and the output of the `assets`
// npm script already generated.
tarball.build({
  name: "anubis-src-vendor-npm",
  license: "MIT",
  // XXX(Xe): This is needed otherwise go will be very sad.
  platform: yeet.goos,
  goarch: yeet.goarch,

  // Called by the packager with `out`, the directory to populate.
  build: ({ out }) => {
    // prepare clean checkout in $out
    $`git archive --format=tar HEAD | tar xC ${out}`;
    // vendor Go dependencies
    $`cd ${out} && go mod vendor`;
    // build NPM-bound dependencies; node_modules is removed afterwards so
    // it does not end up in the tarball
    $`cd ${out} && npm ci && npm run assets && rm -rf node_modules`;
    // write VERSION file
    $`echo ${git.tag()} > ${out}/VERSION`;
  },

  // Archive base name: "<name>-<version>".
  mkFilename: ({ name, version }) => `${name}-${version}`,
});