anubis/yeetfile.js
Jason Cameron e0781e4560
feat: add robots2policy CLI to convert robots.txt to Anubis CEL (#657)
* feat: add robots2policy CLI utility to convert robots.txt to Anubis challenge policies

* feat: add documentation for robots2policy CLI tool

* feat: implement crawl delay handling as weight adjustment in Anubis rules

* feat: add various robots.txt and YAML configurations for user agent handling and crawl delays

* test: add comprehensive tests for robots2policy conversion and parsing

* fix: update example URL in usage instructions for robots2policy CLI

* Update metadata

check-spelling run (pull_request) for json/robots2policycli

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>

* docs: add crawl delay weight adjustment and deny user agents option to robots2policy CLI

* Update cmd/robots2policy/main.go

Co-authored-by: Xe Iaso <me@xeiaso.net>
Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com>

* Update cmd/robots2policy/main.go

Co-authored-by: Xe Iaso <me@xeiaso.net>
Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com>

* fix(robots2policy): use sigs.k8s.io/yaml

Signed-off-by: Xe Iaso <me@xeiaso.net>

* feat(config): properly marshal bot policy rules

Signed-off-by: Xe Iaso <me@xeiaso.net>

* chore(yeetfile): expose robots2policy in libexec

Signed-off-by: Xe Iaso <me@xeiaso.net>

* fix(yeetfile): put robots2policy in $PATH

Signed-off-by: Xe Iaso <me@xeiaso.net>

* Update metadata

check-spelling run (pull_request) for json/robots2policycli

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>

* style: reorder imports

* refactor: use preexisting structs in config

* fix: correct flag check in main function

* fix: reorder fields in AnubisRule struct for better alignment

* style: improve alignment of struct fields in AnubisRule and OGTagCache

* Update metadata

check-spelling run (pull_request) for json/robots2policycli

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>

* fix: add validation for generated Anubis rules from robots.txt

* feat: add batch processing for robots.txt files to generate Anubis CEL policies

* fix: improve usage message and error handling for input file requirement

* refactor: update AnubisRule structure to use ExpressionOrList for improved expression handling

* refactor: reorganize policy definitions in YAML files for consistency and clarity

* fix: correct indentation in blacklist and complex YAML files for consistency

* test: enhance output comparison in robots2policy tests for YAML and JSON formats

* Revert "fix: improve usage message and error handling for input file requirement"

This reverts commit ddcde1f2a326545d3ef2ec32e5e03f55f4f931a8.

* fix: improve usage message and error handling in robots2policy

Signed-off-by: Jason Cameron <git@jasoncameron.dev>

---------

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com>
Signed-off-by: Xe Iaso <me@xeiaso.net>
Signed-off-by: Jason Cameron <git@jasoncameron.dev>
Co-authored-by: Xe Iaso <me@xeiaso.net>
2025-06-14 23:41:00 -04:00

87 lines
3.2 KiB
JavaScript

$`npm run assets`;
[
"amd64",
"arm64",
"ppc64le",
"riscv64",
].forEach(goarch => {
[deb, rpm, tarball].forEach(method => method.build({
name: "anubis",
description: "Anubis weighs the souls of incoming HTTP requests and uses a sha256 proof-of-work challenge in order to protect upstream resources from scraper bots.",
homepage: "https://anubis.techaro.lol",
license: "MIT",
goarch,
documentation: {
"./README.md": "README.md",
"./LICENSE": "LICENSE",
"./data/botPolicies.json": "botPolicies.json",
"./data/botPolicies.yaml": "botPolicies.yaml",
},
build: ({ bin, etc, systemd, doc }) => {
$`go build -o ${bin}/anubis -ldflags '-s -w -extldflags "-static" -X "github.com/TecharoHQ/anubis.Version=${git.tag()}"' ./cmd/anubis`;
$`go build -o ${bin}/anubis-robots2policy -ldflags '-s -w -extldflags "-static" -X "github.com/TecharoHQ/anubis.Version=${git.tag()}"' ./cmd/robots2policy`;
file.install("./run/anubis@.service", `${systemd}/anubis@.service`);
file.install("./run/default.env", `${etc}/default.env`);
$`mkdir -p ${doc}/docs`
$`cp -a docs/docs ${doc}`;
$`find ${doc} -name _category_.json -delete`;
$`mkdir -p ${doc}/data`;
$`cp -a data/apps ${doc}/data/apps`;
$`cp -a data/bots ${doc}/data/bots`;
$`cp -a data/clients ${doc}/data/clients`;
$`cp -a data/common ${doc}/data/common`;
$`cp -a data/crawlers ${doc}/data/crawlers`;
$`cp -a data/meta ${doc}/data/meta`;
},
}));
});
// NOTE(Xe): Fixes #217. This is a "half baked" tarball that includes the harder
// parts for deterministic distros already done. Distributions like NixOS, Gentoo
// and *BSD ports have a difficult time fitting the square peg of their dependency
// model into the bazaar of round holes that various modern languages use. Needless
// to say, this makes adoption easier.
tarball.build({
name: "anubis-src-vendor",
license: "MIT",
// XXX(Xe): This is needed otherwise go will be very sad.
platform: yeet.goos,
goarch: yeet.goarch,
build: ({ out }) => {
// prepare clean checkout in $out
$`git archive --format=tar HEAD | tar xC ${out}`;
// vendor Go dependencies
$`cd ${out} && go mod vendor`;
// write VERSION file
$`echo ${git.tag()} > ${out}/VERSION`;
},
mkFilename: ({ name, version }) => `${name}-${version}`,
});
tarball.build({
name: "anubis-src-vendor-npm",
license: "MIT",
// XXX(Xe): This is needed otherwise go will be very sad.
platform: yeet.goos,
goarch: yeet.goarch,
build: ({ out }) => {
// prepare clean checkout in $out
$`git archive --format=tar HEAD | tar xC ${out}`;
// vendor Go dependencies
$`cd ${out} && go mod vendor`;
// build NPM-bound dependencies
$`cd ${out} && npm ci && npm run assets && rm -rf node_modules`
// write VERSION file
$`echo ${git.tag()} > ${out}/VERSION`;
},
mkFilename: ({ name, version }) => `${name}-${version}`,
});