mirror of https://github.com/TecharoHQ/anubis.git (synced 2025-08-03 09:48:08 -04:00)
feat: add robots2policy CLI to convert robots.txt to Anubis CEL (#657)
* feat: add robots2policy CLI utility to convert robots.txt to Anubis challenge policies
* feat: add documentation for robots2policy CLI tool
* feat: implement crawl delay handling as weight adjustment in Anubis rules
* feat: add various robots.txt and YAML configurations for user agent handling and crawl delays
* test: add comprehensive tests for robots2policy conversion and parsing
* fix: update example URL in usage instructions for robots2policy CLI
* Update metadata check-spelling run (pull_request) for json/robots2policycli
  Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
  on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>
* docs: add crawl delay weight adjustment and deny user agents option to robots2policy CLI
* Update cmd/robots2policy/main.go
  Co-authored-by: Xe Iaso <me@xeiaso.net>
  Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com>
* Update cmd/robots2policy/main.go
  Co-authored-by: Xe Iaso <me@xeiaso.net>
  Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com>
* fix(robots2policy): use sigs.k8s.io/yaml
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* feat(config): properly marshal bot policy rules
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* chore(yeetfile): expose robots2policy in libexec
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* fix(yeetfile): put robots2policy in $PATH
  Signed-off-by: Xe Iaso <me@xeiaso.net>
* Update metadata check-spelling run (pull_request) for json/robots2policycli
  Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
  on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>
* style: reorder imports
* refactor: use preexisting structs in config
* fix: correct flag check in main function
* fix: reorder fields in AnubisRule struct for better alignment
* style: improve alignment of struct fields in AnubisRule and OGTagCache
* Update metadata check-spelling run (pull_request) for json/robots2policycli
  Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
  on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>
* fix: add validation for generated Anubis rules from robots.txt
* feat: add batch processing for robots.txt files to generate Anubis CEL policies
* fix: improve usage message and error handling for input file requirement
* refactor: update AnubisRule structure to use ExpressionOrList for improved expression handling
* refactor: reorganize policy definitions in YAML files for consistency and clarity
* fix: correct indentation in blacklist and complex YAML files for consistency
* test: enhance output comparison in robots2policy tests for YAML and JSON formats
* Revert "fix: improve usage message and error handling for input file requirement"
  This reverts commit ddcde1f2a326545d3ef2ec32e5e03f55f4f931a8.
* fix: improve usage message and error handling in robots2policy
  Signed-off-by: Jason Cameron <git@jasoncameron.dev>

---------

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com>
Signed-off-by: Xe Iaso <me@xeiaso.net>
Signed-off-by: Jason Cameron <git@jasoncameron.dev>
Co-authored-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
parent 7a195f1595
commit e0781e4560

4 .github/actions/spelling/expect.txt vendored
@@ -12,6 +12,7 @@ archlinux
 badregexes
 bdba
 berr
+betteralign
 bingbot
 bitcoin
 blogging
@@ -96,6 +97,7 @@ gomod
 goodbot
 googlebot
 govulncheck
+goyaml
 GPG
 GPT
 gptbot
@@ -162,6 +164,7 @@ mojeekbot
 mozilla
 nbf
 netsurf
+NFlag
 nginx
 nobots
 NONINFRINGEMENT
@@ -217,6 +220,7 @@ sebest
 secretplans
 selfsigned
 Semrush
+Seo
 setsebool
 shellcheck
 Sidetrade
78 cmd/robots2policy/batch/batch_process.go Normal file
@@ -0,0 +1,78 @@
/*
Batch process robots.txt files from archives like https://github.com/nrjones8/robots-dot-txt-archive-bot/tree/master/data/cleaned
into Anubis CEL policies. Usage: go run batch_process.go <directory with robots.txt files>
*/
package main

import (
	"fmt"
	"io/fs"
	"log"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
)

func main() {
	if len(os.Args) < 2 {
		fmt.Println("Usage: go run batch_process.go <cleaned_directory>")
		fmt.Println("Example: go run batch_process.go ./cleaned")
		os.Exit(1)
	}

	cleanedDir := os.Args[1]
	outputDir := "generated_policies"

	// Create output directory
	if err := os.MkdirAll(outputDir, 0755); err != nil {
		log.Fatalf("Failed to create output directory: %v", err)
	}

	count := 0
	err := filepath.WalkDir(cleanedDir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		// Skip directories
		if d.IsDir() {
			return nil
		}

		// Generate policy name from file path
		relPath, _ := filepath.Rel(cleanedDir, path)
		policyName := strings.ReplaceAll(relPath, "/", "-")
		policyName = strings.TrimSuffix(policyName, "-robots.txt")
		policyName = strings.ReplaceAll(policyName, ".", "-")

		outputFile := filepath.Join(outputDir, policyName+".yaml")

		cmd := exec.Command("go", "run", "main.go",
			"-input", path,
			"-output", outputFile,
			"-name", policyName,
			"-format", "yaml")

		if err := cmd.Run(); err != nil {
			fmt.Printf("Warning: Failed to process %s: %v\n", path, err)
			return nil // Continue processing other files
		}

		count++
		if count%100 == 0 {
			fmt.Printf("Processed %d files...\n", count)
		} else if count%10 == 0 {
			fmt.Print(".")
		}

		return nil
	})

	if err != nil {
		log.Fatalf("Error walking directory: %v", err)
	}

	fmt.Printf("Successfully processed %d robots.txt files\n", count)
	fmt.Printf("Generated policies saved to: %s/\n", outputDir)
}
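To make the naming scheme above concrete: an input file cleaned/example.com/robots.txt yields relPath example.com/robots.txt, which the string transforms turn into the policy name example-com and the output file generated_policies/example-com.yaml (example.com is a placeholder input here, not a path from the archive).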
313 cmd/robots2policy/main.go Normal file
@@ -0,0 +1,313 @@
package main

import (
	"bufio"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"regexp"
	"strings"

	"github.com/TecharoHQ/anubis/lib/policy/config"

	"sigs.k8s.io/yaml"
)

var (
	inputFile     = flag.String("input", "", "path to robots.txt file (use - for stdin)")
	outputFile    = flag.String("output", "", "output file path (use - for stdout, defaults to stdout)")
	outputFormat  = flag.String("format", "yaml", "output format: yaml or json")
	baseAction    = flag.String("action", "CHALLENGE", "default action for disallowed paths: ALLOW, DENY, CHALLENGE, WEIGH")
	crawlDelay    = flag.Int("crawl-delay-weight", 0, "if > 0, add weight adjustment for crawl-delay (difficulty adjustment)")
	policyName    = flag.String("name", "robots-txt-policy", "name for the generated policy")
	userAgentDeny = flag.String("deny-user-agents", "DENY", "action for specifically blocked user agents: DENY, CHALLENGE")
	helpFlag      = flag.Bool("help", false, "show help")
)

type RobotsRule struct {
	UserAgent   string
	Disallows   []string
	Allows      []string
	CrawlDelay  int
	IsBlacklist bool // true if this is a specifically denied user agent
}

type AnubisRule struct {
	Expression *config.ExpressionOrList `yaml:"expression,omitempty" json:"expression,omitempty"`
	Challenge  *config.ChallengeRules   `yaml:"challenge,omitempty" json:"challenge,omitempty"`
	Weight     *config.Weight           `yaml:"weight,omitempty" json:"weight,omitempty"`
	Name       string                   `yaml:"name" json:"name"`
	Action     string                   `yaml:"action" json:"action"`
}

func init() {
	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0])
		fmt.Fprintf(os.Stderr, "%s [options] -input <robots.txt>\n\n", os.Args[0])
		flag.PrintDefaults()
		fmt.Fprintln(os.Stderr, "\nExamples:")
		fmt.Fprintln(os.Stderr, "  # Convert local robots.txt file")
		fmt.Fprintln(os.Stderr, "  robots2policy -input robots.txt -output policy.yaml")
		fmt.Fprintln(os.Stderr, "")
		fmt.Fprintln(os.Stderr, "  # Convert from URL")
		fmt.Fprintln(os.Stderr, "  robots2policy -input https://example.com/robots.txt -format json")
		fmt.Fprintln(os.Stderr, "")
		fmt.Fprintln(os.Stderr, "  # Read from stdin, write to stdout")
		fmt.Fprintln(os.Stderr, "  curl https://example.com/robots.txt | robots2policy -input -")
		os.Exit(2)
	}
}

func main() {
	flag.Parse()

	if len(flag.Args()) > 0 || *helpFlag || *inputFile == "" {
		flag.Usage()
	}

	// Read robots.txt
	var input io.Reader
	if *inputFile == "-" {
		input = os.Stdin
	} else if strings.HasPrefix(*inputFile, "http://") || strings.HasPrefix(*inputFile, "https://") {
		resp, err := http.Get(*inputFile)
		if err != nil {
			log.Fatalf("failed to fetch robots.txt from URL: %v", err)
		}
		defer resp.Body.Close()
		input = resp.Body
	} else {
		file, err := os.Open(*inputFile)
		if err != nil {
			log.Fatalf("failed to open input file: %v", err)
		}
		defer file.Close()
		input = file
	}

	// Parse robots.txt
	rules, err := parseRobotsTxt(input)
	if err != nil {
		log.Fatalf("failed to parse robots.txt: %v", err)
	}

	// Convert to Anubis rules
	anubisRules := convertToAnubisRules(rules)

	// Check if any rules were generated
	if len(anubisRules) == 0 {
		log.Fatal("no valid rules generated from robots.txt - file may be empty or contain no disallow directives")
	}

	// Generate output
	var output []byte
	switch strings.ToLower(*outputFormat) {
	case "yaml":
		output, err = yaml.Marshal(anubisRules)
	case "json":
		output, err = json.MarshalIndent(anubisRules, "", "  ")
	default:
		log.Fatalf("unsupported output format: %s (use yaml or json)", *outputFormat)
	}

	if err != nil {
		log.Fatalf("failed to marshal output: %v", err)
	}

	// Write output
	if *outputFile == "" || *outputFile == "-" {
		fmt.Print(string(output))
	} else {
		err = os.WriteFile(*outputFile, output, 0644)
		if err != nil {
			log.Fatalf("failed to write output file: %v", err)
		}
		fmt.Printf("Generated Anubis policy written to %s\n", *outputFile)
	}
}

func parseRobotsTxt(input io.Reader) ([]RobotsRule, error) {
	scanner := bufio.NewScanner(input)
	var rules []RobotsRule
	var currentRule *RobotsRule

	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())

		// Skip empty lines and comments
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}

		// Split on first colon
		parts := strings.SplitN(line, ":", 2)
		if len(parts) != 2 {
			continue
		}

		directive := strings.TrimSpace(strings.ToLower(parts[0]))
		value := strings.TrimSpace(parts[1])

		switch directive {
		case "user-agent":
			// Start a new rule section
			if currentRule != nil {
				rules = append(rules, *currentRule)
			}
			currentRule = &RobotsRule{
				UserAgent: value,
				Disallows: make([]string, 0),
				Allows:    make([]string, 0),
			}

		case "disallow":
			if currentRule != nil && value != "" {
				currentRule.Disallows = append(currentRule.Disallows, value)
			}

		case "allow":
			if currentRule != nil && value != "" {
				currentRule.Allows = append(currentRule.Allows, value)
			}

		case "crawl-delay":
			if currentRule != nil {
				if delay, err := parseIntSafe(value); err == nil {
					currentRule.CrawlDelay = delay
				}
			}
		}
	}

	// Don't forget the last rule
	if currentRule != nil {
		rules = append(rules, *currentRule)
	}

	// Mark blacklisted user agents (those with "Disallow: /")
	for i := range rules {
		for _, disallow := range rules[i].Disallows {
			if disallow == "/" {
				rules[i].IsBlacklist = true
				break
			}
		}
	}

	return rules, scanner.Err()
}

func parseIntSafe(s string) (int, error) {
	var result int
	_, err := fmt.Sscanf(s, "%d", &result)
	return result, err
}

func convertToAnubisRules(robotsRules []RobotsRule) []AnubisRule {
	var anubisRules []AnubisRule
	ruleCounter := 0

	for _, robotsRule := range robotsRules {
		userAgent := robotsRule.UserAgent

		// Handle crawl delay as weight adjustment (do this first before any continues)
		if robotsRule.CrawlDelay > 0 && *crawlDelay > 0 {
			ruleCounter++
			rule := AnubisRule{
				Name:   fmt.Sprintf("%s-crawl-delay-%d", *policyName, ruleCounter),
				Action: "WEIGH",
				Weight: &config.Weight{Adjust: *crawlDelay},
			}

			if userAgent == "*" {
				rule.Expression = &config.ExpressionOrList{
					All: []string{"true"}, // Always applies
				}
			} else {
				rule.Expression = &config.ExpressionOrList{
					All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
				}
			}

			anubisRules = append(anubisRules, rule)
		}

		// Handle blacklisted user agents (complete deny/challenge)
		if robotsRule.IsBlacklist {
			ruleCounter++
			rule := AnubisRule{
				Name:   fmt.Sprintf("%s-blacklist-%d", *policyName, ruleCounter),
				Action: *userAgentDeny,
			}

			if userAgent == "*" {
				// This would block everything - convert to a weight adjustment instead
				rule.Name = fmt.Sprintf("%s-global-restriction-%d", *policyName, ruleCounter)
				rule.Action = "WEIGH"
				rule.Weight = &config.Weight{Adjust: 20} // Increase difficulty significantly
				rule.Expression = &config.ExpressionOrList{
					All: []string{"true"}, // Always applies
				}
			} else {
				rule.Expression = &config.ExpressionOrList{
					All: []string{fmt.Sprintf("userAgent.contains(%q)", userAgent)},
				}
			}
			anubisRules = append(anubisRules, rule)
			continue
		}

		// Handle specific disallow rules
		for _, disallow := range robotsRule.Disallows {
			if disallow == "/" {
				continue // Already handled as blacklist above
			}

			ruleCounter++
			rule := AnubisRule{
				Name:   fmt.Sprintf("%s-disallow-%d", *policyName, ruleCounter),
				Action: *baseAction,
			}

			// Build CEL expression
			var conditions []string

			// Add user agent condition if not wildcard
			if userAgent != "*" {
				conditions = append(conditions, fmt.Sprintf("userAgent.contains(%q)", userAgent))
			}

			// Add path condition
			pathCondition := buildPathCondition(disallow)
			conditions = append(conditions, pathCondition)

			rule.Expression = &config.ExpressionOrList{
				All: conditions,
			}

			anubisRules = append(anubisRules, rule)
		}
	}

	return anubisRules
}

func buildPathCondition(robotsPath string) string {
	// Handle wildcards in robots.txt paths
	if strings.Contains(robotsPath, "*") || strings.Contains(robotsPath, "?") {
		// Convert robots.txt wildcards to regex
		regex := regexp.QuoteMeta(robotsPath)
		regex = strings.ReplaceAll(regex, `\*`, `.*`) // * becomes .*
		regex = strings.ReplaceAll(regex, `\?`, `.`)  // ? becomes .
		regex = "^" + regex
		return fmt.Sprintf("path.matches(%q)", regex)
	}

	// Simple prefix match for most cases
	return fmt.Sprintf("path.startsWith(%q)", robotsPath)
}
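Taken together, convertToAnubisRules emits up to three kinds of rules per user-agent section: a WEIGH rule for Crawl-delay (when -crawl-delay-weight is set), a blacklist rule for agents with "Disallow: /", and one rule per remaining Disallow path. As a concrete illustration taken from the blacklist testdata later in this commit, "Crawl-delay: 60" under SpamBot converts with -crawl-delay-weight 3 into:

- action: WEIGH
  expression: userAgent.contains("SpamBot")
  name: robots-txt-policy-crawl-delay-4
  weight:
    adjust: 3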
418 cmd/robots2policy/robots2policy_test.go Normal file
@@ -0,0 +1,418 @@
package main

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"reflect"
	"strings"
	"testing"

	"gopkg.in/yaml.v3"
)

type TestCase struct {
	name         string
	robotsFile   string
	expectedFile string
	options      TestOptions
}

type TestOptions struct {
	format           string
	action           string
	crawlDelayWeight int
	policyName       string
	deniedAction     string
}

func TestDataFileConversion(t *testing.T) {
	testCases := []TestCase{
		{
			name:         "simple_default",
			robotsFile:   "simple.robots.txt",
			expectedFile: "simple.yaml",
			options:      TestOptions{format: "yaml"},
		},
		{
			name:         "simple_json",
			robotsFile:   "simple.robots.txt",
			expectedFile: "simple.json",
			options:      TestOptions{format: "json"},
		},
		{
			name:         "simple_deny_action",
			robotsFile:   "simple.robots.txt",
			expectedFile: "deny-action.yaml",
			options:      TestOptions{format: "yaml", action: "DENY"},
		},
		{
			name:         "simple_custom_name",
			robotsFile:   "simple.robots.txt",
			expectedFile: "custom-name.yaml",
			options:      TestOptions{format: "yaml", policyName: "my-custom-policy"},
		},
		{
			name:         "blacklist_with_crawl_delay",
			robotsFile:   "blacklist.robots.txt",
			expectedFile: "blacklist.yaml",
			options:      TestOptions{format: "yaml", crawlDelayWeight: 3},
		},
		{
			name:         "wildcards",
			robotsFile:   "wildcards.robots.txt",
			expectedFile: "wildcards.yaml",
			options:      TestOptions{format: "yaml"},
		},
		{
			name:         "empty_file",
			robotsFile:   "empty.robots.txt",
			expectedFile: "empty.yaml",
			options:      TestOptions{format: "yaml"},
		},
		{
			name:         "complex_scenario",
			robotsFile:   "complex.robots.txt",
			expectedFile: "complex.yaml",
			options:      TestOptions{format: "yaml", crawlDelayWeight: 5},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			robotsPath := filepath.Join("testdata", tc.robotsFile)
			expectedPath := filepath.Join("testdata", tc.expectedFile)

			// Read robots.txt input
			robotsFile, err := os.Open(robotsPath)
			if err != nil {
				t.Fatalf("Failed to open robots file %s: %v", robotsPath, err)
			}
			defer robotsFile.Close()

			// Parse robots.txt
			rules, err := parseRobotsTxt(robotsFile)
			if err != nil {
				t.Fatalf("Failed to parse robots.txt: %v", err)
			}

			// Set test options
			oldFormat := *outputFormat
			oldAction := *baseAction
			oldCrawlDelay := *crawlDelay
			oldPolicyName := *policyName
			oldDeniedAction := *userAgentDeny

			if tc.options.format != "" {
				*outputFormat = tc.options.format
			}
			if tc.options.action != "" {
				*baseAction = tc.options.action
			}
			if tc.options.crawlDelayWeight > 0 {
				*crawlDelay = tc.options.crawlDelayWeight
			}
			if tc.options.policyName != "" {
				*policyName = tc.options.policyName
			}
			if tc.options.deniedAction != "" {
				*userAgentDeny = tc.options.deniedAction
			}

			// Restore options after test
			defer func() {
				*outputFormat = oldFormat
				*baseAction = oldAction
				*crawlDelay = oldCrawlDelay
				*policyName = oldPolicyName
				*userAgentDeny = oldDeniedAction
			}()

			// Convert to Anubis rules
			anubisRules := convertToAnubisRules(rules)

			// Generate output
			var actualOutput []byte
			switch strings.ToLower(*outputFormat) {
			case "yaml":
				actualOutput, err = yaml.Marshal(anubisRules)
			case "json":
				actualOutput, err = json.MarshalIndent(anubisRules, "", "  ")
			}
			if err != nil {
				t.Fatalf("Failed to marshal output: %v", err)
			}

			// Read expected output
			expectedOutput, err := os.ReadFile(expectedPath)
			if err != nil {
				t.Fatalf("Failed to read expected file %s: %v", expectedPath, err)
			}

			if strings.ToLower(*outputFormat) == "yaml" {
				var actualData []interface{}
				var expectedData []interface{}

				err = yaml.Unmarshal(actualOutput, &actualData)
				if err != nil {
					t.Fatalf("Failed to unmarshal actual output: %v", err)
				}

				err = yaml.Unmarshal(expectedOutput, &expectedData)
				if err != nil {
					t.Fatalf("Failed to unmarshal expected output: %v", err)
				}

				// Compare data structures
				if !compareData(actualData, expectedData) {
					actualStr := strings.TrimSpace(string(actualOutput))
					expectedStr := strings.TrimSpace(string(expectedOutput))
					t.Errorf("Output mismatch for %s\nExpected:\n%s\n\nActual:\n%s", tc.name, expectedStr, actualStr)
				}
			} else {
				var actualData []interface{}
				var expectedData []interface{}

				err = json.Unmarshal(actualOutput, &actualData)
				if err != nil {
					t.Fatalf("Failed to unmarshal actual JSON output: %v", err)
				}

				err = json.Unmarshal(expectedOutput, &expectedData)
				if err != nil {
					t.Fatalf("Failed to unmarshal expected JSON output: %v", err)
				}

				// Compare data structures
				if !compareData(actualData, expectedData) {
					actualStr := strings.TrimSpace(string(actualOutput))
					expectedStr := strings.TrimSpace(string(expectedOutput))
					t.Errorf("Output mismatch for %s\nExpected:\n%s\n\nActual:\n%s", tc.name, expectedStr, actualStr)
				}
			}
		})
	}
}

func TestCaseInsensitiveParsing(t *testing.T) {
	robotsTxt := `User-Agent: *
Disallow: /admin
Crawl-Delay: 10

User-agent: TestBot
disallow: /test
crawl-delay: 5

USER-AGENT: UpperBot
DISALLOW: /upper
CRAWL-DELAY: 20`

	reader := strings.NewReader(robotsTxt)
	rules, err := parseRobotsTxt(reader)
	if err != nil {
		t.Fatalf("Failed to parse case-insensitive robots.txt: %v", err)
	}

	expectedRules := 3
	if len(rules) != expectedRules {
		t.Errorf("Expected %d rules, got %d", expectedRules, len(rules))
	}

	// Check that all crawl delays were parsed
	for i, rule := range rules {
		expectedDelays := []int{10, 5, 20}
		if rule.CrawlDelay != expectedDelays[i] {
			t.Errorf("Rule %d: expected crawl delay %d, got %d", i, expectedDelays[i], rule.CrawlDelay)
		}
	}
}

func TestVariousOutputFormats(t *testing.T) {
	robotsTxt := `User-agent: *
Disallow: /admin`

	reader := strings.NewReader(robotsTxt)
	rules, err := parseRobotsTxt(reader)
	if err != nil {
		t.Fatalf("Failed to parse robots.txt: %v", err)
	}

	oldPolicyName := *policyName
	*policyName = "test-policy"
	defer func() { *policyName = oldPolicyName }()

	anubisRules := convertToAnubisRules(rules)

	// Test YAML output
	yamlOutput, err := yaml.Marshal(anubisRules)
	if err != nil {
		t.Fatalf("Failed to marshal YAML: %v", err)
	}

	if !strings.Contains(string(yamlOutput), "name: test-policy-disallow-1") {
		t.Errorf("YAML output doesn't contain expected rule name")
	}

	// Test JSON output
	jsonOutput, err := json.MarshalIndent(anubisRules, "", "  ")
	if err != nil {
		t.Fatalf("Failed to marshal JSON: %v", err)
	}

	if !strings.Contains(string(jsonOutput), `"name": "test-policy-disallow-1"`) {
		t.Errorf("JSON output doesn't contain expected rule name")
	}
}

func TestDifferentActions(t *testing.T) {
	robotsTxt := `User-agent: *
Disallow: /admin`

	testActions := []string{"ALLOW", "DENY", "CHALLENGE", "WEIGH"}

	for _, action := range testActions {
		t.Run("action_"+action, func(t *testing.T) {
			reader := strings.NewReader(robotsTxt)
			rules, err := parseRobotsTxt(reader)
			if err != nil {
				t.Fatalf("Failed to parse robots.txt: %v", err)
			}

			oldAction := *baseAction
			*baseAction = action
			defer func() { *baseAction = oldAction }()

			anubisRules := convertToAnubisRules(rules)

			if len(anubisRules) != 1 {
				t.Fatalf("Expected 1 rule, got %d", len(anubisRules))
			}

			if anubisRules[0].Action != action {
				t.Errorf("Expected action %s, got %s", action, anubisRules[0].Action)
			}
		})
	}
}

func TestPolicyNaming(t *testing.T) {
	robotsTxt := `User-agent: *
Disallow: /admin
Disallow: /private

User-agent: BadBot
Disallow: /`

	testNames := []string{"custom-policy", "my-rules", "site-protection"}

	for _, name := range testNames {
		t.Run("name_"+name, func(t *testing.T) {
			reader := strings.NewReader(robotsTxt)
			rules, err := parseRobotsTxt(reader)
			if err != nil {
				t.Fatalf("Failed to parse robots.txt: %v", err)
			}

			oldName := *policyName
			*policyName = name
			defer func() { *policyName = oldName }()

			anubisRules := convertToAnubisRules(rules)

			// Check that all rule names use the custom prefix
			for _, rule := range anubisRules {
				if !strings.HasPrefix(rule.Name, name+"-") {
					t.Errorf("Rule name %s doesn't start with expected prefix %s-", rule.Name, name)
				}
			}
		})
	}
}

func TestCrawlDelayWeights(t *testing.T) {
	robotsTxt := `User-agent: *
Disallow: /admin
Crawl-delay: 10

User-agent: SlowBot
Disallow: /slow
Crawl-delay: 60`

	testWeights := []int{1, 5, 10, 25}

	for _, weight := range testWeights {
		t.Run(fmt.Sprintf("weight_%d", weight), func(t *testing.T) {
			reader := strings.NewReader(robotsTxt)
			rules, err := parseRobotsTxt(reader)
			if err != nil {
				t.Fatalf("Failed to parse robots.txt: %v", err)
			}

			oldWeight := *crawlDelay
			*crawlDelay = weight
			defer func() { *crawlDelay = oldWeight }()

			anubisRules := convertToAnubisRules(rules)

			// Count weight rules and verify they have correct weight
			weightRules := 0
			for _, rule := range anubisRules {
				if rule.Action == "WEIGH" && rule.Weight != nil {
					weightRules++
					if rule.Weight.Adjust != weight {
						t.Errorf("Expected weight %d, got %d", weight, rule.Weight.Adjust)
					}
				}
			}

			expectedWeightRules := 2 // One for *, one for SlowBot
			if weightRules != expectedWeightRules {
				t.Errorf("Expected %d weight rules, got %d", expectedWeightRules, weightRules)
			}
		})
	}
}

func TestBlacklistActions(t *testing.T) {
	robotsTxt := `User-agent: BadBot
Disallow: /

User-agent: SpamBot
Disallow: /`

	testActions := []string{"DENY", "CHALLENGE"}

	for _, action := range testActions {
		t.Run("blacklist_"+action, func(t *testing.T) {
			reader := strings.NewReader(robotsTxt)
			rules, err := parseRobotsTxt(reader)
			if err != nil {
				t.Fatalf("Failed to parse robots.txt: %v", err)
			}

			oldAction := *userAgentDeny
			*userAgentDeny = action
			defer func() { *userAgentDeny = oldAction }()

			anubisRules := convertToAnubisRules(rules)

			// All rules should be blacklist rules with the specified action
			for _, rule := range anubisRules {
				if !strings.Contains(rule.Name, "blacklist") {
					t.Errorf("Expected blacklist rule, got %s", rule.Name)
				}
				if rule.Action != action {
					t.Errorf("Expected action %s, got %s", action, rule.Action)
				}
			}
		})
	}
}

// compareData performs a deep comparison of two data structures,
// ignoring differences that are semantically equivalent in YAML/JSON
func compareData(actual, expected interface{}) bool {
	return reflect.DeepEqual(actual, expected)
}
15 cmd/robots2policy/testdata/blacklist.robots.txt vendored Normal file
@@ -0,0 +1,15 @@
# Test with blacklisted user agents
User-agent: *
Disallow: /admin
Crawl-delay: 10

User-agent: BadBot
Disallow: /

User-agent: SpamBot
Disallow: /
Crawl-delay: 60

User-agent: Googlebot
Disallow: /search
Crawl-delay: 5
30 cmd/robots2policy/testdata/blacklist.yaml vendored Normal file
@@ -0,0 +1,30 @@
- action: WEIGH
  expression: "true"
  name: robots-txt-policy-crawl-delay-1
  weight:
    adjust: 3
- action: CHALLENGE
  expression: path.startsWith("/admin")
  name: robots-txt-policy-disallow-2
- action: DENY
  expression: userAgent.contains("BadBot")
  name: robots-txt-policy-blacklist-3
- action: WEIGH
  expression: userAgent.contains("SpamBot")
  name: robots-txt-policy-crawl-delay-4
  weight:
    adjust: 3
- action: DENY
  expression: userAgent.contains("SpamBot")
  name: robots-txt-policy-blacklist-5
- action: WEIGH
  expression: userAgent.contains("Googlebot")
  name: robots-txt-policy-crawl-delay-6
  weight:
    adjust: 3
- action: CHALLENGE
  expression:
    all:
      - userAgent.contains("Googlebot")
      - path.startsWith("/search")
  name: robots-txt-policy-disallow-7
30 cmd/robots2policy/testdata/complex.robots.txt vendored Normal file
@@ -0,0 +1,30 @@
# Complex real-world example
User-agent: *
Disallow: /admin/
Disallow: /private/
Disallow: /api/internal/
Allow: /api/public/
Crawl-delay: 5

User-agent: Googlebot
Disallow: /search/
Allow: /api/
Crawl-delay: 2

User-agent: Bingbot
Disallow: /search/
Disallow: /admin/
Crawl-delay: 10

User-agent: BadBot
Disallow: /

User-agent: SeoBot
Disallow: /
Crawl-delay: 300

# Test with various patterns
User-agent: TestBot
Disallow: /*/admin
Disallow: /temp*.html
Disallow: /file?.log
71 cmd/robots2policy/testdata/complex.yaml vendored Normal file
@@ -0,0 +1,71 @@
- action: WEIGH
  expression: "true"
  name: robots-txt-policy-crawl-delay-1
  weight:
    adjust: 5
- action: CHALLENGE
  expression: path.startsWith("/admin/")
  name: robots-txt-policy-disallow-2
- action: CHALLENGE
  expression: path.startsWith("/private/")
  name: robots-txt-policy-disallow-3
- action: CHALLENGE
  expression: path.startsWith("/api/internal/")
  name: robots-txt-policy-disallow-4
- action: WEIGH
  expression: userAgent.contains("Googlebot")
  name: robots-txt-policy-crawl-delay-5
  weight:
    adjust: 5
- action: CHALLENGE
  expression:
    all:
      - userAgent.contains("Googlebot")
      - path.startsWith("/search/")
  name: robots-txt-policy-disallow-6
- action: WEIGH
  expression: userAgent.contains("Bingbot")
  name: robots-txt-policy-crawl-delay-7
  weight:
    adjust: 5
- action: CHALLENGE
  expression:
    all:
      - userAgent.contains("Bingbot")
      - path.startsWith("/search/")
  name: robots-txt-policy-disallow-8
- action: CHALLENGE
  expression:
    all:
      - userAgent.contains("Bingbot")
      - path.startsWith("/admin/")
  name: robots-txt-policy-disallow-9
- action: DENY
  expression: userAgent.contains("BadBot")
  name: robots-txt-policy-blacklist-10
- action: WEIGH
  expression: userAgent.contains("SeoBot")
  name: robots-txt-policy-crawl-delay-11
  weight:
    adjust: 5
- action: DENY
  expression: userAgent.contains("SeoBot")
  name: robots-txt-policy-blacklist-12
- action: CHALLENGE
  expression:
    all:
      - userAgent.contains("TestBot")
      - path.matches("^/.*/admin")
  name: robots-txt-policy-disallow-13
- action: CHALLENGE
  expression:
    all:
      - userAgent.contains("TestBot")
      - path.matches("^/temp.*\\.html")
  name: robots-txt-policy-disallow-14
- action: CHALLENGE
  expression:
    all:
      - userAgent.contains("TestBot")
      - path.matches("^/file.\\.log")
  name: robots-txt-policy-disallow-15
6 cmd/robots2policy/testdata/custom-name.yaml vendored Normal file
@@ -0,0 +1,6 @@
- action: CHALLENGE
  expression: path.startsWith("/admin/")
  name: my-custom-policy-disallow-1
- action: CHALLENGE
  expression: path.startsWith("/private")
  name: my-custom-policy-disallow-2
6 cmd/robots2policy/testdata/deny-action.yaml vendored Normal file
@@ -0,0 +1,6 @@
- action: DENY
  expression: path.startsWith("/admin/")
  name: robots-txt-policy-disallow-1
- action: DENY
  expression: path.startsWith("/private")
  name: robots-txt-policy-disallow-2
2 cmd/robots2policy/testdata/empty.robots.txt vendored Normal file
@@ -0,0 +1,2 @@
# Empty robots.txt (comments only)
# No actual rules

1 cmd/robots2policy/testdata/empty.yaml vendored Normal file
@@ -0,0 +1 @@
[]
12 cmd/robots2policy/testdata/simple.json vendored Normal file
@@ -0,0 +1,12 @@
[
  {
    "action": "CHALLENGE",
    "expression": "path.startsWith(\"/admin/\")",
    "name": "robots-txt-policy-disallow-1"
  },
  {
    "action": "CHALLENGE",
    "expression": "path.startsWith(\"/private\")",
    "name": "robots-txt-policy-disallow-2"
  }
]
5 cmd/robots2policy/testdata/simple.robots.txt vendored Normal file
@@ -0,0 +1,5 @@
# Simple robots.txt test
User-agent: *
Disallow: /admin/
Disallow: /private
Allow: /public
6 cmd/robots2policy/testdata/simple.yaml vendored Normal file
@@ -0,0 +1,6 @@
- action: CHALLENGE
  expression: path.startsWith("/admin/")
  name: robots-txt-policy-disallow-1
- action: CHALLENGE
  expression: path.startsWith("/private")
  name: robots-txt-policy-disallow-2
6 cmd/robots2policy/testdata/wildcards.robots.txt vendored Normal file
@@ -0,0 +1,6 @@
# Test wildcard patterns
User-agent: *
Disallow: /search*
Disallow: /*/private
Disallow: /file?.txt
Disallow: /admin/*?action=delete
12 cmd/robots2policy/testdata/wildcards.yaml vendored Normal file
@@ -0,0 +1,12 @@
- action: CHALLENGE
  expression: path.matches("^/search.*")
  name: robots-txt-policy-disallow-1
- action: CHALLENGE
  expression: path.matches("^/.*/private")
  name: robots-txt-policy-disallow-2
- action: CHALLENGE
  expression: path.matches("^/file.\\.txt")
  name: robots-txt-policy-disallow-3
- action: CHALLENGE
  expression: path.matches("^/admin/.*.action=delete")
  name: robots-txt-policy-disallow-4
@@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Make progress bar styling more compatible (UXP, etc)
 - Optimized the OGTags subsystem with reduced allocations and runtime per request by up to 66%
 - Add `--strip-base-prefix` flag/envvar to strip the base prefix from request paths when forwarding to target servers
+- Add `robots2policy` CLI utility to convert robots.txt files to Anubis challenge policies using CEL expressions ([#409](https://github.com/TecharoHQ/anubis/issues/409))

 ## v1.19.1: Jenomis cen Lexentale - Echo 1
84 docs/docs/admin/robots2policy.mdx Normal file
@@ -0,0 +1,84 @@
---
title: robots2policy CLI Tool
sidebar_position: 50
---

The `robots2policy` tool converts robots.txt files into Anubis challenge policies. It reads robots.txt rules and generates equivalent CEL expressions for path matching and user-agent filtering.

## Installation

Install directly with Go:

```bash
go install github.com/TecharoHQ/anubis/cmd/robots2policy@latest
```

## Usage

Basic conversion from URL:

```bash
robots2policy -input https://www.example.com/robots.txt
```

Convert local file to YAML:

```bash
robots2policy -input robots.txt -output policy.yaml
```

Convert with custom settings:

```bash
robots2policy -input robots.txt -action DENY -format json
```

## Options

| Flag                  | Description                                                        | Default             |
|-----------------------|--------------------------------------------------------------------|---------------------|
| `-input`              | robots.txt file path or URL (use `-` for stdin)                    | *required*          |
| `-output`             | Output file (use `-` for stdout)                                   | stdout              |
| `-format`             | Output format: `yaml` or `json`                                    | `yaml`              |
| `-action`             | Action for disallowed paths: `ALLOW`, `DENY`, `CHALLENGE`, `WEIGH` | `CHALLENGE`         |
| `-name`               | Policy name prefix                                                 | `robots-txt-policy` |
| `-crawl-delay-weight` | Weight adjustment for crawl-delay rules                            | `3`                 |
| `-deny-user-agents`   | Action for blacklisted user agents                                 | `DENY`              |

## Example

Input robots.txt:

```txt
User-agent: *
Disallow: /admin/
Disallow: /private

User-agent: BadBot
Disallow: /
```

Generated policy:

```yaml
- name: robots-txt-policy-disallow-1
  action: CHALLENGE
  expression:
    single: path.startsWith("/admin/")
- name: robots-txt-policy-disallow-2
  action: CHALLENGE
  expression:
    single: path.startsWith("/private")
- name: robots-txt-policy-blacklist-3
  action: DENY
  expression:
    single: userAgent.contains("BadBot")
```

## Using the Generated Policy

Save the output and import it in your main policy file:

```yaml
import:
  - path: "./robots-policy.yaml"
```

The tool handles wildcard patterns, user-agent specific rules, and blacklisted bots automatically.
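For instance, wildcard patterns are rewritten into anchored regular expressions. From the testdata shipped in this commit, `Disallow: /search*` under `User-agent: *` becomes:

```yaml
- action: CHALLENGE
  expression: path.matches("^/search.*")
  name: robots-txt-policy-disallow-1
```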
4 go.mod
@@ -12,7 +12,9 @@ require (
 	github.com/sebest/xff v0.0.0-20210106013422-671bd2870b3a
 	github.com/yl2chen/cidranger v1.0.2
 	golang.org/x/net v0.41.0
+	gopkg.in/yaml.v3 v3.0.1
 	k8s.io/apimachinery v0.33.1
+	sigs.k8s.io/yaml v1.4.0
 )

 require (
@@ -104,11 +106,9 @@ require (
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect
 	google.golang.org/protobuf v1.36.5 // indirect
 	gopkg.in/warnings.v0 v0.1.2 // indirect
-	gopkg.in/yaml.v3 v3.0.1 // indirect
 	honnef.co/go/tools v0.6.1 // indirect
 	mvdan.cc/sh/v3 v3.11.0 // indirect
 	sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
-	sigs.k8s.io/yaml v1.4.0 // indirect
 )

 tool (
@@ -1,11 +1,12 @@
 package ogtags

 import (
-	"golang.org/x/net/html"
 	"net/url"
 	"runtime"
 	"strings"
 	"testing"
+
+	"golang.org/x/net/html"
 )

 func BenchmarkGetTarget(b *testing.B) {
@@ -21,17 +21,17 @@ const (
 )

 type OGTagCache struct {
 	cache     *decaymap.Impl[string, map[string]string]
 	targetURL *url.URL
 	client    *http.Client

+	// Pre-built strings for optimization
+	unixPrefix string // "http://unix"
+
 	approvedTags        []string
 	approvedPrefixes    []string
 	ogTimeToLive        time.Duration
 	ogCacheConsiderHost bool
 	ogPassthrough       bool
-
-	// Pre-built strings for optimization
-	unixPrefix string // "http://unix"
 }

 func NewOGTagCache(target string, ogPassthrough bool, ogTimeToLive time.Duration, ogTagsConsiderHost bool) *OGTagCache {
@@ -1,11 +1,12 @@
 package ogtags

 import (
-	"golang.org/x/net/html"
 	"net/url"
 	"strings"
 	"testing"
 	"unicode/utf8"
+
+	"golang.org/x/net/html"
 )

 // FuzzGetTarget tests getTarget with various inputs
@@ -46,15 +46,15 @@ const (
 const DefaultAlgorithm = "fast"

 type BotConfig struct {
-	UserAgentRegex *string           `json:"user_agent_regex,omitempty"`
-	PathRegex      *string           `json:"path_regex,omitempty"`
-	HeadersRegex   map[string]string `json:"headers_regex,omitempty"`
-	Expression     *ExpressionOrList `json:"expression,omitempty"`
-	Challenge      *ChallengeRules   `json:"challenge,omitempty"`
-	Weight         *Weight           `json:"weight,omitempty"`
-	Name           string            `json:"name"`
-	Action         Rule              `json:"action"`
-	RemoteAddr     []string          `json:"remote_addresses,omitempty"`
+	UserAgentRegex *string           `json:"user_agent_regex,omitempty" yaml:"user_agent_regex,omitempty"`
+	PathRegex      *string           `json:"path_regex,omitempty" yaml:"path_regex,omitempty"`
+	HeadersRegex   map[string]string `json:"headers_regex,omitempty" yaml:"headers_regex,omitempty"`
+	Expression     *ExpressionOrList `json:"expression,omitempty" yaml:"expression,omitempty"`
+	Challenge      *ChallengeRules   `json:"challenge,omitempty" yaml:"challenge,omitempty"`
+	Weight         *Weight           `json:"weight,omitempty" yaml:"weight,omitempty"`
+	Name           string            `json:"name" yaml:"name"`
+	Action         Rule              `json:"action" yaml:"action"`
+	RemoteAddr     []string          `json:"remote_addresses,omitempty" yaml:"remote_addresses,omitempty"`
 }

 func (b BotConfig) Zero() bool {
@@ -170,9 +170,9 @@ func (b *BotConfig) Valid() error {
 }

 type ChallengeRules struct {
-	Algorithm  string `json:"algorithm"`
-	Difficulty int    `json:"difficulty"`
-	ReportAs   int    `json:"report_as"`
+	Algorithm  string `json:"algorithm,omitempty" yaml:"algorithm,omitempty"`
+	Difficulty int    `json:"difficulty,omitempty" yaml:"difficulty,omitempty"`
+	ReportAs   int    `json:"report_as,omitempty" yaml:"report_as,omitempty"`
 }

 var (
@@ -13,9 +13,9 @@ var (
 )

 type ExpressionOrList struct {
-	Expression string   `json:"-"`
-	All        []string `json:"all,omitempty"`
-	Any        []string `json:"any,omitempty"`
+	Expression string   `json:"-" yaml:"-"`
+	All        []string `json:"all,omitempty" yaml:"all,omitempty"`
+	Any        []string `json:"any,omitempty" yaml:"any,omitempty"`
 }

 func (eol ExpressionOrList) Equal(rhs *ExpressionOrList) bool {
@@ -34,6 +34,43 @@ func (eol ExpressionOrList) Equal(rhs *ExpressionOrList) bool {
 	return true
 }

+func (eol *ExpressionOrList) MarshalYAML() (any, error) {
+	switch {
+	case len(eol.All) == 1 && len(eol.Any) == 0:
+		eol.Expression = eol.All[0]
+		eol.All = nil
+	case len(eol.Any) == 1 && len(eol.All) == 0:
+		eol.Expression = eol.Any[0]
+		eol.Any = nil
+	}
+
+	if eol.Expression != "" {
+		return eol.Expression, nil
+	}
+
+	type RawExpressionOrList ExpressionOrList
+	return RawExpressionOrList(*eol), nil
+}
+
+func (eol *ExpressionOrList) MarshalJSON() ([]byte, error) {
+	switch {
+	case len(eol.All) == 1 && len(eol.Any) == 0:
+		eol.Expression = eol.All[0]
+		eol.All = nil
+	case len(eol.Any) == 1 && len(eol.All) == 0:
+		eol.Expression = eol.Any[0]
+		eol.Any = nil
+	}
+
+	if eol.Expression != "" {
+		return json.Marshal(string(eol.Expression))
+	}
+
+	type RawExpressionOrList ExpressionOrList
+	val := RawExpressionOrList(*eol)
+	return json.Marshal(val)
+}
+
 func (eol *ExpressionOrList) UnmarshalJSON(data []byte) error {
 	switch string(data[0]) {
 	case `"`: // string
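In YAML terms, the single-element collapse above means an expression with exactly one `all:` (or `any:`) entry now serializes as a bare CEL string instead of a one-element list, while multi-entry lists keep the nested form. Both shapes appear in the regenerated testdata in this commit, for example:

expression: path.startsWith("/private")

versus

expression:
  all:
    - userAgent.contains("Googlebot")
    - path.startsWith("/search")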
@@ -1,12 +1,147 @@
 package config

 import (
+	"bytes"
 	"encoding/json"
 	"errors"
 	"testing"
+
+	yaml "sigs.k8s.io/yaml/goyaml.v3"
 )

-func TestExpressionOrListUnmarshal(t *testing.T) {
+func TestExpressionOrListMarshalJSON(t *testing.T) {
+	for _, tt := range []struct {
+		name   string
+		input  *ExpressionOrList
+		output []byte
+		err    error
+	}{
+		{
+			name: "single expression",
+			input: &ExpressionOrList{
+				Expression: "true",
+			},
+			output: []byte(`"true"`),
+			err:    nil,
+		},
+		{
+			name: "all",
+			input: &ExpressionOrList{
+				All: []string{"true", "true"},
+			},
+			output: []byte(`{"all":["true","true"]}`),
+			err:    nil,
+		},
+		{
+			name: "all one",
+			input: &ExpressionOrList{
+				All: []string{"true"},
+			},
+			output: []byte(`"true"`),
+			err:    nil,
+		},
+		{
+			name: "any",
+			input: &ExpressionOrList{
+				Any: []string{"true", "false"},
+			},
+			output: []byte(`{"any":["true","false"]}`),
+			err:    nil,
+		},
+		{
+			name: "any one",
+			input: &ExpressionOrList{
+				Any: []string{"true"},
+			},
+			output: []byte(`"true"`),
+			err:    nil,
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := json.Marshal(tt.input)
+			if !errors.Is(err, tt.err) {
+				t.Errorf("wanted marshal error: %v but got: %v", tt.err, err)
+			}
+
+			if !bytes.Equal(result, tt.output) {
+				t.Logf("wanted: %s", string(tt.output))
+				t.Logf("got: %s", string(result))
+				t.Error("mismatched output")
+			}
+		})
+	}
+}
+
+func TestExpressionOrListMarshalYAML(t *testing.T) {
+	for _, tt := range []struct {
+		name   string
+		input  *ExpressionOrList
+		output []byte
+		err    error
+	}{
+		{
+			name: "single expression",
+			input: &ExpressionOrList{
+				Expression: "true",
+			},
+			output: []byte(`"true"`),
+			err:    nil,
+		},
+		{
+			name: "all",
+			input: &ExpressionOrList{
+				All: []string{"true", "true"},
+			},
+			output: []byte(`all:
+    - "true"
+    - "true"`),
+			err: nil,
+		},
+		{
+			name: "all one",
+			input: &ExpressionOrList{
+				All: []string{"true"},
+			},
+			output: []byte(`"true"`),
+			err:    nil,
+		},
+		{
+			name: "any",
+			input: &ExpressionOrList{
+				Any: []string{"true", "false"},
+			},
+			output: []byte(`any:
+    - "true"
+    - "false"`),
+			err: nil,
+		},
+		{
+			name: "any one",
+			input: &ExpressionOrList{
+				Any: []string{"true"},
+			},
+			output: []byte(`"true"`),
+			err:    nil,
+		},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := yaml.Marshal(tt.input)
+			if !errors.Is(err, tt.err) {
+				t.Errorf("wanted marshal error: %v but got: %v", tt.err, err)
+			}
+
+			result = bytes.TrimSpace(result)
+
+			if !bytes.Equal(result, tt.output) {
+				t.Logf("wanted: %q", string(tt.output))
+				t.Logf("got: %q", string(result))
+				t.Error("mismatched output")
+			}
+		})
+	}
+}
+
+func TestExpressionOrListUnmarshalJSON(t *testing.T) {
 	for _, tt := range []struct {
 		err      error
 		validErr error
@@ -1,5 +1,5 @@
 package config

 type Weight struct {
-	Adjust int `json:"adjust"`
+	Adjust int `json:"adjust" yaml:"adjust"`
 }
@@ -22,6 +22,7 @@ $`npm run assets`;

 build: ({ bin, etc, systemd, doc }) => {
 	$`go build -o ${bin}/anubis -ldflags '-s -w -extldflags "-static" -X "github.com/TecharoHQ/anubis.Version=${git.tag()}"' ./cmd/anubis`;
+	$`go build -o ${bin}/anubis-robots2policy -ldflags '-s -w -extldflags "-static" -X "github.com/TecharoHQ/anubis.Version=${git.tag()}"' ./cmd/robots2policy`;

 	file.install("./run/anubis@.service", `${systemd}/anubis@.service`);
 	file.install("./run/default.env", `${etc}/default.env`);