mirror of
https://github.com/TecharoHQ/anubis.git
synced 2025-08-03 09:48:08 -04:00

* feat: Add Open Graph tag support (og-tags) Signed-off-by: Jason Cameron <git@jasoncameron.dev> * Fix: Prevent nil pointer dereference in test (og-tags) Signed-off-by: Jason Cameron <git@jasoncameron.dev> * feat!: Implement Open Graph tag caching and passthrough functionality (WIP) I'm going to sleep. currently tags are passed to renderIndex. see https://github.com/TecharoHQ/anubis/issues/131 Signed-off-by: Jason Cameron <git@jasoncameron.dev> * feat: Add configuration for air tool with build and logger settings Signed-off-by: Jason Cameron <git@jasoncameron.dev> * feat: Move OG tags to base template (og-tags) Moves the Open Graph (OG) tags from the index template to the base template. This allows OG tags to be set on any page, not just the index. Also adds a BaseWithOGTags function to the web package to allow passing OG tags to the base template. Removes the ogTags parameter from the Index function and template. Signed-off-by: Jason Cameron <git@jasoncameron.dev> * Delete CHANGELOG.md Signed-off-by: Jason Cameron <git@jasoncameron.dev> * feat: Add language attribute to HTML tag in template Signed-off-by: Jason Cameron <git@jasoncameron.dev> * fix(tests): Fix nil pointer ref Signed-off-by: Jason Cameron <git@jasoncameron.dev> * feat(og-tags): Add timeout to http client (og-tags) Signed-off-by: Jason Cameron <git@jasoncameron.dev> * style: fix line endings & indentation Signed-off-by: Jason Cameron <git@jasoncameron.dev> * style: add inspection comment for GoBoolExpressions in UnchangingCache Signed-off-by: Jason Cameron <git@jasoncameron.dev> * feat(og-tags): Implement Open Graph tag fetching and caching Signed-off-by: Jason Cameron <git@jasoncameron.dev> * fix(og-tags): Simplify Open Graph tag extraction logic Signed-off-by: Jason Cameron <git@jasoncameron.dev> * fix(og-tags): Add nil check in isOGMetaTag and enhance test cases Signed-off-by: Jason Cameron <git@jasoncameron.dev> * feat(og-tags): Add approved tags and prefixes for Open Graph extraction 
Signed-off-by: Jason Cameron <git@jasoncameron.dev> * test(og-tags): Update tests with approved tags and improve clarity Signed-off-by: Jason Cameron <git@jasoncameron.dev> * chore: Add changelog notes Signed-off-by: Jason Cameron <git@jasoncameron.dev> * fix: Improve stability of the target fetcher? Signed-off-by: Jason Cameron <git@jasoncameron.dev> * fix: Update template error handling and improve Open Graph tag integration Signed-off-by: Jason Cameron <git@jasoncameron.dev> * style: format files and remove deubg logs Signed-off-by: Jason Cameron <git@jasoncameron.dev> * feat: Credit CELPHASE for mascot design (og-tags) Signed-off-by: Jason Cameron <git@jasoncameron.dev> * feat: Credit CELPHASE for mascot design (og-tags) Signed-off-by: Jason Cameron <git@jasoncameron.dev> * feat: Allow twitter prefixed OG tags by default Signed-off-by: Jason Cameron <git@jasoncameron.dev> * chore: replace /tmp with /var Signed-off-by: Jason Cameron <git@jasoncameron.dev> * Update docs/docs/CHANGELOG.md Co-authored-by: Xe Iaso <me@xeiaso.net> Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com> * Update docs/docs/admin/configuration/open-graph.mdx Co-authored-by: Xe Iaso <me@xeiaso.net> Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com> * chore: add fediverse to default prefixes (#og-tags) Signed-off-by: Jason Cameron <git@jasoncameron.dev> * feat(og-tags): Remove og-query-distinct flag This commit removes the `og-query-distinct` flag and associated logic. URLs with different query parameters will now always be treated as the same cache key for Open Graph tags. This simplifies the caching logic and improves performance. Additionally, the http client used for fetching OG tags is now a member of the OGTagCache struct, rather than a global variable. This improves testability and allows for more flexible configuration in the future. 
Signed-off-by: Jason Cameron <git@jasoncameron.dev> * Update docs/docs/admin/configuration/open-graph.mdx Co-authored-by: Xe Iaso <me@xeiaso.net> Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com> * docs: remove og tags references Signed-off-by: Jason Cameron <git@jasoncameron.dev> * refactor: rename url > u to not overlap package name Signed-off-by: Jason Cameron <git@jasoncameron.dev> * Update internal/ogtags/cache.go Co-authored-by: Xe Iaso <me@xeiaso.net> Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com> * Update internal/ogtags/cache.go Co-authored-by: Xe Iaso <me@xeiaso.net> Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com> * fix(tests): Don't use network when network access is disabled Signed-off-by: Jason Cameron <git@jasoncameron.dev> * Fix: Handle nil URL in GetOGTags (og-tags) Signed-off-by: Jason Cameron <git@jasoncameron.dev> * chore: sort installation docs alphabetically Signed-off-by: Jason Cameron <git@jasoncameron.dev> * fix(tests): validate that no duplicate requests are made Signed-off-by: Jason Cameron <git@jasoncameron.dev> * style(tests): remove unused ok var Signed-off-by: Jason Cameron <git@jasoncameron.dev> * docs: convert to table fmt Signed-off-by: Jason Cameron <git@jasoncameron.dev> * feat(og-tags): Enhance OG tag fetching and caching Adds additional approved OG tags (`keywords`, `author`), improves Signed-off-by: Jason Cameron <git@jasoncameron.dev> * chore: update generated templ's after format Signed-off-by: Jason Cameron <git@jasoncameron.dev> * fix(tests): update integration_test.go to reflect the new behavior of fetchHTMLDocument Signed-off-by: Jason Cameron <git@jasoncameron.dev> * Revert "data/botPolicies: allow iMessage scraper by default (#178)" This reverts commit 21a9d777 Signed-off-by: Jason Cameron <git@jasoncameron.dev> * Fix: Simplify ogTags access in cache test. Didn't know this was possible! wow! 
Signed-off-by: Jason Cameron <git@jasoncameron.dev> * Fix: Handle request timeouts when fetching OG tags (#og-tags) Cache a nil result for half the TTL to avoid repeatedly requesting a timed-out URL. Signed-off-by: Jason Cameron <git@jasoncameron.dev> * Fix: make OG tags passthrough option function. Signed-off-by: Jason Cameron <git@jasoncameron.dev> * Fix: Handle timeouts and non-200 responses when fetching OG tags (og-tags) - Cache empty results for timeouts and non-200 status codes to avoid spamming the server. - Use a non-nil empty map to represent empty results in the cache, as nil would be a cache miss. Signed-off-by: Jason Cameron <git@jasoncameron.dev> * feat(og-tags): switch to http.MaxBytesReader Signed-off-by: Jason Cameron <git@jasoncameron.dev> * chore(og-tags): add noindex, nofollow meta tag and update error line numbers Signed-off-by: Jason Cameron <git@jasoncameron.dev> --------- Signed-off-by: Jason Cameron <git@jasoncameron.dev> Signed-off-by: Jason Cameron <jasoncameron.all@gmail.com> Co-authored-by: Xe Iaso <me@xeiaso.net>
457 lines
11 KiB
Go
457 lines
11 KiB
Go
//go:build !windows
|
|
|
|
// Integration tests for Anubis, using Playwright.
|
|
//
|
|
// These tests require an already running Anubis and Playwright server.
|
|
//
|
|
// Anubis must be configured to redirect to the server started by the test suite.
|
|
// The bind address and the Anubis server can be specified using the flags `-bind` and `-anubis` respectively.
|
|
//
|
|
// Playwright must be started in server mode using `npx playwright@1.50.1 run-server --port 9001` (the port must match the `-playwright` flag, which defaults to `ws://localhost:9001`).
|
|
// The version must match the minor used by the playwright-go package.
|
|
//
|
|
// On unsupported systems you may be able to use a container instead: https://playwright.dev/docs/docker#remote-connection
|
|
//
|
|
// In that case you may need to set the `-playwright` flag to the container's URL, and specify the `--host` the run-server command listens on.
|
|
package test
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"net"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"net/url"
|
|
"os"
|
|
"os/exec"
|
|
"strconv"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/TecharoHQ/anubis"
|
|
libanubis "github.com/TecharoHQ/anubis/lib"
|
|
"github.com/playwright-community/playwright-go"
|
|
)
|
|
|
|
var (
|
|
playwrightPort = flag.Int("playwright-port", 9001, "Playwright port")
|
|
playwrightServer = flag.String("playwright", "ws://localhost:9001", "Playwright server URL")
|
|
playwrightMaxTime = flag.Duration("playwright-max-time", 5*time.Second, "maximum time for Playwright requests")
|
|
playwrightMaxHardTime = flag.Duration("playwright-max-hard-time", 5*time.Minute, "maximum time for hard Playwright requests")
|
|
|
|
testCases = []testCase{
|
|
{
|
|
name: "firefox",
|
|
action: actionChallenge,
|
|
realIP: placeholderIP,
|
|
userAgent: "Mozilla/5.0 (X11; Linux x86_64; rv:136.0) Gecko/20100101 Firefox/136.0",
|
|
},
|
|
{
|
|
name: "headlessChrome",
|
|
action: actionDeny,
|
|
realIP: placeholderIP,
|
|
userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/120.0.6099.28 Safari/537.36",
|
|
},
|
|
{
|
|
name: "kagiBadIP",
|
|
action: actionChallenge,
|
|
isHard: true,
|
|
realIP: placeholderIP,
|
|
userAgent: "Mozilla/5.0 (compatible; Kagibot/1.0; +https://kagi.com/bot)",
|
|
},
|
|
{
|
|
name: "kagiGoodIP",
|
|
action: actionAllow,
|
|
realIP: "216.18.205.234",
|
|
userAgent: "Mozilla/5.0 (compatible; Kagibot/1.0; +https://kagi.com/bot)",
|
|
},
|
|
{
|
|
name: "unknownAgent",
|
|
action: actionAllow,
|
|
realIP: placeholderIP,
|
|
userAgent: "AnubisTest/0",
|
|
},
|
|
}
|
|
)
|
|
|
|
const (
|
|
actionAllow action = "ALLOW"
|
|
actionDeny action = "DENY"
|
|
actionChallenge action = "CHALLENGE"
|
|
|
|
placeholderIP = "fd11:5ee:bad:c0de::"
|
|
playwrightVersion = "1.50.1"
|
|
)
|
|
|
|
// action names the outcome of a request as seen by the test: one of
// actionAllow, actionDeny or actionChallenge.
type action string
|
|
|
|
type testCase struct {
|
|
name string
|
|
action action
|
|
isHard bool
|
|
realIP, userAgent string
|
|
}
|
|
|
|
// doesNPXExist skips the calling test when no npx executable can be found on
// PATH; otherwise it returns without doing anything.
func doesNPXExist(t *testing.T) {
	t.Helper()

	_, lookupErr := exec.LookPath("npx")
	if lookupErr == nil {
		return
	}

	t.Skipf("npx not found in PATH, skipping integration smoke testing: %v", lookupErr)
}
|
|
|
|
// run executes command through `sh -c`, waits for it to finish and returns
// everything it wrote to standard output. The command's standard error is
// forwarded to this process's standard error. The test is failed immediately
// if sh cannot be located or the command does not exit successfully.
func run(t *testing.T, command string) string {
	t.Helper()

	shell, lookErr := exec.LookPath("sh")
	if lookErr != nil {
		t.Fatalf("[unexpected] %v", lookErr)
	}

	t.Logf("running command: %s", command)

	proc := exec.Command(shell, "-c", command)
	proc.Stdin, proc.Stderr = nil, os.Stderr

	stdout, runErr := proc.Output()
	if runErr != nil {
		t.Fatalf("can't run command: %v", runErr)
	}

	return string(stdout)
}
|
|
|
|
// daemonize starts command through `sh -c` in the background, with its
// standard output and standard error forwarded to this process, and returns
// as soon as the process has been started. The test is failed immediately if
// sh cannot be located or the process cannot be started.
//
// A cleanup is registered on t that kills the process and then waits for it,
// so that the child is reaped instead of being left behind as a zombie.
func daemonize(t *testing.T, command string) {
	t.Helper()

	shPath, err := exec.LookPath("sh")
	if err != nil {
		t.Fatalf("[unexpected] %v", err)
	}

	t.Logf("daemonizing command: %s", command)

	cmd := exec.Command(shPath, "-c", command)
	cmd.Stdin = nil
	cmd.Stderr = os.Stderr
	cmd.Stdout = os.Stdout

	if err := cmd.Start(); err != nil {
		t.Fatalf("can't daemonize command: %v", err)
	}

	t.Cleanup(func() {
		if err := cmd.Process.Kill(); err != nil {
			// Most commonly the process has already exited on its own.
			t.Logf("can't kill daemonized command: %v", err)
		}

		// Wait releases the resources associated with the process. Its error
		// is expected to be non-nil here (the process was killed by a
		// signal), so it is deliberately ignored.
		_ = cmd.Wait()
	})
}
|
|
|
|
func startPlaywright(t *testing.T) {
|
|
t.Helper()
|
|
|
|
if os.Getenv("CI") == "true" {
|
|
run(t, fmt.Sprintf("npx --yes playwright@%s install --with-deps", playwrightVersion))
|
|
} else {
|
|
run(t, fmt.Sprintf("npx --yes playwright@%s install", playwrightVersion))
|
|
}
|
|
|
|
daemonize(t, fmt.Sprintf("npx --yes playwright@%s run-server --port %d", playwrightVersion, *playwrightPort))
|
|
|
|
for {
|
|
if _, err := http.Get(fmt.Sprintf("http://localhost:%d", *playwrightPort)); err != nil {
|
|
time.Sleep(500 * time.Millisecond)
|
|
continue
|
|
}
|
|
break
|
|
}
|
|
|
|
//nosleep:bypass XXX(Xe): Playwright doesn't have a good way to signal readiness. This is a HACK that will just let the tests pass.
|
|
time.Sleep(2 * time.Second)
|
|
}
|
|
|
|
func TestPlaywrightBrowser(t *testing.T) {
|
|
if os.Getenv("DONT_USE_NETWORK") != "" {
|
|
t.Skip("test requires network egress")
|
|
return
|
|
}
|
|
|
|
doesNPXExist(t)
|
|
startPlaywright(t)
|
|
|
|
pw := setupPlaywright(t)
|
|
anubisURL := spawnAnubis(t)
|
|
|
|
browsers := []playwright.BrowserType{pw.Chromium, pw.Firefox, pw.WebKit}
|
|
|
|
for _, typ := range browsers {
|
|
t.Run(typ.Name()+"/warmup", func(t *testing.T) {
|
|
browser, err := typ.Connect(buildBrowserConnect(typ.Name()), playwright.BrowserTypeConnectOptions{
|
|
ExposeNetwork: playwright.String("<loopback>"),
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("could not connect to remote browser: %v", err)
|
|
}
|
|
defer browser.Close()
|
|
|
|
ctx, err := browser.NewContext(playwright.BrowserNewContextOptions{
|
|
AcceptDownloads: playwright.Bool(false),
|
|
ExtraHttpHeaders: map[string]string{
|
|
"X-Real-Ip": "127.0.0.1",
|
|
},
|
|
UserAgent: playwright.String("Sephiroth"),
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("could not create context: %v", err)
|
|
}
|
|
defer ctx.Close()
|
|
|
|
page, err := ctx.NewPage()
|
|
if err != nil {
|
|
t.Fatalf("could not create page: %v", err)
|
|
}
|
|
defer page.Close()
|
|
|
|
timeout := 2.0
|
|
page.Goto(anubisURL, playwright.PageGotoOptions{
|
|
Timeout: &timeout,
|
|
})
|
|
})
|
|
|
|
for _, tc := range testCases {
|
|
name := fmt.Sprintf("%s/%s", typ.Name(), tc.name)
|
|
t.Run(name, func(t *testing.T) {
|
|
_, hasDeadline := t.Deadline()
|
|
if tc.isHard && hasDeadline {
|
|
t.Skip("skipping hard challenge with deadline")
|
|
}
|
|
|
|
var performedAction action
|
|
var err error
|
|
for i := 0; i < 5; i++ {
|
|
performedAction, err = executeTestCase(t, tc, typ, anubisURL)
|
|
if performedAction == tc.action {
|
|
break
|
|
}
|
|
time.Sleep(time.Duration(i+1) * 250 * time.Millisecond)
|
|
}
|
|
if performedAction != tc.action {
|
|
t.Errorf("unexpected test result, expected %s, got %s", tc.action, performedAction)
|
|
}
|
|
if err != nil {
|
|
t.Fatalf("test error: %v", err)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
func buildBrowserConnect(name string) string {
|
|
u, _ := url.Parse(*playwrightServer)
|
|
|
|
q := u.Query()
|
|
q.Set("browser", name)
|
|
u.RawQuery = q.Encode()
|
|
|
|
return u.String()
|
|
}
|
|
|
|
func executeTestCase(t *testing.T, tc testCase, typ playwright.BrowserType, anubisURL string) (action, error) {
|
|
deadline, _ := t.Deadline()
|
|
|
|
browser, err := typ.Connect(buildBrowserConnect(typ.Name()), playwright.BrowserTypeConnectOptions{
|
|
ExposeNetwork: playwright.String("<loopback>"),
|
|
})
|
|
if err != nil {
|
|
return "", fmt.Errorf("could not connect to remote browser: %w", err)
|
|
}
|
|
defer browser.Close()
|
|
|
|
ctx, err := browser.NewContext(playwright.BrowserNewContextOptions{
|
|
AcceptDownloads: playwright.Bool(false),
|
|
ExtraHttpHeaders: map[string]string{
|
|
"X-Real-Ip": tc.realIP,
|
|
},
|
|
UserAgent: playwright.String(tc.userAgent),
|
|
})
|
|
if err != nil {
|
|
return "", fmt.Errorf("could not create context: %w", err)
|
|
}
|
|
defer ctx.Close()
|
|
|
|
page, err := ctx.NewPage()
|
|
if err != nil {
|
|
return "", fmt.Errorf("could not create page: %w", err)
|
|
}
|
|
defer page.Close()
|
|
|
|
// Attempt challenge.
|
|
|
|
start := time.Now()
|
|
_, err = page.Goto(anubisURL, playwright.PageGotoOptions{
|
|
Timeout: pwTimeout(tc, deadline),
|
|
})
|
|
if err != nil {
|
|
return "", pwFail(t, page, "could not navigate to test server: %v", err)
|
|
}
|
|
|
|
hadChallenge := false
|
|
switch tc.action {
|
|
case actionChallenge:
|
|
// FIXME: This could race if challenge is completed too quickly.
|
|
checkImage(t, tc, deadline, page, "#image[src*=pensive], #image[src*=happy]")
|
|
hadChallenge = true
|
|
case actionDeny:
|
|
checkImage(t, tc, deadline, page, "#image[src*=sad]")
|
|
return actionDeny, nil
|
|
}
|
|
|
|
// Ensure protected resource was provided.
|
|
|
|
res, err := page.Locator("#anubis-test").TextContent(playwright.LocatorTextContentOptions{
|
|
Timeout: pwTimeout(tc, deadline),
|
|
})
|
|
end := time.Now()
|
|
if err != nil {
|
|
pwFail(t, page, "could not get text content: %v", err)
|
|
}
|
|
|
|
var tm int64
|
|
if _, err := fmt.Sscanf(res, "%d", &tm); err != nil {
|
|
pwFail(t, page, "unexpected output: %s", res)
|
|
}
|
|
|
|
if tm < start.Unix() || end.Unix() < tm {
|
|
pwFail(t, page, "unexpected timestamp in output: %d not in range %d..%d", tm, start.Unix(), end.Unix())
|
|
}
|
|
|
|
if hadChallenge {
|
|
return actionChallenge, nil
|
|
} else {
|
|
return actionAllow, nil
|
|
}
|
|
}
|
|
|
|
func checkImage(t *testing.T, tc testCase, deadline time.Time, page playwright.Page, locator string) {
|
|
image := page.Locator(locator)
|
|
err := image.WaitFor(playwright.LocatorWaitForOptions{
|
|
Timeout: pwTimeout(tc, deadline),
|
|
})
|
|
if err != nil {
|
|
pwFail(t, page, "could not wait for result: %v", err)
|
|
}
|
|
|
|
failIsVisible, err := image.IsVisible()
|
|
if err != nil {
|
|
pwFail(t, page, "could not check result image: %v", err)
|
|
}
|
|
|
|
if !failIsVisible {
|
|
pwFail(t, page, "expected result image not visible")
|
|
}
|
|
}
|
|
|
|
func pwFail(t *testing.T, page playwright.Page, format string, args ...any) error {
|
|
t.Helper()
|
|
|
|
saveScreenshot(t, page)
|
|
return fmt.Errorf(format, args...)
|
|
}
|
|
|
|
func pwTimeout(tc testCase, deadline time.Time) *float64 {
|
|
max := *playwrightMaxTime
|
|
if tc.isHard {
|
|
max = *playwrightMaxHardTime
|
|
}
|
|
|
|
d := time.Until(deadline)
|
|
if d <= 0 || d > max {
|
|
return playwright.Float(float64(max.Milliseconds()))
|
|
}
|
|
return playwright.Float(float64(d.Milliseconds()))
|
|
}
|
|
|
|
func saveScreenshot(t *testing.T, page playwright.Page) {
|
|
t.Helper()
|
|
|
|
data, err := page.Screenshot()
|
|
if err != nil {
|
|
t.Logf("could not take screenshot: %v", err)
|
|
return
|
|
}
|
|
|
|
f, err := os.CreateTemp("", "anubis-test-fail-*.png")
|
|
if err != nil {
|
|
t.Logf("could not create temporary file: %v", err)
|
|
return
|
|
}
|
|
defer f.Close()
|
|
|
|
_, err = f.Write(data)
|
|
if err != nil {
|
|
t.Logf("could not write screenshot: %v", err)
|
|
return
|
|
}
|
|
|
|
t.Logf("screenshot saved to %s", f.Name())
|
|
}
|
|
|
|
func setupPlaywright(t *testing.T) *playwright.Playwright {
|
|
err := playwright.Install(&playwright.RunOptions{
|
|
SkipInstallBrowsers: true,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("could not install Playwright: %v", err)
|
|
}
|
|
|
|
pw, err := playwright.Run()
|
|
if err != nil {
|
|
t.Fatalf("could not start Playwright: %v", err)
|
|
}
|
|
return pw
|
|
}
|
|
|
|
func spawnAnubis(t *testing.T) string {
|
|
t.Helper()
|
|
|
|
h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Add("Content-Type", "text/html")
|
|
fmt.Fprintf(w, "<html><body><span id=anubis-test>%d</span></body></html>", time.Now().Unix())
|
|
})
|
|
|
|
policy, err := libanubis.LoadPoliciesOrDefault("", anubis.DefaultDifficulty)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
listener, err := net.Listen("tcp", ":0")
|
|
if err != nil {
|
|
t.Fatalf("can't listen on random port: %v", err)
|
|
}
|
|
|
|
addr := listener.Addr().(*net.TCPAddr)
|
|
host := "localhost"
|
|
port := strconv.Itoa(addr.Port)
|
|
|
|
s, err := libanubis.New(libanubis.Options{
|
|
Next: h,
|
|
Policy: policy,
|
|
ServeRobotsTXT: true,
|
|
Target: "http://" + host + ":" + port,
|
|
})
|
|
if err != nil {
|
|
t.Fatalf("can't construct libanubis.Server: %v", err)
|
|
}
|
|
|
|
ts := &httptest.Server{
|
|
Listener: listener,
|
|
Config: &http.Server{Handler: s},
|
|
}
|
|
ts.Start()
|
|
t.Log(ts.URL)
|
|
|
|
t.Cleanup(func() {
|
|
ts.Close()
|
|
})
|
|
|
|
return ts.URL
|
|
}
|