Merge c1af11dc439a3fc2a90b30abb5f4d4077ec9eb18 into 963527fb60e8348c05b51d11d26243c41663eeac

This commit is contained in:
phoval 2025-07-30 20:33:06 +00:00 committed by GitHub
commit 601e7de005
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 127 additions and 87 deletions

View File

@ -104,6 +104,7 @@ Firecrawl
flagenv flagenv
Fordola Fordola
forgejo forgejo
forwardauth
fsys fsys
fullchain fullchain
gaissmai gaissmai

View File

@ -77,6 +77,7 @@ var (
extractResources = flag.String("extract-resources", "", "if set, extract the static resources to the specified folder") extractResources = flag.String("extract-resources", "", "if set, extract the static resources to the specified folder")
webmasterEmail = flag.String("webmaster-email", "", "if set, displays webmaster's email on the reject page for appeals") webmasterEmail = flag.String("webmaster-email", "", "if set, displays webmaster's email on the reject page for appeals")
versionFlag = flag.Bool("version", false, "print Anubis version") versionFlag = flag.Bool("version", false, "print Anubis version")
publicUrl = flag.String("public-url", "", "the externally accessible URL for this Anubis instance, used for constructing redirect URLs (e.g., for forwardAuth).")
xffStripPrivate = flag.Bool("xff-strip-private", true, "if set, strip private addresses from X-Forwarded-For") xffStripPrivate = flag.Bool("xff-strip-private", true, "if set, strip private addresses from X-Forwarded-For")
thothInsecure = flag.Bool("thoth-insecure", false, "if set, connect to Thoth over plain HTTP/2, don't enable this unless support told you to") thothInsecure = flag.Bool("thoth-insecure", false, "if set, connect to Thoth over plain HTTP/2, don't enable this unless support told you to")
@ -412,6 +413,7 @@ func main() {
WebmasterEmail: *webmasterEmail, WebmasterEmail: *webmasterEmail,
OpenGraph: policy.OpenGraph, OpenGraph: policy.OpenGraph,
CookieSecure: *cookieSecure, CookieSecure: *cookieSecure,
PublicUrl: *publicUrl,
}) })
if err != nil { if err != nil {
log.Fatalf("can't construct libanubis.Server: %v", err) log.Fatalf("can't construct libanubis.Server: %v", err)
@ -440,6 +442,7 @@ func main() {
"base-prefix", *basePrefix, "base-prefix", *basePrefix,
"cookie-expiration-time", *cookieExpiration, "cookie-expiration-time", *cookieExpiration,
"rule-error-ids", ruleErrorIDs, "rule-error-ids", ruleErrorIDs,
"public-url", *publicUrl,
) )
go func() { go func() {

View File

@ -232,6 +232,7 @@ And some cleanups/refactors were added:
- Bump AI-robots.txt to version 1.37 - Bump AI-robots.txt to version 1.37
- Make progress bar styling more compatible (UXP, etc) - Make progress bar styling more compatible (UXP, etc)
- Add `--strip-base-prefix` flag/envvar to strip the base prefix from request paths when forwarding to target servers - Add `--strip-base-prefix` flag/envvar to strip the base prefix from request paths when forwarding to target servers
- Add support for using Anubis with the Traefik forwardAuth middleware
- Fix an off-by-one in the default threshold config - Fix an off-by-one in the default threshold config
- Add functionality for HS512 JWT algorithm - Add functionality for HS512 JWT algorithm
- Add support for dynamic cookie domains with the `--cookie-dynamic-domain`/`COOKIE_DYNAMIC_DOMAIN` flag/envvar - Add support for dynamic cookie domains with the `--cookie-dynamic-domain`/`COOKIE_DYNAMIC_DOMAIN` flag/envvar

View File

@ -10,10 +10,6 @@ but it also applies to docker cli options.
::: :::
Currently, Anubis doesn't have any Traefik middleware,
so you need to manually route it between Traefik and your target service.
This routing is done per labels in Traefik.
In this example, we will use 4 Containers: In this example, we will use 4 Containers:
- `traefik` - the Traefik instance - `traefik` - the Traefik instance
@ -21,12 +17,6 @@ In this example, we will use 4 Containers:
- `target` - our service to protect (`traefik/whoami` in this case) - `target` - our service to protect (`traefik/whoami` in this case)
- `target2` - a second service that isn't supposed to be protected (`traefik/whoami` in this case) - `target2` - a second service that isn't supposed to be protected (`traefik/whoami` in this case)
There are 3 steps we need to follow:
1. Create a new exclusive Traefik endpoint for Anubis
2. Pass all unspecified requests to Anubis
3. Let Anubis pass all verified requests back to Traefik on its exclusive endpoint
## Diagram of Flow ## Diagram of Flow
This is a small diagram depicting the flow. This is a small diagram depicting the flow.
@ -40,74 +30,16 @@ anubis[Anubis]
target[Target] target[Target]
user-->|:443 - Requesting Service|traefik user-->|:443 - Requesting Service|traefik
traefik-->|:8080 - Passing to Anubis|anubis traefik-->|:8080 - Check authorization to Anubis|anubis
anubis-->|:3923 - Passing back to Traefik|traefik anubis-->|redirect if failed|traefik
user-->|:8080 - make the challenge|traefik
anubis-->|redirect back to target|traefik
traefik-->|:80 - Passing to the target|target traefik-->|:80 - Passing to the target|target
``` ```
## Create an Exclusive Anubis Endpoint in Traefik
There are 2 ways of registering a new endpoint in Traefik.
Which one to use depends on how you configured your Traefik so far.
**CLI Options:**
```yml
--entrypoints.anubis.address=:3923
```
**traefik.yml:**
```yml
entryPoints:
anubis:
address: ":3923"
```
It is important that the specified port isn't actually reachable from the outside,
but only exposed in the Docker network.
Exposing the Anubis port on Traefik directly will allow direct unprotected access to all containers behind it.
## Passing all unspecified Web Requests to Anubis
There are cases where you want Traefik to still route some requests without protection, just like before.
To achieve this, we can register Anubis as the default handler for non-protected requests.
We also don't want users to get SSL Errors during the checking phase,
thus we also need to let Traefik provide SSL Certs for our endpoint.
This example expects an TLS cert resolver called `le`.
We also expect there to be an endpoint called `websecure` for HTTPS in this example.
This is an example of the required labels to configure Traefik on the Anubis container:
```yml
labels:
- traefik.enable=true # Enabling Traefik
- traefik.docker.network=traefik # Telling Traefik which network to use
- traefik.http.routers.anubis.priority=1 # Setting Anubis to the lowest priority, so it only takes the slack
- traefik.http.routers.anubis.rule=PathRegexp(`.*`) # Wildcard match every path
- traefik.http.routers.anubis.entrypoints=websecure # Listen on HTTPS
- traefik.http.services.anubis.loadbalancer.server.port=8080 # Telling Traefik to which port it should route requests
- traefik.http.routers.anubis.service=anubis # Telling Traefik to use the above specified port
- traefik.http.routers.anubis.tls.certresolver=le # Telling Traefik to resolve a Cert for Anubis
```
## Passing all Verified Requests Back Correctly to Traefik
To pass verified requests back to Traefik,
we only need to configure Anubis using its environment variables:
```yml
environment:
- BIND=:8080
- TARGET=http://traefik:3923
```
## Full Example Config ## Full Example Config
Now that we know how to pass all requests back and forth, here is the example. This example contains 3 services: anubis, one that is protected and the other one that is not.
This example contains 2 services: one that is protected and the other one that is not.
**compose.yml** **compose.yml**
@ -128,6 +60,8 @@ services:
# Enable Traefik # Enable Traefik
- traefik.enable=true - traefik.enable=true
- traefik.docker.network=traefik - traefik.docker.network=traefik
# Anubis middleware
- traefik.http.middlewares.anubis.forwardauth.address=http://anubis:8080/.within.website/x/cmd/anubis/api/check
# Redirect any HTTP to HTTPS # Redirect any HTTP to HTTPS
- traefik.http.middlewares.redirect-to-https.redirectscheme.scheme=https - traefik.http.middlewares.redirect-to-https.redirectscheme.scheme=https
- traefik.http.routers.web.rule=PathPrefix(`/`) - traefik.http.routers.web.rule=PathPrefix(`/`)
@ -140,17 +74,22 @@ services:
environment: environment:
# Telling Anubis, where to listen for Traefik # Telling Anubis, where to listen for Traefik
- BIND=:8080 - BIND=:8080
# Telling Anubis to point to Traefik via the Docker network # Telling Anubis to do redirect — ensure there is a space after '='
- TARGET=http://traefik:3923 - TARGET=
# Specifies which domains Anubis is allowed to redirect to.
- REDIRECT_DOMAINS=example.com
# Should be the full external URL for Anubis (including scheme)
- PUBLIC_URL=https://anubis.example.com
# Should match your domain for proper cookie scoping
- COOKIE_DOMAIN=example.com
networks: networks:
- traefik - traefik
labels: labels:
- traefik.enable=true # Enabling Traefik - traefik.enable=true # Enabling Traefik
- traefik.docker.network=traefik # Telling Traefik which network to use - traefik.docker.network=traefik # Telling Traefik which network to use
- traefik.http.routers.anubis.priority=1 # Setting Anubis to the lowest priority, so it only takes the slack - traefik.http.routers.anubis.rule=Host(`anubis.example.com`) # Only Matching Requests for anubis.example.com
- traefik.http.routers.anubis.rule=PathRegexp(`.*`) # wildcard match anything
- traefik.http.routers.anubis.entrypoints=websecure # Listen on HTTPS - traefik.http.routers.anubis.entrypoints=websecure # Listen on HTTPS
- traefik.http.services.anubis.loadbalancer.server.port=8080 # Telling Traefik to which port it should route requests - traefik.http.services.anubis.loadbalancer.server.port=8080 # Telling Traefik where to receive requests
- traefik.http.routers.anubis.service=anubis # Telling Traefik to use the above specified port - traefik.http.routers.anubis.service=anubis # Telling Traefik to use the above specified port
- traefik.http.routers.anubis.tls.certresolver=le # Telling Traefik to resolve a Cert for Anubis - traefik.http.routers.anubis.tls.certresolver=le # Telling Traefik to resolve a Cert for Anubis
@ -163,9 +102,11 @@ services:
- traefik.enable=true # Enabling Traefik - traefik.enable=true # Enabling Traefik
- traefik.docker.network=traefik # Telling Traefik which network to use - traefik.docker.network=traefik # Telling Traefik which network to use
- traefik.http.routers.target.rule=Host(`example.com`) # Only Matching Requests for example.com - traefik.http.routers.target.rule=Host(`example.com`) # Only Matching Requests for example.com
- traefik.http.routers.target.entrypoints=anubis # Listening on the exclusive Anubis Network - traefik.http.routers.target.entrypoints=websecure # Listen on HTTPS
- traefik.http.services.target.loadbalancer.server.port=80 # Telling Traefik where to receive requests - traefik.http.services.target.loadbalancer.server.port=80 # Telling Traefik where to receive requests
- traefik.http.routers.target.service=target # Telling Traefik to use the above specified port - traefik.http.routers.target.service=target # Telling Traefik to use the above specified port
- traefik.http.routers.target.tls.certresolver=le # Telling Traefik to resolve a Cert for the target
- traefik.http.routers.target.middlewares=anubis@docker # Use the Anubis middleware
# Not Protected by Anubis # Not Protected by Anubis
target2: target2:
@ -175,7 +116,7 @@ services:
labels: labels:
- traefik.enable=true # Enabling Traefik - traefik.enable=true # Enabling Traefik
- traefik.docker.network=traefik # Telling Traefik which network to use - traefik.docker.network=traefik # Telling Traefik which network to use
- traefik.http.routers.target2.rule=Host(`another.com`) # Only Matching Requests for example.com - traefik.http.routers.target2.rule=Host(`another.example.com`) # Only Matching Requests for another.example.com
- traefik.http.routers.target2.entrypoints=websecure # Listening on the exclusive Anubis Network - traefik.http.routers.target2.entrypoints=websecure # Listening on the exclusive Anubis Network
- traefik.http.services.target2.loadbalancer.server.port=80 # Telling Traefik where to receive requests - traefik.http.services.target2.loadbalancer.server.port=80 # Telling Traefik where to receive requests
- traefik.http.routers.target2.service=target2 # Telling Traefik to use the above specified port - traefik.http.routers.target2.service=target2 # Telling Traefik to use the above specified port
@ -198,9 +139,6 @@ entryPoints:
address: ":80" address: ":80"
websecure: websecure:
address: ":443" address: ":443"
# Anubis
anubis:
address: ":3923"
certificatesResolvers: certificatesResolvers:
le: le:

View File

@ -77,6 +77,7 @@ Anubis uses these environment variables for configuration:
| `OG_PASSTHROUGH` | `false` | If set to `true`, Anubis will enable Open Graph tag passthrough. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem. | | `OG_PASSTHROUGH` | `false` | If set to `true`, Anubis will enable Open Graph tag passthrough. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem. |
| `OG_CACHE_CONSIDER_HOST` | `false` | If set to `true`, Anubis will consider the host in the Open Graph tag cache key. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem. | | `OG_CACHE_CONSIDER_HOST` | `false` | If set to `true`, Anubis will consider the host in the Open Graph tag cache key. Prefer using [the policy file](./configuration/open-graph.mdx) to configure the Open Graph subsystem. |
| `POLICY_FNAME` | unset | The file containing [bot policy configuration](./policies.mdx). See the bot policy documentation for more details. If unset, the default bot policy configuration is used. | | `POLICY_FNAME` | unset | The file containing [bot policy configuration](./policies.mdx). See the bot policy documentation for more details. If unset, the default bot policy configuration is used. |
| `PUBLIC_URL` | unset | The externally accessible URL for this Anubis instance, used for constructing redirect URLs (e.g., for Traefik forwardAuth). |
| `REDIRECT_DOMAINS` | unset | If set, restrict the domains that Anubis can redirect to when passing a challenge.<br/><br/>If this is unset, Anubis may redirect to any domain which could cause security issues in the unlikely case that an attacker passes a challenge for your browser and then tricks you into clicking a link to your domain.<br/><br/>Note that if you are hosting Anubis on a non-standard port (`https://example.com:8443`, `http://www.example.net:8080`, etc.), you must also include the port number here. | | `REDIRECT_DOMAINS` | unset | If set, restrict the domains that Anubis can redirect to when passing a challenge.<br/><br/>If this is unset, Anubis may redirect to any domain which could cause security issues in the unlikely case that an attacker passes a challenge for your browser and then tricks you into clicking a link to your domain.<br/><br/>Note that if you are hosting Anubis on a non-standard port (`https://example.com:8443`, `http://www.example.net:8080`, etc.), you must also include the port number here. |
| `SERVE_ROBOTS_TXT` | `false` | If set `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. | | `SERVE_ROBOTS_TXT` | `false` | If set `true`, Anubis will serve a default `robots.txt` file that disallows all known AI scrapers by name and then additionally disallows every scraper. This is useful if facts and circumstances make it difficult to change the underlying service to serve such a `robots.txt` file. |
| `SOCKET_MODE` | `0770` | _Only used when at least one of the `*_BIND_NETWORK` variables are set to `unix`._ The socket mode (permissions) for Unix domain sockets. | | `SOCKET_MODE` | `0770` | _Only used when at least one of the `*_BIND_NETWORK` variables are set to `unix`._ The socket mode (permissions) for Unix domain sockets. |

View File

@ -153,7 +153,7 @@ func computeXFFHeader(remoteAddr string, origXFFHeader string, pref XFFComputePr
// generally they'd be expected to do these two things on // generally they'd be expected to do these two things on
// their own end to find the first non-spoofed IP // their own end to find the first non-spoofed IP
for i := len(origForwardedList) - 1; i >= 0; i-- { for i := len(origForwardedList) - 1; i >= 0; i-- {
segmentIP, err := netip.ParseAddr(origForwardedList[i]) segmentIP, err := netip.ParseAddr(strings.TrimSpace(origForwardedList[i]))
if err != nil { if err != nil {
// can't assess this element, so the remainder of the chain // can't assess this element, so the remainder of the chain
// can't be trusted. not a fatal error, since anyone can // can't be trusted. not a fatal error, since anyone can

View File

@ -43,6 +43,7 @@ type Options struct {
OpenGraph config.OpenGraph OpenGraph config.OpenGraph
ServeRobotsTXT bool ServeRobotsTXT bool
CookieSecure bool CookieSecure bool
PublicUrl string
} }
func LoadPoliciesOrDefault(ctx context.Context, fname string, defaultDifficulty int) (*policy.ParsedConfig, error) { func LoadPoliciesOrDefault(ctx context.Context, fname string, defaultDifficulty int) (*policy.ParsedConfig, error) {

View File

@ -1,9 +1,11 @@
package lib package lib
import ( import (
"errors"
"fmt" "fmt"
"math/rand" "math/rand"
"net/http" "net/http"
"net/url"
"regexp" "regexp"
"slices" "slices"
"strings" "strings"
@ -115,8 +117,17 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, cr policy.C
localizer := localization.GetLocalizer(r) localizer := localization.GetLocalizer(r)
if returnHTTPStatusOnly { if returnHTTPStatusOnly {
w.WriteHeader(http.StatusUnauthorized) if s.opts.PublicUrl == "" {
w.Write([]byte(localizer.T("authorization_required"))) w.WriteHeader(http.StatusUnauthorized)
w.Write([]byte(localizer.T("authorization_required")))
} else {
redirectURL, err := s.constructRedirectURL(r)
if err != nil {
s.respondWithStatus(w, r, err.Error(), http.StatusBadRequest)
return
}
http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect)
}
return return
} }
@ -186,6 +197,24 @@ func (s *Server) RenderIndex(w http.ResponseWriter, r *http.Request, cr policy.C
handler.ServeHTTP(w, r) handler.ServeHTTP(w, r)
} }
// constructRedirectURL builds the URL a client is sent to when a status-only
// check fails and a public URL is configured.
//
// The original request is reassembled from the X-Forwarded-Proto,
// X-Forwarded-Host and X-Forwarded-Uri request headers, query-escaped, and
// attached as the "redir" parameter of "<PublicUrl>/.within.website/".
//
// An error is returned when any of the three headers is missing, when
// X-Forwarded-Uri does not start with "/", or when RedirectDomains is
// non-empty and does not contain the forwarded host.
func (s *Server) constructRedirectURL(r *http.Request) (string, error) {
	proto := r.Header.Get("X-Forwarded-Proto")
	host := r.Header.Get("X-Forwarded-Host")
	uri := r.Header.Get("X-Forwarded-Uri")

	if proto == "" || host == "" || uri == "" {
		return "", errors.New("missing required X-Forwarded-* headers")
	}

	// uri is concatenated directly after host below. Without a leading slash
	// a value such as ".evil.example/" or "@evil.example/" would change the
	// effective host of the redirect after the allow-list check has passed.
	if !strings.HasPrefix(uri, "/") {
		return "", errors.New("invalid X-Forwarded-Uri header")
	}

	// Check if host is allowed in RedirectDomains
	if len(s.opts.RedirectDomains) > 0 && !slices.Contains(s.opts.RedirectDomains, host) {
		return "", errors.New("redirect domain not allowed")
	}

	redir := proto + "://" + host + uri

	// Tolerate a trailing slash on the configured public URL so the result
	// does not contain "//.within.website/".
	base := strings.TrimSuffix(s.opts.PublicUrl, "/")

	return fmt.Sprintf("%s/.within.website/?redir=%s", base, url.QueryEscape(redir)), nil
}
func (s *Server) RenderBench(w http.ResponseWriter, r *http.Request) { func (s *Server) RenderBench(w http.ResponseWriter, r *http.Request) {
localizer := localization.GetLocalizer(r) localizer := localization.GetLocalizer(r)
@ -245,7 +274,12 @@ func (s *Server) ServeHTTPNext(w http.ResponseWriter, r *http.Request) {
return return
} }
if (len(urlParsed.Host) > 0 && len(s.opts.RedirectDomains) != 0 && !slices.Contains(s.opts.RedirectDomains, urlParsed.Host)) || urlParsed.Host != r.URL.Host { hostNotAllowed := len(urlParsed.Host) > 0 &&
len(s.opts.RedirectDomains) != 0 &&
!slices.Contains(s.opts.RedirectDomains, urlParsed.Host)
hostMismatch := r.URL.Host != "" && urlParsed.Host != r.URL.Host
if hostNotAllowed || hostMismatch {
s.respondWithStatus(w, r, localizer.T("redirect_domain_not_allowed"), http.StatusBadRequest) s.respondWithStatus(w, r, localizer.T("redirect_domain_not_allowed"), http.StatusBadRequest)
return return
} }

View File

@ -1,7 +1,9 @@
package lib package lib
import ( import (
"net/http"
"net/http/httptest" "net/http/httptest"
"net/url"
"testing" "testing"
"github.com/TecharoHQ/anubis" "github.com/TecharoHQ/anubis"
@ -129,3 +131,62 @@ func TestClearCookieWithDynamicDomain(t *testing.T) {
t.Errorf("wanted cookie max age of -1, got: %d", ckie.MaxAge) t.Errorf("wanted cookie max age of -1, got: %d", ckie.MaxAge)
} }
} }
// TestRenderIndexRedirect checks that a status-only RenderIndex call on a
// server with PublicUrl set answers with a temporary redirect whose Location
// points at the public URL and carries the original request URL (rebuilt from
// the X-Forwarded-* headers) in the "redir" query parameter.
func TestRenderIndexRedirect(t *testing.T) {
	s := &Server{
		opts: Options{
			PublicUrl: "https://anubis.example.com",
		},
	}

	req := httptest.NewRequest(http.MethodGet, "/", nil)
	req.Header.Set("X-Forwarded-Proto", "https")
	req.Header.Set("X-Forwarded-Host", "example.com")
	req.Header.Set("X-Forwarded-Uri", "/foo")
	rr := httptest.NewRecorder()

	s.RenderIndex(rr, req, nil, true)

	if rr.Code != http.StatusTemporaryRedirect {
		t.Errorf("expected status %d, got %d", http.StatusTemporaryRedirect, rr.Code)
	}

	location := rr.Header().Get("Location")
	parsedURL, err := url.Parse(location)
	if err != nil {
		t.Fatalf("failed to parse location URL %q: %v", location, err)
	}

	scheme := "https"
	if parsedURL.Scheme != scheme {
		t.Errorf("expected scheme to be %q, got %q", scheme, parsedURL.Scheme)
	}

	// The message names the component actually being compared (the host),
	// so a failure is not mistaken for a whole-URL mismatch.
	host := "anubis.example.com"
	if parsedURL.Host != host {
		t.Errorf("expected host to be %q, got %q", host, parsedURL.Host)
	}

	redir := parsedURL.Query().Get("redir")
	expectedRedir := "https://example.com/foo"
	if redir != expectedRedir {
		t.Errorf("expected redir param to be %q, got %q", expectedRedir, redir)
	}
}
// TestRenderIndexUnauthorized checks that a status-only RenderIndex call on a
// server with an empty PublicUrl responds with 401 and the
// "Authorization required" body.
func TestRenderIndexUnauthorized(t *testing.T) {
	const wantBody = "Authorization required"

	srv := &Server{opts: Options{PublicUrl: ""}}
	recorder := httptest.NewRecorder()
	request := httptest.NewRequest("GET", "/", nil)

	srv.RenderIndex(recorder, request, nil, true)

	if got := recorder.Code; got != http.StatusUnauthorized {
		t.Errorf("expected status %d, got %d", http.StatusUnauthorized, got)
	}

	if got := recorder.Body.String(); got != wantBody {
		t.Errorf("expected body %q, got %q", wantBody, got)
	}
}