From 68b653b0998f03ff37fa53a3e5f7439f07617860 Mon Sep 17 00:00:00 2001
From: Xe Iaso
Date: Wed, 16 Jul 2025 20:31:18 -0400
Subject: [PATCH] feat(anubis): add /healthz route to metrics server (#843)
* feat(anubis): add /healthz route to metrics server
Also add health check test for Docker Compose and update documentation
for health checking Anubis with Docker Compose.
Signed-off-by: Xe Iaso
* chore: spelling
Signed-off-by: Xe Iaso
---------
Signed-off-by: Xe Iaso
---
.github/actions/spelling/expect.txt | 1 +
.github/workflows/smoke-tests.yml | 33 +-
cmd/anubis/main.go | 58 ++-
docs/docs/CHANGELOG.md | 1 +
.../admin/environments/docker-compose.mdx | 9 +-
docs/manifest/deployment.yaml | 25 +-
internal/health.go | 25 ++
test/git-clone/docker-compose.yaml | 4 +-
test/git-clone/test.sh | 2 +-
test/healthcheck/docker-compose.yaml | 15 +
test/healthcheck/test.sh | 32 ++
test/healthcheck/var/.gitignore | 2 +
web/index_templ.go | 413 +++++++++---------
13 files changed, 372 insertions(+), 248 deletions(-)
create mode 100644 internal/health.go
create mode 100644 test/healthcheck/docker-compose.yaml
create mode 100755 test/healthcheck/test.sh
create mode 100644 test/healthcheck/var/.gitignore
diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index f2b8345..6c4bd4c 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -131,6 +131,7 @@ Hashcash
hashrate
headermap
healthcheck
+healthz
hec
hmc
hostable
diff --git a/.github/workflows/smoke-tests.yml b/.github/workflows/smoke-tests.yml
index ee5f168..615083a 100644
--- a/.github/workflows/smoke-tests.yml
+++ b/.github/workflows/smoke-tests.yml
@@ -10,7 +10,13 @@ permissions:
contents: read
jobs:
- git-clone:
+ smoke-test:
+ strategy:
+ matrix:
+ test:
+ - git-clone
+ - git-push
+ - healthcheck
runs-on: ubuntu-24.04
steps:
- name: Checkout code
@@ -30,28 +36,5 @@ jobs:
- name: Run test
run: |
- cd test/git-clone
- ./test.sh
-
- git-push:
- runs-on: ubuntu-24.04
- steps:
- - name: Checkout code
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- with:
- persist-credentials: false
-
- - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
- with:
- node-version: latest
-
- - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5.5.0
- with:
- go-version: stable
-
- - uses: ko-build/setup-ko@d006021bd0c28d1ce33a07e7943d48b079944c8d # v0.9
-
- - name: Run test
- run: |
- cd test/git-push
+ cd test/${{ matrix.test }}
./test.sh
diff --git a/cmd/anubis/main.go b/cmd/anubis/main.go
index 500b804..9cade6a 100644
--- a/cmd/anubis/main.go
+++ b/cmd/anubis/main.go
@@ -38,6 +38,7 @@ import (
"github.com/facebookgo/flagenv"
_ "github.com/joho/godotenv/autoload"
"github.com/prometheus/client_golang/prometheus/promhttp"
+ healthv1 "google.golang.org/grpc/health/grpc_health_v1"
)
var (
@@ -97,7 +98,7 @@ func keyFromHex(value string) (ed25519.PrivateKey, error) {
}
func doHealthCheck() error {
- resp, err := http.Get("http://localhost" + *metricsBind + anubis.BasePrefix + "/metrics")
+ resp, err := http.Get("http://localhost" + *metricsBind + "/healthz")
if err != nil {
return fmt.Errorf("failed to fetch metrics: %w", err)
}
@@ -241,6 +242,15 @@ func main() {
}
internal.InitSlog(*slogLevel)
+ internal.SetHealth("anubis", healthv1.HealthCheckResponse_NOT_SERVING)
+
+ if *healthcheck {
+ log.Println("running healthcheck")
+ if err := doHealthCheck(); err != nil {
+ log.Fatal(err)
+ }
+ return
+ }
if *extractResources != "" {
if err := extractEmbedFS(data.BotPolicies, ".", *extractResources); err != nil {
@@ -253,6 +263,14 @@ func main() {
return
}
+ ctx := context.Background()
+ wg := new(sync.WaitGroup)
+
+ if *metricsBind != "" {
+ wg.Add(1)
+ go metricsServer(ctx, wg.Done)
+ }
+
var rp http.Handler
// when using anubis via Systemd and environment variables, then it is not possible to set targe to an empty string but only to space
if strings.TrimSpace(*target) != "" {
@@ -267,8 +285,6 @@ func main() {
log.Fatalf("you can't set COOKIE_DOMAIN and COOKIE_DYNAMIC_DOMAIN at the same time")
}
- ctx := context.Background()
-
// Thoth configuration
switch {
case *thothURL != "" && *thothToken == "":
@@ -398,16 +414,10 @@ func main() {
log.Fatalf("can't construct libanubis.Server: %v", err)
}
- wg := new(sync.WaitGroup)
// install signal handler
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer stop()
- if *metricsBind != "" {
- wg.Add(1)
- go metricsServer(ctx, wg.Done)
- }
-
var h http.Handler
h = s
h = internal.RemoteXRealIP(*useRemoteAddress, *bindNetwork, h)
@@ -442,6 +452,8 @@ func main() {
}
}()
+ internal.SetHealth("anubis", healthv1.HealthCheckResponse_SERVING)
+
if err := srv.Serve(listener); !errors.Is(err, http.ErrServerClosed) {
log.Fatal(err)
}
@@ -452,20 +464,30 @@ func metricsServer(ctx context.Context, done func()) {
defer done()
mux := http.NewServeMux()
- mux.Handle(anubis.BasePrefix+"/metrics", promhttp.Handler())
+ mux.Handle("/metrics", promhttp.Handler())
+ mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
+ st, ok := internal.GetHealth("anubis")
+ if !ok {
+ slog.Error("health service anubis does not exist, file a bug")
+ }
+
+ switch st {
+ case healthv1.HealthCheckResponse_NOT_SERVING:
+ http.Error(w, "NOT OK", http.StatusInternalServerError)
+ return
+ case healthv1.HealthCheckResponse_SERVING:
+ fmt.Fprintln(w, "OK")
+ return
+ default:
+ http.Error(w, "UNKNOWN", http.StatusFailedDependency)
+ return
+ }
+ })
srv := http.Server{Handler: mux, ErrorLog: internal.GetFilteredHTTPLogger()}
listener, metricsUrl := setupListener(*metricsBindNetwork, *metricsBind)
slog.Debug("listening for metrics", "url", metricsUrl)
- if *healthcheck {
- log.Println("running healthcheck")
- if err := doHealthCheck(); err != nil {
- log.Fatal(err)
- }
- return
- }
-
go func() {
<-ctx.Done()
c, cancel := context.WithTimeout(context.Background(), 5*time.Second)
diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md
index 7e0622e..a830548 100644
--- a/docs/docs/CHANGELOG.md
+++ b/docs/docs/CHANGELOG.md
@@ -103,6 +103,7 @@ There are a bunch of other assorted features and fixes too:
- The "Try again" button on the error page has been fixed. Previously it meant "try the solution again" instead of "try the challenge again".
- In certain cases, a user could be stuck with a test cookie that is invalid, locking them out of the service for up to half an hour. This has been fixed with better validation of this case and clearing the cookie.
- Start exposing JA4H fingerprints for later use in CEL expressions.
+- Add `/healthz` route for use in platform-based health checks.
### Potentially breaking changes
diff --git a/docs/docs/admin/environments/docker-compose.mdx b/docs/docs/admin/environments/docker-compose.mdx
index b9af4e8..300edf4 100644
--- a/docs/docs/admin/environments/docker-compose.mdx
+++ b/docs/docs/admin/environments/docker-compose.mdx
@@ -4,7 +4,7 @@ Docker compose is typically used in concert with other load balancers such as [A
```yaml
services:
- anubis-nginx:
+ anubis:
image: ghcr.io/techarohq/anubis:latest
environment:
BIND: ":8080"
@@ -15,10 +15,17 @@ services:
POLICY_FNAME: "/data/cfg/botPolicy.yaml"
OG_PASSTHROUGH: "true"
OG_EXPIRY_TIME: "24h"
+ healthcheck:
+ test: ["CMD", "anubis", "--healthcheck"]
+ interval: 5s
+ timeout: 30s
+ retries: 5
+ start_period: 500ms
ports:
- 8080:8080
volumes:
- "./botPolicy.yaml:/data/cfg/botPolicy.yaml:ro"
+
nginx:
image: nginx
volumes:
diff --git a/docs/manifest/deployment.yaml b/docs/manifest/deployment.yaml
index 23fd2df..a732f43 100644
--- a/docs/manifest/deployment.yaml
+++ b/docs/manifest/deployment.yaml
@@ -36,6 +36,18 @@ spec:
mountPath: /conf
ports:
- containerPort: 80
+ readinessProbe:
+ httpGet:
+ path: /
+ port: 80
+ initialDelaySeconds: 1
+ periodSeconds: 10
+ livenessProbe:
+ httpGet:
+ path: /
+ port: 80
+ initialDelaySeconds: 10
+ periodSeconds: 20
- name: anubis
image: ghcr.io/techarohq/anubis:main
imagePullPolicy: Always
@@ -80,4 +92,15 @@ spec:
type: RuntimeDefault
envFrom:
- secretRef:
- name: anubis-docs-thoth
+ readinessProbe:
+ httpGet:
+ path: /healthz
+ port: 9090
+ initialDelaySeconds: 1
+ periodSeconds: 10
+ livenessProbe:
+ httpGet:
+ path: /healthz
+ port: 9090
+ initialDelaySeconds: 10
+ periodSeconds: 20
diff --git a/internal/health.go b/internal/health.go
new file mode 100644
index 0000000..e6339c8
--- /dev/null
+++ b/internal/health.go
@@ -0,0 +1,25 @@
+package internal
+
+import (
+ "context"
+
+ "google.golang.org/grpc/health"
+ healthv1 "google.golang.org/grpc/health/grpc_health_v1"
+)
+
+var HealthSrv = health.NewServer()
+
+func SetHealth(svc string, status healthv1.HealthCheckResponse_ServingStatus) {
+ HealthSrv.SetServingStatus(svc, status)
+}
+
+func GetHealth(svc string) (healthv1.HealthCheckResponse_ServingStatus, bool) {
+ st, err := HealthSrv.Check(context.Background(), &healthv1.HealthCheckRequest{
+ Service: svc,
+ })
+ if err != nil {
+ return healthv1.HealthCheckResponse_UNKNOWN, false
+ }
+
+ return st.GetStatus(), true
+}
diff --git a/test/git-clone/docker-compose.yaml b/test/git-clone/docker-compose.yaml
index b89a33a..554bbe4 100644
--- a/test/git-clone/docker-compose.yaml
+++ b/test/git-clone/docker-compose.yaml
@@ -12,11 +12,11 @@ services:
anubis:
image: ko.local/anubis
environment:
- BIND: ":3000"
+ BIND: ":8005"
TARGET: http://cgit:80
USE_REMOTE_ADDRESS: "true"
ports:
- - 3000:3000
+ - 8005:8005
volumes:
cgit-data:
diff --git a/test/git-clone/test.sh b/test/git-clone/test.sh
index 3c7d4bc..4e05574 100755
--- a/test/git-clone/test.sh
+++ b/test/git-clone/test.sh
@@ -21,6 +21,6 @@ docker compose up -d
sleep 2
-(cd ./var/clones && git clone http://localhost:3000/status.git)
+(cd ./var/clones && git clone http://localhost:8005/status.git)
docker compose down
\ No newline at end of file
diff --git a/test/healthcheck/docker-compose.yaml b/test/healthcheck/docker-compose.yaml
new file mode 100644
index 0000000..c5e11e5
--- /dev/null
+++ b/test/healthcheck/docker-compose.yaml
@@ -0,0 +1,15 @@
+services:
+ web:
+ image: ghcr.io/xe/nginx-micro:v1.29.0
+
+ anubis:
+ image: ko.local/anubis
+ environment:
+ TARGET: http://web:80
+ USE_REMOTE_ADDRESS: "true"
+ healthcheck:
+ test: ["CMD", "anubis", "--healthcheck"]
+ interval: 5s
+ timeout: 30s
+ retries: 5
+ start_period: 500ms
diff --git a/test/healthcheck/test.sh b/test/healthcheck/test.sh
new file mode 100755
index 0000000..2e45c2e
--- /dev/null
+++ b/test/healthcheck/test.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+
+set -eo pipefail
+
+export VERSION=$GITHUB_COMMIT-test
+export KO_DOCKER_REPO=ko.local
+
+set -u
+
+(
+ cd ../.. && \
+ ko build --platform=all --base-import-paths --tags="latest" --image-user=1000 --image-annotation="" --image-label="" ./cmd/anubis -L
+)
+
+docker compose up -d
+
+attempt=1
+max_attempts=5
+delay=2
+
+while ! docker compose ps | grep healthy; do
+ if (( attempt >= max_attempts )); then
+ echo "Service did not become healthy after $max_attempts attempts."
+ exit 1
+ fi
+ echo "Waiting for healthy service... attempt $attempt"
+ sleep $delay
+ delay=$(( delay * 2 ))
+ attempt=$(( attempt + 1 ))
+done
+
+docker compose down
\ No newline at end of file
diff --git a/test/healthcheck/var/.gitignore b/test/healthcheck/var/.gitignore
new file mode 100644
index 0000000..c96a04f
--- /dev/null
+++ b/test/healthcheck/var/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
\ No newline at end of file
diff --git a/web/index_templ.go b/web/index_templ.go
index d83cbe5..7a3db05 100644
--- a/web/index_templ.go
+++ b/web/index_templ.go
@@ -164,51 +164,64 @@ func base(title string, body templ.Component, impressum *config.Impressum, chall
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 12, " Anubis from Techaro. ")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 12, " Anubis ")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var9 string
- templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(localizer.T("made_with"))
+ templ_7745c5c3_Var9, templ_7745c5c3_Err = templ.JoinStringErrs(localizer.T("protected_from"))
if templ_7745c5c3_Err != nil {
- return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 75, Col: 40}
+ return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 73, Col: 127}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var9))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 13, ".
")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 13, " Techaro. ")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var10 string
- templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(localizer.T("mascot_design"))
+ templ_7745c5c3_Var10, templ_7745c5c3_Err = templ.JoinStringErrs(localizer.T("made_with"))
if templ_7745c5c3_Err != nil {
- return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 77, Col: 39}
+ return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 75, Col: 40}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var10))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 14, " ")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 14, ".
")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var11 string
- templ_7745c5c3_Var11, templ_7745c5c3_Err = templ.JoinStringErrs(localizer.T("celphase"))
+ templ_7745c5c3_Var11, templ_7745c5c3_Err = templ.JoinStringErrs(localizer.T("mascot_design"))
if templ_7745c5c3_Err != nil {
- return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 77, Col: 123}
+ return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 77, Col: 39}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var11))
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 15, ".
")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 15, " ")
+ if templ_7745c5c3_Err != nil {
+ return templ_7745c5c3_Err
+ }
+ var templ_7745c5c3_Var12 string
+ templ_7745c5c3_Var12, templ_7745c5c3_Err = templ.JoinStringErrs(localizer.T("celphase"))
+ if templ_7745c5c3_Err != nil {
+ return templ.Error{Err: templ_7745c5c3_Err, FileName: `index.templ`, Line: 77, Col: 123}
+ }
+ _, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var12))
+ if templ_7745c5c3_Err != nil {
+ return templ_7745c5c3_Err
+ }
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 16, ".")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
if impressum != nil {
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 16, "")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 17, "
")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
@@ -216,25 +229,25 @@ func base(title string, body templ.Component, impressum *config.Impressum, chall
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 17, "-- Imprint
")
+ templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 19, "\">Imprint")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
}
- templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 19, "