diff --git a/cmd/anubis/main.go b/cmd/anubis/main.go index 57ab4a1..ce77e7e 100644 --- a/cmd/anubis/main.go +++ b/cmd/anubis/main.go @@ -231,20 +231,6 @@ func makeReverseProxy(target string, targetSNI string, targetHost string, insecu return rp, nil } -func startDecayMapCleanup(ctx context.Context, s *libanubis.Server) { - ticker := time.NewTicker(1 * time.Hour) - defer ticker.Stop() - - for { - select { - case <-ticker.C: - s.CleanupDecayMap() - case <-ctx.Done(): - return - } - } -} - func main() { flagenv.Parse() flag.Parse() @@ -421,7 +407,6 @@ func main() { wg.Add(1) go metricsServer(ctx, wg.Done) } - go startDecayMapCleanup(ctx, s) var h http.Handler h = s diff --git a/docs/docs/CHANGELOG.md b/docs/docs/CHANGELOG.md index 7138b80..df1224b 100644 --- a/docs/docs/CHANGELOG.md +++ b/docs/docs/CHANGELOG.md @@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remove the "Success" interstitial after a proof of work challenge is concluded. - Anubis now has the concept of [storage backends](./admin/policies.mdx#storage-backends). These allow you to change how Anubis stores temporary data (in memory, on the disk, or in Valkey). If you run Anubis in an environment where you have a low amount of memory available for Anubis (eg: less than 64 megabytes), be sure to configure the [`bbolt`](./admin/policies.mdx#bbolt) storage backend. - The challenge issuance and validation process has been rewritten from scratch. Instead of generating challenge strings from request metadata (under the assumption that the values being compared against are stable), Anubis now generates random data for each challenge. This data is stored in the active [storage backend](./admin/policies.mdx#storage-backends) for up to 30 minutes. Fixes [#564](https://github.com/TecharoHQ/anubis/issues/564), [#746](https://github.com/TecharoHQ/anubis/issues/746), and other similar instances of this issue. +- Make the [Open Graph](./admin/configuration/open-graph.mdx) subsystem and DNSBL subsystem use [storage backends](./admin/policies.mdx#storage-backends) instead of storing everything in memory by default. - Add option for forcing a specific language ([#742](https://github.com/TecharoHQ/anubis/pull/742)) - Add translation for Turkish language ([#751](https://github.com/TecharoHQ/anubis/pull/751)) - Allow [Common Crawl](https://commoncrawl.org/) by default so scrapers have less incentive to scrape diff --git a/internal/ogtags/cache.go b/internal/ogtags/cache.go index b96da15..40b7444 100644 --- a/internal/ogtags/cache.go +++ b/internal/ogtags/cache.go @@ -1,6 +1,7 @@ package ogtags import ( + "context" "errors" "log/slog" "net/url" @@ -8,7 +9,7 @@ import ( ) // GetOGTags is the main function that retrieves Open Graph tags for a URL -func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]string, error) { +func (c *OGTagCache) GetOGTags(ctx context.Context, url *url.URL, originalHost string) (map[string]string, error) { if url == nil { return nil, errors.New("nil URL provided, cannot fetch OG tags") } @@ -21,12 +22,12 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st cacheKey := c.generateCacheKey(target, originalHost) // Check cache first - if cachedTags := c.checkCache(cacheKey); cachedTags != nil { + if cachedTags := c.checkCache(ctx, cacheKey); cachedTags != nil { return cachedTags, nil } // Fetch HTML content, passing the original host - doc, err := c.fetchHTMLDocumentWithCache(target, originalHost, cacheKey) + doc, err := c.fetchHTMLDocumentWithCache(ctx, target, originalHost, cacheKey) if errors.Is(err, syscall.ECONNREFUSED) { slog.Debug("Connection refused, returning empty tags") return nil, nil @@ -42,7 +43,7 @@ func (c *OGTagCache) GetOGTags(url *url.URL, originalHost string) (map[string]st ogTags := c.extractOGTags(doc) // Store in cache - c.cache.Set(cacheKey, ogTags, c.ogTimeToLive) + c.cache.Set(ctx, cacheKey, ogTags, c.ogTimeToLive) return ogTags, nil } @@ -59,8 +60,8 @@ func (c *OGTagCache) generateCacheKey(target string, originalHost string) string } // checkCache checks if we have the tags cached and returns them if so -func (c *OGTagCache) checkCache(cacheKey string) map[string]string { - if cachedTags, ok := c.cache.Get(cacheKey); ok { +func (c *OGTagCache) checkCache(ctx context.Context, cacheKey string) map[string]string { + if cachedTags, err := c.cache.Get(ctx, cacheKey); err == nil { slog.Debug("cache hit", "tags", cachedTags) return cachedTags } diff --git a/internal/ogtags/cache_test.go b/internal/ogtags/cache_test.go index 9b7e898..7efd497 100644 --- a/internal/ogtags/cache_test.go +++ b/internal/ogtags/cache_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/TecharoHQ/anubis/lib/policy/config" + "github.com/TecharoHQ/anubis/lib/store/memory" ) func TestCacheReturnsDefault(t *testing.T) { @@ -21,14 +22,14 @@ func TestCacheReturnsDefault(t *testing.T) { TimeToLive: time.Minute, ConsiderHost: false, Override: want, - }) + }, memory.New(t.Context())) u, err := url.Parse("https://anubis.techaro.lol") if err != nil { t.Fatal(err) } - result, err := cache.GetOGTags(u, "anubis.techaro.lol") + result, err := cache.GetOGTags(t.Context(), u, "anubis.techaro.lol") if err != nil { t.Fatal(err) } @@ -49,7 +50,7 @@ func TestCheckCache(t *testing.T) { Enabled: true, TimeToLive: time.Minute, ConsiderHost: false, - }) + }, memory.New(t.Context())) // Set up test data urlStr := "http://example.com/page" @@ -60,16 +61,16 @@ func TestCheckCache(t *testing.T) { cacheKey := cache.generateCacheKey(urlStr, "example.com") // Test cache miss - tags := cache.checkCache(cacheKey) + tags := cache.checkCache(t.Context(), cacheKey) if tags != nil { t.Errorf("expected nil tags on cache miss, got %v", tags) } // Manually add to cache - cache.cache.Set(cacheKey, expectedTags, time.Minute) + cache.cache.Set(t.Context(), cacheKey, expectedTags, time.Minute) // Test cache hit - tags = cache.checkCache(cacheKey) + tags = cache.checkCache(t.Context(), cacheKey) if tags == nil { t.Fatal("expected non-nil tags on cache hit, got nil") } @@ -112,7 +113,7 @@ func TestGetOGTags(t *testing.T) { Enabled: true, TimeToLive: time.Minute, ConsiderHost: false, - }) + }, memory.New(t.Context())) // Parse the test server URL parsedURL, err := url.Parse(ts.URL) @@ -122,7 +123,7 @@ func TestGetOGTags(t *testing.T) { // Test fetching OG tags from the test server // Pass the host from the parsed test server URL - ogTags, err := cache.GetOGTags(parsedURL, parsedURL.Host) + ogTags, err := cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host) if err != nil { t.Fatalf("failed to get OG tags: %v", err) } @@ -142,14 +143,14 @@ func TestGetOGTags(t *testing.T) { // Test fetching OG tags from the cache // Pass the host from the parsed test server URL - ogTags, err = cache.GetOGTags(parsedURL, parsedURL.Host) + ogTags, err = cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host) if err != nil { t.Fatalf("failed to get OG tags from cache: %v", err) } // Test fetching OG tags from the cache (3rd time) // Pass the host from the parsed test server URL - newOgTags, err := cache.GetOGTags(parsedURL, parsedURL.Host) + newOgTags, err := cache.GetOGTags(t.Context(), parsedURL, parsedURL.Host) if err != nil { t.Fatalf("failed to get OG tags from cache: %v", err) } @@ -263,10 +264,10 @@ func TestGetOGTagsWithHostConsideration(t *testing.T) { Enabled: true, TimeToLive: time.Minute, ConsiderHost: tc.ogCacheConsiderHost, - }) + }, memory.New(t.Context())) for i, req := range tc.requests { - ogTags, err := cache.GetOGTags(parsedURL, req.host) + ogTags, err := cache.GetOGTags(t.Context(), parsedURL, req.host) if err != nil { t.Errorf("Request %d (host: %s): unexpected error: %v", i+1, req.host, err) continue // Skip further checks for this request if error occurred diff --git a/internal/ogtags/fetch.go b/internal/ogtags/fetch.go index bd8276e..26a0af2 100644 --- a/internal/ogtags/fetch.go +++ b/internal/ogtags/fetch.go @@ -20,8 +20,8 @@ var ( // fetchHTMLDocumentWithCache fetches the HTML document from the given URL string, // preserving the original host header. -func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost string, cacheKey string) (*html.Node, error) { - req, err := http.NewRequestWithContext(context.Background(), "GET", urlStr, nil) +func (c *OGTagCache) fetchHTMLDocumentWithCache(ctx context.Context, urlStr string, originalHost string, cacheKey string) (*html.Node, error) { + req, err := http.NewRequestWithContext(ctx, "GET", urlStr, nil) if err != nil { return nil, fmt.Errorf("failed to create http request: %w", err) } @@ -41,7 +41,7 @@ func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost stri var netErr net.Error if errors.As(err, &netErr) && netErr.Timeout() { slog.Debug("og: request timed out", "url", urlStr) - c.cache.Set(cacheKey, emptyMap, c.ogTimeToLive/2) // Cache empty result for half the TTL to not spam the server + c.cache.Set(ctx, cacheKey, emptyMap, c.ogTimeToLive/2) // Cache empty result for half the TTL to not spam the server } return nil, fmt.Errorf("http get failed: %w", err) } @@ -56,7 +56,7 @@ func (c *OGTagCache) fetchHTMLDocumentWithCache(urlStr string, originalHost stri if resp.StatusCode != http.StatusOK { slog.Debug("og: received non-OK status code", "url", urlStr, "status", resp.StatusCode) - c.cache.Set(cacheKey, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes + c.cache.Set(ctx, cacheKey, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes return nil, fmt.Errorf("%w: page not found", ErrOgHandled) } diff --git a/internal/ogtags/fetch_test.go b/internal/ogtags/fetch_test.go index 84048f1..c986272 100644 --- a/internal/ogtags/fetch_test.go +++ b/internal/ogtags/fetch_test.go @@ -1,6 +1,7 @@ package ogtags import ( + "context" "fmt" "io" "net/http" @@ -11,6 +12,7 @@ import ( "time" "github.com/TecharoHQ/anubis/lib/policy/config" + "github.com/TecharoHQ/anubis/lib/store/memory" "golang.org/x/net/html" ) @@ -85,8 +87,8 @@ func TestFetchHTMLDocument(t *testing.T) { Enabled: true, TimeToLive: time.Minute, ConsiderHost: false, - }) - doc, err := cache.fetchHTMLDocument(ts.URL, "anything") + }, memory.New(t.Context())) + doc, err := cache.fetchHTMLDocument(t.Context(), ts.URL, "anything") if tt.expectError { if err == nil { @@ -116,9 +118,9 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) { Enabled: true, TimeToLive: time.Minute, ConsiderHost: false, - }) + }, memory.New(t.Context())) - doc, err := cache.fetchHTMLDocument("http://invalid.url.that.doesnt.exist.example", "anything") + doc, err := cache.fetchHTMLDocument(t.Context(), "http://invalid.url.that.doesnt.exist.example", "anything") if err == nil { t.Error("expected error for invalid URL, got nil") @@ -130,7 +132,7 @@ func TestFetchHTMLDocumentInvalidURL(t *testing.T) { } // fetchHTMLDocument allows you to call fetchHTMLDocumentWithCache without a duplicate generateCacheKey call -func (c *OGTagCache) fetchHTMLDocument(urlStr string, originalHost string) (*html.Node, error) { +func (c *OGTagCache) fetchHTMLDocument(ctx context.Context, urlStr string, originalHost string) (*html.Node, error) { cacheKey := c.generateCacheKey(urlStr, originalHost) - return c.fetchHTMLDocumentWithCache(urlStr, originalHost, cacheKey) + return c.fetchHTMLDocumentWithCache(ctx, urlStr, originalHost, cacheKey) } diff --git a/internal/ogtags/integration_test.go b/internal/ogtags/integration_test.go index 9c9128f..574172d 100644 --- a/internal/ogtags/integration_test.go +++ b/internal/ogtags/integration_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/TecharoHQ/anubis/lib/policy/config" + "github.com/TecharoHQ/anubis/lib/store/memory" ) func TestIntegrationGetOGTags(t *testing.T) { @@ -110,7 +111,7 @@ func TestIntegrationGetOGTags(t *testing.T) { Enabled: true, TimeToLive: time.Minute, ConsiderHost: false, - }) + }, memory.New(t.Context())) // Create URL for test testURL, _ := url.Parse(ts.URL) @@ -119,7 +120,7 @@ func TestIntegrationGetOGTags(t *testing.T) { // Get OG tags // Pass the host from the test URL - ogTags, err := cache.GetOGTags(testURL, testURL.Host) + ogTags, err := cache.GetOGTags(t.Context(), testURL, testURL.Host) // Check error expectation if tc.expectError { @@ -147,7 +148,7 @@ func TestIntegrationGetOGTags(t *testing.T) { // Test cache retrieval // Pass the host from the test URL - cachedOGTags, err := cache.GetOGTags(testURL, testURL.Host) + cachedOGTags, err := cache.GetOGTags(t.Context(), testURL, testURL.Host) if err != nil { t.Fatalf("failed to get OG tags from cache: %v", err) } diff --git a/internal/ogtags/mem_test.go b/internal/ogtags/mem_test.go index 9de2fd0..b415cda 100644 --- a/internal/ogtags/mem_test.go +++ b/internal/ogtags/mem_test.go @@ -7,6 +7,7 @@ import ( "testing" "github.com/TecharoHQ/anubis/lib/policy/config" + "github.com/TecharoHQ/anubis/lib/store/memory" "golang.org/x/net/html" ) @@ -30,7 +31,7 @@ func BenchmarkGetTarget(b *testing.B) { for _, tt := range tests { b.Run(tt.name, func(b *testing.B) { - cache := NewOGTagCache(tt.target, config.OpenGraph{}) + cache := NewOGTagCache(tt.target, config.OpenGraph{}, memory.New(b.Context())) urls := make([]*url.URL, len(tt.paths)) for i, path := range tt.paths { u, _ := url.Parse(path) @@ -66,7 +67,7 @@ func BenchmarkExtractOGTags(b *testing.B) {
Content