fix(fetch): improve error handling for Content-Type parsing (#253)

* fix(fetch): improve error handling for Content-Type parsing

Signed-off-by: Jason Cameron <git@jasoncameron.dev>

* fix(fetch): rename OgHandledError to ErrOgHandled to satisfy staticcheck

Signed-off-by: Jason Cameron <git@jasoncameron.dev>

---------

Signed-off-by: Jason Cameron <git@jasoncameron.dev>
This commit is contained in:
Jason Cameron 2025-04-13 15:59:58 -04:00 committed by GitHub
parent 3438595f32
commit 9865e3ded8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 15 additions and 9 deletions

View File

@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Whitelisted [DuckDuckBot](https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/) in botPolicies
- Improvements to build scripts to make them less dependent on the build host
- Improved the OpenGraph error logging
## v1.16.0

View File

@ -23,8 +23,8 @@ func (c *OGTagCache) GetOGTags(url *url.URL) (map[string]string, error) {
if errors.Is(err, syscall.ECONNREFUSED) {
slog.Debug("Connection refused, returning empty tags")
return nil, nil
} else if errors.Is(err, ErrNotFound) {
// not even worth a debug log...
} else if errors.Is(err, ErrOgHandled) {
// Error was handled in fetchHTMLDocument, return empty tags
return nil, nil
}
if err != nil {

View File

@ -11,8 +11,8 @@ import (
)
var (
ErrNotFound = errors.New("page not found") /*todo: refactor into common errors lib? */
emptyMap = map[string]string{} // used to indicate an empty result in the cache. Can't use nil as it would be a cache miss.
ErrOgHandled = errors.New("og: handled error") // used to indicate that the error was handled and should not be logged
emptyMap = map[string]string{} // used to indicate an empty result in the cache. Can't use nil as it would be a cache miss.
)
func (c *OGTagCache) fetchHTMLDocument(urlStr string) (*html.Node, error) {
@ -31,7 +31,7 @@ func (c *OGTagCache) fetchHTMLDocument(urlStr string) (*html.Node, error) {
if resp.StatusCode != http.StatusOK {
slog.Debug("og: received non-OK status code", "url", urlStr, "status", resp.StatusCode)
c.cache.Set(urlStr, emptyMap, c.ogTimeToLive) // Cache empty result for non-successful status codes
return nil, ErrNotFound
return nil, fmt.Errorf("%w: page not found", ErrOgHandled)
}
// Check content type
@ -43,11 +43,13 @@ func (c *OGTagCache) fetchHTMLDocument(urlStr string) (*html.Node, error) {
mediaType, _, err := mime.ParseMediaType(ct)
if err != nil {
// Malformed Content-Type header
return nil, fmt.Errorf("invalid Content-Type '%s': %w", ct, err)
slog.Debug("og: malformed Content-Type header", "url", urlStr, "contentType", ct)
return nil, fmt.Errorf("%w malformed Content-Type header: %w", ErrOgHandled, err)
}
if mediaType != "text/html" && mediaType != "application/xhtml+xml" {
return nil, fmt.Errorf("unsupported Content-Type: %s", mediaType)
slog.Debug("og: unsupported Content-Type", "url", urlStr, "contentType", mediaType)
return nil, fmt.Errorf("%w unsupported Content-Type: %s", ErrOgHandled, mediaType)
}
}

View File

@ -101,6 +101,9 @@ func doesNPXExist(t *testing.T) {
}
func run(t *testing.T, command string) string {
if testing.Short() {
t.Skip("skipping integration smoke testing in short mode")
}
t.Helper()
shPath, err := exec.LookPath("sh")

View File

@ -15,12 +15,12 @@ import (
func loadPolicies(t *testing.T, fname string) *policy.ParsedConfig {
t.Helper()
policy, err := LoadPoliciesOrDefault("", anubis.DefaultDifficulty)
anubisPolicy, err := LoadPoliciesOrDefault("", anubis.DefaultDifficulty)
if err != nil {
t.Fatal(err)
}
return policy
return anubisPolicy
}
func spawnAnubis(t *testing.T, opts Options) *Server {