mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-14 04:28:49 +00:00
fix(thr1): update spec to respond to feedback and evaluation against a private dataset
Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
@@ -11,13 +11,13 @@ The biggest source of prior art is [FoxIO's JA4H fingerprinting method](https://
|
|||||||
The fingerprint consists of four concatenated components:
|
The fingerprint consists of four concatenated components:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
<thr1_head>_<thr1_lang>_<thr1_sec>_<thr1_all>
|
<thr1_head>_<thr1_lang>_<thr1_sec>_<thr1_ua>_<thr1_enc>
|
||||||
```
|
```
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
get20cr1004_enca-d6b272e5b_sec-a9649072c_2a347fcf7
|
get201004_enca-d6b272e5b_sec-a9649072c_2a347fcf7_zstd-04
|
||||||
```
|
```
|
||||||
|
|
||||||
Each component is described below:
|
Each component is described below:
|
||||||
@@ -28,15 +28,14 @@ Overall request summary of method, protocol, and header counts:
|
|||||||
|
|
||||||
- First three letters of the HTTP method, lowercased (e.g. get, pos).
|
- First three letters of the HTTP method, lowercased (e.g. get, pos).
|
||||||
- HTTP protocol version formatted in two digits (`10` for HTTP/1.0, `11` for HTTP/1.1, `20` for HTTP/2, `30` for HTTP/3 etc.).
|
- HTTP protocol version formatted in two digits (`10` for HTTP/1.0, `11` for HTTP/1.1, `20` for HTTP/2, `30` for HTTP/3 etc.).
|
||||||
- Single letter indicating if the request has cookies: `c` if present, `n` if not.
|
- If the `X-Http-Version` request header is present, use the protocol version it reports instead of the version observed on the request itself.
|
||||||
- Single letter indicating Referer header presence: `r` if present, `n` if absent.
|
|
||||||
- Number of HTTP headers sent by the client, zero-padded to two digits (e.g. `10`).
|
- Number of HTTP headers sent by the client, zero-padded to two digits (e.g. `10`).
|
||||||
- Number of `Sec-*` headers sent by the client, zero-padded to two digits (e.g. `04`).
|
- Number of `Sec-*` headers sent by the client, zero-padded to two digits (e.g. `04`).
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
get20cr1004
|
get201004
|
||||||
```
|
```
|
||||||
|
|
||||||
### `thr1_lang`
|
### `thr1_lang`
|
||||||
@@ -69,7 +68,7 @@ thr1_sec = "sec-" + HASH9
|
|||||||
|
|
||||||
Where:
|
Where:
|
||||||
|
|
||||||
- Collect **all headers whose names start with `sec-` (case-insensitive)**.
|
- Collect **all headers whose names start with `sec-` (case-insensitive)**, excluding `Sec-Fetch-User`.
|
||||||
- For each header:
|
- For each header:
|
||||||
|
|
||||||
1. Normalize the header name by lowercasing.
|
1. Normalize the header name by lowercasing.
|
||||||
@@ -156,32 +155,48 @@ sec-fetch-mode:navigate
|
|||||||
ua:Chromium/123,Google Chrome/123
|
ua:Chromium/123,Google Chrome/123
|
||||||
```
|
```
|
||||||
|
|
||||||
### `thr1_all`
|
### `thr1_ua`
|
||||||
|
|
||||||
A hash of the canonicalized form of request headers.
|
SHA256 fingerprint of the `User-Agent` string, taking the first 9 hex digits.
|
||||||
|
|
||||||
To construct a `thr1_all`:
|
|
||||||
|
|
||||||
1. Collect all header keys excluding:
|
|
||||||
|
|
||||||
- `Cookie`
|
|
||||||
- `Referer`
|
|
||||||
- `User-Agent`
|
|
||||||
- Any header starting with `X-`
|
|
||||||
|
|
||||||
2. Sort header keys by lowercase name.
|
|
||||||
3. Serialize as:
|
|
||||||
|
|
||||||
```text
|
|
||||||
name:value
|
|
||||||
```
|
|
||||||
|
|
||||||
Joined by newlines.
|
|
||||||
|
|
||||||
4. Compute the SHA-256 checksum of that string and take the first 9 hex digits.
|
|
||||||
|
|
||||||
Example output:
|
Example output:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
2a347fcf7
|
2a347fcf7
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### `thr1_enc`
|
||||||
|
|
||||||
|
The `thr1_enc` (compression) component summarizes the `Accept-Encoding` header. It includes:
|
||||||
|
|
||||||
|
- **Most preferred compression encoding** (`*`, `gzip`, `deflate`, `br`, `zstd`)
|
||||||
|
- **Number of encodings declared**, truncated to **two digits** (`01`–`99`, capped)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### ✅ `thr1_enc` Spec (Revised)
|
||||||
|
|
||||||
|
**Format:**
|
||||||
|
|
||||||
|
```
|
||||||
|
<preferred_encoding>-<count>
|
||||||
|
```
|
||||||
|
|
||||||
|
- `preferred_encoding` is the first matching value in this priority order:
|
||||||
|
|
||||||
|
1. `*`
|
||||||
|
2. `gzip`
|
||||||
|
3. `deflate`
|
||||||
|
4. `br`
|
||||||
|
5. `zstd`
|
||||||
|
|
||||||
|
- If none match, use `none`
|
||||||
|
- `count` is the number of encoding options, zero-padded to 2 digits (max 99)
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
|
||||||
|
- `gzip, deflate` → `gzip-02`
|
||||||
|
- `gzip;q=0.9, br;q=0.8` → `gzip-02`
|
||||||
|
- `zstd` → `zstd-01`
|
||||||
|
- `bogus` → `none-01`
|
||||||
|
- _empty_ → `none-00`
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ import (
|
|||||||
"github.com/TecharoHQ/anubis/lib/challenge"
|
"github.com/TecharoHQ/anubis/lib/challenge"
|
||||||
"github.com/TecharoHQ/anubis/lib/policy"
|
"github.com/TecharoHQ/anubis/lib/policy"
|
||||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||||
|
"github.com/TecharoHQ/anubis/lib/thr1"
|
||||||
|
|
||||||
// challenge implementations
|
// challenge implementations
|
||||||
_ "github.com/TecharoHQ/anubis/lib/challenge/proofofwork"
|
_ "github.com/TecharoHQ/anubis/lib/challenge/proofofwork"
|
||||||
@@ -74,18 +75,13 @@ type Server struct {
|
|||||||
func (s *Server) challengeFor(r *http.Request, difficulty int) string {
|
func (s *Server) challengeFor(r *http.Request, difficulty int) string {
|
||||||
fp := sha256.Sum256(s.pub[:])
|
fp := sha256.Sum256(s.pub[:])
|
||||||
|
|
||||||
acceptLanguage := r.Header.Get("Accept-Language")
|
|
||||||
if len(acceptLanguage) > 5 {
|
|
||||||
acceptLanguage = acceptLanguage[:5]
|
|
||||||
}
|
|
||||||
|
|
||||||
challengeData := fmt.Sprintf(
|
challengeData := fmt.Sprintf(
|
||||||
"Accept-Language=%s,X-Real-IP=%s,User-Agent=%s,WeekTime=%s,Fingerprint=%x,Difficulty=%d",
|
"THR1=%s,JA4=%s,Fingerprint=%x,User-Agent=%s,WeekTime=%s,Difficulty=%d",
|
||||||
acceptLanguage,
|
thr1.Fingerprint(r),
|
||||||
r.Header.Get("X-Real-Ip"),
|
r.Header.Get("X-Tls-Fingerprint-Ja4"),
|
||||||
|
fp,
|
||||||
r.UserAgent(),
|
r.UserAgent(),
|
||||||
time.Now().UTC().Round(24*7*time.Hour).Format(time.RFC3339),
|
time.Now().UTC().Round(24*7*time.Hour).Format(time.RFC3339),
|
||||||
fp,
|
|
||||||
difficulty,
|
difficulty,
|
||||||
)
|
)
|
||||||
return internal.SHA256sum(challengeData)
|
return internal.SHA256sum(challengeData)
|
||||||
|
|||||||
246
lib/thr1/thr1.go
Normal file
246
lib/thr1/thr1.go
Normal file
@@ -0,0 +1,246 @@
|
|||||||
|
package thr1
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"log/slog"
|
||||||
|
"net/http"
|
||||||
|
"regexp"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Fingerprint(r *http.Request) string {
|
||||||
|
result := strings.Join([]string{
|
||||||
|
thr1Head(r),
|
||||||
|
thr1Lang(r),
|
||||||
|
thr1Sec(r),
|
||||||
|
thr1UA(r),
|
||||||
|
thr1Encoding(r),
|
||||||
|
}, "_")
|
||||||
|
|
||||||
|
slog.Info("THR1 got", "method", r.Method, "path", r.URL.Path, "thr1", result)
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// thr1Head builds the thr1_head component of the fingerprint:
//
//	<method><version><header count><sec header count>
//
//   - method: the HTTP method lowercased and cut to at most three bytes
//     (e.g. "get", "pos").
//   - version: two digits for the protocol version ("10", "11", "20", "30"),
//     or "00" when the version is not recognized. A non-empty X-Http-Version
//     header takes precedence over the version recorded on the request.
//   - header count: number of entries in r.Header, two digits, capped at 99.
//   - sec header count: number of entries in r.Header whose name starts with
//     "sec-" (case-insensitive), two digits, capped at 99.
//
// For example, an HTTP/2 GET carrying ten headers, four of them Sec-*, yields
// "get201004".
func thr1Head(r *http.Request) string {
	method := strings.ToLower(r.Method)
	if len(method) > 3 {
		method = method[:3]
	}

	// "00" is kept whenever neither source yields a recognized version.
	version := "00"
	if override := r.Header.Get("X-Http-Version"); override != "" {
		switch strings.TrimSpace(strings.ToUpper(override)) {
		case "HTTP/1.0":
			version = "10"
		case "HTTP/1.1":
			version = "11"
		case "HTTP/2.0":
			version = "20"
		case "HTTP/3.0":
			version = "30"
		}
	} else {
		switch {
		case r.ProtoMajor == 1 && r.ProtoMinor == 0:
			version = "10"
		case r.ProtoMajor == 1 && r.ProtoMinor == 1:
			version = "11"
		case r.ProtoMajor == 2:
			version = "20"
		case r.ProtoMajor == 3:
			version = "30"
		}
	}

	secCount := 0
	for name := range r.Header {
		if strings.HasPrefix(strings.ToLower(name), "sec-") {
			secCount++
		}
	}

	// twoDigits renders a non-negative count as exactly two decimal digits,
	// clamping anything above 99 so the component keeps a fixed width.
	twoDigits := func(n int) string {
		if n > 99 {
			n = 99
		}
		if n < 10 {
			return "0" + strconv.Itoa(n)
		}
		return strconv.Itoa(n)
	}

	// NOTE(review): len(r.Header) counts distinct header names as seen by this
	// process, so headers injected by a fronting proxy (X-Http-Version
	// included) are counted too — confirm that this is the intended notion of
	// "headers sent by the client".
	return method + version + twoDigits(len(r.Header)) + twoDigits(secCount)
}
|
||||||
|
|
||||||
|
func thr1Encoding(r *http.Request) string {
|
||||||
|
raw := r.Header.Get("Accept-Encoding")
|
||||||
|
if raw == "" {
|
||||||
|
return "none-00"
|
||||||
|
}
|
||||||
|
|
||||||
|
encodings := strings.Split(raw, ",")
|
||||||
|
count := len(encodings)
|
||||||
|
if count > 99 {
|
||||||
|
count = 99
|
||||||
|
}
|
||||||
|
|
||||||
|
seen := make(map[string]struct{})
|
||||||
|
var available []string
|
||||||
|
for _, e := range encodings {
|
||||||
|
enc := strings.ToLower(strings.TrimSpace(strings.Split(e, ";")[0]))
|
||||||
|
if enc != "" {
|
||||||
|
if _, exists := seen[enc]; !exists {
|
||||||
|
available = append(available, enc)
|
||||||
|
seen[enc] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
priorities := map[string]int{
|
||||||
|
"zstd": 1,
|
||||||
|
"br": 2,
|
||||||
|
"deflate": 3,
|
||||||
|
"gzip": 4,
|
||||||
|
"*": 5,
|
||||||
|
}
|
||||||
|
|
||||||
|
best := "none"
|
||||||
|
bestRank := 999 // arbitrarily high
|
||||||
|
for _, enc := range available {
|
||||||
|
if rank, ok := priorities[enc]; ok {
|
||||||
|
if rank < bestRank {
|
||||||
|
best = enc
|
||||||
|
bestRank = rank
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if best == "*" {
|
||||||
|
best = "wild"
|
||||||
|
}
|
||||||
|
|
||||||
|
return best + "-" + pad2(count)
|
||||||
|
}
|
||||||
|
|
||||||
|
// pad2 formats n as exactly two decimal digits. Values below zero are clamped
// to "00" and values above 99 are clamped to "99", so the result is always two
// characters long.
func pad2(n int) string {
	switch {
	case n <= 0:
		return "00"
	case n < 10:
		return "0" + strconv.Itoa(n)
	case n > 99:
		return "99"
	default:
		return strconv.Itoa(n)
	}
}
|
||||||
|
|
||||||
|
func thr1Lang(r *http.Request) string {
|
||||||
|
raw := r.Header.Get("Accept-Language")
|
||||||
|
if raw == "" {
|
||||||
|
return "-000000000"
|
||||||
|
}
|
||||||
|
trimmed := first4AlphaNum(strings.ToLower(raw)) + "-"
|
||||||
|
sum := sha256.Sum256([]byte(raw))
|
||||||
|
return trimmed + hex.EncodeToString(sum[:])[:9]
|
||||||
|
}
|
||||||
|
|
||||||
|
// first4AlphaNum returns the first four characters of s that are ASCII
// lowercase letters or ASCII digits, skipping everything else (uppercase
// letters included). When fewer than four such characters exist, the result is
// right-padded with '0' to a length of four.
func first4AlphaNum(s string) string {
	const width = 4

	picked := make([]byte, 0, width)
	// Only ASCII bytes can match, so scanning bytes instead of runes selects
	// the same characters.
	for i := 0; i < len(s) && len(picked) < width; i++ {
		c := s[i]
		isLower := c >= 'a' && c <= 'z'
		isDigit := c >= '0' && c <= '9'
		if isLower || isDigit {
			picked = append(picked, c)
		}
	}

	for len(picked) < width {
		picked = append(picked, '0')
	}
	return string(picked)
}
|
||||||
|
|
||||||
|
func thr1Sec(r *http.Request) string {
|
||||||
|
var lines []string
|
||||||
|
for k, vs := range r.Header {
|
||||||
|
lkey := strings.ToLower(k)
|
||||||
|
if !strings.HasPrefix(lkey, "sec-") || lkey == "sec-fetch-user" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
switch lkey {
|
||||||
|
case "sec-ch-ua":
|
||||||
|
lines = append(lines, parseSecChUA(vs))
|
||||||
|
case "sec-ch-ua-mobile":
|
||||||
|
lines = append(lines, parseSecCHSimple("mobile", vs))
|
||||||
|
case "sec-ch-ua-platform":
|
||||||
|
lines = append(lines, parseSecCHSimple("platform", vs))
|
||||||
|
case "sec-ch-ua-platform-version":
|
||||||
|
lines = append(lines, parseSecCHSimple("platform_version", vs))
|
||||||
|
case "sec-ch-ua-model":
|
||||||
|
lines = append(lines, parseSecCHSimple("model", vs))
|
||||||
|
case "sec-ch-ua-full-version":
|
||||||
|
lines = append(lines, parseSecCHSimple("full_version", vs))
|
||||||
|
default:
|
||||||
|
for _, v := range vs {
|
||||||
|
v = strings.Trim(v, `" `)
|
||||||
|
lines = append(lines, lkey+":"+v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sort.Strings(lines)
|
||||||
|
canonical := strings.Join(lines, "\n")
|
||||||
|
sum := sha256.Sum256([]byte(canonical))
|
||||||
|
return "sec-" + hex.EncodeToString(sum[:])[:9]
|
||||||
|
}
|
||||||
|
|
||||||
|
// brandVersionRe matches one `"brand";v="version"` entry of a Sec-CH-UA header
// value, capturing the brand and the version. Neither capture can contain a
// double quote.
var brandVersionRe = regexp.MustCompile(`\s*"([^"]+)";v="([^"]+)"`)

// parseSecChUA renders the Sec-CH-UA header values vs as one canonical line:
// "ua:" followed by comma-separated "brand/version" entries.
//
// Entries are collected from every value in vs, sorted by brand and then by
// version so the output does not depend on the order in which the client
// listed them. With no usable entries the result is just "ua:".
//
// The brand "Not=A?Brand" is dropped. NOTE(review): this is presumably the
// GREASE placeholder brand; only this exact spelling is filtered, so other
// placeholder spellings would pass through — confirm whether more variants
// should be excluded.
func parseSecChUA(vs []string) string {
	type pair struct{ Brand, Version string }
	var pairs []pair

	for _, v := range vs {
		// Each match holds the full text plus the two capture groups, so
		// indexes 1 and 2 are always present.
		for _, match := range brandVersionRe.FindAllStringSubmatch(v, -1) {
			brand, version := match[1], match[2]
			if brand == "Not=A?Brand" {
				continue
			}
			pairs = append(pairs, pair{brand, version})
		}
	}

	sort.Slice(pairs, func(i, j int) bool {
		if pairs[i].Brand != pairs[j].Brand {
			return pairs[i].Brand < pairs[j].Brand
		}
		// Tie-break on version so repeated brands have one canonical order.
		return pairs[i].Version < pairs[j].Version
	})

	var sb strings.Builder
	sb.WriteString("ua:")
	for i, p := range pairs {
		if i > 0 {
			sb.WriteByte(',')
		}
		sb.WriteString(p.Brand)
		sb.WriteByte('/')
		sb.WriteString(p.Version)
	}
	return sb.String()
}
|
||||||
|
|
||||||
|
// parseSecCHSimple renders a single-valued client hint as "key:value".
//
// For any key other than "mobile", only the first element of vs is used, with
// surrounding double quotes and spaces trimmed; an empty vs yields "key:".
//
// For the key "mobile", the first element that trims to "?1" or "?0" decides
// the result ("mobile:true" or "mobile:false"). Elements with any other
// content are skipped, and "mobile:" is returned when none qualifies.
func parseSecCHSimple(key string, vs []string) string {
	if key != "mobile" {
		if len(vs) == 0 {
			return key + ":"
		}
		return key + ":" + strings.Trim(vs[0], `" `)
	}

	for _, raw := range vs {
		switch strings.Trim(raw, `" `) {
		case "?1":
			return "mobile:true"
		case "?0":
			return "mobile:false"
		}
	}
	return key + ":"
}
|
||||||
|
|
||||||
|
// thr1UA builds the thr1_ua component: the first nine hex digits of the
// SHA-256 of the request's User-Agent header value. A missing header hashes
// the empty string.
func thr1UA(r *http.Request) string {
	digest := sha256.Sum256([]byte(r.UserAgent()))
	return hex.EncodeToString(digest[:])[:9]
}
|
||||||
Reference in New Issue
Block a user