mirror of
https://github.com/TecharoHQ/anubis.git
synced 2026-04-11 02:58:49 +00:00
fix(thr1): update spec to respond to feedback and evaluation against a private dataset
Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
@@ -11,13 +11,13 @@ The biggest source of prior art is [FoxIO's JA4H fingerprinting method](https://
|
||||
The fingerprint consists of five concatenated components:
|
||||
|
||||
```text
|
||||
<thr1_head>_<thr1_lang>_<thr1_sec>_<thr1_all>
|
||||
<thr1_head>_<thr1_lang>_<thr1_sec>_<thr1_ua>_<thr1_enc>
|
||||
```
|
||||
|
||||
Example:
|
||||
|
||||
```text
|
||||
get20cr1004_enca-d6b272e5b_sec-a9649072c_2a347fcf7
|
||||
get201004_enca-d6b272e5b_sec-a9649072c_2a347fcf7_zs
|
||||
```
|
||||
|
||||
Each component is described below:
|
||||
@@ -28,15 +28,14 @@ Overall request summary of method, protocol, and header counts:
|
||||
|
||||
- First three letters of the HTTP method, lowercased (e.g. get, pos).
|
||||
- HTTP protocol version formatted in two digits (`10` for HTTP/1.0, `11` for HTTP/1.1, `20` for HTTP/2, `30` for HTTP/3 etc.).
|
||||
- Single letter indicating if the request has cookies: `c` if present, `n` if not.
|
||||
- Single letter indicating Referer header presence: `r` if present, `n` if absent.
|
||||
- If present, prefer the HTTP protocol version in `X-Http-Version`.
|
||||
- Number of HTTP headers sent by the client, zero-padded to two digits (e.g. `10`).
|
||||
- Number of `Sec-*` headers sent by the client, zero-padded to two digits (e.g. `04`).
|
||||
|
||||
Example:
|
||||
|
||||
```text
|
||||
get20cr1004
|
||||
get201004
|
||||
```
|
||||
|
||||
### `thr1_lang`
|
||||
@@ -69,7 +68,7 @@ thr1_sec = "sec-" + HASH9
|
||||
|
||||
Where:
|
||||
|
||||
- Collect **all headers whose names start with `sec-` (case-insensitive)**.
|
||||
- Collect **all headers whose names start with `sec-` (case-insensitive)**, excluding `Sec-Fetch-User`.
|
||||
- For each header:
|
||||
|
||||
1. Normalize the header name by lowercasing.
|
||||
@@ -156,32 +155,48 @@ sec-fetch-mode:navigate
|
||||
ua:Chromium/123,Google Chrome/123
|
||||
```
|
||||
|
||||
### `thr1_all`
|
||||
### `thr1_ua`
|
||||
|
||||
A hash of the canonicalized form of request headers.
|
||||
|
||||
To construct a `thr1_all`:
|
||||
|
||||
1. Collect all header keys excluding:
|
||||
|
||||
- `Cookie`
|
||||
- `Referer`
|
||||
- `User-Agent`
|
||||
- Any header starting with `X-`
|
||||
|
||||
2. Sort header keys by lowercase name.
|
||||
3. Serialize as:
|
||||
|
||||
```text
|
||||
name:value
|
||||
```
|
||||
|
||||
Joined by newlines.
|
||||
|
||||
4. Compute the SHA-256 checksum of that string and take the first 9 hex digits.
|
||||
SHA256 fingerprint of the `User-Agent` string, taking the first 9 hex digits.
|
||||
|
||||
Example output:
|
||||
|
||||
```text
|
||||
2a347fcf7
|
||||
```
|
||||
|
||||
### `thr1_enc`
|
||||
|
||||
The `thr1_enc` (compression) component summarizes the `Accept-Encoding` request header using:
|
||||
|
||||
- **Most preferred compression encoding** (`*`, `gzip`, `deflate`, `br`, `zstd`)
|
||||
- **Number of encodings declared**, zero-padded to **two digits** (`00`–`99`, capped at `99`)
|
||||
|
||||
---
|
||||
|
||||
### ✅ `thr1_enc` Spec (Revised)
|
||||
|
||||
**Format:**
|
||||
|
||||
```
|
||||
<preferred_encoding>-<count>
|
||||
```
|
||||
|
||||
- `preferred_encoding` is the first matching value in this priority order:
|
||||
|
||||
1. `*`
|
||||
2. `gzip`
|
||||
3. `deflate`
|
||||
4. `br`
|
||||
5. `zstd`
|
||||
|
||||
- If none match, use `none`
|
||||
- `count` is the number of encoding options, zero-padded to 2 digits (max 99)
|
||||
|
||||
**Examples:**
|
||||
|
||||
- `gzip, deflate` → `gzip-02`
|
||||
- `gzip;q=0.9, br;q=0.8` → `gzip-02`
|
||||
- `zstd` → `zstd-01`
|
||||
- `bogus` → `none-01`
|
||||
- _empty_ → `none-00`
|
||||
|
||||
@@ -27,6 +27,7 @@ import (
|
||||
"github.com/TecharoHQ/anubis/lib/challenge"
|
||||
"github.com/TecharoHQ/anubis/lib/policy"
|
||||
"github.com/TecharoHQ/anubis/lib/policy/config"
|
||||
"github.com/TecharoHQ/anubis/lib/thr1"
|
||||
|
||||
// challenge implementations
|
||||
_ "github.com/TecharoHQ/anubis/lib/challenge/proofofwork"
|
||||
@@ -74,18 +75,13 @@ type Server struct {
|
||||
func (s *Server) challengeFor(r *http.Request, difficulty int) string {
|
||||
fp := sha256.Sum256(s.pub[:])
|
||||
|
||||
acceptLanguage := r.Header.Get("Accept-Language")
|
||||
if len(acceptLanguage) > 5 {
|
||||
acceptLanguage = acceptLanguage[:5]
|
||||
}
|
||||
|
||||
challengeData := fmt.Sprintf(
|
||||
"Accept-Language=%s,X-Real-IP=%s,User-Agent=%s,WeekTime=%s,Fingerprint=%x,Difficulty=%d",
|
||||
acceptLanguage,
|
||||
r.Header.Get("X-Real-Ip"),
|
||||
"THR1=%s,JA4=%s,Fingerprint=%x,User-Agent=%s,WeekTime=%s,Difficulty=%d",
|
||||
thr1.Fingerprint(r),
|
||||
r.Header.Get("X-Tls-Fingerprint-Ja4"),
|
||||
fp,
|
||||
r.UserAgent(),
|
||||
time.Now().UTC().Round(24*7*time.Hour).Format(time.RFC3339),
|
||||
fp,
|
||||
difficulty,
|
||||
)
|
||||
return internal.SHA256sum(challengeData)
|
||||
|
||||
246
lib/thr1/thr1.go
Normal file
246
lib/thr1/thr1.go
Normal file
@@ -0,0 +1,246 @@
|
||||
package thr1
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func Fingerprint(r *http.Request) string {
|
||||
result := strings.Join([]string{
|
||||
thr1Head(r),
|
||||
thr1Lang(r),
|
||||
thr1Sec(r),
|
||||
thr1UA(r),
|
||||
thr1Encoding(r),
|
||||
}, "_")
|
||||
|
||||
slog.Info("THR1 got", "method", r.Method, "path", r.URL.Path, "thr1", result)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// thr1Head builds the leading THR1 component: a summary of the request
// method, the HTTP protocol version and whether any Sec-* header is present.
//
// Layout of the returned string:
//
//   - the HTTP method, lowercased and cut to at most three bytes ("get", "pos");
//   - a two-digit protocol code: "10", "11", "20", "30", or "00" when unknown;
//   - "tr" when at least one header name starts with "sec-" (case-insensitive),
//     otherwise "fa".
//
// The protocol version reported in the X-Http-Version header is preferred.
// Both the long ("HTTP/2.0") and short ("HTTP/2") spellings are accepted. When
// the header is absent or carries a value that is not recognised, the
// protocol version of the request itself (r.ProtoMajor / r.ProtoMinor) is
// used instead of collapsing to "00".
//
// NOTE(review): the THR1 spec text describes zero-padded header counts in
// this position (example "get201004"); this implementation emits a two-letter
// Sec-* presence flag instead. Confirm which one is authoritative.
func thr1Head(r *http.Request) string {
	method := strings.ToLower(r.Method)
	if len(method) > 3 {
		method = method[:3]
	}

	// First choice: the version advertised by the fronting proxy, if it is
	// one we understand.
	version := ""
	switch strings.ToUpper(strings.TrimSpace(r.Header.Get("X-Http-Version"))) {
	case "HTTP/1.0":
		version = "10"
	case "HTTP/1.1":
		version = "11"
	case "HTTP/2", "HTTP/2.0":
		version = "20"
	case "HTTP/3", "HTTP/3.0":
		version = "30"
	}

	// Fallback: the protocol of the connection this request arrived on.
	if version == "" {
		switch {
		case r.ProtoMajor == 1 && r.ProtoMinor == 0:
			version = "10"
		case r.ProtoMajor == 1 && r.ProtoMinor == 1:
			version = "11"
		case r.ProtoMajor == 2:
			version = "20"
		case r.ProtoMajor == 3:
			version = "30"
		default:
			version = "00"
		}
	}

	secFlag := "fa"
	for name := range r.Header {
		if strings.HasPrefix(strings.ToLower(name), "sec-") {
			secFlag = "tr"
			break
		}
	}

	return method + version + secFlag
}
|
||||
|
||||
func thr1Encoding(r *http.Request) string {
|
||||
raw := r.Header.Get("Accept-Encoding")
|
||||
if raw == "" {
|
||||
return "none-00"
|
||||
}
|
||||
|
||||
encodings := strings.Split(raw, ",")
|
||||
count := len(encodings)
|
||||
if count > 99 {
|
||||
count = 99
|
||||
}
|
||||
|
||||
seen := make(map[string]struct{})
|
||||
var available []string
|
||||
for _, e := range encodings {
|
||||
enc := strings.ToLower(strings.TrimSpace(strings.Split(e, ";")[0]))
|
||||
if enc != "" {
|
||||
if _, exists := seen[enc]; !exists {
|
||||
available = append(available, enc)
|
||||
seen[enc] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
priorities := map[string]int{
|
||||
"zstd": 1,
|
||||
"br": 2,
|
||||
"deflate": 3,
|
||||
"gzip": 4,
|
||||
"*": 5,
|
||||
}
|
||||
|
||||
best := "none"
|
||||
bestRank := 999 // arbitrarily high
|
||||
for _, enc := range available {
|
||||
if rank, ok := priorities[enc]; ok {
|
||||
if rank < bestRank {
|
||||
best = enc
|
||||
bestRank = rank
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if best == "*" {
|
||||
best = "wild"
|
||||
}
|
||||
|
||||
return best + "-" + pad2(count)
|
||||
}
|
||||
|
||||
// pad2 renders n as a decimal string for use as a two-digit field.
//
// Values below 10 get a single "0" prepended, values above 99 are reported as
// "99", and everything in between is printed as-is. Negative values are not
// treated specially: they take the "below 10" branch and simply receive the
// "0" prefix in front of their signed representation.
func pad2(n int) string {
	switch {
	case n < 10:
		return "0" + strconv.Itoa(n)
	case n > 99:
		return "99"
	default:
		return strconv.Itoa(n)
	}
}
|
||||
|
||||
func thr1Lang(r *http.Request) string {
|
||||
raw := r.Header.Get("Accept-Language")
|
||||
if raw == "" {
|
||||
return "-000000000"
|
||||
}
|
||||
trimmed := first4AlphaNum(strings.ToLower(raw)) + "-"
|
||||
sum := sha256.Sum256([]byte(raw))
|
||||
return trimmed + hex.EncodeToString(sum[:])[:9]
|
||||
}
|
||||
|
||||
// first4AlphaNum returns a four-character string made of the first four
// characters of s that are lowercase ASCII letters (a-z) or ASCII digits
// (0-9), in order of appearance. Every other character, uppercase letters
// included, is skipped. When fewer than four such characters exist the
// result is right-padded with '0'.
func first4AlphaNum(s string) string {
	// Start from the fully padded result and overwrite from the left.
	picked := [4]byte{'0', '0', '0', '0'}
	filled := 0

	for _, c := range s {
		if filled == len(picked) {
			break
		}
		isLower := c >= 'a' && c <= 'z'
		isDigit := c >= '0' && c <= '9'
		if !isLower && !isDigit {
			continue
		}
		picked[filled] = byte(c) // safe: c is ASCII here
		filled++
	}

	return string(picked[:])
}
|
||||
|
||||
func thr1Sec(r *http.Request) string {
|
||||
var lines []string
|
||||
for k, vs := range r.Header {
|
||||
lkey := strings.ToLower(k)
|
||||
if !strings.HasPrefix(lkey, "sec-") || lkey == "sec-fetch-user" {
|
||||
continue
|
||||
}
|
||||
switch lkey {
|
||||
case "sec-ch-ua":
|
||||
lines = append(lines, parseSecChUA(vs))
|
||||
case "sec-ch-ua-mobile":
|
||||
lines = append(lines, parseSecCHSimple("mobile", vs))
|
||||
case "sec-ch-ua-platform":
|
||||
lines = append(lines, parseSecCHSimple("platform", vs))
|
||||
case "sec-ch-ua-platform-version":
|
||||
lines = append(lines, parseSecCHSimple("platform_version", vs))
|
||||
case "sec-ch-ua-model":
|
||||
lines = append(lines, parseSecCHSimple("model", vs))
|
||||
case "sec-ch-ua-full-version":
|
||||
lines = append(lines, parseSecCHSimple("full_version", vs))
|
||||
default:
|
||||
for _, v := range vs {
|
||||
v = strings.Trim(v, `" `)
|
||||
lines = append(lines, lkey+":"+v)
|
||||
}
|
||||
}
|
||||
}
|
||||
sort.Strings(lines)
|
||||
canonical := strings.Join(lines, "\n")
|
||||
sum := sha256.Sum256([]byte(canonical))
|
||||
return "sec-" + hex.EncodeToString(sum[:])[:9]
|
||||
}
|
||||
|
||||
// brandVersionRe matches one `"<brand>";v="<version>"` entry of a Sec-CH-UA
// header value, capturing the brand and the version (both without quotes).
var brandVersionRe = regexp.MustCompile(`\s*"([^"]+)";v="([^"]+)"`)

// parseSecChUA canonicalises the values of a Sec-CH-UA header into a single
// line of the form "ua:<brand>/<version>,<brand>/<version>,...".
//
// All brand/version entries found across vs are collected, the entry whose
// brand is exactly "Not=A?Brand" is dropped, and the rest are ordered by
// brand name. With no usable entries the result is just "ua:".
//
// NOTE(review): only the literal brand "Not=A?Brand" is filtered; other
// spellings of a placeholder brand would be kept. Confirm this is intended.
func parseSecChUA(vs []string) string {
	type brandEntry struct{ name, version string }

	var entries []brandEntry
	for _, header := range vs {
		// The pattern has two capture groups, so every match has three
		// elements: full match, brand, version.
		for _, m := range brandVersionRe.FindAllStringSubmatch(header, -1) {
			if name := m[1]; name != "Not=A?Brand" {
				entries = append(entries, brandEntry{name: name, version: m[2]})
			}
		}
	}

	sort.Slice(entries, func(a, b int) bool {
		return entries[a].name < entries[b].name
	})

	rendered := make([]string, len(entries))
	for i, e := range entries {
		rendered[i] = e.name + "/" + e.version
	}
	return "ua:" + strings.Join(rendered, ",")
}
|
||||
|
||||
// parseSecCHSimple canonicalises a single-valued client-hint header into a
// "key:value" line.
//
// Values are stripped of surrounding double quotes and spaces before use.
//
//   - For key "mobile", the first value equal to "?1" or "?0" decides the
//     result ("mobile:true" or "mobile:false"); other values are skipped.
//   - For any other key, the first value is used as-is after stripping.
//
// When no value qualifies (including when vs is empty) the result is the key
// followed by a bare colon.
func parseSecCHSimple(key string, vs []string) string {
	if key != "mobile" {
		if len(vs) == 0 {
			return key + ":"
		}
		return key + ":" + strings.Trim(vs[0], `" `)
	}

	// Structured boolean: scan until a well-formed value shows up.
	for _, raw := range vs {
		switch strings.Trim(raw, `" `) {
		case "?1":
			return "mobile:true"
		case "?0":
			return "mobile:false"
		}
	}
	return key + ":"
}
|
||||
|
||||
// thr1UA builds the THR1 User-Agent component: the first nine hex digits of
// the SHA-256 of the request's User-Agent header value. A missing header is
// hashed as the empty string.
func thr1UA(r *http.Request) string {
	digest := sha256.Sum256([]byte(r.UserAgent()))

	// Five bytes encode to ten hex digits; only the first nine are kept.
	return hex.EncodeToString(digest[:5])[:9]
}
|
||||
Reference in New Issue
Block a user