fix(thr1): update spec to respond to feedback and evaluation against a private dataset

Signed-off-by: Xe Iaso <me@xeiaso.net>
This commit is contained in:
Xe Iaso
2025-06-09 10:12:34 -04:00
parent 3a4b1086af
commit de602116d0
3 changed files with 294 additions and 37 deletions

View File

@@ -11,13 +11,13 @@ The biggest source of prior art is [FoxIO's JA4H fingerprinting method](https://
The fingerprint consists of four concatenated components:
```text
<thr1_head>_<thr1_lang>_<thr1_sec>_<thr1_all>
<thr1_head>_<thr1_lang>_<thr1_sec>_<thr1_ua>_<thr1_enc>
```
Example:
```text
get20cr1004_enca-d6b272e5b_sec-a9649072c_2a347fcf7
get201004_enca-d6b272e5b_sec-a9649072c_2a347fcf7_zs
```
Each component is described below:
@@ -28,15 +28,14 @@ Overall request summary of method, protocol, and header counts:
- First three letters of the HTTP method, lowercased (e.g. get, pos).
- HTTP protocol version formatted in two digits (`10` for HTTP/1.0, `11` for HTTP/1.1, `20` for HTTP/2, `30` for HTTP/3 etc.).
- Single letter indicating if the request has cookies: `c` if present, `n` if not.
- Single letter indicating Referer header presence: `r` if present, `n` if absent.
- If present, prefer the HTTP protocol version in `X-Http-Version`.
- Number of HTTP headers sent by the client, zero-padded to two digits (e.g. `10`).
- Number of `Sec-*` headers sent by the client, zero-padded to two digits (e.g. `04`).
Example:
```text
get20cr1004
get201004
```
### `thr1_lang`
@@ -69,7 +68,7 @@ thr1_sec = "sec-" + HASH9
Where:
- Collect **all headers whose names start with `sec-` (case-insensitive)**.
- Collect **all headers whose names start with `sec-` (case-insensitive)**, excluding `Sec-Fetch-User`.
- For each header:
1. Normalize the header name by lowercasing.
@@ -156,32 +155,48 @@ sec-fetch-mode:navigate
ua:Chromium/123,Google Chrome/123
```
### `thr1_all`
### `thr1_ua`
A hash of the canonicalized form of request headers.
To construct a `tlr1_all`:
1. Collect all header keys excluding:
- `Cookie`
- `Referer`
- `User-Agent`
- Any header starting with `X-`
2. Sort header keys by lowercase name.
3. Serialize as:
```text
name:value
```
Joined by newlines.
4. Compute the SHA-256 checksum of that string and take the first 9 hex digits.
SHA256 fingerprint of the `User-Agent` string, taking the first 9 hex digits.
Example output:
```text
2a347fcf7
```
### `thr1_enc`
Heres the updated spec and Go implementation for the `thr1_enc` (compression) component, now including:
- **Most preferred compression encoding** (`*`, `gzip`, `deflate`, `br`, `zstd`)
- **Number of encodings declared**, truncated to **two digits** (`01``99`, capped)
---
### ✅ `thr1_enc` Spec (Revised)
**Format:**
```
<preferred_encoding>-<count>
```
- `preferred_encoding` is the first matching value in this priority order:
1. `*`
2. `gzip`
3. `deflate`
4. `br`
5. `zstd`
- If none match, use `none`
- `count` is the number of encoding options, zero-padded to 2 digits (max 99)
**Examples:**
- `gzip, deflate` → `gzip-02`
- `gzip;q=0.9, br;q=0.8` → `gzip-02`
- `zstd` → `zstd-01`
- `bogus` → `none-01`
- _empty_ → `none-00`

View File

@@ -27,6 +27,7 @@ import (
"github.com/TecharoHQ/anubis/lib/challenge"
"github.com/TecharoHQ/anubis/lib/policy"
"github.com/TecharoHQ/anubis/lib/policy/config"
"github.com/TecharoHQ/anubis/lib/thr1"
// challenge implementations
_ "github.com/TecharoHQ/anubis/lib/challenge/proofofwork"
@@ -74,18 +75,13 @@ type Server struct {
func (s *Server) challengeFor(r *http.Request, difficulty int) string {
fp := sha256.Sum256(s.pub[:])
acceptLanguage := r.Header.Get("Accept-Language")
if len(acceptLanguage) > 5 {
acceptLanguage = acceptLanguage[:5]
}
challengeData := fmt.Sprintf(
"Accept-Language=%s,X-Real-IP=%s,User-Agent=%s,WeekTime=%s,Fingerprint=%x,Difficulty=%d",
acceptLanguage,
r.Header.Get("X-Real-Ip"),
"THR1=%s,JA4=%s,Fingerprint=%x,User-Agent=%s,WeekTime=%s,Difficulty=%d",
thr1.Fingerprint(r),
r.Header.Get("X-Tls-Fingerprint-Ja4"),
fp,
r.UserAgent(),
time.Now().UTC().Round(24*7*time.Hour).Format(time.RFC3339),
fp,
difficulty,
)
return internal.SHA256sum(challengeData)

246
lib/thr1/thr1.go Normal file
View File

@@ -0,0 +1,246 @@
package thr1
import (
"crypto/sha256"
"encoding/hex"
"log/slog"
"net/http"
"regexp"
"sort"
"strconv"
"strings"
)
func Fingerprint(r *http.Request) string {
result := strings.Join([]string{
thr1Head(r),
thr1Lang(r),
thr1Sec(r),
thr1UA(r),
thr1Encoding(r),
}, "_")
slog.Info("THR1 got", "method", r.Method, "path", r.URL.Path, "thr1", result)
return result
}
func thr1Head(r *http.Request) string {
method := strings.ToLower(r.Method)
if len(method) > 3 {
method = method[:3]
}
version := "00"
if override := r.Header.Get("X-Http-Version"); override != "" {
switch strings.TrimSpace(strings.ToUpper(override)) {
case "HTTP/1.0":
version = "10"
case "HTTP/1.1":
version = "11"
case "HTTP/2.0":
version = "20"
case "HTTP/3.0":
version = "30"
}
} else {
switch {
case r.ProtoMajor == 1 && r.ProtoMinor == 0:
version = "10"
case r.ProtoMajor == 1 && r.ProtoMinor == 1:
version = "11"
case r.ProtoMajor == 2:
version = "20"
case r.ProtoMajor == 3:
version = "30"
}
}
hasSec := false
for k := range r.Header {
if strings.HasPrefix(strings.ToLower(k), "sec-") {
hasSec = true
break
}
}
return method + version + strconv.FormatBool(hasSec)[:2]
}
func thr1Encoding(r *http.Request) string {
raw := r.Header.Get("Accept-Encoding")
if raw == "" {
return "none-00"
}
encodings := strings.Split(raw, ",")
count := len(encodings)
if count > 99 {
count = 99
}
seen := make(map[string]struct{})
var available []string
for _, e := range encodings {
enc := strings.ToLower(strings.TrimSpace(strings.Split(e, ";")[0]))
if enc != "" {
if _, exists := seen[enc]; !exists {
available = append(available, enc)
seen[enc] = struct{}{}
}
}
}
priorities := map[string]int{
"zstd": 1,
"br": 2,
"deflate": 3,
"gzip": 4,
"*": 5,
}
best := "none"
bestRank := 999 // arbitrarily high
for _, enc := range available {
if rank, ok := priorities[enc]; ok {
if rank < bestRank {
best = enc
bestRank = rank
}
}
}
if best == "*" {
best = "wild"
}
return best + "-" + pad2(count)
}
func pad2(n int) string {
if n < 10 {
return "0" + strconv.Itoa(n)
}
if n > 99 {
return "99"
}
return strconv.Itoa(n)
}
func thr1Lang(r *http.Request) string {
raw := r.Header.Get("Accept-Language")
if raw == "" {
return "-000000000"
}
trimmed := first4AlphaNum(strings.ToLower(raw)) + "-"
sum := sha256.Sum256([]byte(raw))
return trimmed + hex.EncodeToString(sum[:])[:9]
}
func first4AlphaNum(s string) string {
out := make([]rune, 0, 4)
for _, ch := range s {
if len(out) == 4 {
break
}
if ('a' <= ch && ch <= 'z') || ('0' <= ch && ch <= '9') {
out = append(out, ch)
}
}
for len(out) < 4 {
out = append(out, '0')
}
return string(out)
}
func thr1Sec(r *http.Request) string {
var lines []string
for k, vs := range r.Header {
lkey := strings.ToLower(k)
if !strings.HasPrefix(lkey, "sec-") || lkey == "sec-fetch-user" {
continue
}
switch lkey {
case "sec-ch-ua":
lines = append(lines, parseSecChUA(vs))
case "sec-ch-ua-mobile":
lines = append(lines, parseSecCHSimple("mobile", vs))
case "sec-ch-ua-platform":
lines = append(lines, parseSecCHSimple("platform", vs))
case "sec-ch-ua-platform-version":
lines = append(lines, parseSecCHSimple("platform_version", vs))
case "sec-ch-ua-model":
lines = append(lines, parseSecCHSimple("model", vs))
case "sec-ch-ua-full-version":
lines = append(lines, parseSecCHSimple("full_version", vs))
default:
for _, v := range vs {
v = strings.Trim(v, `" `)
lines = append(lines, lkey+":"+v)
}
}
}
sort.Strings(lines)
canonical := strings.Join(lines, "\n")
sum := sha256.Sum256([]byte(canonical))
return "sec-" + hex.EncodeToString(sum[:])[:9]
}
var brandVersionRe = regexp.MustCompile(`\s*"([^"]+)";v="([^"]+)"`)
func parseSecChUA(vs []string) string {
type pair struct{ Brand, Version string }
var pairs []pair
for _, v := range vs {
for _, match := range brandVersionRe.FindAllStringSubmatch(v, -1) {
if len(match) != 3 {
continue
}
brand := match[1]
version := match[2]
if brand == "Not=A?Brand" {
continue
}
pairs = append(pairs, pair{brand, version})
}
}
sort.Slice(pairs, func(i, j int) bool {
return pairs[i].Brand < pairs[j].Brand
})
var sb strings.Builder
sb.WriteString("ua:")
for i, p := range pairs {
if i > 0 {
sb.WriteString(",")
}
sb.WriteString(p.Brand + "/" + p.Version)
}
return sb.String()
}
func parseSecCHSimple(key string, vs []string) string {
for _, v := range vs {
v = strings.Trim(v, `" `)
if key == "mobile" {
switch v {
case "?1":
return "mobile:true"
case "?0":
return "mobile:false"
default:
continue
}
}
return key + ":" + v
}
return key + ":"
}
func thr1UA(r *http.Request) string {
ua := r.Header.Get("User-Agent")
sum := sha256.Sum256([]byte(ua))
return hex.EncodeToString(sum[:])[:9]
}