diff --git a/docs/docs/developer/thr1.mdx b/docs/docs/developer/thr1.mdx index 5a332c50..a03ca156 100644 --- a/docs/docs/developer/thr1.mdx +++ b/docs/docs/developer/thr1.mdx @@ -11,13 +11,13 @@ The biggest source of prior art is [FoxIO's JA4H fingerprinting method](https:// The fingerprint consists of four concatenated components: ```text -<thr1_head>_<thr1_lang>_<thr1_sec>_<thr1_all> +<thr1_head>_<thr1_lang>_<thr1_sec>_<thr1_ua>_<thr1_enc> ``` Example: ```text -get20cr1004_enca-d6b272e5b_sec-a9649072c_2a347fcf7 +get201004_enca-d6b272e5b_sec-a9649072c_2a347fcf7_zs ``` Each component is described below: @@ -28,15 +28,14 @@ Overall request summary of method, protocol, and header counts: - First three letters of the HTTP method, lowercased (e.g. get, pos). - HTTP protocol version formatted in two digits (`10` for HTTP/1.0, `11` for HTTP/1.1, `20` for HTTP/2, `30` for HTTP/3 etc.). -- Single letter indicating if the request has cookies: `c` if present, `n` if not. -- Single letter indicating Referer header presence: `r` if present, `n` if absent. + - If present, prefer the HTTP protocol version in `X-Http-Version`. - Number of HTTP headers sent by the client, zero-padded to two digits (e.g. `10`). - Number of `Sec-*` headers sent by the client, zero-padded to two digits (e.g. `04`). Example: ```text -get20cr1004 +get201004 ``` ### `thr1_lang` @@ -69,7 +68,7 @@ thr1_sec = "sec-" + HASH9 Where: -- Collect **all headers whose names start with `sec-` (case-insensitive)**. +- Collect **all headers whose names start with `sec-` (case-insensitive)**, excluding `Sec-Fetch-User`. - For each header: 1. Normalize the header name by lowercasing. @@ -156,32 +155,48 @@ sec-fetch-mode:navigate ua:Chromium/123,Google Chrome/123 ``` -### `thr1_all` +### `thr1_ua` -A hash of the canonicalized form of request headers. - -To construct a `tlr1_all`: - -1. Collect all header keys excluding: - -- `Cookie` -- `Referer` -- `User-Agent` -- Any header starting with `X-` - -2. Sort header keys by lowercase name. -3. Serialize as: - - ```text - name:value - ``` - - Joined by newlines.
- -4. Compute the SHA-256 checksum of that string and take the first 9 hex digits. +SHA256 fingerprint of the `User-Agent` string, taking the first 9 hex digits. Example output: ```text 2a347fcf7 ``` + +### `thr1_enc` + +The `thr1_enc` (compression) component summarizes the `Accept-Encoding` header. It includes: + +- **Most preferred compression encoding** (`*`, `gzip`, `deflate`, `br`, `zstd`) +- **Number of encodings declared**, zero-padded to **two digits** (`00`–`99`, capped) + +--- + +### `thr1_enc` spec + +**Format:** + +``` +<preferred_encoding>-<count> +``` + +- `preferred_encoding` is the first matching value in this priority order: + + 1. `*` + 2. `gzip` + 3. `deflate` + 4. `br` + 5. `zstd` + +- If none match, use `none` +- `count` is the number of encoding options, zero-padded to 2 digits (max 99) + +**Examples:** + +- `gzip, deflate` → `gzip-02` +- `gzip;q=0.9, br;q=0.8` → `gzip-02` +- `zstd` → `zstd-01` +- `bogus` → `none-01` +- _empty_ → `none-00` diff --git a/lib/anubis.go b/lib/anubis.go index 06e780bd..21ac99eb 100644 --- a/lib/anubis.go +++ b/lib/anubis.go @@ -27,6 +27,7 @@ import ( "github.com/TecharoHQ/anubis/lib/challenge" "github.com/TecharoHQ/anubis/lib/policy" "github.com/TecharoHQ/anubis/lib/policy/config" + "github.com/TecharoHQ/anubis/lib/thr1" // challenge implementations _ "github.com/TecharoHQ/anubis/lib/challenge/proofofwork" @@ -74,18 +75,13 @@ type Server struct { func (s *Server) challengeFor(r *http.Request, difficulty int) string { fp := sha256.Sum256(s.pub[:]) - acceptLanguage := r.Header.Get("Accept-Language") - if len(acceptLanguage) > 5 { - acceptLanguage = acceptLanguage[:5] - } - challengeData := fmt.Sprintf( - "Accept-Language=%s,X-Real-IP=%s,User-Agent=%s,WeekTime=%s,Fingerprint=%x,Difficulty=%d", - acceptLanguage, - r.Header.Get("X-Real-Ip"), + "THR1=%s,JA4=%s,Fingerprint=%x,User-Agent=%s,WeekTime=%s,Difficulty=%d", + thr1.Fingerprint(r), + r.Header.Get("X-Tls-Fingerprint-Ja4"), + fp, r.UserAgent(),
// Fingerprint computes the THR1 fingerprint of an HTTP request.
//
// The result is five components joined by underscores, in this order: the
// request head summary, the Accept-Language component, the Sec-* header hash,
// the User-Agent hash, and the Accept-Encoding summary.
func Fingerprint(r *http.Request) string {
	result := strings.Join([]string{
		thr1Head(r),
		thr1Lang(r),
		thr1Sec(r),
		thr1UA(r),
		thr1Encoding(r),
	}, "_")

	// Logged at Debug: this fires on every call, which is too noisy for Info.
	path := ""
	if r.URL != nil {
		path = r.URL.Path
	}
	slog.Debug("THR1 got", "method", r.Method, "path", path, "thr1", result)

	return result
}

// hash9 returns the first 9 hex digits of the SHA-256 checksum of s.
func hash9(s string) string {
	sum := sha256.Sum256([]byte(s))
	return hex.EncodeToString(sum[:])[:9]
}

// thr1Head builds the head component: the first three letters of the
// lowercased method, a two-digit protocol version, and a two-letter flag that
// is "tr" when at least one Sec-* header is present and "fa" otherwise.
//
// NOTE(review): the developer docs describe zero-padded header counts in this
// position rather than a presence flag — confirm which one is intended.
func thr1Head(r *http.Request) string {
	method := strings.ToLower(r.Method)
	if len(method) > 3 {
		method = method[:3]
	}

	// Prefer the version reported in X-Http-Version, but fall back to the
	// request's own protocol when the header is absent or unrecognized, so a
	// malformed value does not degrade to "00".
	version, ok := versionCode(r.Header.Get("X-Http-Version"))
	if !ok {
		version = protoCode(r.ProtoMajor, r.ProtoMinor)
	}

	secFlag := "fa"
	for k := range r.Header {
		if strings.HasPrefix(strings.ToLower(k), "sec-") {
			secFlag = "tr"
			break
		}
	}

	return method + version + secFlag
}

// versionCode maps an X-Http-Version value such as "HTTP/1.1" or "HTTP/2" to
// its two-digit code. The second result is false when the value is empty or
// not recognized.
func versionCode(override string) (string, bool) {
	switch strings.ToUpper(strings.TrimSpace(override)) {
	case "HTTP/1.0":
		return "10", true
	case "HTTP/1.1":
		return "11", true
	case "HTTP/2", "HTTP/2.0":
		return "20", true
	case "HTTP/3", "HTTP/3.0":
		return "30", true
	}
	return "", false
}

// protoCode maps a protocol major/minor pair to its two-digit code, or "00"
// when the pair is not one of HTTP/1.0, HTTP/1.1, HTTP/2 or HTTP/3.
func protoCode(major, minor int) string {
	switch {
	case major == 1 && minor == 0:
		return "10"
	case major == 1 && minor == 1:
		return "11"
	case major == 2:
		return "20"
	case major == 3:
		return "30"
	}
	return "00"
}

// thr1Encoding builds the Accept-Encoding component as "<best>-<count>".
//
// best is the highest-ranked known coding the client listed ("wild" for "*",
// "none" when nothing known is listed). count is the number of non-empty list
// members, capped at 99 and zero-padded to two digits.
//
// NOTE(review): the developer docs list the preference order the other way
// round (gzip ahead of zstd) — confirm which one is intended.
func thr1Encoding(r *http.Request) string {
	// Several Accept-Encoding field lines are equivalent to one
	// comma-separated list, so fold them together before parsing.
	raw := strings.Join(r.Header.Values("Accept-Encoding"), ",")

	best, bestRank, count := "none", 0, 0
	for _, item := range strings.Split(raw, ",") {
		name, _, _ := strings.Cut(item, ";") // drop parameters such as ";q=0.8"
		name = strings.ToLower(strings.TrimSpace(name))
		if name == "" {
			// Empty list members ("gzip,,br", trailing commas) are not codings.
			continue
		}
		count++
		if rank := encodingRank(name); rank > 0 && (bestRank == 0 || rank < bestRank) {
			best, bestRank = name, rank
		}
	}

	if best == "*" {
		best = "wild"
	}

	return best + "-" + pad2(count)
}

// encodingRank returns the preference rank of a lowercased content coding
// (1 is most preferred), or 0 when the coding is not one of the known ones.
func encodingRank(enc string) int {
	switch enc {
	case "zstd":
		return 1
	case "br":
		return 2
	case "deflate":
		return 3
	case "gzip":
		return 4
	case "*":
		return 5
	}
	return 0
}

// pad2 formats n as exactly two decimal digits, clamping it to 0..99.
func pad2(n int) string {
	switch {
	case n < 0:
		n = 0
	case n > 99:
		n = 99
	}
	if n < 10 {
		return "0" + strconv.Itoa(n)
	}
	return strconv.Itoa(n)
}

// thr1Lang builds the Accept-Language component: the first four ASCII
// alphanumerics of the lowercased header value, a dash, and the first 9 hex
// digits of the SHA-256 of the raw value. A missing header yields
// "-000000000".
func thr1Lang(r *http.Request) string {
	raw := r.Header.Get("Accept-Language")
	if raw == "" {
		return "-000000000"
	}
	return first4AlphaNum(strings.ToLower(raw)) + "-" + hash9(raw)
}

// first4AlphaNum returns the first four characters of s that are lowercase
// ASCII letters or digits, right-padded with '0' to a length of four.
func first4AlphaNum(s string) string {
	out := make([]rune, 0, 4)
	for _, ch := range s {
		if len(out) == 4 {
			break
		}
		if ('a' <= ch && ch <= 'z') || ('0' <= ch && ch <= '9') {
			out = append(out, ch)
		}
	}
	for len(out) < 4 {
		out = append(out, '0')
	}
	return string(out)
}

// thr1Sec builds the Sec-* component: "sec-" followed by the first 9 hex
// digits of the SHA-256 of the canonicalized Sec-* headers.
//
// Every header whose name starts with "sec-" (case-insensitive) except
// Sec-Fetch-User contributes one or more lines. The lines are sorted and
// joined with newlines before hashing, so map iteration order does not affect
// the result.
func thr1Sec(r *http.Request) string {
	var lines []string
	for k, vs := range r.Header {
		lkey := strings.ToLower(k)
		if !strings.HasPrefix(lkey, "sec-") || lkey == "sec-fetch-user" {
			continue
		}
		switch lkey {
		case "sec-ch-ua":
			lines = append(lines, parseSecChUA(vs))
		case "sec-ch-ua-mobile":
			lines = append(lines, parseSecCHSimple("mobile", vs))
		case "sec-ch-ua-platform":
			lines = append(lines, parseSecCHSimple("platform", vs))
		case "sec-ch-ua-platform-version":
			lines = append(lines, parseSecCHSimple("platform_version", vs))
		case "sec-ch-ua-model":
			lines = append(lines, parseSecCHSimple("model", vs))
		case "sec-ch-ua-full-version":
			lines = append(lines, parseSecCHSimple("full_version", vs))
		default:
			for _, v := range vs {
				lines = append(lines, lkey+":"+strings.Trim(v, `" `))
			}
		}
	}

	sort.Strings(lines)
	return "sec-" + hash9(strings.Join(lines, "\n"))
}

// brandVersionRe matches one `"brand";v="version"` member of a Sec-CH-UA list.
var brandVersionRe = regexp.MustCompile(`\s*"([^"]+)";v="([^"]+)"`)

// isGreaseBrand reports whether brand is a GREASE placeholder: "Not", "A" and
// "Brand" separated by arbitrary non-letter characters (for example
// "Not=A?Brand", "Not.A/Brand" or "Not A;Brand").
func isGreaseBrand(brand string) bool {
	letters := strings.Map(func(c rune) rune {
		if ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') {
			return c
		}
		return -1 // drop everything that is not an ASCII letter
	}, brand)
	return strings.EqualFold(letters, "NotABrand")
}

// parseSecChUA canonicalizes Sec-CH-UA values into a single line of the form
// "ua:Brand/Version,Brand/Version". GREASE brands are dropped, and the
// remaining pairs are ordered by brand and then by version so the output does
// not depend on the order the client listed them in.
func parseSecChUA(vs []string) string {
	type pair struct{ Brand, Version string }
	var pairs []pair

	for _, v := range vs {
		for _, match := range brandVersionRe.FindAllStringSubmatch(v, -1) {
			if len(match) != 3 {
				continue
			}
			if isGreaseBrand(match[1]) {
				continue
			}
			pairs = append(pairs, pair{match[1], match[2]})
		}
	}

	sort.Slice(pairs, func(i, j int) bool {
		if pairs[i].Brand != pairs[j].Brand {
			return pairs[i].Brand < pairs[j].Brand
		}
		return pairs[i].Version < pairs[j].Version
	})

	var sb strings.Builder
	sb.WriteString("ua:")
	for i, p := range pairs {
		if i > 0 {
			sb.WriteString(",")
		}
		sb.WriteString(p.Brand + "/" + p.Version)
	}
	return sb.String()
}

// parseSecCHSimple canonicalizes a single-valued client hint as "key:value".
//
// Values are stripped of surrounding quotes and spaces. For the "mobile" key
// the first value equal to "?1" or "?0" is rendered as "true" or "false" and
// other values are skipped; for any other key the first value is used. When
// no usable value exists the result is "key:".
func parseSecCHSimple(key string, vs []string) string {
	for _, v := range vs {
		v = strings.Trim(v, `" `)
		if key != "mobile" {
			return key + ":" + v
		}
		switch v {
		case "?1":
			return "mobile:true"
		case "?0":
			return "mobile:false"
		}
	}
	return key + ":"
}

// thr1UA returns the first 9 hex digits of the SHA-256 of the User-Agent
// header value (the empty string when the header is absent).
func thr1UA(r *http.Request) string {
	return hash9(r.Header.Get("User-Agent"))
}