feat(subsonic): sort search3 results by relevance (#5086)
* fix(subsonic): optimize search3 for high-cardinality FTS queries

  Use a two-phase query strategy for FTS5 searches to avoid the performance penalty of expensive LEFT JOINs (annotation, bookmark, library) on high-cardinality results like "the". Phase 1 runs a lightweight query (main table + FTS index only) to get sorted, paginated rowids. Phase 2 hydrates only those few rowids with the full JOINs, making them nearly free. For queries with complex ORDER BY expressions that reference joined tables (e.g. artist search sorted by play count), the optimization is skipped and the original single-query approach is used.

* fix(search): update order by clauses to include 'rank' for FTS queries

  Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): reintroduce 'rank' in Phase 2 ORDER BY for FTS queries

  Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): remove 'rank' from ORDER BY in non-FTS queries and adjust two-phase query handling

  Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): update FTS ranking to use bm25 weights and simplify ORDER BY qualification

  Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): refine FTS query handling and improve comments for clarity

  Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): refactor full-text search handling to streamline query strategy selection and improve LIKE fallback logic.

  Increase e2e coverage for search3

  Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: enhance FTS column definitions and relevance weights

  Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): refactor Search method signatures to remove offset and size parameters, streamline query handling

  Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): allow single-character queries in search strategies and update related tests

  Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): make FTS Phase 1 treat Max=0 as no limit, reorganize tests

  FTS Phase 1 unconditionally called Limit(uint64(options.Max)), which produced LIMIT 0 when Max was zero. This diverged from applyOptions where Max=0 means no limit. Now Phase 1 mirrors applyOptions: only add LIMIT/OFFSET when the value is positive. Also moved legacy backend integration tests from sql_search_fts_test.go to sql_search_like_test.go and added regression tests for the Max=0 behavior on both backends.

* refactor: simplify callSearch function by removing variadic options and directly using QueryOptions

  Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): implement ftsQueryDegraded function to detect significant content loss in FTS queries

  Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
This commit is contained in:
+55
-63
@@ -6,7 +6,6 @@ import (
|
||||
. "github.com/Masterminds/squirrel"
|
||||
"github.com/google/uuid"
|
||||
"github.com/navidrome/navidrome/conf"
|
||||
"github.com/navidrome/navidrome/log"
|
||||
"github.com/navidrome/navidrome/model"
|
||||
"github.com/navidrome/navidrome/utils/str"
|
||||
)
|
||||
@@ -16,57 +15,71 @@ func formatFullText(text ...string) string {
|
||||
return " " + fullText
|
||||
}
|
||||
|
||||
// searchExprFunc is the function signature for search expression builders.
|
||||
type searchExprFunc func(tableName string, query string) Sqlizer
|
||||
|
||||
// getSearchExpr returns the active search expression function based on config.
|
||||
// It falls back to legacySearchExpr when Search.FullString is enabled, because
|
||||
// FTS5 is token-based and cannot match substrings within words.
|
||||
// CJK queries are routed to likeSearchExpr, since FTS5's unicode61 tokenizer
|
||||
// cannot segment CJK text.
|
||||
func getSearchExpr() searchExprFunc {
|
||||
if conf.Server.Search.Backend == "legacy" || conf.Server.Search.FullString {
|
||||
return legacySearchExpr
|
||||
}
|
||||
return func(tableName, query string) Sqlizer {
|
||||
if containsCJK(query) {
|
||||
return likeSearchExpr(tableName, query)
|
||||
}
|
||||
return ftsSearchExpr(tableName, query)
|
||||
}
|
||||
// searchConfig holds per-repository constants for doSearch.
|
||||
type searchConfig struct {
|
||||
NaturalOrder string // ORDER BY for empty-query results (e.g. "album.rowid")
|
||||
OrderBy []string // ORDER BY for text search results (e.g. ["name"])
|
||||
MBIDFields []string // columns to match when query is a UUID
|
||||
// LibraryFilter overrides the default applyLibraryFilter for FTS Phase 1.
|
||||
// Needed when library access requires a junction table (e.g. artist → library_artist).
|
||||
LibraryFilter func(sq SelectBuilder) SelectBuilder
|
||||
}
|
||||
|
||||
// doSearch performs a full-text search with the specified parameters.
|
||||
// The naturalOrder is used to sort results when no full-text filter is applied. It is useful for cases like
|
||||
// OpenSubsonic, where an empty search query should return all results in a natural order. Normally the parameter
|
||||
// should be `tableName + ".rowid"`, but some repositories (ex: artist) may use a different natural order.
|
||||
func (r sqlRepository) doSearch(sq SelectBuilder, q string, offset, size int, results any, naturalOrder string, orderBys ...string) error {
|
||||
// searchStrategy defines how to execute a text search against a repository table.
|
||||
// options carries filters and pagination that must reach all query phases,
|
||||
// including FTS Phase 1 which builds its own query outside sq.
|
||||
type searchStrategy interface {
|
||||
Sqlizer
|
||||
execute(r sqlRepository, sq SelectBuilder, dest any, cfg searchConfig, options model.QueryOptions) error
|
||||
}
|
||||
|
||||
// getSearchStrategy returns the appropriate search strategy based on config and query content.
|
||||
// Returns nil when the query produces no searchable tokens.
|
||||
func getSearchStrategy(tableName, query string) searchStrategy {
|
||||
if conf.Server.Search.Backend == "legacy" || conf.Server.Search.FullString {
|
||||
return newLegacySearch(tableName, query)
|
||||
}
|
||||
if containsCJK(query) {
|
||||
return newLikeSearch(tableName, query)
|
||||
}
|
||||
return newFTSSearch(tableName, query)
|
||||
}
|
||||
|
||||
// doSearch dispatches a search query: empty → natural order, UUID → MBID match,
|
||||
// otherwise delegates to getSearchStrategy. sq must already have LIMIT/OFFSET set
|
||||
// via newSelect(options...). options is forwarded so FTS Phase 1 can apply the same
|
||||
// filters and pagination independently.
|
||||
//
// The query is trimmed of surrounding whitespace and of one trailing "*" before
// dispatch, and rows flagged as missing are excluded on every path. For a
// non-empty, non-MBID query, doSearch returns nil without running a query when
// the trimmed text is shorter than two bytes or when getSearchStrategy yields
// no strategy.
//
// NOTE(review): this span is a rendered diff, so statements that belong to the
// previous doSearch implementation and to the searchByMBID helper appear
// interleaved with the current body; they are annotated where they occur.
func (r sqlRepository) doSearch(sq SelectBuilder, q string, results any, cfg searchConfig, options model.QueryOptions) error {
|
||||
q = strings.TrimSpace(q)
|
||||
q = strings.TrimSuffix(q, "*")
|
||||
|
||||
sq = sq.Where(Eq{r.tableName + ".missing": false})
|
||||
|
||||
// Empty query (OpenSubsonic `search3?query=""`) — return all in natural order.
|
||||
if q == "" || q == `""` {
|
||||
sq = sq.OrderBy(cfg.NaturalOrder)
|
||||
return r.queryAll(sq, results, options)
|
||||
}
|
||||
|
||||
// MBID search: if query is a valid UUID, search by MBID fields instead
|
||||
if uuid.Validate(q) == nil && len(cfg.MBIDFields) > 0 {
|
||||
sq = sq.Where(mbidExpr(r.tableName, q, cfg.MBIDFields...))
|
||||
// NOTE(review): options is not forwarded to queryAll here, unlike the
// empty-query branch above — confirm this is intentional.
return r.queryAll(sq, results)
|
||||
}
|
||||
|
||||
// Min-length guard: single-character queries are too broad for search3.
|
||||
// This check lives here (not in the strategies) so that fullTextFilter
|
||||
// (REST filter path) can still use single-character queries.
|
||||
// len counts bytes, so a single multi-byte character (e.g. "é") passes this guard.
if len(q) < 2 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// NOTE(review): the statements from here through the naturalOrder ordering use
// orderBys and naturalOrder, which are not parameters of this signature —
// presumably removed-side lines of the diff.
searchExpr := getSearchExpr()
|
||||
filter := searchExpr(r.tableName, q)
|
||||
if filter != nil {
|
||||
sq = sq.Where(filter)
|
||||
sq = sq.OrderBy(orderBys...)
|
||||
} else {
|
||||
// This is to speed up the results of `search3?query=""`, for OpenSubsonic
|
||||
// If the filter is empty, we sort by the specified natural order.
|
||||
sq = sq.OrderBy(naturalOrder)
|
||||
// Pick the strategy for this query; a nil strategy ends the search with no error.
strategy := getSearchStrategy(r.tableName, q)
|
||||
if strategy == nil {
|
||||
return nil
|
||||
}
|
||||
// NOTE(review): the next three statements use size and offset, which are not
// parameters of this signature — presumably removed-side lines of the diff.
sq = sq.Where(Eq{r.tableName + ".missing": false})
|
||||
sq = sq.Limit(uint64(size)).Offset(uint64(offset))
|
||||
return r.queryAll(sq, results, model.QueryOptions{Offset: offset})
|
||||
}
|
||||
|
||||
// NOTE(review): searchByMBID opens inside doSearch's body here — presumably a
// helper removed by this change and shown by the diff; confirm.
func (r sqlRepository) searchByMBID(sq SelectBuilder, mbid string, mbidFields []string, results any) error {
|
||||
sq = sq.Where(mbidExpr(r.tableName, mbid, mbidFields...))
|
||||
sq = sq.Where(Eq{r.tableName + ".missing": false})
|
||||
|
||||
return r.queryAll(sq, results)
|
||||
// Hand off to the selected strategy, forwarding cfg and options.
return strategy.execute(r, sq, results, cfg, options)
|
||||
}
|
||||
|
||||
func mbidExpr(tableName, mbid string, mbidFields ...string) Sqlizer {
|
||||
@@ -80,24 +93,3 @@ func mbidExpr(tableName, mbid string, mbidFields ...string) Sqlizer {
|
||||
}
|
||||
return Or(cond)
|
||||
}
|
||||
|
||||
// legacySearchExpr generates LIKE-based search filters against the full_text column.
|
||||
// This is the original search implementation, used when Search.Backend="legacy".
|
||||
func legacySearchExpr(tableName string, s string) Sqlizer {
|
||||
q := str.SanitizeStrings(s)
|
||||
if q == "" {
|
||||
log.Trace("Search using legacy backend, query is empty", "table", tableName)
|
||||
return nil
|
||||
}
|
||||
var sep string
|
||||
if !conf.Server.Search.FullString {
|
||||
sep = " "
|
||||
}
|
||||
parts := strings.Split(q, " ")
|
||||
filters := And{}
|
||||
for _, part := range parts {
|
||||
filters = append(filters, Like{tableName + ".full_text": "%" + sep + part + "%"})
|
||||
}
|
||||
log.Trace("Search using legacy backend", "query", filters, "table", tableName)
|
||||
return filters
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user