From 54de0dbc52a25a8a5fbf1b44d5ca6703a61d87ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Deluan=20Quint=C3=A3o?= Date: Sat, 21 Feb 2026 17:52:42 -0500 Subject: [PATCH] feat(server): implement FTS5-based full-text search (#5079) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * build: add sqlite_fts5 build tag to enable FTS5 support * feat: add SearchBackend config option (default: fts) * feat: add buildFTS5Query for safe FTS5 query preprocessing * feat: add FTS5 search backend with config toggle, refactor legacy search - Add searchExprFunc type and getSearchExpr() for backend selection - Rename fullTextExpr to legacySearchExpr - Add ftsSearchExpr using FTS5 MATCH subquery - Update fullTextFilter in sql_restful.go to use configured backend * feat: add FTS5 migration with virtual tables, triggers, and search_participants Creates FTS5 virtual tables for media_file, album, and artist with unicode61 tokenizer and diacritic folding. Adds search_participants column, populates from JSON, and sets up INSERT/UPDATE/DELETE triggers. * feat: populate search_participants in PostMapArgs for FTS5 indexing * test: add FTS5 search integration tests * fix: exclude FTS5 virtual tables from e2e DB restore The restoreDB function iterates all tables in sqlite_master and runs DELETE + INSERT to reset state. FTS5 contentless virtual tables cannot be directly deleted from. Since triggers handle FTS5 sync automatically, simply skip tables matching *_fts and *_fts_* patterns. * build: add compile-time guard for sqlite_fts5 build tag Same pattern as netgo: compilation fails with a clear error if the sqlite_fts5 build tag is missing. 
* build: add sqlite_fts5 tag to reflex dev server config * build: extract GO_BUILD_TAGS variable in Makefile to avoid duplication * fix: strip leading * from FTS5 queries to prevent "unknown special query" error * feat: auto-append prefix wildcard to FTS5 search tokens for broader matching Every plain search token now gets a trailing * appended (e.g., "love" becomes "love*"), so searching for "love" also matches "lovelace", "lovely", etc. Quoted phrases are preserved as exact matches without wildcards. Results are ordered alphabetically by name/title, so shorter exact matches naturally appear first. * fix: clarify comments about FTS5 operator neutralization The comments said "strip" but the code lowercases operators to neutralize them (FTS5 operators are case-sensitive). Updated comments to accurately describe the behavior. * fix: use fmt.Sprintf for FTS5 phrase placeholders The previous encoding used rune('0'+index) which silently breaks with 10+ quoted phrases. Use fmt.Sprintf for arbitrary index support. * fix: validate and normalize SearchBackend config option Normalize the value to lowercase and fall back to "fts" with a log warning for unrecognized values. This prevents silent misconfiguration from typos like "FTS", "Legacy", or "fts5". * refactor: improve documentation for build tags and FTS5 requirements Signed-off-by: Deluan * refactor: convert FTS5 query and search backend normalization tests to DescribeTable format Signed-off-by: Deluan * fix: add sqlite_fts5 build tag to golangci configuration Signed-off-by: Deluan * feat: add UISearchDebounceMs configuration option and update related components Signed-off-by: Deluan * fix: fall back to legacy search when SearchFullString is enabled FTS5 is token-based and cannot match substrings within words, so getSearchExpr now returns legacySearchExpr when SearchFullString is true, regardless of SearchBackend setting. 
* fix: add sqlite_fts5 build tag to CI pipeline and Dockerfile * fix: add WHEN clauses to FTS5 AFTER UPDATE triggers Added WHEN clauses to the media_file_fts_au, album_fts_au, and artist_fts_au triggers so they only fire when FTS-indexed columns actually change. Previously, every row update (e.g., play count, rating, starred status) triggered an unnecessary delete+insert cycle in the FTS shadow tables. The WHEN clauses use IS NOT for NULL-safe comparison of each indexed column, avoiding FTS index churn for non-indexed updates. * feat: add SearchBackend configuration option to data and insights components Signed-off-by: Deluan * fix: enhance input sanitization for FTS5 by stripping additional punctuation and special characters Signed-off-by: Deluan * feat: add search_normalized column for punctuated name search (R.E.M., AC/DC) Add index-time normalization and query-time single-letter collapsing to fix FTS5 search for punctuated names. A new search_normalized column stores concatenated forms of punctuated words (e.g., "R.E.M." → "REM", "AC/DC" → "ACDC") and is indexed in FTS5 tables. At query time, runs of consecutive single letters (from dot-stripping) are collapsed into OR expressions like ("R E M" OR REM*) to match both the original tokens and the normalized form. This enables searching by "R.E.M.", "REM", "AC/DC", "ACDC", "A-ha", or "Aha" and finding the correct results. * refactor: simplify isSingleUnicodeLetter to avoid []rune allocation Use utf8.DecodeRuneInString to check for a single Unicode letter instead of converting the entire string to a []rune slice. 
* feat: define ftsSearchColumns for flexible FTS5 search column inclusion Signed-off-by: Deluan * feat: update collapseSingleLetterRuns to return quoted phrases for abbreviations Signed-off-by: Deluan * feat: implement extractPunctuatedWords to handle artist/album names with embedded punctuation Signed-off-by: Deluan * refactor: punctuated word handling to improve processing of artist/album names Signed-off-by: Deluan * feat: add CJK support for search queries with LIKE filters Signed-off-by: Deluan * feat: enhance FTS5 search by adding album version support and CJK handling Signed-off-by: Deluan * refactor: search configuration to use structured options Signed-off-by: Deluan * feat: enhance search functionality to support punctuation-only queries and update related tests Signed-off-by: Deluan --------- Signed-off-by: Deluan --- .github/workflows/pipeline.yml | 2 +- .golangci.yml | 1 + Dockerfile | 2 +- Makefile | 13 +- conf/buildtags/buildtags.go | 4 - conf/buildtags/doc.go | 6 + conf/buildtags/netgo.go | 6 +- conf/buildtags/sqlite_fts5.go | 8 + conf/configuration.go | 26 +- conf/configuration_test.go | 14 + conf/export_test.go | 2 + consts/consts.go | 11 +- core/metrics/insights.go | 3 +- core/metrics/insights/data.go | 1 + .../20260220173400_add_fts5_search.go | 391 ++++++++++++++++++ main.go | 5 +- persistence/album_repository.go | 5 +- persistence/album_repository_test.go | 6 + persistence/artist_repository.go | 1 + persistence/artist_repository_test.go | 50 ++- persistence/mediafile_repository.go | 5 +- persistence/mediafile_repository_test.go | 2 +- persistence/persistence_suite_test.go | 31 +- persistence/sql_restful.go | 3 +- persistence/sql_restful_test.go | 9 +- persistence/sql_search.go | 32 +- persistence/sql_search_fts.go | 261 ++++++++++++ persistence/sql_search_fts_test.go | 333 +++++++++++++++ persistence/sql_search_test.go | 97 
+++++ reflex.conf | 2 +- server/e2e/e2e_suite_test.go | 2 +- server/serve_index.go | 1 + server/serve_index_test.go | 1 + ui/src/common/List.jsx | 2 + ui/src/config.js | 1 + 35 files changed, 1283 insertions(+), 56 deletions(-) delete mode 100644 conf/buildtags/buildtags.go create mode 100644 conf/buildtags/doc.go create mode 100644 conf/buildtags/sqlite_fts5.go create mode 100644 db/migrations/20260220173400_add_fts5_search.go create mode 100644 persistence/sql_search_fts.go create mode 100644 persistence/sql_search_fts_test.go diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml index 33620555..fd8edcd1 100644 --- a/.github/workflows/pipeline.yml +++ b/.github/workflows/pipeline.yml @@ -117,7 +117,7 @@ jobs: - name: Test run: | pkg-config --define-prefix --cflags --libs taglib # for debugging - go test -shuffle=on -tags netgo -race ./... -v + go test -shuffle=on -tags netgo,sqlite_fts5 -race ./... -v - name: Test ndpgen run: | diff --git a/.golangci.yml b/.golangci.yml index 996dafcc..1937c2f7 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -2,6 +2,7 @@ version: "2" run: build-tags: - netgo + - sqlite_fts5 linters: enable: - asasalint diff --git a/Dockerfile b/Dockerfile index ad43e247..b32c1df5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -109,7 +109,7 @@ RUN --mount=type=bind,source=. \ export EXT=".exe" fi - go build -tags=netgo -ldflags="${LD_EXTRA} -w -s \ + go build -tags=netgo,sqlite_fts5 -ldflags="${LD_EXTRA} -w -s \ -X github.com/navidrome/navidrome/consts.gitSha=${GIT_SHA} \ -X github.com/navidrome/navidrome/consts.gitTag=${GIT_TAG}" \ -o /out/navidrome${EXT} . 
diff --git a/Makefile b/Makefile index d16cb282..f7b7b1b0 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ GO_VERSION=$(shell grep "^go " go.mod | cut -f 2 -d ' ') NODE_VERSION=$(shell cat .nvmrc) +GO_BUILD_TAGS=netgo,sqlite_fts5 # Set global environment variables, required for most targets export CGO_CFLAGS_ALLOW=--define-prefix @@ -46,12 +47,12 @@ stop: ##@Development Stop development servers (UI and backend) .PHONY: stop watch: ##@Development Start Go tests in watch mode (re-run when code changes) - go tool ginkgo watch -tags=netgo -notify ./... + go tool ginkgo watch -tags=$(GO_BUILD_TAGS) -notify ./... .PHONY: watch PKG ?= ./... test: ##@Development Run Go tests. Use PKG variable to specify packages to test, e.g. make test PKG=./server - go test -tags netgo $(PKG) + go test -tags $(GO_BUILD_TAGS) $(PKG) .PHONY: test test-ndpgen: ##@Development Run tests for ndpgen plugin @@ -62,7 +63,7 @@ testall: test test-ndpgen test-i18n test-js ##@Development Run Go and JS tests .PHONY: testall test-race: ##@Development Run Go tests with race detector - go test -tags netgo -race -shuffle=on $(PKG) + go test -tags $(GO_BUILD_TAGS) -race -shuffle=on $(PKG) .PHONY: test-race test-js: ##@Development Run JS tests @@ -108,7 +109,7 @@ format: ##@Development Format code .PHONY: format wire: check_go_env ##@Development Update Dependency Injection - go tool wire gen -tags=netgo ./... + go tool wire gen -tags=$(GO_BUILD_TAGS) ./... 
.PHONY: wire gen: check_go_env ##@Development Run go generate for code generation @@ -144,14 +145,14 @@ setup-git: ##@Development Setup Git hooks (pre-commit and pre-push) .PHONY: setup-git build: check_go_env buildjs ##@Build Build the project - go build -ldflags="-X github.com/navidrome/navidrome/consts.gitSha=$(GIT_SHA) -X github.com/navidrome/navidrome/consts.gitTag=$(GIT_TAG)" -tags=netgo + go build -ldflags="-X github.com/navidrome/navidrome/consts.gitSha=$(GIT_SHA) -X github.com/navidrome/navidrome/consts.gitTag=$(GIT_TAG)" -tags=$(GO_BUILD_TAGS) .PHONY: build buildall: deprecated build .PHONY: buildall debug-build: check_go_env buildjs ##@Build Build the project (with remote debug on) - go build -gcflags="all=-N -l" -ldflags="-X github.com/navidrome/navidrome/consts.gitSha=$(GIT_SHA) -X github.com/navidrome/navidrome/consts.gitTag=$(GIT_TAG)" -tags=netgo + go build -gcflags="all=-N -l" -ldflags="-X github.com/navidrome/navidrome/consts.gitSha=$(GIT_SHA) -X github.com/navidrome/navidrome/consts.gitTag=$(GIT_TAG)" -tags=$(GO_BUILD_TAGS) .PHONY: debug-build buildjs: check_node_env ui/build/index.html ##@Build Build only frontend diff --git a/conf/buildtags/buildtags.go b/conf/buildtags/buildtags.go deleted file mode 100644 index 5fc12508..00000000 --- a/conf/buildtags/buildtags.go +++ /dev/null @@ -1,4 +0,0 @@ -package buildtags - -// This file is left intentionally empty. It is used to make sure the package is not empty, in the case all -// required build tags are disabled. diff --git a/conf/buildtags/doc.go b/conf/buildtags/doc.go new file mode 100644 index 00000000..f637b635 --- /dev/null +++ b/conf/buildtags/doc.go @@ -0,0 +1,6 @@ +// Package buildtags provides compile-time enforcement of required build tags. +// +// Each file in this package is guarded by a build constraint and exports a variable +// that main.go references. 
If a required tag is missing during compilation, the build +// fails with an "undefined" error, directing the developer to use `make build`. +package buildtags diff --git a/conf/buildtags/netgo.go b/conf/buildtags/netgo.go index 0062ad2b..40700470 100644 --- a/conf/buildtags/netgo.go +++ b/conf/buildtags/netgo.go @@ -2,10 +2,6 @@ package buildtags -// NOTICE: This file was created to force the inclusion of the `netgo` tag when compiling the project. -// If the tag is not included, the compilation will fail because this variable won't be defined, and the `main.go` -// file requires it. - -// Why this tag is required? See https://github.com/navidrome/navidrome/issues/700 +// The `netgo` tag is required when compiling the project. See https://github.com/navidrome/navidrome/issues/700 var NETGO = true diff --git a/conf/buildtags/sqlite_fts5.go b/conf/buildtags/sqlite_fts5.go new file mode 100644 index 00000000..1476e04c --- /dev/null +++ b/conf/buildtags/sqlite_fts5.go @@ -0,0 +1,8 @@ +//go:build sqlite_fts5 + +package buildtags + +// FTS5 is required for full-text search. Without this tag, the SQLite driver +// won't include FTS5 support, causing runtime failures on migrations and search queries. 
+ +var SQLITE_FTS5 = true diff --git a/conf/configuration.go b/conf/configuration.go index 8994c1c8..000bffb5 100644 --- a/conf/configuration.go +++ b/conf/configuration.go @@ -58,7 +58,7 @@ type configOptions struct { SmartPlaylistRefreshDelay time.Duration AutoTranscodeDownload bool DefaultDownsamplingFormat string - SearchFullString bool + Search searchOptions `json:",omitzero"` SimilarSongsMatchThreshold int RecentlyAddedByModTime bool PreferSortTags bool @@ -82,6 +82,7 @@ type configOptions struct { DefaultTheme string DefaultLanguage string DefaultUIVolume int + UISearchDebounceMs int EnableReplayGain bool EnableCoverAnimation bool EnableNowPlaying bool @@ -251,6 +252,11 @@ type extAuthOptions struct { UserHeader string } +type searchOptions struct { + Backend string + FullString bool +} + var ( Server = &configOptions{} hooks []func() @@ -344,6 +350,8 @@ func Load(noConfigDump bool) { os.Exit(1) } + Server.Search.Backend = normalizeSearchBackend(Server.Search.Backend) + if Server.BaseURL != "" { u, err := url.Parse(Server.BaseURL) if err != nil { @@ -392,6 +400,7 @@ func Load(noConfigDump bool) { logDeprecatedOptions("Scanner.GenreSeparators", "") logDeprecatedOptions("Scanner.GroupAlbumReleases", "") logDeprecatedOptions("DevEnableBufferedScrobble", "") // Deprecated: Buffered scrobbling is now always enabled and this option is ignored + logDeprecatedOptions("SearchFullString", "Search.FullString") logDeprecatedOptions("ReverseProxyWhitelist", "ExtAuth.TrustedSources") logDeprecatedOptions("ReverseProxyUserHeader", "ExtAuth.UserHeader") logDeprecatedOptions("HTTPSecurityHeaders.CustomFrameOptionsValue", "HTTPHeaders.FrameOptions") @@ -539,6 +548,17 @@ func validateSchedule(schedule, field string) (string, error) { return schedule, err } +func normalizeSearchBackend(value string) string { + v := strings.ToLower(strings.TrimSpace(value)) + switch v { + case "fts", "legacy": + return v + default: + log.Error("Invalid Search.Backend value, falling back to 
'fts'", "value", value) + return "fts" + } +} + // AddHook is used to register initialization code that should run as soon as the config is loaded func AddHook(hook func()) { hooks = append(hooks, hook) @@ -585,7 +605,8 @@ func setViperDefaults() { viper.SetDefault("enablemediafilecoverart", true) viper.SetDefault("autotranscodedownload", false) viper.SetDefault("defaultdownsamplingformat", consts.DefaultDownsamplingFormat) - viper.SetDefault("searchfullstring", false) + viper.SetDefault("search.fullstring", false) + viper.SetDefault("search.backend", "fts") viper.SetDefault("similarsongsmatchthreshold", 85) viper.SetDefault("recentlyaddedbymodtime", false) viper.SetDefault("prefersorttags", false) @@ -604,6 +625,7 @@ func setViperDefaults() { viper.SetDefault("defaulttheme", "Dark") viper.SetDefault("defaultlanguage", "") viper.SetDefault("defaultuivolume", consts.DefaultUIVolume) + viper.SetDefault("uisearchdebouncems", consts.DefaultUISearchDebounceMs) viper.SetDefault("enablereplaygain", true) viper.SetDefault("enablecoveranimation", true) viper.SetDefault("enablenowplaying", true) diff --git a/conf/configuration_test.go b/conf/configuration_test.go index 90604f54..b4ed6ca2 100644 --- a/conf/configuration_test.go +++ b/conf/configuration_test.go @@ -52,6 +52,20 @@ var _ = Describe("Configuration", func() { }) }) + DescribeTable("NormalizeSearchBackend", + func(input, expected string) { + Expect(conf.NormalizeSearchBackend(input)).To(Equal(expected)) + }, + Entry("accepts 'fts'", "fts", "fts"), + Entry("accepts 'legacy'", "legacy", "legacy"), + Entry("normalizes 'FTS' to lowercase", "FTS", "fts"), + Entry("normalizes 'Legacy' to lowercase", "Legacy", "legacy"), + Entry("trims whitespace", " fts ", "fts"), + Entry("falls back to 'fts' for 'fts5'", "fts5", "fts"), + Entry("falls back to 'fts' for unrecognized values", "invalid", "fts"), + Entry("falls back to 'fts' for empty string", "", "fts"), + ) + DescribeTable("should load configuration from", func(format 
string) { filename := filepath.Join("testdata", "cfg."+format) diff --git a/conf/export_test.go b/conf/export_test.go index abd2fa5a..7344dc4c 100644 --- a/conf/export_test.go +++ b/conf/export_test.go @@ -7,3 +7,5 @@ func ResetConf() { var SetViperDefaults = setViperDefaults var ParseLanguages = parseLanguages + +var NormalizeSearchBackend = normalizeSearchBackend diff --git a/consts/consts.go b/consts/consts.go index eb10fdc0..ebde9d1d 100644 --- a/consts/consts.go +++ b/consts/consts.go @@ -66,11 +66,12 @@ const ( I18nFolder = "i18n" ScanIgnoreFile = ".ndignore" - PlaceholderArtistArt = "artist-placeholder.webp" - PlaceholderAlbumArt = "album-placeholder.webp" - PlaceholderAvatar = "logo-192x192.png" - UICoverArtSize = 300 - DefaultUIVolume = 100 + PlaceholderArtistArt = "artist-placeholder.webp" + PlaceholderAlbumArt = "album-placeholder.webp" + PlaceholderAvatar = "logo-192x192.png" + UICoverArtSize = 300 + DefaultUIVolume = 100 + DefaultUISearchDebounceMs = 200 DefaultHttpClientTimeOut = 10 * time.Second diff --git a/core/metrics/insights.go b/core/metrics/insights.go index 849ddd6f..f059d739 100644 --- a/core/metrics/insights.go +++ b/core/metrics/insights.go @@ -208,7 +208,8 @@ var staticData = sync.OnceValue(func() insights.Data { data.Config.TranscodingCacheSize = conf.Server.TranscodingCacheSize data.Config.ImageCacheSize = conf.Server.ImageCacheSize data.Config.SessionTimeout = uint64(math.Trunc(conf.Server.SessionTimeout.Seconds())) - data.Config.SearchFullString = conf.Server.SearchFullString + data.Config.SearchFullString = conf.Server.Search.FullString + data.Config.SearchBackend = conf.Server.Search.Backend data.Config.RecentlyAddedByModTime = conf.Server.RecentlyAddedByModTime data.Config.PreferSortTags = conf.Server.PreferSortTags data.Config.BackupSchedule = conf.Server.Backup.Schedule diff --git a/core/metrics/insights/data.go b/core/metrics/insights/data.go index 99eb542a..5580d895 100644 --- a/core/metrics/insights/data.go +++ 
b/core/metrics/insights/data.go @@ -68,6 +68,7 @@ type Data struct { EnableNowPlaying bool `json:"enableNowPlaying,omitempty"` SessionTimeout uint64 `json:"sessionTimeout,omitempty"` SearchFullString bool `json:"searchFullString,omitempty"` + SearchBackend string `json:"searchBackend,omitempty"` RecentlyAddedByModTime bool `json:"recentlyAddedByModTime,omitempty"` PreferSortTags bool `json:"preferSortTags,omitempty"` BackupSchedule string `json:"backupSchedule,omitempty"` diff --git a/db/migrations/20260220173400_add_fts5_search.go b/db/migrations/20260220173400_add_fts5_search.go new file mode 100644 index 00000000..dc4cd647 --- /dev/null +++ b/db/migrations/20260220173400_add_fts5_search.go @@ -0,0 +1,391 @@ +package migrations + +import ( + "context" + "database/sql" + "fmt" + + "github.com/pressly/goose/v3" +) + +func init() { + goose.AddMigrationContext(upAddFts5Search, downAddFts5Search) +} + +// stripPunct generates a SQL expression that strips common punctuation from a column or expression. +// Used during migration to approximate the Go normalizeForFTS function for bulk-populating search_normalized. +func stripPunct(col string) string { + return fmt.Sprintf( + `REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(%s, '.', ''), '/', ''), '-', ''), '''', ''), '&', ''), ',', '')`, + col, + ) +} + +func upAddFts5Search(ctx context.Context, tx *sql.Tx) error { + notice(tx, "Adding FTS5 full-text search indexes. 
This may take a moment on large libraries.") + + // Step 1: Add search_participants and search_normalized columns to media_file, album, and artist + _, err := tx.ExecContext(ctx, `ALTER TABLE media_file ADD COLUMN search_participants TEXT NOT NULL DEFAULT ''`) + if err != nil { + return fmt.Errorf("adding search_participants to media_file: %w", err) + } + _, err = tx.ExecContext(ctx, `ALTER TABLE media_file ADD COLUMN search_normalized TEXT NOT NULL DEFAULT ''`) + if err != nil { + return fmt.Errorf("adding search_normalized to media_file: %w", err) + } + _, err = tx.ExecContext(ctx, `ALTER TABLE album ADD COLUMN search_participants TEXT NOT NULL DEFAULT ''`) + if err != nil { + return fmt.Errorf("adding search_participants to album: %w", err) + } + _, err = tx.ExecContext(ctx, `ALTER TABLE album ADD COLUMN search_normalized TEXT NOT NULL DEFAULT ''`) + if err != nil { + return fmt.Errorf("adding search_normalized to album: %w", err) + } + _, err = tx.ExecContext(ctx, `ALTER TABLE artist ADD COLUMN search_normalized TEXT NOT NULL DEFAULT ''`) + if err != nil { + return fmt.Errorf("adding search_normalized to artist: %w", err) + } + + // Step 2: Populate search_participants from participants JSON. + // Extract all "name" values from the participants JSON structure. + // participants is a JSON object like: {"artist":[{"name":"...","id":"..."}],"albumartist":[...]} + // We use json_each + json_extract to flatten all names into a space-separated string. 
+ _, err = tx.ExecContext(ctx, ` + UPDATE media_file SET search_participants = COALESCE( + (SELECT group_concat(json_extract(je2.value, '$.name'), ' ') + FROM json_each(media_file.participants) AS je1, + json_each(je1.value) AS je2 + WHERE json_extract(je2.value, '$.name') IS NOT NULL), + '' + ) + WHERE participants IS NOT NULL AND participants != '' AND participants != '{}' + `) + if err != nil { + return fmt.Errorf("populating media_file search_participants: %w", err) + } + + _, err = tx.ExecContext(ctx, ` + UPDATE album SET search_participants = COALESCE( + (SELECT group_concat(json_extract(je2.value, '$.name'), ' ') + FROM json_each(album.participants) AS je1, + json_each(je1.value) AS je2 + WHERE json_extract(je2.value, '$.name') IS NOT NULL), + '' + ) + WHERE participants IS NOT NULL AND participants != '' AND participants != '{}' + `) + if err != nil { + return fmt.Errorf("populating album search_participants: %w", err) + } + + // Step 2b: Populate search_normalized using SQL REPLACE chains for common punctuation. + // The Go code will compute the precise value on next scan; this is a best-effort approximation. 
+ _, err = tx.ExecContext(ctx, fmt.Sprintf(` + UPDATE artist SET search_normalized = %s + WHERE name != %s`, + stripPunct("name"), stripPunct("name"))) + if err != nil { + return fmt.Errorf("populating artist search_normalized: %w", err) + } + + _, err = tx.ExecContext(ctx, fmt.Sprintf(` + UPDATE album SET search_normalized = TRIM(%s || ' ' || %s) + WHERE name != %s OR COALESCE(album_artist, '') != %s`, + stripPunct("name"), stripPunct("COALESCE(album_artist, '')"), + stripPunct("name"), stripPunct("COALESCE(album_artist, '')"))) + if err != nil { + return fmt.Errorf("populating album search_normalized: %w", err) + } + + _, err = tx.ExecContext(ctx, fmt.Sprintf(` + UPDATE media_file SET search_normalized = + TRIM(%s || ' ' || %s || ' ' || %s || ' ' || %s) + WHERE title != %s + OR COALESCE(album, '') != %s + OR COALESCE(artist, '') != %s + OR COALESCE(album_artist, '') != %s`, + stripPunct("title"), stripPunct("COALESCE(album, '')"), + stripPunct("COALESCE(artist, '')"), stripPunct("COALESCE(album_artist, '')"), + stripPunct("title"), stripPunct("COALESCE(album, '')"), + stripPunct("COALESCE(artist, '')"), stripPunct("COALESCE(album_artist, '')"))) + if err != nil { + return fmt.Errorf("populating media_file search_normalized: %w", err) + } + + // Step 3: Create FTS5 virtual tables + _, err = tx.ExecContext(ctx, ` + CREATE VIRTUAL TABLE IF NOT EXISTS media_file_fts USING fts5( + title, album, artist, album_artist, + sort_title, sort_album_name, sort_artist_name, sort_album_artist_name, + disc_subtitle, search_participants, search_normalized, + content='', content_rowid='rowid', + tokenize='unicode61 remove_diacritics 2' + ) + `) + if err != nil { + return fmt.Errorf("creating media_file_fts: %w", err) + } + + _, err = tx.ExecContext(ctx, ` + CREATE VIRTUAL TABLE IF NOT EXISTS album_fts USING fts5( + name, sort_album_name, album_artist, + search_participants, discs, catalog_num, album_version, search_normalized, + content='', content_rowid='rowid', + 
tokenize='unicode61 remove_diacritics 2' + ) + `) + if err != nil { + return fmt.Errorf("creating album_fts: %w", err) + } + + _, err = tx.ExecContext(ctx, ` + CREATE VIRTUAL TABLE IF NOT EXISTS artist_fts USING fts5( + name, sort_artist_name, search_normalized, + content='', content_rowid='rowid', + tokenize='unicode61 remove_diacritics 2' + ) + `) + if err != nil { + return fmt.Errorf("creating artist_fts: %w", err) + } + + // Step 4: Bulk-populate FTS5 indexes from existing data + _, err = tx.ExecContext(ctx, ` + INSERT INTO media_file_fts(rowid, title, album, artist, album_artist, + sort_title, sort_album_name, sort_artist_name, sort_album_artist_name, + disc_subtitle, search_participants, search_normalized) + SELECT rowid, title, album, artist, album_artist, + sort_title, sort_album_name, sort_artist_name, sort_album_artist_name, + COALESCE(disc_subtitle, ''), COALESCE(search_participants, ''), + COALESCE(search_normalized, '') + FROM media_file + `) + if err != nil { + return fmt.Errorf("populating media_file_fts: %w", err) + } + + _, err = tx.ExecContext(ctx, ` + INSERT INTO album_fts(rowid, name, sort_album_name, album_artist, + search_participants, discs, catalog_num, album_version, search_normalized) + SELECT rowid, name, COALESCE(sort_album_name, ''), COALESCE(album_artist, ''), + COALESCE(search_participants, ''), COALESCE(discs, ''), + COALESCE(catalog_num, ''), + COALESCE((SELECT group_concat(json_extract(je.value, '$.value'), ' ') + FROM json_each(album.tags, '$.albumversion') AS je), ''), + COALESCE(search_normalized, '') + FROM album + `) + if err != nil { + return fmt.Errorf("populating album_fts: %w", err) + } + + _, err = tx.ExecContext(ctx, ` + INSERT INTO artist_fts(rowid, name, sort_artist_name, search_normalized) + SELECT rowid, name, COALESCE(sort_artist_name, ''), COALESCE(search_normalized, '') + FROM artist + `) + if err != nil { + return fmt.Errorf("populating artist_fts: %w", err) + } + + // Step 5: Create triggers for media_file + _, 
err = tx.ExecContext(ctx, ` + CREATE TRIGGER media_file_fts_ai AFTER INSERT ON media_file BEGIN + INSERT INTO media_file_fts(rowid, title, album, artist, album_artist, + sort_title, sort_album_name, sort_artist_name, sort_album_artist_name, + disc_subtitle, search_participants, search_normalized) + VALUES (NEW.rowid, NEW.title, NEW.album, NEW.artist, NEW.album_artist, + NEW.sort_title, NEW.sort_album_name, NEW.sort_artist_name, NEW.sort_album_artist_name, + COALESCE(NEW.disc_subtitle, ''), COALESCE(NEW.search_participants, ''), + COALESCE(NEW.search_normalized, '')); + END + `) + if err != nil { + return fmt.Errorf("creating media_file_fts insert trigger: %w", err) + } + + _, err = tx.ExecContext(ctx, ` + CREATE TRIGGER media_file_fts_ad AFTER DELETE ON media_file BEGIN + INSERT INTO media_file_fts(media_file_fts, rowid, title, album, artist, album_artist, + sort_title, sort_album_name, sort_artist_name, sort_album_artist_name, + disc_subtitle, search_participants, search_normalized) + VALUES ('delete', OLD.rowid, OLD.title, OLD.album, OLD.artist, OLD.album_artist, + OLD.sort_title, OLD.sort_album_name, OLD.sort_artist_name, OLD.sort_album_artist_name, + COALESCE(OLD.disc_subtitle, ''), COALESCE(OLD.search_participants, ''), + COALESCE(OLD.search_normalized, '')); + END + `) + if err != nil { + return fmt.Errorf("creating media_file_fts delete trigger: %w", err) + } + + _, err = tx.ExecContext(ctx, ` + CREATE TRIGGER media_file_fts_au AFTER UPDATE ON media_file + WHEN + OLD.title IS NOT NEW.title OR + OLD.album IS NOT NEW.album OR + OLD.artist IS NOT NEW.artist OR + OLD.album_artist IS NOT NEW.album_artist OR + OLD.sort_title IS NOT NEW.sort_title OR + OLD.sort_album_name IS NOT NEW.sort_album_name OR + OLD.sort_artist_name IS NOT NEW.sort_artist_name OR + OLD.sort_album_artist_name IS NOT NEW.sort_album_artist_name OR + OLD.disc_subtitle IS NOT NEW.disc_subtitle OR + OLD.search_participants IS NOT NEW.search_participants OR + OLD.search_normalized IS NOT 
NEW.search_normalized + BEGIN + INSERT INTO media_file_fts(media_file_fts, rowid, title, album, artist, album_artist, + sort_title, sort_album_name, sort_artist_name, sort_album_artist_name, + disc_subtitle, search_participants, search_normalized) + VALUES ('delete', OLD.rowid, OLD.title, OLD.album, OLD.artist, OLD.album_artist, + OLD.sort_title, OLD.sort_album_name, OLD.sort_artist_name, OLD.sort_album_artist_name, + COALESCE(OLD.disc_subtitle, ''), COALESCE(OLD.search_participants, ''), + COALESCE(OLD.search_normalized, '')); + INSERT INTO media_file_fts(rowid, title, album, artist, album_artist, + sort_title, sort_album_name, sort_artist_name, sort_album_artist_name, + disc_subtitle, search_participants, search_normalized) + VALUES (NEW.rowid, NEW.title, NEW.album, NEW.artist, NEW.album_artist, + NEW.sort_title, NEW.sort_album_name, NEW.sort_artist_name, NEW.sort_album_artist_name, + COALESCE(NEW.disc_subtitle, ''), COALESCE(NEW.search_participants, ''), + COALESCE(NEW.search_normalized, '')); + END + `) + if err != nil { + return fmt.Errorf("creating media_file_fts update trigger: %w", err) + } + + // Step 6: Create triggers for album + _, err = tx.ExecContext(ctx, ` + CREATE TRIGGER album_fts_ai AFTER INSERT ON album BEGIN + INSERT INTO album_fts(rowid, name, sort_album_name, album_artist, + search_participants, discs, catalog_num, album_version, search_normalized) + VALUES (NEW.rowid, NEW.name, COALESCE(NEW.sort_album_name, ''), COALESCE(NEW.album_artist, ''), + COALESCE(NEW.search_participants, ''), COALESCE(NEW.discs, ''), + COALESCE(NEW.catalog_num, ''), + COALESCE((SELECT group_concat(json_extract(je.value, '$.value'), ' ') + FROM json_each(NEW.tags, '$.albumversion') AS je), ''), + COALESCE(NEW.search_normalized, '')); + END + `) + if err != nil { + return fmt.Errorf("creating album_fts insert trigger: %w", err) + } + + _, err = tx.ExecContext(ctx, ` + CREATE TRIGGER album_fts_ad AFTER DELETE ON album BEGIN + INSERT INTO album_fts(album_fts, rowid, name, 
sort_album_name, album_artist, + search_participants, discs, catalog_num, album_version, search_normalized) + VALUES ('delete', OLD.rowid, OLD.name, COALESCE(OLD.sort_album_name, ''), COALESCE(OLD.album_artist, ''), + COALESCE(OLD.search_participants, ''), COALESCE(OLD.discs, ''), + COALESCE(OLD.catalog_num, ''), + COALESCE((SELECT group_concat(json_extract(je.value, '$.value'), ' ') + FROM json_each(OLD.tags, '$.albumversion') AS je), ''), + COALESCE(OLD.search_normalized, '')); + END + `) + if err != nil { + return fmt.Errorf("creating album_fts delete trigger: %w", err) + } + + _, err = tx.ExecContext(ctx, ` + CREATE TRIGGER album_fts_au AFTER UPDATE ON album + WHEN + OLD.name IS NOT NEW.name OR + OLD.sort_album_name IS NOT NEW.sort_album_name OR + OLD.album_artist IS NOT NEW.album_artist OR + OLD.search_participants IS NOT NEW.search_participants OR + OLD.discs IS NOT NEW.discs OR + OLD.catalog_num IS NOT NEW.catalog_num OR + OLD.tags IS NOT NEW.tags OR + OLD.search_normalized IS NOT NEW.search_normalized + BEGIN + INSERT INTO album_fts(album_fts, rowid, name, sort_album_name, album_artist, + search_participants, discs, catalog_num, album_version, search_normalized) + VALUES ('delete', OLD.rowid, OLD.name, COALESCE(OLD.sort_album_name, ''), COALESCE(OLD.album_artist, ''), + COALESCE(OLD.search_participants, ''), COALESCE(OLD.discs, ''), + COALESCE(OLD.catalog_num, ''), + COALESCE((SELECT group_concat(json_extract(je.value, '$.value'), ' ') + FROM json_each(OLD.tags, '$.albumversion') AS je), ''), + COALESCE(OLD.search_normalized, '')); + INSERT INTO album_fts(rowid, name, sort_album_name, album_artist, + search_participants, discs, catalog_num, album_version, search_normalized) + VALUES (NEW.rowid, NEW.name, COALESCE(NEW.sort_album_name, ''), COALESCE(NEW.album_artist, ''), + COALESCE(NEW.search_participants, ''), COALESCE(NEW.discs, ''), + COALESCE(NEW.catalog_num, ''), + COALESCE((SELECT group_concat(json_extract(je.value, '$.value'), ' ') + FROM 
json_each(NEW.tags, '$.albumversion') AS je), ''), + COALESCE(NEW.search_normalized, '')); + END + `) + if err != nil { + return fmt.Errorf("creating album_fts update trigger: %w", err) + } + + // Step 7: Create triggers for artist + _, err = tx.ExecContext(ctx, ` + CREATE TRIGGER artist_fts_ai AFTER INSERT ON artist BEGIN + INSERT INTO artist_fts(rowid, name, sort_artist_name, search_normalized) + VALUES (NEW.rowid, NEW.name, COALESCE(NEW.sort_artist_name, ''), + COALESCE(NEW.search_normalized, '')); + END + `) + if err != nil { + return fmt.Errorf("creating artist_fts insert trigger: %w", err) + } + + _, err = tx.ExecContext(ctx, ` + CREATE TRIGGER artist_fts_ad AFTER DELETE ON artist BEGIN + INSERT INTO artist_fts(artist_fts, rowid, name, sort_artist_name, search_normalized) + VALUES ('delete', OLD.rowid, OLD.name, COALESCE(OLD.sort_artist_name, ''), + COALESCE(OLD.search_normalized, '')); + END + `) + if err != nil { + return fmt.Errorf("creating artist_fts delete trigger: %w", err) + } + + _, err = tx.ExecContext(ctx, ` + CREATE TRIGGER artist_fts_au AFTER UPDATE ON artist + WHEN + OLD.name IS NOT NEW.name OR + OLD.sort_artist_name IS NOT NEW.sort_artist_name OR + OLD.search_normalized IS NOT NEW.search_normalized + BEGIN + INSERT INTO artist_fts(artist_fts, rowid, name, sort_artist_name, search_normalized) + VALUES ('delete', OLD.rowid, OLD.name, COALESCE(OLD.sort_artist_name, ''), + COALESCE(OLD.search_normalized, '')); + INSERT INTO artist_fts(rowid, name, sort_artist_name, search_normalized) + VALUES (NEW.rowid, NEW.name, COALESCE(NEW.sort_artist_name, ''), + COALESCE(NEW.search_normalized, '')); + END + `) + if err != nil { + return fmt.Errorf("creating artist_fts update trigger: %w", err) + } + + return nil +} + +func downAddFts5Search(ctx context.Context, tx *sql.Tx) error { + for _, trigger := range []string{ + "media_file_fts_ai", "media_file_fts_ad", "media_file_fts_au", + "album_fts_ai", "album_fts_ad", "album_fts_au", + "artist_fts_ai", 
"artist_fts_ad", "artist_fts_au", + } { + _, err := tx.ExecContext(ctx, "DROP TRIGGER IF EXISTS "+trigger) + if err != nil { + return fmt.Errorf("dropping trigger %s: %w", trigger, err) + } + } + + for _, table := range []string{"media_file_fts", "album_fts", "artist_fts"} { + _, err := tx.ExecContext(ctx, "DROP TABLE IF EXISTS "+table) + if err != nil { + return fmt.Errorf("dropping table %s: %w", table, err) + } + } + + // Note: We don't drop search_participants columns because SQLite doesn't support DROP COLUMN + // on older versions, and the column is harmless if left in place. + return nil +} diff --git a/main.go b/main.go index 65db162a..b5fb508b 100644 --- a/main.go +++ b/main.go @@ -9,11 +9,12 @@ import ( //goland:noinspection GoBoolExpressions func main() { - // This import is used to force the inclusion of the `netgo` tag when compiling the project. + // These references force the inclusion of build tags when compiling the project. // If you get compilation errors like "undefined: buildtags.NETGO", this means you forgot to specify - // the `netgo` build tag when compiling the project. + // the required build tags when compiling the project. // To avoid these kind of errors, you should use `make build` to compile the project. _ = buildtags.NETGO + _ = buildtags.SQLITE_FTS5 cmd.Execute() } diff --git a/persistence/album_repository.go b/persistence/album_repository.go index 651953a1..58bfcca5 100644 --- a/persistence/album_repository.go +++ b/persistence/album_repository.go @@ -62,11 +62,14 @@ func (a *dbAlbum) PostScan() error { func (a *dbAlbum) PostMapArgs(args map[string]any) error { fullText := []string{a.Name, a.SortAlbumName, a.AlbumArtist} - fullText = append(fullText, a.Album.Participants.AllNames()...) + participantNames := a.Album.Participants.AllNames() + fullText = append(fullText, participantNames...) fullText = append(fullText, slices.Collect(maps.Values(a.Album.Discs))...) fullText = append(fullText, a.Album.Tags[model.TagAlbumVersion]...) 
fullText = append(fullText, a.Album.Tags[model.TagCatalogNumber]...) args["full_text"] = formatFullText(fullText...) + args["search_participants"] = strings.Join(participantNames, " ") + args["search_normalized"] = normalizeForFTS(a.Name, a.AlbumArtist) args["tags"] = marshalTags(a.Album.Tags) args["participants"] = marshalParticipants(a.Album.Participants) diff --git a/persistence/album_repository_test.go b/persistence/album_repository_test.go index 8c82dc47..9fbc6b97 100644 --- a/persistence/album_repository_test.go +++ b/persistence/album_repository_test.go @@ -56,17 +56,23 @@ var _ = Describe("AlbumRepository", func() { It("returns all records sorted", func() { Expect(GetAll(model.QueryOptions{Sort: "name"})).To(Equal(model.Albums{ albumAbbeyRoad, + albumWithVersion, + albumCJK, albumMultiDisc, albumRadioactivity, albumSgtPeppers, + albumPunctuation, })) }) It("returns all records sorted desc", func() { Expect(GetAll(model.QueryOptions{Sort: "name", Order: "desc"})).To(Equal(model.Albums{ + albumPunctuation, albumSgtPeppers, albumRadioactivity, albumMultiDisc, + albumCJK, + albumWithVersion, albumAbbeyRoad, })) }) diff --git a/persistence/artist_repository.go b/persistence/artist_repository.go index 5623bd7f..f801787d 100644 --- a/persistence/artist_repository.go +++ b/persistence/artist_repository.go @@ -102,6 +102,7 @@ func (a *dbArtist) PostMapArgs(m map[string]any) error { similarArtists, _ := json.Marshal(sa) m["similar_artists"] = string(similarArtists) m["full_text"] = formatFullText(a.Name, a.SortArtistName) + m["search_normalized"] = normalizeForFTS(a.Name) // Do not override the sort_artist_name and mbz_artist_id fields if they are empty // TODO: Better way to handle this? 
diff --git a/persistence/artist_repository_test.go b/persistence/artist_repository_test.go index 18883378..15340eeb 100644 --- a/persistence/artist_repository_test.go +++ b/persistence/artist_repository_test.go @@ -193,7 +193,7 @@ var _ = Describe("ArtistRepository", func() { Describe("Basic Operations", func() { Describe("Count", func() { It("returns the number of artists in the DB", func() { - Expect(repo.CountAll()).To(Equal(int64(2))) + Expect(repo.CountAll()).To(Equal(int64(4))) }) }) @@ -228,13 +228,19 @@ var _ = Describe("ArtistRepository", func() { idx, err := repo.GetIndex(false, []int{1}) Expect(err).ToNot(HaveOccurred()) - Expect(idx).To(HaveLen(2)) + Expect(idx).To(HaveLen(4)) Expect(idx[0].ID).To(Equal("F")) Expect(idx[0].Artists).To(HaveLen(1)) Expect(idx[0].Artists[0].Name).To(Equal(artistBeatles.Name)) Expect(idx[1].ID).To(Equal("K")) Expect(idx[1].Artists).To(HaveLen(1)) Expect(idx[1].Artists[0].Name).To(Equal(artistKraftwerk.Name)) + Expect(idx[2].ID).To(Equal("R")) + Expect(idx[2].Artists).To(HaveLen(1)) + Expect(idx[2].Artists[0].Name).To(Equal(artistPunctuation.Name)) + Expect(idx[3].ID).To(Equal("S")) + Expect(idx[3].Artists).To(HaveLen(1)) + Expect(idx[3].Artists[0].Name).To(Equal(artistCJK.Name)) // Restore the original value artistBeatles.SortArtistName = "" @@ -246,13 +252,19 @@ var _ = Describe("ArtistRepository", func() { XIt("returns the index when PreferSortTags is true and SortArtistName is empty", func() { idx, err := repo.GetIndex(false, []int{1}) Expect(err).ToNot(HaveOccurred()) - Expect(idx).To(HaveLen(2)) + Expect(idx).To(HaveLen(4)) Expect(idx[0].ID).To(Equal("B")) Expect(idx[0].Artists).To(HaveLen(1)) Expect(idx[0].Artists[0].Name).To(Equal(artistBeatles.Name)) Expect(idx[1].ID).To(Equal("K")) Expect(idx[1].Artists).To(HaveLen(1)) Expect(idx[1].Artists[0].Name).To(Equal(artistKraftwerk.Name)) + Expect(idx[2].ID).To(Equal("R")) + Expect(idx[2].Artists).To(HaveLen(1)) + 
Expect(idx[2].Artists[0].Name).To(Equal(artistPunctuation.Name)) + Expect(idx[3].ID).To(Equal("S")) + Expect(idx[3].Artists).To(HaveLen(1)) + Expect(idx[3].Artists[0].Name).To(Equal(artistCJK.Name)) }) }) @@ -268,13 +280,19 @@ var _ = Describe("ArtistRepository", func() { idx, err := repo.GetIndex(false, []int{1}) Expect(err).ToNot(HaveOccurred()) - Expect(idx).To(HaveLen(2)) + Expect(idx).To(HaveLen(4)) Expect(idx[0].ID).To(Equal("B")) Expect(idx[0].Artists).To(HaveLen(1)) Expect(idx[0].Artists[0].Name).To(Equal(artistBeatles.Name)) Expect(idx[1].ID).To(Equal("K")) Expect(idx[1].Artists).To(HaveLen(1)) Expect(idx[1].Artists[0].Name).To(Equal(artistKraftwerk.Name)) + Expect(idx[2].ID).To(Equal("R")) + Expect(idx[2].Artists).To(HaveLen(1)) + Expect(idx[2].Artists[0].Name).To(Equal(artistPunctuation.Name)) + Expect(idx[3].ID).To(Equal("S")) + Expect(idx[3].Artists).To(HaveLen(1)) + Expect(idx[3].Artists[0].Name).To(Equal(artistCJK.Name)) // Restore the original value artistBeatles.SortArtistName = "" @@ -285,13 +303,19 @@ var _ = Describe("ArtistRepository", func() { It("returns the index when SortArtistName is empty", func() { idx, err := repo.GetIndex(false, []int{1}) Expect(err).ToNot(HaveOccurred()) - Expect(idx).To(HaveLen(2)) + Expect(idx).To(HaveLen(4)) Expect(idx[0].ID).To(Equal("B")) Expect(idx[0].Artists).To(HaveLen(1)) Expect(idx[0].Artists[0].Name).To(Equal(artistBeatles.Name)) Expect(idx[1].ID).To(Equal("K")) Expect(idx[1].Artists).To(HaveLen(1)) Expect(idx[1].Artists[0].Name).To(Equal(artistKraftwerk.Name)) + Expect(idx[2].ID).To(Equal("R")) + Expect(idx[2].Artists).To(HaveLen(1)) + Expect(idx[2].Artists[0].Name).To(Equal(artistPunctuation.Name)) + Expect(idx[3].ID).To(Equal("S")) + Expect(idx[3].Artists).To(HaveLen(1)) + Expect(idx[3].Artists[0].Name).To(Equal(artistCJK.Name)) }) }) @@ -377,7 +401,7 @@ var _ = Describe("ArtistRepository", func() { // Admin users can see all content when valid library IDs are provided idx, err := repo.GetIndex(false, 
[]int{1}) Expect(err).ToNot(HaveOccurred()) - Expect(idx).To(HaveLen(2)) + Expect(idx).To(HaveLen(4)) // With non-existent library ID, admin users see no content because no artists are associated with that library idx, err = repo.GetIndex(false, []int{999}) @@ -625,11 +649,11 @@ var _ = Describe("ArtistRepository", func() { It("sees all artists regardless of library permissions", func() { count, err := repo.CountAll() Expect(err).ToNot(HaveOccurred()) - Expect(count).To(Equal(int64(2))) + Expect(count).To(Equal(int64(4))) artists, err := repo.GetAll() Expect(err).ToNot(HaveOccurred()) - Expect(artists).To(HaveLen(2)) + Expect(artists).To(HaveLen(4)) exists, err := repo.Exists(artistBeatles.ID) Expect(err).ToNot(HaveOccurred()) @@ -661,7 +685,7 @@ var _ = Describe("ArtistRepository", func() { // Should see missing artist in GetAll by default for admin users artists, err := repo.GetAll() Expect(err).ToNot(HaveOccurred()) - Expect(artists).To(HaveLen(3)) // Including the missing artist + Expect(artists).To(HaveLen(5)) // Including the missing artist // Search never returns missing artists (hardcoded behavior) results, err := repo.Search("Missing Artist", 0, 10) @@ -767,19 +791,19 @@ var _ = Describe("ArtistRepository", func() { It("CountAll returns correct count after gaining access", func() { count, err := restrictedRepo.CountAll() Expect(err).ToNot(HaveOccurred()) - Expect(count).To(Equal(int64(2))) // Beatles and Kraftwerk + Expect(count).To(Equal(int64(4))) // Beatles, Kraftwerk, Seatbelts, and The Roots }) It("GetAll returns artists after gaining access", func() { artists, err := restrictedRepo.GetAll() Expect(err).ToNot(HaveOccurred()) - Expect(artists).To(HaveLen(2)) + Expect(artists).To(HaveLen(4)) var names []string for _, artist := range artists { names = append(names, artist.Name) } - Expect(names).To(ContainElements("The Beatles", "Kraftwerk")) + Expect(names).To(ContainElements("The Beatles", "Kraftwerk", "シートベルツ", "The Roots")) }) It("Exists returns true 
for accessible artists", func() { @@ -796,7 +820,7 @@ var _ = Describe("ArtistRepository", func() { // With valid library access, should see artists idx, err := restrictedRepo.GetIndex(false, []int{1}) Expect(err).ToNot(HaveOccurred()) - Expect(idx).To(HaveLen(2)) + Expect(idx).To(HaveLen(4)) // With non-existent library ID, should see nothing (non-admin user) idx, err = restrictedRepo.GetIndex(false, []int{999}) diff --git a/persistence/mediafile_repository.go b/persistence/mediafile_repository.go index 617cce4c..264be6f3 100644 --- a/persistence/mediafile_repository.go +++ b/persistence/mediafile_repository.go @@ -58,8 +58,11 @@ func (m *dbMediaFile) PostScan() error { func (m *dbMediaFile) PostMapArgs(args map[string]any) error { fullText := []string{m.FullTitle(), m.Album, m.Artist, m.AlbumArtist, m.SortTitle, m.SortAlbumName, m.SortArtistName, m.SortAlbumArtistName, m.DiscSubtitle} - fullText = append(fullText, m.MediaFile.Participants.AllNames()...) + participantNames := m.MediaFile.Participants.AllNames() + fullText = append(fullText, participantNames...) args["full_text"] = formatFullText(fullText...) 
+ args["search_participants"] = strings.Join(participantNames, " ") + args["search_normalized"] = normalizeForFTS(m.FullTitle(), m.Album, m.Artist, m.AlbumArtist) args["tags"] = marshalTags(m.MediaFile.Tags) args["participants"] = marshalParticipants(m.MediaFile.Participants) return nil diff --git a/persistence/mediafile_repository_test.go b/persistence/mediafile_repository_test.go index 84cfd464..853480b3 100644 --- a/persistence/mediafile_repository_test.go +++ b/persistence/mediafile_repository_test.go @@ -39,7 +39,7 @@ var _ = Describe("MediaRepository", func() { }) It("counts the number of mediafiles in the DB", func() { - Expect(mr.CountAll()).To(Equal(int64(10))) + Expect(mr.CountAll()).To(Equal(int64(13))) }) Describe("CountBySuffix", func() { diff --git a/persistence/persistence_suite_test.go b/persistence/persistence_suite_test.go index 559ca3d4..0ee1570a 100644 --- a/persistence/persistence_suite_test.go +++ b/persistence/persistence_suite_test.go @@ -56,12 +56,22 @@ func al(al model.Album) model.Album { return al } +func alWithTags(a model.Album, tags model.Tags) model.Album { + a = al(a) + a.Tags = tags + return a +} + var ( - artistKraftwerk = model.Artist{ID: "2", Name: "Kraftwerk", OrderArtistName: "kraftwerk"} - artistBeatles = model.Artist{ID: "3", Name: "The Beatles", OrderArtistName: "beatles"} - testArtists = model.Artists{ + artistKraftwerk = model.Artist{ID: "2", Name: "Kraftwerk", OrderArtistName: "kraftwerk"} + artistBeatles = model.Artist{ID: "3", Name: "The Beatles", OrderArtistName: "beatles"} + artistCJK = model.Artist{ID: "4", Name: "シートベルツ", SortArtistName: "Seatbelts", OrderArtistName: "seatbelts"} + artistPunctuation = model.Artist{ID: "5", Name: "The Roots", OrderArtistName: "roots"} + testArtists = model.Artists{ artistKraftwerk, artistBeatles, + artistCJK, + artistPunctuation, } ) @@ -70,11 +80,18 @@ var ( albumAbbeyRoad = al(model.Album{ID: "102", Name: "Abbey Road", AlbumArtist: "The Beatles", OrderAlbumName: "abbey road", 
AlbumArtistID: "3", EmbedArtPath: p("/beatles/1/come together.mp3"), SongCount: 1, MaxYear: 1969}) albumRadioactivity = al(model.Album{ID: "103", Name: "Radioactivity", AlbumArtist: "Kraftwerk", OrderAlbumName: "radioactivity", AlbumArtistID: "2", EmbedArtPath: p("/kraft/radio/radio.mp3"), SongCount: 2}) albumMultiDisc = al(model.Album{ID: "104", Name: "Multi Disc Album", AlbumArtist: "Test Artist", OrderAlbumName: "multi disc album", AlbumArtistID: "1", EmbedArtPath: p("/test/multi/disc1/track1.mp3"), SongCount: 4}) - testAlbums = model.Albums{ + albumCJK = al(model.Album{ID: "105", Name: "COWBOY BEBOP", AlbumArtist: "シートベルツ", OrderAlbumName: "cowboy bebop", AlbumArtistID: "4", EmbedArtPath: p("/seatbelts/cowboy-bebop/track1.mp3"), SongCount: 1}) + albumWithVersion = alWithTags(model.Album{ID: "106", Name: "Abbey Road", AlbumArtist: "The Beatles", OrderAlbumName: "abbey road", AlbumArtistID: "3", EmbedArtPath: p("/beatles/2/come together.mp3"), SongCount: 1, MaxYear: 2019}, + model.Tags{model.TagAlbumVersion: {"Deluxe Edition"}}) + albumPunctuation = al(model.Album{ID: "107", Name: "Things Fall Apart", AlbumArtist: "The Roots", OrderAlbumName: "things fall apart", AlbumArtistID: "5", EmbedArtPath: p("/roots/things/track1.mp3"), SongCount: 1}) + testAlbums = model.Albums{ albumSgtPeppers, albumAbbeyRoad, albumRadioactivity, albumMultiDisc, + albumCJK, + albumWithVersion, + albumPunctuation, } ) @@ -101,6 +118,9 @@ var ( songDisc1Track01 = mf(model.MediaFile{ID: "2002", Title: "Disc 1 Track 1", ArtistID: "1", Artist: "Test Artist", AlbumID: "104", Album: "Multi Disc Album", DiscNumber: 1, TrackNumber: 1, Path: p("/test/multi/disc1/track1.mp3"), OrderAlbumName: "multi disc album", OrderArtistName: "test artist"}) songDisc2Track01 = mf(model.MediaFile{ID: "2003", Title: "Disc 2 Track 1", ArtistID: "1", Artist: "Test Artist", AlbumID: "104", Album: "Multi Disc Album", DiscNumber: 2, TrackNumber: 1, Path: p("/test/multi/disc2/track1.mp3"), OrderAlbumName: "multi disc 
album", OrderArtistName: "test artist"}) songDisc1Track02 = mf(model.MediaFile{ID: "2004", Title: "Disc 1 Track 2", ArtistID: "1", Artist: "Test Artist", AlbumID: "104", Album: "Multi Disc Album", DiscNumber: 1, TrackNumber: 2, Path: p("/test/multi/disc1/track2.mp3"), OrderAlbumName: "multi disc album", OrderArtistName: "test artist"}) + songCJK = mf(model.MediaFile{ID: "3001", Title: "プラチナ・ジェット", ArtistID: "4", Artist: "シートベルツ", AlbumID: "105", Album: "COWBOY BEBOP", Path: p("/seatbelts/cowboy-bebop/track1.mp3")}) + songVersioned = mf(model.MediaFile{ID: "3002", Title: "Come Together", ArtistID: "3", Artist: "The Beatles", AlbumID: "106", Album: "Abbey Road", Path: p("/beatles/2/come together.mp3")}) + songPunctuation = mf(model.MediaFile{ID: "3003", Title: "!!!!!!!", ArtistID: "5", Artist: "The Roots", AlbumID: "107", Album: "Things Fall Apart", Path: p("/roots/things/track1.mp3")}) testSongs = model.MediaFiles{ songDayInALife, songComeTogether, @@ -112,6 +132,9 @@ var ( songDisc1Track01, songDisc2Track01, songDisc1Track02, + songCJK, + songVersioned, + songPunctuation, } ) diff --git a/persistence/sql_restful.go b/persistence/sql_restful.go index ff0d06a8..27207c45 100644 --- a/persistence/sql_restful.go +++ b/persistence/sql_restful.go @@ -109,9 +109,10 @@ func booleanFilter(field string, value any) Sqlizer { func fullTextFilter(tableName string, mbidFields ...string) func(string, any) Sqlizer { return func(field string, value any) Sqlizer { v := strings.ToLower(value.(string)) + searchExpr := getSearchExpr() cond := cmp.Or( mbidExpr(tableName, v, mbidFields...), - fullTextExpr(tableName, v), + searchExpr(tableName, v), ) return cond } diff --git a/persistence/sql_restful_test.go b/persistence/sql_restful_test.go index e3399df4..ea0f802a 100644 --- a/persistence/sql_restful_test.go +++ b/persistence/sql_restful_test.go @@ -26,7 +26,9 @@ var _ = Describe("sqlRestful", func() { Expect(r.parseRestFilters(context.Background(), options)).To(BeNil()) }) - It(`returns 
nil if tries a filter with fullTextExpr("'")`, func() { + It(`returns nil if tries a filter with legacySearchExpr("'")`, func() { + DeferCleanup(configtest.SetupConfig()) + conf.Server.Search.Backend = "legacy" r.filterMappings = map[string]filterFunc{ "name": fullTextFilter("table"), } @@ -77,6 +79,7 @@ var _ = Describe("sqlRestful", func() { BeforeEach(func() { DeferCleanup(configtest.SetupConfig()) + conf.Server.Search.Backend = "legacy" tableName = "test_table" mbidFields = []string{"mbid", "artist_mbid"} filter = fullTextFilter(tableName, mbidFields...) @@ -136,7 +139,7 @@ var _ = Describe("sqlRestful", func() { Context("when SearchFullString config changes behavior", func() { It("uses different separator with SearchFullString=false", func() { - conf.Server.SearchFullString = false + conf.Server.Search.FullString = false result := filter("search", "test query") andCondition, ok := result.(squirrel.And) @@ -149,7 +152,7 @@ var _ = Describe("sqlRestful", func() { }) It("uses no separator with SearchFullString=true", func() { - conf.Server.SearchFullString = true + conf.Server.Search.FullString = true result := filter("search", "test query") andCondition, ok := result.(squirrel.And) diff --git a/persistence/sql_search.go b/persistence/sql_search.go index 0d3bfb74..e5c245bd 100644 --- a/persistence/sql_search.go +++ b/persistence/sql_search.go @@ -6,6 +6,7 @@ import ( . "github.com/Masterminds/squirrel" "github.com/google/uuid" "github.com/navidrome/navidrome/conf" + "github.com/navidrome/navidrome/log" "github.com/navidrome/navidrome/model" "github.com/navidrome/navidrome/utils/str" ) @@ -15,6 +16,26 @@ func formatFullText(text ...string) string { return " " + fullText } +// searchExprFunc is the function signature for search expression builders. +type searchExprFunc func(tableName string, query string) Sqlizer + +// getSearchExpr returns the active search expression function based on config. 
+// It falls back to legacySearchExpr when Search.FullString is enabled, because +// FTS5 is token-based and cannot match substrings within words. +// CJK queries are routed to likeSearchExpr, since FTS5's unicode61 tokenizer +// cannot segment CJK text. +func getSearchExpr() searchExprFunc { + if conf.Server.Search.Backend == "legacy" || conf.Server.Search.FullString { + return legacySearchExpr + } + return func(tableName, query string) Sqlizer { + if containsCJK(query) { + return likeSearchExpr(tableName, query) + } + return ftsSearchExpr(tableName, query) + } +} + // doSearch performs a full-text search with the specified parameters. // The naturalOrder is used to sort results when no full-text filter is applied. It is useful for cases like // OpenSubsonic, where an empty search query should return all results in a natural order. Normally the parameter @@ -26,7 +47,8 @@ func (r sqlRepository) doSearch(sq SelectBuilder, q string, offset, size int, re return nil } - filter := fullTextExpr(r.tableName, q) + searchExpr := getSearchExpr() + filter := searchExpr(r.tableName, q) if filter != nil { sq = sq.Where(filter) sq = sq.OrderBy(orderBys...) @@ -59,13 +81,16 @@ func mbidExpr(tableName, mbid string, mbidFields ...string) Sqlizer { return Or(cond) } -func fullTextExpr(tableName string, s string) Sqlizer { +// legacySearchExpr generates LIKE-based search filters against the full_text column. +// This is the original search implementation, used when Search.Backend="legacy". 
+func legacySearchExpr(tableName string, s string) Sqlizer { q := str.SanitizeStrings(s) if q == "" { + log.Trace("Search using legacy backend, query is empty", "table", tableName) return nil } var sep string - if !conf.Server.SearchFullString { + if !conf.Server.Search.FullString { sep = " " } parts := strings.Split(q, " ") @@ -73,5 +98,6 @@ func fullTextExpr(tableName string, s string) Sqlizer { for _, part := range parts { filters = append(filters, Like{tableName + ".full_text": "%" + sep + part + "%"}) } + log.Trace("Search using legacy backend", "query", filters, "table", tableName) return filters } diff --git a/persistence/sql_search_fts.go b/persistence/sql_search_fts.go new file mode 100644 index 00000000..25f16cb8 --- /dev/null +++ b/persistence/sql_search_fts.go @@ -0,0 +1,261 @@ +package persistence + +import ( + "fmt" + "regexp" + "strings" + "unicode" + "unicode/utf8" + + . "github.com/Masterminds/squirrel" + "github.com/navidrome/navidrome/log" +) + +// containsCJK returns true if the string contains any CJK (Chinese/Japanese/Korean) characters. +// CJK text doesn't use spaces between words, so FTS5's unicode61 tokenizer treats entire +// CJK phrases as single tokens, making token-based search ineffective for CJK content. +func containsCJK(s string) bool { + for _, r := range s { + if unicode.Is(unicode.Han, r) || + unicode.Is(unicode.Hiragana, r) || + unicode.Is(unicode.Katakana, r) || + unicode.Is(unicode.Hangul, r) { + return true + } + } + return false +} + +// fts5SpecialChars matches characters that should be stripped from user input. +// We keep only Unicode letters, numbers, whitespace, * (prefix wildcard), " (phrase quotes), +// and \x00 (internal placeholder marker). All punctuation is removed because the unicode61 +// tokenizer treats it as token separators, and characters like ' can cause FTS5 parse errors +// as unbalanced string delimiters. 
+var fts5SpecialChars = regexp.MustCompile(`[^\p{L}\p{N}\s*"\x00]`) + +// fts5PunctStrip strips everything except letters and numbers (no whitespace, wildcards, or quotes). +// Used for normalizing words at index time to create concatenated forms (e.g., "R.E.M." → "REM"). +var fts5PunctStrip = regexp.MustCompile(`[^\p{L}\p{N}]`) + +// fts5Operators matches FTS5 boolean operators as whole words (case-insensitive). +var fts5Operators = regexp.MustCompile(`(?i)\b(AND|OR|NOT|NEAR)\b`) + +// fts5LeadingStar matches a * at the start of a token. FTS5 only supports * at the end (prefix queries). +var fts5LeadingStar = regexp.MustCompile(`(^|[\s])\*+`) + +// normalizeForFTS takes multiple strings, strips non-letter/non-number characters from each word, +// and returns a space-separated string of words that changed after stripping (deduplicated). +// This is used at index time to create concatenated forms: "R.E.M." → "REM", "AC/DC" → "ACDC". +func normalizeForFTS(values ...string) string { + seen := make(map[string]struct{}) + var result []string + for _, v := range values { + for _, word := range strings.Fields(v) { + stripped := fts5PunctStrip.ReplaceAllString(word, "") + if stripped == "" || stripped == word { + continue + } + lower := strings.ToLower(stripped) + if _, ok := seen[lower]; ok { + continue + } + seen[lower] = struct{}{} + result = append(result, stripped) + } + } + return strings.Join(result, " ") +} + +// isSingleUnicodeLetter returns true if token is exactly one Unicode letter. +func isSingleUnicodeLetter(token string) bool { + r, size := utf8.DecodeRuneInString(token) + return size == len(token) && size > 0 && unicode.IsLetter(r) +} + +// namePunctuation is the set of characters commonly used as separators in artist/album +// names (hyphens, slashes, dots, apostrophes). Only words containing these are candidates +// for punctuated-word processing; other special characters (^, :, &) are just stripped. 
+const namePunctuation = `-/.''` + +// processPunctuatedWords handles words with embedded name punctuation before the general +// special-character stripping. For each punctuated word it produces either: +// - A quoted phrase for dotted abbreviations: R.E.M. → "R E M" +// - A phrase+concat OR for other patterns: a-ha → ("a ha" OR aha*) +func processPunctuatedWords(input string, phrases []string) (string, []string) { + words := strings.Fields(input) + var result []string + for _, w := range words { + if strings.HasPrefix(w, "\x00") || strings.ContainsAny(w, `*"`) || !strings.ContainsAny(w, namePunctuation) { + result = append(result, w) + continue + } + concat := fts5PunctStrip.ReplaceAllString(w, "") + if concat == "" || concat == w { + result = append(result, w) + continue + } + subTokens := strings.Fields(fts5SpecialChars.ReplaceAllString(w, " ")) + if len(subTokens) < 2 { + // Single sub-token after splitting (e.g., N' → N): just use the stripped form + result = append(result, concat) + continue + } + // Dotted abbreviations (R.E.M., U.K.) — all single letters separated by dots only + if isDottedAbbreviation(w, subTokens) { + phrases = append(phrases, fmt.Sprintf(`"%s"`, strings.Join(subTokens, " "))) + } else { + // Punctuated names (a-ha, AC/DC, Jay-Z) — phrase for adjacency + concat for search_normalized + phrases = append(phrases, fmt.Sprintf(`("%s" OR %s*)`, strings.Join(subTokens, " "), concat)) + } + result = append(result, fmt.Sprintf("\x00PHRASE%d\x00", len(phrases)-1)) + } + return strings.Join(result, " "), phrases +} + +// isDottedAbbreviation returns true if w uses only dots as punctuation and all sub-tokens +// are single letters (e.g., "R.E.M.", "U.K." but not "a-ha" or "AC/DC"). +func isDottedAbbreviation(w string, subTokens []string) bool { + for _, r := range w { + if !unicode.IsLetter(r) && !unicode.IsNumber(r) && r != '.' 
{ + return false + } + } + for _, st := range subTokens { + if !isSingleUnicodeLetter(st) { + return false + } + } + return true +} + +// buildFTS5Query preprocesses user input into a safe FTS5 MATCH expression. +// It preserves quoted phrases and * prefix wildcards, neutralizes FTS5 operators +// (by lowercasing them, since FTS5 operators are case-sensitive) and strips +// special characters to prevent query injection. +func buildFTS5Query(userInput string) string { + q := strings.TrimSpace(userInput) + if q == "" { + return "" + } + + var phrases []string + result := q + for { + start := strings.Index(result, `"`) + if start == -1 { + break + } + end := strings.Index(result[start+1:], `"`) + if end == -1 { + // Unmatched quote — remove it + result = result[:start] + result[start+1:] + break + } + end += start + 1 + phrase := result[start : end+1] // includes quotes + phrases = append(phrases, phrase) + result = result[:start] + fmt.Sprintf("\x00PHRASE%d\x00", len(phrases)-1) + result[end+1:] + } + + // Neutralize FTS5 operators by lowercasing them (FTS5 operators are case-sensitive: + // AND, OR, NOT, NEAR are operators, but and, or, not, near are plain tokens) + result = fts5Operators.ReplaceAllStringFunc(result, strings.ToLower) + + // Handle words with embedded punctuation (a-ha, AC/DC, R.E.M.) before stripping + result, phrases = processPunctuatedWords(result, phrases) + + result = fts5SpecialChars.ReplaceAllString(result, " ") + result = fts5LeadingStar.ReplaceAllString(result, "$1") + tokens := strings.Fields(result) + + // Append * to plain tokens for prefix matching (e.g., "love" → "love*"). + // Skip tokens that are already wildcarded or are quoted phrase placeholders. 
+ for i, t := range tokens { + if strings.HasPrefix(t, "\x00") || strings.HasSuffix(t, "*") { + continue + } + tokens[i] = t + "*" + } + + result = strings.Join(tokens, " ") + + for i, phrase := range phrases { + placeholder := fmt.Sprintf("\x00PHRASE%d\x00", i) + result = strings.ReplaceAll(result, placeholder, phrase) + } + + return result +} + +// likeSearchColumns defines the core columns to search with LIKE queries. +// These are the primary user-visible fields for each entity type. +// Used as a fallback when FTS5 cannot handle the query (e.g., CJK text, punctuation-only input). +var likeSearchColumns = map[string][]string{ + "media_file": {"title", "album", "artist", "album_artist"}, + "album": {"name", "album_artist"}, + "artist": {"name"}, +} + +// likeSearchExpr generates LIKE-based search filters against core columns. +// Each word in the query must match at least one column (AND between words), +// and each word can match any column (OR within a word). +// Used as a fallback when FTS5 cannot handle the query (e.g., CJK text, punctuation-only input). +func likeSearchExpr(tableName string, s string) Sqlizer { + s = strings.TrimSpace(s) + if s == "" { + log.Trace("Search using LIKE backend, query is empty", "table", tableName) + return nil + } + columns, ok := likeSearchColumns[tableName] + if !ok { + log.Trace("Search using LIKE backend, couldn't find columns for this table", "table", tableName) + return nil + } + words := strings.Fields(s) + wordFilters := And{} + for _, word := range words { + colFilters := Or{} + for _, col := range columns { + colFilters = append(colFilters, Like{tableName + "." + col: "%" + word + "%"}) + } + wordFilters = append(wordFilters, colFilters) + } + log.Trace("Search using LIKE backend", "query", wordFilters, "table", tableName) + return wordFilters +} + +// ftsSearchColumns defines which FTS5 columns are included in general search. 
+// Columns not listed here are indexed but not searched by default, +// enabling future additions (comments, lyrics, bios) without affecting general search. +var ftsSearchColumns = map[string]string{ + "media_file": "{title album artist album_artist sort_title sort_album_name sort_artist_name sort_album_artist_name disc_subtitle search_participants search_normalized}", + "album": "{name sort_album_name album_artist search_participants discs catalog_num album_version search_normalized}", + "artist": "{name sort_artist_name search_normalized}", +} + +// ftsSearchExpr generates an FTS5 MATCH-based search filter. +// If the query produces no FTS tokens (e.g., punctuation-only like "!!!!!!!"), +// it falls back to LIKE-based search. +func ftsSearchExpr(tableName string, s string) Sqlizer { + q := buildFTS5Query(s) + if q == "" { + s = strings.TrimSpace(s) + if s != "" { + log.Trace("Search using LIKE fallback for non-tokenizable query", "table", tableName, "query", s) + return likeSearchExpr(tableName, s) + } + return nil + } + ftsTable := tableName + "_fts" + matchExpr := q + if cols, ok := ftsSearchColumns[tableName]; ok { + matchExpr = cols + " : (" + q + ")" + } + + filter := Expr( + tableName+".rowid IN (SELECT rowid FROM "+ftsTable+" WHERE "+ftsTable+" MATCH ?)", + matchExpr, + ) + log.Trace("Search using FTS5 backend", "table", tableName, "query", q, "filter", filter) + return filter +} diff --git a/persistence/sql_search_fts_test.go b/persistence/sql_search_fts_test.go new file mode 100644 index 00000000..31725295 --- /dev/null +++ b/persistence/sql_search_fts_test.go @@ -0,0 +1,333 @@ +package persistence + +import ( + "context" + + "github.com/navidrome/navidrome/conf" + "github.com/navidrome/navidrome/conf/configtest" + "github.com/navidrome/navidrome/log" + "github.com/navidrome/navidrome/model" + "github.com/navidrome/navidrome/model/request" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +var _ = DescribeTable("buildFTS5Query", + func(input, expected string) { + Expect(buildFTS5Query(input)).To(Equal(expected)) + }, + Entry("returns empty string for empty input", "", ""), + Entry("returns empty string for whitespace-only input", " ", ""), + Entry("appends * to a single word for prefix matching", "beatles", "beatles*"), + Entry("appends * to each word for prefix matching", "abbey road", "abbey* road*"), + Entry("preserves quoted phrases without appending *", `"the beatles"`, `"the beatles"`), + Entry("does not double-append * to existing prefix wildcard", "beat*", "beat*"), + Entry("strips FTS5 operators and appends * to lowercased words", "AND OR NOT NEAR", "and* or* not* near*"), + Entry("strips special FTS5 syntax characters and appends *", "test^col:val", "test* col* val*"), + Entry("handles mixed phrases and words", `"the beatles" abbey`, `"the beatles" abbey*`), + Entry("handles prefix with multiple words", "beat* abbey", "beat* abbey*"), + Entry("collapses multiple spaces", "abbey road", "abbey* road*"), + Entry("strips leading * from tokens and appends trailing *", "*livia", "livia*"), + Entry("strips leading * and preserves existing trailing *", "*livia oliv*", "livia* oliv*"), + Entry("strips standalone *", "*", ""), + Entry("strips apostrophe from input", "Guns N' Roses", "Guns* N* Roses*"), + Entry("converts slashed word to phrase+concat OR", "AC/DC", `("AC DC" OR ACDC*)`), + Entry("converts hyphenated word to phrase+concat OR", "a-ha", `("a ha" OR aha*)`), + Entry("converts partial hyphenated word to phrase+concat OR", "a-h", `("a h" OR ah*)`), + Entry("converts hyphenated name to phrase+concat OR", "Jay-Z", `("Jay Z" OR JayZ*)`), + Entry("converts contraction to phrase+concat OR", "it's", `("it s" OR its*)`), + Entry("handles punctuated word mixed with plain words", "best of a-ha", `best* of* ("a ha" OR aha*)`), + Entry("strips miscellaneous punctuation", "rock & roll, vol. 
2", "rock* roll* vol* 2*"), + Entry("preserves unicode characters with diacritics", "Björk début", "Björk* début*"), + Entry("collapses dotted abbreviation into phrase", "R.E.M.", `"R E M"`), + Entry("collapses abbreviation without trailing dot", "R.E.M", `"R E M"`), + Entry("collapses abbreviation mixed with words", "best of R.E.M.", `best* of* "R E M"`), + Entry("collapses two-letter abbreviation", "U.K.", `"U K"`), + Entry("does not collapse single letter surrounded by words", "I am fine", "I* am* fine*"), + Entry("does not collapse single standalone letter", "A test", "A* test*"), + Entry("preserves quoted phrase with punctuation verbatim", `"ac/dc"`, `"ac/dc"`), + Entry("preserves quoted abbreviation verbatim", `"R.E.M."`, `"R.E.M."`), + Entry("returns empty string for punctuation-only input", "!!!!!!!", ""), + Entry("returns empty string for mixed punctuation", "!@#$%^&", ""), +) + +var _ = DescribeTable("normalizeForFTS", + func(expected string, values ...string) { + Expect(normalizeForFTS(values...)).To(Equal(expected)) + }, + Entry("strips dots and concatenates", "REM", "R.E.M."), + Entry("strips slash", "ACDC", "AC/DC"), + Entry("strips hyphen", "Aha", "A-ha"), + Entry("skips unchanged words", "", "The Beatles"), + Entry("handles mixed input", "REM", "R.E.M.", "Automatic for the People"), + Entry("deduplicates", "REM", "R.E.M.", "R.E.M."), + Entry("strips apostrophe from word", "N", "Guns N' Roses"), + Entry("handles multiple values with punctuation", "REM ACDC", "R.E.M.", "AC/DC"), +) + +var _ = DescribeTable("containsCJK", + func(input string, expected bool) { + Expect(containsCJK(input)).To(Equal(expected)) + }, + Entry("returns false for empty string", "", false), + Entry("returns false for ASCII text", "hello world", false), + Entry("returns false for Latin with diacritics", "Björk début", false), + Entry("detects Chinese characters (Han)", "周杰伦", true), + Entry("detects Japanese Hiragana", "こんにちは", true), + Entry("detects Japanese Katakana", "カタカナ", 
true), + Entry("detects Korean Hangul", "한국어", true), + Entry("detects CJK mixed with Latin", "best of 周杰伦", true), + Entry("detects single CJK character", "a曲b", true), +) + +var _ = Describe("likeSearchExpr", func() { + It("returns nil for empty query", func() { + Expect(likeSearchExpr("media_file", "")).To(BeNil()) + }) + + It("returns nil for whitespace-only query", func() { + Expect(likeSearchExpr("media_file", " ")).To(BeNil()) + }) + + It("generates LIKE filters against core columns for single CJK word", func() { + expr := likeSearchExpr("media_file", "周杰伦") + sql, args, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + // Should have OR between columns for the single word + Expect(sql).To(ContainSubstring("OR")) + Expect(sql).To(ContainSubstring("media_file.title LIKE")) + Expect(sql).To(ContainSubstring("media_file.album LIKE")) + Expect(sql).To(ContainSubstring("media_file.artist LIKE")) + Expect(sql).To(ContainSubstring("media_file.album_artist LIKE")) + Expect(args).To(HaveLen(4)) + for _, arg := range args { + Expect(arg).To(Equal("%周杰伦%")) + } + }) + + It("generates AND of OR groups for multi-word query", func() { + expr := likeSearchExpr("media_file", "周杰伦 greatest") + sql, args, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + // Two groups AND'd together, each with 4 columns OR'd + Expect(sql).To(ContainSubstring("AND")) + Expect(args).To(HaveLen(8)) + }) + + It("uses correct columns for album table", func() { + expr := likeSearchExpr("album", "周杰伦") + sql, args, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(sql).To(ContainSubstring("album.name LIKE")) + Expect(sql).To(ContainSubstring("album.album_artist LIKE")) + Expect(args).To(HaveLen(2)) + }) + + It("uses correct columns for artist table", func() { + expr := likeSearchExpr("artist", "周杰伦") + sql, args, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(sql).To(ContainSubstring("artist.name LIKE")) + Expect(args).To(HaveLen(1)) + }) + + 
It("returns nil for unknown table", func() { + Expect(likeSearchExpr("unknown_table", "周杰伦")).To(BeNil()) + }) +}) + +var _ = Describe("ftsSearchExpr", func() { + It("returns nil for empty query", func() { + Expect(ftsSearchExpr("media_file", "")).To(BeNil()) + }) + + It("generates rowid IN subquery with MATCH and column filter", func() { + expr := ftsSearchExpr("media_file", "beatles") + sql, args, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(sql).To(ContainSubstring("media_file.rowid IN")) + Expect(sql).To(ContainSubstring("media_file_fts")) + Expect(sql).To(ContainSubstring("MATCH")) + Expect(args).To(HaveLen(1)) + Expect(args[0]).To(HavePrefix("{title album artist album_artist")) + Expect(args[0]).To(ContainSubstring("beatles*")) + }) + + It("generates correct FTS table name per entity", func() { + for _, table := range []string{"media_file", "album", "artist"} { + expr := ftsSearchExpr(table, "test") + sql, _, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(sql).To(ContainSubstring(table + ".rowid IN")) + Expect(sql).To(ContainSubstring(table + "_fts")) + } + }) + + It("wraps query with column filter for known tables", func() { + expr := ftsSearchExpr("artist", "Beatles") + _, args, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(args[0]).To(Equal("{name sort_artist_name search_normalized} : (Beatles*)")) + }) + + It("passes query without column filter for unknown tables", func() { + expr := ftsSearchExpr("unknown_table", "test") + _, args, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(args[0]).To(Equal("test*")) + }) + + It("preserves phrase queries inside column filter", func() { + expr := ftsSearchExpr("media_file", `"the beatles"`) + _, args, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(args[0]).To(ContainSubstring(`"the beatles"`)) + }) + + It("preserves prefix queries inside column filter", func() { + expr := ftsSearchExpr("media_file", "beat*") + _, args, err := 
expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(args[0]).To(ContainSubstring("beat*")) + }) + + It("falls back to LIKE search for punctuation-only query", func() { + expr := ftsSearchExpr("media_file", "!!!!!!!") + Expect(expr).ToNot(BeNil()) + sql, args, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(sql).To(ContainSubstring("LIKE")) + Expect(args).To(ContainElement("%!!!!!!!%")) + }) + + It("returns nil for empty string even with LIKE fallback", func() { + Expect(ftsSearchExpr("media_file", "")).To(BeNil()) + Expect(ftsSearchExpr("media_file", " ")).To(BeNil()) + }) +}) + +var _ = Describe("FTS5 Integration Search", func() { + var ( + mr model.MediaFileRepository + alr model.AlbumRepository + arr model.ArtistRepository + ) + + BeforeEach(func() { + ctx := log.NewContext(context.TODO()) + ctx = request.WithUser(ctx, adminUser) + conn := GetDBXBuilder() + mr = NewMediaFileRepository(ctx, conn) + alr = NewAlbumRepository(ctx, conn) + arr = NewArtistRepository(ctx, conn) + }) + + Describe("MediaFile search", func() { + It("finds media files by title", func() { + results, err := mr.Search("Radioactivity", 0, 10) + Expect(err).ToNot(HaveOccurred()) + Expect(results).To(HaveLen(1)) + Expect(results[0].Title).To(Equal("Radioactivity")) + Expect(results[0].ID).To(Equal(songRadioactivity.ID)) + }) + + It("finds media files by artist name", func() { + results, err := mr.Search("Beatles", 0, 10) + Expect(err).ToNot(HaveOccurred()) + Expect(results).To(HaveLen(3)) + for _, r := range results { + Expect(r.Artist).To(Equal("The Beatles")) + } + }) + }) + + Describe("Album search", func() { + It("finds albums by name", func() { + results, err := alr.Search("Sgt Peppers", 0, 10) + Expect(err).ToNot(HaveOccurred()) + Expect(results).To(HaveLen(1)) + Expect(results[0].Name).To(Equal("Sgt Peppers")) + Expect(results[0].ID).To(Equal(albumSgtPeppers.ID)) + }) + + It("finds albums with multi-word search", func() { + results, err := alr.Search("Abbey Road", 
0, 10) + Expect(err).ToNot(HaveOccurred()) + Expect(results).To(HaveLen(2)) + }) + }) + + Describe("Artist search", func() { + It("finds artists by name", func() { + results, err := arr.Search("Kraftwerk", 0, 10) + Expect(err).ToNot(HaveOccurred()) + Expect(results).To(HaveLen(1)) + Expect(results[0].Name).To(Equal("Kraftwerk")) + Expect(results[0].ID).To(Equal(artistKraftwerk.ID)) + }) + }) + + Describe("CJK search", func() { + It("finds media files by CJK title", func() { + results, err := mr.Search("プラチナ", 0, 10) + Expect(err).ToNot(HaveOccurred()) + Expect(results).To(HaveLen(1)) + Expect(results[0].Title).To(Equal("プラチナ・ジェット")) + Expect(results[0].ID).To(Equal(songCJK.ID)) + }) + + It("finds media files by CJK artist name", func() { + results, err := mr.Search("シートベルツ", 0, 10) + Expect(err).ToNot(HaveOccurred()) + Expect(results).To(HaveLen(1)) + Expect(results[0].Artist).To(Equal("シートベルツ")) + }) + + It("finds albums by CJK artist name", func() { + results, err := alr.Search("シートベルツ", 0, 10) + Expect(err).ToNot(HaveOccurred()) + Expect(results).To(HaveLen(1)) + Expect(results[0].Name).To(Equal("COWBOY BEBOP")) + Expect(results[0].ID).To(Equal(albumCJK.ID)) + }) + + It("finds artists by CJK name", func() { + results, err := arr.Search("シートベルツ", 0, 10) + Expect(err).ToNot(HaveOccurred()) + Expect(results).To(HaveLen(1)) + Expect(results[0].Name).To(Equal("シートベルツ")) + Expect(results[0].ID).To(Equal(artistCJK.ID)) + }) + }) + + Describe("Album version search", func() { + It("finds albums by version tag via FTS", func() { + results, err := alr.Search("Deluxe", 0, 10) + Expect(err).ToNot(HaveOccurred()) + Expect(results).To(HaveLen(1)) + Expect(results[0].ID).To(Equal(albumWithVersion.ID)) + }) + }) + + Describe("Punctuation-only search", func() { + It("finds media files with punctuation-only title", func() { + results, err := mr.Search("!!!!!!!", 0, 10) + Expect(err).ToNot(HaveOccurred()) + Expect(results).To(HaveLen(1)) + 
Expect(results[0].Title).To(Equal("!!!!!!!")) + Expect(results[0].ID).To(Equal(songPunctuation.ID)) + }) + }) + + Describe("Legacy backend fallback", func() { + It("returns results using legacy LIKE-based search when configured", func() { + DeferCleanup(configtest.SetupConfig()) + conf.Server.Search.Backend = "legacy" + + results, err := mr.Search("Radioactivity", 0, 10) + Expect(err).ToNot(HaveOccurred()) + Expect(results).To(HaveLen(1)) + Expect(results[0].Title).To(Equal("Radioactivity")) + }) + }) +}) diff --git a/persistence/sql_search_test.go b/persistence/sql_search_test.go index 6bfd88d9..b59570af 100644 --- a/persistence/sql_search_test.go +++ b/persistence/sql_search_test.go @@ -1,6 +1,8 @@ package persistence import ( + "github.com/navidrome/navidrome/conf" + "github.com/navidrome/navidrome/conf/configtest" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) @@ -11,4 +13,99 @@ var _ = Describe("sqlRepository", func() { Expect(formatFullText("legiao urbana")).To(Equal(" legiao urbana")) }) }) + + Describe("legacySearchExpr", func() { + It("returns nil for empty query", func() { + Expect(legacySearchExpr("media_file", "")).To(BeNil()) + }) + + It("generates LIKE filter for single word", func() { + expr := legacySearchExpr("media_file", "beatles") + sql, args, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(sql).To(ContainSubstring("media_file.full_text LIKE")) + Expect(args).To(ContainElement("% beatles%")) + }) + + It("generates AND of LIKE filters for multiple words", func() { + expr := legacySearchExpr("media_file", "abbey road") + sql, args, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(sql).To(ContainSubstring("AND")) + Expect(args).To(HaveLen(2)) + }) + }) + + Describe("getSearchExpr", func() { + It("returns ftsSearchExpr by default", func() { + DeferCleanup(configtest.SetupConfig()) + conf.Server.Search.Backend = "fts" + conf.Server.Search.FullString = false + + expr := getSearchExpr()("media_file", "test") 
+ sql, _, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(sql).To(ContainSubstring("MATCH")) + }) + + It("returns legacySearchExpr when SearchBackend is legacy", func() { + DeferCleanup(configtest.SetupConfig()) + conf.Server.Search.Backend = "legacy" + conf.Server.Search.FullString = false + + expr := getSearchExpr()("media_file", "test") + sql, _, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(sql).To(ContainSubstring("LIKE")) + }) + + It("falls back to legacySearchExpr when SearchFullString is enabled", func() { + DeferCleanup(configtest.SetupConfig()) + conf.Server.Search.Backend = "fts" + conf.Server.Search.FullString = true + + expr := getSearchExpr()("media_file", "test") + sql, _, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(sql).To(ContainSubstring("LIKE")) + }) + + It("routes CJK queries to likeSearchExpr instead of ftsSearchExpr", func() { + DeferCleanup(configtest.SetupConfig()) + conf.Server.Search.Backend = "fts" + conf.Server.Search.FullString = false + + expr := getSearchExpr()("media_file", "周杰伦") + sql, _, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + // CJK should use LIKE, not MATCH + Expect(sql).To(ContainSubstring("LIKE")) + Expect(sql).NotTo(ContainSubstring("MATCH")) + }) + + It("routes non-CJK queries to ftsSearchExpr", func() { + DeferCleanup(configtest.SetupConfig()) + conf.Server.Search.Backend = "fts" + conf.Server.Search.FullString = false + + expr := getSearchExpr()("media_file", "beatles") + sql, _, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + Expect(sql).To(ContainSubstring("MATCH")) + }) + + It("uses legacy for CJK when SearchBackend is legacy", func() { + DeferCleanup(configtest.SetupConfig()) + conf.Server.Search.Backend = "legacy" + conf.Server.Search.FullString = false + + expr := getSearchExpr()("media_file", "周杰伦") + sql, _, err := expr.ToSql() + Expect(err).ToNot(HaveOccurred()) + // Legacy should still use full_text column LIKE + 
Expect(sql).To(ContainSubstring("LIKE")) + Expect(sql).To(ContainSubstring("full_text")) + }) + }) + }) diff --git a/reflex.conf b/reflex.conf index 9633ab91..47dd775a 100644 --- a/reflex.conf +++ b/reflex.conf @@ -1 +1 @@ --s -r "(\.go$$|\.cpp$$|\.h$$|navidrome.toml|resources|token_received.html)" -R "(^ui|^data|^db/migrations)" -R "_test\.go$$" -- go run -race -tags netgo . +-s -r "(\.go$$|\.cpp$$|\.h$$|navidrome.toml|resources|token_received.html)" -R "(^ui|^data|^db/migrations)" -R "_test\.go$$" -- go run -race -tags netgo,sqlite_fts5 . diff --git a/server/e2e/e2e_suite_test.go b/server/e2e/e2e_suite_test.go index 479f9c68..92214950 100644 --- a/server/e2e/e2e_suite_test.go +++ b/server/e2e/e2e_suite_test.go @@ -363,7 +363,7 @@ func restoreDB() { _, err = sqlDB.Exec("ATTACH DATABASE ? AS snapshot", snapshotPath) Expect(err).ToNot(HaveOccurred()) - rows, err := sqlDB.Query("SELECT name FROM main.sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'") + rows, err := sqlDB.Query("SELECT name FROM main.sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' AND name NOT LIKE '%_fts' AND name NOT LIKE '%_fts_%'") Expect(err).ToNot(HaveOccurred()) var tables []string for rows.Next() { diff --git a/server/serve_index.go b/server/serve_index.go index b9a3b3a2..b5b36426 100644 --- a/server/serve_index.go +++ b/server/serve_index.go @@ -54,6 +54,7 @@ func serveIndex(ds model.DataStore, fs fs.FS, shareInfo *model.Share) http.Handl "defaultTheme": conf.Server.DefaultTheme, "defaultLanguage": conf.Server.DefaultLanguage, "defaultUIVolume": conf.Server.DefaultUIVolume, + "uiSearchDebounceMs": conf.Server.UISearchDebounceMs, "enableCoverAnimation": conf.Server.EnableCoverAnimation, "enableNowPlaying": conf.Server.EnableNowPlaying, "gaTrackingId": conf.Server.GATrackingID, diff --git a/server/serve_index_test.go b/server/serve_index_test.go index 4f179f22..9d6f480f 100644 --- a/server/serve_index_test.go +++ b/server/serve_index_test.go @@ -85,6 +85,7 @@ var _ = 
Describe("serveIndex", func() { Entry("defaultTheme", func() { conf.Server.DefaultTheme = "Light" }, "defaultTheme", "Light"), Entry("defaultLanguage", func() { conf.Server.DefaultLanguage = "pt" }, "defaultLanguage", "pt"), Entry("defaultUIVolume", func() { conf.Server.DefaultUIVolume = 45 }, "defaultUIVolume", float64(45)), + Entry("uiSearchDebounceMs", func() { conf.Server.UISearchDebounceMs = 500 }, "uiSearchDebounceMs", float64(500)), Entry("enableCoverAnimation", func() { conf.Server.EnableCoverAnimation = true }, "enableCoverAnimation", true), Entry("enableNowPlaying", func() { conf.Server.EnableNowPlaying = true }, "enableNowPlaying", true), Entry("gaTrackingId", func() { conf.Server.GATrackingID = "UA-12345" }, "gaTrackingId", "UA-12345"), diff --git a/ui/src/common/List.jsx b/ui/src/common/List.jsx index f74ab027..72c2d948 100644 --- a/ui/src/common/List.jsx +++ b/ui/src/common/List.jsx @@ -1,5 +1,6 @@ import React from 'react' import { List as RAList } from 'react-admin' +import config from '../config' import { Pagination } from './Pagination' import { Title } from './index' @@ -13,6 +14,7 @@ export const List = (props) => { args={{ smart_count: 2 }} /> } + debounce={config.uiSearchDebounceMs} perPage={15} pagination={} {...props} diff --git a/ui/src/config.js b/ui/src/config.js index 9582e95e..5acf10b6 100644 --- a/ui/src/config.js +++ b/ui/src/config.js @@ -20,6 +20,7 @@ const defaultConfig = { defaultTheme: 'Dark', defaultLanguage: '', defaultUIVolume: 100, + uiSearchDebounceMs: 200, enableUserEditing: true, enableSharing: true, shareURL: '',