Files
navidrome/persistence/mediafile_repository.go
T
Deluan Quintão b59eb32961 feat(subsonic): sort search3 results by relevance (#5086)
* fix(subsonic): optimize search3 for high-cardinality FTS queries

Use a two-phase query strategy for FTS5 searches to avoid the
performance penalty of expensive LEFT JOINs (annotation, bookmark,
library) on high-cardinality results like "the".

Phase 1 runs a lightweight query (main table + FTS index only) to get
sorted, paginated rowids. Phase 2 hydrates only those few rowids with
the full JOINs, making them nearly free.

For queries with complex ORDER BY expressions that reference joined
tables (e.g. artist search sorted by play count), the optimization is
skipped and the original single-query approach is used.

* fix(search): update order by clauses to include 'rank' for FTS queries

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): reintroduce 'rank' in Phase 2 ORDER BY for FTS queries

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): remove 'rank' from ORDER BY in non-FTS queries and adjust two-phase query handling

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): update FTS ranking to use bm25 weights and simplify ORDER BY qualification

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): refine FTS query handling and improve comments for clarity

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): refactor full-text search handling to streamline query strategy selection and improve LIKE fallback logic.

Increase e2e coverage for search3

Signed-off-by: Deluan <deluan@navidrome.org>

* refactor: enhance FTS column definitions and relevance weights

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): refactor Search method signatures to remove offset and size parameters, streamline query handling

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): allow single-character queries in search strategies and update related tests

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): make FTS Phase 1 treat Max=0 as no limit, reorganize tests

FTS Phase 1 unconditionally called Limit(uint64(options.Max)), which
produced LIMIT 0 when Max was zero. This diverged from applyOptions
where Max=0 means no limit. Now Phase 1 mirrors applyOptions: only add
LIMIT/OFFSET when the value is positive. Also moved legacy backend
integration tests from sql_search_fts_test.go to sql_search_like_test.go
and added regression tests for the Max=0 behavior on both backends.

* refactor: simplify callSearch function by removing variadic options and directly using QueryOptions

Signed-off-by: Deluan <deluan@navidrome.org>

* fix(search): implement ftsQueryDegraded function to detect significant content loss in FTS queries

Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
2026-02-23 08:51:54 -05:00

471 lines
14 KiB
Go

package persistence
import (
"context"
"fmt"
"slices"
"strconv"
"strings"
"sync"
"time"
. "github.com/Masterminds/squirrel"
"github.com/deluan/rest"
"github.com/navidrome/navidrome/conf"
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/utils/slice"
"github.com/pocketbase/dbx"
)
type mediaFileRepository struct {
sqlRepository
}
type dbMediaFile struct {
*model.MediaFile `structs:",flatten"`
Participants string `structs:"-" json:"-"`
Tags string `structs:"-" json:"-"`
// These are necessary to map the correct names (rg_*) to the correct fields (RG*)
// without using `db` struct tags in the model.MediaFile struct
RgAlbumGain *float64 `structs:"-" json:"-"`
RgAlbumPeak *float64 `structs:"-" json:"-"`
RgTrackGain *float64 `structs:"-" json:"-"`
RgTrackPeak *float64 `structs:"-" json:"-"`
}
func (m *dbMediaFile) PostScan() error {
m.RGTrackGain = m.RgTrackGain
m.RGTrackPeak = m.RgTrackPeak
m.RGAlbumGain = m.RgAlbumGain
m.RGAlbumPeak = m.RgAlbumPeak
var err error
m.MediaFile.Participants, err = unmarshalParticipants(m.Participants)
if err != nil {
return fmt.Errorf("parsing media_file from db: %w", err)
}
if m.Tags != "" {
m.MediaFile.Tags, err = unmarshalTags(m.Tags)
if err != nil {
return fmt.Errorf("parsing media_file from db: %w", err)
}
m.Genre, m.Genres = m.MediaFile.Tags.ToGenres()
}
return nil
}
func (m *dbMediaFile) PostMapArgs(args map[string]any) error {
fullText := []string{m.FullTitle(), m.Album, m.Artist, m.AlbumArtist,
m.SortTitle, m.SortAlbumName, m.SortArtistName, m.SortAlbumArtistName, m.DiscSubtitle}
participantNames := m.MediaFile.Participants.AllNames()
fullText = append(fullText, participantNames...)
args["full_text"] = formatFullText(fullText...)
args["search_participants"] = strings.Join(participantNames, " ")
args["search_normalized"] = normalizeForFTS(m.FullTitle(), m.Album, m.Artist, m.AlbumArtist)
args["tags"] = marshalTags(m.MediaFile.Tags)
args["participants"] = marshalParticipants(m.MediaFile.Participants)
return nil
}
type dbMediaFiles []dbMediaFile
func (m dbMediaFiles) toModels() model.MediaFiles {
return slice.Map(m, func(mf dbMediaFile) model.MediaFile { return *mf.MediaFile })
}
func NewMediaFileRepository(ctx context.Context, db dbx.Builder) model.MediaFileRepository {
r := &mediaFileRepository{}
r.ctx = ctx
r.db = db
r.tableName = "media_file"
r.registerModel(&model.MediaFile{}, mediaFileFilter())
r.setSortMappings(map[string]string{
"title": "order_title",
"artist": "order_artist_name, order_album_name, release_date, disc_number, track_number",
"album_artist": "order_album_artist_name, order_album_name, release_date, disc_number, track_number",
"album": "order_album_name, album_id, disc_number, track_number, order_artist_name, title",
"random": "random",
"created_at": "media_file.created_at",
"recently_added": mediaFileRecentlyAddedSort(),
"starred_at": "starred, starred_at",
"rated_at": "rating, rated_at",
})
return r
}
var mediaFileFilter = sync.OnceValue(func() map[string]filterFunc {
filters := map[string]filterFunc{
"id": idFilter("media_file"),
"title": fullTextFilter("media_file", "mbz_recording_id", "mbz_release_track_id"),
"starred": annotationBoolFilter("starred"),
"genre_id": tagIDFilter,
"missing": booleanFilter,
"artists_id": artistFilter,
"library_id": libraryIdFilter,
}
// Add all album tags as filters
for tag := range model.TagMappings() {
if _, exists := filters[string(tag)]; !exists {
filters[string(tag)] = tagIDFilter
}
}
return filters
})
func mediaFileRecentlyAddedSort() string {
if conf.Server.RecentlyAddedByModTime {
return "media_file.updated_at"
}
return "media_file.created_at"
}
func (r *mediaFileRepository) CountAll(options ...model.QueryOptions) (int64, error) {
query := r.newSelect()
query = r.withAnnotation(query, "media_file.id")
query = r.applyLibraryFilter(query)
return r.count(query, options...)
}
func (r *mediaFileRepository) CountBySuffix(options ...model.QueryOptions) (map[string]int64, error) {
sel := r.newSelect(options...).
Columns("lower(suffix) as suffix", "count(*) as count").
GroupBy("lower(suffix)")
var res []struct {
Suffix string
Count int64
}
err := r.queryAll(sel, &res)
if err != nil {
return nil, err
}
counts := make(map[string]int64, len(res))
for _, c := range res {
counts[c.Suffix] = c.Count
}
return counts, nil
}
func (r *mediaFileRepository) Exists(id string) (bool, error) {
return r.exists(Eq{"media_file.id": id})
}
func (r *mediaFileRepository) Put(m *model.MediaFile) error {
if m.CreatedAt.IsZero() {
m.CreatedAt = time.Now()
}
id, err := r.putByMatch(Eq{"path": m.Path, "library_id": m.LibraryID}, m.ID, &dbMediaFile{MediaFile: m})
if err != nil {
return err
}
m.ID = id
return r.updateParticipants(m.ID, m.Participants)
}
func (r *mediaFileRepository) selectMediaFile(options ...model.QueryOptions) SelectBuilder {
sql := r.newSelect(options...).Columns("media_file.*", "library.path as library_path", "library.name as library_name").
LeftJoin("library on media_file.library_id = library.id")
sql = r.withAnnotation(sql, "media_file.id")
sql = r.withBookmark(sql, "media_file.id")
return r.applyLibraryFilter(sql)
}
func (r *mediaFileRepository) Get(id string) (*model.MediaFile, error) {
res, err := r.GetAll(model.QueryOptions{Filters: Eq{"media_file.id": id}})
if err != nil {
return nil, err
}
if len(res) == 0 {
return nil, model.ErrNotFound
}
return &res[0], nil
}
func (r *mediaFileRepository) GetWithParticipants(id string) (*model.MediaFile, error) {
m, err := r.Get(id)
if err != nil {
return nil, err
}
m.Participants, err = r.getParticipants(m)
return m, err
}
func (r *mediaFileRepository) GetAll(options ...model.QueryOptions) (model.MediaFiles, error) {
sq := r.selectMediaFile(options...)
var res dbMediaFiles
err := r.queryAll(sq, &res, options...)
if err != nil {
return nil, err
}
return res.toModels(), nil
}
func (r *mediaFileRepository) GetAllByTags(tag model.TagName, values []string, options ...model.QueryOptions) (model.MediaFiles, error) {
placeholders := make([]string, len(values))
args := make([]any, len(values))
for i, v := range values {
placeholders[i] = "?"
args[i] = v
}
tagFilter := Expr(
fmt.Sprintf("exists (select 1 from json_tree(media_file.tags, '$.%s') where key='value' and value in (%s))",
tag, strings.Join(placeholders, ",")),
args...,
)
var opts model.QueryOptions
if len(options) > 0 {
opts = options[0]
}
if opts.Filters != nil {
opts.Filters = And{tagFilter, opts.Filters}
} else {
opts.Filters = tagFilter
}
return r.GetAll(opts)
}
func (r *mediaFileRepository) GetCursor(options ...model.QueryOptions) (model.MediaFileCursor, error) {
sq := r.selectMediaFile(options...)
cursor, err := queryWithStableResults[dbMediaFile](r.sqlRepository, sq)
if err != nil {
return nil, err
}
return func(yield func(model.MediaFile, error) bool) {
for m, err := range cursor {
if m.MediaFile == nil {
yield(model.MediaFile{}, fmt.Errorf("unexpected nil mediafile: %v", m))
return
}
if !yield(*m.MediaFile, err) || err != nil {
return
}
}
}, nil
}
// FindByPaths finds media files by their paths.
// The paths can be library-qualified (format: "libraryID:path") or unqualified ("path").
// Library-qualified paths search within the specified library, while unqualified paths
// search across all libraries for backward compatibility.
func (r *mediaFileRepository) FindByPaths(paths []string) (model.MediaFiles, error) {
query := Or{}
for _, path := range paths {
parts := strings.SplitN(path, ":", 2)
if len(parts) == 2 {
// Library-qualified path: "libraryID:path"
libraryID, err := strconv.Atoi(parts[0])
if err != nil {
// Invalid format, skip
continue
}
relativePath := parts[1]
query = append(query, And{
Eq{"path collate nocase": relativePath},
Eq{"library_id": libraryID},
})
} else {
// Unqualified path: search across all libraries
query = append(query, Eq{"path collate nocase": path})
}
}
if len(query) == 0 {
return model.MediaFiles{}, nil
}
sel := r.newSelect().Columns("*").Where(query)
var res dbMediaFiles
if err := r.queryAll(sel, &res); err != nil {
return nil, err
}
return res.toModels(), nil
}
func (r *mediaFileRepository) Delete(id string) error {
return r.delete(Eq{"id": id})
}
func (r *mediaFileRepository) DeleteAllMissing() (int64, error) {
user := loggedUser(r.ctx)
if !user.IsAdmin {
return 0, rest.ErrPermissionDenied
}
del := Delete(r.tableName).Where(Eq{"missing": true})
return r.executeSQL(del)
}
func (r *mediaFileRepository) DeleteMissing(ids []string) error {
user := loggedUser(r.ctx)
if !user.IsAdmin {
return rest.ErrPermissionDenied
}
return r.delete(
And{
Eq{"missing": true},
Eq{"id": ids},
},
)
}
func (r *mediaFileRepository) MarkMissing(missing bool, mfs ...*model.MediaFile) error {
ids := slice.SeqFunc(mfs, func(m *model.MediaFile) string { return m.ID })
for chunk := range slice.CollectChunks(ids, 200) {
upd := Update(r.tableName).
Set("missing", missing).
Set("updated_at", time.Now()).
Where(Eq{"id": chunk})
c, err := r.executeSQL(upd)
if err != nil || c == 0 {
log.Error(r.ctx, "Error setting mediafile missing flag", "ids", chunk, err)
return err
}
log.Debug(r.ctx, "Marked missing mediafiles", "total", c, "ids", chunk)
}
return nil
}
func (r *mediaFileRepository) MarkMissingByFolder(missing bool, folderIDs ...string) error {
for chunk := range slices.Chunk(folderIDs, 200) {
upd := Update(r.tableName).
Set("missing", missing).
Set("updated_at", time.Now()).
Where(And{
Eq{"folder_id": chunk},
Eq{"missing": !missing},
})
c, err := r.executeSQL(upd)
if err != nil {
log.Error(r.ctx, "Error setting mediafile missing flag", "folderIDs", chunk, err)
return err
}
log.Debug(r.ctx, "Marked missing mediafiles from missing folders", "total", c, "folders", chunk)
}
return nil
}
// GetMissingAndMatching returns all mediafiles that are missing and their potential matches (comparing PIDs)
// that were added/updated after the last scan started. The result is ordered by PID.
// It does not need to load bookmarks, annotations and participants, as they are not used by the scanner.
func (r *mediaFileRepository) GetMissingAndMatching(libId int) (model.MediaFileCursor, error) {
subQ := r.newSelect().Columns("pid").
Where(And{
Eq{"media_file.missing": true},
Eq{"library_id": libId},
})
subQText, subQArgs, err := subQ.PlaceholderFormat(Question).ToSql()
if err != nil {
return nil, err
}
sel := r.newSelect().Columns("media_file.*", "library.path as library_path", "library.name as library_name").
LeftJoin("library on media_file.library_id = library.id").
Where("pid in ("+subQText+")", subQArgs...).
Where(Or{
Eq{"missing": true},
ConcatExpr("media_file.created_at > library.last_scan_started_at"),
}).
OrderBy("pid")
cursor, err := queryWithStableResults[dbMediaFile](r.sqlRepository, sel)
if err != nil {
return nil, err
}
return func(yield func(model.MediaFile, error) bool) {
for m, err := range cursor {
if !yield(*m.MediaFile, err) || err != nil {
return
}
}
}, nil
}
// FindRecentFilesByMBZTrackID finds recently added files by MusicBrainz Track ID in other libraries
// It uses a lightweight query without annotation/bookmark joins since those are not needed for matching
func (r *mediaFileRepository) FindRecentFilesByMBZTrackID(missing model.MediaFile, since time.Time) (model.MediaFiles, error) {
sel := r.newSelect().Columns("media_file.*", "library.path as library_path", "library.name as library_name").
LeftJoin("library on media_file.library_id = library.id").
Where(And{
NotEq{"media_file.library_id": missing.LibraryID},
Eq{"media_file.mbz_release_track_id": missing.MbzReleaseTrackID},
NotEq{"media_file.mbz_release_track_id": ""}, // Exclude empty MBZ Track IDs
Eq{"media_file.suffix": missing.Suffix},
Gt{"media_file.created_at": since},
Eq{"media_file.missing": false},
}).OrderBy("media_file.created_at DESC")
var res dbMediaFiles
err := r.queryAll(sel, &res)
if err != nil {
return nil, err
}
return res.toModels(), nil
}
// FindRecentFilesByProperties finds recently added files by intrinsic properties in other libraries
// It uses a lightweight query without annotation/bookmark joins since those are not needed for matching
func (r *mediaFileRepository) FindRecentFilesByProperties(missing model.MediaFile, since time.Time) (model.MediaFiles, error) {
sel := r.newSelect().Columns("media_file.*", "library.path as library_path", "library.name as library_name").
LeftJoin("library on media_file.library_id = library.id").
Where(And{
NotEq{"media_file.library_id": missing.LibraryID},
Eq{"media_file.title": missing.Title},
Eq{"media_file.size": missing.Size},
Eq{"media_file.suffix": missing.Suffix},
Eq{"media_file.disc_number": missing.DiscNumber},
Eq{"media_file.track_number": missing.TrackNumber},
Eq{"media_file.album": missing.Album},
Eq{"media_file.mbz_release_track_id": ""}, // Exclude files with MBZ Track ID
Gt{"media_file.created_at": since},
Eq{"media_file.missing": false},
}).OrderBy("media_file.created_at DESC")
var res dbMediaFiles
err := r.queryAll(sel, &res)
if err != nil {
return nil, err
}
return res.toModels(), nil
}
var mediaFileSearchConfig = searchConfig{
NaturalOrder: "media_file.rowid",
OrderBy: []string{"title"},
MBIDFields: []string{"mbz_recording_id", "mbz_release_track_id"},
}
func (r *mediaFileRepository) Search(q string, options ...model.QueryOptions) (model.MediaFiles, error) {
var opts model.QueryOptions
if len(options) > 0 {
opts = options[0]
}
var res dbMediaFiles
err := r.doSearch(r.selectMediaFile(options...), q, &res, mediaFileSearchConfig, opts)
if err != nil {
return nil, fmt.Errorf("searching media_file %q: %w", q, err)
}
return res.toModels(), nil
}
func (r *mediaFileRepository) Count(options ...rest.QueryOptions) (int64, error) {
return r.CountAll(r.parseRestOptions(r.ctx, options...))
}
func (r *mediaFileRepository) Read(id string) (any, error) {
return r.Get(id)
}
func (r *mediaFileRepository) ReadAll(options ...rest.QueryOptions) (any, error) {
return r.GetAll(r.parseRestOptions(r.ctx, options...))
}
func (r *mediaFileRepository) EntityName() string {
return "mediafile"
}
func (r *mediaFileRepository) NewInstance() any {
return &model.MediaFile{}
}
var _ model.MediaFileRepository = (*mediaFileRepository)(nil)
var _ model.ResourceRepository = (*mediaFileRepository)(nil)