diff --git a/core/agents/interfaces.go b/core/agents/interfaces.go
index 94373ec1..19df91d0 100644
--- a/core/agents/interfaces.go
+++ b/core/agents/interfaces.go
@@ -36,6 +36,7 @@ type Song struct {
 	ID         string
 	Name       string
 	MBID       string
+	ISRC       string
 	Artist     string
 	ArtistMBID string
 	Album      string
diff --git a/core/external/extdata_helper_test.go b/core/external/extdata_helper_test.go
index f1d92be1..185a568b 100644
--- a/core/external/extdata_helper_test.go
+++ b/core/external/extdata_helper_test.go
@@ -92,6 +92,11 @@ func (m *mockMediaFileRepo) Get(id string) (*model.MediaFile, error) {
 	return args.Get(0).(*model.MediaFile), args.Error(1)
 }
 
+// GetAllByTags implements model.MediaFileRepository.
+func (m *mockMediaFileRepo) GetAllByTags(_ model.TagName, _ []string, options ...model.QueryOptions) (model.MediaFiles, error) {
+	return m.GetAll(options...)
+}
+
 // GetAll implements model.MediaFileRepository.
 func (m *mockMediaFileRepo) GetAll(options ...model.QueryOptions) (model.MediaFiles, error) {
 	argsSlice := make([]interface{}, len(options))
diff --git a/core/external/provider_matching.go b/core/external/provider_matching.go
index 5d548fb9..8a6c6a09 100644
--- a/core/external/provider_matching.go
+++ b/core/external/provider_matching.go
@@ -19,17 +19,18 @@ import (
 // # Algorithm Overview
 //
 // The algorithm matches songs from external agents (Last.fm, Deezer, etc.) to tracks in the
-// local music library using three matching strategies in priority order:
+// local music library using four matching strategies in priority order:
 //
 //  1. Direct ID match: Songs with an ID field are matched directly to MediaFiles by ID
 //  2. MusicBrainz Recording ID (MBID) match: Songs with MBID are matched to tracks with
 //     matching mbz_recording_id
-//  3. Title+Artist fuzzy match: Remaining songs are matched using fuzzy string comparison
+//  3. ISRC match: Songs with ISRC are matched to tracks with matching ISRC tag
+//  4. Title+Artist fuzzy match: Remaining songs are matched using fuzzy string comparison
 //     with metadata specificity scoring
 //
 // # Matching Priority
 //
-// When selecting the final result, matches are prioritized in order: ID > MBID > Title+Artist.
+// When selecting the final result, matches are prioritized in order: ID > MBID > ISRC > Title+Artist.
 // This ensures that more reliable identifiers take precedence over fuzzy text matching.
 //
 // # Fuzzy Matching Details
@@ -59,7 +60,16 @@ import (
 //	]
 //	Result: t1 (MBID match takes priority over title+artist)
 //
-// Example 2 - Specificity Ranking:
+// Example 2 - ISRC Priority:
+//
+//	Agent returns: {Name: "Paranoid Android", ISRC: "GBAYE0000351", Artist: "Radiohead"}
+//	Library has: [
+//	  {ID: "t1", Title: "Paranoid Android", Tags: {isrc: ["GBAYE0000351"]}},
+//	  {ID: "t2", Title: "Paranoid Android", Artist: "Radiohead"},
+//	]
+//	Result: t1 (ISRC match takes priority over title+artist)
+//
+// Example 3 - Specificity Ranking:
 //
 //	Agent returns: {Name: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Violator"}
 //	Library has: [
@@ -68,7 +78,7 @@ import (
 //	]
 //	Result: t2 (Level 3 beats Level 1 due to album match)
 //
-// Example 3 - Fuzzy Title Matching:
+// Example 4 - Fuzzy Title Matching:
 //
 //	Agent returns: {Name: "Bohemian Rhapsody", Artist: "Queen"}
 //	Library has: {ID: "t1", Title: "Bohemian Rhapsody - Remastered", Artist: "Queen"}
@@ -90,16 +100,36 @@ func (e *provider) matchSongsToLibrary(ctx context.Context, songs []agents.Song,
 	if err != nil {
 		return nil, fmt.Errorf("failed to load tracks by ID: %w", err)
 	}
-	mbidMatches, err := e.loadTracksByMBID(ctx, songs)
+	mbidMatches, err := e.loadTracksByMBID(ctx, songs, idMatches)
 	if err != nil {
 		return nil, fmt.Errorf("failed to load tracks by MBID: %w", err)
 	}
-	titleMatches, err := e.loadTracksByTitleAndArtist(ctx, songs, idMatches, mbidMatches)
+	isrcMatches, err := e.loadTracksByISRC(ctx, songs, idMatches, mbidMatches)
+	if err != nil {
+		return nil, fmt.Errorf("failed to load tracks by ISRC: %w", err)
+	}
+	titleMatches, err := e.loadTracksByTitleAndArtist(ctx, songs, idMatches, mbidMatches, isrcMatches)
 	if err != nil {
 		return nil, fmt.Errorf("failed to load tracks by title: %w", err)
 	}
 
-	return e.selectBestMatchingSongs(songs, idMatches, mbidMatches, titleMatches, count), nil
+	return e.selectBestMatchingSongs(songs, idMatches, mbidMatches, isrcMatches, titleMatches, count), nil
+}
+
+// songMatchedIn reports whether a song has already been matched in any of the provided match maps.
+// Each non-empty identifier (ID, MBID, ISRC) is looked up in every map; a hit counts only if its MediaFile has an ID.
+func songMatchedIn(s agents.Song, priorMatches ...map[string]model.MediaFile) bool {
+	keys := []string{s.ID, s.MBID, s.ISRC}
+	for _, m := range priorMatches {
+		for _, key := range keys {
+			if key != "" {
+				if mf, ok := m[key]; ok && mf.ID != "" {
+					return true
+				}
+			}
+		}
+	}
+	return false
 }
 
 // loadTracksByID fetches MediaFiles from the library using direct ID matching.
@@ -138,10 +168,10 @@ func (e *provider) loadTracksByID(ctx context.Context, songs []agents.Song) (map
 // It extracts all non-empty MBID fields from the input songs and performs a single
 // batch query against the mbz_recording_id column. Returns a map keyed by MBID for
 // O(1) lookup. Only non-missing files are returned.
-func (e *provider) loadTracksByMBID(ctx context.Context, songs []agents.Song) (map[string]model.MediaFile, error) {
+func (e *provider) loadTracksByMBID(ctx context.Context, songs []agents.Song, priorMatches ...map[string]model.MediaFile) (map[string]model.MediaFile, error) {
 	var mbids []string
 	for _, s := range songs {
-		if s.MBID != "" {
+		if s.MBID != "" && !songMatchedIn(s, priorMatches...) {
 			mbids = append(mbids, s.MBID)
 		}
 	}
@@ -168,6 +198,37 @@ func (e *provider) loadTracksByMBID(ctx context.Context, songs []agents.Song) (m
 	return matches, nil
 }
 
+// loadTracksByISRC fetches MediaFiles from the library using ISRC (International Standard
+// Recording Code) matching. Songs without an ISRC, or already matched in priorMatches, are
+// skipped; the remaining ISRCs are resolved with a single GetAllByTags query on the ISRC tag.
+// Returns a map keyed by ISRC (first file wins) for O(1) lookup. Only non-missing files are returned.
+func (e *provider) loadTracksByISRC(ctx context.Context, songs []agents.Song, priorMatches ...map[string]model.MediaFile) (map[string]model.MediaFile, error) {
+	var isrcs []string
+	for _, s := range songs {
+		if s.ISRC != "" && !songMatchedIn(s, priorMatches...) {
+			isrcs = append(isrcs, s.ISRC)
+		}
+	}
+	matches := map[string]model.MediaFile{}
+	if len(isrcs) == 0 {
+		return matches, nil
+	}
+	res, err := e.ds.MediaFile(ctx).GetAllByTags(model.TagISRC, isrcs, model.QueryOptions{
+		Filters: squirrel.Eq{"missing": false},
+	})
+	if err != nil {
+		return matches, err
+	}
+	for _, mf := range res {
+		for _, isrc := range mf.Tags.Values(model.TagISRC) {
+			if _, ok := matches[isrc]; !ok {
+				matches[isrc] = mf
+			}
+		}
+	}
+	return matches, nil
+}
+
 // songQuery represents a normalized query for matching a song to library tracks.
 // All string fields are sanitized (lowercased, diacritics removed) for comparison.
 // This struct is used internally by loadTracksByTitleAndArtist to group queries by artist.
@@ -246,8 +307,8 @@ func computeSpecificityLevel(q songQuery, mf model.MediaFile, albumThreshold flo
 // Uses a unified scoring approach that combines title similarity (Jaro-Winkler) with
 // metadata specificity (MBIDs, album names) for both exact and fuzzy matches.
 // Returns a map keyed by "title|artist" for compatibility with selectBestMatchingSongs.
-func (e *provider) loadTracksByTitleAndArtist(ctx context.Context, songs []agents.Song, idMatches, mbidMatches map[string]model.MediaFile) (map[string]model.MediaFile, error) {
-	queries := e.buildTitleQueries(songs, idMatches, mbidMatches)
+func (e *provider) loadTracksByTitleAndArtist(ctx context.Context, songs []agents.Song, priorMatches ...map[string]model.MediaFile) (map[string]model.MediaFile, error) {
+	queries := e.buildTitleQueries(songs, priorMatches...)
 	if len(queries) == 0 {
 		return map[string]model.MediaFile{}, nil
 	}
@@ -344,14 +405,10 @@ func (e *provider) findBestMatch(q songQuery, tracks model.MediaFiles, threshold
 	return bestMatch, found
 }
 
-func (e *provider) buildTitleQueries(songs []agents.Song, idMatches, mbidMatches map[string]model.MediaFile) []songQuery {
+func (e *provider) buildTitleQueries(songs []agents.Song, priorMatches ...map[string]model.MediaFile) []songQuery {
 	var queries []songQuery
 	for _, s := range songs {
-		// Skip if already matched by ID or MBID
-		if s.ID != "" && idMatches[s.ID].ID != "" {
-			continue
-		}
-		if s.MBID != "" && mbidMatches[s.MBID].ID != "" {
+		if songMatchedIn(s, priorMatches...) {
 			continue
 		}
 		queries = append(queries, songQuery{
@@ -366,7 +423,7 @@ func (e *provider) buildTitleQueries(songs []agents.Song, idMatches, mbidMatches
 	return queries
 }
 
-func (e *provider) selectBestMatchingSongs(songs []agents.Song, byID, byMBID, byTitleArtist map[string]model.MediaFile, count int) model.MediaFiles {
+func (e *provider) selectBestMatchingSongs(songs []agents.Song, byID, byMBID, byISRC, byTitleArtist map[string]model.MediaFile, count int) model.MediaFiles {
 	var mfs model.MediaFiles
 	for _, t := range songs {
 		if len(mfs) == count {
@@ -386,6 +443,13 @@ func (e *provider) selectBestMatchingSongs(songs []agents.Song, byID, byMBID, by
 				continue
 			}
 		}
+		// Try ISRC match third
+		if t.ISRC != "" {
+			if mf, ok := byISRC[t.ISRC]; ok {
+				mfs = append(mfs, mf)
+				continue
+			}
+		}
 		// Fall back to title+artist match (composite key preserves duplicate titles)
 		key := str.SanitizeFieldForSorting(t.Name) + "|" + str.SanitizeFieldForSortingNoArticle(t.Artist)
 		if mf, ok := byTitleArtist[key]; ok {
diff --git a/model/mediafile.go b/model/mediafile.go
index 1ae63e75..1d34cd76 100644
--- a/model/mediafile.go
+++ b/model/mediafile.go
@@ -359,6 +359,7 @@ type MediaFileRepository interface {
 	Get(id string) (*MediaFile, error)
 	GetWithParticipants(id string) (*MediaFile, error)
 	GetAll(options ...QueryOptions) (MediaFiles, error)
+	GetAllByTags(tag TagName, values []string, options ...QueryOptions) (MediaFiles, error)
 	GetCursor(options ...QueryOptions) (MediaFileCursor, error)
 	Delete(id string) error
 	DeleteMissing(ids []string) error
diff --git a/persistence/mediafile_repository.go b/persistence/mediafile_repository.go
index 9c682369..2054a34b 100644
--- a/persistence/mediafile_repository.go
+++ b/persistence/mediafile_repository.go
@@ -195,6 +195,37 @@ func (r *mediaFileRepository) GetAll(options ...model.QueryOptions) (model.Media
 	return res.toModels(), nil
 }
 
+// GetAllByTags returns the media files whose tags contain at least one of the given values
+// for the given tag. Filters from the first QueryOptions, if any, are ANDed with the tag match.
+func (r *mediaFileRepository) GetAllByTags(tag model.TagName, values []string, options ...model.QueryOptions) (model.MediaFiles, error) {
+	// Nothing can match an empty value list; skip the query instead of emitting "value in ()".
+	if len(values) == 0 {
+		return model.MediaFiles{}, nil
+	}
+	placeholders := make([]string, len(values))
+	args := make([]any, len(values))
+	for i, v := range values {
+		placeholders[i] = "?"
+		args[i] = v
+	}
+	tagFilter := Expr(
+		fmt.Sprintf("exists (select 1 from json_tree(media_file.tags, '$.%s') where key='value' and value in (%s))",
+			tag, strings.Join(placeholders, ",")),
+		args...,
+	)
+
+	var opts model.QueryOptions
+	if len(options) > 0 {
+		opts = options[0]
+	}
+	if opts.Filters != nil {
+		opts.Filters = And{tagFilter, opts.Filters}
+	} else {
+		opts.Filters = tagFilter
+	}
+	return r.GetAll(opts)
+}
+
 func (r *mediaFileRepository) GetCursor(options ...model.QueryOptions) (model.MediaFileCursor, error) {
 	sq := r.selectMediaFile(options...)
 	cursor, err := queryWithStableResults[dbMediaFile](r.sqlRepository, sq)
diff --git a/plugins/capabilities/metadata_agent.go b/plugins/capabilities/metadata_agent.go
index 7070637f..407f21ec 100644
--- a/plugins/capabilities/metadata_agent.go
+++ b/plugins/capabilities/metadata_agent.go
@@ -142,6 +142,8 @@ type SongRef struct {
 	Name string `json:"name"`
 	// MBID is the MusicBrainz ID for the song.
 	MBID string `json:"mbid,omitempty"`
+	// ISRC is the International Standard Recording Code for the song.
+	ISRC string `json:"isrc,omitempty"`
 	// Artist is the artist name.
 	Artist string `json:"artist,omitempty"`
 	// ArtistMBID is the MusicBrainz artist ID.
diff --git a/plugins/capabilities/metadata_agent.yaml b/plugins/capabilities/metadata_agent.yaml
index 07ae939e..4940a505 100644
--- a/plugins/capabilities/metadata_agent.yaml
+++ b/plugins/capabilities/metadata_agent.yaml
@@ -343,6 +343,9 @@ components:
         mbid:
           type: string
           description: MBID is the MusicBrainz ID for the song.
+        isrc:
+          type: string
+          description: ISRC is the International Standard Recording Code for the song.
         artist:
           type: string
           description: Artist is the artist name.
diff --git a/plugins/metadata_agent.go b/plugins/metadata_agent.go
index 67a7e373..451ef26d 100644
--- a/plugins/metadata_agent.go
+++ b/plugins/metadata_agent.go
@@ -230,6 +230,7 @@ func songRefsToAgentSongs(refs []capabilities.SongRef) []agents.Song {
 			ID:         s.ID,
 			Name:       s.Name,
 			MBID:       s.MBID,
+			ISRC:       s.ISRC,
 			Artist:     s.Artist,
 			ArtistMBID: s.ArtistMBID,
 			Album:      s.Album,
diff --git a/plugins/pdk/go/metadata/metadata.go b/plugins/pdk/go/metadata/metadata.go
index 46f6da5d..7cd63865 100644
--- a/plugins/pdk/go/metadata/metadata.go
+++ b/plugins/pdk/go/metadata/metadata.go
@@ -171,6 +171,8 @@ type SongRef struct {
 	Name string `json:"name"`
 	// MBID is the MusicBrainz ID for the song.
 	MBID string `json:"mbid,omitempty"`
+	// ISRC is the International Standard Recording Code for the song.
+	ISRC string `json:"isrc,omitempty"`
 	// Artist is the artist name.
 	Artist string `json:"artist,omitempty"`
 	// ArtistMBID is the MusicBrainz artist ID.
diff --git a/plugins/pdk/go/metadata/metadata_stub.go b/plugins/pdk/go/metadata/metadata_stub.go
index 7b0a1109..bdcd06fc 100644
--- a/plugins/pdk/go/metadata/metadata_stub.go
+++ b/plugins/pdk/go/metadata/metadata_stub.go
@@ -168,6 +168,8 @@ type SongRef struct {
 	Name string `json:"name"`
 	// MBID is the MusicBrainz ID for the song.
 	MBID string `json:"mbid,omitempty"`
+	// ISRC is the International Standard Recording Code for the song.
+	ISRC string `json:"isrc,omitempty"`
 	// Artist is the artist name.
 	Artist string `json:"artist,omitempty"`
 	// ArtistMBID is the MusicBrainz artist ID.
diff --git a/plugins/pdk/rust/nd-pdk-capabilities/src/metadata.rs b/plugins/pdk/rust/nd-pdk-capabilities/src/metadata.rs
index 24ca0b6d..463e52c3 100644
--- a/plugins/pdk/rust/nd-pdk-capabilities/src/metadata.rs
+++ b/plugins/pdk/rust/nd-pdk-capabilities/src/metadata.rs
@@ -242,6 +242,9 @@ pub struct SongRef {
     /// MBID is the MusicBrainz ID for the song.
     #[serde(default, skip_serializing_if = "String::is_empty")]
     pub mbid: String,
+    /// ISRC is the International Standard Recording Code for the song.
+    #[serde(default, skip_serializing_if = "String::is_empty")]
+    pub isrc: String,
     /// Artist is the artist name.
     #[serde(default, skip_serializing_if = "String::is_empty")]
     pub artist: String,
diff --git a/plugins/testdata/test-metadata-agent/main.go b/plugins/testdata/test-metadata-agent/main.go
index 2ff2a0bd..23e933eb 100644
--- a/plugins/testdata/test-metadata-agent/main.go
+++ b/plugins/testdata/test-metadata-agent/main.go
@@ -134,6 +134,7 @@ func (t *testMetadataAgent) GetSimilarSongsByTrack(input metadata.SimilarSongsBy
 			ID:         "similar-track-id-" + strconv.Itoa(i+1),
 			Name:       "Similar to " + input.Name + " #" + strconv.Itoa(i+1),
 			MBID:       "similar-mbid-" + strconv.Itoa(i+1),
+			ISRC:       "similar-isrc-" + strconv.Itoa(i+1),
 			Artist:     input.Artist,
 			ArtistMBID: "artist-mbid-" + strconv.Itoa(i+1),
 		})
diff --git a/tests/mock_mediafile_repo.go b/tests/mock_mediafile_repo.go
index 5b38a718..8542fc8d 100644
--- a/tests/mock_mediafile_repo.go
+++ b/tests/mock_mediafile_repo.go
@@ -76,6 +76,10 @@ func (m *MockMediaFileRepo) GetWithParticipants(id string) (*model.MediaFile, er
 	return nil, model.ErrNotFound
 }
 
+func (m *MockMediaFileRepo) GetAllByTags(_ model.TagName, _ []string, options ...model.QueryOptions) (model.MediaFiles, error) {
+	return m.GetAll(options...)
+}
+
 func (m *MockMediaFileRepo) GetAll(qo ...model.QueryOptions) (model.MediaFiles, error) {
 	if len(qo) > 0 {
 		m.Options = qo[0]