feat: add ISRC matching for similar songs (#4946)

* feat: add ISRC support to similar songs matching and plugin interface

Add ISRC (International Standard Recording Code) as a high-priority
identifier in the provider matching algorithm, alongside MBID. The
matching pipeline now uses four strategies in priority order:
ID > MBID > ISRC > Title+Artist fuzzy match.

- Add ISRC field to agents.Song struct
- Add ISRC field to plugin capability SongRef (Go, Rust PDKs)
- Add loadTracksByISRC using json_tree query on tags column
- Integrate ISRC into matchSongsToLibrary, selectBestMatchingSongs,
  and buildTitleQueries

https://claude.ai/code/session_01Dd4mTq1VQZag4RNjCVusiF

* chore: regenerate plugin schema after ISRC addition

Run `make gen` to update the generated YAML schema for the
metadata agent capability with the new ISRC field on SongRef.

https://claude.ai/code/session_01Dd4mTq1VQZag4RNjCVusiF

* feat(mediafile): add GetAllByTags method to MediaFileRepository for tag-based retrieval

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(provider): speed up track matching by incorporating prior matches in ISRC and MBID lookups

Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Deluan Quintão
2026-01-27 14:54:29 -05:00
committed by GitHub
parent a55c4f0410
commit 1afcf7775b
13 changed files with 133 additions and 19 deletions
+5
View File
@@ -92,6 +92,11 @@ func (m *mockMediaFileRepo) Get(id string) (*model.MediaFile, error) {
return args.Get(0).(*model.MediaFile), args.Error(1)
}
// GetAllByTags satisfies model.MediaFileRepository for the mock.
// The tag name and tag values are ignored: the call is forwarded to GetAll
// with the same query options, so whatever GetAll is set up to return is
// what callers of GetAllByTags receive.
func (m *mockMediaFileRepo) GetAllByTags(_ model.TagName, _ []string, options ...model.QueryOptions) (model.MediaFiles, error) {
	files, err := m.GetAll(options...)
	return files, err
}
// GetAll implements model.MediaFileRepository.
func (m *mockMediaFileRepo) GetAll(options ...model.QueryOptions) (model.MediaFiles, error) {
argsSlice := make([]interface{}, len(options))
+83 -19
View File
@@ -19,17 +19,18 @@ import (
// # Algorithm Overview
//
// The algorithm matches songs from external agents (Last.fm, Deezer, etc.) to tracks in the
// local music library using three matching strategies in priority order:
// local music library using four matching strategies in priority order:
//
// 1. Direct ID match: Songs with an ID field are matched directly to MediaFiles by ID
// 2. MusicBrainz Recording ID (MBID) match: Songs with MBID are matched to tracks with
// matching mbz_recording_id
// 3. Title+Artist fuzzy match: Remaining songs are matched using fuzzy string comparison
// 3. ISRC match: Songs with ISRC are matched to tracks with matching ISRC tag
// 4. Title+Artist fuzzy match: Remaining songs are matched using fuzzy string comparison
// with metadata specificity scoring
//
// # Matching Priority
//
// When selecting the final result, matches are prioritized in order: ID > MBID > Title+Artist.
// When selecting the final result, matches are prioritized in order: ID > MBID > ISRC > Title+Artist.
// This ensures that more reliable identifiers take precedence over fuzzy text matching.
//
// # Fuzzy Matching Details
@@ -59,7 +60,16 @@ import (
// ]
// Result: t1 (MBID match takes priority over title+artist)
//
// Example 2 - Specificity Ranking:
// Example 2 - ISRC Priority:
//
// Agent returns: {Name: "Paranoid Android", ISRC: "GBAYE0000351", Artist: "Radiohead"}
// Library has: [
// {ID: "t1", Title: "Paranoid Android", Tags: {isrc: ["GBAYE0000351"]}},
// {ID: "t2", Title: "Paranoid Android", Artist: "Radiohead"},
// ]
// Result: t1 (ISRC match takes priority over title+artist)
//
// Example 3 - Specificity Ranking:
//
// Agent returns: {Name: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Violator"}
// Library has: [
@@ -68,7 +78,7 @@ import (
// ]
// Result: t2 (Level 3 beats Level 1 due to album match)
//
// Example 3 - Fuzzy Title Matching:
// Example 4 - Fuzzy Title Matching:
//
// Agent returns: {Name: "Bohemian Rhapsody", Artist: "Queen"}
// Library has: {ID: "t1", Title: "Bohemian Rhapsody - Remastered", Artist: "Queen"}
@@ -90,16 +100,36 @@ func (e *provider) matchSongsToLibrary(ctx context.Context, songs []agents.Song,
if err != nil {
return nil, fmt.Errorf("failed to load tracks by ID: %w", err)
}
mbidMatches, err := e.loadTracksByMBID(ctx, songs)
mbidMatches, err := e.loadTracksByMBID(ctx, songs, idMatches)
if err != nil {
return nil, fmt.Errorf("failed to load tracks by MBID: %w", err)
}
titleMatches, err := e.loadTracksByTitleAndArtist(ctx, songs, idMatches, mbidMatches)
isrcMatches, err := e.loadTracksByISRC(ctx, songs, idMatches, mbidMatches)
if err != nil {
return nil, fmt.Errorf("failed to load tracks by ISRC: %w", err)
}
titleMatches, err := e.loadTracksByTitleAndArtist(ctx, songs, idMatches, mbidMatches, isrcMatches)
if err != nil {
return nil, fmt.Errorf("failed to load tracks by title: %w", err)
}
return e.selectBestMatchingSongs(songs, idMatches, mbidMatches, titleMatches, count), nil
return e.selectBestMatchingSongs(songs, idMatches, mbidMatches, isrcMatches, titleMatches, count), nil
}
// songMatchedIn reports whether the song was already resolved by an earlier
// matching stage. Each non-empty identifier of the song (ID, MBID, ISRC) is
// looked up in every map of priorMatches; the song counts as matched as soon
// as one lookup yields a MediaFile with a non-empty ID.
func songMatchedIn(s agents.Song, priorMatches ...map[string]model.MediaFile) bool {
	idents := [3]string{s.ID, s.MBID, s.ISRC}
	for _, matched := range priorMatches {
		for _, ident := range idents {
			if ident == "" {
				continue
			}
			// A missing key yields the zero MediaFile, whose ID is empty,
			// so a single ID check covers both "absent" and "placeholder".
			if matched[ident].ID != "" {
				return true
			}
		}
	}
	return false
}
// loadTracksByID fetches MediaFiles from the library using direct ID matching.
@@ -138,10 +168,10 @@ func (e *provider) loadTracksByID(ctx context.Context, songs []agents.Song) (map
// It extracts all non-empty MBID fields from the input songs and performs a single
// batch query against the mbz_recording_id column. Returns a map keyed by MBID for
// O(1) lookup. Only non-missing files are returned.
func (e *provider) loadTracksByMBID(ctx context.Context, songs []agents.Song) (map[string]model.MediaFile, error) {
func (e *provider) loadTracksByMBID(ctx context.Context, songs []agents.Song, priorMatches ...map[string]model.MediaFile) (map[string]model.MediaFile, error) {
var mbids []string
for _, s := range songs {
if s.MBID != "" {
if s.MBID != "" && !songMatchedIn(s, priorMatches...) {
mbids = append(mbids, s.MBID)
}
}
@@ -168,6 +198,37 @@ func (e *provider) loadTracksByMBID(ctx context.Context, songs []agents.Song) (m
return matches, nil
}
// loadTracksByISRC resolves songs to library tracks through their ISRC
// (International Standard Recording Code).
//
// Songs with an empty ISRC, and songs already present in any of priorMatches,
// are left out of the lookup. The remaining ISRCs are fetched in one
// GetAllByTags call restricted to files whose "missing" flag is false.
//
// The result is keyed by ISRC. Every ISRC value carried by a returned track
// becomes a key, and when several tracks share an ISRC the first one returned
// by the repository is kept. On a repository error the (empty) map is
// returned together with that error.
func (e *provider) loadTracksByISRC(ctx context.Context, songs []agents.Song, priorMatches ...map[string]model.MediaFile) (map[string]model.MediaFile, error) {
	byISRC := make(map[string]model.MediaFile)

	var wanted []string
	for _, song := range songs {
		if song.ISRC == "" || songMatchedIn(song, priorMatches...) {
			continue
		}
		wanted = append(wanted, song.ISRC)
	}
	if len(wanted) == 0 {
		return byISRC, nil
	}

	opts := model.QueryOptions{
		Filters: squirrel.Eq{"missing": false},
	}
	tracks, err := e.ds.MediaFile(ctx).GetAllByTags(model.TagISRC, wanted, opts)
	if err != nil {
		return byISRC, err
	}

	for _, track := range tracks {
		for _, code := range track.Tags.Values(model.TagISRC) {
			// First track seen for a given ISRC wins; later ones are ignored.
			if _, seen := byISRC[code]; seen {
				continue
			}
			byISRC[code] = track
		}
	}
	return byISRC, nil
}
// songQuery represents a normalized query for matching a song to library tracks.
// All string fields are sanitized (lowercased, diacritics removed) for comparison.
// This struct is used internally by loadTracksByTitleAndArtist to group queries by artist.
@@ -246,8 +307,8 @@ func computeSpecificityLevel(q songQuery, mf model.MediaFile, albumThreshold flo
// Uses a unified scoring approach that combines title similarity (Jaro-Winkler) with
// metadata specificity (MBIDs, album names) for both exact and fuzzy matches.
// Returns a map keyed by "title|artist" for compatibility with selectBestMatchingSongs.
func (e *provider) loadTracksByTitleAndArtist(ctx context.Context, songs []agents.Song, idMatches, mbidMatches map[string]model.MediaFile) (map[string]model.MediaFile, error) {
queries := e.buildTitleQueries(songs, idMatches, mbidMatches)
func (e *provider) loadTracksByTitleAndArtist(ctx context.Context, songs []agents.Song, priorMatches ...map[string]model.MediaFile) (map[string]model.MediaFile, error) {
queries := e.buildTitleQueries(songs, priorMatches...)
if len(queries) == 0 {
return map[string]model.MediaFile{}, nil
}
@@ -344,14 +405,10 @@ func (e *provider) findBestMatch(q songQuery, tracks model.MediaFiles, threshold
return bestMatch, found
}
func (e *provider) buildTitleQueries(songs []agents.Song, idMatches, mbidMatches map[string]model.MediaFile) []songQuery {
func (e *provider) buildTitleQueries(songs []agents.Song, priorMatches ...map[string]model.MediaFile) []songQuery {
var queries []songQuery
for _, s := range songs {
// Skip if already matched by ID or MBID
if s.ID != "" && idMatches[s.ID].ID != "" {
continue
}
if s.MBID != "" && mbidMatches[s.MBID].ID != "" {
if songMatchedIn(s, priorMatches...) {
continue
}
queries = append(queries, songQuery{
@@ -366,7 +423,7 @@ func (e *provider) buildTitleQueries(songs []agents.Song, idMatches, mbidMatches
return queries
}
func (e *provider) selectBestMatchingSongs(songs []agents.Song, byID, byMBID, byTitleArtist map[string]model.MediaFile, count int) model.MediaFiles {
func (e *provider) selectBestMatchingSongs(songs []agents.Song, byID, byMBID, byISRC, byTitleArtist map[string]model.MediaFile, count int) model.MediaFiles {
var mfs model.MediaFiles
for _, t := range songs {
if len(mfs) == count {
@@ -386,6 +443,13 @@ func (e *provider) selectBestMatchingSongs(songs []agents.Song, byID, byMBID, by
continue
}
}
// Try ISRC match third
if t.ISRC != "" {
if mf, ok := byISRC[t.ISRC]; ok {
mfs = append(mfs, mf)
continue
}
}
// Fall back to title+artist match (composite key preserves duplicate titles)
key := str.SanitizeFieldForSorting(t.Name) + "|" + str.SanitizeFieldForSortingNoArticle(t.Artist)
if mf, ok := byTitleArtist[key]; ok {