feat: add ISRC matching for similar songs (#4946)

* feat: add ISRC support to similar songs matching and plugin interface

Add ISRC (International Standard Recording Code) as a high-priority
identifier in the provider matching algorithm, alongside MBID. The
matching pipeline now uses four strategies in priority order:
ID > MBID > ISRC > Title+Artist fuzzy match.

- Add ISRC field to agents.Song struct
- Add ISRC field to plugin capability SongRef (Go, Rust PDKs)
- Add loadTracksByISRC using json_tree query on tags column
- Integrate ISRC into matchSongsToLibrary, selectBestMatchingSongs,
  and buildTitleQueries

https://claude.ai/code/session_01Dd4mTq1VQZag4RNjCVusiF

* chore: regenerate plugin schema after ISRC addition

Run `make gen` to update the generated YAML schema for the
metadata agent capability with the new ISRC field on SongRef.

https://claude.ai/code/session_01Dd4mTq1VQZag4RNjCVusiF

* feat(mediafile): add GetAllByTags method to MediaFileRepository for tag-based retrieval

Signed-off-by: Deluan <deluan@navidrome.org>

* feat(provider): speed up track matching by incorporating prior matches in ISRC and MBID lookups

Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Deluan Quintão
2026-01-27 14:54:29 -05:00
committed by GitHub
parent a55c4f0410
commit 1afcf7775b
13 changed files with 133 additions and 19 deletions
+1
View File
@@ -36,6 +36,7 @@ type Song struct {
ID string
Name string
MBID string
ISRC string
Artist string
ArtistMBID string
Album string
+5
View File
@@ -92,6 +92,11 @@ func (m *mockMediaFileRepo) Get(id string) (*model.MediaFile, error) {
return args.Get(0).(*model.MediaFile), args.Error(1)
}
// GetAllByTags implements model.MediaFileRepository.
func (m *mockMediaFileRepo) GetAllByTags(_ model.TagName, _ []string, options ...model.QueryOptions) (model.MediaFiles, error) {
return m.GetAll(options...)
}
// GetAll implements model.MediaFileRepository.
func (m *mockMediaFileRepo) GetAll(options ...model.QueryOptions) (model.MediaFiles, error) {
argsSlice := make([]interface{}, len(options))
+83 -19
View File
@@ -19,17 +19,18 @@ import (
// # Algorithm Overview
//
// The algorithm matches songs from external agents (Last.fm, Deezer, etc.) to tracks in the
// local music library using three matching strategies in priority order:
// local music library using four matching strategies in priority order:
//
// 1. Direct ID match: Songs with an ID field are matched directly to MediaFiles by ID
// 2. MusicBrainz Recording ID (MBID) match: Songs with MBID are matched to tracks with
// matching mbz_recording_id
// 3. Title+Artist fuzzy match: Remaining songs are matched using fuzzy string comparison
// 3. ISRC match: Songs with ISRC are matched to tracks with matching ISRC tag
// 4. Title+Artist fuzzy match: Remaining songs are matched using fuzzy string comparison
// with metadata specificity scoring
//
// # Matching Priority
//
// When selecting the final result, matches are prioritized in order: ID > MBID > Title+Artist.
// When selecting the final result, matches are prioritized in order: ID > MBID > ISRC > Title+Artist.
// This ensures that more reliable identifiers take precedence over fuzzy text matching.
//
// # Fuzzy Matching Details
@@ -59,7 +60,16 @@ import (
// ]
// Result: t1 (MBID match takes priority over title+artist)
//
// Example 2 - Specificity Ranking:
// Example 2 - ISRC Priority:
//
// Agent returns: {Name: "Paranoid Android", ISRC: "GBAYE0000351", Artist: "Radiohead"}
// Library has: [
// {ID: "t1", Title: "Paranoid Android", Tags: {isrc: ["GBAYE0000351"]}},
// {ID: "t2", Title: "Paranoid Android", Artist: "Radiohead"},
// ]
// Result: t1 (ISRC match takes priority over title+artist)
//
// Example 3 - Specificity Ranking:
//
// Agent returns: {Name: "Enjoy the Silence", Artist: "Depeche Mode", Album: "Violator"}
// Library has: [
@@ -68,7 +78,7 @@ import (
// ]
// Result: t2 (Level 3 beats Level 1 due to album match)
//
// Example 3 - Fuzzy Title Matching:
// Example 4 - Fuzzy Title Matching:
//
// Agent returns: {Name: "Bohemian Rhapsody", Artist: "Queen"}
// Library has: {ID: "t1", Title: "Bohemian Rhapsody - Remastered", Artist: "Queen"}
@@ -90,16 +100,36 @@ func (e *provider) matchSongsToLibrary(ctx context.Context, songs []agents.Song,
if err != nil {
return nil, fmt.Errorf("failed to load tracks by ID: %w", err)
}
mbidMatches, err := e.loadTracksByMBID(ctx, songs)
mbidMatches, err := e.loadTracksByMBID(ctx, songs, idMatches)
if err != nil {
return nil, fmt.Errorf("failed to load tracks by MBID: %w", err)
}
titleMatches, err := e.loadTracksByTitleAndArtist(ctx, songs, idMatches, mbidMatches)
isrcMatches, err := e.loadTracksByISRC(ctx, songs, idMatches, mbidMatches)
if err != nil {
return nil, fmt.Errorf("failed to load tracks by ISRC: %w", err)
}
titleMatches, err := e.loadTracksByTitleAndArtist(ctx, songs, idMatches, mbidMatches, isrcMatches)
if err != nil {
return nil, fmt.Errorf("failed to load tracks by title: %w", err)
}
return e.selectBestMatchingSongs(songs, idMatches, mbidMatches, titleMatches, count), nil
return e.selectBestMatchingSongs(songs, idMatches, mbidMatches, isrcMatches, titleMatches, count), nil
}
// songMatchedIn checks if a song has already been matched in any of the provided match maps.
// It checks the song's ID, MBID, and ISRC fields against the corresponding map keys.
func songMatchedIn(s agents.Song, priorMatches ...map[string]model.MediaFile) bool {
keys := []string{s.ID, s.MBID, s.ISRC}
for _, m := range priorMatches {
for _, key := range keys {
if key != "" {
if mf, ok := m[key]; ok && mf.ID != "" {
return true
}
}
}
}
return false
}
// loadTracksByID fetches MediaFiles from the library using direct ID matching.
@@ -138,10 +168,10 @@ func (e *provider) loadTracksByID(ctx context.Context, songs []agents.Song) (map
// It extracts all non-empty MBID fields from the input songs and performs a single
// batch query against the mbz_recording_id column. Returns a map keyed by MBID for
// O(1) lookup. Only non-missing files are returned.
func (e *provider) loadTracksByMBID(ctx context.Context, songs []agents.Song) (map[string]model.MediaFile, error) {
func (e *provider) loadTracksByMBID(ctx context.Context, songs []agents.Song, priorMatches ...map[string]model.MediaFile) (map[string]model.MediaFile, error) {
var mbids []string
for _, s := range songs {
if s.MBID != "" {
if s.MBID != "" && !songMatchedIn(s, priorMatches...) {
mbids = append(mbids, s.MBID)
}
}
@@ -168,6 +198,37 @@ func (e *provider) loadTracksByMBID(ctx context.Context, songs []agents.Song) (m
return matches, nil
}
// loadTracksByISRC fetches MediaFiles from the library using ISRC (International Standard
// Recording Code) matching. It extracts all non-empty ISRC fields from the input songs and
// queries the tags JSON column for matching ISRC values. Returns a map keyed by ISRC for
// O(1) lookup. Only non-missing files are returned.
func (e *provider) loadTracksByISRC(ctx context.Context, songs []agents.Song, priorMatches ...map[string]model.MediaFile) (map[string]model.MediaFile, error) {
var isrcs []string
for _, s := range songs {
if s.ISRC != "" && !songMatchedIn(s, priorMatches...) {
isrcs = append(isrcs, s.ISRC)
}
}
matches := map[string]model.MediaFile{}
if len(isrcs) == 0 {
return matches, nil
}
res, err := e.ds.MediaFile(ctx).GetAllByTags(model.TagISRC, isrcs, model.QueryOptions{
Filters: squirrel.Eq{"missing": false},
})
if err != nil {
return matches, err
}
for _, mf := range res {
for _, isrc := range mf.Tags.Values(model.TagISRC) {
if _, ok := matches[isrc]; !ok {
matches[isrc] = mf
}
}
}
return matches, nil
}
// songQuery represents a normalized query for matching a song to library tracks.
// All string fields are sanitized (lowercased, diacritics removed) for comparison.
// This struct is used internally by loadTracksByTitleAndArtist to group queries by artist.
@@ -246,8 +307,8 @@ func computeSpecificityLevel(q songQuery, mf model.MediaFile, albumThreshold flo
// Uses a unified scoring approach that combines title similarity (Jaro-Winkler) with
// metadata specificity (MBIDs, album names) for both exact and fuzzy matches.
// Returns a map keyed by "title|artist" for compatibility with selectBestMatchingSongs.
func (e *provider) loadTracksByTitleAndArtist(ctx context.Context, songs []agents.Song, idMatches, mbidMatches map[string]model.MediaFile) (map[string]model.MediaFile, error) {
queries := e.buildTitleQueries(songs, idMatches, mbidMatches)
func (e *provider) loadTracksByTitleAndArtist(ctx context.Context, songs []agents.Song, priorMatches ...map[string]model.MediaFile) (map[string]model.MediaFile, error) {
queries := e.buildTitleQueries(songs, priorMatches...)
if len(queries) == 0 {
return map[string]model.MediaFile{}, nil
}
@@ -344,14 +405,10 @@ func (e *provider) findBestMatch(q songQuery, tracks model.MediaFiles, threshold
return bestMatch, found
}
func (e *provider) buildTitleQueries(songs []agents.Song, idMatches, mbidMatches map[string]model.MediaFile) []songQuery {
func (e *provider) buildTitleQueries(songs []agents.Song, priorMatches ...map[string]model.MediaFile) []songQuery {
var queries []songQuery
for _, s := range songs {
// Skip if already matched by ID or MBID
if s.ID != "" && idMatches[s.ID].ID != "" {
continue
}
if s.MBID != "" && mbidMatches[s.MBID].ID != "" {
if songMatchedIn(s, priorMatches...) {
continue
}
queries = append(queries, songQuery{
@@ -366,7 +423,7 @@ func (e *provider) buildTitleQueries(songs []agents.Song, idMatches, mbidMatches
return queries
}
func (e *provider) selectBestMatchingSongs(songs []agents.Song, byID, byMBID, byTitleArtist map[string]model.MediaFile, count int) model.MediaFiles {
func (e *provider) selectBestMatchingSongs(songs []agents.Song, byID, byMBID, byISRC, byTitleArtist map[string]model.MediaFile, count int) model.MediaFiles {
var mfs model.MediaFiles
for _, t := range songs {
if len(mfs) == count {
@@ -386,6 +443,13 @@ func (e *provider) selectBestMatchingSongs(songs []agents.Song, byID, byMBID, by
continue
}
}
// Try ISRC match third
if t.ISRC != "" {
if mf, ok := byISRC[t.ISRC]; ok {
mfs = append(mfs, mf)
continue
}
}
// Fall back to title+artist match (composite key preserves duplicate titles)
key := str.SanitizeFieldForSorting(t.Name) + "|" + str.SanitizeFieldForSortingNoArticle(t.Artist)
if mf, ok := byTitleArtist[key]; ok {
+1
View File
@@ -359,6 +359,7 @@ type MediaFileRepository interface {
Get(id string) (*MediaFile, error)
GetWithParticipants(id string) (*MediaFile, error)
GetAll(options ...QueryOptions) (MediaFiles, error)
GetAllByTags(tag TagName, values []string, options ...QueryOptions) (MediaFiles, error)
GetCursor(options ...QueryOptions) (MediaFileCursor, error)
Delete(id string) error
DeleteMissing(ids []string) error
+25
View File
@@ -195,6 +195,31 @@ func (r *mediaFileRepository) GetAll(options ...model.QueryOptions) (model.Media
return res.toModels(), nil
}
func (r *mediaFileRepository) GetAllByTags(tag model.TagName, values []string, options ...model.QueryOptions) (model.MediaFiles, error) {
placeholders := make([]string, len(values))
args := make([]any, len(values))
for i, v := range values {
placeholders[i] = "?"
args[i] = v
}
tagFilter := Expr(
fmt.Sprintf("exists (select 1 from json_tree(media_file.tags, '$.%s') where key='value' and value in (%s))",
tag, strings.Join(placeholders, ",")),
args...,
)
var opts model.QueryOptions
if len(options) > 0 {
opts = options[0]
}
if opts.Filters != nil {
opts.Filters = And{tagFilter, opts.Filters}
} else {
opts.Filters = tagFilter
}
return r.GetAll(opts)
}
func (r *mediaFileRepository) GetCursor(options ...model.QueryOptions) (model.MediaFileCursor, error) {
sq := r.selectMediaFile(options...)
cursor, err := queryWithStableResults[dbMediaFile](r.sqlRepository, sq)
+2
View File
@@ -142,6 +142,8 @@ type SongRef struct {
Name string `json:"name"`
// MBID is the MusicBrainz ID for the song.
MBID string `json:"mbid,omitempty"`
// ISRC is the International Standard Recording Code for the song.
ISRC string `json:"isrc,omitempty"`
// Artist is the artist name.
Artist string `json:"artist,omitempty"`
// ArtistMBID is the MusicBrainz artist ID.
+3
View File
@@ -343,6 +343,9 @@ components:
mbid:
type: string
description: MBID is the MusicBrainz ID for the song.
isrc:
type: string
description: ISRC is the International Standard Recording Code for the song.
artist:
type: string
description: Artist is the artist name.
+1
View File
@@ -230,6 +230,7 @@ func songRefsToAgentSongs(refs []capabilities.SongRef) []agents.Song {
ID: s.ID,
Name: s.Name,
MBID: s.MBID,
ISRC: s.ISRC,
Artist: s.Artist,
ArtistMBID: s.ArtistMBID,
Album: s.Album,
+2
View File
@@ -171,6 +171,8 @@ type SongRef struct {
Name string `json:"name"`
// MBID is the MusicBrainz ID for the song.
MBID string `json:"mbid,omitempty"`
// ISRC is the International Standard Recording Code for the song.
ISRC string `json:"isrc,omitempty"`
// Artist is the artist name.
Artist string `json:"artist,omitempty"`
// ArtistMBID is the MusicBrainz artist ID.
+2
View File
@@ -168,6 +168,8 @@ type SongRef struct {
Name string `json:"name"`
// MBID is the MusicBrainz ID for the song.
MBID string `json:"mbid,omitempty"`
// ISRC is the International Standard Recording Code for the song.
ISRC string `json:"isrc,omitempty"`
// Artist is the artist name.
Artist string `json:"artist,omitempty"`
// ArtistMBID is the MusicBrainz artist ID.
@@ -242,6 +242,9 @@ pub struct SongRef {
/// MBID is the MusicBrainz ID for the song.
#[serde(default, skip_serializing_if = "String::is_empty")]
pub mbid: String,
/// ISRC is the International Standard Recording Code for the song.
#[serde(default, skip_serializing_if = "String::is_empty")]
pub isrc: String,
/// Artist is the artist name.
#[serde(default, skip_serializing_if = "String::is_empty")]
pub artist: String,
+1
View File
@@ -134,6 +134,7 @@ func (t *testMetadataAgent) GetSimilarSongsByTrack(input metadata.SimilarSongsBy
ID: "similar-track-id-" + strconv.Itoa(i+1),
Name: "Similar to " + input.Name + " #" + strconv.Itoa(i+1),
MBID: "similar-mbid-" + strconv.Itoa(i+1),
ISRC: "similar-isrc-" + strconv.Itoa(i+1),
Artist: input.Artist,
ArtistMBID: "artist-mbid-" + strconv.Itoa(i+1),
})
+4
View File
@@ -76,6 +76,10 @@ func (m *MockMediaFileRepo) GetWithParticipants(id string) (*model.MediaFile, er
return nil, model.ErrNotFound
}
func (m *MockMediaFileRepo) GetAllByTags(_ model.TagName, _ []string, options ...model.QueryOptions) (model.MediaFiles, error) {
return m.GetAll(options...)
}
func (m *MockMediaFileRepo) GetAll(qo ...model.QueryOptions) (model.MediaFiles, error) {
if len(qo) > 0 {
m.Options = qo[0]