fix: handle UTF BOM in lyrics and playlist files (#4637)

* fix: handle UTF-8 BOM in lyrics and playlist files

Added UTF-8 BOM (Byte Order Mark) detection and stripping for external lyrics files and playlist files. This ensures that files with BOM markers are correctly parsed and recognized as synced lyrics or valid playlists.

The fix introduces a new ioutils package with UTF8Reader and UTF8ReadFile functions that automatically detect and remove UTF-8, UTF-16 LE, and UTF-16 BE BOMs. These utilities are now used when reading external lyrics and playlist files to ensure consistent parsing regardless of BOM presence.

Added comprehensive tests for BOM handling in both lyrics and playlists, including test fixtures with actual BOM markers to verify correct behavior.

* test: add test for UTF-16 LE encoded LRC files

Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
This commit is contained in:
Deluan Quintão
2025-10-31 09:07:23 -04:00
committed by GitHub
parent 0bdd3e6f8b
commit 91fab68578
10 changed files with 218 additions and 4 deletions
+2 -2
View File
@@ -8,6 +8,7 @@ import (
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/utils/ioutils"
)
func fromEmbedded(ctx context.Context, mf *model.MediaFile) (model.LyricList, error) {
@@ -27,8 +28,7 @@ func fromExternalFile(ctx context.Context, mf *model.MediaFile, suffix string) (
externalLyric := basePath[0:len(basePath)-len(ext)] + suffix
contents, err := os.ReadFile(externalLyric)
contents, err := ioutils.UTF8ReadFile(externalLyric)
if errors.Is(err, os.ErrNotExist) {
log.Trace(ctx, "no lyrics found at path", "path", externalLyric)
return nil, nil
+34
View File
@@ -108,5 +108,39 @@ var _ = Describe("sources", func() {
},
}))
})
It("should handle LRC files with UTF-8 BOM marker (issue #4631)", func() {
// The function looks for <basePath-without-ext><suffix>, so we need to pass
// a MediaFile with .mp3 path and look for .lrc suffix
mf := model.MediaFile{Path: "tests/fixtures/bom-test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".lrc")
Expect(err).To(BeNil())
Expect(lyrics).ToNot(BeNil())
Expect(lyrics).To(HaveLen(1))
// The critical assertion: even with BOM, synced should be true
Expect(lyrics[0].Synced).To(BeTrue(), "Lyrics with BOM marker should be recognized as synced")
Expect(lyrics[0].Line).To(HaveLen(1))
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(0))))
Expect(lyrics[0].Line[0].Value).To(ContainSubstring("作曲"))
})
It("should handle UTF-16 LE encoded LRC files", func() {
mf := model.MediaFile{Path: "tests/fixtures/bom-utf16-test.mp3"}
lyrics, err := fromExternalFile(ctx, &mf, ".lrc")
Expect(err).To(BeNil())
Expect(lyrics).ToNot(BeNil())
Expect(lyrics).To(HaveLen(1))
// UTF-16 should be properly converted to UTF-8
Expect(lyrics[0].Synced).To(BeTrue(), "UTF-16 encoded lyrics should be recognized as synced")
Expect(lyrics[0].Line).To(HaveLen(2))
Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(18800))))
Expect(lyrics[0].Line[0].Value).To(Equal("We're no strangers to love"))
Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801))))
Expect(lyrics[0].Line[1].Value).To(Equal("You know the rules and so do I"))
})
})
})