From 91fab68578d8fa3ab7a8606c421ef1e3b67d77a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Deluan=20Quint=C3=A3o?= Date: Fri, 31 Oct 2025 09:07:23 -0400 Subject: [PATCH] fix: handle UTF BOM in lyrics and playlist files (#4637) * fix: handle UTF-8 BOM in lyrics and playlist files Added UTF-8 BOM (Byte Order Mark) detection and stripping for external lyrics files and playlist files. This ensures that files with BOM markers are correctly parsed and recognized as synced lyrics or valid playlists. The fix introduces a new ioutils package with UTF8Reader and UTF8ReadFile functions that automatically detect and remove UTF-8, UTF-16 LE, and UTF-16 BE BOMs. These utilities are now used when reading external lyrics and playlist files to ensure consistent parsing regardless of BOM presence. Added comprehensive tests for BOM handling in both lyrics and playlists, including test fixtures with actual BOM markers to verify correct behavior. * test: add test for UTF-16 LE encoded LRC files Signed-off-by: Deluan --------- Signed-off-by: Deluan --- core/lyrics/sources.go | 4 +- core/lyrics/sources_test.go | 34 ++++++ core/playlists.go | 6 +- core/playlists_test.go | 18 +++ tests/fixtures/bom-test.lrc | 4 + tests/fixtures/bom-utf16-test.lrc | Bin 0 -> 164 bytes tests/fixtures/playlists/bom-test-utf16.m3u | Bin 0 -> 412 bytes tests/fixtures/playlists/bom-test.m3u | 6 + utils/ioutils/ioutils.go | 33 ++++++ utils/ioutils/ioutils_test.go | 117 ++++++++++++++++++++ 10 files changed, 218 insertions(+), 4 deletions(-) create mode 100644 tests/fixtures/bom-test.lrc create mode 100644 tests/fixtures/bom-utf16-test.lrc create mode 100644 tests/fixtures/playlists/bom-test-utf16.m3u create mode 100644 tests/fixtures/playlists/bom-test.m3u create mode 100644 utils/ioutils/ioutils.go create mode 100644 utils/ioutils/ioutils_test.go diff --git a/core/lyrics/sources.go b/core/lyrics/sources.go index 6d4a4cc6..857dc2ee 100644 --- a/core/lyrics/sources.go +++ b/core/lyrics/sources.go @@ -8,6 +8,7 @@ import ( "github.com/navidrome/navidrome/log" "github.com/navidrome/navidrome/model" + "github.com/navidrome/navidrome/utils/ioutils" ) func fromEmbedded(ctx context.Context, mf *model.MediaFile) (model.LyricList, error) { @@ -27,8 +28,7 @@ func fromExternalFile(ctx context.Context, mf *model.MediaFile, suffix string) ( externalLyric := basePath[0:len(basePath)-len(ext)] + suffix - contents, err := os.ReadFile(externalLyric) - + contents, err := ioutils.UTF8ReadFile(externalLyric) if errors.Is(err, os.ErrNotExist) { log.Trace(ctx, "no lyrics found at path", "path", externalLyric) return nil, nil diff --git a/core/lyrics/sources_test.go b/core/lyrics/sources_test.go index e92564c0..b3d50210 100644 --- a/core/lyrics/sources_test.go +++ b/core/lyrics/sources_test.go @@ -108,5 +108,39 @@ var _ = Describe("sources", func() { }, })) }) + + It("should handle LRC files with UTF-8 BOM marker (issue #4631)", func() { + // The function looks for , so we need to pass + // a MediaFile with .mp3 path and look for .lrc suffix + mf := model.MediaFile{Path: "tests/fixtures/bom-test.mp3"} + lyrics, err := fromExternalFile(ctx, &mf, ".lrc") + + Expect(err).To(BeNil()) + Expect(lyrics).ToNot(BeNil()) + Expect(lyrics).To(HaveLen(1)) + + // The critical assertion: even with BOM, synced should be true + Expect(lyrics[0].Synced).To(BeTrue(), "Lyrics with BOM marker should be recognized as synced") + Expect(lyrics[0].Line).To(HaveLen(1)) + Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(0)))) + Expect(lyrics[0].Line[0].Value).To(ContainSubstring("作曲")) + }) + + It("should handle UTF-16 LE encoded LRC files", func() { + mf := model.MediaFile{Path: "tests/fixtures/bom-utf16-test.mp3"} + lyrics, err := fromExternalFile(ctx, &mf, ".lrc") + + Expect(err).To(BeNil()) + Expect(lyrics).ToNot(BeNil()) + Expect(lyrics).To(HaveLen(1)) + + // UTF-16 should be properly converted to UTF-8 + Expect(lyrics[0].Synced).To(BeTrue(), "UTF-16 encoded lyrics should be recognized as synced") + Expect(lyrics[0].Line).To(HaveLen(2)) + Expect(lyrics[0].Line[0].Start).To(Equal(gg.P(int64(18800)))) + Expect(lyrics[0].Line[0].Value).To(Equal("We're no strangers to love")) + Expect(lyrics[0].Line[1].Start).To(Equal(gg.P(int64(22801)))) + Expect(lyrics[0].Line[1].Value).To(Equal("You know the rules and so do I")) + }) }) }) diff --git a/core/playlists.go b/core/playlists.go index 2eebc94e..f98179f8 100644 --- a/core/playlists.go +++ b/core/playlists.go @@ -20,6 +20,7 @@ import ( "github.com/navidrome/navidrome/model" "github.com/navidrome/navidrome/model/criteria" "github.com/navidrome/navidrome/model/request" + "github.com/navidrome/navidrome/utils/ioutils" "github.com/navidrome/navidrome/utils/slice" "golang.org/x/text/unicode/norm" ) @@ -97,12 +98,13 @@ func (s *playlists) parsePlaylist(ctx context.Context, playlistFile string, fold } defer file.Close() + reader := ioutils.UTF8Reader(file) extension := strings.ToLower(filepath.Ext(playlistFile)) switch extension { case ".nsp": - err = s.parseNSP(ctx, pls, file) + err = s.parseNSP(ctx, pls, reader) default: - err = s.parseM3U(ctx, pls, folder, file) + err = s.parseM3U(ctx, pls, folder, reader) } return pls, err } diff --git a/core/playlists_test.go b/core/playlists_test.go index 399210ac..fb42f9c9 100644 --- a/core/playlists_test.go +++ b/core/playlists_test.go @@ -74,6 +74,24 @@ var _ = Describe("Playlists", func() { Expect(err).ToNot(HaveOccurred()) Expect(pls.Tracks).To(HaveLen(2)) }) + + It("parses playlists with UTF-8 BOM marker", func() { + pls, err := ps.ImportFile(ctx, folder, "bom-test.m3u") + Expect(err).ToNot(HaveOccurred()) + Expect(pls.OwnerID).To(Equal("123")) + Expect(pls.Name).To(Equal("Test Playlist")) + Expect(pls.Tracks).To(HaveLen(1)) + Expect(pls.Tracks[0].Path).To(Equal("tests/fixtures/playlists/test.mp3")) + }) + + It("parses UTF-16 LE encoded playlists with BOM and converts to UTF-8", func() { + pls, err := ps.ImportFile(ctx, folder, "bom-test-utf16.m3u") + Expect(err).ToNot(HaveOccurred()) + Expect(pls.OwnerID).To(Equal("123")) + Expect(pls.Name).To(Equal("UTF-16 Test Playlist")) + Expect(pls.Tracks).To(HaveLen(1)) + Expect(pls.Tracks[0].Path).To(Equal("tests/fixtures/playlists/test.mp3")) + }) }) Describe("NSP", func() { diff --git a/tests/fixtures/bom-test.lrc b/tests/fixtures/bom-test.lrc new file mode 100644 index 00000000..223c37de --- /dev/null +++ b/tests/fixtures/bom-test.lrc @@ -0,0 +1,4 @@ +[00:00.00] 作曲 : 柏大輔 +NOTE: This file intentionally contains a UTF-8 BOM (Byte Order Mark) at byte 0. +This tests BOM handling in lyrics parsing (GitHub issue #4631). +The BOM bytes are: 0xEF 0xBB 0xBF \ No newline at end of file diff --git a/tests/fixtures/bom-utf16-test.lrc b/tests/fixtures/bom-utf16-test.lrc new file mode 100644 index 0000000000000000000000000000000000000000..e40ea3255fd95fe3b366e41cad0d4502ce35bd6a GIT binary patch literal 164 zcmXwxK?;K~6hvq3DYA1X(UtTDoU+!?pVsC2jhAtvU#k~w>BPFF`LEbibh%5bBSXczJ$t*hDT<7d=2hY) zU)soAr_8dgt5`EgGiGvL@t~bBmw$Y)MbYvEW(RulUd{a`UM;tC$hnt1Ri)V>sJwS_ WH@`2#-`_mZuBGU99`G#n$jmR%nn4r* literal 0 HcmV?d00001 diff --git a/tests/fixtures/playlists/bom-test.m3u b/tests/fixtures/playlists/bom-test.m3u new file mode 100644 index 00000000..f5a00806 --- /dev/null +++ b/tests/fixtures/playlists/bom-test.m3u @@ -0,0 +1,6 @@ +#EXTM3U +# NOTE: This file intentionally contains a UTF-8 BOM (Byte Order Mark) at the beginning +# (bytes 0xEF 0xBB 0xBF) to test BOM handling in playlist parsing. +#PLAYLIST:Test Playlist +#EXTINF:123,Test Artist - Test Song +test.mp3 diff --git a/utils/ioutils/ioutils.go b/utils/ioutils/ioutils.go new file mode 100644 index 00000000..89d3997f --- /dev/null +++ b/utils/ioutils/ioutils.go @@ -0,0 +1,33 @@ +package ioutils + +import ( + "io" + "os" + + "golang.org/x/text/encoding/unicode" + "golang.org/x/text/transform" +) + +// UTF8Reader wraps an io.Reader to handle Byte Order Mark (BOM) properly. +// It strips UTF-8 BOM if present, and converts UTF-16 (LE/BE) to UTF-8. +// This is particularly useful for reading user-provided text files (like LRC lyrics, +// playlists) that may have been created on Windows, which often adds BOM markers. +// +// Reference: https://en.wikipedia.org/wiki/Byte_order_mark +func UTF8Reader(r io.Reader) io.Reader { + return transform.NewReader(r, unicode.BOMOverride(unicode.UTF8.NewDecoder())) +} + +// UTF8ReadFile reads the named file and returns its contents as a byte slice, +// automatically handling BOM markers. It's similar to os.ReadFile but strips +// UTF-8 BOM and converts UTF-16 encoded files to UTF-8. +func UTF8ReadFile(filename string) ([]byte, error) { + file, err := os.Open(filename) + if err != nil { + return nil, err + } + defer file.Close() + + reader := UTF8Reader(file) + return io.ReadAll(reader) +} diff --git a/utils/ioutils/ioutils_test.go b/utils/ioutils/ioutils_test.go new file mode 100644 index 00000000..7f548387 --- /dev/null +++ b/utils/ioutils/ioutils_test.go @@ -0,0 +1,117 @@ +package ioutils + +import ( + "bytes" + "io" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestIOUtils(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "IO Utils Suite") +} + +var _ = Describe("UTF8Reader", func() { + Context("when reading text with UTF-8 BOM", func() { + It("strips the UTF-8 BOM marker", func() { + // UTF-8 BOM is EF BB BF + input := []byte{0xEF, 0xBB, 0xBF, 'h', 'e', 'l', 'l', 'o'} + reader := UTF8Reader(bytes.NewReader(input)) + + output, err := io.ReadAll(reader) + Expect(err).ToNot(HaveOccurred()) + Expect(string(output)).To(Equal("hello")) + }) + + It("strips UTF-8 BOM from multi-line text", func() { + // Test with the actual LRC file format + input := []byte{0xEF, 0xBB, 0xBF, '[', '0', '0', ':', '0', '0', '.', '0', '0', ']', ' ', 't', 'e', 's', 't'} + reader := UTF8Reader(bytes.NewReader(input)) + + output, err := io.ReadAll(reader) + Expect(err).ToNot(HaveOccurred()) + Expect(string(output)).To(Equal("[00:00.00] test")) + }) + }) + + Context("when reading text without BOM", func() { + It("passes through unchanged", func() { + input := []byte("hello world") + reader := UTF8Reader(bytes.NewReader(input)) + + output, err := io.ReadAll(reader) + Expect(err).ToNot(HaveOccurred()) + Expect(string(output)).To(Equal("hello world")) + }) + }) + + Context("when reading UTF-16 LE encoded text", func() { + It("converts to UTF-8 and strips BOM", func() { + // UTF-16 LE BOM (FF FE) followed by "hi" in UTF-16 LE + input := []byte{0xFF, 0xFE, 'h', 0x00, 'i', 0x00} + reader := UTF8Reader(bytes.NewReader(input)) + + output, err := io.ReadAll(reader) + Expect(err).ToNot(HaveOccurred()) + Expect(string(output)).To(Equal("hi")) + }) + }) + + Context("when reading UTF-16 BE encoded text", func() { + It("converts to UTF-8 and strips BOM", func() { + // UTF-16 BE BOM (FE FF) followed by "hi" in UTF-16 BE + input := []byte{0xFE, 0xFF, 0x00, 'h', 0x00, 'i'} + reader := UTF8Reader(bytes.NewReader(input)) + + output, err := io.ReadAll(reader) + Expect(err).ToNot(HaveOccurred()) + Expect(string(output)).To(Equal("hi")) + }) + }) + + Context("when reading empty content", func() { + It("returns empty string", func() { + reader := UTF8Reader(bytes.NewReader([]byte{})) + + output, err := io.ReadAll(reader) + Expect(err).ToNot(HaveOccurred()) + Expect(string(output)).To(Equal("")) + }) + }) +}) + +var _ = Describe("UTF8ReadFile", func() { + Context("when reading a file with UTF-8 BOM", func() { + It("strips the BOM marker", func() { + // Use the actual fixture from issue #4631 + contents, err := UTF8ReadFile("../../tests/fixtures/bom-test.lrc") + Expect(err).ToNot(HaveOccurred()) + + // Should NOT start with BOM + Expect(contents[0]).ToNot(Equal(byte(0xEF))) + // Should start with '[' + Expect(contents[0]).To(Equal(byte('['))) + Expect(string(contents)).To(HavePrefix("[00:00.00]")) + }) + }) + + Context("when reading a file without BOM", func() { + It("reads the file normally", func() { + contents, err := UTF8ReadFile("../../tests/fixtures/test.lrc") + Expect(err).ToNot(HaveOccurred()) + + // Should contain the expected content + Expect(string(contents)).To(ContainSubstring("We're no strangers to love")) + }) + }) + + Context("when reading a non-existent file", func() { + It("returns an error", func() { + _, err := UTF8ReadFile("../../tests/fixtures/nonexistent.lrc") + Expect(err).To(HaveOccurred()) + }) + }) +})