fix: handle UTF BOM in lyrics and playlist files (#4637)

* fix: handle UTF-8 BOM in lyrics and playlist files

Added UTF-8 BOM (Byte Order Mark) detection and stripping for external lyrics files and playlist files. This ensures that files with BOM markers are correctly parsed and recognized as synced lyrics or valid playlists.

The fix introduces a new ioutils package with UTF8Reader and UTF8ReadFile functions that automatically detect and remove UTF-8, UTF-16 LE, and UTF-16 BE BOMs. These utilities are now used when reading external lyrics and playlist files to ensure consistent parsing regardless of BOM presence.

Added comprehensive tests for BOM handling in both lyrics and playlists, including test fixtures with actual BOM markers to verify correct behavior.

* test: add test for UTF-16 LE encoded LRC files

Signed-off-by: Deluan <deluan@navidrome.org>

---------

Signed-off-by: Deluan <deluan@navidrome.org>
This commit is contained in:
Deluan Quintão
2025-10-31 09:07:23 -04:00
committed by GitHub
parent 0bdd3e6f8b
commit 91fab68578
10 changed files with 218 additions and 4 deletions
+33
View File
@@ -0,0 +1,33 @@
package ioutils
import (
"io"
"os"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
// UTF8Reader wraps an io.Reader to handle Byte Order Mark (BOM) properly.
// It strips UTF-8 BOM if present, and converts UTF-16 (LE/BE) to UTF-8.
// This is particularly useful for reading user-provided text files (like LRC lyrics,
// playlists) that may have been created on Windows, which often adds BOM markers.
//
// Reference: https://en.wikipedia.org/wiki/Byte_order_mark
func UTF8Reader(r io.Reader) io.Reader {
return transform.NewReader(r, unicode.BOMOverride(unicode.UTF8.NewDecoder()))
}
// UTF8ReadFile opens filename and returns its entire contents after passing
// them through UTF8Reader, so BOM handling is applied before the bytes reach
// the caller. It is the BOM-aware counterpart of os.ReadFile.
//
// The error from os.Open is returned unchanged when the file cannot be opened;
// otherwise the result and error of io.ReadAll are returned as-is.
func UTF8ReadFile(filename string) ([]byte, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	// The file is opened read-only, so the Close error is intentionally ignored.
	defer f.Close()

	return io.ReadAll(UTF8Reader(f))
}
+117
View File
@@ -0,0 +1,117 @@
package ioutils
import (
"bytes"
"io"
"testing"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// TestIOUtils is the `go test` entry point for this package: it wires Gomega
// failures into Ginkgo and then runs every spec registered in the package.
func TestIOUtils(t *testing.T) {
	const suiteDescription = "IO Utils Suite"

	RegisterFailHandler(Fail)
	RunSpecs(t, suiteDescription)
}
// Specs for UTF8Reader: each case feeds raw bytes through the reader and
// checks the fully drained output.
var _ = Describe("UTF8Reader", func() {
	Context("when reading text with UTF-8 BOM", func() {
		It("strips the UTF-8 BOM marker", func() {
			// UTF-8 BOM is EF BB BF
			input := []byte{0xEF, 0xBB, 0xBF, 'h', 'e', 'l', 'l', 'o'}
			reader := UTF8Reader(bytes.NewReader(input))

			output, err := io.ReadAll(reader)
			Expect(err).ToNot(HaveOccurred())
			Expect(string(output)).To(Equal("hello"))
		})

		It("strips UTF-8 BOM from multi-line text", func() {
			// LRC-style content spanning several lines, so the spec really
			// covers the multi-line case its name describes: only the leading
			// BOM is removed and the line breaks are preserved.
			const lyrics = "[00:00.00] first line\n[00:05.00] second line\n"
			input := append([]byte{0xEF, 0xBB, 0xBF}, lyrics...)
			reader := UTF8Reader(bytes.NewReader(input))

			output, err := io.ReadAll(reader)
			Expect(err).ToNot(HaveOccurred())
			Expect(string(output)).To(Equal(lyrics))
		})
	})

	Context("when reading text without BOM", func() {
		It("passes through unchanged", func() {
			input := []byte("hello world")
			reader := UTF8Reader(bytes.NewReader(input))

			output, err := io.ReadAll(reader)
			Expect(err).ToNot(HaveOccurred())
			Expect(string(output)).To(Equal("hello world"))
		})
	})

	Context("when reading UTF-16 LE encoded text", func() {
		It("converts to UTF-8 and strips BOM", func() {
			// UTF-16 LE BOM (FF FE) followed by "hi" in UTF-16 LE
			input := []byte{0xFF, 0xFE, 'h', 0x00, 'i', 0x00}
			reader := UTF8Reader(bytes.NewReader(input))

			output, err := io.ReadAll(reader)
			Expect(err).ToNot(HaveOccurred())
			Expect(string(output)).To(Equal("hi"))
		})
	})

	Context("when reading UTF-16 BE encoded text", func() {
		It("converts to UTF-8 and strips BOM", func() {
			// UTF-16 BE BOM (FE FF) followed by "hi" in UTF-16 BE
			input := []byte{0xFE, 0xFF, 0x00, 'h', 0x00, 'i'}
			reader := UTF8Reader(bytes.NewReader(input))

			output, err := io.ReadAll(reader)
			Expect(err).ToNot(HaveOccurred())
			Expect(string(output)).To(Equal("hi"))
		})
	})

	Context("when reading empty content", func() {
		It("returns empty string", func() {
			reader := UTF8Reader(bytes.NewReader([]byte{}))

			output, err := io.ReadAll(reader)
			Expect(err).ToNot(HaveOccurred())
			Expect(string(output)).To(Equal(""))
		})
	})
})
// Specs for UTF8ReadFile, run against fixture files addressed relative to
// this package's directory.
var _ = Describe("UTF8ReadFile", func() {
	Context("when reading a file with UTF-8 BOM", func() {
		It("strips the BOM marker", func() {
			// Use the actual fixture from issue #4631
			contents, err := UTF8ReadFile("../../tests/fixtures/bom-test.lrc")
			Expect(err).ToNot(HaveOccurred())

			// Guard the byte-level checks below: an empty result should fail
			// with a readable assertion instead of an index-out-of-range panic.
			Expect(contents).ToNot(BeEmpty())

			// Should NOT start with BOM
			Expect(contents[0]).ToNot(Equal(byte(0xEF)))
			// Should start with '['
			Expect(contents[0]).To(Equal(byte('[')))
			Expect(string(contents)).To(HavePrefix("[00:00.00]"))
		})
	})

	Context("when reading a file without BOM", func() {
		It("reads the file normally", func() {
			contents, err := UTF8ReadFile("../../tests/fixtures/test.lrc")
			Expect(err).ToNot(HaveOccurred())

			// Should contain the expected content
			Expect(string(contents)).To(ContainSubstring("We're no strangers to love"))
		})
	})

	Context("when reading a non-existent file", func() {
		It("returns an error", func() {
			_, err := UTF8ReadFile("../../tests/fixtures/nonexistent.lrc")
			Expect(err).To(HaveOccurred())
		})
	})
})