Reorganize metadata extractors code
This commit is contained in:
@@ -0,0 +1,193 @@
|
||||
package ffmpeg
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/navidrome/navidrome/conf"
|
||||
"github.com/navidrome/navidrome/log"
|
||||
)
|
||||
|
||||
// Parser extracts metadata tags from media files by running the configured
// probe command and parsing the text it prints. It carries no state.
type Parser struct{}
|
||||
|
||||
// parsedTags holds the tags found for a single file: each tag name maps to
// every value collected for that name.
type parsedTags = map[string][]string
|
||||
|
||||
func (e *Parser) Parse(files ...string) (map[string]parsedTags, error) {
|
||||
args := e.createProbeCommand(files)
|
||||
|
||||
log.Trace("Executing command", "args", args)
|
||||
cmd := exec.Command(args[0], args[1:]...) // #nosec
|
||||
output, _ := cmd.CombinedOutput()
|
||||
fileTags := map[string]parsedTags{}
|
||||
if len(output) == 0 {
|
||||
return fileTags, errors.New("error extracting metadata files")
|
||||
}
|
||||
infos := e.parseOutput(string(output))
|
||||
for file, info := range infos {
|
||||
tags, err := e.extractMetadata(file, info)
|
||||
// Skip files with errors
|
||||
if err == nil {
|
||||
fileTags[file] = tags
|
||||
}
|
||||
}
|
||||
return fileTags, nil
|
||||
}
|
||||
|
||||
func (e *Parser) extractMetadata(filePath, info string) (parsedTags, error) {
|
||||
tags := e.parseInfo(info)
|
||||
if len(tags) == 0 {
|
||||
log.Trace("Not a media file. Skipping", "filePath", filePath)
|
||||
return nil, errors.New("not a media file")
|
||||
}
|
||||
|
||||
alternativeTags := map[string][]string{
|
||||
"disc": {"tpa"},
|
||||
"has_picture": {"metadata_block_picture"},
|
||||
}
|
||||
for tagName, alternatives := range alternativeTags {
|
||||
for _, altName := range alternatives {
|
||||
if altValue, ok := tags[altName]; ok {
|
||||
tags[tagName] = append(tags[tagName], altValue...)
|
||||
}
|
||||
}
|
||||
}
|
||||
return tags, nil
|
||||
}
|
||||
|
||||
// Patterns used to pick apart the text printed by the probe command. Each one
// is shown with a sample of the line it is meant to match.
var (
	// inputRegex matches the header line that opens a file's section and
	// captures the file name between the quotes.
	//
	//	Input #0, mp3, from 'groovin.mp3':
	inputRegex = regexp.MustCompile(`(?m)^Input #\d+,.*,\sfrom\s'(.*)'`)

	// tagsRx matches a metadata line indented by 4 to 6 whitespace characters
	// and captures the (untrimmed) tag name and value.
	//
	//	    TITLE           : Back In Black
	tagsRx = regexp.MustCompile(`(?i)^\s{4,6}([\w\s-]+)\s*:(.*)`)

	// continuationRx matches a follow-up line of a multi-line value: only
	// whitespace before the colon.
	//
	//	                    : Second comment line
	continuationRx = regexp.MustCompile(`(?i)^\s+:(.*)`)

	// durationRx captures the duration and the overall bitrate.
	//
	//	  Duration: 00:04:16.00, start: 0.000000, bitrate: 995 kb/s
	durationRx = regexp.MustCompile(`^\s\sDuration: ([\d.:]+).*bitrate: (\d+)`)

	// bitRateRx captures the bitrate stated on an audio stream line.
	//
	//	    Stream #0:0: Audio: mp3, 44100 Hz, stereo, fltp, 192 kb/s
	bitRateRx = regexp.MustCompile(`^\s{2,4}Stream #\d+:\d+: (Audio):.*, (\d+) kb/s`)

	// coverRx matches a video stream line.
	//
	//	    Stream #0:1: Video: mjpeg, yuvj444p(pc, bt470bg/unknown/unknown), 600x600 [SAR 1:1 DAR 1:1], 90k tbr, 90k tbn, 90k tbc
	coverRx = regexp.MustCompile(`^\s{2,4}Stream #\d+:\d+: (Video):.*`)
)
|
||||
|
||||
func (e *Parser) parseOutput(output string) map[string]string {
|
||||
outputs := map[string]string{}
|
||||
all := inputRegex.FindAllStringSubmatchIndex(output, -1)
|
||||
for i, loc := range all {
|
||||
// Filename is the first captured group
|
||||
file := output[loc[2]:loc[3]]
|
||||
|
||||
// File info is everything from the match, up until the beginning of the next match
|
||||
info := ""
|
||||
initial := loc[1]
|
||||
if i < len(all)-1 {
|
||||
end := all[i+1][0] - 1
|
||||
info = output[initial:end]
|
||||
} else {
|
||||
// if this is the last match
|
||||
info = output[initial:]
|
||||
}
|
||||
outputs[file] = info
|
||||
}
|
||||
return outputs
|
||||
}
|
||||
|
||||
func (e *Parser) parseInfo(info string) map[string][]string {
|
||||
tags := map[string][]string{}
|
||||
|
||||
reader := strings.NewReader(info)
|
||||
scanner := bufio.NewScanner(reader)
|
||||
lastTag := ""
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
match := tagsRx.FindStringSubmatch(line)
|
||||
if len(match) > 0 {
|
||||
tagName := strings.TrimSpace(strings.ToLower(match[1]))
|
||||
if tagName != "" {
|
||||
tagValue := strings.TrimSpace(match[2])
|
||||
tags[tagName] = append(tags[tagName], tagValue)
|
||||
lastTag = tagName
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if lastTag != "" {
|
||||
match = continuationRx.FindStringSubmatch(line)
|
||||
if len(match) > 0 {
|
||||
if tags[lastTag] == nil {
|
||||
tags[lastTag] = []string{""}
|
||||
}
|
||||
tagValue := tags[lastTag][0]
|
||||
tags[lastTag][0] = tagValue + "\n" + strings.TrimSpace(match[1])
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
lastTag = ""
|
||||
match = coverRx.FindStringSubmatch(line)
|
||||
if len(match) > 0 {
|
||||
tags["has_picture"] = []string{"true"}
|
||||
continue
|
||||
}
|
||||
|
||||
match = durationRx.FindStringSubmatch(line)
|
||||
if len(match) > 0 {
|
||||
tags["duration"] = []string{e.parseDuration(match[1])}
|
||||
if len(match) > 1 {
|
||||
tags["bitrate"] = []string{match[2]}
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
match = bitRateRx.FindStringSubmatch(line)
|
||||
if len(match) > 0 {
|
||||
tags["bitrate"] = []string{match[2]}
|
||||
}
|
||||
}
|
||||
|
||||
comment := tags["comment"]
|
||||
if len(comment) > 0 && comment[0] == "Cover (front)" {
|
||||
delete(tags, "comment")
|
||||
}
|
||||
|
||||
return tags
|
||||
}
|
||||
|
||||
// zeroTime is midnight on January 1st of year 0, UTC. parseDuration subtracts
// it from a parsed clock time to obtain the elapsed time since midnight.
var zeroTime = time.Date(0, time.January, 1, 0, 0, 0, 0, time.UTC)
|
||||
|
||||
func (e *Parser) parseDuration(tag string) string {
|
||||
d, err := time.Parse("15:04:05", tag)
|
||||
if err != nil {
|
||||
return "0"
|
||||
}
|
||||
return strconv.FormatFloat(d.Sub(zeroTime).Seconds(), 'f', 2, 32)
|
||||
}
|
||||
|
||||
// Inputs will always be absolute paths
|
||||
func (e *Parser) createProbeCommand(inputs []string) []string {
|
||||
split := strings.Split(conf.Server.ProbeCommand, " ")
|
||||
args := make([]string, 0)
|
||||
|
||||
for _, s := range split {
|
||||
if s == "%s" {
|
||||
for _, inp := range inputs {
|
||||
args = append(args, "-i", inp)
|
||||
}
|
||||
} else {
|
||||
args = append(args, s)
|
||||
}
|
||||
}
|
||||
return args
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
package ffmpeg
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/navidrome/navidrome/log"
|
||||
"github.com/navidrome/navidrome/tests"
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// TestFFMpeg is the `go test` entry point for this package: it prepares the
// test environment and then hands control to the Ginkgo specs.
func TestFFMpeg(t *testing.T) {
	// NOTE(review): project test bootstrap; what the `true` flag selects is
	// not visible from this file - confirm against tests.Init.
	tests.Init(t, true)
	// Raise the log threshold to critical for the duration of the suite.
	log.SetLevel(log.LevelCritical)
	// Route Gomega assertion failures to Ginkgo's Fail.
	RegisterFailHandler(Fail)
	RunSpecs(t, "FFMpeg Suite")
}
|
||||
@@ -0,0 +1,230 @@
|
||||
package ffmpeg
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("Parser", func() {
|
||||
var e *Parser
|
||||
BeforeEach(func() {
|
||||
e = &Parser{}
|
||||
})
|
||||
|
||||
Context("extractMetadata", func() {
|
||||
It("extracts MusicBrainz custom tags", func() {
|
||||
const output = `
|
||||
Input #0, ape, from './Capture/02 01 - Symphony No. 5 in C minor, Op. 67 I. Allegro con brio - Ludwig van Beethoven.ape':
|
||||
Metadata:
|
||||
ALBUM : Forever Classics
|
||||
ARTIST : Ludwig van Beethoven
|
||||
TITLE : Symphony No. 5 in C minor, Op. 67: I. Allegro con brio
|
||||
MUSICBRAINZ_ALBUMSTATUS: official
|
||||
MUSICBRAINZ_ALBUMTYPE: album
|
||||
MusicBrainz_AlbumComment: MP3
|
||||
Musicbrainz_Albumid: 71eb5e4a-90e2-4a31-a2d1-a96485fcb667
|
||||
musicbrainz_trackid: ffe06940-727a-415a-b608-b7e45737f9d8
|
||||
Musicbrainz_Artistid: 1f9df192-a621-4f54-8850-2c5373b7eac9
|
||||
Musicbrainz_Albumartistid: 89ad4ac3-39f7-470e-963a-56509c546377
|
||||
Musicbrainz_Releasegroupid: 708b1ae1-2d3d-34c7-b764-2732b154f5b6
|
||||
musicbrainz_releasetrackid: 6fee2e35-3049-358f-83be-43b36141028b
|
||||
CatalogNumber : PLD 1201
|
||||
`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.mp3", output)
|
||||
Expect(md).To(HaveKeyWithValue("catalognumber", []string{"PLD 1201"}))
|
||||
Expect(md).To(HaveKeyWithValue("musicbrainz_trackid", []string{"ffe06940-727a-415a-b608-b7e45737f9d8"}))
|
||||
Expect(md).To(HaveKeyWithValue("musicbrainz_albumid", []string{"71eb5e4a-90e2-4a31-a2d1-a96485fcb667"}))
|
||||
Expect(md).To(HaveKeyWithValue("musicbrainz_artistid", []string{"1f9df192-a621-4f54-8850-2c5373b7eac9"}))
|
||||
Expect(md).To(HaveKeyWithValue("musicbrainz_albumartistid", []string{"89ad4ac3-39f7-470e-963a-56509c546377"}))
|
||||
Expect(md).To(HaveKeyWithValue("musicbrainz_albumtype", []string{"album"}))
|
||||
Expect(md).To(HaveKeyWithValue("musicbrainz_albumcomment", []string{"MP3"}))
|
||||
})
|
||||
|
||||
It("detects embedded cover art correctly", func() {
|
||||
const output = `
|
||||
Input #0, mp3, from '/Users/deluan/Music/iTunes/iTunes Media/Music/Compilations/Putumayo Presents Blues Lounge/09 Pablo's Blues.mp3':
|
||||
Metadata:
|
||||
compilation : 1
|
||||
Duration: 00:00:01.02, start: 0.000000, bitrate: 477 kb/s
|
||||
Stream #0:0: Audio: mp3, 44100 Hz, stereo, fltp, 192 kb/s
|
||||
Stream #0:1: Video: mjpeg, yuvj444p(pc, bt470bg/unknown/unknown), 600x600 [SAR 1:1 DAR 1:1], 90k tbr, 90k tbn, 90k tbc`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.mp3", output)
|
||||
Expect(md).To(HaveKeyWithValue("has_picture", []string{"true"}))
|
||||
})
|
||||
|
||||
It("detects embedded cover art in ffmpeg 4.4 output", func() {
|
||||
const output = `
|
||||
|
||||
Input #0, flac, from '/run/media/naomi/Archivio/Musica/Katy Perry/Chained to the Rhythm/01 Katy Perry featuring Skip Marley - Chained to the Rhythm.flac':
|
||||
Metadata:
|
||||
ARTIST : Katy Perry featuring Skip Marley
|
||||
Duration: 00:03:57.91, start: 0.000000, bitrate: 983 kb/s
|
||||
Stream #0:0: Audio: flac, 44100 Hz, stereo, s16
|
||||
Stream #0:1: Video: mjpeg (Baseline), yuvj444p(pc, bt470bg/unknown/unknown), 599x518, 90k tbr, 90k tbn, 90k tbc (attached pic)
|
||||
Metadata:
|
||||
comment : Cover (front)`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.mp3", output)
|
||||
Expect(md).To(HaveKeyWithValue("has_picture", []string{"true"}))
|
||||
})
|
||||
|
||||
It("detects embedded cover art in ogg containers", func() {
|
||||
const output = `
|
||||
Input #0, ogg, from '/Users/deluan/Music/iTunes/iTunes Media/Music/_Testes/Jamaican In New York/01-02 Jamaican In New York (Album Version).opus':
|
||||
Duration: 00:04:28.69, start: 0.007500, bitrate: 139 kb/s
|
||||
Stream #0:0(eng): Audio: opus, 48000 Hz, stereo, fltp
|
||||
Metadata:
|
||||
ALBUM : Jamaican In New York
|
||||
metadata_block_picture: AAAAAwAAAAppbWFnZS9qcGVnAAAAAAAAAAAAAAAAAAAAAAAAAAAAA4Id/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/2wBDAQMEBAUEBQkFBQkUDQsNFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQ
|
||||
TITLE : Jamaican In New York (Album Version)`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.mp3", output)
|
||||
Expect(md).To(HaveKey("has_picture"))
|
||||
})
|
||||
|
||||
It("gets bitrate from the stream, if available", func() {
|
||||
const output = `
|
||||
Input #0, mp3, from '/Users/deluan/Music/iTunes/iTunes Media/Music/Compilations/Putumayo Presents Blues Lounge/09 Pablo's Blues.mp3':
|
||||
Duration: 00:00:01.02, start: 0.000000, bitrate: 477 kb/s
|
||||
Stream #0:0: Audio: mp3, 44100 Hz, stereo, fltp, 192 kb/s`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.mp3", output)
|
||||
Expect(md).To(HaveKeyWithValue("bitrate", []string{"192"}))
|
||||
})
|
||||
|
||||
It("parses duration with milliseconds", func() {
|
||||
const output = `
|
||||
Input #0, mp3, from '/Users/deluan/Music/iTunes/iTunes Media/Music/Compilations/Putumayo Presents Blues Lounge/09 Pablo's Blues.mp3':
|
||||
Duration: 00:05:02.63, start: 0.000000, bitrate: 140 kb/s`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.mp3", output)
|
||||
Expect(md).To(HaveKeyWithValue("duration", []string{"302.63"}))
|
||||
})
|
||||
|
||||
It("parses stream level tags", func() {
|
||||
const output = `
|
||||
Input #0, ogg, from './01-02 Drive (Teku).opus':
|
||||
Metadata:
|
||||
ALBUM : Hot Wheels Acceleracers Soundtrack
|
||||
Duration: 00:03:37.37, start: 0.007500, bitrate: 135 kb/s
|
||||
Stream #0:0(eng): Audio: opus, 48000 Hz, stereo, fltp
|
||||
Metadata:
|
||||
TITLE : Drive (Teku)`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.mp3", output)
|
||||
Expect(md).To(HaveKeyWithValue("title", []string{"Drive (Teku)"}))
|
||||
})
|
||||
|
||||
It("does not overlap top level tags with the stream level tags", func() {
|
||||
const output = `
|
||||
Input #0, mp3, from 'groovin.mp3':
|
||||
Metadata:
|
||||
title : Groovin' (feat. Daniel Sneijers, Susanne Alt)
|
||||
Duration: 00:03:34.28, start: 0.025056, bitrate: 323 kb/s
|
||||
Metadata:
|
||||
title : garbage`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.mp3", output)
|
||||
Expect(md).To(HaveKeyWithValue("title", []string{"Groovin' (feat. Daniel Sneijers, Susanne Alt)", "garbage"}))
|
||||
})
|
||||
|
||||
It("parses multiline tags", func() {
|
||||
const outputWithMultilineComment = `
|
||||
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'modulo.m4a':
|
||||
Metadata:
|
||||
comment : https://www.mixcloud.com/codigorock/30-minutos-com-saara-saara/
|
||||
:
|
||||
: Tracklist:
|
||||
:
|
||||
: 01. Saara Saara
|
||||
: 02. Carta Corrente
|
||||
: 03. X
|
||||
: 04. Eclipse Lunar
|
||||
: 05. Vírus de Sírius
|
||||
: 06. Doktor Fritz
|
||||
: 07. Wunderbar
|
||||
: 08. Quarta Dimensão
|
||||
Duration: 00:26:46.96, start: 0.052971, bitrate: 69 kb/s`
|
||||
const expectedComment = `https://www.mixcloud.com/codigorock/30-minutos-com-saara-saara/
|
||||
|
||||
Tracklist:
|
||||
|
||||
01. Saara Saara
|
||||
02. Carta Corrente
|
||||
03. X
|
||||
04. Eclipse Lunar
|
||||
05. Vírus de Sírius
|
||||
06. Doktor Fritz
|
||||
07. Wunderbar
|
||||
08. Quarta Dimensão`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.mp3", outputWithMultilineComment)
|
||||
Expect(md).To(HaveKeyWithValue("comment", []string{expectedComment}))
|
||||
})
|
||||
|
||||
It("parses sort tags correctly", func() {
|
||||
const output = `
|
||||
Input #0, mp3, from '/Users/deluan/Downloads/椎名林檎 - 加爾基 精液 栗ノ花 - 2003/02 - ドツペルゲンガー.mp3':
|
||||
Metadata:
|
||||
title-sort : Dopperugengā
|
||||
album : 加爾基 精液 栗ノ花
|
||||
artist : 椎名林檎
|
||||
album_artist : 椎名林檎
|
||||
title : ドツペルゲンガー
|
||||
albumsort : Kalk Samen Kuri No Hana
|
||||
artist_sort : Shiina, Ringo
|
||||
ALBUMARTISTSORT : Shiina, Ringo
|
||||
`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.mp3", output)
|
||||
Expect(md).To(HaveKeyWithValue("title", []string{"ドツペルゲンガー"}))
|
||||
Expect(md).To(HaveKeyWithValue("album", []string{"加爾基 精液 栗ノ花"}))
|
||||
Expect(md).To(HaveKeyWithValue("artist", []string{"椎名林檎"}))
|
||||
Expect(md).To(HaveKeyWithValue("album_artist", []string{"椎名林檎"}))
|
||||
Expect(md).To(HaveKeyWithValue("title-sort", []string{"Dopperugengā"}))
|
||||
Expect(md).To(HaveKeyWithValue("albumsort", []string{"Kalk Samen Kuri No Hana"}))
|
||||
Expect(md).To(HaveKeyWithValue("artist_sort", []string{"Shiina, Ringo"}))
|
||||
Expect(md).To(HaveKeyWithValue("albumartistsort", []string{"Shiina, Ringo"}))
|
||||
})
|
||||
|
||||
It("ignores cover comment", func() {
|
||||
const output = `
|
||||
Input #0, mp3, from './Edie Brickell/Picture Perfect Morning/01-01 Tomorrow Comes.mp3':
|
||||
Metadata:
|
||||
title : Tomorrow Comes
|
||||
artist : Edie Brickell
|
||||
Duration: 00:03:56.12, start: 0.000000, bitrate: 332 kb/s
|
||||
Stream #0:0: Audio: mp3, 44100 Hz, stereo, s16p, 320 kb/s
|
||||
Stream #0:1: Video: mjpeg, yuvj420p(pc, bt470bg/unknown/unknown), 1200x1200 [SAR 72:72 DAR 1:1], 90k tbr, 90k tbn, 90k tbc
|
||||
Metadata:
|
||||
comment : Cover (front)`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.mp3", output)
|
||||
Expect(md).ToNot(HaveKey("comment"))
|
||||
})
|
||||
|
||||
It("parses tags with spaces in the name", func() {
|
||||
const output = `
|
||||
Input #0, mp3, from '/Users/deluan/Music/Music/Media/_/Wyclef Jean - From the Hut, to the Projects, to the Mansion/10 - The Struggle (interlude).mp3':
|
||||
Metadata:
|
||||
ALBUM ARTIST : Wyclef Jean
|
||||
`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.mp3", output)
|
||||
Expect(md).To(HaveKeyWithValue("album artist", []string{"Wyclef Jean"}))
|
||||
})
|
||||
})
|
||||
|
||||
// Checks that the "%s" placeholder expands to one "-i <path>" pair per input
// and that paths containing spaces stay a single argument.
// NOTE(review): the expected argv implies conf.Server.ProbeCommand is
// "ffmpeg %s -f ffmetadata" when this runs - presumably the default put in
// place by the suite bootstrap; confirm.
It("creates a valid command line", func() {
	args := e.createProbeCommand([]string{"/music library/one.mp3", "/music library/two.mp3"})
	Expect(args).To(Equal([]string{"ffmpeg", "-i", "/music library/one.mp3", "-i", "/music library/two.mp3", "-f", "ffmetadata"}))
})
|
||||
|
||||
It("parses an integer TBPM tag", func() {
|
||||
const output = `
|
||||
Input #0, mp3, from 'tests/fixtures/test.mp3':
|
||||
Metadata:
|
||||
TBPM : 123`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.mp3", output)
|
||||
Expect(md).To(HaveKeyWithValue("tbpm", []string{"123"}))
|
||||
})
|
||||
|
||||
It("parses and rounds a floating point fBPM tag", func() {
|
||||
const output = `
|
||||
Input #0, ogg, from 'tests/fixtures/test.ogg':
|
||||
Metadata:
|
||||
FBPM : 141.7`
|
||||
md, _ := e.extractMetadata("tests/fixtures/test.ogg", output)
|
||||
Expect(md).To(HaveKeyWithValue("fbpm", []string{"141.7"}))
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user