Moved Metadata Extraction to its own package

This commit is contained in:
Deluan
2020-09-04 11:08:16 -04:00
committed by Deluan Quintão
parent 0beec552b1
commit 1187ee7cc1
5 changed files with 21 additions and 19 deletions
+275
View File
@@ -0,0 +1,275 @@
package metadata
import (
"bufio"
"errors"
"fmt"
"os"
"os/exec"
"path"
"regexp"
"strconv"
"strings"
"time"
"github.com/deluan/navidrome/conf"
"github.com/deluan/navidrome/log"
)
// ffmpegMetadata holds what was parsed from the probe command output for a
// single file, together with that file's filesystem information.
type ffmpegMetadata struct {
// filePath is the path the metadata was extracted for.
filePath string
// suffix is the lowercased file extension, without the leading dot.
suffix string
// fileInfo is the os.Stat result for filePath.
fileInfo os.FileInfo
// tags maps lowercased tag names to their trimmed values. parseInfo also
// stores the derived "duration", "bitrate" and "has_picture" entries here.
tags map[string]string
}
// Tag accessors. Each one returns the value of the first tag name, in the
// order listed, that is present in m.tags (see getTag); "" when none is.
func (m *ffmpegMetadata) Title() string { return m.getTag("title", "sort_name") }
func (m *ffmpegMetadata) Album() string { return m.getTag("album", "sort_album") }
func (m *ffmpegMetadata) Artist() string { return m.getTag("artist", "sort_artist") }
func (m *ffmpegMetadata) AlbumArtist() string { return m.getTag("album_artist", "albumartist") }
// Sort accessors. getSortTag expands every base name into its known "sort"
// spellings (sortX, sort_X, sort-X, Xsort, X_sort, X-sort) and tries them in order.
func (m *ffmpegMetadata) SortTitle() string { return m.getSortTag("", "title", "name") }
func (m *ffmpegMetadata) SortAlbum() string { return m.getSortTag("", "album") }
func (m *ffmpegMetadata) SortArtist() string { return m.getSortTag("", "artist") }
// SortAlbumArtist checks the "tso2" tag first, then the generated sort spellings.
func (m *ffmpegMetadata) SortAlbumArtist() string {
return m.getSortTag("tso2", "albumartist", "album_artist")
}
func (m *ffmpegMetadata) Composer() string { return m.getTag("composer", "tcm", "sort_composer") }
func (m *ffmpegMetadata) Genre() string { return m.getTag("genre") }
// Year is the leading four-digit year of the "date" tag, or 0.
func (m *ffmpegMetadata) Year() int { return m.parseYear("date") }
// TrackNumber and DiscNumber return a (number, total) pair; see parseTuple.
func (m *ffmpegMetadata) TrackNumber() (int, int) { return m.parseTuple("track") }
func (m *ffmpegMetadata) DiscNumber() (int, int) { return m.parseTuple("tpa", "disc") }
func (m *ffmpegMetadata) DiscSubtitle() string {
return m.getTag("tsst", "discsubtitle", "setsubtitle")
}
// HasPicture is true when parseInfo saw a Video stream line ("has_picture") or
// the file carries a non-empty "metadata_block_picture" tag.
func (m *ffmpegMetadata) HasPicture() bool {
return m.getTag("has_picture", "metadata_block_picture") != ""
}
func (m *ffmpegMetadata) Comment() string { return m.getTag("comment") }
// Compilation is true only when the "compilation" tag parses to the integer 1.
func (m *ffmpegMetadata) Compilation() bool { return m.parseBool("compilation") }
// Duration is the length in seconds parsed from the "duration" entry.
func (m *ffmpegMetadata) Duration() float32 { return m.parseDuration("duration") }
// BitRate is the number captured in front of "kb/s" in the probe output.
func (m *ffmpegMetadata) BitRate() int { return m.parseInt("bitrate") }
// The remaining accessors expose the stored path, suffix and os.FileInfo data.
func (m *ffmpegMetadata) ModificationTime() time.Time { return m.fileInfo.ModTime() }
func (m *ffmpegMetadata) FilePath() string { return m.filePath }
func (m *ffmpegMetadata) Suffix() string { return m.suffix }
func (m *ffmpegMetadata) Size() int64 { return m.fileInfo.Size() }
// ffmpegMetadataExtractor extracts metadata by running the configured probe
// command and parsing its textual output.
type ffmpegMetadataExtractor struct{}

// Extract runs the probe command once for all the given files and returns the
// metadata of every file that could be parsed, keyed by the file name as it
// appears in the command output.
//
// Files that cannot be stat'ed, or that yield no tags, are left out of the
// result without failing the call. An error is returned only when no command
// could be built or the command produced no output at all; in the latter case
// the command's own error is wrapped, so the cause is not lost.
func (e *ffmpegMetadataExtractor) Extract(files ...string) (map[string]Metadata, error) {
	mds := map[string]Metadata{}

	args := createProbeCommand(files)
	if len(args) == 0 {
		// Nothing to execute; indexing args[0] below would panic.
		return mds, errors.New("error extracting metadata files: empty probe command")
	}
	log.Trace("Executing command", "args", args)
	cmd := exec.Command(args[0], args[1:]...) // #nosec

	// NOTE(review): as in the original code, a command error is not fatal
	// while there is output to parse — presumably the probe command can exit
	// non-zero and still print usable information; confirm.
	output, err := cmd.CombinedOutput()
	if len(output) == 0 {
		if err != nil {
			return mds, fmt.Errorf("error extracting metadata files: %w", err)
		}
		return mds, errors.New("error extracting metadata files")
	}

	for file, info := range parseOutput(string(output)) {
		md, err := extractMetadata(file, info)
		if err != nil {
			// Skip files with errors
			continue
		}
		mds[file] = md
	}
	return mds, nil
}
// Patterns used to pick information out of the probe command output. The
// comment above each pattern shows the kind of line it is meant to match.
var (
	// Input #0, mp3, from 'groovin.mp3':
	inputRegex = regexp.MustCompile(`(?m)^Input #\d+,.*,\sfrom\s'(.*)'`)

	//     TITLE           : Back In Black
	tagsRx = regexp.MustCompile(`(?i)^\s{4,6}([\w-]+)\s*:(.*)`)

	//   Duration: 00:04:16.00, start: 0.000000, bitrate: 995 kb/s
	durationRx = regexp.MustCompile(`^\s\sDuration: ([\d.:]+).*bitrate: (\d+)`)

	//     Stream #0:0: Audio: mp3, 44100 Hz, stereo, fltp, 192 kb/s
	//     Stream #0:0(eng): Audio: aac, 44100 Hz, stereo, fltp, 256 kb/s
	// The stream index may be followed by a suffix such as "(eng)", hence the
	// `[^:]*` before the colon. Group 2 is the bitrate.
	bitRateRx = regexp.MustCompile(`^\s{4}Stream #\d+:\d+[^:]*: (Audio):.*, (\d+) kb/s`)

	//     Stream #0:1: Video: mjpeg, yuvj444p(pc, bt470bg/unknown/unknown), 600x600 [SAR 1:1 DAR 1:1], 90k tbr, 90k tbn, 90k tbc
	// Accepts the same optional suffix after the stream index as bitRateRx.
	coverRx = regexp.MustCompile(`^\s{4}Stream #\d+:\d+[^:]*: (Video):.*`)
)
// parseOutput splits the probe command output into one chunk per input file.
// The result maps each file name, as printed in its "Input #N, ..., from '...'"
// header, to the text that follows that header up to (but not including) the
// line break in front of the next header, or up to the end of the output for
// the last file.
func parseOutput(output string) map[string]string {
	headers := inputRegex.FindAllStringSubmatchIndex(output, -1)
	chunks := map[string]string{}
	for idx, h := range headers {
		// h[2]:h[3] delimit the first capture group, i.e. the file name.
		name := output[h[2]:h[3]]

		// By default the chunk runs to the end of the output (last header).
		stop := len(output)
		if next := idx + 1; next < len(headers) {
			// Otherwise it ends one byte before the next header starts.
			stop = headers[next][0] - 1
		}
		chunks[name] = output[h[1]:stop]
	}
	return chunks
}
// extractMetadata builds the metadata of filePath out of info, the section of
// the probe output that belongs to that file.
//
// It returns an error (after logging the reason) when the file cannot be
// stat'ed or when info contains nothing parseInfo recognizes.
func extractMetadata(filePath, info string) (*ffmpegMetadata, error) {
	stat, err := os.Stat(filePath)
	if err != nil {
		log.Warn("Error stating file. Skipping", "filePath", filePath, err)
		return nil, errors.New("error stating file")
	}

	ext := strings.TrimPrefix(path.Ext(filePath), ".")
	md := &ffmpegMetadata{
		filePath: filePath,
		suffix:   strings.ToLower(ext),
		fileInfo: stat,
		tags:     map[string]string{},
	}

	md.parseInfo(info)
	if len(md.tags) == 0 {
		log.Trace("Not a media file. Skipping", "filePath", filePath)
		return nil, errors.New("not a media file")
	}
	return md, nil
}
// parseInfo reads the per-file section of the probe output line by line and
// fills m.tags with:
//   - every "name : value" tag line (name lowercased, value trimmed); when a
//     name shows up more than once, the first occurrence is kept;
//   - "has_picture" when a Video stream line is present;
//   - "duration" and "bitrate" from the Duration line;
//   - "bitrate" again from an Audio stream line, overriding the Duration one.
func (m *ffmpegMetadata) parseInfo(info string) {
	scanner := bufio.NewScanner(strings.NewReader(info))
	// A single line may exceed bufio's default 64KB token limit (a base64
	// metadata_block_picture value, for instance). Scan would then stop at
	// that line and every tag after it would be silently lost, so allow a
	// token as large as the whole input.
	scanner.Buffer(make([]byte, 0, 4096), len(info)+1)

	for scanner.Scan() {
		line := scanner.Text()
		if len(line) == 0 {
			continue
		}

		if match := tagsRx.FindStringSubmatch(line); len(match) > 0 {
			name := strings.ToLower(match[1])
			// Skip when the tag was previously found
			if _, seen := m.tags[name]; !seen {
				m.tags[name] = strings.TrimSpace(match[2])
			}
			continue
		}

		if coverRx.MatchString(line) {
			m.tags["has_picture"] = "true"
			continue
		}

		if match := durationRx.FindStringSubmatch(line); len(match) > 0 {
			// Both groups are mandatory in durationRx, so a match always
			// carries the duration and the bitrate.
			m.tags["duration"] = match[1]
			m.tags["bitrate"] = match[2]
			continue
		}

		if match := bitRateRx.FindStringSubmatch(line); len(match) > 0 {
			m.tags["bitrate"] = match[2]
		}
	}
}
// parseInt returns the named tag converted to an int, or 0 when the tag is
// not present. Conversion errors are ignored: a non-numeric value comes back
// as 0.
func (m *ffmpegMetadata) parseInt(tagName string) int {
	raw, found := m.tags[tagName]
	if !found {
		return 0
	}
	n, _ := strconv.Atoi(raw)
	return n
}
// dateRegex captures a leading four-digit year whose first digit is 1 or 2.
var dateRegex = regexp.MustCompile(`^([12]\d\d\d)`)

// parseYear returns the year at the start of the named tag. It returns 0 when
// the tag is missing, and logs a warning and returns 0 when the value does not
// begin with a four-digit year.
func (m *ffmpegMetadata) parseYear(tagName string) int {
	value, present := m.tags[tagName]
	if !present {
		return 0
	}

	groups := dateRegex.FindStringSubmatch(value)
	if groups == nil {
		log.Warn("Error parsing year from ffmpeg date field", "file", m.filePath, "date", value)
		return 0
	}

	// groups[1] is four digits by construction of dateRegex.
	year, _ := strconv.Atoi(groups[1])
	return year
}
// getTag returns the value of the first of the given tag names that exists in
// m.tags, even if that value is empty. It returns "" when none exists.
func (m *ffmpegMetadata) getTag(tags ...string) string {
	for i := range tags {
		value, present := m.tags[tags[i]]
		if present {
			return value
		}
	}
	return ""
}
// getSortTag looks up a "sort" variant of a tag. It tries originalTag first
// and then, for each name in tags (in order), the spellings sortX, sort_X,
// sort-X, Xsort, X_sort and X-sort. The first one present wins; see getTag.
func (m *ffmpegMetadata) getSortTag(originalTag string, tags ...string) string {
	patterns := [...]string{"sort%s", "sort_%s", "sort-%s", "%ssort", "%s_sort", "%s-sort"}

	candidates := make([]string, 0, 1+len(tags)*len(patterns))
	candidates = append(candidates, originalTag)
	for _, base := range tags {
		for _, pattern := range patterns {
			candidates = append(candidates, fmt.Sprintf(pattern, base))
		}
	}
	return m.getTag(candidates...)
}
// parseTuple reads a "number/total" pair from the first of the given tags that
// is present. When the value has no "/", the total is taken from the companion
// "<tag>total" tag instead. Parts that are not numbers count as 0, and (0, 0)
// is returned when none of the tags is present.
func (m *ffmpegMetadata) parseTuple(tags ...string) (int, int) {
	for _, name := range tags {
		raw, found := m.tags[name]
		if !found {
			continue
		}

		parts := strings.Split(raw, "/")
		number, _ := strconv.Atoi(parts[0])

		// Prefer the inline total; fall back to the separate "total" tag.
		totalText := m.tags[name+"total"]
		if len(parts) > 1 {
			totalText = parts[1]
		}
		total, _ := strconv.Atoi(totalText)

		return number, total
	}
	return 0, 0
}
// parseBool reports whether the named tag, once surrounding whitespace is
// removed, is the integer 1. A missing tag or any other value gives false.
func (m *ffmpegMetadata) parseBool(tagName string) bool {
	raw, found := m.tags[tagName]
	if !found {
		return false
	}
	n, _ := strconv.Atoi(strings.TrimSpace(raw))
	return n == 1
}
// zeroTime is midnight, January 1st of year 0 (UTC); parseDuration measures
// the parsed clock value against it.
var zeroTime = time.Date(0, time.January, 1, 0, 0, 0, 0, time.UTC)

// parseDuration converts the named tag, a clock-style "HH:MM:SS" value with
// optional fractional seconds, into a number of seconds. It returns 0 when the
// tag is missing or cannot be parsed with that layout.
func (m *ffmpegMetadata) parseDuration(tagName string) float32 {
	raw, found := m.tags[tagName]
	if !found {
		return 0
	}

	parsed, err := time.Parse("15:04:05", raw)
	if err != nil {
		return 0
	}

	elapsed := parsed.Sub(zeroTime)
	return float32(elapsed.Seconds())
}
// createProbeCommand builds the argument list for the probe command from the
// conf.Server.ProbeCommand template. The template is split on whitespace and
// its "%s" token is replaced by one "-i <input>" pair per input; all other
// tokens are copied as they are.
//
// Splitting uses strings.Fields, so repeated, leading or trailing whitespace
// in the template does not turn into empty arguments.
//
// Inputs will always be absolute paths
func createProbeCommand(inputs []string) []string {
	template := strings.Fields(conf.Server.ProbeCommand)
	if len(template) == 0 {
		// A blank template has always produced a list whose command name is
		// empty. Keep that shape so callers indexing args[0] stay safe; the
		// command then fails to start instead of the caller panicking.
		return []string{""}
	}

	args := make([]string, 0, len(template)+2*len(inputs))
	for _, token := range template {
		if token != "%s" {
			args = append(args, token)
			continue
		}
		for _, input := range inputs {
			args = append(args, "-i", input)
		}
	}
	return args
}
+243
View File
@@ -0,0 +1,243 @@
package metadata
import (
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
var _ = Describe("ffmpegMetadata", func() {
// TODO Need to mock `ffmpeg`
// Pending (XContext): this context goes through Extract, which executes the
// real probe command against the fixture files.
// NOTE(review): before enabling it, revisit the matchers below. Gomega's Equal
// is type-strict, while Duration() returns float32 and Size() returns int64, so
// the untyped integer literals passed to Equal would not compare equal as
// written. The expected values themselves are presumably tied to the fixture
// files; confirm them against the fixtures.
XContext("ExtractAllMetadata", func() {
It("correctly parses metadata from all files in folder", func() {
e := &ffmpegMetadataExtractor{}
mds, err := e.Extract("tests/fixtures/test.mp3", "tests/fixtures/test.ogg")
Expect(err).NotTo(HaveOccurred())
Expect(mds).To(HaveLen(2))
// First file: a fully tagged mp3.
m := mds["tests/fixtures/test.mp3"]
Expect(m.Title()).To(Equal("Song"))
Expect(m.Album()).To(Equal("Album"))
Expect(m.Artist()).To(Equal("Artist"))
Expect(m.AlbumArtist()).To(Equal("Album Artist"))
Expect(m.Composer()).To(Equal("Composer"))
Expect(m.Compilation()).To(BeTrue())
Expect(m.Genre()).To(Equal("Rock"))
Expect(m.Year()).To(Equal(2014))
n, t := m.TrackNumber()
Expect(n).To(Equal(2))
Expect(t).To(Equal(10))
n, t = m.DiscNumber()
Expect(n).To(Equal(1))
Expect(t).To(Equal(2))
Expect(m.HasPicture()).To(BeTrue())
Expect(m.Duration()).To(Equal(1))
Expect(m.BitRate()).To(Equal(476))
Expect(m.FilePath()).To(Equal("tests/fixtures/test.mp3"))
Expect(m.Suffix()).To(Equal("mp3"))
Expect(m.Size()).To(Equal(60845))
// Second file: an ogg with no title and no picture.
m = mds["tests/fixtures/test.ogg"]
// err is still the value returned by Extract above; it was already checked.
Expect(err).To(BeNil())
Expect(m.Title()).To(BeEmpty())
Expect(m.HasPicture()).To(BeFalse())
Expect(m.Duration()).To(Equal(3))
Expect(m.BitRate()).To(Equal(9))
Expect(m.Suffix()).To(Equal("ogg"))
Expect(m.FilePath()).To(Equal("tests/fixtures/test.ogg"))
Expect(m.Size()).To(Equal(4408))
})
})
Context("extractMetadata", func() {
// The picture is signalled here by the Video stream line of the output.
// extractMetadata stats the path it is given, so the fixture file only has to
// exist; everything that is parsed comes from the output constant.
It("detects embedded cover art correctly", func() {
const output = `
Input #0, mp3, from '/Users/deluan/Music/iTunes/iTunes Media/Music/Compilations/Putumayo Presents Blues Lounge/09 Pablo's Blues.mp3':
Metadata:
compilation : 1
Duration: 00:00:01.02, start: 0.000000, bitrate: 477 kb/s
Stream #0:0: Audio: mp3, 44100 Hz, stereo, fltp, 192 kb/s
Stream #0:1: Video: mjpeg, yuvj444p(pc, bt470bg/unknown/unknown), 600x600 [SAR 1:1 DAR 1:1], 90k tbr, 90k tbn, 90k tbc`
md, _ := extractMetadata("tests/fixtures/test.mp3", output)
Expect(md.HasPicture()).To(BeTrue())
})
// There is no Video stream in this output: the picture is carried by the
// metadata_block_picture tag, which HasPicture also accepts.
It("detects embedded cover art in ogg containers", func() {
const output = `
Input #0, ogg, from '/Users/deluan/Music/iTunes/iTunes Media/Music/_Testes/Jamaican In New York/01-02 Jamaican In New York (Album Version).opus':
Duration: 00:04:28.69, start: 0.007500, bitrate: 139 kb/s
Stream #0:0(eng): Audio: opus, 48000 Hz, stereo, fltp
Metadata:
ALBUM : Jamaican In New York
metadata_block_picture: AAAAAwAAAAppbWFnZS9qcGVnAAAAAAAAAAAAAAAAAAAAAAAAAAAAA4Id/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/2wBDAQMEBAUEBQkFBQkUDQsNFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQ
TITLE : Jamaican In New York (Album Version)`
md, _ := extractMetadata("tests/fixtures/test.mp3", output)
Expect(md.HasPicture()).To(BeTrue())
})
// Both the Duration line (477) and the Audio stream line (192) carry a
// bitrate; the value from the stream line is the one expected.
It("gets bitrate from the stream, if available", func() {
const output = `
Input #0, mp3, from '/Users/deluan/Music/iTunes/iTunes Media/Music/Compilations/Putumayo Presents Blues Lounge/09 Pablo's Blues.mp3':
Duration: 00:00:01.02, start: 0.000000, bitrate: 477 kb/s
Stream #0:0: Audio: mp3, 44100 Hz, stereo, fltp, 192 kb/s`
md, _ := extractMetadata("tests/fixtures/test.mp3", output)
Expect(md.BitRate()).To(Equal(192))
})
// A "compilation" tag whose value is 1 must be reported as true.
It("parses correctly the compilation tag", func() {
const output = `
Input #0, mp3, from '/Users/deluan/Music/iTunes/iTunes Media/Music/Compilations/Putumayo Presents Blues Lounge/09 Pablo's Blues.mp3':
Metadata:
compilation : 1
Duration: 00:05:02.63, start: 0.000000, bitrate: 140 kb/s`
md, _ := extractMetadata("tests/fixtures/test.mp3", output)
Expect(md.Compilation()).To(BeTrue())
})
// 00:05:02.63 is 5*60 + 2.63 = 302.63 seconds; the fractional part must be
// kept, hence the approximate numeric comparison.
It("parses duration with milliseconds", func() {
const output = `
Input #0, mp3, from '/Users/deluan/Music/iTunes/iTunes Media/Music/Compilations/Putumayo Presents Blues Lounge/09 Pablo's Blues.mp3':
Duration: 00:05:02.63, start: 0.000000, bitrate: 140 kb/s`
md, _ := extractMetadata("tests/fixtures/test.mp3", output)
Expect(md.Duration()).To(BeNumerically("~", 302.63, 0.001))
})
// TITLE appears only in the Metadata section that follows the Stream line; it
// must be picked up just like the tags of the first Metadata section.
It("parses stream level tags", func() {
const output = `
Input #0, ogg, from './01-02 Drive (Teku).opus':
Metadata:
ALBUM : Hot Wheels Acceleracers Soundtrack
Duration: 00:03:37.37, start: 0.007500, bitrate: 135 kb/s
Stream #0:0(eng): Audio: opus, 48000 Hz, stereo, fltp
Metadata:
TITLE : Drive (Teku)`
md, _ := extractMetadata("tests/fixtures/test.mp3", output)
Expect(md.Title()).To(Equal("Drive (Teku)"))
})
// "title" appears twice; the first occurrence must be kept and the later one
// ("garbage") ignored.
It("does not overlap top level tags with the stream level tags", func() {
const output = `
Input #0, mp3, from 'groovin.mp3':
Metadata:
title : Groovin' (feat. Daniel Sneijers, Susanne Alt)
Duration: 00:03:34.28, start: 0.025056, bitrate: 323 kb/s
Metadata:
title : garbage`
md, _ := extractMetadata("tests/fixtures/test.mp3", output)
Expect(md.Title()).To(Equal("Groovin' (feat. Daniel Sneijers, Susanne Alt)"))
})
// Tag names come in mixed case here and must all be reachable through the
// accessors. The totals come from the separate TRACKTOTAL/DISCTOTAL tags, and
// the year is the leading part of DATE.
It("ignores case in the tag name", func() {
const output = `
Input #0, flac, from '/Users/deluan/Downloads/06. Back In Black.flac':
Metadata:
ALBUM : Back In Black
DATE : 1980.07.25
disc : 1
GENRE : Hard Rock
TITLE : Back In Black
DISCTOTAL : 1
TRACKTOTAL : 10
track : 6
Duration: 00:04:16.00, start: 0.000000, bitrate: 995 kb/s`
md, _ := extractMetadata("tests/fixtures/test.mp3", output)
Expect(md.Title()).To(Equal("Back In Black"))
Expect(md.Album()).To(Equal("Back In Black"))
Expect(md.Genre()).To(Equal("Hard Rock"))
n, t := md.TrackNumber()
Expect(n).To(Equal(6))
Expect(t).To(Equal(10))
n, t = md.DiscNumber()
Expect(n).To(Equal(1))
Expect(t).To(Equal(1))
Expect(md.Year()).To(Equal(1980))
})
// TODO Handle multiline tags
// Pending (XIt): the comment value continues on the following lines, each one
// starting with ":" and no tag name. The expectation is the continuation lines
// joined back into a single multi-line comment.
XIt("parses multiline tags", func() {
const outputWithMultilineComment = `
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'modulo.m4a':
Metadata:
comment : https://www.mixcloud.com/codigorock/30-minutos-com-saara-saara/
:
: Tracklist:
:
: 01. Saara Saara
: 02. Carta Corrente
: 03. X
: 04. Eclipse Lunar
: 05. Vírus de Sírius
: 06. Doktor Fritz
: 07. Wunderbar
: 08. Quarta Dimensão
Duration: 00:26:46.96, start: 0.052971, bitrate: 69 kb/s`
const expectedComment = `https://www.mixcloud.com/codigorock/30-minutos-com-saara-saara/
Tracklist:
01. Saara Saara
02. Carta Corrente
03. X
04. Eclipse Lunar
05. Vírus de Sírius
06. Doktor Fritz
07. Wunderbar
08. Quarta Dimensão
`
md, _ := extractMetadata("tests/fixtures/test.mp3", outputWithMultilineComment)
Expect(md.Comment()).To(Equal(expectedComment))
})
// Each sort tag uses a different spelling (title-sort, albumsort, artist_sort,
// ALBUMARTISTSORT); all of them must be found, and none of them may leak into
// the regular title/album/artist accessors.
It("parses sort tags correctly", func() {
const output = `
Input #0, mp3, from '/Users/deluan/Downloads/椎名林檎 - 加爾基 精液 栗ノ花 - 2003/02 - ドツペルゲンガー.mp3':
Metadata:
title-sort : Dopperugengā
album : 加爾基 精液 栗ノ花
artist : 椎名林檎
album_artist : 椎名林檎
title : ドツペルゲンガー
albumsort : Kalk Samen Kuri No Hana
artist_sort : Shiina, Ringo
ALBUMARTISTSORT : Shiina, Ringo
`
md, _ := extractMetadata("tests/fixtures/test.mp3", output)
Expect(md.Title()).To(Equal("ドツペルゲンガー"))
Expect(md.Album()).To(Equal("加爾基 精液 栗ノ花"))
Expect(md.Artist()).To(Equal("椎名林檎"))
Expect(md.AlbumArtist()).To(Equal("椎名林檎"))
Expect(md.SortTitle()).To(Equal("Dopperugengā"))
Expect(md.SortAlbum()).To(Equal("Kalk Samen Kuri No Hana"))
Expect(md.SortArtist()).To(Equal("Shiina, Ringo"))
Expect(md.SortAlbumArtist()).To(Equal("Shiina, Ringo"))
})
})
// Year() is exercised directly on a hand-built ffmpegMetadata, so no probe
// output or fixture file is involved.
Context("parseYear", func() {
It("parses the year correctly", func() {
// Only a value that starts with a four-digit year yields that year;
// "May 12, 2016" has its year at the end and therefore maps to 0.
var examples = map[string]int{
"1985": 1985,
"2002-01": 2002,
"1969.06": 1969,
"1980.07.25": 1980,
"2004-00-00": 2004,
"2013-May-12": 2013,
"May 12, 2016": 0,
}
for tag, expected := range examples {
md := &ffmpegMetadata{tags: map[string]string{"date": tag}}
Expect(md.Year()).To(Equal(expected))
}
})
It("returns 0 if year is invalid", func() {
md := &ffmpegMetadata{tags: map[string]string{"date": "invalid"}}
Expect(md.Year()).To(Equal(0))
})
})
// Input paths containing spaces must each stay a single argument.
// NOTE(review): the expected arguments presumably rely on
// conf.Server.ProbeCommand being "ffmpeg %s -f ffmetadata" when the suite
// runs; confirm against the test configuration.
It("creates a valid command line", func() {
args := createProbeCommand([]string{"/music library/one.mp3", "/music library/two.mp3"})
Expect(args).To(Equal([]string{"ffmpeg", "-i", "/music library/one.mp3", "-i", "/music library/two.mp3", "-f", "ffmetadata"}))
})
})
+38
View File
@@ -0,0 +1,38 @@
package metadata
import "time"
// Metadata gives read access to the information extracted from a single media
// file: its tags, its audio properties and its filesystem attributes.
type Metadata interface {
// Descriptive tags; an empty string means the tag was not found.
Title() string
Album() string
Artist() string
AlbumArtist() string
// Sort variants of the tags above.
SortTitle() string
SortAlbum() string
SortArtist() string
SortAlbumArtist() string
Composer() string
Genre() string
// Year is 0 when no year could be determined.
Year() int
// TrackNumber and DiscNumber return the number and the total, in that order.
TrackNumber() (int, int)
DiscNumber() (int, int)
DiscSubtitle() string
// HasPicture reports whether the file carries an embedded picture.
HasPicture() bool
Comment() string
Compilation() bool
// Duration is the length of the file, in seconds.
Duration() float32
BitRate() int
// Filesystem attributes of the file.
ModificationTime() time.Time
FilePath() string
Suffix() string
Size() int64
}
// Extractor reads the metadata of a batch of files.
type Extractor interface {
// Extract returns the metadata found for the given files, keyed by file name.
Extract(files ...string) (map[string]Metadata, error)
}
// Extract reads the metadata of the given files with the ffmpeg-based
// extractor and returns it keyed by file name.
func Extract(files ...string) (map[string]Metadata, error) {
	var extractor ffmpegMetadataExtractor
	return extractor.Extract(files...)
}