New Folder Scanner - WIP

This commit is contained in:
Deluan
2020-01-16 16:53:48 -05:00
parent 7a16d41abe
commit 123f543a94
27 changed files with 1092 additions and 60 deletions
+113
View File
@@ -0,0 +1,113 @@
package scanner
import (
"os"
"path"
"strings"
"time"
"github.com/cloudsonic/sonic-server/log"
)
type ChangeDetector struct {
rootFolder string
lastUpdate time.Time
dirMap map[string]time.Time
}
func NewChangeDetector(rootFolder string, lastUpdate time.Time) *ChangeDetector {
return &ChangeDetector{
rootFolder: rootFolder,
lastUpdate: lastUpdate,
dirMap: map[string]time.Time{},
}
}
func (s *ChangeDetector) Scan() (changed []string, deleted []string, err error) {
start := time.Now()
newMap := make(map[string]time.Time)
err = s.loadMap(s.rootFolder, newMap)
if err != nil {
return
}
changed, deleted, err = s.checkForUpdates(s.dirMap, newMap)
if err != nil {
return
}
elapsed := time.Since(start)
log.Trace("Folder analysis complete\n", "total", len(newMap), "changed", len(changed), "deleted", len(deleted), "elapsed", elapsed)
s.dirMap = newMap
return
}
func (s *ChangeDetector) loadDir(dirPath string) (children []string, lastUpdated time.Time, err error) {
dir, err := os.Open(dirPath)
if err != nil {
return
}
dirInfo, err := os.Stat(dirPath)
if err != nil {
return
}
lastUpdated = dirInfo.ModTime()
files, err := dir.Readdir(-1)
if err != nil {
return
}
for _, f := range files {
if f.IsDir() {
children = append(children, path.Join(dirPath, f.Name()))
} else {
if f.ModTime().After(lastUpdated) {
lastUpdated = f.ModTime()
}
}
}
return
}
func (s *ChangeDetector) loadMap(rootPath string, dirMap map[string]time.Time) error {
children, lastUpdated, err := s.loadDir(rootPath)
if err != nil {
return err
}
for _, c := range children {
err := s.loadMap(c, dirMap)
if err != nil {
return err
}
}
dir := s.getRelativePath(rootPath)
dirMap[dir] = lastUpdated
return nil
}
func (s *ChangeDetector) getRelativePath(subfolder string) string {
dir := strings.TrimPrefix(subfolder, s.rootFolder)
if dir == "" {
dir = "."
}
return dir
}
func (s *ChangeDetector) checkForUpdates(oldMap map[string]time.Time, newMap map[string]time.Time) (changed []string, deleted []string, err error) {
for dir, lastUpdated := range newMap {
oldLastUpdated, ok := oldMap[dir]
if !ok {
oldLastUpdated = s.lastUpdate
}
if lastUpdated.After(oldLastUpdated) {
changed = append(changed, dir)
}
}
for dir := range oldMap {
if _, ok := newMap[dir]; !ok {
deleted = append(deleted, dir)
}
}
return
}
+103
View File
@@ -0,0 +1,103 @@
package scanner
import (
"io/ioutil"
"os"
"path"
"time"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
var _ = Describe("ChangeDetector", func() {
var testFolder string
var scanner *ChangeDetector
BeforeEach(func() {
testFolder, _ = ioutil.TempDir("", "cloudsonic_tests")
err := os.MkdirAll(testFolder, 0700)
if err != nil {
panic(err)
}
scanner = NewChangeDetector(testFolder, time.Time{})
})
It("detects changes recursively", func() {
// Scan empty folder
changed, deleted, err := scanner.Scan()
Expect(err).To(BeNil())
Expect(deleted).To(BeEmpty())
Expect(changed).To(ConsistOf("."))
// Add one subfolder
err = os.MkdirAll(path.Join(testFolder, "a"), 0700)
if err != nil {
panic(err)
}
changed, deleted, err = scanner.Scan()
Expect(err).To(BeNil())
Expect(deleted).To(BeEmpty())
Expect(changed).To(ConsistOf(".", "/a"))
// Add more subfolders
err = os.MkdirAll(path.Join(testFolder, "a", "b", "c"), 0700)
if err != nil {
panic(err)
}
changed, deleted, err = scanner.Scan()
Expect(err).To(BeNil())
Expect(deleted).To(BeEmpty())
Expect(changed).To(ConsistOf("/a", "/a/b", "/a/b/c"))
// Scan with no changes
changed, deleted, err = scanner.Scan()
Expect(err).To(BeNil())
Expect(deleted).To(BeEmpty())
Expect(changed).To(BeEmpty())
// New file in subfolder
_, err = os.Create(path.Join(testFolder, "a", "b", "empty.txt"))
if err != nil {
panic(err)
}
changed, deleted, err = scanner.Scan()
Expect(err).To(BeNil())
Expect(deleted).To(BeEmpty())
Expect(changed).To(ConsistOf("/a/b"))
// Delete file in subfolder
err = os.Remove(path.Join(testFolder, "a", "b", "empty.txt"))
if err != nil {
panic(err)
}
changed, deleted, err = scanner.Scan()
Expect(err).To(BeNil())
Expect(deleted).To(BeEmpty())
Expect(changed).To(ConsistOf("/a/b"))
// Delete subfolder
err = os.Remove(path.Join(testFolder, "a", "b", "c"))
if err != nil {
panic(err)
}
changed, deleted, err = scanner.Scan()
Expect(err).To(BeNil())
Expect(deleted).To(ConsistOf("/a/b/c"))
Expect(changed).To(ConsistOf("/a/b"))
// Only returns changes after lastUpdate
newScanner := NewChangeDetector(testFolder, time.Now())
changed, deleted, err = newScanner.Scan()
Expect(err).To(BeNil())
Expect(deleted).To(BeEmpty())
Expect(changed).To(BeEmpty())
Expect(changed).To(BeEmpty())
_, err = os.Create(path.Join(testFolder, "a", "b", "new.txt"))
changed, deleted, err = newScanner.Scan()
Expect(err).To(BeNil())
Expect(deleted).To(BeEmpty())
Expect(changed).To(ConsistOf("/a/b"))
})
})
+140
View File
@@ -0,0 +1,140 @@
package scanner
import (
"encoding/json"
"errors"
"os"
"os/exec"
"path"
"strconv"
"strings"
"time"
"github.com/cloudsonic/sonic-server/conf"
"github.com/cloudsonic/sonic-server/log"
"github.com/dhowden/tag"
)
type Metadata struct {
filePath string
suffix string
fileInfo os.FileInfo
t tag.Metadata
duration int
bitRate int
compilation bool
}
func ExtractMetadata(filePath string) (*Metadata, error) {
m := &Metadata{filePath: filePath}
m.suffix = strings.ToLower(strings.TrimPrefix(path.Ext(filePath), "."))
fi, err := os.Stat(filePath)
if err != nil {
return nil, err
}
m.fileInfo = fi
f, err := os.Open(filePath)
if err != nil {
return nil, err
}
defer f.Close()
t, err := tag.ReadFrom(f)
if err != nil {
return nil, err
}
m.t = t
err = m.probe(filePath)
return m, err
}
func (m *Metadata) Title() string { return m.t.Title() }
func (m *Metadata) Album() string { return m.t.Album() }
func (m *Metadata) Artist() string { return m.t.Artist() }
func (m *Metadata) AlbumArtist() string { return m.t.AlbumArtist() }
func (m *Metadata) Composer() string { return m.t.Composer() }
func (m *Metadata) Genre() string { return m.t.Genre() }
func (m *Metadata) Year() int { return m.t.Year() }
func (m *Metadata) TrackNumber() (int, int) { return m.t.Track() }
func (m *Metadata) DiscNumber() (int, int) { return m.t.Disc() }
func (m *Metadata) HasPicture() bool { return m.t.Picture() != nil }
func (m *Metadata) Compilation() bool { return m.compilation }
func (m *Metadata) Duration() int { return m.duration }
func (m *Metadata) BitRate() int { return m.bitRate }
func (m *Metadata) ModificationTime() time.Time { return m.fileInfo.ModTime() }
func (m *Metadata) FilePath() string { return m.filePath }
func (m *Metadata) Suffix() string { return m.suffix }
func (m *Metadata) Size() int { return int(m.fileInfo.Size()) }
// probe analyzes the file and returns duration in seconds and bitRate in kb/s.
// It uses the ffprobe external tool, configured in conf.Sonic.ProbeCommand
func (m *Metadata) probe(filePath string) error {
cmdLine, args := createProbeCommand(filePath)
log.Trace("Executing command", "cmdLine", cmdLine, "args", args)
cmd := exec.Command(cmdLine, args...)
output, err := cmd.CombinedOutput()
if err != nil {
return err
}
return m.parseOutput(output)
}
func (m *Metadata) parseInt(objItf interface{}, field string) (int, error) {
obj := objItf.(map[string]interface{})
s, ok := obj[field].(string)
if !ok {
return -1, errors.New("invalid ffprobe output field obj." + field)
}
fDuration, err := strconv.ParseFloat(s, 64)
if err != nil {
return -1, err
}
return int(fDuration), nil
}
func (m *Metadata) parseOutput(output []byte) error {
var data map[string]map[string]interface{}
err := json.Unmarshal(output, &data)
if err != nil {
return err
}
format, ok := data["format"]
if !ok {
err = errors.New("invalid ffprobe output. no format found")
return err
}
if tags, ok := format["tags"]; ok {
c, _ := m.parseInt(tags, "compilation")
m.compilation = c == 1
}
m.duration, err = m.parseInt(format, "duration")
if err != nil {
return err
}
m.bitRate, err = m.parseInt(format, "bit_rate")
m.bitRate = m.bitRate / 1000
if err != nil {
return err
}
return nil
}
func createProbeCommand(filePath string) (string, []string) {
cmd := conf.Sonic.ProbeCommand
split := strings.Split(cmd, " ")
for i, s := range split {
s = strings.Replace(s, "%s", filePath, -1)
split[i] = s
}
return split[0], split[1:]
}
+55
View File
@@ -0,0 +1,55 @@
package scanner
import (
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
var _ = Describe("Metadata", func() {
It("correctly parses mp3 file", func() {
m, err := ExtractMetadata("../tests/fixtures/test.mp3")
Expect(err).To(BeNil())
Expect(m.Title()).To(Equal("Song"))
Expect(m.Album()).To(Equal("Album"))
Expect(m.Artist()).To(Equal("Artist"))
Expect(m.AlbumArtist()).To(Equal("Album Artist"))
Expect(m.Composer()).To(Equal("Composer"))
Expect(m.Compilation()).To(BeFalse())
Expect(m.Genre()).To(Equal("Rock"))
Expect(m.Year()).To(Equal(2014))
n, t := m.TrackNumber()
Expect(n).To(Equal(2))
Expect(t).To(Equal(10))
n, t = m.DiscNumber()
Expect(n).To(Equal(1))
Expect(t).To(Equal(2))
Expect(m.HasPicture()).To(BeTrue())
Expect(m.Duration()).To(Equal(1))
Expect(m.BitRate()).To(Equal(476))
Expect(m.FilePath()).To(Equal("../tests/fixtures/test.mp3"))
Expect(m.Suffix()).To(Equal("mp3"))
Expect(m.Size()).To(Equal(60845))
})
It("correctly parses ogg file with no tags", func() {
m, err := ExtractMetadata("../tests/fixtures/test.ogg")
Expect(err).To(BeNil())
Expect(m.Title()).To(BeEmpty())
Expect(m.HasPicture()).To(BeFalse())
Expect(m.Duration()).To(Equal(3))
Expect(m.BitRate()).To(Equal(9))
Expect(m.Suffix()).To(Equal("ogg"))
Expect(m.FilePath()).To(Equal("../tests/fixtures/test.ogg"))
Expect(m.Size()).To(Equal(4408))
})
It("returns error for invalid media file", func() {
_, err := ExtractMetadata("../tests/fixtures/itunes-library.xml")
Expect(err).ToNot(BeNil())
})
It("returns error for file not found", func() {
_, err := ExtractMetadata("../tests/fixtures/NOT-FOUND.mp3")
Expect(err).ToNot(BeNil())
})
})
+121
View File
@@ -0,0 +1,121 @@
package scanner
import (
"context"
"errors"
"fmt"
"strconv"
"time"
"github.com/cloudsonic/sonic-server/log"
"github.com/cloudsonic/sonic-server/model"
)
type Scanner struct {
folders map[string]FolderScanner
repos Repositories
}
type Repositories struct {
folder model.MediaFolderRepository
mediaFile model.MediaFileRepository
album model.AlbumRepository
artist model.ArtistRepository
index model.ArtistIndexRepository
playlist model.PlaylistRepository
property model.PropertyRepository
}
func New(mfRepo model.MediaFileRepository, albumRepo model.AlbumRepository, artistRepo model.ArtistRepository, idxRepo model.ArtistIndexRepository, plsRepo model.PlaylistRepository, folderRepo model.MediaFolderRepository, property model.PropertyRepository) *Scanner {
repos := Repositories{
folder: folderRepo,
mediaFile: mfRepo,
album: albumRepo,
artist: artistRepo,
index: idxRepo,
playlist: plsRepo,
property: property,
}
s := &Scanner{repos: repos, folders: map[string]FolderScanner{}}
s.loadFolders()
return s
}
func (s *Scanner) Rescan(mediaFolder string, fullRescan bool) error {
folderScanner := s.folders[mediaFolder]
start := time.Now()
lastModifiedSince := time.Time{}
if !fullRescan {
lastModifiedSince = s.getLastModifiedSince(mediaFolder)
log.Debug("Scanning folder", "folder", mediaFolder, "lastModifiedSince", lastModifiedSince)
} else {
log.Debug("Scanning folder (full scan)", "folder", mediaFolder)
}
err := folderScanner.Scan(nil, lastModifiedSince)
if err != nil {
log.Error("Error importing MediaFolder", "folder", mediaFolder, err)
}
s.updateLastModifiedSince(mediaFolder, start)
log.Debug("Finished scanning folder", "folder", mediaFolder, "elapsed", time.Since(start))
return err
}
func (s *Scanner) RescanAll(fullRescan bool) error {
var hasError bool
for folder := range s.folders {
err := s.Rescan(folder, fullRescan)
hasError = hasError || err != nil
}
if hasError {
log.Error("Errors while scanning media. Please check the logs")
return errors.New("errors while scanning media")
}
return nil
}
func (s *Scanner) Status() []StatusInfo { return nil }
func (i *Scanner) getLastModifiedSince(folder string) time.Time {
ms, err := i.repos.property.Get(model.PropLastScan + "-" + folder)
if err != nil {
return time.Time{}
}
if ms == "" {
return time.Time{}
}
s, _ := strconv.ParseInt(ms, 10, 64)
return time.Unix(0, s*int64(time.Millisecond))
}
func (s *Scanner) updateLastModifiedSince(folder string, t time.Time) {
millis := t.UnixNano() / int64(time.Millisecond)
s.repos.property.Put(model.PropLastScan+"-"+folder, fmt.Sprint(millis))
}
func (s *Scanner) loadFolders() {
fs, _ := s.repos.folder.GetAll()
for _, f := range fs {
log.Info("Configuring Media Folder", "name", f.Name, "path", f.Path)
s.folders[f.Path] = NewTagScanner(f.Path, s.repos)
}
}
type Status int
const (
StatusComplete Status = iota
StatusInProgress
StatusError
)
type StatusInfo struct {
MediaFolder string
Status Status
}
type FolderScanner interface {
Scan(ctx context.Context, lastModifiedSince time.Time) error
}
+36
View File
@@ -0,0 +1,36 @@
package scanner
import (
"testing"
"time"
"github.com/cloudsonic/sonic-server/conf"
"github.com/cloudsonic/sonic-server/log"
"github.com/cloudsonic/sonic-server/persistence"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
func TestScanner(t *testing.T) {
log.SetLevel(log.LevelCritical)
RegisterFailHandler(Fail)
RunSpecs(t, "Scanner Suite")
}
var _ = XDescribe("TODO: REMOVE", func() {
conf.Sonic.DbPath = "./testDB"
log.SetLevel(log.LevelDebug)
repos := Repositories{
folder: persistence.NewMediaFolderRepository(),
mediaFile: persistence.NewMediaFileRepository(),
album: persistence.NewAlbumRepository(),
artist: persistence.NewArtistRepository(),
index: persistence.NewArtistIndexRepository(),
playlist: nil,
}
It("WORKS!", func() {
t := NewTagScanner("/Users/deluan/Music/iTunes/iTunes Media/Music", repos)
//t := NewTagScanner("/Users/deluan/Development/cloudsonic/sonic-server/tests/fixtures", repos)
Expect(t.Scan(nil, time.Time{})).To(BeNil())
})
})
+273
View File
@@ -0,0 +1,273 @@
package scanner
import (
"context"
"crypto/md5"
"fmt"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/cloudsonic/sonic-server/log"
"github.com/cloudsonic/sonic-server/model"
)
type TagScanner struct {
rootFolder string
repos Repositories
}
func NewTagScanner(rootFolder string, repos Repositories) *TagScanner {
return &TagScanner{
rootFolder: rootFolder,
repos: repos,
}
}
// Scan algorithm overview:
// For each changed: Get all files from DB that starts with it, scan each file:
// if changed or new, delete from DB and add new from the file
// if not found, delete from DB
// scan and add the new ones
// For each deleted: delete all files from DB that starts with it
// Create new albums/artists, update counters (how?)
// collect all albumids and artistids from previous steps
// run something like this (for albums):
// select album_id, album, f.artist, f.compilation, max(f.year), count(*), sum(f.play_count), max(f.updated_at), a.id from media_file f left outer join album a on f.album_id = a.id group by album_id;
// when a.id is not null update, else insert (collect all inserts and run just one InsertMulti)
// Delete all empty albums, delete all empty Artists
// Recreate ArtistIndex
func (s *TagScanner) Scan(ctx context.Context, lastModifiedSince time.Time) error {
detector := NewChangeDetector(s.rootFolder, lastModifiedSince)
changed, deleted, err := detector.Scan()
if err != nil {
return err
}
if len(changed)+len(deleted) == 0 {
return nil
}
log.Info("Folder changes found", "changed", len(changed), "deleted", len(deleted))
updatedArtists := map[string]bool{}
updatedAlbums := map[string]bool{}
for _, c := range changed {
err := s.processChangedDir(c, updatedArtists, updatedAlbums)
if err != nil {
return err
}
}
for _, c := range deleted {
err := s.processDeletedDir(c, updatedArtists, updatedAlbums)
if err != nil {
return err
}
}
err = s.refreshAlbums(updatedAlbums)
if err != nil {
return err
}
err = s.refreshArtists(updatedArtists)
if err != nil {
return err
}
err = s.repos.index.Refresh()
if err != nil {
return err
}
return nil
}
func (s *TagScanner) refreshAlbums(updatedAlbums map[string]bool) error {
var ids []string
for id := range updatedAlbums {
ids = append(ids, id)
}
return s.repos.album.Refresh(ids...)
}
func (s *TagScanner) refreshArtists(updatedArtists map[string]bool) error {
var ids []string
for id := range updatedArtists {
ids = append(ids, id)
}
return s.repos.artist.Refresh(ids...)
}
func (s *TagScanner) processChangedDir(dir string, updatedArtists map[string]bool, updatedAlbums map[string]bool) error {
dir = path.Join(s.rootFolder, dir)
start := time.Now()
// Load folder's current tracks from DB into a map
currentTracks := map[string]model.MediaFile{}
ct, err := s.repos.mediaFile.FindByPath(dir)
if err != nil {
return err
}
for _, t := range ct {
currentTracks[t.ID] = t
updatedArtists[t.ArtistID] = true
updatedAlbums[t.AlbumID] = true
}
// Load tracks from the folder
newTracks, err := s.loadTracks(dir)
if err != nil {
return err
}
// If track from folder is newer than the one in DB, update/insert in DB and delete from the current tracks
log.Trace("Processing changed folder", "dir", dir, "tracksInDB", len(currentTracks), "tracksInFolder", len(newTracks))
numUpdatedTracks := 0
numPurgedTracks := 0
for _, n := range newTracks {
c, ok := currentTracks[n.ID]
if !ok || (ok && n.UpdatedAt.After(c.UpdatedAt)) {
err := s.repos.mediaFile.Put(&n)
updatedArtists[n.ArtistID] = true
updatedAlbums[n.AlbumID] = true
numUpdatedTracks++
if err != nil {
return err
}
}
delete(currentTracks, n.ID)
}
// Remaining tracks from DB that are not in the folder are deleted
for id := range currentTracks {
numPurgedTracks++
if err := s.repos.mediaFile.Delete(id); err != nil {
return err
}
}
log.Debug("Finished processing changed folder", "dir", dir, "updated", numUpdatedTracks, "purged", numPurgedTracks, "elapsed", time.Since(start))
return nil
}
func (s *TagScanner) processDeletedDir(dir string, updatedArtists map[string]bool, updatedAlbums map[string]bool) error {
dir = path.Join(s.rootFolder, dir)
ct, err := s.repos.mediaFile.FindByPath(dir)
if err != nil {
return err
}
for _, t := range ct {
updatedArtists[t.ArtistID] = true
updatedAlbums[t.AlbumID] = true
}
return s.repos.mediaFile.DeleteByPath(dir)
}
func (s *TagScanner) loadTracks(dirPath string) (model.MediaFiles, error) {
dir, err := os.Open(dirPath)
if err != nil {
return nil, err
}
files, err := dir.Readdir(-1)
if err != nil {
return nil, err
}
var mds model.MediaFiles
for _, f := range files {
if f.IsDir() {
continue
}
filePath := path.Join(dirPath, f.Name())
md, err := ExtractMetadata(filePath)
if err != nil {
continue
}
mf := s.toMediaFile(md)
mds = append(mds, mf)
}
return mds, nil
}
func (s *TagScanner) toMediaFile(md *Metadata) model.MediaFile {
mf := model.MediaFile{}
mf.ID = s.trackID(md)
mf.Title = s.mapTrackTitle(md)
mf.Album = md.Album()
mf.AlbumID = s.albumID(md)
mf.Album = s.mapAlbumName(md)
if md.Artist() == "" {
mf.Artist = "[Unknown Artist]"
} else {
mf.Artist = md.Artist()
}
mf.ArtistID = s.artistID(md)
mf.AlbumArtist = md.AlbumArtist()
mf.Genre = md.Genre()
mf.Compilation = md.Compilation()
mf.Year = md.Year()
mf.TrackNumber, _ = md.TrackNumber()
mf.DiscNumber, _ = md.DiscNumber()
mf.Duration = md.Duration()
mf.BitRate = md.BitRate()
mf.Path = md.FilePath()
mf.Suffix = md.Suffix()
mf.Size = strconv.Itoa(md.Size())
mf.HasCoverArt = md.HasPicture()
// TODO Get Creation time. https://github.com/djherbis/times ?
mf.CreatedAt = md.ModificationTime()
mf.UpdatedAt = md.ModificationTime()
return mf
}
func (s *TagScanner) mapTrackTitle(md *Metadata) string {
if md.Title() == "" {
s := strings.TrimPrefix(md.FilePath(), s.rootFolder+string(os.PathSeparator))
e := filepath.Ext(s)
return strings.TrimSuffix(s, e)
}
return md.Title()
}
func (s *TagScanner) mapArtistName(md *Metadata) string {
switch {
case md.Compilation():
return "Various Artists"
case md.AlbumArtist() != "":
return md.AlbumArtist()
case md.Artist() != "":
return md.Artist()
default:
return "[Unknown Artist]"
}
}
func (s *TagScanner) mapAlbumName(md *Metadata) string {
name := md.Album()
if name == "" {
return "[Unknown Album]"
}
return name
}
func (s *TagScanner) trackID(md *Metadata) string {
return fmt.Sprintf("%x", md5.Sum([]byte(md.FilePath())))
}
func (s *TagScanner) albumID(md *Metadata) string {
albumPath := strings.ToLower(fmt.Sprintf("%s\\%s", s.mapArtistName(md), s.mapAlbumName(md)))
return fmt.Sprintf("%x", md5.Sum([]byte(albumPath)))
}
func (s *TagScanner) artistID(md *Metadata) string {
return fmt.Sprintf("%x", md5.Sum([]byte(strings.ToLower(s.mapArtistName(md)))))
}