Preparing for new scanner

This commit is contained in:
Deluan
2020-01-14 21:51:35 -05:00
parent 02d642814b
commit 25686c1742
11 changed files with 31 additions and 28 deletions
-310
View File
@@ -1,310 +0,0 @@
package scanner
import (
"fmt"
"os"
"strconv"
"strings"
"time"
"github.com/cloudsonic/sonic-server/conf"
"github.com/cloudsonic/sonic-server/domain"
"github.com/cloudsonic/sonic-server/log"
"github.com/cloudsonic/sonic-server/utils"
)
// Scanner is the source of library data consumed by Importer. The Importer
// calls ScanLibrary first and then reads the collections exposed by the
// remaining methods; every collection is a map with string keys.
type Scanner interface {
// ScanLibrary scans the library found at path, receiving the time of the
// previous scan. The returned count is logged by Importer as "tracks".
ScanLibrary(lastModifiedSince time.Time, path string) (int, error)
// MediaFiles returns the media files collected by the scan.
MediaFiles() map[string]*domain.MediaFile
// Albums returns the albums collected by the scan.
Albums() map[string]*domain.Album
// Artists returns the artists collected by the scan.
Artists() map[string]*domain.Artist
// Playlists returns the playlists collected by the scan.
Playlists() map[string]*domain.Playlist
}
// tempIndex holds the artists of one index group while the artist index is
// being built. collectIndex keys it by the lower-cased artist name returned by
// utils.NoArticle.
type tempIndex map[string]domain.ArtistInfo
// Importer copies the data produced by a Scanner into the domain repositories.
type Importer struct {
// scanner provides the media files, albums, artists and playlists to import.
scanner Scanner
// mediaFolder is the path that is stat'ed for changes and handed to the scanner.
mediaFolder string
mfRepo domain.MediaFileRepository
albumRepo domain.AlbumRepository
artistRepo domain.ArtistRepository
idxRepo domain.ArtistIndexRepository
plsRepo domain.PlaylistRepository
// propertyRepo stores the LastScan timestamp (domain.PropLastScan).
propertyRepo domain.PropertyRepository
// lastScan is the LastScan value read at the start of the current scan; it is
// the zero time when no value is available.
lastScan time.Time
// lastCheck is when mediaFolder was last checked for modifications.
lastCheck time.Time
}
// NewImporter builds an Importer that reads the library at mediaFolder through
// scanner and persists the results in the given repositories. The lastScan and
// lastCheck timestamps start at their zero values.
func NewImporter(
	mediaFolder string,
	scanner Scanner,
	mfRepo domain.MediaFileRepository,
	albumRepo domain.AlbumRepository,
	artistRepo domain.ArtistRepository,
	idxRepo domain.ArtistIndexRepository,
	plsRepo domain.PlaylistRepository,
	propertyRepo domain.PropertyRepository,
) *Importer {
	imp := new(Importer)
	imp.scanner = scanner
	imp.mediaFolder = mediaFolder
	imp.mfRepo = mfRepo
	imp.albumRepo = albumRepo
	imp.artistRepo = artistRepo
	imp.idxRepo = idxRepo
	imp.plsRepo = plsRepo
	imp.propertyRepo = propertyRepo
	return imp
}
// CheckForUpdates starts an import in the background. When force is true,
// lastCheck is first reset to the zero time so the media folder's modification
// time can no longer be older than the last check.
func (i *Importer) CheckForUpdates(force bool) {
if force {
i.lastCheck = time.Time{}
}
i.startImport()
}
// startImport launches a goroutine that stats the media folder and runs a scan
// unless the folder was last modified before the previous check. A failing
// stat is logged and ends the goroutine.
//
// NOTE(review): lastCheck is written here and in CheckForUpdates with no
// synchronization visible in this file.
func (i *Importer) startImport() {
	go func() {
		stat, err := os.Stat(i.mediaFolder)
		if err != nil {
			log.Error(err)
			return
		}
		// Only scan when the last check is not newer than the folder's mtime.
		if modified := stat.ModTime(); !i.lastCheck.After(modified) {
			i.lastCheck = time.Now()
			i.scan()
		}
	}()
}
// scan runs one full cycle: it loads the previous LastScan timestamp, asks the
// scanner to read the library, logs the totals and persists the result. A
// scanner error aborts the cycle; a persistence error is only logged.
func (i *Importer) scan() {
	i.lastScan = i.lastModifiedSince()
	if i.lastScan.IsZero() {
		log.Info("Starting first iTunes Library scan. This can take a while...")
	}

	total, err := i.scanner.ScanLibrary(i.lastScan, i.mediaFolder)
	if err != nil {
		log.Error("Error importing iTunes Library", err)
		return
	}

	log.Debug("Totals informed by the scanner", "tracks", total,
		"songs", len(i.scanner.MediaFiles()),
		"albums", len(i.scanner.Albums()),
		"artists", len(i.scanner.Artists()),
		"playlists", len(i.scanner.Playlists()))

	err = i.importLibrary()
	if err != nil {
		log.Error("Error persisting data", err)
	}

	if !i.lastScan.IsZero() {
		log.Debug("Finished updating tracks from iTunes Library")
		return
	}
	log.Info("Finished first iTunes Library import")
}
// lastModifiedSince returns the time of the previous scan, read from the
// LastScan property where it is stored as milliseconds since the Unix epoch.
// It returns the zero time, meaning "never scanned", when the property cannot
// be read, is empty, or does not hold a valid integer.
func (i *Importer) lastModifiedSince() time.Time {
	ms, err := i.propertyRepo.Get(domain.PropLastScan)
	if err != nil {
		log.Warn("Couldn't read LastScan", err)
		return time.Time{}
	}
	if ms == "" {
		log.Debug("First scan")
		return time.Time{}
	}
	millis, err := strconv.ParseInt(ms, 10, 64)
	if err != nil {
		// A corrupt value used to be silently turned into the Unix epoch, which
		// is not the zero time and so was not treated as a first scan.
		log.Warn("Couldn't parse LastScan", "value", ms, err)
		return time.Time{}
	}
	return time.Unix(0, millis*int64(time.Millisecond))
}
// importLibrary persists everything the scanner collected (media files,
// albums, artists, playlists and the artist index), purges entries that are no
// longer present, and logs a summary of what changed.
//
// It returns the first error reported while purging. The LastScan property is
// only advanced when no purge failed, so a failed cycle is retried from the
// same point on the next scan.
func (i *Importer) importLibrary() (err error) {
	// The counts only feed the summary log below, so lookup errors are ignored.
	arc, _ := i.artistRepo.CountAll()
	alc, _ := i.albumRepo.CountAll()
	mfc, _ := i.mfRepo.CountAll()
	plc, _ := i.plsRepo.CountAll()

	log.Debug("Saving updated data")
	mfs, mfu := i.importMediaFiles()
	log.Debug("Imported media files", "total", len(mfs), "updated", mfu)
	als, alu := i.importAlbums()
	log.Debug("Imported albums", "total", len(als), "updated", alu)
	ars := i.importArtists()
	log.Debug("Imported artists", "total", len(ars))
	pls := i.importPlaylists()
	log.Debug("Imported playlists", "total", len(pls))
	i.importArtistIndex()

	log.Debug("Purging old data")
	// Record the first purge failure in the named result. The inner variables
	// are deliberately not called err: shadowing the result here used to leave
	// it nil forever, which made the LastScan guard below always pass.
	keepFirst := func(e error) {
		if err == nil {
			err = e
		}
	}
	if e := i.mfRepo.PurgeInactive(mfs); e != nil {
		log.Error(e)
		keepFirst(e)
	}
	if e := i.albumRepo.PurgeInactive(als); e != nil {
		log.Error(e)
		keepFirst(e)
	}
	if e := i.artistRepo.PurgeInactive(ars); e != nil {
		log.Error("Deleting inactive artists", e)
		keepFirst(e)
	}
	if _, e := i.plsRepo.PurgeInactive(pls); e != nil {
		log.Error(e)
		keepFirst(e)
	}

	arc2, _ := i.artistRepo.CountAll()
	alc2, _ := i.albumRepo.CountAll()
	mfc2, _ := i.mfRepo.CountAll()
	plc2, _ := i.plsRepo.CountAll()

	if arc != arc2 || alc != alc2 || mfc != mfc2 || plc != plc2 {
		log.Info(fmt.Sprintf("Updated library totals: %d(%+d) artists, %d(%+d) albums, %d(%+d) songs, %d(%+d) playlists", arc2, arc2-arc, alc2, alc2-alc, mfc2, mfc2-mfc, plc2, plc2-plc))
	}
	if alu > 0 || mfu > 0 {
		log.Info(fmt.Sprintf("Updated items: %d album(s), %d song(s)", alu, mfu))
	}

	if err == nil {
		// LastScan is stored as milliseconds since the Unix epoch.
		// NOTE(review): any result returned by Put is not checked here.
		millis := time.Now().UnixNano() / int64(time.Millisecond)
		i.propertyRepo.Put(domain.PropLastScan, fmt.Sprint(millis))
		log.Debug("LastScan", "timestamp", millis)
	}
	return err
}
// importMediaFiles saves every scanned media file whose UpdatedAt is not
// before the last scan. It returns a copy of all scanned media files (updated
// or not) and the number of files that were saved.
func (i *Importer) importMediaFiles() (domain.MediaFiles, int) {
	all := make(domain.MediaFiles, len(i.scanner.MediaFiles()))
	saved := 0
	next := 0
	for _, mf := range i.scanner.MediaFiles() {
		// The copy is taken before StarredAt is adjusted below.
		all[next] = *mf
		next++

		if mf.UpdatedAt.Before(i.lastScan) {
			continue
		}

		if mf.Starred {
			// Keep the stored StarredAt when the file was already starred;
			// otherwise the star is new and dates from this update.
			mf.StarredAt = mf.UpdatedAt
			if stored, err := i.mfRepo.Get(mf.ID); err == nil && stored.Starred {
				mf.StarredAt = stored.StarredAt
			}
		}

		if err := i.mfRepo.Put(mf); err != nil {
			log.Error(err)
		}
		saved++
		if !i.lastScan.IsZero() {
			log.Debug(fmt.Sprintf(`-- Updated Track: "%s"`, mf.Title))
		}
	}
	return all, saved
}
// importAlbums saves every scanned album whose UpdatedAt is not before the
// last scan. It returns a copy of all scanned albums (updated or not) and the
// number of albums that were saved.
func (i *Importer) importAlbums() (domain.Albums, int) {
	all := make(domain.Albums, len(i.scanner.Albums()))
	saved := 0
	next := 0
	for _, al := range i.scanner.Albums() {
		// The copy is taken before StarredAt is adjusted below.
		all[next] = *al
		next++

		if al.UpdatedAt.Before(i.lastScan) {
			continue
		}

		if al.Starred {
			// Keep the stored StarredAt when the album was already starred;
			// otherwise the star is new and dates from this update.
			al.StarredAt = al.UpdatedAt
			if stored, err := i.albumRepo.Get(al.ID); err == nil && stored.Starred {
				al.StarredAt = stored.StarredAt
			}
		}

		if err := i.albumRepo.Put(al); err != nil {
			log.Error(err)
		}
		saved++
		if !i.lastScan.IsZero() {
			log.Debug(fmt.Sprintf(`-- Updated Album: "%s" from "%s"`, al.Name, al.Artist))
		}
	}
	return all, saved
}
// importArtists saves every scanned artist and returns a copy of all of them.
// Save errors are logged and do not stop the loop.
func (i *Importer) importArtists() domain.Artists {
	all := make(domain.Artists, len(i.scanner.Artists()))
	next := 0
	for _, artist := range i.scanner.Artists() {
		all[next] = *artist
		next++
		err := i.artistRepo.Put(artist)
		if err != nil {
			log.Error(err)
		}
	}
	return all
}
// importArtistIndex rebuilds the artist index: it parses the configured index
// groups, places each scanned artist into its group, and saves the result. A
// save error is logged.
func (i *Importer) importArtistIndex() {
	groups := utils.ParseIndexGroups(conf.Sonic.IndexGroups)
	byGroup := make(map[string]tempIndex)
	for _, artist := range i.scanner.Artists() {
		i.collectIndex(groups, artist, byGroup)
	}
	err := i.saveIndex(byGroup)
	if err != nil {
		log.Error(err)
	}
}
// importPlaylists saves every scanned playlist and returns a copy of all of
// them. Before saving, each playlist is made public, assigned to the
// configured user and given a comment recording its original full path.
func (i *Importer) importPlaylists() domain.Playlists {
	all := make(domain.Playlists, len(i.scanner.Playlists()))
	next := 0
	for _, playlist := range i.scanner.Playlists() {
		playlist.Public = true
		playlist.Owner = conf.Sonic.User
		playlist.Comment = "Original: " + playlist.FullPath

		// The copy is taken after the fields above are set.
		all[next] = *playlist
		next++

		err := i.plsRepo.Put(playlist)
		if err != nil {
			log.Error(err)
		}
	}
	return all
}
// collectIndex adds artist a to artistIndex under the group chosen by
// findGroup. The entry is keyed by the lower-cased result of utils.NoArticle
// on the artist name; artists whose key is empty are skipped.
func (i *Importer) collectIndex(ig utils.IndexGroups, a *domain.Artist, artistIndex map[string]tempIndex) {
	key := strings.ToLower(utils.NoArticle(a.Name))
	if key == "" {
		return
	}

	info := domain.ArtistInfo{ArtistID: a.ID, Artist: a.Name, AlbumCount: a.AlbumCount}

	group := i.findGroup(ig, key)
	entries := artistIndex[group]
	if entries == nil {
		// First artist seen for this group.
		entries = make(tempIndex)
		artistIndex[group] = entries
	}
	entries[key] = info
}
// findGroup returns the index group for name, which is expected to be
// lower-cased already. A key of ig matches when its lower-cased form is a
// prefix of name; "#" is returned when no key matches.
//
// When several keys match (for example "T" and "Tom"), the longest one wins
// and ties go to the lexically smaller key. Map iteration order is
// unspecified, so returning the first match would make the result vary
// between runs.
func (i *Importer) findGroup(ig utils.IndexGroups, name string) string {
	var (
		group   = "#"
		bestKey string
		bestLen int
		matched bool
	)
	for k, v := range ig {
		prefix := strings.ToLower(k)
		if !strings.HasPrefix(name, prefix) {
			continue
		}
		longer := len(prefix) > bestLen
		tieBreak := len(prefix) == bestLen && k < bestKey
		if !matched || longer || tieBreak {
			group, bestKey, bestLen, matched = v, k, len(prefix), true
		}
	}
	return group
}
// saveIndex replaces the stored artist index: it deletes all existing entries
// and then saves one ArtistIndex per group. It stops at the first save error
// and returns it.
func (i *Importer) saveIndex(artistIndex map[string]tempIndex) error {
	i.idxRepo.DeleteAll()
	for group, entries := range artistIndex {
		idx := &domain.ArtistIndex{ID: group}
		for _, info := range entries {
			idx.Artists = append(idx.Artists, info)
		}
		if err := i.idxRepo.Put(idx); err != nil {
			return err
		}
	}
	return nil
}
-74
View File
@@ -1,74 +0,0 @@
package scanner
import (
"testing"
"github.com/cloudsonic/sonic-server/domain"
"github.com/cloudsonic/sonic-server/tests"
"github.com/cloudsonic/sonic-server/utils"
. "github.com/smartystreets/goconvey/convey"
)
// TestCollectIndex checks that collectIndex files an artist under the expected
// index group and key, and under no other group.
func TestCollectIndex(t *testing.T) {
	tests.Init(t, false)

	groups := utils.IndexGroups{"A": "A", "B": "B", "Tom": "Tom", "X": "X-Z"}
	imp := &Importer{}

	scenarios := []struct {
		title  string   // Convey description
		artist string   // artist name fed to collectIndex
		group  string   // group expected to receive the artist
		key    string   // key expected inside that group
		absent []string // groups that must stay empty
	}{
		{"Simple Name", "Björk", "B", "björk", []string{"A", "Tom", "X-Z", "#"}},
		{"Name not in the index", "Kraftwerk", "#", "kraftwerk", []string{"A", "B", "Tom", "X-Z"}},
		{"Name starts with an article", "The The", "#", "the", []string{"A", "B", "Tom", "X-Z"}},
		{"Name match a multichar entry", "Tom Waits", "Tom", "tom waits", []string{"A", "B", "X-Z", "#"}},
	}

	for _, sc := range scenarios {
		Convey(sc.title, t, func() {
			artist := &domain.Artist{Name: sc.artist}
			index := make(map[string]tempIndex)

			imp.collectIndex(groups, artist, index)

			So(index, ShouldContainKey, sc.group)
			So(index[sc.group], ShouldContainKey, sc.key)
			for _, other := range sc.absent {
				So(index, ShouldNotContainKey, other)
			}
		})
	}
}
-401
View File
@@ -1,401 +0,0 @@
package scanner
import (
"crypto/md5"
"fmt"
"html"
"mime"
"net/url"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
"github.com/cloudsonic/sonic-server/conf"
"github.com/cloudsonic/sonic-server/domain"
"github.com/cloudsonic/sonic-server/log"
"github.com/dhowden/itl"
"github.com/dhowden/tag"
)
// ItunesScanner is a Scanner that reads an iTunes library XML file. All maps
// are re-created on every call to ScanLibrary.
type ItunesScanner struct {
// mediaFiles is keyed by the track's persistent ID.
mediaFiles map[string]*domain.MediaFile
// albums is keyed by the value returned by albumId.
albums map[string]*domain.Album
// artists is keyed by the value returned by artistId.
artists map[string]*domain.Artist
// playlists is keyed by the playlist's persistent ID.
playlists map[string]*domain.Playlist
// pplaylists records every playlist seen so far, keyed by persistent ID; it is
// used by fullPath to walk up the parent chain.
pplaylists map[string]plsRelation
// pmediaFiles is keyed by the track's numeric TrackID, which is what playlist
// items reference.
pmediaFiles map[int]*domain.MediaFile
// lastModifiedSince is the value passed to the current ScanLibrary call.
lastModifiedSince time.Time
// checksumRepo holds the checksums stored by a previous scan.
checksumRepo CheckSumRepository
// newSums holds the checksums computed in the current scan, keyed by the
// track's persistent ID.
newSums map[string]string
}
// NewItunesScanner returns an ItunesScanner that uses checksumRepo to look up
// and store track checksums. Its maps are not allocated until ScanLibrary runs.
func NewItunesScanner(checksumRepo CheckSumRepository) *ItunesScanner {
return &ItunesScanner{checksumRepo: checksumRepo}
}
// CheckSumRepository stores per-track checksums between scans.
type CheckSumRepository interface {
// Get returns the checksum stored for id. ItunesScanner calls it with a
// track's persistent ID.
Get(id string) (string, error)
// SetData receives every checksum computed during a scan, keyed by the
// track's persistent ID.
SetData(newSums map[string]string) error
}
// plsRelation links a playlist to its parent so fullPath can build the
// playlist's full path.
type plsRelation struct {
// pID is the playlist's persistent ID.
pID string
// parentPID is the parent's persistent ID; fullPath treats an empty value as
// "no parent".
parentPID string
// name is the playlist name after HTML-unescaping.
name string
}
// ScanLibrary reads the iTunes library XML file at path and rebuilds the
// scanner's media file, album, artist and playlist collections from it.
//
// lastModifiedSince is stored and later used to decide which media files get a
// cover-art check. The function returns the number of tracks found in the
// library (including skipped ones), or an error when the file cannot be opened
// or parsed. A failure to save the new checksums is only logged.
func (s *ItunesScanner) ScanLibrary(lastModifiedSince time.Time, path string) (int, error) {
	log.Debug("Checking for updates", "lastModifiedSince", lastModifiedSince, "library", path)

	// Report open failures directly and release the descriptor when done; the
	// file used to be left open and an open error was only seen indirectly as a
	// read failure.
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	l, err := itl.ReadFromXML(f)
	if err != nil {
		return 0, err
	}
	log.Debug("Loaded tracks", "total", len(l.Tracks))

	// Start every scan from empty collections.
	s.lastModifiedSince = lastModifiedSince
	s.mediaFiles = make(map[string]*domain.MediaFile)
	s.albums = make(map[string]*domain.Album)
	s.artists = make(map[string]*domain.Artist)
	s.playlists = make(map[string]*domain.Playlist)
	s.pplaylists = make(map[string]plsRelation)
	s.pmediaFiles = make(map[int]*domain.MediaFile)
	s.newSums = make(map[string]string)

	songsPerAlbum := make(map[string]int)
	albumsPerArtist := make(map[string]map[string]bool)

	i := 0
	for _, t := range l.Tracks {
		if !s.skipTrack(&t) {
			// The checksum must be computed first: lastChangedDate, used by the
			// collectors below, compares it with the stored one.
			s.calcCheckSum(&t)

			ar := s.collectArtists(&t)
			mf := s.collectMediaFiles(&t)
			s.collectAlbums(&t, mf, ar)

			songsPerAlbum[mf.AlbumID]++
			if albumsPerArtist[mf.ArtistID] == nil {
				albumsPerArtist[mf.ArtistID] = make(map[string]bool)
			}
			albumsPerArtist[mf.ArtistID][mf.AlbumID] = true
		}
		i++
		if i%1000 == 0 {
			log.Debug(fmt.Sprintf("Processed %d tracks", i), "artists", len(s.artists), "albums", len(s.albums), "songs", len(s.mediaFiles))
		}
	}

	for albumId, count := range songsPerAlbum {
		s.albums[albumId].SongCount = count
	}
	for artistId, albums := range albumsPerArtist {
		s.artists[artistId].AlbumCount = len(albums)
	}

	if err := s.checksumRepo.SetData(s.newSums); err != nil {
		log.Error("Error saving checksums", err)
	} else {
		log.Debug("Saved checksums", "total", len(s.newSums))
	}

	ignFolders := conf.Sonic.PlsIgnoreFolders
	ignPatterns := strings.Split(conf.Sonic.PlsIgnoredPatterns, ";")
	for _, p := range l.Playlists {
		// Register every playlist, even skipped ones, so that fullPath can
		// resolve it as a parent.
		rel := plsRelation{pID: p.PlaylistPersistentID, parentPID: p.ParentPersistentID, name: unescape(p.Name)}
		s.pplaylists[p.PlaylistPersistentID] = rel
		fullPath := s.fullPath(p.PlaylistPersistentID)

		if s.skipPlaylist(&p, ignFolders, ignPatterns, fullPath) {
			continue
		}

		s.collectPlaylists(&p, fullPath)
	}
	log.Debug("Processed playlists", "total", len(l.Playlists))

	return len(l.Tracks), nil
}
// MediaFiles returns the scanner's media file map itself, not a copy.
func (s *ItunesScanner) MediaFiles() map[string]*domain.MediaFile {
return s.mediaFiles
}
// Albums returns the scanner's album map itself, not a copy.
func (s *ItunesScanner) Albums() map[string]*domain.Album {
return s.albums
}
// Artists returns the scanner's artist map itself, not a copy.
func (s *ItunesScanner) Artists() map[string]*domain.Artist {
return s.artists
}
// Playlists returns the scanner's playlist map itself, not a copy.
func (s *ItunesScanner) Playlists() map[string]*domain.Playlist {
return s.playlists
}
// skipTrack reports whether track t should be left out of the scan. Podcasts
// are always skipped. When DevDisableFileCheck is set, every other track is
// kept; otherwise a track is kept only if its location starts with "file://"
// and its extension maps to an "audio/" MIME type.
func (s *ItunesScanner) skipTrack(t *itl.Track) bool {
	switch {
	case t.Podcast:
		return true
	case conf.Sonic.DevDisableFileCheck:
		return false
	case !strings.HasPrefix(t.Location, "file://"):
		return true
	}

	mimeType := mime.TypeByExtension(filepath.Ext(t.Location))
	return !strings.HasPrefix(mimeType, "audio/")
}
// skipPlaylist reports whether playlist p should be ignored. It skips the
// "special" iTunes playlists, folders when ignFolders is set, and any playlist
// whose fullPath matches one of the regular expressions in ignPatterns. Empty
// patterns are ignored, and a pattern that fails to compile never matches.
func (s *ItunesScanner) skipPlaylist(p *itl.Playlist, ignFolders bool, ignPatterns []string, fullPath string) bool {
	special := p.Master || p.Music || p.Audiobooks || p.Movies || p.TVShows || p.Podcasts || p.ITunesU
	if special || (ignFolders && p.Folder) {
		return true
	}

	for _, pattern := range ignPatterns {
		if pattern == "" {
			continue
		}
		if matched, _ := regexp.MatchString(pattern, fullPath); matched {
			return true
		}
	}
	return false
}
// collectPlaylists converts iTunes playlist p into a domain.Playlist and
// registers it under its persistent ID. Items whose TrackID was not collected
// as a media file are dropped, and a playlist left with no tracks is not
// registered.
func (s *ItunesScanner) collectPlaylists(p *itl.Playlist, fullPath string) {
	pl := &domain.Playlist{
		ID:       p.PlaylistPersistentID,
		Name:     unescape(p.Name),
		FullPath: fullPath,
		Tracks:   make([]string, 0, len(p.PlaylistItems)),
	}

	for _, item := range p.PlaylistItems {
		mf, found := s.pmediaFiles[item.TrackID]
		if !found {
			continue
		}
		pl.Tracks = append(pl.Tracks, mf.ID)
		pl.Duration += mf.Duration
	}

	if len(pl.Tracks) == 0 {
		return
	}
	s.playlists[pl.ID] = pl
}
// fullPath returns the names of the playlist pID and all its ancestors, joined
// with " > " from the outermost parent down. It returns "" when pID has not
// been registered in pplaylists.
func (s *ItunesScanner) fullPath(pID string) string {
	rel, known := s.pplaylists[pID]
	switch {
	case !known:
		return ""
	case rel.parentPID == "":
		// Top-level playlist: the path is just its name.
		return rel.name
	}
	return s.fullPath(rel.parentPID) + " > " + rel.name
}
// lastChangedDate returns when track t last changed. If the track's checksum
// differs from the stored one, that is the current time; otherwise it is the
// later of DateModified and PlayDateUTC, or the zero time when neither is set.
func (s *ItunesScanner) lastChangedDate(t *itl.Track) time.Time {
	if s.hasChanged(t) {
		return time.Now()
	}

	var latest time.Time
	if latest.Before(t.DateModified) {
		latest = t.DateModified
	}
	if latest.Before(t.PlayDateUTC) {
		latest = t.PlayDateUTC
	}
	return latest
}
// hasChanged reports whether the checksum computed for track t in this scan
// differs from the one stored in the checksum repository. A lookup error is
// ignored, so whatever value Get returned alongside it is still compared.
func (s *ItunesScanner) hasChanged(t *itl.Track) bool {
	stored, _ := s.checksumRepo.Get(t.PersistentID)
	return stored != s.newSums[t.PersistentID]
}
// calcCheckSum computes an MD5 checksum over DateModified and the track's
// stats fields (whose changes are not reflected in DateModified). The hex
// checksum is recorded in newSums under the track's persistent ID and returned.
func (s *ItunesScanner) calcCheckSum(t *itl.Track) string {
	fields := fmt.Sprint(t.DateModified, t.PlayCount, t.PlayDate, t.ArtworkCount, t.Loved, t.AlbumLoved,
		t.Rating, t.AlbumRating, t.SkipCount, t.SkipDate)
	digest := md5.Sum([]byte(fields))
	checksum := fmt.Sprintf("%x", digest)
	s.newSums[t.PersistentID] = checksum
	return checksum
}
// collectMediaFiles converts iTunes track t into a domain.MediaFile, registers
// it under both its persistent ID and its numeric TrackID, and returns it.
// Text fields are HTML-unescaped and an empty album or artist gets an
// "[Unknown ...]" placeholder. The file is checked for cover art only when it
// changed after lastModifiedSince and file checks are enabled.
func (s *ItunesScanner) collectMediaFiles(t *itl.Track) *domain.MediaFile {
	path := extractPath(t.Location)

	mf := &domain.MediaFile{
		ID:          t.PersistentID,
		Album:       unescape(t.Album),
		AlbumID:     albumId(t),
		ArtistID:    artistId(t),
		Title:       unescape(t.Name),
		Artist:      unescape(t.Artist),
		AlbumArtist: unescape(t.AlbumArtist),
		Genre:       unescape(t.Genre),
		Compilation: t.Compilation,
		Starred:     t.Loved,
		Rating:      t.Rating / 20,
		PlayCount:   t.PlayCount,
		PlayDate:    t.PlayDateUTC,
		Year:        t.Year,
		TrackNumber: t.TrackNumber,
		DiscNumber:  t.DiscNumber,
		BitRate:     t.BitRate,
		Path:        path,
		Suffix:      strings.TrimPrefix(filepath.Ext(path), "."),
		CreatedAt:   t.DateAdded,
	}

	if mf.Album == "" {
		mf.Album = "[Unknown Album]"
	}
	if mf.Artist == "" {
		mf.Artist = "[Unknown Artist]"
	}
	if t.Size > 0 {
		mf.Size = strconv.Itoa(t.Size)
	}
	if t.TotalTime > 0 {
		mf.Duration = t.TotalTime / 1000
	}

	mf.UpdatedAt = s.lastChangedDate(t)
	if mf.UpdatedAt.After(s.lastModifiedSince) && !conf.Sonic.DevDisableFileCheck {
		mf.HasCoverArt = hasCoverArt(path)
	}

	s.mediaFiles[mf.ID] = mf
	s.pmediaFiles[t.TrackID] = mf
	return mf
}
// collectAlbums creates or updates the album that track t belongs to, using
// the track's media file mf and artist ar, and returns it. Descriptive fields
// are overwritten by each track of the album, while PlayCount and Duration
// accumulate. PlayDate and UpdatedAt keep the latest value seen and CreatedAt
// the earliest.
func (s *ItunesScanner) collectAlbums(t *itl.Track, mf *domain.MediaFile, ar *domain.Artist) *domain.Album {
	id := albumId(t)
	al, known := s.albums[id]
	if !known {
		al = &domain.Album{}
		s.albums[id] = al
	}

	// Fields taken from whichever track of the album is processed last.
	al.ID = id
	al.ArtistID = ar.ID
	al.AlbumArtist = ar.Name
	al.Year = t.Year
	al.Compilation = t.Compilation
	al.Starred = t.AlbumLoved
	al.Rating = t.AlbumRating / 20
	al.Genre = mf.Genre

	al.Name = mf.Album
	if al.Name == "" {
		al.Name = "[Unknown Album]"
	}
	al.Artist = mf.Artist
	if al.Artist == "" {
		al.Artist = "[Unknown Artist]"
	}

	// Totals accumulated over all tracks of the album.
	al.PlayCount += t.PlayCount
	al.Duration += mf.Duration

	if mf.HasCoverArt {
		al.CoverArtId = mf.ID
		al.CoverArtPath = mf.Path
	}

	if played := t.PlayDateUTC; al.PlayDate.IsZero() || played.After(al.PlayDate) {
		al.PlayDate = played
	}
	if added := t.DateAdded; al.CreatedAt.IsZero() || added.Before(al.CreatedAt) {
		al.CreatedAt = added
	}
	if changed := s.lastChangedDate(t); al.UpdatedAt.IsZero() || changed.After(al.UpdatedAt) {
		al.UpdatedAt = changed
	}

	return al
}
// collectArtists creates or updates the artist that track t is filed under
// (see realArtistName) and returns it. An empty name is replaced by
// "[Unknown Artist]".
func (s *ItunesScanner) collectArtists(t *itl.Track) *domain.Artist {
	id := artistId(t)
	artist, known := s.artists[id]
	if !known {
		artist = &domain.Artist{}
		s.artists[id] = artist
	}

	artist.ID = id
	if name := unescape(realArtistName(t)); name != "" {
		artist.Name = name
	} else {
		artist.Name = "[Unknown Artist]"
	}
	return artist
}
// albumId derives the album ID for track t: the hex MD5 of the lower-cased
// string "<real artist name>\<album>".
func albumId(t *itl.Track) string {
	key := strings.ToLower(realArtistName(t) + "\\" + t.Album)
	digest := md5.Sum([]byte(key))
	return fmt.Sprintf("%x", digest)
}
// artistId derives the artist ID for track t: the hex MD5 of the lower-cased
// real artist name.
func artistId(t *itl.Track) string {
	key := strings.ToLower(realArtistName(t))
	digest := md5.Sum([]byte(key))
	return fmt.Sprintf("%x", digest)
}
// hasCoverArt reports whether the file at path carries an embedded picture in
// its tags. It returns false when the file is missing, cannot be opened, or
// its tags cannot be read. A panic while reading is recovered and logged, and
// also results in false.
func hasCoverArt(path string) bool {
	defer func() {
		if r := recover(); r != nil {
			log.Error("Panic reading tag", "path", path, "error", r)
		}
	}()

	// A file that cannot be stat'ed is reported as having no cover art,
	// without logging.
	if _, err := os.Stat(path); err != nil {
		return false
	}

	f, err := os.Open(path)
	if err != nil {
		log.Warn("Error opening file", "path", path, err)
		return false
	}
	defer f.Close()

	meta, err := tag.ReadFrom(f)
	if err != nil {
		log.Warn("Error reading tag from file", "path", path, err)
		return false
	}
	return meta.Picture() != nil
}
// unescape replaces HTML entities in str (such as "&amp;") with the characters
// they stand for.
func unescape(str string) string {
return html.UnescapeString(str)
}
// extractPath converts an iTunes track location (a percent-encoded "file://"
// URL that may also contain HTML entities) into a plain file system path.
//
// Percent escapes are decoded with path semantics, so a literal '+' is kept
// rather than turned into a space. If loc contains a malformed escape it is
// used undecoded instead of collapsing to an empty path.
func extractPath(loc string) string {
	path, err := url.PathUnescape(loc)
	if err != nil {
		// Best effort: keep the raw location rather than losing the path.
		path = loc
	}
	path = html.UnescapeString(path)
	return strings.TrimPrefix(path, "file://")
}
// realArtistName returns the artist name that track t is filed under:
// "Various Artists" for compilations, otherwise the album artist when it is
// set, otherwise the track artist.
func realArtistName(t *itl.Track) string {
	if t.Compilation {
		return "Various Artists"
	}
	if t.AlbumArtist != "" {
		return t.AlbumArtist
	}
	return t.Artist
}
// Compile-time check that *ItunesScanner implements Scanner.
var _ Scanner = (*ItunesScanner)(nil)
-25
View File
@@ -1,25 +0,0 @@
package scanner
import (
"testing"
. "github.com/smartystreets/goconvey/convey"
)
// TestExtractLocation checks that extractPath decodes percent escapes in an
// iTunes location while keeping a literal plus sign.
func TestExtractLocation(t *testing.T) {
	const (
		encoded = "file:///Users/deluan/Music/iTunes%201/iTunes%20Media/Music/Chance/Six%20Through%20Ten/03%20Forgive+Forget.m4a"
		decoded = "/Users/deluan/Music/iTunes 1/iTunes Media/Music/Chance/Six Through Ten/03 Forgive+Forget.m4a"
	)

	Convey("Given a path with a plus (+) signal", t, func() {
		Convey("When I decode it", func() {
			got := extractPath(encoded)

			Convey("I get the correct path", func() {
				So(got, ShouldEqual, decoded)
			})
		})
	})
}
-9
View File
@@ -1,9 +0,0 @@
package scanner
import "github.com/google/wire"
// Set is the wire provider set for this package: it provides NewImporter and
// NewItunesScanner, and binds the Scanner interface to *ItunesScanner.
var Set = wire.NewSet(
NewImporter,
NewItunesScanner,
wire.Bind(new(Scanner), new(*ItunesScanner)),
)