feat(scanner): implement file-based target passing for large target lists

Signed-off-by: Deluan <deluan@navidrome.org>
This commit is contained in:
Deluan
2025-12-16 16:08:32 -05:00
parent 8c80be56da
commit 9ed309ac81
4 changed files with 363 additions and 6 deletions
+71 -3
View File
@@ -14,6 +14,12 @@ import (
"github.com/navidrome/navidrome/model"
)
// argLengthThreshold is the estimated command-line size, in bytes, above which
// scan targets are handed to the external scanner through a temp file instead
// of individual "-t" arguments. It is deliberately conservative (24KB): Windows
// caps the command line at roughly 32KB, and the remainder is left as margin
// for env vars.
const argLengthThreshold = 24 << 10
// scannerExternal is a scanner that runs an external process to do the scanning. It is used to avoid
// memory leaks or retention in the main process, as the scanner can consume a lot of memory. The
// external process will be spawned with the same executable as the current process, and will run
@@ -45,10 +51,14 @@ func (s *scannerExternal) scan(ctx context.Context, fullScan bool, targets []mod
// Add targets if provided
if len(targets) > 0 {
for _, target := range targets {
args = append(args, "-t", target.String())
targetArgs, cleanup, err := targetArguments(ctx, targets, argLengthThreshold)
if err != nil {
progress <- &ProgressInfo{Error: err.Error()}
return
}
log.Debug(ctx, "Spawning external scanner process with targets", "fullScan", fullScan, "path", exe, "targets", targets)
defer cleanup()
log.Debug(ctx, "Spawning external scanner process with target file", "fullScan", fullScan, "path", exe, "numTargets", len(targets))
args = append(args, targetArgs...)
} else {
log.Debug(ctx, "Spawning external scanner process", "fullScan", fullScan, "path", exe)
}
@@ -98,4 +108,62 @@ func (s *scannerExternal) wait(cmd *exec.Cmd, out *io.PipeWriter) {
_ = out.Close()
}
// targetArguments converts the scan targets into the arguments passed to the
// external scanner process.
//
// When the estimated length of the arguments is at most lengthThreshold, every
// target is emitted as its own "-t <target>" pair and the returned cleanup
// function is a no-op. Otherwise the targets are written to a temp file and a
// single "--target-file <path>" pair is returned; the cleanup function then
// removes that file.
//
// On failure the returned slice and cleanup function are both nil, so callers
// must check the error before invoking cleanup.
func targetArguments(ctx context.Context, targets []model.ScanTarget, lengthThreshold int) ([]string, func(), error) {
	// Short lists fit on the command line as-is.
	if estimateArgLength(targets) <= lengthThreshold {
		var inline []string
		for _, tgt := range targets {
			inline = append(inline, "-t", tgt.String())
		}
		return inline, func() {}, nil
	}

	// Too long for the command line: hand the list over through a temp file.
	path, err := writeTargetsToFile(targets)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to write targets to file: %w", err)
	}
	removeTargetFile := func() {
		// Best-effort removal; a failure here is intentionally not reported.
		_ = os.Remove(path)
	}
	return []string{"--target-file", path}, removeTargetFile, nil
}
// estimateArgLength returns an estimate, in bytes, of the command-line length
// needed to pass all targets as "-t <target>" arguments. Each target is counted
// as its string form plus a fixed overhead for the "-t " prefix and one
// trailing separator space.
func estimateArgLength(targets []model.ScanTarget) int {
	// Fixed cost per target: the "-t " flag prefix plus the separating space.
	const perTargetOverhead = len("-t ") + len(" ")

	total := len(targets) * perTargetOverhead
	for _, tgt := range targets {
		total += len(tgt.String())
	}
	return total
}
// writeTargetsToFile writes the targets to a newly created temporary file, one
// target per line (each line is the target's String() form followed by "\n").
//
// It returns the path of the temp file; the caller is responsible for removing
// it once it is no longer needed. If creating, writing or closing the file
// fails, an error is returned, the path is empty, and any file that was
// created is removed on a best-effort basis.
func writeTargetsToFile(targets []model.ScanTarget) (string, error) {
	tmpFile, err := os.CreateTemp("", "navidrome-scan-targets-*.txt")
	if err != nil {
		return "", fmt.Errorf("failed to create temp file: %w", err)
	}
	name := tmpFile.Name()

	// Assemble the whole payload in memory so it reaches the file in a single
	// Write call rather than one call per target.
	var payload []byte
	for _, target := range targets {
		payload = append(payload, target.String()...)
		payload = append(payload, '\n')
	}

	_, writeErr := tmpFile.Write(payload)
	// Always close before any removal: deleting a file that is still open may
	// fail on some platforms (notably Windows). The Close error is checked as
	// well, because an unreported close failure could leave an incomplete
	// target list on disk.
	closeErr := tmpFile.Close()

	if writeErr != nil {
		_ = os.Remove(name) // best-effort cleanup; the write error is what matters
		return "", fmt.Errorf("failed to write to temp file: %w", writeErr)
	}
	if closeErr != nil {
		_ = os.Remove(name) // best-effort cleanup; the close error is what matters
		return "", fmt.Errorf("failed to close temp file: %w", closeErr)
	}
	return name, nil
}
// Compile-time assertion that *scannerExternal satisfies the scanner interface.
var _ scanner = (*scannerExternal)(nil)
+160
View File
@@ -0,0 +1,160 @@
package scanner
import (
"context"
"os"
"strings"
"github.com/navidrome/navidrome/model"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
// Specs for targetArguments: small target lists must be passed inline as
// "-t <target>" pairs, while lists whose estimated length exceeds the given
// threshold must be written to a temp file and passed via "--target-file".
var _ = Describe("targetArguments", func() {
	var ctx context.Context

	BeforeEach(func() {
		ctx = GinkgoT().Context()
	})

	Context("with small target list", func() {
		It("returns command-line arguments for single target", func() {
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music/Rock"},
			}

			args, cleanup, err := targetArguments(ctx, targets, argLengthThreshold)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			Expect(args).To(Equal([]string{"-t", "1:Music/Rock"}))
		})

		It("returns command-line arguments for multiple targets", func() {
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music/Rock"},
				{LibraryID: 2, FolderPath: "Music/Jazz"},
				{LibraryID: 3, FolderPath: "Classical"},
			}

			args, cleanup, err := targetArguments(ctx, targets, argLengthThreshold)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			Expect(args).To(Equal([]string{
				"-t", "1:Music/Rock",
				"-t", "2:Music/Jazz",
				"-t", "3:Classical",
			}))
		})

		It("handles targets with special characters", func() {
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music/Rock & Roll"},
				{LibraryID: 2, FolderPath: "Music/Jazz (Modern)"},
			}

			args, cleanup, err := targetArguments(ctx, targets, argLengthThreshold)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			Expect(args).To(Equal([]string{
				"-t", "1:Music/Rock & Roll",
				"-t", "2:Music/Jazz (Modern)",
			}))
		})
	})

	Context("with large target list exceeding threshold", func() {
		It("returns --target-file argument when exceeding threshold", func() {
			// Create enough targets to exceed the threshold
			var targets []model.ScanTarget
			for i := 1; i <= 600; i++ {
				targets = append(targets, model.ScanTarget{
					LibraryID:  1,
					FolderPath: "Music/VeryLongFolderPathToSimulateRealScenario/SubFolder",
				})
			}

			args, cleanup, err := targetArguments(ctx, targets, argLengthThreshold)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			Expect(args).To(HaveLen(2))
			Expect(args[0]).To(Equal("--target-file"))

			// Verify the file path has the expected name pattern
			filePath := args[1]
			Expect(filePath).To(ContainSubstring("navidrome-scan-targets-"))
			Expect(filePath).To(HaveSuffix(".txt"))

			// Verify file actually exists
			_, err = os.Stat(filePath)
			Expect(err).ToNot(HaveOccurred())
		})

		It("creates temp file with correct format", func() {
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music/Rock"},
				{LibraryID: 2, FolderPath: "Music/Jazz"},
				{LibraryID: 3, FolderPath: "Classical"},
			}

			// Set threshold very low to force file usage
			args, cleanup, err := targetArguments(ctx, targets, 10)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			// Guard the index accesses below so a regression fails cleanly
			// instead of panicking with an out-of-range index.
			Expect(args).To(HaveLen(2))
			Expect(args[0]).To(Equal("--target-file"))

			// Verify the file path has the expected name pattern
			filePath := args[1]
			Expect(filePath).To(ContainSubstring("navidrome-scan-targets-"))
			Expect(filePath).To(HaveSuffix(".txt"))

			// Verify file content: one target per line, in input order
			content, err := os.ReadFile(filePath)
			Expect(err).ToNot(HaveOccurred())
			lines := strings.Split(strings.TrimSpace(string(content)), "\n")
			Expect(lines).To(HaveLen(3))
			Expect(lines[0]).To(Equal("1:Music/Rock"))
			Expect(lines[1]).To(Equal("2:Music/Jazz"))
			Expect(lines[2]).To(Equal("3:Classical"))
		})

		It("removes the temp file when cleanup is called", func() {
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music/Rock"},
			}

			// Set threshold very low to force file usage
			args, cleanup, err := targetArguments(ctx, targets, 1)
			Expect(err).ToNot(HaveOccurred())
			// Second invocation at spec exit is only a safety net in case an
			// assertion below fails before the explicit cleanup() call.
			defer cleanup()
			Expect(args).To(HaveLen(2))
			filePath := args[1]

			_, err = os.Stat(filePath)
			Expect(err).ToNot(HaveOccurred())

			cleanup()

			_, err = os.Stat(filePath)
			Expect(os.IsNotExist(err)).To(BeTrue())
		})
	})

	Context("edge cases", func() {
		It("handles empty target list", func() {
			var targets []model.ScanTarget

			args, cleanup, err := targetArguments(ctx, targets, argLengthThreshold)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			Expect(args).To(BeEmpty())
		})

		It("uses command-line args when exactly at threshold", func() {
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music"},
			}

			// Use the estimate itself as the threshold, so the arguments sit
			// exactly at the limit (the estimate should be 11 bytes here).
			estimatedLength := estimateArgLength(targets)
			args, cleanup, err := targetArguments(ctx, targets, estimatedLength)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			Expect(args).To(Equal([]string{"-t", "1:Music"}))
		})

		It("uses file when one byte over threshold", func() {
			targets := []model.ScanTarget{
				{LibraryID: 1, FolderPath: "Music"},
			}

			// Set threshold just below the estimated length
			estimatedLength := estimateArgLength(targets)
			args, cleanup, err := targetArguments(ctx, targets, estimatedLength-1)
			Expect(err).ToNot(HaveOccurred())
			defer cleanup()
			// Guard the index access below so a regression fails cleanly
			// instead of panicking with an out-of-range index.
			Expect(args).To(HaveLen(2))
			Expect(args[0]).To(Equal("--target-file"))
		})
	})
})