From 59a6f44b25d4a8e8cabd71d5a0f5a56dc5a12541 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Tue, 19 May 2026 16:42:38 -0400 Subject: [PATCH 1/8] approach 1: subcommand with skippable steps Signed-off-by: Will Murphy --- cmd/grype/cli/commands/db.go | 1 + cmd/grype/cli/commands/db_build.go | 249 ++++++++++++++++++ cmd/grype/cli/commands/db_build_test.go | 71 +++++ cmd/grype/cli/options/database_build.go | 133 ++++++++++ .../db/build/providers/external/log_writer.go | 114 ++++++++ .../providers/external/log_writer_test.go | 67 +++++ grype/db/build/providers/external/provider.go | 114 ++++++++ grype/db/build/providers/providers.go | 70 +++++ grype/db/build/providers/vunnel/provider.go | 193 ++++++++++++++ grype/db/build/pull/provider_run_config.go | 25 ++ grype/db/build/pull/pull.go | 71 +++++ 11 files changed, 1108 insertions(+) create mode 100644 cmd/grype/cli/commands/db_build.go create mode 100644 cmd/grype/cli/commands/db_build_test.go create mode 100644 cmd/grype/cli/options/database_build.go create mode 100644 grype/db/build/providers/external/log_writer.go create mode 100644 grype/db/build/providers/external/log_writer_test.go create mode 100644 grype/db/build/providers/external/provider.go create mode 100644 grype/db/build/providers/providers.go create mode 100644 grype/db/build/providers/vunnel/provider.go create mode 100644 grype/db/build/pull/provider_run_config.go create mode 100644 grype/db/build/pull/pull.go diff --git a/cmd/grype/cli/commands/db.go b/cmd/grype/cli/commands/db.go index e1328cd745b..6bcd883a082 100644 --- a/cmd/grype/cli/commands/db.go +++ b/cmd/grype/cli/commands/db.go @@ -19,6 +19,7 @@ func DB(app clio.Application) *cobra.Command { } db.AddCommand( + DBBuild(app), DBCheck(app), DBDelete(app), DBImport(app), diff --git a/cmd/grype/cli/commands/db_build.go b/cmd/grype/cli/commands/db_build.go new file mode 100644 index 00000000000..2769e406a65 --- /dev/null +++ b/cmd/grype/cli/commands/db_build.go @@ -0,0 +1,249 @@ +package commands + 
+import ( + "errors" + "fmt" + "os" + "strings" + + "github.com/scylladb/go-set/strset" + "github.com/spf13/cobra" + + "github.com/anchore/clio" + "github.com/anchore/grype/cmd/grype/cli/options" + "github.com/anchore/grype/grype/db" + "github.com/anchore/grype/grype/db/build/providers" + "github.com/anchore/grype/grype/db/build/providers/vunnel" + "github.com/anchore/grype/grype/db/build/pull" + dbprovider "github.com/anchore/grype/grype/db/provider" + "github.com/anchore/grype/internal/log" +) + +const ( + skipPhasePull = "pull" + skipPhaseValidate = "validate" + skipPhaseWrite = "write" + skipPhasePackage = "package" +) + +var allSkipPhases = []string{skipPhasePull, skipPhaseValidate, skipPhaseWrite, skipPhasePackage} + +// dbBuildConfigWrapper nests the DatabaseBuild options under `db.build:` in +// the application YAML config so the schema remains coherent next to the +// existing `db:` settings used by other db commands. The command flags are +// still registered directly on DatabaseBuild via its AddFlags method. +type dbBuildConfigWrapper struct { + DB dbBuildConfigDBSection `yaml:"db" json:"db" mapstructure:"db"` +} + +type dbBuildConfigDBSection struct { + Build *options.DatabaseBuild `yaml:"build" json:"build" mapstructure:"build"` +} + +func DBBuild(app clio.Application) *cobra.Command { + opts := options.DefaultDatabaseBuild() + + cmd := &cobra.Command{ + Use: "build", + Short: "Build a vulnerability database from upstream vulnerability data", + Long: `Build a vulnerability database by running the full pull -> write -> package +pipeline. 
Use --skip to omit any combination of phases (pull, validate, write, +package); for example: + + grype db build --skip pull,package # build a DB from existing provider data + grype db build --skip pull,write # only package an already-built DB`, + Args: cobra.NoArgs, + PreRunE: func(cmd *cobra.Command, args []string) error { + return disableUI(app)(cmd, args) + }, + RunE: func(_ *cobra.Command, _ []string) error { + return runDBBuild(opts) + }, + } + + return app.SetupCommand(cmd, &dbBuildConfigWrapper{DB: dbBuildConfigDBSection{Build: opts}}) +} + +func runDBBuild(opts *options.DatabaseBuild) error { + skip, err := parseSkipPhases(opts.Skip) + if err != nil { + return err + } + + if err := validateCPEParts(opts.IncludeCPEParts); err != nil { + return err + } + + if opts.ArchiveExtension != "" && !strset.New("tar.gz", "tar.zst").Has(opts.ArchiveExtension) { + return fmt.Errorf("archive-extension must be 'tar.gz' or 'tar.zst'") + } + + needProviders := !skip.Has(skipPhasePull) || !skip.Has(skipPhaseWrite) + + var pvdrs dbprovider.Providers + if needProviders { + pvdrs, err = buildProviders(opts) + if err != nil { + return err + } + } + + if !skip.Has(skipPhasePull) { + if err := runPullPhase(opts, pvdrs); err != nil { + return fmt.Errorf("pull phase failed: %w", err) + } + } else { + log.Info("skipping pull phase") + } + + if !skip.Has(skipPhaseWrite) { + if err := runWritePhase(opts, pvdrs, skip.Has(skipPhaseValidate)); err != nil { + return fmt.Errorf("write phase failed: %w", err) + } + } else { + log.Info("skipping write phase") + } + + if !skip.Has(skipPhasePackage) { + if err := runPackagePhase(opts); err != nil { + return fmt.Errorf("package phase failed: %w", err) + } + } else { + log.Info("skipping package phase") + } + + return nil +} + +func parseSkipPhases(raw []string) (*strset.Set, error) { + set := strset.New() + for _, entry := range raw { + for _, p := range strings.Split(entry, ",") { + p = strings.TrimSpace(strings.ToLower(p)) + if p == "" { + 
// validateCPEParts reports an error when parts is empty or contains anything
// other than the single-letter CPE parts "a", "o" and "h".
func validateCPEParts(parts []string) error {
	if len(parts) == 0 {
		return errors.New("no CPE parts provided")
	}
	for _, candidate := range parts {
		switch candidate {
		case "a", "o", "h":
			// recognised part, keep checking the rest
		default:
			return fmt.Errorf("invalid CPE part: %s", candidate)
		}
	}
	return nil
}
+ } + + return pvdrs, nil +} + +func runPullPhase(opts *options.DatabaseBuild, pvdrs dbprovider.Providers) error { + return pull.Pull(pull.Config{ + Parallelism: opts.Pull.Parallelism, + Collection: dbprovider.Collection{ + Root: opts.Provider.Root, + Providers: pvdrs, + }, + }) +} + +func runWritePhase(opts *options.DatabaseBuild, pvdrs dbprovider.Providers, skipValidation bool) error { + if _, err := os.Stat(opts.Dir); os.IsNotExist(err) { + if err := os.MkdirAll(opts.Dir, 0755); err != nil { + return fmt.Errorf("unable to make db build dir: %w", err) + } + } + + states, err := providerStates(skipValidation, pvdrs) + if err != nil { + return fmt.Errorf("unable to get provider states: %w", err) + } + + earliest, err := dbprovider.States(states).EarliestTimestamp() + if err != nil { + return fmt.Errorf("unable to get earliest timestamp: %w", err) + } + + return db.Build(db.BuildConfig{ + SchemaVersion: opts.SchemaVersion, + Directory: opts.Dir, + States: states, + Timestamp: earliest, + IncludeCPEParts: opts.IncludeCPEParts, + InferNVDFixVersions: opts.InferNVDFixVersions, + Hydrate: opts.Hydrate, + FailOnMissingFixDate: opts.FailOnMissingFixDate, + BatchSize: opts.BatchSize, + }) +} + +func runPackagePhase(opts *options.DatabaseBuild) error { + // v5 DB writing (and its corresponding listing.json) is no longer supported via this command; + // publish-base-url is intentionally omitted. 
+ return db.Package(opts.Dir, "", opts.ArchiveExtension, opts.CompressorCommands) +} + +func providerStates(skipValidation bool, providers []dbprovider.Reader) ([]dbprovider.State, error) { + var states []dbprovider.State + log.Debug("reading all provider state") + + if len(providers) == 0 { + return nil, fmt.Errorf("no providers configured") + } + + for _, p := range providers { + log.WithFields("provider", p.ID().Name).Debug("reading state") + + sd, err := p.State() + if err != nil { + return nil, fmt.Errorf("unable to read provider state: %w", err) + } + + if !skipValidation { + log.WithFields("provider", p.ID().Name).Trace("validating state") + if err := sd.Verify(); err != nil { + return nil, fmt.Errorf("invalid provider state: %w", err) + } + } + states = append(states, *sd) + } + if !skipValidation { + log.Debugf("state validated for all providers") + } + return states, nil +} diff --git a/cmd/grype/cli/commands/db_build_test.go b/cmd/grype/cli/commands/db_build_test.go new file mode 100644 index 00000000000..469ab13aad0 --- /dev/null +++ b/cmd/grype/cli/commands/db_build_test.go @@ -0,0 +1,71 @@ +package commands + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseSkipPhases(t *testing.T) { + tests := []struct { + name string + input []string + wantPhases []string + wantErr bool + }{ + { + name: "empty", + input: nil, + wantPhases: nil, + }, + { + name: "single comma-separated entry", + input: []string{"pull,validate,package"}, + wantPhases: []string{skipPhasePull, skipPhaseValidate, skipPhasePackage}, + }, + { + name: "multiple --skip occurrences", + input: []string{"pull", "write"}, + wantPhases: []string{skipPhasePull, skipPhaseWrite}, + }, + { + name: "mixed case and whitespace tolerated", + input: []string{" PULL , Validate "}, + wantPhases: []string{skipPhasePull, skipPhaseValidate}, + }, + { + name: "rejects unknown phase", + input: []string{"foobar"}, + wantErr: 
true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseSkipPhases(tt.input) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + + if len(tt.wantPhases) == 0 { + assert.Equal(t, 0, got.Size()) + return + } + for _, p := range tt.wantPhases { + assert.True(t, got.Has(p), "expected %q in skip set; got %s", p, strings.Join(got.List(), ",")) + } + assert.Equal(t, len(tt.wantPhases), got.Size()) + }) + } +} + +func TestValidateCPEParts(t *testing.T) { + require.NoError(t, validateCPEParts([]string{"a", "h", "o"})) + require.Error(t, validateCPEParts(nil)) + require.Error(t, validateCPEParts([]string{})) + require.Error(t, validateCPEParts([]string{"a", "x"})) +} diff --git a/cmd/grype/cli/options/database_build.go b/cmd/grype/cli/options/database_build.go new file mode 100644 index 00000000000..549e83b8950 --- /dev/null +++ b/cmd/grype/cli/options/database_build.go @@ -0,0 +1,133 @@ +package options + +import ( + "github.com/anchore/clio" + "github.com/anchore/grype/grype/db" + "github.com/anchore/grype/grype/db/build/pull" +) + +// DatabaseBuild holds the configuration for `grype db build`, the unified +// pull -> write -> package pipeline. The shape mirrors grype-db's historical +// configuration (provider/pull/build/package) so that existing config files +// remain familiar; individual phases can be skipped via --skip. 
+type DatabaseBuild struct { + // build-time options (covers the "write" phase) + SchemaVersion int `yaml:"schema-version" json:"schema-version" mapstructure:"schema-version"` + Dir string `yaml:"dir" json:"dir" mapstructure:"dir"` + BatchSize int `yaml:"batch-size" json:"batch-size" mapstructure:"batch-size"` + IncludeCPEParts []string `yaml:"include-cpe-parts" json:"include-cpe-parts" mapstructure:"include-cpe-parts"` + InferNVDFixVersions bool `yaml:"infer-nvd-fix-versions" json:"infer-nvd-fix-versions" mapstructure:"infer-nvd-fix-versions"` + Hydrate bool `yaml:"hydrate" json:"hydrate" mapstructure:"hydrate"` + FailOnMissingFixDate bool `yaml:"fail-on-missing-fix-date" json:"fail-on-missing-fix-date" mapstructure:"fail-on-missing-fix-date"` + + // pipeline control + Skip []string `yaml:"skip" json:"skip" mapstructure:"skip"` + + // archive options (covers the "package" phase) + ArchiveExtension string `yaml:"archive-extension" json:"archive-extension" mapstructure:"archive-extension"` + CompressorCommands map[string]string `yaml:"compressor-commands" json:"compressor-commands" mapstructure:"compressor-commands"` + + // nested config for the pull phase + providers + Pull DatabaseBuildPull `yaml:"pull" json:"pull" mapstructure:"pull"` + Provider DatabaseBuildProvider `yaml:"provider" json:"provider" mapstructure:"provider"` +} + +type DatabaseBuildPull struct { + Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` +} + +type DatabaseBuildProvider struct { + Root string `yaml:"root" json:"root" mapstructure:"root"` + IncludeFilter []string `yaml:"include-filter" json:"include-filter" mapstructure:"include-filter"` + Vunnel DatabaseBuildVunnel `yaml:"vunnel" json:"vunnel" mapstructure:"vunnel"` + Configs []pull.ProviderRunConfig `yaml:"configs" json:"configs" mapstructure:"configs"` +} + +type DatabaseBuildVunnel struct { + Config string `yaml:"config" json:"config" mapstructure:"config"` + Executor string `yaml:"executor" 
json:"executor" mapstructure:"executor"` + DockerImage string `yaml:"docker-image" json:"docker-image" mapstructure:"docker-image"` + DockerTag string `yaml:"docker-tag" json:"docker-tag" mapstructure:"docker-tag"` + GenerateConfigs bool `yaml:"generate-configs" json:"generate-configs" mapstructure:"generate-configs"` + ExcludeProviders []string `yaml:"exclude-providers" json:"exclude-providers" mapstructure:"exclude-providers"` + Env map[string]string `yaml:"env,omitempty" json:"env,omitempty" mapstructure:"env"` +} + +var _ interface { + clio.FlagAdder + clio.FieldDescriber +} = (*DatabaseBuild)(nil) + +func DefaultDatabaseBuild() *DatabaseBuild { + return &DatabaseBuild{ + SchemaVersion: db.DefaultSchemaVersion, + Dir: "./build", + BatchSize: db.DefaultBatchSize, + IncludeCPEParts: []string{"a", "h", "o"}, + InferNVDFixVersions: true, + Hydrate: false, + FailOnMissingFixDate: false, + Skip: nil, + Pull: DatabaseBuildPull{ + Parallelism: 4, + }, + Provider: DatabaseBuildProvider{ + Root: "./data", + Vunnel: DatabaseBuildVunnel{ + Executor: "docker", + DockerImage: "ghcr.io/anchore/vunnel", + DockerTag: "latest", + GenerateConfigs: false, + ExcludeProviders: []string{"centos"}, + }, + }, + } +} + +func (o *DatabaseBuild) AddFlags(flags clio.FlagSet) { + flags.IntVarP(&o.SchemaVersion, "schema", "s", + "DB schema version to build for") + + flags.StringVarP(&o.Dir, "dir", "d", + "directory where the database is written") + + flags.BoolVarP(&o.Provider.Vunnel.GenerateConfigs, "generate-providers-from-vunnel", "g", + "generate provider configs from 'vunnel list' output") + + flags.StringVarP(&o.ArchiveExtension, "archive-extension", "e", + "override the extension used during DB archiving (default chosen by the DB schema, typically 'tar.zst')") + + flags.StringArrayVarP(&o.Skip, "skip", "", + "comma-separated phases of the build pipeline to skip; one or more of: pull, validate, write, package") + + flags.StringArrayVarP(&o.Provider.IncludeFilter, "provider-name", "p", 
+ "one or more provider names to filter the build to (default: empty = all)") +} + +func (o *DatabaseBuild) DescribeFields(d clio.FieldDescriptionSet) { + d.Add(&o.SchemaVersion, `DB schema version to build for`) + d.Add(&o.Dir, `directory to write the built SQLite DB into`) + d.Add(&o.BatchSize, `number of database operations to batch before flushing to disk +(balances throughput with memory usage; 0 = library default)`) + d.Add(&o.IncludeCPEParts, `CPE parts (a, h, o) to include when emitting CPE-based vulnerability matches`) + d.Add(&o.InferNVDFixVersions, `derive missing NVD fix versions from CVE configurations when building the DB`) + d.Add(&o.Hydrate, `populate post-build derived data (only applies for schemas > 5)`) + d.Add(&o.FailOnMissingFixDate, `fail the build if any fix entry lacks a known available date`) + d.Add(&o.Skip, `phases of the build pipeline to skip (pull, validate, write, package)`) + d.Add(&o.ArchiveExtension, `archive extension used during DB packaging; empty means the schema default`) + d.Add(&o.CompressorCommands, `external commands to use for compressing archives, keyed by extension`) + + d.Add(&o.Pull.Parallelism, `number of vulnerability providers to update concurrently during the pull phase`) + + d.Add(&o.Provider.Root, `directory holding the vulnerability provider workspace (see vunnel provider-workspace-state schema)`) + d.Add(&o.Provider.IncludeFilter, `restrict the build to these provider names (empty = include all)`) + d.Add(&o.Provider.Configs, `manually crafted provider configurations (advanced use only)`) + + d.Add(&o.Provider.Vunnel.Config, `path to a vunnel configuration file to mount/use when running vunnel`) + d.Add(&o.Provider.Vunnel.Executor, `how to run vunnel: 'docker' (default), 'podman', or 'local' (use vunnel from $PATH)`) + d.Add(&o.Provider.Vunnel.DockerImage, `docker image to use when running vunnel via docker/podman`) + d.Add(&o.Provider.Vunnel.DockerTag, `image tag for the vunnel docker image`) + 
d.Add(&o.Provider.Vunnel.GenerateConfigs, `generate additional provider configurations from 'vunnel list' output`) + d.Add(&o.Provider.Vunnel.ExcludeProviders, `providers to exclude from 'vunnel list' output (only when generate-configs is true)`) + d.Add(&o.Provider.Vunnel.Env, `environment variables to pass to the vunnel process`) +} diff --git a/grype/db/build/providers/external/log_writer.go b/grype/db/build/providers/external/log_writer.go new file mode 100644 index 00000000000..aeaadf665b2 --- /dev/null +++ b/grype/db/build/providers/external/log_writer.go @@ -0,0 +1,114 @@ +package external + +import ( + "fmt" + "regexp" + "strings" + + "github.com/anchore/grype/internal/log" +) + +var ( + logLevelPattern = regexp.MustCompile(`^(?P.*)\[(?PTRACE|DEBUG|INFO|WARN|WARNING|ERROR)\s?\] (?P.*)$`) + + // The provider logging level can be independently controlled via vunnel config, + // so the default if no log level could be parsed should be info + defaultLogLevel = "INFO" +) + +type logWriter struct { + name string +} + +func newLogWriter(name string) *logWriter { + return &logWriter{ + name: name, + } +} + +// matchNamedCaptureGroups takes a regular expression and string and returns all of the named capture group results in a map. +// This is only for the first match in the regex. Callers shouldn't be providing regexes with multiple capture groups with the same name. +func matchNamedCaptureGroups(regEx *regexp.Regexp, content string) map[string]string { + // note: we are looking across all matches and stopping on the first non-empty match. Why? Take the following example: + // input: "cool something to match against" pattern: `((?Pmatch) (?Pagainst))?`. Since the pattern is + // encapsulated in an optional capture group, there will be results for each character, but the results will match + // on nothing. The only "true" match will be at the end ("match against"). 
+ allMatches := regEx.FindAllStringSubmatch(content, -1) + var results map[string]string + for _, match := range allMatches { + for nameIdx, name := range regEx.SubexpNames() { + if nameIdx > len(match) || len(name) == 0 { + continue + } + if results == nil { + results = make(map[string]string) + } + results[name] = match[nameIdx] + } + if !isEmptyMap(results) { + break + } + } + return results +} + +func isEmptyMap(m map[string]string) bool { + if len(m) == 0 { + return true + } + for _, value := range m { + if value != "" { + return false + } + } + return true +} + +func processLogLine(line string) (string, string) { + line = strings.TrimRight(line, "\n") + groups := matchNamedCaptureGroups(logLevelPattern, line) + + level, ok := groups["level"] + if !ok || level == "" { + return defaultLogLevel, line + } + + prefix, ok := groups["prefix"] + if !ok { + return defaultLogLevel, line + } + + suffix, ok := groups["suffix"] + if !ok { + return defaultLogLevel, line + } + + message := fmt.Sprintf("%s%s", prefix, suffix) + return strings.ToUpper(level), message +} + +func (lw logWriter) Write(p []byte) (n int, err error) { + for _, line := range strings.Split(string(p), "\n") { + level, line := processLogLine(line) + if line != "" { + message := fmt.Sprintf("[%s]", lw.name) + line + + switch level { + case "TRACE": + log.Trace(message) + case "DEBUG": + log.Debug(message) + case "INFO": + log.Info(message) + case "WARN", "WARNING": + log.Warn(message) + case "ERROR": + log.Error(message) + default: + log.Info(message) + } + } + } + + return len(p), nil +} diff --git a/grype/db/build/providers/external/log_writer_test.go b/grype/db/build/providers/external/log_writer_test.go new file mode 100644 index 00000000000..534406ef063 --- /dev/null +++ b/grype/db/build/providers/external/log_writer_test.go @@ -0,0 +1,67 @@ +package external + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestLogWriter_processLogLine(t *testing.T) { + tests := []struct 
{ + name string + line string + expectedLevel string + expectedMessage string + }{ + { + name: "default log level", + line: `\033[0maggregating vulnerability data providers=[rhel]`, + expectedLevel: defaultLogLevel, + expectedMessage: `\033[0maggregating vulnerability data providers=[rhel]`, + }, + { + name: "info log level", + line: `\033[0m[INFO ] aggregating vulnerability data providers=[rhel]`, + expectedLevel: "INFO", + expectedMessage: `\033[0maggregating vulnerability data providers=[rhel]`, + }, + { + name: "warning log level", + line: `blah [WARNING] something could be going wrong`, + expectedLevel: "WARNING", + expectedMessage: `blah something could be going wrong`, + }, + { + name: "warn log level", + line: `blah [WARN ] something could be going wrong`, + expectedLevel: "WARN", + expectedMessage: `blah something could be going wrong`, + }, + { + name: "debug log level", + line: `abcdefg [DEBUG] jasdklfjlaksdjflksadj`, + expectedLevel: "DEBUG", + expectedMessage: `abcdefg jasdklfjlaksdjflksadj`, + }, + { + name: "trace log level", + line: `[TRACE] -----^^^^^`, + expectedLevel: "TRACE", + expectedMessage: `-----^^^^^`, + }, + { + name: "error log level", + line: `[ERROR] something bad happened`, + expectedLevel: "ERROR", + expectedMessage: `something bad happened`, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + level, message := processLogLine(test.line) + assert.Equal(t, level, test.expectedLevel) + assert.Equal(t, message, test.expectedMessage) + }) + } +} diff --git a/grype/db/build/providers/external/provider.go b/grype/db/build/providers/external/provider.go new file mode 100644 index 00000000000..9eaf236ef53 --- /dev/null +++ b/grype/db/build/providers/external/provider.go @@ -0,0 +1,114 @@ +package external + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/google/shlex" + + "github.com/anchore/grype/grype/db/provider" + "github.com/anchore/grype/internal/log" + 
"github.com/anchore/grype/internal/redact" +) + +var _ provider.Reader = (*pvdr)(nil) + +const Kind provider.Kind = "external" + +type Config struct { + Cmd string `yaml:"cmd" json:"cmd" mapstructure:"cmd"` + ExecDir string `yaml:"dir,omitempty" json:"dir,omitempty" mapstructure:"dir"` + State string `yaml:"state" json:"state" mapstructure:"state"` + Env map[string]string `yaml:"env,omitempty" json:"env,omitempty" mapstructure:"env"` +} + +func (c Config) Redact() { + if c.Env == nil { + return + } + for _, v := range c.Env { + // note: we don't know which env vars are sensitive, so we assume all are + redact.Add(v) + } +} + +type pvdr struct { + id provider.Identifier + cfg Config + root string +} + +func NewProvider(root string, id provider.Identifier, cfg Config) provider.Reader { + return &pvdr{ + id: id, + cfg: cfg, + root: root, + } +} + +func (p pvdr) ID() provider.Identifier { + return p.id +} + +func (p pvdr) State() (*provider.State, error) { + return provider.ReadState(filepath.Join(p.root, p.cfg.State)) +} + +func (p pvdr) Update(ctx context.Context) error { + if err := run(ctx, p.cfg.Cmd, p.cfg.ExecDir, p.ID().Name, p.cfg.Env); err != nil { + return fmt.Errorf("failed to pull data from %q provider: %w", p.id.Name, err) + } + return nil +} + +func run(ctx context.Context, cmd, dir, name string, env map[string]string) error { + log.WithFields("provider", name, "dir", dir).Tracef("running external provider: %q", cmd) + + parsedArgs, err := shlex.Split(cmd) + if err != nil { + return fmt.Errorf("unable to parse shell arguments %q: %w", cmd, err) + } + + if len(parsedArgs) == 0 { + return fmt.Errorf("no command specified") + } + cmdStr := parsedArgs[0] + var args []string + if len(parsedArgs) > 1 { + args = parsedArgs[1:] + } + cmdObj := exec.CommandContext(ctx, cmdStr, args...) + cmdObj.Dir = dir + cmdObj.Env = append(cmdObj.Env, envMapToSlice(env)...) 
+ + cmdObj.Stdout = newLogWriter(name) + cmdObj.Stderr = newLogWriter(name) + + if err := cmdObj.Run(); err != nil { + if exitError, ok := err.(*exec.ExitError); ok { //nolint: errorlint + return fmt.Errorf("command failed: %d", exitError.ExitCode()) + } + return err + } + + return nil +} + +func envMapToSlice(env map[string]string) (envList []string) { + for key, val := range env { + if key == "" { + continue + } + if strings.HasPrefix(val, "$") { + val = os.Getenv(val[1:]) + // for safety, assume that all values from environment variables are sensitive + redact.Add(val) + } + envList = append(envList, fmt.Sprintf("%s=%s", key, val)) + } + return +} diff --git a/grype/db/build/providers/providers.go b/grype/db/build/providers/providers.go new file mode 100644 index 00000000000..a0a8ee9af44 --- /dev/null +++ b/grype/db/build/providers/providers.go @@ -0,0 +1,70 @@ +package providers + +import ( + "fmt" + + "github.com/go-viper/mapstructure/v2" + + "github.com/anchore/grype/grype/db/build/providers/external" + "github.com/anchore/grype/grype/db/build/providers/vunnel" + "github.com/anchore/grype/grype/db/build/pull" + "github.com/anchore/grype/grype/db/provider" +) + +var ErrNoProviders = fmt.Errorf("no providers configured") + +func New(root string, vCfg vunnel.Config, cfgs ...pull.ProviderRunConfig) (provider.Providers, error) { + var providers []provider.Reader + var eolProviders []provider.Reader + + if vCfg.GenerateConfigs { + generatedCfgs, err := vunnel.GenerateConfigs(root, vCfg) + if err != nil { + return nil, fmt.Errorf("unable to generate vunnel providers: %w", err) + } + cfgs = append(cfgs, generatedCfgs...) 
+ } + + if len(cfgs) == 0 { + return nil, ErrNoProviders + } + + for _, cfg := range cfgs { + p, err := newProvider(root, vCfg, cfg) + if err != nil { + return nil, err + } + switch p.ID().Name { + case "nvd": + // it is important that NVD is processed first since other providers depend on the severity information from these records + providers = append([]provider.Reader{p}, providers...) + case "eol": + // EOL provider must run last since it needs OperatingSystem records to exist (created by other providers) + eolProviders = append(eolProviders, p) + default: + providers = append(providers, p) + } + } + + // append EOL providers at the end + providers = append(providers, eolProviders...) + + return providers, nil +} + +func newProvider(root string, vCfg vunnel.Config, cfg pull.ProviderRunConfig) (provider.Reader, error) { + switch cfg.Kind { + case vunnel.Kind, "": // note: this is the default + return vunnel.NewProvider(root, cfg.Identifier, vCfg), nil + case external.Kind: + var c external.Config + if err := mapstructure.Decode(cfg.Config, &c); err != nil { + return nil, fmt.Errorf("failed to decode external provider config: %w", err) + } + return external.NewProvider(root, cfg.Identifier, c), nil + case "internal": // reserved, not implemented (golang vulnerability data providers in-repo) + return nil, fmt.Errorf("internal providers not yet implemented") + default: + return nil, fmt.Errorf("unknown provider kind %q", cfg.Kind) + } +} diff --git a/grype/db/build/providers/vunnel/provider.go b/grype/db/build/providers/vunnel/provider.go new file mode 100644 index 00000000000..28b7d57e201 --- /dev/null +++ b/grype/db/build/providers/vunnel/provider.go @@ -0,0 +1,193 @@ +package vunnel + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/google/shlex" + "github.com/scylladb/go-set/strset" + + "github.com/anchore/grype/grype/db/build/providers/external" + "github.com/anchore/grype/grype/db/build/pull" + 
"github.com/anchore/grype/grype/db/provider" + "github.com/anchore/grype/internal/log" + "github.com/anchore/grype/internal/redact" +) + +const Kind provider.Kind = "vunnel" // special case of external + +type Config struct { + Config string `yaml:"config" json:"config" mapstructure:"config"` + Executor string `yaml:"executor" json:"executor" mapstructure:"executor"` + DockerImage string `yaml:"docker-image" json:"docker-image" mapstructure:"docker-image"` + DockerTag string `yaml:"docker-tag" json:"docker-tag" mapstructure:"docker-tag"` + GenerateConfigs bool `yaml:"generate-configs" json:"generate-configs" mapstructure:"generate-configs"` + ExcludeProviders []string `yaml:"exclude-providers" json:"exclude-providers" mapstructure:"exclude-providers"` + Env map[string]string `yaml:"env,omitempty" json:"env,omitempty" mapstructure:"env"` +} + +func (c Config) Redact() { + if c.Env == nil { + return + } + for _, v := range c.Env { + // note: we don't know which env vars are sensitive, so we assume all are + redact.Add(v) + } +} + +func NewProvider(root string, id provider.Identifier, cfg Config) provider.Reader { + return external.NewProvider(root, id, + external.Config{ + Cmd: getRunCommand(root, id, cfg), + State: fmt.Sprintf("%s/metadata.json", id.Name), + Env: cfg.Env, + }, + ) +} + +func getRunCommand(root string, id provider.Identifier, cfg Config) string { + switch cfg.Executor { + case "docker", "podman": + dataRootCtr := root + if !strings.HasPrefix(root, "/") { + dataRootCtr = strings.TrimPrefix(root, "./") + } + + dataRootHost, err := filepath.Abs(root) + if err != nil { + log.WithFields("error", err).Warn("unable to get absolute path for provider root directory, using relative path") + dataRootHost = root + } + + var cfgVol string + if _, err := os.Stat(".vunnel.yaml"); !os.IsNotExist(err) { + cwd, err := os.Getwd() + if err != nil { + log.WithFields("error", err, "provider", id.Name).Warn("unable to get current working directory, ignoring vunnel config") 
+ } else { + cfgVol = fmt.Sprintf("-v %s/.vunnel.yaml:/.vunnel.yaml", cwd) + } + } + + var envStr string + if cfg.Env != nil { + for k, v := range cfg.Env { + if strings.HasPrefix(v, "$") { + v = os.Getenv(v[1:]) + // for safety, assume that all values from environment variables are sensitive + redact.Add(v) + } + envStr += fmt.Sprintf("-e %s=%s ", k, v) + } + } + + return fmt.Sprintf("%s run --rm -t -v %s:/%s %s %s %s:%s run %s", cfg.Executor, dataRootHost, dataRootCtr, cfgVol, envStr, cfg.DockerImage, cfg.DockerTag, id.Name) + } + + var cfgSection string + if cfg.Config != "" { + cfgSection = fmt.Sprintf("-c %s", cfg.Config) + } + + return fmt.Sprintf("vunnel %s run %s", cfgSection, id.Name) +} + +func getListCommand(root string, cfg Config) string { + switch cfg.Executor { + case "docker", "podman": + dataRootCtr := root + if !strings.HasPrefix(root, "/") { + dataRootCtr = strings.TrimPrefix(root, "./") + } + + dataRootHost, err := filepath.Abs(root) + if err != nil { + log.WithFields("error", err).Warn("unable to get absolute path for provider root directory, using relative path") + dataRootHost = root + } + + var cfgVol string + if _, err := os.Stat(".vunnel.yaml"); !os.IsNotExist(err) { + cwd, err := os.Getwd() + if err != nil { + log.WithFields("error", err).Warn("unable to get current working directory, ignoring vunnel config") + } else { + cfgVol = fmt.Sprintf("-v %s/.vunnel.yaml:/.vunnel.yaml", cwd) + } + } + + var envStr string + if cfg.Env != nil { + for k, v := range cfg.Env { + if strings.HasPrefix(v, "$") { + v = os.Getenv(v[1:]) + // for safety, assume that all values from environment variables are sensitive + redact.Add(v) + } + envStr += fmt.Sprintf("-e %s=%s ", k, v) + } + } + + return fmt.Sprintf("%s run --rm -t -v %s:/%s %s %s %s:%s list", cfg.Executor, dataRootHost, dataRootCtr, cfgVol, envStr, cfg.DockerImage, cfg.DockerTag) + } + + var cfgSection string + if cfg.Config != "" { + cfgSection = fmt.Sprintf("-c %s", cfg.Config) + } + + return 
fmt.Sprintf("vunnel %s list", cfgSection) +} + +func GenerateConfigs(root string, cfg Config) ([]pull.ProviderRunConfig, error) { + cmdStr := getListCommand(root, cfg) + cmdList, err := shlex.Split(cmdStr) + if err != nil { + return nil, err + } + cmd, args := cmdList[0], cmdList[1:] + + cmdObj := exec.Command(cmd, args...) + sb := strings.Builder{} + cmdObj.Stderr = &sb + out, err := cmdObj.Output() + if err != nil { + if sb.Len() > 0 { + log.Errorf("vunnel list failed: %s", sb.String()) + } + return nil, fmt.Errorf("unable to execute vunnel list: %w", err) + } + + lines := strings.Split(string(out), "\n") + excludeSet := strset.New(cfg.ExcludeProviders...) + + var cfgs []pull.ProviderRunConfig + for _, line := range lines { + line = strings.TrimSpace(line) + if line == "" { + continue + } + if strings.Contains(line, " ") || strings.Contains(line, ":") || strings.Contains(line, "[") { + log.WithFields("value", line).Trace("provider name appears to be invalid, skipping") + continue + } + + if excludeSet.Has(line) { + log.WithFields("provider", line).Trace("skipping config") + continue + } + log.WithFields("provider", line).Trace("including config") + cfgs = append(cfgs, pull.ProviderRunConfig{ + Identifier: provider.Identifier{ + Name: line, + Kind: Kind, + }, + }) + } + + return cfgs, nil +} diff --git a/grype/db/build/pull/provider_run_config.go b/grype/db/build/pull/provider_run_config.go new file mode 100644 index 00000000000..ef8621dec07 --- /dev/null +++ b/grype/db/build/pull/provider_run_config.go @@ -0,0 +1,25 @@ +package pull + +import ( + "github.com/anchore/grype/grype/db/provider" +) + +// Redactable is implemented by provider configs whose contents should have any +// sensitive values registered with the application redaction store before logging. 
+type Redactable interface { + Redact() +} + +type ProviderRunConfig struct { + provider.Identifier `yaml:",inline" mapstructure:",squash"` + Config any `yaml:"config,omitempty" json:"config" mapstructure:"config"` +} + +func (c ProviderRunConfig) Redact() { + if c.Config == nil { + return + } + if r, ok := c.Config.(Redactable); ok { + r.Redact() + } +} diff --git a/grype/db/build/pull/pull.go b/grype/db/build/pull/pull.go new file mode 100644 index 00000000000..888be104469 --- /dev/null +++ b/grype/db/build/pull/pull.go @@ -0,0 +1,71 @@ +package pull + +import ( + "context" + "sync" + + "github.com/hashicorp/go-multierror" + "golang.org/x/sync/semaphore" + + "github.com/anchore/grype/grype/db/provider" + "github.com/anchore/grype/internal/log" +) + +type Config struct { + Parallelism int + Collection provider.Collection +} + +func Pull(cfg Config) error { + var names []string + for _, p := range cfg.Collection.Providers { + names = append(names, p.ID().Name) + } + log.WithFields("providers", names).Info("aggregating vulnerability data") + + var wg sync.WaitGroup + sem := semaphore.NewWeighted(int64(cfg.Parallelism)) + + ctx := context.Background() + + var errs error + var errsLock sync.Mutex + updateErrs := func(err error) { + if err != nil { + errsLock.Lock() + defer errsLock.Unlock() + errs = multierror.Append(errs, err) + } + } + + for _, p := range cfg.Collection.Providers { + writer, ok := p.(provider.Writer) + if !ok { + log.WithFields("provider", p.ID().Name).Warn("provider does not implement Writer") + continue + } + if err := sem.Acquire(ctx, 1); err != nil { + updateErrs(err) + break + } + if errs != nil { + // note: we don't cancel the context to stop existing provider updates. Why? this may leave otherwise + // valid providers in a bad state. Instead, we just let the other providers that have already been started + // to finish and return the error from the failed provider. 
+ log.WithFields("error", errs).Error("provider update failed, waiting for already started provider updates to finish before exiting...") + break + } + wg.Add(1) + go func(prov provider.Writer, name string) { + defer sem.Release(1) + defer wg.Done() + log.WithFields("provider", name).Info("running vulnerability provider") + updateErrs(prov.Update(ctx)) + }(writer, p.ID().Name) + } + + log.Trace("all providers started, waiting for graceful completion...") + wg.Wait() + + return errs +} From 4a126421967809bd11fe99bff748d036b82f7ac1 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Fri, 22 May 2026 10:14:22 -0400 Subject: [PATCH 2/8] db build: support -p as standalone provider filter; yaml-safe map fields - when no explicit configs and no -g, synthesize a vunnel provider config per -p name so 'db build -p alpine -p alma' works against on-disk data - PostLoad flattens csv in -p and --skip so '-p alpine,alma' matches --from - stringMap renders inline yaml ({} / {k: v}) for Env and CompressorCommands so 'grype config' output is valid yaml instead of go's map[] format Signed-off-by: Will Murphy Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/grype/cli/commands/db_build.go | 31 ++++++++++--- cmd/grype/cli/options/database_build.go | 60 +++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 8 deletions(-) diff --git a/cmd/grype/cli/commands/db_build.go b/cmd/grype/cli/commands/db_build.go index 2769e406a65..31691c4b968 100644 --- a/cmd/grype/cli/commands/db_build.go +++ b/cmd/grype/cli/commands/db_build.go @@ -153,18 +153,39 @@ func buildProviders(opts *options.DatabaseBuild) (dbprovider.Providers, error) { DockerTag: opts.Provider.Vunnel.DockerTag, GenerateConfigs: opts.Provider.Vunnel.GenerateConfigs, ExcludeProviders: opts.Provider.Vunnel.ExcludeProviders, - Env: opts.Provider.Vunnel.Env, + Env: map[string]string(opts.Provider.Vunnel.Env), } - pvdrs, err := providers.New(opts.Provider.Root, vCfg, opts.Provider.Configs...) 
+ cfgs := append([]pull.ProviderRunConfig(nil), opts.Provider.Configs...) + + // If the user passed -p but didn't supply explicit configs and didn't ask + // to enumerate via `vunnel list` (-g), treat each -p value as a vunnel + // provider config. This lets `-p alpine -p alma` work on its own when the + // provider data already exists on disk under provider.root. + if len(cfgs) == 0 && !vCfg.GenerateConfigs && len(opts.Provider.IncludeFilter) > 0 { + for _, name := range opts.Provider.IncludeFilter { + cfgs = append(cfgs, pull.ProviderRunConfig{ + Identifier: dbprovider.Identifier{ + Name: name, + Kind: vunnel.Kind, + }, + }) + } + log.WithFields("providers", opts.Provider.IncludeFilter).Debug("synthesized vunnel provider configs from --provider-name") + } + + pvdrs, err := providers.New(opts.Provider.Root, vCfg, cfgs...) if err != nil { if errors.Is(err, providers.ErrNoProviders) { - log.Error("configure a provider via the application config or use -g to generate a list of configs from vunnel") + log.Error("configure a provider via the application config, pass -p for each provider, or use -g to enumerate them via vunnel list") } return nil, fmt.Errorf("unable to create providers: %w", err) } - if len(opts.Provider.IncludeFilter) > 0 { + // Only run the post-filter when configs or -g produced the provider set; + // when -p synthesized them above, the filter is implicit. + hadExplicitSources := vCfg.GenerateConfigs || len(opts.Provider.Configs) > 0 + if hadExplicitSources && len(opts.Provider.IncludeFilter) > 0 { log.WithFields("keep-only", opts.Provider.IncludeFilter).Debug("filtering providers by name") pvdrs = pvdrs.Filter(opts.Provider.IncludeFilter...) } @@ -215,7 +236,7 @@ func runWritePhase(opts *options.DatabaseBuild, pvdrs dbprovider.Providers, skip func runPackagePhase(opts *options.DatabaseBuild) error { // v5 DB writing (and its corresponding listing.json) is no longer supported via this command; // publish-base-url is intentionally omitted. 
- return db.Package(opts.Dir, "", opts.ArchiveExtension, opts.CompressorCommands) + return db.Package(opts.Dir, "", opts.ArchiveExtension, map[string]string(opts.CompressorCommands)) } func providerStates(skipValidation bool, providers []dbprovider.Reader) ([]dbprovider.State, error) { diff --git a/cmd/grype/cli/options/database_build.go b/cmd/grype/cli/options/database_build.go index 549e83b8950..ebb6b8248c0 100644 --- a/cmd/grype/cli/options/database_build.go +++ b/cmd/grype/cli/options/database_build.go @@ -1,11 +1,37 @@ package options import ( + "fmt" + "sort" + "strings" + "github.com/anchore/clio" "github.com/anchore/grype/grype/db" "github.com/anchore/grype/grype/db/build/pull" ) +// stringMap is a string->string map that renders as inline YAML (e.g. "{}" or +// "{k: v, k2: v2}") when formatted with %v, so that `grype config` produces +// output that is itself valid YAML. The default Go formatter for map types +// would emit "map[]" which round-trips back through YAML as a string. +type stringMap map[string]string + +func (m stringMap) String() string { + if len(m) == 0 { + return "{}" + } + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + parts := make([]string, 0, len(m)) + for _, k := range keys { + parts = append(parts, fmt.Sprintf("%s: %s", k, m[k])) + } + return "{" + strings.Join(parts, ", ") + "}" +} + // DatabaseBuild holds the configuration for `grype db build`, the unified // pull -> write -> package pipeline. 
The shape mirrors grype-db's historical // configuration (provider/pull/build/package) so that existing config files @@ -24,8 +50,8 @@ type DatabaseBuild struct { Skip []string `yaml:"skip" json:"skip" mapstructure:"skip"` // archive options (covers the "package" phase) - ArchiveExtension string `yaml:"archive-extension" json:"archive-extension" mapstructure:"archive-extension"` - CompressorCommands map[string]string `yaml:"compressor-commands" json:"compressor-commands" mapstructure:"compressor-commands"` + ArchiveExtension string `yaml:"archive-extension" json:"archive-extension" mapstructure:"archive-extension"` + CompressorCommands stringMap `yaml:"compressor-commands" json:"compressor-commands" mapstructure:"compressor-commands"` // nested config for the pull phase + providers Pull DatabaseBuildPull `yaml:"pull" json:"pull" mapstructure:"pull"` @@ -50,14 +76,40 @@ type DatabaseBuildVunnel struct { DockerTag string `yaml:"docker-tag" json:"docker-tag" mapstructure:"docker-tag"` GenerateConfigs bool `yaml:"generate-configs" json:"generate-configs" mapstructure:"generate-configs"` ExcludeProviders []string `yaml:"exclude-providers" json:"exclude-providers" mapstructure:"exclude-providers"` - Env map[string]string `yaml:"env,omitempty" json:"env,omitempty" mapstructure:"env"` + Env stringMap `yaml:"env,omitempty" json:"env,omitempty" mapstructure:"env"` } var _ interface { clio.FlagAdder clio.FieldDescriber + clio.PostLoader } = (*DatabaseBuild)(nil) +// PostLoad flattens any comma-separated entries in --provider-name and --skip +// so that "-p alpine,alma,rhel" behaves the same as "-p alpine -p alma -p rhel" +// (matching the convention used by grype's --from flag). 
+func (o *DatabaseBuild) PostLoad() error { + o.Provider.IncludeFilter = flattenCSV(o.Provider.IncludeFilter) + o.Skip = flattenCSV(o.Skip) + return nil +} + +func flattenCSV(in []string) []string { + if len(in) == 0 { + return in + } + var out []string + for _, v := range in { + for _, s := range strings.Split(v, ",") { + s = strings.TrimSpace(s) + if s != "" { + out = append(out, s) + } + } + } + return out +} + func DefaultDatabaseBuild() *DatabaseBuild { return &DatabaseBuild{ SchemaVersion: db.DefaultSchemaVersion, @@ -68,6 +120,7 @@ func DefaultDatabaseBuild() *DatabaseBuild { Hydrate: false, FailOnMissingFixDate: false, Skip: nil, + CompressorCommands: stringMap{}, Pull: DatabaseBuildPull{ Parallelism: 4, }, @@ -79,6 +132,7 @@ func DefaultDatabaseBuild() *DatabaseBuild { DockerTag: "latest", GenerateConfigs: false, ExcludeProviders: []string{"centos"}, + Env: stringMap{}, }, }, } From 35b69bb04c6442883f57e7b92285d90db73c8c05 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Fri, 22 May 2026 10:33:57 -0400 Subject: [PATCH 3/8] wip: phase 1 migrate db build Signed-off-by: Will Murphy --- cmd/grype/cli/cli.go | 1 + cmd/grype/cli/commands/db.go | 1 - cmd/grype/cli/commands/db_build.go | 270 ------------------- cmd/grype/cli/commands/db_build_test.go | 71 ----- cmd/grype/cli/commands/db_builder.go | 38 +++ cmd/grype/cli/commands/db_builder_build.go | 76 ++++++ cmd/grype/cli/commands/db_builder_common.go | 119 ++++++++ cmd/grype/cli/commands/db_builder_package.go | 47 ++++ cmd/grype/cli/commands/db_builder_pull.go | 61 +++++ cmd/grype/cli/options/database_build.go | 33 ++- 10 files changed, 357 insertions(+), 360 deletions(-) delete mode 100644 cmd/grype/cli/commands/db_build.go delete mode 100644 cmd/grype/cli/commands/db_build_test.go create mode 100644 cmd/grype/cli/commands/db_builder.go create mode 100644 cmd/grype/cli/commands/db_builder_build.go create mode 100644 cmd/grype/cli/commands/db_builder_common.go create mode 100644
cmd/grype/cli/commands/db_builder_package.go create mode 100644 cmd/grype/cli/commands/db_builder_pull.go diff --git a/cmd/grype/cli/cli.go b/cmd/grype/cli/cli.go index 924cab85722..f8d98b5bc08 100644 --- a/cmd/grype/cli/cli.go +++ b/cmd/grype/cli/cli.go @@ -104,6 +104,7 @@ func create(id clio.Identification) (clio.Application, *cobra.Command) { // add sub-commands rootCmd.AddCommand( commands.DB(app), + commands.DBBuilder(app), commands.Completion(app), commands.Explain(app), clio.VersionCommand(id, syftVersion, dbVersion), diff --git a/cmd/grype/cli/commands/db.go b/cmd/grype/cli/commands/db.go index 6bcd883a082..e1328cd745b 100644 --- a/cmd/grype/cli/commands/db.go +++ b/cmd/grype/cli/commands/db.go @@ -19,7 +19,6 @@ func DB(app clio.Application) *cobra.Command { } db.AddCommand( - DBBuild(app), DBCheck(app), DBDelete(app), DBImport(app), diff --git a/cmd/grype/cli/commands/db_build.go b/cmd/grype/cli/commands/db_build.go deleted file mode 100644 index 31691c4b968..00000000000 --- a/cmd/grype/cli/commands/db_build.go +++ /dev/null @@ -1,270 +0,0 @@ -package commands - -import ( - "errors" - "fmt" - "os" - "strings" - - "github.com/scylladb/go-set/strset" - "github.com/spf13/cobra" - - "github.com/anchore/clio" - "github.com/anchore/grype/cmd/grype/cli/options" - "github.com/anchore/grype/grype/db" - "github.com/anchore/grype/grype/db/build/providers" - "github.com/anchore/grype/grype/db/build/providers/vunnel" - "github.com/anchore/grype/grype/db/build/pull" - dbprovider "github.com/anchore/grype/grype/db/provider" - "github.com/anchore/grype/internal/log" -) - -const ( - skipPhasePull = "pull" - skipPhaseValidate = "validate" - skipPhaseWrite = "write" - skipPhasePackage = "package" -) - -var allSkipPhases = []string{skipPhasePull, skipPhaseValidate, skipPhaseWrite, skipPhasePackage} - -// dbBuildConfigWrapper nests the DatabaseBuild options under `db.build:` in -// the application YAML config so the schema remains coherent next to the -// existing `db:` 
settings used by other db commands. The command flags are -// still registered directly on DatabaseBuild via its AddFlags method. -type dbBuildConfigWrapper struct { - DB dbBuildConfigDBSection `yaml:"db" json:"db" mapstructure:"db"` -} - -type dbBuildConfigDBSection struct { - Build *options.DatabaseBuild `yaml:"build" json:"build" mapstructure:"build"` -} - -func DBBuild(app clio.Application) *cobra.Command { - opts := options.DefaultDatabaseBuild() - - cmd := &cobra.Command{ - Use: "build", - Short: "Build a vulnerability database from upstream vulnerability data", - Long: `Build a vulnerability database by running the full pull -> write -> package -pipeline. Use --skip to omit any combination of phases (pull, validate, write, -package); for example: - - grype db build --skip pull,package # build a DB from existing provider data - grype db build --skip pull,write # only package an already-built DB`, - Args: cobra.NoArgs, - PreRunE: func(cmd *cobra.Command, args []string) error { - return disableUI(app)(cmd, args) - }, - RunE: func(_ *cobra.Command, _ []string) error { - return runDBBuild(opts) - }, - } - - return app.SetupCommand(cmd, &dbBuildConfigWrapper{DB: dbBuildConfigDBSection{Build: opts}}) -} - -func runDBBuild(opts *options.DatabaseBuild) error { - skip, err := parseSkipPhases(opts.Skip) - if err != nil { - return err - } - - if err := validateCPEParts(opts.IncludeCPEParts); err != nil { - return err - } - - if opts.ArchiveExtension != "" && !strset.New("tar.gz", "tar.zst").Has(opts.ArchiveExtension) { - return fmt.Errorf("archive-extension must be 'tar.gz' or 'tar.zst'") - } - - needProviders := !skip.Has(skipPhasePull) || !skip.Has(skipPhaseWrite) - - var pvdrs dbprovider.Providers - if needProviders { - pvdrs, err = buildProviders(opts) - if err != nil { - return err - } - } - - if !skip.Has(skipPhasePull) { - if err := runPullPhase(opts, pvdrs); err != nil { - return fmt.Errorf("pull phase failed: %w", err) - } - } else { - log.Info("skipping pull 
phase") - } - - if !skip.Has(skipPhaseWrite) { - if err := runWritePhase(opts, pvdrs, skip.Has(skipPhaseValidate)); err != nil { - return fmt.Errorf("write phase failed: %w", err) - } - } else { - log.Info("skipping write phase") - } - - if !skip.Has(skipPhasePackage) { - if err := runPackagePhase(opts); err != nil { - return fmt.Errorf("package phase failed: %w", err) - } - } else { - log.Info("skipping package phase") - } - - return nil -} - -func parseSkipPhases(raw []string) (*strset.Set, error) { - set := strset.New() - for _, entry := range raw { - for _, p := range strings.Split(entry, ",") { - p = strings.TrimSpace(strings.ToLower(p)) - if p == "" { - continue - } - if !strset.New(allSkipPhases...).Has(p) { - return nil, fmt.Errorf("invalid --skip phase %q (allowed: %s)", p, strings.Join(allSkipPhases, ", ")) - } - set.Add(p) - } - } - return set, nil -} - -func validateCPEParts(parts []string) error { - if len(parts) == 0 { - return errors.New("no CPE parts provided") - } - validParts := strset.New("a", "o", "h") - for _, part := range parts { - if !validParts.Has(part) { - return fmt.Errorf("invalid CPE part: %s", part) - } - } - return nil -} - -func buildProviders(opts *options.DatabaseBuild) (dbprovider.Providers, error) { - vCfg := vunnel.Config{ - Config: opts.Provider.Vunnel.Config, - Executor: opts.Provider.Vunnel.Executor, - DockerImage: opts.Provider.Vunnel.DockerImage, - DockerTag: opts.Provider.Vunnel.DockerTag, - GenerateConfigs: opts.Provider.Vunnel.GenerateConfigs, - ExcludeProviders: opts.Provider.Vunnel.ExcludeProviders, - Env: map[string]string(opts.Provider.Vunnel.Env), - } - - cfgs := append([]pull.ProviderRunConfig(nil), opts.Provider.Configs...) - - // If the user passed -p but didn't supply explicit configs and didn't ask - // to enumerate via `vunnel list` (-g), treat each -p value as a vunnel - // provider config. This lets `-p alpine -p alma` work on its own when the - // provider data already exists on disk under provider.root. 
- if len(cfgs) == 0 && !vCfg.GenerateConfigs && len(opts.Provider.IncludeFilter) > 0 { - for _, name := range opts.Provider.IncludeFilter { - cfgs = append(cfgs, pull.ProviderRunConfig{ - Identifier: dbprovider.Identifier{ - Name: name, - Kind: vunnel.Kind, - }, - }) - } - log.WithFields("providers", opts.Provider.IncludeFilter).Debug("synthesized vunnel provider configs from --provider-name") - } - - pvdrs, err := providers.New(opts.Provider.Root, vCfg, cfgs...) - if err != nil { - if errors.Is(err, providers.ErrNoProviders) { - log.Error("configure a provider via the application config, pass -p for each provider, or use -g to enumerate them via vunnel list") - } - return nil, fmt.Errorf("unable to create providers: %w", err) - } - - // Only run the post-filter when configs or -g produced the provider set; - // when -p synthesized them above, the filter is implicit. - hadExplicitSources := vCfg.GenerateConfigs || len(opts.Provider.Configs) > 0 - if hadExplicitSources && len(opts.Provider.IncludeFilter) > 0 { - log.WithFields("keep-only", opts.Provider.IncludeFilter).Debug("filtering providers by name") - pvdrs = pvdrs.Filter(opts.Provider.IncludeFilter...) 
- } - - return pvdrs, nil -} - -func runPullPhase(opts *options.DatabaseBuild, pvdrs dbprovider.Providers) error { - return pull.Pull(pull.Config{ - Parallelism: opts.Pull.Parallelism, - Collection: dbprovider.Collection{ - Root: opts.Provider.Root, - Providers: pvdrs, - }, - }) -} - -func runWritePhase(opts *options.DatabaseBuild, pvdrs dbprovider.Providers, skipValidation bool) error { - if _, err := os.Stat(opts.Dir); os.IsNotExist(err) { - if err := os.MkdirAll(opts.Dir, 0755); err != nil { - return fmt.Errorf("unable to make db build dir: %w", err) - } - } - - states, err := providerStates(skipValidation, pvdrs) - if err != nil { - return fmt.Errorf("unable to get provider states: %w", err) - } - - earliest, err := dbprovider.States(states).EarliestTimestamp() - if err != nil { - return fmt.Errorf("unable to get earliest timestamp: %w", err) - } - - return db.Build(db.BuildConfig{ - SchemaVersion: opts.SchemaVersion, - Directory: opts.Dir, - States: states, - Timestamp: earliest, - IncludeCPEParts: opts.IncludeCPEParts, - InferNVDFixVersions: opts.InferNVDFixVersions, - Hydrate: opts.Hydrate, - FailOnMissingFixDate: opts.FailOnMissingFixDate, - BatchSize: opts.BatchSize, - }) -} - -func runPackagePhase(opts *options.DatabaseBuild) error { - // v5 DB writing (and its corresponding listing.json) is no longer supported via this command; - // publish-base-url is intentionally omitted. 
- return db.Package(opts.Dir, "", opts.ArchiveExtension, map[string]string(opts.CompressorCommands)) -} - -func providerStates(skipValidation bool, providers []dbprovider.Reader) ([]dbprovider.State, error) { - var states []dbprovider.State - log.Debug("reading all provider state") - - if len(providers) == 0 { - return nil, fmt.Errorf("no providers configured") - } - - for _, p := range providers { - log.WithFields("provider", p.ID().Name).Debug("reading state") - - sd, err := p.State() - if err != nil { - return nil, fmt.Errorf("unable to read provider state: %w", err) - } - - if !skipValidation { - log.WithFields("provider", p.ID().Name).Trace("validating state") - if err := sd.Verify(); err != nil { - return nil, fmt.Errorf("invalid provider state: %w", err) - } - } - states = append(states, *sd) - } - if !skipValidation { - log.Debugf("state validated for all providers") - } - return states, nil -} diff --git a/cmd/grype/cli/commands/db_build_test.go b/cmd/grype/cli/commands/db_build_test.go deleted file mode 100644 index 469ab13aad0..00000000000 --- a/cmd/grype/cli/commands/db_build_test.go +++ /dev/null @@ -1,71 +0,0 @@ -package commands - -import ( - "strings" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestParseSkipPhases(t *testing.T) { - tests := []struct { - name string - input []string - wantPhases []string - wantErr bool - }{ - { - name: "empty", - input: nil, - wantPhases: nil, - }, - { - name: "single comma-separated entry", - input: []string{"pull,validate,package"}, - wantPhases: []string{skipPhasePull, skipPhaseValidate, skipPhasePackage}, - }, - { - name: "multiple --skip occurrences", - input: []string{"pull", "write"}, - wantPhases: []string{skipPhasePull, skipPhaseWrite}, - }, - { - name: "mixed case and whitespace tolerated", - input: []string{" PULL , Validate "}, - wantPhases: []string{skipPhasePull, skipPhaseValidate}, - }, - { - name: "rejects unknown phase", - input: 
[]string{"foobar"}, - wantErr: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := parseSkipPhases(tt.input) - if tt.wantErr { - require.Error(t, err) - return - } - require.NoError(t, err) - - if len(tt.wantPhases) == 0 { - assert.Equal(t, 0, got.Size()) - return - } - for _, p := range tt.wantPhases { - assert.True(t, got.Has(p), "expected %q in skip set; got %s", p, strings.Join(got.List(), ",")) - } - assert.Equal(t, len(tt.wantPhases), got.Size()) - }) - } -} - -func TestValidateCPEParts(t *testing.T) { - require.NoError(t, validateCPEParts([]string{"a", "h", "o"})) - require.Error(t, validateCPEParts(nil)) - require.Error(t, validateCPEParts([]string{})) - require.Error(t, validateCPEParts([]string{"a", "x"})) -} diff --git a/cmd/grype/cli/commands/db_builder.go b/cmd/grype/cli/commands/db_builder.go new file mode 100644 index 00000000000..620b22685b4 --- /dev/null +++ b/cmd/grype/cli/commands/db_builder.go @@ -0,0 +1,38 @@ +package commands + +import ( + "github.com/spf13/cobra" + + "github.com/anchore/clio" +) + +// DBBuilder is the parent command for all DB-producing operations: running +// vulnerability providers (vunnel), writing the SQLite database from the +// resulting workspace, and packaging the database into a distributable +// archive. End-user "consume an existing DB" commands live under `grype db`. +func DBBuilder(app clio.Application) *cobra.Command { + cmd := &cobra.Command{ + Use: "db-builder", + Short: "Build and manage vulnerability database artifacts", + Long: `Build vulnerability databases from upstream sources. + +This command group produces vulnerability databases from provider data +(typically vunnel), writes them to SQLite, and packages them for +distribution. 
The canonical flow is: + + grype db-builder pull -p # refresh one provider workspace via vunnel + grype db-builder build --dir ./build # write a DB from the workspace + grype db-builder package --dir ./build # archive the DB + +For commands that operate on an already-installed database (check, update, +search, ...), see 'grype db' instead.`, + } + + cmd.AddCommand( + DBBuilderPull(app), + DBBuilderBuild(app), + DBBuilderPackage(app), + ) + + return cmd +} diff --git a/cmd/grype/cli/commands/db_builder_build.go b/cmd/grype/cli/commands/db_builder_build.go new file mode 100644 index 00000000000..8e7f8637e02 --- /dev/null +++ b/cmd/grype/cli/commands/db_builder_build.go @@ -0,0 +1,76 @@ +package commands + +import ( + "fmt" + "os" + + "github.com/spf13/cobra" + + "github.com/anchore/clio" + "github.com/anchore/grype/cmd/grype/cli/options" + "github.com/anchore/grype/grype/db" + dbprovider "github.com/anchore/grype/grype/db/provider" +) + +// DBBuilderBuild writes a SQLite vulnerability database from on-disk +// provider workspace data. It assumes 'db-builder pull' has already +// populated the workspace; no vunnel processes are spawned by this command. +func DBBuilderBuild(app clio.Application) *cobra.Command { + opts := options.DefaultDatabaseBuild() + + cmd := &cobra.Command{ + Use: "build", + Short: "Write a vulnerability database from provider workspace data", + Long: `Read on-disk provider workspace data (typically populated by +'grype db-builder pull') and write a SQLite vulnerability database into +--dir. 
Does not produce an archive; use 'grype db-builder package' for that.`, + Args: cobra.NoArgs, + PreRunE: func(cmd *cobra.Command, args []string) error { + return disableUI(app)(cmd, args) + }, + RunE: func(_ *cobra.Command, _ []string) error { + return runDBBuilderBuild(opts) + }, + } + + return app.SetupCommand(cmd, &dbBuilderConfigWrapper{DBBuilder: opts}) +} + +func runDBBuilderBuild(opts *options.DatabaseBuild) error { + if err := validateCPEParts(opts.IncludeCPEParts); err != nil { + return err + } + + pvdrs, err := buildProviders(opts) + if err != nil { + return err + } + + if _, err := os.Stat(opts.Dir); os.IsNotExist(err) { + if err := os.MkdirAll(opts.Dir, 0755); err != nil { + return fmt.Errorf("unable to make db build dir: %w", err) + } + } + + states, err := providerStates(opts.SkipValidation, pvdrs) + if err != nil { + return fmt.Errorf("unable to get provider states: %w", err) + } + + earliest, err := dbprovider.States(states).EarliestTimestamp() + if err != nil { + return fmt.Errorf("unable to get earliest timestamp: %w", err) + } + + return db.Build(db.BuildConfig{ + SchemaVersion: opts.SchemaVersion, + Directory: opts.Dir, + States: states, + Timestamp: earliest, + IncludeCPEParts: opts.IncludeCPEParts, + InferNVDFixVersions: opts.InferNVDFixVersions, + Hydrate: opts.Hydrate, + FailOnMissingFixDate: opts.FailOnMissingFixDate, + BatchSize: opts.BatchSize, + }) +} diff --git a/cmd/grype/cli/commands/db_builder_common.go b/cmd/grype/cli/commands/db_builder_common.go new file mode 100644 index 00000000000..70d2095f378 --- /dev/null +++ b/cmd/grype/cli/commands/db_builder_common.go @@ -0,0 +1,119 @@ +package commands + +import ( + "errors" + "fmt" + + "github.com/scylladb/go-set/strset" + + "github.com/anchore/grype/cmd/grype/cli/options" + "github.com/anchore/grype/grype/db/build/providers" + "github.com/anchore/grype/grype/db/build/providers/vunnel" + "github.com/anchore/grype/grype/db/build/pull" + dbprovider
"github.com/anchore/grype/grype/db/provider" + "github.com/anchore/grype/internal/log" +) + +// dbBuilderConfigWrapper places the DatabaseBuild options under a top-level +// `db-builder:` key in the application YAML, parallel to the existing `db:` +// section used by end-user `grype db ...` commands. All three db-builder +// subcommands share the same configuration view. +type dbBuilderConfigWrapper struct { + DBBuilder *options.DatabaseBuild `yaml:"db-builder" json:"db-builder" mapstructure:"db-builder"` +} + +// buildProviders constructs the provider collection used by both the pull and +// the build subcommands. The provider set comes from one of (in order): +// - explicit provider configs in the YAML +// - -g (generate via `vunnel list`) +// - -p (synthesized vunnel provider configs) +func buildProviders(opts *options.DatabaseBuild) (dbprovider.Providers, error) { + vCfg := vunnel.Config{ + Config: opts.Provider.Vunnel.Config, + Executor: opts.Provider.Vunnel.Executor, + DockerImage: opts.Provider.Vunnel.DockerImage, + DockerTag: opts.Provider.Vunnel.DockerTag, + GenerateConfigs: opts.Provider.Vunnel.GenerateConfigs, + ExcludeProviders: opts.Provider.Vunnel.ExcludeProviders, + Env: map[string]string(opts.Provider.Vunnel.Env), + } + + cfgs := append([]pull.ProviderRunConfig(nil), opts.Provider.Configs...) + + // If the user passed -p but didn't supply explicit configs and didn't ask + // to enumerate via `vunnel list` (-g), treat each -p value as a vunnel + // provider config. This lets `-p alpine -p alma` work on its own when the + // provider data already exists on disk under provider.root.
+ if len(cfgs) == 0 && !vCfg.GenerateConfigs && len(opts.Provider.IncludeFilter) > 0 { + for _, name := range opts.Provider.IncludeFilter { + cfgs = append(cfgs, pull.ProviderRunConfig{ + Identifier: dbprovider.Identifier{ + Name: name, + Kind: vunnel.Kind, + }, + }) + } + log.WithFields("providers", opts.Provider.IncludeFilter).Debug("synthesized vunnel provider configs from --provider-name") + } + + pvdrs, err := providers.New(opts.Provider.Root, vCfg, cfgs...) + if err != nil { + if errors.Is(err, providers.ErrNoProviders) { + log.Error("configure a provider via the application config, pass -p for each provider, or use -g to enumerate them via vunnel list") + } + return nil, fmt.Errorf("unable to create providers: %w", err) + } + + // Only run the post-filter when configs or -g produced the provider set; + // when -p synthesized them above, the filter is implicit. + hadExplicitSources := vCfg.GenerateConfigs || len(opts.Provider.Configs) > 0 + if hadExplicitSources && len(opts.Provider.IncludeFilter) > 0 { + log.WithFields("keep-only", opts.Provider.IncludeFilter).Debug("filtering providers by name") + pvdrs = pvdrs.Filter(opts.Provider.IncludeFilter...) 
+ } + + return pvdrs, nil +} + +func validateCPEParts(parts []string) error { + if len(parts) == 0 { + return errors.New("no CPE parts provided") + } + validParts := strset.New("a", "o", "h") + for _, part := range parts { + if !validParts.Has(part) { + return fmt.Errorf("invalid CPE part: %s", part) + } + } + return nil +} + +func providerStates(skipValidation bool, providers []dbprovider.Reader) ([]dbprovider.State, error) { + var states []dbprovider.State + log.Debug("reading all provider state") + + if len(providers) == 0 { + return nil, fmt.Errorf("no providers configured") + } + + for _, p := range providers { + log.WithFields("provider", p.ID().Name).Debug("reading state") + + sd, err := p.State() + if err != nil { + return nil, fmt.Errorf("unable to read provider state: %w", err) + } + + if !skipValidation { + log.WithFields("provider", p.ID().Name).Trace("validating state") + if err := sd.Verify(); err != nil { + return nil, fmt.Errorf("invalid provider state: %w", err) + } + } + states = append(states, *sd) + } + if !skipValidation { + log.Debugf("state validated for all providers") + } + return states, nil +} diff --git a/cmd/grype/cli/commands/db_builder_package.go b/cmd/grype/cli/commands/db_builder_package.go new file mode 100644 index 00000000000..10332a01763 --- /dev/null +++ b/cmd/grype/cli/commands/db_builder_package.go @@ -0,0 +1,47 @@ +package commands + +import ( + "fmt" + + "github.com/scylladb/go-set/strset" + "github.com/spf13/cobra" + + "github.com/anchore/clio" + "github.com/anchore/grype/cmd/grype/cli/options" + "github.com/anchore/grype/grype/db" +) + +// DBBuilderPackage archives an already-built SQLite database (produced by +// 'grype db-builder build') into a compressed tarball ready for +// distribution. 
+func DBBuilderPackage(app clio.Application) *cobra.Command { + opts := options.DefaultDatabaseBuild() + + cmd := &cobra.Command{ + Use: "package", + Short: "Package a built vulnerability database into an archive", + Long: `Archive an already-built database directory (produced by +'grype db-builder build') into a compressed tarball ready for upload and +distribution. The archive extension defaults to the schema's preferred +format and can be overridden with --archive-extension.`, + Args: cobra.NoArgs, + PreRunE: func(cmd *cobra.Command, args []string) error { + return disableUI(app)(cmd, args) + }, + RunE: func(_ *cobra.Command, _ []string) error { + return runDBBuilderPackage(opts) + }, + } + + return app.SetupCommand(cmd, &dbBuilderConfigWrapper{DBBuilder: opts}) +} + +func runDBBuilderPackage(opts *options.DatabaseBuild) error { + if opts.ArchiveExtension != "" && !strset.New("tar.gz", "tar.zst").Has(opts.ArchiveExtension) { + return fmt.Errorf("archive-extension must be 'tar.gz' or 'tar.zst'") + } + + // v5 DB writing (and its corresponding listing.json) is no longer supported via this command; + // publish-base-url is intentionally omitted. + return db.Package(opts.Dir, "", opts.ArchiveExtension, map[string]string(opts.CompressorCommands)) +} diff --git a/cmd/grype/cli/commands/db_builder_pull.go b/cmd/grype/cli/commands/db_builder_pull.go new file mode 100644 index 00000000000..a1ae0dcad32 --- /dev/null +++ b/cmd/grype/cli/commands/db_builder_pull.go @@ -0,0 +1,61 @@ +package commands + +import ( + "fmt" + + "github.com/spf13/cobra" + + "github.com/anchore/clio" + "github.com/anchore/grype/cmd/grype/cli/options" + "github.com/anchore/grype/grype/db/build/pull" + dbprovider "github.com/anchore/grype/grype/db/provider" +) + +// DBBuilderPull runs vulnerability providers (via the vunnel runner, by +// default) to refresh on-disk workspace data. This is the per-provider step +// in the CI scatter flow — it does not touch the SQLite DB. 
+func DBBuilderPull(app clio.Application) *cobra.Command { + opts := options.DefaultDatabaseBuild() + + cmd := &cobra.Command{ + Use: "pull", + Short: "Refresh vulnerability provider workspace data", + Long: `Run one or more vulnerability providers (vunnel by default) to refresh the +on-disk workspace under provider.root. This is the per-provider step that +fans out across the data-sync matrix; it does not produce or modify a +database. + +Examples: + + grype db-builder pull -p alpine # refresh just the alpine provider + grype db-builder pull -p alpine,alma # refresh two providers + grype db-builder pull -g # enumerate providers via 'vunnel list'`, + Args: cobra.NoArgs, + PreRunE: func(cmd *cobra.Command, args []string) error { + return disableUI(app)(cmd, args) + }, + RunE: func(_ *cobra.Command, _ []string) error { + return runDBBuilderPull(opts) + }, + } + + return app.SetupCommand(cmd, &dbBuilderConfigWrapper{DBBuilder: opts}) +} + +func runDBBuilderPull(opts *options.DatabaseBuild) error { + pvdrs, err := buildProviders(opts) + if err != nil { + return err + } + + if err := pull.Pull(pull.Config{ + Parallelism: opts.Pull.Parallelism, + Collection: dbprovider.Collection{ + Root: opts.Provider.Root, + Providers: pvdrs, + }, + }); err != nil { + return fmt.Errorf("pull failed: %w", err) + } + return nil +} diff --git a/cmd/grype/cli/options/database_build.go b/cmd/grype/cli/options/database_build.go index ebb6b8248c0..6298cccb453 100644 --- a/cmd/grype/cli/options/database_build.go +++ b/cmd/grype/cli/options/database_build.go @@ -32,12 +32,12 @@ func (m stringMap) String() string { return "{" + strings.Join(parts, ", ") + "}" } -// DatabaseBuild holds the configuration for `grype db build`, the unified -// pull -> write -> package pipeline. The shape mirrors grype-db's historical -// configuration (provider/pull/build/package) so that existing config files -// remain familiar; individual phases can be skipped via --skip. 
+// DatabaseBuild holds the configuration shared by the `grype db-builder` +// subcommands (pull, build, package). Each subcommand reads the subset of +// fields it needs; flags are registered on a single AddFlags so the YAML +// shape stays consistent across subcommands. type DatabaseBuild struct { - // build-time options (covers the "write" phase) + // build-time options (used by `db-builder build`) SchemaVersion int `yaml:"schema-version" json:"schema-version" mapstructure:"schema-version"` Dir string `yaml:"dir" json:"dir" mapstructure:"dir"` BatchSize int `yaml:"batch-size" json:"batch-size" mapstructure:"batch-size"` @@ -45,15 +45,13 @@ type DatabaseBuild struct { InferNVDFixVersions bool `yaml:"infer-nvd-fix-versions" json:"infer-nvd-fix-versions" mapstructure:"infer-nvd-fix-versions"` Hydrate bool `yaml:"hydrate" json:"hydrate" mapstructure:"hydrate"` FailOnMissingFixDate bool `yaml:"fail-on-missing-fix-date" json:"fail-on-missing-fix-date" mapstructure:"fail-on-missing-fix-date"` + SkipValidation bool `yaml:"skip-validation" json:"skip-validation" mapstructure:"skip-validation"` - // pipeline control - Skip []string `yaml:"skip" json:"skip" mapstructure:"skip"` - - // archive options (covers the "package" phase) + // archive options (used by `db-builder package`) ArchiveExtension string `yaml:"archive-extension" json:"archive-extension" mapstructure:"archive-extension"` CompressorCommands stringMap `yaml:"compressor-commands" json:"compressor-commands" mapstructure:"compressor-commands"` - // nested config for the pull phase + providers + // pull + provider options (used by `db-builder pull` and indirectly by build for state reading) Pull DatabaseBuildPull `yaml:"pull" json:"pull" mapstructure:"pull"` Provider DatabaseBuildProvider `yaml:"provider" json:"provider" mapstructure:"provider"` } @@ -85,12 +83,11 @@ var _ interface { clio.PostLoader } = (*DatabaseBuild)(nil) -// PostLoad flattens any comma-separated entries in --provider-name and --skip -// so 
that "-p alpine,alma,rhel" behaves the same as "-p alpine -p alma -p rhel" +// PostLoad flattens any comma-separated entries in --provider-name so that +// "-p alpine,alma,rhel" behaves the same as "-p alpine -p alma -p rhel" // (matching the convention used by grype's --from flag). func (o *DatabaseBuild) PostLoad() error { o.Provider.IncludeFilter = flattenCSV(o.Provider.IncludeFilter) - o.Skip = flattenCSV(o.Skip) return nil } @@ -119,8 +116,8 @@ func DefaultDatabaseBuild() *DatabaseBuild { InferNVDFixVersions: true, Hydrate: false, FailOnMissingFixDate: false, - Skip: nil, - CompressorCommands: stringMap{}, + SkipValidation: false, + CompressorCommands: stringMap{}, Pull: DatabaseBuildPull{ Parallelism: 4, }, @@ -151,8 +148,8 @@ func (o *DatabaseBuild) AddFlags(flags clio.FlagSet) { flags.StringVarP(&o.ArchiveExtension, "archive-extension", "e", "override the extension used during DB archiving (default chosen by the DB schema, typically 'tar.zst')") - flags.StringArrayVarP(&o.Skip, "skip", "", - "comma-separated phases of the build pipeline to skip; one or more of: pull, validate, write, package") + flags.BoolVarP(&o.SkipValidation, "skip-validation", "", + "skip per-provider state validation before writing the DB") flags.StringArrayVarP(&o.Provider.IncludeFilter, "provider-name", "p", "one or more provider names to filter the build to (default: empty = all)") @@ -167,7 +164,7 @@ func (o *DatabaseBuild) DescribeFields(d clio.FieldDescriptionSet) { d.Add(&o.InferNVDFixVersions, `derive missing NVD fix versions from CVE configurations when building the DB`) d.Add(&o.Hydrate, `populate post-build derived data (only applies for schemas > 5)`) d.Add(&o.FailOnMissingFixDate, `fail the build if any fix entry lacks a known available date`) - d.Add(&o.Skip, `phases of the build pipeline to skip (pull, validate, write, package)`) + d.Add(&o.SkipValidation, `skip per-provider state validation before writing the DB`) d.Add(&o.ArchiveExtension, `archive extension used 
during DB packaging; empty means the schema default`) d.Add(&o.CompressorCommands, `external commands to use for compressing archives, keyed by extension`) From 6ade9cab2f64619402d2920ace16c201b0eabef5 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Fri, 22 May 2026 10:45:25 -0400 Subject: [PATCH 4/8] wip: grype db build move phase 2 Signed-off-by: Will Murphy --- cmd/grype/cli/commands/db_builder.go | 1 + cmd/grype/cli/commands/db_builder_cache.go | 90 +++++ .../cli/commands/db_builder_cache_backup.go | 194 +++++++++++ .../cli/commands/db_builder_cache_delete.go | 63 ++++ .../cli/commands/db_builder_cache_restore.go | 325 ++++++++++++++++++ .../cli/commands/db_builder_cache_status.go | 133 +++++++ cmd/grype/cli/options/database_build.go | 48 ++- go.mod | 2 +- grype/db/package_legacy.go | 2 +- grype/db/v6/build/archive.go | 2 +- .../tarutil/file_entry.go | 0 .../tarutil/file_entry_test.go | 0 .../internal => internal}/tarutil/populate.go | 0 .../tarutil/populate_test.go | 0 .../tarutil/reader_entry.go | 0 .../tarutil/reader_entry_test.go | 0 .../db/internal => internal}/tarutil/tar.go | 0 .../internal => internal}/tarutil/writer.go | 0 .../tarutil/writer_test.go | 0 19 files changed, 847 insertions(+), 13 deletions(-) create mode 100644 cmd/grype/cli/commands/db_builder_cache.go create mode 100644 cmd/grype/cli/commands/db_builder_cache_backup.go create mode 100644 cmd/grype/cli/commands/db_builder_cache_delete.go create mode 100644 cmd/grype/cli/commands/db_builder_cache_restore.go create mode 100644 cmd/grype/cli/commands/db_builder_cache_status.go rename {grype/db/internal => internal}/tarutil/file_entry.go (100%) rename {grype/db/internal => internal}/tarutil/file_entry_test.go (100%) rename {grype/db/internal => internal}/tarutil/populate.go (100%) rename {grype/db/internal => internal}/tarutil/populate_test.go (100%) rename {grype/db/internal => internal}/tarutil/reader_entry.go (100%) rename {grype/db/internal => internal}/tarutil/reader_entry_test.go 
(100%) rename {grype/db/internal => internal}/tarutil/tar.go (100%) rename {grype/db/internal => internal}/tarutil/writer.go (100%) rename {grype/db/internal => internal}/tarutil/writer_test.go (100%) diff --git a/cmd/grype/cli/commands/db_builder.go b/cmd/grype/cli/commands/db_builder.go index 620b22685b4..6d64aa07c0e 100644 --- a/cmd/grype/cli/commands/db_builder.go +++ b/cmd/grype/cli/commands/db_builder.go @@ -32,6 +32,7 @@ search, ...), see 'grype db' instead.`, DBBuilderPull(app), DBBuilderBuild(app), DBBuilderPackage(app), + DBBuilderCache(app), ) return cmd diff --git a/cmd/grype/cli/commands/db_builder_cache.go b/cmd/grype/cli/commands/db_builder_cache.go new file mode 100644 index 00000000000..d4a18157ac8 --- /dev/null +++ b/cmd/grype/cli/commands/db_builder_cache.go @@ -0,0 +1,90 @@ +package commands + +import ( + "errors" + "fmt" + "os" + "strings" + + "github.com/scylladb/go-set/strset" + "github.com/spf13/cobra" + + "github.com/anchore/clio" + dbprovider "github.com/anchore/grype/grype/db/provider" + "github.com/anchore/grype/internal/log" +) + +// DBBuilderCache groups the subcommands that operate on provider workspace +// tarballs (the input/output of the data-sync pipeline). These commands sit +// outside the build/pull/package phases because they move workspace data +// between hosts via tar archives — typically into and out of an OCI-hosted +// cache like ORAS-pushed ghcr.io images. +func DBBuilderCache(app clio.Application) *cobra.Command { + cmd := &cobra.Command{ + Use: "cache", + Short: "Manage provider workspace cache archives", + Long: `Move vulnerability provider workspace data between hosts via tar +archives. 
These commands are used by the data-sync pipeline to scatter +per-provider workspace state (typically backed by ORAS/ghcr.io) and gather +it on a publish host before running 'grype db-builder build'.`, + } + + cmd.AddCommand( + DBBuilderCacheBackup(app), + DBBuilderCacheRestore(app), + DBBuilderCacheStatus(app), + DBBuilderCacheDelete(app), + ) + + return cmd +} + +// readProviderNamesFromRoot lists the immediate subdirectories of root, +// each of which is the workspace for one vulnerability provider. +func readProviderNamesFromRoot(root string) ([]string, error) { + listing, err := os.ReadDir(root) + if err != nil { + return nil, err + } + var providers []string + for _, f := range listing { + if !f.IsDir() { + continue + } + providers = append(providers, f.Name()) + } + return providers, nil +} + +// validateRequestedProviders intersects on-disk providers with the user's +// --provider-name filter, returning the kept set and an error if any +// requested provider is missing from disk. +func validateRequestedProviders(providersOnDisk, requested []string) ([]string, error) { + if len(requested) == 0 { + return providersOnDisk, nil + } + requestedSet := strset.New(requested...) + var result []string + for _, p := range providersOnDisk { + if requestedSet.Has(p) { + result = append(result, p) + requestedSet.Remove(p) + } + } + if requestedSet.Size() > 0 { + return nil, fmt.Errorf("providers requested but not present on disk: %s", strings.Join(requestedSet.List(), ", ")) + } + return result, nil +} + +// deleteProviderCache removes a single provider's workspace directory under root. 
+func deleteProviderCache(root, name string) error { + workspace := dbprovider.NewWorkspace(root, name) + dir := workspace.Path() + if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) { + log.WithFields("dir", dir).Debug("provider cache does not exist, skipping...") + return nil + } + log.WithFields("dir", dir).Info("deleting provider data") + return os.RemoveAll(dir) +} diff --git a/cmd/grype/cli/commands/db_builder_cache_backup.go b/cmd/grype/cli/commands/db_builder_cache_backup.go new file mode 100644 index 00000000000..00da71f74fb --- /dev/null +++ b/cmd/grype/cli/commands/db_builder_cache_backup.go @@ -0,0 +1,194 @@ +package commands + +import ( + "encoding/json" + "fmt" + "io/fs" + "os" + "path/filepath" + "strings" + + "github.com/scylladb/go-set/strset" + "github.com/spf13/cobra" + + "github.com/anchore/clio" + "github.com/anchore/grype/cmd/grype/cli/options" + dbprovider "github.com/anchore/grype/grype/db/provider" + "github.com/anchore/grype/internal/log" + "github.com/anchore/grype/internal/tarutil" +) + +// DBBuilderCacheBackup creates a compressed tar archive of one or more +// provider workspaces. The data-sync workflow uploads the resulting archive +// to ORAS/ghcr.io so it can be restored on a publish host before +// 'db-builder build'. +func DBBuilderCacheBackup(app clio.Application) *cobra.Command { + opts := options.DefaultDatabaseBuild() + + cmd := &cobra.Command{ + Use: "backup", + Short: "Archive provider workspace data into a cache tarball", + Long: `Walk the provider workspace under --root and write a compressed tar +archive at --path containing the selected providers' state. 
By default the +archive includes both 'input' and 'results' directories; pass --results-only +to omit raw input data (and mark each provider's metadata as stale so the +next pull is forced to re-download).`, + Args: cobra.NoArgs, + PreRunE: func(cmd *cobra.Command, args []string) error { + return disableUI(app)(cmd, args) + }, + RunE: func(_ *cobra.Command, _ []string) error { + return runDBBuilderCacheBackup(opts) + }, + } + + return app.SetupCommand(cmd, &dbBuilderConfigWrapper{DBBuilder: opts}) +} + +func runDBBuilderCacheBackup(opts *options.DatabaseBuild) error { + if opts.Cache.Path == "" { + return fmt.Errorf("--path is required") + } + + providersDescr := "all" + if len(opts.Provider.IncludeFilter) > 0 { + providersDescr = fmt.Sprintf("%s", opts.Provider.IncludeFilter) + } + log.WithFields("providers", providersDescr).Info("backing up provider state") + + writer, err := tarutil.NewWriter(opts.Cache.Path) + if err != nil { + return fmt.Errorf("unable to create archive writer: %w", err) + } + defer writer.Close() + + allowableProviders := strset.New(opts.Provider.IncludeFilter...) 
+ + providerNames, err := readProviderNamesFromRoot(opts.Provider.Root) + if err != nil { + return err + } + + for _, name := range providerNames { + if allowableProviders.Size() > 0 && !allowableProviders.Has(name) { + log.WithFields("provider", name).Trace("skipping...") + continue + } + + log.WithFields("provider", name).Trace("validating provider") + workspace := dbprovider.NewWorkspace(opts.Provider.Root, name) + sd, err := workspace.ReadState() + if err != nil { + return fmt.Errorf("unable to read provider %q state: %w", name, err) + } + + if err := sd.Verify(workspace.Path()); err != nil { + return fmt.Errorf("provider %q state is invalid: %w", name, err) + } + + log.WithFields("provider", name).Debug("archiving data") + if err := archiveProvider(opts, name, writer); err != nil { + return err + } + } + + log.WithFields("path", opts.Cache.Path).Info("provider state archived") + return nil +} + +func archiveProvider(opts *options.DatabaseBuild, name string, writer tarutil.Writer) error { + wd, err := os.Getwd() + if err != nil { + return err + } + + if err := os.Chdir(opts.Provider.Root); err != nil { + return err + } + defer func(dir string) { + if err := os.Chdir(dir); err != nil { + log.Errorf("unable to restore directory: %w", err) + } + }(wd) + + var visitor pathVisitor + if opts.Cache.ResultsOnly { + log.WithFields("provider", name).Debug("archiving results only") + visitor = newCacheResultsOnlyWorkspaceVisitStrategy(writer, name) + } else { + log.WithFields("provider", name).Debug("archiving full workspace") + visitor = cacheFullWorkspaceVisitStrategy{writer: writer} + } + + return filepath.Walk(name, visitor.visitPath) +} + +type pathVisitor interface { + visitPath(path string, info fs.FileInfo, err error) error +} + +type cacheFullWorkspaceVisitStrategy struct { + writer tarutil.Writer +} + +func (t cacheFullWorkspaceVisitStrategy) visitPath(p string, info fs.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return 
nil + } + return t.writer.WriteEntry(tarutil.NewEntryFromFilePath(p)) +} + +type cacheResultsOnlyWorkspaceVisitStrategy struct { + writer tarutil.Writer + providerName string + metadataPath string + inputPath string +} + +func newCacheResultsOnlyWorkspaceVisitStrategy(writer tarutil.Writer, providerName string) cacheResultsOnlyWorkspaceVisitStrategy { + return cacheResultsOnlyWorkspaceVisitStrategy{ + writer: writer, + providerName: providerName, + metadataPath: filepath.Join(providerName, "metadata.json"), + inputPath: filepath.Join(providerName, "input"), + } +} + +func (t cacheResultsOnlyWorkspaceVisitStrategy) visitPath(p string, info fs.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + + switch { + case strings.HasPrefix(p, t.inputPath): + return nil + case p == t.metadataPath: + var state dbprovider.State + f, err := os.Open(p) + if err != nil { + return err + } + defer f.Close() + + if err := json.NewDecoder(f).Decode(&state); err != nil { + return err + } + + state.Stale = true + + stateJSON, err := json.MarshalIndent(state, "", " ") + if err != nil { + return err + } + + return t.writer.WriteEntry(tarutil.NewEntryFromBytes(stateJSON, p, info)) + } + + return t.writer.WriteEntry(tarutil.NewEntryFromFilePath(p)) +} diff --git a/cmd/grype/cli/commands/db_builder_cache_delete.go b/cmd/grype/cli/commands/db_builder_cache_delete.go new file mode 100644 index 00000000000..cceb5ec6087 --- /dev/null +++ b/cmd/grype/cli/commands/db_builder_cache_delete.go @@ -0,0 +1,63 @@ +package commands + +import ( + "github.com/scylladb/go-set/strset" + "github.com/spf13/cobra" + + "github.com/anchore/clio" + "github.com/anchore/grype/cmd/grype/cli/options" + "github.com/anchore/grype/internal/log" +) + +// DBBuilderCacheDelete removes provider workspace directories under +// --root. Without --provider-name it removes all provider data, which +// is a dev/operator convenience and not used in the standard sync flow. 
+func DBBuilderCacheDelete(app clio.Application) *cobra.Command { + opts := options.DefaultDatabaseBuild() + + cmd := &cobra.Command{ + Use: "delete", + Short: "Delete provider workspace data", + Long: `Remove provider workspace directories from --root. Without +--provider-name, removes all provider data. This is a destructive operation +intended for local development; CI sync flows do not need it.`, + Args: cobra.NoArgs, + PreRunE: func(cmd *cobra.Command, args []string) error { + return disableUI(app)(cmd, args) + }, + RunE: func(_ *cobra.Command, _ []string) error { + return runDBBuilderCacheDelete(opts) + }, + } + + return app.SetupCommand(cmd, &dbBuilderConfigWrapper{DBBuilder: opts}) +} + +func runDBBuilderCacheDelete(opts *options.DatabaseBuild) error { + allowableProviders := strset.New(opts.Provider.IncludeFilter...) + + providerNames, err := readProviderNamesFromRoot(opts.Provider.Root) + if err != nil { + return err + } + + if len(providerNames) == 0 { + log.Info("no provider data found to delete") + return nil + } + + for _, name := range providerNames { + if allowableProviders.Size() > 0 && !allowableProviders.Has(name) { + log.WithFields("provider", name).Trace("skipping...") + continue + } + if err := deleteProviderCache(opts.Provider.Root, name); err != nil { + return err + } + } + + if allowableProviders.Size() == 0 { + log.Info("all provider data deleted") + } + return nil +} diff --git a/cmd/grype/cli/commands/db_builder_cache_restore.go b/cmd/grype/cli/commands/db_builder_cache_restore.go new file mode 100644 index 00000000000..8c6d9d104f9 --- /dev/null +++ b/cmd/grype/cli/commands/db_builder_cache_restore.go @@ -0,0 +1,325 @@ +package commands + +import ( + "archive/tar" + "compress/gzip" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/scylladb/go-set/strset" + "github.com/spf13/afero" + "github.com/spf13/cobra" + + "github.com/anchore/clio" + "github.com/anchore/grype/cmd/grype/cli/options" + 
"github.com/anchore/grype/internal/log" +) + +// DBBuilderCacheRestore extracts a provider workspace tar archive into the +// provider workspace root, optionally restricted to a subset of providers. +// This is the inverse of 'db-builder cache backup'. +func DBBuilderCacheRestore(app clio.Application) *cobra.Command { + opts := options.DefaultDatabaseBuild() + + cmd := &cobra.Command{ + Use: "restore", + Short: "Restore provider workspace data from a cache tarball", + Long: `Read the cache archive at --path and extract its contents into the +provider workspace root. Use --provider-name to restore only a subset and +--delete-existing to remove any pre-existing data for the restored +providers before extracting.`, + Args: cobra.NoArgs, + PreRunE: func(cmd *cobra.Command, args []string) error { + return disableUI(app)(cmd, args) + }, + RunE: func(_ *cobra.Command, _ []string) error { + return runDBBuilderCacheRestore(opts) + }, + } + + return app.SetupCommand(cmd, &dbBuilderConfigWrapper{DBBuilder: opts}) +} + +func runDBBuilderCacheRestore(opts *options.DatabaseBuild) error { + if opts.Cache.Path == "" { + return fmt.Errorf("--path is required") + } + + providersDescr := "all" + if len(opts.Provider.IncludeFilter) > 0 { + providersDescr = fmt.Sprintf("%s", opts.Provider.IncludeFilter) + } + log.WithFields("providers", providersDescr).Info("restoring provider state") + + if err := os.MkdirAll(opts.Provider.Root, 0755); err != nil { + return fmt.Errorf("failed to create provider root directory: %w", err) + } + + allowableProviders := strset.New(opts.Provider.IncludeFilter...) 
// getProviderNameFromPath returns the first element of a cleaned archive
// entry path, which identifies the provider the entry belongs to.
//
// It returns "" when the path has no usable leading element: an empty path,
// an absolute path, or one whose first element is "." or "..". Callers join
// the returned name onto the provider root (to delete or probe the existing
// workspace), so "." and ".." must never be handed back: they would resolve
// to the root itself or to its parent.
func getProviderNameFromPath(path string) string {
	parts := strings.SplitN(filepath.Clean(path), string(os.PathSeparator), 2)
	switch first := parts[0]; first {
	case "", ".", "..":
		return ""
	default:
		return first
	}
}

// readProviderNamesFromTarGz scans the gzip-compressed tar archive at tarPath
// and returns the distinct provider names found in its entry paths, in the
// order they first appear. Entries for which getProviderNameFromPath yields
// "" are ignored.
//
// The archive file and the gzip stream are closed on every return path.
func readProviderNamesFromTarGz(tarPath string) ([]string, error) {
	f, err := os.Open(tarPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open cache archive: %w", err)
	}
	defer f.Close()

	gr, err := gzip.NewReader(f)
	if err != nil {
		return nil, fmt.Errorf("failed to create gzip reader: %w", err)
	}
	defer gr.Close()

	seen := make(map[string]struct{})
	providers := []string{}
	tr := tar.NewReader(gr)

	for {
		header, err := tr.Next()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("failed to read tar header: %w", err)
		}

		provider := getProviderNameFromPath(header.Name)
		if provider == "" {
			continue
		}
		if _, dup := seen[provider]; dup {
			continue
		}
		seen[provider] = struct{}{}
		providers = append(providers, provider)
	}

	return providers, nil
}
fmt.Errorf("failed to create directory: %w", err) + } + case tar.TypeSymlink: + if err := handleSymlink(fs, rootPath, cleanedPath, header.Linkname); err != nil { + return fmt.Errorf("failed to create symlink: %w", err) + } + case tar.TypeReg: + if err := handleFile(fs, cleanedPath, reader); err != nil { + return fmt.Errorf("failed to handle file: %w", err) + } + default: + log.WithFields("name", cleanedPath, "type", header.Typeflag).Warn("unknown file type in backup archive") + } + return nil +} + +func handleFile(fs afero.Fs, cleanedPath string, reader io.Reader) error { + if cleanedPath == "" { + return fmt.Errorf("empty path") + } + parentPath := filepath.Dir(cleanedPath) + if parentPath != "" { + if err := fs.MkdirAll(parentPath, 0755); err != nil { + return fmt.Errorf("failed to create parent directory %q for file %q: %w", parentPath, cleanedPath, err) + } + } + outFile, err := fs.Create(cleanedPath) + if err != nil { + return fmt.Errorf("failed to create file: %w", err) + } + if err := safeCopy(outFile, reader); err != nil { + return fmt.Errorf("failed to copy file: %w", err) + } + if err := outFile.Close(); err != nil { + return fmt.Errorf("failed to close file: %w", err) + } + return nil +} + +func handleSymlink(fs afero.Fs, rootPath, cleanedPath, linkName string) error { + if err := detectLinkTraversal(rootPath, cleanedPath, linkName); err != nil { + return err + } + + linkReader, ok := fs.(afero.LinkReader) + if !ok { + return afero.ErrNoReadlink + } + + if linkTarget, err := linkReader.ReadlinkIfPossible(cleanedPath); err == nil { + if linkTarget == linkName { + return nil + } + if err := fs.Remove(cleanedPath); err != nil { + return fmt.Errorf("failed to remove existing symlink: %w", err) + } + } + + linker, ok := fs.(afero.Linker) + if !ok { + return afero.ErrNoSymlink + } + + if err := linker.SymlinkIfPossible(linkName, cleanedPath); err != nil { + return fmt.Errorf("failed to create symlink: %w", err) + } + return nil +} + +func 
// cleanPathRelativeToRoot cleans path and joins it onto rootPath. The result
// may still point outside rootPath (for example when path starts with ".."),
// so callers must check it with detectPathTraversal before using it.
func cleanPathRelativeToRoot(rootPath, path string) string {
	return filepath.Join(rootPath, filepath.Clean(path))
}

// isWithinRoot reports whether candidate is rootPath itself or a path below
// it. The comparison is made on whole path elements, so a sibling directory
// that merely shares rootPath as a string prefix (root "/data/root",
// candidate "/data/root-evil") is rejected.
func isWithinRoot(rootPath, candidate string) bool {
	rel, err := filepath.Rel(rootPath, candidate)
	if err != nil {
		return false
	}
	return rel != ".." && !strings.HasPrefix(rel, ".."+string(filepath.Separator))
}

// detectLinkTraversal returns an error when a symlink located at cleanedPath
// with target linkTarget would resolve outside rootPath. Absolute targets
// are checked directly; relative targets are resolved against the directory
// containing the link.
func detectLinkTraversal(rootPath, cleanedPath, linkTarget string) error {
	linkTarget = filepath.Clean(linkTarget)
	if filepath.IsAbs(linkTarget) {
		return detectPathTraversal(rootPath, linkTarget)
	}
	linkTarget = filepath.Join(filepath.Dir(cleanedPath), linkTarget)
	if !isWithinRoot(rootPath, linkTarget) {
		return fmt.Errorf("symlink points outside root: %s -> %s", cleanedPath, linkTarget)
	}
	return nil
}

// detectPathTraversal returns an error when cleanedPath lies outside
// rootPath. An empty cleanedPath is accepted.
func detectPathTraversal(rootPath, cleanedPath string) error {
	if cleanedPath == "" {
		return nil
	}
	if !isWithinRoot(rootPath, cleanedPath) {
		return fmt.Errorf("path traversal detected: %s", cleanedPath)
	}
	return nil
}

const (
	_  = iota
	kb = 1 << (10 * iota) //nolint:deadcode,unused
	mb                    //nolint:deadcode,unused
	gb
)

// perFileReadLimit caps how many bytes safeCopy will accept for one file.
const perFileReadLimit = 25 * gb

// safeCopy limits the copy from the reader to defend against decompression
// bomb attacks during archive extraction.
//
// Any error reported by the underlying copy is returned to the caller, so a
// failed read or write is never mistaken for a completed file. Reaching
// perFileReadLimit is reported as an error as well.
func safeCopy(writer io.Writer, reader io.Reader) error {
	numBytes, err := io.Copy(writer, io.LimitReader(reader, perFileReadLimit))
	if err != nil {
		return err
	}
	if numBytes >= perFileReadLimit {
		return fmt.Errorf("zip read limit hit (potential decompression bomb attack)")
	}
	return nil
}
Exits non-zero if any selected provider is invalid (or does not have
+more than --min-rows rows when set).`,
+		Args: cobra.NoArgs,
+		PreRunE: func(cmd *cobra.Command, args []string) error {
+			return disableUI(app)(cmd, args)
+		},
+		RunE: func(_ *cobra.Command, _ []string) error {
+			return runDBBuilderCacheStatus(opts)
+		},
+	}
+
+	return app.SetupCommand(cmd, &dbBuilderConfigWrapper{DBBuilder: opts})
+}
+
+// runDBBuilderCacheStatus prints a status report for every provider workspace
+// found under opts.Provider.Root (narrowed by opts.Provider.IncludeFilter).
+//
+// For each provider it reads and verifies the workspace state, counts the
+// result entries, and prints the name, result count, creation time and a
+// colored valid/INVALID status. It returns an error only when the provider
+// root cannot be listed; any invalid provider, failed row-count check or
+// provider-filter error instead terminates the process with exit code 1.
+func runDBBuilderCacheStatus(opts *options.DatabaseBuild) error {
+	providerNames, err := readProviderNamesFromRoot(opts.Provider.Root)
+	if err != nil {
+		return err
+	}
+
+	// the filter error is reported at the end so that the providers that do
+	// exist are still described first
+	providerNames, missingProvidersErr := validateRequestedProviders(providerNames, opts.Provider.IncludeFilter)
+
+	// sds and errs are parallel to providerNames: exactly one entry is appended
+	// to each per provider, and sds[i] is nil whenever errs[i] is non-nil
+	var sds []*dbprovider.State
+	var errs []error
+
+	for _, name := range providerNames {
+		workspace := dbprovider.NewWorkspace(opts.Provider.Root, name)
+		sd, err := workspace.ReadState()
+		if err != nil {
+			sds = append(sds, nil)
+			errs = append(errs, err)
+			continue
+		}
+
+		if err := sd.Verify(workspace.Path()); err != nil {
+			sds = append(sds, nil)
+			errs = append(errs, err)
+			continue
+		}
+
+		errs = append(errs, nil)
+		sds = append(sds, sd)
+	}
+
+	success := true
+
+	for idx, sd := range sds {
+		validMsg := "valid"
+		isValid := true
+		if errs[idx] != nil {
+			validMsg = fmt.Sprintf("INVALID (%s)", errs[idx].Error())
+			isValid = false
+		} else if sd == nil {
+			validMsg = "INVALID (no state description found)"
+			isValid = false
+		}
+
+		var count int64
+		// fall back to the directory name when no state could be loaded
+		name := providerNames[idx]
+
+		if sd != nil {
+			name = sd.Provider
+			counter := func() (int64, error) {
+				return entry.Count(sd.Store, sd.ResultPaths())
+			}
+			count, err = validateMinRowsCount(opts.Cache.MinRows, counter)
+			if err != nil {
+				isValid = false
+				validMsg = fmt.Sprintf("INVALID (%s)", err.Error())
+			}
+		}
+
+		success = success && isValid
+
+		fmt.Printf(" • %s\n", name)
+		statusFmt := color.HiRed
+		if isValid {
+			// isValid implies sd != nil, so dereferencing sd here is safe
+			fmt.Printf(" ├── results: %d\n", count)
+			fmt.Printf(" ├── created: %s\n", sd.Timestamp.Format(time.RFC3339))
+			statusFmt = color.HiGreen
+		}
+
+		fmt.Printf(" └── status: %s\n", statusFmt.Sprint(validMsg))
+	}
+
+	if missingProvidersErr != nil {
+		success = false
+		fmt.Printf("INVALID (%s)\n", missingProvidersErr.Error())
+	}
+
+	if !success {
+		// NOTE(review): os.Exit does not run deferred functions and bypasses the
+		// command framework's own error handling; confirm that exiting here
+		// (rather than returning an error) is intended.
+		os.Exit(1)
+	}
+	return nil
+}
+
+// validateMinRowsCount invokes counter and returns its count only when that
+// count is strictly greater than minRows; a count equal to minRows is
+// rejected. An error from counter is wrapped and returned with a zero count.
+func validateMinRowsCount(minRows int, counter func() (int64, error)) (int64, error) {
+	count, err := counter()
+	if err != nil {
+		return 0, fmt.Errorf("unable to count entries: %w", err)
+	}
+	if count <= int64(minRows) {
+		return 0, fmt.Errorf("data has %d rows, must have more than %d", count, minRows)
+	}
+	return count, nil
+}
diff --git a/cmd/grype/cli/options/database_build.go b/cmd/grype/cli/options/database_build.go
index 6298cccb453..2edb61ec3bf 100644
--- a/cmd/grype/cli/options/database_build.go
+++ b/cmd/grype/cli/options/database_build.go
@@ -54,6 +54,16 @@ type DatabaseBuild struct {
 	// pull + provider options (used by `db-builder pull` and indirectly by build for state reading)
 	Pull DatabaseBuildPull `yaml:"pull" json:"pull" mapstructure:"pull"`
 	Provider DatabaseBuildProvider `yaml:"provider" json:"provider" mapstructure:"provider"`
+
+	// cache subcommand options (used by `db-builder cache {backup,restore,status,delete}`)
+	Cache DatabaseBuildCache `yaml:"cache" json:"cache" mapstructure:"cache"`
+}
+
+type DatabaseBuildCache struct {
+	Path string `yaml:"path" json:"path" mapstructure:"path"`
+	DeleteExisting bool `yaml:"delete-existing" json:"delete-existing" mapstructure:"delete-existing"`
+	ResultsOnly bool `yaml:"results-only" json:"results-only" mapstructure:"results-only"`
+	MinRows int `yaml:"min-rows" json:"min-rows" mapstructure:"min-rows"`
 }
 
 type DatabaseBuildPull struct {
@@ -61,19 +71,19 @@ type DatabaseBuildPull struct {
 }
 
 type DatabaseBuildProvider struct {
-	Root string `yaml:"root" json:"root" mapstructure:"root"`
-	IncludeFilter []string `yaml:"include-filter" json:"include-filter" mapstructure:"include-filter"`
-	Vunnel DatabaseBuildVunnel `yaml:"vunnel" 
json:"vunnel" mapstructure:"vunnel"` - Configs []pull.ProviderRunConfig `yaml:"configs" json:"configs" mapstructure:"configs"` + Root string `yaml:"root" json:"root" mapstructure:"root"` + IncludeFilter []string `yaml:"include-filter" json:"include-filter" mapstructure:"include-filter"` + Vunnel DatabaseBuildVunnel `yaml:"vunnel" json:"vunnel" mapstructure:"vunnel"` + Configs []pull.ProviderRunConfig `yaml:"configs" json:"configs" mapstructure:"configs"` } type DatabaseBuildVunnel struct { - Config string `yaml:"config" json:"config" mapstructure:"config"` - Executor string `yaml:"executor" json:"executor" mapstructure:"executor"` - DockerImage string `yaml:"docker-image" json:"docker-image" mapstructure:"docker-image"` - DockerTag string `yaml:"docker-tag" json:"docker-tag" mapstructure:"docker-tag"` - GenerateConfigs bool `yaml:"generate-configs" json:"generate-configs" mapstructure:"generate-configs"` - ExcludeProviders []string `yaml:"exclude-providers" json:"exclude-providers" mapstructure:"exclude-providers"` + Config string `yaml:"config" json:"config" mapstructure:"config"` + Executor string `yaml:"executor" json:"executor" mapstructure:"executor"` + DockerImage string `yaml:"docker-image" json:"docker-image" mapstructure:"docker-image"` + DockerTag string `yaml:"docker-tag" json:"docker-tag" mapstructure:"docker-tag"` + GenerateConfigs bool `yaml:"generate-configs" json:"generate-configs" mapstructure:"generate-configs"` + ExcludeProviders []string `yaml:"exclude-providers" json:"exclude-providers" mapstructure:"exclude-providers"` Env stringMap `yaml:"env,omitempty" json:"env,omitempty" mapstructure:"env"` } @@ -153,6 +163,19 @@ func (o *DatabaseBuild) AddFlags(flags clio.FlagSet) { flags.StringArrayVarP(&o.Provider.IncludeFilter, "provider-name", "p", "one or more provider names to filter the build to (default: empty = all)") + + // cache subcommand flags + flags.StringVarP(&o.Cache.Path, "path", "", + "path to the cache archive (used by 'db-builder 
cache backup' and 'restore')") + + flags.BoolVarP(&o.Cache.DeleteExisting, "delete-existing", "", + "delete any existing provider data before restoring from the cache archive") + + flags.BoolVarP(&o.Cache.ResultsOnly, "results-only", "", + "archive only the provider 'results' directory (omit raw 'input' data)") + + flags.IntVarP(&o.Cache.MinRows, "min-rows", "", + "fail 'cache status' validation unless more than this many rows are present in the provider results") } func (o *DatabaseBuild) DescribeFields(d clio.FieldDescriptionSet) { @@ -181,4 +204,9 @@ func (o *DatabaseBuild) DescribeFields(d clio.FieldDescriptionSet) { d.Add(&o.Provider.Vunnel.GenerateConfigs, `generate additional provider configurations from 'vunnel list' output`) d.Add(&o.Provider.Vunnel.ExcludeProviders, `providers to exclude from 'vunnel list' output (only when generate-configs is true)`) d.Add(&o.Provider.Vunnel.Env, `environment variables to pass to the vunnel process`) + + d.Add(&o.Cache.Path, `path to the cache archive used by 'db-builder cache backup' and 'restore'`) + d.Add(&o.Cache.DeleteExisting, `delete any existing provider data before restoring from the cache archive`) + d.Add(&o.Cache.ResultsOnly, `archive only the provider 'results' directory (omit raw 'input' data)`) + d.Add(&o.Cache.MinRows, `fail 'cache status' unless more than this many rows are present in the provider results`) } diff --git a/go.mod b/go.mod index 13c69e52a60..96de34a5a31 100644 --- a/go.mod +++ b/go.mod @@ -82,6 +82,7 @@ require ( require ( github.com/bmatcuk/doublestar/v4 v4.10.0 github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 + golang.org/x/sync v0.20.0 ) require ( @@ -329,7 +330,6 @@ require ( golang.org/x/mod v0.36.0 // indirect golang.org/x/net v0.54.0 // indirect golang.org/x/oauth2 v0.36.0 // indirect - golang.org/x/sync v0.20.0 // indirect golang.org/x/sys v0.44.0 // indirect golang.org/x/term v0.43.0 // indirect golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect diff --git 
a/grype/db/package_legacy.go b/grype/db/package_legacy.go index c7f1603e6b7..049f2d4d051 100644 --- a/grype/db/package_legacy.go +++ b/grype/db/package_legacy.go @@ -13,11 +13,11 @@ import ( "github.com/scylladb/go-set/strset" "github.com/spf13/afero" - "github.com/anchore/grype/grype/db/internal/tarutil" grypeDBLegacy "github.com/anchore/grype/grype/db/v5" grypeDBLegacyDistribution "github.com/anchore/grype/grype/db/v5/distribution" grypeDBLegacyStore "github.com/anchore/grype/grype/db/v5/store" "github.com/anchore/grype/internal/log" + "github.com/anchore/grype/internal/tarutil" ) // listingFiles is a set of files that should not be included in the archive diff --git a/grype/db/v6/build/archive.go b/grype/db/v6/build/archive.go index 8dd7092f991..0c198eb9e23 100644 --- a/grype/db/v6/build/archive.go +++ b/grype/db/v6/build/archive.go @@ -10,11 +10,11 @@ import ( "strings" "time" - "github.com/anchore/grype/grype/db/internal/tarutil" "github.com/anchore/grype/grype/db/provider" v6 "github.com/anchore/grype/grype/db/v6" v6Distribution "github.com/anchore/grype/grype/db/v6/distribution" "github.com/anchore/grype/internal/log" + "github.com/anchore/grype/internal/tarutil" ) func CreateArchive(dbDir, overrideArchiveExtension string, compressorCommands map[string]string) error { diff --git a/grype/db/internal/tarutil/file_entry.go b/internal/tarutil/file_entry.go similarity index 100% rename from grype/db/internal/tarutil/file_entry.go rename to internal/tarutil/file_entry.go diff --git a/grype/db/internal/tarutil/file_entry_test.go b/internal/tarutil/file_entry_test.go similarity index 100% rename from grype/db/internal/tarutil/file_entry_test.go rename to internal/tarutil/file_entry_test.go diff --git a/grype/db/internal/tarutil/populate.go b/internal/tarutil/populate.go similarity index 100% rename from grype/db/internal/tarutil/populate.go rename to internal/tarutil/populate.go diff --git a/grype/db/internal/tarutil/populate_test.go 
b/internal/tarutil/populate_test.go similarity index 100% rename from grype/db/internal/tarutil/populate_test.go rename to internal/tarutil/populate_test.go diff --git a/grype/db/internal/tarutil/reader_entry.go b/internal/tarutil/reader_entry.go similarity index 100% rename from grype/db/internal/tarutil/reader_entry.go rename to internal/tarutil/reader_entry.go diff --git a/grype/db/internal/tarutil/reader_entry_test.go b/internal/tarutil/reader_entry_test.go similarity index 100% rename from grype/db/internal/tarutil/reader_entry_test.go rename to internal/tarutil/reader_entry_test.go diff --git a/grype/db/internal/tarutil/tar.go b/internal/tarutil/tar.go similarity index 100% rename from grype/db/internal/tarutil/tar.go rename to internal/tarutil/tar.go diff --git a/grype/db/internal/tarutil/writer.go b/internal/tarutil/writer.go similarity index 100% rename from grype/db/internal/tarutil/writer.go rename to internal/tarutil/writer.go diff --git a/grype/db/internal/tarutil/writer_test.go b/internal/tarutil/writer_test.go similarity index 100% rename from grype/db/internal/tarutil/writer_test.go rename to internal/tarutil/writer_test.go From 5bc64961dfdc5e8953ee93a37c6019dbb1fcfe6c Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Fri, 22 May 2026 11:57:48 -0400 Subject: [PATCH 5/8] wip: moving db build to grype Signed-off-by: Will Murphy --- .../commands/db_builder_cache_backup_test.go | 143 +++++++++++++++ .../commands/db_builder_cache_restore_test.go | 163 ++++++++++++++++++ .../cli/commands/db_builder_cache_test.go | 82 +++++++++ .../cli/commands/db_builder_common_test.go | 79 +++++++++ cmd/grype/cli/options/database_build_test.go | 73 ++++++++ llms.txt | 33 ++++ 6 files changed, 573 insertions(+) create mode 100644 cmd/grype/cli/commands/db_builder_cache_backup_test.go create mode 100644 cmd/grype/cli/commands/db_builder_cache_restore_test.go create mode 100644 cmd/grype/cli/commands/db_builder_cache_test.go create mode 100644 
cmd/grype/cli/commands/db_builder_common_test.go create mode 100644 cmd/grype/cli/options/database_build_test.go diff --git a/cmd/grype/cli/commands/db_builder_cache_backup_test.go b/cmd/grype/cli/commands/db_builder_cache_backup_test.go new file mode 100644 index 00000000000..59bd0f3f8ea --- /dev/null +++ b/cmd/grype/cli/commands/db_builder_cache_backup_test.go @@ -0,0 +1,143 @@ +package commands + +import ( + "archive/tar" + "encoding/json" + "errors" + "io" + "os" + "path/filepath" + "testing" + + "github.com/scylladb/go-set/strset" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/grype/cmd/grype/cli/options" + dbprovider "github.com/anchore/grype/grype/db/provider" + "github.com/anchore/grype/internal/tarutil" +) + +// writeTestProviderWorkspace lays out a minimal vunnel-style workspace under +// root/<name>/ for use by the archive tests: +// +// root/ +// └── <name>/ +// ├── input/some-input-file.txt +// ├── results/results.db +// └── metadata.json (Stale: false) +func writeTestProviderWorkspace(t *testing.T, root, name string) { + t.Helper() + pdir := filepath.Join(root, name) + require.NoError(t, os.MkdirAll(filepath.Join(pdir, "input"), 0755)) + require.NoError(t, os.MkdirAll(filepath.Join(pdir, "results"), 0755)) + require.NoError(t, os.WriteFile(filepath.Join(pdir, "input", "some-input-file.txt"), []byte("raw"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(pdir, "results", "results.db"), []byte("results"), 0644)) + + state := dbprovider.State{ + Provider: name, + Version: 1, + Stale: false, + } + b, err := json.MarshalIndent(state, "", " ") + require.NoError(t, err) + require.NoError(t, os.WriteFile(filepath.Join(pdir, "metadata.json"), b, 0644)) +} + +func TestArchiveProvider(t *testing.T) { + tests := []struct { + name string + resultsOnly bool + wantNames *strset.Set + wantStateStale bool + }{ + { + name: "default config includes input", + resultsOnly: false, + wantNames: strset.New( + 
"test-provider/input/some-input-file.txt", + "test-provider/metadata.json", + "test-provider/results/results.db", + ), + wantStateStale: false, + }, + { + name: "results only excludes input and marks metadata stale", + resultsOnly: true, + wantNames: strset.New( + "test-provider/metadata.json", + "test-provider/results/results.db", + ), + wantStateStale: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + root := t.TempDir() + writeTestProviderWorkspace(t, root, "test-provider") + + archivePath := filepath.Join(t.TempDir(), "archive.tar") + tw, err := tarutil.NewWriter(archivePath) + require.NoError(t, err) + + opts := options.DefaultDatabaseBuild() + opts.Provider.Root = root + opts.Cache.ResultsOnly = tt.resultsOnly + + require.NoError(t, archiveProvider(opts, "test-provider", tw)) + require.NoError(t, tw.Close()) + + f, err := os.Open(archivePath) + require.NoError(t, err) + defer f.Close() + + var state dbprovider.State + foundNames := strset.New() + tr := tar.NewReader(f) + for { + header, err := tr.Next() + if errors.Is(err, io.EOF) { + break + } + require.NoError(t, err) + foundNames.Add(header.Name) + if header.Name == "test-provider/metadata.json" { + require.NoError(t, json.NewDecoder(tr).Decode(&state)) + } + } + + assert.True(t, foundNames.IsEqual(tt.wantNames), + "archive contents mismatch:\n got: %v\n want: %v", foundNames.List(), tt.wantNames.List()) + assert.Equal(t, tt.wantStateStale, state.Stale, "metadata.Stale flag") + }) + } +} + +func TestArchiveProvider_RestoresCWD(t *testing.T) { + // archiveProvider chdirs into Provider.Root; on completion the cwd must be restored + // so subsequent providers in the same run aren't operating from an unexpected dir. 
+ root := t.TempDir() + writeTestProviderWorkspace(t, root, "alpine") + + cwdBefore, err := os.Getwd() + require.NoError(t, err) + t.Cleanup(func() { + // be defensive in case the test fails mid-run and leaves cwd in tempdir + _ = os.Chdir(cwdBefore) + }) + + archivePath := filepath.Join(t.TempDir(), "archive.tar") + tw, err := tarutil.NewWriter(archivePath) + require.NoError(t, err) + defer tw.Close() + + opts := options.DefaultDatabaseBuild() + opts.Provider.Root = root + + require.NoError(t, archiveProvider(opts, "alpine", tw)) + + cwdAfter, err := os.Getwd() + require.NoError(t, err) + assert.Equal(t, cwdBefore, cwdAfter, "cwd was not restored after archiveProvider") +} diff --git a/cmd/grype/cli/commands/db_builder_cache_restore_test.go b/cmd/grype/cli/commands/db_builder_cache_restore_test.go new file mode 100644 index 00000000000..0efe2b27a49 --- /dev/null +++ b/cmd/grype/cli/commands/db_builder_cache_restore_test.go @@ -0,0 +1,163 @@ +package commands + +import ( + "os" + "path/filepath" + "sort" + "testing" + + "github.com/scylladb/go-set/strset" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/grype/cmd/grype/cli/options" + "github.com/anchore/grype/internal/tarutil" +) + +func TestGetProviderNameFromPath(t *testing.T) { + tests := []struct { + path string + want string + }{ + {path: "alpine/results/results.db", want: "alpine"}, + {path: "alpine", want: "alpine"}, + {path: "alpine/", want: "alpine"}, + {path: "./alpine/metadata.json", want: "alpine"}, + {path: "", want: "."}, + } + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + assert.Equal(t, tt.want, getProviderNameFromPath(tt.path)) + }) + } +} + +func TestDetectPathTraversal(t *testing.T) { + root := "/work/data" + tests := []struct { + name string + cleanedPath string + wantErr bool + }{ + {name: "inside root", cleanedPath: "/work/data/alpine/metadata.json", wantErr: false}, + {name: "exact root", cleanedPath: "/work/data", 
wantErr: false}, + {name: "outside root", cleanedPath: "/etc/passwd", wantErr: true}, + {name: "sibling dir", cleanedPath: "/work/other/file", wantErr: true}, + {name: "empty path ok", cleanedPath: "", wantErr: false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := detectPathTraversal(root, tt.cleanedPath) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + }) + } +} + +func TestDetectLinkTraversal(t *testing.T) { + root := "/work/data" + tests := []struct { + name string + cleanedPath string + linkTarget string + wantErr bool + }{ + {name: "relative link inside root", cleanedPath: "/work/data/alpine/link", linkTarget: "metadata.json", wantErr: false}, + {name: "relative link escapes root", cleanedPath: "/work/data/alpine/link", linkTarget: "../../etc/passwd", wantErr: true}, + {name: "absolute link inside root", cleanedPath: "/work/data/alpine/link", linkTarget: "/work/data/alpine/file", wantErr: false}, + {name: "absolute link outside root", cleanedPath: "/work/data/alpine/link", linkTarget: "/etc/passwd", wantErr: true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := detectLinkTraversal(root, tt.cleanedPath, tt.linkTarget) + if tt.wantErr { + require.Error(t, err, "expected traversal to be rejected for %s -> %s", tt.cleanedPath, tt.linkTarget) + return + } + require.NoError(t, err) + }) + } +} + +// TestBackupRestoreRoundTrip exercises the full data-sync archive cycle: +// build a provider workspace, archive it with cache backup, then restore +// the archive into a fresh root and verify file equivalence. 
+func TestBackupRestoreRoundTrip(t *testing.T) { + srcRoot := t.TempDir() + writeTestProviderWorkspace(t, srcRoot, "alpine") + + archivePath := filepath.Join(t.TempDir(), "cache.tar.gz") + + // --- backup --- + tw, err := tarutil.NewWriter(archivePath) + require.NoError(t, err) + + backupOpts := options.DefaultDatabaseBuild() + backupOpts.Provider.Root = srcRoot + + require.NoError(t, archiveProvider(backupOpts, "alpine", tw)) + require.NoError(t, tw.Close()) + + // readProviderNamesFromTarGz sees the same providers we wrote + names, err := readProviderNamesFromTarGz(archivePath) + require.NoError(t, err) + sort.Strings(names) + assert.Equal(t, []string{"alpine"}, names) + + // --- restore --- + dstRoot := t.TempDir() + + // extractTarGz runs relative to cwd; restore would chdir for us, but + // we're calling extractTarGz directly to keep the test focused. + cwdBefore, err := os.Getwd() + require.NoError(t, err) + t.Cleanup(func() { _ = os.Chdir(cwdBefore) }) + require.NoError(t, os.Chdir(dstRoot)) + + f, err := os.Open(archivePath) + require.NoError(t, err) + defer f.Close() + + require.NoError(t, extractTarGz(f, strset.New("alpine"))) + + // --- verify --- + for _, rel := range []string{ + "alpine/input/some-input-file.txt", + "alpine/metadata.json", + "alpine/results/results.db", + } { + _, err := os.Stat(filepath.Join(dstRoot, rel)) + assert.NoError(t, err, "expected restored file %s", rel) + } +} + +func TestExtractTarGz_RejectsEmptySelection(t *testing.T) { + srcRoot := t.TempDir() + writeTestProviderWorkspace(t, srcRoot, "alpine") + + archivePath := filepath.Join(t.TempDir(), "cache.tar.gz") + tw, err := tarutil.NewWriter(archivePath) + require.NoError(t, err) + + opts := options.DefaultDatabaseBuild() + opts.Provider.Root = srcRoot + require.NoError(t, archiveProvider(opts, "alpine", tw)) + require.NoError(t, tw.Close()) + + cwdBefore, err := os.Getwd() + require.NoError(t, err) + t.Cleanup(func() { _ = os.Chdir(cwdBefore) }) + require.NoError(t, 
os.Chdir(t.TempDir())) + + f, err := os.Open(archivePath) + require.NoError(t, err) + defer f.Close() + + // Selecting only a provider that isn't in the archive should error. + err = extractTarGz(f, strset.New("wolfi")) + require.Error(t, err) + assert.Contains(t, err.Error(), "no provider data was restored") +} diff --git a/cmd/grype/cli/commands/db_builder_cache_test.go b/cmd/grype/cli/commands/db_builder_cache_test.go new file mode 100644 index 00000000000..84285b3e500 --- /dev/null +++ b/cmd/grype/cli/commands/db_builder_cache_test.go @@ -0,0 +1,82 @@ +package commands + +import ( + "fmt" + "os" + "path/filepath" + "sort" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestReadProviderNamesFromRoot(t *testing.T) { + root := t.TempDir() + for _, name := range []string{"alpine", "alma", "rhel"} { + require.NoError(t, os.Mkdir(filepath.Join(root, name), 0755)) + } + // regular file at root should be ignored + require.NoError(t, os.WriteFile(filepath.Join(root, "notes.txt"), []byte("ignored"), 0644)) + + got, err := readProviderNamesFromRoot(root) + require.NoError(t, err) + sort.Strings(got) + assert.Equal(t, []string{"alma", "alpine", "rhel"}, got) +} + +func TestReadProviderNamesFromRoot_MissingRoot(t *testing.T) { + _, err := readProviderNamesFromRoot(filepath.Join(t.TempDir(), "does-not-exist")) + require.Error(t, err) +} + +func TestValidateMinRowsCount(t *testing.T) { + tests := []struct { + name string + minRows int + counter func() (int64, error) + want int64 + wantErr bool + }{ + { + name: "empty count passes when min-rows is -1", + minRows: -1, + counter: func() (int64, error) { return 0, nil }, + want: 0, + }, + { + name: "empty count fails when min-rows is 0", + minRows: 0, + counter: func() (int64, error) { return 0, nil }, + wantErr: true, + }, + { + name: "count above threshold passes", + minRows: 12, + counter: func() (int64, error) { return 13, nil }, + want: 13, + }, + { + name: "count equal 
to threshold fails (strictly more than)", + minRows: 13, + counter: func() (int64, error) { return 13, nil }, + wantErr: true, + }, + { + name: "counter error surfaces", + counter: func() (int64, error) { return 0, fmt.Errorf("boom") }, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + count, err := validateMinRowsCount(tt.minRows, tt.counter) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tt.want, count) + }) + } +} diff --git a/cmd/grype/cli/commands/db_builder_common_test.go b/cmd/grype/cli/commands/db_builder_common_test.go new file mode 100644 index 00000000000..819d00dc50d --- /dev/null +++ b/cmd/grype/cli/commands/db_builder_common_test.go @@ -0,0 +1,79 @@ +package commands + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestValidateCPEParts(t *testing.T) { + tests := []struct { + name string + parts []string + wantErr bool + }{ + {name: "all valid", parts: []string{"a", "h", "o"}, wantErr: false}, + {name: "subset valid", parts: []string{"a"}, wantErr: false}, + {name: "empty rejected", parts: nil, wantErr: true}, + {name: "empty slice rejected", parts: []string{}, wantErr: true}, + {name: "invalid part rejected", parts: []string{"a", "x"}, wantErr: true}, + {name: "uppercase not accepted", parts: []string{"A"}, wantErr: true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validateCPEParts(tt.parts) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + }) + } +} + +func TestValidateRequestedProviders(t *testing.T) { + tests := []struct { + name string + onDisk []string + requested []string + want []string + wantErr bool + }{ + { + name: "no filter returns all on-disk", + onDisk: []string{"alpine", "alma", "rhel"}, + requested: nil, + want: []string{"alpine", "alma", "rhel"}, + }, + { + name: "filter intersects with on-disk", + 
onDisk: []string{"alpine", "alma", "rhel"}, + requested: []string{"alma", "rhel"}, + want: []string{"alma", "rhel"}, + }, + { + name: "filter preserves on-disk order", + onDisk: []string{"alpine", "alma", "rhel"}, + requested: []string{"rhel", "alpine"}, + want: []string{"alpine", "rhel"}, + }, + { + name: "missing provider returns error", + onDisk: []string{"alpine"}, + requested: []string{"alpine", "wolfi"}, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := validateRequestedProviders(tt.onDisk, tt.requested) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/cmd/grype/cli/options/database_build_test.go b/cmd/grype/cli/options/database_build_test.go new file mode 100644 index 00000000000..8bf605db4d1 --- /dev/null +++ b/cmd/grype/cli/options/database_build_test.go @@ -0,0 +1,73 @@ +package options + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFlattenCSV(t *testing.T) { + tests := []struct { + name string + in []string + want []string + }{ + {name: "nil", in: nil, want: nil}, + // flattenCSV short-circuits on empty: it returns the input as-is, so an empty slice + // stays an empty slice rather than being normalized to nil. 
+ {name: "empty", in: []string{}, want: []string{}}, + {name: "single", in: []string{"alpine"}, want: []string{"alpine"}}, + {name: "csv in one entry", in: []string{"alpine,alma,rhel"}, want: []string{"alpine", "alma", "rhel"}}, + {name: "multiple entries", in: []string{"alpine", "alma"}, want: []string{"alpine", "alma"}}, + {name: "mixed csv + entries", in: []string{"alpine,alma", "rhel"}, want: []string{"alpine", "alma", "rhel"}}, + {name: "whitespace tolerated", in: []string{" alpine , alma "}, want: []string{"alpine", "alma"}}, + {name: "empty segments dropped", in: []string{"alpine,,", ",alma"}, want: []string{"alpine", "alma"}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, flattenCSV(tt.in)) + }) + } +} + +func TestStringMap_String(t *testing.T) { + tests := []struct { + name string + m stringMap + want string + }{ + {name: "nil", m: nil, want: "{}"}, + {name: "empty", m: stringMap{}, want: "{}"}, + {name: "single entry", m: stringMap{"foo": "bar"}, want: "{foo: bar}"}, + {name: "deterministic key order", m: stringMap{"b": "2", "a": "1", "c": "3"}, want: "{a: 1, b: 2, c: 3}"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, tt.m.String()) + }) + } +} + +func TestDatabaseBuild_PostLoad_FlattensProviderName(t *testing.T) { + opts := DefaultDatabaseBuild() + opts.Provider.IncludeFilter = []string{"alpine,alma", "rhel"} + + require.NoError(t, opts.PostLoad()) + + assert.Equal(t, []string{"alpine", "alma", "rhel"}, opts.Provider.IncludeFilter) +} + +func TestDefaultDatabaseBuild_NonZeroDefaults(t *testing.T) { + opts := DefaultDatabaseBuild() + + // guard against regressions in defaults that the grype-db-manager + CI rely on + assert.Equal(t, "./build", opts.Dir) + assert.Equal(t, "./data", opts.Provider.Root) + assert.Equal(t, "docker", opts.Provider.Vunnel.Executor) + assert.Equal(t, "ghcr.io/anchore/vunnel", opts.Provider.Vunnel.DockerImage) + assert.Equal(t, 
"latest", opts.Provider.Vunnel.DockerTag) + assert.Equal(t, 4, opts.Pull.Parallelism) + assert.NotNil(t, opts.CompressorCommands, "CompressorCommands must be a non-nil stringMap so YAML serialization emits '{}' instead of 'null'") + assert.NotNil(t, opts.Provider.Vunnel.Env, "Env must be a non-nil stringMap so YAML serialization emits '{}' instead of 'null'") +} diff --git a/llms.txt b/llms.txt index edc4da667c9..f82ec644d3f 100644 --- a/llms.txt +++ b/llms.txt @@ -26,8 +26,35 @@ Grype is a vulnerability scanner for container images and filesystems developed - `grype/` - Core library with matchers, database, and scanning logic - `grype/matcher/` - Package-specific vulnerability matchers - `grype/db/` - Database management and vulnerability storage +- `grype/db/build/` - Provider orchestration (vunnel runner) + pull pipeline; backs the `grype db-builder` subcommands - `grype/pkg/` - Package identification and metadata - `grype/presenter/` - Output formatting (JSON, table, SARIF, etc.) +- `internal/tarutil/` - Tar archive read/write used by `grype db-builder` and DB packaging + +## CLI Surface + +Two top-level command groups serve different audiences: + +- `grype db ...` - end-user operations on the **installed** vulnerability DB + (`check`, `update`, `status`, `list`, `import`, `delete`, `diff`, `search`, + `providers`) +- `grype db-builder ...` - producer operations that **build** a vulnerability + DB from upstream provider data. 
Used by the data-sync CI pipeline and by + grype-db-manager during publish: + - `pull -p <provider>` - run vulnerability providers (vunnel by default) to + refresh on-disk workspace data under `--root` + - `build --dir <dir>` - write a SQLite DB from existing workspace data + - `package --dir <dir>` - archive the built DB into a compressed tarball + - `cache backup --path <archive> -p <provider>` - snapshot a provider + workspace into a tar archive + - `cache restore --path <archive>` - extract a workspace tarball back + into `--root` + - `cache status -p <provider>` - verify on-disk workspace state + - `cache delete [-p <provider>]` - remove provider workspace data + +Configuration lives under top-level keys `db:` (consumer) and `db-builder:` +(producer) in `~/.grype.yaml`; corresponding env vars use prefixes +`GRYPE_DB_*` and `GRYPE_DB_BUILDER_*` respectively. ## Usage @@ -41,4 +68,10 @@ Scan with SBOM: grype sbom:./sbom.json ``` +Build a DB from an existing provider workspace: +```bash +grype db-builder build --schema 6 --dir ./build +grype db-builder package --dir ./build +``` + The tool automatically manages its vulnerability database and provides configurable output formats and filtering options. \ No newline at end of file From 2f114a944b167873bbe5f3155eee0198505f5654 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Fri, 22 May 2026 13:37:58 -0400 Subject: [PATCH 6/8] db build port: make targets Signed-off-by: Will Murphy --- .binny.yaml | 9 ++ .gitignore | 4 + .make/db_builder.go | 256 ++++++++++++++++++++++++++++++++++++++++++++ .make/main.go | 2 + 4 files changed, 271 insertions(+) create mode 100644 .make/db_builder.go diff --git a/.binny.yaml b/.binny.yaml index 07296a526ed..03d00a1b825 100644 --- a/.binny.yaml +++ b/.binny.yaml @@ -4,6 +4,15 @@ cooldown: 7d # need to specify tools here that are not already defined there (or that we want to override # with different versions). 
tools: + # used by 'make db-builder:cache-restore' to fetch provider workspace tarballs + # from ghcr.io before running vunnel; pins to the same version grype-db uses + - name: oras + version: + want: v1.3.2 + method: github-release + with: + repo: oras-project/oras + # used for integration tests - name: skopeo version: diff --git a/.gitignore b/.gitignore index 3a95629e0b1..2a3020c6ed4 100644 --- a/.gitignore +++ b/.gitignore @@ -58,6 +58,10 @@ coverage.txt /listing.json *.db *.db-journal + +# local db-builder workspace (populated by 'make db-builder:pull') +/data +/.cache !**/testdata/**/*.db !**/testdata/**/bin/ !**/testdata/**/*.jar diff --git a/.make/db_builder.go b/.make/db_builder.go new file mode 100644 index 00000000000..75686a08ab5 --- /dev/null +++ b/.make/db_builder.go @@ -0,0 +1,256 @@ +package main + +import ( + "fmt" + "os" + "strings" + + . "github.com/anchore/go-make" + "github.com/anchore/go-make/file" + "github.com/anchore/go-make/log" + "github.com/anchore/go-make/run" +) + +// Local-iteration defaults: small enough to finish in single-digit minutes +// on a laptop and large enough to exercise both OS and language matchers. +const ( + defaultDBBuilderProviders = "wolfi" + defaultDBBuilderRoot = "./data" + defaultDBBuilderDir = "./build" + defaultDBBuilderCacheDir = "./.cache/vunnel" + defaultDBBuilderVunnelImage = "ghcr.io/anchore/vunnel:latest" + // ghcr.io path retains "grype-db" because that's the existing ORAS + // artifact namespace populated by the daily-data-sync pipeline; the + // string has nothing to do with the grype-db binary. + defaultDBBuilderCacheImage = "ghcr.io/anchore/grype-db/data" + defaultDBBuilderCacheTag = "latest" +) + +// dbBuilderTasks wraps a local end-to-end workflow that builds a vulnerability +// database from a fresh grype checkout, using only `grype db-builder` (no +// grype-db binary required). 
The intent is twofold: a fast inner-loop for +// developers iterating on the builder code, and a reproducible recipe a +// reviewer can run to confirm grype can produce a real DB on its own. +// +// The pipeline mirrors the production data-sync flow: restore the previous +// day's vunnel workspace from ghcr.io (cache-restore), refresh it with a new +// vunnel run (pull), then build and package. The cache restore is required +// for any non-trivial provider — running vunnel from scratch against NVD, +// for example, takes hours. +// +// Environment knobs: +// +// DB_BUILDER_PROVIDERS csv of provider names to run (default: wolfi) +// DB_BUILDER_ROOT vunnel workspace directory (default: ./data) +// DB_BUILDER_DIR DB build directory (default: ./build) +// DB_BUILDER_CACHE_DIR scratch dir for ORAS pulls (default: ./.cache/vunnel) +// DB_BUILDER_CACHE_IMAGE ghcr.io image base path (default: ghcr.io/anchore/grype-db/data) +// DB_BUILDER_CACHE_TAG image tag to pull (default: latest) +// DB_BUILDER_VUNNEL_IMAGE vunnel docker image (default: ghcr.io/anchore/vunnel:latest) +func dbBuilderTasks() Task { + return Task{ + Name: "db-builder", + Description: "build a local vulnerability DB end-to-end using 'grype db-builder' (no grype-db dependency)", + Dependencies: Deps("db-builder:pull", "db-builder:build", "db-builder:package"), + Tasks: []Task{ + { + Name: "db-builder:bootstrap", + Description: "pull the vunnel docker image so subsequent pulls don't block on network", + Run: func() { + Run(fmt.Sprintf("docker pull %s", dbBuilderVunnelImage())) + }, + }, + { + Name: "db-builder:cache-restore", + Description: "for each provider in DB_BUILDER_PROVIDERS: ORAS pull the workspace tarball " + + "from ghcr.io and extract into DB_BUILDER_ROOT", + Run: func() { + providers := dbBuilderProviderList() + root := dbBuilderRoot() + cacheDir := dbBuilderCacheDir() + image := dbBuilderCacheImage() + tag := dbBuilderCacheTag() + + if err := os.MkdirAll(cacheDir, 0o755); err != nil { + 
log.Error(fmt.Errorf("create cache dir %s: %w", cacheDir, err)) + return + } + + for _, p := range providers { + providerCacheDir := fmt.Sprintf("%s/%s", cacheDir, p) + tarball := fmt.Sprintf("%s/grype-db-cache.tar.gz", providerCacheDir) + log.Info("restoring provider %q workspace from %s/%s:%s", p, image, p, tag) + + // oras pull writes files into pwd with the path they had when pushed: + // .cache/vunnel/<provider>/grype-db-cache.tar.gz. Run from repo root. + // NoFail so a missing tag for a new provider doesn't abort the whole batch. + out := Run(fmt.Sprintf("oras pull %s/%s:%s", image, p, tag), run.NoFail()) + if !file.Exists(tarball) { + log.Info("no cache available for %q (oras output: %s); skipping", p, strings.TrimSpace(out)) + continue + } + + Run(fmt.Sprintf("go run ./cmd/grype db-builder cache restore --path %s --delete-existing -p %s", + tarball, p), + run.Env("GRYPE_DB_BUILDER_PROVIDER_ROOT", root), + ) + _ = os.RemoveAll(providerCacheDir) + } + }, + }, + { + Name: "db-builder:cache-backup", + Description: "snapshot the local DB_BUILDER_ROOT workspaces into per-provider tarballs under DB_BUILDER_CACHE_DIR (does not push to ghcr.io)", + Run: func() { + providers := dbBuilderProviderList() + root := dbBuilderRoot() + cacheDir := dbBuilderCacheDir() + + if err := os.MkdirAll(cacheDir, 0o755); err != nil { + log.Error(fmt.Errorf("create cache dir %s: %w", cacheDir, err)) + return + } + for _, p := range providers { + providerCacheDir := fmt.Sprintf("%s/%s", cacheDir, p) + if err := os.MkdirAll(providerCacheDir, 0o755); err != nil { + log.Error(fmt.Errorf("create %s: %w", providerCacheDir, err)) + continue + } + tarball := fmt.Sprintf("%s/grype-db-cache.tar.gz", providerCacheDir) + Run(fmt.Sprintf("go run ./cmd/grype db-builder cache backup --path %s -p %s", tarball, p), + run.Env("GRYPE_DB_BUILDER_PROVIDER_ROOT", root), + ) + } + }, + }, + { + Name: "db-builder:pull", + Description: "restore each provider's workspace from ghcr.io and refresh it with a vunnel run " +
+ "(mirrors the production data-sync per-provider step)", + Dependencies: Deps("db-builder:bootstrap", "db-builder:cache-restore"), + Run: func() { + providers := dbBuilderProvidersCSV() + Run(fmt.Sprintf("go run ./cmd/grype db-builder pull -p %s", providers), + dbBuilderEnv()...) + }, + }, + { + Name: "db-builder:build", + Description: "write a SQLite DB from existing workspace data in DB_BUILDER_ROOT", + Run: func() { + providers := dbBuilderProvidersCSV() + Run(fmt.Sprintf("go run ./cmd/grype db-builder build -p %s", providers), + dbBuilderEnv()...) + }, + }, + { + Name: "db-builder:package", + Description: "package the DB at DB_BUILDER_DIR into a distributable archive", + Run: func() { + Run("go run ./cmd/grype db-builder package", dbBuilderEnv()...) + }, + }, + { + Name: "db-builder:clean", + Description: "remove DB_BUILDER_ROOT, DB_BUILDER_DIR, and DB_BUILDER_CACHE_DIR", + Run: func() { + for _, dir := range []string{dbBuilderRoot(), dbBuilderDir(), dbBuilderCacheDir()} { + if err := os.RemoveAll(dir); err != nil { + log.Error(fmt.Errorf("remove %s: %w", dir, err)) + } else { + log.Info("removed %s", dir) + } + } + }, + }, + }, + } +} + +// dbBuilderEnv returns run.Options that point the embedded `grype` +// subprocess at the user-selected workspace + build dirs. Using +// GRYPE_DB_BUILDER_* env vars (rather than CLI flags) keeps the Run() +// invocations short and matches how the grype-db-manager will eventually +// invoke grype. +func dbBuilderEnv() []run.Option { + return []run.Option{ + run.Env("GRYPE_DB_BUILDER_PROVIDER_ROOT", dbBuilderRoot()), + run.Env("GRYPE_DB_BUILDER_DIR", dbBuilderDir()), + } +} + +// dbBuilderProviderList returns the parsed list of providers from +// DB_BUILDER_PROVIDERS (or the default), used by per-provider loops. 
+func dbBuilderProviderList() []string { + raw := os.Getenv("DB_BUILDER_PROVIDERS") + if raw == "" { + raw = defaultDBBuilderProviders + } + var parts []string + for _, s := range strings.Split(raw, ",") { + s = strings.TrimSpace(s) + if s != "" { + parts = append(parts, s) + } + } + return parts +} + +func dbBuilderProvidersCSV() string { + raw := os.Getenv("DB_BUILDER_PROVIDERS") + if raw == "" { + raw = defaultDBBuilderProviders + } + // normalize: trim spaces, drop empties — the underlying PostLoad in grype + // also flattens csv, but we sanitize here so '--help' echoes a clean list. + var parts []string + for _, s := range strings.Split(raw, ",") { + s = strings.TrimSpace(s) + if s != "" { + parts = append(parts, s) + } + } + return strings.Join(parts, ",") +} + +func dbBuilderRoot() string { + if v := os.Getenv("DB_BUILDER_ROOT"); v != "" { + return v + } + return defaultDBBuilderRoot +} + +func dbBuilderDir() string { + if v := os.Getenv("DB_BUILDER_DIR"); v != "" { + return v + } + return defaultDBBuilderDir +} + +func dbBuilderVunnelImage() string { + if v := os.Getenv("DB_BUILDER_VUNNEL_IMAGE"); v != "" { + return v + } + return defaultDBBuilderVunnelImage +} + +func dbBuilderCacheDir() string { + if v := os.Getenv("DB_BUILDER_CACHE_DIR"); v != "" { + return v + } + return defaultDBBuilderCacheDir +} + +func dbBuilderCacheImage() string { + if v := os.Getenv("DB_BUILDER_CACHE_IMAGE"); v != "" { + return v + } + return defaultDBBuilderCacheImage +} + +func dbBuilderCacheTag() string { + if v := os.Getenv("DB_BUILDER_CACHE_TAG"); v != "" { + return v + } + return defaultDBBuilderCacheTag +} diff --git a/.make/main.go b/.make/main.go index 1832807a109..bc999cd5252 100644 --- a/.make/main.go +++ b/.make/main.go @@ -48,6 +48,8 @@ func main() { showTestImageCacheTask(), cleanCacheTask(), + + dbBuilderTasks(), ) } From 694a92afe3ecc0d8682f86c8212a5e17a97f484b Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Fri, 22 May 2026 17:07:18 -0400 Subject: [PATCH 7/8] 
avoid deduping to maintain parity Signed-off-by: Will Murphy --- grype/db/v6/build/transformers/nvd/node.go | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/grype/db/v6/build/transformers/nvd/node.go b/grype/db/v6/build/transformers/nvd/node.go index 1a4f4b9fa3e..9da5b643eaa 100644 --- a/grype/db/v6/build/transformers/nvd/node.go +++ b/grype/db/v6/build/transformers/nvd/node.go @@ -157,20 +157,12 @@ func deduplicateCandidates(candidates []affectedPackageCandidate) []affectedPack continue } - // merge platform CPEs... - platformMap := make(map[string]struct{}) - for _, platform := range existing.PlatformCPEs { - platformKey := cpeKey(platform) - platformMap[platformKey] = struct{}{} - } - - for _, platform := range candidate.PlatformCPEs { - platformKey := cpeKey(platform) - if _, ok := platformMap[platformKey]; !ok { - existing.PlatformCPEs = append(existing.PlatformCPEs, platform) - platformMap[platformKey] = struct{}{} - } - } + // merge platform CPEs by appending the merging candidate's list as-is. + // Deduplication of platform_cpes (within and across merged candidates) + // is a deliberate post-migration follow-up — see + // docs/migration-followups.md. Until then, mirror grype-db v0.112.0's + // pass-through behavior so the parity script gets a zero-diff run. + existing.PlatformCPEs = append(existing.PlatformCPEs, candidate.PlatformCPEs...) // merge ranges... existing.Ranges.addRanges(candidate.Ranges.toSlice()...) 
From a0d8afa75f6ffa07fbf9024614fd1a189b30c6a6 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Fri, 22 May 2026 17:14:23 -0400 Subject: [PATCH 8/8] lint fix Signed-off-by: Will Murphy --- cmd/grype/cli/commands/db.go | 2 + .../cli/commands/db_builder_cache_status.go | 76 ++++++++++--------- cmd/grype/cli/commands/db_status.go | 2 +- cmd/grype/cli/commands/root.go | 2 +- 4 files changed, 44 insertions(+), 38 deletions(-) diff --git a/cmd/grype/cli/commands/db.go b/cmd/grype/cli/commands/db.go index e1328cd745b..28b40f758d7 100644 --- a/cmd/grype/cli/commands/db.go +++ b/cmd/grype/cli/commands/db.go @@ -10,6 +10,8 @@ const ( jsonOutputFormat = "json" tableOutputFormat = "table" textOutputFormat = "text" + + validStatus = "valid" ) func DB(app clio.Application) *cobra.Command { diff --git a/cmd/grype/cli/commands/db_builder_cache_status.go b/cmd/grype/cli/commands/db_builder_cache_status.go index c83e0f95f56..35e3a060d2d 100644 --- a/cmd/grype/cli/commands/db_builder_cache_status.go +++ b/cmd/grype/cli/commands/db_builder_cache_status.go @@ -70,44 +70,10 @@ func runDBBuilderCacheStatus(opts *options.DatabaseBuild) error { } success := true - for idx, sd := range sds { - validMsg := "valid" - isValid := true - if errs[idx] != nil { - validMsg = fmt.Sprintf("INVALID (%s)", errs[idx].Error()) - isValid = false - } else if sd == nil { - validMsg = "INVALID (no state description found)" - isValid = false + if !printProviderStatus(providerNames[idx], sd, errs[idx], opts.Cache.MinRows) { + success = false } - - var count int64 - name := providerNames[idx] - - if sd != nil { - name = sd.Provider - counter := func() (int64, error) { - return entry.Count(sd.Store, sd.ResultPaths()) - } - count, err = validateMinRowsCount(opts.Cache.MinRows, counter) - if err != nil { - isValid = false - validMsg = fmt.Sprintf("INVALID (%s)", err.Error()) - } - } - - success = success && isValid - - fmt.Printf(" • %s\n", name) - statusFmt := color.HiRed - if isValid { - fmt.Printf(" ├── results: 
%d\n", count) - fmt.Printf(" ├── created: %s\n", sd.Timestamp.Format(time.RFC3339)) - statusFmt = color.HiGreen - } - - fmt.Printf(" └── status: %s\n", statusFmt.Sprint(validMsg)) } if missingProvidersErr != nil { @@ -121,6 +87,44 @@ func runDBBuilderCacheStatus(opts *options.DatabaseBuild) error { return nil } +func printProviderStatus(name string, sd *dbprovider.State, stateErr error, minRows int) bool { + validMsg := validStatus + isValid := true + if stateErr != nil { + validMsg = fmt.Sprintf("INVALID (%s)", stateErr.Error()) + isValid = false + } else if sd == nil { + validMsg = "INVALID (no state description found)" + isValid = false + } + + var count int64 + if sd != nil { + name = sd.Provider + counter := func() (int64, error) { + return entry.Count(sd.Store, sd.ResultPaths()) + } + c, err := validateMinRowsCount(minRows, counter) + if err != nil { + isValid = false + validMsg = fmt.Sprintf("INVALID (%s)", err.Error()) + } + count = c + } + + fmt.Printf(" • %s\n", name) + statusFmt := color.HiRed + if isValid { + fmt.Printf(" ├── results: %d\n", count) + fmt.Printf(" ├── created: %s\n", sd.Timestamp.Format(time.RFC3339)) + statusFmt = color.HiGreen + } + + fmt.Printf(" └── status: %s\n", statusFmt.Sprint(validMsg)) + + return isValid +} + func validateMinRowsCount(minRows int, counter func() (int64, error)) (int64, error) { count, err := counter() if err != nil { diff --git a/cmd/grype/cli/commands/db_status.go b/cmd/grype/cli/commands/db_status.go index af402e27aab..dbfdfddc32d 100644 --- a/cmd/grype/cli/commands/db_status.go +++ b/cmd/grype/cli/commands/db_status.go @@ -99,5 +99,5 @@ func renderStoreValidation(status vulnerability.ProviderStatus) string { if status.Error != nil { return "invalid" } - return "valid" + return validStatus } diff --git a/cmd/grype/cli/commands/root.go b/cmd/grype/cli/commands/root.go index 9c99df5e12d..24955456ed0 100644 --- a/cmd/grype/cli/commands/root.go +++ b/cmd/grype/cli/commands/root.go @@ -162,7 +162,7 @@ func 
runGrype(ctx context.Context, app clio.Application, opts *options.Grype, us startTime := time.Now() defer func() { - validStr := "valid" + validStr := validStatus if err != nil { validStr = "invalid" }