diff --git a/grype/vex/csaf/csaf.go b/grype/vex/csaf/csaf.go index 43ae6cade62..98aac18cee6 100644 --- a/grype/vex/csaf/csaf.go +++ b/grype/vex/csaf/csaf.go @@ -4,6 +4,10 @@ import ( "slices" "github.com/gocsaf/csaf/v3/csaf" + + "github.com/anchore/grype/grype/pkg" + "github.com/anchore/grype/grype/version" + "github.com/anchore/packageurl-go" ) // advisoryMatch captures the criteria that caused a vulnerability to match a CSAF advisory @@ -127,3 +131,237 @@ func purlsFromProductIdentificationHelpers(helpers []*csaf.ProductIdentification } return purls } + +// synthesisCandidate describes a (vulnerability, package) pair that should be +// added to grype's results based on a CSAF advisory, when no DB-backed match +// already exists. +type synthesisCandidate struct { + Vulnerability *csaf.Vulnerability + Status status + ProductID csaf.ProductID + Package *pkg.Package +} + +// indexedPackage is a package whose purl has been parsed once and whose +// ecosystem version format has been resolved, so synthesis does not re-parse +// the same package on every statement comparison. +type indexedPackage struct { + pkg *pkg.Package + purl packageurl.PackageURL + format version.Format +} + +// purlIdentityKey returns the (type, namespace, name) identity of a purl. +// packageMatchesStatement requires these three to be equal, so a package can +// only ever match a statement that shares this key. Indexing packages by it +// lets synthesis compare each statement against the handful of packages with a +// matching identity instead of the whole catalog. +func purlIdentityKey(p packageurl.PackageURL) string { + return p.Type + "\x00" + p.Namespace + "\x00" + p.Name +} + +// buildPackageIndex parses every package purl once and buckets the packages by +// their (type, namespace, name) identity. 
+func buildPackageIndex(pkgs []pkg.Package) map[string][]indexedPackage { + index := make(map[string][]indexedPackage) + for i := range pkgs { + if pkgs[i].PURL == "" { + continue + } + parsed, err := packageurl.FromString(pkgs[i].PURL) + if err != nil { + continue + } + key := purlIdentityKey(parsed) + index[key] = append(index[key], indexedPackage{ + pkg: &pkgs[i], + purl: parsed, + format: pkg.VersionFormat(pkgs[i]), + }) + } + return index +} + +// statusProducts pairs a CSAF product-status slice with the synthesis status it +// maps to. Using a fixed slice avoids allocating a map per vulnerability. +type statusProducts struct { + status status + products *csaf.Products +} + +// synthesisStatuses returns the affected-like product-status buckets that are +// eligible for synthesis. fixed and known_not_affected are intentionally +// excluded. +func synthesisStatuses(ps *csaf.ProductStatus) []statusProducts { + if ps == nil { + return nil + } + return []statusProducts{ + {firstAffected, ps.FirstAffected}, + {knownAffected, ps.KnownAffected}, + {lastAffected, ps.LastAffected}, + {recommended, ps.Recommended}, + {underInvestigation, ps.UnderInvestigation}, + } +} + +// findSynthesisCandidates walks every advisory and yields (vuln, package) +// pairs eligible for synthesis. Range semantics are applied per status: +// - last_affected: pkg.version <= stmt.version (ceiling) +// - first_affected: pkg.version >= stmt.version (floor) +// - known_affected, recommended, under_investigation: exact match +// (or wildcard if the statement purl has no version) +// +// Statuses that are not "affected-like" (fixed, known_not_affected) never +// trigger synthesis. +// +// Packages are pre-parsed and indexed by purl identity so each statement purl +// is matched against only the packages that share its (type, namespace, name) +// rather than the entire catalog. Per-advisory product purls are cached so the +// product tree is walked once per product instead of once per package. 
+// +//nolint:gocognit +func (advisories advisories) findSynthesisCandidates(pkgs []pkg.Package) []synthesisCandidate { + var out []synthesisCandidate + if len(pkgs) == 0 { + return out + } + + index := buildPackageIndex(pkgs) + if len(index) == 0 { + return out + } + + for _, adv := range advisories { + if adv == nil || adv.Vulnerabilities == nil { + continue + } + + // Cache product purls per advisory so CollectProductIdentificationHelpers + // (which walks the whole product tree) runs once per product ID. + helpersCache := map[csaf.ProductID][]string{} + purlsForProduct := func(productID csaf.ProductID) []string { + if cached, ok := helpersCache[productID]; ok { + return cached + } + purls := purlsFromProductIdentificationHelpers(adv.ProductTree.CollectProductIdentificationHelpers(productID)) + helpersCache[productID] = purls + return purls + } + + for _, vuln := range adv.Vulnerabilities { + if vuln == nil || vuln.CVE == nil { + continue + } + + for _, sp := range synthesisStatuses(vuln.ProductStatus) { + if sp.products == nil { + continue + } + for _, productIDPtr := range *sp.products { + if productIDPtr == nil { + continue + } + productID := *productIDPtr + for _, stmtPURL := range purlsForProduct(productID) { + stmt, err := packageurl.FromString(stmtPURL) + if err != nil { + continue + } + for _, cand := range index[purlIdentityKey(stmt)] { + if !packageMatchesParsed(stmt, cand.purl, cand.format, sp.status) { + continue + } + out = append(out, synthesisCandidate{ + Vulnerability: vuln, + Status: sp.status, + ProductID: productID, + Package: cand.pkg, + }) + } + } + } + } + } + } + + return out +} + +// packageMatchesStatement reports whether the given package's purl falls +// within the scope of a VEX statement that names stmtPURL with the given +// CSAF status. Type/namespace/name/qualifiers must always match; the version +// dimension is interpreted according to the status. 
+func packageMatchesStatement(stmtPURL string, p *pkg.Package, st status) bool { + stmt, err := packageurl.FromString(stmtPURL) + if err != nil { + return false + } + pkgPURL, err := packageurl.FromString(p.PURL) + if err != nil { + return false + } + return packageMatchesParsed(stmt, pkgPURL, pkg.VersionFormat(*p), st) +} + +// packageMatchesParsed is packageMatchesStatement operating on already-parsed +// purls and a resolved version format, so the hot synthesis loop does not +// re-parse purls it has already seen. +func packageMatchesParsed(stmt, pkgPURL packageurl.PackageURL, format version.Format, st status) bool { + if stmt.Type != pkgPURL.Type || stmt.Namespace != pkgPURL.Namespace || stmt.Name != pkgPURL.Name { + return false + } + if !qualifierSubset(stmt.Qualifiers, pkgPURL.Qualifiers) { + return false + } + + // No version in the statement -> wildcard, matches any pkg version. + if stmt.Version == "" { + return true + } + if pkgPURL.Version == "" { + // Statement is version-specific but the package's purl has none. + return false + } + + switch st { + case lastAffected: + return compareVersions(pkgPURL.Version, stmt.Version, format, version.LTE) + case firstAffected: + return compareVersions(pkgPURL.Version, stmt.Version, format, version.GTE) + default: + // knownAffected, recommended, underInvestigation: exact match. 
+ return stmt.Version == pkgPURL.Version + } +} + +func compareVersions(pkgVersion, stmtVersion string, format version.Format, op version.Operator) bool { + pkgV := version.New(pkgVersion, format) + stmtV := version.New(stmtVersion, format) + ok, err := pkgV.Is(op, stmtV) + if err != nil { + return false + } + return ok +} + +func qualifierSubset(stmtQ, pkgQ packageurl.Qualifiers) bool { + pkgMap := pkgQ.Map() + for _, sq := range stmtQ { + if v, ok := pkgMap[sq.Key]; !ok || v != sq.Value { + return false + } + } + return true +} + +// toAdvisoryMatch returns the advisoryMatch shape expected by the rest of the +// CSAF code (so a synthesis candidate plugs into matchingRule, statement(), +// etc.). +func (c synthesisCandidate) toAdvisoryMatch() *advisoryMatch { + return &advisoryMatch{ + Vulnerability: c.Vulnerability, + Status: c.Status, + ProductID: c.ProductID, + } +} diff --git a/grype/vex/csaf/implementation.go b/grype/vex/csaf/implementation.go index 48e4d40c113..ccfb1ac5e5e 100644 --- a/grype/vex/csaf/implementation.go +++ b/grype/vex/csaf/implementation.go @@ -11,6 +11,7 @@ import ( "github.com/anchore/grype/grype/match" "github.com/anchore/grype/grype/pkg" vexStatus "github.com/anchore/grype/grype/vex/status" + "github.com/anchore/grype/grype/vulnerability" ) // searchedBy captures the parameters used to search through the VEX data @@ -121,9 +122,13 @@ func (*Processor) FilterMatches( // AugmentMatches adds results to the match.Matches array when matching data // about an affected VEX product is found on loaded VEX documents. Matches -// are moved from the ignore list back to active matches. +// are moved from the ignore list back to active matches, or synthesized from +// the package catalog when the vulnerability database has no record of the +// affected (vulnerability, package) pair. last_affected and first_affected +// statuses are interpreted as version range bounds; other affected-like +// statuses use exact version match. 
func (*Processor) AugmentMatches( - docRaw any, ignoreRules []match.IgnoreRule, _ *pkg.Context, matches *match.Matches, ignoredMatches []match.IgnoredMatch, + docRaw any, ignoreRules []match.IgnoreRule, _ *pkg.Context, pkgs []pkg.Package, matches *match.Matches, ignoredMatches []match.IgnoredMatch, ) (*match.Matches, []match.IgnoredMatch, error) { advisories, ok := docRaw.(advisories) if !ok { @@ -152,9 +157,86 @@ func (*Processor) AugmentMatches( remainingIgnoredMatches = append(remainingIgnoredMatches, m) } + synthesizeFromCatalog(advisories, ignoreRules, pkgs, matches, remainingIgnoredMatches) + return matches, remainingIgnoredMatches, nil } +// synthesizeFromCatalog walks the package catalog and creates new matches for +// any (vulnerability, package) pair named as affected (or under_investigation) +// in the loaded CSAF advisories that is not already represented in the +// remaining or ignored match sets. +func synthesizeFromCatalog( + advs advisories, + ignoreRules []match.IgnoreRule, + pkgs []pkg.Package, + remainingMatches *match.Matches, + ignoredMatches []match.IgnoredMatch, +) { + candidates := advs.findSynthesisCandidates(pkgs) + if len(candidates) == 0 { + return + } + + known := existingVulnPackageKeys(remainingMatches, ignoredMatches) + + for _, c := range candidates { + advMatch := c.toAdvisoryMatch() + vulnID := advMatch.cve() + if vulnID == "" { + continue + } + key := vulnPackageKey(vulnID, c.Package.PURL) + if _, seen := known[key]; seen { + continue + } + + synthesized := match.Match{ + Vulnerability: vulnerability.Vulnerability{ + Reference: vulnerability.Reference{ + ID: vulnID, + Namespace: "vex", + }, + }, + Package: *c.Package, + } + if rule := matchingRule(ignoreRules, synthesized, advMatch, vexStatus.AugmentList()); rule == nil { + continue + } + + synthesized.Details = []match.Detail{ + { + Type: match.ExactDirectMatch, + SearchedBy: &searchedBy{ + Vulnerability: vulnID, + Purl: c.Package.PURL, + }, + Found: advMatch, + Matcher: 
match.CsafVexMatcher, + Confidence: 1, + }, + } + + remainingMatches.Add(synthesized) + known[key] = struct{}{} + } +} + +func existingVulnPackageKeys(remainingMatches *match.Matches, ignoredMatches []match.IgnoredMatch) map[string]struct{} { + known := map[string]struct{}{} + for m := range remainingMatches.Enumerate() { + known[vulnPackageKey(m.Vulnerability.ID, m.Package.PURL)] = struct{}{} + } + for _, m := range ignoredMatches { + known[vulnPackageKey(m.Vulnerability.ID, m.Package.PURL)] = struct{}{} + } + return known +} + +func vulnPackageKey(vulnID, purl string) string { + return vulnID + "\x00" + purl +} + // matchingRule cycles through a set of ignore rules and returns the first // one that matches the statement and the match. Returns nil if none match. func matchingRule(ignoreRules []match.IgnoreRule, m match.Match, advMatch *advisoryMatch, allowedStatuses []vexStatus.Status) *match.IgnoreRule { diff --git a/grype/vex/csaf/implementation_test.go b/grype/vex/csaf/implementation_test.go index 4f06a662a46..e8bb337e54b 100644 --- a/grype/vex/csaf/implementation_test.go +++ b/grype/vex/csaf/implementation_test.go @@ -198,3 +198,219 @@ func Test_matchingRule(t *testing.T) { }) } } + +func TestPackageMatchesStatement(t *testing.T) { + mkPkg := func(purl string) *pkg.Package { + return &pkg.Package{ + Type: "go-module", + PURL: purl, + } + } + + tests := []struct { + name string + stmtPURL string + pkgPURL string + status status + want bool + }{ + // last_affected: ceiling + {"last_affected matches lower pkg version", "pkg:golang/golang.org/x/net@v0.54.0", "pkg:golang/golang.org/x/net@v0.53.0", lastAffected, true}, + {"last_affected matches equal pkg version", "pkg:golang/golang.org/x/net@v0.53.0", "pkg:golang/golang.org/x/net@v0.53.0", lastAffected, true}, + {"last_affected excludes higher pkg version", "pkg:golang/golang.org/x/net@v0.53.0", "pkg:golang/golang.org/x/net@v0.54.0", lastAffected, false}, + + // first_affected: floor + {"first_affected matches 
higher pkg version", "pkg:golang/golang.org/x/net@v0.50.0", "pkg:golang/golang.org/x/net@v0.53.0", firstAffected, true}, + {"first_affected matches equal pkg version", "pkg:golang/golang.org/x/net@v0.53.0", "pkg:golang/golang.org/x/net@v0.53.0", firstAffected, true}, + {"first_affected excludes lower pkg version", "pkg:golang/golang.org/x/net@v0.53.0", "pkg:golang/golang.org/x/net@v0.50.0", firstAffected, false}, + + // known_affected, recommended, under_investigation: exact + {"known_affected matches equal", "pkg:golang/golang.org/x/net@v0.53.0", "pkg:golang/golang.org/x/net@v0.53.0", knownAffected, true}, + {"known_affected excludes lower", "pkg:golang/golang.org/x/net@v0.53.0", "pkg:golang/golang.org/x/net@v0.52.0", knownAffected, false}, + {"recommended excludes lower", "pkg:golang/golang.org/x/net@v0.53.0", "pkg:golang/golang.org/x/net@v0.52.0", recommended, false}, + {"under_investigation matches equal", "pkg:golang/golang.org/x/net@v0.53.0", "pkg:golang/golang.org/x/net@v0.53.0", underInvestigation, true}, + {"under_investigation excludes lower", "pkg:golang/golang.org/x/net@v0.53.0", "pkg:golang/golang.org/x/net@v0.52.0", underInvestigation, false}, + + // wildcard (no statement version) matches any version regardless of status + {"wildcard last_affected matches any", "pkg:golang/golang.org/x/net", "pkg:golang/golang.org/x/net@v0.99.0", lastAffected, true}, + {"wildcard known_affected matches any", "pkg:golang/golang.org/x/net", "pkg:golang/golang.org/x/net@v0.99.0", knownAffected, true}, + + // name / namespace / type mismatches + {"name mismatch excludes", "pkg:golang/golang.org/x/net@v0.53.0", "pkg:golang/golang.org/x/text@v0.53.0", lastAffected, false}, + {"namespace mismatch excludes", "pkg:golang/golang.org/x/net@v0.53.0", "pkg:golang/example.com/x/net@v0.53.0", lastAffected, false}, + {"type mismatch excludes", "pkg:golang/golang.org/x/net@v0.53.0", "pkg:npm/x-net@v0.53.0", lastAffected, false}, + } + + for _, tt := range tests { + t.Run(tt.name, 
func(t *testing.T) { + got := packageMatchesStatement(tt.stmtPURL, mkPkg(tt.pkgPURL), tt.status) + require.Equal(t, tt.want, got) + }) + } +} + +func TestAugmentMatches_SynthesizesFromPackageCatalog(t *testing.T) { + const ( + vulnID = "CVE-2099-0001" + basePkg = "pkg:golang/golang.org/x/net@v0.53.0" + ) + + xNet := pkg.Package{ + ID: "deadbeefcafebabe", + Name: "golang.org/x/net", + Version: "v0.53.0", + Type: "go-module", + PURL: basePkg, + } + + mkAdv := func(stmtPURL string, statusKey status) *csaf.Advisory { + productID := csaf.ProductID("test-pkg") + cve := csaf.CVE(vulnID) + pvCat := csaf.CSAFBranchCategoryProductVersion + pvName := "1.0" + pnCat := csaf.CSAFBranchCategoryProductName + pnName := "Test" + productBranch := &csaf.Branch{ + Category: &pvCat, + Name: &pvName, + Product: &csaf.FullProductName{ + Name: &[]string{"test"}[0], + ProductID: &productID, + ProductIdentificationHelper: &csaf.ProductIdentificationHelper{ + PURL: &[]csaf.PURL{csaf.PURL(stmtPURL)}[0], + }, + }, + } + rootBranch := &csaf.Branch{ + Category: &pnCat, + Name: &pnName, + Branches: csaf.Branches{productBranch}, + } + + ps := csaf.ProductStatus{} + products := csaf.Products{&productID} + switch statusKey { + case lastAffected: + ps.LastAffected = &products + case firstAffected: + ps.FirstAffected = &products + case knownAffected: + ps.KnownAffected = &products + case recommended: + ps.Recommended = &products + case underInvestigation: + ps.UnderInvestigation = &products + case fixed: + ps.Fixed = &products + case knownNotAffected: + ps.KnownNotAffected = &products + } + + return &csaf.Advisory{ + ProductTree: &csaf.ProductTree{ + Branches: csaf.Branches{rootBranch}, + }, + Vulnerabilities: []*csaf.Vulnerability{{ + CVE: &cve, + ProductStatus: &ps, + Title: &[]string{"test"}[0], + }}, + } + } + + tests := []struct { + name string + stmtPURL string + stmtStat status + pkgs []pkg.Package + wantSynth bool + }{ + // last_affected: ceiling + {"last_affected synthesizes for lower pkg", 
"pkg:golang/golang.org/x/net@v0.55.0", lastAffected, []pkg.Package{xNet}, true}, + {"last_affected synthesizes for equal pkg", "pkg:golang/golang.org/x/net@v0.53.0", lastAffected, []pkg.Package{xNet}, true}, + {"last_affected skips for higher pkg", "pkg:golang/golang.org/x/net@v0.50.0", lastAffected, []pkg.Package{xNet}, false}, + + // first_affected: floor + {"first_affected synthesizes for higher pkg", "pkg:golang/golang.org/x/net@v0.50.0", firstAffected, []pkg.Package{xNet}, true}, + {"first_affected skips for lower pkg", "pkg:golang/golang.org/x/net@v0.99.0", firstAffected, []pkg.Package{xNet}, false}, + + // known_affected: exact + {"known_affected synthesizes for equal pkg", "pkg:golang/golang.org/x/net@v0.53.0", knownAffected, []pkg.Package{xNet}, true}, + {"known_affected skips for lower pkg", "pkg:golang/golang.org/x/net@v0.55.0", knownAffected, []pkg.Package{xNet}, false}, + + // fixed/known_not_affected: must not synthesize + {"fixed does not synthesize", "pkg:golang/golang.org/x/net@v0.53.0", fixed, []pkg.Package{xNet}, false}, + {"known_not_affected does not synthesize", "pkg:golang/golang.org/x/net@v0.53.0", knownNotAffected, []pkg.Package{xNet}, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + advs := advisories{mkAdv(tt.stmtPURL, tt.stmtStat)} + matches := match.NewMatches() + + processor := &Processor{} + out, _, err := processor.AugmentMatches(advs, nil, nil, tt.pkgs, &matches, nil) + require.NoError(t, err) + + if tt.wantSynth { + require.Len(t, out.Sorted(), 1, "expected one synthesized match") + got := out.Sorted()[0] + require.Equal(t, vulnID, got.Vulnerability.ID) + require.Equal(t, xNet.PURL, got.Package.PURL) + } else { + require.Empty(t, out.Sorted(), "did not expect a synthesized match") + } + }) + } +} + +func TestAugmentMatches_DoesNotDuplicateExistingMatches_CSAF(t *testing.T) { + const vulnID = "CVE-2099-0002" + p := pkg.Package{ + Name: "example.com/foo", + PURL: "pkg:golang/example.com/foo@v1.0.0", 
+ Type: "go-module", + } + + existing := match.Match{ + Vulnerability: vulnerability.Vulnerability{Reference: vulnerability.Reference{ID: vulnID}}, + Package: p, + } + + productID := csaf.ProductID("p1") + cve := csaf.CVE(vulnID) + fullProductName := &csaf.FullProductName{ + Name: &[]string{"foo"}[0], + ProductID: &productID, + ProductIdentificationHelper: &csaf.ProductIdentificationHelper{ + PURL: &[]csaf.PURL{csaf.PURL(p.PURL)}[0], + }, + } + pvCat := csaf.CSAFBranchCategoryProductVersion + pvName := "1.0.0" + pnCat := csaf.CSAFBranchCategoryProductName + pnName := "Foo" + productBranch := &csaf.Branch{Category: &pvCat, Name: &pvName, Product: fullProductName} + rootBranch := &csaf.Branch{Category: &pnCat, Name: &pnName, Branches: csaf.Branches{productBranch}} + products := csaf.Products{&productID} + advs := advisories{&csaf.Advisory{ + ProductTree: &csaf.ProductTree{Branches: csaf.Branches{rootBranch}}, + Vulnerabilities: []*csaf.Vulnerability{{ + CVE: &cve, + ProductStatus: &csaf.ProductStatus{LastAffected: &products}, + Title: &[]string{"t"}[0], + }}, + }} + + matches := match.NewMatches(existing) + + processor := &Processor{} + out, _, err := processor.AugmentMatches(advs, nil, nil, []pkg.Package{p}, &matches, nil) + require.NoError(t, err) + + require.Len(t, out.Sorted(), 1, "synthesis must dedupe against existing matches") + // Use slices so we don't accidentally accept additional unrelated matches. 
+ require.True(t, slices.ContainsFunc(out.Sorted(), func(m match.Match) bool { + return m.Vulnerability.ID == vulnID && m.Package.PURL == p.PURL + })) +} diff --git a/grype/vex/openvex/implementation.go b/grype/vex/openvex/implementation.go index f8cca793466..113c1f6bcca 100644 --- a/grype/vex/openvex/implementation.go +++ b/grype/vex/openvex/implementation.go @@ -12,6 +12,7 @@ import ( "github.com/anchore/grype/grype/match" "github.com/anchore/grype/grype/pkg" vexStatus "github.com/anchore/grype/grype/vex/status" + "github.com/anchore/grype/grype/vulnerability" "github.com/anchore/packageurl-go" "github.com/anchore/syft/syft/source" ) @@ -314,9 +315,11 @@ func matchingRule(ignoreRules []match.IgnoreRule, m match.Match, statement *open // AugmentMatches adds results to the match.Matches array when matching data // about an affected VEX product is found on loaded VEX documents. Matches -// are moved from the ignore list or synthesized when no previous data is found. +// are moved from the ignore list back to active matches, or synthesized from +// the package catalog when the vulnerability database has no record of the +// affected (vulnerability, package) pair. 
func (ovm *Processor) AugmentMatches( - docRaw any, ignoreRules []match.IgnoreRule, pkgContext *pkg.Context, remainingMatches *match.Matches, ignoredMatches []match.IgnoredMatch, + docRaw any, ignoreRules []match.IgnoreRule, pkgContext *pkg.Context, pkgs []pkg.Package, remainingMatches *match.Matches, ignoredMatches []match.IgnoredMatch, ) (*match.Matches, []match.IgnoredMatch, error) { doc, ok := docRaw.(*openvex.VEX) if !ok { @@ -363,5 +366,223 @@ func (ovm *Processor) AugmentMatches( remainingMatches.Add(newMatch) } + synthesizeFromCatalog(doc, ignoreRules, products, pkgs, remainingMatches, additionalIgnoredMatches) + return remainingMatches, additionalIgnoredMatches, nil } + +// synthesizeFromCatalog walks the package catalog and, for each VEX statement +// that names a package as affected or under_investigation but has no corresponding +// match in either the remaining or ignored match sets, creates a new match.Match. +// This covers vulnerabilities that are present in the VEX document but absent from +// grype's vulnerability database for the given package. 
+func synthesizeFromCatalog( + doc *openvex.VEX, + ignoreRules []match.IgnoreRule, + products []string, + pkgs []pkg.Package, + remainingMatches *match.Matches, + ignoredMatches []match.IgnoredMatch, +) { + if len(pkgs) == 0 || len(doc.Statements) == 0 { + return + } + + known := existingVulnPackageKeys(remainingMatches, ignoredMatches) + index := buildPackageIndex(pkgs) + + for stmtIdx := range doc.Statements { + stmt := &doc.Statements[stmtIdx] + if stmt.Status != openvex.StatusAffected && stmt.Status != openvex.StatusUnderInvestigation { + continue + } + + vulnID := string(stmt.Vulnerability.Name) + if vulnID == "" { + continue + } + + for _, pi := range candidatePackages(stmt, products, pkgs, index) { + p := &pkgs[pi] + if p.PURL == "" { + continue + } + if _, seen := known[vulnPackageKey(vulnID, p.PURL)]; seen { + continue + } + + matchedProduct, matchedSubcmp := matchPackageAgainstStatement(stmt, products, p.PURL) + if matchedProduct == "" { + continue + } + + synthesized := buildSynthesizedMatch(*p, vulnID, stmt, matchedProduct, matchedSubcmp) + if rule := matchingRule(ignoreRules, synthesized, stmt, vexStatus.AugmentList()); rule == nil { + continue + } + + remainingMatches.Add(synthesized) + known[vulnPackageKey(vulnID, p.PURL)] = struct{}{} + } + } +} + +// buildPackageIndex parses every package purl once and buckets the package +// indices by their (type, namespace, name) identity. A statement can only +// synthesize a match for a package whose purl shares this identity (see +// PurlMatches), so the index lets each statement consider just the relevant +// packages rather than the whole catalog. 
+func buildPackageIndex(pkgs []pkg.Package) map[string][]int { + index := make(map[string][]int) + for i := range pkgs { + if pkgs[i].PURL == "" { + continue + } + parsed, err := packageurl.FromString(pkgs[i].PURL) + if err != nil { + continue + } + key := purlIdentityKey(parsed) + index[key] = append(index[key], i) + } + return index +} + +func purlIdentityKey(p packageurl.PackageURL) string { + return p.Type + "\x00" + p.Namespace + "\x00" + p.Name +} + +// candidatePackages returns the indices of packages that could match the given +// statement. For statements that name packages by purl (as a product or as a +// subcomponent) only packages sharing a purl identity with one of those purls +// are returned. Image-wide statements (an image/context product with no +// subcomponents) apply to every package, matching the behavior of +// matchPackageAgainstStatement, so the whole catalog is returned in that case. +func candidatePackages(stmt *openvex.Statement, products []string, pkgs []pkg.Package, index map[string][]int) []int { + if statementIsImageWide(stmt, products) { + all := make([]int, len(pkgs)) + for i := range pkgs { + all[i] = i + } + return all + } + + var out []int + seen := map[int]struct{}{} + for _, sp := range statementPurls(stmt) { + parsed, err := packageurl.FromString(sp) + if err != nil { + continue + } + for _, pi := range index[purlIdentityKey(parsed)] { + if _, ok := seen[pi]; ok { + continue + } + seen[pi] = struct{}{} + out = append(out, pi) + } + } + return out +} + +// statementPurls collects every purl referenced by a statement, both as a +// product component and as a subcomponent. 
+func statementPurls(stmt *openvex.Statement) []string { + var out []string + add := func(s string) { + if strings.HasPrefix(s, "pkg:") { + out = append(out, s) + } + } + addComponent := func(c openvex.Component) { + add(c.ID) + for t, id := range c.Identifiers { + if t == openvex.PURL { + add(id) + } + } + } + for i := range stmt.Products { + addComponent(stmt.Products[i].Component) + for j := range stmt.Products[i].Subcomponents { + addComponent(stmt.Products[i].Subcomponents[j].Component) + } + } + return out +} + +// statementIsImageWide reports whether the statement names an image/context +// product with no subcomponents, in which case matchPackageAgainstStatement +// matches every package in the catalog. +func statementIsImageWide(stmt *openvex.Statement, products []string) bool { + for i := range stmt.Products { + if len(stmt.Products[i].Subcomponents) != 0 { + continue + } + for _, prod := range products { + if stmt.Products[i].Component.Matches(prod) { + return true + } + } + } + return false +} + +func existingVulnPackageKeys(remainingMatches *match.Matches, ignoredMatches []match.IgnoredMatch) map[string]struct{} { + known := map[string]struct{}{} + for m := range remainingMatches.Enumerate() { + known[vulnPackageKey(m.Vulnerability.ID, m.Package.PURL)] = struct{}{} + } + for _, m := range ignoredMatches { + known[vulnPackageKey(m.Vulnerability.ID, m.Package.PURL)] = struct{}{} + } + return known +} + +func vulnPackageKey(vulnID, purl string) string { + return vulnID + "\x00" + purl +} + +// matchPackageAgainstStatement returns the matched product identifier and the +// subcomponents that satisfied the match, or empty strings/nil when the +// statement does not name the given package. +func matchPackageAgainstStatement(stmt *openvex.Statement, products []string, pkgPURL string) (string, []string) { + // Image/context as product, package as subcomponent. 
+ for _, product := range products { + if stmt.MatchesProduct(product, pkgPURL) { + return product, []string{pkgPURL} + } + } + // Package itself as product. + if stmt.MatchesProduct(pkgPURL, "") { + return pkgPURL, nil + } + return "", nil +} + +func buildSynthesizedMatch(p pkg.Package, vulnID string, stmt *openvex.Statement, matchedProduct string, matchedSubcmp []string) match.Match { + return match.Match{ + Vulnerability: vulnerability.Vulnerability{ + Reference: vulnerability.Reference{ + ID: vulnID, + Namespace: "vex", + }, + }, + Package: p, + Details: []match.Detail{ + { + Type: match.ExactDirectMatch, + SearchedBy: &SearchedBy{ + Vulnerability: vulnID, + Product: matchedProduct, + Subcomponents: matchedSubcmp, + }, + Found: Match{ + Statement: *stmt, + }, + Matcher: match.OpenVexMatcher, + Confidence: 1, + }, + }, + } +} diff --git a/grype/vex/openvex/implementation_test.go b/grype/vex/openvex/implementation_test.go index e0869de9241..01b84ed0ef8 100644 --- a/grype/vex/openvex/implementation_test.go +++ b/grype/vex/openvex/implementation_test.go @@ -567,6 +567,159 @@ func TestIdentifiersFromDigests_NormalizesDockerHubRepositoryURL(t *testing.T) { require.Equal(t, "index.docker.io/library", repoURL) } +func TestAugmentMatches_SynthesizesFromPackageCatalog(t *testing.T) { + // A package is present in the SBOM (e.g. via syft of a Go binary) but the + // vulnerability database has no entry for it. A VEX affected statement + // naming the package's purl should synthesize a match so the finding shows + // up in grype's output (parity with govulncheck reachability findings). 
+ const ( + vulnID = "GO-2026-5030" + pkgPURL = "pkg:golang/golang.org/x/net@v0.53.0" + ) + + pkgCtx := &pkg.Context{ + Source: &source.Description{ + Metadata: source.FileMetadata{Path: "/tmp/step"}, + }, + } + + xNet := pkg.Package{ + ID: "deadbeefcafebabe", + Name: "golang.org/x/net", + Version: "v0.53.0", + Type: "go-module", + Language: "go", + PURL: pkgPURL, + } + + makeStmt := func(status openvex.Status, productID string) openvex.Statement { + return openvex.Statement{ + Vulnerability: openvex.Vulnerability{Name: openvex.VulnerabilityID(vulnID)}, + Products: []openvex.Product{{Component: openvex.Component{ID: productID}}}, + Status: status, + } + } + + tests := []struct { + name string + stmts []openvex.Statement + pkgs []pkg.Package + ignoreRules []match.IgnoreRule + wantSynth bool + }{ + { + name: "affected statement synthesizes match", + stmts: []openvex.Statement{makeStmt(openvex.StatusAffected, pkgPURL)}, + pkgs: []pkg.Package{xNet}, + wantSynth: true, + }, + { + name: "under_investigation synthesizes match", + stmts: []openvex.Statement{makeStmt(openvex.StatusUnderInvestigation, pkgPURL)}, + pkgs: []pkg.Package{xNet}, + wantSynth: true, + }, + { + name: "not_affected does not synthesize", + stmts: []openvex.Statement{makeStmt(openvex.StatusNotAffected, pkgPURL)}, + pkgs: []pkg.Package{xNet}, + wantSynth: false, + }, + { + name: "fixed does not synthesize", + stmts: []openvex.Statement{makeStmt(openvex.StatusFixed, pkgPURL)}, + pkgs: []pkg.Package{xNet}, + wantSynth: false, + }, + { + name: "purl mismatch does not synthesize", + stmts: []openvex.Statement{makeStmt(openvex.StatusAffected, "pkg:golang/golang.org/x/net@v0.99.0")}, + pkgs: []pkg.Package{xNet}, + wantSynth: false, + }, + { + name: "empty package catalog does not synthesize", + stmts: []openvex.Statement{makeStmt(openvex.StatusAffected, pkgPURL)}, + pkgs: nil, + wantSynth: false, + }, + { + name: "ignore rule with non-matching vulnerability does not synthesize", + stmts: 
[]openvex.Statement{makeStmt(openvex.StatusAffected, pkgPURL)}, + pkgs: []pkg.Package{xNet}, + ignoreRules: []match.IgnoreRule{{Namespace: "vex", VexStatus: string(openvex.StatusAffected), Vulnerability: "CVE-9999-0000"}}, + wantSynth: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + doc := &openvex.VEX{Statements: tt.stmts} + remaining := match.NewMatches() + + processor := New() + out, _, err := processor.AugmentMatches(doc, tt.ignoreRules, pkgCtx, tt.pkgs, &remaining, nil) + require.NoError(t, err) + + if tt.wantSynth { + require.Len(t, out.Sorted(), 1, "expected one synthesized match") + got := out.Sorted()[0] + require.Equal(t, vulnID, got.Vulnerability.ID) + require.Equal(t, pkgPURL, got.Package.PURL) + require.NotEmpty(t, got.Details) + require.Equal(t, match.OpenVexMatcher, got.Details[0].Matcher) + } else { + require.Empty(t, out.Sorted(), "did not expect a synthesized match") + } + }) + } +} + +func TestAugmentMatches_DoesNotDuplicateExistingMatches(t *testing.T) { + // When grype already produced a match for the same (vuln, pkg), the + // synthesis step must not add a duplicate: the test seeds the existing matches with that match, feeds an affected statement for the same purl, and requires exactly one match in the output.
+ const ( + vulnID = "CVE-2023-9999" + pkgPURL = "pkg:golang/example.com/foo@v1.0.0" + ) + + pkgCtx := &pkg.Context{ + Source: &source.Description{ + Metadata: source.FileMetadata{Path: "/tmp/bin"}, + }, + } + + p := pkg.Package{ + ID: "abcdef0123456789", + Name: "example.com/foo", + PURL: pkgPURL, + Type: "go-module", + } + + existing := match.Match{ + Vulnerability: vulnerability.Vulnerability{Reference: vulnerability.Reference{ID: vulnID}}, + Package: p, + } + + doc := &openvex.VEX{ + Statements: []openvex.Statement{ + { + Vulnerability: openvex.Vulnerability{Name: openvex.VulnerabilityID(vulnID)}, + Products: []openvex.Product{{Component: openvex.Component{ID: pkgPURL}}}, + Status: openvex.StatusAffected, + }, + }, + } + + remaining := match.NewMatches(existing) + + processor := New() + out, _, err := processor.AugmentMatches(doc, nil, pkgCtx, []pkg.Package{p}, &remaining, nil) + require.NoError(t, err) + + require.Len(t, out.Sorted(), 1, "synthesis must dedupe against existing matches") +} + func TestNormalizeDockerHubRepositoryURL(t *testing.T) { tests := []struct { input string diff --git a/grype/vex/processor.go b/grype/vex/processor.go index b43eeb40a4a..947a3f107d9 100644 --- a/grype/vex/processor.go +++ b/grype/vex/processor.go @@ -28,8 +28,10 @@ type vexProcessorImplementation interface { // AugmentMatches reads known affected VEX products from loaded documents and // adds new results to the scanner results when the product is marked as - // affected in the VEX data. - AugmentMatches(any, []match.IgnoreRule, *pkg.Context, *match.Matches, []match.IgnoredMatch) (*match.Matches, []match.IgnoredMatch, error) + // affected in the VEX data. The package catalog is provided so that + // implementations can synthesize matches for affected packages that the + // vulnerability database has no record of.
+ AugmentMatches(any, []match.IgnoreRule, *pkg.Context, []pkg.Package, *match.Matches, []match.IgnoredMatch) (*match.Matches, []match.IgnoredMatch, error) } // getVexImplementation this function returns the vex processor implementation @@ -77,8 +79,10 @@ type ProcessorOptions struct { // ApplyVEX receives the results from a scan run and applies any VEX information // in the files specified in the grype invocation. Any filtered results will -// be moved to the ignored matches slice. -func (vm *Processor) ApplyVEX(pkgContext *pkg.Context, remainingMatches *match.Matches, ignoredMatches []match.IgnoredMatch) (*match.Matches, []match.IgnoredMatch, error) { +// be moved to the ignored matches slice. The package catalog is forwarded to +// the underlying implementation so that affected statements can synthesize +// matches for packages that the vulnerability database does not cover. +func (vm *Processor) ApplyVEX(pkgContext *pkg.Context, pkgs []pkg.Package, remainingMatches *match.Matches, ignoredMatches []match.IgnoredMatch) (*match.Matches, []match.IgnoredMatch, error) { var err error // If no VEX documents are loaded, just pass through the matches, effectively NOOP @@ -102,7 +106,7 @@ func (vm *Processor) ApplyVEX(pkgContext *pkg.Context, remainingMatches *match.M } remainingMatches, ignoredMatches, err = vm.impl.AugmentMatches( - rawVexData, vexRules, pkgContext, remainingMatches, ignoredMatches, + rawVexData, vexRules, pkgContext, pkgs, remainingMatches, ignoredMatches, ) if err != nil { return nil, nil, fmt.Errorf("checking matches to augment from VEX data: %w", err) diff --git a/grype/vex/processor_test.go b/grype/vex/processor_test.go index 588eb890df0..44dbed491ad 100644 --- a/grype/vex/processor_test.go +++ b/grype/vex/processor_test.go @@ -347,7 +347,7 @@ func TestProcessor_ApplyVEX(t *testing.T) { if err != nil { return } - actualMatches, actualIgnoredMatches, err := p.ApplyVEX(tt.args.pkgContext, tt.args.matches, tt.args.ignoredMatches) + actualMatches, 
actualIgnoredMatches, err := p.ApplyVEX(tt.args.pkgContext, nil, tt.args.matches, tt.args.ignoredMatches) tt.wantErr(t, err) if err != nil { return diff --git a/grype/vulnerability_matcher.go b/grype/vulnerability_matcher.go index 8e7eb3322bc..c74d81a9f8f 100644 --- a/grype/vulnerability_matcher.go +++ b/grype/vulnerability_matcher.go @@ -116,7 +116,7 @@ func (m *VulnerabilityMatcher) FindMatchesContext( return remainingMatches, ignoredMatches, err } - remainingMatches, ignoredMatches, err = m.findVEXMatches(pkgContext, remainingMatches, ignoredMatches, progressMonitor) + remainingMatches, ignoredMatches, err = m.findVEXMatches(pkgContext, pkgs, remainingMatches, ignoredMatches, progressMonitor) if err != nil { err = fmt.Errorf("unable to find matches against VEX sources: %w", err) return remainingMatches, ignoredMatches, err @@ -275,14 +275,14 @@ func callMatcherSafely(m match.Matcher, vp vulnerability.Provider, p pkg.Package return m.Match(vp, p) } -func (m *VulnerabilityMatcher) findVEXMatches(pkgContext pkg.Context, remainingMatches *match.Matches, ignoredMatches []match.IgnoredMatch, progressMonitor *monitorWriter) (*match.Matches, []match.IgnoredMatch, error) { +func (m *VulnerabilityMatcher) findVEXMatches(pkgContext pkg.Context, pkgs []pkg.Package, remainingMatches *match.Matches, ignoredMatches []match.IgnoredMatch, progressMonitor *monitorWriter) (*match.Matches, []match.IgnoredMatch, error) { if m.VexProcessor == nil { log.Trace("no VEX documents provided, skipping VEX matching") return remainingMatches, ignoredMatches, nil } log.Trace("finding matches against available VEX documents") - matchesAfterVex, ignoredMatchesAfterVex, err := m.VexProcessor.ApplyVEX(&pkgContext, remainingMatches, ignoredMatches) + matchesAfterVex, ignoredMatchesAfterVex, err := m.VexProcessor.ApplyVEX(&pkgContext, pkgs, remainingMatches, ignoredMatches) if err != nil { return nil, nil, fmt.Errorf("unable to find matches against VEX documents: %w", err) } diff --git 
a/test/integration/match_by_image_test.go b/test/integration/match_by_image_test.go index d3992b83f66..8c2d16f1fbc 100644 --- a/test/integration/match_by_image_test.go +++ b/test/integration/match_by_image_test.go @@ -1043,7 +1043,7 @@ func vexMatches(t *testing.T, ignoredMatches []match.IgnoredMatch, vexStatus vex }, } - vexedMatches, ignoredMatches, err := vexMatcher.ApplyVEX(pctx, &matches, ignoredMatches) + vexedMatches, ignoredMatches, err := vexMatcher.ApplyVEX(pctx, nil, &matches, ignoredMatches) if err != nil { t.Errorf("applying VEX data: %s", err) }