SERVER-111072 Automate SBOM Generation from Endor Labs API (#42196)

GitOrigin-RevId: a3f60e9a3c4c086b3c6418a82655e2aed011ab24
This commit is contained in:
Jason Hills 2025-11-14 16:46:35 -05:00 committed by MongoDB Bot
parent 6f06d6d31a
commit 8bd12e2877
24 changed files with 7501 additions and 3022 deletions

13
.github/CODEOWNERS vendored
View File

@ -32,7 +32,8 @@ pnpm-lock.yaml @10gen/devprod-correctness @svc-auto-approve-bot
poetry.lock @10gen/devprod-build @10gen/devprod-correctness @svc-auto-approve-bot poetry.lock @10gen/devprod-build @10gen/devprod-correctness @svc-auto-approve-bot
pyproject.toml @10gen/devprod-build @10gen/devprod-correctness @svc-auto-approve-bot pyproject.toml @10gen/devprod-build @10gen/devprod-correctness @svc-auto-approve-bot
poetry_requirements.txt @10gen/devprod-build @10gen/devprod-correctness @svc-auto-approve-bot poetry_requirements.txt @10gen/devprod-build @10gen/devprod-correctness @svc-auto-approve-bot
sbom.json @10gen/server-security @svc-auto-approve-bot README.third_party.md @10gen/code-review-team-ssdlc @svc-auto-approve-bot
sbom.json @10gen/code-review-team-ssdlc @svc-auto-approve-bot
MODULE.bazel* @10gen/devprod-build @svc-auto-approve-bot MODULE.bazel* @10gen/devprod-build @svc-auto-approve-bot
WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
@ -285,6 +286,9 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
# The following patterns are parsed from ./buildscripts/s3_binary/OWNERS.yml # The following patterns are parsed from ./buildscripts/s3_binary/OWNERS.yml
/buildscripts/s3_binary/ @10gen/devprod-build @svc-auto-approve-bot /buildscripts/s3_binary/ @10gen/devprod-build @svc-auto-approve-bot
# The following patterns are parsed from ./buildscripts/sbom/OWNERS.yml
/buildscripts/sbom/ @10gen/code-review-team-ssdlc @svc-auto-approve-bot
# The following patterns are parsed from ./buildscripts/smoke_tests/OWNERS.yml # The following patterns are parsed from ./buildscripts/smoke_tests/OWNERS.yml
/buildscripts/smoke_tests/**/server_programmability.yml @10gen/server-programmability @svc-auto-approve-bot /buildscripts/smoke_tests/**/server_programmability.yml @10gen/server-programmability @svc-auto-approve-bot
/buildscripts/smoke_tests/**/catalog_and_routing.yml @10gen/server-catalog-and-routing @svc-auto-approve-bot /buildscripts/smoke_tests/**/catalog_and_routing.yml @10gen/server-catalog-and-routing @svc-auto-approve-bot
@ -317,6 +321,9 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
# The following patterns are parsed from ./buildscripts/tests/resmokelib/OWNERS.yml # The following patterns are parsed from ./buildscripts/tests/resmokelib/OWNERS.yml
/buildscripts/tests/resmokelib/ @10gen/devprod-correctness @svc-auto-approve-bot /buildscripts/tests/resmokelib/ @10gen/devprod-correctness @svc-auto-approve-bot
# The following patterns are parsed from ./buildscripts/tests/sbom_linter/OWNERS.yml
/buildscripts/tests/sbom_linter/ @10gen/code-review-team-ssdlc @svc-auto-approve-bot
# The following patterns are parsed from ./buildscripts/tests/timeouts/OWNERS.yml # The following patterns are parsed from ./buildscripts/tests/timeouts/OWNERS.yml
/buildscripts/tests/timeouts/ @10gen/devprod-correctness @svc-auto-approve-bot /buildscripts/tests/timeouts/ @10gen/devprod-correctness @svc-auto-approve-bot
@ -3277,8 +3284,8 @@ WORKSPACE.bazel @10gen/devprod-build @svc-auto-approve-bot
/src/third_party/libmongocrypt/**/* @10gen/server-security @svc-auto-approve-bot /src/third_party/libmongocrypt/**/* @10gen/server-security @svc-auto-approve-bot
# The following patterns are parsed from ./src/third_party/scripts/OWNERS.yml # The following patterns are parsed from ./src/third_party/scripts/OWNERS.yml
/src/third_party/scripts/**/gen_thirdpartyreadme.py @10gen/server-security @svc-auto-approve-bot /src/third_party/scripts/**/gen_thirdpartyreadme.py @10gen/code-review-team-ssdlc @svc-auto-approve-bot
/src/third_party/scripts/**/README.third_party.md.template @10gen/server-security @svc-auto-approve-bot /src/third_party/scripts/**/README.third_party.md.template @10gen/code-review-team-ssdlc @svc-auto-approve-bot
# The following patterns are parsed from ./tools/OWNERS.yml # The following patterns are parsed from ./tools/OWNERS.yml
/tools/**/* @10gen/devprod-build @svc-auto-approve-bot /tools/**/* @10gen/devprod-build @svc-auto-approve-bot

View File

@ -89,9 +89,12 @@ filters:
approvers: approvers:
- 10gen/devprod-correctness - 10gen/devprod-correctness
- 10gen/devprod-build - 10gen/devprod-build
- "README.third_party.md":
approvers:
- 10gen/code-review-team-ssdlc
- "sbom.json": - "sbom.json":
approvers: approvers:
- 10gen/server-security - 10gen/code-review-team-ssdlc
- "MODULE.bazel*": - "MODULE.bazel*":
approvers: approvers:
- 10gen/devprod-build - 10gen/devprod-build

View File

@ -27,140 +27,103 @@ a notice will be included in
| [Apache Avro C++] | Apache-2.0 | 1.12.0 | | ✗ | | [Apache Avro C++] | Apache-2.0 | 1.12.0 | | ✗ |
| [Asio C++ Library] | BSL-1.0 | 1.34.2 | | ✗ | | [Asio C++ Library] | BSL-1.0 | 1.34.2 | | ✗ |
| [AWS SDK for C++] | Apache-2.0 | 1.11.471 | | ✗ | | [AWS SDK for C++] | Apache-2.0 | 1.11.471 | | ✗ |
| [benchmark] | Apache-2.0 | v1.5.2 | | | | [benchmark] | Apache-2.0 | 1.5.2 | | |
| [Boost C++ Libraries] | BSL-1.0 | 1.88.0 | | ✗ | | [Boost C++ Libraries] | BSL-1.0 | 1.88.0 | | ✗ |
| [c-ares] | MIT | 1.27.0 | | ✗ | | [c-ares] | MIT | 1.27.0 | | ✗ |
| [cpptrace] | MIT | 1.0.3 | | |
| [CRoaring] | Apache-2.0 OR MIT | 3.0.1 | | ✗ | | [CRoaring] | Apache-2.0 OR MIT | 3.0.1 | | ✗ |
| [Cyrus SASL] | BSD-Attribution-HPND-disclaimer | 2.1.28 | | | | [Cyrus SASL] | BSD-Attribution-HPND-disclaimer | 2.1.28 | | |
| [fmt] | MIT | 11.2.0 | | ✗ | | [fmt] | MIT | 11.2.0 | | ✗ |
| [github.com/facebook/folly] | Apache-2.0 | v2025.04.21.00 | | ✗ | | [github.com/facebook/folly] | Apache-2.0 | 2023.12.25.00 | | ✗ |
| [googletest] | BSD-3-Clause | 1.17.0 | | | | [googletest] | BSD-3-Clause | 1.17.0 | | |
| [gperftools] | BSD-3-Clause | 2.9.1 | | ✗ | | [gperftools] | BSD-3-Clause | 2.9.1 | | ✗ |
| [gRPC (C++)] | Apache-2.0 | 1.59.5 | | ✗ | | [gRPC (C++)] | Apache-2.0 | 1.59.5 | | ✗ |
| [immer] | BSL-1.0 | 0.8.0 | | ✗ | | [ICU4C - International Components for Unicode C/C++] | Unicode-3.0 | 57.1 | ✗ | ✗ |
| [Intel® Decimal Floating-Point Math Library] | BSD-3-Clause | v2.0U1 | | ✗ | | [immer] | BSL-1.0 | 0b3aaf699b9d6f2e89f8e2b6d1221c307e02bda3 | | ✗ |
| [International Components for Unicode C/C++ (ICU4C)] | Unicode-3.0 | 57.1 | ✗ | ✗ | | [Intel® Decimal Floating-Point Math Library] | BSD-3-Clause | 2.0.1 | | ✗ |
| [JSON Schema Store] | Apache-2.0 | 6847cfc3a17a04a7664474212db50c627e1e3408 | | | | [JSON Schema Store] | Apache-2.0 | 6847cfc3a17a04a7664474212db50c627e1e3408 | | |
| [JSON-Schema-Test-Suite] | MIT | 728066f9c5c258ba3b1804a22a5b998f2ec77ec0 | | | | [JSON-Schema-Test-Suite] | MIT | 728066f9c5c258ba3b1804a22a5b998f2ec77ec0 | | |
| [libdwarf] | LGPL-2.1-or-later, BSD-3-Clause, Public Domain | v2.1.0 | | | | [libdwarf] | LGPL-2.1-or-later, BSD-3-Clause, Public Domain | 2.1.0 | | |
| [libmongocrypt] | Apache-2.0 | 1.15.0 | ✗ | ✗ | | [libmongocrypt] | Apache-2.0 | 1.15.0 | ✗ | ✗ |
| [librdkafka - The Apache Kafka C/C++ library] | BSD-2-Clause | 2.6.0 | | ✗ | | [librdkafka - The Apache Kafka C/C++ library] | BSD-2-Clause | 2.6.0 | | ✗ |
| [LibTomCrypt] | Unlicense | 1.18.2 | ✗ | ✗ | | [LibTomCrypt] | Unlicense | 1.18.2 | ✗ | ✗ |
| [libunwind] | MIT | v1.8.1 | | ✗ | | [libunwind] | MIT | 1.8.1 | | ✗ |
| [linenoise] | BSD-2-Clause | 6cdc775807e57b2c3fd64bd207814f8ee1fe35f3 | | ✗ | | [linenoise] | BSD-2-Clause | 6cdc775807e57b2c3fd64bd207814f8ee1fe35f3 | | ✗ |
| [MongoDB C Driver] | Apache-2.0 | 1.28.1 | ✗ | ✗ | | [MongoDB C Driver] | Apache-2.0 | 1.28.1 | ✗ | ✗ |
| [Mozilla Firefox ESR] | MPL-2.0 | 128.11.0esr | | ✗ | | [Mozilla Firefox ESR] | MPL-2.0 | 128.11.0esr | | ✗ |
| [MurmurHash3] | Public Domain | a6bd3ce7be8ad147ea820a7cf6229a975c0c96bb | | ✗ | | [MurmurHash3] | Public Domain | a6bd3ce7be8ad147ea820a7cf6229a975c0c96bb | | ✗ |
| [nlohmann/json] | MIT | 3.10.5 | | |
| [nlohmann/json] | MIT | 3.11.3 | ✗ | | | [nlohmann/json] | MIT | 3.11.3 | ✗ | |
| [node] | ISC | 22.1.0 | | | | [node] | ISC | 22.1.0 | | |
| [opentelemetry-cpp] | Apache-2.0 | 1.17 | ✗ | | | [opentelemetry-cpp] | Apache-2.0 | 1.17.0 | ✗ | |
| [opentelemetry-proto] | Apache-2.0 | 1.3.2 | ✗ | | | [opentelemetry-proto] | Apache-2.0 | 1.3.2 | ✗ | |
| [PCRE2 - Perl-Compatible Regular Expressions] | BSD-3-Clause WITH PCRE2-exception | 10.40 | | ✗ | | [PCRE2 - Perl-Compatible Regular Expressions] | BSD-3-Clause WITH PCRE2-exception | 10.40 | | ✗ |
| [Protobuf] | BSD-3-Clause | v4.25.0 | | ✗ | | [Protobuf] | BSD-3-Clause | v25.0 | | ✗ |
| [pypi/asn1crypto] | MIT | 1.5.1 | | |
| [pypi/bottle] | MIT | 0.12.25 | | |
| [pypi/concurrencytest] | GPL-3.0-or-later | 0.1.2 | | |
| [pypi/discover] | BSD-3-Clause | 0.4.0 | | |
| [pypi/extras] | MIT | 0.0.3 | | |
| [pypi/iso8601] | MIT | 2.1.0 | | |
| [pypi/ocspbuilder] | MIT | 0.10.2 | | | | [pypi/ocspbuilder] | MIT | 0.10.2 | | |
| [pypi/ocspresponder] | Apache-2.0 | 0.5.0 | | | | [pypi/ocspresponder] | Apache-2.0 | 0.5.0 | | |
| [pypi/oscrypto] | MIT | 1.3.0 | | | | [re2] | BSD-3-Clause | 2025-08-05 | | ✗ |
| [pypi/python-subunit] | (Apache-2.0 OR BSD-3-Clause) | 1.4.4 | | | | [S2 Geometry Library] | Apache-2.0 | a25c502bda9d7e0274b9e2b7825fbddf13cc0306 | ✗ | ✗ |
| [pypi/testscenarios] | BSD-3-Clause | 0.4 | | | | [SafeInt] | MIT | 3.0.28a | | ✗ |
| [pypi/testtools] | MIT | 2.7.1 | | |
| [re2] | BSD-3-Clause | 2023-11-01 | | ✗ |
| [S2 Geometry Library] | Apache-2.0 | c872048da5d1 | ✗ | ✗ |
| [SafeInt] | MIT | 3.0.26 | | ✗ |
| [snappy] | BSD-3-Clause | 1.1.10 | ✗ | ✗ | | [snappy] | BSD-3-Clause | 1.1.10 | ✗ | ✗ |
| [Snowball Stemming Algorithms (libstemmer)] | BSD-3-Clause | 7b264ffa0f767c579d052fd8142558dc8264d795 | ✗ | ✗ | | [Snowball Stemming Algorithms (libstemmer)] | BSD-3-Clause | 1.0.0 | ✗ | ✗ |
| [tcmalloc] | Apache-2.0 | 093ba93c1bd6dca03b0a8334f06d01b019244291 | | ✗ | | [tcmalloc] | Apache-2.0 | f3b20f9a07e175c5d897df7b49d9830d4efa6110 | | ✗ |
| [timelib] | MIT | 2022.13 | | ✗ | | [timelib] | MIT | 2022.13 | | ✗ |
| [Unicode Character Database] | Unicode-DFS-2016 | 8.0.0 | ✗ | ✗ | | [Unicode Character Database] | Unicode-DFS-2016 | 8.0.0 | ✗ | ✗ |
| [valgrind.h] | BSD-4-Clause | 3.17.0 | | ✗ | | [valgrind.h] | BSD-4-Clause | 093bef43d69236287ccc748591c9560a71181b0a | | ✗ |
| [WiredTiger] | GPL-2.0-only OR GPL-3.0-only | mongodb-master | ✗ | ✗ | | [WiredTiger] | GPL-2.0-only OR GPL-3.0-only | 8.2.0-alpha2 | ✗ | ✗ |
| [yaml-cpp] | MIT | 0.6.3 | | ✗ | | [yaml-cpp] | MIT | 0.6.3 | | ✗ |
| [zlib] | Zlib | 1.3.1 | ✗ | ✗ | | [zlib] | Zlib | 1.3.1 | ✗ | ✗ |
| [Zstandard (zstd)] | BSD-3-Clause OR GPL-2.0-only | 1.5.5 | ✗ | ✗ | | [Zstandard (zstd)] | BSD-3-Clause OR GPL-2.0-only | 1.5.5 | ✗ | ✗ |
[AWS SDK for C++]: https://github.com/aws/aws-sdk-cpp [AWS SDK for C++]: https://github.com/aws/aws-sdk-cpp.git
[Abseil Common Libraries (C++)]: https://github.com/abseil/abseil-cpp [Abseil Common Libraries (C++)]: https://github.com/abseil/abseil-cpp.git
[Apache Avro C++]: https://avro.apache.org/ [Apache Avro C++]: https://github.com/apache/avro.git
[Asio C++ Library]: https://github.com/chriskohlhoff/asio [Asio C++ Library]: https://github.com/chriskohlhoff/asio.git
[Boost C++ Libraries]: http://www.boost.org/ [Boost C++ Libraries]: https://github.com/boostorg/boost.git
[CRoaring]: https://github.com/RoaringBitmap/CRoaring [CRoaring]: https://github.com/roaringbitmap/croaring.git
[Cyrus SASL]: https://www.cyrusimap.org/sasl/ [Cyrus SASL]: https://github.com/cyrusimap/cyrus-sasl.git
[Intel® Decimal Floating-Point Math Library]: https://software.intel.com/en-us/articles/intel-decimal-floating-point-math-library [ICU4C - International Components for Unicode C/C++]: https://github.com/unicode-org/icu.git
[International Components for Unicode C/C++ (ICU4C)]: http://site.icu-project.org/download/ [Intel® Decimal Floating-Point Math Library]: https://www.netlib.org/misc/intel/
[JSON Schema Store]: https://www.schemastore.org/json/ [JSON Schema Store]: https://github.com/schemastore/schemastore.git
[JSON-Schema-Test-Suite]: https://github.com/json-schema-org/JSON-Schema-Test-Suite [JSON-Schema-Test-Suite]: https://github.com/json-schema-org/JSON-Schema-Test-Suite.git
[LibTomCrypt]: https://github.com/libtom/libtomcrypt/releases [LibTomCrypt]: https://github.com/libtom/libtomcrypt.git
[MongoDB C Driver]: https://github.com/mongodb/mongo-c-driver [MongoDB C Driver]: https://github.com/mongodb/mongo-c-driver.git
[Mozilla Firefox ESR]: https://www.mozilla.org/en-US/security/known-vulnerabilities/firefox-esr [Mozilla Firefox ESR]: https://github.com/mozilla-firefox/firefox.git
[MurmurHash3]: https://github.com/aappleby/smhasher/blob/a6bd3ce/ [MurmurHash3]: https://github.com/aappleby/smhasher/blob/a6bd3ce/
[PCRE2 - Perl-Compatible Regular Expressions]: http://www.pcre.org/ [PCRE2 - Perl-Compatible Regular Expressions]: https://github.com/pcre2project/pcre2.git
[Protobuf]: https://github.com/protocolbuffers/protobuf [Protobuf]: https://github.com/protocolbuffers/protobuf.git
[S2 Geometry Library]: https://github.com/google/s2geometry [S2 Geometry Library]: https://github.com/google/s2geometry.git
[SafeInt]: https://github.com/dcleblanc/SafeInt [SafeInt]: https://github.com/dcleblanc/safeint.git
[Snowball Stemming Algorithms (libstemmer)]: https://github.com/snowballstem/snowball [Snowball Stemming Algorithms (libstemmer)]: http://github.com/snowballstem/snowball.git
[Unicode Character Database]: http://www.unicode.org/versions/enumeratedversions.html [Unicode Character Database]: http://www.unicode.org/versions/enumeratedversions.html
[WiredTiger]: https://source.wiredtiger.com/ [WiredTiger]: https://github.com/wiredtiger/wiredtiger.git
[Zstandard (zstd)]: https://github.com/facebook/zstd [Zstandard (zstd)]: https://github.com/facebook/zstd.git
[benchmark]: https://github.com/google/benchmark [benchmark]: https://github.com/google/benchmark.git
[c-ares]: https://c-ares.org/ [c-ares]: https://github.com/c-ares/c-ares.git
[fmt]: http://fmtlib.net/ [cpptrace]: https://github.com/jeremy-rifkin/cpptrace.git
[gRPC (C++)]: https://github.com/grpc/grpc [fmt]: https://github.com/fmtlib/fmt.git
[github.com/facebook/folly]: https://github.com/facebook/folly [gRPC (C++)]: https://github.com/grpc/grpc.git
[googletest]: https://github.com/google/googletest [github.com/facebook/folly]: https://github.com/facebook/folly.git
[gperftools]: https://github.com/gperftools/gperftools [googletest]: https://github.com/google/googletest.git
[immer]: https://github.com/arximboldi/immer [gperftools]: https://github.com/gperftools/gperftools.git
[libdwarf]: https://github.com/davea42/libdwarf-code [immer]: https://github.com/arximboldi/immer.git
[libmongocrypt]: https://github.com/mongodb/libmongocrypt [libdwarf]: https://github.com/davea42/libdwarf-code.git
[librdkafka - The Apache Kafka C/C++ library]: https://github.com/confluentinc/librdkafka [libmongocrypt]: https://github.com/mongodb/libmongocrypt.git
[libunwind]: http://www.github.com/libunwind/libunwind [librdkafka - The Apache Kafka C/C++ library]: https://github.com/confluentinc/librdkafka.git
[libunwind]: https://github.com/libunwind/libunwind.git
[linenoise]: https://github.com/antirez/linenoise [linenoise]: https://github.com/antirez/linenoise
[nlohmann/json]: https://github.com/nlohmann/json [nlohmann/json]: https://github.com/nlohmann/json.git
[nlohmann/json]: https://github.com/open-telemetry/opentelemetry-proto [node]: https://nodejs.org/
[node]: https://nodejs.org/en/blog/release [opentelemetry-cpp]: https://github.com/open-telemetry/opentelemetry-cpp.git
[opentelemetry-cpp]: https://github.com/open-telemetry/opentelemetry-cpp/
[opentelemetry-proto]: https://github.com/open-telemetry/opentelemetry-proto [opentelemetry-proto]: https://github.com/open-telemetry/opentelemetry-proto
[pypi/asn1crypto]: https://pypi.org/project/asn1crypto/
[pypi/bottle]: https://bottlepy.org/docs/dev/
[pypi/concurrencytest]: https://pypi.org/project/concurrencytest/
[pypi/discover]: https://pypi.org/project/discover/
[pypi/extras]: https://github.com/testing-cabal/extras
[pypi/iso8601]: https://pypi.org/project/iso8601/
[pypi/ocspbuilder]: https://github.com/wbond/ocspbuilder [pypi/ocspbuilder]: https://github.com/wbond/ocspbuilder
[pypi/ocspresponder]: https://github.com/threema-ch/ocspresponder [pypi/ocspresponder]: https://github.com/threema-ch/ocspresponder
[pypi/oscrypto]: https://pypi.org/project/oscrypto/ [re2]: https://github.com/google/re2.git
[pypi/python-subunit]: https://github.com/testing-cabal/subunit [snappy]: https://github.com/google/tcmalloc.git
[pypi/testscenarios]: https://pypi.org/project/testscenarios/ [tcmalloc]: https://github.com/google/tcmalloc.git
[pypi/testtools]: https://github.com/testing-cabal/testtools [timelib]: https://github.com/derickr/timelib.git
[re2]: https://github.com/google/re2 [valgrind.h]: https://sourceware.org/git/valgrind.git
[snappy]: https://github.com/google/snappy/releases [yaml-cpp]: https://github.com/jbeder/yaml-cpp.git
[tcmalloc]: https://github.com/google/tcmalloc [zlib]: https://zlib.net/fossils/
[timelib]: https://github.com/derickr/timelib
[valgrind.h]: http://valgrind.org/downloads/current.html
[yaml-cpp]: https://github.com/jbeder/yaml-cpp/releases
[zlib]: https://zlib.net/
## WiredTiger Vendored Test Libraries
The following libraries are transitively included by WiredTiger,
and are used by that component for testing. They don't appear in
released binary artifacts.
| Name |
| -------------------------- |
| nlohmann/json@3.10.5 |
| pypi/concurrencytest@0.1.2 |
| pypi/discover@0.4.0 |
| pypi/extras@0.0.3 |
| pypi/iso8601@2.1.0 |
| pypi/python-subunit@1.4.4 |
| pypi/testscenarios@0.4 |
| pypi/testtools@2.7.1 |
## Dynamically Linked Libraries ## Dynamically Linked Libraries

View File

@ -0,0 +1,29 @@
load("@rules_python//python:defs.bzl", "py_binary", "py_library")
py_library(
name = "config",
srcs = ["config.py"],
visibility = ["//visibility:public"],
)
py_library(
name = "endorctl_utils",
srcs = ["endorctl_utils.py"],
visibility = ["//visibility:public"],
)
py_binary(
name = "generate_sbom",
srcs = ["generate_sbom.py"],
visibility = ["//visibility:public"],
deps = [
"config",
"endorctl_utils",
],
)
py_binary(
name = "sbom_files_pr",
srcs = ["sbom_files_pr.py"],
visibility = ["//visibility:public"],
)

View File

@ -0,0 +1,5 @@
version: 2.0.0
filters:
- "*":
approvers:
- 10gen/code-review-team-ssdlc

225
buildscripts/sbom/config.py Normal file
View File

@ -0,0 +1,225 @@
#!/usr/bin/env python3
"""generate_sbom.py config. Operational configuration values stored separately from the core code."""
import json
import logging
import re
logger = logging.getLogger("generate_sbom")
logger.setLevel(logging.NOTSET)
# ################ Component Filters ################
# bom-ref prefixes observed from Endor Labs (they have changed over time, so
# every variant we have seen is listed).
prefixes = [
    "pkg:c/github.com/",
    "pkg:generic/github.com/",
    "pkg:github/",
]
# Repository-style component names that must be dropped from the SBOM before processing.
components_remove = [
    # Endor Labs includes the main component in 'components'. This is not standard, so we remove it.
    "10gen/mongo",
    # should be pkg:github/antirez/linenoise - waiting on Endor Labs fix
    "amokhuginnsson/replxx",
    # a transitive dependency of s2 that is not necessary to include
    "sparsehash/sparsehash",
]
# Full bom-ref strings to remove: every known prefix applied to every component name.
endor_components_remove = [
    prefix + component for component in components_remove for prefix in prefixes
]
# ################ Component Renaming ################
# Endor does not have syntactically valid PURLs for C/C++ packages.
# e.g.,
# Invalid: pkg:c/github.com/abseil/abseil-cpp@20250512.1
# Valid: pkg:github/abseil/abseil-cpp@20250512.1
# Run string replacements to correct for this.
# Each entry is an [old_prefix, new_prefix] pair; order matters -- the specific
# rewrites must come before the generic "pkg:c/github.com/" -> "pkg:github/"
# catch-alls at the end, or they would never match.
endor_components_rename = [
    ["pkg:c/sourceware.org/git/valgrind", "pkg:generic/valgrind/valgrind"],
    ["pkg:generic/sourceware.org/git/valgrind", "pkg:generic/valgrind/valgrind"],
    ["pkg:generic/zlib.net/zlib", "pkg:github/madler/zlib"],
    ["pkg:generic/tartarus.org/libstemmer", "pkg:github/snowballstem/snowball"],
    ["pkg:generic/intel.com/intel-dfp-math", "pkg:generic/intel/IntelRDFPMathLib"],
    ["pkg:c/git.openldap.org/openldap/openldap", "pkg:generic/openldap/openldap"],
    # Generic catch-alls: must stay last so the specific rewrites above win.
    ["pkg:generic/github.com/", "pkg:github/"],
    ["pkg:c/github.com/", "pkg:github/"],
]
# ################ PURL Validation ################
REGEX_STR_PURL_OPTIONAL = (  # Optional Version (any chars except ? @ #)
    r"(?:@[^?@#]*)?"
    # Optional Qualifiers (any chars except @ #)
    r"(?:\?[^@#]*)?"
    # Optional Subpath (any chars)
    r"(?:#.*)?$"
)
# Compiled validators per PURL type. Each pattern anchors the scheme/type at
# the start and, via REGEX_STR_PURL_OPTIONAL, the optional version/qualifiers/
# subpath through to end-of-string.
REGEX_PURL = {
    # deb PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/deb-definition.md
    "deb": re.compile(
        r"^pkg:deb/"  # Scheme and type
        # Namespace (distribution vendor), letters must be lowercase.
        # BUG FIX: was "(debian|ubuntu)+" -- the '+' allowed repeated vendors
        # such as "debianubuntu" to validate; exactly one vendor is valid.
        r"(debian|ubuntu)"
        r"/"
        r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name
    ),
    # Generic PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/generic-definition.md
    "generic": re.compile(
        r"^pkg:generic/"  # Scheme and type
        r"([a-zA-Z0-9._-]+/)?"  # Optional namespace segment
        r"[a-zA-Z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name (required)
    ),
    # GitHub PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/github-definition.md
    "github": re.compile(
        r"^pkg:github/"  # Scheme and type
        # Namespace (organization/user), letters must be lowercase
        r"[a-z0-9-]+"
        r"/"
        r"[a-z0-9._-]+" + REGEX_STR_PURL_OPTIONAL  # Name (repository)
    ),
    # PyPI PURL. https://github.com/package-url/purl-spec/blob/main/types-doc/pypi-definition.md
    "pypi": re.compile(
        r"^pkg:pypi/"  # Scheme and type
        r"[a-z0-9_-]+"  # Name, letters must be lowercase, dashes, underscore
        + REGEX_STR_PURL_OPTIONAL
    ),
}
def is_valid_purl(purl: str) -> bool:
    """Return True when *purl* matches any supported PURL type regex.

    Supported types are the keys of REGEX_PURL (deb, generic, github, pypi).
    """
    matched = next(
        ((ptype, rex) for ptype, rex in REGEX_PURL.items() if rex.match(purl)),
        None,
    )
    if matched is None:
        return False
    ptype, rex = matched
    logger.debug(f"PURL: {purl} matched PURL type '{ptype}' regex '{rex.pattern}'")
    return True
# ################ Version Transformation ################
# Some upstream tags embed the version inside other text (project names, 'v'
# prefixes, underscores, ...). The patterns below are deliberately targeted --
# not broad -- since we cannot predict every future tag scheme.
# Each entry is a [pattern, repl] pair fed to re.sub().
RE_VER_NUM = r"(0|[1-9]\d*)"
RE_VER_LBL = r"(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?"
RE_SEMVER = rf"{RE_VER_NUM}\.{RE_VER_NUM}\.{RE_VER_NUM}{RE_VER_LBL}"
regex_semver = re.compile(RE_SEMVER)
VERSION_PATTERN_REPL = [
    # 'debian/1.28.1-1' pkg:github/mongodb/mongo-c-driver (temporary workaround)
    [re.compile(rf"^debian/({RE_SEMVER})-\d$"), r"\1"],
    # 'gperftools-2.9.1' pkg:github/gperftools/gperftools
    # 'mongo/v1.5.2' pkg:github/google/benchmark
    # 'mongodb-8.2.0-alpha2' pkg:github/wiredtiger/wiredtiger
    # 'release-1.12.0' pkg:github/apache/avro
    # 'yaml-cpp-0.6.3' pkg:github/jbeder/yaml-cpp
    [re.compile(rf"^[-a-z]+[-/][vr]?({RE_SEMVER})$"), r"\1"],
    # 'asio-1-34-2' pkg:github/chriskohlhoff/asio
    # 'cares-1_27_0' pkg:github/c-ares/c-ares
    [
        re.compile(rf"^[a-z]+-{RE_VER_NUM}[_-]{RE_VER_NUM}[_-]{RE_VER_NUM}{RE_VER_LBL}$"),
        r"\1.\2.\3",
    ],
    # 'pcre2-10.40' pkg:github/pcre2project/pcre2
    [re.compile(rf"^[a-z0-9]+-({RE_VER_NUM}\.{RE_VER_NUM})$"), r"\1"],
    # 'icu-release-57-1' pkg:github/unicode-org/icu
    [re.compile(rf"^[a-z]+-?[a-z]+-{RE_VER_NUM}-{RE_VER_NUM}$"), r"\1.\2"],
    # 'v2.6.0' pkg:github/confluentinc/librdkafka
    # 'r2.5.1'
    [re.compile(rf"^[rv]({RE_SEMVER})$"), r"\1"],
    # 'v2025.04.21.00' pkg:github/facebook/folly
    [re.compile(r"^v(\d+\.\d+\.\d+\.\d+)$"), r"\1"],
]


def get_semver_from_release_version(release_ver: str) -> str:
    """Strip tag/annotation text from *release_ver* and return the bare version.

    The first matching pattern in VERSION_PATTERN_REPL wins. When the input is
    falsy or no pattern matches, the input is returned unchanged.
    """
    if not release_ver:
        return release_ver
    for pattern, repl in VERSION_PATTERN_REPL:
        if pattern.match(release_ver):
            return pattern.sub(repl, release_ver)
    return release_ver
# region special component use-case functions
def get_version_from_wiredtiger_import_data(file_path: str) -> str:
    """Return the 'commit' value from a wiredtiger 'import.data' JSON file.

    Returns None (after logging) when the file cannot be read or parsed.
    """
    try:
        with open(file_path, "r") as fh:
            parsed = json.load(fh)
    except Exception as exc:
        logger.error(f"Error loading JSON file from {file_path}")
        logger.error(exc)
        return None
    return parsed.get("commit")
def get_version_sasl_from_workspace(file_path: str) -> str:
    """Find the Windows Cyrus SASL version by scanning WORKSPACE.bazel text.

    Looks for a line such as:
        "https://s3.amazonaws.com/boxes.10gen.com/build/windows_cyrus_sasl-2.1.28.zip",
    and returns the version embedded in the file name, or None when the line
    is absent or the file cannot be read (a warning is logged in that case).
    """
    marker = '"https://s3.amazonaws.com/boxes.10gen.com/build/windows_cyrus_sasl-'
    try:
        with open(file_path, "r") as fh:
            for raw_line in fh:
                stripped = raw_line.strip()
                if stripped.startswith(marker):
                    return stripped.split("windows_cyrus_sasl-")[1].split(".zip")[0]
    except Exception as exc:
        logger.warning(f"Unable to load {file_path}")
        logger.warning(exc)
    return None
def process_component_special_cases(
    component_key: str, component: dict, versions: dict, repo_root: str
) -> None:
    """Apply per-component version overrides for components Endor Labs cannot scan accurately.

    Mutates versions["import_script"] in place for the handled component keys;
    all other components are left untouched.
    """
    ## Special case for Cyrus SASL ##
    if component_key == "pkg:github/cyrusimap/cyrus-sasl":
        # Cyrus SASL is optionally loaded as a Windows library, when needed. There is no source code for Endor Labs to scan.
        # The version of Cyrus SASL that is used is defined in the WORKSPACE.bazel file:
        # "https://s3.amazonaws.com/boxes.10gen.com/build/windows_cyrus_sasl-2.1.28.zip",
        # Rather than add the complexity of Bazel queries to this script, we just search the text.
        versions["import_script"] = get_version_sasl_from_workspace(repo_root + "/WORKSPACE.bazel")
        # NOTE(review): get_version_sasl_from_workspace can return None; this log
        # then reports version 'None' -- confirm downstream handles that.
        logger.info(
            f"VERSION SPECIAL CASE: {component_key}: Found version '{versions['import_script']}' in 'WORKSPACE.bazel' file"
        )
    ## Special case for wiredtiger ##
    elif component_key == "pkg:github/wiredtiger/wiredtiger":
        # MongoDB release branches import wiredtiger commits via a bot. These commits will likely not line up with a release or tag.
        # Endor Labs will try to pull the nearest release/tag, but we want the more precise commit hash, which is stored in:
        # src/third_party/wiredtiger/import.data
        occurrences = component.get("evidence", {}).get("occurrences", [])
        if occurrences:
            # Only the first occurrence's location is used to find import.data.
            location = occurrences[0].get("location")
            versions["import_script"] = get_version_from_wiredtiger_import_data(
                f"{repo_root}/{location}/import.data"
            )
            logger.info(
                f"VERSION SPECIAL CASE: {component_key}: Found version '{versions['import_script']}' in 'import.data' file"
            )
    ## Special case for opentelemetry-cpp ##
    elif component_key == "pkg:github/open-telemetry/opentelemetry-cpp":
        # The opentelemetry import script has the mongodb-forks version ref in Major.Minor format (e.g., 1.17), which deviates from
        # what the open-telemetry/opentelemetry-cpp project uses (v{SEMVER}). This corrects the version string by adding a '.0', if needed
        if re.match(rf"^{RE_VER_NUM}\.{RE_VER_NUM}$", versions["import_script"]):
            versions["import_script"] += ".0"
            logger.info(
                f"VERSION SPECIAL CASE: {component_key}: Adjusted import script version string to semver format: '{versions['import_script']}'"
            )
# endregion special component use-case functions

View File

@ -0,0 +1,486 @@
#!/usr/bin/env python3
"""
Utility functions for the Endor Labs API via endorctl
"""
import json
import logging
import subprocess
import time
from datetime import datetime
from enum import Enum
logger = logging.getLogger("generate_sbom")
logger.setLevel(logging.NOTSET)
default_field_masks = {
"PackageVersion": [
"context",
"meta",
"processing_status",
"spec.package_name",
"spec.resolved_dependencies.dependencies",
"spec.source_code_reference",
],
"ScanResult": [
"context",
"meta",
"spec.end_time",
"spec.logs",
"spec.refs",
"spec.start_time",
"spec.status",
"spec.versions",
],
}
def _get_default_field_mask(kind):
default_field_mask = default_field_masks.get(kind, [])
return ",".join(default_field_mask)
class EndorResourceKind(Enum):
    """Enumeration for Endor Labs API resource kinds.

    Values are the literal resource-kind strings passed to endorctl API calls.
    """

    PROJECT = "Project"
    REPOSITORY_VERSION = "RepositoryVersion"
    SCAN_RESULT = "ScanResult"
    PACKAGE_VERSION = "PackageVersion"
class EndorContextType(Enum):
    """Most objects include a common nested object called Context. Contexts keep objects from different scans separated.

    https://docs.endorlabs.com/rest-api/using-the-rest-api/data-model/common-fields/#context
    """

    # Objects from a scan of the default branch. All objects in the OSS namespace are in the main context. The context ID is always default.
    MAIN = "CONTEXT_TYPE_MAIN"
    # Objects from a scan of a specific branch. The context ID is the branch reference name.
    REF = "CONTEXT_TYPE_REF"
    # Objects from a PR scan. The context ID is the PR UUID. Objects in this context are deleted after 30 days.
    CI_RUN = "CONTEXT_TYPE_CI_RUN"
class EndorFilter:
    """Build standard filter strings for Endor Labs API resource kinds.

    The optional context_id/context_type given to the constructor are folded
    into every filter this instance produces (via _base_filters).
    """

    def __init__(self, context_id=None, context_type=None):
        self.context_id = context_id
        # NOTE(review): _base_filters interpolates this value directly, so it
        # appears to expect the raw context-type string rather than an
        # EndorContextType member (the per-call parameters use .value) --
        # confirm with callers.
        self.context_type = context_type

    def _base_filters(self):
        """Return the filter clauses shared by all resource kinds."""
        base_filters = []
        if self.context_id:
            base_filters.append(f"context.id=={self.context_id}")
        if self.context_type:
            base_filters.append(f"context.type=={self.context_type}")
        return base_filters

    def repository_version(self, project_uuid=None, sha=None, ref=None):
        """Build a filter string for RepositoryVersion objects."""
        filters = self._base_filters()
        if project_uuid:
            filters.append(f"meta.parent_uuid=={project_uuid}")
        if sha:
            filters.append(f"spec.version.sha=={sha}")
        if ref:
            filters.append(f"spec.version.ref=={ref}")
        return " and ".join(filters)

    def package_version(
        self,
        context_type: EndorContextType = None,
        context_id=None,
        project_uuid=None,
        name=None,
        package_name=None,
    ):
        """Build a filter string for PackageVersion objects."""
        filters = self._base_filters()
        if context_type:
            filters.append(f"context.type=={context_type.value}")
        # BUG FIX: this guard previously re-tested `context_type`, so calling
        # with a context_type but no context_id appended "context.id==None".
        if context_id:
            filters.append(f"context.id=={context_id}")
        if project_uuid:
            filters.append(f"spec.project_uuid=={project_uuid}")
        # NOTE(review): 'name' maps to spec.package_name while 'package_name'
        # maps to meta.name; this looks swapped, but callers may depend on the
        # current mapping -- confirm before changing.
        if name:
            filters.append(f"spec.package_name=={name}")
        if package_name:
            filters.append(f"meta.name=={package_name}")
        return " and ".join(filters)

    def scan_result(
        self,
        context_type: EndorContextType = None,
        project_uuid=None,
        ref=None,
        sha=None,
        status=None,
    ):
        """Build a filter string for ScanResult objects."""
        filters = self._base_filters()
        if context_type:
            filters.append(f"context.type=={context_type.value}")
        if project_uuid:
            filters.append(f"meta.parent_uuid=={project_uuid}")
        if ref:
            filters.append(f"spec.versions.ref contains '{ref}'")
        if sha:
            filters.append(f"spec.versions.sha contains '{sha}'")
        if status:
            filters.append(f"spec.status=={status}")
        return " and ".join(filters)
class EndorCtl:
"""Interact with endorctl (Endor Labs CLI)"""
# region internal functions
def __init__(
    self,
    namespace,
    retry_limit=5,
    sleep_duration=30,
    endorctl_path="endorctl",
    config_path=None,
):
    """Create a wrapper around the endorctl CLI.

    Args:
        namespace: Endor Labs namespace, passed as --namespace to every call.
        retry_limit: maximum retry attempts used by the 'api list' wait loop.
        sleep_duration: seconds to sleep between 'api list' retries.
        endorctl_path: path to the endorctl binary.
        config_path: optional endorctl config file, passed as --config-path.
    """
    self.namespace = namespace
    self.retry_limit = retry_limit
    self.sleep_duration = sleep_duration
    self.endorctl_path = endorctl_path
    self.config_path = config_path
def _call_endorctl(self, command, subcommand, **kwargs):
    """Run an endorctl command and return its parsed JSON output.

    https://docs.endorlabs.com/endorctl/

    Args:
        command: top-level endorctl command (e.g., "api").
        subcommand: endorctl subcommand (e.g., "get", "list").
        **kwargs: extra flags; key 'foo_bar' with value v becomes --foo-bar=v.

    Returns:
        The JSON-decoded stdout on success, or None on any failure
        (errors are logged, not raised).
    """
    try:
        # Rebinds the 'command' parameter to the full argv list from here on.
        command = [self.endorctl_path, command, subcommand, f"--namespace={self.namespace}"]
        if self.config_path:
            command.append(f"--config-path={self.config_path}")
        # parse args into flags
        for key, value in kwargs.items():
            # Handle endorctl flags with hyphens that are defined in the script with underscores
            flag = key.replace("_", "-")
            # NOTE(review): falsy values (None, "", 0, False) are silently
            # dropped, so boolean flags cannot be explicitly disabled here.
            if value:
                command.append(f"--{flag}={value}")
        logger.info("Running: %s", " ".join(command))
        # List argv (shell=False) avoids shell-injection; check=True raises on
        # a non-zero exit so it is handled by the except below.
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        resource = json.loads(result.stdout)
    except subprocess.CalledProcessError as e:
        logger.error(f"Error executing command: {e}")
        logger.error(e.stderr)
    except json.JSONDecodeError as e:
        # 'result' is always bound here: json.loads only runs after subprocess.run succeeded.
        logger.error(f"Error decoding JSON: {e}")
        logger.error(f"Stdout: {result.stdout}")
    except FileNotFoundError as e:
        logger.error(f"FileNotFoundError: {e}")
        logger.error(
            f"'endorctl' not found in path '{self.endorctl_path}'. Supply the correct path, run 'buildscripts/install_endorctl.sh' or visit https://docs.endorlabs.com/endorctl/install-and-configure/"
        )
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
    else:
        # Success path only; every except branch above falls through to an
        # implicit 'return None'.
        return resource
def _api_get(self, resource, **kwargs):
"""https://docs.endorlabs.com/endorctl/commands/api/"""
return self._call_endorctl("api", "get", resource=resource, **kwargs)
def _api_list(self, resource, filter=None, retry=True, **kwargs):
"""https://docs.endorlabs.com/endorctl/commands/api/"""
# If this script is run immediately after making a commit, Endor Labs will likely not yet have created the assocaited ScanResult object. The wait/retry logic below handles this scenario.
tries = 0
while True:
tries += 1
result = self._call_endorctl("api", "list", resource=resource, filter=filter, **kwargs)
# The expected output of 'endorctl api list' is: { "list": { "objects": [...] } }
# We want to just return the objects. In case we get an empty list, return a list
# with a single None to avoid having to handle index errors downstream.
if result and result["list"].get("objects") and len(result["list"]["objects"]) > 0:
return result["list"]["objects"]
elif retry:
logger.info(
f"API LIST: Resource not found: {resource} with filter '{filter}' in namespace '{self.namespace}'"
)
if tries <= self.retry_limit:
logger.info(
f"API LIST: Waiting for {self.sleep_duration} seconds before retry attempt {tries} of {self.retry_limit}"
)
time.sleep(self.sleep_duration)
else:
logger.warning(
f"API LIST: Maximum number of allowed retries {self.retry_limit} attempted with no {resource} found using filter '{filter}'"
)
return [None]
else:
return [None]
def _check_resource(self, resource, resource_description) -> None:
if not resource:
raise LookupError(f"Resource not found: {resource_description}")
logger.info(f"Retrieved: {resource_description}")
# endregion internal functions
# region resource functions
def get_resource(self, resource, uuid=None, name=None, field_mask=None, **kwargs):
"""https://docs.endorlabs.com/rest-api/using-the-rest-api/data-model/resource-kinds/"""
if not field_mask:
field_mask = _get_default_field_mask(resource)
return self._api_get(
resource=resource, uuid=uuid, name=name, field_mask=field_mask, **kwargs
)
def get_resources(
self,
resource,
filter=None,
field_mask=None,
sort_path="meta.create_time",
sort_order="descending",
retry=True,
**kwargs,
):
"""https://docs.endorlabs.com/rest-api/using-the-rest-api/data-model/resource-kinds/"""
if not field_mask:
field_mask = _get_default_field_mask(resource)
return self._api_list(
resource=resource,
filter=filter,
field_mask=field_mask,
sort_path=sort_path,
sort_order=sort_order,
retry=retry,
**kwargs,
)
def get_project(self, git_url):
resource_kind = EndorResourceKind.PROJECT.value
resource_description = (
f"{resource_kind} with name '{git_url}' in namespace '{self.namespace}'"
)
project = self.get_resource(resource_kind, name=git_url)
self._check_resource(project, resource_description)
return project
def get_repository_version(self, filter=None, retry=True):
resource_kind = EndorResourceKind.REPOSITORY_VERSION.value
resource_description = (
f"{resource_kind} with filter '{filter}' in namespace '{self.namespace}'"
)
repository_version = self.get_resources(
resource_kind, filter=filter, retry=retry, page_size=1
)[0]
self._check_resource(repository_version, resource_description)
return repository_version
def get_scan_result(self, filter=None, retry=True):
resource_kind = EndorResourceKind.SCAN_RESULT.value
resource_description = (
f"{resource_kind} with filter '{filter}' in namespace '{self.namespace}'"
)
scan_result = self.get_resources(resource_kind, filter=filter, retry=retry, page_size=1)[0]
self._check_resource(scan_result, resource_description)
uuid = scan_result.get("uuid")
start_time = scan_result["spec"].get("start_time")
refs = scan_result["spec"].get("refs")
polling_start_time = datetime.now()
while True:
status = scan_result["spec"].get("status")
end_time = scan_result["spec"].get("end_time")
if status == "STATUS_SUCCESS":
logger.info(
f" Scan completed successfully. ScanResult uuid {uuid} for refs {refs} started at {start_time}, ended at {end_time}."
)
return scan_result
elif status == "STATUS_RUNNING":
logger.info(
f" Scan is running. ScanResult uuid {uuid} for refs {refs} started at {start_time}."
)
logger.info(
f" Waiting {self.sleep_duration} seconds before checking status. Total wait time: {(datetime.now() - polling_start_time).total_seconds()/60:.2f} minutes"
)
time.sleep(self.sleep_duration)
scan_result = self.get_resources(
resource_kind, filter=filter, retry=retry, page_size=1
)[0]
elif status == "STATUS_PARTIAL_SUCCESS":
scan_logs = scan_result["spec"].get("logs")
raise RuntimeError(
f" Scan completed, but with critical warnings or errors. ScanResult uuid {uuid} for refs {refs} started at {start_time}, ended at {end_time}. Scan logs: {scan_logs}"
)
elif status == "STATUS_FAILURE":
scan_logs = scan_result["spec"].get("logs")
raise RuntimeError(
f" Scan failed. ScanResult uuid {uuid} for refs {refs} started at {start_time}, ended at {end_time}. Scan logs: {scan_logs}"
)
def get_package_versions(self, filter):
resource_kind = EndorResourceKind.PACKAGE_VERSION.value
resource_description = (
f"{resource_kind} with filter '{filter}' in namespace '{self.namespace}'"
)
package_versions = self.get_resources(resource_kind, filter=filter)
self._check_resource(package_versions, resource_description)
return package_versions
def export_sbom(
self,
package_version_uuid=None,
package_version_uuids=None,
package_version_name=None,
app_name=None,
project_name=None,
project_uuid=None,
):
"""Export an SBOM from Endor Labs
Valid parameter sets (other combinations result in an error from 'endorctl'):
Single-Package SBOM:
package_version_uuid
package_version_name
Multi-Package SBOM:
package_version_uuids,app_name
project_uuid,app_name,app_name
project_name,app_name,app_name
https://docs.endorlabs.com/endorctl/commands/sbom/export/
"""
if package_version_uuids:
package_version_uuids = ",".join(package_version_uuids)
return self._call_endorctl(
"sbom",
"export",
package_version_uuid=package_version_uuid,
package_version_uuids=package_version_uuids,
package_version_name=package_version_name,
app_name=app_name,
project_name=project_name,
project_uuid=project_uuid,
)
# endregion resource functions
# region workflow functions
def get_sbom_for_commit(self, git_url: str, commit_sha: str) -> dict:
"""Export SBOM for the PR commit (sha)"""
endor_filter = EndorFilter()
try:
# Project: get uuid
project = self.get_project(git_url)
project_uuid = project["uuid"]
app_name = project["spec"]["git"]["full_name"]
# RepositoryVersion: get the context for the PR scan
endor_filter.context_type = EndorContextType.CI_RUN.value
filter_str = endor_filter.repository_version(project_uuid, commit_sha)
repository_version = self.get_repository_version(filter_str)
context_id = repository_version["context"]["id"]
# ScanResult: wait for a completed scan
endor_filter.context_id = context_id
filter_str = endor_filter.scan_result(project_uuid)
self.get_scan_result(filter_str)
# PackageVersions: get package versions for SBOM
filter_str = endor_filter.package_version(project_uuid)
package_versions = self.get_package_versions(filter_str)
package_version_uuids = [
package_version["uuid"] for package_version in package_versions
]
package_version_names = [
package_version["meta"]["name"] for package_version in package_versions
]
# Export SBOM
sbom = self.export_sbom(package_version_uuids=package_version_uuids, app_name=app_name)
print(
f"Retrieved: CycloneDX SBOM for PackageVersion(s), name: {package_version_names}, uuid: {package_version_uuids}"
)
return sbom
except Exception as e:
print(f"Exception: {e}")
return
def get_sbom_for_branch(self, git_url: str, branch: str) -> dict:
"""Export lastest SBOM for a monitored branch/ref"""
endor_filter = EndorFilter()
try:
# Project: get uuid
project = self.get_project(git_url)
project_uuid = project["uuid"]
app_name = project["spec"]["git"]["full_name"]
# RepositoryVersion: get the context for the latest branch scan
filter_str = endor_filter.repository_version(project_uuid, ref=branch)
repository_version = self.get_repository_version(filter_str)
repository_version_uuid = repository_version["uuid"]
repository_version_ref = repository_version["spec"]["version"]["ref"]
repository_version_sha = repository_version["spec"]["version"]["sha"]
repository_version_scan_object_status = repository_version["scan_object"]["status"]
if repository_version_scan_object_status != "STATUS_SCANNED":
logger.warning(
f"RepositoryVersion (uuid: {repository_version_uuid}, ref: {repository_version_ref}, sha: {repository_version_sha}) scan status is '{repository_version_scan_object_status}' (expected 'STATUS_SCANNED')"
)
# ScanResult: search for a completed scan
filter_str = endor_filter.scan_result(
EndorContextType.MAIN, project_uuid, repository_version_ref, repository_version_sha
)
scan_result = self.get_scan_result(filter_str, retry=False)
project_uuid = scan_result["meta"]["parent_uuid"]
# PackageVersions: get package versions for SBOM
if branch == "master":
context_type = EndorContextType.MAIN
context_id = "default"
else:
context_type = EndorContextType.REF
context_id = branch
filter_str = endor_filter.package_version(context_type, context_id, project_uuid)
package_version = self.get_package_versions(filter_str)[0]
package_version_name = package_version["meta"]["name"]
package_version_uuid = package_version["uuid"]
# Export SBOM
sbom = self.export_sbom(package_version_uuid=package_version_uuid, app_name=app_name)
logger.info(
f"SBOM: Retrieved CycloneDX SBOM for PackageVersion, name: {package_version_name}, uuid {package_version_uuid}"
)
return sbom
except Exception as e:
print(f"Exception: {e}")
return
def get_sbom_for_project(self, git_url: str) -> dict:
"""Export latest SBOM for EndorCtl project default branch"""
try:
# Project: get uuid
project = self.get_project(git_url)
project_uuid = project["uuid"]
app_name = project["spec"]["git"]["full_name"]
# Export SBOM
sbom = self.export_sbom(project_uuid=project_uuid, app_name=app_name)
logger.info(f"Retrieved: CycloneDX SBOM for Project {app_name}")
return sbom
except Exception as e:
print(f"Exception: {e}")
return
# endregion workflow functions

View File

@ -0,0 +1,849 @@
#!/usr/bin/env python3
"""
Generate a CycloneDX SBOM using scan results from Endor Labs.
Schema validation of output is not performed.
Use 'buildscripts/sbom_linter.py' for validation.
Invoke with ---help or -h for help message.
"""
import argparse
import json
import logging
import os
import re
import subprocess
import sys
import urllib.parse
import uuid
from datetime import datetime, timezone
from pathlib import Path
from config import (
endor_components_remove,
endor_components_rename,
get_semver_from_release_version,
is_valid_purl,
process_component_special_cases,
)
from endorctl_utils import EndorCtl
from git import Commit, Repo
# region init
class WarningListHandler(logging.Handler):
    """Logging handler that accumulates WARNING-and-above records in memory."""

    def __init__(self):
        super().__init__()
        # LogRecords at WARNING level or higher, in emission order.
        self.warnings = []

    def emit(self, record):
        # Ignore anything below WARNING; keep the rest for later inspection.
        if record.levelno < logging.WARNING:
            return
        self.warnings.append(record)
# Route log output to stdout (basicConfig defaults to stderr) so it interleaves
# correctly with this script's print() banners in CI logs.
logging.basicConfig(stream=sys.stdout)
# Module-level logger used throughout this script.
logger = logging.getLogger("generate_sbom")
logger.setLevel(logging.INFO)
# Create an instance of the custom handler
# (collected records are available afterwards via warning_handler.warnings)
warning_handler = WarningListHandler()
# Add the handler to the logger
logger.addHandler(warning_handler)
# Get the absolute path of the script file and directory
script_path = Path(__file__).resolve()
script_directory = script_path.parent
# Regex for validation
# 40-hex-character git commit SHA-1
REGEX_COMMIT_SHA = r"^[0-9a-fA-F]{40}$"
# Conservative git branch-name charset (length is validated separately by callers)
REGEX_GIT_BRANCH = r"^[a-zA-Z0-9_.\-/]+$"
# https://github.com/<org>/<repo>.git
REGEX_GITHUB_URL = r"^(https://github.com/)([a-zA-Z0-9-]{1,39}/[a-zA-Z0-9-_.]{1,100})(\.git)$"
# Release branch, e.g. v8.0 (components may be multi-digit, e.g. v10.0)
REGEX_RELEASE_BRANCH = r"^v\d+\.\d+$"
# Release tag, e.g. r8.2.1 or r8.2.1-rc0. The second dot is escaped (previously
# `\d.\d` let any character match, e.g. 'r8.2x1'), and components may be multi-digit.
REGEX_RELEASE_TAG = r"^r\d+\.\d+\.\d+(-\w*)?$"
# endregion init
# region functions and classes
class GitInfo:
    """Get, set, format git info.

    Best-effort: attributes are pre-initialized to None and filled in from the
    local repository when possible; callers may overwrite them from CLI args.
    """

    def __init__(self):
        print_banner("Gathering git info")
        # Pre-initialize so attributes are always readable even when git
        # information cannot be gathered (previously an AttributeError would
        # surface later when callers accessed e.g. .org or .commit).
        self.project = None
        self.org = None
        self.repo = None
        self.commit = None
        self.branch = None
        self.release_tag = None
        try:
            self.repo_root = Path(
                subprocess.run(
                    "git rev-parse --show-toplevel", shell=True, text=True, capture_output=True
                ).stdout.strip()
            )
            self._repo = Repo(self.repo_root)
        except Exception as e:
            logger.warning(
                "Unable to read git repo information. All necessary script arguments must be provided."
            )
            logger.warning(e)
            self._repo = None
        else:
            try:
                self.project = self._repo.remotes.origin.config_reader.get("url")
                if not self.project.endswith(".git"):
                    self.project += ".git"
                org_repo = extract_repo_from_git_url(self.project)
                self.org = org_repo["org"]
                self.repo = org_repo["repo"]
                self.commit = self._repo.head.commit.hexsha
                # NOTE: active_branch raises on a detached HEAD; that lands in
                # the except below and leaves self.branch as None.
                self.branch = self._repo.active_branch.name
                # filter tags for latest release e.g., r8.2.1
                release_tags = []
                filtered_tags = [
                    tag for tag in self._repo.tags if re.fullmatch(REGEX_RELEASE_TAG, tag.name)
                ]
                logger.info(f"GIT: Parsing {len(filtered_tags)} release tags for match to commit")
                for tag in filtered_tags:
                    # Compare hexsha strings: tag.commit is a Commit object while
                    # self.commit is a hex string, so a direct '==' never matched.
                    if tag.commit.hexsha == self.commit:
                        release_tags.append(tag.name)
                if len(release_tags) > 0:
                    self.release_tag = release_tags[-1]
                else:
                    self.release_tag = None
                logger.debug(f"GitInfo->release_tag(): {self.release_tag}")
                logger.debug(f"GitInfo->__init__: {self}")
            except Exception as e:
                logger.warning("Unable to fully parse git info.")
                logger.warning(e)

    def close(self):
        """Closes the underlying Git repo object to release resources."""
        if self._repo:
            logger.debug("Closing Git repo object.")
            self._repo.close()
            self._repo = None

    def added_new_3p_folder(self, commit: Commit) -> bool:
        """
        Checks if a given commit added a new third-party subfolder.

        Args:
            commit: The GitPython Commit object to analyze.

        Returns:
            True if the commit added a new subfolder, False otherwise.
        """
        if not commit.parents:
            # If it's the initial commit, all folders are "new"
            # Check if there are any subfolders in the initial commit
            return bool(commit.tree.trees)
        parent_commit = commit.parents[0]
        # NOTE(review): commit.diff(parent_commit) diffs commit -> parent; to
        # find items *added by* this commit, parent_commit.diff(commit) is the
        # usual direction — confirm intent. Also verify that git.Diff exposes
        # 'b_is_dir' in the GitPython version in use.
        diff_index = commit.diff(parent_commit)
        for diff in diff_index:
            # Check for added items that are directories
            if diff.change_type == "A" and diff.b_is_dir:
                return True
        return False
def print_banner(text: str) -> None:
    """Print a blank line, then *text* (space-padded) centered in a 120-char '=' rule."""
    framed = f" {text} "
    print()
    print(framed.center(120, "="))
def extract_repo_from_git_url(git_url: str) -> dict:
    """Determine org/repo for a given git url"""
    parts = git_url.split("/")
    org, repo = (segment.replace(".git", "") for segment in parts[-2:])
    return {"org": org, "repo": repo}
def sbom_components_to_dict(sbom: dict, with_version: bool = False) -> dict:
    """Create a dict of SBOM components keyed by (URL-unquoted) bom-ref PURL.

    When with_version is False (the default) the '@version' suffix is stripped
    from each key, leaving a version-less PURL.
    """
    components_dict = {}
    for component in sbom["components"]:
        key = urllib.parse.unquote(component["bom-ref"])
        if not with_version:
            key = key.split("@")[0]
        components_dict[key] = component
    return components_dict
def read_sbom_json_file(file_path: str) -> dict:
    """Load a JSON SBOM file (schema is not validated).

    Returns the parsed dict, or None if the file cannot be read or parsed
    (errors are logged rather than raised).
    """
    try:
        with open(file_path, "r", encoding="utf-8") as input_json:
            result = json.load(input_json)
    except Exception as e:
        logger.error(f"Error loading SBOM file from {file_path}")
        logger.error(e)
        return None
    logger.info(f"SBOM loaded from {file_path} with {len(result['components'])} components")
    return result
def write_sbom_json_file(sbom_dict: dict, file_path: str) -> None:
    """Save a JSON SBOM file (schema is not validated).

    Args:
        sbom_dict: SBOM content to serialize.
        file_path: Destination path (made absolute before writing).

    Returns:
        None. Errors are logged rather than raised. (The previous '-> dict'
        annotation was wrong: no code path returns a value.)
    """
    try:
        file_path = os.path.abspath(file_path)
        with open(file_path, "w", encoding="utf-8") as output_json:
            json.dump(sbom_dict, output_json, indent=2)
            # Trailing newline keeps the file POSIX- and diff-friendly.
            output_json.write("\n")
    except Exception as e:
        logger.error(f"Error writing SBOM file to {file_path}")
        logger.error(e)
    else:
        logger.info(f"SBOM file saved to {file_path}")
def set_component_version(
    component: dict, version: str, purl_version: str = None, cpe_version: str = None
) -> None:
    """Update the appropriate version fields in a component from the metadata SBOM.

    The '{{VERSION}}' placeholder in bom-ref/version/purl/cpe is replaced
    in place; purl_version and cpe_version default to 'version'.
    """
    purl_version = purl_version or version
    cpe_version = cpe_version or version
    component["bom-ref"] = component["bom-ref"].replace("{{VERSION}}", purl_version)
    component["version"] = component["version"].replace("{{VERSION}}", version)
    purl = component.get("purl")
    if purl:
        purl = purl.replace("{{VERSION}}", purl_version)
        component["purl"] = purl
        if not is_valid_purl(purl):
            logger.warning(f"PURL: Invalid PURL ({purl})")
    if component.get("cpe"):
        component["cpe"] = component["cpe"].replace("{{VERSION}}", cpe_version)
def set_dependency_version(dependencies: list, meta_bom_ref: str, purl_version: str) -> None:
    """Update the appropriate dependency version fields in the metadata SBOM.

    Replaces '{{VERSION}}' in every 'ref' equal to meta_bom_ref and in every
    matching 'dependsOn' entry, mutating the list in place.
    """
    refs_updated = 0
    depends_updated = 0
    for dependency in dependencies:
        ref = dependency["ref"]
        if "{{VERSION}}" in ref and ref == meta_bom_ref:
            dependency["ref"] = ref.replace("{{VERSION}}", purl_version)
            refs_updated += 1
        depends_on = dependency["dependsOn"]
        for idx, entry in enumerate(depends_on):
            if entry == meta_bom_ref:
                depends_on[idx] = entry.replace("{{VERSION}}", purl_version)
                depends_updated += 1
    logger.debug(
        f"set_dependency_version: '{meta_bom_ref}' updated {refs_updated} refs and {depends_updated} dependsOn"
    )
def get_subfolders_dict(folder_path: str = ".") -> dict:
    """Return {subfolder_name: 0} for every directory directly under folder_path.

    The zero values act as match counters for callers; names are sorted.
    Errors are logged and yield an empty dict.
    """
    names = []
    try:
        names = [
            entry
            for entry in os.listdir(folder_path)
            if os.path.isdir(os.path.join(folder_path, entry))
        ]
    except FileNotFoundError:
        logger.error(f"Error: Directory '{folder_path}' not found.")
    except Exception as e:
        logger.error(f"An error occurred: {e}")
    return {name: 0 for name in sorted(names)}
def get_component_import_script_path(component: dict) -> str:
    """Extract the path to a third-party library import script as defined in
    component 'properties' as 'import_script_path'.

    Returns the first matching property's value, or None when absent.
    """
    for prop in component.get("properties", []):
        if prop.get("name") == "import_script_path":
            # There should only be one such property, if any.
            return prop.get("value")
    return None
def get_version_from_import_script(file_path: str) -> str:
    """A rudimentary parse of a shell script file to extract the static value
    defined for the VERSION variable.

    Returns the (optionally quoted) value of the first 'VERSION=' line, or
    None if no such line exists or the file cannot be read.
    """
    version_pattern = re.compile(r"^VERSION=(?P<quote>[\"']?)(?P<content>\S+)(?P=quote).*$")
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            for raw_line in file:
                line = raw_line.strip()
                if line.startswith("VERSION="):
                    return version_pattern.sub(r"\g<content>", line)
    except Exception as e:
        logger.warning(f"Unable to load {file_path}")
        logger.warning(e)
    else:
        return None
# endregion functions and classes
def main() -> None:
# region define args
parser = argparse.ArgumentParser(
description="""Generate a CycloneDX v1.5 JSON SBOM file using a combination of scan results from Endor Labs, pre-defined SBOM metadata, and the existing SBOM.
Requires endorctl to be installed and configured, which can be done using 'buildscripts/sbom/install_endorctl.sh'.
For use in CI, script may be run with no arguments.""",
epilog="Note: The git-related default values are dynamically generated.",
formatter_class=argparse.MetavarTypeHelpFormatter,
)
endor = parser.add_argument_group("Endor Labs API (via 'endorctl')")
endor.add_argument(
"--endorctl-path",
help="Path to endorctl, the Endor Labs CLI (Default: 'endorctl')",
default="endorctl",
type=str,
)
endor.add_argument(
"--config-path",
help="Path to endor config directory containing config.yaml (Default: '$HOME/.endorctl')",
default=None,
type=str,
)
endor.add_argument(
"--namespace", help="Endor Labs namespace (Default: mongodb.{git org})", type=str
)
endor.add_argument(
"--target",
help="Target for generated SBOM. Commit: results from running/completed PR scan, Branch: results from latest monitoring scan, Project: results from latest monitoring scan of the 'default' branch (default: commit)",
choices=["commit", "branch", "project"],
default="commit",
type=str,
)
endor.add_argument(
"--project",
help="Full GitHub git URL [e.g., https://github.com/10gen/mongo.git] (Default: current git URL)",
type=str,
)
target = parser.add_argument_group("Target values. Apply only if --target is not 'project'")
exclusive_target = target.add_mutually_exclusive_group()
exclusive_target.add_argument(
"--commit",
help="PR commit SHA [40-character hex string] (Default: current git commit)",
type=str,
)
exclusive_target.add_argument(
"--branch",
help="Git repo branch monitored by Endor Labs [e.g., v8.0] (Default: current git org/repo)",
type=str,
)
files = parser.add_argument_group("SBOM files")
files.add_argument(
"--sbom-metadata",
help="Input path for template SBOM file with metadata (Default: './buildscripts/sbom/metadata.cdx.json')",
default="./buildscripts/sbom/metadata.cdx.json",
type=str,
)
files.add_argument(
"--sbom-in",
help="Input path for previous SBOM file (Default: './sbom.json')",
default="./sbom.json",
type=str,
)
files.add_argument(
"--sbom-out",
help="Output path for SBOM file (Default: './sbom.json')",
default="./sbom.json",
type=str,
)
parser.add_argument(
"--retry-limit",
help="Maximum number of times to retry when a target PR scan has not started (Default: 5)",
default=5,
type=int,
)
parser.add_argument(
"--sleep-duration",
help="Number of seconds to wait between retries (Default: 30)",
default=30,
type=int,
)
parser.add_argument("--debug", help="Set logging level to DEBUG", action="store_true")
# endregion define args
# region parse args
args = parser.parse_args()
git_info = GitInfo()
# endor
endorctl_path = args.endorctl_path
config_path = args.config_path
namespace = args.namespace if args.namespace else f"mongodb.{git_info.org}"
target = args.target
# project
if args.project and args.project != git_info.project:
if not re.fullmatch(REGEX_GITHUB_URL, args.project):
parser.error(f"Invalid Git URL: {args.project}.")
git_info.project = args.project
git_info.org, git_info.repo = map(
extract_repo_from_git_url(git_info.project).get, ("org", "repo")
)
git_info.release_tag = None
# targets
# commit
if args.commit and args.commit != git_info.commit:
if not re.fullmatch(REGEX_COMMIT_SHA, args.commit):
parser.error(
f"Invalid Git commit SHA: {args.commit}. Must be a 40-character hexadecimal string (SHA-1)."
)
git_info.commit = args.commit
# branch
if args.branch and args.branch != git_info.branch:
if len(args.branch.encode("utf-8")) > 244 or not re.fullmatch(
REGEX_GIT_BRANCH, args.branch
):
parser.error(
f"Invalid Git branch name: {args.branch}. Limit is 244 bytes with allowed characters: [a-zA-Z0-9_.-/]"
)
git_info.branch = args.branch
# files
sbom_out_path = args.sbom_out
sbom_in_path = args.sbom_in
sbom_metadata_path = args.sbom_metadata
# environment
retry_limit = args.retry_limit
sleep_duration = args.sleep_duration
if args.debug:
logger.setLevel(logging.DEBUG)
# endregion parse args
# region export Endor Labs SBOM
print_banner(f"Exporting Endor Labs SBOM for {target} {getattr(git_info, target)}")
endorctl = EndorCtl(namespace, retry_limit, sleep_duration, endorctl_path, config_path)
if target == "commit":
endor_bom = endorctl.get_sbom_for_commit(git_info.project, git_info.commit)
elif target == "branch":
endor_bom = endorctl.get_sbom_for_branch(git_info.project, git_info.branch)
elif target == "project":
endor_bom = endorctl.get_sbom_for_project(git_info.project)
if not endor_bom:
logger.error("Empty result for Endor SBOM!")
if target == "commit":
logger.error("Check Endor Labs for any unanticipated issues with the target PR scan.")
else:
logger.error("Check Endor Labs for status of the target monitoring scan.")
sys.exit(1)
# endregion export Endor Labs SBOM
# region Pre-process Endor Labs SBOM
print_banner("Pre-Processing Endor Labs SBOM")
## remove uneeded components ##
# [list]endor_components_remove is defined in config.py
# Reverse iterate the SBOM components list to safely modify in situ
for i in range(len(endor_bom["components"]) - 1, -1, -1):
component = endor_bom["components"][i]
removed = False
for remove in endor_components_remove:
if component["bom-ref"].startswith(remove):
logger.info("ENDOR SBOM PRE-PROCESS: removing " + component["bom-ref"])
del endor_bom["components"][i]
removed = True
break
if not removed:
for rename in endor_components_rename:
old = rename[0]
new = rename[1]
component["bom-ref"] = component["bom-ref"].replace(old, new)
component["purl"] = component["purl"].replace(old, new)
logger.info(f"Endor Labs SBOM pre-processed with {len(endor_bom['components'])} components")
# endregion Pre-process Endor Labs SBOM
# region load metadata and previous SBOMs
print_banner("Loading metadata SBOM and previous SBOM")
meta_bom = read_sbom_json_file(sbom_metadata_path)
if not meta_bom:
logger.error("No SBOM metadata. This is fatal.")
sys.exit(1)
prev_bom = read_sbom_json_file(sbom_in_path)
if not prev_bom:
logger.warning(
"Unable to load previous SBOM data. The new SBOM will be generated without any previous context. This is unexpected, but not fatal."
)
# Create empty prev_bom to avoid downstream processing errors
prev_bom = {
"bom-ref": None,
"metadata": {
"timestamp": endor_bom["metadata"]["timestamp"],
"component": {
"version": None,
},
},
"components": [],
}
# endregion load metadata and previous SBOMs
# region Build composite SBOM
# Note: No exception handling here. The most likely reason for an exception is missing data elements
# in SBOM files, which is fatal if it happens. Code is in place to handle the situation
# where there is no previous SBOM to include, but we want to fail if required data is absent.
print_banner("Building composite SBOM (metadata + endor + previous)")
# Sort components by bom-ref
endor_bom["components"].sort(key=lambda c: c["bom-ref"])
meta_bom["components"].sort(key=lambda c: c["bom-ref"])
prev_bom["components"].sort(key=lambda c: c["bom-ref"])
# Create SBOM component lookup dicts
endor_components = sbom_components_to_dict(endor_bom)
prev_components = sbom_components_to_dict(prev_bom)
# region MongoDB primary component
# Attempt to determine the MongoDB Version being scanned
logger.debug(
f"Available MongoDB version options, tag: {git_info.release_tag}, branch: {git_info.branch}, previous SBOM: {prev_bom['metadata']['component']['version']}"
)
meta_bom_ref = meta_bom["metadata"]["component"]["bom-ref"]
# Project scan always set to 'master' or if using 'master' branch
if target == "project" or git_info.branch == "master":
version = "master"
purl_version = "master"
cpe_version = "master"
logger.info("Using branch 'master' as MongoDB version")
# tagged release. e.g., r8.1.0, r8.2.1-rc0
elif git_info.release_tag:
version = git_info.release_tag[1:] # remove leading 'r'
purl_version = git_info.release_tag
cpe_version = version # without leading 'r'
logger.info(f"Using release_tag '{git_info.release_tag}' as MongoDB version")
# Release branch e.g., v7.0 or v8.2
elif target == "branch" and re.fullmatch(REGEX_RELEASE_BRANCH, git_info.branch):
version = git_info.branch
purl_version = git_info.branch
# remove leading 'v', add wildcard. e.g. 8.2.*
cpe_version = version[1:] + ".*"
logger.info(f"Using release branch '{git_info.branch}' as MongoDB version")
# Previous SBOM app version, if all needed specifiers exist
elif (
prev_bom.get("metadata", {}).get("component", {}).get("version")
and prev_bom.get("metadata", {}).get("component", {}).get("purl")
and prev_bom.get("metadata", {}).get("component", {}).get("cpe")
):
version = prev_bom["metadata"]["component"]["version"]
purl_version = prev_bom["metadata"]["component"]["purl"].split("@")[-1]
cpe_version = prev_bom["metadata"]["component"]["cpe"].split(":")[5]
logger.info(f"Using previous SBOM version '{version}' as MongoDB version")
else:
# Fall back to the version specified in the Endor SBOM
# This is unlikely to be accurate
version = endor_bom["metadata"]["component"]["version"]
purl_version = version
cpe_version = version
logger.warning(
f"Using SBOM version '{version}' from Endor Labs scan. This is unlikely to be accurate and may specify a PR #."
)
# Set main component version
set_component_version(meta_bom["metadata"]["component"], version, purl_version, cpe_version)
# Run through 'dependency' objects to set main component version
set_dependency_version(meta_bom["dependencies"], meta_bom_ref, purl_version)
# endregion MongoDB primary component
# region SBOM components
# region Parse metadata SBOM components
third_party_folders = get_subfolders_dict(git_info.repo_root.as_posix() + "/src/third_party")
# pre-exclude 'scripts' folder
del third_party_folders["scripts"]
for component in meta_bom["components"]:
versions = {
"endor": None,
"import_script": None,
"metadata": None,
}
component_key = component["bom-ref"].split("@")[0]
print_banner("Component: " + component_key)
################ Endor Labs ################
if component_key in endor_components:
# Pop component from dict so we are left with only unmatched components
endor_component = endor_components.pop(component_key)
versions["endor"] = endor_component.get("version")
logger.debug(
f"VERSION ENDOR: {component_key}: Found version '{versions['endor']}' in Endor Labs results"
)
############## Import Script ###############
# Import script version, if exists
import_script_path = get_component_import_script_path(component)
if import_script_path:
import_script = Path(import_script_path)
if import_script.exists():
versions["import_script"] = get_version_from_import_script(import_script_path)
if versions["import_script"]:
versions["import_script"] = versions["import_script"].replace("release-", "")
if versions["import_script"]:
logger.debug(
f"VERSION IMPORT SCRIPT: {component_key}: Found version '{versions['import_script']}' in import script '{import_script_path}'"
)
else:
logger.debug(
f"VERSION IMPORT SCRIPT: {component_key}: Import script not found! '{import_script_path}'"
)
############## Metadata ###############
# Hard-coded metadata version, if exists
if "{{VERSION}}" not in component["version"]:
versions["metadata"] = component.get("version")
logger.info(f"VERSIONS: {component_key}: " + str(versions))
############## Component Special Cases ###############
process_component_special_cases(
component_key, component, versions, git_info.repo_root.as_posix()
)
# For the standard workflow, we favor the Endor Labs version, followed by import script, followed by hard coded
if (
versions["endor"]
and versions["import_script"]
and get_semver_from_release_version(versions["endor"])
!= get_semver_from_release_version(versions["import_script"])
):
logger.debug(
",".join(
[
"endor:",
versions["endor"],
"semver(endor):",
get_semver_from_release_version(versions["endor"]),
"import_script:",
versions["import_script"],
"semver(import_script):",
get_semver_from_release_version(versions["import_script"]),
]
)
)
logger.warning(
f"VERSION MISMATCH: {component_key}: Endor version {versions['endor']} does not match import script version {versions['import_script']}"
)
version = versions["endor"] or versions["import_script"] or versions["metadata"]
############## Assign Version ###############
if version:
meta_bom_ref = component["bom-ref"]
## Special case for FireFox ##
# The CPE for FireFox ESR needs the 'esr' removed from the version, as it is specified in another section
if component["bom-ref"].startswith("pkg:deb/debian/firefox-esr@"):
set_component_version(component, version, cpe_version=version.replace("esr", ""))
else:
semver = get_semver_from_release_version(version)
set_component_version(component, semver, version, semver)
set_dependency_version(meta_bom["dependencies"], meta_bom_ref, version)
# check against third_party folders
component_defines_location = False
for occurrence in component.get("evidence", {}).get("occurrences", []):
location = occurrence.get("location")
if location:
component_defines_location = True
if location.startswith("src/third_party/"):
location = location.replace("src/third_party/", "")
if location in third_party_folders:
third_party_folders[location] += 1
logger.debug(
f"THIRD_PARTY FOLDER: {component_key} matched folder {location} specified in SBOM"
)
else:
logger.warning(
f"THIRD_PARTY FOLDER: {component_key} lists third-party location folder as {location}, which does not exist!"
)
else:
logger.warning(
f"THIRD_PARTY FOLDER: {component_key} lists a location as '{location}'. Ideally, all third-party components are located under 'src/third_party/'."
)
if not component_defines_location:
logger.warning(
f"THIRD_PARTY FOLDER: {component_key} does not define a location in '.evidence.occurrences[]'"
)
else:
logger.warning(
f"VERSION NOT FOUND: Could not find a version for {component_key}! Removing from SBOM. Component may need to be removed from the {sbom_metadata_path} file."
)
del component
print_banner("Third Party Folders")
third_party_folders_missed = {
key: value for key, value in third_party_folders.items() if value == 0
}
if third_party_folders_missed:
logger.warning(
"THIRD_PARTY FOLDERS: 'src/third_party' folders not matched with a component: "
+ ",".join(third_party_folders_missed.keys())
)
else:
logger.info(
"THIRD_PARTY FOLDERS: All 'src/third_party' folders successfully matched with one or more components."
)
# explicit cleanup to avoid gc race condition on script temination
git_info.close()
del git_info
# endregion Parse metadata SBOM components
# region Parse unmatched Endor Labs components
print_banner("New Endor Labs components")
if endor_components:
logger.info(
f"ENDOR SBOM: There are {len(endor_components)} unmatched components in the Endor Labs SBOM. Adding as-is. The applicable metadata should be added to the metadata SBOM for the next run."
)
for component in endor_components:
# set scope to excluded by default until the component is evaluated
endor_components[component]["scope"] = "excluded"
meta_bom["components"].append(endor_components[component])
meta_bom["dependencies"].append(
{"ref": endor_components[component]["bom-ref"], "dependsOn": []}
)
logger.info(f"SBOM AS-IS COMPONENT: Added {component}")
# endregion Parse unmatched Endor Labs components
# region Finalize SBOM
# Have the SBOM app version changed?
sbom_app_version_changed = (
prev_bom["metadata"]["component"]["version"] != meta_bom["metadata"]["component"]["version"]
)
logger.info(f"SUMMARY: MongoDB version changed: {sbom_app_version_changed}")
# Have the components changed?
prev_components = sbom_components_to_dict(prev_bom, with_version=True)
meta_components = sbom_components_to_dict(meta_bom, with_version=True)
sbom_components_changed = prev_components.keys() != meta_components.keys()
logger.info(
f"SBOM_DIFF: SBOM components changed (added, removed, or version): {sbom_components_changed}. Previous SBOM has {len(prev_components)} components; New SBOM has {len(meta_components)} components"
)
# Components in prev SBOM but not in generated SBOM
prev_components = sbom_components_to_dict(prev_bom, with_version=False)
meta_components = sbom_components_to_dict(meta_bom, with_version=False)
prev_components_diff = list(set(prev_components.keys()) - set(meta_components.keys()))
if prev_components_diff:
logger.info(
"SBOM_DIFF: Components in previous SBOM and not in generated SBOM: "
+ ",".join(prev_components_diff)
)
# Components in generated SBOM but not in prev SBOM
meta_components_diff = list(set(meta_components.keys()) - set(prev_components.keys()))
if meta_components_diff:
logger.info(
"SBOM_DIFF: Components in generated SBOM and not in previous SBOM: "
+ ",".join(meta_components_diff)
)
# serialNumber https://cyclonedx.org/docs/1.5/json/#serialNumber
# version (SBOM version) https://cyclonedx.org/docs/1.5/json/#version
if sbom_app_version_changed:
# New MongoDB version requires a unique serial number and version 1
meta_bom["serialNumber"] = uuid.uuid4().urn
meta_bom["version"] = 1
else:
# MongoDB version is the same, so reuse the serial number and SBOM version
meta_bom["serialNumber"] = prev_bom["serialNumber"]
meta_bom["version"] = prev_bom["version"]
# If the components have changed, bump the SBOM version
if sbom_components_changed:
meta_bom["version"] += 1
# metadata.timestamp https://cyclonedx.org/docs/1.5/json/#metadata_timestamp
# Only update the timestamp if something has changed
if sbom_app_version_changed or sbom_components_changed:
meta_bom["metadata"]["timestamp"] = (
datetime.now(timezone.utc).isoformat(timespec="seconds").replace("+00:00", "Z")
)
else:
meta_bom["metadata"]["timestamp"] = prev_bom["metadata"]["timestamp"]
# metadata.tools https://cyclonedx.org/docs/1.5/json/#metadata_tools
meta_bom["metadata"]["tools"] = endor_bom["metadata"]["tools"]
write_sbom_json_file(meta_bom, sbom_out_path)
# Access the collected warnings
print_banner("CONSOLIDATED WARNINGS")
for record in warning_handler.warnings:
print(record.getMessage())
print_banner("COMPLETED")
if not os.getenv("CI"):
print("Be sure to add the SBOM to your next commit if the file content has changed.")
# endregion Finalize SBOM
# endregion Build composite SBOM
if __name__ == "__main__":
main()

View File

@ -0,0 +1,199 @@
#!/bin/bash
set -o errexit
echo "+----------------------------------------------------------------------------+"
echo "| Script to install the Endor Labs CLI and verify authentication |"
echo "| endorctl (https://docs.endorlabs.com/endorctl/) |"
echo "| Environment Variables (optional): |"
echo "| ENDOR_INSTALL_PATH - only if in CI or not installed with homebrew or npm |"
echo "| ENDOR_CONFIG_PATH - endor config directory (default: ~/.endorctl) |"
echo "+----------------------------------------------------------------------------+"
echo
function endorctl_check_install() {
    # Report whether an executable 'endorctl' binary is already available on PATH.
    # Returns 0 when found and executable, 1 otherwise.
    # '|| true' keeps a missing binary from aborting the script via errexit when
    # this function is ever called outside an 'if' condition.
    ENDOR_INSTALLED_PATH=$(command -v endorctl || true)
    if [[ -n "$ENDOR_INSTALLED_PATH" ]]; then
        # Is Installed
        echo "Binary 'endorctl' is installed in '${ENDOR_INSTALLED_PATH}'."
        # chmod can fail on a system-owned install (e.g. package-managed binary);
        # tolerate that and let the -x check below decide.
        chmod +x "$ENDOR_INSTALLED_PATH" 2>/dev/null || true
        if [[ -x "$ENDOR_INSTALLED_PATH" ]]; then
            echo "Binary 'endorctl' is executable."
            return 0 # True (success)
        else
            echo "Binary 'endorctl' is NOT executable after attempting to make it executable."
            return 1 # False (failure)
        fi
    else
        echo "Binary 'endorctl' is NOT installed or not in PATH."
        return 1 # False (failure)
    fi
}
function endorctl_install() {
    # Install endorctl, trying the least intrusive option first:
    #   1. Homebrew, 2. npm (both skipped when running in CI),
    #   3. direct binary download with sha256 checksum verification.
    # Returns 0 on success, 1 on failure.
    # Skip trying homebrew and npm if running in CI
    if [[ "$CI" == "true" ]]; then
        echo "---------------------------------"
        echo "Detected that script is running in CI. Skipping Homebrew and NPM."
    else
        # Try brew
        echo "---------------------------------"
        echo "Checking if Homebrew is available"
        # BUGFIX: 'command -v brew --version' passed '--version' as a second name
        # to look up; 'command -v' takes command names only.
        if command -v brew &>/dev/null; then
            echo "Attempting to install with Homebrew"
            brew tap endorlabs/tap
            if ! brew install endorctl; then
                echo "Warning: Homebrew installation failed."
            else
                echo "Installed with Homebrew"
                return 0 # True (success)
            fi
        else
            echo "Homebrew is not available"
        fi
        # Try NPM
        echo "---------------------------------"
        echo "Checking if npm is available"
        if command -v npm &>/dev/null; then
            # Install binary for linux or macos
            echo "Attempting to install with npm"
            if ! npm install --global endorctl; then
                echo "Warning: npm installation failed."
            else
                echo "Installed with npm"
                return 0 # True (success)
            fi
        else
            echo "npm is not available"
        fi
    fi
    # Try binary installation
    echo "---------------------------------"
    echo "Attempting binary install"
    if [[ -z "$ENDOR_INSTALL_PATH" ]]; then
        ENDOR_INSTALL_PATH="${HOME}/.local/bin"
    fi
    echo "Installation path set to $ENDOR_INSTALL_PATH"
    mkdir -p "$ENDOR_INSTALL_PATH"
    # Prepend the install dir so the freshly installed binary wins the lookup.
    export PATH="${ENDOR_INSTALL_PATH}:$PATH"
    ENDOR_BIN_PATH="${ENDOR_INSTALL_PATH}/endorctl"
    case $(uname -m) in
    "x86_64" | "amd64")
        ARCH="amd64"
        ;;
    "aarch64" | "arm64")
        ARCH="arm64"
        ;;
    *)
        echo "Error: Unexpected architecture: $(uname -m). Expected x86_64, amd64, or arm64."
        return 1 # False (failure)
        ;;
    esac
    case "$OSTYPE" in
    linux*)
        PLATFORM="linux"
        ;;
    darwin*)
        PLATFORM="macos"
        ;;
    msys* | cygwin* | "Windows_NT")
        echo "Error: Automated installation on Windows without npm is not implemented in this script."
        echo "For manual Windows installation, follow instructions at:"
        echo "  https://docs.endorlabs.com/endorctl/install-and-configure/#download-and-install-the-endorctl-binary-directly"
        echo ""
        echo_auth_instructions
        return 1 # False (failure)
        ;;
    *)
        echo "Error: Unexpected OS type: $OSTYPE"
        return 1 # False (failure)
        ;;
    esac
    ## Download the latest CLI for supported platform and architecture
    URL="https://api.endorlabs.com/download/latest/endorctl_${PLATFORM}_${ARCH}"
    # BUGFIX: the message previously referenced undefined $BIN_PATH.
    echo "Downloading latest CLI for $PLATFORM $ARCH to $ENDOR_BIN_PATH from $URL"
    # --fail keeps an HTTP error page from being written out as the binary.
    curl --silent --fail "$URL" --output "$ENDOR_BIN_PATH"
    ## Verify the checksum of the binary; a mismatch fails the pipeline (errexit).
    echo "Verifying checksum of binary"
    case "$PLATFORM" in
    linux)
        echo "$(curl -s https://api.endorlabs.com/sha/latest/endorctl_${PLATFORM}_${ARCH})" "$ENDOR_BIN_PATH" | sha256sum -c
        ;;
    macos)
        echo "$(curl -s https://api.endorlabs.com/sha/latest/endorctl_${PLATFORM}_${ARCH})" "$ENDOR_BIN_PATH" | shasum -a 256 -c
        ;;
    esac
    ## Modify the permissions of the binary to ensure it is executable
    echo " Modifying binary permissions to executable"
    chmod +x "$ENDOR_BIN_PATH"
    # NOTE: the former 'alias endorctl=...' was a no-op here (aliases are not
    # expanded in non-interactive shells); the PATH export above already makes
    # the binary resolvable by name.
    echo "endorctl installed in $ENDOR_BIN_PATH"
    return 0 # True (success)
}
function endorctl_check_auth() {
    # Verify that endorctl can authenticate against the Endor Labs API by
    # fetching the known mongo project. Returns 0 on success, 1 on failure.
    echo "Checking authentication with command: endorctl api get --resource Project --namespace mongodb.10gen --name https://github.com/10gen/mongo.git --config-path $ENDOR_CONFIG_PATH"
    # Capture the exit code immediately; '|| rc=$?' also keeps errexit from
    # aborting the script if this is ever called outside an 'if' condition.
    local rc=0
    endorctl api get --resource Project --namespace mongodb.10gen --name "https://github.com/10gen/mongo.git" --config-path "$ENDOR_CONFIG_PATH" >/dev/null || rc=$?
    if [ $rc -eq 0 ]; then
        echo "Authentication confirmed."
        return 0 # True (success)
    else
        # BUGFIX: the message previously printed $? after an echo, which always
        # reported exit code 0; use the captured code instead.
        echo "Authentication failure. Command exit code: $rc"
        echo_auth_instructions
        return 1 # False (failure)
    fi
}
function echo_auth_instructions() {
    # Print automated and interactive authentication instructions for endorctl.
    # A quoted heredoc keeps the text verbatim (no expansions) and in one place.
    cat <<'EOF'

------------------------------------------------ AUTOMATED AUTH ------------------------------------------------
Set the following environment variables:
 export ENDOR_API_CREDENTIALS_KEY=<api-key>
 export ENDOR_API_CREDENTIALS_SECRET=<api-key-secret>
 export ENDOR_NAMESPACE=mongodb.{github_org}

--------------------------------------------------- USER AUTH ---------------------------------------------------
To authenticate endorctl, visit the following URL, authenticate via Okta SSO, and copy the authentication token.
 https://api.endorlabs.com/v1/auth/sso?tenant=mongodb.10gen&redirect=headless
Then run:
 endorctl auth --token [AUTH_TOKEN]

Alternatively, run the init command. Must use headless mode when no GUI is available:
 endorctl init --auth-mode=sso --auth-tenant=mongodb.10gen --headless-mode

Enter 'y' if prompted to overwrite existing configuration and/or delete account keys.

If authentication fails, confirm in MANA that you are a member of a '10gen-endor-labs-*' Okta group.

EOF
}
# Set/Create config folder: honor a caller-supplied ENDOR_CONFIG_PATH, else default.
: "${ENDOR_CONFIG_PATH:=${HOME}/.endorctl}"
echo "Config path set to ${ENDOR_CONFIG_PATH}"
# Install endorctl only when it is not already present, then verify API auth.
endorctl_check_install || endorctl_install || exit 1
endorctl_check_auth || exit 1
exit 0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,123 @@
#!/usr/bin/env python3
"""
Script that opens a PR using a bot to update SBOM-related files.
"""
import argparse
import os
import time
from github.GithubException import GithubException
from github.GithubIntegration import GithubIntegration
SBOM_FILES = ["sbom.json", "README.third_party.md"]
def get_repository(github_owner, github_repo, app_id, private_key):
    """Return the PyGithub Repository for ``github_owner/github_repo``.

    Authenticates as a GitHub App installation using the given App ID and
    private key, then resolves the repository through that installation.
    """
    integration = GithubIntegration(int(app_id), private_key)
    installation = integration.get_repo_installation(github_owner, github_repo)
    client = installation.get_github_for_installation()
    return client.get_repo(f"{github_owner}/{github_repo}")
def create_branch(base_branch, new_branch) -> None:
    """Create ``new_branch`` off the head of ``base_branch``.

    Uses the module-level ``repo`` set up in ``__main__``. If the branch
    already exists, GitHub answers 422 and this becomes a no-op.
    """
    print(f"Attempting to create branch '{new_branch}' with base branch '{base_branch}'.")
    try:
        base_sha = repo.get_branch(base_branch).commit.sha
        repo.create_git_ref(ref=f"refs/heads/{new_branch}", sha=base_sha)
    except GithubException as e:
        # 422 Unprocessable Entity == reference already exists.
        if e.status != 422:
            raise
        print("Branch already exists. Continuing...")
    else:
        print("Created branch.")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="This script checks for changes to SBOM and related files and creates a PR if files have been updated.",
    )
    parser.add_argument("--github-owner", help="GitHub org/owner (e.g., 10gen).", type=str)
    parser.add_argument("--github-repo", help="GitHub repository name (e.g., mongo).", type=str)
    parser.add_argument("--base-branch", help="base branch to merge into.", type=str)
    parser.add_argument("--new-branch", help="New branch for the PR.", type=str)
    parser.add_argument("--pr-title", help="Title for the PR.", type=str)
    parser.add_argument(
        "--app-id",
        help="GitHub App ID used for authentication.",
        type=str,
        default=os.getenv("MONGO_PR_BOT_APP_ID"),
    )
    parser.add_argument(
        "--private-key",
        help="Key to use for GitHub App authentication.",
        type=str,
        default=os.getenv("MONGO_PR_BOT_PRIVATE_KEY"),
    )
    args = parser.parse_args()
    if not args.app_id or not args.private_key:
        parser.error(
            "Must define --app-id or env MONGO_PR_BOT_APP_ID and --private-key or env MONGO_PR_BOT_PRIVATE_KEY."
        )
    # The PEM key may arrive with its newlines collapsed to spaces (e.g. via an
    # env var). Restore newlines in the body while leaving the header (first 31
    # chars) and footer (last 29 chars) untouched.
    private_key = (
        args.private_key[:31] + args.private_key[31:-29].replace(" ", "\n") + args.private_key[-29:]
    )
    repo = get_repository(args.github_owner, args.github_repo, args.app_id, private_key)
    pr_needed = False
    for file_path in SBOM_FILES:
        # Content currently on the base branch.
        original_file = repo.get_contents(file_path, ref=f"refs/heads/{args.base_branch}")
        original_content = original_file.decoded_content.decode()
        try:
            with open(file_path, "r", encoding="utf-8") as file:
                new_content = file.read()
        except FileNotFoundError:
            # BUGFIX: this previously passed %-style args to print() (printing the
            # tuple verbatim) and then fell through to compare an undefined or
            # stale new_content; skip the file instead.
            print(f"Error: file '{file_path}' not found.")
            continue
        # compare strings without whitespace
        if "".join(new_content.split()) != "".join(original_content.split()):
            create_branch(args.base_branch, args.new_branch)
            original_file_new_branch = repo.get_contents(
                file_path, ref=f"refs/heads/{args.new_branch}"
            )
            print("New file is different from original file.")
            print("repo.update_file:")
            print(f" message: Updating '{file_path}'")
            print(f" path: '{file_path}'")
            print(f" sha: {original_file_new_branch.sha}")
            print(f" content: '{new_content:.256}'")
            print(f" branch: {args.new_branch}")
            time.sleep(10)  # Wait to reduce chance of 409 errors
            update_file_result = repo.update_file(
                message=f"Updating '{file_path}'",
                path=file_path,
                sha=original_file_new_branch.sha,
                content=new_content,
                branch=args.new_branch,
            )
            print("Results:")
            print(" commit: ", update_file_result["commit"])
            pr_needed = True
    if pr_needed:
        repo.create_pull(
            base=args.base_branch,
            head=args.new_branch,
            title=args.pr_title,
            body="Automated PR updating SBOM and related files.",
        )
    else:
        print(f"Files '{SBOM_FILES}' have not changed. Skipping PR.")

View File

@ -35,7 +35,7 @@ MISSING_VERSION_IN_SBOM_COMPONENT_ERROR = "Component must include a version."
MISSING_VERSION_IN_IMPORT_FILE_ERROR = "Missing version in the import file: " MISSING_VERSION_IN_IMPORT_FILE_ERROR = "Missing version in the import file: "
MISSING_LICENSE_IN_SBOM_COMPONENT_ERROR = "Component must include a license." MISSING_LICENSE_IN_SBOM_COMPONENT_ERROR = "Component must include a license."
COULD_NOT_FIND_OR_READ_SCRIPT_FILE_ERROR = "Could not find or read the import script file" COULD_NOT_FIND_OR_READ_SCRIPT_FILE_ERROR = "Could not find or read the import script file"
VERSION_MISMATCH_ERROR = "Version mismatch: " VERSION_MISMATCH_ERROR = "Version mismatch (may simply be an artifact of SBOM automation): "
# A class for managing error messages for components # A class for managing error messages for components
@ -123,14 +123,15 @@ def validate_license(component: dict, error_manager: ErrorManager) -> None:
return return
valid_license = False valid_license = False
for license in component["licenses"]: expression = None
if "expression" in license: for component_license in component["licenses"]:
expression = license.get("expression") if "expression" in component_license:
elif "license" in license: expression = component_license.get("expression")
if "id" in license["license"]: elif "license" in component_license:
if "id" in component_license["license"]:
# Should be a valid SPDX license ID # Should be a valid SPDX license ID
expression = license["license"].get("id") expression = component_license["license"].get("id")
elif "name" in license["license"]: elif "name" in component_license["license"]:
# If SPDX does not define the license used, the name field may be used to provide the license name # If SPDX does not define the license used, the name field may be used to provide the license name
valid_license = True valid_license = True
@ -196,9 +197,8 @@ def validate_properties(component: dict, error_manager: ErrorManager) -> None:
elif strip_extra_prefixes(script_version) != strip_extra_prefixes( elif strip_extra_prefixes(script_version) != strip_extra_prefixes(
comp_version comp_version
) and strip_extra_prefixes(script_version) != strip_extra_prefixes(comp_pedigree_version): ) and strip_extra_prefixes(script_version) != strip_extra_prefixes(comp_pedigree_version):
error_manager.append_full_error_message( print(
VERSION_MISMATCH_ERROR f"WARNING: {VERSION_MISMATCH_ERROR}\n script version:{script_version}\n sbom component version:{comp_version}\n sbom component pedigree version:{comp_pedigree_version}"
+ f"\nscript version:{script_version}\nsbom component version:{comp_version}\nsbom component pedigree version:{comp_pedigree_version}"
) )

View File

@ -0,0 +1,5 @@
version: 2.0.0
filters:
- "*":
approvers:
- 10gen/code-review-team-ssdlc

View File

@ -1,64 +0,0 @@
#!/bin/bash
# This script creates a copy of sources for librdkafka and librdkafka++.
# This script currently only works on Linux x86_64 and aarch64 platforms.
set -euo pipefail
IFS=$'\n\t'
if [ "$#" -ne 0 ]; then
echo "This script does not take any arguments"
exit 1
fi
# Create a temporary directory to clone and configure librdkafka
TEMP_DIR=$(mktemp -d /tmp/librdkafka.XXXXXX)
# Setup some directory variables
DEST_DIR=$(git rev-parse --show-toplevel)/src/third_party/librdkafka
DIST_DIR=$DEST_DIR/dist
PLATFORM_DIR=$DIST_DIR/platform
VERSION = "wrong version"
# Clean the output directories
rm -rf $DIST_DIR
rm -rf $PLATFORM_DIR
rm -rf $TEMP_DIR/*
pushd $TEMP_DIR
# Clone the v2.0.2 branch of librdkafka.
git clone --depth 1 --branch v2.0.2 https://github.com/confluentinc/librdkafka.git
pushd librdkafka
echo "Generating config.h"
# Run configure to generate config.h, and move it into a platform specific directory.
./configure --source-deps-only
platformName=linux_$(uname -m)
# Copy the config.h into a platform specific directory
mkdir -p $PLATFORM_DIR/$platformName/include
mv config.h $PLATFORM_DIR/$platformName/include
# Remove un-used files
rm -rf CHANGELOG.md CODE_OF_CONDUCT.md CONFIGURATION.md CONTRIBUTING.md INTRODUCTION.md \
README.md README.win32 STATISTICS.md config.log.old dev-conf.sh examples/ \
CMakeLists.txt lds-gen.py mklove/ packaging/ service.yml tests/ vcpkg.json win32/ \
Makefile Makefile.config config.cache configure.self configure debian mainpage.doxy Doxyfile \
src/CMakeLists.txt src/Makefile src/generate_proto.sh src/librdkafka_cgrp_synch.png src/statistics_schema.json \
src-cpp/CMakeLists.txt src-cpp/Makefile src-cpp/README.md config.log
pushd src
# Replace all instances of the string "LZ4" and "XXH" with "KLZ4" and "KXXH" in the C source code.
# This is to avoid symbol conflicts with the LZ4 and XXH source that is used by
# third_party/mozjs.
sed -i 's/LZ4/KLZ4/g' *
sed -i 's/XXH/KXXH/g' *
popd
mkdir -p $DIST_DIR
cp -r * $DIST_DIR
popd
popd

View File

@ -1,42 +0,0 @@
{
"properties": [
{
"name": "comment",
"value": "SBOM for MDB server product; this file should comply with the format specified here: https://cyclonedx.org/docs/1.5/json/#components_items_publisher; This file is still in development; see https://jira.mongodb.org/browse/DEVPROD-2623 for details."
}
],
"bomFormat": "CycloneDX",
"specVersion": "1.5",
"version": 1,
"components": [
{
"type": "library",
"name": "kafka",
"version": "v4.25.0",
"scope": "required",
"licenses": [
{
"expression": "BSD-3-Clause"
}
],
"cpe": "test_cpe",
"properties": [
{
"name": "internal:team_responsible",
"value": "server_security"
},
{
"name": "import_script_path",
"value": "buildscripts/tests/sbom_linter/inputs/kafka_wrong_version_import.sh"
}
],
"evidence": {
"occurrences": [
{
"location": "src/third_party/librdkafka"
}
]
}
}
]
}

View File

@ -1,4 +1,4 @@
"""Unit tests for the selected_tests script.""" """Unit tests for the buildscripts/sbom_linter.py script."""
import os import os
import shutil import shutil
@ -97,12 +97,6 @@ class TestSbom(unittest.TestCase):
error_manager, sbom_linter.COULD_NOT_FIND_OR_READ_SCRIPT_FILE_ERROR error_manager, sbom_linter.COULD_NOT_FIND_OR_READ_SCRIPT_FILE_ERROR
) )
def test_version_mismatch(self):
test_file = os.path.join(self.input_dir, "sbom_version_mismatch.json")
third_party_libs = {"librdkafka"}
error_manager = sbom_linter.lint_sbom(test_file, test_file, third_party_libs, False)
self.assert_message_in_errors(error_manager, sbom_linter.VERSION_MISMATCH_ERROR)
def test_pedigree_version_match(self): def test_pedigree_version_match(self):
test_file = os.path.join(self.input_dir, "sbom_pedigree_version_match.json") test_file = os.path.join(self.input_dir, "sbom_pedigree_version_match.json")
third_party_libs = {"kafka"} third_party_libs = {"kafka"}

View File

@ -0,0 +1,218 @@
import json
import logging
import os
import sys
import unittest
sys.path.append(".")
from buildscripts.sbom.config import get_semver_from_release_version, is_valid_purl, regex_semver
from buildscripts.sbom.endorctl_utils import EndorCtl
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
class TestEndorctl(unittest.TestCase):
    """Tests for the EndorCtl wrapper in buildscripts/sbom/endorctl_utils.py."""

    def test_endorctl_init(self):
        """Tests the EndorCtl constructor."""
        e = EndorCtl(namespace="mongodb.10gen", retry_limit=1, sleep_duration=5)
        self.assertEqual(e.namespace, "mongodb.10gen")
        self.assertEqual(e.retry_limit, 1)
        self.assertEqual(e.sleep_duration, 5)

    def test_call_endorctl_missing(self):
        """Tests EndorCtl execution with endorctl not in path."""
        logger = logging.getLogger("generate_sbom")
        logger.setLevel(logging.INFO)
        e = EndorCtl(namespace="mongodb.10gen", endorctl_path="this_path_does_not_exist")
        result = e.get_sbom_for_project("https://github.com/10gen/mongo.git")
        # BUGFIX: the old `self.assertRaises(FileNotFoundError)` call asserted
        # nothing — without a callable or a `with` block it only returns a
        # context manager. The missing binary is handled inside
        # get_sbom_for_project, which signals failure by returning None, so
        # assert on the return value instead. (Also: assertIsNone takes the
        # failure *message* as its second argument, not an expected value.)
        self.assertIsNone(result)
class TestConfigRegex(unittest.TestCase):
    """Tests for the version/PURL helpers exported by buildscripts/sbom/config.py:
    regex_semver, get_semver_from_release_version, and is_valid_purl."""

    def test_semver_regex(self):
        """Tests the regex_semver."""
        # List of valid semantic version strings
        valid_semvers = [
            "0.0.1",
            "1.2.3",
            "10.20.30",
            "1.2.3-alpha",
            "1.2.3-alpha.1",
            "1.2.3-0.beta",
            "1.2.3+build.123",
            "1.2.3-rc.1+build.456",
            "1.0.0-beta+exp.sha.5114f85",
        ]
        # List of invalid semantic version strings
        invalid_semvers = [
            "1.2",  # Incomplete
            "1",  # Incomplete
            "v1.2.3",  # Has a 'v' prefix (regex is for the version part only)
            "1.2.3-",  # Trailing hyphen in pre-release
            "1.2.3+",  # Trailing plus in build
            "1.02.3",  # Leading zero in minor component
            "1.2.03",  # Leading zero in patch component
            "alpha",  # Not a valid version
            "1.2.3.4",  # Four components (SemVer is 3)
            "1.2.3-alpha_beta",  # Underscore in pre-release
        ]
        print("\nTesting regex_semver:")
        for v in valid_semvers:
            with self.subTest(v=v):
                self.assertIsNotNone(
                    regex_semver.fullmatch(v), f"Expected '{v}' to be a valid semver"
                )
        for v in invalid_semvers:
            with self.subTest(v=v):
                self.assertIsNone(
                    regex_semver.fullmatch(v), f"Expected '{v}' to be an invalid semver"
                )

    def test_get_semver_from_release_version(self):
        """Tests the transformation function that uses VERSION_PATTERN_REPL."""
        # (input, expected_output)
        test_cases = [
            # Pattern 1: 'debian/1.28.1-1'
            ("debian/1.28.1-1", "1.28.1"),
            ("debian/1.2.3-rc.1-2", "1.2.3-rc.1"),
            # Pattern 2: 'gperftools-2.9.1', 'mongo/v1.5.2', etc.
            ("gperftools-2.9.1", "2.9.1"),
            ("mongo/v1.5.2", "1.5.2"),
            ("mongodb-8.2.0-alpha2", "8.2.0-alpha2"),
            ("release-1.12.0", "1.12.0"),
            ("yaml-cpp-0.6.3", "0.6.3"),
            ("mongo/1.2.3-beta+build", "1.2.3-beta+build"),
            # Pattern 3: 'asio-1-34-2', 'cares-1_27_0'
            ("asio-1-34-2", "1.34.2"),
            ("cares-1_27_0", "1.27.0"),
            # Pattern 4: 'pcre2-10.40'
            ("pcre2-10.40", "10.40"),
            ("something-1.2", "1.2"),
            # Pattern 5: 'icu-release-57-1'
            ("icu-release-57-1", "57.1"),
            ("foo-bar-12-3", "12.3"),
            # Pattern 6: 'v2.6.0'
            ("v2.6.0", "2.6.0"),
            ("v1.2.3-alpha.1", "1.2.3-alpha.1"),
            # Pattern 7: 'r2.5.1'
            ("r2.5.1", "2.5.1"),
            ("r1.2.3-alpha.1", "1.2.3-alpha.1"),
            # Pattern 8: 'v2025.04.21.00' (non-semver but specific pattern)
            ("v2025.04.21.00", "2025.04.21.00"),
            # --- Cases that should not match ---
            ("1.2.3", "1.2.3"),  # Already clean
            ("latest", "latest"),  # No match
            ("not-a-version", "not-a-version"),  # No match
            ("v1.2", "v1.2"),  # Not matched by any pattern
        ]
        print("\nTesting get_semver_from_release_version():")
        for input_str, expected_str in test_cases:
            with self.subTest(input=input_str):
                result = get_semver_from_release_version(input_str)
                self.assertEqual(
                    result,
                    expected_str,
                    f"Input: '{input_str}', Expected: '{expected_str}', Got: '{result}'",
                )

    def test_purls_valid(self):
        """Tests valid PURLs."""
        valid_purls = [
            "pkg:github/gperftools/gperftools@gperftools-2.9.1",
            "pkg:github/mongodb/mongo-c-driver@1.23.4",
            "pkg:github/google/benchmark",  # No version
            "pkg:github/c-ares/c-ares@cares-1_27_0",
            "pkg:github/apache/avro@release-1.12.0",
            "pkg:github/jbeder/yaml-cpp@yaml-cpp-0.6.3",
            "pkg:github/pcre2project/pcre2@pcre2-10.40",
            "pkg:github/unicode-org/icu@icu-release-57-1",
            "pkg:github/confluentinc/librdkafka@v2.6.0",
            "pkg:github/facebook/folly@v2025.04.21.00?foo=bar#src/main",  # With qualifiers/subpath
            "pkg:generic/valgrind/valgrind@3.23.0",  # namespace/name@version
            "pkg:generic/intel/IntelRDFPMathLib@2.0u2",
            "pkg:generic/openldap/openldap",  # namespace/name
            "pkg:generic/openssl@3.0.13",  # name@version
            "pkg:generic/my-package",  # name only
            "pkg:generic/my-package@1.2.3?arch=x86_64#README.md",  # With qualifiers/subpath
            "pkg:deb/debian/firefox-esr@128.11.0esr-1?arch=source",
            "pkg:pypi/ocspbuilder@0.10.2",
        ]
        print("\nTesting Valid PURLs:")
        for purl in valid_purls:
            with self.subTest(purl=purl):
                self.assertTrue(is_valid_purl(purl), f"Expected '{purl}' to be valid")

    def test_purls_invalid(self):
        """Tests invalid PURLs."""
        invalid_purls = [
            "pkg:github/gperftools",  # Missing name
            "pkg:github/",  # Missing namespace and name
            "pkg:c/github.com/abseil/abseil-cpp",  # Wrong type ('c' is not handled by config.py)
            "pkg:github/mongodb/mongo-c-driver@1.2.3@4.5.6",  # Double version
            "pkg:generic/github/mongodb/mongo",  # Wrong type
            "pkg:generic/",  # Missing name
            "pkg:github/valgrind/",  # Missing name
            "pkg:generic/my-package@1.2@3.4",  # Double version
            "pkg:generic/spaces in name",  # Spaces not allowed (must be encoded)
            "pkg:deb/firefox-esr@128.11.0esr-1?arch=source",  # Missing vendor
            "pkg:pypi/ocsp/ocspbuilder@0.10.2",  # no namespace for PyPI
        ]
        print("\nTesting Invalid PURLs:")
        for purl in invalid_purls:
            with self.subTest(purl=purl):
                self.assertFalse(is_valid_purl(purl), f"Expected '{purl}' to be invalid")
__unittest = True
class TestMetadataFile(unittest.TestCase):
TEST_DIR = os.path.join("buildscripts", "sbom")
VERSION_TAG = "{{VERSION}}"
def read_sbom_json_file(self, file_path: str) -> dict:
"""Load a JSON SBOM file (schema is not validated)"""
with open(file_path, "r", encoding="utf-8") as input_json:
sbom_json = input_json.read()
return json.loads(sbom_json)
def test_metadata_sbom_version_tags(self):
sbom_metadata_file = os.path.join(self.TEST_DIR, "metadata.cdx.json")
print(sbom_metadata_file)
meta_bom = self.read_sbom_json_file(sbom_metadata_file)
for component in meta_bom["components"]:
with self.subTest(component=component):
properties = []
properties.append(component["bom-ref"])
properties.append(component["version"])
if "purl" in component:
properties.append(component["purl"])
if "cpe" in component:
properties.append(component["cpe"])
# make sure component has a minimum of bom-ref, version and at least one of purl or cpe
self.assertGreater(
len(properties),
2,
f"Component must have a minimum of bom-ref, version and at least one of purl or cpe. {properties}",
)
# make sure all properites either have version tag or no version tags
self.assertTrue(
all(self.VERSION_TAG in p for p in properties)
or all(self.VERSION_TAG not in p for p in properties),
f"Component must have version tag '{self.VERSION_TAG}' in all or none of bom-ref, version and purl and/or cpe. {properties})",
)
if __name__ == "__main__":
unittest.main(verbosity=2)

View File

@ -1805,6 +1805,92 @@ tasks:
GITHUB_REPO: ${github_repo} GITHUB_REPO: ${github_repo}
GITHUB_TOKEN: ${github_token} GITHUB_TOKEN: ${github_token}
- name: update_sbom
tags: []
exec_timeout_secs: 3600 # 60 minute timeout
commands:
- command: manifest.load
- command: git.get_project
params:
directory: src
clone_depth: 1
- func: "restore git history and tags"
- func: "add git tag"
- func: "f_expansions_write"
- func: "kill processes"
- func: "cleanup environment"
- func: "set up venv"
- command: ec2.assume_role
display_name: Assume Silkbomb IAM role (access to Endor Labs API key)
params:
role_arn: arn:aws:iam::119629040606:role/silkbomb
- command: shell.exec
display_name: Write Endor Labs API credentials to config.yml
params:
silent: true
shell: bash
include_expansions_in_env:
[AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN]
env:
ENDOR_CONFIG_PATH: "${workdir}/.endorctl"
script: |
set -e
# use AWS CLI to get the Endor Labs API credentials from AWS Secrets Manager
ENDOR_API_CREDENTIALS_KEY=$(aws secretsmanager get-secret-value --secret-id silkbomb-environment --region us-east-1 --query SecretString --output text | jq -r '.ENDOR_API_CREDENTIALS_KEY')
ENDOR_API_CREDENTIALS_SECRET=$(aws secretsmanager get-secret-value --secret-id silkbomb-environment --region us-east-1 --query SecretString --output text | jq -r '.ENDOR_API_CREDENTIALS_SECRET')
# save credentials to config file
mkdir -p ${workdir}/.endorctl
cat << EOF > $ENDOR_CONFIG_PATH/config.yaml
ENDOR_API: https://api.endorlabs.com
ENDOR_API_CREDENTIALS_KEY: $ENDOR_API_CREDENTIALS_KEY
ENDOR_API_CREDENTIALS_SECRET: $ENDOR_API_CREDENTIALS_SECRET
ENDOR_NAMESPACE: ${ENDOR_NAMESPACE}
EOF
- command: subprocess.exec
display_name: Install endorctl
params:
binary: bash
env:
ENDOR_INSTALL_PATH: ${workdir}
ENDOR_CONFIG_PATH: "${workdir}/.endorctl"
args:
- "src/buildscripts/sbom/install_endorctl.sh"
- command: subprocess.exec
display_name: Generate SBOM file
params:
binary: bash
args:
- "src/evergreen/run_python_script.sh"
- "buildscripts/sbom/generate_sbom.py"
- "--project=https://github.com/10gen/mongo.git"
- "--target=branch"
- "--branch=${branch_name}"
- "--endorctl-path=${workdir}/endorctl"
- "--config-path=${workdir}/.endorctl"
- "--namespace=${ENDOR_NAMESPACE}"
- command: subprocess.exec
display_name: Generate third-party readme file
params:
binary: bash
args:
- "src/evergreen/run_python_script.sh"
- "src/third_party/scripts/gen_thirdpartyreadme.py"
- command: subprocess.exec
display_name: Create pull request for SBOM files, if changed
params:
binary: bash
include_expansions_in_env:
- MONGO_PR_BOT_APP_ID
- MONGO_PR_BOT_PRIVATE_KEY
args:
- "src/evergreen/run_python_script.sh"
- "buildscripts/sbom/sbom_files_pr.py"
- "--github-owner=${github_org}"
- "--github-repo=${github_repo}"
- "--base-branch=${branch_name}"
- "--new-branch=SERVER-111072/sbom_update_${revision}"
- "--pr-title=SERVER-111072 Auto-generated SBOM files [${branch_name}] ${revision}"
- name: upload_sbom_via_silkbomb_if_changed - name: upload_sbom_via_silkbomb_if_changed
allowed_requesters: ["commit"] allowed_requesters: ["commit"]
tags: [] tags: []

View File

@ -92,3 +92,16 @@ buildvariants:
- ubuntu2204-large - ubuntu2204-large
tasks: tasks:
- name: devcontainer_test - name: devcontainer_test
- name: create_sbom_and_pr
display_name: "Generate SBOM files and create PR"
# Don't run as part of patch builds
patchable: false
# Run at 6 am UTC daily
cron: "0 6 * * *"
run_on: rhel92-small
expansions:
ENDOR_NAMESPACE: mongodb.10gen
stepback: false
tasks:
- name: update_sbom

5091
sbom.json

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,8 @@ set -vx
NAME=protobuf NAME=protobuf
REVISION="v4.25.0" REVISION="v4.25.0"
VERSION="4.25.0" # VERSION variable is not used in this script, but is in here for SBOM generation. Should match the official release tag
VERSION="v25.0"
DEST_DIR=$(git rev-parse --show-toplevel)/src/third_party/protobuf DEST_DIR=$(git rev-parse --show-toplevel)/src/third_party/protobuf
PATCH_DIR=$(git rev-parse --show-toplevel)/src/third_party/protobuf/patches PATCH_DIR=$(git rev-parse --show-toplevel)/src/third_party/protobuf/patches

View File

@ -1,10 +1,8 @@
version: 1.0.0 version: 1.0.0
filters: filters:
# TODO(SERVER-110179): Change ownership of `gen_thirdpartyreadme.py` and
# `README.third_party.md.template` to @10gen/code-review-team-ssdlc.
- "gen_thirdpartyreadme.py": - "gen_thirdpartyreadme.py":
approvers: approvers:
- 10gen/server-security - 10gen/code-review-team-ssdlc
- "README.third_party.md.template": - "README.third_party.md.template":
approvers: approvers:
- 10gen/server-security - 10gen/code-review-team-ssdlc

View File

@ -23,14 +23,6 @@ $component_chart
$component_links $component_links
## WiredTiger Vendored Test Libraries
The following libraries are transitively included by WiredTiger,
and are used by that component for testing. They don't appear in
released binary artifacts.
$wiredtiger_chart
## Dynamically Linked Libraries ## Dynamically Linked Libraries
Sometimes MongoDB needs to load libraries provided and managed by the Sometimes MongoDB needs to load libraries provided and managed by the

View File

@ -9,9 +9,9 @@ warnings.filterwarnings("ignore", message="\nYou don't have the C version of Nam
from Cheetah.Template import Template from Cheetah.Template import Template
SBOM_PATH = "../../../sbom.json" SBOM_PATH = "sbom.json"
TEMPLATE_PATH = "README.third_party.md.template" TEMPLATE_PATH = "src/third_party/scripts/README.third_party.md.template"
README_PATH = "../../../README.third_party.md" README_PATH = "README.third_party.md"
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@ -26,14 +26,9 @@ def main():
component_links_string = sbom_to_component_links_string(sbom) component_links_string = sbom_to_component_links_string(sbom)
wiredtiger_chart = sbom_to_wiredtiger_chart(sbom)
right_pad_chart_values(wiredtiger_chart)
wiredtiger_chart_string = chart_to_string(wiredtiger_chart)
template_data = { template_data = {
"component_chart": component_chart_string, "component_chart": component_chart_string,
"component_links": component_links_string, "component_links": component_links_string,
"wiredtiger_chart": wiredtiger_chart_string,
} }
create_markdown_with_template(template_data) create_markdown_with_template(template_data)
@ -121,23 +116,6 @@ def sbom_to_component_links_string(sbom: dict) -> list[list[str]]:
return "\n".join(link_list) return "\n".join(link_list)
def sbom_to_wiredtiger_chart(sbom: dict) -> list[list[str]]:
components = sbom["components"]
wiredtiger_chart = [["Name"]]
for component in components:
check_component_validity(component)
locations = get_component_locations(component)
for location in locations:
if location.startswith("src/third_party/wiredtiger/"):
bisect.insort(
wiredtiger_chart,
([component["name"].replace("|", "") + "@" + component["version"]]),
)
return wiredtiger_chart
def check_component_validity(component) -> None: def check_component_validity(component) -> None:
for required_key in ["name", "version", "licenses"]: for required_key in ["name", "version", "licenses"]:
if required_key not in component: if required_key not in component:
@ -147,15 +125,21 @@ def check_component_validity(component) -> None:
def get_component_info_link(component) -> str: def get_component_info_link(component) -> str:
# Get externalReferences or "info_link" properties
name = component["name"] name = component["name"]
links = [] links = []
if "externalReferences" in component:
for externalReference in component["externalReferences"]:
u, t = externalReference["url"], externalReference["type"]
if t in ["distribution","vcs","website"]:
links.append(u)
if "properties" in component: if "properties" in component:
for prop in component["properties"]: for prop in component["properties"]:
k, v = prop["name"], prop["value"] k, v = prop["name"], prop["value"]
if k == "info_link": if k == "info_link":
links.append(v) links.append(v)
if len(links) != 1: if len(links) != 1:
logging.warning("Warning: Expected 1 info_link for %s. Got %d:", name, len(links)) logging.warning("Warning: Expected 1 externalReferences or info_link for %s. Got %d:", name, len(links))
if len(links) > 1: if len(links) > 1:
logging.warning(" ".join(links)) logging.warning(" ".join(links))
logging.warning("Using first link only.") logging.warning("Using first link only.")