pep440: fix version ordering (#1883)

A couple moons ago, I introduced an optimization for version comparisons
by devising a format where *most* versions would be represented by a
single `u64`. This in turn meant most comparisons (of which many are
done during resolution) would be extremely cheap.

Unfortunately, when I did that, I screwed up the preservation of
ordering as defined by the [Version Specifiers spec]. I think I messed
it up because I had originally devised the representation so that we
could pack things like `1.2.3.dev1.post5`, but later realized it would
be better to limit ourselves to a single suffix. However, I never
updated the binary encoding to better match "up to 4 release versions
and up to precisely 1 suffix." Because of that, there were cases where
versions weren't ordered correctly. For example, this fixes a bug where
`1.0a2 < 1.0dev2`, even though all dev releases should order before
pre-releases.

We also update a test so that it catches these kinds of bugs in the
future. (By testing all pairs of versions in a sequence instead of just
the adjacent versions.)

[Version Specifiers spec]:
https://packaging.python.org/en/latest/specifications/version-specifiers/#summary-of-permitted-suffixes-and-relative-ordering
This commit is contained in:
Andrew Gallant 2024-02-22 18:01:42 -05:00 committed by GitHub
parent 4f129d2a98
commit b7942164ee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 128 additions and 84 deletions

View File

@ -743,35 +743,30 @@ impl FromStr for Version {
/// * Bytes 6 and 7 correspond to the first release segment as a `u16`. /// * Bytes 6 and 7 correspond to the first release segment as a `u16`.
/// * Bytes 5, 4 and 3 correspond to the second, third and fourth release /// * Bytes 5, 4 and 3 correspond to the second, third and fourth release
/// segments, respectively. /// segments, respectively.
/// * Byte 2 corresponds to the post-release segment. If there is no /// * Bytes 2, 1 and 0 represent *one* of the following:
/// post-release segment, then byte 2 is set to 0x00. This makes "no /// `.devN, aN, bN, rcN, <no suffix>, .postN`. Its representation is thus:
/// post-release" sort before "has post-release." The numeric value /// * The most significant 3 bits of Byte 2 corresponds to a value in
/// (constrained to be <u8::MAX) has 1 added to it so that 0x00 is reserved to /// the range 0-5 inclusive, corresponding to dev, pre-a, pre-b, pre-rc,
/// indicate absence. /// no-suffix or post releases, respectively.
/// * Byte 1 corresponds to the pre-release segment. If there is no pre-release /// * The low 5 bits combined with the bits in bytes 1 and 0 correspond
/// segment, then byte 1 is set to 0xFF. This makes "no pre-release" sort /// to the release number of the suffix, if one exists. If there is no
/// after "has pre-release." The most significant two bits of byte 1 encode /// suffix, then this bits are always 0.
/// the type of pre-release (alpha, beta, rc) while the low 6 bits encode the
/// pre-release numeric value.
/// * Byte 0 corresponds to the dev-release segment. If there is no dev-release
/// segment, then byte 0 is set to 0xFF. This makes "no dev-release" sort after
/// "has dev-release." The dev-release value (constrained to be <u8::MAX) is
/// stored in byte 0 as-is.
/// ///
/// The order of the encoding above is significant. For example, the /// The order of the encoding above is significant. For example, suffixes are
/// post-release segment is encoded at a more significant byte in the `u64` /// encoded at a less significant location than the release numbers, so that
/// than the pre-release segment because `1.2.3.post1 > 1.2.3rc9999`. /// `1.2.3 < 1.2.3.post4`.
/// ///
/// Notice also that nothing about the representation inherently prohibits /// In a previous representation, we tried to enocode the suffixes in different
/// storing any combination of pre, dev or post release components. We /// locations so that, in theory, you could represent `1.2.3.dev2.post3` in the
/// could absolutely store all three (assuming they fit into their various /// packed form. But getting the ordering right for this is difficult (perhaps
/// constraints outlined above). But, if we did that, a simple `u64::cmp` would /// impossible without extra space?). So we limited to only storing one suffix.
/// no longer be correct. For example, `1.0.post456.dev34 < 1.0.post456`, but /// But even then, we wound up with a bug where `1.0dev1 > 1.0a1`, when of
/// in the representation above, it would treat `1.0.post456.dev34` as greater /// course, all dev releases should compare less than pre releases. This was
/// than `1.0.post456`. To make comparisons cheap for multi-component versions /// because the encoding recorded the pre-release as "absent", and this in turn
/// like that, we'd need to use more space. Thankfully, such versions are /// screwed up the order comparisons.
/// incredibly rare. Virtually all versions have zero or one pre, dev or post ///
/// release components. /// Thankfully, such versions are incredibly rare. Virtually all versions have
/// zero or one pre, dev or post release components.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
#[cfg_attr( #[cfg_attr(
feature = "rkyv", feature = "rkyv",
@ -815,10 +810,18 @@ struct VersionSmall {
} }
impl VersionSmall { impl VersionSmall {
const SUFFIX_DEV: u64 = 0;
const SUFFIX_PRE_ALPHA: u64 = 1;
const SUFFIX_PRE_BETA: u64 = 2;
const SUFFIX_PRE_RC: u64 = 3;
const SUFFIX_NONE: u64 = 4;
const SUFFIX_POST: u64 = 5;
const SUFFIX_MAX_VERSION: u64 = 0x1FFFFF;
#[inline] #[inline]
fn new() -> VersionSmall { fn new() -> VersionSmall {
VersionSmall { VersionSmall {
repr: 0x00000000_0000FFFF, repr: 0x00000000_00800000,
release: [0, 0, 0, 0], release: [0, 0, 0, 0],
len: 0, len: 0,
} }
@ -876,29 +879,28 @@ impl VersionSmall {
#[inline] #[inline]
fn post(&self) -> Option<u64> { fn post(&self) -> Option<u64> {
let v = (self.repr >> 16) & 0xFF; if self.suffix_kind() == VersionSmall::SUFFIX_POST {
if v == 0 { Some(self.suffix_version())
None
} else { } else {
Some(v - 1) None
} }
} }
#[inline] #[inline]
fn set_post(&mut self, value: Option<u64>) -> bool { fn set_post(&mut self, value: Option<u64>) -> bool {
if value.is_some() && (self.pre().is_some() || self.dev().is_some()) { if self.pre().is_some() || self.dev().is_some() {
return false; return value.is_none();
} }
match value { match value {
None => { None => {
self.repr &= !(0xFF << 16); self.set_suffix_kind(VersionSmall::SUFFIX_NONE);
} }
Some(number) => { Some(number) => {
if number > 0b1111_1110 { if number > VersionSmall::SUFFIX_MAX_VERSION {
return false; return false;
} }
self.repr &= !(0xFF << 16); self.set_suffix_kind(VersionSmall::SUFFIX_POST);
self.repr |= (number + 1) << 16; self.set_suffix_version(number);
} }
} }
true true
@ -906,40 +908,52 @@ impl VersionSmall {
#[inline] #[inline]
fn pre(&self) -> Option<PreRelease> { fn pre(&self) -> Option<PreRelease> {
let v = (self.repr >> 8) & 0xFF; let (kind, number) = (self.suffix_kind(), self.suffix_version());
if v == 0xFF { if kind == VersionSmall::SUFFIX_PRE_ALPHA {
return None; Some(PreRelease {
kind: PreReleaseKind::Alpha,
number,
})
} else if kind == VersionSmall::SUFFIX_PRE_BETA {
Some(PreRelease {
kind: PreReleaseKind::Beta,
number,
})
} else if kind == VersionSmall::SUFFIX_PRE_RC {
Some(PreRelease {
kind: PreReleaseKind::Rc,
number,
})
} else {
None
} }
let number = v & 0b0011_1111;
let kind = match v >> 6 {
0 => PreReleaseKind::Alpha,
1 => PreReleaseKind::Beta,
2 => PreReleaseKind::Rc,
_ => unreachable!(),
};
Some(PreRelease { kind, number })
} }
#[inline] #[inline]
fn set_pre(&mut self, value: Option<PreRelease>) -> bool { fn set_pre(&mut self, value: Option<PreRelease>) -> bool {
if value.is_some() && (self.post().is_some() || self.dev().is_some()) { if self.dev().is_some() || self.post().is_some() {
return false; return value.is_none();
} }
match value { match value {
None => { None => {
self.repr |= 0xFF << 8; self.set_suffix_kind(VersionSmall::SUFFIX_NONE);
} }
Some(PreRelease { kind, number }) => { Some(PreRelease { kind, number }) => {
if number > 0b0011_1111 { if number > VersionSmall::SUFFIX_MAX_VERSION {
return false; return false;
} }
let kind = match kind { match kind {
PreReleaseKind::Alpha => 0, PreReleaseKind::Alpha => {
PreReleaseKind::Beta => 1, self.set_suffix_kind(VersionSmall::SUFFIX_PRE_ALPHA);
PreReleaseKind::Rc => 2, }
}; PreReleaseKind::Beta => {
self.repr &= !(0xFF << 8); self.set_suffix_kind(VersionSmall::SUFFIX_PRE_BETA);
self.repr |= ((kind << 6) | number) << 8; }
PreReleaseKind::Rc => {
self.set_suffix_kind(VersionSmall::SUFFIX_PRE_RC);
}
}
self.set_suffix_version(number);
} }
} }
true true
@ -947,29 +961,28 @@ impl VersionSmall {
#[inline] #[inline]
fn dev(&self) -> Option<u64> { fn dev(&self) -> Option<u64> {
let v = self.repr & 0xFF; if self.suffix_kind() == VersionSmall::SUFFIX_DEV {
if v == 0xFF { Some(self.suffix_version())
None
} else { } else {
Some(v) None
} }
} }
#[inline] #[inline]
fn set_dev(&mut self, value: Option<u64>) -> bool { fn set_dev(&mut self, value: Option<u64>) -> bool {
if value.is_some() && (self.pre().is_some() || self.post().is_some()) { if self.pre().is_some() || self.post().is_some() {
return false; return value.is_none();
} }
match value { match value {
None => { None => {
self.repr |= 0xFF; self.set_suffix_kind(VersionSmall::SUFFIX_NONE);
} }
Some(number) => { Some(number) => {
if number > 0b1111_1110 { if number > VersionSmall::SUFFIX_MAX_VERSION {
return false; return false;
} }
self.repr &= !0xFF; self.set_suffix_kind(VersionSmall::SUFFIX_DEV);
self.repr |= number; self.set_suffix_version(number);
} }
} }
true true
@ -981,6 +994,35 @@ impl VersionSmall {
// of local segments. // of local segments.
&[] &[]
} }
#[inline]
fn suffix_kind(&self) -> u64 {
let kind = (self.repr >> 21) & 0b111;
debug_assert!(kind <= VersionSmall::SUFFIX_POST);
kind
}
#[inline]
fn set_suffix_kind(&mut self, kind: u64) {
debug_assert!(kind <= VersionSmall::SUFFIX_POST);
self.repr &= !0xE00000;
self.repr |= kind << 21;
if kind == VersionSmall::SUFFIX_NONE {
self.set_suffix_version(0);
}
}
#[inline]
fn suffix_version(&self) -> u64 {
self.repr & 0x1FFFFF
}
#[inline]
fn set_suffix_version(&mut self, value: u64) {
debug_assert!(value <= 0x1FFFFF);
self.repr &= !0x1FFFFF;
self.repr |= value;
}
} }
/// The "full" representation of a version. /// The "full" representation of a version.
@ -1368,9 +1410,9 @@ impl<'a> Parser<'a> {
| (u64::from(release[1]) << 40) | (u64::from(release[1]) << 40)
| (u64::from(release[2]) << 32) | (u64::from(release[2]) << 32)
| (u64::from(release[3]) << 24) | (u64::from(release[3]) << 24)
| (0x00 << 16) | (0x80 << 16)
| (0xFF << 8) | (0x00 << 8)
| (0xFF << 0), | (0x00 << 0),
release: [ release: [
u64::from(release[0]), u64::from(release[0]),
u64::from(release[1]), u64::from(release[1]),
@ -3453,16 +3495,18 @@ mod tests {
"1.0.15", "1.0.15",
"1.1.dev1", "1.1.dev1",
]; ];
for pair in versions.windows(2) { for (i, v1) in versions.iter().enumerate() {
let less = pair[0].parse::<Version>().unwrap(); for v2 in versions[i + 1..].iter() {
let greater = pair[1].parse::<Version>().unwrap(); let less = v1.parse::<Version>().unwrap();
assert_eq!( let greater = v2.parse::<Version>().unwrap();
less.cmp(&greater), assert_eq!(
Ordering::Less, less.cmp(&greater),
"less: {:?}\ngreater: {:?}", Ordering::Less,
less.as_bloated_debug(), "less: {:?}\ngreater: {:?}",
greater.as_bloated_debug() less.as_bloated_debug(),
); greater.as_bloated_debug()
);
}
} }
} }

View File

@ -521,7 +521,7 @@ impl CacheBucket {
CacheBucket::FlatIndex => "flat-index-v0", CacheBucket::FlatIndex => "flat-index-v0",
CacheBucket::Git => "git-v0", CacheBucket::Git => "git-v0",
CacheBucket::Interpreter => "interpreter-v0", CacheBucket::Interpreter => "interpreter-v0",
CacheBucket::Simple => "simple-v1", CacheBucket::Simple => "simple-v2",
CacheBucket::Wheels => "wheels-v0", CacheBucket::Wheels => "wheels-v0",
CacheBucket::Archive => "archive-v0", CacheBucket::Archive => "archive-v0",
} }