diff --git a/core/Cargo.toml b/core/Cargo.toml index a3dd82bb98..f9cbce313f 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -15,6 +15,6 @@ itertools = "0.10.3" lz4_flex = "0.9.2" num-bigint = { version = "0.4.3", features = ["serde"] } num-complex = { version = "0.4.0", features = ["serde"] } +num_enum = "0.5.7" serde = { version = "1.0.136", features = ["derive"] } -static_assertions = "1.1.0" thiserror = "1.0" diff --git a/core/src/bytecode.rs b/core/src/bytecode.rs index ea2477ae27..b7c456e989 100644 --- a/core/src/bytecode.rs +++ b/core/src/bytecode.rs @@ -7,8 +7,10 @@ use bstr::ByteSlice; use itertools::Itertools; use num_bigint::BigInt; use num_complex::Complex64; +use num_enum::{IntoPrimitive, TryFromPrimitive}; use serde::{Deserialize, Serialize}; -use std::{collections::BTreeSet, fmt, hash}; +use std::marker::PhantomData; +use std::{collections::BTreeSet, fmt, hash, mem}; pub trait Constant: Sized { type Name: AsRef; @@ -62,7 +64,7 @@ impl ConstantBag for BasicBag { /// a codeobject. Also a module has a codeobject. #[derive(Clone, Serialize, Deserialize)] pub struct CodeObject { - pub instructions: Box<[Instruction]>, + pub instructions: Box<[CodeUnit]>, pub locations: Box<[Location]>, pub flags: CodeFlags, pub posonlyarg_count: usize, @@ -111,12 +113,188 @@ impl CodeFlags { ]; } -#[derive(Serialize, Debug, Deserialize, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] +/// an opcode argument that may be extended by a prior ExtendedArg +#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[repr(transparent)] +pub struct OpArgByte(pub u8); +impl OpArgByte { + pub const fn null() -> Self { + OpArgByte(0) + } +} +impl fmt::Debug for OpArgByte { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +/// a full 32-bit oparg, including any possible ExtendedArg extension +#[derive(Copy, Clone, Debug)] +#[repr(transparent)] +pub struct OpArg(pub u32); +impl OpArg { + pub const fn null() -> Self { + OpArg(0) + } + + /// Returns how many CodeUnits a instruction with this oparg will be encoded as + #[inline] + pub fn instr_size(self) -> usize { + (self.0 > 0xff) as usize + (self.0 > 0xff_ff) as usize + (self.0 > 0xff_ff_ff) as usize + 1 + } + + /// returns the arg split into any necessary ExtendedArg components (in big-endian order) and + /// the arg for the real opcode itself + #[inline(always)] + pub fn split(self) -> (impl ExactSizeIterator, OpArgByte) { + let mut it = self + .0 + .to_le_bytes() + .map(OpArgByte) + .into_iter() + .take(self.instr_size()); + let lo = it.next().unwrap(); + (it.rev(), lo) + } +} + +#[derive(Default, Copy, Clone)] +#[repr(transparent)] +pub struct OpArgState { + state: u32, +} + +impl OpArgState { + #[inline(always)] + pub fn get(&mut self, ins: CodeUnit) -> (Instruction, OpArg) { + let arg = self.extend(ins.arg); + if ins.op != Instruction::ExtendedArg { + self.reset(); + } + (ins.op, arg) + } + #[inline(always)] + pub fn extend(&mut self, arg: OpArgByte) -> OpArg { + self.state = self.state << 8 | u32::from(arg.0); + OpArg(self.state) + } + #[inline(always)] + pub fn reset(&mut self) { + self.state = 0 + } +} + +pub trait OpArgType: Copy { + fn from_oparg(x: u32) -> Option; + fn to_oparg(self) -> u32; +} + +impl OpArgType for u32 { + #[inline(always)] + fn from_oparg(x: u32) -> Option { + Some(x) + } + #[inline(always)] + fn to_oparg(self) -> u32 { + self + } +} + +impl OpArgType for bool { + #[inline(always)] + fn from_oparg(x: u32) -> Option { + Some(x != 0) + } + #[inline(always)] + fn to_oparg(self) -> u32 { + self as u32 + } +} + +macro_rules! enum_oparg { + ($t:ident) => { + impl OpArgType for $t { + #[inline(always)] + fn from_oparg(x: u32) -> Option { + $t::try_from_primitive(x as _).ok() + } + #[inline(always)] + fn to_oparg(self) -> u32 { + u8::from(self).into() + } + } + }; +} + +#[derive(Copy, Clone, Serialize, Deserialize)] +#[serde(bound = "")] +pub struct Arg(PhantomData); + +impl Arg { + #[inline] + pub fn marker() -> Self { + Arg(PhantomData) + } + #[inline] + pub fn new(arg: T) -> (Self, OpArg) { + (Self(PhantomData), OpArg(arg.to_oparg())) + } + #[inline] + pub fn new_single(arg: T) -> (Self, OpArgByte) + where + T: Into, + { + (Self(PhantomData), OpArgByte(arg.into())) + } + #[inline(always)] + pub fn get(self, arg: OpArg) -> T { + self.try_get(arg).unwrap() + } + #[inline(always)] + pub fn try_get(self, arg: OpArg) -> Option { + T::from_oparg(arg.0) + } + #[inline(always)] + /// # Safety + /// T::from_oparg(self) must succeed + pub unsafe fn get_unchecked(self, arg: OpArg) -> T { + match T::from_oparg(arg.0) { + Some(t) => t, + None => std::hint::unreachable_unchecked(), + } + } +} + +impl PartialEq for Arg { + fn eq(&self, _: &Self) -> bool { + true + } +} +impl Eq for Arg {} + +impl fmt::Debug for Arg { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Arg<{}>", std::any::type_name::()) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] #[repr(transparent)] // XXX: if you add a new instruction that stores a Label, make sure to add it in -// Instruction::label_arg{,_mut} +// Instruction::label_arg pub struct Label(pub u32); +impl OpArgType for Label { + #[inline(always)] + fn from_oparg(x: u32) -> Option { + Some(Label(x)) + } + #[inline(always)] + fn to_oparg(self) -> u32 { + self.0 + } +} + impl fmt::Display for Label { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.0.fmt(f) @@ -124,7 +302,7 @@ impl fmt::Display for Label { } /// Transforms a value prior to formatting it. -#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)] #[repr(u8)] pub enum ConversionFlag { /// No conversion @@ -136,37 +314,36 @@ pub enum ConversionFlag { /// Converts by calling `repr()`. Repr = b'r', } +enum_oparg!(ConversionFlag); impl TryFrom for ConversionFlag { type Error = usize; fn try_from(b: usize) -> Result { - let b = b.try_into().map_err(|_| b)?; - match b { - 0 => Ok(Self::None), - b's' => Ok(Self::Str), - b'a' => Ok(Self::Ascii), - b'r' => Ok(Self::Repr), - b => Err(b as usize), - } + u8::try_from(b) + .ok() + .and_then(|b| Self::try_from(b).ok()) + .ok_or(b) } } /// The kind of Raise that occurred. -#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, TryFromPrimitive, IntoPrimitive)] +#[repr(u8)] pub enum RaiseKind { Reraise, Raise, RaiseCause, } +enum_oparg!(RaiseKind); pub type NameIdx = u32; /// A Single bytecode instruction. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum Instruction { /// Importing by name ImportName { - idx: NameIdx, + idx: Arg, }, /// Importing without name ImportNameless, @@ -174,52 +351,52 @@ pub enum Instruction { ImportStar, /// from ... import ... ImportFrom { - idx: NameIdx, + idx: Arg, }, - LoadFast(NameIdx), - LoadNameAny(NameIdx), - LoadGlobal(NameIdx), - LoadDeref(NameIdx), - LoadClassDeref(NameIdx), - StoreFast(NameIdx), - StoreLocal(NameIdx), - StoreGlobal(NameIdx), - StoreDeref(NameIdx), - DeleteFast(NameIdx), - DeleteLocal(NameIdx), - DeleteGlobal(NameIdx), - DeleteDeref(NameIdx), - LoadClosure(NameIdx), + LoadFast(Arg), + LoadNameAny(Arg), + LoadGlobal(Arg), + LoadDeref(Arg), + LoadClassDeref(Arg), + StoreFast(Arg), + StoreLocal(Arg), + StoreGlobal(Arg), + StoreDeref(Arg), + DeleteFast(Arg), + DeleteLocal(Arg), + DeleteGlobal(Arg), + DeleteDeref(Arg), + LoadClosure(Arg), Subscript, StoreSubscript, DeleteSubscript, StoreAttr { - idx: NameIdx, + idx: Arg, }, DeleteAttr { - idx: NameIdx, + idx: Arg, }, LoadConst { /// index into constants vec - idx: u32, + idx: Arg, }, UnaryOperation { - op: UnaryOperator, + op: Arg, }, BinaryOperation { - op: BinaryOperator, + op: Arg, }, BinaryOperationInplace { - op: BinaryOperator, + op: Arg, }, LoadAttr { - idx: NameIdx, + idx: Arg, }, TestOperation { - op: TestOperator, + op: Arg, }, CompareOperation { - op: ComparisonOperator, + op: Arg, }, Pop, Rotate2, @@ -228,71 +405,69 @@ pub enum Instruction { Duplicate2, GetIter, Continue { - target: Label, + target: Arg