1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#![no_std]
#![doc = include_str!("../README.md")]

use core::fmt::{self, Debug, Display};
use core::hash::Hash;

#[cfg(feature="use-serde")]
#[macro_use] extern crate serde_derive;

#[cfg(feature="use-serde")]
use serde::{Serialize, Deserialize};

mod address;
pub use address::{Address, AddressBase, AddressDiff, AddressDiffAmount, AddressDisplay};
pub use address::{AddressDisplayUsize, AddressDisplayU64, AddressDisplayU32, AddressDisplayU16};
#[cfg(feature="address-parse")]
pub use address::AddrParse;

pub mod annotation;

mod color;
pub use color::{Colorize, NoColors, YaxColors};

#[cfg(feature="colors")]
pub use color::ColorSettings;

pub mod display;
mod reader;
pub use reader::{Reader, ReaderBuilder, ReadError, U8Reader, U16le, U16be, U32le, U32be, U64le, U64be};

/// the minimum set of errors a `yaxpeax-arch` disassembler may produce.
///
/// it is permissible for an implementor of `DecodeError` to have items that return `false` for
/// all these functions; decoders are permitted to error in ways that `yaxpeax-arch` does not know
/// about.
pub trait DecodeError: PartialEq + Display + Debug + Send + Sync + 'static {
    /// did the decoder fail because it reached the end of input?
    fn data_exhausted(&self) -> bool;
    /// did the decoder error because the instruction's opcode is invalid?
    ///
    /// this may not be a sensible question for some instruction sets - `bad_opcode` should
    /// generally indicate an issue with the instruction itself. this is in contrast to one
    /// specific operand being invalid for the instruction, or some other issue to do with decoding
    /// data beyond the top-level instruction. the "opcode"/"operand" distinction is often fuzzy
    /// and left as best-effort for decoder implementors.
    fn bad_opcode(&self) -> bool;
    /// did the decoder error because an operand of the instruction to decode is invalid?
    ///
    /// similar to [`DecodeError::bad_opcode`], this is a subjective distinction and best-effort on
    /// the part of implementors.
    fn bad_operand(&self) -> bool;
    /// a human-friendly description of this decode error.
    ///
    /// the returned string is `'static`, so producing it requires no allocation.
    fn description(&self) -> &'static str;
}

/// a minimal enum implementing `DecodeError`. this is intended to be enough for a low effort,
/// low-fidelity error taxonomy, without the boilerplate of a `DecodeError` implementation.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum StandardDecodeError {
    /// the input ran out; this is the variant for which [`DecodeError::data_exhausted`] is `true`.
    ExhaustedInput,
    /// the opcode is invalid; this is the variant for which [`DecodeError::bad_opcode`] is `true`.
    InvalidOpcode,
    /// an operand is invalid; this is the variant for which [`DecodeError::bad_operand`] is
    /// `true`.
    InvalidOperand,
}

/// a slightly less minimal enum `DecodeError`. similar to `StandardDecodeError`, this is an
/// anti-boilerplate measure. it additionally provides `IncompleteDecoder`, making it suitable to
/// represent error kinds for decoders that are ... not yet complete.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum StandardPartialDecoderError {
    /// the input ran out; this is the variant for which [`DecodeError::data_exhausted`] is `true`.
    ExhaustedInput,
    /// the opcode is invalid; this is the variant for which [`DecodeError::bad_opcode`] is `true`.
    InvalidOpcode,
    /// an operand is invalid; this is the variant for which [`DecodeError::bad_operand`] is
    /// `true`.
    InvalidOperand,
    /// the decoder itself is incomplete. none of `data_exhausted`, `bad_opcode`, or `bad_operand`
    /// report `true` for this variant.
    IncompleteDecoder,
}

#[cfg(feature = "std")]
extern crate std;
#[cfg(feature = "std")]
impl std::error::Error for StandardDecodeError {
    /// forwards to [`DecodeError::description`], so the `std` error description and the
    /// `DecodeError` description are always the same string.
    fn description(&self) -> &str {
        // name the trait explicitly: both `std::error::Error` and `DecodeError` have a method
        // called `description`, and this must call the `DecodeError` one.
        DecodeError::description(self)
    }
}
#[cfg(feature = "std")]
impl std::error::Error for StandardPartialDecoderError {
    /// forwards to [`DecodeError::description`], so the `std` error description and the
    /// `DecodeError` description are always the same string.
    fn description(&self) -> &str {
        // name the trait explicitly: both `std::error::Error` and `DecodeError` have a method
        // called `description`, and this must call the `DecodeError` one.
        DecodeError::description(self)
    }
}

impl fmt::Display for StandardDecodeError {
    fn fmt(&self, f:  &mut fmt::Formatter) -> fmt::Result {
        f.write_str(self.description())
    }
}

impl fmt::Display for StandardPartialDecoderError {
    fn fmt(&self, f:  &mut fmt::Formatter) -> fmt::Result {
        f.write_str(self.description())
    }
}

impl DecodeError for StandardDecodeError {
    /// `true` only for [`StandardDecodeError::ExhaustedInput`].
    fn data_exhausted(&self) -> bool {
        matches!(self, Self::ExhaustedInput)
    }

    /// `true` only for [`StandardDecodeError::InvalidOpcode`].
    fn bad_opcode(&self) -> bool {
        matches!(self, Self::InvalidOpcode)
    }

    /// `true` only for [`StandardDecodeError::InvalidOperand`].
    fn bad_operand(&self) -> bool {
        matches!(self, Self::InvalidOperand)
    }

    /// a fixed, lowercase, human-readable name for each variant.
    fn description(&self) -> &'static str {
        match *self {
            Self::ExhaustedInput => "exhausted input",
            Self::InvalidOpcode => "invalid opcode",
            Self::InvalidOperand => "invalid operand",
        }
    }
}

impl DecodeError for StandardPartialDecoderError {
    /// `true` only for [`StandardPartialDecoderError::ExhaustedInput`].
    fn data_exhausted(&self) -> bool {
        matches!(self, Self::ExhaustedInput)
    }

    /// `true` only for [`StandardPartialDecoderError::InvalidOpcode`].
    fn bad_opcode(&self) -> bool {
        matches!(self, Self::InvalidOpcode)
    }

    /// `true` only for [`StandardPartialDecoderError::InvalidOperand`].
    fn bad_operand(&self) -> bool {
        matches!(self, Self::InvalidOperand)
    }

    /// a fixed, lowercase, human-readable name for each variant.
    fn description(&self) -> &'static str {
        match *self {
            Self::ExhaustedInput => "exhausted input",
            Self::InvalidOpcode => "invalid opcode",
            Self::InvalidOperand => "invalid operand",
            Self::IncompleteDecoder => "incomplete decoder",
        }
    }
}

/// a zero-sized placeholder whose `Display` output is empty.
#[derive(Copy, Clone)]
struct NoDescription {}

impl fmt::Display for NoDescription {
    /// emits nothing and always succeeds; the formatter is never touched.
    fn fmt(&self, _formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        Ok(())
    }
}

/// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s. errors are
/// the architecture-defined [`DecodeError`] implementation.
pub trait Decoder<A: Arch + ?Sized> {
    /// decode one instruction for this architecture from the [`crate::Reader`] of this
    /// architecture's `Word`, returning it by value.
    ///
    /// the provided implementation starts from `A::Instruction::default()`, hands it to
    /// [`Decoder::decode_into`], and returns it only if that call succeeds. an error from
    /// `decode_into` is returned as-is and the partially-written instruction is dropped.
    fn decode<T: Reader<A::Address, A::Word>>(&self, words: &mut T) -> Result<A::Instruction, A::DecodeError> {
        let mut decoded = <A::Instruction as Default>::default();
        self.decode_into(&mut decoded, words)?;
        Ok(decoded)
    }

    /// decode one instruction for this architecture from the [`crate::Reader`] of this
    /// architecture's `Word`, writing into the provided `inst`.
    ///
    /// SAFETY:
    ///
    /// while `inst` MUST be left in a state that does not violate Rust's safety guarantees,
    /// implementors are NOT obligated to leave `inst` in a semantically meaningful state if
    /// decoding fails. if `decode_into` returns an error, callers may find contradictory and
    /// useless information in `inst`, as well as *stale data* from whatever was passed in.
    fn decode_into<T: Reader<A::Address, A::Word>>(&self, inst: &mut A::Instruction, words: &mut T) -> Result<(), A::DecodeError>;
}

#[cfg(feature = "use-serde")]
/// the full set of bounds required of an [`Arch::Address`]. with the `use-serde` feature enabled
/// this includes `Serialize` and `Deserialize` in addition to [`Address`], `Debug`, `Hash`,
/// `PartialEq`, and `Eq`.
///
/// this trait has no items of its own and is implemented automatically for every type that
/// satisfies those bounds.
pub trait AddressBounds: Address + Debug + Hash + PartialEq + Eq + Serialize + for<'de> Deserialize<'de> {}
#[cfg(not(feature = "use-serde"))]
/// the full set of bounds required of an [`Arch::Address`]: [`Address`], `Debug`, `Hash`,
/// `PartialEq`, and `Eq`.
///
/// this trait has no items of its own and is implemented automatically for every type that
/// satisfies those bounds.
pub trait AddressBounds: Address + Debug + Hash + PartialEq + Eq {}

// blanket impls: any type meeting the bounds for the active feature set is an `AddressBounds`.
#[cfg(feature = "use-serde")]
impl<T> AddressBounds for T where T: Address + Debug + Hash + PartialEq + Eq + Serialize + for<'de> Deserialize<'de> {}
#[cfg(not(feature = "use-serde"))]
impl<T> AddressBounds for T where T: Address + Debug + Hash + PartialEq + Eq {}

#[cfg(feature = "std")]
/// this is not a particularly interesting trait. it just exists to add a `std::error::Error`
/// bound onto `DecodeError` for `std` builds.
///
/// it is implemented automatically for every type that is both a `std::error::Error` and a
/// `DecodeError`.
pub trait DecodeErrorBounds: std::error::Error + DecodeError {}
#[cfg(feature = "std")]
impl<T: std::error::Error + DecodeError> DecodeErrorBounds for T {}
#[cfg(not(feature = "std"))]
/// this is not a particularly interesting trait. it just exists to add a `std::error::Error`
/// bound onto `DecodeError` for `std` builds.
///
/// without the `std` feature (this configuration) it adds nothing beyond `DecodeError` itself,
/// and is implemented automatically for every `DecodeError`.
pub trait DecodeErrorBounds: DecodeError {}
#[cfg(not(feature = "std"))]
impl<T: DecodeError> DecodeErrorBounds for T {}


/// a collection of associated type parameters that constitute the definitions for an instruction
/// set. `Arch` provides an `Instruction` and its associated `Operand`s, which is guaranteed to be
/// decodable by this `Arch::Decoder`. `Arch::Decoder` can always be constructed with a `Default`
/// implementation, and decodes from a `Reader<Arch::Address, Arch::Word>`.
///
/// `Arch` is suitable as the foundational trait to implement more complex logic on top of; for
/// example, it would be entirely expected to have a
/// ```text
/// pub fn emulate<A: Arch, E: Emulator<A>>(
///     reader: &mut Reader<A::Address, A::Word>,
///     emu: &mut E
/// ) -> Result<A::Address, DecodeOrEvaluationError>;
/// ```
///
/// in some library built on top of `yaxpeax-arch`.
pub trait Arch {
    /// the unit of data a [`Reader`] yields to this architecture's [`Decoder`].
    type Word: Debug + Display + PartialEq + Eq;
    /// the address type for this architecture; it must satisfy [`AddressBounds`].
    type Address: AddressBounds;
    /// a decoded instruction. its length is reported as an [`AddressDiff`] of this architecture's
    /// `Address`, and it must be constructible via `Default`.
    type Instruction: Instruction + LengthedInstruction<Unit=AddressDiff<Self::Address>> + Debug + Default + Sized;
    /// the error returned by this architecture's [`Decoder`] when decoding fails.
    type DecodeError: DecodeErrorBounds + Debug + Display;
    /// the decoder for this architecture, constructible via `Default`.
    type Decoder: Decoder<Self> + Default;
    /// the operand type for this architecture's instructions. no bounds are required of it here.
    type Operand;
}

/// instructions have lengths, and minimum possible sizes for advancing a decoder on error.
///
/// unfortunately, this means calling `x.len()` for some `Arch::Instruction` requires importing
/// this trait. sorry.
pub trait LengthedInstruction {
    /// the type lengths are measured in. [`Arch`] requires this to be an `AddressDiff` of the
    /// architecture's `Address` for any `Arch::Instruction`.
    type Unit;
    /// the length, in terms of `Unit`, of this instruction. because `Unit` will be a diff of an
    /// architecture's `Address` type, this almost always is a number of bytes. implementations
    /// should indicate if this is ever not the case.
    fn len(&self) -> Self::Unit;
    /// the length, in terms of `Unit`, of the shortest possible instruction in a given
    /// architecture. because `Unit` will be a diff of an architecture's `Address` type, this
    /// almost always is a number of bytes. implementations should indicate if this is ever not the
    /// case.
    fn min_size() -> Self::Unit;
}

/// the base trait every [`Arch::Instruction`] is required to implement.
pub trait Instruction {
    /// NOTE(review): presumably reports whether this instruction is one whose behavior the
    /// architecture defines; the exact meaning is left to each implementation - confirm against
    /// implementors before relying on it.
    fn well_defined(&self) -> bool;
}

/// write a rendering of `self` into a [`fmt::Write`] sink, with access to an address, an optional
/// context of type `Ctx`, and a [`YaxColors`] implementation.
///
/// NOTE(review): how `address` and `context` influence the output is up to each implementation;
/// nothing in this trait constrains it.
pub trait ShowContextual<Addr, Ctx: ?Sized, T: fmt::Write, Y: YaxColors> {
    /// render `self` into `out`, returning any error reported while writing. `context` may be
    /// `None` when no contextual information is available.
    fn contextualize(&self, colors: &Y, address: Addr, context: Option<&Ctx>, out: &mut T) -> fmt::Result;
}

/*
impl <C: ?Sized, T: fmt::Write, U: Colorize<T>> ShowContextual<C, T> for U {
    fn contextualize(&self, colors: Option<&ColorSettings>, context: Option<&C>, out: &mut T) -> fmt::Result {
        self.colorize(colors, out)
    }
}
*/