
← Back to blog

Published on 01/30/2024 00:01 by Jacob Latonis

100 Days of Yara in 2024: Day 30

In Day 28, I went over the parsing structure change for YARA-X’s Mach-O module. In Day 29, I went over reworking my current PR for parsing code signature data. Now, we’re going to rework the entitlements parsing as well!

Original Way

When I first wrote the entitlements parsing for Mach-O binaries, I wrote it against the old parsing structure. Let’s take a look at what that looks like in Rust:

/// `CSBlob`: Represents a CSBlob structure in the Mach-O file, i.e. the
/// two-field header found at the start of each code signature blob.
/// Fields: magic, length
#[derive(Debug, Default, Clone, Copy)]
struct CSBlob {
    /// Matched against the `CS_MAGIC_*` constants to decide how the blob is handled.
    magic: u32,
    /// Used as the end bound (`offset + length`) when slicing the blob's payload.
    length: u32,

/// `CSBlobIndex`: Represents a BlobIndex structure in the Mach-O file.
/// Fields: blobtype, offset, blob
#[derive(Debug, Default, Clone, Copy)]
struct CSBlobIndex {
    /// Read from the index entry but not otherwise used in this excerpt.
    _blobtype: u32,
    /// Position of the blob, used to index into the superblob's byte slice.
    offset: u32,
    /// Header of the blob found at `offset`; assigned by `parse_cs_superblob`.
    blob: CSBlob,

/// `CSSuperBlob`: Represents a CSSuperBlob structure in the Mach-O file.
/// Fields: magic, length, count, index
#[derive(Debug, Default, Clone)]
struct CSSuperBlob {
    /// First big-endian u32 of the superblob; stored but not inspected here.
    _magic: u32,
    /// Second big-endian u32 of the superblob; stored but not inspected here.
    _length: u32,
    /// Number of index entries the parser loops over.
    count: u32,
    /// Index entries, iterated later to locate and process each blob.
    index: Vec<CSBlobIndex>,

/// Parse the embedded-signature CSBlob structure for code signature data for a Mach-O.
/// # Arguments
/// * `input`: A slice of bytes containing the raw code signature blob.
/// # Returns
/// A `nom` IResult containing the remaining input and the parsed
/// CSBlob structure, or a `nom` error if the parsing fails.
/// # Errors
/// Returns a `nom` error if the input data is insufficient or malformed.
fn parse_cs_blob(input: &[u8]) -> IResult<&[u8], CSBlob> {
    // The blob header is two consecutive big-endian u32 values: magic, then length.
    let (input, magic) = be_u32(input)?;
    let (input, length) = be_u32(input)?;

    Ok((input, CSBlob { magic, length }))

/// Parse the embedded-signature CSBlobIndex structure for code signature data for a Mach-O.
/// # Arguments
/// * `input`: A slice of bytes containing the raw code signature blob index.
/// # Returns
/// A `nom` IResult containing the remaining input and the parsed
/// CSBlobIndex structure, or a `nom` error if the parsing fails.
/// # Errors
/// Returns a `nom` error if the input data is insufficient or malformed.
fn parse_cs_index(input: &[u8]) -> IResult<&[u8], CSBlobIndex> {
    // An index entry is two consecutive big-endian u32 values: blob type, then offset.
    let (input, blobtype) = be_u32(input)?;
    let (input, offset) = be_u32(input)?;

        // `blob` is left at its default value here; only the type and offset
        // come from the input at this stage.
        CSBlobIndex { _blobtype: blobtype, offset, ..Default::default() },

/// Parse the embedded-signature SuperBlob for code signature data for a Mach-O.
///
/// Reads the superblob header (magic, length, count), then reads `count`
/// index entries and, for each one, parses the blob header located at the
/// entry's offset from the start of `data`.
/// # Arguments
/// * `input`: A slice of bytes containing the raw code signature superblob data.
/// # Returns
/// A `nom` IResult containing the remaining input and the parsed
/// CSSuperBlob structure, or a `nom` error if the parsing fails.
/// # Errors
/// Returns a `nom` error if the input data is insufficient or malformed.
fn parse_cs_superblob(data: &[u8]) -> IResult<&[u8], CSSuperBlob> {
    // CSSuperBlobs are already network byte order, which is BE
    let (input, magic) = be_u32(data)?;
    let (input, length) = be_u32(input)?;
    let (input, count) = be_u32(input)?;

    let mut super_blob = CSSuperBlob {
        _magic: magic,
        _length: length,

    // `input` walks forward through the index entries, while `data` stays
    // anchored at the start of the superblob for offset lookups.
    let mut input: &[u8] = input;
    let mut cs_index: CSBlobIndex;

    for _ in 0..super_blob.count {
        (input, cs_index) = parse_cs_index(input)?;

        let offset = cs_index.offset as usize;

        // NOTE(review): `data[offset..]` panics if `offset` exceeds
        // `data.len()`; the offset comes straight from the file.
        let (_, blob) = parse_cs_blob(&data[offset..])?;

        cs_index.blob = blob;

    Ok((input, super_blob))

/// Processes the code signature data based on the values calculated
/// from the LC_CODE_SIGNATURE load command.
///
/// Slices the code signature region out of `data`, parses it as a superblob,
/// and then handles each indexed blob according to its magic value.
/// # Arguments
/// * `data`: The raw byte data of the Mach-O file.
/// * `macho_file`: The protobuf representation of the Mach-O file to be populated.
/// # Returns
/// Returns a `Result<(), MachoError>` indicating the success or failure of the
/// parsing operation.
fn parse_macho_code_signature(
    data: &[u8],
    macho_file: &mut File,
) -> Result<(), MachoError> {
    if macho_file.code_signature_data.is_some() {
        let code_signature_data =
        let data_offset = code_signature_data.dataoff() as usize;
        let data_size = code_signature_data.datasize() as usize;

        // NOTE(review): only the start offset is checked against `data.len()`;
        // the slice below still panics if `data_offset + data_size` runs past
        // the end of `data`.
        if data_offset < data.len() {
            let super_data = &data[data_offset..data_offset + data_size];
            let (_, super_blob) = parse_cs_superblob(super_data)
                .map_err(|e| MachoError::ParsingError(format!("{:?}", e)))?;

            for blob_index in super_blob.index {
                let offset = blob_index.offset as usize;
                let length = blob_index.blob.length as usize;
                // Size of the in-memory `CSBlob` struct (two u32 fields), used
                // below to skip past the blob header to its payload.
                let size_of_blob = std::mem::size_of::<CSBlob>();

                match blob_index.blob.magic {
                    CS_MAGIC_BLOBWRAPPER => {
                    CS_MAGIC_EMBEDDED_ENTITLEMENTS => {
                        // The payload runs from just after the blob header to
                        // `offset + length`, both relative to the superblob.
                        let xml_data = &super_data
                            [offset + size_of_blob..offset + length];
                        let xml_string =

                        let opt = roxmltree::ParsingOptions {
                            allow_dtd: true,

                        let parsed_xml =
                                xml_string, opt,
                            .map_err(|e| {
                                MachoError::ParsingError(format!("{:?}", e))

                        // The text of each `<key>` element is treated as an
                        // entitlement.
                        for node in parsed_xml
                            .filter(|n| n.has_tag_name("key"))
                            if let Some(entitlement) = node.text() {
                    _ => {}

As you can see above, we have a parsing function, a handling function, and then more parsing functions for each code signature blob and structure type. It’s a fair amount of code.

New Way

The new way is a little simpler to look at, in my opinion.

        // If code signature data is present, slice that region out of the file
        // and hand it to the superblob parser.
        if let Some(ref code_signature_data) = macho.code_signature_data {
            let offset = code_signature_data.dataoff as usize;
            let size = code_signature_data.datasize as usize;
            // NOTE(review): this slice panics if `offset + size` exceeds
            // `data.len()`; no bounds check is visible in this excerpt.
            let super_data = &data[offset..offset + size];
            if let Err(_err) = macho.cs_superblob()(super_data) {
                #[cfg(feature = "logging")]
                error!("Error parsing Mach-O file: {:?}", _err);
                // fail silently if it fails, data was not formatted
                // correctly but parsing should still proceed for
                // everything else

        /// Returns a parser for a CSBlob header: two big-endian u32 values,
        /// magic followed by length.
        ///
        /// The returned parser discards the remaining input and always yields
        /// an empty remainder alongside the parsed `CSBlob`.
        fn cs_blob(
    ) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], CSBlob> + '_ {
        move |input: &'a [u8]| {
            let (_, (magic, length)) = tuple((
                u32(Endianness::Big), // magic
                u32(Endianness::Big), // length,

            Ok((&[], CSBlob { magic, length }))

    /// Returns a parser for a CSBlobIndex entry: two big-endian u32 values,
    /// blob type followed by offset.
    ///
    /// The parsed entry's `blob` is set to `None`; the caller fills it in
    /// after parsing the blob header at `offset`.
    fn cs_index(
    ) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], CSBlobIndex> + '_ {
        move |input: &'a [u8]| {
            let (input, (blobtype, offset)) = tuple((
                u32(Endianness::Big), // blobtype
                u32(Endianness::Big), // offset,

            Ok((input, CSBlobIndex { blobtype, offset, blob: None }))

    /// Returns a parser for the code signature superblob.
    ///
    /// The parser reads the superblob header (magic, length, count), reads
    /// `count` index entries together with the blob header each one points
    /// at, and then processes the blobs by magic value. Entitlement blobs
    /// are decoded as UTF-8 and parsed as XML; certificate parsing for
    /// blob wrappers is still a TODO.
    fn cs_superblob(
        &mut self,
    ) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], CSSuperBlob> + '_ {
        move |data: &'a [u8]| {
            let (remainder, (_magic, _length, count)) = tuple((
                u32(Endianness::Big), // magic
                u32(Endianness::Big), // length,
                u32(Endianness::Big), // count,

            let mut super_blob =
                CSSuperBlob { _magic, _length, count, index: Vec::new() };

            // `input` walks forward through the index entries, while `data`
            // stays anchored at the start of the superblob for offset lookups.
            let mut input: &[u8] = remainder;
            let mut cs_index: CSBlobIndex;

            for _ in 0..super_blob.count {
                (input, cs_index) = self.cs_index()(input)?;
                let offset: usize = cs_index.offset as usize;
                // NOTE(review): `data[offset..]` panics if `offset` exceeds
                // `data.len()`; the offset comes straight from the file.
                let (_, blob) = self.cs_blob()(&data[offset..])?;

                cs_index.blob = Some(blob);

            let super_data = data;

            for blob_index in &super_blob.index {
                let _blob_type = blob_index.blobtype as usize;
                if let Some(blob) = &blob_index.blob {
                    let offset = blob_index.offset as usize;
                    let length = blob.length as usize;
                    // Size of the in-memory `CSBlob` struct (two u32 fields),
                    // used below to skip past the blob header to its payload.
                    let size_of_blob = std::mem::size_of::<CSBlob>();
                    match blob.magic {
                        CS_MAGIC_EMBEDDED_ENTITLEMENTS => {
                            // The payload runs from just after the blob header
                            // to `offset + length`, relative to the superblob.
                            let xml_data = &super_data
                                [offset + size_of_blob..offset + length];
                            let xml_string = std::str::from_utf8(xml_data)

                            let opt = roxmltree::ParsingOptions {
                                allow_dtd: true,

                            // XML that fails to parse is skipped rather than
                            // reported as an error.
                            if let Ok(parsed_xml) =
                                    xml_string, opt,
                                // The text of each `<key>` element is treated
                                // as an entitlement.
                                for node in parsed_xml
                                    .filter(|n| n.has_tag_name("key"))
                                    if let Some(entitlement) = node.text() {
                        CS_MAGIC_BLOBWRAPPER => {
                            // TODO: Parse certificates
                        _ => {}

            // All input is treated as consumed: the remainder is always empty.
            Ok((&[], super_blob))

Finished Work

This is just part of the work from #73 that I am cleaning up after the refactor. There will be more posts like this :’).

This specific work implemented today can be seen in #78 on YARA-X.

Written by Jacob Latonis

← Back to blog