jacoblatonis.me

← Back to blog

Published on 01/30/2024 00:01 by Jacob Latonis

100 Days of Yara in 2024: Day 30

In Day 28, I went over the parsing structure change for YARA-X’s Mach-O module. In Day 29, I went over reworking my current PR for parsing code signature data. Now, we’re going to rework the entitlements parsing as well!

Original Way

When I first wrote the entitlements parsing for Mach-O binaries, I wrote it against the old parsing structure. Let’s take a look at what that looks like in Rust:

/// `CSBlob`: the two-field header read at the start of a code signature blob.
///
/// Both fields are read as big-endian `u32` values by `parse_cs_blob`.
#[derive(Clone, Copy, Debug, Default)]
struct CSBlob {
    /// Blob type tag; matched against the `CS_MAGIC_*` constants when the
    /// signature is processed.
    magic: u32,
    /// Blob length as stored in the header. The entitlements code treats it
    /// as measured from the start of the blob, header included.
    length: u32,
}

/// `CSBlobIndex`: one entry of the superblob's index table, paired with the
/// header of the blob it points at.
#[derive(Clone, Copy, Debug, Default)]
struct CSBlobIndex {
    /// Type tag of the entry (first big-endian `u32` read by
    /// `parse_cs_index`). Stored but not read by the code shown here, hence
    /// the leading underscore.
    _blobtype: u32,
    /// Position of the referenced blob, relative to the start of the
    /// superblob data.
    offset: u32,
    /// Header of the referenced blob; left at its default by
    /// `parse_cs_index` and filled in by `parse_cs_superblob`.
    blob: CSBlob,
}

/// `CSSuperBlob`: the superblob header together with its parsed index table.
#[derive(Clone, Debug, Default)]
struct CSSuperBlob {
    /// First header field (big-endian `u32`); stored but not read afterwards.
    _magic: u32,
    /// Second header field (big-endian `u32`); stored but not read afterwards.
    _length: u32,
    /// Number of index entries that follow the header.
    count: u32,
    /// The index entries, in file order, each with its blob header attached.
    index: Vec<CSBlobIndex>,
}

/// Read a code signature blob header from the front of `input`.
///
/// The header is two consecutive big-endian `u32` values: `magic` followed by
/// `length`.
///
/// # Arguments
///
/// * `input`: Bytes positioned at the start of the blob.
///
/// # Returns
///
/// A `nom` IResult holding the bytes after the header and the parsed
/// `CSBlob`.
///
/// # Errors
///
/// Returns a `nom` error if `input` is too short to hold both fields.
fn parse_cs_blob(input: &[u8]) -> IResult<&[u8], CSBlob> {
    let (rest, magic) = be_u32(input)?;

    // The second read completes the header, so map it straight into the
    // finished structure.
    be_u32(rest).map(|(rest, length)| (rest, CSBlob { magic, length }))
}

/// Read one index entry of a code signature superblob from the front of
/// `input`.
///
/// An entry is two consecutive big-endian `u32` values: the blob type
/// followed by the blob offset. The `blob` field of the result is left at its
/// default value; the caller is expected to fill it in.
///
/// # Arguments
///
/// * `input`: Bytes positioned at the start of the index entry.
///
/// # Returns
///
/// A `nom` IResult holding the bytes after the entry and the parsed
/// `CSBlobIndex`.
///
/// # Errors
///
/// Returns a `nom` error if `input` is too short to hold both fields.
fn parse_cs_index(input: &[u8]) -> IResult<&[u8], CSBlobIndex> {
    let (rest, blobtype) = be_u32(input)?;
    let (rest, offset) = be_u32(rest)?;

    let entry =
        CSBlobIndex { _blobtype: blobtype, offset, blob: Default::default() };

    Ok((rest, entry))
}

/// Parse the embedded-signature SuperBlob for code signature data for a Mach-O.
///
/// # Arguments
///
/// * `input`: A slice of bytes containing the raw code signature superblob data.
///
/// # Returns
///
/// A `nom` IResult containing the remaining input and the parsed
/// CSSuperBlob structure, or a `nom` error if the parsing fails.
///
/// # Errors
///
/// Returns a `nom` error if the input data is insufficient or malformed.
fn parse_cs_superblob(data: &[u8]) -> IResult<&[u8], CSSuperBlob> {
    // CSSuperBlobs are already network byte order, which is BE
    let (input, magic) = be_u32(data)?;
    let (input, length) = be_u32(input)?;
    let (input, count) = be_u32(input)?;

    let mut super_blob = CSSuperBlob {
        _magic: magic,
        _length: length,
        count,
        ..Default::default()
    };

    let mut input: &[u8] = input;
    let mut cs_index: CSBlobIndex;

    for _ in 0..super_blob.count {
        (input, cs_index) = parse_cs_index(input)?;

        let offset = cs_index.offset as usize;

        let (_, blob) = parse_cs_blob(&data[offset..])?;

        cs_index.blob = blob;
        super_blob.index.push(cs_index);
    }

    Ok((input, super_blob))
}

/// Processes the code signature data based on the values calculated
/// from the LC_CODE_SIGNATURE load command.
///
/// The signature region is parsed as a superblob. For every blob whose magic
/// is `CS_MAGIC_EMBEDDED_ENTITLEMENTS`, the payload is parsed as XML and the
/// text of each `<key>` element is appended to `macho_file.entitlements`.
///
/// # Arguments
///
/// * `data`: The raw byte data of the Mach-O file.
/// * `macho_file`: The protobuf representation of the Mach-O file to be populated.
///
/// # Returns
///
/// Returns a `Result<(), MachoError>` indicating the success or failure of the
/// parsing operation. `Ok(())` is also returned, without doing anything, when
/// there is no code signature data or when the declared signature region does
/// not lie entirely inside `data`.
///
/// # Errors
///
/// Returns `MachoError::ParsingError` when the superblob cannot be parsed,
/// when an entitlements blob's offset/length fall outside the signature
/// region, or when the entitlements payload is not parseable XML.
fn parse_macho_code_signature(
    data: &[u8],
    macho_file: &mut File,
) -> Result<(), MachoError> {
    if let Some(code_signature_data) =
        macho_file.code_signature_data.as_mut()
    {
        let data_offset = code_signature_data.dataoff() as usize;
        let data_size = code_signature_data.datasize() as usize;

        // Offset and size come from the file and are untrusted. Check the
        // whole range, not only its start, so a truncated or crafted file is
        // skipped instead of causing an out-of-bounds panic.
        let signature_region = if data_offset < data.len() {
            data_offset
                .checked_add(data_size)
                .and_then(|end| data.get(data_offset..end))
        } else {
            None
        };

        if let Some(super_data) = signature_region {
            let (_, super_blob) = parse_cs_superblob(super_data)
                .map_err(|e| MachoError::ParsingError(format!("{:?}", e)))?;

            for blob_index in super_blob.index {
                let offset = blob_index.offset as usize;
                let length = blob_index.blob.length as usize;
                let size_of_blob = std::mem::size_of::<CSBlob>();

                match blob_index.blob.magic {
                    CS_MAGIC_BLOBWRAPPER => {
                        [...]
                    }
                    CS_MAGIC_EMBEDDED_ENTITLEMENTS => {
                        // The payload sits between the blob header and the
                        // end of the blob. A checked lookup rejects a length
                        // smaller than the header or past the region.
                        let xml_data = offset
                            .checked_add(size_of_blob)
                            .zip(offset.checked_add(length))
                            .and_then(|(start, end)| {
                                super_data.get(start..end)
                            })
                            .ok_or_else(|| {
                                MachoError::ParsingError(format!(
                                    "entitlements blob at offset {} with length {} is out of bounds",
                                    offset, length
                                ))
                            })?;

                        // Invalid UTF-8 degrades to an empty string, which
                        // the XML parser below then rejects.
                        let xml_string =
                            std::str::from_utf8(xml_data).unwrap_or_default();

                        let opt = roxmltree::ParsingOptions {
                            allow_dtd: true,
                            ..roxmltree::ParsingOptions::default()
                        };

                        let parsed_xml =
                            roxmltree::Document::parse_with_options(
                                xml_string, opt,
                            )
                            .map_err(|e| {
                                MachoError::ParsingError(format!("{:?}", e))
                            })?;

                        // Each `<key>` element's text is one entitlement.
                        macho_file.entitlements.extend(
                            parsed_xml
                                .descendants()
                                .filter(|n| n.has_tag_name("key"))
                                .filter_map(|n| n.text())
                                .map(|entitlement| entitlement.to_string()),
                        );
                    }
                    _ => {}
                }
            }
        }
    }
    Ok(())
}

As you can see above, we have a parsing function, a handling function, and then more parsing functions for each code signature blob and structure type. It’s a fair amount of code.

New Way

The new way is a little simpler to look at in my opinion.

        if let Some(ref code_signature_data) = macho.code_signature_data {
            let offset = code_signature_data.dataoff as usize;
            let size = code_signature_data.datasize as usize;

            // `dataoff`/`datasize` are read from the file and are untrusted.
            // A checked lookup skips a region that does not fit inside
            // `data`, where direct indexing would panic.
            if let Some(super_data) = offset
                .checked_add(size)
                .and_then(|end| data.get(offset..end))
            {
                if let Err(_err) = macho.cs_superblob()(super_data) {
                    #[cfg(feature = "logging")]
                    error!("Error parsing Mach-O file: {:?}", _err);
                    // fail silently if it fails, data was not formatted
                    // correctly but parsing should still proceed for
                    // everything else
                };
            }
        }

        /// Returns a parser that reads a code signature blob header: two
        /// big-endian `u32` values, `magic` then `length`.
        ///
        /// The returned parser always reports an empty remainder; bytes
        /// following the header are intentionally dropped.
        fn cs_blob(
        &self,
    ) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], CSBlob> + '_ {
        move |input: &'a [u8]| {
            let (rest, magic) = u32(Endianness::Big)(input)?;
            let (_, length) = u32(Endianness::Big)(rest)?;

            Ok((&[], CSBlob { magic, length }))
        }
    }

    /// Returns a parser that reads one superblob index entry: two big-endian
    /// `u32` values, the blob type then the blob offset.
    ///
    /// The entry's `blob` is left as `None`; the parser yields the bytes that
    /// follow the entry as the remainder.
    fn cs_index(
        &self,
    ) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], CSBlobIndex> + '_ {
        move |input: &'a [u8]| {
            let (rest, blobtype) = u32(Endianness::Big)(input)?;
            let (rest, offset) = u32(Endianness::Big)(rest)?;

            let entry = CSBlobIndex { blobtype, offset, blob: None };

            Ok((rest, entry))
        }
    }

    fn cs_superblob(
        &mut self,
    ) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], CSSuperBlob> + '_ {
        move |data: &'a [u8]| {
            let (remainder, (_magic, _length, count)) = tuple((
                u32(Endianness::Big), // magic
                u32(Endianness::Big), // offset,
                u32(Endianness::Big), // count,
            ))(data)?;

            let mut super_blob =
                CSSuperBlob { _magic, _length, count, index: Vec::new() };

            let mut input: &[u8] = remainder;
            let mut cs_index: CSBlobIndex;

            for _ in 0..super_blob.count {
                (input, cs_index) = self.cs_index()(input)?;
                let offset: usize = cs_index.offset as usize;
                let (_, blob) = self.cs_blob()(&data[offset..])?;

                cs_index.blob = Some(blob);
                super_blob.index.push(cs_index);
            }

            let super_data = data;

            for blob_index in &super_blob.index {
                let _blob_type = blob_index.blobtype as usize;
                if let Some(blob) = &blob_index.blob {
                    let offset = blob_index.offset as usize;
                    let length = blob.length as usize;
                    let size_of_blob = std::mem::size_of::<CSBlob>();
                    match blob.magic {
                        CS_MAGIC_EMBEDDED_ENTITLEMENTS => {
                            let xml_data = &super_data
                                [offset + size_of_blob..offset + length];
                            let xml_string = std::str::from_utf8(xml_data)
                                .unwrap_or_default();

                            let opt = roxmltree::ParsingOptions {
                                allow_dtd: true,
                                ..roxmltree::ParsingOptions::default()
                            };

                            if let Ok(parsed_xml) =
                                roxmltree::Document::parse_with_options(
                                    xml_string, opt,
                                )
                            {
                                for node in parsed_xml
                                    .descendants()
                                    .filter(|n| n.has_tag_name("key"))
                                {
                                    if let Some(entitlement) = node.text() {
                                        self.entitlements
                                            .push(entitlement.to_string());
                                    }
                                }
                            }
                        }
                        CS_MAGIC_BLOBWRAPPER => {
                            // TODO: Parse certificates
                        }
                        _ => {}
                    }
                }
            }

            Ok((&[], super_blob))
        }
    }

Finished Work

This is just part of the work from #73 that I am cleaning up after the refactor. There will be more posts like this :’).

This specific work implemented today can be seen in #78 on YARA-X.

Written by Jacob Latonis

← Back to blog