MemoryAccess.h

// MemoryAccess.h
//
// Author David Barrett-Lennard  
// (C)opyright Cedanet Pty Ltd 2022

#pragma once
#ifndef Ceda_cxUtils_MemoryAccess_H
#define Ceda_cxUtils_MemoryAccess_H

#include "BasicTypes.h"
#include <cstring>
#include <memory>
#include <type_traits>

namespace ceda
{    
    // CEDA_HAVE_EFFICIENT_UNALIGNED_ACCESS
    //
    // Defined as 1 when one of the x86 / x86-64 target macros published by
    // GCC, Clang or MSVC is present, and as 0 for every other target.
    // The terms are combined with the logical || operator throughout (the
    // previous expression mixed in bitwise |, which only worked because each
    // defined() term evaluates to 0 or 1).
    #if defined(__i386)    || defined(__i386__)  || defined(_M_IX86)   || \
        defined(__x86_64)  || defined(__x86_64__) || defined(_M_X64)   || \
        defined(__amd64)   || defined(__amd64__)  || defined(_M_AMD64)
        // x86 and x86_64 support unaligned memory access
        #define CEDA_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
    #else
        // As an example, ARMv7 doesn't support unaligned memory access
        // Raspberry Pi will produce a SIGBUS fault
        #define CEDA_HAVE_EFFICIENT_UNALIGNED_ACCESS 0
    #endif

    // CEDA_LITTLE_ENDIAN
    //
    // 1 when the host is little-endian, 0 when the compiler reports a
    // big-endian host. GCC and Clang publish the byte order through
    // __BYTE_ORDER__ / __ORDER_BIG_ENDIAN__. Toolchains that do not define
    // those macros keep the previous unconditional value of 1.
    #if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
        #define CEDA_LITTLE_ENDIAN 0
    #else
        #define CEDA_LITTLE_ENDIAN 1
    #endif

    // A false constant whose value formally depends on T. Used as the condition
    // of a static_assert inside a template so that the assertion is only
    // evaluated when that branch is actually instantiated.
    template <typename T>
    struct dependent_false : std::integral_constant<bool, false>
    {
    };

    //////////////////// GetUnalignedLE

    // Builds a 16-bit value from the two bytes at p, with p[0] as the least
    // significant byte. The buffer is read one byte at a time.
    inline uint16 GetUnalignedLE16(const uint8* p)
    {
        const uint16 low  = p[0];
        const uint16 high = p[1];
        return static_cast<uint16>(low | (high << 8));
    }

    // Builds a 32-bit value from the four bytes at p, with p[0] as the least
    // significant byte. The buffer is read one byte at a time.
    inline uint32 GetUnalignedLE32(const uint8* p)
    {
        uint32 result = 0;

        // Walk from the most significant byte (p[3]) down to p[0], shifting the
        // bytes gathered so far up by one byte position on each step.
        for (int i = 3; i >= 0; --i)
        {
            result = (result << 8) | p[i];
        }
        return result;
    }

    // Builds a 64-bit value from the eight bytes at p, with p[0] as the least
    // significant byte. The buffer is read one byte at a time.
    inline uint64 GetUnalignedLE64(const uint8* p)
    {
        uint64 result = 0;

        // Walk from the most significant byte (p[7]) down to p[0], shifting the
        // bytes gathered so far up by one byte position on each step.
        for (int i = 7; i >= 0; --i)
        {
            result = (result << 8) | p[i];
        }
        return result;
    }

    #if defined __SIZEOF_INT128__
        // Builds a 128-bit value from the sixteen bytes at p, with p[0] as the
        // least significant byte. Only available when the compiler defines
        // __SIZEOF_INT128__.
        inline uint128 GetUnalignedLE128(const uint8* p)
        {
            uint128 result = 0;

            // Walk from the most significant byte (p[15]) down to p[0].
            for (int i = 15; i >= 0; --i)
            {
                result = (result << 8) | p[i];
            }
            return result;
        }
    #endif    

    template<typename T>
    inline void GetUnalignedLE(T& dst, const void* pSrc) 
    { 
        #if CEDA_HAVE_EFFICIENT_UNALIGNED_ACCESS && CEDA_LITTLE_ENDIAN
            dst = *reinterpret_cast<const T*>(pSrc);
        #else
            if constexpr(sizeof(T) == 1)
            {
                dst = *reinterpret_cast<const T*>(pSrc);
            }
            else if constexpr(sizeof(T) == 2)
            {
                reinterpret_cast<uint16&>(dst) = GetUnalignedLE16( reinterpret_cast<const uint8*>(pSrc) );
            }
            else if constexpr(sizeof(T) == 4)
            {
                reinterpret_cast<uint32&>(dst) = GetUnalignedLE32( reinterpret_cast<const uint8*>(pSrc) );
            }
            else if constexpr(sizeof(T) == 8)
            {
                reinterpret_cast<uint64&>(dst) = GetUnalignedLE64( reinterpret_cast<const uint8*>(pSrc) );
            }
            #if defined __SIZEOF_INT128__
            else if constexpr(sizeof(T) == 16)
            {
                reinterpret_cast<uint128&>(dst) = GetUnalignedLE128( reinterpret_cast<const uint8*>(pSrc) );
            }
            #endif
            else
            {
                static_assert(dependent_false<T>::value, "Invalid size for GetUnalignedLE");
            }
        #endif
    }

    //////////////////// GetUnalignedBE

    // Builds a 16-bit value from the two bytes at p, with p[0] as the most
    // significant byte. The buffer is read one byte at a time.
    inline uint16 GetUnalignedBE16(const uint8* p)
    {
        const uint16 high = p[0];
        const uint16 low  = p[1];
        return static_cast<uint16>((high << 8) | low);
    }

    // Builds a 32-bit value from the four bytes at p, with p[0] as the most
    // significant byte. The buffer is read one byte at a time.
    inline uint32 GetUnalignedBE32(const uint8* p)
    {
        uint32 result = 0;

        // Bytes arrive most significant first, so each new byte is appended
        // below the ones gathered so far.
        for (int i = 0; i < 4; ++i)
        {
            result = (result << 8) | p[i];
        }
        return result;
    }

    // Builds a 64-bit value from the eight bytes at p, with p[0] as the most
    // significant byte. The buffer is read one byte at a time.
    inline uint64 GetUnalignedBE64(const uint8* p)
    {
        uint64 result = 0;

        // Bytes arrive most significant first, so each new byte is appended
        // below the ones gathered so far.
        for (int i = 0; i < 8; ++i)
        {
            result = (result << 8) | p[i];
        }
        return result;
    }

    #if defined __SIZEOF_INT128__
        // Builds a 128-bit value from the sixteen bytes at p, with p[0] as the
        // most significant byte. Only available when the compiler defines
        // __SIZEOF_INT128__.
        inline uint128 GetUnalignedBE128(const uint8* p)
        {
            uint128 result = 0;

            // Bytes arrive most significant first.
            for (int i = 0; i < 16; ++i)
            {
                result = (result << 8) | p[i];
            }
            return result;
        }
    #endif    

    template<typename T>
    inline void GetUnalignedBE(T& dst, const void* pSrc) 
    { 
        #if CEDA_HAVE_EFFICIENT_UNALIGNED_ACCESS && !CEDA_LITTLE_ENDIAN
            dst = *reinterpret_cast<const T*>(pSrc);
        #else
            if constexpr(sizeof(T) == 1)
            {
                dst = *reinterpret_cast<const T*>(pSrc);
            }
            else if constexpr(sizeof(T) == 2)
            {
                reinterpret_cast<uint16&>(dst) = GetUnalignedBE16( reinterpret_cast<const uint8*>(pSrc) );
            }
            else if constexpr(sizeof(T) == 4)
            {
                reinterpret_cast<uint32&>(dst) = GetUnalignedBE32( reinterpret_cast<const uint8*>(pSrc) );
            }
            else if constexpr(sizeof(T) == 8)
            {
                reinterpret_cast<uint64&>(dst) = GetUnalignedBE64( reinterpret_cast<const uint8*>(pSrc) );
            }
            #if defined __SIZEOF_INT128__
            else if constexpr(sizeof(T) == 16)
            {
                reinterpret_cast<uint128&>(dst) = GetUnalignedBE128( reinterpret_cast<const uint8*>(pSrc) );
            }
            #endif
            else
            {
                static_assert(dependent_false<T>::value, "Invalid size for GetUnalignedBE");
            }
        #endif
    }

    //////////////////// SetUnalignedLE

    // Stores the 16-bit value v into the two bytes at p, least significant
    // byte first. The buffer is written one byte at a time.
    inline void SetUnalignedLE16(uint8* p, uint16 v)
    {
        const uint8 lowByte  = static_cast<uint8>(v);
        const uint8 highByte = static_cast<uint8>(v >> 8);
        p[0] = lowByte;
        p[1] = highByte;
    }

    // Stores the 32-bit value v into the four bytes at p, least significant
    // byte first. The buffer is written one byte at a time.
    inline void SetUnalignedLE32(uint8* p, uint32 v)
    {
        for (int i = 0; i < 4; ++i)
        {
            // Byte i of the output carries bits [8*i, 8*i + 7] of v.
            p[i] = static_cast<uint8>(v >> (8 * i));
        }
    }

    // Stores the 64-bit value v into the eight bytes at p, least significant
    // byte first. The buffer is written one byte at a time.
    inline void SetUnalignedLE64(uint8* p, uint64 v)
    {
        for (int i = 0; i < 8; ++i)
        {
            // Byte i of the output carries bits [8*i, 8*i + 7] of v.
            p[i] = static_cast<uint8>(v >> (8 * i));
        }
    }

    #if defined __SIZEOF_INT128__
        // Stores the 128-bit value v into the sixteen bytes at p, least
        // significant byte first. Only available when the compiler defines
        // __SIZEOF_INT128__.
        inline void SetUnalignedLE128(uint8* p, uint128 v)
        {
            for (int i = 0; i < 16; ++i)
            {
                // Byte i of the output carries bits [8*i, 8*i + 7] of v.
                p[i] = static_cast<uint8>(v >> (8 * i));
            }
        }
    #endif

    template<typename T>
    inline void SetUnalignedLE(void* pDst, const T& src)
    { 
        #if CEDA_HAVE_EFFICIENT_UNALIGNED_ACCESS && CEDA_LITTLE_ENDIAN
            *reinterpret_cast<T*>(pDst) = src;
        #else
            if constexpr(sizeof(T) == 1)
            {
                *reinterpret_cast<T*>(pDst) = src;
            }
            else if constexpr(sizeof(T) == 2)
            {
                SetUnalignedLE16(reinterpret_cast<uint8*>(pDst), reinterpret_cast<const uint16&>(src) );
            }
            else if constexpr(sizeof(T) == 4)
            {
                SetUnalignedLE32(reinterpret_cast<uint8*>(pDst), reinterpret_cast<const uint32&>(src) );
            }
            else if constexpr(sizeof(T) == 8)
            {
                SetUnalignedLE64(reinterpret_cast<uint8*>(pDst), reinterpret_cast<const uint64&>(src) );
            }
            #if defined __SIZEOF_INT128__
            else if constexpr(sizeof(T) == 16)
            {
                SetUnalignedLE128(reinterpret_cast<uint8*>(pDst), reinterpret_cast<const uint128&>(src) );
            }
            #endif
            else
            {
                static_assert(dependent_false<T>::value, "Invalid size for SetUnalignedLE");
            }
        #endif
    }

    //////////////////// SetUnalignedBE

    // Stores the 16-bit value v into the two bytes at p, most significant
    // byte in p[0]. The buffer is written one byte at a time.
    inline void SetUnalignedBE16(uint8* p, uint16 v)
    {
        const uint8 lowByte  = static_cast<uint8>(v);
        const uint8 highByte = static_cast<uint8>(v >> 8);
        p[1] = lowByte;
        p[0] = highByte;
    }

    // Stores the 32-bit value v into the four bytes at p, most significant
    // byte in p[0]. The buffer is written one byte at a time.
    inline void SetUnalignedBE32(uint8* p, uint32 v)
    {
        for (int i = 0; i < 4; ++i)
        {
            // Bits [8*i, 8*i + 7] of v land i positions before the last byte.
            p[3 - i] = static_cast<uint8>(v >> (8 * i));
        }
    }

    // Stores the 64-bit value v into the eight bytes at p, most significant
    // byte in p[0]. The buffer is written one byte at a time.
    inline void SetUnalignedBE64(uint8* p, uint64 v)
    {
        for (int i = 0; i < 8; ++i)
        {
            // Bits [8*i, 8*i + 7] of v land i positions before the last byte.
            p[7 - i] = static_cast<uint8>(v >> (8 * i));
        }
    }

    #if defined __SIZEOF_INT128__
        // Stores the 128-bit value v into the sixteen bytes at p, most
        // significant byte in p[0]. Only available when the compiler defines
        // __SIZEOF_INT128__.
        inline void SetUnalignedBE128(uint8* p, uint128 v)
        {
            for (int i = 0; i < 16; ++i)
            {
                // Bits [8*i, 8*i + 7] of v land i positions before the last byte.
                p[15 - i] = static_cast<uint8>(v >> (8 * i));
            }
        }
    #endif

    template<typename T>
    inline void SetUnalignedBE(void* pDst, const T& src)
    { 
        #if CEDA_HAVE_EFFICIENT_UNALIGNED_ACCESS && !CEDA_LITTLE_ENDIAN
            *reinterpret_cast<T*>(pDst) = src;
        #else
            if constexpr(sizeof(T) == 1)
            {
                *reinterpret_cast<T*>(pDst) = src;
            }
            else if constexpr(sizeof(T) == 2)
            {
                SetUnalignedBE16(reinterpret_cast<uint8*>(pDst), reinterpret_cast<const uint16&>(src) );
            }
            else if constexpr(sizeof(T) == 4)
            {
                SetUnalignedBE32(reinterpret_cast<uint8*>(pDst), reinterpret_cast<const uint32&>(src) );
            }
            else if constexpr(sizeof(T) == 8)
            {
                SetUnalignedBE64(reinterpret_cast<uint8*>(pDst), reinterpret_cast<const uint64&>(src) );
            }
            #if defined __SIZEOF_INT128__
            else if constexpr(sizeof(T) == 16)
            {
                SetUnalignedBE128(reinterpret_cast<uint8*>(pDst), reinterpret_cast<const uint128&>(src) );
            }
            #endif
            else
            {
                static_assert(dependent_false<T>::value, "Invalid size for SetUnalignedBE");
            }
        #endif
    }

    //////////////////// GetUnaligned, SetUnaligned

    // These functions are not concerned with byte swapping

    // Copies sizeof(T) bytes from pSrc into dst without any byte swapping.
    // pSrc does not need to be aligned for T.
    //
    //   dst   receives the value; its bytes are overwritten
    //   pSrc  address of the first of sizeof(T) readable bytes
    //
    // T is handled as a plain block of bytes, so it is expected to be trivially
    // copyable.
    //
    // std::memcpy is used on every target. The previous x86 fast path
    // dereferenced a reinterpret_cast'ed T*, which is undefined behaviour for a
    // misaligned address even when the CPU tolerates unaligned loads.
    // Optimising compilers typically lower a fixed-size memcpy to an ordinary
    // load.
    //
    // Design note kept from the original source: for sizes 1, 2, 4 and 8 an
    // alternative is to dispatch to GetUnalignedLE / GetUnalignedBE according
    // to CEDA_LITTLE_ENDIAN; whether that is better depends on how well memcpy
    // handles such small copies.
    template<typename T>
    inline void GetUnaligned(T& dst, const void* pSrc) 
    { 
        std::memcpy(&dst, pSrc, sizeof(T));
    }

    // Copies the sizeof(T) bytes of src to pDst without any byte swapping.
    // pDst does not need to be aligned for T.
    //
    //   pDst  address of the first of sizeof(T) writable bytes
    //   src   value to store
    //
    // T is handled as a plain block of bytes, so it is expected to be trivially
    // copyable.
    //
    // std::memcpy is used on every target. The previous x86 fast path stored
    // through a reinterpret_cast'ed T*, which is undefined behaviour for a
    // misaligned address even when the CPU tolerates unaligned stores.
    // Optimising compilers typically lower a fixed-size memcpy to an ordinary
    // store.
    //
    // Design note kept from the original source: for sizes 1, 2, 4 and 8 an
    // alternative is to dispatch to SetUnalignedLE / SetUnalignedBE according
    // to CEDA_LITTLE_ENDIAN; whether that is better depends on how well memcpy
    // handles such small copies.
    template<typename T>
    inline void SetUnaligned(void* pDst, const T& src)
    { 
        std::memcpy(pDst, &src, sizeof(T));
    }

} // namespace ceda

#endif // include guard