DynamicDispatch.h

// DynamicDispatch.h
//
// Author David Barrett-Lennard
// (C)opyright Cedanet Pty Ltd 2007

@import "cxObject.h"
@import "IObject.h"
#include "Ceda/cxUtils/xvector.h"
@import "DynamicDispatchFfi.h"

#if CEDA_ENABLE_ORIGINAL_DYNAMIC_DISPATCH

@if (@str(@platform) == "windows-x64")
{
    #include <malloc.h>  // for _alloca()
}

///////////////////////////////////////////////////////////////////////////////////////////////////
// DynamicDispatcher

/*
Dynamic Dispatch
----------------

Dynamic dispatch refers to the use of reflection information to invoke a function.

The following types of function are supported:-

    1.  a global function
    2.  a non-virtual, non-static method on a class
    3.  a method on an interface
    
A client makes use of the reflection information available in a ReflectedGlobalFunction,
ReflectedClass or ReflectedInterface respectively.

A dynamic dispatch involves the following steps:-


1.  Use one of the following macros, depending on whether it is a global function, class method
    or interface method
    
        mDynamicDispatch_PrepareToCallGlobalFunction(rgf)
        mDynamicDispatch_PrepareToCallClassMethod(rc,methodIndex,self)
        mDynamicDispatch_PrepareToCallInterfaceMethod(ri,methodIndex,p)


2.  If the return type is not void then designate a buffer (area of memory) to be interpreted as 
    a variable to hold the return value of the function call.
    
    The required size of the buffer can be determined from the return type byte code.  Note however
    that the required buffer size must be rounded up to the nearest multiple of 4 bytes.
    As an example, for a function that returns an int8 the client will need to provide a buffer of
    size 4, not 1.
    
    The buffer itself doesn't need to be initialised in any way.  As far as the caller is concerned
    the buffer will be written with the return value in the manner of a constructor (i.e. like 
    placement new).

    Use the following macro 

        mDynamicDispatch_SetReturnAddress(pRet) 
        
    to set the address of the buffer, where pRet is a void*.
    
3.  Use the following macro

        mDynamicDispatch_ReserveArgs
        
    This is required even if there are no formal arguments.

    
4.  The DynamicDispatcher uses the stack frame to hold the arguments.  The macro

        mDynamicDispatch_ArgPtr 
    
    retrieves the address (as a void*) of each argument in turn (from left to right), allowing the
    client to initialise each argument variable with a value.  Each argument variable on the frame 
    requires construction (i.e. like placement new).
    
    The called function will destruct the arguments.

    
5.  Use the following macro to invoke the function

        mDynamicDispatch_InvokeFunction

6.  Use the following macro to unreserve the stack space for the arguments

        mDynamicDispatch_UnreserveArgs
        

Note that it is permissible to abort the function call part way through step 4 or prior to step 5.  
In that case it is important to still do the mDynamicDispatch_UnreserveArgs.

If an exception is thrown from 5 (because the function being called threw an exception) then it is
important not to do the mDynamicDispatch_UnreserveArgs.


Support for non-relocatable objects
-----------------------------------

Consider a class with a public copy constructor, allowing it to be passed by value or returned
by value in a function call.

Just after copy constructing an object, consider that the copy is "relocated" by simply using
memcpy to move the object to a new memory location, and we regard the old location as no longer
representing a variable that needs to be destructed.  In many cases this will work correctly, even
if the top object contains pointers to child objects allocated from the heap.  The main reason
why relocation won't work is that child objects contain "back pointers" to the top object.

The DynamicDispatcher supports non-relocatable classes that are passed by value or returned by 
value.
*/

namespace ceda
{

@if (@str(@platform) == "windows-x64")
{
    /*
    Win64

    Stack frame for making a call
    -----------------------------

    When making a call the stack frame for an invocation with 6 arguments looks like the following 
    (where memory addresses increase "going upwards", and the stack grows downwards by decrementing 
    the stack register rsp)

        <used stack area>    
        [64 bit param 6]
        [64 bit param 5]
        [64 bit spill for param4 in r9/xmm3]
        [64 bit spill for param3 in r8/xmm2]
        [64 bit spill for param2 in rdx/xmm1]
        [64 bit spill for param1 in rcx/xmm0]
        [64 bit return address]
        <unused stack area>
        
    Note that all parameters are at most 64 bit.

    Any of these 64 bit parameters may represent pointers to structs/unions that although formally are
    passed by value are in the implementation passed by reference.  The callee makes no assumptions
    about the location of these "large objects" - they don't even need to exist on the stack frame at 
    all.
    
    If the return value is passed by reference then rcx is used for a ptr to the return value.
    
    If there is a 'this' ptr then this is passed as the first argument, apart from the address of the
    return value if any.


    Passing structs by value or reference
    -------------------------------------

    For various reasons, a struct is not necessarily passed by value on the stack frame.  For example, 
    a struct larger than 8 bytes cannot fit into a 64 bit register and so must always be passed by 
    reference.

    We provide the arguments to the DynDispatchx64() function (which is written in assembly) using a 
    single block of memory organised as follows:
    
        [
           Large object region
           
           we use this region for 
           structs/unions that cannot 
           be passed in a 64 bit value
                                        ]
        [64 bit slot for param 6]
        [64 bit slot for param 5]
        [64 bit slot for param 4]
        [64 bit slot for param 3]
        [64 bit slot for param 2]
        [64 bit slot for param 1]
        
    This single block of memory can be allocated from the heap (with malloc) or from the stack frame 
    (using alloca).  For performance we use alloca since it basically only performs a subtraction on 
    rsp.
    
    GetNextArg() either returns the address of the 64 bit slot, or else an address within the
    large object region.  Note that large objects are constructed using placement-new at their 
    location in the large object region, and are not moved by the dynamic dispatch code.  Therefore
    we avoid any need to assume objects are relocatable.
    */

    // Win64: classifies the return value of the function being dispatched.  A value of this type
    // is stored in DynamicDispatcher::m_mode and in DynDispatch_t::m_retMode.
    //
    // NOTE(review): the enumerator names suggest void / float / sized-integer returns, with
    // RM_PUSH_RET covering return values that are written through a pointer supplied by the
    // caller (see "If the return value is passed by reference then rcx is used..." above) -
    // confirm against the implementation of Invoke().
    // NOTE(review): the numeric values are presumably shared with the assembly routine
    // DynDispatchx64() - confirm before reordering or inserting enumerators.
    enum EReturnMode 
    {
        RM_VOID,
        RM_FLOAT32,
        RM_FLOAT64,
        RM_INT8,
        RM_INT16,
        RM_INT32,
        RM_INT64,
        RM_PUSH_RET
    };

    // Win64: the kind of function being dispatched.  There is one enumerator for each of the
    // three DynamicDispatcher constructors (global function, class method, interface method).
    enum EFunctionType
    {
        FT_GLOBAL_FUNCTION,
        FT_CLASS_METHOD,
        FT_INTERFACE_METHOD,
    };

    // Win64: plain aggregate describing a single call.
    //
    // NOTE(review): presumably this is the parameter block handed to the assembly routine
    // DynDispatchx64() mentioned above, in which case the member order and sizes are part of
    // that routine's contract - confirm before changing the layout.
    struct DynDispatch_t
    {
        ssize_t numArgs;                // number of argument slots (presumably 64 bit slots - confirm)
        void* m_ptrArgs;                // start of the argument block
        FunctionPointer m_fnAddress;    // address of the function to call
        void* m_pRet;                   // where the return value is to be written
        EReturnMode m_retMode;          // how the return value is to be retrieved
    };

    // Win64 implementation of the dynamic dispatcher.
    //
    // Client code does not normally use this struct directly: the mDynamicDispatch_PrepareToCall*
    // macros declare a local instance named dd_ and the remaining mDynamicDispatch_* macros
    // operate on that instance.
    struct @api DynamicDispatcher
    {
        // One constructor for each supported kind of function: a global function, a method on a
        // class (self is the object to invoke the method on) and a method on an interface (p is
        // the interface pointer to invoke the method on).
        DynamicDispatcher(const ReflectedGlobalFunction& rgf);
        DynamicDispatcher(const ReflectedClass& rc,ssize_t methodIndex,void* self);
        DynamicDispatcher(const ReflectedInterface& ri,ssize_t methodIndex,AnyInterface p);
        
        // NOTE(review): presumably initialisation shared by the three constructors - confirm.
        void Init();

        // Receives the block of m_totSize bytes that mDynamicDispatch_ReserveArgs allocates with
        // _alloca().
        void ReserveArgs(void* p);

        // Returns the address at which the client must construct the next argument.  Used by
        // mDynamicDispatch_ArgPtr.
        void* GetNextArg();

        // Performs the call.  Used by mDynamicDispatch_InvokeFunction.
        void Invoke();
        
        EFunctionType m_functionType;       // which kind of function is being called
        void* m_self;                       // NOTE(review): presumably the 'this' ptr, if any - confirm
        ssize_t m_returnTypeSize;           // read through mDynamicDispatch_GetReturnTypeSize
        const ReflectedFunction& m_rf;      // reflection information for the function being called
        ConstStringZ const* stringTable;    // NOTE(review): presumably the string table used by the
                                            // reflected byte code - confirm
        EReturnMode m_mode;                 // how the return value is retrieved
        
        // total size required for allocation (for all args and for values that are stored separately on
        // the frame and passed by reference)
        ssize_t m_totSize;
        
        // Zero based index into list of reflected formal args.
        // list of reflected formal args doesn't include ptr to return value if any, or 'this' ptr if 
        // any
        ssize_t m_argIndex;
        
        // Ptr to next address for an argument that is too large or complex to be passed in an 8 byte
        // register or slot on the frame.  May be advanced by calls to GetNextArg()
        octet_t* m_ptrNextLargeObj;
        
        ssize_t m_totNumArgs;       // Total num args including ptr to return value and 'this' if any
        ssize_t* m_totPtrArgs;      // ptr to all args including return value and 'this' if any
        
        ssize_t* m_pArg;            // ptr to next arg, advanced with calls to GetNextArg()
        FunctionPointer m_fnAddress;    // address of the function to be called
        void* m_pRet;               // Location of the return value to be placement-new constructed
                                    // by the invocation.  Set with mDynamicDispatch_SetReturnAddress.
    };
}
@else
{
    // 32 bit: classifies how mDynamicDispatch_InvokeFunction retrieves the return value after the
    // call instruction:
    //
    //     RM_VOID       nothing is stored
    //     RM_FLOAT32    popped from the x87 stack (fstp dword) into *m_pRet
    //     RM_INT32      eax is stored into *m_pRet
    //     RM_FLOAT64    popped from the x87 stack (fstp qword) into *m_pRet
    //     RM_INT64      eax (low dword) and edx (high dword) are stored into *m_pRet
    //     RM_PUSH_RET   m_pRet is pushed as an extra argument before the call and nothing is
    //                   stored afterwards
    enum EReturnMode 
    {
        RM_VOID,
        RM_FLOAT32,
        RM_INT32,
        RM_FLOAT64,
        RM_INT64,
        RM_PUSH_RET
    };

    // 32 bit: the kind of function being dispatched.  There is one enumerator for each of the
    // three DynamicDispatcher constructors.  mDynamicDispatch_UnreserveArgs only restores esp
    // for FT_GLOBAL_FUNCTION.
    enum EFunctionType
    {
        FT_GLOBAL_FUNCTION,
        FT_CLASS_METHOD,
        FT_INTERFACE_METHOD,
    };

    // 32 bit implementation of the dynamic dispatcher.
    //
    // Client code does not normally use this struct directly: the mDynamicDispatch_PrepareToCall*
    // macros declare a local instance named dd_ and the remaining mDynamicDispatch_* macros
    // operate on that instance, reading and writing its members from inline assembly.
    struct @api DynamicDispatcher
    {
        // One constructor for each supported kind of function: a global function, a method on a
        // class (self is the object to invoke the method on) and a method on an interface (p is
        // the interface pointer to invoke the method on).
        DynamicDispatcher(const ReflectedGlobalFunction& rgf);
        DynamicDispatcher(const ReflectedClass& rc,ssize_t methodIndex,void* self);
        DynamicDispatcher(const ReflectedInterface& ri,ssize_t methodIndex,AnyInterface p);
        
        // Returns the address at which the client must construct the next argument.  Used by
        // mDynamicDispatch_ArgPtr.
        void* GetNextArg();
        
        EFunctionType m_functionType;       // mDynamicDispatch_UnreserveArgs restores esp only for
                                            // FT_GLOBAL_FUNCTION
        void* m_self;                       // if non-null, pushed as an extra argument just before
                                            // the call
        FunctionPointer m_fnAddress;        // target of the call instruction
        void* m_pRet;                       // where the return value is written.  Set with
                                            // mDynamicDispatch_SetReturnAddress
        ssize_t m_returnTypeSize;           // read through mDynamicDispatch_GetReturnTypeSize
        const ReflectedFunction& m_rf;      // reflection information for the function being called
        ConstStringZ const* stringTable;    // NOTE(review): presumably the string table used by the
                                            // reflected byte code - confirm
        EReturnMode m_mode;                 // selects how the return value is retrieved
        ssize_t m_argsspace;                // number of bytes subtracted from esp by
                                            // mDynamicDispatch_ReserveArgs.  Increased by 4 for each
                                            // extra push (m_self, m_pRet) made when invoking
        ssize_t m_argIndex;                 // NOTE(review): presumably the index of the next formal
                                            // argument returned by GetNextArg() - confirm
        void* m_ptrArgs;                    // value of esp straight after the arguments were reserved
    };
}

} // namespace ceda

#endif // CEDA_ENABLE_ORIGINAL_DYNAMIC_DISPATCH

///////////////////////////////////////////////////////////////////////////////////////////////
// Macros to allow for dynamic invocation

// Step 1 of a dynamic dispatch, for a global function.
//
// Declares a local variable named dd_ of type ceda::DynamicDispatcher, constructed from the
// ReflectedGlobalFunction rgf.  Every other mDynamicDispatch_* macro refers to dd_ by name, so
// they must be used where this variable is visible.
@def mDynamicDispatch_PrepareToCallGlobalFunction(rgf) =
{
    ceda::DynamicDispatcher dd_(rgf);
}

// Step 1 of a dynamic dispatch, for a method on a class.
//
// Declares a local variable named dd_ of type ceda::DynamicDispatcher, constructed from the
// ReflectedClass rc, the index of the method within that class and the object (self) on which
// the method is to be invoked.  Every other mDynamicDispatch_* macro refers to dd_ by name, so
// they must be used where this variable is visible.
@def mDynamicDispatch_PrepareToCallClassMethod(rc,methodIndex,self) =
{
    ceda::DynamicDispatcher dd_(rc,methodIndex,self);
}

// Step 1 of a dynamic dispatch, for a method on an interface.
//
// Declares a local variable named dd_ of type ceda::DynamicDispatcher, constructed from the
// ReflectedInterface ri, the index of the method within that interface and the interface
// pointer p on which the method is to be invoked.  Every other mDynamicDispatch_* macro refers
// to dd_ by name, so they must be used where this variable is visible.
@def mDynamicDispatch_PrepareToCallInterfaceMethod(ri,methodIndex,p) =
{
    ceda::DynamicDispatcher dd_(ri,methodIndex,p);
}

// Expands to the m_returnTypeSize member of the dispatcher dd_.
// NOTE(review): it is not visible here whether this value is already rounded up to the buffer
// size the caller must provide for the return value - confirm before using it to size a buffer.
@def mDynamicDispatch_GetReturnTypeSize = dd_.m_returnTypeSize

// Step 2 of a dynamic dispatch (only needed when the return type is not void).
//
// Records pRet (a void*) in dd_.m_pRet as the address of the buffer that receives the return
// value.  The buffer does not need to be initialised.
@def mDynamicDispatch_SetReturnAddress(pRet) =
{
    dd_.m_pRet = pRet;
}

// Step 3 of a dynamic dispatch: reserve the memory that will hold the arguments.  Required even
// if the function has no formal arguments.
//
// windows-x64:  allocates dd_.m_totSize bytes with _alloca() and hands the block to
//               dd_.ReserveArgs().
// otherwise:    moves the stack pointer down by dd_.m_argsspace bytes and records the new value
//               of esp in dd_.m_ptrArgs.
@def mDynamicDispatch_ReserveArgs =
{
    @if (@str(@platform) == "windows-x64")
    {
        dd_.ReserveArgs(_alloca(dd_.m_totSize));
    }
    @else
    {
        __asm
        {
            sub esp, dd_.m_argsspace
            mov dd_.m_ptrArgs, esp
        }
    }
}

// Step 4 of a dynamic dispatch: expands to a call of dd_.GetNextArg(), which yields a void*.
// Use it once per argument, from left to right, and construct the argument at the address
// obtained.
@def mDynamicDispatch_ArgPtr = dd_.GetNextArg()

// Step 5 of a dynamic dispatch: perform the call.
//
// windows-x64:  delegates to dd_.Invoke().
// otherwise:    pushes dd_.m_self (if non-null) and, for the final 'else' case, dd_.m_pRet as
//               extra arguments, calls dd_.m_fnAddress and then stores the result into the
//               buffer at dd_.m_pRet in the way selected by dd_.m_mode.  Each extra push adds 4
//               to dd_.m_argsspace so that the 'add esp' performed by
//               mDynamicDispatch_UnreserveArgs accounts for it as well.
@def mDynamicDispatch_InvokeFunction =
{    
    @if (@str(@platform) == "windows-x64")
    {
        dd_.Invoke();
    }
    @else
    {
        if (dd_.m_self)
        {
            // Push m_self as an additional argument and account for its 4 bytes
            __asm
            {
                push        dword ptr dd_.m_self
                add         dd_.m_argsspace, 4
            }
        }
        if (dd_.m_mode == ceda::RM_VOID)
        {
            // No return value to store
            __asm
            {
                call        dword ptr dd_.m_fnAddress
            }
        }
        else if (dd_.m_mode == ceda::RM_FLOAT32)
        {
            // Pop the result from the x87 stack and store it as a 32 bit float at m_pRet
            __asm
            {
                call        dword ptr dd_.m_fnAddress
                mov         eax, dd_.m_pRet
                fstp        dword ptr [eax]
            }
        }
        else if (dd_.m_mode == ceda::RM_INT32)
        {
            // The result is in eax.  Store it as 32 bits at m_pRet
            __asm
            {
                call        dword ptr dd_.m_fnAddress
                mov         edx, dd_.m_pRet
                mov         dword ptr [edx],eax
            }
        }
        else if (dd_.m_mode == ceda::RM_FLOAT64)
        {
            // Pop the result from the x87 stack and store it as a 64 bit float at m_pRet
            __asm
            {
                call        dword ptr dd_.m_fnAddress
                mov         eax, dd_.m_pRet
                fstp        qword ptr [eax]
            }
        }
        else if (dd_.m_mode == ceda::RM_INT64)
        {
            // The result is in edx:eax.  edx is saved on the stack because edx is then reused to
            // hold m_pRet.  eax is stored as the low dword, then the saved edx is popped into eax 
            // and stored as the high dword.
            __asm
            {
                call        dword ptr dd_.m_fnAddress
                push        edx
                mov         edx, dd_.m_pRet
                mov         dword ptr [edx],eax
                pop         eax
                mov         dword ptr [edx+4],eax
            }
        }
        else
        {
            // Return value is not a POD or larger than 8 bytes
            // m_pRet is pushed as an additional argument.  Nothing is stored after the call.  The 
            // 4 bytes pushed are added to m_argsspace.
            __asm
            {
                push        dd_.m_pRet
                call        dword ptr dd_.m_fnAddress
                add         dd_.m_argsspace, 4
            }
        }
    }
}

// Step 6 of a dynamic dispatch: give back the space that was reserved for the arguments.
//
// windows-x64:  expands to nothing.
// otherwise:    when the function called is a global function, esp is moved back up by
//               dd_.m_argsspace bytes.  For class and interface methods esp is left alone.
@def mDynamicDispatch_UnreserveArgs =
{
    @if (@str(@platform) == "windows-x64")
    {
        // Nothing to do
    }
    @else
    {
        if (dd_.m_functionType == ceda::FT_GLOBAL_FUNCTION)
        {
            // Caller restores the stack pointer
            __asm
            {
                add esp, dd_.m_argsspace
            }
        }
    }
}
}