DynamicDispatchFfi.h

// DynamicDispatchFfi.h
//
// Author Jesse Pepper
// (C)opyright Cedanet Pty Ltd 2007

@import "Object.h"
#include "Ceda/cxUtils/xvector.h"
#include <map>

// CEDA_ENABLE_ORIGINAL_DYNAMIC_DISPATCH is currently 0 on every platform.
#define CEDA_ENABLE_ORIGINAL_DYNAMIC_DISPATCH 0

// The libffi based dispatcher declared in this header is compiled in everywhere except on
// Android, where dynamic dispatch is not supported at all yet.
#if defined(__ANDROID__)
    #define CEDA_ENABLE_DYNAMIC_DISPATCH_USING_LIBFFI 0
#else
    #define CEDA_ENABLE_DYNAMIC_DISPATCH_USING_LIBFFI 1
#endif

#if CEDA_ENABLE_DYNAMIC_DISPATCH_USING_LIBFFI

///////////////////////////////////////////////////////////////////////////////////////////////////
// DynamicDispatcher

/*
Dynamic Dispatch
----------------

Dynamic dispatch refers to the use of reflection information to invoke a function.

The following types of function are supported:-

    1.  a global function
    2.  a non-virtual, non-static method on a class
    3.  a method on an interface
    
A client makes use of the reflection information available in a ReflectedGlobalFunction,
ReflectedClass or ReflectedInterface respectively.

A dynamic dispatch involves the following steps:-


1.  Use one of the following macros, depending on whether it is a global function, class method
    or interface method
    
        mDynamicDispatch_PrepareToCallGlobalFunction(rgf)
        mDynamicDispatch_PrepareToCallClassMethod(rc,methodIndex,self)
        mDynamicDispatch_PrepareToCallInterfaceMethod(ri,methodIndex,p)


2.  If the return type is not void then designate a buffer (area of memory) to be interpreted as 
    a variable to hold the return value of the function call.
    
    The required size of the buffer can be determined from the return type byte code.  Note however
    that the required buffer size must be rounded up to the nearest multiple of 4 bytes.
    As an example, for a function that returns an int8 the client will need to provide a buffer of
    size 4, not 1.
    
    The buffer itself doesn't need to be initialised in any way.  As far as the caller is concerned
    the buffer will be written with the return value in the manner of a constructor (i.e. like 
    placement new).

    Use the following macro 

        mDynamicDispatch_SetReturnAddress(pRet) 
        
    to set the address of the buffer, where pRet is a void*.
    
3.  Use the following macro

        mDynamicDispatch_ReserveArgs
        
    This is required even if there are no formal arguments.

    
4.  The DynamicDispatcher uses the stack frame to hold the arguments.  The macro

        mDynamicDispatch_ArgPtr 
    
    retrieves the address (as a void*) of each argument in turn (from left to right), allowing the
    client to initialise each argument variable with a value.  Each argument variable on the frame 
    requires construction (i.e. like placement new).
    
    The called function will destruct the arguments.

    
5.  Use the following macro to invoke the function

        mDynamicDispatch_InvokeFunction

6.  Use the following macro to unreserve the stack space for the arguments

        mDynamicDispatch_UnreserveArgs
        

Note that it is permissible to abort the function call part way through step 4 or prior to step 5.  
In that case it is important to still do the mDynamicDispatch_UnreserveArgs.

If an exception is thrown from 5 (because the function being called threw an exception) then it is
important not to do the mDynamicDispatch_UnreserveArgs.


Support for non-relocatable objects
-----------------------------------

Consider a class with a public copy constructor, allowing it to be passed by value or returned
by value in a function call.

Just after copy constructing an object, consider that the copy is "relocated" by simply using
memcpy to move the object to a new memory location, and we regard the old location as no longer
representing a variable that needs to be destructed.  In many cases this will work correctly, even
if the top object contains pointers to child objects allocated from the heap.  The main reason
why relocation won't work is that child objects contain "back pointers" to the top object.

The DynamicDispatcher supports non-relocatable classes that are passed by value or returned by 
value.
*/

// Forward declaration of the ffi_type name used in the declarations below, so that this header
// does not itself include a libffi header.
// NOTE(review): presumably this must stay compatible with the declaration of ffi_type in
// libffi's own header - confirm against the libffi version in use.
struct _ffi_type;
typedef _ffi_type ffi_type;

namespace ceda
{

/*
Win64

Stack frame for making a call
-----------------------------

When making a call the stack frame for an invocation with 6 arguments looks like the following 
(where memory addresses increase "going upwards", and the stack grows downwards by decrementing 
the stack register rsp)

    <used stack area>    
    [64 bit param 6]
    [64 bit param 5]
    [64 bit spill for param4 in r9/xmm3]
    [64 bit spill for param3 in r8/xmm2]
    [64 bit spill for param2 in rdx/xmm1]
    [64 bit spill for param1 in rcx/xmm0]
    [64 bit return address]
    <unused stack area>
        
Note that all parameters are at most 64 bit.

Any of these 64 bit parameters may be a pointer to a struct/union that, although formally passed
by value, is passed by reference in the implementation.  The callee makes no assumptions about
the location of these "large objects" - they don't even need to exist on the stack frame at all.
    
If the return value is passed by reference then rcx is used for a ptr to the return value.
    
If there is a 'this' ptr then this is passed as the first argument, apart from the address of the
return value if any.


Passing structs by value or reference
-------------------------------------

For various reasons, a struct is not necessarily passed by value on the stack frame.  For example, 
a struct larger than 8 bytes cannot fit into a 64 bit register and so must always be passed by 
reference.

We provide the arguments to the DynDispatchx64() function (which is written in assembly) using a 
single block of memory organised as follows:
    
    [
        Large object region
           
        we use this region for 
        structs/unions that cannot 
        be passed in a 64 bit value
                                    ]
    [64 bit slot for param 6]
    [64 bit slot for param 5]
    [64 bit slot for param 4]
    [64 bit slot for param 3]
    [64 bit slot for param 2]
    [64 bit slot for param 1]
        
This single block of memory can be allocated from the heap (with malloc) or from the stack frame 
(using alloca).  For performance we use alloca since it basically only performs a subtraction on 
rsp.
    
GetNextArg() either returns the address of the 64 bit slot, or else an address within the
large object region.  Note that large objects are constructed using placement-new at their 
location in the large object region, and are not moved by the dynamic dispatch code.  Therefore
we avoid any need to assume objects are relocatable.
*/

// Classification of the return type of a dynamically dispatched function.  This is the result
// type of DynamicDispatcherFfi::GetReturnMode() and the type of DynamicDispatcherFfi::returnMode_.
//
// The numeric values are written out explicitly; they are the same values the enumerators
// receive implicitly when declared in this order.
//
// NOTE(review): the exact meaning of Struct versus PushRet is not visible from this header -
// confirm against the implementation of GetReturnMode().
enum class EDynamicDispatchRetMode
{
    Void    = 0,
    Float32 = 1,
    Float64 = 2,
    Int8    = 3,
    Int16   = 4,
    Int32   = 5,
    Int64   = 6,
    Struct  = 7,
    PushRet = 8
};

// The kind of callable targeted by a dynamic dispatch.  There is one enumerator for each of the
// four DynamicDispatcherFfi constructors (global function, functor, class method, interface
// method).
//
// The numeric values are written out explicitly; they are the same values the enumerators
// receive implicitly when declared in this order.
enum class EDynamicDispatchFuncType
{
    GlobalFunction  = 0,
    Functor         = 1,
    ClassMethod     = 2,
    InterfaceMethod = 3
};

// State for one reflection-driven ("dynamic dispatch") function call.
//
// An instance is constructed for the function to be called, is given a block of memory for the
// arguments with ReserveArgs(), hands out the address of each formal argument in turn through
// GetNextArg() so that the client can construct the argument values in place, and finally
// performs the call with Invoke().
//
// Notes for maintainers:
//   -  rf_ is a reference member, so the ReflectedFunction it refers to must outlive this object,
//      and the compiler cannot generate a copy assignment operator for this type.
//   -  A destructor is declared but no copy constructor is, so the implicitly generated copy
//      constructor is still available.
//      NOTE(review): whether copying is safe depends on what the destructor releases, which is
//      not visible from this header - confirm.
//   -  The data members mix the 'm_' prefix and the trailing '_' naming conventions.  They are
//      public, so renaming them would change the interface.
struct @api DynamicDispatcherFfi
{
    // One constructor for each kind of callable (see EDynamicDispatchFuncType):
    //   -  a global function
    //   -  a functor, with the address of the function to call supplied separately
    //   -  method number 'methodIndex' of a reflected class
    //      (presumably 'self' is the object the method is called on - TODO confirm)
    //   -  method number 'methodIndex' of a reflected interface
    //      (presumably 'p' is the interface pointer the method is called on - TODO confirm)
    DynamicDispatcherFfi(const ReflectedGlobalFunction& rgf);
    DynamicDispatcherFfi(const ReflectedFunctor& f,FunctionPointer fnAddress);
    DynamicDispatcherFfi(const ReflectedClass& rc,ssize_t methodIndex,void* self);
    DynamicDispatcherFfi(const ReflectedInterface& ri,ssize_t methodIndex,AnyInterface p);
    ~DynamicDispatcherFfi();

    // NOTE(review): presumably initialisation shared by the four constructors - confirm.
    void Init();

    // NOTE(review): presumably 'p' is the client allocated block of memory (e.g. from alloca)
    // used to hold the arguments, with m_totSize being the required size - confirm.
    void ReserveArgs(void* p);

    // Returns the address at which the client must construct the next formal argument.  The
    // formal arguments are visited from left to right.
    void* GetNextArg();

    // Invokes the function.
    // NOTE(review): presumably the return value, if any, is written to pRet_ - confirm.
    void Invoke();

    // Determines the EDynamicDispatchRetMode for a return type described by the reflection byte
    // code 'rbc'.
    // NOTE(review): presumably 'size' is the size in bytes of the return type - confirm.
    EDynamicDispatchRetMode GetReturnMode(ReflectionByteCode rbc, ssize_t size);

    // Helpers that produce libffi type descriptors (ffi_type*) for reflected types.
    // NOTE(review): 'rbc' is taken by non-const reference, so presumably it is advanced past the
    // type that was read - confirm.
    ffi_type* GetFfiType(ReflectionByteCode& rbc);
    ffi_type* GetClassFfiType(ReflectionByteCode& rbc);
    void GetFfiArgTypes(ceda::xvector<ffi_type*>& argTypes);
    ffi_type* GetFfiReturnType();

    // Which kind of callable this dispatcher was constructed for
    EDynamicDispatchFuncType m_functionType;

    // NOTE(review): presumably the 'this' ptr for a class or interface method call - confirm.
    void* self_;

    // NOTE(review): presumably the size in bytes of the return type - confirm.
    ssize_t returnTypeSize_;

    // The reflected function being called.  Held by reference - see the lifetime note above.
    const ReflectedFunction& rf_;

    // NOTE(review): presumably the string table needed to interpret the reflection byte code
    // of rf_ - confirm.
    ConstStringZ const* stringTable_;

    // How the return value is classified (see GetReturnMode())
    EDynamicDispatchRetMode returnMode_;
        
    // total size required for allocation (for all args and for values that are stored separately on
    // the frame and passed by reference)
    ssize_t m_totSize;
        
    // Zero based index into list of reflected formal args.
    // list of reflected formal args doesn't include ptr to return value if any, or 'this' ptr if 
    // any
    ssize_t m_argIndex;
        
    // Ptr to next address for an argument that is too large or complex to be passed in an 8 byte
    // register or slot on the frame.  May be advanced by calls to GetNextArg()
    octet_t* ptrNextLargeObj_;
        
    ssize_t totNumArgs_;       // Total num args including ptr to return value and 'this' if any
    //ssize_t* totPtrArgs_;      // ptr to all args including return value and 'this' if any
    ssize_t* totPtrArgs_;
    void* totPtrArgsAsVoid_;
        
    ssize_t* pArg_;            // ptr to next arg, advanced with calls to GetNextArg()
    FunctionPointer fnAddress_;
    void* pRet_;               // Location of the return value to be placement-new constructed
                                // by the invocation.

    // NOTE(review): presumably the addresses of the argument values handed to libffi when the
    // call is made - confirm.
    xvector<void*> argValues_;

    //typedef std::map<const octet_t*, ffi_type*> MapType;
    //MapType m_types;
};

} // namespace ceda

#endif // CEDA_ENABLE_DYNAMIC_DISPATCH_USING_LIBFFI