RodPerformance.cpp

// RodPerformance.cpp
//
// Author David Barrett-Lennard
// (C)opyright Cedanet Pty Ltd 2016

@import "Ceda/cxOperation/UTRoot.h"
@import "Ceda/cxOperation/IWorkingSetMachine.h"
@import "Ceda/cxPersistStore/IPersistStore.h"
@import "Ceda/cxObject/IObjectVisitor.h"
@import "Ceda/cxObject/gcroot.h"
#include "Ceda/cxUtils/CedaAssert.h"
#include "Ceda/cxUtils/Tracer.h"
#include "Ceda/cxUtils/Environ.h"
#include "Ceda/cxUtils/TestTimer.h"
#include "Ceda/cxUtils/UnitPrefix.h"


/*
Create two working sets in a single PSpace and allow for synchronisation between them
*/

// Key in the UT root map of WS1 under which the test's root object is stored
// (written by BootstrapRootInWs1(), read back by GetRootInWs1()).
const ceda::xstring UtEntryName("MyApp");

class TwoWorkingSetsWithoutSockets
{
public:
    TwoWorkingSetsWithoutSockets(const char* filename) :
        pstore(nullptr),
        pspace(nullptr),
        ws1(nullptr),
        ws2(nullptr)
    {
        path = ceda::GetCedaTestPath(filename);
    }

    void PrepareThreadLocalStorage()
    {
        cxAssert(pspace);
        ceda::SetThreadPtr(pspace);
        ceda::SetThreadPtr(ceda::GetCSpace(pspace));
    }

    void CreateOrOpen(bool create)
    {
        //Tracer() << "Opening store " << path << '\n';
        pstore = ceda::OpenPersistStore(path.c_str(), create ? ceda::OM_CREATE_ALWAYS : ceda::OM_OPEN_EXISTING);

        // Open or create a PSpace
        pspace = ceda::OpenPSpace(pstore, "MyPSpace");

        PrepareThreadLocalStorage();

        {
            ceda::CSpaceTxn txn;
            ws1 = ceda::OpenWorkingSetMachine("WS1", true);
            ws2 = ceda::OpenWorkingSetMachine("WS2", false);

            SetIsCustodian(ws1,true);
            SetIsCustodian(ws2, true);
        }

        {
            ceda::CSpaceTxn txn;
            drwSrc = CreateDeltaRW(ws1, &m);
            drwDst = CreateDeltaRW(ws2, &m);
            rsSrc = CreateRodSession(ws1, &m);
            rsDst = CreateRodSession(ws2, &m);
        }

        ceda::ExchangeVectorTimes(ws1, ws2, drwSrc, drwDst);
    }

    void Reopen()
    {
        Close();
        CreateOrOpen(false);
    }

    void Sync()
    {
        ceda::SendAndReceiveRodObjects(rsSrc, rsDst);
        ceda::SendAndReceiveRodObjects(rsDst, rsSrc);

        ceda::SendAndReceiveDelta(drwSrc, drwDst);
        ceda::SendAndReceiveDelta(drwDst, drwSrc);
    }

    // Must be called within a CSpace lock
    template <typename T>
    T* BootstrapRootInWs1()
    {
        //Tracer() << "Bootstrap " << path << "\n";
        cxAssert(pspace);
        cxAssert(ws1);
        T* root = $new T;
        GetUTRoot(ws1).Map[UtEntryName].insert(0,root);
        return root;
    }

    // Must be called within a CSpace lock
    template <typename T>
    T* GetRootInWs1()
    {
        return ceda::GetUTRootEntry<T>(ws1, UtEntryName);
    }

    void Close()
    {
        //Tracer() << "Closing " << path << "\n";

        {
            ceda::DeclareThreadPSpace dt(pspace);
            ceda::CSpaceTxn txn;

            if (rsSrc) ceda::Close(rsSrc);
            rsSrc = nullptr;

            if (rsDst) ceda::Close(rsDst);
            rsDst = nullptr;

            if (drwSrc) ceda::Close(drwSrc);
            drwSrc = nullptr;

            if (drwDst) ceda::Close(drwDst);
            drwDst = nullptr;

            if (ws1) ceda::Close(ws1);
            ws1 = nullptr;

            if (ws2) ceda::Close(ws2);
            ws2 = nullptr;
        }

        if (pspace) ceda::Close(pspace);
        pspace = nullptr;

        if (pstore) ceda::Close(pstore);
        pstore = nullptr;
    }

public:
    ceda::xstring path;
    ceda::PersistStore* pstore;
    ceda::PSpace* pspace;
    ceda::WorkingSetMachine* ws1;
    ceda::WorkingSetMachine* ws2;
    ceda::EmptyMoreToSendHandler m;

    ceda::DeltaRW* drwSrc;
    ceda::DeltaRW* drwDst;
    ceda::RodSession* rsSrc;
    ceda::RodSession* rsDst;
};

namespace RodPerformance
{
    // Persistable object whose only model field is a byte string.  The test resizes
    // 'buffer' to set how many bytes each object carries.
    // NOTE(review): the <<rod>> attribute presumably marks this type as a "ROD" object
    // that is transferred through the RodSession path - confirm against the xc++ docs.
    $struct+ RodObject <<rod -os>> isa ceda::IPersistable :
        model
        {
            string8 buffer;
        }
    {
    };

    // Root object of the test.  It is registered in the UT root of WS1 and its model holds
    // a sequence of references to the RodObject instances created by the test.
    $struct+ RootNode <<-os>> isa ceda::IPersistable :
        model
        {
            cref<RodObject> Children[];
        }
    {
    };

    // Traces a short report for one timed phase of the test: the elapsed time read from
    // 'timer', the number of ROD objects processed per second and the number of payload
    // bytes (numRodObjects * rodObjectSize) processed per second.  'msg' names the phase.
    // NOTE(review): the last argument of ceda::WriteRate presumably selects a bytes/s
    // rather than a Hz style unit - confirm in UnitPrefix.h.
    void WriteTimingResult(ceda::HPTimer& timer, int numRodObjects, int rodObjectSize, const char* msg)
    {
        const double elapsedSeconds = timer.GetElapsedTimeInSeconds();
        const double objectsPerSecond = numRodObjects / elapsedSeconds;
        const double bytesPerSecond = static_cast<double>(numRodObjects) * rodObjectSize / elapsedSeconds;

        ceda::TracerX os;

        os << msg << " with " << numRodObjects << " rod objects each of size " << rodObjectSize << " bytes\n";
        os << "  Time = " << elapsedSeconds << "s\n";

        os << "  Object rate = ";
        ceda::WriteRate(os, objectsPerSecond, false);
        os << '\n';

        os << "  Data   rate = ";
        ceda::WriteRate(os, bytesPerSecond, true);
        os << '\n';
    }

    void Run(int numRodObjects, int rodObjectSize)
    {
        TwoWorkingSetsWithoutSockets store("TestInteract.ced");

        {
            ceda::HPTimer timer;

            store.CreateOrOpen(true);

            {
                ceda::gcroot<RootNode*> root;
                {
                    ceda::CSpaceTxn txn;
                    root.reset(store.BootstrapRootInWs1<RootNode>());
                }

                // Create lots of ROD objects in WS1
                for (int i = 0 ; i < numRodObjects ; ++i)
                {
                    ceda::CSpaceTxn txn;
                    RodObject* rod = $new RodObject();
                    cxAssert(rod);
                    rod->model().buffer.resize(rodObjectSize);

                    root->Children[numRodObjects-1-i] = rod;
                }
            }

            store.Close();
            WriteTimingResult(timer, numRodObjects, rodObjectSize, "Create store");
        }

        {
            ceda::HPTimer timer;

            store.CreateOrOpen(false);

            // Sync until all rod objects have been transferred
            while(1)
            {
                store.Sync();

                ceda::WorkingSetMachineCounters counters;
                {
                    ceda::CSpaceTxn txn;
                    GetCounters(store.ws2, counters);
                }
                if (counters.numReceivedRodObjects == numRodObjects) break;
            }

            store.Close();
            WriteTimingResult(timer, numRodObjects, rodObjectSize, "Sync store");
        }

        Tracer() << '\n';
    }

    /*
    Results on Intel Core i7-4700MQ 2.40 GHz, 16GB RAM, x64 Windows 10, LSS file written to a RAM drive

    Old CRC on flush units in LSS

                                                    create store                        sync store
                                             ------------------------------    ------------------------------
           Num              Rod object       time     object      data         time     object      data
        Rod objects           size           (sec)     rate       rate         (sec)     rate       rate
        -----------------------------------------------------------------------------------------------------
            10               10000000        0.400    24.97 Hz   238.2 MB/s    0.648    15.43 Hz   147.2 MB/s
            100              1000000         0.397    251.8 Hz   240.1 MB/s    0.598    167.3 Hz   159.6 MB/s
            1000             100000          0.409    2.447 kHz  233.4 MB/s    0.661    1.514 kHz  144.4 MB/s
            10000            10000           0.466    21.46 kHz  204.6 MB/s    3.745    2.67 kHz   25.46 MB/s
            100000           1000            1.001    99.89 kHz  95.26 MB/s    35.35    2.829 kHz  2.698 MB/s
            1000000          100             6.524    153.3 kHz  14.62 MB/s    353.7    2.827 kHz  276.1 kB/s
            10000000         10              76.9     130 kHz     1.24 MB/s

    Slicing-by-16 CRC algorithm on flush units in LSS
    
                                                    create store                        sync store
                                             ------------------------------    ------------------------------
           Num              Rod object       time     object      data         time     object      data
        Rod objects           size           (sec)     rate       rate         (sec)     rate       rate
        -----------------------------------------------------------------------------------------------------
            10               10000000        0.158    63.33 Hz   603.9 MB/s    0.395    25.28 Hz   241.1 MB/s
            100              1000000         0.153    652.8 Hz   622.6 MB/s    0.338    295.5 Hz   281.8 MB/s
            1000             100000          0.160    6.245 kHz  595.5 MB/s    0.390    2.563 kHz  244.4 MB/s
            10000            10000           0.217    46.13 kHz  440.0 MB/s    3.675    2.721 kHz  25.95 MB/s
            100000           1000            0.757    132.1 kHz  126.0 MB/s    36.04    2.775 kHz  2.646 MB/s
            1000000          100             6.105    163.8 kHz  15.62 MB/s    348.6    2.869 kHz  280.1 kB/s
            10000000         10              73.0     137   kHz  1.307 MB/s
    
    No CRC on flush units in LSS

                                                    create store                        sync store
                                             ------------------------------    ------------------------------
           Num              Rod object       time     object      data         time     object      data
        Rod objects           size           (sec)     rate       rate         (sec)     rate       rate
        -----------------------------------------------------------------------------------------------------
            10               10000000        0.113    87.9  Hz   838.3 MB/s    0.350    28.56 Hz   272.4 MB/s
            100              1000000         0.107    933.8 Hz   890.5 MB/s    0.308    324.7 Hz   309.6 MB/s
            1000             100000          0.114    8.734 kHz  833 MB/s      0.355    2.813 kHz  268.2 MB/s
            10000            10000           0.155    64.56 kHz  615.7 MB/s    3.073    3.254 kHz  31.03 MB/s
            100000           1000            0.678    147.5 kHz  140.7 MB/s    32.78    3.05 kHz   2.909 MB/s
            1000000          100             6.043    165.5 kHz  15.78 MB/s    317.4    3.151 kHz  307.7 kB/s
            10000000         10              70.9     141.1 kHz  1.346 MB/s
    */
    // Entry point of the test.  Runs the benchmark over a series of configurations that
    // trade object count against object size (the product is 10^8 bytes in every case),
    // going from a few very large ROD objects to very many tiny ones.
    void Run()
    {
        Tracer() << "Rod Performance test\n";
        ceda::TraceIndenter indent;

        // { number of ROD objects, size of each ROD object in bytes }
        static const int configurations[][2] =
        {
            { 10,       10000000 },
            { 100,      1000000  },
            { 1000,     100000   },
            { 10000,    10000    },
            { 100000,   1000     },
            { 1000000,  100      },
            { 10000000, 10       }
        };
        const int numConfigurations = sizeof(configurations) / sizeof(configurations[0]);

        for (int c = 0 ; c < numConfigurations ; ++c)
        {
            Run(configurations[c][0], configurations[c][1]);
        }
    }

}  // namespace RodPerformance