SessionValueCache.h

// SessionValueCache.h
//
// Author David Barrett-Lennard
// (C)opyright Cedanet Pty Ltd 2009

#pragma once
#ifndef Ceda_cxUtils_SessionValueCache_H
#define Ceda_cxUtils_SessionValueCache_H

#include <map>
#include "xvector.h"
#include "Archive.h"
#include "Tracer.h"
#include "VariableLengthSerialise.h"

#ifdef _MSC_VER
    // struct 'X' needs to have dll-interface to be used by clients of class 'Y'
    #pragma warning(disable:4251)
#endif

namespace ceda
{

///////////////////////////////////////////////////////////////////////////////////////////////////
// LRUIndexCache

/*
The values to be recorded and evicted on an LRU basis are zero based index values (called z 
values) into a cache recorded as an array of some type T.  For example, let cache[] be an array 
of cached strings:

      z       cache[z]
   ---------------------   
      0       "hello"
      1       "world"
      2       "foo"
      3       "bar"

LRUIndexCache is used to record the eviction order using a double linked list of z values.
i.e. the eviction queue is some permutation of [0,1,2,3].  Eg

        2   3   0   1
        
meaning that the next cache entry to be evicted is at z = 2 (cache[2] = "foo").  The next z
to evict is at the front of the list.  The LRU policy is implemented by sending entries to the
back of the list.  E.g. if cache[3] = "bar" is sent, we want to move z = 3 from its current
position to the back of the list.

To avoid heap allocations we use an array of nodes indexed by z where each node stores the 
previous and next z values in the linked list.

Operations
    1.  Rotate (i.e. send front of the queue to the back)
    
                2   3   0   1
        -->     3   0   1   2
        
    2.  Send to back(3)
        
                2   3   0   1
        -->     2   0   1   3
*/

class cxUtils_API LRUIndexCache
{
public:
    LRUIndexCache() : zFirst(-1), zLast(-1) {}

    int size() const { return (int) m_entrys.size(); }
    
    int first() const { return zFirst; }
    
    // Grow the cache by 1
    void Grow();
    
    // Sends the value at the front of the list to the back.
    void Rotate();

    // Send z to the back of the list (if not already)
    void SendToBack(int z);
    
    void Write(xostream& os) const;
    
    void Get(xvector<int>& L) const;

private:    
    bool IsValidZ(int z) const
    {
        return 0 <= z && z < size();
    }
    static bool IsNull(int z)
    {
        return z == -1;
    }

private:
    struct Entry
    {
        int zPrev;
        int zNext;
    };
    xvector<Entry> m_entrys;    // Entries indexed by z form a double linked list
    int zFirst;                 // z position at start of the list, or -1 if list is empty
    int zLast;                  // z position at end of the list, or -1 if list is empty
};

#define TRACE_SessionValueCache 0

///////////////////////////////////////////////////////////////////////////////////////////////////
// SessionValueCache

/*
Used by a session to amortize the cost of sending values in type T that are expensive to 
send, on the assumption that for each value to be sent it is often the case that the same value
had been sent recently.  Therefore it is instead possible to send an index value - typically
only a single byte.

An instance of a SessionValueCache<T,N> is used by the sender, and a separate instance is used
by the receiver.  It is crucial that N is the same on both sender and receiver.

This will cache up to N values of type T during an ongoing session.  The sender calls 
Serialise(ar,v) to send each value v of type T.  The reader has an independent copy of a 
SessionValueCache<T,N> and calls Deserialise(ar,T& v) to read each value v of type T.

If the sender finds the value in the cache then it reduces the bandwidth by only sending an 
index value (called a 'z+1' value).  This may only take up a single byte.  Otherwise the sender
first sends a 0 byte (a marker) and the value of v.

The receiver uses exactly the same logic for inserting new values into the cache, making it 
possible for the receiver to implicitly know the mapping between cached values and their z 
values used by the sender.
*/

template<typename T, ssize_t N, class Base>
class SessionValueCache : public Base
{
public:
    SessionValueCache() : size_(0) {}
    
    #if TRACE_SessionValueCache
        void Write(xostream& os) const
        {
            xvector<int> L;
            m_ic.Get(L);
            os << "IC = " << L << "   ";
            os << '[';
            for (int i=0 ; i < L.size() ; ++i)
            {
                if (i) os << ',';
                os << m_lut[L[i]];
            }
            os << ']';
            os << '\n';
        }
    #endif    
    
    // Write the given value to the archive, making use of a cached value if possible
    template<typename Archive>
    void Serialise(Archive& ar, const T& v)
    {
        typename MAP::const_iterator j = m_map.find(v);
        if (j == m_map.end())
        {
            #if TRACE_SessionValueCache
                Tracer() << "************** Writing 0\n";
            #endif
            ar << (octet_t) 0;   // Send marker to indicate new cache value being sent
            ar << v;
            InsertIntoCache(v);
        }
        else
        {
            // Found in the cache - send the index.  
            Base::SerialiseIndex(ar,j->second);
            
            #if TRACE_SessionValueCache
                Tracer() << "Found in cache : sending v = " << v << " using index " << j->second-1 << '\n';
            #endif
            m_ic.SendToBack(j->second-1);
        }
        
        #if TRACE_SessionValueCache
            Write(Tracer());
        #endif
    }    

    // A version of Write that allows for a class to supply the write of the value to
    // the archive.
    template <typename Archive, typename F>
    void Serialise(Archive& ar, const T& v, F& f)
    {
        typename MAP::const_iterator j = m_map.find(v);
        if (j == m_map.end())
        {
            #if TRACE_SessionValueCache
                Tracer() << "************** Writing 0\n";
            #endif
            ar << (octet_t) 0;   // Send marker to indicate new cache value being sent
            f.Serialise(ar,v);
            InsertIntoCache(v);
        }
        else
        {
            // Found in the cache - send the index.  
            Base::SerialiseIndex(ar,j->second);
            
            #if TRACE_SessionValueCache
                Tracer() << "Found in cache : sending v = " << v << " using index " << j->second-1 << '\n';
            #endif
            m_ic.SendToBack(j->second-1);
        }
        
        #if TRACE_SessionValueCache
            Write(Tracer());
        #endif
    }    

    // Read the given value from the archive, making use of a cached value if possible
    template<typename Archive>
    void Deserialise(Archive& ar, T& v)
    {
        unsigned int i;
        Base::DeserialiseIndex(ar,i);
        if (i)
        {
            #if TRACE_SessionValueCache
                Tracer() << "i = " << i << "  size_ = " << size_ << '\n';
            #endif
            
            cxAssert(i <= size_);
            v = m_lut[i-1];
            m_ic.SendToBack(i-1);
            #if TRACE_SessionValueCache
                Tracer() << "Found in cache : reading v = " << v << " using index " << i-1 << '\n';
            #endif
        }
        else
        {
            ar >> v;
            
            #if TRACE_SessionValueCache
                Tracer() << "Read v = " << v << '\n';
            #endif
            
            // v won't be in the cache (otherwise the sender would have sent the index position
            cxAssert(m_map.find(v) == m_map.end());
            
            InsertIntoCache(v);
        }
        #if TRACE_SessionValueCache
            Write(Tracer());
        #endif
    }

    // A version of Read that allows for a class to supply the read of the value from
    // the archive.
    template <typename Archive, typename F>
    void Deserialise(Archive& ar, T& v, F& f)
    {
        unsigned int i;
        Base::DeserialiseIndex(ar,i);
        if (i)
        {
            cxAssert(i <= size_);
            v = m_lut[i-1];
            m_ic.SendToBack(i-1);
        }
        else
        {
            f.Deserialise(ar,v);
            cxAssert(m_map.find(v) == m_map.end());
            InsertIntoCache(v);
        }
    }
    
    void BoostrapValue(const T& v)
    {
        InsertIntoCache(v);    
    }
    
private:
    // Assuming v is not already present, update the cache so that it now holds v.
    void InsertIntoCache(const T& v)
    {
        if (size_ < N)
        {
            // Cache is not full so no need to evict.
            
            m_ic.Grow();
            
            // Record mapping  size_ <--> v
            //Tracer() << "Not found in cache : grow with v = " << v << " at index " << size_ << '\n';
            m_map[v] = size_+1;
            m_lut[size_] = v;
            ++size_;
        }
        else
        {
            // Cache is full.  So eviction is used.  We evict LRU in the cache - i.e. at the front
            // of the eviction queue.
            int i = m_ic.first();
            cxAssert(0 <= i && i < size_);

            // We erase the entry at the front of the eviction queue - i.e. we steal its index 
            // value.  Hence we need to rotate the index values in the eviction queue
            m_ic.Rotate();

            //Tracer() << "Not found in cache : evicted v = " << m_lut[i] << " and replacing v = " << v << " at index " << i << '\n';
            typename MAP::iterator k = m_map.find(m_lut[i]);
            cxAssert(k != m_map.end());
            m_map.erase(k);
            
            // Evict the old value in the cache
            m_lut[i] = v;
            m_map[v] = i+1;
        }
    }
    
private:
    typedef std::map<T,int> MAP;
    MAP m_map;
    T m_lut[N];
    int size_;
    LRUIndexCache m_ic;
};

struct SessionValueCacheBase1
{
    template<typename Archive>
    inline static void SerialiseIndex(Archive& ar, unsigned int i)
    {
        ar << AsCompressedInt(i);
    }
    template<typename Archive>
    inline static void DeserialiseIndex(Archive& ar, unsigned int& i)
    {
        CompressedInt<unsigned int> c;
        ar >> c;
        i = c.val;
    }
    
    inline static void SerialiseIndex(Archive& ar, unsigned int i)
    {
        SerialiseVariableLengthUint(ar,i);
    }
};

struct SessionValueCacheBase2
{
    template<typename Archive>
    inline static void SerialiseIndex(Archive& ar, unsigned int i)
    {
        cxAssert(i < 256);
        ar << (octet_t) i;
    }
    template<typename Archive>
    inline static void DeserialiseIndex(Archive& ar, unsigned int& i)
    {
        octet_t b;
        ar >> b;
        i = b;
    }

    inline static void SerialiseIndex(Archive& ar, unsigned int i)
    {
        cxAssert(i < 256);
        ar << (octet_t) i;
    }
};

// Optimised version for N = 255
template<typename T,ssize_t N = 255>
class SessionValueCache2 : public SessionValueCache<T,N,SessionValueCacheBase2>
{
};

} // namespace ceda

#endif // include guard