CSParser.h

// CSParser.h
//
// Author David Barrett-Lennard
// (C)opyright Cedanet Pty Ltd 2012

@import "cxCedaScript.h"
@import "CSValue.h"
@import "Ceda/cxObject/Object.h"
#include "Ceda/cxMacroExpander/ParserBase.h"
#include "Ceda/cxUtils/xstring.h"
#include "Ceda/cxUtils/CedaAssert.h"
#include "Ceda/cxUtils/IException.h"

/*
1)  We need a threadsafe system for caching files that have been 
    a)  read into memory
    b)  macro-expanded.  After macro expansion we can throw away the original file?  Maybe not because
        we want to be able to pinpoint errors.
    c)  scanned to find the functions
    
    CSFunctionParams:
        CSSourceFile* m_file;           // File in which the function appears
        set<SubString> m_nonlocals;     // Names of the nonlocals
        vector<SubString> m_args;       // Ordered list of the formal arguments
        SubString m_body;               // Body of the function
        
    typedef pair<const SubString,CSFunctionParams> CSFunction;    

    CSSourceFileParams:
        CSSourceFileSet* m_files;       // File set to which this file belongs
        set<CSSourceFile*> m_imports;   // all directly or indirectly imported files
        map<SubString, CSFunctionParams> m_functions;  // All functions defined in this file

    typedef pair<const xstring,CSSourceFileParams> CSSourceFile;
    
    CSSourceFileSet:
        map<xstring, CSSourceFileParams> m_files;   // Files keyed by canonical representation of path

2)  A cedascript thread has a stack of frames and an entry function in a given file.  
    Each frame has its own parser for parsing the body of the associated function.

    LSToken:
        EToken m_type;
        SubString m_str;

    ParserBase:
        xstring m_stringForToken;
        LSToken m_token;
        LSToken m_prevToken;
        SubString m_str;

    CSFrame:
        ParserBase
        std::map<Varname,CSValue> m_locals;
        CSFrame* m_parent;
        CSFunction* m_function;         // Associated function

    Note that from a frame we have access to:
        -   the locals
        -   the non-locals
        -   the parent frame which in turn has locals, nonlocals, and parent frame
        -   the containing file
            -   the functions in that file
            -   the set of imports of that file
    Therefore name lookup is straightforward.


------------------------------

Proposal
--------

MpfwSourceFileList allows for a set of source files where for each source file we record both the original
and post-macro-expanded form, plus a data structure which allows for errors in the expanded form to be
mapped back to positions in an original file.

    MpfwSourceFile:
        xstring m_localPath;    // We record a local path to the file; this is appropriate for display in the
                                // MSVC output window.
        xstring m_buffer;       // The contents of the file read into memory in a single contiguous buffer.
        xstring m_expanded;     // After macro expansion
        OutputPieces m_pieces;  // Allows position in m_expanded to be mapped back to source 
                                // positions

    MpfwSourceFileList:
        xvector<MpfwSourceFile*>

We assume the cedascript system builds on top of this system.  We even use MpfwParser which subtypes 
ExpressionParser and ImportHandler.   But this is not used for executing cedascript.  Instead it is only used 
by the comparatively simple parsing that occurs when cedascript files are loaded into memory, in order to 
handle @import directives, and to scan the file for function definitions.

We can subclass MpfwSourceFile.  This works because of the virtual destructor, and we have the ability to 
override the following function 

    // Override if a subclass of MpfwSourceFile is to be used instead
    virtual MpfwSourceFile* MpfwParser::CreateMpfwSourceFile()
    {
        return new MpfwSourceFile;
    }
    
to instead new the subclass.

Issue
-----

How do we make the MpfwSourceFileList threadsafe?  Currently it isn't, and it seems difficult to do
this after the fact.

How will thread safety work?  We have a few issues to consider:

1.  If two threads try to import the same file at the same time, then we must either allow both 
    of them to import the same file twice and only add it once, or make one thread wait on the
    other.
    
2.  After a file has been 
        a)  read into memory
        b)  macro-expanded 
        c)  scanned to find the functions
    
    We expect that only then do we update a data structure that makes the file visible to executing
    threads.  However we want access to not require a mutex!
    
    Proposal:  the following map is protected by a mutex
    
        map<xstring,CSFile>
        
    All access to this map must go through the mutex.  An entry is only inserted into the
    map when the CSFile state has been fully calculated.
    
    A thread that wants to update the map should
        1)  in a lock check if there is already an entry. If so do nothing
        2)  Calculate a private CSFile.  As import statements are processed, apply these same steps to each imported file
        3)  in a lock try to insert the CSFile.  If already found then use the existing one instead
            and throw away the calculated one.
        
    Note that a CSFile in the map is immutable.  It is therefore available to any number of threads
    without any need for locks.
    
    Note well that a CSFile records pointers to its imported CSFiles, so from a CSFile one can navigate
    to its imports without needing to access the map, so there is no need to lock a mutex.
    
Proposal 2
----------

    Definition:  To /prepare/ a CSFile means to
    
                1)  load the file into memory
                2)  Macro expand it
                3)  Scan all the import statements, ensure they are prepared recursively
                    and get back a set of pointers to CSFiles that are directly or indirectly imported
                4)  Scan and record information about all the functions in the file

    class CSFile
    {
    
        bool ready;                     // ready to be executed by cedascript
                                        // only accessed inside mutex of CSFileManager
        ManualResetEvent readyEvent;    // Signalled when ready to be executed by cedascript
    };

    class CSFileManager
    {
    public:
        const CSFile& GetCSFile(xstring path)
        {
            const CSFile* f;
            bool needToPrepare;
            {
                std::lock_guard<std::mutex> lock(mutex_);
                if (m_files.find(path))
                {
                    f = m_files[path];
                    if (f->ready)
                    {
                        return *f;
                    }
                    needToPrepare = false;
                }
                else
                {
                    f = new CSFile;
                    m_files[path] = f;
                    
                    // Make this thread responsible for preparing the CSFile
                    needToPrepare = true;
                }
            }
            
            assert(f);
            
            if (needToPrepare)
            {
                // Do everything needed to make CSFile ready to be executed by cedascript
                Prepare(path,*f);
                
                {
                    std::lock_guard<std::mutex> lock(mutex_);
                    f->ready = true;
                }
                f->readyEvent.Signal();
            }
            else
            {
                // entry already exists, but not ready.  Wait until it is ready
                f->readyEvent.Wait();
                {
                    std::lock_guard<std::mutex> lock(mutex_);        // is this needed?
                    assert(f->ready);
                }
            }
            return *f;
        }
    
    private:
        mutable std::mutex mutex_;
        map<xstring,CSFile*> m_files;
    };
*/

namespace ceda
{

///////////////////////////////////////////////////////////////////////////////////////////////////
// CSParser

// Expression parser for cedascript, built on ParserBase.
//
// Apart from ParseValue (defined inline below) every member is only declared in this class body.
// All of the Parse...Expression members take a CSValue by non-const reference; ParseValue uses
// UnsafeParseExpression as an out-parameter style call, and the others presumably follow the same
// convention (TODO confirm against the definitions).
class @api CSParser : public ParserBase
{
public:
    // Parse one expression and return its value converted to T.
    //
    // allowExplicitConversions is forwarded unchanged to As<T>, which performs the conversion
    // from the parsed CSValue to T.
    //
    // Throws LexScannerException, constructed with m_token, when
    //   - the parsed value is void (x.is<void>()), or
    //   - a CSValueException escapes from parsing or from the conversion; in that case the
    //     exception's m_description is used as the message.
    template <typename T>
    T ParseValue(bool allowExplicitConversions = false)
    {
        try
        {
            CSValue x;
            UnsafeParseExpression(x);
            if (x.is<void>())
            {
                throw LexScannerException(cxMakeString("Expected " << GetTypeName<T>() << " when read " << m_token), m_token);
            }
            return As<T>(x,allowExplicitConversions);
        }
        catch(const CSValueException& e)
        {
            // The handler only reads the exception, so it is bound by const reference.
            throw LexScannerException(e.m_description.c_str(), m_token);
        }
    }
    
    // Returns a float64.
    // NOTE(review): presumably parses a single float64 argument - confirm against the definition.
    float64 ParseFloat64Arg();

    // NOTE(review): presumably the counterpart of UnsafeParseExpression that does not let
    // CSValueException escape - confirm against the definition.
    void ParseExpression(CSValue& literal);

    // Called by ParseValue, which translates any CSValueException raised here into a
    // LexScannerException.
    void UnsafeParseExpression(CSValue& literal);

    // One member per expression category, named after the category it handles.
    // NOTE(review): the declaration order looks like loosest-to-tightest operator binding
    // (assignment first, primary last) - confirm against the definitions.
    void ParseAssignmentExpression(CSValue& literal);
    void ParseConditionalExpression(CSValue& literal);
    void ParseLogicalOrExpression(CSValue& literal);
    void ParseLogicalAndExpression(CSValue& literal);
    void ParseBitwiseOrExpression(CSValue& literal);
    void ParseBitwiseExOrExpression(CSValue& literal);
    void ParseBitwiseAndExpression(CSValue& literal);
    void ParseEqualityExpression(CSValue& literal);
    void ParseRelationalExpression(CSValue& literal);
    void ParseShiftExpression(CSValue& literal);
    void ParseAdditiveExpression(CSValue& literal);
    void ParseMultiplicativeExpression(CSValue& literal);
    void ParsePowerExpression(CSValue& literal);
    void ParseUnaryExpression(CSValue& literal);
    void ParsePostFixExpression(CSValue& literal);
    void ParsePrimaryExpression(CSValue& literal);
    void ParseLiteralInBrackets(CSValue& literal);
    void ParsePrimaryIdentifierExpression(CSValue& literal);
};

} // namespace ceda