CSParser.h
// CSParser.h
//
// Author David Barrett-Lennard
// (C)opyright Cedanet Pty Ltd 2012
@import "cxCedaScript.h"
@import "CSValue.h"
@import "Ceda/cxObject/Object.h"
#include "Ceda/cxMacroExpander/ParserBase.h"
#include "Ceda/cxUtils/xstring.h"
#include "Ceda/cxUtils/CedaAssert.h"
#include "Ceda/cxUtils/IException.h"
/*
1) We need a threadsafe system for caching files that have been
a) read into memory
b) macro-expanded. After macro expansion we can throw away the original file? Maybe not because
we want to be able to pinpoint errors.
c) scanned to find the functions
CSFunctionParams:
CSSourceFile* m_file; // File in which the function appears
set<SubString> m_nonlocals; // Names of the nonlocals
vector<SubString> m_args; // Ordered list of the formal arguments
SubString m_body; // Body of the function
typedef pair<const SubString,CSFunctionParams> CSFunction;
CSSourceFileParams:
CSSourceFileSet* m_files; // File set to which this file belongs
set<CSSourceFile*> m_imports; // all directly or indirectly imported files
map<SubString, CSFunctionParams> m_functions; // All functions defined in this file
typedef pair<const xstring,CSSourceFileParams> CSSourceFile;
CSSourceFileSet:
map<xstring, CSSourceFileParams> m_files; // Files keyed by canonical representation of path
2) A cedascript thread has a stack of frames and an entry function in a given file.
Each frame has its own parser for parsing the body of the associated function.
LSToken:
EToken m_type;
SubString m_str;
ParserBase:
xstring m_stringForToken;
LSToken m_token;
LSToken m_prevToken;
SubString m_str;
CSFrame:
ParserBase
std::map<Varname,CSValue> m_locals;
CSFrame* m_parent;
CSFunction* m_function; // Associated function
Note that from a frame we have access to:
- the locals
- the non-locals
- the parent frame which in turn has locals, nonlocals, and parent frame
- the containing file
- the functions in that file
- the set of imports of that file
Therefore name lookup is straightforward.
------------------------------
Proposal
--------
MpfwSourceFileList allows for a set of source files where for each source file we record both the original
and post-macro-expanded form, plus a data structure which allows for errors in the expanded form to be
mapped back to positions in an original file.
MpfwSourceFile:
xstring m_localPath; // We record a local path to the file; this is appropriate for display in the
// MSVC output window.
xstring m_buffer; // The contents of the file read into memory in a single contiguous buffer.
xstring m_expanded; // After macro expansion
OutputPieces m_pieces; // Allows position in m_expanded to be mapped back to source
// positions
MpfwSourceFileList:
xvector<MpfwSourceFile*>
We assume the cedascript system builds on top of this system. We even use MpfwParser which subtypes
ExpressionParser and ImportHandler. But this is not used for executing cedascript. Instead it is only used
by the comparatively simple parsing that occurs when cedascript files are loaded into memory, in order to
handle @import directives, and to scan the file for function definitions.
We can subclass MpfwSourceFile. This works because of the virtual destructor, and we have the ability to
override the following function
// Override if a subclass of MpfwSourceFile is to be used instead
virtual MpfwSourceFile* MpfwParser::CreateMpfwSourceFile()
{
return new MpfwSourceFile;
}
to instead new the subclass.
Issue
-----
How do we make the MpfwSourceFileList threadsafe? Currently it isn't, and it seems difficult to do
this after the fact.
How will thread safety work? We have a few issues to consider:
1. If two threads try to import the same file at the same time, then we must either allow both
of them to import the same file twice and only add it once, or make one thread wait on the
other.
2. After a file has been
a) read into memory
b) macro-expanded
c) scanned to find the functions
We expect that only then do we update a data structure that makes the file visible to executing
threads. However we want access to not require a mutex!
Proposal: the following map is protected by a mutex
map<xstring,CSFile>
All access to this map must go through the mutex. An entry is only inserted into the
map when the CSFile state has been fully calculated.
A thread that wants to update the map should
1) in a lock check if there is already an entry. If so do nothing
2) Calculate a private CSFile. As import statements are encountered, apply these same steps to each imported file
3) in a lock try to insert the CSFile. If already found then use the existing one instead
and throw away the calculated one.
Note that a CSFile in the map is immutable. It is therefore available to any number of threads
without any need for locks.
Note well that a CSFile records pointers to its imported CSFiles, so from a CSFile one can navigate
to its imports without needing to access the map, so there is no need to lock a mutex.
Proposal 2
----------
Definition: To /prepare/ a CSFile means to
1) load the file into memory
2) Macro expand it
3) Scan all the import statements, ensure they are prepared recursively
and get back a set of pointers to CSFiles that are directly or indirectly imported
4) Scan and record information about all the functions in the file
class CSFile
{
bool ready; // ready to be executed by cedascript
// only accessed inside mutex of CSFileManager
ManualResetEvent readyEvent; // Signalled when ready to be executed by cedascript
};
class CSFileManager
{
public:
const CSFile& GetCSFile(xstring path)
{
const CSFile* f;
bool needToPrepare;
{
std::lock_guard<std::mutex> lock(mutex_);
if (m_files.find(path))
{
f = m_files[path];
if (f->ready)
{
return *f;
}
needToPrepare = false;
}
else
{
f = new CSFile;
m_files[path] = f;
// Make this thread responsible for preparing the CSFile
needToPrepare = true;
}
}
assert(f);
if (needToPrepare)
{
// Do everything needed to make CSFile ready to be executed by cedascript
Prepare(path,*f);
{
std::lock_guard<std::mutex> lock(mutex_);
f->ready = true;
}
f->readyEvent.Signal();
}
else
{
// entry already exists, but not ready. Wait until it is ready
f->readyEvent.Wait();
{
std::lock_guard<std::mutex> lock(mutex_); // is this needed?
assert(f->ready);
}
}
return *f;
}
private:
mutable std::mutex mutex_;
map<xstring,CSFile*> m_files;
};
*/
namespace ceda
{
///////////////////////////////////////////////////////////////////////////////////////////////////
// CSParser
class @api CSParser : public ParserBase
{
public:
    // Parses a single expression and returns its value converted to T.
    //
    // allowExplicitConversions is passed through unchanged to As<T>().
    //
    // Throws LexScannerException, positioned at m_token, in two situations:
    //   - the parsed expression produced a void value, so there is nothing to convert to T
    //   - a CSValueException escaped from the parse or from the conversion; its description
    //     is reused as the message of the LexScannerException
    template <typename T>
    T ParseValue(bool allowExplicitConversions = false)
    {
        try
        {
            CSValue parsed;
            UnsafeParseExpression(parsed);

            if (!parsed.is<void>())
            {
                return As<T>(parsed,allowExplicitConversions);
            }

            // Only reached for a void result. Raised inside the try block on purpose so that the
            // handler below still covers the construction of the message.
            throw LexScannerException(cxMakeString("Expected " << GetTypeName<T>() << " when read " << m_token), m_token);
        }
        catch(CSValueException& valueError)
        {
            // Report value errors as scanner errors tied to the current token
            throw LexScannerException(valueError.m_description.c_str(), m_token);
        }
    }

    // Parses an argument as a float64 (definition not visible in this header)
    float64 ParseFloat64Arg();

    // Entry points for parsing an expression into 'literal'.
    // NOTE(review): ParseValue() calls the Unsafe variant and translates CSValueException itself;
    // presumably ParseExpression() performs a similar translation - confirm against the .cpp.
    void ParseExpression(CSValue& literal);
    void UnsafeParseExpression(CSValue& literal);

    // One parsing routine per kind of expression, each writing its result to 'literal'.
    // NOTE(review): the names suggest a recursive descent over operator precedence levels,
    // listed from loosest to tightest binding - confirm against the definitions.
    void ParseAssignmentExpression(CSValue& literal);
    void ParseConditionalExpression(CSValue& literal);
    void ParseLogicalOrExpression(CSValue& literal);
    void ParseLogicalAndExpression(CSValue& literal);
    void ParseBitwiseOrExpression(CSValue& literal);
    void ParseBitwiseExOrExpression(CSValue& literal);
    void ParseBitwiseAndExpression(CSValue& literal);
    void ParseEqualityExpression(CSValue& literal);
    void ParseRelationalExpression(CSValue& literal);
    void ParseShiftExpression(CSValue& literal);
    void ParseAdditiveExpression(CSValue& literal);
    void ParseMultiplicativeExpression(CSValue& literal);
    void ParsePowerExpression(CSValue& literal);
    void ParseUnaryExpression(CSValue& literal);
    void ParsePostFixExpression(CSValue& literal);

    // Primary expressions, bracketed literals and identifiers
    void ParsePrimaryExpression(CSValue& literal);
    void ParseLiteralInBrackets(CSValue& literal);
    void ParsePrimaryIdentifierExpression(CSValue& literal);
};
} // namespace ceda