MultiSubString.h
// MultiSubString.h
//
// Author David Barrett-Lennard
// (C)opyright Cedanet Pty Ltd 2006
#pragma once
#ifndef Ceda_cxUtils_MultiSubString_H
#define Ceda_cxUtils_MultiSubString_H
#include "cxUtils.h"
#include "xstring.h"
#include "xvector.h"
#include "SubString.h"
#include "StringExt.h"
namespace ceda
{
class xostream;
///////////////////////////////////////////////////////////////////////////////////////////////////
// MultiSubString
// Logically represents a single string that is physically represented as an ordered list of
// SubStrings. Note that a SubString is itself an alias to a range of characters within some other
// string.
// None of the text represented by the MultiSubString is owned by the MultiSubString.
// Note that unlike a SubString, a MultiSubString is closed under insert or erase operations.
// For example, erasing some characters out of the middle of a SubString leaves two disconnected
// pieces that can only be represented by a MultiSubString.
// A string value represented as an ordered list of SubString pieces (m_pieces). size_ holds the
// length reported by size(). Iteration is provided by const_iterator / const_reverse_iterator.
class cxUtils_API MultiSubString
{
public:
// Constructs an empty string with no pieces.
MultiSubString() : size_(0) {}
// Each of the following constructors wraps its argument(s) in a single SubString, stores it as the
// only piece and records that piece's size as the total size.
// Note that these constructors are not explicit, so they also act as implicit conversions.
MultiSubString(ConstStringZ str) { SubString s(str); m_pieces.push_back(s); size_ = s.size(); }
MultiSubString(const xchar* p1, const xchar* p2) { SubString s(p1,p2); m_pieces.push_back(s); size_ = s.size(); }
MultiSubString(const xstring& str) { SubString s(str); m_pieces.push_back(s); size_ = s.size(); }
// Total number of characters
ssize_t size() const { return size_; }
bool empty() const { return size_ == 0; }
// Writes the represented text into str (defined out of line).
void GetString(xstring& str) const;
// Convenience overload that returns the text by value.
xstring GetString() const { xstring s; GetString(s); return s; }
// Character at zero based index i (defined out of line).
xchar operator[](ssize_t i) const;
// todo: implement erase() and insert() methods
// Read-only iterator over the characters of a MultiSubString. It tracks the owning string, the
// zero based character index, the current piece and a pointer to the current character.
class cxUtils_API const_iterator
{
public:
// Default constructed iterators have no owning string and a null character pointer, so ptr()
// and operator*() assert on them.
const_iterator() : m_mss(nullptr), m_index(0), m_pos(nullptr) {}
// mss   : the string being iterated
// index : zero based character index within mss
// i     : the piece containing the current character
// pos   : the current character within that piece, or nullptr when there is none (see end()/rend())
const_iterator(const MultiSubString* mss, ssize_t index, xvector<SubString>::const_iterator i, const xchar* pos) :
m_mss(mss),
m_index(index),
m_i(i),
m_pos(pos)
{
}
// Get the string obtained from the next n characters from this position
void GetString(ssize_t n, xstring& str) const;
// Two iterators are equal when they have the same index into the same MultiSubString. The piece
// iterator and character pointer are deliberately not compared.
bool operator==(const const_iterator& rhs) const
{
return m_index == rhs.m_index && m_mss == rhs.m_mss;
}
bool operator!=(const const_iterator& rhs) const
{
return !operator==(rhs);
}
// Character at offset i relative to this position (defined out of line).
xchar operator[](ssize_t i) const;
// Zero based index of this position within the MultiSubString
ssize_t index() const { return m_index; }
// Pointer to the current character. Asserts that there is a current character (m_pos non-null).
const xchar* ptr() const
{
cxAssert(m_pos);
return m_pos;
}
// The current character. Asserts that there is a current character (m_pos non-null).
xchar operator*() const
{
cxAssert(m_pos);
return *m_pos;
}
// Helpers used by operator--() and operator++() when the move leaves the current piece
// (defined out of line).
void ScanForPrevNonEmptySubString();
void ScanForNextNonEmptySubString();
// Pre-increment. Must not be called on a position at or beyond the end.
const_iterator& operator++()
{
cxAssert(m_mss);
cxAssert(m_index < m_mss->size_);
cxAssert(m_i != m_mss->m_pieces.end());
// Note the side effect in the condition: when m_pos is non-null it is advanced here, and the
// scan only runs if that advance reached the end of the current piece. A null m_pos (no current
// character) goes straight to the scan.
if (m_pos == nullptr || ++m_pos == m_i->end())
{
ScanForNextNonEmptySubString();
}
++m_index;
return *this;
}
// Post-increment: returns a copy of the position before the move.
const const_iterator operator++(int) { const_iterator s = *this; ++(*this); return s; }
// Pre-decrement. The index is allowed to go from 0 to -1, which is the index used by rend().
const_iterator& operator--()
{
cxAssert(m_mss);
cxAssert(0 <= m_index);
// With no current character, or at the first character of the current piece, the previous
// character lives in an earlier piece; otherwise just step back within the piece.
// The null test comes first so that m_i is not dereferenced when m_pos is null.
if (m_pos == nullptr || m_pos == m_i->begin())
{
ScanForPrevNonEmptySubString();
}
else
{
--m_pos;
}
--m_index;
return *this;
}
// Post-decrement: returns a copy of the position before the move.
const const_iterator operator--(int) { const_iterator s = *this; --(*this); return s; }
// Move by i characters (defined out of line). operator-= forwards to it with a negated offset.
const_iterator& operator+=(ssize_t i);
const_iterator& operator-=(ssize_t i) { return operator+=(-i); }
const_iterator operator+(ssize_t i) const { const_iterator s = *this; s += i; return s; }
const_iterator operator-(ssize_t i) const { const_iterator s = *this; s -= i; return s; }
// Distance in characters between two positions in the same MultiSubString (asserted).
ssize_t operator-(const const_iterator& rhs) const
{
cxAssert(m_mss == rhs.m_mss);
return m_index - rhs.m_index;
}
private:
// The string being iterated
const MultiSubString* m_mss;
// Zero based index position relative to the MultiSubString
ssize_t m_index;
// Points at current piece
xvector<SubString>::const_iterator m_i;
// Points at current character within a piece
const xchar* m_pos;
};
// Position of the first character (defined out of line).
const_iterator begin() const;
// One-past-the-end position: index size_, piece iterator at the end of m_pieces, no current character.
const_iterator end() const { return const_iterator(this, size_, m_pieces.end(), nullptr); }
// Append operators (defined out of line).
MultiSubString& operator+=(SubString rhs);
MultiSubString& operator+=(const MultiSubString& rhs);
// Needed to avoid ambiguities
MultiSubString& operator+=(const xstring& rhs) { operator+=(SubString(rhs)); return *this; }
MultiSubString& operator+=(ConstStringZ rhs) { operator+=(SubString(rhs)); return *this; }
// Read-only iterator that visits the characters in reverse order. It wraps a const_iterator that
// points at the current character, so every movement is forwarded in the opposite direction.
class const_reverse_iterator
{
public:
const_reverse_iterator() {}
// Same arguments as the const_iterator constructor; they describe the current character.
const_reverse_iterator(const MultiSubString* mss, ssize_t index, xvector<SubString>::const_iterator i, const xchar* pos) :
m_it(mss,index,i,pos)
{
}
bool operator==(const const_reverse_iterator& rhs) const { return m_it == rhs.m_it; }
bool operator!=(const const_reverse_iterator& rhs) const { return !operator==(rhs); }
// Offsets are negated because a forward step of this iterator is a backward step of m_it.
xchar operator[](ssize_t i) const { return m_it[-i]; }
const xchar* ptr() const { return m_it.ptr(); }
xchar operator*() const { return *m_it; }
const_reverse_iterator& operator++() { --m_it; return *this; }
const const_reverse_iterator operator++(int) { const_reverse_iterator s = *this; ++(*this); return s; }
const_reverse_iterator& operator--() { ++m_it; return *this; }
const const_reverse_iterator operator--(int) { const_reverse_iterator s = *this; --(*this); return s; }
const_reverse_iterator& operator+=(ssize_t i) { m_it -= i; return *this; }
const_reverse_iterator& operator-=(ssize_t i) { return operator+=(-i); }
// Distance measured in the reverse direction, hence the swapped operands.
ssize_t operator-(const const_reverse_iterator& rhs) const { return rhs.m_it - m_it; }
private:
// Forward iterator positioned on the current character
const_iterator m_it;
};
// Reverse iteration start (defined out of line).
const_reverse_iterator rbegin() const;
// Reverse iteration end: index -1 with no current character.
// NOTE(review): m_pieces.begin()-1 forms an iterator before the first piece. For standard containers
// that is undefined behaviour - confirm that xvector permits it.
const_reverse_iterator rend() const { return const_reverse_iterator(this, -1, m_pieces.begin()-1, nullptr); }
private:
// The pieces, in order
xvector<SubString> m_pieces;
// Total number of characters, as returned by size()
ssize_t size_;
friend class const_iterator;
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// MultiSubStringRange
// Represents a substring within a MultiSubString
// A (start position, length) pair describing a run of characters inside a MultiSubString.
// The range can be consumed from the front with operator++ / operator+=.
class MultiSubStringRange
{
public:
    // Empty range with a default constructed start position.
    MultiSubStringRange() : size_(0) {}

    // Range of 'size' characters starting at p1.
    MultiSubStringRange(MultiSubString::const_iterator p1, ssize_t size)
        : m_p1(p1), size_(size)
    {
    }

    // Range starting at p1 whose length is the distance from p1 to p2.
    MultiSubStringRange(MultiSubString::const_iterator p1, MultiSubString::const_iterator p2)
        : m_p1(p1), size_(p2 - p1)
    {
    }

    // Range covering the whole of s. Not explicit, so a MultiSubString converts implicitly.
    MultiSubStringRange(const MultiSubString& s)
        : m_p1(s.begin()), size_(s.size())
    {
    }

    // Character at offset i from the start of the range.
    xchar operator[](ssize_t i) const
    {
        return m_p1[i];
    }

    // Writes the text of the range into str.
    void GetString(xstring& str) const
    {
        m_p1.GetString(size_, str);
    }

    // Returns the text of the range by value.
    xstring GetString() const
    {
        xstring text;
        m_p1.GetString(size_, text);
        return text;
    }

    // Start position accessors.
    MultiSubString::const_iterator begin() const { return m_p1; }
    void setbegin(MultiSubString::const_iterator p1) { m_p1 = p1; }

    // True when the range is not empty.
    explicit operator bool() const
    {
        return !empty();
    }

    bool empty() const
    {
        return 0 == size_;
    }

    // Sets the length so that the range stops at p2; the start position is unchanged.
    void setend(MultiSubString::const_iterator p2)
    {
        size_ = p2 - m_p1;
    }

    // Length accessors.
    ssize_t size() const { return size_; }
    void setsize(ssize_t sz) { size_ = sz; }

    // Pointer to / value of the first character of the range (forwarded to the start iterator).
    const xchar* ptr() const { return m_p1.ptr(); }
    xchar operator*() const { return *m_p1; }

    // Drops the first character. The range must not be empty.
    MultiSubStringRange& operator++()
    {
        cxAssert(size_ > 0);
        ++m_p1;
        --size_;
        return *this;
    }

    // Post-increment: returns the range as it was before dropping the first character.
    const MultiSubStringRange operator++(int)
    {
        MultiSubStringRange before(*this);
        operator++();
        return before;
    }

    // Drops the first i characters, where 0 <= i <= size().
    MultiSubStringRange& operator+=(ssize_t i)
    {
        cxAssert(0 <= i && i <= size_);
        m_p1 += i;
        size_ -= i;
        return *this;
    }

    /*
    Disabled: would grow the range backwards by i characters.
    MultiSubStringRange& operator-=(ssize_t i)
    {
        m_p1 -= i;
        size_ += i;
        return *this;
    }
    */

private:
    // Position of the first character
    MultiSubString::const_iterator m_p1;
    // Number of characters in the range
    ssize_t size_;
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// Compare two substrings lexicographically. Similar in functionality to strcmp(const char*, const char*)
cxUtils_API int strcmp(MultiSubStringRange s1, MultiSubStringRange s2);
cxUtils_API int strcmp(MultiSubStringRange s1, ConstStringZ s2);

// Equality of two ranges (defined out of line)
cxUtils_API bool operator==(MultiSubStringRange s1, MultiSubStringRange s2);

// The remaining range/range operators are derived from operator== and operator<,
// and operator< itself is defined by the sign of strcmp.
inline bool operator!=(MultiSubStringRange s1, MultiSubStringRange s2)
{
    const bool same = (s1 == s2);
    return !same;
}

inline bool operator<(MultiSubStringRange s1, MultiSubStringRange s2)
{
    return 0 > strcmp(s1, s2);
}

inline bool operator<=(MultiSubStringRange s1, MultiSubStringRange s2)
{
    const bool rhsIsLess = (s2 < s1);
    return !rhsIsLess;
}

inline bool operator>(MultiSubStringRange s1, MultiSubStringRange s2)
{
    const bool rhsIsLess = (s2 < s1);
    return rhsIsLess;
}

inline bool operator>=(MultiSubStringRange s1, MultiSubStringRange s2)
{
    const bool rhsIsLessOrEqual = (s2 <= s1);
    return rhsIsLessOrEqual;
}

// Comparison of a range with a zero terminated string
inline bool operator==(MultiSubStringRange s1, ConstStringZ s2)
{
    return 0 == strcmp(s1, s2);
}

inline bool operator!=(MultiSubStringRange s1, ConstStringZ s2)
{
    const bool same = (s1 == s2);
    return !same;
}
// Comparison of a whole MultiSubString with a zero terminated string
inline bool operator==(const MultiSubString& s1, ConstStringZ s2)
{
    const MultiSubStringRange whole(s1);
    return 0 == strcmp(whole, s2);
}

inline bool operator!=(const MultiSubString& s1, ConstStringZ s2)
{
    const bool same = (s1 == s2);
    return !same;
}

// Comparison of two whole MultiSubStrings, performed on ranges that cover each of them.
// The other relations are derived from operator== and operator<.
inline bool operator==(const MultiSubString& s1, const MultiSubString& s2)
{
    const MultiSubStringRange lhs(s1);
    const MultiSubStringRange rhs(s2);
    return lhs == rhs;
}

inline bool operator!=(const MultiSubString& s1, const MultiSubString& s2)
{
    const bool same = (s1 == s2);
    return !same;
}

inline bool operator<(const MultiSubString& s1, const MultiSubString& s2)
{
    const MultiSubStringRange lhs(s1);
    const MultiSubStringRange rhs(s2);
    return lhs < rhs;
}

inline bool operator<=(const MultiSubString& s1, const MultiSubString& s2)
{
    const bool rhsIsLess = (s2 < s1);
    return !rhsIsLess;
}

inline bool operator>(const MultiSubString& s1, const MultiSubString& s2)
{
    const bool rhsIsLess = (s2 < s1);
    return rhsIsLess;
}

inline bool operator>=(const MultiSubString& s1, const MultiSubString& s2)
{
    const bool rhsIsLessOrEqual = (s2 <= s1);
    return rhsIsLessOrEqual;
}
// NOTE(review): defined in the implementation file. Judging by the signature, this advances s over
// leading indentation up to 'indent' columns, with tabSize giving the width of a tab - confirm.
cxUtils_API void ScanIndentation(MultiSubStringRange& s, ssize_t indent, ssize_t tabSize = 4);
// NOTE(review): defined in the implementation file. Judging by the name, this writes s to os while
// changing its indentation from srcIndent to dstIndent - confirm.
cxUtils_API void WriteMultiSubStringWithAdjustedIndent(xostream& os, ssize_t dstIndent, MultiSubStringRange s, ssize_t srcIndent);
// Returns pointer to start of given line (zero based), or nullptr if s doesn't contain enough lines
cxUtils_API const xchar* GetLineNumberPosition(MultiSubStringRange s, ssize_t lineNumber);
// Returns true if s begins with the given prefix. The comparison is case sensitive. Always returns true
// if the prefix is empty.
cxUtils_API bool CheckPrefix(MultiSubStringRange s, MultiSubStringRange prefix);
// Returns true if s ends with the given suffix. The comparison is case sensitive. Always returns true
// if the suffix is empty.
cxUtils_API bool CheckSuffix(MultiSubStringRange s, MultiSubStringRange suffix);
// If s starts with the given prefix then advance s past the prefix and return true
cxUtils_API bool EatPrefix(MultiSubStringRange& s, MultiSubStringRange prefix);
// Find position of the first occurrence of sFind within s. Returns nullptr if not found
cxUtils_API const xchar* ForwardsFind(MultiSubStringRange s, MultiSubStringRange sFind);
// Find position of the first occurrence of character c within s. Returns nullptr if not found
cxUtils_API const xchar* ForwardsFind(MultiSubStringRange s, xchar c);
// Converts a hexadecimal literal of the form 0x<digits> (or 0X<digits>) into value.
// Preconditions (asserted): str is longer than two characters, starts with "0x" or "0X", and
// every following character is accepted by MapHexDigit.
template<typename T>
void MultiSubStringRangeToHexInt(MultiSubStringRange str, T& value)
{
    const ssize_t n = str.size();
    cxAssert(n > 2);
    cxAssert(str[0] == '0');
    cxAssert(str[1] == 'x' || str[1] == 'X');

    value = 0;

    // Skip the two character prefix, then fold in one hex digit (4 bits) per character
    ssize_t k = 2;
    while (k < n)
    {
        const int d = MapHexDigit(str[k]);
        cxAssert(d != -1);
        value = (value << 4) | d;
        ++k;
    }
}
// Converts a non-empty run of decimal digits into value.
// Preconditions (asserted): str is not empty and every character is accepted by MapDecDigit.
template<typename T>
void MultiSubStringRangeToInt(MultiSubStringRange str, T& value)
{
    const ssize_t n = str.size();
    cxAssert(n > 0);

    value = 0;

    // Most significant digit first: multiply the running value by ten and add the next digit
    ssize_t k = 0;
    while (k < n)
    {
        const int d = MapDecDigit(str[k]);
        cxAssert(d != -1);
        value = value * 10 + d;
        ++k;
    }
}
// Reads as many leading hexadecimal digits from s as possible, accumulating them into x (most
// significant digit first) and advancing s past the digits that were read.
//
// s : range to scan; on return it starts at the first character that is not a hex digit
// x : receives the value; set to 0 if no digit was read
//
// Returns true if one or more digits were read.
//
// Progress is measured with s.size() rather than by comparing s.ptr() before and after:
// ptr() asserts that the range has a current character, which is not the case for an empty or
// default constructed range, and a character pointer does not identify a position uniquely when
// two pieces of the underlying MultiSubString alias the same text.
template <typename T>
bool MultiSubStringRangeToHex(MultiSubStringRange& s, T& x)
{
    const ssize_t sizeBefore = s.size();
    x = 0;
    int d;
    while (s && (d = MapHexDigit(*s)) != -1)
    {
        x = (x << 4) | d;
        ++s;    // drops one character, so size() shrinks by one per digit
    }
    return s.size() != sizeBefore; // Return true if read one or more digits
}
// NOTE(review): defined in the implementation file. Judging by the name, this writes a double quoted
// form of s into t; the escaping rules are not visible here - confirm.
cxUtils_API void MakeDoubleQuotedString(MultiSubStringRange s, xstring& t);
// Get the line number corresponding to the given position, or -1 if the range s doesn't contain pos
cxUtils_API ssize_t GetLineNumber(MultiSubStringRange s, const xchar* pos);
// Count the number of lines of text by scanning the sub string for line feeds. The last line
// doesn't need to end with a line feed
cxUtils_API ssize_t CountNumLinesOfText(MultiSubStringRange s);
///////////////////////////////////////////////////////////////////////////////////////////////////
// HumanReadableCharInMultiSubStringRange
// Wrapper around a MultiSubStringRange that selects a dedicated operator<< overload.
// NOTE(review): presumably that overload prints the character at the start of the range in a
// human readable form - confirm against the implementation.
struct HumanReadableCharInMultiSubStringRange
{
    // Not explicit: a MultiSubStringRange converts implicitly to this wrapper.
    HumanReadableCharInMultiSubStringRange(MultiSubStringRange s)
        : m_s(s)
    {
    }

    // The wrapped range
    MultiSubStringRange m_s;
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// Scanning and tokenising
// The following are defined in the implementation file. Each takes the range by reference so that
// it can be advanced.
// NOTE(review): presumably s is advanced past any leading white space - confirm.
cxUtils_API void ScanWhiteSpace(MultiSubStringRange& s);
// NOTE(review): presumably s is advanced past leading digits, returning true if any were found - confirm.
cxUtils_API bool ScanDigits(MultiSubStringRange& s);
// NOTE(review): presumably s is advanced past a leading identifier, returning true if one was found - confirm.
cxUtils_API bool ScanIdentifier(MultiSubStringRange& s);
///////////////////////////////////////////////////////////////////////////////////////////////////
// Must be inside ceda namespace !!!
// Stream insertion of a range (defined out of line)
cxUtils_API xostream& operator<<(xostream& os, MultiSubStringRange s);

// Stream insertion of a whole MultiSubString, forwarded to the range overload
inline xostream& operator<<(xostream& os, const MultiSubString& s)
{
    const MultiSubStringRange whole(s);
    return os << whole;
}

// Stream insertion of the human readable wrapper (defined out of line)
cxUtils_API xostream& operator<<(xostream& os, const HumanReadableCharInMultiSubStringRange& s);
} // namespace ceda
#endif // include guard