MultiSubString.h

// MultiSubString.h
//
// Author David Barrett-Lennard
// (C)opyright Cedanet Pty Ltd 2006

#pragma once
#ifndef Ceda_cxUtils_MultiSubString_H
#define Ceda_cxUtils_MultiSubString_H

#include "cxUtils.h"
#include "xstring.h"
#include "xvector.h"
#include "SubString.h"
#include "StringExt.h"

namespace ceda
{
class xostream;

///////////////////////////////////////////////////////////////////////////////////////////////////
// MultiSubString

// Logically represents a single string that is physically stored as an ordered list of
// SubStrings.  A SubString is itself an alias to a range of characters within some other
// string, so none of the text represented by a MultiSubString is owned by the MultiSubString.
// Unlike a SubString, a MultiSubString is closed under insert and erase operations.
// For example, erasing some characters out of the middle of a SubString leaves two disconnected
// pieces that can only be represented by a MultiSubString.
//
// NOTE(review): because the text is not owned, the aliased strings presumably have to outlive
// the MultiSubString and every iterator/range obtained from it - confirm against SubString.

class cxUtils_API MultiSubString
{
public:
    // Empty string: no pieces, length zero.
    MultiSubString() : size_(0) {}

    // Each of the following constructors wraps its argument in a single SubString, stores that
    // as the only piece, and records the piece's size as the total length.
    MultiSubString(ConstStringZ str) { SubString s(str); m_pieces.push_back(s); size_ = s.size(); }
    // NOTE(review): presumably the half-open character range [p1,p2) - confirm against SubString.
    MultiSubString(const xchar* p1, const xchar* p2) { SubString s(p1,p2); m_pieces.push_back(s); size_ = s.size(); }
    MultiSubString(const xstring& str) { SubString s(str); m_pieces.push_back(s); size_ = s.size(); }

    // Length of the logical string, as cached in size_.
    ssize_t size() const { return size_; }
    bool empty() const { return size_ == 0; }
    
    // Defined out of line.  NOTE(review): presumably materialises the concatenated text into
    // str; confirm whether str is overwritten or appended to.
    void GetString(xstring& str) const;
    // Convenience overload: returns the result of GetString(xstring&) applied to a fresh xstring.
    xstring GetString() const { xstring s; GetString(s); return s; }

    // Defined out of line.  NOTE(review): presumably the character at zero based logical index i.
    xchar operator[](ssize_t i) const;

    // todo: implement erase() and insert() methods

    // Read-only iterator over the characters of a MultiSubString.  It tracks the logical index,
    // the current piece and the current character within that piece.
    class cxUtils_API const_iterator
    {
    public:
        // Detached iterator: no owning string, index 0 and no current character.
        const_iterator() : m_mss(nullptr), m_index(0), m_pos(nullptr) {}
        
        // Builds an iterator from its four components.  No validation is performed here, so the
        // caller is responsible for passing a mutually consistent index, piece and position.
        const_iterator(const MultiSubString* mss, ssize_t index, xvector<SubString>::const_iterator i, const xchar* pos) : 
            m_mss(mss),
            m_index(index),
            m_i(i),
            m_pos(pos)
        {
        }

        // Get the string obtained from the next n characters from this position
        void GetString(ssize_t n, xstring& str) const;

        // Two iterators are equal when they have the same logical index and refer to the same
        // MultiSubString.  The piece iterator and character pointer do not take part.
        bool operator==(const const_iterator& rhs) const
        {
            return m_index == rhs.m_index && m_mss == rhs.m_mss;
        }
        bool operator!=(const const_iterator& rhs) const
        {
            return !operator==(rhs);
        }

        // Defined out of line.  NOTE(review): presumably the character i positions away from the
        // current one; const_reverse_iterator calls this with a negated offset.
        xchar operator[](ssize_t i) const;

        // Zero based logical index of this iterator within the MultiSubString.
        ssize_t index() const { return m_index; }

        // Address of the current character.  Asserts that there is a current character, so this
        // must not be called on a default constructed iterator or on end()/rend(), all of which
        // are built with a null position.
        const xchar* ptr() const
        {
            cxAssert(m_pos);
            return m_pos;
        }
        
        // The current character.  Same precondition as ptr().
        xchar operator*() const
        {
            cxAssert(m_pos);
            return *m_pos;
        }

        // Defined out of line; called by operator--() and operator++() respectively when the
        // iterator has to leave the current piece (or has no current character).
        void ScanForPrevNonEmptySubString();
        void ScanForNextNonEmptySubString();

        // Pre-increment.  Requires an owning string, an index below its size and a piece
        // iterator that is not at the end of the piece list.
        const_iterator& operator++()
        {
            cxAssert(m_mss);
            cxAssert(m_index < m_mss->size_);
            cxAssert(m_i != m_mss->m_pieces.end());

            // The character pointer is only advanced when it is non-null (short-circuit).  If
            // there is no current character, or the advance reaches the end of the current
            // piece, the out-of-line scan selects the next position.
            if (m_pos == nullptr || ++m_pos == m_i->end())
            {
                ScanForNextNonEmptySubString();
            }
            ++m_index;
            return *this;
        }
        // Post-increment: returns a copy taken before advancing.
        const const_iterator operator++(int) { const_iterator s = *this; ++(*this); return s; }

        // Pre-decrement.  The assertion permits index 0, so the iterator may step to index -1,
        // which is the index rend() is constructed with.
        const_iterator& operator--()
        {
            cxAssert(m_mss);
            cxAssert(0 <= m_index);
            // With no current character, or when already at the first character of the current
            // piece, the out-of-line scan selects the previous position; otherwise step back
            // within the piece.
            if (m_pos == nullptr || m_pos == m_i->begin())
            {
                ScanForPrevNonEmptySubString();
            }
            else
            {
                --m_pos;
            }
            --m_index;
            return *this;
        }
        // Post-decrement: returns a copy taken before stepping back.
        const const_iterator operator--(int) { const_iterator s = *this; --(*this); return s; }

        // operator+= is defined out of line; operator-= forwards to it with the negated count.
        const_iterator& operator+=(ssize_t i);
        const_iterator& operator-=(ssize_t i) { return operator+=(-i); }

        // Offset copies, implemented with += / -= on a copy of this iterator.
        const_iterator operator+(ssize_t i) const { const_iterator s = *this; s += i; return s; }
        const_iterator operator-(ssize_t i) const { const_iterator s = *this; s -= i; return s; }

        // Distance between two iterators, computed as the difference of their logical indices.
        // Asserts that both iterators refer to the same MultiSubString.
        ssize_t operator-(const const_iterator& rhs) const 
        { 
            cxAssert(m_mss == rhs.m_mss);
            return m_index - rhs.m_index; 
        }

    private:
        // The string being iterated (not owned); nullptr for a default constructed iterator
        const MultiSubString* m_mss;
        
        // Zero based index position relative to the MultiSubString
        ssize_t m_index;
        
        // Points at current piece
        xvector<SubString>::const_iterator m_i;

        // Points at current character within a piece
        const xchar* m_pos;
    };

    // begin() is defined out of line.  end() has index size_, the end of the piece list as its
    // piece iterator, and no current character.
    const_iterator begin() const;
    const_iterator end() const { return const_iterator(this, size_, m_pieces.end(), nullptr); }

    // Append operators, defined out of line.
    MultiSubString& operator+=(SubString rhs);
    MultiSubString& operator+=(const MultiSubString& rhs);

    // Needed to avoid ambiguities
    // (both wrap the argument in a SubString and forward to operator+=(SubString))
    MultiSubString& operator+=(const xstring& rhs) { operator+=(SubString(rhs)); return *this; }
    MultiSubString& operator+=(ConstStringZ rhs) { operator+=(SubString(rhs)); return *this; }

    // Reverse iterator implemented as a thin wrapper around a const_iterator: every operation
    // is forwarded to the wrapped iterator with the direction reversed.
    class const_reverse_iterator
    {
    public:
        // Wraps a default constructed const_iterator.
        const_reverse_iterator() {}
        
        // The arguments are passed straight through to the const_iterator constructor.
        const_reverse_iterator(const MultiSubString* mss, ssize_t index, xvector<SubString>::const_iterator i, const xchar* pos) : 
            m_it(mss,index,i,pos)
        {
        }

        bool operator==(const const_reverse_iterator& rhs) const { return m_it == rhs.m_it; }
        bool operator!=(const const_reverse_iterator& rhs) const { return !operator==(rhs); }

        // Indexing negates the offset before delegating to the wrapped iterator.
        xchar operator[](ssize_t i) const { return m_it[-i]; }
        const xchar* ptr() const { return m_it.ptr(); }
        xchar operator*() const { return *m_it; }

        // Incrementing the reverse iterator decrements the wrapped iterator and vice versa.
        const_reverse_iterator& operator++() { --m_it; return *this; }
        const const_reverse_iterator operator++(int) { const_reverse_iterator s = *this; ++(*this); return s; }

        const_reverse_iterator& operator--() { ++m_it; return *this; }
        const const_reverse_iterator operator--(int) { const_reverse_iterator s = *this; --(*this); return s; }

        const_reverse_iterator& operator+=(ssize_t i) { m_it -= i; return *this; }
        const_reverse_iterator& operator-=(ssize_t i) { return operator+=(-i); }

        // Distance with the operands swapped relative to the wrapped iterators.
        ssize_t operator-(const const_reverse_iterator& rhs) const { return rhs.m_it - m_it; }
    private:
        // The forward iterator that does the actual work
        const_iterator m_it;
    };

    // rbegin() is defined out of line.  rend() has index -1 and no current character.
    // NOTE(review): rend() forms m_pieces.begin()-1.  For standard library containers an
    // iterator before begin() is undefined behaviour (and trips checked iterators), including
    // when the container is empty - confirm that xvector's iterator permits this.
    const_reverse_iterator rbegin() const;
    const_reverse_iterator rend() const { return const_reverse_iterator(this, -1, m_pieces.begin()-1, nullptr); }

private:
    // The ordered pieces making up the logical string (aliases; the text is not owned)
    xvector<SubString> m_pieces;
    // Cached length of the logical string
    ssize_t size_;

    // const_iterator reads m_pieces and size_ directly
    friend class const_iterator;
};

///////////////////////////////////////////////////////////////////////////////////////////////////
// MultiSubStringRange

// A view onto a run of characters within a MultiSubString, stored as a start iterator plus a
// character count.  The view does not own any text.  Advancing the view (++ or +=) moves the
// start forwards and shrinks the count by the same amount, so the end of the view stays put.
class MultiSubStringRange
{
public:
    // Empty view: default constructed start iterator, zero characters.
    MultiSubStringRange() : size_(0) {}

    // View of 'size' characters beginning at p1.
    MultiSubStringRange(MultiSubString::const_iterator p1, ssize_t size)
        : m_p1(p1)
        , size_(size)
    {
    }

    // View of the characters from p1 up to (not including) p2; the count is p2 - p1.
    MultiSubStringRange(MultiSubString::const_iterator p1, MultiSubString::const_iterator p2)
        : m_p1(p1)
        , size_(p2-p1)
    {
    }

    // View of an entire MultiSubString.
    MultiSubStringRange(const MultiSubString& s)
        : m_p1(s.begin())
        , size_(s.size())
    {
    }

    // ---- Element access (all delegated to the start iterator) ----

    // Character at offset i from the start of the view.
    xchar operator[](ssize_t i) const
    {
        return m_p1[i];
    }

    // First character of the view.
    xchar operator*() const
    {
        return *m_p1;
    }

    // Address of the first character of the view.
    const xchar* ptr() const
    {
        return m_p1.ptr();
    }

    // ---- Extracting the text ----

    // Asks the start iterator for the string formed by the next size() characters.
    void GetString(xstring& str) const
    {
        m_p1.GetString(size_,str);
    }

    // As above, returning the text by value.
    xstring GetString() const
    {
        xstring text;
        GetString(text);
        return text;
    }

    // ---- Bounds ----

    MultiSubString::const_iterator begin() const
    {
        return m_p1;
    }

    // Replaces the start iterator only; the character count is left as it was.
    void setbegin(MultiSubString::const_iterator p1)
    {
        m_p1 = p1;
    }

    // Recomputes the character count so that the view stops at p2.
    void setend(MultiSubString::const_iterator p2)
    {
        size_ = p2 - m_p1;
    }

    ssize_t size() const
    {
        return size_;
    }

    void setsize(ssize_t sz)
    {
        size_ = sz;
    }

    bool empty() const
    {
        return size_ == 0;
    }

    // True when the view contains at least one character.
    explicit operator bool() const
    {
        return !empty();
    }

    // ---- Advancing ----

    // Drops the first character.  The view must not be empty.
    MultiSubStringRange& operator++()
    {
        cxAssert(size_ > 0);
        ++m_p1;
        --size_;
        return *this;
    }

    // Post-increment: hands back the view as it was before dropping the first character.
    const MultiSubStringRange operator++(int)
    {
        MultiSubStringRange previous(*this);
        operator++();
        return previous;
    }

    // Drops the first i characters, where 0 <= i <= size().
    MultiSubStringRange& operator+=(ssize_t i)
    {
        cxAssert(0 <= i && i <= size_);
        m_p1 += i;
        size_ -= i;
        return *this;
    }

    // Growing the view backwards is currently disabled:
    /*
    MultiSubStringRange& operator-=(ssize_t i)
    {
        m_p1 -= i;
        size_ += i;
        return *this;
    }
    */

private:
    // Position of the first character of the view
    MultiSubString::const_iterator m_p1;

    // Number of characters in the view
    ssize_t size_;
};

///////////////////////////////////////////////////////////////////////////////////////////////////

// Compare two substrings lexicographically.  Similar in functionality to strcmp(const char*, const char*).
// The inline comparison operators in this header treat a negative result as "s1 orders before s2"
// and zero as "equal".
cxUtils_API int strcmp(MultiSubStringRange s1, MultiSubStringRange s2);

// As above, comparing a range against a ConstStringZ.
// NOTE(review): presumably s2 is read up to its terminating zero - confirm against ConstStringZ.
cxUtils_API int strcmp(MultiSubStringRange s1, ConstStringZ s2);

// String comparison
cxUtils_API bool operator==(MultiSubStringRange s1, MultiSubStringRange s2);
inline bool operator!=(MultiSubStringRange s1, MultiSubStringRange s2) { return !(s1 == s2); }
inline bool operator<(MultiSubStringRange s1, MultiSubStringRange s2) { return strcmp(s1,s2) < 0; }
inline bool operator<=(MultiSubStringRange s1, MultiSubStringRange s2) { return !(s2 < s1); }
inline bool operator>(MultiSubStringRange s1, MultiSubStringRange s2) { return s2 < s1; }
inline bool operator>=(MultiSubStringRange s1, MultiSubStringRange s2) { return s2 <= s1; }

inline bool operator==(MultiSubStringRange s1, ConstStringZ s2) { return strcmp(s1,s2) == 0; }
inline bool operator!=(MultiSubStringRange s1, ConstStringZ s2) { return !(s1 == s2); }

inline bool operator==(const MultiSubString& s1, ConstStringZ s2) { return strcmp(MultiSubStringRange(s1),s2) == 0; }
inline bool operator!=(const MultiSubString& s1, ConstStringZ s2) { return !(s1 == s2); }

inline bool operator==(const MultiSubString& s1, const MultiSubString& s2) { return MultiSubStringRange(s1) == MultiSubStringRange(s2); }
inline bool operator!=(const MultiSubString& s1, const MultiSubString& s2) { return !(s1 == s2); }
inline bool operator<(const MultiSubString& s1, const MultiSubString& s2) { return MultiSubStringRange(s1) < MultiSubStringRange(s2); }
inline bool operator<=(const MultiSubString& s1, const MultiSubString& s2) { return !(s2 < s1); }
inline bool operator>(const MultiSubString& s1, const MultiSubString& s2) { return s2 < s1; }
inline bool operator>=(const MultiSubString& s1, const MultiSubString& s2) { return s2 <= s1; }


// Implemented out of line.  s is taken by non-const reference, so the caller's range may be modified.
// NOTE(review): presumably advances s over leading indentation up to 'indent' columns, treating
// a tab as tabSize columns (default 4) - confirm against the implementation.
cxUtils_API void ScanIndentation(MultiSubStringRange& s, ssize_t indent, ssize_t tabSize = 4);

// Implemented out of line.
// NOTE(review): presumably writes s to os with each line re-indented from srcIndent to
// dstIndent - confirm against the implementation.
cxUtils_API void WriteMultiSubStringWithAdjustedIndent(xostream& os, ssize_t dstIndent, MultiSubStringRange s, ssize_t srcIndent);

// Returns a pointer to the start of the given line (zero based), or nullptr if s doesn't
// contain enough lines.
// NOTE(review): the returned pointer presumably refers to text aliased by s (nothing is
// copied), so it is only valid while that text is alive - confirm.
cxUtils_API const xchar* GetLineNumberPosition(MultiSubStringRange s, ssize_t lineNumber);

// Returns true if s begins with the given prefix.  The comparison is case sensitive.  Always
// returns true if the prefix is empty.
cxUtils_API bool CheckPrefix(MultiSubStringRange s, MultiSubStringRange prefix);

// Returns true if s ends with the given suffix.  The comparison is case sensitive.  Always
// returns true if the suffix is empty.
cxUtils_API bool CheckSuffix(MultiSubStringRange s, MultiSubStringRange suffix);

// If s starts with the given prefix then advance s past the prefix and return true.
// NOTE(review): presumably returns false and leaves s unchanged otherwise - confirm.
cxUtils_API bool EatPrefix(MultiSubStringRange& s, MultiSubStringRange prefix);

// Find the position of the first occurrence of sFind within s.  Returns nullptr if not found.
cxUtils_API const xchar* ForwardsFind(MultiSubStringRange s, MultiSubStringRange sFind);

// Find the position of the first occurrence of character c within s.  Returns nullptr if not found.
cxUtils_API const xchar* ForwardsFind(MultiSubStringRange s, xchar c);


// Converts a hexadecimal literal of the form 0x<digits> (or 0X<digits>) into an integer.
//
//   str    the complete literal; it must be longer than two characters, start with '0'
//          followed by 'x' or 'X', and every remaining character must be accepted by
//          MapHexDigit.  These requirements are only checked with cxAssert.
//   value  receives the result; each digit shifts the accumulated value left by four bits.
//          No overflow check is made.
template<typename T>
void MultiSubStringRangeToHexInt(MultiSubStringRange str, T& value)
{
    const ssize_t length = str.size();
    cxAssert(length > 2);
    cxAssert(str[0] == '0');
    cxAssert(str[1] == 'x' || str[1] == 'X');

    value = 0;

    // Digits start after the two character "0x" prefix
    ssize_t pos = 2;
    while (pos < length)
    {
        const int digit = MapHexDigit(str[pos]);
        cxAssert(digit != -1);
        value = (value << 4) | digit;
        ++pos;
    }
}

// Converts a run of decimal digits into an integer.
//
//   str    the digits; must be non-empty and every character must be accepted by
//          MapDecDigit.  These requirements are only checked with cxAssert.
//   value  receives the result, accumulated as value * 10 + digit.  No sign handling and
//          no overflow check is made.
template<typename T>
void MultiSubStringRangeToInt(MultiSubStringRange str, T& value)
{
    const ssize_t length = str.size();
    cxAssert(length > 0);

    value = 0;

    ssize_t pos = 0;
    while (pos < length)
    {
        const int digit = MapDecDigit(str[pos]);
        cxAssert(digit != -1);
        value = value * 10 + digit;
        ++pos;
    }
}

// Reads as many leading hexadecimal digits from s as possible.
//
//   s   in/out: advanced past every digit that was consumed.  Scanning stops at the first
//       character for which MapHexDigit returns -1, or when s becomes empty.
//   x   receives the value of the digits read (0 if there were none).  Each digit shifts
//       the accumulated value left by four bits; no overflow check is made.
//
// Returns true if one or more digits were read.
//
// Progress is detected from the change in s.size() rather than by comparing s.ptr() before
// and after the scan.  ptr() asserts that the range has a current character, which is not
// the case for an empty/default range nor for a range that has been consumed right up to
// the end of its MultiSubString.  A pointer comparison can also report "no progress" when
// two adjacent pieces alias the same underlying text.
template <typename T>
bool MultiSubStringRangeToHex(MultiSubStringRange& s, T& x)
{
    const ssize_t sizeBefore = s.size();
    x = 0;
    int d;
    while(s && (d = MapHexDigit(*s)) != -1)
    {
        x = (x << 4) | d;
        ++s;    // drops the first character, so size() shrinks by one per digit
    }
    return s.size() != sizeBefore;     // Return true if read one or more digits
}

// Implemented out of line.
// NOTE(review): presumably produces in t a double quoted (and escaped) rendering of s -
// confirm the escaping rules and whether t is overwritten or appended to.
cxUtils_API void MakeDoubleQuotedString(MultiSubStringRange s, xstring& t);

// Get the line number corresponding to the given position, or -1 if s doesn't contain pos.
// NOTE(review): presumably the line number is zero based, as for GetLineNumberPosition - confirm.
cxUtils_API ssize_t GetLineNumber(MultiSubStringRange s, const xchar* pos);

// Count the number of lines of text by scanning the sub string for line feeds.  The last line
// doesn't need to end with a line feed.
cxUtils_API ssize_t CountNumLinesOfText(MultiSubStringRange s);

///////////////////////////////////////////////////////////////////////////////////////////////////
// HumanReadableCharInMultiSubStringRange

// Thin wrapper around a MultiSubStringRange.  Its only role in this header is to select the
// dedicated operator<<(xostream&, const HumanReadableCharInMultiSubStringRange&) overload.
// NOTE(review): that overload presumably prints the character at the start of the range in
// a human readable form - confirm against its implementation.
struct HumanReadableCharInMultiSubStringRange
{
    // The wrapped range (a copy of the constructor argument)
    MultiSubStringRange m_s;

    HumanReadableCharInMultiSubStringRange(MultiSubStringRange s)
        : m_s(s)
    {
    }
};
 
///////////////////////////////////////////////////////////////////////////////////////////////////
// Scanning and tokenising

// The scanning functions are implemented out of line.  Each takes the range by non-const
// reference, so the caller's range may be modified.
// NOTE(review): presumably each advances s past the scanned text, and the bool results report
// whether anything was consumed - confirm against the implementations.

// NOTE(review): presumably skips leading white space.
cxUtils_API void ScanWhiteSpace(MultiSubStringRange& s);
// NOTE(review): presumably consumes a leading run of decimal digits.
cxUtils_API bool ScanDigits(MultiSubStringRange& s);
// NOTE(review): presumably consumes a leading identifier; the accepted character set is not
// visible from this header.
cxUtils_API bool ScanIdentifier(MultiSubStringRange& s);

///////////////////////////////////////////////////////////////////////////////////////////////////
// Must be inside ceda namespace !!!

// Writes a MultiSubStringRange to an xostream.  Implemented out of line.
cxUtils_API xostream& operator<<(xostream& os, MultiSubStringRange s);

// Writes a whole MultiSubString by streaming a range that spans all of it.
inline xostream& operator<<(xostream& os, const MultiSubString& s)
{
    MultiSubStringRange whole(s);
    return os << whole;
}

// Writes a HumanReadableCharInMultiSubStringRange to an xostream.  Implemented out of line.
// NOTE(review): the exact output format is not visible from this header - confirm.
cxUtils_API xostream& operator<<(xostream& os, const HumanReadableCharInMultiSubStringRange& s);

} // namespace ceda

#endif // include guard