xstring.h

// xstring.h
//
// Author David Barrett-Lennard
// (C)opyright Cedanet Pty Ltd 2006

#pragma once
#ifndef Ceda_cxUtils_xstring_H
#define Ceda_cxUtils_xstring_H

#include "cxUtils.h"
#include "CedaAssert.h"
#include "IException.h"
#include "xvector.h"
#include "xchar.h"

#include <cstring>

/*
xstring - a string class that is "plugin compatible" with std::string.

Ceda needs its own string class for the following reasons

*   The STL doesn't define a binary standard.  Unfortunately this means that components can't
    interoperate using STL classes unless they agree to use the same STL implementation.

*   xstring is implemented in terms of VectorOfByte, which appears to be somewhat faster than
    the STL that comes with Microsoft Visual Studio VC++ 6

*   The Microsoft Visual Studio VC++ 6 uses a flawed implementation of COW (Copy On Write) that
    is not thread-safe.

*   For the purposes of operational transform, it is important that a string be binary 
    compatible with xvector<octet_t>

*   This implementation of xstring shares the xvector<T> reallocation strategy (i.e. doubling in
    size).  By contrast, the Microsoft Visual Studio VC++ 6 string continually reallocates the
    buffer as the string is grown a character at a time.
*/

namespace ceda
{
class xostream;

// Returns the number of characters in the null terminated string 's', not counting the
// terminator.  's' is handed straight to strlen(), so it must point to a valid null terminated
// string.
inline ssize_t GetStringLength(const char* s)
{
    // strlen() yields an unsigned size_t; make the conversion to the signed ssize_t explicit
    return static_cast<ssize_t>(std::strlen(s));
}

#ifdef _WIN32
    // The static_assert checks that wchar_t has the same size as char16, which allows the
    // length to be measured with wcslen().
    inline ssize_t GetStringLength(const char16* s)
    {
        static_assert( sizeof(wchar_t) == sizeof(char16), "wchar_t is 16 bit" );
        const wchar_t* wide = reinterpret_cast<const wchar_t*>(s);
        return wcslen(wide);
    }
#else
    // wchar_t might be 32 bit so wcslen() can't be used.  Count the elements that precede the
    // zero terminator instead.
    inline ssize_t GetStringLength(const char16* s)
    {
        ssize_t count = 0;
        while (s[count] != 0)
        {
            ++count;
        }
        return count;
    }
#endif

///////////////////////////////////////////////////////////////////////////////////////////////////
// basic_xstring<T>

// A string of elements of type T, derived from xvector<T>.  On top of the inherited vector
// members it provides overloads which accept a null terminated string (measured with
// GetStringLength()), and c_str() for access to a null terminated buffer.
template <typename T>
class basic_xstring : public xvector<T>
{
public:
    // Members of the dependent base class xvector<T> that are used unqualified in this class, or
    // that would otherwise be hidden by the same-named members declared below.
    using xvector<T>::append;
    using xvector<T>::assign;
    using xvector<T>::insert;
    using xvector<T>::compare;
    using xvector<T>::replace;
    using xvector<T>::find;
    using xvector<T>::rfind;
    using xvector<T>::find_first_of;
    using xvector<T>::find_first_not_of;
    using xvector<T>::find_last_of;
    using xvector<T>::find_last_not_of;
    using xvector<T>::operator+=;
    using xvector<T>::operator=;
    using xvector<T>::npos;
    using xvector<T>::size;
    using xvector<T>::reserve;
    using xvector<T>::data;

    ///////////////////////////////////////////////////////////////////////////////////////////////
    // Construction.  Each constructor forwards its arguments to the matching xvector<T>
    // constructor.

    basic_xstring()
    {
    }

    basic_xstring(const basic_xstring& r, ssize_t ri, ssize_t rn) :
        xvector<T>(r,ri,rn)
    {
    }

    basic_xstring(const T* r, ssize_t rn) :
        xvector<T>(r,rn)
    {
    }

    // Implicit conversion from a null terminated string
    basic_xstring(const T* r) :
        xvector<T>(r,r+GetStringLength(r))
    {
    }

    explicit basic_xstring(ssize_t count, T x = T()) :
        xvector<T>(count,x)
    {
    }

    template <typename It>
    basic_xstring(It r1, It r2) :
        xvector<T>(r1,r2)
    {
    }

    basic_xstring(const T* r1, const T* r2) :
        xvector<T>(r1,r2)
    {
    }

    basic_xstring(const basic_xstring& r) :
        xvector<T>(r)
    {
    }

    // Assignment from a null terminated string
    basic_xstring& operator=(const T* rhs)
    {
        assign(rhs);
        return *this;
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////
    // Comparison with a null terminated string.  The length of 'p' is measured and the call is
    // forwarded to the inherited compare().
    // todo: inefficient

    int compare(const T* p) const
    {
        return compare(p, GetStringLength(p));
    }

    int compare(ssize_t i, ssize_t n, const T* p) const
    {
        return compare(i,n, p, GetStringLength(p));
    }

    // True if compare(rhs) yields zero
    bool _Eq(const T* rhs) const
    {
        return compare(rhs) == 0;
    }

    // True if compare(rhs) yields a negative value
    bool _Lt(const T* rhs) const
    {
        return compare(rhs) < 0;
    }

    // Returns a pointer to a null terminated string.  Room for the '\0' is obtained by reserving
    // one element more than the current size, and that extra element is then set to zero.
    // Calling c_str() repeatedly is not expensive because at most one of the calls will cause the
    // buffer to be reallocated.
    const T* c_str() const
    {
        const ssize_t len = size();
        reserve(len+1);

        const T* buffer = data();
        cxAssert(buffer);

        // The terminator is written through a const pointer, hence the const_cast
        const_cast<T*>(buffer)[len] = '\0';
        return buffer;
    }

    // Returns the result of xvector<T>::substr(offset,count) as a basic_xstring
    basic_xstring substr(ssize_t offset = 0, ssize_t count = npos) const
    {
        xvector<T> slice = xvector<T>::substr(offset,count);
        return reinterpret_cast<const basic_xstring<T>&>(slice);
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////
    // Searching for a null terminated string.  Each overload passes the range
    // [p, p+GetStringLength(p)) to the corresponding xvector<T> member.

    ssize_t find(const T* p, ssize_t offset = 0) const
    {
        return find(p,p+GetStringLength(p),offset);
    }

    ssize_t rfind(const T* p, ssize_t offset = npos) const
    {
        return rfind(p,p+GetStringLength(p),offset);
    }

    ssize_t find_first_of(const T* s, ssize_t offset = 0) const
    {
        return xvector<T>::find_first_of(s,s+GetStringLength(s),offset);
    }

    ssize_t find_first_not_of(const T* s, ssize_t offset = 0) const
    {
        return xvector<T>::find_first_not_of(s,s+GetStringLength(s),offset);
    }

    ssize_t find_last_of(const T* s, ssize_t offset = npos) const
    {
        return xvector<T>::find_last_of(s,s+GetStringLength(s),offset);
    }

    ssize_t find_last_not_of(const T* s, ssize_t offset = npos) const
    {
        return xvector<T>::find_last_not_of(s,s+GetStringLength(s),offset);
    }

    ///////////////////////////////////////////////////////////////////////////////////////////////
    // Modification using a null terminated string.  All of these return *this.

    basic_xstring& operator+=(const T *s)
    {
        xvector<T>::push_back(s, s+GetStringLength(s));
        return *this;
    }

    basic_xstring& append(const T* s)
    {
        xvector<T>::push_back(s, s+GetStringLength(s));
        return *this;
    }

    basic_xstring& assign(const T* s)
    {
        assign(s, GetStringLength(s));
        return *this;
    }

    basic_xstring& replace(ssize_t i,ssize_t n,const T* p)
    {
        replace(i,n,p,GetStringLength(p));
        return *this;
    }

    basic_xstring& insert(ssize_t i,const T *s)
    {
        insert(i,s,s+GetStringLength(s));
        return *this;
    }
};

// Equality of a basic_xstring and a null terminated string, with the operands in either order.
template<typename T>
inline bool operator==(const basic_xstring<T>& str, const T* cstr)
{
    return str.compare(cstr) == 0;
}

template<typename T>
inline bool operator==(const T* cstr, const basic_xstring<T>& str)
{
    return str.compare(cstr) == 0;
}

// Inequality is the negation of the corresponding equality test.
template<typename T>
inline bool operator!=(const basic_xstring<T>& str, const T* cstr)
{
    return !(str == cstr);
}

template<typename T>
inline bool operator!=(const T* cstr, const basic_xstring<T>& str)
{
    return !(str == cstr);
}

// Ordering of a basic_xstring relative to a null terminated string.  Every operator is expressed
// through the sign of str.compare(cstr); when the null terminated string is the left operand the
// sense of the test is mirrored.

// str < cstr
template<typename T>
inline bool operator<(const basic_xstring<T>& str, const T* cstr)
{
    return str._Lt(cstr);
}

// cstr > str
template<typename T>
inline bool operator>(const T* cstr, const basic_xstring<T>& str)
{
    return str._Lt(cstr);
}

// str > cstr
template<typename T>
inline bool operator>(const basic_xstring<T>& str, const T* cstr)
{
    return str.compare(cstr) > 0;
}

// cstr < str
template<typename T>
inline bool operator<(const T* cstr, const basic_xstring<T>& str)
{
    return str.compare(cstr) > 0;
}

// str <= cstr
template<typename T>
inline bool operator<=(const basic_xstring<T>& str, const T* cstr)
{
    return str.compare(cstr) <= 0;
}

// cstr >= str
template<typename T>
inline bool operator>=(const T* cstr, const basic_xstring<T>& str)
{
    return str.compare(cstr) <= 0;
}

// str >= cstr
template<typename T>
inline bool operator>=(const basic_xstring<T>& str, const T* cstr)
{
    return str.compare(cstr) >= 0;
}

// cstr <= str
template<typename T>
inline bool operator<=(const T* cstr, const basic_xstring<T>& str)
{
    return str.compare(cstr) >= 0;
}

// Adds two strings by applying the xvector<T> operator+ to their base class parts
// (presumably concatenation - see xvector.h).  The resulting vector is returned as a string.
template <typename T>
inline const basic_xstring<T> operator+(const basic_xstring<T>& x, const basic_xstring<T>& y)
{
    const xvector<T>& lhs = x;
    const xvector<T>& rhs = y;
    xvector<T> sum = lhs + rhs;
    return static_cast<const basic_xstring<T>&>(sum);
}

// Adds a single element to a string by applying the xvector<T> operator+ to the base class part
// of 'x' (presumably appending 'y' - see xvector.h).  The resulting vector is returned as a
// string.
template <typename T>
inline const basic_xstring<T> operator+(const basic_xstring<T>& x, const T& y)
{
    const xvector<T>& lhs = x;
    xvector<T> sum = (lhs + y);
    return static_cast<const basic_xstring<T>&>(sum);
}

// Returns a copy of 'x' to which the null terminated string 'y' has been appended.
template <typename T>
inline const basic_xstring<T> operator+(const basic_xstring<T>& x, const T* y)
{
    basic_xstring<T> result(x);
    result.append(y);
    return result;
}

// Returns a string built from the null terminated string 'x' to which 'y' has been appended.
template <typename T>
inline const basic_xstring<T> operator+(const T* x, const basic_xstring<T>& y)
{
    basic_xstring<T> result(x);
    result.append(y);
    return result;
}

///////////////////////////////////////////////////////////////////////////////////////////////////
// string8, string16

// Explicit instantiation declarations.  A translation unit that includes this header does not
// implicitly instantiate these specialisations.
// NOTE(review): the matching explicit instantiation definitions are presumably compiled into the
// cxUtils library, with cxUtils_API_T supplying the export/import decoration - confirm.
extern template class cxUtils_API_T xvector<char8>;
extern template class cxUtils_API_T basic_xstring<char8>;
extern template class cxUtils_API_T xvector<char16>;
extern template class cxUtils_API_T basic_xstring<char16>;

// string8 is a string of char8 elements, string16 is a string of char16 elements
using string8 = basic_xstring<char8>;
using string16 = basic_xstring<char16>;

///////////////////////////////////////////////////////////////////////////////////////////////////

// Non ascii characters are mapped to '?'
cxUtils_API void Utf16FromAscii(string16& dst, const char8* src, ssize_t srcLen);

// Overload which takes the source buffer and its length from a string8.
inline void Utf16FromAscii(string16& dst, const string8& src)
{
    const char8* chars = src.data();
    const ssize_t count = src.size();
    Utf16FromAscii(dst, chars, count);
}

///////////////////////////////////////////////////////////////////////////////////////////////////
// Note that conversions between UTF-8 and UTF-16 are lossless

// Returns false if 'src' isn't a valid UTF-8 encoding of a unicode string.
cxUtils_API bool Utf16FromUtf8(string16& dst, const char8* src, ssize_t len);
cxUtils_API bool Utf16FromUtf8(string16& dst, const char8* src);

// Overload which takes the source buffer and its length from a string8.
inline bool Utf16FromUtf8(string16& dst, const string8& src)
{
    const char8* chars = src.data();
    const ssize_t count = src.size();
    return Utf16FromUtf8(dst, chars, count);
}

// Returns false if 'src' isn't a valid UTF-16 encoding of a unicode string.
cxUtils_API bool Utf8FromUtf16(string8& dst, const char16* src, ssize_t len);
cxUtils_API bool Utf8FromUtf16(string8& dst, const char16* src);

// Overload which takes the source buffer and its length from a string16.
inline bool Utf8FromUtf16(string8& dst, const string16& src)
{
    const char16* chars = src.data();
    const ssize_t count = src.size();
    return Utf8FromUtf16(dst, chars, count);
}

// The exception thrown by the Convert() functions when the underlying UTF conversion returns
// false.
struct StringConversionException : public IException
{
    // Writes a fixed description of the failure to 'os'
    virtual void Write(xostream& os) const
    {
        os << "String conversion failed";
    }
};

// Convert from string8 to string16.  A StringConversionException is thrown if Utf16FromUtf8()
// returns false.
inline void Convert(string16& dst, const string8& src)
{
    if (Utf16FromUtf8(dst,src)) return;
    throw StringConversionException();
}

inline void Convert(string16& dst, const char8* src)
{
    if (Utf16FromUtf8(dst,src)) return;
    throw StringConversionException();
}

inline void Convert(string16& dst, const char8* src, ssize_t len)
{
    if (Utf16FromUtf8(dst,src,len)) return;
    throw StringConversionException();
}

// Convert from string16 to string8.  A StringConversionException is thrown if Utf8FromUtf16()
// returns false.
inline void Convert(string8& dst, const string16& src)
{
    if (Utf8FromUtf16(dst,src)) return;
    throw StringConversionException();
}

inline void Convert(string8& dst, const char16* src)
{
    if (Utf8FromUtf16(dst,src)) return;
    throw StringConversionException();
}

inline void Convert(string8& dst, const char16* src, ssize_t len)
{
    if (Utf8FromUtf16(dst,src,len)) return;
    throw StringConversionException();
}

// "Conversion" from 16 bit source to string16: the source is simply assigned to 'dst'.
inline void Convert(string16& dst, const string16& src)
{
    dst = src;
}

inline void Convert(string16& dst, const char16* src)
{
    dst = src;
}

inline void Convert(string16& dst, const char16* src, ssize_t len)
{
    dst.assign(src,len);
}

// "Conversion" from 8 bit source to string8: the source is simply assigned to 'dst'.
inline void Convert(string8& dst, const string8& src)
{
    dst = src;
}

inline void Convert(string8& dst, const char8* src)
{
    dst = src;
}

inline void Convert(string8& dst, const char8* src, ssize_t len)
{
    dst.assign(src,len);
}

#ifdef _WIN32
    // Windows only: wchar_t strings are reinterpreted as char16 strings.

    // To string8.  A StringConversionException is thrown if Utf8FromUtf16() returns false.
    inline void Convert(string8& dst, const wchar_t* src)
    {
        const char16* chars = reinterpret_cast<const char16*>(src);
        if (Utf8FromUtf16(dst,chars)) return;
        throw StringConversionException();
    }

    inline void Convert(string8& dst, const wchar_t* src, ssize_t len)
    {
        const char16* chars = reinterpret_cast<const char16*>(src);
        if (Utf8FromUtf16(dst,chars,len)) return;
        throw StringConversionException();
    }

    // To string16.  The characters are assigned without any transcoding.
    inline void Convert(string16& dst, const wchar_t* src)
    {
        dst = reinterpret_cast<const char16*>(src);
    }

    inline void Convert(string16& dst, const wchar_t* src, ssize_t len)
    {
        dst.assign(reinterpret_cast<const char16*>(src),len);
    }
#endif

// AsString8() yields a string8 for 8 bit or 16 bit input.  16 bit input goes through Convert(),
// so a StringConversionException may be thrown for it.

// Already a string8: the argument itself is returned
inline const string8& AsString8(const string8& s)
{
    return s;
}

inline string8 AsString8(const string16& s)
{
    string8 result;
    Convert(result,s);
    return result;
}

inline string8 AsString8(const char8* s)
{
    return string8(s);
}

inline string8 AsString8(const char8* s, ssize_t len)
{
    return string8(s,len);
}

inline string8 AsString8(const char16* s)
{
    string8 result;
    Convert(result,s);
    return result;
}

inline string8 AsString8(const char16* s, ssize_t len)
{
    string8 result;
    Convert(result,s,len);
    return result;
}

// AsString16() yields a string16 for 8 bit or 16 bit input.  8 bit input goes through Convert(),
// so a StringConversionException may be thrown for it.

inline string16 AsString16(const string8& s)
{
    string16 result;
    Convert(result,s);
    return result;
}

// Already a string16: the argument itself is returned
inline const string16& AsString16(const string16& s)
{
    return s;
}

inline string16 AsString16(const char8* s)
{
    string16 result;
    Convert(result,s);
    return result;
}

inline string16 AsString16(const char8* s, ssize_t len)
{
    string16 result;
    Convert(result,s,len);
    return result;
}

inline string16 AsString16(const char16* s)
{
    return string16(s);
}

inline string16 AsString16(const char16* s, ssize_t len)
{
    return string16(s,len);
}

// AsXstring() yields an xstring for 8 bit or 16 bit input.  Every overload returns the result of
// the AsString8() overload that takes the same arguments.

inline xstring AsXstring(const char8* s)
{
    return AsString8(s);
}

inline xstring AsXstring(const char16* s)
{
    return AsString8(s);
}

inline xstring AsXstring(const char8* s, ssize_t len)
{
    return AsString8(s,len);
}

inline xstring AsXstring(const char16* s, ssize_t len)
{
    return AsString8(s,len);
}

inline xstring AsXstring(const string8& s)
{
    return AsString8(s);
}

inline xstring AsXstring(const string16& s)
{
    return AsString8(s);
}

#ifdef _WIN32
    // Windows only: overloads which accept wchar_t strings.

    inline string8 AsString8(const wchar_t* s)
    {
        string8 result;
        Convert(result,s);
        return result;
    }

    inline string8 AsString8(const wchar_t* s, ssize_t len)
    {
        string8 result;
        Convert(result,s,len);
        return result;
    }

    // The wchar_t characters are reinterpreted as char16 characters
    inline string16 AsString16(const wchar_t* s)
    {
        return string16(reinterpret_cast<const char16*>(s));
    }

    inline string16 AsString16(const wchar_t* s, ssize_t len)
    {
        return string16(reinterpret_cast<const char16*>(s),len);
    }

    // Forwarded to the char16 overloads of AsString8()
    inline xstring AsXstring(const wchar_t* s)
    {
        return AsString8(reinterpret_cast<const char16*>(s));
    }

    inline xstring AsXstring(const wchar_t* s, ssize_t len)
    {
        return AsString8(reinterpret_cast<const char16*>(s),len);
    }
#endif

// Conversions between vector of chars and strings.  No copy is made: the reference to the vector
// is cast to a reference to the string class derived from it.

inline const string8& AsString8(const xvector<char8>& s)
{
    return static_cast<const string8&>(s);
}

inline string8& AsString8(xvector<char8>& s)
{
    return static_cast<string8&>(s);
}

inline const string16& AsString16(const xvector<char16>& s)
{
    return static_cast<const string16&>(s);
}

inline string16& AsString16(xvector<char16>& s)
{
    return static_cast<string16&>(s);
}

inline const xstring& AsXstring(const xvector<xchar>& s)
{
    return static_cast<const xstring&>(s);
}

inline xstring& AsXstring(xvector<xchar>& s)
{
    return static_cast<xstring&>(s);
}

///////////////////////////////////////////////////////////////////////////////////////////////////
// xstring

// xstring is the string type whose element type is xchar.
// NOTE(review): xstring is already referred to earlier in this header, so a declaration of it is
// presumably also provided by one of the included headers - confirm.
typedef basic_xstring<xchar> xstring;

// Writes the string 'v' to the stream 'os'.  Only declared here; the definition is exported
// from elsewhere (cxUtils_API).
cxUtils_API xostream& operator<<(xostream& os, const xstring& v);

} // namespace ceda

#endif // include guard