xstring.h
// xstring.h
//
// Author David Barrett-Lennard
// (C)opyright Cedanet Pty Ltd 2006
#pragma once
#ifndef Ceda_cxUtils_xstring_H
#define Ceda_cxUtils_xstring_H
#include "cxUtils.h"
#include "CedaAssert.h"
#include "IException.h"
#include "xvector.h"
#include "xchar.h"
/*
xstring - a string class that is "plugin compatible" with std::string.
Ceda needs its own string class for the following reasons:
* The STL doesn't define a binary standard. Unfortunately this means that components can't
interoperate using STL classes unless they agree to use the same STL implementation.
* xstring is implemented in terms of VectorOfByte, which appears to be somewhat faster than
the STL that comes with Microsoft Visual Studio VC++ 6
* The Microsoft Visual Studio VC++ 6 uses a flawed implementation of COW (Copy On Write) that
is not thread-safe.
* For the purposes of operational transform, it is important that a string be binary
compatible with xvector<octet_t>
* This implementation of xstring shares the xvector<T> reallocation strategy (i.e. doubling in
size). By contrast, the Microsoft Visual Studio VC++ 6 string continually reallocates the
buffer as the string is grown a character at a time.
*/
namespace ceda
{
class xostream;
// Returns the number of chars in the null terminated string 's', not counting the terminator.
// 's' is handed straight to strlen(), so it must not be a null pointer.
inline ssize_t GetStringLength(const char* s)
{
    return static_cast<ssize_t>(strlen(s));
}
#ifdef _WIN32
// Returns the number of char16 units before the terminating zero in 's'.
// On this platform wchar_t must have the same size as char16 (enforced by the static_assert),
// which allows the count to be delegated to wcslen().
inline ssize_t GetStringLength(const char16* s)
{
    static_assert( sizeof(wchar_t) == sizeof(char16), "wchar_t is 16 bit" );
    const wchar_t* const wide = reinterpret_cast<const wchar_t*>(s);
    return static_cast<ssize_t>(wcslen(wide));
}
#else
// Returns the number of char16 units before the terminating zero in 's'.
// wchar_t might be 32 bit here so wcslen() can't be used; the units are counted by hand.
inline ssize_t GetStringLength(const char16* s)
{
    ssize_t length = 0;
    while (s[length])
    {
        ++length;
    }
    return length;
}
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////
// basic_xstring<T>
// basic_xstring<T> derives publicly from xvector<T> and declares no data members of its own.
// It adds overloads that accept a null terminated 'const T*' (whose length is measured with
// GetStringLength()), plus c_str() and a substr() that returns a basic_xstring.
template <typename T>
class basic_xstring : public xvector<T>
{
public:
    // The 'const T*' overloads declared below would otherwise hide the xvector<T> members of
    // the same name, so those are re-exposed here.  npos, size, reserve and data are named so
    // that they can be used unqualified inside this template.
    using xvector<T>::append;
    using xvector<T>::assign;
    using xvector<T>::insert;
    using xvector<T>::compare;
    using xvector<T>::replace;
    using xvector<T>::find;
    using xvector<T>::rfind;
    using xvector<T>::find_first_of;
    using xvector<T>::find_first_not_of;
    using xvector<T>::find_last_of;
    using xvector<T>::find_last_not_of;
    using xvector<T>::operator+=;
    using xvector<T>::operator=;
    using xvector<T>::npos;
    using xvector<T>::size;
    using xvector<T>::reserve;
    using xvector<T>::data;

    // ---- Construction: every constructor forwards to the matching xvector<T> constructor ----

    basic_xstring() {}

    basic_xstring(const basic_xstring& r, ssize_t ri, ssize_t rn) : xvector<T>(r,ri,rn) {}

    basic_xstring(const T* r, ssize_t rn) : xvector<T>(r,rn) {}

    // From a null terminated string; deliberately not explicit so that a 'const T*' converts
    // implicitly to a basic_xstring.
    basic_xstring(const T* r) : xvector<T>(r,r+GetStringLength(r)) {}

    explicit basic_xstring(ssize_t count, T x = T()) : xvector<T>(count,x) {}

    template <typename It> basic_xstring(It r1, It r2) : xvector<T>(r1,r2) {}

    basic_xstring(const T* r1, const T* r2) : xvector<T>(r1,r2) {}

    basic_xstring(const basic_xstring& r) : xvector<T>(r) {}

    // Assignment from a null terminated string.
    basic_xstring& operator=(const T* rhs)
    {
        assign(rhs);
        return *this;
    }

    // ---- Comparison against null terminated strings ----

    // todo: inefficient
    int compare(const T* p) const
    {
        return compare(p, GetStringLength(p));
    }

    int compare(ssize_t i, ssize_t n, const T* p) const
    {
        return compare(i, n, p, GetStringLength(p));
    }

    // Helpers used by the free comparison operators.
    bool _Eq(const T* rhs) const { return compare(rhs) == 0; }
    bool _Lt(const T* rhs) const { return compare(rhs) < 0; }

    // c_str() must return a null terminated string.  That is achieved by reserving room for
    // one extra element past the end and writing '\0' into it.  Calling c_str() repeatedly is
    // not expensive because at most one of the calls will cause the buffer to be reallocated.
    // Note that this is a const member that calls reserve() and writes through a const_cast.
    const T* c_str() const
    {
        ssize_t n = size();
        reserve(n+1);
        const T* p = data();
        cxAssert(p);
        T* const terminator = const_cast<T*>(p) + n;
        *terminator = '\0';
        return p;
    }

    // Returns the result of xvector<T>::substr(offset,count) as a basic_xstring.  The xvector<T>
    // is viewed as a basic_xstring through a reinterpret_cast and then copied into the result.
    basic_xstring substr(ssize_t offset = 0, ssize_t count = npos) const
    {
        xvector<T> piece = xvector<T>::substr(offset,count);
        const basic_xstring<T>& asString = reinterpret_cast<const basic_xstring<T>&>(piece);
        return asString;
    }

    // ---- Searching for (the characters of) a null terminated string ----
    // Each overload measures the string and forwards the range [s, s+length) to xvector<T>.

    ssize_t find(const T* p, ssize_t offset = 0) const
    {
        return find(p, p+GetStringLength(p), offset);
    }

    ssize_t rfind(const T* p, ssize_t offset = npos) const
    {
        return rfind(p, p+GetStringLength(p), offset);
    }

    ssize_t find_first_of(const T* s, ssize_t offset = 0) const
    {
        return xvector<T>::find_first_of(s, s+GetStringLength(s), offset);
    }

    ssize_t find_first_not_of(const T* s, ssize_t offset = 0) const
    {
        return xvector<T>::find_first_not_of(s, s+GetStringLength(s), offset);
    }

    ssize_t find_last_of(const T* s, ssize_t offset = npos) const
    {
        return xvector<T>::find_last_of(s, s+GetStringLength(s), offset);
    }

    ssize_t find_last_not_of(const T* s, ssize_t offset = npos) const
    {
        return xvector<T>::find_last_not_of(s, s+GetStringLength(s), offset);
    }

    // ---- Modification using a null terminated string ----

    basic_xstring& operator+=(const T *s)
    {
        const T* const end = s + GetStringLength(s);
        xvector<T>::push_back(s, end);
        return *this;
    }

    basic_xstring& append(const T* s)
    {
        const T* const end = s + GetStringLength(s);
        xvector<T>::push_back(s, end);
        return *this;
    }

    basic_xstring& assign(const T* s)
    {
        assign(s, GetStringLength(s));
        return *this;
    }

    basic_xstring& replace(ssize_t i,ssize_t n,const T* p)
    {
        replace(i, n, p, GetStringLength(p));
        return *this;
    }

    basic_xstring& insert(ssize_t i,const T *s)
    {
        insert(i, s, s+GetStringLength(s));
        return *this;
    }
};
template<typename T> inline bool operator==(const basic_xstring<T>& x, const T* y) { return x._Eq(y); }
template<typename T> inline bool operator==(const T* y, const basic_xstring<T>& x) { return x._Eq(y); }
template<typename T> inline bool operator!=(const basic_xstring<T>& x, const T* y) { return !(x == y); }
template<typename T> inline bool operator!=(const T* y, const basic_xstring<T>& x) { return !(x == y); }
template<typename T> inline bool operator<(const basic_xstring<T>& x, const T* y) { return x.compare(y) < 0; }
template<typename T> inline bool operator>(const T* y, const basic_xstring<T>& x) { return x.compare(y) < 0; }
template<typename T> inline bool operator>(const basic_xstring<T>& x, const T* y) { return x.compare(y) > 0; }
template<typename T> inline bool operator<(const T* y, const basic_xstring<T>& x) { return x.compare(y) > 0; }
template<typename T> inline bool operator<=(const basic_xstring<T>& x, const T* y) { return x.compare(y) <= 0; }
template<typename T> inline bool operator>=(const T* y, const basic_xstring<T>& x) { return x.compare(y) <= 0; }
template<typename T> inline bool operator>=(const basic_xstring<T>& x, const T* y) { return x.compare(y) >= 0; }
template<typename T> inline bool operator<=(const T* y, const basic_xstring<T>& x) { return x.compare(y) >= 0; }
// Concatenation.  The first two overloads delegate to the xvector<T> operator+ and then view
// the resulting xvector<T> as a basic_xstring<T> (a downcast) in order to copy it into the
// return value.  The last two build a copy of the left operand and append the right operand.

// string + string
template <typename T>
inline const basic_xstring<T> operator+(const basic_xstring<T>& x, const basic_xstring<T>& y)
{
    xvector<T> joined = static_cast<const xvector<T>&>(x) + static_cast<const xvector<T>&>(y);
    return static_cast<const basic_xstring<T>&>(joined);
}

// string + single element
template <typename T>
inline const basic_xstring<T> operator+(const basic_xstring<T>& x, const T& y)
{
    xvector<T> joined = (static_cast<const xvector<T>&>(x) + y);
    return static_cast<const basic_xstring<T>&>(joined);
}

// string + null terminated string
template <typename T>
inline const basic_xstring<T> operator+(const basic_xstring<T>& x, const T* y)
{
    basic_xstring<T> result(x);
    result.append(y);
    return result;
}

// null terminated string + string
template <typename T>
inline const basic_xstring<T> operator+(const T* x, const basic_xstring<T>& y)
{
    basic_xstring<T> result(x);
    result.append(y);
    return result;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// string8, string16
// Explicit instantiation declarations: code including this header does not implicitly
// instantiate these four classes.
// NOTE(review): presumably the matching explicit instantiation definitions live in the cxUtils
// library and cxUtils_API_T is its import/export decoration - confirm against cxUtils.h.
extern template class cxUtils_API_T xvector<char8>;
extern template class cxUtils_API_T basic_xstring<char8>;
extern template class cxUtils_API_T xvector<char16>;
extern template class cxUtils_API_T basic_xstring<char16>;
// string8 is a basic_xstring of char8 and string16 is a basic_xstring of char16.
using string8 = basic_xstring<char8>;
using string16 = basic_xstring<char16>;
///////////////////////////////////////////////////////////////////////////////////////////////////
// Produces UTF-16 in 'dst' from the ASCII text given by 'src' and 'srcLen'.
// Non ascii characters are mapped to '?'
cxUtils_API void Utf16FromAscii(string16& dst, const char8* src, ssize_t srcLen);

// Convenience overload taking the source as a string8; forwards its data() and size().
inline void Utf16FromAscii(string16& dst, const string8& src)
{
    Utf16FromAscii(
        dst,
        src.data(),
        src.size());
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Note that conversions between UTF-8 and UTF-16 are lossless

// UTF-8 -> UTF-16.
// Returns false if 'src' isn't a valid UTF-8 encoding of a unicode string.
// NOTE(review): the overload without 'len' presumably expects a null terminated 'src' - confirm
// against the implementation.
cxUtils_API bool Utf16FromUtf8(string16& dst, const char8* src, ssize_t len);
cxUtils_API bool Utf16FromUtf8(string16& dst, const char8* src);
inline bool Utf16FromUtf8(string16& dst, const string8& src)
{
    // Forward the string's buffer and length to the (pointer,length) overload.
    return Utf16FromUtf8(dst, src.data(), src.size());
}

// UTF-16 -> UTF-8.
// Returns false if 'src' isn't a valid UTF-16 encoding of a unicode string.
// NOTE(review): the overload without 'len' presumably expects a null terminated 'src' - confirm
// against the implementation.
cxUtils_API bool Utf8FromUtf16(string8& dst, const char16* src, ssize_t len);
cxUtils_API bool Utf8FromUtf16(string8& dst, const char16* src);
inline bool Utf8FromUtf16(string8& dst, const string16& src)
{
    // Forward the string's buffer and length to the (pointer,length) overload.
    return Utf8FromUtf16(dst, src.data(), src.size());
}
struct StringConversionException : public IException
{
virtual void Write(xostream& os) const
{
os << "String conversion failed";
}
};
// Throwing wrappers around Utf16FromUtf8() / Utf8FromUtf16(): a false return from the
// underlying conversion is turned into a StringConversionException.

// Convert from string8 to string16
inline void Convert(string16& dst, const string8& src)
{
    if (Utf16FromUtf8(dst,src)) return;
    throw StringConversionException();
}
inline void Convert(string16& dst, const char8* src)
{
    if (Utf16FromUtf8(dst,src)) return;
    throw StringConversionException();
}
inline void Convert(string16& dst, const char8* src, ssize_t len)
{
    if (Utf16FromUtf8(dst,src,len)) return;
    throw StringConversionException();
}

// Convert from string16 to string8
inline void Convert(string8& dst, const string16& src)
{
    if (Utf8FromUtf16(dst,src)) return;
    throw StringConversionException();
}
inline void Convert(string8& dst, const char16* src)
{
    if (Utf8FromUtf16(dst,src)) return;
    throw StringConversionException();
}
inline void Convert(string8& dst, const char16* src, ssize_t len)
{
    if (Utf8FromUtf16(dst,src,len)) return;
    throw StringConversionException();
}
// Same-encoding "conversions": no transcoding is needed, so the source is simply assigned to
// 'dst'.  These let callers use Convert() uniformly whatever the source and target types are.

// string16 <- UTF-16 source
inline void Convert(string16& dst, const string16& src)
{
    dst = src;
}
inline void Convert(string16& dst, const char16* src)
{
    dst = src;
}
inline void Convert(string16& dst, const char16* src, ssize_t len)
{
    dst.assign(src,len);
}

// string8 <- UTF-8 source
inline void Convert(string8& dst, const string8& src)
{
    dst = src;
}
inline void Convert(string8& dst, const char8* src)
{
    dst = src;
}
inline void Convert(string8& dst, const char8* src, ssize_t len)
{
    dst.assign(src,len);
}
#ifdef _WIN32
// Windows only: overloads taking wchar_t text.  The pointer is cast to 'const char16*' and
// then handled exactly like UTF-16 input.

// wchar_t -> string8 (throws StringConversionException when Utf8FromUtf16 returns false)
inline void Convert(string8& dst, const wchar_t* src)
{
    if (Utf8FromUtf16(dst,(const char16*)src)) return;
    throw StringConversionException();
}
inline void Convert(string8& dst, const wchar_t* src, ssize_t len)
{
    if (Utf8FromUtf16(dst,(const char16*)src,len)) return;
    throw StringConversionException();
}

// wchar_t -> string16 (plain assignment)
inline void Convert(string16& dst, const wchar_t* src)
{
    dst = (const char16*)src;
}
inline void Convert(string16& dst, const wchar_t* src, ssize_t len)
{
    dst.assign((const char16*)src,len);
}
#endif
// AsString8: obtain a string8 from any of the supported sources.  A string8 argument is passed
// through by reference; char8 input is copied; UTF-16 input goes through Convert() and so may
// throw StringConversionException.
inline const string8& AsString8(const string8& s)
{
    return s;
}
inline string8 AsString8(const string16& s)
{
    string8 result;
    Convert(result,s);
    return result;
}
inline string8 AsString8(const char8* s)
{
    return s;
}
inline string8 AsString8(const char8* s, ssize_t len)
{
    return string8(s,len);
}
inline string8 AsString8(const char16* s)
{
    string8 result;
    Convert(result,s);
    return result;
}
inline string8 AsString8(const char16* s, ssize_t len)
{
    string8 result;
    Convert(result,s,len);
    return result;
}
// AsString16: obtain a string16 from any of the supported sources.  A string16 argument is
// passed through by reference; char16 input is copied; UTF-8 input goes through Convert() and
// so may throw StringConversionException.
inline string16 AsString16(const string8& s)
{
    string16 result;
    Convert(result,s);
    return result;
}
inline const string16& AsString16(const string16& s)
{
    return s;
}
inline string16 AsString16(const char8* s)
{
    string16 result;
    Convert(result,s);
    return result;
}
inline string16 AsString16(const char8* s, ssize_t len)
{
    string16 result;
    Convert(result,s,len);
    return result;
}
inline string16 AsString16(const char16* s)
{
    return s;
}
inline string16 AsString16(const char16* s, ssize_t len)
{
    return string16(s,len);
}
// AsXstring: obtain an xstring from any of the supported sources.  Every overload returns the
// result of the corresponding AsString8() overload.

// From null terminated text
inline xstring AsXstring(const char8* s)
{
    return AsString8(s);
}
inline xstring AsXstring(const char16* s)
{
    return AsString8(s);
}

// From a pointer plus an explicit length
inline xstring AsXstring(const char8* s, ssize_t len)
{
    return AsString8(s,len);
}
inline xstring AsXstring(const char16* s, ssize_t len)
{
    return AsString8(s,len);
}

// From string objects
inline xstring AsXstring(const string8& s)
{
    return AsString8(s);
}
inline xstring AsXstring(const string16& s)
{
    return AsString8(s);
}
#ifdef _WIN32
// Windows only: the AsString8 / AsString16 / AsXstring families extended to wchar_t input.

// wchar_t -> string8, via the wchar_t overloads of Convert()
inline string8 AsString8(const wchar_t* s)
{
    string8 result;
    Convert(result,s);
    return result;
}
inline string8 AsString8(const wchar_t* s, ssize_t len)
{
    string8 result;
    Convert(result,s,len);
    return result;
}

// wchar_t -> string16: the pointer is cast to 'const char16*' and copied
inline string16 AsString16(const wchar_t* s)
{
    return (const char16*)s;
}
inline string16 AsString16(const wchar_t* s, ssize_t len)
{
    return string16((const char16*)s,len);
}

// wchar_t -> xstring: cast to 'const char16*' and forwarded to AsString8()
inline xstring AsXstring(const wchar_t* s)
{
    return AsString8((const char16*) s);
}
inline xstring AsXstring(const wchar_t* s, ssize_t len)
{
    return AsString8((const char16*) s,len);
}
#endif
// Conversions between vector of chars and strings
// Each overload returns a reference to the very same object, downcast from xvector<T> to the
// string type derived from it, so nothing is copied.  Const and non-const forms are provided.

// xvector<char8> viewed as string8
inline const string8& AsString8(const xvector<char8>& s)
{
    return static_cast<const string8&>(s);
}
inline string8& AsString8(xvector<char8>& s)
{
    return static_cast<string8&>(s);
}

// xvector<char16> viewed as string16
inline const string16& AsString16(const xvector<char16>& s)
{
    return static_cast<const string16&>(s);
}
inline string16& AsString16(xvector<char16>& s)
{
    return static_cast<string16&>(s);
}

// xvector<xchar> viewed as xstring
inline const xstring& AsXstring(const xvector<xchar>& s)
{
    return static_cast<const xstring&>(s);
}
inline xstring& AsXstring(xvector<xchar>& s)
{
    return static_cast<xstring&>(s);
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// xstring

// xstring is basic_xstring instantiated on xchar.
using xstring = basic_xstring<xchar>;

// Stream insertion of an xstring into an xostream.  Only declared in this header.
cxUtils_API xostream& operator<<(xostream& os, const xstring& v);
} // namespace ceda
#endif // include guard