The following example illustrates the poor performance which can result when using C++ virtual inheritance.
This is with Microsoft Visual Studio 16.11.7 (Nov 2021) release x64 build with /O2 /GL switches.
struct Iw
{
virtual void inc1() = 0;
virtual void inc5() = 0;
};
struct Ix : public virtual Iw
{
virtual void inc2() = 0;
};
struct Iy : public virtual Iw
{
virtual void inc3() = 0;
};
struct Iz : public virtual Ix, public virtual Iy
{
virtual void inc4() = 0;
};
// size = 88 bytes!
struct C : public Iz
{
C() : x(0) {}
virtual void inc1() { x += 1; }
virtual void inc2() { inc1(); inc1(); }
virtual void inc3() { inc2(); inc1(); }
virtual void inc4() { inc3(); inc1(); }
virtual void inc5() { inc4(); inc1(); }
int x;
};
void run()
{
C c;
Iw* p = &c;
// Rate is 52 MHz on Ryzen9 3900X Windows-x64
for (int i=0 ; i < 1000000 ; ++i)
{
p->inc5();
}
}
The inefficiency is apparent from the machine code which is executed for the loop.
00007FF68ECF0A36 mov rax,qword ptr [r15]
00007FF68ECF0A39 mov rcx,r15
00007FF68ECF0A3C call qword ptr [rax+8]
00007FF68ECA1253 jmp InterfaceTimingTests::Cz2::inc5 (07FF68ECF1FD4h)
00007FF68ECF1FD4 movsxd rax,dword ptr [rcx-4]
00007FF68ECF1FD8 sub rcx,rax
00007FF68ECF1FDB jmp InterfaceTimingTests::Cz2::inc5 (07FF68ECA2B8Ah)
00007FF68ECA2B8A jmp InterfaceTimingTests::Cz2::inc5 (07FF68ECF1FF0h)
00007FF68ECF1FF0 push rbx
00007FF68ECF1FF2 sub rsp,20h
00007FF68ECF1FF6 mov rax,qword ptr [rcx-20h]
00007FF68ECF1FFA mov rbx,rcx
00007FF68ECF1FFD add rcx,0FFFFFFFFFFFFFFE0h
00007FF68ECF2001 call qword ptr [rax]
00007FF68ECA180C jmp InterfaceTimingTests::Cz2::inc4 (07FF68ECF1F70h)
00007FF68ECF1F70 push rbx
00007FF68ECF1F72 sub rsp,20h
00007FF68ECF1F76 mov rax,qword ptr [rcx+8]
00007FF68ECF1F7A mov rbx,rcx
00007FF68ECF1F7D add rcx,8
00007FF68ECF1F81 movsxd rdx,dword ptr [rax+0Ch]
00007FF68ECF1F85 add rcx,rdx
00007FF68ECF1F88 mov rax,qword ptr [rcx]
00007FF68ECF1F8B call qword ptr [rax]
00007FF68ECA32D3 jmp InterfaceTimingTests::Cz2::inc3 (07FF68ECF1EF4h)
00007FF68ECF1EF4 movsxd rax,dword ptr [rcx-4]
00007FF68ECF1EF8 sub rcx,rax
00007FF68ECF1EFB jmp InterfaceTimingTests::Cz2::inc3 (07FF68ECA4868h)
00007FF68ECA4868 jmp InterfaceTimingTests::Cz2::inc3 (07FF68ECF1F10h)
00007FF68ECF1F10 push rbx
00007FF68ECF1F12 sub rsp,20h
00007FF68ECF1F16 mov rax,qword ptr [rcx-40h]
00007FF68ECF1F1A mov rbx,rcx
00007FF68ECF1F1D add rcx,0FFFFFFFFFFFFFFC0h
00007FF68ECF1F21 movsxd rdx,dword ptr [rax+8]
00007FF68ECF1F25 add rcx,rdx
00007FF68ECF1F28 mov rax,qword ptr [rcx]
00007FF68ECF1F2B call qword ptr [rax]
00007FF68ECA2B3F jmp InterfaceTimingTests::Cz2::inc2 (07FF68ECF1E84h)
00007FF68ECF1E84 movsxd rax,dword ptr [rcx-4]
00007FF68ECF1E88 sub rcx,rax
00007FF68ECF1E8B jmp InterfaceTimingTests::Cz2::inc2 (07FF68ECA31D9h)
00007FF68ECA31D9 jmp InterfaceTimingTests::Cz2::inc2 (07FF68ECF1EA0h)
00007FF68ECF1EA0 push rbx
00007FF68ECF1EA2 sub rsp,20h
00007FF68ECF1EA6 mov rax,qword ptr [rcx-28h]
00007FF68ECF1EAA mov rbx,rcx
00007FF68ECF1EAD add rcx,0FFFFFFFFFFFFFFD8h
00007FF68ECF1EB1 movsxd rdx,dword ptr [rax+4]
00007FF68ECF1EB5 add rcx,rdx
00007FF68ECF1EB8 mov rax,qword ptr [rcx]
00007FF68ECF1EBB call qword ptr [rax]
00007FF68ECA2914 jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECF1E54h)
00007FF68ECF1E54 movsxd rax,dword ptr [rcx-4]
00007FF68ECF1E58 sub rcx,rax
00007FF68ECF1E5B jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECA1E6Fh)
00007FF68ECA1E6F jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECF1E70h)
00007FF68ECF1E70 inc dword ptr [rcx-10h]
00007FF68ECF1E73 ret
00007FF68ECF1EBD mov rax,qword ptr [rbx-28h]
00007FF68ECF1EC1 lea rcx,[rbx-28h]
00007FF68ECF1EC5 movsxd rdx,dword ptr [rax+4]
00007FF68ECF1EC9 add rcx,rdx
00007FF68ECF1ECC mov rax,qword ptr [rcx]
00007FF68ECF1ECF add rsp,20h
00007FF68ECF1ED3 pop rbx
00007FF68ECF1ED4 jmp qword ptr [rax]
00007FF68ECA2914 jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECF1E54h)
00007FF68ECF1E54 movsxd rax,dword ptr [rcx-4]
00007FF68ECF1E58 sub rcx,rax
00007FF68ECF1E5B jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECA1E6Fh)
00007FF68ECA1E6F jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECF1E70h)
00007FF68ECF1E70 inc dword ptr [rcx-10h]
00007FF68ECF1E73 ret
00007FF68ECF1F2D mov rax,qword ptr [rbx-40h]
00007FF68ECF1F31 lea rcx,[rbx-40h]
00007FF68ECF1F35 movsxd rdx,dword ptr [rax+4]
00007FF68ECF1F39 add rcx,rdx
00007FF68ECF1F3C mov rax,qword ptr [rcx]
00007FF68ECF1F3F add rsp,20h
00007FF68ECF1F43 pop rbx
00007FF68ECF1F44 jmp qword ptr [rax]
00007FF68ECA2914 jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECF1E54h)
00007FF68ECF1E54 movsxd rax,dword ptr [rcx-4]
00007FF68ECF1E58 sub rcx,rax
00007FF68ECF1E5B jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECA1E6Fh)
00007FF68ECA1E6F jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECF1E70h)
00007FF68ECF1E70 inc dword ptr [rcx-10h]
00007FF68ECF1E73 ret
00007FF68ECF1F8D mov rax,qword ptr [rbx+8]
00007FF68ECF1F91 lea rcx,[rbx+8]
00007FF68ECF1F95 movsxd rdx,dword ptr [rax+4]
00007FF68ECF1F99 add rcx,rdx
00007FF68ECF1F9C mov rax,qword ptr [rcx]
00007FF68ECF1F9F add rsp,20h
00007FF68ECF1FA3 pop rbx
00007FF68ECF1FA4 jmp qword ptr [rax]
00007FF68ECA2914 jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECF1E54h)
00007FF68ECF1E54 movsxd rax,dword ptr [rcx-4]
00007FF68ECF1E58 sub rcx,rax
00007FF68ECF1E5B jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECA1E6Fh)
00007FF68ECA1E6F jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECF1E70h)
00007FF68ECF1E70 inc dword ptr [rcx-10h]
00007FF68ECF1E73 ret
00007FF68ECF2003 mov rax,qword ptr [rbx-18h]
00007FF68ECF2007 lea rcx,[rbx-18h]
00007FF68ECF200B movsxd rdx,dword ptr [rax+4]
00007FF68ECF200F add rcx,rdx
00007FF68ECF2012 mov rax,qword ptr [rcx]
00007FF68ECF2015 add rsp,20h
00007FF68ECF2019 pop rbx
00007FF68ECF201A jmp qword ptr [rax]
00007FF68ECA2914 jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECF1E54h)
00007FF68ECF1E54 movsxd rax,dword ptr [rcx-4]
00007FF68ECF1E58 sub rcx,rax
00007FF68ECF1E5B jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECA1E6Fh)
00007FF68ECA1E6F jmp InterfaceTimingTests::Cz2::inc1 (07FF68ECF1E70h)
00007FF68ECF1E70 inc dword ptr [rcx-10h]
00007FF68ECF1E73 ret
00007FF68ECF0A3F sub rsi,1
00007FF68ECF0A43 jne InterfaceTimingTests::Run+916h (07FF68ECF0A36h)
Consider the same example, except that we now use $interfaces
$interface+ Iw
{
void inc1();
void inc5();
};
$interface+ Ix : Iw
{
void inc2();
};
$interface+ Iy : Iw
{
void inc3();
};
$interface+ Iz : Ix, Iy
{
void inc4();
};
// Size is 4 bytes
struct C
{
C() : x(0) {}
void inc1() { x += 1; }
void inc2() { inc1(); inc1(); }
void inc3() { inc2(); inc1(); }
void inc4() { inc3(); inc1(); }
void inc5() { inc4(); inc1(); }
int x;
};
void run()
{
C c;
ptr<Iw> p = &c;
// Rate is 4.4 GHz on Ryzen9 3900X Windows-x64
for (int i=0 ; i < 1000000 ; ++i)
{
p->inc5();
}
}
Note that the size of C is now 4 bytes (previously was 88 bytes).
Also since the struct C doesn't have any virtual functions or virtual inheritance, the compiler is much more likely to inline aggressively. In this case the loop executes in 1/85 of the time, and the following machine code executes in the loop:
again: add eax,5
sub rcx,1
jne again
The following is the generated C++ code for the interfaces Iw, Ix, Iy and Iz
struct Iw
{
struct FnTable
{
void (*inc1)(void* _self);
};
Iw(void* _self, const FnTable* table) : m_self(_self), m_table(table) {}
void AssignFrom(Iw& _rhs) { m_self = _rhs.m_self; m_table = _rhs.m_table; }
void AssignFrom(Iw const& _rhs) const { const_cast<Iw*>(this)->m_self = _rhs.m_self; const_cast<Iw*>(this)->m_table = _rhs.m_table; }
template<typename T> struct Stubs
{
static void inc1(void* _self) { ((T*)_self)->inc1(); }
static const FnTable& GetTable()
{
static const FnTable t =
{
&inc1
};
return t;
}
};
template<typename T> void CoerceFrom(T* _rhs) { m_self = _rhs; m_table = &Stubs<T>::GetTable(); }
template<typename T> void CoerceFrom(const T* _rhs) const { const_cast<Iw*>(this)->m_self = const_cast<T*>(_rhs); const_cast<Iw*>(this)->m_table = &Stubs<T>::GetTable(); }
void inc1() { m_table->inc1(m_self); }
union
{
void* m_self;
};
FnTable const* m_table;
};
struct Ix
{
struct FnTable
{
Iw::FnTable _Iw;
void (*inc2)(void* _self);
};
Ix(void* _self, const FnTable* table) : m_self(_self), m_table(table) {}
void AssignFrom(Ix& _rhs) { m_self = _rhs.m_self; m_table = _rhs.m_table; }
void AssignFrom(Ix const& _rhs) const { const_cast<Ix*>(this)->m_self = _rhs.m_self; const_cast<Ix*>(this)->m_table = _rhs.m_table; }
template<typename T> struct Stubs
{
static void inc2(void* _self) { ((T*)_self)->inc2(); }
static const FnTable& GetTable()
{
static const FnTable t =
{
{
&Iw::Stubs<T>::inc1,
},
&inc2
};
return t;
}
};
template<typename T> void CoerceFrom(T* _rhs) { m_self = _rhs; m_table = &Stubs<T>::GetTable(); }
template<typename T> void CoerceFrom(const T* _rhs) const { const_cast<Ix*>(this)->m_self = const_cast<T*>(_rhs); const_cast<Ix*>(this)->m_table = &Stubs<T>::GetTable(); }
void inc1() { m_table->_Iw.inc1(m_self); }
void inc2() { m_table->inc2(m_self); }
operator Iw() const { return Iw(m_self, &m_table->_Iw); }
union
{
void* m_self;
};
FnTable const* m_table;
};
struct Iy
{
struct FnTable
{
Iw::FnTable _Iw;
void (*inc3)(void* _self);
};
Iy(void* _self, const FnTable* table) : m_self(_self), m_table(table) {}
void AssignFrom(Iy& _rhs) { m_self = _rhs.m_self; m_table = _rhs.m_table; }
void AssignFrom(Iy const& _rhs) const { const_cast<Iy*>(this)->m_self = _rhs.m_self; const_cast<Iy*>(this)->m_table = _rhs.m_table; }
template<typename T> struct Stubs
{
static void inc3(void* _self) { ((T*)_self)->inc3(); }
static const FnTable& GetTable()
{
static const FnTable t =
{
{
&Iw::Stubs<T>::inc1,
},
&inc3
};
return t;
}
};
template<typename T> void CoerceFrom(T* _rhs) { m_self = _rhs; m_table = &Stubs<T>::GetTable(); }
template<typename T> void CoerceFrom(const T* _rhs) const { const_cast<Iy*>(this)->m_self = const_cast<T*>(_rhs); const_cast<Iy*>(this)->m_table = &Stubs<T>::GetTable(); }
void inc1() { m_table->_Iw.inc1(m_self); }
void inc3() { m_table->inc3(m_self); }
operator Iw() const { return Iw(m_self, &m_table->_Iw); }
union
{
void* m_self;
};
FnTable const* m_table;
};
struct Iz
{
struct FnTable
{
Ix::FnTable _Ix;
Iy::FnTable _Iy;
void (*inc4)(void* _self);
};
Iz(void* _self, const FnTable* table) : m_self(_self), m_table(table) {}
void AssignFrom(Iz& _rhs) { m_self = _rhs.m_self; m_table = _rhs.m_table; }
void AssignFrom(Iz const& _rhs) const { const_cast<Iz*>(this)->m_self = _rhs.m_self; const_cast<Iz*>(this)->m_table = _rhs.m_table; }
template<typename T> struct Stubs
{
static void inc4(void* _self) { ((T*)_self)->inc4(); }
static const FnTable& GetTable()
{
static const FnTable t =
{
{
{
&Iw::Stubs<T>::inc1,
},
&Ix::Stubs<T>::inc2,
},
{
{
&Iw::Stubs<T>::inc1,
},
&Iy::Stubs<T>::inc3,
},
&inc4
};
return t;
}
};
template<typename T> void CoerceFrom(T* _rhs) { m_self = _rhs; m_table = &Stubs<T>::GetTable(); }
template<typename T> void CoerceFrom(const T* _rhs) const { const_cast<Iz*>(this)->m_self = const_cast<T*>(_rhs); const_cast<Iz*>(this)->m_table = &Stubs<T>::GetTable(); }
void inc1() { m_table->_Ix._Iw.inc1(m_self); }
void inc2() { m_table->_Ix.inc2(m_self); }
void inc3() { m_table->_Iy.inc3(m_self); }
void inc4() { m_table->inc4(m_self); }
operator Iw() const { return Iw(m_self, &m_table->_Ix._Iw); }
operator Ix() const { return Ix(m_self, &m_table->_Ix); }
operator Iy() const { return Iy(m_self, &m_table->_Iy); }
union
{
void* m_self;
};
FnTable const* m_table;
};