Bounds.h
// Bounds.h
//
// Author David Barrett-Lennard
// (C)opyright Cedanet Pty Ltd 2022
#pragma once
#ifndef Ceda_cxUtils_Bounds_H
#define Ceda_cxUtils_Bounds_H
#include "cxUtils.h"
#include "CedaAssert.h"
namespace ceda
{
///////////////////////////////////////////////////////////////////////////////////////////////////
// Reinterpret casts on pointers that leave constness intact
/*
Let T be a type that isn't const qualified. E.g. T = Circle (not const Circle)
Then given any pointer p, cast_ptr<T>(p) converts it either to a T* or a const T*
depending on whether p points at a const object.
This is similar to reinterpret_cast<T*> or reinterpret_cast<const T*> including the fact
that constness cannot be cast away. The important difference is that you always use the
form cast_ptr<T> and the constness is passed through automatically. By contrast, with
reinterpret_cast you must either use reinterpret_cast<T*> or reinterpret_cast<const T*>
depending on the situation.
*/
template <typename T>
inline T* cast_ptr(void* p)
{
return (T*) p;
}
template <typename T>
inline const T* cast_ptr(const void* p)
{
return (const T*) p;
}
// offset_ptr is similar to cast_ptr<T> but also allows for an offset in bytes
template <typename T>
inline T* offset_ptr(void* p, ssize_t offset)
{
return (T*) ((octet_t*) p + offset);
}
template <typename T>
inline const T* offset_ptr(const void* p, ssize_t offset)
{
return (const T*) ((const octet_t*) p + offset);
}
/*
// Apply an offset to a given pointer, regardless of its type
template <typename T>
T* OffsetPointerInBytes(T* p, ssize_t offset)
{
return (T*) ((octet_t*)p + offset);
}
*/
// Apply an offset to a given pointer, regardless of its type
template <typename T>
inline void ApplyStride(T*& p, ssize_t offset)
{
(octet_t*&)p += offset;
}
/*
We have an array A with n elements with finite values: A[0] ... A[n-1]. This is sorted meaning
A[i-1] <= A[i]. Note that repeated values are possible.
We use the terminology of lower and upper bounds used by the standard library
Lower bound
-----------
Returns an iterator pointing to the first element in the sorted range [first,last) which does
not compare less than value.
not (A[i] < v)
i.e. v <= A[i]
Upper bound
-----------
Returns an iterator pointing to the first element in the sorted range [first,last) which
compares greater than value.
A[i] > v
i.e. v < A[i]
There are actually 4 distinct functions
---------------------------------------
Let v = 1. We can show each option:
(reverse_upper_bound) (reverse_lower_bound)
right most right most
A[i] < v A[i] <= v
| |
A[] = 0 0 0 1 1 1 1 1 1 2 2 2
| |
left most left most
A[i] >= v A[i] > v
(STL lower_bound) (STL upper_bound)
Proposed naming convention (that complies with the standard library)
lower_bound
upper_bound
reverse_upper_bound = upper_bound of reversed list associated with swapping < and >
= lower_bound - 1
reverse_lower_bound = lower_bound of reversed list associated wwith swapping < and >
= upper_bound - 1
Given v1,v2 we are interested in finding the largest interval [i,j) such that for each k in
[i,j), A[k] is in [v1,v2).
This interval corresponds to
[i,j) = [ lower_bound(v1), reverse_upper_bound(v2) ]
= [ lower_bound(v1), lower_bound(v2) )
*/
// Returns smallest index in [0,n) satisfying A[i] >= v, or else n
template <typename T>
ssize_t LinearLowerBound(const T* A, ssize_t s, ssize_t n, const T* v)
{
ssize_t i;
for (i=0 ; i < n ; ++i)
{
if (!(*A < *v)) break;
ApplyStride(A,s);
}
return i;
}
// Returns smallest index in [0,n) satisfying A[i] > v, or else n
template <typename T>
ssize_t LinearUpperBound(const T* A, ssize_t s, ssize_t n, const T* v)
{
ssize_t i;
for (i=0 ; i < n ; ++i)
{
if (*v < *A) break;
ApplyStride(A,s);
}
return i;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Binary versions
/*
Optmised binary search that assumes there are no repeats
*/
template <typename T>
ssize_t __stdcall BinaryLowerBoundNoRepeats(const T* A, ssize_t s, ssize_t n, const T* v)
{
/*
Use binary search to find the insertion position. We have keys K(0), .... , K(n-1)
Let K be the key to be inserted
For convenience, define K(-1) = -inf, and K(n) = +inf.
Claim: Each assignment to hi makes it strictly decrease
Proof: hi can only be replaced by hi' = mid-1 where mid = floor( (lo+hi) / 2)
mid = floor( (lo+hi) / 2) => mid <= (lo+hi)/2
=> 2*mid <= lo + hi
=> 2*mid <= 2*hi (because lo <= hi in while condition)
=> mid <= hi
=> mid-1 < hi
=> hi' < hi
Claim: Each assignment to lo makes it strictly increase
Proof: lo can only be replaced by lo' = mid+1 where mid = floor( (lo+hi) / 2)
mid = floor( (lo+hi) / 2) => mid >= floor( (lo+lo)/2 ) (because lo <= hi)
=> mid >= lo
=> mid+1 > lo
=> lo' > lo
Claim: An invariant of the loop is hi < n
Proof: Initially hi = n-1 < n. At each iteration hi cannot increase
Claim: At each iteration mid = floor( (lo+hi)/2 ) satisfies mid < n
Proof: From a proof above we see that mid <= hi. Now hi < n therefore mid < n
Claim: An invariant of the binary search is : K is in [ K(lo-1), K(hi+1) )
Proof: (by induction)
* Initially lo = 0, hi = n-1.
So [ K(lo-1), K(hi+1) ) = [ K(-1), K(n) ) = [-inf, +inf) which must contain K
* At each iteration mid = floor( (lo+hi) / 2).
Therefore lo <= mid <= hi
Therefore lo-1 < mid < hi+1
1. if K > K(mid) then lo becomes lo' = mid+1.
lo-1 < mid < hi+1 => lo-1 < lo'-1 < hi+1. Hence the interval strictly decreases
in size but remains non-empty.
K > K(mid) => K > K(lo'-1) so the invariant remains true
2. if K < K(mid) then hi becomes hi' = mid-1
lo-1 < mid < hi+1 => lo-1 < hi'+1 < hi+1. Hence the interval strictly decreases
in size but remains non-empty.
K < K(mid) => K < K(hi'+1) so the invariant remains true
Claim: The loop eventually exits with lo = hi+1
Proof: At each iteration either lo strictly increases or else hi strictly decreases,
therefore an infinite loop is not possible.
The invariant implies lo-1 < hi+1 => lo < hi+2
The iteration exits with lo > hi.
So we have hi < lo < hi+2
This implies lo = h1+1.
*/
ssize_t insertPos;
{
ssize_t lo = 0;
ssize_t hi = n-1;
while (lo <= hi)
{
const ssize_t mid = (lo + hi)/2;
cxAssert(0 <= mid && mid < n);
const T& pmid = *offset_ptr<T>(A, s*mid);
if (*v < pmid)
{
hi = mid - 1;
}
else
{
// By assumption of no repeats
cxAssert(pmid < *v);
lo = mid + 1;
}
}
cxAssert(lo == hi+1);
// Correct insertion position is at lo. Note that lo = n is possible!
insertPos = lo;
}
cxAssert(0 <= insertPos && insertPos <= n);
return insertPos;
}
// Assumes A[] consists of monotone increasing values
// Returns smallest index in [0,n) satisfying A[i] >= v, or else n
template <typename T>
ssize_t __stdcall BinaryLowerBound(const T* A, ssize_t s, ssize_t n, const T* v)
{
/*
For convenience in the following proofs let's define
A[-1] = -infinity
A[n] = +infinity
Claim: At each iteration mid = floor((lo+hi)/2) must be in the range [lo,hi)
Proof: By the condition in the while loop we know that lo < hi.
Clearly mid >= lo, and mid <= hi.
Suppose mid = hi. Then hi <= (lo+hi)/2. i.e. 2hi <= lo+hi. i.e. hi <= lo.
This contradicts lo < hi.
Claim: An invariant of the binary search is v is in the half open interval
( A[lo-1], A[hi] ]
Proof (by induction)
1. This invariant is met with the initial settings of lo=0 and hi=n,
because v in ( -infinity, +infinity ] = ( A[lo-1], A[hi] ]
2. At each iteration lo < hi therefore mid = floor( (lo+hi)/2 ) must in in the range
[lo,hi).
a. If v > A[mid] then lo becomes mid+1, and A[lo-1] = A[mid+1-1] = A[mid].
Therefore after changing lo we still have v > A[lo-1]. i.e. the invariant
remains true
b. Otherwise if v <= A[mid] then hi is assigned to mid. Therefore v <= A[hi]
remains true.
Claim : The binary search exits with lo = hi
Proof :
Initially lo = 0 and hi = n. Now 0 <= n therefore lo <= hi
At each iteration mid is in the range [lo,hi). If lo is set to mid+1 then lo <= hi.
Otherwise if hi is set to mid then lo <= mid. Therefore it is not possible for the loop
to exit with lo > hi. It also clearly can't exit with hi < lo. Therefore we deduce
that it must exit with lo = hi.
It only remains to show that it doesn't get stuck in an infinite loop. This follows
because each iteration always increases lo or decreases hi.
*/
// loop invariant : ( A[lo-1], A[hi] ] contains v, where A[-1] = -infinity and A[n] = +infinity
// finishes with lo = hi, and hi is our lower bound
ssize_t lo = 0;
ssize_t hi = n;
while (lo < hi)
{
ssize_t mid = (lo + hi)/2;
cxAssert(0 <= mid && mid < n);
if (*offset_ptr<T>(A, s*mid) < *v) lo = mid + 1;
else hi = mid;
}
cxAssert(lo == hi);
cxAssert(0 <= hi && hi <= n);
return hi;
}
// Assumes A[] consists of monotone increasing values
// Returns smallest index in [0,n) satisfying A[i] > v, or else n
template <typename T>
ssize_t __stdcall BinaryUpperBound(const T* A, ssize_t s, ssize_t n, const T* v)
{
// loop invariant : [ A[lo-1], A[hi] ) contains v, where A[-1] = -infinity and A[n] = +infinity
// finishes with lo = hi, and hi is our upper bound
ssize_t lo = 0;
ssize_t hi = n;
while (lo < hi)
{
ssize_t mid = (lo + hi)/2;
cxAssert(0 <= mid && mid < n);
if (*v < *offset_ptr<T>(A, s*mid)) hi = mid;
else lo = mid + 1;
}
cxAssert(lo == hi);
cxAssert(0 <= hi && hi <= n);
return hi;
}
} // namespace ceda
#endif // include guard