498 lines
14 KiB
Go
Raw Permalink Normal View History

2021-12-04 16:42:11 +00:00
package graviton
import "io"
import "math"
import "encoding/binary"
import "golang.org/x/xerrors"
// TODO optimize these structures for less RAM/DISK

// inner is a non-leaf tree node. Each inner node looks at one bit of the key
// hash (its bit field) and routes to the right child when that bit is set and
// to the left child otherwise.
type inner struct {
	// hash caches the node hash; a zero length means it has not been
	// computed yet. newInner points it at hash_backer.
	hash []byte
	// hash_backer is the fixed-size storage behind hash.
	hash_backer [HASHSIZE]byte
	// bucket_name is only valid if bit is zero.
	bucket_name []byte

	// fpos and findex locate this node in the store; 0 values are invalid.
	fpos   uint32
	findex uint32

	// Store location of the left child, as serialized by MarshalTo.
	left_fpos   uint32
	left_findex uint32

	// Store location of the right child, as serialized by MarshalTo.
	right_fpos   uint32
	right_findex uint32

	// Children; nil marks a dead end on that side.
	left  node
	right node

	// version_previous is the previous version.
	version_previous uint64
	// version_current is the current version.
	version_current uint64

	// dirty is set when the node has been created or modified in memory.
	dirty bool
	// loaded_partial is set when only position and hash are known and the
	// node body still has to be read from the store.
	loaded_partial bool

	// bit is the index of the key-hash bit this node branches on.
	bit uint8
}
// newInner allocates an inner node that branches on the given bit. The node
// starts out dirty and with an empty hash backed by its own hash_backer array.
func newInner(bit uint8) *inner {
	in := new(inner)
	in.bit = bit
	in.dirty = true // new nodes are dirty by default
	in.hash = in.hash_backer[:0]
	return in
}
// isDirty reports whether the node is flagged as dirty.
func (in *inner) isDirty() bool {
	return in.dirty
}
// isEmpty reports whether the node has neither a left nor a right child.
func (in *inner) isEmpty() bool {
	if in.left != nil {
		return false
	}
	return in.right == nil
}
// lhash returns the hash of the left child, or zerosHash when there is no
// left child.
func (in *inner) lhash(store *Store) ([]byte, error) {
	if in.left == nil {
		return zerosHash[:], nil
	}
	return in.left.Hash(store)
}
// rhash returns the hash of the right child, or zerosHash when there is no
// right child.
func (in *inner) rhash(store *Store) ([]byte, error) {
	if in.right == nil {
		return zerosHash[:], nil
	}
	return in.right.Hash(store)
}
// load_partial completes a partially loaded node by reading it from the
// store. It is a no-op for nodes that are not flagged as partially loaded.
func (in *inner) load_partial(store *Store) error {
	if !in.loaded_partial {
		return nil
	}
	return in.loadinnerfromstore(store)
}
// Hash returns the hash of this node, loading the node from the store first
// if it is only partially loaded. A cached hash is returned as-is; otherwise
// the hash is computed over innerNODE || left hash || right hash, cached in
// in.hash and returned.
func (in *inner) Hash(store *Store) ([]byte, error) {
	if in.loaded_partial { // inner is loaded partially, load it fully now
		if err := in.loadinnerfromstore(store); err != nil {
			return nil, err
		}
	}
	if len(in.hash) != 0 {
		return in.hash, nil
	}

	left, err := in.lhash(store)
	if err != nil {
		return nil, err
	}
	right, err := in.rhash(store)
	if err != nil {
		return nil, err
	}

	// Preimage layout: one type byte followed by both child hashes.
	var preimage [2*HASHSIZE_BYTES + 1]byte
	preimage[0] = innerNODE
	copy(preimage[1:], left)
	copy(preimage[1+HASHSIZE_BYTES:], right)

	digest := sum(preimage[:])
	in.hash = append(in.hash[:0], digest[:]...)
	return in.hash, nil
}
// Position returns the node's store location as (findex, fpos).
func (in *inner) Position() (uint32, uint32) {
	findex, fpos := in.findex, in.fpos
	return findex, fpos
}
// Insert adds the given leaves below this node, loading the node from the
// store first if it is only partially loaded. The node is marked dirty and
// its cached hash is dropped before any leaf is inserted. Insertion stops at
// the first error.
//
// All puts must be checked with deduplication and skipped if duplicate.
func (in *inner) Insert(store *Store, nodes ...*leaf) error {
	if err := in.load_partial(store); err != nil {
		return err
	}

	in.hash = in.hash[:0] // cleanup old hash
	in.dirty = true       // mark node as dirty

	for i := range nodes {
		if err := in.insert(store, nodes[i]); err != nil {
			return err
		}
	}
	return nil
}
// insert places leaf n below this node, recursing until it reaches the
// correct position. The bit of n.keyhash selected by in.bit picks the side:
// set means right, clear means left.
//
// Depending on what currently occupies that side:
//   - nothing: n becomes the child;
//   - an inner node: the insert is delegated to it;
//   - a leaf: if it has the same key hash, or this node is already at lastBit,
//     its value is overwritten via Put (old versions stay reachable through
//     old roots); otherwise a new inner node one bit deeper replaces the leaf
//     and both the old leaf and n are inserted into it.
//
// Any other node type is an invariant violation and panics, matching
// isOnlyChildleaf.
//
// TODO, since the leaf is already stored, we just need the new inner nodes
// and thus change only the pointer; that optimization would be worth the
// slight complexity it creates.
func (in *inner) insert(store *Store, n *leaf) error {
	// slot points at whichever child field this key routes to, so the same
	// logic serves both sides.
	slot := &in.left
	if isBitSet(n.keyhash[:], uint(in.bit)) {
		slot = &in.right
	}

	switch child := (*slot).(type) {
	case nil: // dead end, we are done
		*slot = n
		return nil
	case *inner: // recursively insert the node
		return child.Insert(store, n)
	case *leaf:
		if child.loaded_partial { // leaf is loaded partially, load it fully now
			if err := child.loadfullleaffromstore(store); err != nil {
				return err
			}
		}
		if child.keyhash == n.keyhash || in.bit == lastBit {
			// same key, or no bits left to split on: overwrite in place
			return child.Put(store, n.keyhash, n.value)
		}
		// there is enough slack: push both leaves one level down
		deeper := newInner(in.bit + 1)
		*slot = deeper
		return deeper.Insert(store, child, n)
	default:
		panic("unknown node type")
	}
}
// Get looks up keyhash below this node, loading the node from the store first
// if it is only partially loaded. The lookup follows the right child when the
// bit selected by in.bit is set and the left child otherwise. Reaching a
// missing child yields an error wrapping ErrNotFound.
func (in *inner) Get(store *Store, keyhash [HASHSIZE]byte) ([]byte, error) {
	if err := in.load_partial(store); err != nil {
		return nil, err
	}

	goRight := isBitSet(keyhash[:], uint(in.bit))
	switch {
	case goRight && in.right != nil:
		return in.right.Get(store, keyhash)
	case goRight:
		return nil, xerrors.Errorf("%w: right dead end at %d. keyhash %x", ErrNotFound, in.bit, keyhash)
	case in.left != nil:
		return in.left.Get(store, keyhash)
	default:
		return nil, xerrors.Errorf("%w: left dead end at %d. keyhash %x", ErrNotFound, in.bit, keyhash)
	}
}
// isOnlyChildleaf reports whether n is an inner node whose single child is a
// leaf, and returns that leaf if so.
//
// It returns (nil, false) for nil, for leaves, for inner nodes with zero or
// two children, and for inner nodes whose only child is not a leaf: nodes can
// only be collapsed if the child is an end leaf; if the chain hangs lower it
// is kept hanging. Unknown node types panic.
func isOnlyChildleaf(n node) (node, bool) {
	switch v := n.(type) {
	case nil, *leaf:
		return nil, false
	case *inner:
		hasLeft, hasRight := v.left != nil, v.right != nil
		if hasLeft == hasRight { // both present or both absent
			return nil, false
		}
		only := v.right
		if hasLeft {
			only = v.left
		}
		if getNodeType(only) != leafNODE {
			return nil, false
		}
		return only, true
	default:
		panic("unknown node type")
	}
}
// Delete removes keyhash from the subtree below this node, loading the node
// from the store first if it is only partially loaded.
//
// The returns are in this order: empty, changed, err.
//   - empty is true when this node has no children left after the delete;
//   - changed is whatever the child's Delete reported; when true this node is
//     marked dirty and its cached hash is dropped.
//
// If the child on the key's side is now an inner node holding a single leaf,
// that leaf is pulled up to replace it.
//
// todo we need to take care to prune single branches to achieve same root
// hash insertion/deletion
func (in *inner) Delete(store *Store, keyhash [HASHSIZE]byte) (bool, bool, error) {
	if err := in.load_partial(store); err != nil {
		return false, false, err
	}

	// slot points at the child field this key routes to.
	slot := &in.left
	if isBitSet(keyhash[:], uint(in.bit)) {
		slot = &in.right
	}
	if *slot == nil { // dead end, nothing to delete
		return false, false, nil
	}

	empty, changed, err := (*slot).Delete(store, keyhash)
	if err != nil {
		return false, false, err
	}
	if changed {
		in.dirty = true
		in.hash = in.hash[:0]
	}

	if empty {
		*slot = nil
		return in.isEmpty(), changed, nil
	}
	if only, single := isOnlyChildleaf(*slot); single {
		*slot = only
	}
	return false, changed, nil
}
// loadinnerfromstore reads this inner node from the store at (findex, fpos)
// and unmarshals it. It fails up front when both findex and fpos are zero.
// An io.EOF from the read is tolerated and whatever was read is unmarshalled.
// loaded_partial is cleared once Unmarshal has run, whether or not it
// succeeded.
func (in *inner) loadinnerfromstore(store *Store) error {
	if in.findex == 0 && in.fpos == 0 {
		return xerrors.Errorf("Invalid findex %d fpos %d", in.findex, in.fpos)
	}

	var block [MINBLOCK]byte
	// at least the children hashes will be available in this read
	n, readErr := store.read(in.findex, in.fpos, block[:])
	if readErr != nil && !xerrors.Is(readErr, io.EOF) {
		return readErr
	}

	unmarshalErr := in.Unmarshal(block[:n])
	in.loaded_partial = false
	return unmarshalErr
}
// Prove extends proof with this node's contribution for keyhash and then
// continues into the child on the key's side.
//
// The node is loaded from the store first if it is only partially loaded; a
// load failure is returned immediately and leaves proof untouched. Otherwise
// proof.version is set to 1, the hash of the sibling (the child NOT on the
// key's path) is added as a trace, and either the child on the key's path
// continues the proof or, if there is none, a dead end is recorded.
func (in *inner) Prove(store *Store, keyhash [HASHSIZE]byte, proof *Proof) error {
	// Bail out on load errors: continuing would hash and trace children of a
	// node whose contents were never read.
	if err := in.load_partial(store); err != nil {
		return err
	}
	proof.version = 1

	if isBitSet(keyhash[:], uint(in.bit)) {
		lhash, err := in.lhash(store)
		if err != nil {
			return err
		}
		proof.addTrace(lhash)
		if in.right != nil {
			return in.right.Prove(store, keyhash, proof)
		}
		proof.addDeadend()
		return nil
	}

	rhash, err := in.rhash(store)
	if err != nil {
		return err
	}
	proof.addTrace(rhash)
	if in.left != nil {
		return in.left.Prove(store, keyhash, proof)
	}
	proof.addDeadend()
	return nil
}
// minimum size is 3 bytes
func (in *inner) MarshalTo(store *Store, buf []byte, bucket string) (int, error) {
buf[1] = getNodeType(in.left) // 1
buf[2] = getNodeType(in.right) // 1 + 1
done := 3
var errors []error
if in.bit == 0 { // it's a root node so write current and previous version number also
tsize := binary.PutUvarint(buf[done:], in.version_current) // current version
done += tsize
tsize = binary.PutUvarint(buf[done:], in.version_previous) // previous version
done += tsize
tsize = binary.PutUvarint(buf[done:], uint64(len(bucket))) // bucket name length
done += tsize
2022-01-02 15:49:13 +00:00
done += copy(buf[done: done+len(bucket)], []byte(bucket)) // write bucket name, panic if buffer is small
2021-12-04 16:42:11 +00:00
}
switch getNodeType(in.left) {
case nullNODE: // no more space needed
case innerNODE, leafNODE:
tsize := binary.PutUvarint(buf[done:], uint64(in.left_findex)) // max 10 bytes, but expecting 2 bytes for few years
done += tsize
tsize = binary.PutUvarint(buf[done:], uint64(in.left_fpos)) // max 10 bytes, but expecting 5 bytes
done += tsize
lhash, err := in.lhash(store)
errors = append(errors, err)
2022-01-02 15:49:13 +00:00
done += copy(buf[done: done+32], lhash) // insert left hash
2021-12-04 16:42:11 +00:00
}
switch getNodeType(in.right) {
case nullNODE: // no more space needed
case innerNODE, leafNODE:
tsize := binary.PutUvarint(buf[done:], uint64(in.right_findex)) // max 10 bytes, but expecting 2 bytes for few years
done += tsize
tsize = binary.PutUvarint(buf[done:], uint64(in.right_fpos)) // max 10 bytes, but expecting 5 bytes
done += tsize
rhash, err := in.rhash(store)
errors = append(errors, err)
2022-01-02 15:49:13 +00:00
done += copy(buf[done:done+32], rhash) // insert right hash
2021-12-04 16:42:11 +00:00
}
buf[0] = byte(done) // prepend with length
for i := range errors {
if errors[i] != nil {
return 0, errors[i]
}
}
return done, nil
}
func parse_node(level byte, nodetype byte, buf []byte) (node, int, error) {
var done, tsize int
var tmp uint64
switch nodetype { // load the left side node
case nullNODE: // nothing to do
return nil, 0, nil
case innerNODE:
left := newInner(level + 1) // increase bit level
left.dirty = false
left.loaded_partial = true
tmp, tsize = binary.Uvarint(buf[done:])
if tsize <= 0 || tmp > math.MaxUint32 {
return nil, 0, xerrors.Errorf("Probably data corruption, we current do not support than 4 billion files")
}
left.findex = uint32(tmp)
done += tsize
tmp, tsize = binary.Uvarint(buf[done:])
if tsize <= 0 || tmp > math.MaxUint32 {
return nil, 0, xerrors.Errorf("Probably data corruption, we current do not support file pos more than 4GiB")
}
left.fpos = uint32(tmp)
done += tsize
if len(buf) < done+HASHSIZE {
return nil, 0, xerrors.Errorf("Probably data corruption, input buffer has incomplete data")
}
left.hash = append(left.hash_backer[:0], buf[done:done+HASHSIZE]...)
done += HASHSIZE
return left, done, nil
case leafNODE:
//fmt.Printf("parsing leaf bytes %x\n", buf[:])
left := &leaf{loaded_partial: true} // hash will be refilled below
left.dirty = false
left.loaded_partial = true
tmp, tsize = binary.Uvarint(buf[done:]) // max 5 bytes, but expecting 2 bytes
if tsize <= 0 || tmp > math.MaxUint32 {
return nil, 0, xerrors.Errorf("Probably data corruption, we current do not support than 4 billion files")
}
left.findex = uint32(tmp)
done += tsize
tmp, tsize = binary.Uvarint(buf[done:]) // max 10 bytes, but expecting 5 bytes
if tsize <= 0 || tmp > math.MaxUint32 {
return nil, 0, xerrors.Errorf("Probably data corruption, we current do not support file pos more than 4GiB")
}
left.fpos = uint32(tmp)
done += tsize
if len(buf) < done+HASHSIZE {
2022-01-02 15:49:13 +00:00
return nil, 0, xerrors.Errorf("Probably data corruption, input buffer has incomplete data len(buf) %d done %d (%s)\n",len(buf),done, string(buf))
2021-12-04 16:42:11 +00:00
}
copy(left.hash[:], buf[done:done+HASHSIZE])
copy(left.hash_check[:], buf[done:done+HASHSIZE]) // this will be used later on verify
left.leaf_init = true
done += HASHSIZE
return left, done, nil
default:
return nil, 0, xerrors.Errorf("Probably data corruption, unknown node type")
}
}
// Unmarshal decodes an inner node from buf, the inverse of MarshalTo.
//
// buf[0] is the length byte written by MarshalTo; it is not used here.
// buf[1] and buf[2] are the node types of the left and right child. For a
// root node (bit == 0) the current version, previous version and bucket name
// follow. The remainder holds the left and then the right child reference,
// each decoded by parse_node.
//
// Truncated or corrupt input is reported as an error rather than a panic.
// On error the node may be partially updated.
func (in *inner) Unmarshal(buf []byte) (err error) {
	if len(buf) < 3 {
		return xerrors.Errorf("inner node buffer too small: got %d bytes, need at least 3", len(buf))
	}
	ltype, rtype := buf[1], buf[2]
	done := 3

	var tsize int
	if in.bit == 0 { // it's a root node so read current and previous version number also
		if in.version_current, tsize = binary.Uvarint(buf[done:]); tsize <= 0 {
			return xerrors.Errorf("Probably data corruption, cannot decode current version")
		}
		done += tsize

		if in.version_previous, tsize = binary.Uvarint(buf[done:]); tsize <= 0 {
			return xerrors.Errorf("Probably data corruption, cannot decode previous version")
		}
		done += tsize

		var blen uint64
		if blen, tsize = binary.Uvarint(buf[done:]); tsize <= 0 {
			return xerrors.Errorf("Probably data corruption, cannot decode bucket name length")
		}
		done += tsize
		// Guard the slice below: a corrupt length must not index past buf.
		if blen > uint64(len(buf)-done) {
			return xerrors.Errorf("Probably data corruption, bucket name length %d exceeds remaining %d bytes", blen, len(buf)-done)
		}
		in.bucket_name = append(in.bucket_name[:0], buf[done:done+int(blen)]...)
		done += int(blen)
	}

	if in.left, tsize, err = parse_node(in.bit, ltype, buf[done:]); err != nil {
		return err
	}
	done += tsize

	if in.right, _, err = parse_node(in.bit, rtype, buf[done:]); err != nil {
		return err
	}
	return nil
}