408 lines
12 KiB
Raw Normal View History

2021-12-04 16:42:11 +00:00
package graviton
import "fmt"
import "bytes"
import "encoding/binary"
import "golang.org/x/xerrors"
// innernode_cache_level is the depth threshold used when evicting nodes from
// RAM after a commit: commit_inner releases the in-memory children of every
// successfully written inner node whose bit is >= this value, so only inner
// nodes above this level stay cached. Leaves release their key and value as
// soon as they are committed (see commit_leaf).
// TODO: this parameter must be tunable by the user, or tuned automatically by the system.
const innernode_cache_level = 17
// Tree structure which is the end result,
// TODO: tree does not cache anything currently, just caching the top level tree entries, will increase the speed by X multiplication factors.
type Tree struct {
store *Store
root *inner // main root , this provides all proof checking, authentication, snapshot etc
treename string // tree name
size int
Tags []string // tags used while commit, will get cleaned after commit
snapshot_version uint64 // used to track which snapshot version this tree has loaded from
tmp_buffer bytes.Buffer
// Get current version number of tree
func (t *Tree) GetVersion() uint64 {
return t.root.version_current
// Get parent version number of tree from which this tree was derived, they might not be sequential but they will be monotonically increasing
// this can be used to build out a DAG
func (t *Tree) GetParentVersion() uint64 {
return t.root.version_previous
// put a key value in the tree, if the value exists, it's overwritten.
// ToDO: it should ignore duplicate key value, if first using a get and then a put
func (t *Tree) Put(key, value []byte) error {
return t.putRaw(sum(key), key, value)
func (t *Tree) putRaw(keyhash [HASHSIZE]byte, key, value []byte) error {
if len(value) > MAX_VALUE_SIZE {
return xerrors.Errorf("value is longer then max allowed value size, %d > %d", len(value), MAX_VALUE_SIZE)
leaf := newLeaf(keyhash, key, value)
return t.root.Insert(t.store, leaf)
// Get a specifically value associated with a key
// TODO, we need to expose this in other forms so as memory allocations and better error detection could be done
func (t *Tree) Get(key []byte) ([]byte, error) {
return t.getRaw(sum(key))
// Get a specific value associated with a specific key hash
// TODO, this api should not be exposed
func (t *Tree) getRaw(keyhash [HASHSIZE]byte) ([]byte, error) {
return t.root.Get(t.store, keyhash)
// Give the merkle hash of the entire tree
func (t *Tree) Hash() (h [HASHSIZE]byte, err error) {
hash, err := t.root.Hash(t.store)
if err != nil {
copy(h[:], hash)
// delete a specific key from the tree
func (t *Tree) Delete(key []byte) error {
_, _, err := t.root.Delete(t.store, sum(key))
return err
// Check whether the tree is currently dirty or not
func (t *Tree) IsDirty() bool {
return t.root.isDirty()
// Generate proof of any key, which can be used to prove whether the key exists or not.Please note that
// the tree root hash (tree.Hash()) is not part of the structure and must be available to the verifier separately
// for eg. in an encrypted blockchain, the entire state is carried forward from block to block, this state can be
// queried from a number of sources and then it is verified
func (t *Tree) GenerateProof(key []byte) (*Proof, error) {
var p Proof
err := t.generateProofRaw(sum(key), &p)
return &p, err
func (t *Tree) generateProofRaw(key [HASHSIZE]byte, proof *Proof) error {
return t.root.Prove(t.store, key, proof)
// Commit the tree (or a number of trees) to persistance, write a new snapshot which can be accessed henceforth without any modifications
// Commiting multiple trees or multiple changes as batch is much more effecient than
// committing each change independently.
func Commit(trees ...*Tree) (committed_version uint64, err error) {
if len(trees) == 0 {
return 0, nil
defer trees[0].store.commitsync.Unlock()
// sanity checkthat all trees were derived from the same snapshot
first_tree_snapshot_version := trees[0].snapshot_version
for i := range trees {
if first_tree_snapshot_version != trees[i].snapshot_version {
return 0, fmt.Errorf("all trees simultaneously committed must be derived from the same snapshot")
gv, err := trees[0].store.LoadSnapshot(first_tree_snapshot_version)
if err != nil {
for _, tree := range trees { // commit all the trees with reference to same snapshot
if err = gv.commit(tree); err != nil {
var findex, fpos uint32
if findex, fpos, err = trees[0].commit_inner(gv, true, 0, gv.vroot); err == nil { // version number increments here
if err = trees[0].store.writeVersionData(gv.vroot.version_current, findex, fpos); err == nil {
committed_version = gv.vroot.version_current
for i := range trees {
trees[i].snapshot_version = committed_version // increment version
//fmt.Printf("committing version tree %x\n", gv.vroot.Hash(gv.store))
// this is we are cleaning up the trees, should we report bak any error , why should this code be here
for _, tree := range trees {
newtree, err1 := gv.GetTreeWithVersion(tree.bucket, tree.GetVersion()) // get last committed version of the current branch
if err1 == nil {
*tree = *newtree
} else {
return 0, err1
// commit a single tree at a time
func (gv *Snapshot) commit(tree *Tree) (err error) {
var findex, fpos uint32
tree.size = 0
if tree.IsDirty() {
if findex, fpos, err = tree.commit_inner(gv, false, 0, tree.root); err != nil {
return err
} else {
findex, fpos = tree.root.Position()
var roothash [HASHSIZE]byte
if roothash, err = tree.Hash(); err == nil {
// at this point tree cannot be dirty
//root_size := tree.size
var valuearray [HASHSIZE]byte
valuesize := encode(findex, fpos, valuearray[:])
valuebuf := valuearray[:valuesize]
var key [512]byte
key[0] = ':'
done := 1
done += copy(key[done:], []byte(tree.treename))
done += binary.PutUvarint(key[done:], tree.root.version_current)
if err = gv.vroot.Insert(tree.store, newLeaf(sum(key[:done]), key[:done], valuebuf[:])); err == nil { // always ensure tree is accessible by its bucket & version number
done = 1
done += copy(key[done:], []byte(tree.treename))
done += copy(key[done:], roothash[:])
if err = gv.vroot.Insert(tree.store, newLeaf(sum(key[:done]), key[:done], valuebuf[:])); err == nil {
if err = gv.vroot.Insert(tree.store, newLeaf(sum(roothash[:]), roothash[:], valuebuf[:])); err == nil {
for i := 0; i < len(tree.Tags) && err == nil; i++ {
err = gv.vroot.Insert(tree.store, newLeaf(sum([]byte(tree.Tags[i])), []byte(tree.Tags[i]), valuebuf[:]))
tree.tmp_buffer = bytes.Buffer{} // so as storage space could be be reclaimed
//version_size := tree.size
//_ = version_size
//_ = root_size
// we should expose these stats somehow so as users could make some judgements about overheads
//fmt.Printf("tree committed findex %d fpos %d err %s committed bytes rootsize:%d versionsize:%d version: %d pversion %d\n", findex, fpos, err, root_size, version_size, tree.GetVersion(), tree.GetParentVersion())
// commit the tree to disk, the current version
func (t *Tree) Commit(tags ...string) error {
t.Tags = tags
_, err := Commit(t)
return err
// Reload the tree from the disk, causing all current changes to be discarded,
func (t *Tree) Discard() error {
gv, err := t.store.LoadSnapshot(0)
if err == nil {
var newtree *Tree
if newtree, err = gv.GetTreeWithVersion(t.treename, t.GetVersion()); err == nil { // get last committed version of the current branch
*t = *newtree
return err // it will generally be nil
// commit_leaf writes a single leaf to the store and then releases its payload
// from memory. It is never recursive. Leaf marshalling is done only here, at
// commit time, reusing the tree's tmp_buffer and a stack-allocated varint
// scratch array to avoid an allocation.
//
// The leaf then records the store position (findex, fpos) returned by the
// write, is marked clean and partially loaded, and drops its key and value.
//
// NOTE(review): this listing appears to have lost lines. Nothing visible
// fills t.tmp_buffer before it is written, `size` is computed but never
// consumed, both `if` bodies are empty, and the closing braces and return are
// absent. Restore the marshalling statements from the canonical source before
// relying on this function.
func (t *Tree) commit_leaf(level int, l *leaf) (findex uint32, fpos uint32, err error) {
var tbuf [10]byte // scratch for one uvarint (10 bytes is binary.MaxVarintLen64)
// uvarint-encode the key length into tbuf; size is the number of bytes used
size := binary.PutUvarint(tbuf[:], uint64(len(l.key)))
if len(l.key) > 0 {
// uvarint-encode the value length, overwriting the scratch array
size = binary.PutUvarint(tbuf[:], uint64(len(l.value)))
if len(l.value) > 0 {
// here we must write it to store
t.size += len(t.tmp_buffer.Bytes()) // account for the bytes about to be written
findex, fpos, err = t.store.write(t.tmp_buffer.Bytes())
// remember where the leaf was written and mark it clean
l.findex = findex
l.fpos = fpos
l.dirty = false
// release the payload; the leaf now holds only its store position
l.loaded_partial = true
l.key = nil
l.value = nil
// this is mostly recursive and must skip non modified branches reusing them
// and must skip dirty parts
func (t *Tree) commit_inner(gv *Snapshot, specialversion bool, level int, in *inner) (findex uint32, fpos uint32, err error) {
var old_old_version, old_version uint64
var success bool
if in.left == nil { // handle all left cases
in.left_findex, in.left_fpos = 0, 0
} else if !in.left.isDirty() {
in.left_findex, in.left_fpos = in.left.Position()
} else { // node is dirty and must be written
switch v := in.left.(type) { // commit left branch
case *inner:
in.left_findex, in.left_fpos, err = t.commit_inner(gv, specialversion, level+1, v)
case *leaf:
in.left_findex, in.left_fpos, err = t.commit_leaf(level+1, v)
err = fmt.Errorf("unknown node type")
if err != nil {
if in.right == nil { // handle all rights cases
in.right_findex, in.right_fpos = 0, 0
} else if !in.right.isDirty() {
in.right_findex, in.right_fpos = in.right.Position()
} else { // node is dirty and must be written
switch v := in.right.(type) { // commit right branch
case *inner:
in.right_findex, in.right_fpos, err = t.commit_inner(gv, specialversion, level+1, v)
case *leaf:
in.right_findex, in.right_fpos, err = t.commit_leaf(level+1, v)
err = fmt.Errorf("unknown node type")
if err != nil {
// increment and reserve a version number, this takes 1 read iop and 1 write iop
if in.bit == 0 {
old_old_version = in.version_previous
old_version = in.version_current
if specialversion { // this is for the version root
// lets increment the version number and put it again
_, in.version_current, _, _, _ = t.store.findhighestsnapshotinram() // setup index properly
in.version_previous = old_version
} else {
in.version_previous = old_version
// now gets highest version for this bucket from version root, increment and store it again
// Todo below 2 operations must be integrated or protected with a lock to avoid race condition
var highest_version uint64
if highest_version, err = gv.GetTreeHighestVersion(t.treename); err == nil {
// lets increment the highest version
if err = gv.putTreeHighestVersion(t.treename, highest_version); err == nil {
in.version_current = highest_version
if err != nil {
return 0, 0, err
//fmt.Printf("oldold %d old %d current %d\n", old_old_version, old_version, in.version_current)
// below block has been replaced
/// defer func() {
// if !success { // if this ever occurs, we will skip a version number
// in.version_current = old_version
// in.version_previous = old_old_version
// }
/// }()
var buf [128]byte
var done int
if done, err = in.MarshalTo(t.store, buf[:], t.treename); err == nil {
// here we must write it to store
t.size += len(buf)
findex, fpos, err = t.store.write(buf[:done])
if err == nil {
in.findex = findex
in.fpos = fpos
in.dirty = false
success = true
if success && in.bit >= innernode_cache_level {
in.left, in.right = nil, nil
in.loaded_partial = true
if in.bit == 0 && !success { // if this ever occurs, we will skip a version number
in.version_current = old_version
in.version_previous = old_old_version
// encode writes findex followed by fpos into buf as two consecutive unsigned
// varints and returns the total number of bytes written. buf must be large
// enough for both values (at most 5 bytes each for a uint32);
// binary.PutUvarint panics if it is not.
func encode(findex, fpos uint32, buf []byte) int {
	done := binary.PutUvarint(buf, uint64(findex))
	done += binary.PutUvarint(buf[done:], uint64(fpos))
	return done
}
// decode reads findex and fpos back from buf, where they were stored as two
// consecutive unsigned varints (see encode). Values are truncated to uint32.
//
// Malformed input never panics: if the first varint is missing or overflows,
// (0, 0) is returned; if only the second is missing or overflows, fpos is 0.
func decode(buf []byte) (uint32, uint32) {
	findex, n := binary.Uvarint(buf)
	if n <= 0 {
		// n == 0: buffer too small; n < 0: 64-bit overflow. A negative n must
		// not be used as a slice index.
		return 0, 0
	}
	// On failure Uvarint yields a zero value, so the byte count is not needed.
	fpos, _ := binary.Uvarint(buf[n:])
	return uint32(findex), uint32(fpos)
}