2020-12-19 10:01:29 +00:00
package graviton
import "fmt"
import "bytes"
import "encoding/binary"
import "golang.org/x/xerrors"
// innernode_cache_level controls how much of a tree stays in RAM after a commit.
// Leaves are always released once written (commit_leaf drops their key and value),
// and any inner node whose bit is >= this level has its children released after it
// has been written successfully (see commit_inner).
// TODO: this parameter must be tunable by the user or by the system automatically.
const innernode_cache_level = 17
// Tree structure which is the end result: a single named tree whose operations
// are all delegated to its root node and backed by a Store.
// TODO: tree does not cache anything currently, just caching the top level tree entries, will increase the speed by X multiplication factors.
type Tree struct {
// store is handed to every root operation (Insert, Get, Delete, Hash, Prove) and receives all commit writes
store * Store
root * inner // main root , this provides all proof checking, authentication, snapshot etc
treename string // tree name, also used to build the keys under which the tree is registered in the version root
// size accumulates the number of bytes serialized while committing; it is reset to 0 at the start of each per-tree commit
size int
// Tags are written into the version root as additional keys pointing at this tree's root during commit.
// NOTE(review): the original comment says tags are cleaned after commit, but no visible code clears them - confirm.
Tags [ ] string // tags used while commit, will get cleaned after commit
2020-12-27 13:44:23 +00:00
snapshot_version uint64 // used to track which snapshot version this tree has loaded from; updated to the new version after a successful Commit
2020-12-19 10:01:29 +00:00
// tmp_buffer is scratch space used to serialize leaves during commit; it is replaced with an empty buffer at the end of each per-tree commit
tmp_buffer bytes . Buffer
}
// GetVersion reports the current version number recorded on the tree's root node.
func (t *Tree) GetVersion() uint64 {
	current := t.root.version_current
	return current
}
// GetParentVersion reports the version this tree was derived from, as recorded on
// the root node. Parent versions are not necessarily sequential but they increase
// monotonically, so they can be used to build out a DAG of versions.
func (t *Tree) GetParentVersion() uint64 {
	parent := t.root.version_previous
	return parent
}
// Put stores value under key, hashing the key first. If the key already has a
// value, it is overwritten.
// TODO: it should ignore a duplicate key/value, e.g. by doing a get before the put.
func (t *Tree) Put(key, value []byte) error {
	keyhash := sum(key)
	return t.putRaw(keyhash, key, value)
}
// putRaw inserts a key/value pair whose key hash has already been computed.
// Values larger than MAX_VALUE_SIZE are rejected before anything is inserted.
func (t *Tree) putRaw(keyhash [HASHSIZE]byte, key, value []byte) error {
	if valuelen := len(value); valuelen > MAX_VALUE_SIZE {
		return xerrors.Errorf("value is longer then max allowed value size, %d > %d", valuelen, MAX_VALUE_SIZE)
	}
	return t.root.Insert(t.store, newLeaf(keyhash, key, value))
}
// Get returns the value associated with key, looking it up by the key's hash.
// TODO: expose this in other forms so that memory allocations can be controlled
// and errors can be detected more precisely.
func (t *Tree) Get(key []byte) ([]byte, error) {
	keyhash := sum(key)
	return t.getRaw(keyhash)
}
// getRaw returns the value stored under an already computed key hash by
// delegating to the root node.
// TODO: this api should not be exposed.
func (t *Tree) getRaw(keyhash [HASHSIZE]byte) ([]byte, error) {
	value, err := t.root.Get(t.store, keyhash)
	return value, err
}
// Hash returns the merkle hash of the entire tree as computed by the root node.
// When the root reports an error, h is left zeroed and the error is returned.
func (t *Tree) Hash() (h [HASHSIZE]byte, err error) {
	digest, err := t.root.Hash(t.store)
	if err == nil {
		copy(h[:], digest)
	}
	return h, err
}
// Delete removes key from the tree. Only the error reported by the root node is
// surfaced; its other results are discarded.
func (t *Tree) Delete(key []byte) error {
	keyhash := sum(key)
	_, _, err := t.root.Delete(t.store, keyhash)
	return err
}
// IsDirty reports whether the root node is currently marked dirty.
func (t *Tree) IsDirty() bool {
	dirty := t.root.isDirty()
	return dirty
}
// GenerateProof builds a proof for key which can be used to prove whether the key
// exists or not. Please note that the tree root hash (tree.Hash()) is not part of
// the proof structure and must be made available to the verifier separately.
// For example, in an encrypted blockchain the entire state is carried forward from
// block to block; that state can be queried from a number of sources and then verified.
//
// A non-nil *Proof is returned even when an error is reported.
func (t *Tree) GenerateProof(key []byte) (*Proof, error) {
	proof := new(Proof)
	err := t.generateProofRaw(sum(key), proof)
	return proof, err
}
// generateProofRaw fills proof for an already hashed key by delegating to the root node.
func (t *Tree) generateProofRaw(key [HASHSIZE]byte, proof *Proof) error {
	err := t.root.Prove(t.store, key, proof)
	return err
}
// Commit the tree (or a number of trees) to persistance, write a new snapshot which can be accessed henceforth without any modifications
// Commiting multiple trees or multiple changes as batch is much more effecient than
// committing each change independently.
func Commit ( trees ... * Tree ) ( committed_version uint64 , err error ) {
if len ( trees ) == 0 {
return 0 , nil
}
trees [ 0 ] . store . commitsync . Lock ( )
defer trees [ 0 ] . store . commitsync . Unlock ( )
2020-12-27 13:44:23 +00:00
// sanity checkthat all trees were derived from the same snapshot
first_tree_snapshot_version := trees [ 0 ] . snapshot_version
for i := range trees {
if first_tree_snapshot_version != trees [ i ] . snapshot_version {
return 0 , fmt . Errorf ( "all trees simultaneously committed must be derived from the same snapshot" )
}
}
gv , err := trees [ 0 ] . store . LoadSnapshot ( first_tree_snapshot_version )
2020-12-19 10:01:29 +00:00
if err != nil {
return
}
2020-12-27 13:44:23 +00:00
for _ , tree := range trees { // commit all the trees with reference to same snapshot
2020-12-19 10:01:29 +00:00
if err = gv . commit ( tree ) ; err != nil {
return
}
}
2020-12-27 13:44:23 +00:00
var findex , fpos uint32
if findex , fpos , err = trees [ 0 ] . commit_inner ( gv , true , 0 , gv . vroot ) ; err == nil { // version number increments here
if err = trees [ 0 ] . store . writeVersionData ( gv . vroot . version_current , findex , fpos ) ; err == nil {
committed_version = gv . vroot . version_current
for i := range trees {
trees [ i ] . snapshot_version = committed_version // increment version
2020-12-19 10:01:29 +00:00
}
}
}
//fmt.Printf("committing version tree %x\n", gv.vroot.Hash(gv.store))
// this is we are cleaning up the trees, should we report bak any error , why should this code be here
/ *
for _ , tree := range trees {
newtree , err1 := gv . GetTreeWithVersion ( tree . bucket , tree . GetVersion ( ) ) // get last committed version of the current branch
if err1 == nil {
* tree = * newtree
} else {
return 0 , err1
}
}
* /
return
}
// commit writes a single tree (if it is dirty) and registers the position of its
// root in the snapshot's version root under several keys:
//
//	':' + treename + uvarint(version)   - lookup by name and version
//	':' + treename + roothash           - lookup by name and root hash
//	roothash                            - lookup by root hash alone
//	each entry of tree.Tags             - lookup by tag
//
// Every key maps to the same value, the encoded (findex, fpos) of the tree root.
// The tree's scratch buffer is released before returning, except when writing
// the tree itself fails.
func (gv *Snapshot) commit(tree *Tree) (err error) {
	var findex, fpos uint32

	tree.size = 0
	if !tree.IsDirty() {
		findex, fpos = tree.root.Position()
	} else if findex, fpos, err = tree.commit_inner(gv, false, 0, tree.root); err != nil {
		return err
	}

	// from here on the scratch buffer is dropped on every exit so its storage can be reclaimed
	defer func() { tree.tmp_buffer = bytes.Buffer{} }()

	// at this point the tree cannot be dirty
	var roothash [HASHSIZE]byte
	if roothash, err = tree.Hash(); err != nil {
		return err
	}

	var valuearray [HASHSIZE]byte
	valuebuf := valuearray[:encode(findex, fpos, valuearray[:])]

	// register inserts name -> root position into the version root
	register := func(name []byte) error {
		return gv.vroot.Insert(tree.store, newLeaf(sum(name), name, valuebuf))
	}

	var key [512]byte
	key[0] = ':'

	// always ensure the tree is accessible by its name & version number
	used := 1 + copy(key[1:], tree.treename)
	used += binary.PutUvarint(key[used:], tree.root.version_current)
	if err = register(key[:used]); err != nil {
		return err
	}

	// ... by its name & root hash
	used = 1 + copy(key[1:], tree.treename)
	used += copy(key[used:], roothash[:])
	if err = register(key[:used]); err != nil {
		return err
	}

	// ... by its root hash alone
	if err = register(roothash[:]); err != nil {
		return err
	}

	// ... and by every tag supplied for this commit
	for _, tag := range tree.Tags {
		if err = register([]byte(tag)); err != nil {
			return err
		}
	}

	// TODO: tree.size should be exposed somehow so that users can judge commit overheads
	return nil
}
// Commit writes the current state of this tree to disk as a new version,
// attaching the given tags to it. The committed version number is discarded.
func (t *Tree) Commit(tags ...string) error {
	t.Tags = tags
	if _, err := Commit(t); err != nil {
		return err
	}
	return nil
}
// Discard reloads the tree from disk, causing all current changes to be
// discarded. The tree is replaced in place by the version of the same name that
// GetVersion currently reports, loaded through snapshot 0. If loading fails the
// tree is left untouched and the error is returned.
func (t *Tree) Discard() error {
	gv, err := t.store.LoadSnapshot(0)
	if err != nil {
		return err
	}

	// get the last committed version of the current branch
	reloaded, err := gv.GetTreeWithVersion(t.treename, t.GetVersion())
	if err != nil {
		return err
	}

	*t = *reloaded
	return err // nil at this point
}
// commit_leaf serializes a single leaf and writes it to the store. It never recurses.
// Leaf marshalling is done only here, while committing, and reuses t.tmp_buffer as
// scratch space instead of allocating a fresh buffer per leaf.
//
// Record layout: uvarint(len(key)) | key | uvarint(len(value)) | value.
//
// On success the leaf records its new position, is marked clean and releases its
// key and value (loaded_partial is set to reflect that). If the write fails the
// leaf is left untouched, still dirty and still holding its data, so nothing is
// lost and a later commit can retry. The level parameter is currently unused.
//
// Returns the position reported by the store's write, together with its error.
func (t *Tree) commit_leaf(level int, l *leaf) (findex uint32, fpos uint32, err error) {
	t.tmp_buffer.Reset()

	var tbuf [10]byte // 10 bytes is the maximum size of a uvarint-encoded uint64

	size := binary.PutUvarint(tbuf[:], uint64(len(l.key)))
	t.tmp_buffer.Write(tbuf[:size])
	t.tmp_buffer.Write(l.key) // writing an empty key is a no-op

	size = binary.PutUvarint(tbuf[:], uint64(len(l.value)))
	t.tmp_buffer.Write(tbuf[:size])
	t.tmp_buffer.Write(l.value) // writing an empty value is a no-op

	// here we must write it to the store
	record := t.tmp_buffer.Bytes()
	t.size += len(record)
	if findex, fpos, err = t.store.write(record); err != nil {
		// do not mark the leaf clean or drop its data: it has not been persisted
		return findex, fpos, err
	}

	l.findex, l.fpos = findex, fpos
	l.dirty = false
	l.loaded_partial = true
	l.key, l.value = nil, nil // the data now lives in the store, release it from ram
	return findex, fpos, nil
}
// this is mostly recursive and must skip non modified branches reusing them
// and must skip dirty parts
func ( t * Tree ) commit_inner ( gv * Snapshot , specialversion bool , level int , in * inner ) ( findex uint32 , fpos uint32 , err error ) {
var old_old_version , old_version uint64
var success bool
if in . left == nil { // handle all left cases
in . left_findex , in . left_fpos = 0 , 0
} else if ! in . left . isDirty ( ) {
in . left_findex , in . left_fpos = in . left . Position ( )
} else { // node is dirty and must be written
switch v := in . left . ( type ) { // commit left branch
case * inner :
in . left_findex , in . left_fpos , err = t . commit_inner ( gv , specialversion , level + 1 , v )
case * leaf :
in . left_findex , in . left_fpos , err = t . commit_leaf ( level + 1 , v )
default :
err = fmt . Errorf ( "unknown node type" )
}
}
if err != nil {
return
}
if in . right == nil { // handle all rights cases
in . right_findex , in . right_fpos = 0 , 0
} else if ! in . right . isDirty ( ) {
in . right_findex , in . right_fpos = in . right . Position ( )
} else { // node is dirty and must be written
switch v := in . right . ( type ) { // commit right branch
case * inner :
in . right_findex , in . right_fpos , err = t . commit_inner ( gv , specialversion , level + 1 , v )
case * leaf :
in . right_findex , in . right_fpos , err = t . commit_leaf ( level + 1 , v )
default :
err = fmt . Errorf ( "unknown node type" )
}
}
if err != nil {
return
}
// increment and reserve a version number, this takes 1 read iop and 1 write iop
if in . bit == 0 {
old_old_version = in . version_previous
old_version = in . version_current
if specialversion { // this is for the version root
// lets increment the version number and put it again
2020-12-27 13:44:23 +00:00
_ , in . version_current , _ , _ , _ = t . store . findhighestsnapshotinram ( ) // setup index properly
2020-12-19 10:01:29 +00:00
in . version_current ++
in . version_previous = old_version
} else {
in . version_previous = old_version
// now gets highest version for this bucket from version root, increment and store it again
// Todo below 2 operations must be integrated or protected with a lock to avoid race condition
var highest_version uint64
if highest_version , err = gv . GetTreeHighestVersion ( t . treename ) ; err == nil {
// lets increment the highest version
highest_version ++
if err = gv . putTreeHighestVersion ( t . treename , highest_version ) ; err == nil {
in . version_current = highest_version
}
}
if err != nil {
return 0 , 0 , err
}
}
//fmt.Printf("oldold %d old %d current %d\n", old_old_version, old_version, in.version_current)
// below block has been replaced
/// defer func() {
// if !success { // if this ever occurs, we will skip a version number
// in.version_current = old_version
// in.version_previous = old_old_version
// }
/// }()
}
var buf [ 128 ] byte
var done int
if done , err = in . MarshalTo ( t . store , buf [ : ] , t . treename ) ; err == nil {
// here we must write it to store
t . size += len ( buf )
findex , fpos , err = t . store . write ( buf [ : done ] )
if err == nil {
in . findex = findex
in . fpos = fpos
in . dirty = false
success = true
}
if success && in . bit >= innernode_cache_level {
in . left , in . right = nil , nil
in . loaded_partial = true
}
if in . bit == 0 && ! success { // if this ever occurs, we will skip a version number
in . version_current = old_version
in . version_previous = old_old_version
}
}
return
}
// encode writes findex followed by fpos into buf as two consecutive uvarints and
// returns the total number of bytes written. buf must be large enough for both
// values (at most 10 bytes for two uint32s); binary.PutUvarint panics otherwise.
func encode(findex, fpos uint32, buf []byte) int {
	written := 0
	for _, v := range [2]uint32{findex, fpos} {
		written += binary.PutUvarint(buf[written:], uint64(v))
	}
	return written
}
// decode reads findex and fpos back from buf, where they are stored as two
// consecutive uvarints (the inverse of encode). Both values are truncated to uint32.
//
// Malformed input never panics: if the first uvarint is missing or overflows,
// (0, 0) is returned; if only the second one is missing or malformed, fpos is 0.
func decode(buf []byte) (uint32, uint32) {
	findex, size := binary.Uvarint(buf)
	if size <= 0 {
		// size == 0: buf too small, size < 0: 64-bit overflow; slicing buf[size:]
		// with a negative size would panic
		return 0, 0
	}

	// Uvarint yields 0 for a truncated or overflowing value, so fpos needs no extra check
	fpos, _ := binary.Uvarint(buf[size:])
	return uint32(findex), uint32(fpos)
}