derohe-miniblock-mod/astrobwt/astrobwt_fast/astrobwt_optimized.go
2022-03-10 11:15:46 +00:00

163 lines
4.0 KiB
Go

package astrobwt_fast
import "unsafe"
import "hash"
import "sync"
import "crypto/rand"
import "encoding/binary"
import "golang.org/x/crypto/sha3"
import "golang.org/x/crypto/salsa20/salsa"
// stage1_length is the number of keystream bytes POW_optimized generates and
// the number of positions it asks sort_indices to order.
const stage1_length uint32 = 9973 // it is a prime
// To improve the algorithm further, see the suffix-array construction benchmarks at
// https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md
// This optimized algorithm is used only in the miner, not in the blockchain.
// ScratchData is the reusable working memory for one POW_optimized call.
// Instances are meant to be obtained from Pool, which wires up the hasher and
// the two stage1_result views.
type ScratchData struct {
	// hasher is reset and reused for both hashing steps of POW_optimized.
	hasher hash.Hash

	// stage1 receives the stage-1 byte stream in its first stage1_length
	// bytes; the extra 64 bytes are slack that the sort may read.
	stage1 [stage1_length + 64]byte // 10 KB stages are taken from it

	// stage1_result and stage1_result_bytes are uint16 and byte views of the
	// sorted positions. Pool points both at the start of indices, so they
	// share storage with it.
	stage1_result       *[stage1_length + 1]uint16
	stage1_result_bytes *[stage1_length * 2]byte

	// indices is the final sort buffer, tmp_indices the intermediate one.
	indices     [stage1_length + 1]uint32 // 40 KB
	tmp_indices [stage1_length + 1]uint32 // 40 KB
}
// Pool recycles *ScratchData values between POW_optimized calls. New allocates
// a fresh ScratchData, installs a SHA3-256 hasher and points both
// stage1_result views at the first element of indices, so the sorted output
// is written in place over the sort buffer.
var Pool = sync.Pool{
	New: func() interface{} {
		scratch := new(ScratchData)
		scratch.hasher = sha3.New256()

		// Both views alias the same storage: the beginning of indices.
		base := unsafe.Pointer(&scratch.indices[0])
		scratch.stage1_result = (*[stage1_length + 1]uint16)(base)
		scratch.stage1_result_bytes = (*[stage1_length * 2]byte)(base)

		return scratch
	},
}
// POW_optimized computes the proof-of-work hash of inputdata using the scratch
// buffers in data (normally obtained from Pool).
//
// Steps:
//  1. hash inputdata with data.hasher to obtain a 32-byte key;
//  2. XOR a zeroed stage1 buffer with the Salsa20 keystream for that key
//     (zero counter), i.e. fill stage1[:stage1_length] with keystream bytes;
//  3. sort the stage1_length positions of that buffer with sort_indices;
//  4. hash the sorted positions, encoded as little-endian uint16 values, and
//     return that digest.
//
// Any panic raised along the way is recovered and a hash of random bytes is
// returned instead, so the caller keeps running and the result simply fails
// verification.
func POW_optimized(inputdata []byte, data *ScratchData) (outputhash [32]byte) {
	defer func() {
		// If something happens due to RAM issues in the miner we should
		// continue; the system will crash sooner or later anyway.
		if r := recover(); r != nil {
			var buf [16]byte
			// The error is deliberately ignored: if the read fails buf stays
			// zeroed, which still hashes to a value unrelated to the real one.
			rand.Read(buf[:])
			outputhash = sha3.Sum256(buf[:]) // return a falsified hash which will fail the check
		}
	}()

	var key [32]byte
	var counter [16]byte

	// XORKeyStream below works in place, so the buffer must start out zeroed
	// for the result to be the raw keystream.
	for i := range data.stage1 {
		data.stage1[i] = 0
	}

	data.hasher.Reset()
	data.hasher.Write(inputdata)
	_ = data.hasher.Sum(key[:0])

	salsa.XORKeyStream(data.stage1[:stage1_length], data.stage1[:stage1_length], &counter, &key)

	// extra 40 bytes since we may read them, but we never write them
	sort_indices(stage1_length, data.stage1[:stage1_length+40], data.stage1_result[:], data)

	if LittleEndian {
		// The in-memory uint16 layout already is the little-endian encoding.
		data.hasher.Reset()
		data.hasher.Write(data.stage1_result_bytes[:])
		_ = data.hasher.Sum(key[:0])
	} else {
		// Re-encode explicitly as little-endian. Only the first stage1_length
		// entries are meaningful: stage1_result has one spare element, and
		// ranging over the whole array would write past the end of s and
		// panic on the last iteration.
		var s [stage1_length * 2]byte
		for i, c := range data.stage1_result[:stage1_length] {
			binary.LittleEndian.PutUint16(s[i<<1:], c)
		}
		data.hasher.Reset()
		data.hasher.Write(s[:])
		_ = data.hasher.Sum(key[:0])
	}

	copy(outputhash[:], key[:])
	return
}
// fix performs one insertion-sort step on indices: it takes the entry at
// indices[i+1] and, if it sorts before indices[i], moves it left to its proper
// place, shifting the displaced entries one slot to the right.
//
// Every entry carries a position into v in its low 16 bits and a two-byte
// prefix in its high 16 bits. Entries are compared by the big-endian uint32
// read from v at position+2. The leftward walk stops at the start of the
// slice, at an entry whose high 16 bits differ from the moving entry's, or at
// an entry whose key is not greater than the moving entry's key.
func fix(v []byte, indices []uint32, i int) {
	left := indices[i]
	moving := indices[i+1]

	movingKey := binary.BigEndian.Uint32(v[(moving&0xffff)+2:])
	if movingKey >= binary.BigEndian.Uint32(v[(left&0xffff)+2:]) {
		return // the pair is already in order
	}

	// slot is where the next displaced entry is written and, once the walk
	// ends, where the moving entry finally lands.
	slot := i + 1
	for {
		indices[slot] = left
		slot--
		if slot == 0 {
			break // reached the front of the slice
		}
		left = indices[slot-1]
		if (moving^left) > 0xffff || movingKey >= binary.BigEndian.Uint32(v[(left&0xffff)+2:]) {
			break // different prefix, or left neighbour already sorts first
		}
	}
	indices[slot] = moving
}
// sort_indices orders the positions 0..N-1 of v by the bytes starting at each
// position and stores the ordered positions in output[0..N-1].
//
// The order is built in two phases:
//  1. a two-pass counting sort on the two-byte prefix v[p], v[p+1], using
//     d.tmp_indices as the intermediate buffer and d.indices as the result;
//  2. an insertion pass (fix) over neighbours that share the same two-byte
//     prefix, which orders them by the next four bytes of v.
//
// Each buffer entry packs v[p]<<24 | v[p+1]<<16 | p, so positions must fit in
// 16 bits.
//
// Requirements visible in the code: v must be longer than N (v[N] is
// overwritten with zero, and fix reads a few bytes beyond that), output must
// have at least N+1 elements, and N must be at least 1.
func sort_indices(N uint32, v []byte, output []uint16, d *ScratchData) {
	// Occurrence counts per byte value. These are uint16 rather than byte so
	// that a value occurring 256 or more times cannot wrap around and corrupt
	// the bucket offsets derived from them.
	var byte_counters [2][256]uint16
	var counters [2][256]uint16

	v[N] = 0 // make sure extra byte accessed is zero

	indices := d.indices[:]
	tmp_indices := d.tmp_indices[:]

	// byte_counters[1] counts v[0..N-1] (the first bytes of each pair).
	for _, c := range v[:N] {
		byte_counters[1][c]++
	}

	// byte_counters[0] describes the second bytes v[1..N]: the same counts
	// minus v[0]. The extra zero at v[N] is accounted for below by not
	// subtracting one from the first cumulative total.
	byte_counters[0] = byte_counters[1]
	byte_counters[0][v[0]]--

	// Turn the counts into the index of the last slot of every bucket.
	counters[0][0] = byte_counters[0][0]
	counters[1][0] = byte_counters[1][0] - 1
	c0 := counters[0][0]
	c1 := counters[1][0]
	for i := 1; i < 256; i++ {
		c0 += byte_counters[0][i]
		c1 += byte_counters[1][i]
		counters[0][i] = c0
		counters[1][i] = c1
	}

	// Pass 1: scatter by the second byte, filling each bucket from its end.
	counters0 := counters[0][:]
	for i := int(N); i >= 1; i-- {
		byte0 := uint32(v[i-1])
		byte1 := uint32(v[i]) // here we can access extra byte from input array so make sure its zero
		tmp_indices[counters0[v[i]]] = byte0<<24 | byte1<<16 | uint32(i-1)
		counters0[v[i]]--
	}

	// Pass 2: scatter by the first byte (top 8 bits). Walking backwards while
	// filling buckets from their end keeps the order established by pass 1.
	counters1 := counters[1][:]
	_ = tmp_indices[N-1]
	for i := int(N - 1); i >= 0; i-- {
		data := tmp_indices[i]
		tmp := counters1[data>>24]
		counters1[data>>24]--
		indices[tmp] = data
	}

	// Neighbours with an identical two-byte prefix are ordered by fix.
	for i := 1; i < int(N); i++ { // no BC here
		if indices[i-1]&0xffff0000 == indices[i]&0xffff0000 {
			fix(v, indices, i-1)
		}
	}

	// after fixing, convert indices to output
	// output may share storage with indices (Pool sets stage1_result up that
	// way); that is safe here because element i of the narrower output never
	// lies ahead of element i of indices, which has already been read.
	_ = output[N]
	for i, c := range indices[:N] {
		output[i] = uint16(c)
	}
}