163 lines
4.0 KiB
Go
163 lines
4.0 KiB
Go
|
package astrobwt_fast
|
||
|
|
||
|
import "unsafe"
|
||
|
import "hash"
|
||
|
import "sync"
|
||
|
import "crypto/rand"
|
||
|
import "encoding/binary"
|
||
|
import "golang.org/x/crypto/sha3"
|
||
|
import "golang.org/x/crypto/salsa20/salsa"
|
||
|
|
||
|
const stage1_length uint32 = 9973 // it is a prime
|
||
|
|
||
|
// see here to improve the algorithms more https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md
|
||
|
// this optimized algorithm is used only in the miner and not in the blockchain
|
||
|
|
||
|
type ScratchData struct {
|
||
|
hasher hash.Hash
|
||
|
stage1 [stage1_length + 64]byte // 10 KB stages are taken from it
|
||
|
stage1_result *[stage1_length + 1]uint16
|
||
|
stage1_result_bytes *[(stage1_length) * 2]uint8
|
||
|
indices [stage1_length + 1]uint32 // 40 KB
|
||
|
tmp_indices [stage1_length + 1]uint32 // 40 KB
|
||
|
}
|
||
|
|
||
|
var Pool = sync.Pool{New: func() interface{} {
|
||
|
var d ScratchData
|
||
|
d.hasher = sha3.New256()
|
||
|
d.stage1_result = ((*[stage1_length + 1]uint16)(unsafe.Pointer(&d.indices[0])))
|
||
|
d.stage1_result_bytes = ((*[(stage1_length) * 2]byte)(unsafe.Pointer(&d.indices[0])))
|
||
|
|
||
|
return &d
|
||
|
}}
|
||
|
|
||
|
func POW_optimized(inputdata []byte, data *ScratchData) (outputhash [32]byte) {
|
||
|
|
||
|
defer func() {
|
||
|
if r := recover(); r != nil { // if something happens due to RAM issues in miner, we should continue, system will crash sooner or later
|
||
|
var buf [16]byte
|
||
|
rand.Read(buf[:])
|
||
|
outputhash = sha3.Sum256(buf[:]) // return a falsified has which will fail the check
|
||
|
}
|
||
|
}()
|
||
|
|
||
|
var key [32]byte
|
||
|
for i := range data.stage1 {
|
||
|
data.stage1[i] = 0
|
||
|
}
|
||
|
|
||
|
var counter [16]byte
|
||
|
|
||
|
data.hasher.Reset()
|
||
|
data.hasher.Write(inputdata)
|
||
|
_ = data.hasher.Sum(key[:0])
|
||
|
|
||
|
salsa.XORKeyStream(data.stage1[:stage1_length], data.stage1[:stage1_length], &counter, &key)
|
||
|
sort_indices(stage1_length, data.stage1[:stage1_length+40], data.stage1_result[:], data) // extra 40 bytes since we may read them, but we never write them
|
||
|
|
||
|
if LittleEndian {
|
||
|
data.hasher.Reset()
|
||
|
data.hasher.Write(data.stage1_result_bytes[:])
|
||
|
_ = data.hasher.Sum(key[:0])
|
||
|
} else {
|
||
|
var s [stage1_length * 2]byte
|
||
|
for i, c := range data.stage1_result {
|
||
|
binary.LittleEndian.PutUint16(s[i<<1:], c)
|
||
|
}
|
||
|
data.hasher.Reset()
|
||
|
data.hasher.Write(s[:])
|
||
|
_ = data.hasher.Sum(key[:0])
|
||
|
}
|
||
|
|
||
|
copy(outputhash[:], key[:])
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func fix(v []byte, indices []uint32, i int) {
|
||
|
prev_t := indices[i]
|
||
|
t := indices[i+1]
|
||
|
|
||
|
data_a := binary.BigEndian.Uint32(v[((t)&0xffff)+2:])
|
||
|
if data_a < binary.BigEndian.Uint32(v[((prev_t)&0xffff)+2:]) {
|
||
|
t2 := prev_t
|
||
|
j := i
|
||
|
_ = indices[j+1]
|
||
|
for {
|
||
|
indices[j+1] = prev_t
|
||
|
j--
|
||
|
if j < 0 {
|
||
|
break
|
||
|
}
|
||
|
prev_t = indices[j]
|
||
|
if (t^prev_t) <= 0xffff && data_a < binary.BigEndian.Uint32(v[((prev_t)&0xffff)+2:]) {
|
||
|
continue
|
||
|
} else {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
indices[j+1] = t
|
||
|
t = t2
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// basically
|
||
|
func sort_indices(N uint32, v []byte, output []uint16, d *ScratchData) {
|
||
|
|
||
|
var byte_counters [2][256]byte
|
||
|
var counters [2][256]uint16
|
||
|
|
||
|
v[N] = 0 // make sure extra byte accessed is zero
|
||
|
|
||
|
indices := d.indices[:]
|
||
|
tmp_indices := d.tmp_indices[:]
|
||
|
|
||
|
for _, c := range v[:N] {
|
||
|
byte_counters[1][c]++
|
||
|
}
|
||
|
byte_counters[0] = byte_counters[1]
|
||
|
byte_counters[0][v[0]]--
|
||
|
|
||
|
counters[0][0] = uint16(byte_counters[0][0])
|
||
|
counters[1][0] = uint16(byte_counters[1][0]) - 1
|
||
|
|
||
|
c0 := counters[0][0]
|
||
|
c1 := counters[1][0]
|
||
|
|
||
|
for i := 1; i < 256; i++ {
|
||
|
c0 += uint16(byte_counters[0][i])
|
||
|
c1 += uint16(byte_counters[1][i])
|
||
|
|
||
|
counters[0][i] = c0
|
||
|
counters[1][i] = c1
|
||
|
}
|
||
|
|
||
|
counters0 := counters[0][:]
|
||
|
for i := int(N); i >= 1; i-- {
|
||
|
byte0 := uint32(v[i-1])
|
||
|
byte1 := uint32(v[i]) // here we can access extra byte from input array so make sure its zero
|
||
|
tmp_indices[counters0[v[i]]] = byte0<<24 | byte1<<16 | uint32(i-1)
|
||
|
counters0[v[i]]--
|
||
|
}
|
||
|
|
||
|
counters1 := counters[1][:]
|
||
|
_ = tmp_indices[N-1]
|
||
|
for i := int(N - 1); i >= 0; i-- {
|
||
|
data := tmp_indices[i]
|
||
|
tmp := counters1[data>>24]
|
||
|
counters1[data>>24]--
|
||
|
indices[tmp] = data
|
||
|
}
|
||
|
|
||
|
for i := 1; i < int(N); i++ { // no BC here
|
||
|
if indices[i-1]&0xffff0000 == indices[i]&0xffff0000 {
|
||
|
fix(v, indices, i-1)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// after fixing, convert indices to output
|
||
|
_ = output[N]
|
||
|
for i, c := range indices[:N] {
|
||
|
output[i] = uint16(c)
|
||
|
}
|
||
|
}
|