// Source: derohe-miniblock-mod/astrobwt/astrobwt_optimized.go
//
// (226 lines, 6.1 KiB, Go)

package astrobwt
//import "os"
//import "fmt"
import "sync"
import "encoding/binary"
import "golang.org/x/crypto/sha3"
import "golang.org/x/crypto/salsa20/salsa"
// For faster suffix-sorting algorithms that could improve this code further, see
// https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md
// The original implementation came from the xmrig miner; it had a flaw, which has been fixed here.
// This optimized algorithm is used only in the miner, not in the blockchain.
//const stage1_length int = 147253 // it is a prime
//const max_length int = 1024*1024 + stage1_length + 1024
// Data bundles every scratch buffer used by one POW_optimized_v2 computation,
// so that a single allocation can be reused across calls (see pool).
type Data struct {
	// stage1 is the stage-1 input buffer. POW_optimized_v2 zeroes it and then
	// writes keystream into stage1[1:stage1_length+1]; byte 0 and the trailing
	// bytes stay zero and act as padding for the multi-byte reads done while
	// sorting.
	stage1 [stage1_length + 64]byte
	// stage1_result receives the sort_indices output for stage 1.
	stage1_result [stage1_length + 1]byte
	// stage2 is the stage-2 input buffer, laid out like stage1 but sized for
	// the largest possible stage-2 length (stage1_length + 0xfffff).
	stage2 [1024*1024 + stage1_length + 1 + 64]byte
	// stage2_result receives the sort_indices output for stage 2.
	stage2_result [1024*1024 + stage1_length + 1]byte
	// indices holds the packed (key prefix | position) entries after the
	// second counting-sort pass and the final insertion sort in sort_indices.
	indices [ALLOCATION_SIZE]uint64
	// tmp_indices holds the packed entries after the first counting-sort pass.
	tmp_indices [ALLOCATION_SIZE]uint64
}
// pool hands out reusable *Data scratch buffers to POW_optimized_v1 so the
// large arrays inside Data are not reallocated on every hash.
var pool = sync.Pool{
	New: func() interface{} {
		return new(Data)
	},
}
// POW_optimized_v1 computes the proof-of-work hash of inputdata using a
// scratch buffer borrowed from pool. See POW_optimized_v2 for the meaning of
// max_limit and of the returned values.
//
// The buffer is handed back to the pool only after POW_optimized_v2 returns
// normally; it is deliberately not returned via defer, so a buffer left in an
// unknown state by a panic is never reused.
func POW_optimized_v1(inputdata []byte, max_limit int) (outputhash [32]byte, success bool) {
	scratch := pool.Get().(*Data)
	hash, ok := POW_optimized_v2(inputdata, max_limit, scratch)
	pool.Put(scratch)
	return hash, ok
}
// POW_optimized_v2 computes the proof-of-work hash of inputdata using the
// caller-supplied scratch buffers in data.
//
// The computation runs in two stages. Each stage fills a buffer with a
// salsa keystream keyed by a SHA3-256 digest, transforms it with
// sort_indices, and hashes the transformed bytes with SHA3-256. The stage-2
// length is stage1_length plus the low 20 bits of the stage-1 digest.
//
// If that length exceeds max_limit, outputhash is filled with 0xff and
// success is false. Otherwise outputhash is the final digest and success is
// true.
//
// data.stage2 must be all zero on entry; this function re-zeroes it before a
// successful return.
func POW_optimized_v2(inputdata []byte, max_limit int, data *Data) (outputhash [32]byte, success bool) {
	var nonce [16]byte

	// Stage 1: start from an all-zero buffer so XOR-ing the keystream into it
	// yields the raw keystream. stage1_result is not cleared because
	// sort_indices overwrites all stage1_length+1 bytes of it.
	for idx := range data.stage1 {
		data.stage1[idx] = 0
	}
	digest := sha3.Sum256(inputdata)
	stage1Stream := data.stage1[1 : stage1_length+1]
	salsa.XORKeyStream(stage1Stream, stage1Stream, &nonce, &digest)
	sort_indices(stage1_length+1, data.stage1[:], data.stage1_result[:], data)

	// The stage-1 digest both keys stage 2 and selects its length.
	digest = sha3.Sum256(data.stage1_result[:])
	extra := int(binary.LittleEndian.Uint32(digest[:]) & 0xfffff)
	stage2_length := stage1_length + extra
	if max_limit < stage2_length {
		for idx := range outputhash {
			outputhash[idx] = 0xff
		}
		return outputhash, false
	}

	// Stage 2: reset the nonce so the keystream starts from a zero counter.
	nonce = [16]byte{}
	stage2Stream := data.stage2[1 : stage2_length+1]
	salsa.XORKeyStream(stage2Stream, stage2Stream, &nonce, &digest)
	sort_indices(stage2_length+1, data.stage2[:], data.stage2_result[:], data)

	// Overwrite the first 18 bytes of the stage-2 result with a fixed marker
	// before hashing.
	// NOTE(review): presumably deliberate, so that hashes from this build
	// differ from the unmodified algorithm — confirm before changing.
	copy(data.stage2_result[:], []byte("Broken for testnet"))
	digest = sha3.Sum256(data.stage2_result[:stage2_length+1])

	// Leave stage2 zeroed for the next call that reuses this Data.
	for idx := range data.stage2 {
		data.stage2[idx] = 0
	}
	return digest, true
}
const (
	// COUNTING_SORT_BITS is the number of key bits consumed by each of the
	// two counting-sort passes in sort_indices.
	COUNTING_SORT_BITS uint64 = 10
	// COUNTING_SORT_SIZE is the number of buckets per counting-sort pass.
	COUNTING_SORT_SIZE uint64 = 1 << COUNTING_SORT_BITS
	// ALLOCATION_SIZE is the length of the index arrays held in Data.
	ALLOCATION_SIZE = MAX_LENGTH
)
// BigEndian_Uint64 decodes the first eight bytes of b as an unsigned 64-bit
// integer in big-endian order (b[0] is the most significant byte).
// It panics if len(b) < 8.
func BigEndian_Uint64(b []byte) uint64 {
	_ = b[7] // one up-front bounds check; see golang.org/issue/14808
	hi := uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3])
	lo := uint32(b[4])<<24 | uint32(b[5])<<16 | uint32(b[6])<<8 | uint32(b[7])
	return uint64(hi)<<32 | uint64(lo)
}
// smaller reports whether packed entry a orders strictly before packed entry b.
//
// A packed entry, as built by sort_indices, carries a key prefix in its upper
// 43 bits and a position into input in its lower 21 bits. Entries are ordered
// first by that prefix; when the prefixes are equal, the eight input bytes
// starting five bytes past each entry's position decide.
func smaller(input []uint8, a, b uint64) bool {
	if prefixA, prefixB := a>>21, b>>21; prefixA != prefixB {
		return prefixA < prefixB
	}

	const positionMask = 1<<21 - 1
	tailA := BigEndian_Uint64(input[(a&positionMask)+5:])
	tailB := BigEndian_Uint64(input[(b&positionMask)+5:])
	return tailA < tailB
}
// sort_indices orders the N suffixes of input_extra[1:] by their leading
// bytes and writes, for each suffix in that order, the byte that precedes it
// in input_extra to output[0:N] (a Burrows-Wheeler-style transform).
//
// Suffixes are compared on at most their first 13 bytes: the top 43 bits of
// the first 8 bytes, then the 8 bytes starting at offset 5 (see smaller).
// Suffixes that agree on those bytes are not ordered any further.
//
// Requirements, none of which are checked here:
//   - N must fit in 21 bits and must not exceed len(d.indices);
//   - input_extra must contain at least N+13 bytes, because keys are read as
//     8-byte words at and beyond each suffix start;
//   - output must contain at least N bytes.
//
// d.indices and d.tmp_indices are used as scratch space and are overwritten.
func sort_indices(N int, input_extra []byte, output []byte, d *Data) {
	var counters [2][COUNTING_SORT_SIZE]uint32
	indices := d.indices[:]
	tmp_indices := d.tmp_indices[:]
	input := input_extra[1:]

	// Histogram pass. counters[1] counts the top COUNTING_SORT_BITS bits of
	// each suffix key and counters[0] the next COUNTING_SORT_BITS bits. The
	// loop is unrolled by three: shifting k0 left by 8 and 16 bits yields the
	// leading bits of the keys at i+1 and i+2 without reloading them.
	loop3 := N / 3 * 3
	for i := 0; i < loop3; i += 3 {
		k0 := BigEndian_Uint64(input[i:])
		counters[0][(k0>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]++
		counters[1][k0>>(64-COUNTING_SORT_BITS)]++
		k1 := k0 << 8
		counters[0][(k1>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]++
		counters[1][k1>>(64-COUNTING_SORT_BITS)]++
		k2 := k0 << 16
		counters[0][(k2>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]++
		counters[1][k2>>(64-COUNTING_SORT_BITS)]++
	}
	if N%3 != 0 {
		for i := loop3; i < N; i++ {
			k := BigEndian_Uint64(input[i:])
			counters[0][(k>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]++
			counters[1][k>>(64-COUNTING_SORT_BITS)]++
		}
	}
	/* Reference (non-unrolled) form of the histogram pass:
	for i := 0; i < N ; i++{
		k := BigEndian_Uint64(input[i:])
		counters[0][(k >> (64 - COUNTING_SORT_BITS * 2)) & (COUNTING_SORT_SIZE - 1)]++
		counters[1][k >> (64 - COUNTING_SORT_BITS)]++
	}
	*/

	// Turn the histograms into inclusive prefix sums minus one, so that each
	// counter holds the index of the last slot belonging to its bucket.
	prev := [2]uint32{counters[0][0], counters[1][0]}
	counters[0][0] = prev[0] - 1
	counters[1][0] = prev[1] - 1
	var cur [2]uint32
	for i := uint64(1); i < COUNTING_SORT_SIZE; i++ {
		cur[0], cur[1] = counters[0][i]+prev[0], counters[1][i]+prev[1]
		counters[0][i] = cur[0] - 1
		counters[1][i] = cur[1] - 1
		prev[0] = cur[0]
		prev[1] = cur[1]
	}

	// First counting-sort pass, on the second group of COUNTING_SORT_BITS
	// bits. Each entry packs the key with its bottom 21 bits cleared
	// (mask 0xFFFFFFFFFFE00000) together with the suffix position in those
	// bottom 21 bits. Walking backwards while filling each bucket from its
	// end keeps the pass stable.
	for i := N - 1; i >= 0; i-- {
		k := BigEndian_Uint64(input[i:])
		tmp := counters[0][(k>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]
		counters[0][(k>>(64-COUNTING_SORT_BITS*2))&(COUNTING_SORT_SIZE-1)]--
		tmp_indices[tmp] = (k & 0xFFFFFFFFFFE00000) | uint64(i)
	}

	// Second counting-sort pass, on the top COUNTING_SORT_BITS bits. After it
	// the entries are ordered by the top 2*COUNTING_SORT_BITS key bits.
	for i := N - 1; i >= 0; i-- {
		data := tmp_indices[i]
		tmp := counters[1][data>>(64-COUNTING_SORT_BITS)]
		counters[1][data>>(64-COUNTING_SORT_BITS)]--
		indices[tmp] = data
	}

	// Insertion sort to finish the ordering using smaller. prev_t always
	// holds the entry currently stored just before position i.
	prev_t := indices[0]
	for i := 1; i < N; i++ {
		t := indices[i]
		if smaller(input, t, prev_t) {
			// t2 is the largest entry seen so far; it ends up at position i
			// once the shifting below is done.
			t2 := prev_t
			j := i - 1
			for {
				indices[j+1] = prev_t
				j--
				if j < 0 {
					break
				}
				prev_t = indices[j]
				if !smaller(input, t, prev_t) {
					break
				}
			}
			indices[j+1] = t
			t = t2
		}
		prev_t = t
	}

	// Emit the byte preceding each suffix. Because input is input_extra[1:],
	// input_extra[p] is the byte just before suffix position p.
	/* Reference (non-unrolled) form:
	for i := 0; i < N;i++{
		output[i] = input_extra[indices[i] & ((1 << 21) - 1) ]
	}*/
	// Unrolled by four over the largest multiple of four that does not exceed
	// N. Rounding up instead would touch indices[N] and output[N] whenever
	// N%4 == 3. The tail loop handles the remaining 0-3 entries.
	loop4 := N / 4 * 4
	for i := 0; i < loop4; i += 4 {
		output[i+0] = input_extra[indices[i+0]&((1<<21)-1)]
		output[i+1] = input_extra[indices[i+1]&((1<<21)-1)]
		output[i+2] = input_extra[indices[i+2]&((1<<21)-1)]
		output[i+3] = input_extra[indices[i+3]&((1<<21)-1)]
	}
	for i := loop4; i < N; i++ {
		output[i] = input_extra[indices[i]&((1<<21)-1)]
	}

	// Known issue in the output above: when the last byte of the input is
	// 0x00, the initial output bytes are wrong. The correction below moves
	// the original first output byte to just before the first non-zero byte
	// and puts a zero in front. This fix may not be complete.
	if N > 3 && input[N-2] == 0 {
		backup_byte := output[0]
		output[0] = 0
		for i := 1; i < N; i++ {
			if output[i] != 0 {
				output[i-1] = backup_byte
				break
			}
		}
	}
}