derohe-miniblock-mod/astrobwt/astrobwt_fast/astrobwt_optimized.go

package astrobwt_fast

import "unsafe"
import "hash"
import "sync"
import "crypto/rand"
import "encoding/binary"
import "golang.org/x/crypto/sha3"
import "golang.org/x/crypto/salsa20/salsa"

const stage1_length uint32 = 9973 // it is a prime

// see here to improve the algorithms more https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md
// this optimized algorithm is used only  in the miner and not in the blockchain

type ScratchData struct {
	hasher              hash.Hash
	stage1              [stage1_length + 64]byte // 10 KB stages are taken from it
	stage1_result       *[stage1_length + 1]uint16
	stage1_result_bytes *[(stage1_length) * 2]uint8
	indices             [stage1_length + 1]uint32 // 40 KB
	tmp_indices         [stage1_length + 1]uint32 // 40 KB
}

var Pool = sync.Pool{New: func() interface{} {
	var d ScratchData
	d.hasher = sha3.New256()
	d.stage1_result = ((*[stage1_length + 1]uint16)(unsafe.Pointer(&d.indices[0])))
	d.stage1_result_bytes = ((*[(stage1_length) * 2]byte)(unsafe.Pointer(&d.indices[0])))

	return &d
}}

func POW_optimized(inputdata []byte, data *ScratchData) (outputhash [32]byte) {

	defer func() {
		if r := recover(); r != nil { // if something happens due to RAM issues in miner, we should continue, system will crash sooner or later
			var buf [16]byte
			rand.Read(buf[:])
			outputhash = sha3.Sum256(buf[:]) // return a falsified has which will fail the check
		}
	}()

	var key [32]byte
	for i := range data.stage1 {
		data.stage1[i] = 0
	}

	var counter [16]byte

	data.hasher.Reset()
	data.hasher.Write(inputdata)
	_ = data.hasher.Sum(key[:0])

	salsa.XORKeyStream(data.stage1[:stage1_length], data.stage1[:stage1_length], &counter, &key)
	sort_indices(stage1_length, data.stage1[:stage1_length+40], data.stage1_result[:], data) // extra 40 bytes since we may read them, but we never write them

	if LittleEndian {
		data.hasher.Reset()
		data.hasher.Write(data.stage1_result_bytes[:])
		_ = data.hasher.Sum(key[:0])
	} else {
		var s [stage1_length * 2]byte
		for i, c := range data.stage1_result {
			binary.LittleEndian.PutUint16(s[i<<1:], c)
		}
		data.hasher.Reset()
		data.hasher.Write(s[:])
		_ = data.hasher.Sum(key[:0])
	}

	copy(outputhash[:], key[:])
	return
}

func fix(v []byte, indices []uint32, i int) {
	prev_t := indices[i]
	t := indices[i+1]

	data_a := binary.BigEndian.Uint32(v[((t)&0xffff)+2:])
	if data_a < binary.BigEndian.Uint32(v[((prev_t)&0xffff)+2:]) {
		t2 := prev_t
		j := i
		_ = indices[j+1]
		for {
			indices[j+1] = prev_t
			j--
			if j < 0 {
				break
			}
			prev_t = indices[j]
			if (t^prev_t) <= 0xffff && data_a < binary.BigEndian.Uint32(v[((prev_t)&0xffff)+2:]) {
				continue
			} else {
				break
			}
		}
		indices[j+1] = t
		t = t2
	}
}

// basically
func sort_indices(N uint32, v []byte, output []uint16, d *ScratchData) {

	var byte_counters [2][256]byte
	var counters [2][256]uint16

	v[N] = 0 // make sure extra byte accessed is zero

	indices := d.indices[:]
	tmp_indices := d.tmp_indices[:]

	for _, c := range v[:N] {
		byte_counters[1][c]++
	}
	byte_counters[0] = byte_counters[1]
	byte_counters[0][v[0]]--

	counters[0][0] = uint16(byte_counters[0][0])
	counters[1][0] = uint16(byte_counters[1][0]) - 1

	c0 := counters[0][0]
	c1 := counters[1][0]

	for i := 1; i < 256; i++ {
		c0 += uint16(byte_counters[0][i])
		c1 += uint16(byte_counters[1][i])

		counters[0][i] = c0
		counters[1][i] = c1
	}

	counters0 := counters[0][:]
	for i := int(N); i >= 1; i-- {
		byte0 := uint32(v[i-1])
		byte1 := uint32(v[i]) // here we can access extra byte from input array so make sure its zero
		tmp_indices[counters0[v[i]]] = byte0<<24 | byte1<<16 | uint32(i-1)
		counters0[v[i]]--
	}

	counters1 := counters[1][:]
	_ = tmp_indices[N-1]
	for i := int(N - 1); i >= 0; i-- {
		data := tmp_indices[i]
		tmp := counters1[data>>24]
		counters1[data>>24]--
		indices[tmp] = data
	}

	for i := 1; i < int(N); i++ { // no BC here
		if indices[i-1]&0xffff0000 == indices[i]&0xffff0000 {
			fix(v, indices, i-1)
		}
	}

	// after fixing, convert indices to output
	_ = output[N]
	for i, c := range indices[:N] {
		output[i] = uint16(c)
	}
}
DERO-HE STARGATE Mainnet Release49 2022-03-10 11:15:46 +00:00			`package astrobwt_fast`

			`import "unsafe"`
			`import "hash"`
			`import "sync"`
			`import "crypto/rand"`
			`import "encoding/binary"`
			`import "golang.org/x/crypto/sha3"`
			`import "golang.org/x/crypto/salsa20/salsa"`

			`const stage1_length uint32 = 9973 // it is a prime`

			`// see here to improve the algorithms more https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md`
			`// this optimized algorithm is used only in the miner and not in the blockchain`

			`type ScratchData struct {`
			`hasher hash.Hash`
			`stage1 [stage1_length + 64]byte // 10 KB stages are taken from it`
			`stage1_result *[stage1_length + 1]uint16`
			`stage1_result_bytes [(stage1_length) 2]uint8`
			`indices [stage1_length + 1]uint32 // 40 KB`
			`tmp_indices [stage1_length + 1]uint32 // 40 KB`
			`}`

			`var Pool = sync.Pool{New: func() interface{} {`
			`var d ScratchData`
			`d.hasher = sha3.New256()`
			`d.stage1_result = ((*[stage1_length + 1]uint16)(unsafe.Pointer(&d.indices[0])))`
			`d.stage1_result_bytes = (([(stage1_length) 2]byte)(unsafe.Pointer(&d.indices[0])))`

			`return &d`
			`}}`

			`func POW_optimized(inputdata []byte, data *ScratchData) (outputhash [32]byte) {`

			`defer func() {`
			`if r := recover(); r != nil { // if something happens due to RAM issues in miner, we should continue, system will crash sooner or later`
			`var buf [16]byte`
			`rand.Read(buf[:])`
			`outputhash = sha3.Sum256(buf[:]) // return a falsified has which will fail the check`
			`}`
			`}()`

			`var key [32]byte`
			`for i := range data.stage1 {`
			`data.stage1[i] = 0`
			`}`

			`var counter [16]byte`

			`data.hasher.Reset()`
			`data.hasher.Write(inputdata)`
			`_ = data.hasher.Sum(key[:0])`

			`salsa.XORKeyStream(data.stage1[:stage1_length], data.stage1[:stage1_length], &counter, &key)`
			`sort_indices(stage1_length, data.stage1[:stage1_length+40], data.stage1_result[:], data) // extra 40 bytes since we may read them, but we never write them`

			`if LittleEndian {`
			`data.hasher.Reset()`
			`data.hasher.Write(data.stage1_result_bytes[:])`
			`_ = data.hasher.Sum(key[:0])`
			`} else {`
			`var s [stage1_length * 2]byte`
			`for i, c := range data.stage1_result {`
			`binary.LittleEndian.PutUint16(s[i<<1:], c)`
			`}`
			`data.hasher.Reset()`
			`data.hasher.Write(s[:])`
			`_ = data.hasher.Sum(key[:0])`
			`}`

			`copy(outputhash[:], key[:])`
			`return`
			`}`

			`func fix(v []byte, indices []uint32, i int) {`
			`prev_t := indices[i]`
			`t := indices[i+1]`

			`data_a := binary.BigEndian.Uint32(v[((t)&0xffff)+2:])`
			`if data_a < binary.BigEndian.Uint32(v[((prev_t)&0xffff)+2:]) {`
			`t2 := prev_t`
			`j := i`
			`_ = indices[j+1]`
			`for {`
			`indices[j+1] = prev_t`
			`j--`
			`if j < 0 {`
			`break`
			`}`
			`prev_t = indices[j]`
			`if (t^prev_t) <= 0xffff && data_a < binary.BigEndian.Uint32(v[((prev_t)&0xffff)+2:]) {`
			`continue`
			`} else {`
			`break`
			`}`
			`}`
			`indices[j+1] = t`
			`t = t2`
			`}`
			`}`

			`// basically`
			`func sort_indices(N uint32, v []byte, output []uint16, d *ScratchData) {`

			`var byte_counters [2][256]byte`
			`var counters [2][256]uint16`

			`v[N] = 0 // make sure extra byte accessed is zero`

			`indices := d.indices[:]`
			`tmp_indices := d.tmp_indices[:]`

			`for _, c := range v[:N] {`
			`byte_counters[1][c]++`
			`}`
			`byte_counters[0] = byte_counters[1]`
			`byte_counters[0][v[0]]--`

			`counters[0][0] = uint16(byte_counters[0][0])`
			`counters[1][0] = uint16(byte_counters[1][0]) - 1`

			`c0 := counters[0][0]`
			`c1 := counters[1][0]`

			`for i := 1; i < 256; i++ {`
			`c0 += uint16(byte_counters[0][i])`
			`c1 += uint16(byte_counters[1][i])`

			`counters[0][i] = c0`
			`counters[1][i] = c1`
			`}`

			`counters0 := counters[0][:]`
			`for i := int(N); i >= 1; i-- {`
			`byte0 := uint32(v[i-1])`
			`byte1 := uint32(v[i]) // here we can access extra byte from input array so make sure its zero`
			`tmp_indices[counters0[v[i]]] = byte0<<24 \| byte1<<16 \| uint32(i-1)`
			`counters0[v[i]]--`
			`}`

			`counters1 := counters[1][:]`
			`_ = tmp_indices[N-1]`
			`for i := int(N - 1); i >= 0; i-- {`
			`data := tmp_indices[i]`
			`tmp := counters1[data>>24]`
			`counters1[data>>24]--`
			`indices[tmp] = data`
			`}`

			`for i := 1; i < int(N); i++ { // no BC here`
			`if indices[i-1]&0xffff0000 == indices[i]&0xffff0000 {`
			`fix(v, indices, i-1)`
			`}`
			`}`

			`// after fixing, convert indices to output`
			`_ = output[N]`
			`for i, c := range indices[:N] {`
			`output[i] = uint16(c)`
			`}`
			`}`