730 lines
19 KiB
Go

/*
* Copyright 2017 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package table
import (
"fmt"
"math/rand"
"os"
"sort"
"testing"
"github.com/dgraph-io/badger/options"
"github.com/dgraph-io/badger/y"
"github.com/stretchr/testify/require"
)
func key(prefix string, i int) string {
return prefix + fmt.Sprintf("%04d", i)
}
func buildTestTable(t *testing.T, prefix string, n int) *os.File {
y.AssertTrue(n <= 10000)
keyValues := make([][]string, n)
for i := 0; i < n; i++ {
k := key(prefix, i)
v := fmt.Sprintf("%d", i)
keyValues[i] = []string{k, v}
}
return buildTable(t, keyValues)
}
// keyValues is n by 2 where n is number of pairs.
func buildTable(t *testing.T, keyValues [][]string) *os.File {
b := NewTableBuilder()
defer b.Close()
// TODO: Add test for file garbage collection here. No files should be left after the tests here.
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
f, err := y.OpenSyncedFile(filename, true)
if t != nil {
require.NoError(t, err)
} else {
y.Check(err)
}
sort.Slice(keyValues, func(i, j int) bool {
return keyValues[i][0] < keyValues[j][0]
})
for _, kv := range keyValues {
y.AssertTrue(len(kv) == 2)
err := b.Add(y.KeyWithTs([]byte(kv[0]), 0), y.ValueStruct{Value: []byte(kv[1]), Meta: 'A', UserMeta: 0})
if t != nil {
require.NoError(t, err)
} else {
y.Check(err)
}
}
f.Write(b.Finish())
f.Close()
f, _ = y.OpenSyncedFile(filename, true)
return f
}
func TestTableIterator(t *testing.T) {
for _, n := range []int{99, 100, 101} {
t.Run(fmt.Sprintf("n=%d", n), func(t *testing.T) {
f := buildTestTable(t, "key", n)
table, err := OpenTable(f, options.MemoryMap)
require.NoError(t, err)
defer table.DecrRef()
it := table.NewIterator(false)
defer it.Close()
count := 0
for it.Rewind(); it.Valid(); it.Next() {
v := it.Value()
k := y.KeyWithTs([]byte(key("key", count)), 0)
require.EqualValues(t, k, it.Key())
require.EqualValues(t, fmt.Sprintf("%d", count), string(v.Value))
count++
}
require.Equal(t, count, n)
})
}
}
func TestSeekToFirst(t *testing.T) {
for _, n := range []int{99, 100, 101, 199, 200, 250, 9999, 10000} {
t.Run(fmt.Sprintf("n=%d", n), func(t *testing.T) {
f := buildTestTable(t, "key", n)
table, err := OpenTable(f, options.MemoryMap)
require.NoError(t, err)
defer table.DecrRef()
it := table.NewIterator(false)
defer it.Close()
it.seekToFirst()
require.True(t, it.Valid())
v := it.Value()
require.EqualValues(t, "0", string(v.Value))
require.EqualValues(t, 'A', v.Meta)
})
}
}
func TestSeekToLast(t *testing.T) {
for _, n := range []int{99, 100, 101, 199, 200, 250, 9999, 10000} {
t.Run(fmt.Sprintf("n=%d", n), func(t *testing.T) {
f := buildTestTable(t, "key", n)
table, err := OpenTable(f, options.MemoryMap)
require.NoError(t, err)
defer table.DecrRef()
it := table.NewIterator(false)
defer it.Close()
it.seekToLast()
require.True(t, it.Valid())
v := it.Value()
require.EqualValues(t, fmt.Sprintf("%d", n-1), string(v.Value))
require.EqualValues(t, 'A', v.Meta)
it.prev()
require.True(t, it.Valid())
v = it.Value()
require.EqualValues(t, fmt.Sprintf("%d", n-2), string(v.Value))
require.EqualValues(t, 'A', v.Meta)
})
}
}
func TestSeek(t *testing.T) {
f := buildTestTable(t, "k", 10000)
table, err := OpenTable(f, options.MemoryMap)
require.NoError(t, err)
defer table.DecrRef()
it := table.NewIterator(false)
defer it.Close()
var data = []struct {
in string
valid bool
out string
}{
{"abc", true, "k0000"},
{"k0100", true, "k0100"},
{"k0100b", true, "k0101"}, // Test case where we jump to next block.
{"k1234", true, "k1234"},
{"k1234b", true, "k1235"},
{"k9999", true, "k9999"},
{"z", false, ""},
}
for _, tt := range data {
it.seek(y.KeyWithTs([]byte(tt.in), 0))
if !tt.valid {
require.False(t, it.Valid())
continue
}
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, tt.out, string(y.ParseKey(k)))
}
}
func TestSeekForPrev(t *testing.T) {
f := buildTestTable(t, "k", 10000)
table, err := OpenTable(f, options.MemoryMap)
require.NoError(t, err)
defer table.DecrRef()
it := table.NewIterator(false)
defer it.Close()
var data = []struct {
in string
valid bool
out string
}{
{"abc", false, ""},
{"k0100", true, "k0100"},
{"k0100b", true, "k0100"}, // Test case where we jump to next block.
{"k1234", true, "k1234"},
{"k1234b", true, "k1234"},
{"k9999", true, "k9999"},
{"z", true, "k9999"},
}
for _, tt := range data {
it.seekForPrev(y.KeyWithTs([]byte(tt.in), 0))
if !tt.valid {
require.False(t, it.Valid())
continue
}
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, tt.out, string(y.ParseKey(k)))
}
}
func TestIterateFromStart(t *testing.T) {
// Vary the number of elements added.
for _, n := range []int{99, 100, 101, 199, 200, 250, 9999, 10000} {
t.Run(fmt.Sprintf("n=%d", n), func(t *testing.T) {
f := buildTestTable(t, "key", n)
table, err := OpenTable(f, options.MemoryMap)
require.NoError(t, err)
defer table.DecrRef()
ti := table.NewIterator(false)
defer ti.Close()
ti.reset()
ti.seekToFirst()
require.True(t, ti.Valid())
// No need to do a Next.
// ti.Seek brings us to the first key >= "". Essentially a SeekToFirst.
var count int
for ; ti.Valid(); ti.next() {
v := ti.Value()
require.EqualValues(t, fmt.Sprintf("%d", count), string(v.Value))
require.EqualValues(t, 'A', v.Meta)
count++
}
require.EqualValues(t, n, count)
})
}
}
func TestIterateFromEnd(t *testing.T) {
// Vary the number of elements added.
for _, n := range []int{99, 100, 101, 199, 200, 250, 9999, 10000} {
t.Run(fmt.Sprintf("n=%d", n), func(t *testing.T) {
f := buildTestTable(t, "key", n)
table, err := OpenTable(f, options.FileIO)
require.NoError(t, err)
defer table.DecrRef()
ti := table.NewIterator(false)
defer ti.Close()
ti.reset()
ti.seek(y.KeyWithTs([]byte("zzzzzz"), 0)) // Seek to end, an invalid element.
require.False(t, ti.Valid())
for i := n - 1; i >= 0; i-- {
ti.prev()
require.True(t, ti.Valid())
v := ti.Value()
require.EqualValues(t, fmt.Sprintf("%d", i), string(v.Value))
require.EqualValues(t, 'A', v.Meta)
}
ti.prev()
require.False(t, ti.Valid())
})
}
}
func TestTable(t *testing.T) {
f := buildTestTable(t, "key", 10000)
table, err := OpenTable(f, options.FileIO)
require.NoError(t, err)
defer table.DecrRef()
ti := table.NewIterator(false)
defer ti.Close()
kid := 1010
seek := y.KeyWithTs([]byte(key("key", kid)), 0)
for ti.seek(seek); ti.Valid(); ti.next() {
k := ti.Key()
require.EqualValues(t, string(y.ParseKey(k)), key("key", kid))
kid++
}
if kid != 10000 {
t.Errorf("Expected kid: 10000. Got: %v", kid)
}
ti.seek(y.KeyWithTs([]byte(key("key", 99999)), 0))
require.False(t, ti.Valid())
ti.seek(y.KeyWithTs([]byte(key("key", -1)), 0))
require.True(t, ti.Valid())
k := ti.Key()
require.EqualValues(t, string(y.ParseKey(k)), key("key", 0))
}
func TestIterateBackAndForth(t *testing.T) {
f := buildTestTable(t, "key", 10000)
table, err := OpenTable(f, options.MemoryMap)
require.NoError(t, err)
defer table.DecrRef()
seek := y.KeyWithTs([]byte(key("key", 1010)), 0)
it := table.NewIterator(false)
defer it.Close()
it.seek(seek)
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, seek, k)
it.prev()
it.prev()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, key("key", 1008), string(y.ParseKey(k)))
it.next()
it.next()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, key("key", 1010), y.ParseKey(k))
it.seek(y.KeyWithTs([]byte(key("key", 2000)), 0))
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, key("key", 2000), y.ParseKey(k))
it.prev()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, key("key", 1999), y.ParseKey(k))
it.seekToFirst()
k = it.Key()
require.EqualValues(t, key("key", 0), y.ParseKey(k))
}
func TestUniIterator(t *testing.T) {
f := buildTestTable(t, "key", 10000)
table, err := OpenTable(f, options.MemoryMap)
require.NoError(t, err)
defer table.DecrRef()
{
it := table.NewIterator(false)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
v := it.Value()
require.EqualValues(t, fmt.Sprintf("%d", count), string(v.Value))
require.EqualValues(t, 'A', v.Meta)
count++
}
require.EqualValues(t, 10000, count)
}
{
it := table.NewIterator(true)
defer it.Close()
var count int
for it.Rewind(); it.Valid(); it.Next() {
v := it.Value()
require.EqualValues(t, fmt.Sprintf("%d", 10000-1-count), string(v.Value))
require.EqualValues(t, 'A', v.Meta)
count++
}
require.EqualValues(t, 10000, count)
}
}
// Try having only one table.
func TestConcatIteratorOneTable(t *testing.T) {
f := buildTable(t, [][]string{
{"k1", "a1"},
{"k2", "a2"},
})
tbl, err := OpenTable(f, options.MemoryMap)
require.NoError(t, err)
defer tbl.DecrRef()
it := NewConcatIterator([]*Table{tbl}, false)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, "k1", string(y.ParseKey(k)))
vs := it.Value()
require.EqualValues(t, "a1", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
}
func TestConcatIterator(t *testing.T) {
f := buildTestTable(t, "keya", 10000)
f2 := buildTestTable(t, "keyb", 10000)
f3 := buildTestTable(t, "keyc", 10000)
tbl, err := OpenTable(f, options.MemoryMap)
require.NoError(t, err)
defer tbl.DecrRef()
tbl2, err := OpenTable(f2, options.LoadToRAM)
require.NoError(t, err)
defer tbl2.DecrRef()
tbl3, err := OpenTable(f3, options.LoadToRAM)
require.NoError(t, err)
defer tbl3.DecrRef()
{
it := NewConcatIterator([]*Table{tbl, tbl2, tbl3}, false)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
var count int
for ; it.Valid(); it.Next() {
vs := it.Value()
require.EqualValues(t, fmt.Sprintf("%d", count%10000), string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
count++
}
require.EqualValues(t, 30000, count)
it.Seek(y.KeyWithTs([]byte("a"), 0))
require.EqualValues(t, "keya0000", string(y.ParseKey(it.Key())))
vs := it.Value()
require.EqualValues(t, "0", string(vs.Value))
it.Seek(y.KeyWithTs([]byte("keyb"), 0))
require.EqualValues(t, "keyb0000", string(y.ParseKey(it.Key())))
vs = it.Value()
require.EqualValues(t, "0", string(vs.Value))
it.Seek(y.KeyWithTs([]byte("keyb9999b"), 0))
require.EqualValues(t, "keyc0000", string(y.ParseKey(it.Key())))
vs = it.Value()
require.EqualValues(t, "0", string(vs.Value))
it.Seek(y.KeyWithTs([]byte("keyd"), 0))
require.False(t, it.Valid())
}
{
it := NewConcatIterator([]*Table{tbl, tbl2, tbl3}, true)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
var count int
for ; it.Valid(); it.Next() {
vs := it.Value()
require.EqualValues(t, fmt.Sprintf("%d", 10000-(count%10000)-1), string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
count++
}
require.EqualValues(t, 30000, count)
it.Seek(y.KeyWithTs([]byte("a"), 0))
require.False(t, it.Valid())
it.Seek(y.KeyWithTs([]byte("keyb"), 0))
require.EqualValues(t, "keya9999", string(y.ParseKey(it.Key())))
vs := it.Value()
require.EqualValues(t, "9999", string(vs.Value))
it.Seek(y.KeyWithTs([]byte("keyb9999b"), 0))
require.EqualValues(t, "keyb9999", string(y.ParseKey(it.Key())))
vs = it.Value()
require.EqualValues(t, "9999", string(vs.Value))
it.Seek(y.KeyWithTs([]byte("keyd"), 0))
require.EqualValues(t, "keyc9999", string(y.ParseKey(it.Key())))
vs = it.Value()
require.EqualValues(t, "9999", string(vs.Value))
}
}
func TestMergingIterator(t *testing.T) {
f1 := buildTable(t, [][]string{
{"k1", "a1"},
{"k2", "a2"},
})
f2 := buildTable(t, [][]string{
{"k1", "b1"},
{"k2", "b2"},
})
tbl1, err := OpenTable(f1, options.LoadToRAM)
require.NoError(t, err)
defer tbl1.DecrRef()
tbl2, err := OpenTable(f2, options.LoadToRAM)
require.NoError(t, err)
defer tbl2.DecrRef()
it1 := tbl1.NewIterator(false)
it2 := NewConcatIterator([]*Table{tbl2}, false)
it := y.NewMergeIterator([]y.Iterator{it1, it2}, false)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, "k1", string(y.ParseKey(k)))
vs := it.Value()
require.EqualValues(t, "a1", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, "k2", string(y.ParseKey(k)))
vs = it.Value()
require.EqualValues(t, "a2", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.False(t, it.Valid())
}
func TestMergingIteratorReversed(t *testing.T) {
f1 := buildTable(t, [][]string{
{"k1", "a1"},
{"k2", "a2"},
})
f2 := buildTable(t, [][]string{
{"k1", "b1"},
{"k2", "b2"},
})
tbl1, err := OpenTable(f1, options.LoadToRAM)
require.NoError(t, err)
defer tbl1.DecrRef()
tbl2, err := OpenTable(f2, options.LoadToRAM)
require.NoError(t, err)
defer tbl2.DecrRef()
it1 := tbl1.NewIterator(true)
it2 := NewConcatIterator([]*Table{tbl2}, true)
it := y.NewMergeIterator([]y.Iterator{it1, it2}, true)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, "k2", string(y.ParseKey(k)))
vs := it.Value()
require.EqualValues(t, "a2", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, "k1", string(y.ParseKey(k)))
vs = it.Value()
require.EqualValues(t, "a1", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.False(t, it.Valid())
}
// Take only the first iterator.
func TestMergingIteratorTakeOne(t *testing.T) {
f1 := buildTable(t, [][]string{
{"k1", "a1"},
{"k2", "a2"},
})
f2 := buildTable(t, [][]string{})
t1, err := OpenTable(f1, options.LoadToRAM)
require.NoError(t, err)
defer t1.DecrRef()
t2, err := OpenTable(f2, options.LoadToRAM)
require.NoError(t, err)
defer t2.DecrRef()
it1 := NewConcatIterator([]*Table{t1}, false)
it2 := NewConcatIterator([]*Table{t2}, false)
it := y.NewMergeIterator([]y.Iterator{it1, it2}, false)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, "k1", string(y.ParseKey(k)))
vs := it.Value()
require.EqualValues(t, "a1", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, "k2", string(y.ParseKey(k)))
vs = it.Value()
require.EqualValues(t, "a2", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.False(t, it.Valid())
}
// Take only the second iterator.
func TestMergingIteratorTakeTwo(t *testing.T) {
f1 := buildTable(t, [][]string{})
f2 := buildTable(t, [][]string{
{"k1", "a1"},
{"k2", "a2"},
})
t1, err := OpenTable(f1, options.LoadToRAM)
require.NoError(t, err)
defer t1.DecrRef()
t2, err := OpenTable(f2, options.LoadToRAM)
require.NoError(t, err)
defer t2.DecrRef()
it1 := NewConcatIterator([]*Table{t1}, false)
it2 := NewConcatIterator([]*Table{t2}, false)
it := y.NewMergeIterator([]y.Iterator{it1, it2}, false)
defer it.Close()
it.Rewind()
require.True(t, it.Valid())
k := it.Key()
require.EqualValues(t, "k1", string(y.ParseKey(k)))
vs := it.Value()
require.EqualValues(t, "a1", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.True(t, it.Valid())
k = it.Key()
require.EqualValues(t, "k2", string(y.ParseKey(k)))
vs = it.Value()
require.EqualValues(t, "a2", string(vs.Value))
require.EqualValues(t, 'A', vs.Meta)
it.Next()
require.False(t, it.Valid())
}
func BenchmarkRead(b *testing.B) {
n := 5 << 20
builder := NewTableBuilder()
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
f, err := y.OpenSyncedFile(filename, true)
y.Check(err)
for i := 0; i < n; i++ {
k := fmt.Sprintf("%016x", i)
v := fmt.Sprintf("%d", i)
y.Check(builder.Add([]byte(k), y.ValueStruct{Value: []byte(v), Meta: 123, UserMeta: 0}))
}
f.Write(builder.Finish())
tbl, err := OpenTable(f, options.MemoryMap)
y.Check(err)
defer tbl.DecrRef()
// y.Printf("Size of table: %d\n", tbl.Size())
b.ResetTimer()
// Iterate b.N times over the entire table.
for i := 0; i < b.N; i++ {
func() {
it := tbl.NewIterator(false)
defer it.Close()
for it.seekToFirst(); it.Valid(); it.next() {
}
}()
}
}
func BenchmarkReadAndBuild(b *testing.B) {
n := 5 << 20
builder := NewTableBuilder()
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
f, err := y.OpenSyncedFile(filename, true)
y.Check(err)
for i := 0; i < n; i++ {
k := fmt.Sprintf("%016x", i)
v := fmt.Sprintf("%d", i)
y.Check(builder.Add([]byte(k), y.ValueStruct{Value: []byte(v), Meta: 123, UserMeta: 0}))
}
f.Write(builder.Finish())
tbl, err := OpenTable(f, options.MemoryMap)
y.Check(err)
defer tbl.DecrRef()
// y.Printf("Size of table: %d\n", tbl.Size())
b.ResetTimer()
// Iterate b.N times over the entire table.
for i := 0; i < b.N; i++ {
func() {
newBuilder := NewTableBuilder()
it := tbl.NewIterator(false)
defer it.Close()
for it.seekToFirst(); it.Valid(); it.next() {
vs := it.Value()
newBuilder.Add(it.Key(), vs)
}
newBuilder.Finish()
}()
}
}
func BenchmarkReadMerged(b *testing.B) {
n := 5 << 20
m := 5 // Number of tables.
y.AssertTrue((n % m) == 0)
tableSize := n / m
var tables []*Table
for i := 0; i < m; i++ {
filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
builder := NewTableBuilder()
f, err := y.OpenSyncedFile(filename, true)
y.Check(err)
for j := 0; j < tableSize; j++ {
id := j*m + i // Arrays are interleaved.
// id := i*tableSize+j (not interleaved)
k := fmt.Sprintf("%016x", id)
v := fmt.Sprintf("%d", id)
y.Check(builder.Add([]byte(k), y.ValueStruct{Value: []byte(v), Meta: 123, UserMeta: 0}))
}
f.Write(builder.Finish())
tbl, err := OpenTable(f, options.MemoryMap)
y.Check(err)
tables = append(tables, tbl)
defer tbl.DecrRef()
}
b.ResetTimer()
// Iterate b.N times over the entire table.
for i := 0; i < b.N; i++ {
func() {
var iters []y.Iterator
for _, tbl := range tables {
iters = append(iters, tbl.NewIterator(false))
}
it := y.NewMergeIterator(iters, false)
defer it.Close()
for it.Rewind(); it.Valid(); it.Next() {
}
}()
}
}