From a379a0c7f6b92ad9683d3a2f6ca6041b506327a2 Mon Sep 17 00:00:00 2001
From: Jidian Shan
Date: Tue, 26 Nov 2024 10:46:04 +0800
Subject: [PATCH] add new compare function according to Java version's compare
 function(oNeilCompare), only for number which is >= 0

---
 BitSliceIndexing/bsi.go      | 175 ++++++++++++++++++++++++++++
 BitSliceIndexing/bsi_test.go |  74 ++++++++++++
 roaring64/bsi64.go           | 216 ++++++++++++++++++++++++++++++++++-
 roaring64/bsi64_test.go      |  81 +++++++++++++
 4 files changed, 545 insertions(+), 1 deletion(-)

diff --git a/BitSliceIndexing/bsi.go b/BitSliceIndexing/bsi.go
index 5487edbf..22247806 100644
--- a/BitSliceIndexing/bsi.go
+++ b/BitSliceIndexing/bsi.go
@@ -868,3 +868,178 @@ func (b *BSI) Increment(foundSet *roaring.Bitmap) {
 func (b *BSI) IncrementAll() {
 	b.Increment(b.GetExistenceBitmap())
 }
+
+// CompareValueONeil returns the columns whose value satisfies op, using the
+// O'Neil bit-sliced comparison. It only supports values >= 0. For RANGE,
+// valueOrStart and end are inclusive bounds; otherwise only valueOrStart is
+// used. A nil foundSet selects every existing column; unknown ops yield nil.
+func (b *BSI) CompareValueONeil(op Operation, valueOrStart, end int64,
+	foundSet *roaring.Bitmap) *roaring.Bitmap {
+
+	minValue := b.minValue()
+	maxValue := b.maxValue()
+
+	all := roaring.New()
+	all.Or(b.eBM)
+	if foundSet != nil {
+		all.And(foundSet)
+	}
+
+	// Fast path: the stored min/max may already decide the whole comparison.
+	result := b.compareUsingMinMax(op, valueOrStart, end, all, minValue, maxValue)
+	if result != nil {
+		return result
+	}
+	switch op {
+	case EQ, GE, GT, LT, LE:
+		return b.oNeilCompare(op, valueOrStart, foundSet)
+	case RANGE:
+		// Clamp to the stored bounds; oNeilCompare only inspects stored slices.
+		if valueOrStart < minValue {
+			valueOrStart = minValue
+		}
+		if end > maxValue {
+			end = maxValue
+		}
+		left := b.oNeilCompare(GE, valueOrStart, foundSet)
+		right := b.oNeilCompare(LE, end, foundSet)
+		return roaring.And(left, right)
+	default:
+		return nil
+	}
+}
+
+// compareUsingMinMax tries to decide the comparison from the stored minimum and
+// maximum alone. It returns all when every candidate matches, an empty bitmap
+// when none can, and nil when the bit slices must be scanned or the operation
+// is unknown.
+func (b *BSI) compareUsingMinMax(operation Operation, valueOrStart, end int64, all *roaring.Bitmap, minValue, maxValue int64) *roaring.Bitmap {
+	empty := roaring.New()
+	switch operation {
+	case LT:
+		if valueOrStart > maxValue {
+			return all
+		} else if valueOrStart <= minValue {
+			return empty
+		}
+	case LE:
+		if valueOrStart >= maxValue {
+			return all
+		} else if valueOrStart < minValue {
+			return empty
+		}
+	case GT:
+		if valueOrStart < minValue {
+			return all
+		} else if valueOrStart >= maxValue {
+			return empty
+		}
+	case GE:
+		if valueOrStart <= minValue {
+			return all
+		} else if valueOrStart > maxValue {
+			return empty
+		}
+	case EQ:
+		if minValue == maxValue && minValue == valueOrStart {
+			return all
+		} else if valueOrStart < minValue || valueOrStart > maxValue {
+			return empty
+		}
+	case RANGE:
+		if valueOrStart <= minValue && end >= maxValue {
+			return all
+		} else if valueOrStart > maxValue || end < minValue {
+			return empty
+		}
+	default:
+		return nil
+	}
+	return nil
+}
+
+// oNeilCompare scans bit slices BitCount()-1 down to 0, tracking the columns
+// already known to be greater than (GTB), less than (LTB) or still equal to
+// (EQB) the predicate. It returns the set selected by operation, restricted to
+// foundSet (or to every existing column when foundSet is nil).
+func (b *BSI) oNeilCompare(operation Operation, predicate int64, foundSet *roaring.Bitmap) *roaring.Bitmap {
+	fixedFoundSet := foundSet
+	if foundSet == nil {
+		fixedFoundSet = b.eBM
+	}
+	GTB := roaring.New()
+	LTB := roaring.New()
+	EQB := roaring.New()
+	EQB.Or(b.eBM)
+
+	for i := b.BitCount() - 1; i >= 0; i-- {
+		bit := (predicate >> i) & 1
+		if bit == 1 {
+			LTB.Or(roaring.AndNot(EQB, b.bA[i]))
+			EQB.And(b.bA[i])
+		} else {
+			GTB.Or(roaring.And(EQB, b.bA[i]))
+			EQB.AndNot(b.bA[i])
+		}
+	}
+
+	EQB.And(fixedFoundSet)
+	switch operation {
+	case EQ:
+		return EQB
+	case GT:
+		return roaring.And(GTB, fixedFoundSet)
+	case LT:
+		return roaring.And(LTB, fixedFoundSet)
+	case LE:
+		LTB.Or(EQB)
+		return roaring.And(LTB, fixedFoundSet)
+	case GE:
+		GTB.Or(EQB)
+		return roaring.And(GTB, fixedFoundSet)
+	}
+	return nil
+}
+
+// minValue returns the smallest stored value (0 for an empty BSI); intended to be cheaper than MinMax.
+func (b *BSI) minValue() int64 {
+	if b.eBM.IsEmpty() {
+		return 0
+	}
+	minValueID := roaring.New()
+	minValueID.Or(b.eBM)
+	for i := len(b.bA) - 1; i >= 0; i-- {
+		tmp := roaring.AndNot(minValueID, b.bA[i])
+		if !tmp.IsEmpty() {
+			minValueID = tmp
+		}
+	}
+	return b.valueAt(minValueID.Minimum())
+}
+
+// maxValue returns the largest stored value (0 for an empty BSI).
+func (b *BSI) maxValue() int64 {
+	if b.eBM.IsEmpty() {
+		return 0
+	}
+	maxValueID := roaring.New()
+	maxValueID.Or(b.eBM)
+	for i := len(b.bA) - 1; i >= 0; i-- {
+		tmp := roaring.And(maxValueID, b.bA[i])
+		if !tmp.IsEmpty() {
+			maxValueID = tmp
+		}
+	}
+	return b.valueAt(maxValueID.Minimum())
+}
+
+// valueAt rebuilds the value of columnID: bit i is set when bA[i] contains it.
+func (b *BSI) valueAt(columnID uint32) int64 {
+	value := int64(0)
+	for i := 0; i < len(b.bA); i++ {
+		if b.bA[i].Contains(columnID) {
+			value |= 1 << i
+		}
+	}
+	return value
+}
diff --git a/BitSliceIndexing/bsi_test.go b/BitSliceIndexing/bsi_test.go
index a28a5d4e..668c55ba 100644
--- a/BitSliceIndexing/bsi_test.go
+++ b/BitSliceIndexing/bsi_test.go
@@ -500,3 +500,77 @@ func TestTransposeWithCountsNil(t *testing.T) {
 	assert.True(t, ok)
 	assert.Equal(t, int64(2), a)
 }
+
+func TestEQONeil(t *testing.T) {
+	bsi := setup()
+	eq := bsi.CompareValueONeil(EQ, 50, 0, nil)
+	assert.Equal(t, uint64(1), eq.GetCardinality())
+
+	assert.True(t, eq.ContainsInt(50))
+}
+
+func TestLTONeil(t *testing.T) {
+
+	bsi := setup()
+	lt := bsi.CompareValueONeil(LT, 50, 0, nil)
+	assert.Equal(t, uint64(50), lt.GetCardinality())
+
+	i := lt.Iterator()
+	for i.HasNext() {
+		v := i.Next()
+		assert.Less(t, uint64(v), uint64(50))
+	}
+}
+
+func TestGTONeil(t *testing.T) {
+
+	bsi := setup()
+	gt := bsi.CompareValueONeil(GT, 50, 0, nil)
+	assert.Equal(t, uint64(49), gt.GetCardinality())
+
+	i := gt.Iterator()
+	for i.HasNext() {
+		v := i.Next()
+		assert.Greater(t, uint64(v), uint64(50))
+	}
+}
+
+func TestGEONeil(t *testing.T) {
+
+	bsi := setup()
+	ge := bsi.CompareValueONeil(GE, 50, 0, nil)
+	assert.Equal(t, uint64(50), ge.GetCardinality())
+
+	i := ge.Iterator()
+	for i.HasNext() {
+		v := i.Next()
+		assert.GreaterOrEqual(t, uint64(v), uint64(50))
+	}
+}
+
+func TestLEONeil(t *testing.T) {
+
+	bsi := setup()
+	le := bsi.CompareValueONeil(LE, 50, 0, nil)
+	assert.Equal(t, uint64(51), le.GetCardinality())
+
+	i := le.Iterator()
+	for i.HasNext() {
+		v := i.Next()
+		assert.LessOrEqual(t, uint64(v), uint64(50))
+	}
+}
+
+func TestRangeONeil(t *testing.T) {
+
+	bsi := setup()
+	set := bsi.CompareValueONeil(RANGE, 45, 55, nil)
+	assert.Equal(t, uint64(11), set.GetCardinality())
+
+	i := set.Iterator()
+	for i.HasNext() {
+		v := i.Next()
+		assert.GreaterOrEqual(t, uint64(v), uint64(45))
+		assert.LessOrEqual(t, uint64(v), uint64(55))
+	}
+}
diff --git a/roaring64/bsi64.go b/roaring64/bsi64.go
index 46dbe121..1ff4f7e3 100644
--- a/roaring64/bsi64.go
+++ b/roaring64/bsi64.go
@@ -738,7 +738,7 @@ func (b *BSI) ParOr(parallelism int, bsis ...*BSI) {
 	bits := len(b.bA)
 	for i := 0; i < len(bsis); i++ {
 		if len(bsis[i].bA) > bits {
-			bits = len(bsis[i].bA )
+			bits = len(bsis[i].bA)
 		}
 	}
 
@@ -1104,3 +1104,217 @@ func (b *BSI) GetSizeInBytes() int {
 	}
 	return int(size)
 }
+
+// CompareValueONeil compares the values stored in this BSI against a predicate
+// on a single goroutine, using the O'Neil bit-sliced comparison algorithm for
+// every operation (EQ, GE, GT, LT, LE, RANGE).
+// It is only valid when valueOrStart and end are >= 0.
+//
+// op is the comparison operation. valueOrStart is the range start when op is
+// RANGE and the comparison value otherwise. end is the inclusive range end and
+// is ignored (conventionally 0) when op is not RANGE. foundSet is the set of
+// column IDs to consider; nil means every column present in the BSI.
+//
+// It returns the column IDs of foundSet whose values satisfy the comparison.
+func (b *BSI) CompareValueONeil(op Operation, valueOrStart, end int64,
+	foundSet *Bitmap) *Bitmap {
+	return b.CompareBigValueONeil(op, big.NewInt(valueOrStart), big.NewInt(end), foundSet)
+}
+
+// CompareBigValueONeil compares value.
+// It is the big.Int counterpart of CompareValueONeil and only supports values
+// >= 0. A nil valueOrStart defaults to the stored minimum; for RANGE, a nil end
+// defaults to the stored maximum. An unknown operation yields nil.
+func (b *BSI) CompareBigValueONeil(op Operation, valueOrStart, end *big.Int,
+	foundSet *Bitmap) *Bitmap {
+
+	// Take the bounds as big.Int: values that do not fit an int64 would be corrupted.
+	minValue := b.minBigValue()
+	maxValue := b.maxBigValue()
+	if valueOrStart == nil {
+		valueOrStart = minValue
+	}
+	if end == nil && op == RANGE {
+		end = maxValue
+	}
+
+	all := New()
+	all.Or(&b.eBM)
+	if foundSet != nil {
+		all.And(foundSet)
+	}
+
+	// Fast path: the stored min/max may already decide the whole comparison.
+	result := b.compareUsingMinMax(op, valueOrStart, end, all, minValue, maxValue)
+	if result != nil {
+		return result
+	}
+	switch op {
+	case EQ, GE, GT, LT, LE:
+		return b.oNeilCompare(op, valueOrStart, foundSet)
+	case RANGE:
+		// Clamp to the stored bounds; oNeilCompare only inspects stored slices.
+		if valueOrStart.Cmp(minValue) < 0 {
+			valueOrStart = minValue
+		}
+		if end.Cmp(maxValue) > 0 {
+			end = maxValue
+		}
+		left := b.oNeilCompare(GE, valueOrStart, foundSet)
+		right := b.oNeilCompare(LE, end, foundSet)
+		return And(left, right)
+	default:
+		return nil
+	}
+}
+
+// compareUsingMinMax tries to decide the comparison from the stored minimum and
+// maximum alone: it returns all when every candidate matches, an empty bitmap
+// when none can, and nil when the slices must be scanned or the op is unknown.
+func (b *BSI) compareUsingMinMax(operation Operation, valueOrStart, end *big.Int, all *Bitmap, minValue, maxValue *big.Int) *Bitmap {
+	empty := New()
+	switch operation {
+	case LT:
+		if valueOrStart.Cmp(maxValue) > 0 {
+			return all
+		} else if valueOrStart.Cmp(minValue) <= 0 {
+			return empty
+		}
+	case LE:
+		if valueOrStart.Cmp(maxValue) >= 0 {
+			return all
+		} else if valueOrStart.Cmp(minValue) < 0 {
+			return empty
+		}
+	case GT:
+		if valueOrStart.Cmp(minValue) < 0 {
+			return all
+		} else if valueOrStart.Cmp(maxValue) >= 0 {
+			return empty
+		}
+	case GE:
+		if valueOrStart.Cmp(minValue) <= 0 {
+			return all
+		} else if valueOrStart.Cmp(maxValue) > 0 {
+			return empty
+		}
+	case EQ:
+		// Compare by value with Cmp; == on *big.Int only compares pointers.
+		if minValue.Cmp(maxValue) == 0 && minValue.Cmp(valueOrStart) == 0 {
+			return all
+		} else if valueOrStart.Cmp(minValue) < 0 || valueOrStart.Cmp(maxValue) > 0 {
+			return empty
+		}
+	case RANGE:
+		if valueOrStart.Cmp(minValue) <= 0 && end.Cmp(maxValue) >= 0 {
+			return all
+		} else if valueOrStart.Cmp(maxValue) > 0 || end.Cmp(minValue) < 0 {
+			return empty
+		}
+	default:
+		return nil
+	}
+	return nil
+}
+
+// oNeilCompare scans bit slices BitCount()-1 down to 0, tracking the columns
+// already known to be greater than (GTB), less than (LTB) or still equal to
+// (EQB) the predicate. It returns the set selected by operation, restricted to
+// foundSet (or to every existing column when foundSet is nil).
+func (b *BSI) oNeilCompare(operation Operation, predicate *big.Int, foundSet *Bitmap) *Bitmap {
+	fixedFoundSet := foundSet
+	if foundSet == nil {
+		fixedFoundSet = &b.eBM
+	}
+	GTB := New()
+	LTB := New()
+	EQB := New()
+	EQB.Or(&b.eBM)
+
+	for i := b.BitCount() - 1; i >= 0; i-- {
+		bit := predicate.Bit(i)
+		if bit == 1 {
+			LTB.Or(AndNot(EQB, &b.bA[i]))
+			EQB.And(&b.bA[i])
+		} else {
+			GTB.Or(And(EQB, &b.bA[i]))
+			EQB.AndNot(&b.bA[i])
+		}
+	}
+
+	EQB.And(fixedFoundSet)
+	switch operation {
+	case EQ:
+		return EQB
+	case GT:
+		return And(GTB, fixedFoundSet)
+	case LT:
+		return And(LTB, fixedFoundSet)
+	case LE:
+		LTB.Or(EQB)
+		return And(LTB, fixedFoundSet)
+	case GE:
+		GTB.Or(EQB)
+		return And(GTB, fixedFoundSet)
+	}
+	return nil
+}
+
+// minValue returns minBigValue as an int64. The result is only meaningful when
+// the minimum fits in an int64 (see big.Int.Int64).
+func (b *BSI) minValue() int64 {
+	return b.minBigValue().Int64()
+}
+
+// minBigValue returns the smallest stored non-negative value, or 0 when the BSI is empty.
+func (b *BSI) minBigValue() *big.Int {
+	if b.eBM.IsEmpty() {
+		return big.NewInt(0)
+	}
+	minValueID := New()
+	minValueID.Or(&b.eBM)
+	for i := len(b.bA) - 1; i >= 0; i-- {
+		tmp := AndNot(minValueID, &b.bA[i])
+		if !tmp.IsEmpty() {
+			minValueID = tmp
+		}
+	}
+	return b.bigValueAt(minValueID.Minimum())
+}
+
+// maxValue returns maxBigValue as an int64; meaningful only if it fits in an int64.
+func (b *BSI) maxValue() int64 {
+	return b.maxBigValue().Int64()
+}
+
+// maxBigValue returns the largest stored non-negative value, or 0 when the BSI is empty.
+func (b *BSI) maxBigValue() *big.Int {
+	if b.eBM.IsEmpty() {
+		return big.NewInt(0)
+	}
+	maxValueID := New()
+	maxValueID.Or(&b.eBM)
+	for i := len(b.bA) - 1; i >= 0; i-- {
+		tmp := And(maxValueID, &b.bA[i])
+		if !tmp.IsEmpty() {
+			maxValueID = tmp
+		}
+	}
+	return b.bigValueAt(maxValueID.Minimum())
+}
+
+// valueAt returns bigValueAt(columnID) as an int64; meaningful only if it fits.
+func (b *BSI) valueAt(columnID uint64) int64 {
+	return b.bigValueAt(columnID).Int64()
+}
+
+// bigValueAt rebuilds the value of columnID: bit i is set when bA[i] contains it.
+func (b *BSI) bigValueAt(columnID uint64) *big.Int {
+	value := big.NewInt(0)
+	for i := 0; i < len(b.bA); i++ {
+		if b.bA[i].Contains(columnID) {
+			value.SetBit(value, i, 1)
+		}
+	}
+	return value
+}
diff --git a/roaring64/bsi64_test.go b/roaring64/bsi64_test.go
index 238b538e..a9f8c5d9 100644
--- a/roaring64/bsi64_test.go
+++ b/roaring64/bsi64_test.go
@@ -822,3 +822,84 @@ func TestRangeNilBig(t *testing.T) {
 	tmpAll := bsi.CompareBigValue(0, RANGE, bsi.MinMaxBig(0, MIN, nil), bsi.MinMaxBig(0, MAX, nil), nil)
 	assert.Equal(t, tmpAll.GetCardinality(), setAll.GetCardinality())
 }
+
+func TestEQONeil(t *testing.T) {
+
+	bsi := setup()
+	eq := bsi.CompareValueONeil(EQ, 50, 0, nil)
+	assert.Equal(t, uint64(1), eq.GetCardinality())
+
+	assert.True(t, eq.ContainsInt(50))
+}
+
+func TestLTONeil(t *testing.T) {
+
+	bsi := setup()
+	lt := bsi.CompareValueONeil(LT, 50, 0, nil)
+	assert.Equal(t, uint64(50), lt.GetCardinality())
+
+	i := lt.Iterator()
+	for i.HasNext() {
+		v := i.Next()
+		assert.Less(t, uint64(v), uint64(50))
+	}
+}
+
+func TestGTONeil(t *testing.T) {
+
+	bsi := setup()
+	gt := bsi.CompareValueONeil(GT, 50, 0, nil)
+	assert.Equal(t, uint64(50), gt.GetCardinality())
+
+	i := gt.Iterator()
+	for i.HasNext() {
+		v := i.Next()
+		assert.Greater(t, uint64(v), uint64(50))
+	}
+}
+
+func TestGEONeil(t *testing.T) {
+
+	bsi := setup()
+	ge := bsi.CompareValueONeil(GE, 50, 0, nil)
+	assert.Equal(t, uint64(51), ge.GetCardinality())
+
+	i := ge.Iterator()
+	for i.HasNext() {
+		v := i.Next()
+		assert.GreaterOrEqual(t, uint64(v), uint64(50))
+	}
+}
+
+func TestLEONeil(t *testing.T) {
+
+	bsi := setup()
+	le := bsi.CompareValueONeil(LE, 50, 0, nil)
+	assert.Equal(t, uint64(51), le.GetCardinality())
+
+	i := le.Iterator()
+	for i.HasNext() {
+		v := i.Next()
+		assert.LessOrEqual(t, uint64(v), uint64(50))
+	}
+}
+
+func TestRangeSimpleONeil(t *testing.T) {
+
+	bsi := setup()
+	set := bsi.CompareValueONeil(RANGE, 45, 55, nil)
+	assert.Equal(t, uint64(11), set.GetCardinality())
+
+	i := set.Iterator()
+	for i.HasNext() {
+		v := i.Next()
+		assert.GreaterOrEqual(t, uint64(v), uint64(45))
+		assert.LessOrEqual(t, uint64(v), uint64(55))
+	}
+}
+
+func TestRangeBigONeil(t *testing.T) {
+	bsi := setup()
+	set := bsi.CompareValueONeil(RANGE, 0, 103, nil)
+	assert.Equal(t, uint64(101), set.GetCardinality())
+}