Skip to content

Commit 367149b

Browse files
committed
Add (*Set).FromJSON benchmarks and switch to post-accumulation sort.
Set deserialization with children in reverse order performs an insertion sort. Sets serialized by structured-merge-diff are sorted, but in practice the serialization of sets is controlled by clients and they can (intentionally or not) exercise this worst case. Switching to a bulk insert followed by a sort appears to perform no worse in the best case (sorted) and is significantly better in the worst case: │ master │ bulk │ │ sec/op │ sec/op vs base │ SetFromJSON/children_in_ascending_order 1.286m ± 3% 1.341m ± 1% +4.27% (p=0.000 n=10) SetFromJSON/children_in_descending_order 20.189m ± 0% 1.385m ± 1% -93.14% (p=0.000 n=10) SetFromJSON/grandchildren_in_ascending_order 7.821m ± 1% 8.211m ± 1% +4.98% (p=0.000 n=10) SetFromJSON/grandchildren_in_descending_order 75.16m ± 4% 13.31m ± 13% -82.28% (p=0.000 n=10) geomean 11.11m 3.774m -66.04% │ master │ bulk │ │ B/op │ B/op vs base │ SetFromJSON/children_in_ascending_order 1.320Mi ± 0% 1.320Mi ± 0% ~ (p=1.000 n=10) SetFromJSON/children_in_descending_order 1.320Mi ± 0% 1.320Mi ± 0% +0.00% (p=0.000 n=10) SetFromJSON/grandchildren_in_ascending_order 5.395Mi ± 0% 5.395Mi ± 0% +0.00% (p=0.000 n=10) SetFromJSON/grandchildren_in_descending_order 6.187Mi ± 0% 5.395Mi ± 0% -12.80% (p=0.000 n=10) geomean 2.761Mi 2.668Mi -3.37% │ master │ bulk │ │ allocs/op │ allocs/op vs base │ SetFromJSON/children_in_ascending_order 25.99k ± 0% 25.99k ± 0% ~ (p=1.000 n=10) ¹ SetFromJSON/children_in_descending_order 25.99k ± 0% 25.99k ± 0% ~ (p=1.000 n=10) ¹ SetFromJSON/grandchildren_in_ascending_order 138.4k ± 0% 138.4k ± 0% ~ (p=1.000 n=10) ¹ SetFromJSON/grandchildren_in_descending_order 155.7k ± 0% 138.4k ± 0% -11.11% (p=0.000 n=10) geomean 61.77k 59.98k -2.90% ¹ all samples are equal
1 parent 3392408 commit 367149b

File tree

6 files changed

+82
-18
lines changed

6 files changed

+82
-18
lines changed

fieldpath/serialize.go

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package fieldpath
1919
import (
2020
"bytes"
2121
"io"
22+
"slices"
2223
"unsafe"
2324

2425
jsoniter "github.com/json-iterator/go"
@@ -204,34 +205,40 @@ func readIterV1(iter *jsoniter.Iterator) (children *Set, isMember bool) {
204205
if children == nil {
205206
children = &Set{}
206207
}
207-
m := &children.Members.members
208-
// Since we expect that most of the time these will have been
209-
// serialized in the right order, we just verify that and append.
210-
appendOK := len(*m) == 0 || (*m)[len(*m)-1].Less(pe)
211-
if appendOK {
212-
*m = append(*m, pe)
213-
} else {
214-
children.Members.Insert(pe)
215-
}
208+
children.Members.members = append(children.Members.members, pe)
216209
}
217210
if grandchildren != nil {
218211
if children == nil {
219212
children = &Set{}
220213
}
221-
// Since we expect that most of the time these will have been
222-
// serialized in the right order, we just verify that and append.
223-
m := &children.Children.members
224-
appendOK := len(*m) == 0 || (*m)[len(*m)-1].pathElement.Less(pe)
225-
if appendOK {
226-
*m = append(*m, setNode{pe, grandchildren})
227-
} else {
228-
*children.Children.Descend(pe) = *grandchildren
229-
}
214+
children.Children.members = append(children.Children.members, setNode{pe, grandchildren})
230215
}
231216
return true
232217
})
233218
if children == nil {
234219
isMember = true
220+
} else {
221+
slices.SortFunc(children.Members.members, func(a, b PathElement) int {
222+
return a.Compare(b)
223+
})
224+
children.Members.members = slices.CompactFunc(children.Members.members, func(a, b PathElement) bool {
225+
return a.Equals(b)
226+
})
227+
if children.Children.members != nil {
228+
slices.SortStableFunc(children.Children.members, func(a, b setNode) int {
229+
return a.pathElement.Compare(b.pathElement)
230+
})
231+
// Compact duplicates, keeping the last occurrence to preserve historical behavior:
232+
var dst int
233+
for src := 1; src < len(children.Children.members); src++ {
234+
if !children.Children.members[dst].pathElement.Equals(children.Children.members[src].pathElement) {
235+
dst++
236+
}
237+
children.Children.members[dst] = children.Children.members[src]
238+
}
239+
clear(children.Children.members[dst+1:])
240+
children.Children.members = children.Children.members[:dst+1]
241+
}
235242
}
236243

237244
return children, isMember

fieldpath/serialize_test.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package fieldpath_test
1919
import (
2020
"bytes"
2121
"fmt"
22+
"os"
2223
"strings"
2324
"testing"
2425

@@ -330,3 +331,55 @@ func TestDropUnknown(t *testing.T) {
330331
t.Errorf("Failed;\ngot: %s\nwant: %s\n", b, expect)
331332
}
332333
}
334+
335+
func TestDuplicateMembers(t *testing.T) {
336+
s := NewSet()
337+
if err := s.FromJSON(strings.NewReader(`{"f:":{},"f:":{}}`)); err != nil {
338+
t.Fatal(err)
339+
}
340+
if size := s.Members.Size(); size != 1 {
341+
t.Errorf("want 1 member, got %d", size)
342+
}
343+
}
344+
345+
func TestDuplicateChildren(t *testing.T) {
346+
d := NewSet()
347+
if err := d.FromJSON(strings.NewReader(`{"f:":{"f:":{".":{},"f:1":{}}},"f:":{"f:":{".":{},"f:2":{}}}}`)); err != nil {
348+
t.Fatal(err)
349+
}
350+
s := NewSet()
351+
if err := s.FromJSON(strings.NewReader(`{"f:":{"f:":{".":{},"f:2":{}}}}`)); err != nil {
352+
t.Fatal(err)
353+
}
354+
if !d.Equals(s) {
355+
// The last occurrence of the duplicated child set is preserved.
356+
t.Fatalf("want:\n%s\ngot:\n%s", s, d)
357+
}
358+
}
359+
360+
func BenchmarkSetFromJSON(b *testing.B) {
361+
for _, tc := range []struct {
362+
name string
363+
fixture string
364+
}{
365+
{"children in ascending order", "testdata/set_ascending.json"},
366+
{"children in descending order", "testdata/set_descending.json"},
367+
{"grandchildren in ascending order", "testdata/set_ascending_grandchildren.json"},
368+
{"grandchildren in descending order", "testdata/set_descending_grandchildren.json"},
369+
} {
370+
b.Run(tc.name, func(b *testing.B) {
371+
data, err := os.ReadFile(tc.fixture)
372+
if err != nil {
373+
b.Fatal(err)
374+
}
375+
376+
b.ResetTimer()
377+
for range b.N {
378+
x := NewSet()
379+
if err := x.FromJSON(bytes.NewReader(data)); err != nil {
380+
b.Fatal(err)
381+
}
382+
}
383+
})
384+
}
385+
}

fieldpath/testdata/set_ascending.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

fieldpath/testdata/set_ascending_grandchildren.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

fieldpath/testdata/set_descending.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

fieldpath/testdata/set_descending_grandchildren.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)