blob: 0ea28bd03079ad369fd9634c3ebca8c32d935bc4 [file] [log] [blame]
khenaidooab1f7bd2019-11-14 14:00:27 -05001// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package yaml
6
7import (
8 "bytes"
9 "encoding"
10 "encoding/json"
11 "reflect"
12 "sort"
13 "strings"
14 "sync"
15 "unicode"
16 "unicode/utf8"
17)
18
Abhay Kumara2ae5992025-11-10 14:02:24 +000019// indirect walks down 'value' allocating pointers as needed,
khenaidooab1f7bd2019-11-14 14:00:27 -050020// until it gets to a non-pointer.
21// if it encounters an Unmarshaler, indirect stops and returns that.
22// if decodingNull is true, indirect stops at the last pointer so it can be set to nil.
Abhay Kumara2ae5992025-11-10 14:02:24 +000023func indirect(value reflect.Value, decodingNull bool) (json.Unmarshaler, encoding.TextUnmarshaler, reflect.Value) {
24 // If 'value' is a named type and is addressable,
khenaidooab1f7bd2019-11-14 14:00:27 -050025 // start with its address, so that if the type has pointer methods,
26 // we find them.
Abhay Kumara2ae5992025-11-10 14:02:24 +000027 if value.Kind() != reflect.Ptr && value.Type().Name() != "" && value.CanAddr() {
28 value = value.Addr()
khenaidooab1f7bd2019-11-14 14:00:27 -050029 }
30 for {
31 // Load value from interface, but only if the result will be
32 // usefully addressable.
Abhay Kumara2ae5992025-11-10 14:02:24 +000033 if value.Kind() == reflect.Interface && !value.IsNil() {
34 element := value.Elem()
35 if element.Kind() == reflect.Ptr && !element.IsNil() && (!decodingNull || element.Elem().Kind() == reflect.Ptr) {
36 value = element
khenaidooab1f7bd2019-11-14 14:00:27 -050037 continue
38 }
39 }
40
Abhay Kumara2ae5992025-11-10 14:02:24 +000041 if value.Kind() != reflect.Ptr {
khenaidooab1f7bd2019-11-14 14:00:27 -050042 break
43 }
44
Abhay Kumara2ae5992025-11-10 14:02:24 +000045 if value.Elem().Kind() != reflect.Ptr && decodingNull && value.CanSet() {
khenaidooab1f7bd2019-11-14 14:00:27 -050046 break
47 }
Abhay Kumara2ae5992025-11-10 14:02:24 +000048 if value.IsNil() {
49 if value.CanSet() {
50 value.Set(reflect.New(value.Type().Elem()))
khenaidooab1f7bd2019-11-14 14:00:27 -050051 } else {
Abhay Kumara2ae5992025-11-10 14:02:24 +000052 value = reflect.New(value.Type().Elem())
khenaidooab1f7bd2019-11-14 14:00:27 -050053 }
54 }
Abhay Kumara2ae5992025-11-10 14:02:24 +000055 if value.Type().NumMethod() > 0 {
56 if u, ok := value.Interface().(json.Unmarshaler); ok {
khenaidooab1f7bd2019-11-14 14:00:27 -050057 return u, nil, reflect.Value{}
58 }
Abhay Kumara2ae5992025-11-10 14:02:24 +000059 if u, ok := value.Interface().(encoding.TextUnmarshaler); ok {
khenaidooab1f7bd2019-11-14 14:00:27 -050060 return nil, u, reflect.Value{}
61 }
62 }
Abhay Kumara2ae5992025-11-10 14:02:24 +000063 value = value.Elem()
khenaidooab1f7bd2019-11-14 14:00:27 -050064 }
Abhay Kumara2ae5992025-11-10 14:02:24 +000065 return nil, nil, value
khenaidooab1f7bd2019-11-14 14:00:27 -050066}
67
68// A field represents a single field found in a struct.
69type field struct {
70 name string
71 nameBytes []byte // []byte(name)
72 equalFold func(s, t []byte) bool // bytes.EqualFold or equivalent
73
74 tag bool
75 index []int
76 typ reflect.Type
77 omitEmpty bool
78 quoted bool
79}
80
81func fillField(f field) field {
82 f.nameBytes = []byte(f.name)
83 f.equalFold = foldFunc(f.nameBytes)
84 return f
85}
86
87// byName sorts field by name, breaking ties with depth,
88// then breaking ties with "name came from json tag", then
89// breaking ties with index sequence.
90type byName []field
91
92func (x byName) Len() int { return len(x) }
93
94func (x byName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
95
96func (x byName) Less(i, j int) bool {
97 if x[i].name != x[j].name {
98 return x[i].name < x[j].name
99 }
100 if len(x[i].index) != len(x[j].index) {
101 return len(x[i].index) < len(x[j].index)
102 }
103 if x[i].tag != x[j].tag {
104 return x[i].tag
105 }
106 return byIndex(x).Less(i, j)
107}
108
109// byIndex sorts field by index sequence.
110type byIndex []field
111
112func (x byIndex) Len() int { return len(x) }
113
114func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
115
116func (x byIndex) Less(i, j int) bool {
117 for k, xik := range x[i].index {
118 if k >= len(x[j].index) {
119 return false
120 }
121 if xik != x[j].index[k] {
122 return xik < x[j].index[k]
123 }
124 }
125 return len(x[i].index) < len(x[j].index)
126}
127
128// typeFields returns a list of fields that JSON should recognize for the given type.
129// The algorithm is breadth-first search over the set of structs to include - the top struct
130// and then any reachable anonymous structs.
131func typeFields(t reflect.Type) []field {
132 // Anonymous fields to explore at the current level and the next.
133 current := []field{}
134 next := []field{{typ: t}}
135
136 // Count of queued names for current level and the next.
Abhay Kumara2ae5992025-11-10 14:02:24 +0000137 var count map[reflect.Type]int
138 var nextCount map[reflect.Type]int
khenaidooab1f7bd2019-11-14 14:00:27 -0500139
140 // Types already visited at an earlier level.
141 visited := map[reflect.Type]bool{}
142
143 // Fields found.
144 var fields []field
145
146 for len(next) > 0 {
147 current, next = next, current[:0]
148 count, nextCount = nextCount, map[reflect.Type]int{}
149
150 for _, f := range current {
151 if visited[f.typ] {
152 continue
153 }
154 visited[f.typ] = true
155
156 // Scan f.typ for fields to include.
157 for i := 0; i < f.typ.NumField(); i++ {
158 sf := f.typ.Field(i)
159 if sf.PkgPath != "" { // unexported
160 continue
161 }
162 tag := sf.Tag.Get("json")
163 if tag == "-" {
164 continue
165 }
166 name, opts := parseTag(tag)
167 if !isValidTag(name) {
168 name = ""
169 }
170 index := make([]int, len(f.index)+1)
171 copy(index, f.index)
172 index[len(f.index)] = i
173
174 ft := sf.Type
175 if ft.Name() == "" && ft.Kind() == reflect.Ptr {
176 // Follow pointer.
177 ft = ft.Elem()
178 }
179
180 // Record found field and index sequence.
181 if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct {
182 tagged := name != ""
183 if name == "" {
184 name = sf.Name
185 }
186 fields = append(fields, fillField(field{
187 name: name,
188 tag: tagged,
189 index: index,
190 typ: ft,
191 omitEmpty: opts.Contains("omitempty"),
192 quoted: opts.Contains("string"),
193 }))
194 if count[f.typ] > 1 {
195 // If there were multiple instances, add a second,
196 // so that the annihilation code will see a duplicate.
197 // It only cares about the distinction between 1 or 2,
198 // so don't bother generating any more copies.
199 fields = append(fields, fields[len(fields)-1])
200 }
201 continue
202 }
203
204 // Record new anonymous struct to explore in next round.
205 nextCount[ft]++
206 if nextCount[ft] == 1 {
207 next = append(next, fillField(field{name: ft.Name(), index: index, typ: ft}))
208 }
209 }
210 }
211 }
212
213 sort.Sort(byName(fields))
214
215 // Delete all fields that are hidden by the Go rules for embedded fields,
216 // except that fields with JSON tags are promoted.
217
218 // The fields are sorted in primary order of name, secondary order
219 // of field index length. Loop over names; for each name, delete
220 // hidden fields by choosing the one dominant field that survives.
221 out := fields[:0]
222 for advance, i := 0, 0; i < len(fields); i += advance {
223 // One iteration per name.
224 // Find the sequence of fields with the name of this first field.
225 fi := fields[i]
226 name := fi.name
227 for advance = 1; i+advance < len(fields); advance++ {
228 fj := fields[i+advance]
229 if fj.name != name {
230 break
231 }
232 }
233 if advance == 1 { // Only one field with this name
234 out = append(out, fi)
235 continue
236 }
237 dominant, ok := dominantField(fields[i : i+advance])
238 if ok {
239 out = append(out, dominant)
240 }
241 }
242
243 fields = out
244 sort.Sort(byIndex(fields))
245
246 return fields
247}
248
249// dominantField looks through the fields, all of which are known to
250// have the same name, to find the single field that dominates the
251// others using Go's embedding rules, modified by the presence of
252// JSON tags. If there are multiple top-level fields, the boolean
253// will be false: This condition is an error in Go and we skip all
254// the fields.
255func dominantField(fields []field) (field, bool) {
256 // The fields are sorted in increasing index-length order. The winner
257 // must therefore be one with the shortest index length. Drop all
258 // longer entries, which is easy: just truncate the slice.
259 length := len(fields[0].index)
260 tagged := -1 // Index of first tagged field.
261 for i, f := range fields {
262 if len(f.index) > length {
263 fields = fields[:i]
264 break
265 }
266 if f.tag {
267 if tagged >= 0 {
268 // Multiple tagged fields at the same level: conflict.
269 // Return no field.
270 return field{}, false
271 }
272 tagged = i
273 }
274 }
275 if tagged >= 0 {
276 return fields[tagged], true
277 }
278 // All remaining fields have the same length. If there's more than one,
279 // we have a conflict (two fields named "X" at the same level) and we
280 // return no field.
281 if len(fields) > 1 {
282 return field{}, false
283 }
284 return fields[0], true
285}
286
287var fieldCache struct {
288 sync.RWMutex
289 m map[reflect.Type][]field
290}
291
292// cachedTypeFields is like typeFields but uses a cache to avoid repeated work.
293func cachedTypeFields(t reflect.Type) []field {
294 fieldCache.RLock()
295 f := fieldCache.m[t]
296 fieldCache.RUnlock()
297 if f != nil {
298 return f
299 }
300
301 // Compute fields without lock.
302 // Might duplicate effort but won't hold other computations back.
303 f = typeFields(t)
304 if f == nil {
305 f = []field{}
306 }
307
308 fieldCache.Lock()
309 if fieldCache.m == nil {
310 fieldCache.m = map[reflect.Type][]field{}
311 }
312 fieldCache.m[t] = f
313 fieldCache.Unlock()
314 return f
315}
316
317func isValidTag(s string) bool {
318 if s == "" {
319 return false
320 }
321 for _, c := range s {
322 switch {
323 case strings.ContainsRune("!#$%&()*+-./:<=>?@[]^_{|}~ ", c):
324 // Backslash and quote chars are reserved, but
325 // otherwise any punctuation chars are allowed
326 // in a tag name.
327 default:
328 if !unicode.IsLetter(c) && !unicode.IsDigit(c) {
329 return false
330 }
331 }
332 }
333 return true
334}
335
336const (
337 caseMask = ^byte(0x20) // Mask to ignore case in ASCII.
338 kelvin = '\u212a'
339 smallLongEss = '\u017f'
340)
341
342// foldFunc returns one of four different case folding equivalence
343// functions, from most general (and slow) to fastest:
344//
345// 1) bytes.EqualFold, if the key s contains any non-ASCII UTF-8
346// 2) equalFoldRight, if s contains special folding ASCII ('k', 'K', 's', 'S')
347// 3) asciiEqualFold, no special, but includes non-letters (including _)
348// 4) simpleLetterEqualFold, no specials, no non-letters.
349//
350// The letters S and K are special because they map to 3 runes, not just 2:
Abhay Kumara2ae5992025-11-10 14:02:24 +0000351// - S maps to s and to U+017F 'ſ' Latin small letter long s
352// - k maps to K and to U+212A 'K' Kelvin sign
353//
khenaidooab1f7bd2019-11-14 14:00:27 -0500354// See http://play.golang.org/p/tTxjOc0OGo
355//
356// The returned function is specialized for matching against s and
357// should only be given s. It's not curried for performance reasons.
358func foldFunc(s []byte) func(s, t []byte) bool {
359 nonLetter := false
360 special := false // special letter
361 for _, b := range s {
362 if b >= utf8.RuneSelf {
363 return bytes.EqualFold
364 }
365 upper := b & caseMask
366 if upper < 'A' || upper > 'Z' {
367 nonLetter = true
368 } else if upper == 'K' || upper == 'S' {
369 // See above for why these letters are special.
370 special = true
371 }
372 }
373 if special {
374 return equalFoldRight
375 }
376 if nonLetter {
377 return asciiEqualFold
378 }
379 return simpleLetterEqualFold
380}
381
382// equalFoldRight is a specialization of bytes.EqualFold when s is
383// known to be all ASCII (including punctuation), but contains an 's',
384// 'S', 'k', or 'K', requiring a Unicode fold on the bytes in t.
385// See comments on foldFunc.
386func equalFoldRight(s, t []byte) bool {
387 for _, sb := range s {
388 if len(t) == 0 {
389 return false
390 }
391 tb := t[0]
392 if tb < utf8.RuneSelf {
393 if sb != tb {
394 sbUpper := sb & caseMask
395 if 'A' <= sbUpper && sbUpper <= 'Z' {
396 if sbUpper != tb&caseMask {
397 return false
398 }
399 } else {
400 return false
401 }
402 }
403 t = t[1:]
404 continue
405 }
406 // sb is ASCII and t is not. t must be either kelvin
407 // sign or long s; sb must be s, S, k, or K.
408 tr, size := utf8.DecodeRune(t)
409 switch sb {
410 case 's', 'S':
411 if tr != smallLongEss {
412 return false
413 }
414 case 'k', 'K':
415 if tr != kelvin {
416 return false
417 }
418 default:
419 return false
420 }
421 t = t[size:]
422
423 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000424
425 return len(t) <= 0
khenaidooab1f7bd2019-11-14 14:00:27 -0500426}
427
428// asciiEqualFold is a specialization of bytes.EqualFold for use when
429// s is all ASCII (but may contain non-letters) and contains no
430// special-folding letters.
431// See comments on foldFunc.
432func asciiEqualFold(s, t []byte) bool {
433 if len(s) != len(t) {
434 return false
435 }
436 for i, sb := range s {
437 tb := t[i]
438 if sb == tb {
439 continue
440 }
441 if ('a' <= sb && sb <= 'z') || ('A' <= sb && sb <= 'Z') {
442 if sb&caseMask != tb&caseMask {
443 return false
444 }
445 } else {
446 return false
447 }
448 }
449 return true
450}
451
452// simpleLetterEqualFold is a specialization of bytes.EqualFold for
453// use when s is all ASCII letters (no underscores, etc) and also
454// doesn't contain 'k', 'K', 's', or 'S'.
455// See comments on foldFunc.
456func simpleLetterEqualFold(s, t []byte) bool {
457 if len(s) != len(t) {
458 return false
459 }
460 for i, b := range s {
461 if b&caseMask != t[i]&caseMask {
462 return false
463 }
464 }
465 return true
466}
467
468// tagOptions is the string following a comma in a struct field's "json"
469// tag, or the empty string. It does not include the leading comma.
470type tagOptions string
471
472// parseTag splits a struct field's json tag into its name and
473// comma-separated options.
474func parseTag(tag string) (string, tagOptions) {
475 if idx := strings.Index(tag, ","); idx != -1 {
476 return tag[:idx], tagOptions(tag[idx+1:])
477 }
478 return tag, tagOptions("")
479}
480
481// Contains reports whether a comma-separated list of options
482// contains a particular substr flag. substr must be surrounded by a
483// string boundary or commas.
484func (o tagOptions) Contains(optionName string) bool {
485 if len(o) == 0 {
486 return false
487 }
488 s := string(o)
489 for s != "" {
490 var next string
491 i := strings.Index(s, ",")
492 if i >= 0 {
493 s, next = s[:i], s[i+1:]
494 }
495 if s == optionName {
496 return true
497 }
498 s = next
499 }
500 return false
501}