blob: 3feebf328ae61948f154f45f690ae56aa431a960 [file] [log] [blame]
khenaidoo59ce9dd2019-11-11 13:05:32 -05001// Copyright 2013 The Prometheus Authors
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14package model
15
16import (
Abhay Kumar40252eb2025-10-13 13:25:53 +000017 "encoding/json"
18 "errors"
khenaidoo59ce9dd2019-11-11 13:05:32 -050019 "fmt"
20 "regexp"
21 "sort"
Abhay Kumar40252eb2025-10-13 13:25:53 +000022 "strconv"
khenaidoo59ce9dd2019-11-11 13:05:32 -050023 "strings"
Abhay Kumar40252eb2025-10-13 13:25:53 +000024 "unicode/utf8"
25
26 dto "github.com/prometheus/client_model/go"
27 "go.yaml.in/yaml/v2"
28 "google.golang.org/protobuf/proto"
khenaidoo59ce9dd2019-11-11 13:05:32 -050029)
30
var (
	// NameValidationScheme determines the global default method of the name
	// validation to be used by all calls to IsValidMetricName() and LabelName
	// IsValid().
	//
	// Deprecated: This variable should not be used and might be removed in the
	// far future. If you wish to stick to the legacy name validation use
	// `IsValidLegacyMetricName()` and `LabelName.IsValidLegacy()` methods
	// instead. This variable is here as an escape hatch for emergency cases,
	// given the recent change from `LegacyValidation` to `UTF8Validation`, e.g.,
	// to delay UTF-8 migrations in time or aid in debugging unforeseen results of
	// the change. In such a case, a temporary assignment to `LegacyValidation`
	// value in the `init()` function in your main.go or so, could be considered.
	//
	// Historically we opted for a global variable for feature gating different
	// validation schemes in operations that were not otherwise easily adjustable
	// (e.g. Labels yaml unmarshaling). That could have been a mistake; a separate
	// Labels structure or package might have been a better choice. Given the
	// change was made and many upgraded the common already, we leave this as-is
	// with this warning and learning for the future.
	NameValidationScheme = UTF8Validation

	// NameEscapingScheme defines the default way that names will be escaped when
	// presented to systems that do not support UTF-8 names. If the Content-Type
	// "escaping" term is specified, that will override this value.
	// NameEscapingScheme should not be set to the NoEscaping value. That string
	// is used in content negotiation to indicate that a system supports UTF-8 and
	// has that feature enabled.
	NameEscapingScheme = UnderscoreEscaping
)
61
// ValidationScheme is a Go enum for determining how metric and label names will
// be validated by this library.
type ValidationScheme int

const (
	// UnsetValidation represents an undefined ValidationScheme.
	// Should not be used in practice. It is the zero value of the type.
	UnsetValidation ValidationScheme = iota

	// LegacyValidation is a setting that requires that all metric and label names
	// conform to the original Prometheus character requirements described by
	// MetricNameRE and LabelNameRE.
	LegacyValidation

	// UTF8Validation only requires that metric and label names be valid UTF-8
	// strings.
	UTF8Validation
)
80
// Compile-time assertion that *ValidationScheme satisfies every interface
// listed below; the value itself is discarded.
var _ interface {
	yaml.Marshaler
	yaml.Unmarshaler
	json.Marshaler
	json.Unmarshaler
	fmt.Stringer
} = new(ValidationScheme)
88
89// String returns the string representation of s.
90func (s ValidationScheme) String() string {
91 switch s {
92 case UnsetValidation:
93 return "unset"
94 case LegacyValidation:
95 return "legacy"
96 case UTF8Validation:
97 return "utf8"
98 default:
99 panic(fmt.Errorf("unhandled ValidationScheme: %d", s))
100 }
101}
102
103// MarshalYAML implements the yaml.Marshaler interface.
104func (s ValidationScheme) MarshalYAML() (any, error) {
105 switch s {
106 case UnsetValidation:
107 return "", nil
108 case LegacyValidation, UTF8Validation:
109 return s.String(), nil
110 default:
111 panic(fmt.Errorf("unhandled ValidationScheme: %d", s))
112 }
113}
114
115// UnmarshalYAML implements the yaml.Unmarshaler interface.
116func (s *ValidationScheme) UnmarshalYAML(unmarshal func(any) error) error {
117 var scheme string
118 if err := unmarshal(&scheme); err != nil {
119 return err
120 }
121 return s.Set(scheme)
122}
123
124// MarshalJSON implements the json.Marshaler interface.
125func (s ValidationScheme) MarshalJSON() ([]byte, error) {
126 switch s {
127 case UnsetValidation:
128 return json.Marshal("")
129 case UTF8Validation, LegacyValidation:
130 return json.Marshal(s.String())
131 default:
132 return nil, fmt.Errorf("unhandled ValidationScheme: %d", s)
133 }
134}
135
136// UnmarshalJSON implements the json.Unmarshaler interface.
137func (s *ValidationScheme) UnmarshalJSON(bytes []byte) error {
138 var repr string
139 if err := json.Unmarshal(bytes, &repr); err != nil {
140 return err
141 }
142 return s.Set(repr)
143}
144
145// Set implements the pflag.Value interface.
146func (s *ValidationScheme) Set(text string) error {
147 switch text {
148 case "":
149 // Don't change the value.
150 case LegacyValidation.String():
151 *s = LegacyValidation
152 case UTF8Validation.String():
153 *s = UTF8Validation
154 default:
155 return fmt.Errorf("unrecognized ValidationScheme: %q", text)
156 }
157 return nil
158}
159
160// IsValidMetricName returns whether metricName is valid according to s.
161func (s ValidationScheme) IsValidMetricName(metricName string) bool {
162 switch s {
163 case LegacyValidation:
164 if len(metricName) == 0 {
165 return false
166 }
167 for i, b := range metricName {
168 if !isValidLegacyRune(b, i) {
169 return false
170 }
171 }
172 return true
173 case UTF8Validation:
174 if len(metricName) == 0 {
175 return false
176 }
177 return utf8.ValidString(metricName)
178 default:
179 panic(fmt.Sprintf("Invalid name validation scheme requested: %s", s.String()))
180 }
181}
182
183// IsValidLabelName returns whether labelName is valid according to s.
184func (s ValidationScheme) IsValidLabelName(labelName string) bool {
185 switch s {
186 case LegacyValidation:
187 if len(labelName) == 0 {
188 return false
189 }
190 for i, b := range labelName {
191 // TODO: Apply De Morgan's law. Make sure there are tests for this.
192 if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || (b >= '0' && b <= '9' && i > 0)) { //nolint:staticcheck
193 return false
194 }
195 }
196 return true
197 case UTF8Validation:
198 if len(labelName) == 0 {
199 return false
200 }
201 return utf8.ValidString(labelName)
202 default:
203 panic(fmt.Sprintf("Invalid name validation scheme requested: %s", s))
204 }
205}
206
207// Type implements the pflag.Value interface.
208func (ValidationScheme) Type() string {
209 return "validationScheme"
210}
211
// EscapingScheme is a Go enum selecting how EscapeName and UnescapeName
// rewrite names.
type EscapingScheme int

const (
	// NoEscaping indicates that a name will not be escaped. Unescaped names that
	// do not conform to the legacy validity check will use a new exposition
	// format syntax that will be officially standardized in future versions.
	NoEscaping EscapingScheme = iota

	// UnderscoreEscaping replaces all legacy-invalid characters with underscores.
	UnderscoreEscaping

	// DotsEscaping is similar to UnderscoreEscaping, except that dots are
	// converted to `_dot_` and pre-existing underscores are converted to `__`.
	DotsEscaping

	// ValueEncodingEscaping prepends the name with `U__` and replaces all invalid
	// characters with the unicode value, surrounded by underscores. Single
	// underscores are replaced with double underscores.
	ValueEncodingEscaping
)
232
const (
	// EscapingKey is the key in an Accept or Content-Type header that defines how
	// metric and label names that do not conform to the legacy character
	// requirements should be escaped when being scraped by a legacy prometheus
	// system. If a system does not explicitly pass an escaping parameter in the
	// Accept header, the default NameEscapingScheme will be used.
	EscapingKey = "escaping"

	// Possible values for Escaping Key. Each one is the string form of an
	// EscapingScheme constant, as mapped by EscapingScheme.String and
	// ToEscapingScheme.
	AllowUTF8 = "allow-utf-8" // No escaping required.
	EscapeUnderscores = "underscores"
	EscapeDots = "dots"
	EscapeValues = "values"
)
247
// MetricNameRE is a regular expression matching valid legacy metric
// names. Note that the IsValidLegacyMetricName function performs the same
// check but faster than a match with this regular expression.
var MetricNameRE = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`)
252
// A Metric is similar to a LabelSet, but the key difference is that a Metric is
// a singleton and refers to one and only one stream of samples. Its underlying
// type is LabelSet, so the two convert to each other directly.
type Metric LabelSet
256
257// Equal compares the metrics.
258func (m Metric) Equal(o Metric) bool {
259 return LabelSet(m).Equal(LabelSet(o))
260}
261
262// Before compares the metrics' underlying label sets.
263func (m Metric) Before(o Metric) bool {
264 return LabelSet(m).Before(LabelSet(o))
265}
266
267// Clone returns a copy of the Metric.
268func (m Metric) Clone() Metric {
269 clone := make(Metric, len(m))
270 for k, v := range m {
271 clone[k] = v
272 }
273 return clone
274}
275
276func (m Metric) String() string {
277 metricName, hasName := m[MetricNameLabel]
278 numLabels := len(m) - 1
279 if !hasName {
280 numLabels = len(m)
281 }
282 labelStrings := make([]string, 0, numLabels)
283 for label, value := range m {
284 if label != MetricNameLabel {
285 labelStrings = append(labelStrings, fmt.Sprintf("%s=%q", label, value))
286 }
287 }
288
289 switch numLabels {
290 case 0:
291 if hasName {
292 return string(metricName)
293 }
294 return "{}"
295 default:
296 sort.Strings(labelStrings)
297 return fmt.Sprintf("%s{%s}", metricName, strings.Join(labelStrings, ", "))
298 }
299}
300
301// Fingerprint returns a Metric's Fingerprint.
302func (m Metric) Fingerprint() Fingerprint {
303 return LabelSet(m).Fingerprint()
304}
305
306// FastFingerprint returns a Metric's Fingerprint calculated by a faster hashing
307// algorithm, which is, however, more susceptible to hash collisions.
308func (m Metric) FastFingerprint() Fingerprint {
309 return LabelSet(m).FastFingerprint()
310}
311
Abhay Kumar40252eb2025-10-13 13:25:53 +0000312// IsValidMetricName returns true iff name matches the pattern of MetricNameRE
313// for legacy names, and iff it's valid UTF-8 if the UTF8Validation scheme is
314// selected.
315//
316// Deprecated: This function should not be used and might be removed in the future.
317// Use [ValidationScheme.IsValidMetricName] instead.
318func IsValidMetricName(n LabelValue) bool {
319 return NameValidationScheme.IsValidMetricName(string(n))
320}
321
322// IsValidLegacyMetricName is similar to IsValidMetricName but always uses the
323// legacy validation scheme regardless of the value of NameValidationScheme.
khenaidoo59ce9dd2019-11-11 13:05:32 -0500324// This function, however, does not use MetricNameRE for the check but a much
325// faster hardcoded implementation.
Abhay Kumar40252eb2025-10-13 13:25:53 +0000326//
327// Deprecated: This function should not be used and might be removed in the future.
328// Use [LegacyValidation.IsValidMetricName] instead.
329func IsValidLegacyMetricName(n string) bool {
330 return LegacyValidation.IsValidMetricName(n)
331}
332
333// EscapeMetricFamily escapes the given metric names and labels with the given
334// escaping scheme. Returns a new object that uses the same pointers to fields
335// when possible and creates new escaped versions so as not to mutate the
336// input.
337func EscapeMetricFamily(v *dto.MetricFamily, scheme EscapingScheme) *dto.MetricFamily {
338 if v == nil {
339 return nil
khenaidoo59ce9dd2019-11-11 13:05:32 -0500340 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000341
342 if scheme == NoEscaping {
343 return v
344 }
345
346 out := &dto.MetricFamily{
347 Help: v.Help,
348 Type: v.Type,
349 Unit: v.Unit,
350 }
351
352 // If the name is nil, copy as-is, don't try to escape.
353 if v.Name == nil || IsValidLegacyMetricName(v.GetName()) {
354 out.Name = v.Name
355 } else {
356 out.Name = proto.String(EscapeName(v.GetName(), scheme))
357 }
358 for _, m := range v.Metric {
359 if !metricNeedsEscaping(m) {
360 out.Metric = append(out.Metric, m)
361 continue
362 }
363
364 escaped := &dto.Metric{
365 Gauge: m.Gauge,
366 Counter: m.Counter,
367 Summary: m.Summary,
368 Untyped: m.Untyped,
369 Histogram: m.Histogram,
370 TimestampMs: m.TimestampMs,
371 }
372
373 for _, l := range m.Label {
374 if l.GetName() == MetricNameLabel {
375 if l.Value == nil || IsValidLegacyMetricName(l.GetValue()) {
376 escaped.Label = append(escaped.Label, l)
377 continue
378 }
379 escaped.Label = append(escaped.Label, &dto.LabelPair{
380 Name: proto.String(MetricNameLabel),
381 Value: proto.String(EscapeName(l.GetValue(), scheme)),
382 })
383 continue
384 }
385 if l.Name == nil || IsValidLegacyMetricName(l.GetName()) {
386 escaped.Label = append(escaped.Label, l)
387 continue
388 }
389 escaped.Label = append(escaped.Label, &dto.LabelPair{
390 Name: proto.String(EscapeName(l.GetName(), scheme)),
391 Value: l.Value,
392 })
393 }
394 out.Metric = append(out.Metric, escaped)
395 }
396 return out
397}
398
399func metricNeedsEscaping(m *dto.Metric) bool {
400 for _, l := range m.Label {
401 if l.GetName() == MetricNameLabel && !IsValidLegacyMetricName(l.GetValue()) {
402 return true
403 }
404 if !IsValidLegacyMetricName(l.GetName()) {
405 return true
khenaidoo59ce9dd2019-11-11 13:05:32 -0500406 }
407 }
Abhay Kumar40252eb2025-10-13 13:25:53 +0000408 return false
409}
410
411// EscapeName escapes the incoming name according to the provided escaping
412// scheme. Depending on the rules of escaping, this may cause no change in the
413// string that is returned. (Especially NoEscaping, which by definition is a
414// noop). This function does not do any validation of the name.
415func EscapeName(name string, scheme EscapingScheme) string {
416 if len(name) == 0 {
417 return name
418 }
419 var escaped strings.Builder
420 switch scheme {
421 case NoEscaping:
422 return name
423 case UnderscoreEscaping:
424 if IsValidLegacyMetricName(name) {
425 return name
426 }
427 for i, b := range name {
428 if isValidLegacyRune(b, i) {
429 escaped.WriteRune(b)
430 } else {
431 escaped.WriteRune('_')
432 }
433 }
434 return escaped.String()
435 case DotsEscaping:
436 // Do not early return for legacy valid names, we still escape underscores.
437 for i, b := range name {
438 switch {
439 case b == '_':
440 escaped.WriteString("__")
441 case b == '.':
442 escaped.WriteString("_dot_")
443 case isValidLegacyRune(b, i):
444 escaped.WriteRune(b)
445 default:
446 escaped.WriteString("__")
447 }
448 }
449 return escaped.String()
450 case ValueEncodingEscaping:
451 if IsValidLegacyMetricName(name) {
452 return name
453 }
454 escaped.WriteString("U__")
455 for i, b := range name {
456 switch {
457 case b == '_':
458 escaped.WriteString("__")
459 case isValidLegacyRune(b, i):
460 escaped.WriteRune(b)
461 case !utf8.ValidRune(b):
462 escaped.WriteString("_FFFD_")
463 default:
464 escaped.WriteRune('_')
465 escaped.WriteString(strconv.FormatInt(int64(b), 16))
466 escaped.WriteRune('_')
467 }
468 }
469 return escaped.String()
470 default:
471 panic(fmt.Sprintf("invalid escaping scheme %d", scheme))
472 }
473}
474
// lower sets the ASCII case bit of c, which maps 'A'-'Z' onto 'a'-'z' and
// leaves lowercase letters and digits unchanged. Taken from strconv.atoi.
func lower(c byte) byte {
	const caseBit = 'x' - 'X'
	return c | caseBit
}
479
480// UnescapeName unescapes the incoming name according to the provided escaping
481// scheme if possible. Some schemes are partially or totally non-roundtripable.
482// If any error is enountered, returns the original input.
483func UnescapeName(name string, scheme EscapingScheme) string {
484 if len(name) == 0 {
485 return name
486 }
487 switch scheme {
488 case NoEscaping:
489 return name
490 case UnderscoreEscaping:
491 // It is not possible to unescape from underscore replacement.
492 return name
493 case DotsEscaping:
494 name = strings.ReplaceAll(name, "_dot_", ".")
495 name = strings.ReplaceAll(name, "__", "_")
496 return name
497 case ValueEncodingEscaping:
498 escapedName, found := strings.CutPrefix(name, "U__")
499 if !found {
500 return name
501 }
502
503 var unescaped strings.Builder
504 TOP:
505 for i := 0; i < len(escapedName); i++ {
506 // All non-underscores are treated normally.
507 if escapedName[i] != '_' {
508 unescaped.WriteByte(escapedName[i])
509 continue
510 }
511 i++
512 if i >= len(escapedName) {
513 return name
514 }
515 // A double underscore is a single underscore.
516 if escapedName[i] == '_' {
517 unescaped.WriteByte('_')
518 continue
519 }
520 // We think we are in a UTF-8 code, process it.
521 var utf8Val uint
522 for j := 0; i < len(escapedName); j++ {
523 // This is too many characters for a utf8 value based on the MaxRune
524 // value of '\U0010FFFF'.
525 if j >= 6 {
526 return name
527 }
528 // Found a closing underscore, convert to a rune, check validity, and append.
529 if escapedName[i] == '_' {
530 utf8Rune := rune(utf8Val)
531 if !utf8.ValidRune(utf8Rune) {
532 return name
533 }
534 unescaped.WriteRune(utf8Rune)
535 continue TOP
536 }
537 r := lower(escapedName[i])
538 utf8Val *= 16
539 switch {
540 case r >= '0' && r <= '9':
541 utf8Val += uint(r) - '0'
542 case r >= 'a' && r <= 'f':
543 utf8Val += uint(r) - 'a' + 10
544 default:
545 return name
546 }
547 i++
548 }
549 // Didn't find closing underscore, invalid.
550 return name
551 }
552 return unescaped.String()
553 default:
554 panic(fmt.Sprintf("invalid escaping scheme %d", scheme))
555 }
556}
557
// isValidLegacyRune reports whether rune b may appear at index i of a legacy
// metric name: ASCII letters, '_' and ':' are allowed anywhere, ASCII digits
// only when i > 0.
func isValidLegacyRune(b rune, i int) bool {
	switch {
	case b >= 'a' && b <= 'z', b >= 'A' && b <= 'Z':
		return true
	case b == '_', b == ':':
		return true
	case b >= '0' && b <= '9':
		return i > 0
	}
	return false
}
561
562func (e EscapingScheme) String() string {
563 switch e {
564 case NoEscaping:
565 return AllowUTF8
566 case UnderscoreEscaping:
567 return EscapeUnderscores
568 case DotsEscaping:
569 return EscapeDots
570 case ValueEncodingEscaping:
571 return EscapeValues
572 default:
573 panic(fmt.Sprintf("unknown format scheme %d", e))
574 }
575}
576
577func ToEscapingScheme(s string) (EscapingScheme, error) {
578 if s == "" {
579 return NoEscaping, errors.New("got empty string instead of escaping scheme")
580 }
581 switch s {
582 case AllowUTF8:
583 return NoEscaping, nil
584 case EscapeUnderscores:
585 return UnderscoreEscaping, nil
586 case EscapeDots:
587 return DotsEscaping, nil
588 case EscapeValues:
589 return ValueEncodingEscaping, nil
590 default:
591 return NoEscaping, fmt.Errorf("unknown format scheme %s", s)
592 }
khenaidoo59ce9dd2019-11-11 13:05:32 -0500593}