blob: 8f2edde32447f0e43b056290d6a9e43dcdbab34c [file] [log] [blame]
khenaidooab1f7bd2019-11-14 14:00:27 -05001// Copyright 2014 The Prometheus Authors
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14package expfmt
15
16import (
17 "bufio"
18 "bytes"
Abhay Kumara2ae5992025-11-10 14:02:24 +000019 "errors"
khenaidooab1f7bd2019-11-14 14:00:27 -050020 "fmt"
21 "io"
22 "math"
23 "strconv"
24 "strings"
Abhay Kumara2ae5992025-11-10 14:02:24 +000025 "unicode/utf8"
khenaidooab1f7bd2019-11-14 14:00:27 -050026
27 dto "github.com/prometheus/client_model/go"
Abhay Kumara2ae5992025-11-10 14:02:24 +000028 "google.golang.org/protobuf/proto"
khenaidooab1f7bd2019-11-14 14:00:27 -050029
khenaidooab1f7bd2019-11-14 14:00:27 -050030 "github.com/prometheus/common/model"
31)
32
// A stateFn is a function that represents one state of the parser's state
// machine. Executing it performs that state's work and returns the stateFn
// representing the next state. The end state is represented by nil, which is
// what stops the driving loop in TextToMetricFamilies.
type stateFn func() stateFn
38
// ParseError is the error reported when the simple and flat text-based
// exchange format cannot be parsed. Line is the line number the parser was
// working on and Msg describes what went wrong.
type ParseError struct {
	Line int
	Msg  string
}

// Error implements the error interface. The result names the offending line
// followed by the message.
func (e ParseError) Error() string {
	const layout = "text format parsing error in line %d: %s"
	line, msg := e.Line, e.Msg
	return fmt.Sprintf(layout, line, msg)
}
50
// TextParser is used to parse the simple and flat text-based exchange format. Its
// zero value is ready to use.
//
// NOTE(review): the comment on the scheme field says its default is the
// invalid UnsetValidation, so "ready to use" may only hold for parsers created
// with NewTextParser — confirm against model.ValidationScheme.
type TextParser struct {
	// metricFamiliesByName holds the result that TextToMetricFamilies returns.
	metricFamiliesByName map[string]*dto.MetricFamily
	buf                  *bufio.Reader // Where the parsed input is read through.
	err                  error         // Most recent error.
	lineCount            int           // Tracks the line count for error messages.
	currentByte          byte          // The most recent byte read.
	currentToken         bytes.Buffer  // Re-used each time a token has to be gathered from multiple bytes.
	// currentMF, currentMetric and currentLabelPair are the family, sample and
	// label the state functions are currently filling in.
	currentMF         *dto.MetricFamily
	currentMetric     *dto.Metric
	currentLabelPair  *dto.LabelPair
	currentLabelPairs []*dto.LabelPair // Temporarily stores label pairs while parsing a metric line.

	// The remaining member variables are only used for summaries/histograms.
	currentLabels map[string]string // All labels including '__name__' but excluding 'quantile'/'le'
	// Summary specific.
	summaries       map[uint64]*dto.Metric // Key is created with LabelsToSignature.
	currentQuantile float64
	// Histogram specific.
	histograms    map[uint64]*dto.Metric // Key is created with LabelsToSignature.
	currentBucket float64
	// These tell us if the currently processed line ends on '_count' or
	// '_sum' respectively and belong to a summary/histogram, representing the sample
	// count and sum of that summary/histogram.
	currentIsSummaryCount, currentIsSummarySum     bool
	currentIsHistogramCount, currentIsHistogramSum bool
	// These indicate if the metric name from the current line being parsed is inside
	// braces and if that metric name was found respectively. Both are cleared
	// at the start of every line.
	currentMetricIsInsideBraces, currentMetricInsideBracesIsPresent bool
	// scheme sets the desired ValidationScheme for names. Defaults to the invalid
	// UnsetValidation.
	scheme model.ValidationScheme
}
85
86// NewTextParser returns a new TextParser with the provided nameValidationScheme.
87func NewTextParser(nameValidationScheme model.ValidationScheme) TextParser {
88 return TextParser{scheme: nameValidationScheme}
khenaidooab1f7bd2019-11-14 14:00:27 -050089}
90
91// TextToMetricFamilies reads 'in' as the simple and flat text-based exchange
92// format and creates MetricFamily proto messages. It returns the MetricFamily
93// proto messages in a map where the metric names are the keys, along with any
94// error encountered.
95//
96// If the input contains duplicate metrics (i.e. lines with the same metric name
97// and exactly the same label set), the resulting MetricFamily will contain
98// duplicate Metric proto messages. Similar is true for duplicate label
99// names. Checks for duplicates have to be performed separately, if required.
100// Also note that neither the metrics within each MetricFamily are sorted nor
101// the label pairs within each Metric. Sorting is not required for the most
102// frequent use of this method, which is sample ingestion in the Prometheus
103// server. However, for presentation purposes, you might want to sort the
104// metrics, and in some cases, you must sort the labels, e.g. for consumption by
105// the metric family injection hook of the Prometheus registry.
106//
107// Summaries and histograms are rather special beasts. You would probably not
108// use them in the simple text format anyway. This method can deal with
109// summaries and histograms if they are presented in exactly the way the
110// text.Create function creates them.
111//
112// This method must not be called concurrently. If you want to parse different
113// input concurrently, instantiate a separate Parser for each goroutine.
114func (p *TextParser) TextToMetricFamilies(in io.Reader) (map[string]*dto.MetricFamily, error) {
115 p.reset(in)
116 for nextState := p.startOfLine; nextState != nil; nextState = nextState() {
117 // Magic happens here...
118 }
119 // Get rid of empty metric families.
120 for k, mf := range p.metricFamiliesByName {
121 if len(mf.GetMetric()) == 0 {
122 delete(p.metricFamiliesByName, k)
123 }
124 }
125 // If p.err is io.EOF now, we have run into a premature end of the input
126 // stream. Turn this error into something nicer and more
127 // meaningful. (io.EOF is often used as a signal for the legitimate end
128 // of an input stream.)
Abhay Kumara2ae5992025-11-10 14:02:24 +0000129 if p.err != nil && errors.Is(p.err, io.EOF) {
khenaidooab1f7bd2019-11-14 14:00:27 -0500130 p.parseError("unexpected end of input stream")
131 }
132 return p.metricFamiliesByName, p.err
133}
134
135func (p *TextParser) reset(in io.Reader) {
136 p.metricFamiliesByName = map[string]*dto.MetricFamily{}
Abhay Kumara2ae5992025-11-10 14:02:24 +0000137 p.currentLabelPairs = nil
khenaidooab1f7bd2019-11-14 14:00:27 -0500138 if p.buf == nil {
139 p.buf = bufio.NewReader(in)
140 } else {
141 p.buf.Reset(in)
142 }
143 p.err = nil
144 p.lineCount = 0
145 if p.summaries == nil || len(p.summaries) > 0 {
146 p.summaries = map[uint64]*dto.Metric{}
147 }
148 if p.histograms == nil || len(p.histograms) > 0 {
149 p.histograms = map[uint64]*dto.Metric{}
150 }
151 p.currentQuantile = math.NaN()
152 p.currentBucket = math.NaN()
Abhay Kumara2ae5992025-11-10 14:02:24 +0000153 p.currentMF = nil
khenaidooab1f7bd2019-11-14 14:00:27 -0500154}
155
156// startOfLine represents the state where the next byte read from p.buf is the
157// start of a line (or whitespace leading up to it).
158func (p *TextParser) startOfLine() stateFn {
159 p.lineCount++
Abhay Kumara2ae5992025-11-10 14:02:24 +0000160 p.currentMetricIsInsideBraces = false
161 p.currentMetricInsideBracesIsPresent = false
khenaidooab1f7bd2019-11-14 14:00:27 -0500162 if p.skipBlankTab(); p.err != nil {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000163 // This is the only place that we expect to see io.EOF,
164 // which is not an error but the signal that we are done.
165 // Any other error that happens to align with the start of
166 // a line is still an error.
167 if errors.Is(p.err, io.EOF) {
168 p.err = nil
169 }
khenaidooab1f7bd2019-11-14 14:00:27 -0500170 return nil
171 }
172 switch p.currentByte {
173 case '#':
174 return p.startComment
175 case '\n':
176 return p.startOfLine // Empty line, start the next one.
Abhay Kumara2ae5992025-11-10 14:02:24 +0000177 case '{':
178 p.currentMetricIsInsideBraces = true
179 return p.readingLabels
khenaidooab1f7bd2019-11-14 14:00:27 -0500180 }
181 return p.readingMetricName
182}
183
184// startComment represents the state where the next byte read from p.buf is the
185// start of a comment (or whitespace leading up to it).
186func (p *TextParser) startComment() stateFn {
187 if p.skipBlankTab(); p.err != nil {
188 return nil // Unexpected end of input.
189 }
190 if p.currentByte == '\n' {
191 return p.startOfLine
192 }
193 if p.readTokenUntilWhitespace(); p.err != nil {
194 return nil // Unexpected end of input.
195 }
196 // If we have hit the end of line already, there is nothing left
197 // to do. This is not considered a syntax error.
198 if p.currentByte == '\n' {
199 return p.startOfLine
200 }
201 keyword := p.currentToken.String()
202 if keyword != "HELP" && keyword != "TYPE" {
203 // Generic comment, ignore by fast forwarding to end of line.
204 for p.currentByte != '\n' {
205 if p.currentByte, p.err = p.buf.ReadByte(); p.err != nil {
206 return nil // Unexpected end of input.
207 }
208 }
209 return p.startOfLine
210 }
211 // There is something. Next has to be a metric name.
212 if p.skipBlankTab(); p.err != nil {
213 return nil // Unexpected end of input.
214 }
215 if p.readTokenAsMetricName(); p.err != nil {
216 return nil // Unexpected end of input.
217 }
218 if p.currentByte == '\n' {
219 // At the end of the line already.
220 // Again, this is not considered a syntax error.
221 return p.startOfLine
222 }
223 if !isBlankOrTab(p.currentByte) {
224 p.parseError("invalid metric name in comment")
225 return nil
226 }
227 p.setOrCreateCurrentMF()
Abhay Kumara2ae5992025-11-10 14:02:24 +0000228 if p.err != nil {
229 return nil
230 }
khenaidooab1f7bd2019-11-14 14:00:27 -0500231 if p.skipBlankTab(); p.err != nil {
232 return nil // Unexpected end of input.
233 }
234 if p.currentByte == '\n' {
235 // At the end of the line already.
236 // Again, this is not considered a syntax error.
237 return p.startOfLine
238 }
239 switch keyword {
240 case "HELP":
241 return p.readingHelp
242 case "TYPE":
243 return p.readingType
244 }
245 panic(fmt.Sprintf("code error: unexpected keyword %q", keyword))
246}
247
248// readingMetricName represents the state where the last byte read (now in
249// p.currentByte) is the first byte of a metric name.
250func (p *TextParser) readingMetricName() stateFn {
251 if p.readTokenAsMetricName(); p.err != nil {
252 return nil
253 }
254 if p.currentToken.Len() == 0 {
255 p.parseError("invalid metric name")
256 return nil
257 }
258 p.setOrCreateCurrentMF()
Abhay Kumara2ae5992025-11-10 14:02:24 +0000259 if p.err != nil {
260 return nil
261 }
khenaidooab1f7bd2019-11-14 14:00:27 -0500262 // Now is the time to fix the type if it hasn't happened yet.
263 if p.currentMF.Type == nil {
264 p.currentMF.Type = dto.MetricType_UNTYPED.Enum()
265 }
266 p.currentMetric = &dto.Metric{}
267 // Do not append the newly created currentMetric to
268 // currentMF.Metric right now. First wait if this is a summary,
269 // and the metric exists already, which we can only know after
270 // having read all the labels.
271 if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
272 return nil // Unexpected end of input.
273 }
274 return p.readingLabels
275}
276
277// readingLabels represents the state where the last byte read (now in
278// p.currentByte) is either the first byte of the label set (i.e. a '{'), or the
279// first byte of the value (otherwise).
280func (p *TextParser) readingLabels() stateFn {
281 // Summaries/histograms are special. We have to reset the
282 // currentLabels map, currentQuantile and currentBucket before starting to
283 // read labels.
284 if p.currentMF.GetType() == dto.MetricType_SUMMARY || p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
285 p.currentLabels = map[string]string{}
286 p.currentLabels[string(model.MetricNameLabel)] = p.currentMF.GetName()
287 p.currentQuantile = math.NaN()
288 p.currentBucket = math.NaN()
289 }
290 if p.currentByte != '{' {
291 return p.readingValue
292 }
293 return p.startLabelName
294}
295
296// startLabelName represents the state where the next byte read from p.buf is
297// the start of a label name (or whitespace leading up to it).
298func (p *TextParser) startLabelName() stateFn {
299 if p.skipBlankTab(); p.err != nil {
300 return nil // Unexpected end of input.
301 }
302 if p.currentByte == '}' {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000303 p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
304 p.currentLabelPairs = nil
khenaidooab1f7bd2019-11-14 14:00:27 -0500305 if p.skipBlankTab(); p.err != nil {
306 return nil // Unexpected end of input.
307 }
308 return p.readingValue
309 }
310 if p.readTokenAsLabelName(); p.err != nil {
311 return nil // Unexpected end of input.
312 }
313 if p.currentToken.Len() == 0 {
314 p.parseError(fmt.Sprintf("invalid label name for metric %q", p.currentMF.GetName()))
315 return nil
316 }
khenaidooab1f7bd2019-11-14 14:00:27 -0500317 if p.skipBlankTabIfCurrentBlankTab(); p.err != nil {
318 return nil // Unexpected end of input.
319 }
320 if p.currentByte != '=' {
Abhay Kumara2ae5992025-11-10 14:02:24 +0000321 if p.currentMetricIsInsideBraces {
322 if p.currentMetricInsideBracesIsPresent {
323 p.parseError(fmt.Sprintf("multiple metric names for metric %q", p.currentMF.GetName()))
324 return nil
325 }
326 switch p.currentByte {
327 case ',':
328 p.setOrCreateCurrentMF()
329 if p.err != nil {
330 return nil
331 }
332 if p.currentMF.Type == nil {
333 p.currentMF.Type = dto.MetricType_UNTYPED.Enum()
334 }
335 p.currentMetric = &dto.Metric{}
336 p.currentMetricInsideBracesIsPresent = true
337 return p.startLabelName
338 case '}':
339 p.setOrCreateCurrentMF()
340 if p.err != nil {
341 p.currentLabelPairs = nil
342 return nil
343 }
344 if p.currentMF.Type == nil {
345 p.currentMF.Type = dto.MetricType_UNTYPED.Enum()
346 }
347 p.currentMetric = &dto.Metric{}
348 p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
349 p.currentLabelPairs = nil
350 if p.skipBlankTab(); p.err != nil {
351 return nil // Unexpected end of input.
352 }
353 return p.readingValue
354 default:
355 p.parseError(fmt.Sprintf("unexpected end of metric name %q", p.currentByte))
356 return nil
357 }
358 }
khenaidooab1f7bd2019-11-14 14:00:27 -0500359 p.parseError(fmt.Sprintf("expected '=' after label name, found %q", p.currentByte))
Abhay Kumara2ae5992025-11-10 14:02:24 +0000360 p.currentLabelPairs = nil
khenaidooab1f7bd2019-11-14 14:00:27 -0500361 return nil
362 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000363 p.currentLabelPair = &dto.LabelPair{Name: proto.String(p.currentToken.String())}
364 if p.currentLabelPair.GetName() == string(model.MetricNameLabel) {
365 p.parseError(fmt.Sprintf("label name %q is reserved", model.MetricNameLabel))
366 p.currentLabelPairs = nil
367 return nil
368 }
369 if !p.scheme.IsValidLabelName(p.currentLabelPair.GetName()) {
370 p.parseError(fmt.Sprintf("invalid label name %q", p.currentLabelPair.GetName()))
371 p.currentLabelPairs = nil
372 return nil
373 }
374 // Special summary/histogram treatment. Don't add 'quantile' and 'le'
375 // labels to 'real' labels.
376 if (p.currentMF.GetType() != dto.MetricType_SUMMARY || p.currentLabelPair.GetName() != model.QuantileLabel) &&
377 (p.currentMF.GetType() != dto.MetricType_HISTOGRAM || p.currentLabelPair.GetName() != model.BucketLabel) {
378 p.currentLabelPairs = append(p.currentLabelPairs, p.currentLabelPair)
379 }
khenaidood948f772021-08-11 17:49:24 -0400380 // Check for duplicate label names.
381 labels := make(map[string]struct{})
Abhay Kumara2ae5992025-11-10 14:02:24 +0000382 for _, l := range p.currentLabelPairs {
khenaidood948f772021-08-11 17:49:24 -0400383 lName := l.GetName()
Abhay Kumara2ae5992025-11-10 14:02:24 +0000384 if _, exists := labels[lName]; exists {
khenaidood948f772021-08-11 17:49:24 -0400385 p.parseError(fmt.Sprintf("duplicate label names for metric %q", p.currentMF.GetName()))
Abhay Kumara2ae5992025-11-10 14:02:24 +0000386 p.currentLabelPairs = nil
khenaidood948f772021-08-11 17:49:24 -0400387 return nil
388 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000389 labels[lName] = struct{}{}
khenaidood948f772021-08-11 17:49:24 -0400390 }
khenaidooab1f7bd2019-11-14 14:00:27 -0500391 return p.startLabelValue
392}
393
394// startLabelValue represents the state where the next byte read from p.buf is
395// the start of a (quoted) label value (or whitespace leading up to it).
396func (p *TextParser) startLabelValue() stateFn {
397 if p.skipBlankTab(); p.err != nil {
398 return nil // Unexpected end of input.
399 }
400 if p.currentByte != '"' {
401 p.parseError(fmt.Sprintf("expected '\"' at start of label value, found %q", p.currentByte))
402 return nil
403 }
404 if p.readTokenAsLabelValue(); p.err != nil {
405 return nil
406 }
407 if !model.LabelValue(p.currentToken.String()).IsValid() {
408 p.parseError(fmt.Sprintf("invalid label value %q", p.currentToken.String()))
409 return nil
410 }
411 p.currentLabelPair.Value = proto.String(p.currentToken.String())
412 // Special treatment of summaries:
413 // - Quantile labels are special, will result in dto.Quantile later.
414 // - Other labels have to be added to currentLabels for signature calculation.
415 if p.currentMF.GetType() == dto.MetricType_SUMMARY {
416 if p.currentLabelPair.GetName() == model.QuantileLabel {
khenaidood948f772021-08-11 17:49:24 -0400417 if p.currentQuantile, p.err = parseFloat(p.currentLabelPair.GetValue()); p.err != nil {
khenaidooab1f7bd2019-11-14 14:00:27 -0500418 // Create a more helpful error message.
419 p.parseError(fmt.Sprintf("expected float as value for 'quantile' label, got %q", p.currentLabelPair.GetValue()))
Abhay Kumara2ae5992025-11-10 14:02:24 +0000420 p.currentLabelPairs = nil
khenaidooab1f7bd2019-11-14 14:00:27 -0500421 return nil
422 }
423 } else {
424 p.currentLabels[p.currentLabelPair.GetName()] = p.currentLabelPair.GetValue()
425 }
426 }
427 // Similar special treatment of histograms.
428 if p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
429 if p.currentLabelPair.GetName() == model.BucketLabel {
khenaidood948f772021-08-11 17:49:24 -0400430 if p.currentBucket, p.err = parseFloat(p.currentLabelPair.GetValue()); p.err != nil {
khenaidooab1f7bd2019-11-14 14:00:27 -0500431 // Create a more helpful error message.
432 p.parseError(fmt.Sprintf("expected float as value for 'le' label, got %q", p.currentLabelPair.GetValue()))
433 return nil
434 }
435 } else {
436 p.currentLabels[p.currentLabelPair.GetName()] = p.currentLabelPair.GetValue()
437 }
438 }
439 if p.skipBlankTab(); p.err != nil {
440 return nil // Unexpected end of input.
441 }
442 switch p.currentByte {
443 case ',':
444 return p.startLabelName
445
446 case '}':
Abhay Kumara2ae5992025-11-10 14:02:24 +0000447 if p.currentMF == nil {
448 p.parseError("invalid metric name")
449 return nil
450 }
451 p.currentMetric.Label = append(p.currentMetric.Label, p.currentLabelPairs...)
452 p.currentLabelPairs = nil
khenaidooab1f7bd2019-11-14 14:00:27 -0500453 if p.skipBlankTab(); p.err != nil {
454 return nil // Unexpected end of input.
455 }
456 return p.readingValue
457 default:
458 p.parseError(fmt.Sprintf("unexpected end of label value %q", p.currentLabelPair.GetValue()))
Abhay Kumara2ae5992025-11-10 14:02:24 +0000459 p.currentLabelPairs = nil
khenaidooab1f7bd2019-11-14 14:00:27 -0500460 return nil
461 }
462}
463
464// readingValue represents the state where the last byte read (now in
465// p.currentByte) is the first byte of the sample value (i.e. a float).
466func (p *TextParser) readingValue() stateFn {
467 // When we are here, we have read all the labels, so for the
468 // special case of a summary/histogram, we can finally find out
469 // if the metric already exists.
Abhay Kumara2ae5992025-11-10 14:02:24 +0000470 switch p.currentMF.GetType() {
471 case dto.MetricType_SUMMARY:
khenaidooab1f7bd2019-11-14 14:00:27 -0500472 signature := model.LabelsToSignature(p.currentLabels)
473 if summary := p.summaries[signature]; summary != nil {
474 p.currentMetric = summary
475 } else {
476 p.summaries[signature] = p.currentMetric
477 p.currentMF.Metric = append(p.currentMF.Metric, p.currentMetric)
478 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000479 case dto.MetricType_HISTOGRAM:
khenaidooab1f7bd2019-11-14 14:00:27 -0500480 signature := model.LabelsToSignature(p.currentLabels)
481 if histogram := p.histograms[signature]; histogram != nil {
482 p.currentMetric = histogram
483 } else {
484 p.histograms[signature] = p.currentMetric
485 p.currentMF.Metric = append(p.currentMF.Metric, p.currentMetric)
486 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000487 default:
khenaidooab1f7bd2019-11-14 14:00:27 -0500488 p.currentMF.Metric = append(p.currentMF.Metric, p.currentMetric)
489 }
490 if p.readTokenUntilWhitespace(); p.err != nil {
491 return nil // Unexpected end of input.
492 }
khenaidood948f772021-08-11 17:49:24 -0400493 value, err := parseFloat(p.currentToken.String())
khenaidooab1f7bd2019-11-14 14:00:27 -0500494 if err != nil {
495 // Create a more helpful error message.
496 p.parseError(fmt.Sprintf("expected float as value, got %q", p.currentToken.String()))
497 return nil
498 }
499 switch p.currentMF.GetType() {
500 case dto.MetricType_COUNTER:
501 p.currentMetric.Counter = &dto.Counter{Value: proto.Float64(value)}
502 case dto.MetricType_GAUGE:
503 p.currentMetric.Gauge = &dto.Gauge{Value: proto.Float64(value)}
504 case dto.MetricType_UNTYPED:
505 p.currentMetric.Untyped = &dto.Untyped{Value: proto.Float64(value)}
506 case dto.MetricType_SUMMARY:
507 // *sigh*
508 if p.currentMetric.Summary == nil {
509 p.currentMetric.Summary = &dto.Summary{}
510 }
511 switch {
512 case p.currentIsSummaryCount:
513 p.currentMetric.Summary.SampleCount = proto.Uint64(uint64(value))
514 case p.currentIsSummarySum:
515 p.currentMetric.Summary.SampleSum = proto.Float64(value)
516 case !math.IsNaN(p.currentQuantile):
517 p.currentMetric.Summary.Quantile = append(
518 p.currentMetric.Summary.Quantile,
519 &dto.Quantile{
520 Quantile: proto.Float64(p.currentQuantile),
521 Value: proto.Float64(value),
522 },
523 )
524 }
525 case dto.MetricType_HISTOGRAM:
526 // *sigh*
527 if p.currentMetric.Histogram == nil {
528 p.currentMetric.Histogram = &dto.Histogram{}
529 }
530 switch {
531 case p.currentIsHistogramCount:
532 p.currentMetric.Histogram.SampleCount = proto.Uint64(uint64(value))
533 case p.currentIsHistogramSum:
534 p.currentMetric.Histogram.SampleSum = proto.Float64(value)
535 case !math.IsNaN(p.currentBucket):
536 p.currentMetric.Histogram.Bucket = append(
537 p.currentMetric.Histogram.Bucket,
538 &dto.Bucket{
539 UpperBound: proto.Float64(p.currentBucket),
540 CumulativeCount: proto.Uint64(uint64(value)),
541 },
542 )
543 }
544 default:
545 p.err = fmt.Errorf("unexpected type for metric name %q", p.currentMF.GetName())
546 }
547 if p.currentByte == '\n' {
548 return p.startOfLine
549 }
550 return p.startTimestamp
551}
552
553// startTimestamp represents the state where the next byte read from p.buf is
554// the start of the timestamp (or whitespace leading up to it).
555func (p *TextParser) startTimestamp() stateFn {
556 if p.skipBlankTab(); p.err != nil {
557 return nil // Unexpected end of input.
558 }
559 if p.readTokenUntilWhitespace(); p.err != nil {
560 return nil // Unexpected end of input.
561 }
562 timestamp, err := strconv.ParseInt(p.currentToken.String(), 10, 64)
563 if err != nil {
564 // Create a more helpful error message.
565 p.parseError(fmt.Sprintf("expected integer as timestamp, got %q", p.currentToken.String()))
566 return nil
567 }
568 p.currentMetric.TimestampMs = proto.Int64(timestamp)
569 if p.readTokenUntilNewline(false); p.err != nil {
570 return nil // Unexpected end of input.
571 }
572 if p.currentToken.Len() > 0 {
573 p.parseError(fmt.Sprintf("spurious string after timestamp: %q", p.currentToken.String()))
574 return nil
575 }
576 return p.startOfLine
577}
578
579// readingHelp represents the state where the last byte read (now in
580// p.currentByte) is the first byte of the docstring after 'HELP'.
581func (p *TextParser) readingHelp() stateFn {
582 if p.currentMF.Help != nil {
583 p.parseError(fmt.Sprintf("second HELP line for metric name %q", p.currentMF.GetName()))
584 return nil
585 }
586 // Rest of line is the docstring.
587 if p.readTokenUntilNewline(true); p.err != nil {
588 return nil // Unexpected end of input.
589 }
590 p.currentMF.Help = proto.String(p.currentToken.String())
591 return p.startOfLine
592}
593
594// readingType represents the state where the last byte read (now in
595// p.currentByte) is the first byte of the type hint after 'HELP'.
596func (p *TextParser) readingType() stateFn {
597 if p.currentMF.Type != nil {
598 p.parseError(fmt.Sprintf("second TYPE line for metric name %q, or TYPE reported after samples", p.currentMF.GetName()))
599 return nil
600 }
601 // Rest of line is the type.
602 if p.readTokenUntilNewline(false); p.err != nil {
603 return nil // Unexpected end of input.
604 }
605 metricType, ok := dto.MetricType_value[strings.ToUpper(p.currentToken.String())]
606 if !ok {
607 p.parseError(fmt.Sprintf("unknown metric type %q", p.currentToken.String()))
608 return nil
609 }
610 p.currentMF.Type = dto.MetricType(metricType).Enum()
611 return p.startOfLine
612}
613
614// parseError sets p.err to a ParseError at the current line with the given
615// message.
616func (p *TextParser) parseError(msg string) {
617 p.err = ParseError{
618 Line: p.lineCount,
619 Msg: msg,
620 }
621}
622
623// skipBlankTab reads (and discards) bytes from p.buf until it encounters a byte
624// that is neither ' ' nor '\t'. That byte is left in p.currentByte.
625func (p *TextParser) skipBlankTab() {
626 for {
627 if p.currentByte, p.err = p.buf.ReadByte(); p.err != nil || !isBlankOrTab(p.currentByte) {
628 return
629 }
630 }
631}
632
633// skipBlankTabIfCurrentBlankTab works exactly as skipBlankTab but doesn't do
634// anything if p.currentByte is neither ' ' nor '\t'.
635func (p *TextParser) skipBlankTabIfCurrentBlankTab() {
636 if isBlankOrTab(p.currentByte) {
637 p.skipBlankTab()
638 }
639}
640
641// readTokenUntilWhitespace copies bytes from p.buf into p.currentToken. The
642// first byte considered is the byte already read (now in p.currentByte). The
643// first whitespace byte encountered is still copied into p.currentByte, but not
644// into p.currentToken.
645func (p *TextParser) readTokenUntilWhitespace() {
646 p.currentToken.Reset()
647 for p.err == nil && !isBlankOrTab(p.currentByte) && p.currentByte != '\n' {
648 p.currentToken.WriteByte(p.currentByte)
649 p.currentByte, p.err = p.buf.ReadByte()
650 }
651}
652
653// readTokenUntilNewline copies bytes from p.buf into p.currentToken. The first
654// byte considered is the byte already read (now in p.currentByte). The first
655// newline byte encountered is still copied into p.currentByte, but not into
656// p.currentToken. If recognizeEscapeSequence is true, two escape sequences are
657// recognized: '\\' translates into '\', and '\n' into a line-feed character.
658// All other escape sequences are invalid and cause an error.
659func (p *TextParser) readTokenUntilNewline(recognizeEscapeSequence bool) {
660 p.currentToken.Reset()
661 escaped := false
662 for p.err == nil {
663 if recognizeEscapeSequence && escaped {
664 switch p.currentByte {
665 case '\\':
666 p.currentToken.WriteByte(p.currentByte)
667 case 'n':
668 p.currentToken.WriteByte('\n')
Abhay Kumara2ae5992025-11-10 14:02:24 +0000669 case '"':
670 p.currentToken.WriteByte('"')
khenaidooab1f7bd2019-11-14 14:00:27 -0500671 default:
672 p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
673 return
674 }
675 escaped = false
676 } else {
677 switch p.currentByte {
678 case '\n':
679 return
680 case '\\':
681 escaped = true
682 default:
683 p.currentToken.WriteByte(p.currentByte)
684 }
685 }
686 p.currentByte, p.err = p.buf.ReadByte()
687 }
688}
689
690// readTokenAsMetricName copies a metric name from p.buf into p.currentToken.
691// The first byte considered is the byte already read (now in p.currentByte).
692// The first byte not part of a metric name is still copied into p.currentByte,
693// but not into p.currentToken.
694func (p *TextParser) readTokenAsMetricName() {
695 p.currentToken.Reset()
Abhay Kumara2ae5992025-11-10 14:02:24 +0000696 // A UTF-8 metric name must be quoted and may have escaped characters.
697 quoted := false
698 escaped := false
khenaidooab1f7bd2019-11-14 14:00:27 -0500699 if !isValidMetricNameStart(p.currentByte) {
700 return
701 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000702 for p.err == nil {
703 if escaped {
704 switch p.currentByte {
705 case '\\':
706 p.currentToken.WriteByte(p.currentByte)
707 case 'n':
708 p.currentToken.WriteByte('\n')
709 case '"':
710 p.currentToken.WriteByte('"')
711 default:
712 p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
713 return
714 }
715 escaped = false
716 } else {
717 switch p.currentByte {
718 case '"':
719 quoted = !quoted
720 if !quoted {
721 p.currentByte, p.err = p.buf.ReadByte()
722 return
723 }
724 case '\n':
725 p.parseError(fmt.Sprintf("metric name %q contains unescaped new-line", p.currentToken.String()))
726 return
727 case '\\':
728 escaped = true
729 default:
730 p.currentToken.WriteByte(p.currentByte)
731 }
732 }
khenaidooab1f7bd2019-11-14 14:00:27 -0500733 p.currentByte, p.err = p.buf.ReadByte()
Abhay Kumara2ae5992025-11-10 14:02:24 +0000734 if !isValidMetricNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == ' ') {
khenaidooab1f7bd2019-11-14 14:00:27 -0500735 return
736 }
737 }
738}
739
740// readTokenAsLabelName copies a label name from p.buf into p.currentToken.
741// The first byte considered is the byte already read (now in p.currentByte).
742// The first byte not part of a label name is still copied into p.currentByte,
743// but not into p.currentToken.
744func (p *TextParser) readTokenAsLabelName() {
745 p.currentToken.Reset()
Abhay Kumara2ae5992025-11-10 14:02:24 +0000746 // A UTF-8 label name must be quoted and may have escaped characters.
747 quoted := false
748 escaped := false
khenaidooab1f7bd2019-11-14 14:00:27 -0500749 if !isValidLabelNameStart(p.currentByte) {
750 return
751 }
Abhay Kumara2ae5992025-11-10 14:02:24 +0000752 for p.err == nil {
753 if escaped {
754 switch p.currentByte {
755 case '\\':
756 p.currentToken.WriteByte(p.currentByte)
757 case 'n':
758 p.currentToken.WriteByte('\n')
759 case '"':
760 p.currentToken.WriteByte('"')
761 default:
762 p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
763 return
764 }
765 escaped = false
766 } else {
767 switch p.currentByte {
768 case '"':
769 quoted = !quoted
770 if !quoted {
771 p.currentByte, p.err = p.buf.ReadByte()
772 return
773 }
774 case '\n':
775 p.parseError(fmt.Sprintf("label name %q contains unescaped new-line", p.currentToken.String()))
776 return
777 case '\\':
778 escaped = true
779 default:
780 p.currentToken.WriteByte(p.currentByte)
781 }
782 }
khenaidooab1f7bd2019-11-14 14:00:27 -0500783 p.currentByte, p.err = p.buf.ReadByte()
Abhay Kumara2ae5992025-11-10 14:02:24 +0000784 if !isValidLabelNameContinuation(p.currentByte, quoted) || (!quoted && p.currentByte == '=') {
khenaidooab1f7bd2019-11-14 14:00:27 -0500785 return
786 }
787 }
788}
789
790// readTokenAsLabelValue copies a label value from p.buf into p.currentToken.
791// In contrast to the other 'readTokenAs...' functions, which start with the
792// last read byte in p.currentByte, this method ignores p.currentByte and starts
793// with reading a new byte from p.buf. The first byte not part of a label value
794// is still copied into p.currentByte, but not into p.currentToken.
795func (p *TextParser) readTokenAsLabelValue() {
796 p.currentToken.Reset()
797 escaped := false
798 for {
799 if p.currentByte, p.err = p.buf.ReadByte(); p.err != nil {
800 return
801 }
802 if escaped {
803 switch p.currentByte {
804 case '"', '\\':
805 p.currentToken.WriteByte(p.currentByte)
806 case 'n':
807 p.currentToken.WriteByte('\n')
808 default:
809 p.parseError(fmt.Sprintf("invalid escape sequence '\\%c'", p.currentByte))
Abhay Kumara2ae5992025-11-10 14:02:24 +0000810 p.currentLabelPairs = nil
khenaidooab1f7bd2019-11-14 14:00:27 -0500811 return
812 }
813 escaped = false
814 continue
815 }
816 switch p.currentByte {
817 case '"':
818 return
819 case '\n':
820 p.parseError(fmt.Sprintf("label value %q contains unescaped new-line", p.currentToken.String()))
821 return
822 case '\\':
823 escaped = true
824 default:
825 p.currentToken.WriteByte(p.currentByte)
826 }
827 }
828}
829
830func (p *TextParser) setOrCreateCurrentMF() {
831 p.currentIsSummaryCount = false
832 p.currentIsSummarySum = false
833 p.currentIsHistogramCount = false
834 p.currentIsHistogramSum = false
835 name := p.currentToken.String()
Abhay Kumara2ae5992025-11-10 14:02:24 +0000836 if !p.scheme.IsValidMetricName(name) {
837 p.parseError(fmt.Sprintf("invalid metric name %q", name))
838 return
839 }
khenaidooab1f7bd2019-11-14 14:00:27 -0500840 if p.currentMF = p.metricFamiliesByName[name]; p.currentMF != nil {
841 return
842 }
843 // Try out if this is a _sum or _count for a summary/histogram.
844 summaryName := summaryMetricName(name)
845 if p.currentMF = p.metricFamiliesByName[summaryName]; p.currentMF != nil {
846 if p.currentMF.GetType() == dto.MetricType_SUMMARY {
847 if isCount(name) {
848 p.currentIsSummaryCount = true
849 }
850 if isSum(name) {
851 p.currentIsSummarySum = true
852 }
853 return
854 }
855 }
856 histogramName := histogramMetricName(name)
857 if p.currentMF = p.metricFamiliesByName[histogramName]; p.currentMF != nil {
858 if p.currentMF.GetType() == dto.MetricType_HISTOGRAM {
859 if isCount(name) {
860 p.currentIsHistogramCount = true
861 }
862 if isSum(name) {
863 p.currentIsHistogramSum = true
864 }
865 return
866 }
867 }
868 p.currentMF = &dto.MetricFamily{Name: proto.String(name)}
869 p.metricFamiliesByName[name] = p.currentMF
870}
871
// isValidLabelNameStart reports whether b may be the first byte of a label
// name: an ASCII letter, an underscore, or a double quote (which opens a
// quoted name).
func isValidLabelNameStart(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z':
		return true
	case 'A' <= b && b <= 'Z':
		return true
	case b == '_', b == '"':
		return true
	default:
		return false
	}
}
875
Abhay Kumara2ae5992025-11-10 14:02:24 +0000876func isValidLabelNameContinuation(b byte, quoted bool) bool {
877 return isValidLabelNameStart(b) || (b >= '0' && b <= '9') || (quoted && utf8.ValidString(string(b)))
khenaidooab1f7bd2019-11-14 14:00:27 -0500878}
879
880func isValidMetricNameStart(b byte) bool {
881 return isValidLabelNameStart(b) || b == ':'
882}
883
Abhay Kumara2ae5992025-11-10 14:02:24 +0000884func isValidMetricNameContinuation(b byte, quoted bool) bool {
885 return isValidLabelNameContinuation(b, quoted) || b == ':'
khenaidooab1f7bd2019-11-14 14:00:27 -0500886}
887
// isBlankOrTab reports whether b is a space or a horizontal tab.
func isBlankOrTab(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
891
// isCount reports whether name ends in "_count" and has at least one
// character in front of that suffix.
func isCount(name string) bool {
	const suffix = "_count"
	stem := len(name) - len(suffix)
	return stem > 0 && name[stem:] == suffix
}
895
// isSum reports whether name ends in "_sum" and has at least one character in
// front of that suffix.
func isSum(name string) bool {
	const suffix = "_sum"
	stem := len(name) - len(suffix)
	return stem > 0 && name[stem:] == suffix
}
899
// isBucket reports whether name ends in "_bucket" and has at least one
// character in front of that suffix.
func isBucket(name string) bool {
	const suffix = "_bucket"
	stem := len(name) - len(suffix)
	return stem > 0 && name[stem:] == suffix
}
903
904func summaryMetricName(name string) string {
905 switch {
906 case isCount(name):
907 return name[:len(name)-6]
908 case isSum(name):
909 return name[:len(name)-4]
910 default:
911 return name
912 }
913}
914
915func histogramMetricName(name string) string {
916 switch {
917 case isCount(name):
918 return name[:len(name)-6]
919 case isSum(name):
920 return name[:len(name)-4]
921 case isBucket(name):
922 return name[:len(name)-7]
923 default:
924 return name
925 }
926}
khenaidood948f772021-08-11 17:49:24 -0400927
// parseFloat converts s to a float64 using strconv.ParseFloat with a bit size
// of 64, but first rejects any input containing 'p', 'P' or '_' with an
// "unsupported character in float" error.
func parseFloat(s string) (float64, error) {
	const rejected = "pP_"
	if strings.IndexAny(s, rejected) >= 0 {
		return 0, errors.New("unsupported character in float")
	}
	return strconv.ParseFloat(s, 64)
}