Blame - vendor/github.com/rivo/uniseg/sentence.go - voltha-openolt-adapter

blob: adc2a3577363500214ded01498e09141c68161cf [file] [log] [blame]

Abhay Kumar	a61c522	2025-11-10 07:32:50 +0000	[diff] [blame]	1	package uniseg
				2
				3	import "unicode/utf8"
				4
				5	// FirstSentence returns the first sentence found in the given byte slice
				6	// according to the rules of [Unicode Standard Annex #29, Sentence Boundaries].
				7	// This function can be called continuously to extract all sentences from a byte
				8	// slice, as illustrated in the example below.
				9	//
				10	// If you don't know the current state, for example when calling the function
				11	// for the first time, you must pass -1. For consecutive calls, pass the state
				12	// and rest slice returned by the previous call.
				13	//
				14	// The "rest" slice is the sub-slice of the original byte slice "b" starting
				15	// after the last byte of the identified sentence. If the length of the "rest"
				16	// slice is 0, the entire byte slice "b" has been processed. The "sentence" byte
				17	// slice is the sub-slice of the input slice containing the identified sentence.
				18	//
				19	// Given an empty byte slice "b", the function returns nil values.
				20	//
				21	// [Unicode Standard Annex #29, Sentence Boundaries]: http://unicode.org/reports/tr29/#Sentence_Boundaries
				22	func FirstSentence(b []byte, state int) (sentence, rest []byte, newState int) {
				23	// An empty byte slice returns nothing.
				24	if len(b) == 0 {
				25	return
				26	}
				27
				28	// Extract the first rune.
				29	r, length := utf8.DecodeRune(b)
				30	if len(b) <= length { // If we're already past the end, there is nothing else to parse.
				31	return b, nil, sbAny
				32	}
				33
				34	// If we don't know the state, determine it now.
				35	if state < 0 {
				36	state, _ = transitionSentenceBreakState(state, r, b[length:], "")
				37	}
				38
				39	// Transition until we find a boundary.
				40	var boundary bool
				41	for {
				42	r, l := utf8.DecodeRune(b[length:])
				43	state, boundary = transitionSentenceBreakState(state, r, b[length+l:], "")
				44
				45	if boundary {
				46	return b[:length], b[length:], state
				47	}
				48
				49	length += l
				50	if len(b) <= length {
				51	return b, nil, sbAny
				52	}
				53	}
				54	}
				55
				56	// FirstSentenceInString is like [FirstSentence] but its input and outputs are
				57	// strings.
				58	func FirstSentenceInString(str string, state int) (sentence, rest string, newState int) {
				59	// An empty byte slice returns nothing.
				60	if len(str) == 0 {
				61	return
				62	}
				63
				64	// Extract the first rune.
				65	r, length := utf8.DecodeRuneInString(str)
				66	if len(str) <= length { // If we're already past the end, there is nothing else to parse.
				67	return str, "", sbAny
				68	}
				69
				70	// If we don't know the state, determine it now.
				71	if state < 0 {
				72	state, _ = transitionSentenceBreakState(state, r, nil, str[length:])
				73	}
				74
				75	// Transition until we find a boundary.
				76	var boundary bool
				77	for {
				78	r, l := utf8.DecodeRuneInString(str[length:])
				79	state, boundary = transitionSentenceBreakState(state, r, nil, str[length+l:])
				80
				81	if boundary {
				82	return str[:length], str[length:], state
				83	}
				84
				85	length += l
				86	if len(str) <= length {
				87	return str, "", sbAny
				88	}
				89	}
				90	}