vendor/github.com/rivo/uniseg/word.go - voltha-openolt-adapter - Gitiles

 package uniseg

 import "unicode/utf8"

 // FirstWord splits the leading word off the byte slice "b", with word
 // boundaries determined by the rules of [Unicode Standard Annex #29, Word
 // Boundaries]. Feeding the returned remainder and state back into the function
 // walks through every word of the input, one call at a time.
 //
 // Pass -1 as "state" whenever the current state is unknown, which is the case
 // on the very first call. On follow-up calls, pass the "newState" and "rest"
 // values produced by the preceding call.
 //
 // Both results are sub-slices of "b": "word" covers the identified word and
 // "rest" begins right after its last byte. A "rest" of length 0 means all of
 // "b" has been consumed.
 //
 // For an empty "b", all results are zero values (nil slices and state 0).
 //
 // [Unicode Standard Annex #29, Word Boundaries]: http://unicode.org/reports/tr29/#Word_Boundaries
 func FirstWord(b []byte, state int) (word, rest []byte, newState int) {
 	// Nothing to split.
 	if len(b) == 0 {
 		return nil, nil, 0
 	}

 	// Decode the leading rune. If it already spans the whole input, the input
 	// is a single word.
 	first, pos := utf8.DecodeRune(b)
 	if pos >= len(b) {
 		return b, nil, wbAny
 	}

 	// An unknown state is resolved from the first rune; the boundary flag of
 	// this initial transition is not needed.
 	if state < 0 {
 		state, _ = transitionWordBreakState(state, first, b[pos:], "")
 	}

 	// Advance rune by rune. "pos" always marks the end of the word collected
 	// so far, so a boundary reported for the next rune splits the input there.
 	for pos < len(b) {
 		next, size := utf8.DecodeRune(b[pos:])

 		var atBoundary bool
 		state, atBoundary = transitionWordBreakState(state, next, b[pos+size:], "")
 		if atBoundary {
 			return b[:pos], b[pos:], state
 		}

 		pos += size
 	}

 	// The input ran out before any boundary was reported.
 	return b, nil, wbAny
 }

 // FirstWordInString is the string counterpart of [FirstWord]: it takes a
 // string instead of a byte slice and returns the first word and the remainder
 // as strings. The "state" argument and the "newState" result are used the same
 // way (pass -1 when the state is unknown).
 //
 // For an empty "str", all results are zero values (empty strings and state 0).
 func FirstWordInString(str string, state int) (word, rest string, newState int) {
 	// Nothing to split.
 	if len(str) == 0 {
 		return "", "", 0
 	}

 	// Decode the leading rune. If it already spans the whole input, the input
 	// is a single word.
 	first, pos := utf8.DecodeRuneInString(str)
 	if pos >= len(str) {
 		return str, "", wbAny
 	}

 	// An unknown state is resolved from the first rune; the boundary flag of
 	// this initial transition is not needed. The remaining input is handed
 	// over as a string, so the byte slice argument stays nil.
 	if state < 0 {
 		state, _ = transitionWordBreakState(state, first, nil, str[pos:])
 	}

 	// Advance rune by rune. "pos" always marks the end of the word collected
 	// so far, so a boundary reported for the next rune splits the input there.
 	for pos < len(str) {
 		next, size := utf8.DecodeRuneInString(str[pos:])

 		var atBoundary bool
 		state, atBoundary = transitionWordBreakState(state, next, nil, str[pos+size:])
 		if atBoundary {
 			return str[:pos], str[pos:], state
 		}

 		pos += size
 	}

 	// The input ran out before any boundary was reported.
 	return str, "", wbAny
 }
	package uniseg

	import "unicode/utf8"

	// FirstWord splits the leading word off the byte slice "b", with word
	// boundaries determined by the rules of [Unicode Standard Annex #29, Word
	// Boundaries]. Feeding the returned remainder and state back into the function
	// walks through every word of the input, one call at a time.
	//
	// Pass -1 as "state" whenever the current state is unknown, which is the case
	// on the very first call. On follow-up calls, pass the "newState" and "rest"
	// values produced by the preceding call.
	//
	// Both results are sub-slices of "b": "word" covers the identified word and
	// "rest" begins right after its last byte. A "rest" of length 0 means all of
	// "b" has been consumed.
	//
	// For an empty "b", all results are zero values (nil slices and state 0).
	//
	// [Unicode Standard Annex #29, Word Boundaries]: http://unicode.org/reports/tr29/#Word_Boundaries
	func FirstWord(b []byte, state int) (word, rest []byte, newState int) {
		// Nothing to split.
		if len(b) == 0 {
			return nil, nil, 0
		}

		// Decode the leading rune. If it already spans the whole input, the
		// input is a single word.
		lead, offset := utf8.DecodeRune(b)
		if offset >= len(b) {
			return b, nil, wbAny
		}

		// An unknown state is resolved from the first rune; the boundary flag
		// of this initial transition is not needed.
		if state < 0 {
			state, _ = transitionWordBreakState(state, lead, b[offset:], "")
		}

		// Advance rune by rune. "offset" always marks the end of the word
		// collected so far, so a boundary reported for the next rune splits
		// the input there.
		for offset < len(b) {
			cur, width := utf8.DecodeRune(b[offset:])

			var split bool
			state, split = transitionWordBreakState(state, cur, b[offset+width:], "")
			if split {
				return b[:offset], b[offset:], state
			}

			offset += width
		}

		// The input ran out before any boundary was reported.
		return b, nil, wbAny
	}

	// FirstWordInString is the string counterpart of [FirstWord]: it takes a
	// string instead of a byte slice and returns the first word and the remainder
	// as strings. The "state" argument and the "newState" result are used the same
	// way (pass -1 when the state is unknown).
	//
	// For an empty "str", all results are zero values (empty strings and state 0).
	func FirstWordInString(str string, state int) (word, rest string, newState int) {
		// Nothing to split.
		if len(str) == 0 {
			return "", "", 0
		}

		// Decode the leading rune. If it already spans the whole input, the
		// input is a single word.
		lead, offset := utf8.DecodeRuneInString(str)
		if offset >= len(str) {
			return str, "", wbAny
		}

		// An unknown state is resolved from the first rune; the boundary flag
		// of this initial transition is not needed. The remaining input is
		// handed over as a string, so the byte slice argument stays nil.
		if state < 0 {
			state, _ = transitionWordBreakState(state, lead, nil, str[offset:])
		}

		// Advance rune by rune. "offset" always marks the end of the word
		// collected so far, so a boundary reported for the next rune splits
		// the input there.
		for offset < len(str) {
			cur, width := utf8.DecodeRuneInString(str[offset:])

			var split bool
			state, split = transitionWordBreakState(state, cur, nil, str[offset+width:])
			if split {
				return str[:offset], str[offset:], state
			}

			offset += width
		}

		// The input ran out before any boundary was reported.
		return str, "", wbAny
	}