Blame - vendor/github.com/rivo/uniseg/line.go - voltha-openolt-adapter

blob: 7a46318d93d26f8309dfb00596b3ff932d9b2bcf [file] [log] [blame]

Abhay Kumar	a61c522	2025-11-10 07:32:50 +0000	[diff] [blame]	1	package uniseg
				2
				3	import "unicode/utf8"
				4
				5	// FirstLineSegment returns the prefix of the given byte slice after which a
				6	// decision to break the string over to the next line can or must be made,
				7	// according to the rules of [Unicode Standard Annex #14]. This is used to
				8	// implement line breaking.
				9	//
				10	// Line breaking, also known as word wrapping, is the process of breaking a
				11	// section of text into lines such that it will fit in the available width of a
				12	// page, window or other display area.
				13	//
				14	// The returned "segment" may not be broken into smaller parts, unless no other
				15	// breaking opportunities present themselves, in which case you may break by
				16	// grapheme clusters (using the [FirstGraphemeCluster] function to determine the
				17	// grapheme clusters).
				18	//
				19	// The "mustBreak" flag indicates whether you MUST break the line after the
				20	// given segment (true), for example after newline characters, or you MAY break
				21	// the line after the given segment (false).
				22	//
				23	// This function can be called continuously to extract all non-breaking sub-sets
				24	// from a byte slice, as illustrated in the example below.
				25	//
				26	// If you don't know the current state, for example when calling the function
				27	// for the first time, you must pass -1. For consecutive calls, pass the state
				28	// and rest slice returned by the previous call.
				29	//
				30	// The "rest" slice is the sub-slice of the original byte slice "b" starting
				31	// after the last byte of the identified line segment. If the length of the
				32	// "rest" slice is 0, the entire byte slice "b" has been processed. The
				33	// "segment" byte slice is the sub-slice of the input slice containing the
				34	// identified line segment.
				35	//
				36	// Given an empty byte slice "b", the function returns nil values.
				37	//
				38	// Note that in accordance with [UAX #14 LB3], the final segment will end with
				39	// "mustBreak" set to true. You can choose to ignore this by checking if the
				40	// length of the "rest" slice is 0 and calling [HasTrailingLineBreak] or
				41	// [HasTrailingLineBreakInString] on the last rune.
				42	//
				43	// Note also that this algorithm may break within grapheme clusters. This is
				44	// addressed in Section 8.2 Example 6 of UAX #14. To avoid this, you can use
				45	// the [Step] function instead.
				46	//
				47	// [Unicode Standard Annex #14]: https://www.unicode.org/reports/tr14/
				48	// [UAX #14 LB3]: https://www.unicode.org/reports/tr14/#Algorithm
				49	func FirstLineSegment(b []byte, state int) (segment, rest []byte, mustBreak bool, newState int) {
				50	// An empty byte slice returns nothing.
				51	if len(b) == 0 {
				52	return
				53	}
				54
				55	// Extract the first rune.
				56	r, length := utf8.DecodeRune(b)
				57	if len(b) <= length { // If we're already past the end, there is nothing else to parse.
				58	return b, nil, true, lbAny // LB3.
				59	}
				60
				61	// If we don't know the state, determine it now.
				62	if state < 0 {
				63	state, _ = transitionLineBreakState(state, r, b[length:], "")
				64	}
				65
				66	// Transition until we find a boundary.
				67	var boundary int
				68	for {
				69	r, l := utf8.DecodeRune(b[length:])
				70	state, boundary = transitionLineBreakState(state, r, b[length+l:], "")
				71
				72	if boundary != LineDontBreak {
				73	return b[:length], b[length:], boundary == LineMustBreak, state
				74	}
				75
				76	length += l
				77	if len(b) <= length {
				78	return b, nil, true, lbAny // LB3
				79	}
				80	}
				81	}
				82
				83	// FirstLineSegmentInString is like [FirstLineSegment] but its input and outputs
				84	// are strings.
				85	func FirstLineSegmentInString(str string, state int) (segment, rest string, mustBreak bool, newState int) {
				86	// An empty byte slice returns nothing.
				87	if len(str) == 0 {
				88	return
				89	}
				90
				91	// Extract the first rune.
				92	r, length := utf8.DecodeRuneInString(str)
				93	if len(str) <= length { // If we're already past the end, there is nothing else to parse.
				94	return str, "", true, lbAny // LB3.
				95	}
				96
				97	// If we don't know the state, determine it now.
				98	if state < 0 {
				99	state, _ = transitionLineBreakState(state, r, nil, str[length:])
				100	}
				101
				102	// Transition until we find a boundary.
				103	var boundary int
				104	for {
				105	r, l := utf8.DecodeRuneInString(str[length:])
				106	state, boundary = transitionLineBreakState(state, r, nil, str[length+l:])
				107
				108	if boundary != LineDontBreak {
				109	return str[:length], str[length:], boundary == LineMustBreak, state
				110	}
				111
				112	length += l
				113	if len(str) <= length {
				114	return str, "", true, lbAny // LB3.
				115	}
				116	}
				117	}
				118
				119	// HasTrailingLineBreak returns true if the last rune in the given byte slice is
				120	// one of the hard line break code points defined in LB4 and LB5 of [UAX #14].
				121	//
				122	// [UAX #14]: https://www.unicode.org/reports/tr14/#Algorithm
				123	func HasTrailingLineBreak(b []byte) bool {
				124	r, _ := utf8.DecodeLastRune(b)
				125	property, _ := propertyLineBreak(r)
				126	return property == prBK \|\| property == prCR \|\| property == prLF \|\| property == prNL
				127	}
				128
				129	// HasTrailingLineBreakInString is like [HasTrailingLineBreak] but for a string.
				130	func HasTrailingLineBreakInString(str string) bool {
				131	r, _ := utf8.DecodeLastRuneInString(str)
				132	property, _ := propertyLineBreak(r)
				133	return property == prBK \|\| property == prCR \|\| property == prLF \|\| property == prNL
				134	}