| package uniseg |
| |
// Unicode property identifiers shared by the parsers in this package. Only the
// properties this package actually needs are listed.
//
// Values are assigned consecutively in declaration order, starting at 0 with
// prXX. Note that prAny is therefore 1, not 0. A table lookup that finds no
// entry for a code point produces 0, i.e. prXX, because propertySearch returns
// an all-zero array in that case.
const (
	prXX  = iota // 0: the value a failed table lookup yields.
	prAny        // 1: returned by propertyGraphemes for printable ASCII.

	// Grapheme properties must come first, to reduce the number of bits stored
	// in the state vector.
	prPrepend
	prCR
	prLF
	prControl
	prExtend
	prRegionalIndicator
	prSpacingMark
	prL
	prV
	prT
	prLV
	prLVT
	prZWJ
	prExtendedPictographic

	// The next two groups appear to follow the Word_Break and Sentence_Break
	// property value names of UAX #29 (grouping not verifiable from this file).
	prNewline
	prWSegSpace
	prDoubleQuote
	prSingleQuote
	prMidNumLet
	prNumeric
	prMidLetter
	prMidNum
	prExtendNumLet
	prALetter
	prFormat
	prHebrewLetter
	prKatakana

	prSp
	prSTerm
	prClose
	prSContinue
	prATerm
	prUpper
	prLower
	prSep
	prOLetter

	// Two-letter line breaking classes; propertyLineBreak returns prAL and
	// prNU for ASCII letters and digits.
	prCM
	prBA
	prBK
	prSP
	prEX
	prQU
	prAL
	prPR
	prPO
	prOP
	prCP
	prIS
	prHY
	prSY
	prNU
	prCL
	prNL
	prGL
	prAI
	prBB
	prHL
	prSA
	prJL
	prJV
	prJT
	prNS
	prZW
	prB2
	prIN
	prWJ
	prID
	prEB
	prCJ
	prH2
	prH3
	prSG
	prCB
	prRI
	prEM

	// East Asian Width values; propertyEastAsianWidth returns prNa and prN for
	// ASCII.
	prN
	prNa
	prA
	prW
	prH
	prF

	prEmojiPresentation
)
| |
// Unicode General Categories. Only the ones needed in the context of this
// package are included. Values are assigned consecutively in declaration
// order; the trailing comments give the long category names from the Unicode
// Character Database.
const (
	gcNone = iota // gcNone must be 0. It is what a failed table lookup yields.
	gcCc          // Control
	gcZs          // Space separator
	gcPo          // Other punctuation
	gcSc          // Currency symbol
	gcPs          // Open punctuation
	gcPe          // Close punctuation
	gcSm          // Math symbol
	gcPd          // Dash punctuation
	gcNd          // Decimal number
	gcLu          // Uppercase letter
	gcSk          // Modifier symbol
	gcPc          // Connector punctuation
	gcLl          // Lowercase letter
	gcSo          // Other symbol
	gcLo          // Other letter
	gcPi          // Initial punctuation
	gcCf          // Format
	gcNo          // Other number
	gcPf          // Final punctuation
	gcLC          // Cased letter
	gcLm          // Modifier letter
	gcMn          // Nonspacing mark
	gcMe          // Enclosing mark
	gcMc          // Spacing mark
	gcNl          // Letter number
	gcZl          // Line separator
	gcZp          // Paragraph separator
	gcCn          // Unassigned
	gcCs          // Surrogate
	gcCo          // Private use
)
| |
// Special code points: the two variation selectors that request text or emoji
// presentation.
const (
	vs15 = 0xfe0e // Variation Selector-15 (text presentation), U+FE0E
	vs16 = 0xfe0f // Variation Selector-16 (emoji presentation), U+FE0F
)
| |
// propertySearch looks r up in a table of code point ranges using binary
// search. Every entry describes an inclusive range: element 0 is its first
// code point and element 1 its last. The entry whose range contains r is
// returned; if there is none, the zero value of E (all elements 0) is returned.
//
// NOTE(review): the search assumes dictionary is sorted by range start and
// that ranges do not overlap — confirm against the tables passed in.
func propertySearch[E interface{ [3]int | [4]int }](dictionary []E, r rune) (result E) {
	cp := int(r)
	lo, hi := 0, len(dictionary)
	for lo < hi {
		mid := (lo + hi) / 2
		entry := dictionary[mid]
		switch {
		case cp < entry[0]:
			// r lies before this range: continue in the lower half.
			hi = mid
		case cp > entry[1]:
			// r lies after this range: continue in the upper half.
			lo = mid + 1
		default:
			return entry
		}
	}

	// Nothing matched; result still holds the zero value.
	return result
}
| |
| // property returns the Unicode property value (see constants above) of the |
| // given code point. |
| func property(dictionary [][3]int, r rune) int { |
| return propertySearch(dictionary, r)[2] |
| } |
| |
| // propertyLineBreak returns the Unicode property value and General Category |
| // (see constants above) of the given code point, as listed in the line break |
| // code points table, while fast tracking ASCII digits and letters. |
| func propertyLineBreak(r rune) (property, generalCategory int) { |
| if r >= 'a' && r <= 'z' { |
| return prAL, gcLl |
| } |
| if r >= 'A' && r <= 'Z' { |
| return prAL, gcLu |
| } |
| if r >= '0' && r <= '9' { |
| return prNU, gcNd |
| } |
| entry := propertySearch(lineBreakCodePoints, r) |
| return entry[2], entry[3] |
| } |
| |
| // propertyGraphemes returns the Unicode grapheme cluster property value of the |
| // given code point while fast tracking ASCII characters. |
| func propertyGraphemes(r rune) int { |
| if r >= 0x20 && r <= 0x7e { |
| return prAny |
| } |
| if r == 0x0a { |
| return prLF |
| } |
| if r == 0x0d { |
| return prCR |
| } |
| if r >= 0 && r <= 0x1f || r == 0x7f { |
| return prControl |
| } |
| return property(graphemeCodePoints, r) |
| } |
| |
| // propertyEastAsianWidth returns the Unicode East Asian Width property value of |
| // the given code point while fast tracking ASCII characters. |
| func propertyEastAsianWidth(r rune) int { |
| if r >= 0x20 && r <= 0x7e { |
| return prNa |
| } |
| if r >= 0 && r <= 0x1f || r == 0x7f { |
| return prN |
| } |
| return property(eastAsianWidth, r) |
| } |