Module unicode

This module provides support to handle the Unicode UTF-8 encoding.

Types

RuneImpl = int
  Source
Rune = distinct RuneImpl
type that can hold any Unicode character   Source
Rune16 = distinct int16
16 bit Unicode character   Source

Procs

proc `<=%`(a, b: Rune): bool {.raises: [], tags: [].}
  Source
proc `<%`(a, b: Rune): bool {.raises: [], tags: [].}
  Source
proc `==`(a, b: Rune): bool {.raises: [], tags: [].}
  Source
proc runeLen(s: string): int {.gcsafe, extern: "nuc$1", raises: [], tags: [].}
Returns the number of Unicode characters of the string s   Source
proc runeLenAt(s: string; i: Natural): int {.raises: [], tags: [].}
Returns the number of bytes the rune starting at s[i] takes   Source
proc validateUtf8(s: string): int {.raises: [], tags: [].}
Returns the position of the invalid byte in s if the string s does not hold valid UTF-8 data. Otherwise -1 is returned.   Source
proc runeAt(s: string; i: Natural): Rune {.raises: [], tags: [].}
Returns the Unicode character in s at byte index i   Source
proc toUTF8(c: Rune): string {.gcsafe, extern: "nuc$1", raises: [], tags: [].}
Converts a rune into its UTF-8 representation   Source
proc `$`(rune: Rune): string {.raises: [], tags: [].}
Converts a Rune to a string   Source
proc `$`(runes: seq[Rune]): string {.raises: [], tags: [].}
Converts a sequence of Runes to a string   Source
proc toLower(c: Rune): Rune {.gcsafe, extern: "nuc$1", procvar, raises: [], tags: [].}
Converts c into lower case. This works for any Unicode character. If possible, prefer toLower over toUpper.   Source
proc toUpper(c: Rune): Rune {.gcsafe, extern: "nuc$1", procvar, raises: [], tags: [].}
Converts c into upper case. This works for any Unicode character. If possible, prefer toLower over toUpper.   Source
proc toTitle(c: Rune): Rune {.gcsafe, extern: "nuc$1", procvar, raises: [], tags: [].}
Converts c to title case   Source
proc isLower(c: Rune): bool {.gcsafe, extern: "nuc$1", procvar, raises: [], tags: [].}
Returns true iff c is a lower case Unicode character. If possible, prefer isLower over isUpper.   Source
proc isUpper(c: Rune): bool {.gcsafe, extern: "nuc$1", procvar, raises: [], tags: [].}
Returns true iff c is an upper case Unicode character. If possible, prefer isLower over isUpper.   Source
proc isAlpha(c: Rune): bool {.gcsafe, extern: "nuc$1", procvar, raises: [], tags: [].}
Returns true iff c is an alpha Unicode character (i.e., a letter)   Source
proc isTitle(c: Rune): bool {.gcsafe, extern: "nuc$1", procvar, raises: [], tags: [].}
Returns true iff c is a Unicode titlecase character   Source
proc isWhiteSpace(c: Rune): bool {.gcsafe, extern: "nuc$1", procvar, raises: [], tags: [].}
Returns true iff c is a Unicode whitespace character   Source
proc isCombining(c: Rune): bool {.gcsafe, extern: "nuc$1", procvar, raises: [], tags: [].}
Returns true iff c is a Unicode combining character   Source
proc toRunes(s: string): seq[Rune] {.raises: [], tags: [].}
Obtains a sequence containing the Runes in s   Source
proc cmpRunesIgnoreCase(a, b: string): int {.gcsafe, extern: "nuc$1", procvar,
                                        raises: [], tags: [].}
Compares two UTF-8 strings and ignores the case. Returns:

0 iff a == b
< 0 iff a < b
> 0 iff a > b

  Source
proc reversed(s: string): string {.raises: [], tags: [].}
Returns the reverse of s, interpreting it as Unicode characters. Unicode combining characters are correctly interpreted as well:
assert reversed("Reverse this!") == "!siht esreveR"
assert reversed("先秦兩漢") == "漢兩秦先"
assert reversed("as⃝df̅") == "f̅ds⃝a"
assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
  Source
proc graphemeLen(s: string; i: Natural): Natural {.raises: [], tags: [].}
The number of bytes belonging to 's[i]' including following combining characters.   Source
proc lastRune(s: string; last: int): (Rune, int) {.raises: [], tags: [].}
Finds the last rune in 's[0..last]'. Returns the rune and its length in bytes.   Source

Iterators

iterator runes(s: string): Rune {.raises: [], tags: [].}
Iterates over every Unicode character of the string s   Source

Templates

template fastRuneAt(s: string; i: int; result: expr; doInc = true)
Returns the Unicode character s[i] in result. If doInc == true, i is incremented by the number of bytes that have been processed.   Source