parseutils
This module contains helpers for parsing tokens, numbers, integers, floats, identifiers, etc.
To unpack raw bytes look at the streams module.
let logs = @["2019-01-10: OK_", "2019-01-11: FAIL_", "2019-01: aaaa"] var outp: seq[string] for log in logs: var res: string if parseUntil(log, res, ':') == 10: # YYYY-MM-DD == 10 outp.add(res & " - " & captureBetween(log, ' ', '_')) doAssert outp == @["2019-01-10 - OK", "2019-01-11 - FAIL"]
from strutils import Digits, parseInt let input1 = "2019 school start" input2 = "3 years back" startYear = input1[0 .. skipWhile(input1, Digits)-1] # 2019 yearsBack = input2[0 .. skipWhile(input2, Digits)-1] # 3 examYear = parseInt(startYear) + parseInt(yearsBack) doAssert "Examination is in " & $examYear == "Examination is in 2022"
See also:
- strutils module for combined and identical parsing procs
- json module for a JSON parser
- parsecfg module for a configuration file parser
- parsecsv module for a simple CSV (comma separated value) parser
- parseopt module for a command line parser
- parsexml module for a XML / HTML parser
- other parsers for other parsers
Types
InterpolatedKind = enum ikStr, ## ``str`` part of the interpolated string ikDollar, ## escaped ``$`` part of the interpolated string ikVar, ## ``var`` part of the interpolated string ikExpr ## ``expr`` part of the interpolated string
- Describes which part of the interpolated string is yielded by `interpolatedFragments`; for example, "str$$$var${expr}" contains all four kinds: `str` (ikStr), `$$` (ikDollar), `$var` (ikVar) and `${expr}` (ikExpr). Source Edit
Procs
proc parseBin[T: SomeInteger](s: string; number: var T; start = 0; maxLen = 0): int {...}{. noSideEffect.}
-
Parses a binary number and stores its value in `number`.
Returns the number of the parsed characters or 0 in case of an error. If error, the value of `number` is not changed.
If `maxLen == 0`, the parsing continues until the first non-bin character or to the end of the string. Otherwise, no more than `maxLen` characters are parsed starting from the `start` position.
It does not check for overflow. If the value represented by the string is too big to fit into `number`, only the value of the last fitting characters will be stored in `number` without producing an error.
Example:
var num: int doAssert parseBin("0100_1110_0110_1001_1110_1101", num) == 29 doAssert num == 5138925 doAssert parseBin("3", num) == 0 var num8: int8 doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8) == 32 doAssert num8 == 0b1110_1101'i8 doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8, 3, 9) == 9 doAssert num8 == 0b0100_1110'i8 var num8u: uint8 doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8u) == 32 doAssert num8u == 237 var num64: int64 doAssert parseBin("0100111001101001111011010100111001101001", num64) == 40 doAssert num64 == 336784608873
Source Edit proc parseOct[T: SomeInteger](s: string; number: var T; start = 0; maxLen = 0): int {...}{. noSideEffect.}
-
Parses an octal number and stores its value in `number`.
Returns the number of the parsed characters or 0 in case of an error. If error, the value of `number` is not changed.
If `maxLen == 0`, the parsing continues until the first non-oct character or to the end of the string. Otherwise, no more than `maxLen` characters are parsed starting from the `start` position.
It does not check for overflow. If the value represented by the string is too big to fit into `number`, only the value of the last fitting characters will be stored in `number` without producing an error.
Example:
var num: int doAssert parseOct("0o23464755", num) == 10 doAssert num == 5138925 doAssert parseOct("8", num) == 0 var num8: int8 doAssert parseOct("0o_1464_755", num8) == 11 doAssert num8 == -19 doAssert parseOct("0o_1464_755", num8, 3, 3) == 3 doAssert num8 == 102 var num8u: uint8 doAssert parseOct("1464755", num8u) == 7 doAssert num8u == 237 var num64: int64 doAssert parseOct("2346475523464755", num64) == 16 doAssert num64 == 86216859871725
Source Edit proc parseHex[T: SomeInteger](s: string; number: var T; start = 0; maxLen = 0): int {...}{. noSideEffect.}
-
Parses a hexadecimal number and stores its value in `number`.
Returns the number of the parsed characters or 0 in case of an error. If error, the value of `number` is not changed.
If `maxLen == 0`, the parsing continues until the first non-hex character or to the end of the string. Otherwise, no more than `maxLen` characters are parsed starting from the `start` position.
It does not check for overflow. If the value represented by the string is too big to fit into `number`, only the value of the last fitting characters will be stored in `number` without producing an error.
Example:
var num: int doAssert parseHex("4E_69_ED", num) == 8 doAssert num == 5138925 doAssert parseHex("X", num) == 0 doAssert parseHex("#ABC", num) == 4 var num8: int8 doAssert parseHex("0x_4E_69_ED", num8) == 11 doAssert num8 == 0xED'i8 doAssert parseHex("0x_4E_69_ED", num8, 3, 2) == 2 doAssert num8 == 0x4E'i8 var num8u: uint8 doAssert parseHex("0x_4E_69_ED", num8u) == 11 doAssert num8u == 237 var num64: int64 doAssert parseHex("4E69ED4E69ED", num64) == 12 doAssert num64 == 86216859871725
Source Edit proc parseIdent(s: string; ident: var string; start = 0): int {...}{.raises: [], tags: [].}
- Parses an identifier and stores it in `ident`. Returns the number of the parsed characters or 0 in case of an error.
Example:
var res: string doAssert parseIdent("Hello World", res, 0) == 5 doAssert res == "Hello" doAssert parseIdent("Hello World", res, 1) == 4 doAssert res == "ello" doAssert parseIdent("Hello World", res, 6) == 5 doAssert res == "World"
Source Edit proc parseIdent(s: string; start = 0): string {...}{.raises: [], tags: [].}
- Parses an identifier and returns it or an empty string in case of an error.
Example:
doAssert parseIdent("Hello World", 0) == "Hello" doAssert parseIdent("Hello World", 1) == "ello" doAssert parseIdent("Hello World", 5) == "" doAssert parseIdent("Hello World", 6) == "World"
Source Edit proc skipWhitespace(s: string; start = 0): int {...}{.inline, raises: [], tags: [].}
- Skips the whitespace starting at `s[start]`. Returns the number of skipped characters.
Example:
doAssert skipWhitespace("Hello World", 0) == 0 doAssert skipWhitespace(" Hello World", 0) == 1 doAssert skipWhitespace("Hello World", 5) == 1 doAssert skipWhitespace("Hello  World", 5) == 2
Source Edit proc skip(s, token: string; start = 0): int {...}{.inline, raises: [], tags: [].}
- Skips the `token` starting at `s[start]`. Returns the length of `token` or 0 if there was no `token` at `s[start]`.
Example:
doAssert skip("2019-01-22", "2019", 0) == 4 doAssert skip("2019-01-22", "19", 0) == 0 doAssert skip("2019-01-22", "19", 2) == 2 doAssert skip("CAPlow", "CAP", 0) == 3 doAssert skip("CAPlow", "cap", 0) == 0
Source Edit proc skipIgnoreCase(s, token: string; start = 0): int {...}{.raises: [], tags: [].}
- Same as `skip` but case is ignored for token matching.
Example:
doAssert skipIgnoreCase("CAPlow", "CAP", 0) == 3 doAssert skipIgnoreCase("CAPlow", "cap", 0) == 3
Source Edit proc skipUntil(s: string; until: set[char]; start = 0): int {...}{.inline, raises: [], tags: [].}
- Skips all characters until one char from the set
until
is found or the end is reached. Returns number of characters skipped.Example:
doAssert skipUntil("Hello World", {'W', 'e'}, 0) == 1 doAssert skipUntil("Hello World", {'W'}, 0) == 6 doAssert skipUntil("Hello World", {'W', 'd'}, 0) == 6
Source Edit proc skipUntil(s: string; until: char; start = 0): int {...}{.inline, raises: [], tags: [].}
- Skips all characters until the char
until
is found or the end is reached. Returns number of characters skipped.Example:
doAssert skipUntil("Hello World", 'o', 0) == 4 doAssert skipUntil("Hello World", 'o', 4) == 0 doAssert skipUntil("Hello World", 'W', 0) == 6 doAssert skipUntil("Hello World", 'w', 0) == 11
Source Edit proc skipWhile(s: string; toSkip: set[char]; start = 0): int {...}{.inline, raises: [], tags: [].}
- Skips all characters while one char from the set `toSkip` is found. Returns number of characters skipped.
Example:
doAssert skipWhile("Hello World", {'H', 'e'}) == 2 doAssert skipWhile("Hello World", {'e'}) == 0 doAssert skipWhile("Hello World", {'W', 'o', 'r'}, 6) == 3
Source Edit proc parseUntil(s: string; token: var string; until: set[char]; start = 0): int {...}{. inline, raises: [], tags: [].}
- Parses a token and stores it in `token`. Returns the number of the parsed characters or 0 in case of an error. A token consists of the characters not in `until`.
Example:
var myToken: string doAssert parseUntil("Hello World", myToken, {'W', 'o', 'r'}) == 4 doAssert myToken == "Hell" doAssert parseUntil("Hello World", myToken, {'W', 'r'}) == 6 doAssert myToken == "Hello " doAssert parseUntil("Hello World", myToken, {'W', 'r'}, 3) == 3 doAssert myToken == "lo "
Source Edit proc parseUntil(s: string; token: var string; until: char; start = 0): int {...}{. inline, raises: [], tags: [].}
- Parses a token and stores it in `token`. Returns the number of the parsed characters or 0 in case of an error. A token consists of any character that is not the `until` character.
Example:
var myToken: string doAssert parseUntil("Hello World", myToken, 'W') == 6 doAssert myToken == "Hello " doAssert parseUntil("Hello World", myToken, 'o') == 4 doAssert myToken == "Hell" doAssert parseUntil("Hello World", myToken, 'o', 2) == 2 doAssert myToken == "ll"
Source Edit proc parseUntil(s: string; token: var string; until: string; start = 0): int {...}{. inline, raises: [], tags: [].}
- Parses a token and stores it in `token`. Returns the number of the parsed characters or 0 in case of an error. A token consists of any character that comes before the `until` token.
Example:
var myToken: string doAssert parseUntil("Hello World", myToken, "Wor") == 6 doAssert myToken == "Hello " doAssert parseUntil("Hello World", myToken, "Wor", 2) == 4 doAssert myToken == "llo "
Source Edit proc parseWhile(s: string; token: var string; validChars: set[char]; start = 0): int {...}{. inline, raises: [], tags: [].}
- Parses a token and stores it in `token`. Returns the number of the parsed characters or 0 in case of an error. A token consists of the characters in `validChars`.
Example:
var myToken: string doAssert parseWhile("Hello World", myToken, {'W', 'o', 'r'}, 0) == 0 doAssert myToken.len() == 0 doAssert parseWhile("Hello World", myToken, {'W', 'o', 'r'}, 6) == 3 doAssert myToken == "Wor"
Source Edit proc captureBetween(s: string; first: char; second = '\x00'; start = 0): string {...}{. raises: [], tags: [].}
- Finds the first occurrence of `first`, then returns everything from there up to `second` (if `second` is '\0', then `first` is used).
Example:
doAssert captureBetween("Hello World", 'e') == "llo World" doAssert captureBetween("Hello World", 'e', 'r') == "llo Wo" doAssert captureBetween("Hello World", 'l', start = 6) == "d"
Source Edit proc parseBiggestInt(s: string; number: var BiggestInt; start = 0): int {...}{. gcsafe, extern: "npuParseBiggestInt", noSideEffect, raises: [ValueError], tags: [].}
- Parses an integer starting at `start` and stores the value into `number`. Result is the number of processed chars or 0 if there is no integer. `ValueError` is raised if the parsed integer is out of the valid range.
Example:
var res: BiggestInt doAssert parseBiggestInt("9223372036854775807", res, 0) == 19 doAssert res == 9223372036854775807
Source Edit proc parseInt(s: string; number: var int; start = 0): int {...}{.gcsafe, extern: "npuParseInt", noSideEffect, raises: [ValueError], tags: [].}
- Parses an integer starting at `start` and stores the value into `number`. Result is the number of processed chars or 0 if there is no integer. `ValueError` is raised if the parsed integer is out of the valid range.
Example:
var res: int doAssert parseInt("2019", res, 0) == 4 doAssert res == 2019 doAssert parseInt("2019", res, 2) == 2 doAssert res == 19
Source Edit proc parseSaturatedNatural(s: string; b: var int; start = 0): int {...}{.raises: [], tags: [].}
- Parses a natural number into `b`. This cannot raise an overflow error. `high(int)` is returned for an overflow. The number of processed characters is returned. This is usually what you really want to use instead of parseInt.
Example:
var res = 0 discard parseSaturatedNatural("848", res) doAssert res == 848
Source Edit proc parseBiggestUInt(s: string; number: var BiggestUInt; start = 0): int {...}{. gcsafe, extern: "npuParseBiggestUInt", noSideEffect, raises: [ValueError], tags: [].}
- Parses an unsigned integer starting at `start` and stores the value into `number`. `ValueError` is raised if the parsed integer is out of the valid range.
Example:
var res: BiggestUInt doAssert parseBiggestUInt("12", res, 0) == 2 doAssert res == 12 doAssert parseBiggestUInt("1111111111111111111", res, 0) == 19 doAssert res == 1111111111111111111'u64
Source Edit proc parseUInt(s: string; number: var uint; start = 0): int {...}{.gcsafe, extern: "npuParseUInt", noSideEffect, raises: [ValueError], tags: [].}
- Parses an unsigned integer starting at `start` and stores the value into `number`. `ValueError` is raised if the parsed integer is out of the valid range.
Example:
var res: uint doAssert parseUInt("3450", res) == 4 doAssert res == 3450 doAssert parseUInt("3450", res, 2) == 2 doAssert res == 50
Source Edit proc parseBiggestFloat(s: string; number: var BiggestFloat; start = 0): int {...}{. magic: "ParseBiggestFloat", importc: "nimParseBiggestFloat", noSideEffect.}
- Parses a float starting at `start` and stores the value into `number`. Result is the number of processed chars or 0 if a parsing error occurred.
Source Edit proc parseFloat(s: string; number: var float; start = 0): int {...}{.gcsafe, extern: "npuParseFloat", noSideEffect, raises: [], tags: [].}
- Parses a float starting at `start` and stores the value into `number`. Result is the number of processed chars or 0 if a parsing error occurred.
Example:
var res: float doAssert parseFloat("32", res, 0) == 2 doAssert res == 32.0 doAssert parseFloat("32.57", res, 0) == 5 doAssert res == 32.57 doAssert parseFloat("32.57", res, 3) == 2 doAssert res == 57.00
Source Edit
Iterators
iterator interpolatedFragments(s: string): tuple[kind: InterpolatedKind, value: string] {...}{.raises: [ValueError], tags: [].}
- Tokenizes the string `s` into substrings for interpolation purposes.
Example:
var outp: seq[tuple[kind: InterpolatedKind, value: string]] for k, v in interpolatedFragments(" $this is ${an example} $$"): outp.add (k, v) doAssert outp == @[(ikStr, " "), (ikVar, "this"), (ikStr, " is "), (ikExpr, "an example"), (ikStr, " "), (ikDollar, "$")]
Source Edit
© 2006–2021 Andreas Rumpf
Licensed under the MIT License.
https://nim-lang.org/docs/parseutils.html