-- yaml_parser.lua
  1. --
  2. -- Copyright 2019 The FATE Authors. All Rights Reserved.
  3. --
  4. -- Licensed under the Apache License, Version 2.0 (the "License");
  5. -- you may not use this file except in compliance with the License.
  6. -- You may obtain a copy of the License at
  7. --
  8. -- http://www.apache.org/licenses/LICENSE-2.0
  9. --
  10. -- Unless required by applicable law or agreed to in writing, software
  11. -- distributed under the License is distributed on an "AS IS" BASIS,
  12. -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. -- See the License for the specific language governing permissions and
  14. -- limitations under the License.
  15. --
  16. local schar = string.char
  17. local ssub, gsub = string.sub, string.gsub
  18. local sfind, smatch = string.find, string.match
  19. local tinsert, tremove = table.insert, table.remove
  -- Maps the character following a backslash inside a double-quoted scalar
  -- to the byte(s) it stands for. Characters not listed here are handled by
  -- the caller.
  local UNESCAPES = {
    -- NUL (two spellings) and the 0x85 byte
    ['0'] = '\0',
    z = '\0',
    N = '\133',
    -- C-style control characters
    a = '\a',
    b = '\b',
    t = '\t',
    n = '\n',
    v = '\v',
    f = '\f',
    r = '\r',
    -- ESC and the literal backslash
    e = '\27',
    ['\\'] = '\\',
  }
  -- helper functions

  -- Returns a new sequence holding the elements of `list` accepted by `pred`.
  -- NOTE: this local shadows Lua's built-in `select` for the rest of the file.
  -- @param list sequence (1-based) to filter
  -- @param pred function(value, index) returning a truthy value to keep it
  -- @return a new sequence with the kept values, in their original order
  local function select(list, pred)
    local selected = {}
    -- Lua sequences start at 1; starting at 0 only probed a slot that is
    -- not part of the sequence.
    for i = 1, #list do
      local v = list[i]
      if v and pred(v, i) then
        selected[#selected + 1] = v
      end
    end
    return selected
  end
  -- Measures the run of whitespace at the start of `line`.
  -- return: indent_count, left_string
  --   indent_count is the number of leading whitespace characters (0 if none)
  --   left_string is `line` with that leading whitespace removed
  local function count_indent(line)
    local _, last = sfind(line, '^%s+')
    if last then
      return last, ssub(line, last + 1)
    end
    return 0, line
  end
  -- Strips leading and trailing whitespace from `str`.
  -- Returns exactly one value: string.gsub would also return its
  -- substitution count, which leaks into argument lists and table
  -- constructors, so the capture of string.match is used instead.
  -- @param str string to trim
  -- @return the trimmed string
  local function trim(str)
    return smatch(str, '^%s*(.-)%s*$')
  end
  -- Returns `str` without its leading whitespace.
  local function ltrim(str)
    -- '^%s*' always matches; `last` is 0 when there is nothing to strip.
    local _, last = sfind(str, '^%s*')
    return ssub(str, last + 1)
  end
  -- Returns `str` without its trailing whitespace.
  local function rtrim(str)
    -- The first position where only whitespace remains up to the end of the
    -- string; it is #str + 1 when there is no trailing whitespace.
    local tail_start = sfind(str, '%s*$')
    return ssub(str, 1, tail_start - 1)
  end
  -- Tells whether `line` carries no content: it is empty, whitespace only,
  -- or a comment line (first non-blank character is '#').
  -- The result is only meant to be tested for truthiness: it is `true` for
  -- the empty string, a match position for the other two cases, else nil.
  local function isemptyline(line)
    if line == '' then
      return true
    end
    return sfind(line, '^%s*$') or sfind(line, '^%s*#')
  end
  -- Returns true when `haystack` begins with the literal string `needle`.
  local function startswith(haystack, needle)
    local head = ssub(haystack, 1, #needle)
    return head == needle
  end
  -- Truthy when `line` begins with `needle` and everything after it is
  -- blank or a comment (as judged by isemptyline).
  local function startswithline(line, needle)
    if not startswith(line, needle) then
      return false
    end
    local remainder = ssub(line, #needle + 1)
    return isemptyline(remainder)
  end
  -- class
  -- Minimal prototype-based class helper. `class.__meta` holds the
  -- metamethods that every class table created by `class.def` receives.
  local class = { __meta = {} }

  -- Calling a class table creates an instance whose metatable is that class
  -- and forwards the call arguments to the optional `__init` hook.
  class.__meta.__call = function(cls, ...)
    local instance = setmetatable({}, cls)
    local init = cls.__init
    if init then
      init(instance, ...)
    end
    return instance
  end

  -- Creates a class table derived from `base` (defaults to `class`).
  -- @param base parent class; when called as `class:def(...)` this is `class`
  -- @param type name stored in the new class as `__type`
  -- @param cls  optional table to turn into the class (a fresh one otherwise)
  -- @return the class table
  function class.def(base, type, cls)
    base = base or class
    -- `__metatable = base` makes getmetatable(cls) report the parent.
    local meta = { __metatable = base, __index = base }
    for name, handler in pairs(base.__meta) do
      meta[name] = handler
    end
    cls = setmetatable(cls or {}, meta)
    cls.__index = cls
    -- Instances use `cls` as metatable, so getmetatable(instance) == cls.
    cls.__metatable = cls
    cls.__type = type
    cls.__meta = meta
    return cls
  end
  -- Marker classes for the node kinds produced by the parser; each one is
  -- created with class:def under its own name.
  local types = {}
  for _, kind in ipairs({ 'null', 'map', 'seq' }) do
    types[kind] = class:def(kind)
  end
  local Null = types.null

  -- Null instances print as 'yaml.null'.
  Null.__tostring = function()
    return 'yaml.null'
  end

  -- Returns true for nil and for any table whose metatable is reported as
  -- Null (i.e. a Null instance); false for everything else.
  Null.isnull = function(v)
    return v == nil or (type(v) == 'table' and getmetatable(v) == Null)
  end

  -- The shared sentinel used wherever a YAML null has to be stored.
  local null = Null()
  -- implementation functions

  -- Reads one scalar token from the start of `line`.
  --
  -- Leading blanks/tabs are skipped. Three forms are recognised:
  --   * 'single quoted'  - taken verbatim up to the next single quote
  --   * "double quoted"  - backslash escapes are decoded via UNESCAPES,
  --                        plus \xHH for an arbitrary byte
  --   * plain            - runs until a stopper character, a ':' followed by
  --                        a space or end of line, or a '#' preceded by a space
  --
  -- @param line    text to read from
  -- @param stopper optional string; each of its characters ends a plain token
  -- @return token, rest - the decoded token and the unread remainder of the
  --         line. Returns nil, line when no token can be read (an unterminated
  --         single-quoted string, or a line that starts with a '- ' / ': '
  --         indicator).
  local function parse_string(line, stopper)
    stopper = stopper or ''
    local q = ssub(line, 1, 1)
    if q == ' ' or q == '\t' then
      -- Keep the stopper set while skipping leading whitespace.
      return parse_string(ssub(line, 2), stopper)
    end

    if q == "'" then
      local close = sfind(line, "'", 2, true)
      if not close then
        return nil, line
      end
      return ssub(line, 2, close - 1), ssub(line, close + 1)
    end

    if q == '"' then
      local i, n, buf = 2, #line, {}
      -- `<=` so the last character is examined too; an unterminated string
      -- then keeps all of its characters.
      while i <= n do
        local c = ssub(line, i, i)
        if c == '\\' then
          local e = ssub(line, i + 1, i + 1)
          if UNESCAPES[e] ~= nil then
            buf[#buf + 1] = UNESCAPES[e]
          elseif e == 'x' then
            -- \xHH: exactly two hex digits taken from the line itself.
            local h = ssub(line, i + 2, i + 3)
            if sfind(h, '^%x%x$') then
              buf[#buf + 1] = schar(tonumber(h, 16))
              i = i + 2
            else
              buf[#buf + 1] = 'x'
            end
          else
            -- Unknown escape: keep the escaped character as is.
            buf[#buf + 1] = e
          end
          i = i + 1 -- skip the character after the backslash
        elseif c == q then
          break
        else
          buf[#buf + 1] = c
        end
        i = i + 1
      end
      return table.concat(buf), ssub(line, i + 1)
    end

    if q == '-' or q == ':' then
      -- A lone indicator (or indicator + space) is structure, not a scalar.
      if ssub(line, 2, 2) == ' ' or #line == 1 then
        return nil, line
      end
    end

    -- Plain scalar: scan by index instead of re-slicing the line per char.
    local i, n = 1, #line
    while i <= n do
      local c = ssub(line, i, i)
      if sfind(stopper, c, 1, true) then
        break
      elseif c == ':' and (i == n or ssub(line, i + 1, i + 1) == ' ') then
        break
      elseif c == '#' and ssub(line, i - 1, i - 1) == ' ' then
        break
      end
      i = i + 1
    end
    return rtrim(ssub(line, 1, i - 1)), ssub(line, i)
  end
  -- Parses a flow-style collection ({...} or [...]) starting at `line`,
  -- pulling continuation lines from `lines` when the current one runs out.
  --
  -- A stack of frames {v = value, t = tag} drives the parse. Tags:
  --   '{' / '['  open container      '}' / ']'  closed (nested) container
  --   's'        scalar              ':'        scalar/container used as key
  -- A closing bracket that follows a pending value is handled by injecting
  -- a ',' so the value is flushed into its parent first.
  --
  -- Entries of a '{' container without a value are stored as `key = true`.
  --
  -- @param line  text starting at the opening bracket
  -- @param lines remaining input lines; consumed lines are removed
  -- @return value, rest - the outermost container and the unread text, which
  --         still starts with the outermost closing bracket
  -- Raises 'invalid flowstyle line: ...' on malformed input.
  local function parse_flowstyle(line, lines)
    local stack = {}

    local function fail()
      error('invalid flowstyle line: '..line)
    end

    -- Top frame of the stack; malformed input can leave the stack empty.
    local function top()
      local frame = stack[#stack]
      if not frame then
        fail()
      end
      return frame
    end

    while true do
      if #line == 0 then
        if #lines == 0 then
          break
        end
        line = tremove(lines, 1)
      end
      local c = ssub(line, 1, 1)
      if c == '#' then
        -- Comment: drop the remainder of this line.
        line = ''
      elseif c == ' ' or c == '\t' or c == '\r' or c == '\n' then
        line = ssub(line, 2)
      elseif c == '{' or c == '[' then
        tinsert(stack, {v={}, t=c})
        line = ssub(line, 2)
      elseif c == ':' then
        -- Re-tag the value just read as a pending map key.
        local key = tremove(stack)
        if not key then
          fail()
        end
        tinsert(stack, {v=key.v, t=':'})
        line = ssub(line, 2)
      elseif c == ',' then
        local value = tremove(stack)
        if not value or value.t == ':' or value.t == '{' or value.t == '[' then
          fail()
        end
        local parent = top()
        if parent.t == ':' then
          -- map entry: the parent frame is the key
          local key = tremove(stack)
          top().v[key.v] = value.v
        elseif parent.t == '{' then
          -- set entry
          parent.v[value.v] = true
        elseif parent.t == '[' then
          -- sequence item
          tinsert(parent.v, value.v)
        end
        line = ssub(line, 2)
      elseif c == '}' then
        local frame = top()
        if frame.t == '{' then
          if #stack == 1 then break end
          frame.t = '}'
          line = ssub(line, 2)
        else
          -- Flush the pending value before closing.
          line = ','..line
        end
      elseif c == ']' then
        local frame = top()
        if frame.t == '[' then
          if #stack == 1 then break end
          frame.t = ']'
          line = ssub(line, 2)
        else
          -- Flush the pending value before closing.
          line = ','..line
        end
      else
        local s, rest = parse_string(line, ',{}[]')
        if not s then
          fail()
        end
        tinsert(stack, {v=s, t='s'})
        line = rest
      end
    end

    local root = stack[1]
    if not root then
      fail()
    end
    return root.v, line
  end
  -- Converts the text of a value into a Lua value.
  --
  --   * '' , '~', null/Null/NULL            -> the `null` sentinel
  --   * text starting with '{' or '['       -> flow-style collection
  --   * quoted text                         -> the unquoted string
  --   * true/false, .inf/.nan, integers and
  --     simple decimals                     -> boolean / number
  --   * anything else                       -> the text itself
  --
  -- @param line  value text (leading whitespace allowed)
  -- @param lines remaining input lines; only consumed by flow collections
  --              that span several lines
  -- @return the value. For flow collections the unread rest of the line is
  --         returned as a second value.
  -- Raises 'unsupported line: ...' for text starting with a quote, '!' or
  -- '$' that cannot be read as a string.
  local function parse_scalar(line, lines)
    line = ltrim(line)
    local first = ssub(line, 1, 1)

    -- Flow collections and quoted strings may legitimately contain '#', so
    -- they are handed over untouched; their parsers skip trailing comments.
    if first == '{' or first == '[' then
      return parse_flowstyle(line, lines)
    end
    if first == '#' then
      line = ''
    elseif first ~= "'" and first ~= '"' then
      -- In a plain scalar a comment starts at a '#' preceded by whitespace.
      line = gsub(line, '%s+#.*$', '')
      -- Trailing blanks / '\r' must not defeat the typed conversions below.
      line = rtrim(line)
    end

    if line == '' or line == '~' then
      return null
    end

    local s = parse_string(line)
    if s and s ~= line then
      return s
    end

    -- Special cases
    if sfind('\'"!$', first, 1, true) then
      error('unsupported line: '..line)
    end

    -- Regular unquoted string
    local v = line
    if v == 'null' or v == 'Null' or v == 'NULL' then
      return null
    elseif v == 'true' or v == 'True' or v == 'TRUE' then
      return true
    elseif v == 'false' or v == 'False' or v == 'FALSE' then
      return false
    elseif v == '.inf' or v == '.Inf' or v == '.INF' then
      return math.huge
    elseif v == '+.inf' or v == '+.Inf' or v == '+.INF' then
      return math.huge
    elseif v == '-.inf' or v == '-.Inf' or v == '-.INF' then
      return -math.huge
    elseif v == '.nan' or v == '.NaN' or v == '.NAN' then
      return 0 / 0
    elseif sfind(v, '^[%+%-]?[0-9]+$') or sfind(v, '^[%+%-]?[0-9]+%.$') then
      return tonumber(v)
    elseif sfind(v, '^[%+%-]?[0-9]+%.[0-9]+$') then
      return tonumber(v)
    end
    return v
  end
  -- Forward declaration: parse_seq and parse_map call each other.
  local parse_map

  -- Parses a block sequence ("- item" lines) from the front of `lines`.
  --
  -- @param line   must be ''; anything else raises an error
  -- @param lines  remaining input lines; consumed lines are removed
  -- @param indent indentation of the sequence's "-" lines, or -1 to accept
  --               any indentation
  -- @return a table with metatable types.seq holding the items in order
  -- Raises on a non-empty `line` and on a line indented deeper than `indent`.
  local function parse_seq(line, lines, indent)
    local seq = setmetatable({}, types.seq)
    if line ~= '' then
      error('not seq line: '..tostring(line))
    end
    while #lines > 0 do
      line = lines[1]
      local level = count_indent(line)
      if level < indent and indent ~= -1 then
        return seq
      elseif level > indent and indent ~= -1 then
        error("found bad indenting in line: ".. line)
      end

      -- Locate the item marker: a dash followed by whitespace, or a dash at
      -- the very end of the line.
      local i, j = sfind(line, '%-%s+')
      if not i then
        i, j = sfind(line, '%-$')
        if not i then
          return seq
        end
      end

      local rest = ssub(line, j+1)
      if sfind(rest, '^[^\'\"%s]*:') then
        -- "- key: ..." : blank out the marker so the line reads as the first
        -- entry of a map indented by the marker's width.
        local indent2 = j
        lines[1] = string.rep(' ', indent2)..rest
        tinsert(seq, parse_map('', lines, indent2))
      elseif isemptyline(rest) then
        -- Bare "-": the item is whatever block follows.
        tremove(lines, 1)
        if #lines == 0 then
          tinsert(seq, null)
          return seq
        end
        local indent2 = count_indent(lines[1])
        if sfind(lines[1], '^%s*%-') then
          if indent2 == indent then
            -- The next dash is a sibling, so this item is empty.
            tinsert(seq, null)
          else
            tinsert(seq, parse_seq('', lines, indent2))
          end
        else
          tinsert(seq, parse_map('', lines, indent2))
        end
      else
        tremove(lines, 1)
        -- parse_scalar may return a second value; keep only the first so it
        -- is not taken as an extra table.insert argument.
        local item = parse_scalar(rest, lines)
        tinsert(seq, item)
      end
    end
    return seq
  end
  -- Parses a block mapping ("key: value" lines) from the front of `lines`.
  --
  -- Keys are converted with parse_scalar when that yields a non-string value
  -- (e.g. a number); otherwise the key text is used. A repeated key is
  -- reported with print and stored under "<key>_<n>", with the smallest n
  -- that is still free.
  --
  -- @param line   must be blank or a comment; anything else raises an error
  -- @param lines  remaining input lines; consumed lines are removed
  -- @param indent indentation of this mapping's keys
  -- @return a table with metatable types.map
  -- Raises on a line indented deeper than `indent` and on a line that is
  -- not of the form "key:".
  function parse_map(line, lines, indent)
    if not isemptyline(line) then
      error('not map line: '..line)
    end
    local map = setmetatable({}, types.map)
    while #lines > 0 do
      line = lines[1]
      local level = count_indent(line)
      if level < indent then
        return map
      elseif level > indent then
        error("found bad indenting in line: ".. line)
      end

      local key
      local s, rest = parse_string(line)
      if s and startswith(rest, ':') then
        local sc = parse_scalar(s, {})
        if sc and type(sc) ~= 'string' then
          key = sc
        else
          key = s
        end
        line = ssub(rest, 2)
      else
        error("failed to classify line: "..line)
      end

      if map[key] ~= nil then
        -- The key may be a number, boolean or the null sentinel, none of
        -- which can be concatenated directly.
        local base = tostring(key)
        print("found a duplicate key '"..base.."' in line: "..line)
        local suffix = 1
        -- Probe the same name that gets assigned below.
        while map[base..'_'..suffix] ~= nil do
          suffix = suffix + 1
        end
        key = base..'_'..suffix
      end

      line = ltrim(line)
      tremove(lines, 1)
      if not isemptyline(line) then
        -- Inline value on the same line as the key.
        map[key] = parse_scalar(line, lines)
      elseif #lines == 0 then
        map[key] = null
        return map
      else
        -- No inline value: it is the nested block that follows, if any.
        local indent2 = count_indent(lines[1])
        if sfind(lines[1], '^%s*%-') then
          if indent2 < indent then
            -- A dash shallower than this mapping belongs to an enclosing
            -- sequence, so this key has no value.
            map[key] = null
          else
            map[key] = parse_seq('', lines, indent2)
          end
        elseif indent >= indent2 then
          map[key] = null
        else
          map[key] = parse_map('', lines, indent2)
        end
      end
    end
    return map
  end
  -- Parses the given lines into a list of top-level nodes.
  --
  -- Blank and comment lines are dropped first. The result is always a list:
  -- a lone scalar or flow collection is wrapped in a one-element list so the
  -- caller can treat every input the same way.
  --
  -- @param lines sequence of input lines (without line terminators)
  -- @return list of parsed top-level nodes
  -- Raises 'parse error: ...' on a line that cannot be consumed.
  local function parse_documents(lines)
    lines = select(lines, function(s) return not isemptyline(s) end)

    if #lines == 1 and not sfind(lines[1], '^%s*%-') then
      local line = ltrim(lines[1])
      local first = ssub(line, 1, 1)
      -- A single "key: value" line is a mapping, not a scalar; it falls
      -- through to the general loop below.
      local is_map_entry = false
      if first ~= '{' and first ~= '[' then
        local s, rest = parse_string(line)
        is_map_entry = s ~= nil and startswith(rest, ':')
      end
      if not is_map_entry then
        -- No continuation lines exist, so an empty list is passed; the
        -- parentheses drop parse_scalar's optional second return value.
        return { (parse_scalar(line, {})) }
      end
    end

    local root = {}
    while #lines > 0 do
      local line = lines[1]
      local remaining = #lines
      if sfind(line, '^%s*%-') then
        tinsert(root, parse_seq('', lines, -1))
      elseif sfind(line, '^%s*[^%s]') then
        local level = count_indent(line)
        tinsert(root, parse_map('', lines, level))
      else
        error('parse error: '..line)
      end
      -- A parser that consumed nothing would be called again with the same
      -- input forever (e.g. for a line such as "-x").
      if #lines == remaining then
        error('parse error: '..line)
      end
    end

    if #root > 1 and Null.isnull(root[1]) then
      tremove(root, 1)
    end
    return root
  end

  -- Parses YAML text.
  -- @param yaml string holding the YAML source; LF and CRLF line endings
  --             are both accepted
  -- @return the parsed value when the input yields exactly one top-level
  --         node, otherwise the list of top-level nodes
  local function parse(yaml)
    local lines = {}
    for line in string.gmatch(yaml..'\n', '(.-)\r?\n') do
      lines[#lines + 1] = line
    end
    local docs = parse_documents(lines)
    if #docs == 1 then
      return docs[1]
    end
    return docs
  end
  -- Public interface of the module: the null sentinel and the parser entry
  -- point.
  local exports = {}
  exports.null = null
  exports.parse = parse
  return exports