You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

251 lines
6.8 KiB

2 months ago
  1. #include "scanscalar.h"
  2. #include <algorithm>
  3. #include "exp.h"
  4. #include "regeximpl.h"
  5. #include "stream.h"
  6. #include "yaml-cpp/exceptions.h" // IWYU pragma: keep
  7. namespace YAML {
  8. // ScanScalar
  9. // . This is where the scalar magic happens.
  10. //
  11. // . We do the scanning in three phases:
  12. // 1. Scan until newline
  13. // 2. Eat newline
  14. // 3. Scan leading blanks.
  15. //
  16. // . Depending on the parameters given, we store or stop
  17. // and different places in the above flow.
  18. std::string ScanScalar(Stream& INPUT, ScanScalarParams& params) {
  19. bool foundNonEmptyLine = false;
  20. bool pastOpeningBreak = (params.fold == FOLD_FLOW);
  21. bool emptyLine = false, moreIndented = false;
  22. int foldedNewlineCount = 0;
  23. bool foldedNewlineStartedMoreIndented = false;
  24. std::size_t lastEscapedChar = std::string::npos;
  25. std::string scalar;
  26. params.leadingSpaces = false;
  27. if (!params.end) {
  28. params.end = &Exp::Empty();
  29. }
  30. while (INPUT) {
  31. // ********************************
  32. // Phase #1: scan until line ending
  33. std::size_t lastNonWhitespaceChar = scalar.size();
  34. bool escapedNewline = false;
  35. while (!params.end->Matches(INPUT) && !Exp::Break().Matches(INPUT)) {
  36. if (!INPUT) {
  37. break;
  38. }
  39. // document indicator?
  40. if (INPUT.column() == 0 && Exp::DocIndicator().Matches(INPUT)) {
  41. if (params.onDocIndicator == BREAK) {
  42. break;
  43. }
  44. if (params.onDocIndicator == THROW) {
  45. throw ParserException(INPUT.mark(), ErrorMsg::DOC_IN_SCALAR);
  46. }
  47. }
  48. foundNonEmptyLine = true;
  49. pastOpeningBreak = true;
  50. // escaped newline? (only if we're escaping on slash)
  51. if (params.escape == '\\' && Exp::EscBreak().Matches(INPUT)) {
  52. // eat escape character and get out (but preserve trailing whitespace!)
  53. INPUT.get();
  54. lastNonWhitespaceChar = scalar.size();
  55. lastEscapedChar = scalar.size();
  56. escapedNewline = true;
  57. break;
  58. }
  59. // escape this?
  60. if (INPUT.peek() == params.escape) {
  61. scalar += Exp::Escape(INPUT);
  62. lastNonWhitespaceChar = scalar.size();
  63. lastEscapedChar = scalar.size();
  64. continue;
  65. }
  66. // otherwise, just add the damn character
  67. char ch = INPUT.get();
  68. scalar += ch;
  69. if (ch != ' ' && ch != '\t') {
  70. lastNonWhitespaceChar = scalar.size();
  71. }
  72. }
  73. // eof? if we're looking to eat something, then we throw
  74. if (!INPUT) {
  75. if (params.eatEnd) {
  76. throw ParserException(INPUT.mark(), ErrorMsg::EOF_IN_SCALAR);
  77. }
  78. break;
  79. }
  80. // doc indicator?
  81. if (params.onDocIndicator == BREAK && INPUT.column() == 0 &&
  82. Exp::DocIndicator().Matches(INPUT)) {
  83. break;
  84. }
  85. // are we done via character match?
  86. int n = params.end->Match(INPUT);
  87. if (n >= 0) {
  88. if (params.eatEnd) {
  89. INPUT.eat(n);
  90. }
  91. break;
  92. }
  93. // do we remove trailing whitespace?
  94. if (params.fold == FOLD_FLOW)
  95. scalar.erase(lastNonWhitespaceChar);
  96. // ********************************
  97. // Phase #2: eat line ending
  98. n = Exp::Break().Match(INPUT);
  99. INPUT.eat(n);
  100. // ********************************
  101. // Phase #3: scan initial spaces
  102. // first the required indentation
  103. while (INPUT.peek() == ' ' &&
  104. (INPUT.column() < params.indent ||
  105. (params.detectIndent && !foundNonEmptyLine)) &&
  106. !params.end->Matches(INPUT)) {
  107. INPUT.eat(1);
  108. }
  109. // update indent if we're auto-detecting
  110. if (params.detectIndent && !foundNonEmptyLine) {
  111. params.indent = std::max(params.indent, INPUT.column());
  112. }
  113. // and then the rest of the whitespace
  114. while (Exp::Blank().Matches(INPUT)) {
  115. // we check for tabs that masquerade as indentation
  116. if (INPUT.peek() == '\t' && INPUT.column() < params.indent &&
  117. params.onTabInIndentation == THROW) {
  118. throw ParserException(INPUT.mark(), ErrorMsg::TAB_IN_INDENTATION);
  119. }
  120. if (!params.eatLeadingWhitespace) {
  121. break;
  122. }
  123. if (params.end->Matches(INPUT)) {
  124. break;
  125. }
  126. INPUT.eat(1);
  127. }
  128. // was this an empty line?
  129. bool nextEmptyLine = Exp::Break().Matches(INPUT);
  130. bool nextMoreIndented = Exp::Blank().Matches(INPUT);
  131. if (params.fold == FOLD_BLOCK && foldedNewlineCount == 0 && nextEmptyLine)
  132. foldedNewlineStartedMoreIndented = moreIndented;
  133. // for block scalars, we always start with a newline, so we should ignore it
  134. // (not fold or keep)
  135. if (pastOpeningBreak) {
  136. switch (params.fold) {
  137. case DONT_FOLD:
  138. scalar += "\n";
  139. break;
  140. case FOLD_BLOCK:
  141. if (!emptyLine && !nextEmptyLine && !moreIndented &&
  142. !nextMoreIndented && INPUT.column() >= params.indent) {
  143. scalar += " ";
  144. } else if (nextEmptyLine) {
  145. foldedNewlineCount++;
  146. } else {
  147. scalar += "\n";
  148. }
  149. if (!nextEmptyLine && foldedNewlineCount > 0) {
  150. scalar += std::string(foldedNewlineCount - 1, '\n');
  151. if (foldedNewlineStartedMoreIndented ||
  152. nextMoreIndented | !foundNonEmptyLine) {
  153. scalar += "\n";
  154. }
  155. foldedNewlineCount = 0;
  156. }
  157. break;
  158. case FOLD_FLOW:
  159. if (nextEmptyLine) {
  160. scalar += "\n";
  161. } else if (!emptyLine && !escapedNewline) {
  162. scalar += " ";
  163. }
  164. break;
  165. }
  166. }
  167. emptyLine = nextEmptyLine;
  168. moreIndented = nextMoreIndented;
  169. pastOpeningBreak = true;
  170. // are we done via indentation?
  171. if (!emptyLine && INPUT.column() < params.indent) {
  172. params.leadingSpaces = true;
  173. break;
  174. }
  175. }
  176. // post-processing
  177. if (params.trimTrailingSpaces) {
  178. std::size_t pos = scalar.find_last_not_of(" \t");
  179. if (lastEscapedChar != std::string::npos) {
  180. if (pos < lastEscapedChar || pos == std::string::npos) {
  181. pos = lastEscapedChar;
  182. }
  183. }
  184. if (pos < scalar.size()) {
  185. scalar.erase(pos + 1);
  186. }
  187. }
  188. switch (params.chomp) {
  189. case CLIP: {
  190. std::size_t pos = scalar.find_last_not_of('\n');
  191. if (lastEscapedChar != std::string::npos) {
  192. if (pos < lastEscapedChar || pos == std::string::npos) {
  193. pos = lastEscapedChar;
  194. }
  195. }
  196. if (pos == std::string::npos) {
  197. scalar.erase();
  198. } else if (pos + 1 < scalar.size()) {
  199. scalar.erase(pos + 2);
  200. }
  201. } break;
  202. case STRIP: {
  203. std::size_t pos = scalar.find_last_not_of('\n');
  204. if (lastEscapedChar != std::string::npos) {
  205. if (pos < lastEscapedChar || pos == std::string::npos) {
  206. pos = lastEscapedChar;
  207. }
  208. }
  209. if (pos == std::string::npos) {
  210. scalar.erase();
  211. } else if (pos < scalar.size()) {
  212. scalar.erase(pos + 1);
  213. }
  214. } break;
  215. default:
  216. break;
  217. }
  218. return scalar;
  219. }
  220. } // namespace YAML