Orcus
css_parser.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_CSS_PARSER_HPP
9 #define INCLUDED_ORCUS_CSS_PARSER_HPP
10 
11 #define ORCUS_DEBUG_CSS 0
12 
13 #include "orcus/parser_global.hpp"
14 #include "orcus/css_parser_base.hpp"
15 #include "orcus/global.hpp"
16 
17 #include <cassert>
18 
19 #if ORCUS_DEBUG_CSS
20 #include <iostream>
21 using std::cout;
22 using std::endl;
23 #endif
24 
25 namespace orcus {
26 
27 template<typename _Handler>
29 {
30 public:
31  typedef _Handler handler_type;
32 
33  css_parser(const char* p, size_t n, handler_type& hdl);
34  void parse();
35 
36 private:
37  // Handlers - at the time a handler is called the current position is
38  // expected to point to the first unprocessed non-blank character, and
39  // each handler must set the current position to the next unprocessed
40  // non-blank character when it finishes.
41  void rule();
42  void at_rule_name();
43  void simple_selector_name();
44  void property_name();
45  void property();
46  void quoted_value(char c);
47  void value();
48  void function_value(const char* p, size_t len);
49  void function_rgb(bool alpha);
50  void function_hsl(bool alpha);
51  void function_url();
52  void name_sep();
53  void property_sep();
54  void block();
55 
56  handler_type& m_handler;
57 };
58 
59 template<typename _Handler>
60 css_parser<_Handler>::css_parser(const char* p, size_t n, handler_type& hdl) :
61  css::parser_base(p, n), m_handler(hdl) {}
62 
63 template<typename _Handler>
64 void css_parser<_Handler>::parse()
65 {
66  shrink_stream();
67 
68 #if ORCUS_DEBUG_CSS
69  std::cout << "compressed: '";
70  const char* p = mp_char;
71  for (; p != mp_end; ++p)
72  std::cout << *p;
73  std::cout << "'" << std::endl;
74 #endif
75  m_handler.begin_parse();
76  while (has_char())
77  rule();
78  m_handler.end_parse();
79 }
80 
81 template<typename _Handler>
82 void css_parser<_Handler>::rule()
83 {
84  // <selector name> , ... , <selector name> <block>
85  while (has_char())
86  {
87  if (skip_comment())
88  continue;
89 
90  char c = cur_char();
91  if (is_alpha(c))
92  {
93  simple_selector_name();
94  continue;
95  }
96 
97  switch (c)
98  {
99  case '>':
100  set_combinator(c, css::combinator_t::direct_child);
101  break;
102  case '+':
103  set_combinator(c, css::combinator_t::next_sibling);
104  break;
105  case '.':
106  case '#':
107  case '@':
108  simple_selector_name();
109  break;
110  case ',':
111  name_sep();
112  break;
113  case '{':
114  reset_before_block();
115  block();
116  break;
117  default:
118  css::parse_error::throw_with("rule: failed to parse '", c, "'");
119  }
120  }
121 }
122 
123 template<typename _Handler>
124 void css_parser<_Handler>::at_rule_name()
125 {
126  assert(has_char());
127  assert(cur_char() == '@');
128  next();
129  char c = cur_char();
130  if (!is_alpha(c))
131  throw css::parse_error("at_rule_name: first character of an at-rule name must be an alphabet.");
132 
133  const char* p;
134  size_t len;
135  identifier(p, len);
136  skip_blanks();
137 
138  m_handler.at_rule_name(p, len);
139 #if ORCUS_DEBUG_CSS
140  std::string foo(p, len);
141  std::cout << "at-rule name: " << foo.c_str() << std::endl;
142 #endif
143 }
144 
145 template<typename _Handler>
146 void css_parser<_Handler>::simple_selector_name()
147 {
148  assert(has_char());
149  char c = cur_char();
150  if (c == '@')
151  {
152  // This is the name of an at-rule.
153  at_rule_name();
154  return;
155  }
156 
157  if (m_simple_selector_count)
158  {
159 #if ORCUS_DEBUG_CSS
160  cout << "combinator: " << m_combinator << endl;
161 #endif
162  m_handler.combinator(m_combinator);
163  m_combinator = css::combinator_t::descendant;
164  }
165  assert(is_alpha(c) || c == '.' || c == '#');
166 
167  const char* p = nullptr;
168  size_t n = 0;
169 
170 #if ORCUS_DEBUG_CSS
171  cout << "simple_selector_name: (" << m_simple_selector_count << ")";
172 #endif
173 
174  if (c != '.' && c != '#')
175  {
176  identifier(p, n);
177 #if ORCUS_DEBUG_CSS
178  std::string s(p, n);
179  cout << " type=" << s;
180 #endif
181  m_handler.simple_selector_type(p, n);
182  }
183 
184  bool in_loop = true;
185  while (in_loop && has_char())
186  {
187  switch (cur_char())
188  {
189  case '.':
190  {
191  next();
192  identifier(p, n);
193  m_handler.simple_selector_class(p, n);
194 #if ORCUS_DEBUG_CSS
195  std::string s(p, n);
196  std::cout << " class=" << s;
197 #endif
198  }
199  break;
200  case '#':
201  {
202  next();
203  identifier(p, n);
204  m_handler.simple_selector_id(p, n);
205 #if ORCUS_DEBUG_CSS
206  std::string s(p, n);
207  std::cout << " id=" << s;
208 #endif
209  }
210  break;
211  case ':':
212  {
213  // This could be either a pseudo element or pseudo class.
214  next();
215  if (cur_char() == ':')
216  {
217  // pseudo element.
218  next();
219  identifier(p, n);
220  css::pseudo_element_t elem = css::to_pseudo_element(p, n);
221  if (!elem)
222  css::parse_error::throw_with(
223  "selector_name: unknown pseudo element '", p, n, "'");
224 
225  m_handler.simple_selector_pseudo_element(elem);
226  }
227  else
228  {
229  // pseudo class (or pseudo element in the older version of CSS).
230  identifier(p, n);
231  css::pseudo_class_t pc = css::to_pseudo_class(p, n);
232  if (!pc)
233  css::parse_error::throw_with(
234  "selector_name: unknown pseudo class '", p, n, "'");
235 
236  m_handler.simple_selector_pseudo_class(pc);
237  }
238  }
239  break;
240  default:
241  in_loop = false;
242  }
243  }
244 
245  m_handler.end_simple_selector();
246  skip_comments_and_blanks();
247 
248  ++m_simple_selector_count;
249 
250 #if ORCUS_DEBUG_CSS
251  std::cout << std::endl;
252 #endif
253 }
254 
255 template<typename _Handler>
256 void css_parser<_Handler>::property_name()
257 {
258  // <identifier>
259 
260  assert(has_char());
261  char c = cur_char();
262  if (!is_alpha(c) && c != '.')
263  css::parse_error::throw_with(
264  "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'");
265 
266  const char* p;
267  size_t len;
268  identifier(p, len);
269  skip_comments_and_blanks();
270 
271  m_handler.property_name(p, len);
272 #if ORCUS_DEBUG_CSS
273  std::string foo(p, len);
274  std::cout << "property name: " << foo.c_str() << std::endl;
275 #endif
276 }
277 
278 template<typename _Handler>
279 void css_parser<_Handler>::property()
280 {
281  // <property name> : <value> , ... , <value>
282 
283  m_handler.begin_property();
284  property_name();
285  if (cur_char() != ':')
286  throw css::parse_error("property: ':' expected.");
287  next();
288  skip_comments_and_blanks();
289 
290  bool in_loop = true;
291  while (in_loop && has_char())
292  {
293  value();
294  char c = cur_char();
295  switch (c)
296  {
297  case ',':
298  {
299  // separated by commas.
300  next();
301  skip_comments_and_blanks();
302  }
303  break;
304  case ';':
305  case '}':
306  in_loop = false;
307  break;
308  default:
309  ;
310  }
311  }
312 
313  skip_comments_and_blanks();
314  m_handler.end_property();
315 }
316 
317 template<typename _Handler>
318 void css_parser<_Handler>::quoted_value(char c)
319 {
320  // Parse until the the end quote is reached.
321  const char* p = nullptr;
322  size_t len = 0;
323  literal(p, len, c);
324  next();
325  skip_blanks();
326 
327  m_handler.value(p, len);
328 #if ORCUS_DEBUG_CSS
329  std::string foo(p, len);
330  std::cout << "quoted value: " << foo.c_str() << std::endl;
331 #endif
332 }
333 
334 template<typename _Handler>
335 void css_parser<_Handler>::value()
336 {
337  assert(has_char());
338  char c = cur_char();
339  if (c == '"' || c == '\'')
340  {
341  quoted_value(c);
342  return;
343  }
344 
345  if (!is_alpha(c) && !is_numeric(c) && !is_in(c, ORCUS_ASCII("-+.#")))
346  css::parse_error::throw_with("value:: illegal first character of a value '", c, "'");
347 
348  const char* p = nullptr;
349  size_t len = 0;
350  identifier(p, len, ORCUS_ASCII(".%"));
351  if (cur_char() == '(')
352  {
353  function_value(p, len);
354  return;
355  }
356 
357  m_handler.value(p, len);
358 
359  skip_comments_and_blanks();
360 
361 #if ORCUS_DEBUG_CSS
362  std::string foo(p, len);
363  std::cout << "value: " << foo.c_str() << std::endl;
364 #endif
365 }
366 
367 template<typename _Handler>
368 void css_parser<_Handler>::function_value(const char* p, size_t len)
369 {
370  assert(cur_char() == '(');
371  css::property_function_t func = css::to_property_function(p, len);
372  if (func == css::property_function_t::unknown)
373  css::parse_error::throw_with("function_value: unknown function '", p, len, "'");
374 
375  // Move to the first character of the first argument.
376  next();
377  skip_comments_and_blanks();
378 
379  switch (func)
380  {
381  case css::property_function_t::rgb:
382  function_rgb(false);
383  break;
384  case css::property_function_t::rgba:
385  function_rgb(true);
386  break;
387  case css::property_function_t::hsl:
388  function_hsl(false);
389  break;
390  case css::property_function_t::hsla:
391  function_hsl(true);
392  break;
393  case css::property_function_t::url:
394  function_url();
395  break;
396  default:
397  css::parse_error::throw_with("function_value: unhandled function '", p, len, "'");
398  }
399 
400  char c = cur_char();
401  if (c != ')')
402  css::parse_error::throw_with("function_value: ')' expected but '", c, "' found.");
403 
404  next();
405  skip_comments_and_blanks();
406 }
407 
408 template<typename _Handler>
409 void css_parser<_Handler>::function_rgb(bool alpha)
410 {
411  // rgb(num, num, num) rgba(num, num, num, float)
412 
413  uint8_t vals[3];
414  uint8_t* p = vals;
415  const uint8_t* plast = p + 2;
416  char c = 0;
417 
418  for (; ; ++p)
419  {
420  *p = parse_uint8();
421 
422  skip_comments_and_blanks();
423 
424  if (p == plast)
425  break;
426 
427  c = cur_char();
428 
429  if (c != ',')
430  css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.");
431 
432  next();
433  skip_comments_and_blanks();
434  }
435 
436  if (alpha)
437  {
438  c = cur_char();
439  if (c != ',')
440  css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.");
441 
442  next();
443  skip_comments_and_blanks();
444 
445  double alpha_val = parse_double_or_throw();
446 
447  alpha_val = clip(alpha_val, 0.0, 1.0);
448  m_handler.rgba(vals[0], vals[1], vals[2], alpha_val);
449  }
450  else
451  m_handler.rgb(vals[0], vals[1], vals[2]);
452 
453 #if ORCUS_DEBUG_CSS
454  std::cout << "rgb";
455  if (alpha)
456  std::cout << 'a';
457  std::cout << '(';
458  p = vals;
459  const uint8_t* pend = plast + 1;
460  for (; p != pend; ++p)
461  std::cout << ' ' << (int)*p;
462  std::cout << " )" << std::endl;
463 #endif
464 }
465 
466 template<typename _Handler>
467 void css_parser<_Handler>::function_hsl(bool alpha)
468 {
469  // hsl(num, percent, percent) hsla(num, percent, percent, float)
470 
471  double hue = parse_double_or_throw(); // casted to uint8_t eventually.
472  hue = clip(hue, 0.0, 360.0);
473  skip_comments_and_blanks();
474 
475  char c = cur_char();
476  if (c != ',')
477  css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
478 
479  next();
480  skip_comments_and_blanks();
481 
482  double sat = parse_percent();
483  sat = clip(sat, 0.0, 100.0);
484  skip_comments_and_blanks();
485 
486  c = cur_char();
487  if (c != ',')
488  css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
489 
490  next();
491  skip_comments_and_blanks();
492 
493  double light = parse_percent();
494  light = clip(light, 0.0, 100.0);
495  skip_comments_and_blanks();
496 
497  if (!alpha)
498  {
499  m_handler.hsl(hue, sat, light);
500  return;
501  }
502 
503  c = cur_char();
504  if (c != ',')
505  css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
506 
507  next();
508  skip_comments_and_blanks();
509 
510  double alpha_val = parse_double_or_throw();
511  alpha_val = clip(alpha_val, 0.0, 1.0);
512  skip_comments_and_blanks();
513  m_handler.hsla(hue, sat, light, alpha_val);
514 }
515 
516 template<typename _Handler>
517 void css_parser<_Handler>::function_url()
518 {
519  char c = cur_char();
520 
521  if (c == '"' || c == '\'')
522  {
523  // Quoted URL value.
524  const char* p;
525  size_t len;
526  literal(p, len, c);
527  next();
528  skip_comments_and_blanks();
529  m_handler.url(p, len);
530 #if ORCUS_DEBUG_CSS
531  std::cout << "url(" << std::string(p, len) << ")" << std::endl;
532 #endif
533  return;
534  }
535 
536  // Unquoted URL value.
537  const char* p;
538  size_t len;
539  skip_to_or_blank(p, len, ORCUS_ASCII(")"));
540  skip_comments_and_blanks();
541  m_handler.url(p, len);
542 #if ORCUS_DEBUG_CSS
543  std::cout << "url(" << std::string(p, len) << ")" << std::endl;
544 #endif
545 }
546 
547 template<typename _Handler>
548 void css_parser<_Handler>::name_sep()
549 {
550  assert(cur_char() == ',');
551 #if ORCUS_DEBUG_CSS
552  std::cout << "," << std::endl;
553 #endif
554  next();
555  skip_blanks();
556  m_handler.end_selector();
557 }
558 
559 template<typename _Handler>
560 void css_parser<_Handler>::property_sep()
561 {
562 #if ORCUS_DEBUG_CSS
563  std::cout << ";" << std::endl;
564 #endif
565  next();
566  skip_comments_and_blanks();
567 }
568 
569 template<typename _Handler>
570 void css_parser<_Handler>::block()
571 {
572  // '{' <property> ';' ... ';' <property> ';'(optional) '}'
573 
574  assert(cur_char() == '{');
575 #if ORCUS_DEBUG_CSS
576  std::cout << "{" << std::endl;
577 #endif
578  m_handler.end_selector();
579  m_handler.begin_block();
580 
581  next();
582  skip_comments_and_blanks();
583 
584  // parse properties.
585  while (has_char())
586  {
587  property();
588  if (cur_char() != ';')
589  break;
590  property_sep();
591  if (cur_char() == '}')
592  // ';' after the last property. This is optional but allowed.
593  break;
594  }
595 
596  if (cur_char() != '}')
597  throw css::parse_error("block: '}' expected.");
598 
599  m_handler.end_block();
600 
601  next();
602  skip_comments_and_blanks();
603 
604 #if ORCUS_DEBUG_CSS
605  std::cout << "}" << std::endl;
606 #endif
607 }
608 
609 }
610 
611 #endif
612 
613 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: css_parser_base.hpp:30
Definition: css_parser.hpp:28
Definition: parser_base.hpp:39
Definition: base64.hpp:15