Current Version: 1.0.10
Project Name: csspp
parser.cpp
Go to the documentation of this file.
1 // CSS Preprocessor
2 // Copyright (C) 2015-2016 Made to Order Software Corp.
3 //
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation; either version 2 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program; if not, write to the Free Software
16 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
47 #include "csspp/parser.h"
48 
49 #include "csspp/exceptions.h"
50 
51 #include <iostream>
52 
53 namespace csspp
54 {
55 
56 namespace
57 {
58 
61 
62 } // no name namespace
63 
65  : f_lexer(l)
66 {
67  next_token();
68 }
69 
71 {
72  return stylesheet(f_last_token);
73 }
74 
76 {
77  return rule_list(f_last_token);
78 }
79 
81 {
82  return rule(f_last_token);
83 }
84 
86 {
88 }
89 
91 {
93 }
94 
96 {
98 }
99 
101 {
102  f_last_token = f_lexer->next_token();
103 //std::cerr << "*** TOKEN: " << *f_last_token;
104  return f_last_token;
105 }
106 
108 {
109  node::pointer_t result(new node(node_type_t::LIST, n->get_position()));
110 
111  for(; !n->is(node_type_t::EOF_TOKEN); n = f_last_token)
112  {
113  // completely ignore the CDO and CDC, if the "assembler"
114  // wants to output them, it will do so, but otherwise it
115  // is just completely ignored
116  //
117  // also white spaces at this level are pretty much useless
118  //
119  if(n->is(node_type_t::CDO)
120  || n->is(node_type_t::CDC)
121  || n->is(node_type_t::WHITESPACE))
122  {
123  next_token();
124  continue;
125  }
126 
130  {
131  error::instance() << n->get_position()
132  << "Unexpected closing block of type: " << n->get_type() << "."
134  break;
135  }
136 
137  if(n->is(node_type_t::COMMENT))
138  {
139  result->add_child(n);
140  next_token();
141  }
142  else if(n->is(node_type_t::AT_KEYWORD))
143  {
144  result->add_child(at_rule(n));
145  }
146  else
147  {
148  // anything else is a qualified rule
149  result->add_child(qualified_rule(n));
150  }
151  }
152 
153  // we always return the LIST because, if it starts with an @import (or
154  // rather is just one @import) or a $var, then that node needs to be
155  // replaced and we could not do that if those were root nodes
156  return result;
157 }
158 
160 {
161  node::pointer_t result(new node(node_type_t::LIST, n->get_position()));
162 
163  for(node::pointer_t q; (!q || !q->is(node_type_t::EOF_TOKEN)) && !n->is(node_type_t::EOF_TOKEN); n = f_last_token)
164  {
165  q = rule(n);
166  result->add_child(q);
167  }
168 
169  return result;
170 }
171 
173 {
174  if(n->is(node_type_t::CDO)
175  || n->is(node_type_t::CDC))
176  {
177  error::instance() << n->get_position()
178  << "HTML comment delimiters (<!-- and -->) are not allowed in this CSS document."
180  return node::pointer_t(new node(node_type_t::EOF_TOKEN, n->get_position()));
181  }
182 
186  {
187  error::instance() << n->get_position()
188  << "Unexpected closing block of type: " << n->get_type() << "."
190  return node::pointer_t(new node(node_type_t::EOF_TOKEN, n->get_position()));
191  }
192 
193  if(n->is(node_type_t::WHITESPACE))
194  {
195  // skip potential whitespaces
196  n = next_token();
197  }
198 
199  if(n->is(node_type_t::AT_KEYWORD))
200  {
201  return at_rule(n);
202  }
203 
204  // anything else is a qualified rule
205  return qualified_rule(n);
206 }
207 
209 {
210  // the '@' was already eaten, it will be our result
212 
213  if(n->empty())
214  {
215  error::instance() << at_keyword->get_position()
216  << "At '@' command cannot be empty (missing expression or block) unless ended by a semicolon (;)."
218  }
219  else
220  {
221  node::pointer_t last_child(n->get_last_child());
223  {
224  next_token();
225  }
226  else if(!last_child->is(node_type_t::OPEN_CURLYBRACKET))
227  {
228  error::instance() << at_keyword->get_position()
229  << "At '@' command must end with a block or a ';'."
231  }
232  at_keyword->take_over_children_of(n);
233  }
234 
235  return at_keyword;
236 }
237 
239 {
240  if(n->is(node_type_t::EOF_TOKEN))
241  {
242  return n;
243  }
244  if(n->is(node_type_t::SEMICOLON))
245  {
246  // skip the ';' (i.e. ';' in 'foo { blah: 123 };')
247  next_token();
248 
249  // it is an error, we just make it clear what error it is because
250  // by default it would otherwise come out as "invalid qualified rule"
251  // which is rather hard to understand here...
252  error::instance() << n->get_position()
253  << "A qualified rule cannot end a { ... } block with a ';'."
255  return node::pointer_t(new node(node_type_t::EOF_TOKEN, n->get_position()));
256  }
257 
258  // a qualified rule is a component value list that
259  // ends with a block
261 
262  if(result->empty())
263  {
264  // I have not been able to reach these lines, somehow...
265  error::instance() << n->get_position()
266  << "A qualified rule cannot be empty; you are missing a { ... } block."
268  }
269  else
270  {
271  node::pointer_t last_child(result->get_last_child());
272  if(!is_variable_set(result, false)
273  && !last_child->is(node_type_t::OPEN_CURLYBRACKET))
274  {
275  error::instance() << n->get_position()
276  << "A qualified rule must end with a { ... } block."
278  }
279  }
280 
281  return result;
282 }
283 
285 {
286  node::pointer_t result(new node(node_type_t::LIST, n->get_position()));
287 
288  for(;;)
289  {
290  if(n->is(node_type_t::WHITESPACE))
291  {
292  n = next_token();
293  }
294 
295  if(n->is(node_type_t::IDENTIFIER))
296  {
297  result->add_child(declaration(n));
299  {
300  // the EOF_TOKEN below generates an error if we
301  // do not remove those spaces ahead of time
303  {
304  next_token();
305  }
306  break;
307  }
308  // skip the ';'
309  n = next_token();
310  }
311  else if(n->is(node_type_t::AT_KEYWORD))
312  {
313  result->add_child(at_rule(n));
314  n = f_last_token;
315  }
316  else
317  {
318  break;
319  }
320  }
321 
323  {
324  error::instance() << f_last_token->get_position()
325  << "the end of the stream was not reached in this declaration, we stopped on a "
326  << f_last_token->get_type()
327  << "."
329  }
330 
331  return result;
332 }
333 
335 {
336  node::pointer_t result(new node(node_type_t::DECLARATION, identifier->get_position()));
337  result->set_string(identifier->get_string());
338 
340 
341  // allow white spaces
342  if(n->is(node_type_t::WHITESPACE))
343  {
344  n = next_token();
345  }
346 
347  // here we must have a ':'
348  if(n->is(node_type_t::COLON))
349  {
350  // skip the colon, no need to keep it around
351  n = next_token();
352  }
353  else
354  {
355  error::instance() << n->get_position()
356  << "':' missing in your declaration starting with \""
357  << identifier->get_string()
358  << "\"."
360  }
361 
362  // a component value
364 
365  return result;
366 }
367 
369 {
370  node::pointer_t result(new node(node_type_t::LIST, n->get_position()));
371 
372  node::pointer_t list(new node(node_type_t::COMPONENT_VALUE, n->get_position()));
373  result->add_child(list);
374  for(;; n = f_last_token)
375  {
376  // this test is rather ugly... also it kinda breaks the
377  // so called 'preserved tokens'
378  //
379  if(n->is(node_type_t::EOF_TOKEN)
383  || ((flags & g_component_value_flag_return_on_semi_colon) != 0 && n->is(node_type_t::SEMICOLON)) // declarations handle the semi-colon differently
384  || n->is(node_type_t::CDO)
385  || n->is(node_type_t::CDC))
386  {
387  break;
388  }
389 
390  if(n->is(node_type_t::AT_KEYWORD))
391  {
392  list->add_child(at_rule(n));
393  continue;
394  }
395 
396  if(n->is(node_type_t::SEMICOLON))
397  {
398  next_token();
399 
400  // remove leading and trailing whitespace, no need really
401  while(!list->empty() && list->get_child(0)->is(node_type_t::WHITESPACE))
402  {
403  list->remove_child(0);
404  }
405  while(!list->empty() && list->get_last_child()->is(node_type_t::WHITESPACE))
406  {
407  list->remove_child(list->size() - 1);
408  }
409 
410  // variables are viewed as a terminator string when ended by a
411  // semicolon; a qualified rule normally requires a block to
412  // end, but we have a special case to allow definition of
413  // variables anywhere
415  && is_variable_set(list, false))
416  {
417  break;
418  }
419 
420  if(!list->empty())
421  {
422  // move to a new sub-list
423  list.reset(new node(node_type_t::COMPONENT_VALUE, n->get_position()));
424  result->add_child(list);
425  }
426  continue;
427  }
428 
429  if(n->is(node_type_t::EXCLAMATION))
430  {
431  node::pointer_t exclamation(next_token());
432  if(exclamation->is(node_type_t::WHITESPACE))
433  {
434  exclamation = next_token();
435  }
436  if(exclamation->is(node_type_t::IDENTIFIER))
437  {
438  // remove the WHITESPACE before if there is one
439  if(!list->empty()
440  && list->get_last_child()->is(node_type_t::WHITESPACE))
441  {
442  list->remove_child(list->get_last_child());
443  }
444 
445  // save the identifier in the EXCLAMATION node
446  // and add that to the current COMPONENT_VALUE
447  n->set_string(exclamation->get_string());
448  list->add_child(n);
449 
450  // TBD: should we check that the identifier is either
451  // "important" or "global" at this point?
452  // (there are also others we support like "default")
453 
454  // read the next token and if it is a space, skip it
455  n = next_token();
456  if(n->is(node_type_t::WHITESPACE))
457  {
458  next_token();
459  }
460  }
461  else
462  {
463  error::instance() << exclamation->get_position()
464  << "A '!' must be followed by an identifier, got a "
465  << exclamation->get_type()
466  << " instead."
468  }
469  continue;
470  }
471 
472  // remove trailing whitespace before a block, no need
476  && !list->empty()
477  && list->get_last_child()->is(node_type_t::WHITESPACE))
478  {
479  list->remove_child(list->size() - 1);
480  }
481 
483  {
484  // in this special case, we read the {}-block and return
485  // (i.e. end of an @-rule, etc.)
486  //
487  // however, to support the full SASS syntax we need to
488  // support two special cases:
489  //
490  // $var: { some-value: here; };
491  // font: { family: strange; style: italic };
492  //
493  // For those special entries, we must avoid returning
494  // when we find a block (darn! this grammar...)
495  //
496  // Note that the second test is done after we read the block
497  // since the presence of the block is checked in case of the
498  // nested declaration.
499  //
500  list->add_child(component_value(n));
501 
502  // remove leading and trailing whitespace, no need really
503  // (to make sure the tests below work as expected)
504  while(!list->empty() && list->get_child(0)->is(node_type_t::WHITESPACE))
505  {
506  list->remove_child(0);
507  }
508 
509  // do we return now, or was that block a sub-definition?
510  if(!is_variable_set(list, true)
511  && !is_nested_declaration(list))
512  {
513  break;
514  }
515 
517  {
518  next_token();
519  }
520 
522  {
523  // blocks defining a variable or a nested declaration
524  // must be followed by a semi-colon or we have an error
525  error::instance() << list->get_child(0)->get_position()
526  << "Variable set to a block and a nested property block must end with a semicolon (;) after said block."
528  }
529  }
530  else
531  {
532  list->add_child(component_value(n));
533  }
534  }
535 
536  // remove leading and trailing whitespace, no need really
537  if(!list->empty() && list->get_child(0)->is(node_type_t::WHITESPACE))
538  {
539  list->remove_child(0);
540  }
541  if(!list->empty() && list->get_last_child()->is(node_type_t::WHITESPACE))
542  {
543  list->remove_child(list->size() - 1);
544  }
545 
546  if(list->empty())
547  {
548  result->remove_child(list);
549  }
550 
551  if(result->size() == 1)
552  {
553  result = result->get_last_child();
554  }
555 
556  return result;
557 }
558 
560 {
562  {
563  // parse a block up to '}'
564  return block_list(n);
565  }
566 
568  {
569  // parse a block up to ']'
571  }
572 
574  || n->is(node_type_t::FUNCTION)
576  {
577  // parse a block up to ')'
579  }
580 
581  next_token();
582 
583  // n is the token we keep
584  return n;
585 }
586 
588 {
590  b->take_over_children_of(children);
592  {
593  next_token();
594  }
595  if(f_last_token->is(closing_token))
596  {
597  // skip that closing token
598  next_token();
599  }
600  else
601  {
602  error::instance() << b->get_position()
603  << "Block expected to end with "
604  << closing_token
605  << " but got "
606  << f_last_token->get_type()
607  << " instead."
609  }
610 
611  return b;
612 }
613 
615 {
616  // skip the '{'
617  next_token();
618 
619  do
620  {
622  b->add_child(children);
623  // WHITESPACE tokens are skipped between component values
624  // Also the variable tokens that force a return without a next_token()
628  {
629  next_token();
630  }
633  {
634  error::instance() << b->get_position()
635  << "Block expected to end with "
637  << " but got "
638  << f_last_token->get_type()
639  << " instead."
641  next_token();
642  }
643  }
646 
648  {
649  error::instance() << b->get_position()
650  << "Block expected to end with "
652  << " but got "
653  << f_last_token->get_type()
654  << " instead."
656  }
657 
658  // skip the '}'
659  next_token();
660 
661  return b;
662 }
663 
664 bool parser::is_variable_set(node::pointer_t n, bool with_block)
665 {
666  // a variable set is at least 3 tokens:
667  // $var:<value>
668  if(n->size() < 3
669  || (!n->get_child(0)->is(node_type_t::VARIABLE)
670  && !n->get_child(0)->is(node_type_t::VARIABLE_FUNCTION)))
671  {
672  return false;
673  }
674 
675  size_t pos(n->get_child(1)->is(node_type_t::WHITESPACE) ? 2 : 1);
676  if(!n->get_child(pos)->is(node_type_t::COLON))
677  {
678  return false;
679  }
680 
681  if(!with_block)
682  {
683  // in this case the shorthand is enough: $var ':'
684  return true;
685  }
686 
687  // WARNING: from here the size needs to be checked since the list may
688  // be smaller than what we are looking for in it
689 
690  // in this case we need to have: $var ':' '{'
691  ++pos;
692  if(pos < n->size() && n->get_child(pos)->is(node_type_t::WHITESPACE))
693  {
694  ++pos;
695  }
696 
697  return pos < n->size() && n->get_child(pos)->is(node_type_t::OPEN_CURLYBRACKET);
698 }
699 
701 {
702  // a declaration with a sub-block
703  // field: [optional-values] '{' ... '}' ';'
704  if(n->size() < 3
705  || !n->get_child(0)->is(node_type_t::IDENTIFIER)
706  || !n->get_last_child()->is(node_type_t::OPEN_CURLYBRACKET))
707  {
708  return false;
709  }
710 
711  // the colon is mandatory, after an optional whitespace
712  size_t pos(n->get_child(1)->is(node_type_t::WHITESPACE) ? 2 : 1);
713  if(!n->get_child(pos)->is(node_type_t::COLON))
714  {
715  return false;
716  }
717  ++pos; // skip the colon
718  if(pos >= n->size())
719  {
720  // this is "too short" so not really a declaration nor a component value
721  // note: I'm not able to reach this one anymore, I think that's because
722  // of the OPEN_CURLYBRACKET that I moved at the top...
723  return false; // LCOV_EXCL_LINE
724  }
725  if(n->get_child(pos)->is(node_type_t::WHITESPACE)
726  || n->get_child(pos)->is(node_type_t::OPEN_CURLYBRACKET))
727  {
728  // a colon cannot be followed by a space or '{' in a valid selector
729  return true;
730  }
731  if(n->get_child(pos)->is(node_type_t::FUNCTION))
732  {
733  // in this case we have <id>':'<func> which can be a valid selector
734  // so we have to skip this function otherwise we return 'true'
735  ++pos;
736  if(pos >= n->size())
737  {
738  // this test is for security (code may change over time...)
739  // but since the last item must be a curly bracket, it could
740  // not be this function, right?
741  return false; // LCOV_EXCL_LINE
742  }
743  }
744 
745  for(;;)
746  {
747  switch(n->get_child(pos)->get_type())
748  {
749  case node_type_t::COLON:
753  case node_type_t::SCOPE:
754  // a valid declaration cannot include one of those
755  return false;
756 
757  case node_type_t::ADD:
758  case node_type_t::COMMA:
759  //case node_type_t::FUNCTION: -- must be preceded by ':' so no need here we already returned if we hit a colon
761  case node_type_t::HASH:
766  case node_type_t::PERIOD:
768  break;
769 
770  default:
771  // this is something that would not be valid in a selector
772  // so we must have a declaration...
773  return true;
774 
775  }
776 
777  ++pos;
778  if(pos >= n->size())
779  {
780  // everything looks valid for a selector, so return false
781  return false;
782  }
783  }
784 }
785 
787 {
788  switch(separator)
789  {
790  case node_type_t::COMMA:
791  case node_type_t::DIVIDE:
792  break;
793 
794  default:
795  throw csspp_exception_logic("argify only supports ',' and '/' as separators.");
796 
797  }
798 
799  // make sure there are items and these are not already arguments
800  size_t const max_children(n->size());
801  if(max_children > 0
802  && !n->get_child(0)->is(node_type_t::ARG))
803  {
804  node::pointer_t temp(new node(node_type_t::LIST, n->get_position()));
805  temp->take_over_children_of(n);
806 
807  node::pointer_t arg(new node(node_type_t::ARG, n->get_position()));
808  arg->set_integer(static_cast<integer_t>(separator));
809  n->add_child(arg);
810 
811  for(size_t i(0); i < max_children; ++i)
812  {
813  node::pointer_t child(temp->get_child(i));
814  if(child->is(node_type_t::OPEN_CURLYBRACKET))
815  {
816  if(i + 1 != max_children)
817  {
818  throw csspp_exception_logic("compiler.cpp:compiler::argify(): list that has an OPEN_CURLYBRACKET that is not the last child."); // LCOV_EXCL_LINE
819  }
820  n->add_child(child);
821  break;
822  }
823  if(child->is(separator))
824  {
825  // make sure to remove any WHITESPACE appearing just
826  // before a comma
827  while(!arg->empty() && arg->get_last_child()->is(node_type_t::WHITESPACE))
828  {
829  arg->remove_child(arg->get_last_child());
830  }
831  if(arg->empty())
832  {
833  if(n->size() == 1)
834  {
835  error::instance() << n->get_position()
836  << "dangling comma at the beginning of a list of arguments or selectors."
838  }
839  else
840  {
841  error::instance() << n->get_position()
842  << "two commas in a row are invalid in a list of arguments or selectors."
844  }
845  return false;
846  }
847  if(i + 1 == max_children
848  || temp->get_child(i + 1)->is(node_type_t::OPEN_CURLYBRACKET))
849  {
850  error::instance() << n->get_position()
851  << "dangling comma at the end of a list of arguments or selectors."
853  return false;
854  }
855  // move to the next 'arg'
856  arg.reset(new node(node_type_t::ARG, n->get_position()));
857  arg->set_integer(static_cast<integer_t>(separator));
858  n->add_child(arg);
859  }
860  else if(!child->is(node_type_t::WHITESPACE) || !arg->empty())
861  {
862  arg->add_child(child);
863  }
864  }
865  }
866 
867  return true;
868 }
869 
870 } // namespace csspp
871 
872 // Local Variables:
873 // mode: cpp
874 // indent-tabs-mode: nil
875 // c-basic-offset: 4
876 // tab-width: 4
877 // End:
878 
879 // vim: ts=4 sw=4 et
node::pointer_t component_value_list()
Definition: parser.cpp:90
lexer::pointer_t f_lexer
Definition: parser.h:56
node::pointer_t f_last_token
Definition: parser.h:57
node::pointer_t rule()
Definition: parser.cpp:80
node::pointer_t block_list(node::pointer_t b)
Definition: parser.cpp:614
static bool is_nested_declaration(node::pointer_t n)
Definition: parser.cpp:700
parser(lexer::pointer_t l)
Definition: parser.cpp:64
int const g_component_value_flag_return_on_semi_colon
Definition: parser.cpp:59
std::shared_ptr< node > pointer_t
Definition: node.h:122
node_type_t
Definition: node.h:36
node::pointer_t declaration(node::pointer_t identifier)
Definition: parser.cpp:334
static bool is_variable_set(node::pointer_t n, bool with_block)
Definition: parser.cpp:664
node::pointer_t stylesheet()
Definition: parser.cpp:70
node::pointer_t at_rule(node::pointer_t at_keyword)
Definition: parser.cpp:208
int const g_component_value_flag_return_on_variable
Definition: parser.cpp:60
static bool argify(node::pointer_t n, node_type_t const separator=node_type_t::COMMA)
Definition: parser.cpp:786
node::pointer_t component_value()
Definition: parser.cpp:95
node::pointer_t next_token()
Definition: parser.cpp:100
node::pointer_t declaration_list()
Definition: parser.cpp:85
std::shared_ptr< lexer > pointer_t
Definition: lexer.h:28
node::pointer_t rule_list()
Definition: parser.cpp:75
node::pointer_t qualified_rule(node::pointer_t n)
Definition: parser.cpp:238
static error & instance()
Definition: error.cpp:78
node::pointer_t block(node::pointer_t b, node_type_t closing_token)
Definition: parser.cpp:587

Documentation of CSS Preprocessor.

This document is part of the Snap! Websites Project.

Copyright by Made to Order Software Corp.