Current Version: 1.0.20
Project Name: csspp
parser.cpp
Go to the documentation of this file.
1 // CSS Preprocessor
2 // Copyright (c) 2015-2018 Made to Order Software Corp. All Rights Reserved
3 //
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation; either version 2 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program; if not, write to the Free Software
16 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 
47 #include "csspp/parser.h"
48 
49 #include "csspp/exceptions.h"
50 
51 #include <iostream>
52 
53 namespace csspp
54 {
55 
56 namespace
57 {
58 
61 
62 } // no name namespace
63 
65  : f_lexer(l)
66 {
67  next_token();
68 }
69 
71 {
72  return stylesheet(f_last_token);
73 }
74 
76 {
77  return rule_list(f_last_token);
78 }
79 
81 {
82  return rule(f_last_token);
83 }
84 
86 {
88 }
89 
91 {
93 }
94 
96 {
98 }
99 
101 {
102  f_last_token = f_lexer->next_token();
103 //std::cerr << "*** TOKEN: " << *f_last_token;
104  return f_last_token;
105 }
106 
108 {
109  node::pointer_t result(new node(node_type_t::LIST, n->get_position()));
110 
111  for(; !n->is(node_type_t::EOF_TOKEN); n = f_last_token)
112  {
113  // completely ignore the CDO and CDC, if the "assembler"
114  // wants to output them, it will do so, but otherwise it
115  // is just completely ignored
116  //
117  // also white spaces at this level are pretty much useless
118  //
119  if(n->is(node_type_t::CDO)
120  || n->is(node_type_t::CDC)
121  || n->is(node_type_t::WHITESPACE))
122  {
123  next_token();
124  continue;
125  }
126 
130  {
131  error::instance() << n->get_position()
132  << "Unexpected closing block of type: " << n->get_type() << "."
134  break;
135  }
136 
137  if(n->is(node_type_t::COMMENT))
138  {
139  result->add_child(n);
140  next_token();
141  }
142  else if(n->is(node_type_t::AT_KEYWORD))
143  {
144  result->add_child(at_rule(n));
145  }
146  else
147  {
148  // anything else is a qualified rule
149  result->add_child(qualified_rule(n));
150  }
151  }
152 
153  // we always return the LIST because, if it starts with an @import (or
154  // rather is just one @import) or a $var, then that node needs to be
155  // replaced and we could not do that if those were root nodes
156  return result;
157 }
158 
160 {
161  node::pointer_t result(new node(node_type_t::LIST, n->get_position()));
162 
163  for(node::pointer_t q; (!q || !q->is(node_type_t::EOF_TOKEN)) && !n->is(node_type_t::EOF_TOKEN); n = f_last_token)
164  {
165  q = rule(n);
166  result->add_child(q);
167  }
168 
169  return result;
170 }
171 
173 {
174  if(n->is(node_type_t::CDO)
175  || n->is(node_type_t::CDC))
176  {
177  error::instance() << n->get_position()
178  << "HTML comment delimiters (<!-- and -->) are not allowed in this CSS document."
180  return node::pointer_t(new node(node_type_t::EOF_TOKEN, n->get_position()));
181  }
182 
186  {
187  error::instance() << n->get_position()
188  << "Unexpected closing block of type: " << n->get_type() << "."
190  return node::pointer_t(new node(node_type_t::EOF_TOKEN, n->get_position()));
191  }
192 
193  if(n->is(node_type_t::WHITESPACE))
194  {
195  // skip potential whitespaces
196  n = next_token();
197  }
198 
199  if(n->is(node_type_t::AT_KEYWORD))
200  {
201  return at_rule(n);
202  }
203 
204  // anything else is a qualified rule
205  return qualified_rule(n);
206 }
207 
209 {
210  // the '@' was already eaten, it will be our result
212 
213  if(n->empty())
214  {
215  error::instance() << at_keyword->get_position()
216  << "At '@' command cannot be empty (missing expression or block) unless ended by a semicolon (;)."
218  }
219  else
220  {
221  node::pointer_t last_child(n->get_last_child());
223  {
224  // skip the semi-colon
225  //
226  next_token();
227  }
228  else if(!last_child->is(node_type_t::OPEN_CURLYBRACKET))
229  {
230  error::instance() << at_keyword->get_position()
231  << "At '@' command must end with a block or a ';'."
233  }
234  at_keyword->take_over_children_of(n);
235  }
236 
237  return at_keyword;
238 }
239 
241 {
242  if(n->is(node_type_t::EOF_TOKEN))
243  {
244  return n;
245  }
246  if(n->is(node_type_t::SEMICOLON))
247  {
248  // skip the ';' (i.e. ';' in 'foo { blah: 123 };')
249  next_token();
250 
251  // it is an error, we just make it clear what error it is because
252  // by default it would otherwise come out as "invalid qualified rule"
253  // which is rather hard to understand here...
254  error::instance() << n->get_position()
255  << "A qualified rule cannot end a { ... } block with a ';'."
257  return node::pointer_t(new node(node_type_t::EOF_TOKEN, n->get_position()));
258  }
259 
260  // a qualified rule is a component value list that
261  // ends with a block
263 
264  if(result->empty())
265  {
266  // I have not been able to reach these lines, somehow...
267  error::instance() << n->get_position()
268  << "A qualified rule cannot be empty; you are missing a { ... } block."
270  }
271  else
272  {
273  node::pointer_t last_child(result->get_last_child());
274  if(!is_variable_set(result, false)
275  && !last_child->is(node_type_t::OPEN_CURLYBRACKET))
276  {
277  error::instance() << n->get_position()
278  << "A qualified rule must end with a { ... } block."
280  }
281  }
282 
283  return result;
284 }
285 
287 {
288  node::pointer_t result(new node(node_type_t::LIST, n->get_position()));
289 
290  for(;;)
291  {
292  if(n->is(node_type_t::WHITESPACE))
293  {
294  n = next_token();
295  }
296 
297  if(n->is(node_type_t::IDENTIFIER))
298  {
299  result->add_child(declaration(n));
301  {
302  // the EOF_TOKEN below generates an error if we
303  // do not remove those spaces ahead of time
305  {
306  next_token();
307  }
308  break;
309  }
310  // skip the ';'
311  n = next_token();
312  }
313  else if(n->is(node_type_t::AT_KEYWORD))
314  {
315  result->add_child(at_rule(n));
316  n = f_last_token;
317  }
318  else
319  {
320  break;
321  }
322  }
323 
325  {
326  error::instance() << f_last_token->get_position()
327  << "the end of the stream was not reached in this declaration, we stopped on a "
328  << f_last_token->get_type()
329  << "."
331  }
332 
333  return result;
334 }
335 
337 {
338  node::pointer_t result(new node(node_type_t::DECLARATION, identifier->get_position()));
339  result->set_string(identifier->get_string());
340 
342 
343  // allow white spaces
344  if(n->is(node_type_t::WHITESPACE))
345  {
346  n = next_token();
347  }
348 
349  // here we must have a ':'
350  if(n->is(node_type_t::COLON))
351  {
352  // skip the colon, no need to keep it around
353  n = next_token();
354  }
355  else
356  {
357  error::instance() << n->get_position()
358  << "':' missing in your declaration starting with \""
359  << identifier->get_string()
360  << "\"."
362  }
363 
364  // a component value
366 
367  return result;
368 }
369 
371 {
372  node::pointer_t result(new node(node_type_t::LIST, n->get_position()));
373 
374  node::pointer_t list(new node(node_type_t::COMPONENT_VALUE, n->get_position()));
375  result->add_child(list);
376  for(;; n = f_last_token)
377  {
378  // this test is rather ugly... also it kinda breaks the
379  // so called 'preserved tokens'
380  //
381  if(n->is(node_type_t::EOF_TOKEN)
385  || ((flags & g_component_value_flag_return_on_semi_colon) != 0 && n->is(node_type_t::SEMICOLON)) // declarations handle the semi-colon differently
386  || n->is(node_type_t::CDO)
387  || n->is(node_type_t::CDC))
388  {
389  break;
390  }
391 
392  if(n->is(node_type_t::AT_KEYWORD))
393  {
394  list->add_child(at_rule(n));
395  continue;
396  }
397 
398  if(n->is(node_type_t::SEMICOLON))
399  {
400  next_token();
401 
402  // remove leading and trailing whitespace, no need really
403  while(!list->empty() && list->get_child(0)->is(node_type_t::WHITESPACE))
404  {
405  list->remove_child(0);
406  }
407  while(!list->empty() && list->get_last_child()->is(node_type_t::WHITESPACE))
408  {
409  list->remove_child(list->size() - 1);
410  }
411 
412  // variables are viewed as a terminator string when ended by a
413  // semicolon; a qualified rule normally requires a block to
414  // end, but we have a special case to allow definition of
415  // variables anywhere
417  && is_variable_set(list, false))
418  {
419  break;
420  }
421 
422  if(!list->empty())
423  {
424  // move to a new sub-list
425  list.reset(new node(node_type_t::COMPONENT_VALUE, n->get_position()));
426  result->add_child(list);
427  }
428  continue;
429  }
430 
431  if(n->is(node_type_t::EXCLAMATION))
432  {
433  node::pointer_t exclamation(next_token());
434  if(exclamation->is(node_type_t::WHITESPACE))
435  {
436  exclamation = next_token();
437  }
438  if(exclamation->is(node_type_t::IDENTIFIER))
439  {
440  // remove the WHITESPACE before if there is one
441  if(!list->empty()
442  && list->get_last_child()->is(node_type_t::WHITESPACE))
443  {
444  list->remove_child(list->get_last_child());
445  }
446 
447  // save the identifier in the EXCLAMATION node
448  // and add that to the current COMPONENT_VALUE
449  n->set_string(exclamation->get_string());
450  list->add_child(n);
451 
452  // TBD: should we check that the identifier is either
453  // "important" or "global" at this point?
454  // (there are also others we support like "default")
455 
456  // read the next token and if it is a space, skip it
457  n = next_token();
458  if(n->is(node_type_t::WHITESPACE))
459  {
460  next_token();
461  }
462  }
463  else
464  {
465  error::instance() << exclamation->get_position()
466  << "A '!' must be followed by an identifier, got a "
467  << exclamation->get_type()
468  << " instead."
470  }
471  continue;
472  }
473 
474  // remove trailing whitespace before a block, no need
478  && !list->empty()
479  && list->get_last_child()->is(node_type_t::WHITESPACE))
480  {
481  list->remove_child(list->size() - 1);
482  }
483 
485  {
486  // in this special case, we read the {}-block and return
487  // (i.e. end of an @-rule, etc.)
488  //
489  // however, to support the full SASS syntax we need to
490  // support two special cases:
491  //
492  // $var: { some-value: here; };
493  // font: { family: strange; style: italic };
494  //
495  // For those special entries, we must avoid returning
496  // when we find a block (darn! this grammar...)
497  //
498  // Note that the second test is done after we read the block
499  // since the presence of the block is checked in case of the
500  // nested declaration.
501  //
502  list->add_child(component_value(n));
503 
504  // remove leading and trailing whitespace, no need really
505  // (to make sure the tests below work as expected)
506  //
507  while(!list->empty() && list->get_child(0)->is(node_type_t::WHITESPACE))
508  {
509  list->remove_child(0);
510  }
511 
512  // should we return, or were those sub-definitions?
513  //
514  if(!is_variable_set(list, true)
515  && !is_nested_declaration(list))
516  {
517  break;
518  }
519 
521  {
522  next_token();
523  }
524 
526  {
527  // blocks defining a variable or a nested declaration
528  // must be followed by a semi-colon or we have an error
529  error::instance() << list->get_child(0)->get_position()
530  << "Variable set to a block and a nested property block must end with a semicolon (;) after said block."
532  }
533  }
534  else
535  {
536  list->add_child(component_value(n));
537  }
538  }
539 
540  // remove leading and trailing whitespace, no need really
541  if(!list->empty() && list->get_child(0)->is(node_type_t::WHITESPACE))
542  {
543  list->remove_child(0);
544  }
545  if(!list->empty() && list->get_last_child()->is(node_type_t::WHITESPACE))
546  {
547  list->remove_child(list->size() - 1);
548  }
549 
550  if(list->empty())
551  {
552  result->remove_child(list);
553  }
554 
555  if(result->size() == 1)
556  {
557  result = result->get_last_child();
558  }
559 
560  return result;
561 }
562 
564 {
566  {
567  // parse a block up to '}'
568  return block_list(n);
569  }
570 
572  {
573  // parse a block up to ']'
575  }
576 
578  || n->is(node_type_t::FUNCTION)
580  {
581  // parse a block up to ')'
583  }
584 
585  next_token();
586 
587  // n is the token we keep
588  return n;
589 }
590 
592 {
594  b->take_over_children_of(children);
596  {
597  next_token();
598  }
599  if(f_last_token->is(closing_token))
600  {
601  // skip that closing token
602  next_token();
603  }
604  else
605  {
606  error::instance() << b->get_position()
607  << "Block expected to end with "
608  << closing_token
609  << " but got "
610  << f_last_token->get_type()
611  << " instead."
613  }
614 
615  return b;
616 }
617 
619 {
620  // skip the '{'
621  next_token();
622 
623  do
624  {
626  b->add_child(children);
627  // WHITESPACE tokens are skipped between component values
628  // Also the variable tokens that force a return without a next_token()
632  {
633  next_token();
634  }
637  {
638  error::instance() << b->get_position()
639  << "Block expected to end with "
641  << " but got "
642  << f_last_token->get_type()
643  << " instead."
645  next_token();
646  }
647  }
650 
652  {
653  error::instance() << b->get_position()
654  << "Block expected to end with "
656  << " but got "
657  << f_last_token->get_type()
658  << " instead."
660  }
661 
662  // skip the '}'
663  next_token();
664 
665  return b;
666 }
667 
668 bool parser::is_variable_set(node::pointer_t n, bool with_block)
669 {
670  // a variable set is at least 3 tokens:
671  // $var:<value>
672  if(n->size() < 3
673  || (!n->get_child(0)->is(node_type_t::VARIABLE)
674  && !n->get_child(0)->is(node_type_t::VARIABLE_FUNCTION)))
675  {
676  return false;
677  }
678 
679  size_t pos(n->get_child(1)->is(node_type_t::WHITESPACE) ? 2 : 1);
680  if(!n->get_child(pos)->is(node_type_t::COLON))
681  {
682  return false;
683  }
684 
685  if(!with_block)
686  {
687  // in this case the shorthand is enough: $var ':'
688  return true;
689  }
690 
691  // WARNING: from here the size needs to be checked since the list may
692  // be smaller than what we are looking for in it
693 
694  // in this case we need to have: $var ':' '{'
695  ++pos;
696  if(pos < n->size() && n->get_child(pos)->is(node_type_t::WHITESPACE))
697  {
698  ++pos;
699  }
700 
701  return pos < n->size() && n->get_child(pos)->is(node_type_t::OPEN_CURLYBRACKET);
702 }
703 
705 {
706  // a declaration with a sub-block
707  // field: [optional-values] '{' ... '}' ';'
708  if(n->size() < 3
709  || !n->get_child(0)->is(node_type_t::IDENTIFIER)
710  || !n->get_last_child()->is(node_type_t::OPEN_CURLYBRACKET))
711  {
712  return false;
713  }
714 
715  // the colon is mandatory, after an optional whitespace
716  size_t pos(n->get_child(1)->is(node_type_t::WHITESPACE) ? 2 : 1);
717  if(!n->get_child(pos)->is(node_type_t::COLON))
718  {
719  return false;
720  }
721  ++pos; // skip the colon
722  if(pos >= n->size())
723  {
724  // this is "too short" so not really a declaration nor a component value
725  // note: I'm not able to reach this one anymore, I think that's because
726  // of the OPEN_CURLYBRACKET that I moved at the top...
727  return false; // LCOV_EXCL_LINE
728  }
729  if(n->get_child(pos)->is(node_type_t::WHITESPACE)
730  || n->get_child(pos)->is(node_type_t::OPEN_CURLYBRACKET))
731  {
732  // a colon cannot be followed by a space or '{' in a valid selector
733  return true;
734  }
735  if(n->get_child(pos)->is(node_type_t::FUNCTION))
736  {
737  // in this case we have <id>':'<func> which can be a valid selector
738  // so we have to skip this function otherwise we return 'true'
739  ++pos;
740  if(pos >= n->size())
741  {
742  // this test is for security (code may change over time...)
743  // but since the last item must be a curly bracket, it could
744  // not be this function, right?
745  return false; // LCOV_EXCL_LINE
746  }
747  }
748 
749  for(;;)
750  {
751  switch(n->get_child(pos)->get_type())
752  {
753  case node_type_t::COLON:
757  case node_type_t::SCOPE:
758  // a valid declaration cannot include one of those
759  return false;
760 
761  case node_type_t::ADD:
762  case node_type_t::COMMA:
763  //case node_type_t::FUNCTION: -- must be preceded by ':' so no need here we already returned if we hit a colon
765  case node_type_t::HASH:
770  case node_type_t::PERIOD:
772  break;
773 
774  default:
775  // this is something that would not be valid in a selector
776  // so we must have a declaration...
777  return true;
778 
779  }
780 
781  ++pos;
782  if(pos >= n->size())
783  {
784  // everything looks valid for a selector, so return false
785  return false;
786  }
787  }
788 }
789 
791 {
792  switch(separator)
793  {
794  case node_type_t::COMMA:
795  case node_type_t::DIVIDE:
796  break;
797 
798  default:
799  throw csspp_exception_logic("argify only supports ',' and '/' as separators.");
800 
801  }
802 
803  // make sure there are items and these are not already arguments
804  size_t const max_children(n->size());
805  if(max_children > 0
806  && !n->get_child(0)->is(node_type_t::ARG))
807  {
808  node::pointer_t temp(new node(node_type_t::LIST, n->get_position()));
809  temp->take_over_children_of(n);
810 
811  node::pointer_t arg(new node(node_type_t::ARG, n->get_position()));
812  arg->set_integer(static_cast<integer_t>(separator));
813  n->add_child(arg);
814 
815  for(size_t i(0); i < max_children; ++i)
816  {
817  node::pointer_t child(temp->get_child(i));
818  if(child->is(node_type_t::OPEN_CURLYBRACKET))
819  {
820  if(i + 1 != max_children)
821  {
822  throw csspp_exception_logic("compiler.cpp:compiler::argify(): list that has an OPEN_CURLYBRACKET that is not the last child."); // LCOV_EXCL_LINE
823  }
824  n->add_child(child);
825  break;
826  }
827  if(child->is(separator))
828  {
829  // make sure to remove any WHITESPACE appearing just
830  // before a comma
831  while(!arg->empty() && arg->get_last_child()->is(node_type_t::WHITESPACE))
832  {
833  arg->remove_child(arg->get_last_child());
834  }
835  if(arg->empty())
836  {
837  if(n->size() == 1)
838  {
839  error::instance() << n->get_position()
840  << "dangling comma at the beginning of a list of arguments or selectors."
842  }
843  else
844  {
845  error::instance() << n->get_position()
846  << "two commas in a row are invalid in a list of arguments or selectors."
848  }
849  return false;
850  }
851  if(i + 1 == max_children
852  || temp->get_child(i + 1)->is(node_type_t::OPEN_CURLYBRACKET))
853  {
854  error::instance() << n->get_position()
855  << "dangling comma at the end of a list of arguments or selectors."
857  return false;
858  }
859  // move to the next 'arg'
860  arg.reset(new node(node_type_t::ARG, n->get_position()));
861  arg->set_integer(static_cast<integer_t>(separator));
862  n->add_child(arg);
863  }
864  else if(!child->is(node_type_t::WHITESPACE) || !arg->empty())
865  {
866  arg->add_child(child);
867  }
868  }
869  }
870 
871  return true;
872 }
873 
874 } // namespace csspp
875 
876 // Local Variables:
877 // mode: cpp
878 // indent-tabs-mode: nil
879 // c-basic-offset: 4
880 // tab-width: 4
881 // End:
882 
883 // vim: ts=4 sw=4 et
node::pointer_t component_value_list()
Definition: parser.cpp:90
lexer::pointer_t f_lexer
Definition: parser.h:56
node::pointer_t f_last_token
Definition: parser.h:57
node::pointer_t rule()
Definition: parser.cpp:80
node::pointer_t block_list(node::pointer_t b)
Definition: parser.cpp:618
static bool is_nested_declaration(node::pointer_t n)
Definition: parser.cpp:704
parser(lexer::pointer_t l)
Definition: parser.cpp:64
int const g_component_value_flag_return_on_semi_colon
Definition: parser.cpp:59
std::shared_ptr< node > pointer_t
Definition: node.h:128
The namespace of all the classes in the CSS Preprocessor.
Definition: assembler.cpp:40
node_type_t
Definition: node.h:36
node::pointer_t declaration(node::pointer_t identifier)
Definition: parser.cpp:336
static bool is_variable_set(node::pointer_t n, bool with_block)
Definition: parser.cpp:668
node::pointer_t stylesheet()
Definition: parser.cpp:70
node::pointer_t at_rule(node::pointer_t at_keyword)
Definition: parser.cpp:208
int const g_component_value_flag_return_on_variable
Definition: parser.cpp:60
static bool argify(node::pointer_t n, node_type_t const separator=node_type_t::COMMA)
Definition: parser.cpp:790
node::pointer_t component_value()
Definition: parser.cpp:95
node::pointer_t next_token()
Definition: parser.cpp:100
node::pointer_t declaration_list()
Definition: parser.cpp:85
std::shared_ptr< lexer > pointer_t
Definition: lexer.h:28
node::pointer_t rule_list()
Definition: parser.cpp:75
node::pointer_t qualified_rule(node::pointer_t n)
Definition: parser.cpp:240
static error & instance()
Definition: error.cpp:78
node::pointer_t block(node::pointer_t b, node_type_t closing_token)
Definition: parser.cpp:591

Documentation of CSS Preprocessor.

This document is part of the Snap! Websites Project.

Copyright by Made to Order Software Corp.