platform_system_core/init/parser/tokenizer.h

// Copyright (C) 2015 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <string>

namespace init {

// Used to tokenize a std::string.
// Call Next() to advance through each token until it returns false,
// indicating there are no more tokens left in the string.
// The current token can be accessed with current(), which returns
// a Token.
// Supported tokens are:
// TOK_START - Next() has yet to be called
// TOK_END - At the end of string
// TOK_NEWLINE - The end of a line denoted by \n.
// TOK_TEXT - A word.
// Comments are denoted with '#' and the tokenizer will ignore
// the rest of the line.
// Double quotes can be used to insert whitespace into words.
// A backslash at the end of a line denotes continuation and
// a TOK_NEWLINE will not be generated for that line.
class Tokenizer {
 public:
  // Creates a tokenizer for |data|.
  // The constructor is explicit so that a std::string is never silently
  // converted into a Tokenizer (e.g. when passed as a function argument).
  // NOTE(review): data_ below is a reference member, so presumably it is
  // bound to |data| and the caller's string must outlive this Tokenizer;
  // passing a temporary would then leave a dangling reference. Confirm
  // against the constructor definition.
  explicit Tokenizer(const std::string& data);
  ~Tokenizer();

  // The kinds of token that current() can report.
  enum TokenType { TOK_START, TOK_END, TOK_NEWLINE, TOK_TEXT };

  // A single token: its kind plus an associated string.
  // NOTE(review): |text| is presumably only meaningful for TOK_TEXT; verify
  // against the implementation.
  struct Token {
    TokenType type;
    std::string text;
  };

  // Returns the current token.
  const Token& current();

  // Move to the next token, returns false at the end of input.
  bool Next();

 private:
  // Internal scanning helpers. Their definitions live outside this header;
  // "Adv" presumably abbreviates "advance" - confirm in the implementation.
  void GetData();
  void AdvChar();
  void AdvText();
  void AdvUntil(char x);
  void AdvWhiteSpace();
  void StartText();
  void EndText();

  // Reference to the input string; this class does not hold its own copy.
  const std::string& data_;
  // Token handed out by current().
  Token current_;

  // Scanner state. NOTE(review): meanings inferred from the names only
  // (end-of-input flag, read offset, character under the cursor, start
  // offset of the token being built) - confirm against the implementation.
  bool eof_;
  size_t pos_;
  char cur_char_;
  size_t tok_start_;
};

}  // namespace init
init: Add C++ tokenizer. Adds a C++ tokenizer along with unit tests. This tokenizer will replace the current C implementation which does a poor job of keeping track of pointers. This CL is a prerequisite for upcoming changes to the parser. This CL does not wire up this tokenizer and changes no existing code. All that builds is the unit tests. Change-Id: Iec3740bce7153640adc5e5bbdc57e644cedf0038 TEST: Unit tests all pass. No leaks under valgrind BUG: 22843198 2015-07-30 18:27:11 +02:00			`// Copyright (C) 2015 The Android Open Source Project`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`

			`#include <string>`

			`namespace init {`

			`// Used to tokenize a std::string.`
			`// Call Next() to advance through each token until it returns false,`
			`// indicating there are no more tokens left in the string.`
			`// The current token can be accessed with current(), which returns`
			`// a Token.`
			`// Supported tokens are:`
			`// TOK_START - Next() has yet to be called`
			`// TOK_END - At the end of string`
			`// TOK_NEWLINE - The end of a line denoted by \n.`
			`// TOK_TEXT - A word.`
			`// Comments are denoted with '#' and the tokenizer will ignore`
			`// the rest of the line.`
			`// Double quotes can be used to insert whitespace into words.`
			`// A backslash at the end of a line denotes continuation and`
			`// a TOK_NEWLINE will not be generated for that line.`
			`class Tokenizer {`
			`public:`
			`Tokenizer(const std::string& data);`
			`~Tokenizer();`

			`enum TokenType { TOK_START, TOK_END, TOK_NEWLINE, TOK_TEXT };`
			`struct Token {`
			`TokenType type;`
			`std::string text;`
			`};`

			`// Returns the current token.`
			`const Token& current();`

			`// Move to the next token, returns false at the end of input.`
			`bool Next();`

			`private:`
			`void GetData();`
			`void AdvChar();`
			`void AdvText();`
			`void AdvUntil(char x);`
			`void AdvWhiteSpace();`
			`void StartText();`
			`void EndText();`

			`const std::string& data_;`
			`Token current_;`

			`bool eof_;`
			`size_t pos_;`
			`char cur_char_;`
			`size_t tok_start_;`
			`};`

			`} // namespace init`