/******************************************************************************** * Copyright (c) 2025-2026 ZF Friedrichshafen AG * * This program and the accompanying materials are made available under the * terms of the Apache License Version 2.0 which is available at * https://www.apache.org/licenses/LICENSE-2.0 * * SPDX-License-Identifier: Apache-2.0 * * Contributors: * Erik Verhoeven - initial API and implementation ********************************************************************************/ #ifndef LEXER_H #define LEXER_H #include "lexerbase.h" #include "token.h" #include "tokenlist.h" #include "codepos.h" #include #include #include #include /** * @brief OMG IDL 4.2 keywords * @remarks Dependable on the extension settings, the keywords are extended with: char16, char32, u8string, u16string, u32string, * null, pointer, interface_id, interface_t and exception_id. */ const std::vector g_vecOmgIdlKeywords = { "abstract", "any", "alias", "attribute", "bitfield", "bitmask", "bitset", "boolean", "case", "char", "component", "connector", "const", "consumes", "context", "custom", "default", "double", "exception", "emits", "enum", "eventtype", "factory", "finder", "fixed", "float", "getraises", "home", "import", "in", "inout", "interface", "local", "long", "manages", "map", "mirrorport", "module", "multiple", "native", "Object", "octet", "oneway", "out", "primarykey", "private", "port", "porttype", "provides", "public", "publishes", "raises", "readonly", "setraises", "sequence", "short", "string", "struct", "supports", "switch", "truncatable", "typedef", "typeid", "typename", "typeprefix", "unsigned", "union", "uses", "ValueBase", "valuetype", "void", "wchar", "wstring", "int8", "uint8", "int16", "int32", "int64", "uint16", "uint32", "uint64" }; /** * @brief SDV IDL lexer class */ class CLexer { public: /** * @brief Lexing mode enumerator. */ enum class ELexingMode { lexing_idl, ///< Lexing IDL code. lexing_preproc, ///< Lexing preprocessor directives (valid for the current line only). }; /** * @brief Constructor * @param[in] pCallback Pointer to the lexer callback interface. Must not be NULL. * @param[in] bCaseSensitive When set, allow identical names that only differ in case. * @param[in] eLexingMode The lexing mode the lexer should run in. This determines the rule-set to use while lexing. */ CLexer(ILexerCallback* pCallback, bool bCaseSensitive, ELexingMode eLexingMode = ELexingMode::lexing_idl); /** * @brief Destructor */ virtual ~CLexer() = default; /** * Add keywords to the reserved keyword list (based on the enabled extension). * @param[in] rssKeyword Reference to the keyword string to add. */ void AddKeyword(const std::string& rssKeyword); /** * @brief Get a token from the code. * @param[in, out] rCode Reference to the code to be parsed. * @param[in] rptrContext Reference to the smart pointer to the source code context. * @throws Throws CCompileException on parse error. * @remarks Whitespace, comments and preprocessor directions are not provided as a result, but are provided through the * callback interface. * @post Updates the position to the token following the token. * @return Returns the read token or an empty token when OEF has been reached. */ CToken GetToken(CCodePos& rCode, const CContextPtr& rptrContext) const; /** * @brief Get the last valid token. * @return Returns a reference to the member variable containing the last read token or an empty token when no token was read * before. */ const CToken& GetLastValidToken() const; /** * @brief Read until the provided symbol or the end of the text. * @param[in, out] rCode Reference to the code to be parsed. * @param[in] cSymbol The symbol to mark the end. * @post Updates the position to the token of the symbol. * @return Returns the token until the symbol or end of text has been reached. */ CToken GetCustom(CCodePos& rCode, char cSymbol) const; /** * @brief Skip the rest of the line. * @remarks End position is at the carriage return or newline for preprocessing directives and past the carriage return or * newline for code. * @remarks Sets the newline-occurred flag for code. * @remarks Back-slash at the end of the line causes the inclusion of the next line. * @param[in, out] rCode Reference to the code to be parsed. */ void SkipLine(CCodePos& rCode) const; /** * @brief Enable preproc processing. * @details While parsing preprocessor directive, further preprocessor processing is disabled. If including an additional * file, preprocessing needs to be enabled again. */ void EnablePreprocProcessing(); private: /** * @brief Get whitespace (space, form feed, line feed, carriage return, horizontal tab, vertical tab) until there is no white * space any more. * @param[in, out] rCode Reference to the code to be parsed. * @param[out] rbNewline Set when a newline has been passed. * @throws Throws CCompileException on parse error. * @post Updates the position to the token following the whitespace. */ CToken GetWhitespace(CCodePos& rCode, bool& rbNewline) const; /** * @brief Get C-style and C++ comments. * @param[in, out] rCode Reference to the code to be parsed. * @throws Throws CCompileException on parse error. * @post Updates the position to the token following the comment. */ CToken GetComments(CCodePos& rCode) const; /** * @brief Get the identifier or keyword. * @details Get the identifier or keyword. A keyword corresponds to the list of keywords defined by the OMG IDL. All others * are identifiers. Identifiers that differ from a keyword through case differences are illegal. * @param[in, out] rCode Reference to the code to be parsed. * @throws Throws CCompileException on parse error. * @post Updates the position to the token following the identifier. * @return Returns the identifier token or an empty token when OEF has been reached. */ CToken GetIdentifierOrKeyword(CCodePos& rCode) const; /** * @brief Get the separator: { } ( ) : :: ; . * @param[in, out] rCode Reference to the code to be parsed. * @post Updates the position to the token following the separator. * @return Returns the separator token or an empty token when no separator is at the requested token. */ CToken GetSeparator(CCodePos& rCode) const; /** * @brief Get the operator: = + - * / % ^ ! ~ , | # || & && == != < <= > >= ? * @remarks The '#' symbol is only an operator when lexing preprocessor directives. This is covered by the GetToken function. * @param[in, out] rCode Reference to the code to be parsed. * @post Updates the position to the token following the operator. * @return Returns the operator token or an empty token when no operator is at the requested token. */ CToken GetOperator(CCodePos& rCode) const; /** * @brief Get the literal: hex, decimal or octal number, floating/fixed point number, character, strings, boolean and nullptr * values. * @remarks The following extension on the OMG IDL specification are possible: * - binary integer literal as is part of the C++ 14 standard. * - integer suffixes as are part of the C++ standard. * - unicode UTF-16 and UTF-32 character literals as part of the C++ 11 standard. * - ASCII and wide string sequences (2, 4 or 8 characters) as part oft the C++ standard. * - Unicode UTF-8, UTF-16, UTF-32 and wide string literals as part of the C++ 11 standard. * - raw ASCII, UTF-8, UTF-16, UTF-32 and wide string literals as is part of the C++ 11 standard. * - character and string escape sequence using 8 digits to specify a Unicode character as is part of the C++ standard. * - hexadecimal floating points as are part of the C++ 17 standard. * - floating point suffixes as are part of the C++ standard. * - boolean literal as part of the C++ standard (the values 'true' and 'false' as well as 'TRUE' and 'FALSE'). * - nullptr literal as part of the C++ 11 standard (the values 'nullptr' and 'NULL'). * @param[in, out] rCode Reference to the code to be parsed. * @throws Throws CCompileException on parse error. * @post Updates the position to the token following the literal. * @return Returns the literal token or an empty token when OEF has been reached. */ CToken GetLiteral(CCodePos& rCode) const; ILexerCallback* m_pLexerCallback = nullptr; ///< The lexer callback for inserting comments bool m_bCaseSensitive = true; ///< Case sensitivity during name comparison. ELexingMode m_eLexingMode = ELexingMode::lexing_idl; ///< Lexing mode (changes the lexing rules). mutable bool m_bNewlineOccurred = true; ///< A newline occurred - a preprocessor directive could take place. mutable CToken m_tokenLastValid; ///< The last valid token. std::vector m_vecReservedKeywords; ///< List of reserved keywords. }; /** * @brief Tokenize a string into a token list. * @param[in] szCode The code to tokenize. * @param[in] rptrContext Reference to the smart pointer to the source code context. * @return The tokenized string. */ CTokenList Tokenize(const char* szCode, const CContextPtr& rptrContext); /** * @brief Tokenize a string into a token list. * @param[in] rssCode The code to tokenize. * @param[in] rptrContext Reference to the smart pointer to the source code context. * @return The tokenized string. */ CTokenList Tokenize(const std::string& rssCode, const CContextPtr& rptrContext); /** * @brief Dummy callback interface for the lexer ignoring all calls. */ struct SLexerDummyCallback : public ILexerCallback { /** * @brief Insert whitespace. Dummy implementation. Overload of ILexerCallback::InsertWhitespace. */ virtual void InsertWhitespace(const CToken& /*rtoken*/) override {} /** * @brief Insert a comment. Dummy implementation. Overload of ILexerCallback::InsertComment. */ virtual void InsertComment(const CToken& /*rtoken*/) override {} /** * @brief Process a preprocessor directive. Overload of ILexerCallback::ProcessPreprocDirective. * @param[in] rCode Reference to the source code to process the preproc directive for. */ virtual void ProcessPreprocDirective(CCodePos& rCode) override { SLexerDummyCallback sCallback; CLexer(&sCallback, true).SkipLine(rCode); } }; /** * @brief Callback interface for the lexer storing the provided information. */ struct SLexerStoreCallback : public ILexerCallback { /** * @brief Clear the value of the callback structure. */ void Clear() { tokenWhitespace = CToken(); tokenComment = CToken(); ssPreprocLine.clear(); } /** * @brief Insert whitespace. Overload of ILexerCallback::InsertWhitespace. * @param[in] rtoken Reference to token containing the whitespace. */ virtual void InsertWhitespace(const CToken &rtoken) override { tokenWhitespace = rtoken; } /** * @brief Insert a comment. Overload of ILexerCallback::InsertComment. * @param[in] rtoken Reference to the token containing the comment. */ virtual void InsertComment(const CToken &rtoken) override { tokenComment = rtoken; } /** * @brief Process a preprocessor directive. Overload of ILexerCallback::ProcessPreprocDirective. * @param[in] rCode Reference to the source code to process the preproc directive for. */ virtual void ProcessPreprocDirective(CCodePos &rCode) override { SLexerDummyCallback sCallback; CLexer lexer(&sCallback, true); CToken token = rCode.GetLocation(); lexer.SkipLine(rCode); rCode.UpdateLocation(token); ssPreprocLine = static_cast(token); } CToken tokenWhitespace; ///< Token holding whitespace. CToken tokenComment; ///< Token holding comment. std::string ssPreprocLine; ///< String holding preprocessing line. }; #endif // !defined LEXER_H