1 //===-- GoLexer.cpp ---------------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
14 using namespace lldb_private;
// Storage for the table that maps keyword/operator spellings to token types.
// It is filled in on first use: LookupKeyword() and LookupToken() both call
// InitKeywords() when this pointer is still null.
16 llvm::StringMap<GoLexer::TokenType> *GoLexer::m_keywords;
// Construct a lexer over the C string `src`.
//
// m_src starts at the first character and m_end is computed with strlen(),
// so `src` must be a non-null, NUL-terminated string. The pointer is stored
// as-is (no copy is made here), and the last-token record starts out as
// TOK_INVALID with an empty value.
18 GoLexer::GoLexer(const char *src)
19 : m_src(src), m_end(src + strlen(src)), m_last_token(TOK_INVALID, "") {}
// Advance m_src over whitespace and comments that precede the next token.
// Lex() treats the boolean result as its `newline` flag; presumably that is
// `saw_newline`, but the return statement is not visible in this excerpt.
21 bool GoLexer::SkipWhitespace() {
22 bool saw_newline = false;
// Step through the buffer one character at a time, never past m_end.
23 for (; m_src < m_end; ++m_src) {
// A '/' might open a comment, so SkipComment() is given a chance first; this
// branch is taken when it reports false.
26 if (*m_src == '/' && !SkipComment())
// Otherwise react to the first character that is not whitespace.
28 else if (!IsWhitespace(*m_src))
// Try to recognize a comment that begins at m_src. Two forms are checked:
// a "//" comment and a "/* ... */" comment. The lines that advance m_src and
// produce the boolean result are not visible in this excerpt.
//
// NOTE(review): m_src[1] and c[1] can read the byte at m_end. That appears to
// be safe only because the constructor derives m_end from strlen(), i.e. the
// byte at m_end is the terminating NUL - confirm no caller breaks this.
34 bool GoLexer::SkipComment() {
// "//" form: scan the characters after the two slashes.
35 if (m_src[0] == '/' && m_src[1] == '/') {
36 for (const char *c = m_src + 2; c < m_end; ++c) {
// "/*" form: scan for the closing "*/" pair.
43 } else if (m_src[0] == '/' && m_src[1] == '*') {
44 for (const char *c = m_src + 2; c < m_end; ++c) {
45 if (c[0] == '*' && c[1] == '/') {
// Lex the next token and record it in m_last_token.
//
// Leading whitespace/comments are skipped first, then InternalLex() decides
// the token type. The token value is the text between the position recorded
// before InternalLex() ran and wherever m_src points afterwards; it is an
// llvm::StringRef into the source buffer rather than a copy.
// The return statement is not visible in this excerpt (presumably
// m_last_token, given the reference return type).
54 const GoLexer::Token &GoLexer::Lex() {
55 bool newline = SkipWhitespace();
// Remember where the token text begins so its length can be computed below.
56 const char *start = m_src;
57 m_last_token.m_type = InternalLex(newline);
58 m_last_token.m_value = llvm::StringRef(start, m_src - start);
// Decide the type of the token that starts at m_src; Lex() stores the result
// as m_last_token.m_type. `newline` is the value Lex() received from
// SkipWhitespace(). Only fragments of the body are visible in this excerpt.
62 GoLexer::TokenType GoLexer::InternalLex(bool newline) {
// The type of the previously lexed token takes part in the decision.
// NOTE(review): combined with `newline`, this looks like Go's automatic
// semicolon insertion - confirm against the full body.
67 switch (m_last_token.m_type) {
75 case KEYWORD_CONTINUE:
76 case KEYWORD_FALLTHROUGH:
// Peek one character ahead for a decimal digit.
124 if (IsDecimal(m_src[1]))
128 // For lldb persistent vars.
138 if (IsLetterOrDigit(c))
// Recognize an operator at m_src by probing the keyword/operator table with
// progressively shorter candidates: three characters, then two, then one.
// A shorter candidate is only tried while no longer one has matched.
// The statements that run after a successful match (and the final return)
// are not visible in this excerpt.
144 GoLexer::TokenType GoLexer::DoOperator() {
145 TokenType t = TOK_INVALID;
// Three-character probe; guarded so the StringRef stays inside the buffer.
146 if (m_end - m_src > 2) {
147 t = LookupKeyword(llvm::StringRef(m_src, 3));
148 if (t != TOK_INVALID)
// Two-character probe, again only when that many characters remain.
151 if (t == TOK_INVALID && m_end - m_src > 1) {
152 t = LookupKeyword(llvm::StringRef(m_src, 2));
153 if (t != TOK_INVALID)
// One-character probe. There is no length guard here; presumably the caller
// only invokes this with m_src < m_end - verify against InternalLex().
156 if (t == TOK_INVALID) {
157 t = LookupKeyword(llvm::StringRef(m_src, 1));
// Lex an identifier or keyword starting at m_src.
// Returns TOK_IDENTIFIER unless the text is found in the keyword table; the
// statement taken in that case is not visible in this excerpt (presumably it
// returns the keyword's token type).
163 GoLexer::TokenType GoLexer::DoIdent() {
// The first character is consumed without being inspected here; presumably
// the caller has already classified it.
164 const char *start = m_src++;
// Consume the remaining letters/digits, staying within the buffer.
165 while (m_src < m_end && IsLetterOrDigit(*m_src)) {
// Look the consumed text up in the keyword/operator table.
168 TokenType kw = LookupKeyword(llvm::StringRef(start, m_src - start));
169 if (kw != TOK_INVALID)
171 return TOK_IDENTIFIER;
// Lex a numeric literal starting at m_src. Only fragments of the body are
// visible in this excerpt: a hexadecimal path, a decimal-digit scan, an
// imaginary-literal result, and handling of a sign character.
//
// NOTE(review): the digit loops below test *m_src without comparing against
// m_end. They presumably stop at the terminating NUL (m_end comes from
// strlen() in the constructor) - confirm IsHexChar/IsDecimal reject '\0'.
174 GoLexer::TokenType GoLexer::DoNumber() {
// "0x" / "0X" prefix selects the hexadecimal path.
175 if (m_src[0] == '0' && (m_src[1] == 'x' || m_src[1] == 'X')) {
177 while (IsHexChar(*m_src))
184 while (IsDecimal(*m_src))
189 return LIT_IMAGINARY;
// Both flags are cleared together; presumably they track whether a '.' or an
// exponent marker is still acceptable - their declarations are not visible.
200 dot_ok = e_ok = false;
// Accept a '+' or '-' at this position.
202 if (*m_src == '+' || *m_src == '-')
// Lex a rune literal. Only fragments of the body are visible in this excerpt.
213 GoLexer::TokenType GoLexer::DoRune() {
// Pre-increment steps past the character m_src currently points at
// (presumably the opening quote) before anything is examined, and the loop
// stops at the end of the buffer.
214 while (++m_src < m_end) {
// Peeks at the following character for a newline.
// NOTE(review): presumably part of escape handling - confirm in the full body.
222 if (m_src[1] == '\n')
// Lex a string literal. Only fragments of the body are visible in this
// excerpt; it contains two separate scanning loops.
// NOTE(review): presumably one loop per Go string form (raw vs. interpreted)
// - confirm against the full body.
230 GoLexer::TokenType GoLexer::DoString() {
// First scan: advance before examining, bounded by the end of the buffer.
232 while (++m_src < m_end) {
// Second scan, same stepping pattern.
240 while (++m_src < m_end) {
// Peeks at the following character for a newline.
248 if (m_src[1] == '\n')
// Look `id` up in the keyword/operator table and return its token type.
// Callers in this file compare the result against TOK_INVALID to detect a
// miss; the return statements themselves are not visible in this excerpt.
256 GoLexer::TokenType GoLexer::LookupKeyword(llvm::StringRef id) {
// Build the table the first time it is needed.
// NOTE(review): this check-then-assign has no synchronization in view;
// confirm lexers are never first used from several threads at once.
257 if (m_keywords == nullptr)
258 m_keywords = InitKeywords();
259 const auto &it = m_keywords->find(id);
// Not present in the table.
260 if (it == m_keywords->end())
// Reverse lookup: find a table entry whose token type equals `t`.
// This walks the table entry by entry, so the cost grows with the table
// size. The return statements are not visible in this excerpt (presumably
// the matching entry's spelling).
265 llvm::StringRef GoLexer::LookupToken(TokenType t) {
// Build the table the first time it is needed.
266 if (m_keywords == nullptr)
267 m_keywords = InitKeywords();
268 for (const auto &e : *m_keywords) {
269 if (e.getValue() == t)
// Build the table that maps Go keyword and operator spellings to token types.
//
// The map is allocated with `new` (constructed with an initial-size argument
// of 128). LookupKeyword() and LookupToken() store the returned pointer in
// m_keywords; nothing visible in this excerpt deletes it.
// The end of the function, and a few entries between "==" and "!", are not
// visible in this excerpt.
275 llvm::StringMap<GoLexer::TokenType> *GoLexer::InitKeywords() {
276 auto &result = *new llvm::StringMap<TokenType>(128);
// Keywords.
277 result["break"] = KEYWORD_BREAK;
278 result["default"] = KEYWORD_DEFAULT;
279 result["func"] = KEYWORD_FUNC;
280 result["interface"] = KEYWORD_INTERFACE;
281 result["select"] = KEYWORD_SELECT;
282 result["case"] = KEYWORD_CASE;
283 result["defer"] = KEYWORD_DEFER;
284 result["go"] = KEYWORD_GO;
285 result["map"] = KEYWORD_MAP;
286 result["struct"] = KEYWORD_STRUCT;
287 result["chan"] = KEYWORD_CHAN;
288 result["else"] = KEYWORD_ELSE;
289 result["goto"] = KEYWORD_GOTO;
290 result["package"] = KEYWORD_PACKAGE;
291 result["switch"] = KEYWORD_SWITCH;
292 result["const"] = KEYWORD_CONST;
293 result["fallthrough"] = KEYWORD_FALLTHROUGH;
294 result["if"] = KEYWORD_IF;
295 result["range"] = KEYWORD_RANGE;
296 result["type"] = KEYWORD_TYPE;
297 result["continue"] = KEYWORD_CONTINUE;
298 result["for"] = KEYWORD_FOR;
299 result["import"] = KEYWORD_IMPORT;
300 result["return"] = KEYWORD_RETURN;
301 result["var"] = KEYWORD_VAR;
// Arithmetic and bitwise operators.
302 result["+"] = OP_PLUS;
303 result["-"] = OP_MINUS;
304 result["*"] = OP_STAR;
305 result["/"] = OP_SLASH;
306 result["%"] = OP_PERCENT;
307 result["&"] = OP_AMP;
308 result["|"] = OP_PIPE;
309 result["^"] = OP_CARET;
310 result["<<"] = OP_LSHIFT;
311 result[">>"] = OP_RSHIFT;
312 result["&^"] = OP_AMP_CARET;
// Compound assignment forms of the operators above.
313 result["+="] = OP_PLUS_EQ;
314 result["-="] = OP_MINUS_EQ;
315 result["*="] = OP_STAR_EQ;
316 result["/="] = OP_SLASH_EQ;
317 result["%="] = OP_PERCENT_EQ;
318 result["&="] = OP_AMP_EQ;
319 result["|="] = OP_PIPE_EQ;
320 result["^="] = OP_CARET_EQ;
321 result["<<="] = OP_LSHIFT_EQ;
322 result[">>="] = OP_RSHIFT_EQ;
323 result["&^="] = OP_AMP_CARET_EQ;
// Logical, channel, increment/decrement, comparison and declaration operators.
324 result["&&"] = OP_AMP_AMP;
325 result["||"] = OP_PIPE_PIPE;
326 result["<-"] = OP_LT_MINUS;
327 result["++"] = OP_PLUS_PLUS;
328 result["--"] = OP_MINUS_MINUS;
329 result["=="] = OP_EQ_EQ;
333 result["!"] = OP_BANG;
334 result["!="] = OP_BANG_EQ;
335 result["<="] = OP_LT_EQ;
336 result[">="] = OP_GT_EQ;
337 result[":="] = OP_COLON_EQ;
338 result["..."] = OP_DOTS;
// Brackets, separators and other punctuation.
339 result["("] = OP_LPAREN;
340 result["["] = OP_LBRACK;
341 result["{"] = OP_LBRACE;
342 result[","] = OP_COMMA;
343 result["."] = OP_DOT;
344 result[")"] = OP_RPAREN;
345 result["]"] = OP_RBRACK;
346 result["}"] = OP_RBRACE;
347 result[";"] = OP_SEMICOLON;
348 result[":"] = OP_COLON;