587 lines
14 KiB
C
587 lines
14 KiB
C
|
// lex.h -- Go frontend lexer. -*- C++ -*-
|
||
|
|
||
|
// Copyright 2009 The Go Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
#ifndef GO_LEX_H
|
||
|
#define GO_LEX_H
|
||
|
|
||
|
#include <mpfr.h>
|
||
|
|
||
|
#include "operator.h"
|
||
|
#include "go-linemap.h"
|
||
|
|
||
|
struct Unicode_range;
|
||
|
|
||
|
// The keywords.  Apart from KEYWORD_INVALID, which comes first, the
// enumerators must be kept in sorted (alphabetical) order, and they
// must match the Keywords::mapping_ array in lex.cc.

enum Keyword
{
  KEYWORD_INVALID,      // Not a keyword.
  KEYWORD_ASM,
  KEYWORD_BREAK,
  KEYWORD_CASE,
  KEYWORD_CHAN,
  KEYWORD_CONST,
  KEYWORD_CONTINUE,
  KEYWORD_DEFAULT,
  KEYWORD_DEFER,
  KEYWORD_ELSE,
  KEYWORD_FALLTHROUGH,
  KEYWORD_FOR,
  KEYWORD_FUNC,
  KEYWORD_GO,
  KEYWORD_GOTO,
  KEYWORD_IF,
  KEYWORD_IMPORT,
  KEYWORD_INTERFACE,
  KEYWORD_MAP,
  KEYWORD_PACKAGE,
  KEYWORD_RANGE,
  KEYWORD_RETURN,
  KEYWORD_SELECT,
  KEYWORD_STRUCT,
  KEYWORD_SWITCH,
  KEYWORD_TYPE,
  KEYWORD_VAR
};
|
||
|
|
||
|
// Pragmas built from magic comments and recorded for functions.
// Each enumerator is a distinct single bit so that several pragmas
// can be combined in one bitmask.  The set of values is intended to
// be the same as in the gc compiler.

enum GoPragma
{
  GOPRAGMA_NOINTERFACE = 1 << 0,        // Method not in type descriptor.
  GOPRAGMA_NOESCAPE = 1 << 1,           // Args do not escape.
  GOPRAGMA_NORACE = 1 << 2,             // No race detector.
  GOPRAGMA_NOSPLIT = 1 << 3,            // Do not split stack.
  GOPRAGMA_NOINLINE = 1 << 4,           // Do not inline.
  GOPRAGMA_SYSTEMSTACK = 1 << 5,        // Must run on system stack.
  GOPRAGMA_NOWRITEBARRIER = 1 << 6,     // No write barriers.
  GOPRAGMA_NOWRITEBARRIERREC = 1 << 7,  // No write barriers here or callees.
  GOPRAGMA_YESWRITEBARRIERREC = 1 << 8, // Stops nowritebarrierrec.
  GOPRAGMA_MARK = 1 << 9,               // Marker for nowritebarrierrec.
  GOPRAGMA_CGOUNSAFEARGS = 1 << 10,     // Pointer to arg is pointer to all.
  GOPRAGMA_UINTPTRESCAPES = 1 << 11,    // uintptr(p) escapes.
  GOPRAGMA_NOTINHEAP = 1 << 12          // Type is not in heap.
};
|
||
|
|
||
|
// A token returned from the lexer.
|
||
|
|
||
|
class Token
|
||
|
{
|
||
|
public:
|
||
|
// Token classification.
|
||
|
enum Classification
|
||
|
{
|
||
|
// Token is invalid.
|
||
|
TOKEN_INVALID,
|
||
|
// Token indicates end of input.
|
||
|
TOKEN_EOF,
|
||
|
// Token is a keyword.
|
||
|
TOKEN_KEYWORD,
|
||
|
// Token is an identifier.
|
||
|
TOKEN_IDENTIFIER,
|
||
|
// Token is a string of characters.
|
||
|
TOKEN_STRING,
|
||
|
// Token is an operator.
|
||
|
TOKEN_OPERATOR,
|
||
|
// Token is a character constant.
|
||
|
TOKEN_CHARACTER,
|
||
|
// Token is an integer.
|
||
|
TOKEN_INTEGER,
|
||
|
// Token is a floating point number.
|
||
|
TOKEN_FLOAT,
|
||
|
// Token is an imaginary number.
|
||
|
TOKEN_IMAGINARY
|
||
|
};
|
||
|
|
||
|
~Token();
|
||
|
Token(const Token&);
|
||
|
Token& operator=(const Token&);
|
||
|
|
||
|
// Get token classification.
|
||
|
Classification
|
||
|
classification() const
|
||
|
{ return this->classification_; }
|
||
|
|
||
|
// Make a token for an invalid value.
|
||
|
static Token
|
||
|
make_invalid_token(Location location)
|
||
|
{ return Token(TOKEN_INVALID, location); }
|
||
|
|
||
|
// Make a token representing end of file.
|
||
|
static Token
|
||
|
make_eof_token(Location location)
|
||
|
{ return Token(TOKEN_EOF, location); }
|
||
|
|
||
|
// Make a keyword token.
|
||
|
static Token
|
||
|
make_keyword_token(Keyword keyword, Location location)
|
||
|
{
|
||
|
Token tok(TOKEN_KEYWORD, location);
|
||
|
tok.u_.keyword = keyword;
|
||
|
return tok;
|
||
|
}
|
||
|
|
||
|
// Make an identifier token.
|
||
|
static Token
|
||
|
make_identifier_token(const std::string& value, bool is_exported,
|
||
|
Location location)
|
||
|
{
|
||
|
Token tok(TOKEN_IDENTIFIER, location);
|
||
|
tok.u_.identifier_value.name = new std::string(value);
|
||
|
tok.u_.identifier_value.is_exported = is_exported;
|
||
|
return tok;
|
||
|
}
|
||
|
|
||
|
// Make a quoted string token.
|
||
|
static Token
|
||
|
make_string_token(const std::string& value, Location location)
|
||
|
{
|
||
|
Token tok(TOKEN_STRING, location);
|
||
|
tok.u_.string_value = new std::string(value);
|
||
|
return tok;
|
||
|
}
|
||
|
|
||
|
// Make an operator token.
|
||
|
static Token
|
||
|
make_operator_token(Operator op, Location location)
|
||
|
{
|
||
|
Token tok(TOKEN_OPERATOR, location);
|
||
|
tok.u_.op = op;
|
||
|
return tok;
|
||
|
}
|
||
|
|
||
|
// Make a character constant token.
|
||
|
static Token
|
||
|
make_character_token(mpz_t val, Location location)
|
||
|
{
|
||
|
Token tok(TOKEN_CHARACTER, location);
|
||
|
mpz_init(tok.u_.integer_value);
|
||
|
mpz_swap(tok.u_.integer_value, val);
|
||
|
return tok;
|
||
|
}
|
||
|
|
||
|
// Make an integer token.
|
||
|
static Token
|
||
|
make_integer_token(mpz_t val, Location location)
|
||
|
{
|
||
|
Token tok(TOKEN_INTEGER, location);
|
||
|
mpz_init(tok.u_.integer_value);
|
||
|
mpz_swap(tok.u_.integer_value, val);
|
||
|
return tok;
|
||
|
}
|
||
|
|
||
|
// Make a float token.
|
||
|
static Token
|
||
|
make_float_token(mpfr_t val, Location location)
|
||
|
{
|
||
|
Token tok(TOKEN_FLOAT, location);
|
||
|
mpfr_init(tok.u_.float_value);
|
||
|
mpfr_swap(tok.u_.float_value, val);
|
||
|
return tok;
|
||
|
}
|
||
|
|
||
|
// Make a token for an imaginary number.
|
||
|
static Token
|
||
|
make_imaginary_token(mpfr_t val, Location location)
|
||
|
{
|
||
|
Token tok(TOKEN_IMAGINARY, location);
|
||
|
mpfr_init(tok.u_.float_value);
|
||
|
mpfr_swap(tok.u_.float_value, val);
|
||
|
return tok;
|
||
|
}
|
||
|
|
||
|
// Get the location of the token.
|
||
|
Location
|
||
|
location() const
|
||
|
{ return this->location_; }
|
||
|
|
||
|
// Return whether this is an invalid token.
|
||
|
bool
|
||
|
is_invalid() const
|
||
|
{ return this->classification_ == TOKEN_INVALID; }
|
||
|
|
||
|
// Return whether this is the EOF token.
|
||
|
bool
|
||
|
is_eof() const
|
||
|
{ return this->classification_ == TOKEN_EOF; }
|
||
|
|
||
|
// Return the keyword value for a keyword token.
|
||
|
Keyword
|
||
|
keyword() const
|
||
|
{
|
||
|
go_assert(this->classification_ == TOKEN_KEYWORD);
|
||
|
return this->u_.keyword;
|
||
|
}
|
||
|
|
||
|
// Return whether this is an identifier.
|
||
|
bool
|
||
|
is_identifier() const
|
||
|
{ return this->classification_ == TOKEN_IDENTIFIER; }
|
||
|
|
||
|
// Return the identifier.
|
||
|
const std::string&
|
||
|
identifier() const
|
||
|
{
|
||
|
go_assert(this->classification_ == TOKEN_IDENTIFIER);
|
||
|
return *this->u_.identifier_value.name;
|
||
|
}
|
||
|
|
||
|
// Return whether the identifier is exported.
|
||
|
bool
|
||
|
is_identifier_exported() const
|
||
|
{
|
||
|
go_assert(this->classification_ == TOKEN_IDENTIFIER);
|
||
|
return this->u_.identifier_value.is_exported;
|
||
|
}
|
||
|
|
||
|
// Return whether this is a string.
|
||
|
bool
|
||
|
is_string() const
|
||
|
{
|
||
|
return this->classification_ == TOKEN_STRING;
|
||
|
}
|
||
|
|
||
|
// Return the value of a string. The returned value is a string of
|
||
|
// UTF-8 characters.
|
||
|
std::string
|
||
|
string_value() const
|
||
|
{
|
||
|
go_assert(this->classification_ == TOKEN_STRING);
|
||
|
return *this->u_.string_value;
|
||
|
}
|
||
|
|
||
|
// Return the value of a character constant.
|
||
|
const mpz_t*
|
||
|
character_value() const
|
||
|
{
|
||
|
go_assert(this->classification_ == TOKEN_CHARACTER);
|
||
|
return &this->u_.integer_value;
|
||
|
}
|
||
|
|
||
|
// Return the value of an integer.
|
||
|
const mpz_t*
|
||
|
integer_value() const
|
||
|
{
|
||
|
go_assert(this->classification_ == TOKEN_INTEGER);
|
||
|
return &this->u_.integer_value;
|
||
|
}
|
||
|
|
||
|
// Return the value of a float.
|
||
|
const mpfr_t*
|
||
|
float_value() const
|
||
|
{
|
||
|
go_assert(this->classification_ == TOKEN_FLOAT);
|
||
|
return &this->u_.float_value;
|
||
|
}
|
||
|
|
||
|
// Return the value of an imaginary number.
|
||
|
const mpfr_t*
|
||
|
imaginary_value() const
|
||
|
{
|
||
|
go_assert(this->classification_ == TOKEN_IMAGINARY);
|
||
|
return &this->u_.float_value;
|
||
|
}
|
||
|
|
||
|
// Return the operator value for an operator token.
|
||
|
Operator
|
||
|
op() const
|
||
|
{
|
||
|
go_assert(this->classification_ == TOKEN_OPERATOR);
|
||
|
return this->u_.op;
|
||
|
}
|
||
|
|
||
|
// Return whether this token is KEYWORD.
|
||
|
bool
|
||
|
is_keyword(Keyword keyword) const
|
||
|
{
|
||
|
return (this->classification_ == TOKEN_KEYWORD
|
||
|
&& this->u_.keyword == keyword);
|
||
|
}
|
||
|
|
||
|
// Return whether this token is OP.
|
||
|
bool
|
||
|
is_op(Operator op) const
|
||
|
{ return this->classification_ == TOKEN_OPERATOR && this->u_.op == op; }
|
||
|
|
||
|
// Print the token for debugging.
|
||
|
void
|
||
|
print(FILE*) const;
|
||
|
|
||
|
private:
|
||
|
// Private constructor used by make_..._token functions above.
|
||
|
Token(Classification, Location);
|
||
|
|
||
|
// Clear the token.
|
||
|
void
|
||
|
clear();
|
||
|
|
||
|
// The token classification.
|
||
|
Classification classification_;
|
||
|
union
|
||
|
{
|
||
|
// The keyword value for TOKEN_KEYWORD.
|
||
|
Keyword keyword;
|
||
|
// The token value for TOKEN_IDENTIFIER.
|
||
|
struct
|
||
|
{
|
||
|
// The name of the identifier. This has been mangled to only
|
||
|
// include ASCII characters.
|
||
|
std::string* name;
|
||
|
// Whether this name should be exported. This is true if the
|
||
|
// first letter in the name is upper case.
|
||
|
bool is_exported;
|
||
|
} identifier_value;
|
||
|
// The string value for TOKEN_STRING.
|
||
|
std::string* string_value;
|
||
|
// The token value for TOKEN_CHARACTER or TOKEN_INTEGER.
|
||
|
mpz_t integer_value;
|
||
|
// The token value for TOKEN_FLOAT or TOKEN_IMAGINARY.
|
||
|
mpfr_t float_value;
|
||
|
// The token value for TOKEN_OPERATOR or the keyword value
|
||
|
Operator op;
|
||
|
} u_;
|
||
|
// The source location.
|
||
|
Location location_;
|
||
|
};
|
||
|
|
||
|
// The lexer itself.
|
||
|
|
||
|
class Lex
|
||
|
{
|
||
|
public:
|
||
|
Lex(const char* input_file_name, FILE* input_file, Linemap *linemap);
|
||
|
|
||
|
~Lex();
|
||
|
|
||
|
// Return the next token.
|
||
|
Token
|
||
|
next_token();
|
||
|
|
||
|
// Return the contents of any current //extern comment.
|
||
|
const std::string&
|
||
|
extern_name() const
|
||
|
{ return this->extern_; }
|
||
|
|
||
|
// Return the current set of pragmas, and clear them.
|
||
|
unsigned int
|
||
|
get_and_clear_pragmas()
|
||
|
{
|
||
|
unsigned int ret = this->pragmas_;
|
||
|
this->pragmas_ = 0;
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
struct Linkname
|
||
|
{
|
||
|
std::string ext_name; // External name; empty to just export.
|
||
|
bool is_exported; // Whether the internal name is exported.
|
||
|
Location loc; // Location of go:linkname directive.
|
||
|
|
||
|
Linkname()
|
||
|
: ext_name(), is_exported(false), loc()
|
||
|
{ }
|
||
|
|
||
|
Linkname(const std::string& ext_name_a, bool is_exported_a, Location loc_a)
|
||
|
: ext_name(ext_name_a), is_exported(is_exported_a), loc(loc_a)
|
||
|
{ }
|
||
|
};
|
||
|
|
||
|
typedef std::map<std::string, Linkname> Linknames;
|
||
|
|
||
|
// Return the linknames seen so far, or NULL if none, and clear the
|
||
|
// set. These are from go:linkname compiler directives.
|
||
|
Linknames*
|
||
|
get_and_clear_linknames()
|
||
|
{
|
||
|
Linknames* ret = this->linknames_;
|
||
|
this->linknames_ = NULL;
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
// Return whether there are any current go:embed patterns.
|
||
|
bool
|
||
|
has_embeds() const
|
||
|
{ return !this->embeds_.empty(); }
|
||
|
|
||
|
// If there are any go:embed patterns seen so far, store them in
|
||
|
// *EMBEDS and clear the saved set. *EMBEDS must be an empty
|
||
|
// vector.
|
||
|
void
|
||
|
get_and_clear_embeds(std::vector<std::string>* embeds)
|
||
|
{
|
||
|
go_assert(embeds->empty());
|
||
|
std::swap(*embeds, this->embeds_);
|
||
|
}
|
||
|
|
||
|
// Return whether the identifier NAME should be exported. NAME is a
|
||
|
// mangled name which includes only ASCII characters.
|
||
|
static bool
|
||
|
is_exported_mangled_name(const std::string& name);
|
||
|
|
||
|
// Return whether the identifier NAME should be exported. NAME is
|
||
|
// an unmangled utf-8 string and may contain non-ASCII characters.
|
||
|
static bool
|
||
|
is_exported_name(const std::string& name);
|
||
|
|
||
|
// Return whether the identifier NAME is invalid. When we see an
|
||
|
// invalid character we still build an identifier, but we use a
|
||
|
// magic string to indicate that the identifier is invalid. We then
|
||
|
// use this to avoid knockon errors.
|
||
|
static bool
|
||
|
is_invalid_identifier(const std::string& name);
|
||
|
|
||
|
// A helper function. Append V to STR. IS_CHARACTER is true if V
|
||
|
// is a Unicode character which should be converted into UTF-8,
|
||
|
// false if it is a byte value to be appended directly. The
|
||
|
// location is used to warn about an out of range character.
|
||
|
static void
|
||
|
append_char(unsigned int v, bool is_charater, std::string* str,
|
||
|
Location);
|
||
|
|
||
|
// A helper function. Fetch a UTF-8 character from STR and store it
|
||
|
// in *VALUE. Return the number of bytes read from STR. Return 0
|
||
|
// if STR does not point to a valid UTF-8 character.
|
||
|
static int
|
||
|
fetch_char(const char* str, unsigned int *value);
|
||
|
|
||
|
// Return whether C is a Unicode or "C" locale space character.
|
||
|
static bool
|
||
|
is_unicode_space(unsigned int c);
|
||
|
|
||
|
// Convert the specified hex char into an unsigned integer value.
|
||
|
static unsigned
|
||
|
hex_val(char c);
|
||
|
|
||
|
private:
|
||
|
ssize_t
|
||
|
get_line();
|
||
|
|
||
|
bool
|
||
|
require_line();
|
||
|
|
||
|
// The current location.
|
||
|
Location
|
||
|
location() const;
|
||
|
|
||
|
// A position CHARS column positions before the current location.
|
||
|
Location
|
||
|
earlier_location(int chars) const;
|
||
|
|
||
|
static bool
|
||
|
is_hex_digit(char);
|
||
|
|
||
|
static bool
|
||
|
is_base_digit(int base, char);
|
||
|
|
||
|
static unsigned char
|
||
|
octal_value(char c)
|
||
|
{ return c - '0'; }
|
||
|
|
||
|
Token
|
||
|
make_invalid_token()
|
||
|
{ return Token::make_invalid_token(this->location()); }
|
||
|
|
||
|
Token
|
||
|
make_eof_token()
|
||
|
{ return Token::make_eof_token(this->location()); }
|
||
|
|
||
|
Token
|
||
|
make_operator(Operator op, int chars)
|
||
|
{ return Token::make_operator_token(op, this->earlier_location(chars)); }
|
||
|
|
||
|
Token
|
||
|
gather_identifier();
|
||
|
|
||
|
static bool
|
||
|
could_be_exponent(int base, const char*, const char*);
|
||
|
|
||
|
Token
|
||
|
gather_number();
|
||
|
|
||
|
void
|
||
|
skip_exponent();
|
||
|
|
||
|
Token
|
||
|
gather_character();
|
||
|
|
||
|
Token
|
||
|
gather_string();
|
||
|
|
||
|
Token
|
||
|
gather_raw_string();
|
||
|
|
||
|
const char*
|
||
|
advance_one_utf8_char(const char*, unsigned int*, bool*);
|
||
|
|
||
|
const char*
|
||
|
advance_one_char(const char*, bool, unsigned int*, bool*);
|
||
|
|
||
|
static bool
|
||
|
is_unicode_digit(unsigned int c);
|
||
|
|
||
|
static bool
|
||
|
is_unicode_letter(unsigned int c);
|
||
|
|
||
|
static bool
|
||
|
is_unicode_uppercase(unsigned int c);
|
||
|
|
||
|
static bool
|
||
|
is_in_unicode_range(unsigned int C, const Unicode_range* ranges,
|
||
|
size_t range_size);
|
||
|
|
||
|
Operator
|
||
|
three_character_operator(char, char, char);
|
||
|
|
||
|
Operator
|
||
|
two_character_operator(char, char);
|
||
|
|
||
|
Operator
|
||
|
one_character_operator(char);
|
||
|
|
||
|
bool
|
||
|
skip_c_comment(bool* found_newline);
|
||
|
|
||
|
void
|
||
|
skip_cpp_comment();
|
||
|
|
||
|
void
|
||
|
gather_embed(const char*, const char*);
|
||
|
|
||
|
// The input file name.
|
||
|
const char* input_file_name_;
|
||
|
// The input file.
|
||
|
FILE* input_file_;
|
||
|
// The object used to keep track of file names and line numbers.
|
||
|
Linemap* linemap_;
|
||
|
// The line buffer. This holds the current line.
|
||
|
char* linebuf_;
|
||
|
// The size of the line buffer.
|
||
|
size_t linebufsize_;
|
||
|
// The nmber of characters in the current line.
|
||
|
size_t linesize_;
|
||
|
// The current offset in linebuf_.
|
||
|
size_t lineoff_;
|
||
|
// The current line number.
|
||
|
size_t lineno_;
|
||
|
// Whether to add a semicolon if we see a newline now.
|
||
|
bool add_semi_at_eol_;
|
||
|
// Pragmas for the next function, from magic comments.
|
||
|
unsigned int pragmas_;
|
||
|
// The external name to use for a function declaration, from a magic
|
||
|
// //extern comment.
|
||
|
std::string extern_;
|
||
|
// The list of //go:linkname comments, if any.
|
||
|
Linknames* linknames_;
|
||
|
// The list of //go:embed patterns, if any.
|
||
|
std::vector<std::string> embeds_;
|
||
|
};
|
||
|
|
||
|
#endif // !defined(GO_LEX_H)
|