// embed.cc -- Go frontend go:embed handling. // Copyright 2021 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #include "go-system.h" #include "operator.h" #include "go-diagnostics.h" #include "lex.h" #include "types.h" #include "expressions.h" #include "gogo.h" #ifndef O_BINARY #define O_BINARY 0 #endif // Read a file into *DATA. Returns false on error. static bool read_file(const char* filename, Location loc, std::string* data) { int fd = open(filename, O_RDONLY | O_BINARY); if (fd < 0) { go_error_at(loc, "%s: %m", filename); return false; } struct stat st; if (fstat(fd, &st) < 0) { go_error_at(loc, "%s: %m", filename); return false; } off_t want = st.st_size; // Most files read here are going to be incorporated into the object file // and then the executable. Set a limit on the size we will accept. if (want > 2000000000) { go_error_at(loc, "%s: file too large", filename); return false; } data->resize(want); off_t got = 0; while (want > 0) { // C++11 requires that std::string use contiguous bytes, so this // is safe. ssize_t n = read(fd, &(*data)[got], want); if (n < 0) { close(fd); go_error_at(loc, "%s: %m", filename); return false; } if (n == 0) { data->resize(got); break; } got += n; want -= n; } close(fd); return true; } // A JSON value as read from an embedcfg file. For our purposes a // JSON value is a string, or a list of strings, or a mapping from // strings to values. We don't expect any numbers. We also don't // expect an array of anything other than strings; that is, we don't // accept an array of general JSON values. class Json_value { public: // The types of values. enum Json_value_classification { JSON_VALUE_UNKNOWN, JSON_VALUE_STRING, JSON_VALUE_ARRAY, JSON_VALUE_MAP }; Json_value() : classification_(JSON_VALUE_UNKNOWN), string_(), array_(), map_() { } ~Json_value(); Json_value_classification classification() const { return this->classification_; } // Set to a string value. void set_string(const std::string& str) { go_assert(this->classification_ == JSON_VALUE_UNKNOWN); this->classification_ = JSON_VALUE_STRING; this->string_ = str; } // Start an array value. void start_array() { go_assert(this->classification_ == JSON_VALUE_UNKNOWN); this->classification_ = JSON_VALUE_ARRAY; } // Add an array entry. void add_array_entry(const std::string& s) { go_assert(this->classification_ == JSON_VALUE_ARRAY); this->array_.push_back(s); } // Start a map value. void start_map() { go_assert(this->classification_ == JSON_VALUE_UNKNOWN); this->classification_ = JSON_VALUE_MAP; } // Add a map entry. void add_map_entry(const std::string& key, Json_value* val) { go_assert(this->classification_ == JSON_VALUE_MAP); this->map_[key] = val; } // Return the strings from a string value. const std::string& to_string() const { go_assert(this->classification_ == JSON_VALUE_STRING); return this->string_; } // Fetch a vector of strings, and drop them from the JSON value. void get_and_clear_array(std::vector* v) { go_assert(this->classification_ == JSON_VALUE_ARRAY); std::swap(*v, this->array_); } // Look up a map entry. Returns NULL if not found. Json_value* lookup_map_entry(const std::string& key); // Iterate over a map. typedef Unordered_map(std::string, Json_value*)::iterator map_iterator; map_iterator map_begin() { go_assert(this->classification_ == JSON_VALUE_MAP); return this->map_.begin(); } map_iterator map_end() { return this->map_.end(); } private: // Classification. Json_value_classification classification_; // A string, for JSON_VALUE_STRING. std::string string_; // Array, for JSON_VALUE_ARRAY. std::vector array_; // Mapping, for JSON_VALUE_MAP. Unordered_map(std::string, Json_value*) map_; }; // Delete a JSON value. Json_value::~Json_value() { if (this->classification_ == JSON_VALUE_MAP) { for (map_iterator p = this->map_begin(); p != this->map_end(); ++p) delete p->second; } } // Look up a map entry in a JSON value. Json_value* Json_value::lookup_map_entry(const std::string& key) { go_assert(this->classification_ == JSON_VALUE_MAP); Unordered_map(std::string, Json_value*)::iterator p = this->map_.find(key); if (p == this->map_.end()) return NULL; return p->second; } // Manage reading the embedcfg file. class Embedcfg_reader { public: Embedcfg_reader(const char* filename) : filename_(filename), data_(), p_(NULL), pend_(NULL) {} // Read the contents of FILENAME. Return whether it succeeded. bool initialize_from_file(); // Read a JSON object. bool read_object(Json_value*); // Report an error if not at EOF. void check_eof(); // Report an error for the embedcfg file. void error(const char* msg); private: bool read_value(Json_value*); bool read_array(Json_value*); bool read_string(std::string*); bool skip_whitespace(bool eof_ok); // File name. const char* filename_; // File contents. std::string data_; // Next character to process. const char *p_; // End of data. const char *pend_; }; // Read the embedcfg file. void Gogo::read_embedcfg(const char *filename) { class Embedcfg_reader r(filename); if (!r.initialize_from_file()) return; Json_value val; if (!r.read_object(&val)) return; r.check_eof(); if (val.classification() != Json_value::JSON_VALUE_MAP) { r.error("invalid embedcfg: not a JSON object"); return; } Json_value* patterns = val.lookup_map_entry("Patterns"); if (patterns == NULL) { r.error("invalid embedcfg: missing Patterns"); return; } if (patterns->classification() != Json_value::JSON_VALUE_MAP) { r.error("invalid embedcfg: Patterns is not a JSON object"); return; } Json_value* files = val.lookup_map_entry("Files"); if (files == NULL) { r.error("invalid embedcfg: missing Files"); return; } if (files->classification() != Json_value::JSON_VALUE_MAP) { r.error("invalid embedcfg: Files is not a JSON object"); return; } for (Json_value::map_iterator p = patterns->map_begin(); p != patterns->map_end(); ++p) { if (p->second->classification() != Json_value::JSON_VALUE_ARRAY) { r.error("invalid embedcfg: Patterns entry is not an array"); return; } std::vector files; p->second->get_and_clear_array(&files); std::pair > val; val.first = p->first; std::pair ins = this->embed_patterns_.insert(val); if (!ins.second) { r.error("invalid embedcfg: duplicate Patterns entry"); return; } std::swap(ins.first->second, files); } for (Json_value::map_iterator p = files->map_begin(); p != files->map_end(); ++p) { if (p->second->classification() != Json_value::JSON_VALUE_STRING) { r.error("invalid embedcfg: Files entry is not a string"); return; } this->embed_files_[p->first] = p->second->to_string(); } } // Read the contents of FILENAME into this->data_. Returns whether it // succeeded. bool Embedcfg_reader::initialize_from_file() { if (!read_file(this->filename_, Linemap::unknown_location(), &this->data_)) return false; if (this->data_.empty()) { this->error("empty file"); return false; } this->p_ = this->data_.data(); this->pend_ = this->p_ + this->data_.size(); return true; } // Read a JSON object into VAL. Return whether it succeeded. bool Embedcfg_reader::read_object(Json_value* val) { if (!this->skip_whitespace(false)) return false; if (*this->p_ != '{') { this->error("expected %<{%>"); return false; } ++this->p_; val->start_map(); if (!this->skip_whitespace(false)) return false; if (*this->p_ == '}') { ++this->p_; return true; } while (true) { if (!this->skip_whitespace(false)) return false; if (*this->p_ != '"') { this->error("expected %<\"%>"); return false; } std::string key; if (!this->read_string(&key)) return false; if (!this->skip_whitespace(false)) return false; if (*this->p_ != ':') { this->error("expected %<:%>"); return false; } ++this->p_; Json_value* subval = new Json_value(); if (!this->read_value(subval)) return false; val->add_map_entry(key, subval); if (!this->skip_whitespace(false)) return false; if (*this->p_ == '}') { ++this->p_; return true; } if (*this->p_ != ',') { this->error("expected %<,%> or %<}%>"); return false; } ++this->p_; } } // Read a JSON array into VAL. Return whether it succeeded. bool Embedcfg_reader::read_array(Json_value* val) { if (!this->skip_whitespace(false)) return false; if (*this->p_ != '[') { this->error("expected %<[%>"); return false; } ++this->p_; val->start_array(); if (!this->skip_whitespace(false)) return false; if (*this->p_ == ']') { ++this->p_; return true; } while (true) { // If we were parsing full JSON we would call read_value here, // not read_string. std::string s; if (!this->read_string(&s)) return false; val->add_array_entry(s); if (!this->skip_whitespace(false)) return false; if (*this->p_ == ']') { ++this->p_; return true; } if (*this->p_ != ',') { this->error("expected %<,%> or %<]%>"); return false; } ++this->p_; } } // Read a JSON value into VAL. Return whether it succeeded. bool Embedcfg_reader::read_value(Json_value* val) { if (!this->skip_whitespace(false)) return false; switch (*this->p_) { case '"': { std::string s; if (!this->read_string(&s)) return false; val->set_string(s); return true; } case '{': return this->read_object(val); case '[': return this->read_array(val); default: this->error("invalid JSON syntax"); return false; } } // Read a JSON string. Return whether it succeeded. bool Embedcfg_reader::read_string(std::string* str) { if (!this->skip_whitespace(false)) return false; if (*this->p_ != '"') { this->error("expected %<\"%>"); return false; } ++this->p_; str->clear(); while (this->p_ < this->pend_ && *this->p_ != '"') { if (*this->p_ != '\\') { str->push_back(*this->p_); ++this->p_; continue; } ++this->p_; if (this->p_ >= this->pend_) { this->error("unterminated string"); return false; } switch (*this->p_) { case '"': case '\\': case '/': str->push_back(*this->p_); ++this->p_; break; case 'b': str->push_back('\b'); ++this->p_; break; case 'f': str->push_back('\f'); ++this->p_; break; case 'n': str->push_back('\n'); ++this->p_; break; case 'r': str->push_back('\r'); ++this->p_; break; case 't': str->push_back('\t'); ++this->p_; break; case 'u': { ++this->p_; unsigned int rune = 0; for (int i = 0; i < 4; i++) { if (this->p_ >= this->pend_) { this->error("unterminated string"); return false; } unsigned char c = *this->p_; ++this->p_; rune <<= 4; if (c >= '0' && c <= '9') rune += c - '0'; else if (c >= 'A' && c <= 'F') rune += c - 'A' + 10; else if (c >= 'a' && c <= 'f') rune += c - 'a' + 10; else { this->error("invalid hex digit"); return false; } } Lex::append_char(rune, false, str, Linemap::unknown_location()); } break; default: this->error("unrecognized string escape"); return false; } } if (*this->p_ == '"') { ++this->p_; return true; } this->error("unterminated string"); return false; } // Report an error if not at EOF. void Embedcfg_reader::check_eof() { if (this->skip_whitespace(true)) this->error("extraneous data at end of file"); } // Skip whitespace. Return whether there is more to read. bool Embedcfg_reader::skip_whitespace(bool eof_ok) { while (this->p_ < this->pend_) { switch (*this->p_) { case ' ': case '\t': case '\n': case '\r': ++this->p_; break; default: return true; } } if (!eof_ok) this->error("unexpected EOF"); return false; } // Report an error. void Embedcfg_reader::error(const char* msg) { if (!this->data_.empty() && this->p_ != NULL) go_error_at(Linemap::unknown_location(), "%<-fgo-embedcfg%>: %s: %lu: %s", this->filename_, static_cast(this->p_ - this->data_.data()), msg); else go_error_at(Linemap::unknown_location(), "%<-fgo-embedcfg%>: %s: %s", this->filename_, msg); } // Implement the sort order for a list of embedded files, as discussed // at the docs for embed.FS. class Embedfs_sort { public: bool operator()(const std::string& p1, const std::string& p2) const; private: void split(const std::string&, size_t*, size_t*, size_t*) const; }; bool Embedfs_sort::operator()(const std::string& p1, const std::string& p2) const { size_t dirlen1, elem1, elemlen1; this->split(p1, &dirlen1, &elem1, &elemlen1); size_t dirlen2, elem2, elemlen2; this->split(p2, &dirlen2, &elem2, &elemlen2); if (dirlen1 == 0) { if (dirlen2 > 0) { int i = p2.compare(0, dirlen2, "."); if (i != 0) return i > 0; } } else if (dirlen2 == 0) { int i = p1.compare(0, dirlen1, "."); if (i != 0) return i < 0; } else { int i = p1.compare(0, dirlen1, p2, 0, dirlen2); if (i != 0) return i < 0; } int i = p1.compare(elem1, elemlen1, p2, elem2, elemlen2); return i < 0; } // Pick out the directory and file name components for comparison. void Embedfs_sort::split(const std::string& s, size_t* dirlen, size_t* elem, size_t* elemlen) const { size_t len = s.size(); if (len > 0 && s[len - 1] == '/') --len; size_t slash = s.rfind('/', len - 1); if (slash == std::string::npos) { *dirlen = 0; *elem = 0; *elemlen = len; } else { *dirlen = slash; *elem = slash + 1; *elemlen = len - (slash + 1); } } // Convert the go:embed directives for a variable into an initializer // for that variable. Expression* Gogo::initializer_for_embeds(Type* type, const std::vector* embeds, Location loc) { if (this->embed_patterns_.empty()) { go_error_at(loc, ("invalid go:embed: build system did not " "supply embed configuration")); return Expression::make_error(loc); } type = type->unalias(); enum { EMBED_STRING = 0, EMBED_BYTES = 1, EMBED_FS = 2 } embed_kind; const Named_type* nt = type->named_type(); if (nt != NULL && nt->named_object()->package() != NULL && nt->named_object()->package()->pkgpath() == "embed" && nt->name() == "FS") embed_kind = EMBED_FS; else if (type->is_string_type()) embed_kind = EMBED_STRING; else if (type->is_slice_type() && type->array_type()->element_type()->integer_type() != NULL && type->array_type()->element_type()->integer_type()->is_byte()) embed_kind = EMBED_BYTES; else { go_error_at(loc, "invalid type for go:embed"); return Expression::make_error(loc); } // The patterns in the go:embed directive(s) are in EMBEDS. Find // them in the patterns in the embedcfg file. Unordered_set(std::string) have; std::vector paths; for (std::vector::const_iterator pe = embeds->begin(); pe != embeds->end(); pe++) { Embed_patterns::const_iterator pp = this->embed_patterns_.find(*pe); if (pp == this->embed_patterns_.end()) { go_error_at(loc, ("invalid go:embed: build system did not " "map pattern %<%s%>"), pe->c_str()); continue; } // Each pattern in the embedcfg file maps to a list of file // names. Add those file names to PATHS. for (std::vector::const_iterator pf = pp->second.begin(); pf != pp->second.end(); pf++) { if (this->embed_files_.find(*pf) == this->embed_files_.end()) { go_error_at(loc, ("invalid go:embed: build system did not " "map file %<%s%>"), pf->c_str()); continue; } std::pair ins = have.insert(*pf); if (ins.second) { const std::string& path(*pf); paths.push_back(path); if (embed_kind == EMBED_FS) { // Add each required directory, with a trailing slash. size_t i = std::string::npos; while (i > 0) { i = path.rfind('/', i); if (i == std::string::npos) break; std::string dir = path.substr(0, i + 1); ins = have.insert(dir); if (ins.second) paths.push_back(dir); --i; } } } } } if (embed_kind == EMBED_STRING || embed_kind == EMBED_BYTES) { if (paths.size() > 1) { go_error_at(loc, ("invalid go:embed: multiple files for " "string or byte slice"));; return Expression::make_error(loc); } std::string data; if (!read_file(this->embed_files_[paths[0]].c_str(), loc, &data)) return Expression::make_error(loc); Expression* e = Expression::make_string(data, loc); if (embed_kind == EMBED_BYTES) e = Expression::make_cast(type, e, loc); return e; } std::sort(paths.begin(), paths.end(), Embedfs_sort()); if (type->struct_type() == NULL || type->struct_type()->field_count() != 1) { go_error_at(loc, ("internal error: embed.FS should be struct type " "with one field")); return Expression::make_error(loc); } Type* ptr_type = type->struct_type()->field(0)->type(); if (ptr_type->points_to() == NULL) { go_error_at(loc, "internal error: embed.FS struct field should be pointer"); return Expression::make_error(loc); } Type* slice_type = ptr_type->points_to(); if (!slice_type->is_slice_type()) { go_error_at(loc, ("internal error: embed.FS struct field should be " "pointer to slice")); return Expression::make_error(loc); } Type* file_type = slice_type->array_type()->element_type(); if (file_type->struct_type() == NULL || (file_type->struct_type()->find_local_field(".embed.name", NULL) == NULL) || (file_type->struct_type()->find_local_field(".embed.data", NULL) == NULL)) { go_error_at(loc, ("internal error: embed.FS slice element should be struct " "with name and data fields")); return Expression::make_error(loc); } const Struct_field_list* file_fields = file_type->struct_type()->fields(); Expression_list* file_vals = new(Expression_list); file_vals->reserve(paths.size()); for (std::vector::const_iterator pp = paths.begin(); pp != paths.end(); ++pp) { std::string data; if ((*pp)[pp->size() - 1] != '/') { if (!read_file(this->embed_files_[*pp].c_str(), loc, &data)) return Expression::make_error(loc); } Expression_list* field_vals = new(Expression_list); for (Struct_field_list::const_iterator pf = file_fields->begin(); pf != file_fields->end(); ++pf) { if (pf->is_field_name(".embed.name")) field_vals->push_back(Expression::make_string(*pp, loc)); else if (pf->is_field_name(".embed.data")) field_vals->push_back(Expression::make_string(data, loc)); else { // FIXME: The embed.file type has a hash field, which is // currently unused. We should fill it in, but don't. // The hash is a SHA256, and we don't have convenient // SHA256 code. Do this later when the field is // actually used. field_vals->push_back(NULL); } } Expression* file_val = Expression::make_struct_composite_literal(file_type, field_vals, loc); file_vals->push_back(file_val); } Expression* slice_init = Expression::make_slice_composite_literal(slice_type, file_vals, loc); Expression* fs_init = Expression::make_heap_expression(slice_init, loc); Expression_list* fs_vals = new Expression_list(); fs_vals->push_back(fs_init); return Expression::make_struct_composite_literal(type, fs_vals, loc); }