stringpool.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. // stringpool.h -- a string pool for gold -*- C++ -*-
  2. // Copyright (C) 2006-2022 Free Software Foundation, Inc.
  3. // Written by Ian Lance Taylor <iant@google.com>.
  4. // This file is part of gold.
  5. // This program is free software; you can redistribute it and/or modify
  6. // it under the terms of the GNU General Public License as published by
  7. // the Free Software Foundation; either version 3 of the License, or
  8. // (at your option) any later version.
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // You should have received a copy of the GNU General Public License
  14. // along with this program; if not, write to the Free Software
  15. // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
  16. // MA 02110-1301, USA.
  17. #include <string>
  18. #include <list>
  19. #include <vector>
  20. #ifndef GOLD_STRINGPOOL_H
  21. #define GOLD_STRINGPOOL_H
  22. namespace gold
  23. {
  24. class Output_file;
  25. // Return the length of a string in units of Char_type.
  26. template<typename Char_type>
  27. inline size_t
  28. string_length(const Char_type* p)
  29. {
  30. size_t len = 0;
  31. for (; *p != 0; ++p)
  32. ++len;
  33. return len;
  34. }
  35. // Specialize string_length for char. Maybe we could just use
  36. // std::char_traits<>::length?
  37. template<>
  38. inline size_t
  39. string_length(const char* p)
  40. {
  41. return strlen(p);
  42. }
  43. // A Stringpool is a pool of unique strings. It provides the
  44. // following features:
  45. // Every string in the pool is unique. Thus, if you have two strings
  46. // in the Stringpool, you can compare them for equality by using
  47. // pointer comparison rather than string comparison.
  48. // There is a key associated with every string in the pool. If you
  49. // add strings to the Stringpool in the same order, then the key for
  50. // each string will always be the same for any run of the linker.
  51. // This is not true of the string pointers themselves, as they may
  52. // change due to address space randomization. Some parts of the
  53. // linker (e.g., the symbol table) use the key value instead of the
  54. // string pointer so that repeated runs of the linker will generate
  55. // precisely the same output.
  56. // When you add a string to a Stringpool, Stringpool will optionally
  57. // make a copy of it. Thus there is no requirement to keep a copy
  58. // elsewhere.
  59. // A Stringpool can be turned into a string table, a sequential series
  60. // of null terminated strings. The first string may optionally be a
  61. // single zero byte, as required for SHT_STRTAB sections. This
  62. // conversion is only permitted after all strings have been added to
  63. // the Stringpool. After doing this conversion, you can ask for the
  64. // offset of any string (or any key) in the stringpool in the string
  65. // table, and you can write the resulting string table to an output
  66. // file.
  67. // When a Stringpool is turned into a string table, then as an
  68. // optimization it will reuse string suffixes to avoid duplicating
  69. // strings. That is, given the strings "abc" and "bc", only the
  70. // string "abc" will be stored, and "bc" will be represented by an
  71. // offset into the middle of the string "abc".
  72. // A simple chunked vector class--this is a subset of std::vector
  73. // which stores memory in chunks. We don't provide iterators, because
  74. // we don't need them.
  75. template<typename Element>
  76. class Chunked_vector
  77. {
  78. public:
  79. Chunked_vector()
  80. : chunks_(), size_(0)
  81. { }
  82. // Clear the elements.
  83. void
  84. clear()
  85. {
  86. this->chunks_.clear();
  87. this->size_ = 0;
  88. }
  89. // Reserve elements.
  90. void
  91. reserve(unsigned int n)
  92. {
  93. if (n > this->chunks_.size() * chunk_size)
  94. {
  95. this->chunks_.resize((n + chunk_size - 1) / chunk_size);
  96. // We need to call reserve() of all chunks since changing
  97. // this->chunks_ causes Element_vectors to be copied. The
  98. // reserved capacity of an Element_vector may be lost in copying.
  99. for (size_t i = 0; i < this->chunks_.size(); ++i)
  100. this->chunks_[i].reserve(chunk_size);
  101. }
  102. }
  103. // Get the number of elements.
  104. size_t
  105. size() const
  106. { return this->size_; }
  107. // Push a new element on the back of the vector.
  108. void
  109. push_back(const Element& element)
  110. {
  111. size_t chunk_index = this->size_ / chunk_size;
  112. if (chunk_index >= this->chunks_.size())
  113. {
  114. this->chunks_.push_back(Element_vector());
  115. this->chunks_.back().reserve(chunk_size);
  116. gold_assert(chunk_index < this->chunks_.size());
  117. }
  118. this->chunks_[chunk_index].push_back(element);
  119. this->size_++;
  120. }
  121. // Return a reference to an entry in the vector.
  122. Element&
  123. operator[](size_t i)
  124. { return this->chunks_[i / chunk_size][i % chunk_size]; }
  125. const Element&
  126. operator[](size_t i) const
  127. { return this->chunks_[i / chunk_size][i % chunk_size]; }
  128. private:
  129. static const unsigned int chunk_size = 8192;
  130. typedef std::vector<Element> Element_vector;
  131. typedef std::vector<Element_vector> Chunk_vector;
  132. Chunk_vector chunks_;
  133. size_t size_;
  134. };
  135. // Stringpools are implemented in terms of Stringpool_template, which
  136. // is generalized on the type of character used for the strings. Most
  137. // uses will want the Stringpool type which uses char. Other cases
  138. // are used for merging wide string constants.
  139. template<typename Stringpool_char>
  140. class Stringpool_template
  141. {
  142. public:
  143. // The type of a key into the stringpool. As described above, a key
  144. // value will always be the same during any run of the linker. Zero
  145. // is never a valid key value.
  146. typedef size_t Key;
  147. // Create a Stringpool.
  148. Stringpool_template(uint64_t addralign = 1);
  149. ~Stringpool_template();
  150. // Clear all the data from the stringpool.
  151. void
  152. clear();
  153. // Hint to the stringpool class that you intend to insert n additional
  154. // elements. The stringpool class can use this info however it likes;
  155. // in practice it will resize its internal hashtables to make room.
  156. void
  157. reserve(unsigned int n);
  158. // Indicate that we should not reserve offset 0 to hold the empty
  159. // string when converting the stringpool to a string table. This
  160. // should not be called for a proper ELF SHT_STRTAB section.
  161. void
  162. set_no_zero_null()
  163. {
  164. gold_assert(this->string_set_.empty()
  165. && this->offset_ == sizeof(Stringpool_char));
  166. this->zero_null_ = false;
  167. this->offset_ = 0;
  168. }
  169. // Indicate that this string pool should be optimized, even if not
  170. // running with -O2.
  171. void
  172. set_optimize()
  173. { this->optimize_ = true; }
  174. // Add the string S to the pool. This returns a canonical permanent
  175. // pointer to the string in the pool. If COPY is true, the string
  176. // is copied into permanent storage. If PKEY is not NULL, this sets
  177. // *PKEY to the key for the string.
  178. const Stringpool_char*
  179. add(const Stringpool_char* s, bool copy, Key* pkey);
  180. // Add the string S to the pool.
  181. const Stringpool_char*
  182. add(const std::basic_string<Stringpool_char>& s, bool copy, Key* pkey)
  183. { return this->add_with_length(s.data(), s.size(), copy, pkey); }
  184. // Add string S of length LEN characters to the pool. If COPY is
  185. // true, S need not be null terminated.
  186. const Stringpool_char*
  187. add_with_length(const Stringpool_char* s, size_t len, bool copy, Key* pkey);
  188. // If the string S is present in the pool, return the canonical
  189. // string pointer. Otherwise, return NULL. If PKEY is not NULL,
  190. // set *PKEY to the key.
  191. const Stringpool_char*
  192. find(const Stringpool_char* s, Key* pkey) const;
  193. // Turn the stringpool into a string table: determine the offsets of
  194. // all the strings. After this is called, no more strings may be
  195. // added to the stringpool.
  196. void
  197. set_string_offsets();
  198. // Get the offset of the string S in the string table. This returns
  199. // the offset in bytes, not in units of Stringpool_char. This may
  200. // only be called after set_string_offsets has been called.
  201. section_offset_type
  202. get_offset(const Stringpool_char* s) const;
  203. // Get the offset of the string S in the string table.
  204. section_offset_type
  205. get_offset(const std::basic_string<Stringpool_char>& s) const
  206. { return this->get_offset_with_length(s.c_str(), s.size()); }
  207. // Get the offset of string S, with length LENGTH characters, in the
  208. // string table.
  209. section_offset_type
  210. get_offset_with_length(const Stringpool_char* s, size_t length) const;
  211. // Get the offset of the string with key K.
  212. section_offset_type
  213. get_offset_from_key(Key k) const
  214. {
  215. gold_assert(k <= this->key_to_offset_.size());
  216. return this->key_to_offset_[k - 1];
  217. }
  218. // Get the size of the string table. This returns the number of
  219. // bytes, not in units of Stringpool_char.
  220. section_size_type
  221. get_strtab_size() const
  222. {
  223. gold_assert(this->strtab_size_ != 0);
  224. return this->strtab_size_;
  225. }
  226. // Write the string table into the output file at the specified
  227. // offset.
  228. void
  229. write(Output_file*, off_t offset);
  230. // Write the string table into the specified buffer, of the
  231. // specified size. buffer_size should be at least
  232. // get_strtab_size().
  233. void
  234. write_to_buffer(unsigned char* buffer, section_size_type buffer_size);
  235. // Dump statistical information to stderr.
  236. void
  237. print_stats(const char*) const;
  238. private:
  239. Stringpool_template(const Stringpool_template&);
  240. Stringpool_template& operator=(const Stringpool_template&);
  241. // Return whether two strings are equal.
  242. static bool
  243. string_equal(const Stringpool_char*, const Stringpool_char*);
  244. // Compute a hash code for a string. LENGTH is the length of the
  245. // string in characters.
  246. static size_t
  247. string_hash(const Stringpool_char*, size_t length);
  248. // We store the actual data in a list of these buffers.
  249. struct Stringdata
  250. {
  251. // Length of data in buffer.
  252. size_t len;
  253. // Allocated size of buffer.
  254. size_t alc;
  255. // Buffer.
  256. char data[1];
  257. };
  258. // Add a new key offset entry.
  259. void
  260. new_key_offset(size_t);
  261. // Copy a string into the buffers, returning a canonical string.
  262. const Stringpool_char*
  263. add_string(const Stringpool_char*, size_t);
  264. // Return whether s1 is a suffix of s2.
  265. static bool
  266. is_suffix(const Stringpool_char* s1, size_t len1,
  267. const Stringpool_char* s2, size_t len2);
  268. // The hash table key includes the string, the length of the string,
  269. // and the hash code for the string. We put the hash code
  270. // explicitly into the key so that we can do a find()/insert()
  271. // sequence without having to recompute the hash. Computing the
  272. // hash code is a significant user of CPU time in the linker.
  273. struct Hashkey
  274. {
  275. const Stringpool_char* string;
  276. // Length is in characters, not bytes.
  277. size_t length;
  278. size_t hash_code;
  279. // This goes in an STL container, so we need a default
  280. // constructor.
  281. Hashkey()
  282. : string(NULL), length(0), hash_code(0)
  283. { }
  284. // Note that these constructors are relatively expensive, because
  285. // they compute the hash code.
  286. explicit Hashkey(const Stringpool_char* s)
  287. : string(s), length(string_length(s)), hash_code(string_hash(s, length))
  288. { }
  289. Hashkey(const Stringpool_char* s, size_t len)
  290. : string(s), length(len), hash_code(string_hash(s, len))
  291. { }
  292. };
  293. // Hash function. This is trivial, since we have already computed
  294. // the hash.
  295. struct Stringpool_hash
  296. {
  297. size_t
  298. operator()(const Hashkey& hk) const
  299. { return hk.hash_code; }
  300. };
  301. // Equality comparison function for hash table.
  302. struct Stringpool_eq
  303. {
  304. bool
  305. operator()(const Hashkey&, const Hashkey&) const;
  306. };
  307. // The hash table is a map from strings to Keys.
  308. typedef Key Hashval;
  309. typedef Unordered_map<Hashkey, Hashval, Stringpool_hash,
  310. Stringpool_eq> String_set_type;
  311. // Comparison routine used when sorting into a string table.
  312. typedef typename String_set_type::iterator Stringpool_sort_info;
  313. struct Stringpool_sort_comparison
  314. {
  315. bool
  316. operator()(const Stringpool_sort_info&, const Stringpool_sort_info&) const;
  317. };
  318. // Keys map to offsets via a Chunked_vector. We only use the
  319. // offsets if we turn this into an string table section.
  320. typedef Chunked_vector<section_offset_type> Key_to_offset;
  321. // List of Stringdata structures.
  322. typedef std::list<Stringdata*> Stringdata_list;
  323. // Mapping from const char* to namepool entry.
  324. String_set_type string_set_;
  325. // Mapping from Key to string table offset.
  326. Key_to_offset key_to_offset_;
  327. // List of buffers.
  328. Stringdata_list strings_;
  329. // Size of string table.
  330. section_size_type strtab_size_;
  331. // Whether to reserve offset 0 to hold the null string.
  332. bool zero_null_;
  333. // Whether to optimize the string table.
  334. bool optimize_;
  335. // offset of the next string.
  336. section_offset_type offset_;
  337. // The alignment of strings in the stringpool.
  338. uint64_t addralign_;
  339. };
  340. // The most common type of Stringpool.
  341. typedef Stringpool_template<char> Stringpool;
  342. } // End namespace gold.
  343. #endif // !defined(GOLD_STRINGPOOL_H)