1316 lines
30 KiB
C
1316 lines
30 KiB
C
/*
|
|
* cint_array.c - routines for arrays of (mostly) consecutive positive integer indices.
|
|
*/
|
|
|
|
/*
|
|
* Copyright (C) 1986, 1988, 1989, 1991-2013, 2016, 2017, 2019-2022,
|
|
* the Free Software Foundation, Inc.
|
|
*
|
|
* This file is part of GAWK, the GNU implementation of the
|
|
* AWK Programming Language.
|
|
*
|
|
* GAWK is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* GAWK is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
|
*/
|
|
|
|
#include "awk.h"
|
|
|
|
#define INT32_BIT 32
|
|
|
|
extern FILE *output_fp;
|
|
extern void indent(int indent_level);
|
|
extern NODE **is_integer(NODE *symbol, NODE *subs);
|
|
|
|
/*
|
|
* NHAT --- maximum size of a leaf array (2^NHAT).
|
|
* THRESHOLD --- Maximum capacity waste; THRESHOLD >= 2^(NHAT + 1).
|
|
*/
|
|
|
|
static int NHAT = 10;
|
|
static long THRESHOLD;
|
|
|
|
/*
|
|
* What is the optimum NHAT? Timing results suggest that 10 is a good choice,
|
|
* although differences aren't that significant for > 10.
|
|
*/
|
|
|
|
|
|
static NODE **cint_array_init(NODE *symbol, NODE *subs);
|
|
static NODE **is_uinteger(NODE *symbol, NODE *subs);
|
|
static NODE **cint_lookup(NODE *symbol, NODE *subs);
|
|
static NODE **cint_exists(NODE *symbol, NODE *subs);
|
|
static NODE **cint_clear(NODE *symbol, NODE *subs);
|
|
static NODE **cint_remove(NODE *symbol, NODE *subs);
|
|
static NODE **cint_list(NODE *symbol, NODE *t);
|
|
static NODE **cint_copy(NODE *symbol, NODE *newsymb);
|
|
static NODE **cint_dump(NODE *symbol, NODE *ndump);
|
|
#ifdef ARRAYDEBUG
|
|
static void cint_print(NODE *symbol);
|
|
#endif
|
|
|
|
const array_funcs_t cint_array_func = {
|
|
"cint",
|
|
cint_array_init,
|
|
is_uinteger,
|
|
cint_lookup,
|
|
cint_exists,
|
|
cint_clear,
|
|
cint_remove,
|
|
cint_list,
|
|
cint_copy,
|
|
cint_dump,
|
|
(afunc_t) 0,
|
|
};
|
|
|
|
|
|
static NODE **argv_store(NODE *symbol, NODE *subs);
|
|
|
|
/* special case for ARGV in sandbox mode */
|
|
static const array_funcs_t argv_array_func = {
|
|
"argv",
|
|
cint_array_init,
|
|
is_uinteger,
|
|
cint_lookup,
|
|
cint_exists,
|
|
cint_clear,
|
|
cint_remove,
|
|
cint_list,
|
|
cint_copy,
|
|
cint_dump,
|
|
argv_store,
|
|
};
|
|
|
|
static inline int cint_hash(long k);
|
|
static inline NODE **cint_find(NODE *symbol, long k, int h1);
|
|
|
|
static inline NODE *make_node(NODETYPE type);
|
|
|
|
static NODE **tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base);
|
|
static NODE **tree_exists(NODE *tree, long k);
|
|
static void tree_clear(NODE *tree);
|
|
static int tree_remove(NODE *symbol, NODE *tree, long k);
|
|
static void tree_copy(NODE *newsymb, NODE *tree, NODE *newtree);
|
|
static long tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind);
|
|
static inline NODE **tree_find(NODE *tree, long k, int i);
|
|
static void tree_info(NODE *tree, NODE *ndump, const char *aname);
|
|
static size_t tree_kilobytes(NODE *tree);
|
|
#ifdef ARRAYDEBUG
|
|
static void tree_print(NODE *tree, size_t bi, int indent_level);
|
|
#endif
|
|
|
|
static inline NODE **leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base);
|
|
static inline NODE **leaf_exists(NODE *array, long k);
|
|
static void leaf_clear(NODE *array);
|
|
static int leaf_remove(NODE *symbol, NODE *array, long k);
|
|
static void leaf_copy(NODE *newsymb, NODE *array, NODE *newarray);
|
|
static long leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind);
|
|
static void leaf_info(NODE *array, NODE *ndump, const char *aname);
|
|
#ifdef ARRAYDEBUG
|
|
static void leaf_print(NODE *array, size_t bi, int indent_level);
|
|
#endif
|
|
|
|
/* powers of 2 table up to 2^30; power_two_table[i] == 2^i */
static const long power_two_table[] = {
	1L <<  0, 1L <<  1, 1L <<  2, 1L <<  3, 1L <<  4, 1L <<  5, 1L <<  6,
	1L <<  7, 1L <<  8, 1L <<  9, 1L << 10, 1L << 11, 1L << 12,
	1L << 13, 1L << 14, 1L << 15, 1L << 16, 1L << 17, 1L << 18,
	1L << 19, 1L << 20, 1L << 21, 1L << 22, 1L << 23, 1L << 24,
	1L << 25, 1L << 26, 1L << 27, 1L << 28, 1L << 29, 1L << 30
};
|
|
|
|
|
|
/*
 * ISUINT --- true if subscript `s' of array `a' is an integer >= 0.
 * The integer test (NUMINT flag or is_integer()) is evaluated before
 * (s)->numbr is examined.  Note that `s' is evaluated more than once.
 */
#define ISUINT(a, s) \
	((((s)->flags & NUMINT) != 0 || is_integer(a, s) != NULL) && (s)->numbr >= 0)
|
|
|
|
/*
|
|
* To store 2^n integers, allocate top-level array of size n, elements
|
|
* of which are 1-Dimensional (leaf-array) of geometrically increasing
|
|
* size (power of 2).
|
|
*
|
|
* [0] --> [ 0 ]
|
|
* [1] --> [ 1 ]
|
|
* |2| --> [ 2 | 3 ]
|
|
* |3| --> [ 4 | 5 | 6 | 7 ]
|
|
* |.|
|
|
* |k| --> [ 2^(k - 1)| ... | 2^k - 1 ]
|
|
* ...
|
|
*
|
|
* For a given integer n (> 0), the leaf-array is at 1 + floor(log2(n)).
|
|
*
|
|
* The idea for the geometrically increasing array sizes is from:
|
|
* Fast Functional Lists, Hash-Lists, Deques and Variable Length Arrays.
|
|
* Bagwell, Phil (2002).
|
|
* http://infoscience.epfl.ch/record/64410/files/techlists.pdf
|
|
*
|
|
* Disadvantage:
|
|
* Worst case memory waste > 99% and will happen when each of the
|
|
* leaf arrays contains only a single element. Even with consecutive
|
|
* integers, memory waste can be as high as 50%.
|
|
*
|
|
* Solution: Hashed Array Trees (HATs).
|
|
*
|
|
*/
|
|
|
|
/* cint_array_init --- array initialization routine */
|
|
|
|
static NODE **
|
|
cint_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
|
|
{
|
|
if (symbol == NULL) {
|
|
long newval;
|
|
size_t nelems = (sizeof(power_two_table) / sizeof(power_two_table[0]));
|
|
|
|
/* check relevant environment variables */
|
|
if ((newval = getenv_long("NHAT")) > 1 && newval < INT32_BIT)
|
|
NHAT = newval;
|
|
/* don't allow overflow off the end of the table */
|
|
if (NHAT > nelems - 2)
|
|
NHAT = nelems - 2;
|
|
THRESHOLD = power_two_table[NHAT + 1];
|
|
} else
|
|
null_array(symbol);
|
|
|
|
return & success_node;
|
|
}
|
|
|
|
|
|
/* is_uinteger --- test if the subscript is an integer >= 0 */
|
|
|
|
NODE **
|
|
is_uinteger(NODE *symbol, NODE *subs)
|
|
{
|
|
if (is_integer(symbol, subs) != NULL && subs->numbr >= 0)
|
|
return & success_node;
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/* cint_lookup --- Find the subscript in the array; Install it if it isn't there. */
|
|
|
|
static NODE **
|
|
cint_lookup(NODE *symbol, NODE *subs)
|
|
{
|
|
NODE **lhs;
|
|
long k;
|
|
int h1 = -1, m, li;
|
|
NODE *tn, *xn;
|
|
long cint_size, capacity;
|
|
|
|
k = -1;
|
|
if (ISUINT(symbol, subs)) {
|
|
k = subs->numbr; /* k >= 0 */
|
|
h1 = cint_hash(k); /* h1 >= NHAT */
|
|
if ((lhs = cint_find(symbol, k, h1)) != NULL)
|
|
return lhs;
|
|
}
|
|
xn = symbol->xarray;
|
|
if (xn != NULL && (lhs = xn->aexists(xn, subs)) != NULL)
|
|
return lhs;
|
|
|
|
/* It's not there, install it */
|
|
|
|
if (k < 0)
|
|
goto xinstall;
|
|
|
|
m = h1 - 1; /* m >= (NHAT- 1) */
|
|
|
|
/* Estimate capacity upper bound.
|
|
* capacity upper bound = current capacity + leaf array size.
|
|
*/
|
|
li = m > NHAT ? m : NHAT;
|
|
while (li >= NHAT) {
|
|
/* leaf-array of a HAT */
|
|
li = (li + 1) / 2;
|
|
}
|
|
capacity = symbol->array_capacity + power_two_table[li];
|
|
|
|
cint_size = (xn == NULL) ? symbol->table_size
|
|
: (symbol->table_size - xn->table_size);
|
|
assert(cint_size >= 0);
|
|
if ((capacity - cint_size) > THRESHOLD)
|
|
goto xinstall;
|
|
|
|
if (symbol->nodes == NULL) {
|
|
symbol->array_capacity = 0;
|
|
assert(symbol->table_size == 0);
|
|
|
|
/* nodes[0] .. nodes[NHAT- 1] not used */
|
|
ezalloc(symbol->nodes, NODE **, INT32_BIT * sizeof(NODE *), "cint_lookup");
|
|
}
|
|
|
|
symbol->table_size++; /* one more element in array */
|
|
|
|
tn = symbol->nodes[h1];
|
|
if (tn == NULL) {
|
|
tn = make_node(Node_array_tree);
|
|
symbol->nodes[h1] = tn;
|
|
}
|
|
|
|
if (m < NHAT)
|
|
return tree_lookup(symbol, tn, k, NHAT, 0);
|
|
return tree_lookup(symbol, tn, k, m, power_two_table[m]);
|
|
|
|
xinstall:
|
|
|
|
symbol->table_size++;
|
|
if (xn == NULL) {
|
|
xn = symbol->xarray = make_array();
|
|
xn->vname = symbol->vname; /* shallow copy */
|
|
|
|
/*
|
|
* Avoid using assoc_lookup(xn, subs) which may lead
|
|
* to infinite recursion.
|
|
*/
|
|
|
|
if (is_integer(xn, subs))
|
|
xn->array_funcs = & int_array_func;
|
|
else
|
|
xn->array_funcs = & str_array_func;
|
|
xn->flags |= XARRAY;
|
|
}
|
|
return xn->alookup(xn, subs);
|
|
}
|
|
|
|
|
|
/* cint_exists --- test whether an index is in the array or not. */
|
|
|
|
static NODE **
|
|
cint_exists(NODE *symbol, NODE *subs)
|
|
{
|
|
NODE *xn;
|
|
|
|
if (ISUINT(symbol, subs)) {
|
|
long k = subs->numbr;
|
|
NODE **lhs;
|
|
if ((lhs = cint_find(symbol, k, cint_hash(k))) != NULL)
|
|
return lhs;
|
|
}
|
|
if ((xn = symbol->xarray) == NULL)
|
|
return NULL;
|
|
return xn->aexists(xn, subs);
|
|
}
|
|
|
|
|
|
/* cint_clear --- flush all the values in symbol[] */
|
|
|
|
static NODE **
|
|
cint_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
|
|
{
|
|
size_t i;
|
|
NODE *tn;
|
|
|
|
assert(symbol->nodes != NULL);
|
|
|
|
if (symbol->xarray != NULL) {
|
|
NODE *xn = symbol->xarray;
|
|
assoc_clear(xn);
|
|
freenode(xn);
|
|
symbol->xarray = NULL;
|
|
}
|
|
|
|
for (i = NHAT; i < INT32_BIT; i++) {
|
|
tn = symbol->nodes[i];
|
|
if (tn != NULL) {
|
|
tree_clear(tn);
|
|
freenode(tn);
|
|
}
|
|
}
|
|
|
|
efree(symbol->nodes);
|
|
symbol->ainit(symbol, NULL); /* re-initialize symbol */
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/* cint_remove --- remove an index from the array */
|
|
|
|
static NODE **
|
|
cint_remove(NODE *symbol, NODE *subs)
|
|
{
|
|
long k;
|
|
int h1;
|
|
NODE *tn, *xn = symbol->xarray;
|
|
|
|
if (symbol->table_size == 0)
|
|
return NULL;
|
|
|
|
if (! ISUINT(symbol, subs))
|
|
goto xremove;
|
|
|
|
assert(symbol->nodes != NULL);
|
|
|
|
k = subs->numbr;
|
|
h1 = cint_hash(k);
|
|
tn = symbol->nodes[h1];
|
|
if (tn == NULL || ! tree_remove(symbol, tn, k))
|
|
goto xremove;
|
|
|
|
if (tn->table_size == 0) {
|
|
freenode(tn);
|
|
symbol->nodes[h1] = NULL;
|
|
}
|
|
|
|
symbol->table_size--;
|
|
|
|
if (xn == NULL && symbol->table_size == 0) {
|
|
efree(symbol->nodes);
|
|
symbol->ainit(symbol, NULL); /* re-initialize array 'symbol' */
|
|
} else if(xn != NULL && symbol->table_size == xn->table_size) {
|
|
/* promote xn to symbol */
|
|
|
|
xn->flags &= ~XARRAY;
|
|
xn->parent_array = symbol->parent_array;
|
|
efree(symbol->nodes);
|
|
*symbol = *xn;
|
|
freenode(xn);
|
|
}
|
|
|
|
return & success_node;
|
|
|
|
xremove:
|
|
xn = symbol->xarray;
|
|
if (xn == NULL || xn->aremove(xn, subs) == NULL)
|
|
return NULL;
|
|
if (xn->table_size == 0) {
|
|
freenode(xn);
|
|
symbol->xarray = NULL;
|
|
}
|
|
symbol->table_size--;
|
|
assert(symbol->table_size > 0);
|
|
|
|
return & success_node;
|
|
}
|
|
|
|
|
|
/* cint_copy --- duplicate input array "symbol" */
|
|
|
|
static NODE **
|
|
cint_copy(NODE *symbol, NODE *newsymb)
|
|
{
|
|
NODE **old, **new;
|
|
size_t i;
|
|
|
|
assert(symbol->nodes != NULL);
|
|
|
|
/* allocate new table */
|
|
ezalloc(new, NODE **, INT32_BIT * sizeof(NODE *), "cint_copy");
|
|
|
|
old = symbol->nodes;
|
|
for (i = NHAT; i < INT32_BIT; i++) {
|
|
if (old[i] == NULL)
|
|
continue;
|
|
new[i] = make_node(Node_array_tree);
|
|
tree_copy(newsymb, old[i], new[i]);
|
|
}
|
|
|
|
if (symbol->xarray != NULL) {
|
|
NODE *xn, *n;
|
|
xn = symbol->xarray;
|
|
n = make_array();
|
|
n->vname = newsymb->vname;
|
|
(void) xn->acopy(xn, n);
|
|
newsymb->xarray = n;
|
|
} else
|
|
newsymb->xarray = NULL;
|
|
|
|
newsymb->nodes = new;
|
|
newsymb->table_size = symbol->table_size;
|
|
newsymb->array_capacity = symbol->array_capacity;
|
|
newsymb->flags = symbol->flags;
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/* cint_list --- return a list of items */
|
|
|
|
static NODE**
|
|
cint_list(NODE *symbol, NODE *t)
|
|
{
|
|
NODE **list = NULL;
|
|
NODE *tn, *xn;
|
|
unsigned long k = 0, num_elems, list_size;
|
|
size_t j, ja, jd;
|
|
int elem_size = 1;
|
|
assoc_kind_t assoc_kind;
|
|
|
|
num_elems = symbol->table_size;
|
|
if (num_elems == 0)
|
|
return NULL;
|
|
assoc_kind = (assoc_kind_t) t->flags;
|
|
if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
|
|
num_elems = 1;
|
|
|
|
if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
|
|
elem_size = 2;
|
|
list_size = num_elems * elem_size;
|
|
|
|
if (symbol->xarray != NULL) {
|
|
xn = symbol->xarray;
|
|
list = xn->alist(xn, t);
|
|
assert(list != NULL);
|
|
assoc_kind &= ~(AASC|ADESC);
|
|
t->flags = (unsigned int) assoc_kind;
|
|
if (num_elems == 1 || num_elems == xn->table_size)
|
|
return list;
|
|
erealloc(list, NODE **, list_size * sizeof(NODE *), "cint_list");
|
|
k = elem_size * xn->table_size;
|
|
} else
|
|
emalloc(list, NODE **, list_size * sizeof(NODE *), "cint_list");
|
|
|
|
if ((assoc_kind & AINUM) == 0) {
|
|
/* not sorting by "index num" */
|
|
assoc_kind &= ~(AASC|ADESC);
|
|
t->flags = (unsigned int) assoc_kind;
|
|
}
|
|
|
|
/* populate it with index in ascending or descending order */
|
|
|
|
for (ja = NHAT, jd = INT32_BIT - 1; ja < INT32_BIT && jd >= NHAT; ) {
|
|
j = (assoc_kind & ADESC) != 0 ? jd-- : ja++;
|
|
tn = symbol->nodes[j];
|
|
if (tn == NULL)
|
|
continue;
|
|
k += tree_list(tn, list + k, assoc_kind);
|
|
if (k >= list_size)
|
|
return list;
|
|
}
|
|
return list;
|
|
}
|
|
|
|
|
|
/* cint_dump --- dump array info */
|
|
|
|
static NODE **
|
|
cint_dump(NODE *symbol, NODE *ndump)
|
|
{
|
|
NODE *tn, *xn = NULL;
|
|
int indent_level;
|
|
size_t i;
|
|
long cint_size = 0, xsize = 0;
|
|
AWKNUM kb = 0;
|
|
extern AWKNUM int_kilobytes(NODE *symbol);
|
|
extern AWKNUM str_kilobytes(NODE *symbol);
|
|
|
|
indent_level = ndump->alevel;
|
|
|
|
if (symbol->xarray != NULL) {
|
|
xn = symbol->xarray;
|
|
xsize = xn->table_size;
|
|
}
|
|
cint_size = symbol->table_size - xsize;
|
|
|
|
if ((symbol->flags & XARRAY) == 0)
|
|
fprintf(output_fp, "%s `%s'\n",
|
|
(symbol->parent_array == NULL) ? "array" : "sub-array",
|
|
array_vname(symbol));
|
|
indent_level++;
|
|
indent(indent_level);
|
|
fprintf(output_fp, "array_func: cint_array_func\n");
|
|
if (symbol->flags != 0) {
|
|
indent(indent_level);
|
|
fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
|
|
}
|
|
indent(indent_level);
|
|
fprintf(output_fp, "NHAT: %d\n", NHAT);
|
|
indent(indent_level);
|
|
fprintf(output_fp, "THRESHOLD: %ld\n", THRESHOLD);
|
|
indent(indent_level);
|
|
fprintf(output_fp, "table_size: %lu (total), %ld (cint), %ld (int + str)\n",
|
|
(unsigned long) symbol->table_size, cint_size, xsize);
|
|
indent(indent_level);
|
|
fprintf(output_fp, "array_capacity: %lu\n", (unsigned long) symbol->array_capacity);
|
|
indent(indent_level);
|
|
fprintf(output_fp, "Load Factor: %.2g\n", (AWKNUM) cint_size / symbol->array_capacity);
|
|
|
|
for (i = NHAT; i < INT32_BIT; i++) {
|
|
tn = symbol->nodes[i];
|
|
if (tn == NULL)
|
|
continue;
|
|
/* Node_array_tree + HAT */
|
|
kb += (sizeof(NODE) + tree_kilobytes(tn)) / 1024.0;
|
|
}
|
|
kb += (INT32_BIT * sizeof(NODE *)) / 1024.0; /* symbol->nodes */
|
|
kb += (symbol->array_capacity * sizeof(NODE *)) / 1024.0; /* value nodes in Node_array_leaf(s) */
|
|
if (xn != NULL) {
|
|
if (xn->array_funcs == & int_array_func)
|
|
kb += int_kilobytes(xn);
|
|
else
|
|
kb += str_kilobytes(xn);
|
|
}
|
|
|
|
indent(indent_level);
|
|
fprintf(output_fp, "memory: %.2g kB (total)\n", kb);
|
|
|
|
/* dump elements */
|
|
|
|
if (ndump->adepth >= 0) {
|
|
const char *aname;
|
|
|
|
fprintf(output_fp, "\n");
|
|
aname = make_aname(symbol);
|
|
for (i = NHAT; i < INT32_BIT; i++) {
|
|
tn = symbol->nodes[i];
|
|
if (tn != NULL)
|
|
tree_info(tn, ndump, aname);
|
|
}
|
|
}
|
|
|
|
if (xn != NULL) {
|
|
fprintf(output_fp, "\n");
|
|
xn->adump(xn, ndump);
|
|
}
|
|
|
|
#ifdef ARRAYDEBUG
|
|
if (ndump->adepth < -999)
|
|
cint_print(symbol);
|
|
#endif
|
|
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/* cint_hash --- locate the HAT for a given number 'k' */
|
|
|
|
static inline int
|
|
cint_hash(long k)
|
|
{
|
|
uint32_t num, r, shift;
|
|
|
|
assert(k >= 0);
|
|
if (k == 0)
|
|
return NHAT;
|
|
num = k;
|
|
|
|
/* Find the Floor(log base 2 of 32-bit integer) */
|
|
|
|
/*
|
|
* Warren Jr., Henry S. (2002). Hacker's Delight.
|
|
* Addison Wesley. pp. pp. 215. ISBN 978-0201914658.
|
|
*
|
|
* r = 0;
|
|
* if (num >= 1<<16) { num >>= 16; r += 16; }
|
|
* if (num >= 1<< 8) { num >>= 8; r += 8; }
|
|
* if (num >= 1<< 4) { num >>= 4; r += 4; }
|
|
* if (num >= 1<< 2) { num >>= 2; r += 2; }
|
|
* if (num >= 1<< 1) { r += 1; }
|
|
*/
|
|
|
|
|
|
/*
|
|
* Slightly different code copied from:
|
|
*
|
|
* http://www-graphics.stanford.edu/~seander/bithacks.html
|
|
* Bit Twiddling Hacks
|
|
* By Sean Eron Anderson
|
|
* seander@cs.stanford.edu
|
|
* Individually, the code snippets here are in the public domain
|
|
* (unless otherwise noted) --- feel free to use them however you please.
|
|
* The aggregate collection and descriptions are (C) 1997-2005
|
|
* Sean Eron Anderson. The code and descriptions are distributed in the
|
|
* hope that they will be useful, but WITHOUT ANY WARRANTY and without
|
|
* even the implied warranty of merchantability or fitness for a particular
|
|
* purpose.
|
|
*
|
|
*/
|
|
|
|
r = (num > 0xFFFF) << 4; num >>= r;
|
|
shift = (num > 0xFF) << 3; num >>= shift; r |= shift;
|
|
shift = (num > 0x0F) << 2; num >>= shift; r |= shift;
|
|
shift = (num > 0x03) << 1; num >>= shift; r |= shift;
|
|
r |= (num >> 1);
|
|
|
|
/* We use a single HAT for 0 <= num < 2^NHAT */
|
|
if (r < NHAT)
|
|
return NHAT;
|
|
|
|
return (1 + r);
|
|
}
|
|
|
|
|
|
/* cint_find --- locate the integer subscript */
|
|
|
|
static inline NODE **
|
|
cint_find(NODE *symbol, long k, int h1)
|
|
{
|
|
NODE *tn;
|
|
|
|
if (symbol->nodes == NULL || (tn = symbol->nodes[h1]) == NULL)
|
|
return NULL;
|
|
return tree_exists(tn, k);
|
|
}
|
|
|
|
|
|
#ifdef ARRAYDEBUG
|
|
|
|
/* cint_print --- print structural info */
|
|
|
|
static void
|
|
cint_print(NODE *symbol)
|
|
{
|
|
NODE *tn;
|
|
size_t i;
|
|
|
|
fprintf(output_fp, "I[%4lu:%-4lu]\n", (unsigned long) INT32_BIT,
|
|
(unsigned long) symbol->table_size);
|
|
for (i = NHAT; i < INT32_BIT; i++) {
|
|
tn = symbol->nodes[i];
|
|
if (tn == NULL)
|
|
continue;
|
|
tree_print(tn, i, 1);
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
/*------------------------ Hashed Array Trees -----------------------------*/
|
|
|
|
/*
|
|
* HATs: Hashed Array Trees
|
|
* Fast variable-length arrays
|
|
* Edward Sitarski
|
|
* http://www.drdobbs.com/architecture-and-design/184409965
|
|
*
|
|
* HAT has a top-level array containing a power of two
|
|
* number of leaf arrays. All leaf arrays are the same size as the
|
|
* top-level array. A full HAT can hold n^2 elements,
|
|
* where n (some power of 2) is the size of each leaf array.
|
|
* [i/n][i & (n - 1)] locates the `i th' element in a HAT.
|
|
*
|
|
*/
|
|
|
|
/*
|
|
* A half HAT is defined here as a HAT with a top-level array of size n/2
|
|
* and holds the first n^2/2 elements.
|
|
*
|
|
* 1. 2^8 elements can be stored in a full HAT of size 2^4.
|
|
* 2. 2^9 elements can be stored in a half HAT of size 2^5.
|
|
* 3. When the number of elements is some power of 2, it
|
|
* can be stored in a full or a half HAT.
|
|
* 4. When the number of elements is some power of 2, it
|
|
* can be stored in a HAT (full or half) with HATs as leaf elements
|
|
* (full or half), and so on (e.g. 2^8 elements in a HAT of size 2^4 (top-level
|
|
* array dimension) with each leaf array being a HAT of size 2^2).
|
|
*
|
|
* IMPLEMENTATION DETAILS:
|
|
* 1. A HAT of 2^12 elements needs 2^6 house-keeping NODEs
|
|
* of Node_array_leaf.
|
|
*
|
|
* 2. A HAT of HATS of 2^12 elements needs
|
|
* 2^6 * (1 Node_array_tree + 2^3 Node_array_leaf)
|
|
* ~ 2^9 house-keeping NODEs.
|
|
*
|
|
* 3. When a leaf array (or leaf HAT) becomes empty, the memory
|
|
* is deallocated, and when there is no leaf array (or leaf HAT) left,
|
|
* the HAT is deleted.
|
|
*
|
|
* 4. A HAT stores the base (first) element, and locates the leaf array/HAT
|
|
* for the `i th' element using integer division
|
|
* (i - base)/n where n is the size of the top-level array.
|
|
*
|
|
*/
|
|
|
|
/* make_node --- initialize a NODE */
|
|
|
|
static inline NODE *
|
|
make_node(NODETYPE type)
|
|
{
|
|
NODE *n;
|
|
getnode(n);
|
|
memset(n, '\0', sizeof(NODE));
|
|
n->type = type;
|
|
return n;
|
|
}
|
|
|
|
|
|
/* tree_lookup --- Find an integer subscript in a HAT; Install it if it isn't there */
|
|
|
|
static NODE **
|
|
tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base)
|
|
{
|
|
NODE **lhs;
|
|
NODE *tn;
|
|
int i, n;
|
|
size_t size;
|
|
long num = k;
|
|
|
|
/*
|
|
* HAT size (size of Top & Leaf array) = 2^n
|
|
* where n = Floor ((m + 1)/2). For an odd value of m,
|
|
* only the first half of the HAT is needed.
|
|
*/
|
|
|
|
n = (m + 1) / 2;
|
|
|
|
if (tree->table_size == 0) {
|
|
size_t actual_size;
|
|
NODE **table;
|
|
|
|
assert(tree->nodes == NULL);
|
|
|
|
/* initialize top-level array */
|
|
size = actual_size = power_two_table[n];
|
|
tree->array_base = base;
|
|
tree->array_size = size;
|
|
tree->table_size = 0; /* # of elements in the array */
|
|
if (n > m/2) {
|
|
/* only first half of the array used */
|
|
actual_size /= 2;
|
|
tree->flags |= HALFHAT;
|
|
}
|
|
ezalloc(table, NODE **, actual_size * sizeof(NODE *), "tree_lookup");
|
|
tree->nodes = table;
|
|
} else
|
|
size = tree->array_size;
|
|
|
|
num -= tree->array_base;
|
|
i = num / size; /* top-level array index */
|
|
assert(i >= 0);
|
|
|
|
if ((lhs = tree_find(tree, k, i)) != NULL)
|
|
return lhs;
|
|
|
|
/* It's not there, install it */
|
|
|
|
tree->table_size++;
|
|
base += (size * i);
|
|
tn = tree->nodes[i];
|
|
if (n > NHAT) {
|
|
if (tn == NULL)
|
|
tn = tree->nodes[i] = make_node(Node_array_tree);
|
|
return tree_lookup(symbol, tn, k, n, base);
|
|
} else {
|
|
if (tn == NULL)
|
|
tn = tree->nodes[i] = make_node(Node_array_leaf);
|
|
return leaf_lookup(symbol, tn, k, size, base);
|
|
}
|
|
}
|
|
|
|
|
|
/* tree_exists --- test whether integer subscript `k' exists or not */
|
|
|
|
static NODE **
|
|
tree_exists(NODE *tree, long k)
|
|
{
|
|
int i;
|
|
NODE *tn;
|
|
|
|
i = (k - tree->array_base) / tree->array_size;
|
|
assert(i >= 0);
|
|
tn = tree->nodes[i];
|
|
if (tn == NULL)
|
|
return NULL;
|
|
if (tn->type == Node_array_tree)
|
|
return tree_exists(tn, k);
|
|
return leaf_exists(tn, k);
|
|
}
|
|
|
|
/* tree_clear --- flush all the values */
|
|
|
|
static void
|
|
tree_clear(NODE *tree)
|
|
{
|
|
NODE *tn;
|
|
size_t j, hsize;
|
|
|
|
hsize = tree->array_size;
|
|
if ((tree->flags & HALFHAT) != 0)
|
|
hsize /= 2;
|
|
|
|
for (j = 0; j < hsize; j++) {
|
|
tn = tree->nodes[j];
|
|
if (tn == NULL)
|
|
continue;
|
|
if (tn->type == Node_array_tree)
|
|
tree_clear(tn);
|
|
else
|
|
leaf_clear(tn);
|
|
freenode(tn);
|
|
}
|
|
|
|
efree(tree->nodes);
|
|
memset(tree, '\0', sizeof(NODE));
|
|
tree->type = Node_array_tree;
|
|
}
|
|
|
|
|
|
/* tree_remove --- If the integer subscript is in the HAT, remove it */
|
|
|
|
static int
|
|
tree_remove(NODE *symbol, NODE *tree, long k)
|
|
{
|
|
int i;
|
|
NODE *tn;
|
|
|
|
i = (k - tree->array_base) / tree->array_size;
|
|
assert(i >= 0);
|
|
tn = tree->nodes[i];
|
|
if (tn == NULL)
|
|
return false;
|
|
|
|
if (tn->type == Node_array_tree
|
|
&& ! tree_remove(symbol, tn, k))
|
|
return false;
|
|
else if (tn->type == Node_array_leaf
|
|
&& ! leaf_remove(symbol, tn, k))
|
|
return false;
|
|
|
|
if (tn->table_size == 0) {
|
|
freenode(tn);
|
|
tree->nodes[i] = NULL;
|
|
}
|
|
|
|
/* one less item in array */
|
|
if (--tree->table_size == 0) {
|
|
efree(tree->nodes);
|
|
memset(tree, '\0', sizeof(NODE));
|
|
tree->type = Node_array_tree;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
/* tree_find --- locate an interger subscript in the HAT */
|
|
|
|
static inline NODE **
|
|
tree_find(NODE *tree, long k, int i)
|
|
{
|
|
NODE *tn;
|
|
|
|
assert(tree->nodes != NULL);
|
|
tn = tree->nodes[i];
|
|
if (tn != NULL) {
|
|
if (tn->type == Node_array_tree)
|
|
return tree_exists(tn, k);
|
|
return leaf_exists(tn, k);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/* tree_list --- return a list of items in the HAT */
|
|
|
|
static long
|
|
tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind)
|
|
{
|
|
NODE *tn;
|
|
size_t j, cj, hsize;
|
|
long k = 0;
|
|
|
|
assert(list != NULL);
|
|
|
|
hsize = tree->array_size;
|
|
if ((tree->flags & HALFHAT) != 0)
|
|
hsize /= 2;
|
|
|
|
for (j = 0; j < hsize; j++) {
|
|
cj = (assoc_kind & ADESC) != 0 ? (hsize - 1 - j) : j;
|
|
tn = tree->nodes[cj];
|
|
if (tn == NULL)
|
|
continue;
|
|
if (tn->type == Node_array_tree)
|
|
k += tree_list(tn, list + k, assoc_kind);
|
|
else
|
|
k += leaf_list(tn, list + k, assoc_kind);
|
|
if ((assoc_kind & ADELETE) != 0 && k >= 1)
|
|
return k;
|
|
}
|
|
return k;
|
|
}
|
|
|
|
|
|
/* tree_copy --- duplicate a HAT */
|
|
|
|
static void
|
|
tree_copy(NODE *newsymb, NODE *tree, NODE *newtree)
|
|
{
|
|
NODE **old, **new;
|
|
size_t j, hsize;
|
|
|
|
hsize = tree->array_size;
|
|
if ((tree->flags & HALFHAT) != 0)
|
|
hsize /= 2;
|
|
|
|
ezalloc(new, NODE **, hsize * sizeof(NODE *), "tree_copy");
|
|
newtree->nodes = new;
|
|
newtree->array_base = tree->array_base;
|
|
newtree->array_size = tree->array_size;
|
|
newtree->table_size = tree->table_size;
|
|
newtree->flags = tree->flags;
|
|
|
|
old = tree->nodes;
|
|
for (j = 0; j < hsize; j++) {
|
|
if (old[j] == NULL)
|
|
continue;
|
|
if (old[j]->type == Node_array_tree) {
|
|
new[j] = make_node(Node_array_tree);
|
|
tree_copy(newsymb, old[j], new[j]);
|
|
} else {
|
|
new[j] = make_node(Node_array_leaf);
|
|
leaf_copy(newsymb, old[j], new[j]);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/* tree_info --- print index, value info */
|
|
|
|
static void
|
|
tree_info(NODE *tree, NODE *ndump, const char *aname)
|
|
{
|
|
NODE *tn;
|
|
size_t j, hsize;
|
|
|
|
hsize = tree->array_size;
|
|
if ((tree->flags & HALFHAT) != 0)
|
|
hsize /= 2;
|
|
|
|
for (j = 0; j < hsize; j++) {
|
|
tn = tree->nodes[j];
|
|
if (tn == NULL)
|
|
continue;
|
|
if (tn->type == Node_array_tree)
|
|
tree_info(tn, ndump, aname);
|
|
else
|
|
leaf_info(tn, ndump, aname);
|
|
}
|
|
}
|
|
|
|
|
|
/* tree_kilobytes --- calculate memory consumption of a HAT */
|
|
|
|
static size_t
|
|
tree_kilobytes(NODE *tree)
|
|
{
|
|
NODE *tn;
|
|
size_t j, hsize;
|
|
size_t sz = 0;
|
|
|
|
hsize = tree->array_size;
|
|
if ((tree->flags & HALFHAT) != 0)
|
|
hsize /= 2;
|
|
for (j = 0; j < hsize; j++) {
|
|
tn = tree->nodes[j];
|
|
if (tn == NULL)
|
|
continue;
|
|
sz += sizeof(NODE); /* Node_array_tree or Node_array_leaf */
|
|
if (tn->type == Node_array_tree)
|
|
sz += tree_kilobytes(tn);
|
|
}
|
|
sz += hsize * sizeof(NODE *); /* tree->nodes */
|
|
return sz;
|
|
}
|
|
|
|
#ifdef ARRAYDEBUG
|
|
|
|
/* tree_print --- print the HAT structures */
|
|
|
|
static void
|
|
tree_print(NODE *tree, size_t bi, int indent_level)
|
|
{
|
|
NODE *tn;
|
|
size_t j, hsize;
|
|
|
|
indent(indent_level);
|
|
|
|
hsize = tree->array_size;
|
|
if ((tree->flags & HALFHAT) != 0)
|
|
hsize /= 2;
|
|
fprintf(output_fp, "%4lu:%s[%4lu:%-4lu]\n",
|
|
(unsigned long) bi,
|
|
(tree->flags & HALFHAT) != 0 ? "HH" : "H",
|
|
(unsigned long) hsize, (unsigned long) tree->table_size);
|
|
|
|
for (j = 0; j < hsize; j++) {
|
|
tn = tree->nodes[j];
|
|
if (tn == NULL)
|
|
continue;
|
|
if (tn->type == Node_array_tree)
|
|
tree_print(tn, j, indent_level + 1);
|
|
else
|
|
leaf_print(tn, j, indent_level + 1);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/*--------------------- leaf (linear 1-D) array --------------------*/
|
|
|
|
/*
|
|
* leaf_lookup --- find an integer subscript in the array; Install it if
|
|
* it isn't there.
|
|
*/
|
|
|
|
static inline NODE **
|
|
leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base)
|
|
{
|
|
NODE **lhs;
|
|
|
|
if (array->nodes == NULL) {
|
|
array->table_size = 0; /* sanity */
|
|
array->array_size = size;
|
|
array->array_base = base;
|
|
ezalloc(array->nodes, NODE **, size * sizeof(NODE *), "leaf_lookup");
|
|
symbol->array_capacity += size;
|
|
}
|
|
|
|
lhs = array->nodes + (k - base); /* leaf element */
|
|
if (*lhs == NULL) {
|
|
array->table_size++; /* one more element in leaf array */
|
|
*lhs = new_array_element();
|
|
}
|
|
return lhs;
|
|
}
|
|
|
|
|
|
/* leaf_exists --- check if the array contains an integer subscript */
|
|
|
|
static inline NODE **
|
|
leaf_exists(NODE *array, long k)
|
|
{
|
|
NODE **lhs;
|
|
lhs = array->nodes + (k - array->array_base);
|
|
return (*lhs != NULL) ? lhs : NULL;
|
|
}
|
|
|
|
|
|
/* leaf_clear --- flush all values in the array */
|
|
|
|
static void
|
|
leaf_clear(NODE *array)
|
|
{
|
|
long i, size = array->array_size;
|
|
NODE *r;
|
|
|
|
for (i = 0; i < size; i++) {
|
|
r = array->nodes[i];
|
|
if (r == NULL)
|
|
continue;
|
|
if (r->type == Node_var_array) {
|
|
assoc_clear(r); /* recursively clear all sub-arrays */
|
|
efree(r->vname);
|
|
freenode(r);
|
|
} else
|
|
unref(r);
|
|
}
|
|
efree(array->nodes);
|
|
array->nodes = NULL;
|
|
array->array_size = array->table_size = 0;
|
|
}
|
|
|
|
|
|
/* leaf_remove --- remove an integer subscript from the array */
|
|
|
|
static int
|
|
leaf_remove(NODE *symbol, NODE *array, long k)
|
|
{
|
|
NODE **lhs;
|
|
|
|
lhs = array->nodes + (k - array->array_base);
|
|
if (*lhs == NULL)
|
|
return false;
|
|
*lhs = NULL;
|
|
if (--array->table_size == 0) {
|
|
efree(array->nodes);
|
|
array->nodes = NULL;
|
|
symbol->array_capacity -= array->array_size;
|
|
array->array_size = 0; /* sanity */
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
/* leaf_copy --- duplicate a leaf array */
|
|
|
|
static void
|
|
leaf_copy(NODE *newsymb, NODE *array, NODE *newarray)
|
|
{
|
|
NODE **old, **new;
|
|
long size, i;
|
|
|
|
size = array->array_size;
|
|
ezalloc(new, NODE **, size * sizeof(NODE *), "leaf_copy");
|
|
newarray->nodes = new;
|
|
newarray->array_size = size;
|
|
newarray->array_base = array->array_base;
|
|
newarray->flags = array->flags;
|
|
newarray->table_size = array->table_size;
|
|
|
|
old = array->nodes;
|
|
for (i = 0; i < size; i++) {
|
|
if (old[i] == NULL)
|
|
continue;
|
|
if (old[i]->type == Node_val)
|
|
new[i] = dupnode(old[i]);
|
|
else {
|
|
NODE *r;
|
|
r = make_array();
|
|
r->vname = estrdup(old[i]->vname, strlen(old[i]->vname));
|
|
r->parent_array = newsymb;
|
|
new[i] = assoc_copy(old[i], r);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/* leaf_list --- return a list of items */
|
|
|
|
static long
|
|
leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind)
|
|
{
|
|
NODE *r, *subs;
|
|
long num, i, ci, k = 0;
|
|
long size = array->array_size;
|
|
static char buf[100];
|
|
|
|
for (i = 0; i < size; i++) {
|
|
ci = (assoc_kind & ADESC) != 0 ? (size - 1 - i) : i;
|
|
r = array->nodes[ci];
|
|
if (r == NULL)
|
|
continue;
|
|
|
|
/* index */
|
|
num = array->array_base + ci;
|
|
if ((assoc_kind & AISTR) != 0) {
|
|
sprintf(buf, "%ld", num);
|
|
subs = make_string(buf, strlen(buf));
|
|
subs->numbr = num;
|
|
subs->flags |= (NUMCUR|NUMINT);
|
|
} else {
|
|
subs = make_number((AWKNUM) num);
|
|
subs->flags |= (INTIND|NUMINT);
|
|
}
|
|
list[k++] = subs;
|
|
|
|
/* value */
|
|
if ((assoc_kind & AVALUE) != 0) {
|
|
if (r->type == Node_val) {
|
|
if ((assoc_kind & AVNUM) != 0)
|
|
(void) force_number(r);
|
|
else if ((assoc_kind & AVSTR) != 0)
|
|
r = force_string(r);
|
|
}
|
|
list[k++] = r;
|
|
}
|
|
if ((assoc_kind & ADELETE) != 0 && k >= 1)
|
|
return k;
|
|
}
|
|
|
|
return k;
|
|
}
|
|
|
|
|
|
/* leaf_info --- print index, value info */
|
|
|
|
static void
|
|
leaf_info(NODE *array, NODE *ndump, const char *aname)
|
|
{
|
|
NODE *subs, *val;
|
|
size_t i, size;
|
|
|
|
size = array->array_size;
|
|
|
|
subs = make_number((AWKNUM) 0.0);
|
|
subs->flags |= (INTIND|NUMINT);
|
|
for (i = 0; i < size; i++) {
|
|
val = array->nodes[i];
|
|
if (val == NULL)
|
|
continue;
|
|
subs->numbr = array->array_base + i;
|
|
assoc_info(subs, val, ndump, aname);
|
|
}
|
|
unref(subs);
|
|
}
|
|
|
|
#ifdef ARRAYDEBUG
|
|
|
|
/* leaf_print --- print the leaf-array structure */
|
|
|
|
|
|
static void
|
|
leaf_print(NODE *array, size_t bi, int indent_level)
|
|
{
|
|
indent(indent_level);
|
|
fprintf(output_fp, "%4lu:L[%4lu:%-4lu]\n",
|
|
(unsigned long) bi,
|
|
(unsigned long) array->array_size,
|
|
(unsigned long) array->table_size);
|
|
}
|
|
#endif
|
|
|
|
static NODE *argv_shadow_array = NULL;
|
|
|
|
/* argv_store --- post assign function for ARGV in sandbox mode */
|
|
|
|
static NODE **
|
|
argv_store(NODE *symbol, NODE *subs)
|
|
{
|
|
NODE **val = cint_exists(symbol, subs);
|
|
NODE *newval = *val;
|
|
char *cp;
|
|
|
|
if (newval->stlen == 0) // empty strings in ARGV are OK
|
|
return val;
|
|
|
|
if ((cp = strchr(newval->stptr, '=')) == NULL) {
|
|
if (! in_array(argv_shadow_array, newval))
|
|
fatal(_("cannot add a new file (%.*s) to ARGV in sandbox mode"),
|
|
(int) newval->stlen, newval->stptr);
|
|
} else {
|
|
// check if it's a valid variable assignment
|
|
bool badvar = false;
|
|
char *arg = newval->stptr;
|
|
char *cp2;
|
|
|
|
*cp = '\0'; // temporarily
|
|
|
|
if (! is_letter((unsigned char) arg[0]))
|
|
badvar = true;
|
|
else
|
|
for (cp2 = arg+1; *cp2; cp2++)
|
|
if (! is_identchar((unsigned char) *cp2) && *cp2 != ':') {
|
|
badvar = true;
|
|
break;
|
|
}
|
|
|
|
// further checks
|
|
if (! badvar) {
|
|
char *cp = strchr(arg, ':');
|
|
if (cp && (cp[1] != ':' || strchr(cp + 2, ':') != NULL))
|
|
badvar = true;
|
|
}
|
|
*cp = '='; // restore the '='
|
|
|
|
if (badvar && ! in_array(argv_shadow_array, newval))
|
|
fatal(_("cannot add a new file (%.*s) to ARGV in sandbox mode"),
|
|
(int) newval->stlen, newval->stptr);
|
|
|
|
// otherwise, badvar is false, let it through as variable assignment
|
|
}
|
|
return val;
|
|
}
|
|
|
|
/* init_argv_array --- set up the pointers for ARGV in sandbox mode. A bit hacky. */
|
|
|
|
void
|
|
init_argv_array(NODE *argv_node, NODE *shadow_node)
|
|
{
|
|
/* If POSIX simply don't reset the vtable and things work as before */
|
|
if (! do_sandbox)
|
|
return;
|
|
|
|
argv_node->array_funcs = & argv_array_func;
|
|
argv_shadow_array = shadow_node;
|
|
}
|