ubuntu-buildroot/output/build/host-gawk-5.2.0/cint_array.c

1316 lines
30 KiB
C

/*
* cint_array.c - routines for arrays of (mostly) consecutive positive integer indices.
*/
/*
* Copyright (C) 1986, 1988, 1989, 1991-2013, 2016, 2017, 2019-2022,
* the Free Software Foundation, Inc.
*
* This file is part of GAWK, the GNU implementation of the
* AWK Programming Language.
*
* GAWK is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* GAWK is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "awk.h"
#define INT32_BIT 32
extern FILE *output_fp;
extern void indent(int indent_level);
extern NODE **is_integer(NODE *symbol, NODE *subs);
/*
* NHAT --- maximum size of a leaf array (2^NHAT).
* THRESHOLD --- Maximum capacity waste; THRESHOLD >= 2^(NHAT + 1).
*/
static int NHAT = 10;
static long THRESHOLD;
/*
* What is the optimium NHAT ? timing results suggest that 10 is a good choice,
* although differences aren't that significant for > 10.
*/
static NODE **cint_array_init(NODE *symbol, NODE *subs);
static NODE **is_uinteger(NODE *symbol, NODE *subs);
static NODE **cint_lookup(NODE *symbol, NODE *subs);
static NODE **cint_exists(NODE *symbol, NODE *subs);
static NODE **cint_clear(NODE *symbol, NODE *subs);
static NODE **cint_remove(NODE *symbol, NODE *subs);
static NODE **cint_list(NODE *symbol, NODE *t);
static NODE **cint_copy(NODE *symbol, NODE *newsymb);
static NODE **cint_dump(NODE *symbol, NODE *ndump);
#ifdef ARRAYDEBUG
static void cint_print(NODE *symbol);
#endif
const array_funcs_t cint_array_func = {
"cint",
cint_array_init,
is_uinteger,
cint_lookup,
cint_exists,
cint_clear,
cint_remove,
cint_list,
cint_copy,
cint_dump,
(afunc_t) 0,
};
static NODE **argv_store(NODE *symbol, NODE *subs);
/* special case for ARGV in sandbox mode */
static const array_funcs_t argv_array_func = {
"argv",
cint_array_init,
is_uinteger,
cint_lookup,
cint_exists,
cint_clear,
cint_remove,
cint_list,
cint_copy,
cint_dump,
argv_store,
};
static inline int cint_hash(long k);
static inline NODE **cint_find(NODE *symbol, long k, int h1);
static inline NODE *make_node(NODETYPE type);
static NODE **tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base);
static NODE **tree_exists(NODE *tree, long k);
static void tree_clear(NODE *tree);
static int tree_remove(NODE *symbol, NODE *tree, long k);
static void tree_copy(NODE *newsymb, NODE *tree, NODE *newtree);
static long tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind);
static inline NODE **tree_find(NODE *tree, long k, int i);
static void tree_info(NODE *tree, NODE *ndump, const char *aname);
static size_t tree_kilobytes(NODE *tree);
#ifdef ARRAYDEBUG
static void tree_print(NODE *tree, size_t bi, int indent_level);
#endif
static inline NODE **leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base);
static inline NODE **leaf_exists(NODE *array, long k);
static void leaf_clear(NODE *array);
static int leaf_remove(NODE *symbol, NODE *array, long k);
static void leaf_copy(NODE *newsymb, NODE *array, NODE *newarray);
static long leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind);
static void leaf_info(NODE *array, NODE *ndump, const char *aname);
#ifdef ARRAYDEBUG
static void leaf_print(NODE *array, size_t bi, int indent_level);
#endif
/* powers of 2 table upto 2^30 */
static const long power_two_table[] = {
1, 2, 4, 8, 16, 32, 64,
128, 256, 512, 1024, 2048, 4096,
8192, 16384, 32768, 65536, 131072, 262144,
524288, 1048576, 2097152, 4194304, 8388608, 16777216,
33554432, 67108864, 134217728, 268435456, 536870912, 1073741824
};
#define ISUINT(a, s) ((((s)->flags & NUMINT) != 0 || is_integer(a, s) != NULL) \
&& (s)->numbr >= 0)
/*
* To store 2^n integers, allocate top-level array of size n, elements
* of which are 1-Dimensional (leaf-array) of geometrically increasing
* size (power of 2).
*
* [0] --> [ 0 ]
* [1] --> [ 1 ]
* |2| --> [ 2 | 3 ]
* |3| --> [ 4 | 5 | 6 | 7 ]
* |.|
* |k| --> [ 2^(k - 1)| ... | 2^k - 1 ]
* ...
*
* For a given integer n (> 0), the leaf-array is at 1 + floor(log2(n)).
*
* The idea for the geometrically increasing array sizes is from:
* Fast Functional Lists, Hash-Lists, Deques and Variable Length Arrays.
* Bagwell, Phil (2002).
* http://infoscience.epfl.ch/record/64410/files/techlists.pdf
*
* Disadvantage:
* Worst case memory waste > 99% and will happen when each of the
* leaf arrays contains only a single element. Even with consecutive
* integers, memory waste can be as high as 50%.
*
* Solution: Hashed Array Trees (HATs).
*
*/
/* cint_array_init --- array initialization routine */
static NODE **
cint_array_init(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
{
if (symbol == NULL) {
long newval;
size_t nelems = (sizeof(power_two_table) / sizeof(power_two_table[0]));
/* check relevant environment variables */
if ((newval = getenv_long("NHAT")) > 1 && newval < INT32_BIT)
NHAT = newval;
/* don't allow overflow off the end of the table */
if (NHAT > nelems - 2)
NHAT = nelems - 2;
THRESHOLD = power_two_table[NHAT + 1];
} else
null_array(symbol);
return & success_node;
}
/* is_uinteger --- test if the subscript is an integer >= 0 */
NODE **
is_uinteger(NODE *symbol, NODE *subs)
{
if (is_integer(symbol, subs) != NULL && subs->numbr >= 0)
return & success_node;
return NULL;
}
/* cint_lookup --- Find the subscript in the array; Install it if it isn't there. */
static NODE **
cint_lookup(NODE *symbol, NODE *subs)
{
NODE **lhs;
long k;
int h1 = -1, m, li;
NODE *tn, *xn;
long cint_size, capacity;
k = -1;
if (ISUINT(symbol, subs)) {
k = subs->numbr; /* k >= 0 */
h1 = cint_hash(k); /* h1 >= NHAT */
if ((lhs = cint_find(symbol, k, h1)) != NULL)
return lhs;
}
xn = symbol->xarray;
if (xn != NULL && (lhs = xn->aexists(xn, subs)) != NULL)
return lhs;
/* It's not there, install it */
if (k < 0)
goto xinstall;
m = h1 - 1; /* m >= (NHAT- 1) */
/* Estimate capacity upper bound.
* capacity upper bound = current capacity + leaf array size.
*/
li = m > NHAT ? m : NHAT;
while (li >= NHAT) {
/* leaf-array of a HAT */
li = (li + 1) / 2;
}
capacity = symbol->array_capacity + power_two_table[li];
cint_size = (xn == NULL) ? symbol->table_size
: (symbol->table_size - xn->table_size);
assert(cint_size >= 0);
if ((capacity - cint_size) > THRESHOLD)
goto xinstall;
if (symbol->nodes == NULL) {
symbol->array_capacity = 0;
assert(symbol->table_size == 0);
/* nodes[0] .. nodes[NHAT- 1] not used */
ezalloc(symbol->nodes, NODE **, INT32_BIT * sizeof(NODE *), "cint_lookup");
}
symbol->table_size++; /* one more element in array */
tn = symbol->nodes[h1];
if (tn == NULL) {
tn = make_node(Node_array_tree);
symbol->nodes[h1] = tn;
}
if (m < NHAT)
return tree_lookup(symbol, tn, k, NHAT, 0);
return tree_lookup(symbol, tn, k, m, power_two_table[m]);
xinstall:
symbol->table_size++;
if (xn == NULL) {
xn = symbol->xarray = make_array();
xn->vname = symbol->vname; /* shallow copy */
/*
* Avoid using assoc_lookup(xn, subs) which may lead
* to infinite recursion.
*/
if (is_integer(xn, subs))
xn->array_funcs = & int_array_func;
else
xn->array_funcs = & str_array_func;
xn->flags |= XARRAY;
}
return xn->alookup(xn, subs);
}
/* cint_exists --- test whether an index is in the array or not. */
static NODE **
cint_exists(NODE *symbol, NODE *subs)
{
NODE *xn;
if (ISUINT(symbol, subs)) {
long k = subs->numbr;
NODE **lhs;
if ((lhs = cint_find(symbol, k, cint_hash(k))) != NULL)
return lhs;
}
if ((xn = symbol->xarray) == NULL)
return NULL;
return xn->aexists(xn, subs);
}
/* cint_clear --- flush all the values in symbol[] */
static NODE **
cint_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
{
size_t i;
NODE *tn;
assert(symbol->nodes != NULL);
if (symbol->xarray != NULL) {
NODE *xn = symbol->xarray;
assoc_clear(xn);
freenode(xn);
symbol->xarray = NULL;
}
for (i = NHAT; i < INT32_BIT; i++) {
tn = symbol->nodes[i];
if (tn != NULL) {
tree_clear(tn);
freenode(tn);
}
}
efree(symbol->nodes);
symbol->ainit(symbol, NULL); /* re-initialize symbol */
return NULL;
}
/* cint_remove --- remove an index from the array */
static NODE **
cint_remove(NODE *symbol, NODE *subs)
{
long k;
int h1;
NODE *tn, *xn = symbol->xarray;
if (symbol->table_size == 0)
return NULL;
if (! ISUINT(symbol, subs))
goto xremove;
assert(symbol->nodes != NULL);
k = subs->numbr;
h1 = cint_hash(k);
tn = symbol->nodes[h1];
if (tn == NULL || ! tree_remove(symbol, tn, k))
goto xremove;
if (tn->table_size == 0) {
freenode(tn);
symbol->nodes[h1] = NULL;
}
symbol->table_size--;
if (xn == NULL && symbol->table_size == 0) {
efree(symbol->nodes);
symbol->ainit(symbol, NULL); /* re-initialize array 'symbol' */
} else if(xn != NULL && symbol->table_size == xn->table_size) {
/* promote xn to symbol */
xn->flags &= ~XARRAY;
xn->parent_array = symbol->parent_array;
efree(symbol->nodes);
*symbol = *xn;
freenode(xn);
}
return & success_node;
xremove:
xn = symbol->xarray;
if (xn == NULL || xn->aremove(xn, subs) == NULL)
return NULL;
if (xn->table_size == 0) {
freenode(xn);
symbol->xarray = NULL;
}
symbol->table_size--;
assert(symbol->table_size > 0);
return & success_node;
}
/* cint_copy --- duplicate input array "symbol" */
static NODE **
cint_copy(NODE *symbol, NODE *newsymb)
{
NODE **old, **new;
size_t i;
assert(symbol->nodes != NULL);
/* allocate new table */
ezalloc(new, NODE **, INT32_BIT * sizeof(NODE *), "cint_copy");
old = symbol->nodes;
for (i = NHAT; i < INT32_BIT; i++) {
if (old[i] == NULL)
continue;
new[i] = make_node(Node_array_tree);
tree_copy(newsymb, old[i], new[i]);
}
if (symbol->xarray != NULL) {
NODE *xn, *n;
xn = symbol->xarray;
n = make_array();
n->vname = newsymb->vname;
(void) xn->acopy(xn, n);
newsymb->xarray = n;
} else
newsymb->xarray = NULL;
newsymb->nodes = new;
newsymb->table_size = symbol->table_size;
newsymb->array_capacity = symbol->array_capacity;
newsymb->flags = symbol->flags;
return NULL;
}
/* cint_list --- return a list of items */
static NODE**
cint_list(NODE *symbol, NODE *t)
{
NODE **list = NULL;
NODE *tn, *xn;
unsigned long k = 0, num_elems, list_size;
size_t j, ja, jd;
int elem_size = 1;
assoc_kind_t assoc_kind;
num_elems = symbol->table_size;
if (num_elems == 0)
return NULL;
assoc_kind = (assoc_kind_t) t->flags;
if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
num_elems = 1;
if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
elem_size = 2;
list_size = num_elems * elem_size;
if (symbol->xarray != NULL) {
xn = symbol->xarray;
list = xn->alist(xn, t);
assert(list != NULL);
assoc_kind &= ~(AASC|ADESC);
t->flags = (unsigned int) assoc_kind;
if (num_elems == 1 || num_elems == xn->table_size)
return list;
erealloc(list, NODE **, list_size * sizeof(NODE *), "cint_list");
k = elem_size * xn->table_size;
} else
emalloc(list, NODE **, list_size * sizeof(NODE *), "cint_list");
if ((assoc_kind & AINUM) == 0) {
/* not sorting by "index num" */
assoc_kind &= ~(AASC|ADESC);
t->flags = (unsigned int) assoc_kind;
}
/* populate it with index in ascending or descending order */
for (ja = NHAT, jd = INT32_BIT - 1; ja < INT32_BIT && jd >= NHAT; ) {
j = (assoc_kind & ADESC) != 0 ? jd-- : ja++;
tn = symbol->nodes[j];
if (tn == NULL)
continue;
k += tree_list(tn, list + k, assoc_kind);
if (k >= list_size)
return list;
}
return list;
}
/* cint_dump --- dump array info */
static NODE **
cint_dump(NODE *symbol, NODE *ndump)
{
NODE *tn, *xn = NULL;
int indent_level;
size_t i;
long cint_size = 0, xsize = 0;
AWKNUM kb = 0;
extern AWKNUM int_kilobytes(NODE *symbol);
extern AWKNUM str_kilobytes(NODE *symbol);
indent_level = ndump->alevel;
if (symbol->xarray != NULL) {
xn = symbol->xarray;
xsize = xn->table_size;
}
cint_size = symbol->table_size - xsize;
if ((symbol->flags & XARRAY) == 0)
fprintf(output_fp, "%s `%s'\n",
(symbol->parent_array == NULL) ? "array" : "sub-array",
array_vname(symbol));
indent_level++;
indent(indent_level);
fprintf(output_fp, "array_func: cint_array_func\n");
if (symbol->flags != 0) {
indent(indent_level);
fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
}
indent(indent_level);
fprintf(output_fp, "NHAT: %d\n", NHAT);
indent(indent_level);
fprintf(output_fp, "THRESHOLD: %ld\n", THRESHOLD);
indent(indent_level);
fprintf(output_fp, "table_size: %lu (total), %ld (cint), %ld (int + str)\n",
(unsigned long) symbol->table_size, cint_size, xsize);
indent(indent_level);
fprintf(output_fp, "array_capacity: %lu\n", (unsigned long) symbol->array_capacity);
indent(indent_level);
fprintf(output_fp, "Load Factor: %.2g\n", (AWKNUM) cint_size / symbol->array_capacity);
for (i = NHAT; i < INT32_BIT; i++) {
tn = symbol->nodes[i];
if (tn == NULL)
continue;
/* Node_array_tree + HAT */
kb += (sizeof(NODE) + tree_kilobytes(tn)) / 1024.0;
}
kb += (INT32_BIT * sizeof(NODE *)) / 1024.0; /* symbol->nodes */
kb += (symbol->array_capacity * sizeof(NODE *)) / 1024.0; /* value nodes in Node_array_leaf(s) */
if (xn != NULL) {
if (xn->array_funcs == & int_array_func)
kb += int_kilobytes(xn);
else
kb += str_kilobytes(xn);
}
indent(indent_level);
fprintf(output_fp, "memory: %.2g kB (total)\n", kb);
/* dump elements */
if (ndump->adepth >= 0) {
const char *aname;
fprintf(output_fp, "\n");
aname = make_aname(symbol);
for (i = NHAT; i < INT32_BIT; i++) {
tn = symbol->nodes[i];
if (tn != NULL)
tree_info(tn, ndump, aname);
}
}
if (xn != NULL) {
fprintf(output_fp, "\n");
xn->adump(xn, ndump);
}
#ifdef ARRAYDEBUG
if (ndump->adepth < -999)
cint_print(symbol);
#endif
return NULL;
}
/* cint_hash --- locate the HAT for a given number 'k' */
static inline int
cint_hash(long k)
{
uint32_t num, r, shift;
assert(k >= 0);
if (k == 0)
return NHAT;
num = k;
/* Find the Floor(log base 2 of 32-bit integer) */
/*
* Warren Jr., Henry S. (2002). Hacker's Delight.
* Addison Wesley. pp. pp. 215. ISBN 978-0201914658.
*
* r = 0;
* if (num >= 1<<16) { num >>= 16; r += 16; }
* if (num >= 1<< 8) { num >>= 8; r += 8; }
* if (num >= 1<< 4) { num >>= 4; r += 4; }
* if (num >= 1<< 2) { num >>= 2; r += 2; }
* if (num >= 1<< 1) { r += 1; }
*/
/*
* Slightly different code copied from:
*
* http://www-graphics.stanford.edu/~seander/bithacks.html
* Bit Twiddling Hacks
* By Sean Eron Anderson
* seander@cs.stanford.edu
* Individually, the code snippets here are in the public domain
* (unless otherwise noted) --- feel free to use them however you please.
* The aggregate collection and descriptions are (C) 1997-2005
* Sean Eron Anderson. The code and descriptions are distributed in the
* hope that they will be useful, but WITHOUT ANY WARRANTY and without
* even the implied warranty of merchantability or fitness for a particular
* purpose.
*
*/
r = (num > 0xFFFF) << 4; num >>= r;
shift = (num > 0xFF) << 3; num >>= shift; r |= shift;
shift = (num > 0x0F) << 2; num >>= shift; r |= shift;
shift = (num > 0x03) << 1; num >>= shift; r |= shift;
r |= (num >> 1);
/* We use a single HAT for 0 <= num < 2^NHAT */
if (r < NHAT)
return NHAT;
return (1 + r);
}
/* cint_find --- locate the integer subscript */
static inline NODE **
cint_find(NODE *symbol, long k, int h1)
{
NODE *tn;
if (symbol->nodes == NULL || (tn = symbol->nodes[h1]) == NULL)
return NULL;
return tree_exists(tn, k);
}
#ifdef ARRAYDEBUG
/* cint_print --- print structural info */
static void
cint_print(NODE *symbol)
{
NODE *tn;
size_t i;
fprintf(output_fp, "I[%4lu:%-4lu]\n", (unsigned long) INT32_BIT,
(unsigned long) symbol->table_size);
for (i = NHAT; i < INT32_BIT; i++) {
tn = symbol->nodes[i];
if (tn == NULL)
continue;
tree_print(tn, i, 1);
}
}
#endif
/*------------------------ Hashed Array Trees -----------------------------*/
/*
* HATs: Hashed Array Trees
* Fast variable-length arrays
* Edward Sitarski
* http://www.drdobbs.com/architecture-and-design/184409965
*
* HAT has a top-level array containing a power of two
* number of leaf arrays. All leaf arrays are the same size as the
* top-level array. A full HAT can hold n^2 elements,
* where n (some power of 2) is the size of each leaf array.
* [i/n][i & (n - 1)] locates the `i th' element in a HAT.
*
*/
/*
* A half HAT is defined here as a HAT with a top-level array of size n^2/2
* and holds the first n^2/2 elements.
*
* 1. 2^8 elements can be stored in a full HAT of size 2^4.
* 2. 2^9 elements can be stored in a half HAT of size 2^5.
* 3. When the number of elements is some power of 2, it
* can be stored in a full or a half HAT.
* 4. When the number of elements is some power of 2, it
* can be stored in a HAT (full or half) with HATs as leaf elements
* (full or half), and so on (e.g. 2^8 elements in a HAT of size 2^4 (top-level
* array dimension) with each leaf array being a HAT of size 2^2).
*
* IMPLEMENTATION DETAILS:
* 1. A HAT of 2^12 elements needs 2^6 house-keeping NODEs
* of Node_array_leaf.
*
* 2. A HAT of HATS of 2^12 elements needs
* 2^6 * (1 Node_array_tree + 2^3 Node_array_leaf)
* ~ 2^9 house-keeping NODEs.
*
* 3. When a leaf array (or leaf HAT) becomes empty, the memory
* is deallocated, and when there is no leaf array (or leaf HAT) left,
* the HAT is deleted.
*
* 4. A HAT stores the base (first) element, and locates the leaf array/HAT
* for the `i th' element using integer division
* (i - base)/n where n is the size of the top-level array.
*
*/
/* make_node --- initialize a NODE */
static inline NODE *
make_node(NODETYPE type)
{
NODE *n;
getnode(n);
memset(n, '\0', sizeof(NODE));
n->type = type;
return n;
}
/* tree_lookup --- Find an integer subscript in a HAT; Install it if it isn't there */
static NODE **
tree_lookup(NODE *symbol, NODE *tree, long k, int m, long base)
{
NODE **lhs;
NODE *tn;
int i, n;
size_t size;
long num = k;
/*
* HAT size (size of Top & Leaf array) = 2^n
* where n = Floor ((m + 1)/2). For an odd value of m,
* only the first half of the HAT is needed.
*/
n = (m + 1) / 2;
if (tree->table_size == 0) {
size_t actual_size;
NODE **table;
assert(tree->nodes == NULL);
/* initialize top-level array */
size = actual_size = power_two_table[n];
tree->array_base = base;
tree->array_size = size;
tree->table_size = 0; /* # of elements in the array */
if (n > m/2) {
/* only first half of the array used */
actual_size /= 2;
tree->flags |= HALFHAT;
}
ezalloc(table, NODE **, actual_size * sizeof(NODE *), "tree_lookup");
tree->nodes = table;
} else
size = tree->array_size;
num -= tree->array_base;
i = num / size; /* top-level array index */
assert(i >= 0);
if ((lhs = tree_find(tree, k, i)) != NULL)
return lhs;
/* It's not there, install it */
tree->table_size++;
base += (size * i);
tn = tree->nodes[i];
if (n > NHAT) {
if (tn == NULL)
tn = tree->nodes[i] = make_node(Node_array_tree);
return tree_lookup(symbol, tn, k, n, base);
} else {
if (tn == NULL)
tn = tree->nodes[i] = make_node(Node_array_leaf);
return leaf_lookup(symbol, tn, k, size, base);
}
}
/* tree_exists --- test whether integer subscript `k' exists or not */
static NODE **
tree_exists(NODE *tree, long k)
{
int i;
NODE *tn;
i = (k - tree->array_base) / tree->array_size;
assert(i >= 0);
tn = tree->nodes[i];
if (tn == NULL)
return NULL;
if (tn->type == Node_array_tree)
return tree_exists(tn, k);
return leaf_exists(tn, k);
}
/* tree_clear --- flush all the values */
static void
tree_clear(NODE *tree)
{
NODE *tn;
size_t j, hsize;
hsize = tree->array_size;
if ((tree->flags & HALFHAT) != 0)
hsize /= 2;
for (j = 0; j < hsize; j++) {
tn = tree->nodes[j];
if (tn == NULL)
continue;
if (tn->type == Node_array_tree)
tree_clear(tn);
else
leaf_clear(tn);
freenode(tn);
}
efree(tree->nodes);
memset(tree, '\0', sizeof(NODE));
tree->type = Node_array_tree;
}
/* tree_remove --- If the integer subscript is in the HAT, remove it */
static int
tree_remove(NODE *symbol, NODE *tree, long k)
{
int i;
NODE *tn;
i = (k - tree->array_base) / tree->array_size;
assert(i >= 0);
tn = tree->nodes[i];
if (tn == NULL)
return false;
if (tn->type == Node_array_tree
&& ! tree_remove(symbol, tn, k))
return false;
else if (tn->type == Node_array_leaf
&& ! leaf_remove(symbol, tn, k))
return false;
if (tn->table_size == 0) {
freenode(tn);
tree->nodes[i] = NULL;
}
/* one less item in array */
if (--tree->table_size == 0) {
efree(tree->nodes);
memset(tree, '\0', sizeof(NODE));
tree->type = Node_array_tree;
}
return true;
}
/* tree_find --- locate an interger subscript in the HAT */
static inline NODE **
tree_find(NODE *tree, long k, int i)
{
NODE *tn;
assert(tree->nodes != NULL);
tn = tree->nodes[i];
if (tn != NULL) {
if (tn->type == Node_array_tree)
return tree_exists(tn, k);
return leaf_exists(tn, k);
}
return NULL;
}
/* tree_list --- return a list of items in the HAT */
static long
tree_list(NODE *tree, NODE **list, assoc_kind_t assoc_kind)
{
NODE *tn;
size_t j, cj, hsize;
long k = 0;
assert(list != NULL);
hsize = tree->array_size;
if ((tree->flags & HALFHAT) != 0)
hsize /= 2;
for (j = 0; j < hsize; j++) {
cj = (assoc_kind & ADESC) != 0 ? (hsize - 1 - j) : j;
tn = tree->nodes[cj];
if (tn == NULL)
continue;
if (tn->type == Node_array_tree)
k += tree_list(tn, list + k, assoc_kind);
else
k += leaf_list(tn, list + k, assoc_kind);
if ((assoc_kind & ADELETE) != 0 && k >= 1)
return k;
}
return k;
}
/* tree_copy --- duplicate a HAT */
static void
tree_copy(NODE *newsymb, NODE *tree, NODE *newtree)
{
NODE **old, **new;
size_t j, hsize;
hsize = tree->array_size;
if ((tree->flags & HALFHAT) != 0)
hsize /= 2;
ezalloc(new, NODE **, hsize * sizeof(NODE *), "tree_copy");
newtree->nodes = new;
newtree->array_base = tree->array_base;
newtree->array_size = tree->array_size;
newtree->table_size = tree->table_size;
newtree->flags = tree->flags;
old = tree->nodes;
for (j = 0; j < hsize; j++) {
if (old[j] == NULL)
continue;
if (old[j]->type == Node_array_tree) {
new[j] = make_node(Node_array_tree);
tree_copy(newsymb, old[j], new[j]);
} else {
new[j] = make_node(Node_array_leaf);
leaf_copy(newsymb, old[j], new[j]);
}
}
}
/* tree_info --- print index, value info */
static void
tree_info(NODE *tree, NODE *ndump, const char *aname)
{
NODE *tn;
size_t j, hsize;
hsize = tree->array_size;
if ((tree->flags & HALFHAT) != 0)
hsize /= 2;
for (j = 0; j < hsize; j++) {
tn = tree->nodes[j];
if (tn == NULL)
continue;
if (tn->type == Node_array_tree)
tree_info(tn, ndump, aname);
else
leaf_info(tn, ndump, aname);
}
}
/* tree_kilobytes --- calculate memory consumption of a HAT */
static size_t
tree_kilobytes(NODE *tree)
{
NODE *tn;
size_t j, hsize;
size_t sz = 0;
hsize = tree->array_size;
if ((tree->flags & HALFHAT) != 0)
hsize /= 2;
for (j = 0; j < hsize; j++) {
tn = tree->nodes[j];
if (tn == NULL)
continue;
sz += sizeof(NODE); /* Node_array_tree or Node_array_leaf */
if (tn->type == Node_array_tree)
sz += tree_kilobytes(tn);
}
sz += hsize * sizeof(NODE *); /* tree->nodes */
return sz;
}
#ifdef ARRAYDEBUG
/* tree_print --- print the HAT structures */
static void
tree_print(NODE *tree, size_t bi, int indent_level)
{
NODE *tn;
size_t j, hsize;
indent(indent_level);
hsize = tree->array_size;
if ((tree->flags & HALFHAT) != 0)
hsize /= 2;
fprintf(output_fp, "%4lu:%s[%4lu:%-4lu]\n",
(unsigned long) bi,
(tree->flags & HALFHAT) != 0 ? "HH" : "H",
(unsigned long) hsize, (unsigned long) tree->table_size);
for (j = 0; j < hsize; j++) {
tn = tree->nodes[j];
if (tn == NULL)
continue;
if (tn->type == Node_array_tree)
tree_print(tn, j, indent_level + 1);
else
leaf_print(tn, j, indent_level + 1);
}
}
#endif
/*--------------------- leaf (linear 1-D) array --------------------*/
/*
* leaf_lookup --- find an integer subscript in the array; Install it if
* it isn't there.
*/
static inline NODE **
leaf_lookup(NODE *symbol, NODE *array, long k, long size, long base)
{
NODE **lhs;
if (array->nodes == NULL) {
array->table_size = 0; /* sanity */
array->array_size = size;
array->array_base = base;
ezalloc(array->nodes, NODE **, size * sizeof(NODE *), "leaf_lookup");
symbol->array_capacity += size;
}
lhs = array->nodes + (k - base); /* leaf element */
if (*lhs == NULL) {
array->table_size++; /* one more element in leaf array */
*lhs = new_array_element();
}
return lhs;
}
/* leaf_exists --- check if the array contains an integer subscript */
static inline NODE **
leaf_exists(NODE *array, long k)
{
NODE **lhs;
lhs = array->nodes + (k - array->array_base);
return (*lhs != NULL) ? lhs : NULL;
}
/* leaf_clear --- flush all values in the array */
static void
leaf_clear(NODE *array)
{
long i, size = array->array_size;
NODE *r;
for (i = 0; i < size; i++) {
r = array->nodes[i];
if (r == NULL)
continue;
if (r->type == Node_var_array) {
assoc_clear(r); /* recursively clear all sub-arrays */
efree(r->vname);
freenode(r);
} else
unref(r);
}
efree(array->nodes);
array->nodes = NULL;
array->array_size = array->table_size = 0;
}
/* leaf_remove --- remove an integer subscript from the array */
static int
leaf_remove(NODE *symbol, NODE *array, long k)
{
NODE **lhs;
lhs = array->nodes + (k - array->array_base);
if (*lhs == NULL)
return false;
*lhs = NULL;
if (--array->table_size == 0) {
efree(array->nodes);
array->nodes = NULL;
symbol->array_capacity -= array->array_size;
array->array_size = 0; /* sanity */
}
return true;
}
/* leaf_copy --- duplicate a leaf array */
static void
leaf_copy(NODE *newsymb, NODE *array, NODE *newarray)
{
NODE **old, **new;
long size, i;
size = array->array_size;
ezalloc(new, NODE **, size * sizeof(NODE *), "leaf_copy");
newarray->nodes = new;
newarray->array_size = size;
newarray->array_base = array->array_base;
newarray->flags = array->flags;
newarray->table_size = array->table_size;
old = array->nodes;
for (i = 0; i < size; i++) {
if (old[i] == NULL)
continue;
if (old[i]->type == Node_val)
new[i] = dupnode(old[i]);
else {
NODE *r;
r = make_array();
r->vname = estrdup(old[i]->vname, strlen(old[i]->vname));
r->parent_array = newsymb;
new[i] = assoc_copy(old[i], r);
}
}
}
/* leaf_list --- return a list of items */
static long
leaf_list(NODE *array, NODE **list, assoc_kind_t assoc_kind)
{
NODE *r, *subs;
long num, i, ci, k = 0;
long size = array->array_size;
static char buf[100];
for (i = 0; i < size; i++) {
ci = (assoc_kind & ADESC) != 0 ? (size - 1 - i) : i;
r = array->nodes[ci];
if (r == NULL)
continue;
/* index */
num = array->array_base + ci;
if ((assoc_kind & AISTR) != 0) {
sprintf(buf, "%ld", num);
subs = make_string(buf, strlen(buf));
subs->numbr = num;
subs->flags |= (NUMCUR|NUMINT);
} else {
subs = make_number((AWKNUM) num);
subs->flags |= (INTIND|NUMINT);
}
list[k++] = subs;
/* value */
if ((assoc_kind & AVALUE) != 0) {
if (r->type == Node_val) {
if ((assoc_kind & AVNUM) != 0)
(void) force_number(r);
else if ((assoc_kind & AVSTR) != 0)
r = force_string(r);
}
list[k++] = r;
}
if ((assoc_kind & ADELETE) != 0 && k >= 1)
return k;
}
return k;
}
/* leaf_info --- print index, value info */
static void
leaf_info(NODE *array, NODE *ndump, const char *aname)
{
NODE *subs, *val;
size_t i, size;
size = array->array_size;
subs = make_number((AWKNUM) 0.0);
subs->flags |= (INTIND|NUMINT);
for (i = 0; i < size; i++) {
val = array->nodes[i];
if (val == NULL)
continue;
subs->numbr = array->array_base + i;
assoc_info(subs, val, ndump, aname);
}
unref(subs);
}
#ifdef ARRAYDEBUG
/* leaf_print --- print the leaf-array structure */
static void
leaf_print(NODE *array, size_t bi, int indent_level)
{
indent(indent_level);
fprintf(output_fp, "%4lu:L[%4lu:%-4lu]\n",
(unsigned long) bi,
(unsigned long) array->array_size,
(unsigned long) array->table_size);
}
#endif
static NODE *argv_shadow_array = NULL;
/* argv_store --- post assign function for ARGV in sandbox mode */
static NODE **
argv_store(NODE *symbol, NODE *subs)
{
NODE **val = cint_exists(symbol, subs);
NODE *newval = *val;
char *cp;
if (newval->stlen == 0) // empty strings in ARGV are OK
return val;
if ((cp = strchr(newval->stptr, '=')) == NULL) {
if (! in_array(argv_shadow_array, newval))
fatal(_("cannot add a new file (%.*s) to ARGV in sandbox mode"),
(int) newval->stlen, newval->stptr);
} else {
// check if it's a valid variable assignment
bool badvar = false;
char *arg = newval->stptr;
char *cp2;
*cp = '\0'; // temporarily
if (! is_letter((unsigned char) arg[0]))
badvar = true;
else
for (cp2 = arg+1; *cp2; cp2++)
if (! is_identchar((unsigned char) *cp2) && *cp2 != ':') {
badvar = true;
break;
}
// further checks
if (! badvar) {
char *cp = strchr(arg, ':');
if (cp && (cp[1] != ':' || strchr(cp + 2, ':') != NULL))
badvar = true;
}
*cp = '='; // restore the '='
if (badvar && ! in_array(argv_shadow_array, newval))
fatal(_("cannot add a new file (%.*s) to ARGV in sandbox mode"),
(int) newval->stlen, newval->stptr);
// otherwise, badvar is false, let it through as variable assignment
}
return val;
}
/* init_argv_array --- set up the pointers for ARGV in sandbox mode. A bit hacky. */
void
init_argv_array(NODE *argv_node, NODE *shadow_node)
{
/* If POSIX simply don't reset the vtable and things work as before */
if (! do_sandbox)
return;
argv_node->array_funcs = & argv_array_func;
argv_shadow_array = shadow_node;
}