Appendix: Data Structures in C

Diomidis Spinellis
Department of Management Science and Technology
Athens University of Economics and Business
Athens, Greece
dds@aueb.gr

Vectors

• Typically implemented with an array
• Processed using a for loop:
char pbuf0[ED_PAGE_SIZE];

for (i = 0; i < ED_PAGE_SIZE; i++)
pbuf0[i] = 0;
• Copied using memcpy
• Integer-typed elements initialized with memset
• Saved and restored with fread/fwrite (non-portable data representation)

Asymmetric Bounds

[...]
n_long  dr_pref;   /* preference adjusted by metric */
[...]
struct dr *drp;
[...]
for (drp = drs; drp < &drs[MAX_ADS]; drp++) {
drp->dr_recv_pref = 0;
drp->dr_life = 0;
}
In the code above drs (the lower bound) is the first element of the array; drs[MAX_ADS] (the upper bound) the first element outside the array.

When using asymmetric bounds:

• Number of elements is equal to the difference between upper and lower bound.
• When upper bound equals lower bound the range is empty.
• The lower bound is the first occupied element.
• The upper bound is the first free element.

Matrices

• Multi-dimensional structures of elements of the same type
• Fixed-sized implemented as multi-dimensional arrays
typedef double Transform3D[4][4];

IdentMat(Transform3D m)
{
register int i;
register int j;

for (i = 3; i >= 0; --i)
{
for (j = 3; j >= 0; --j)
m[i][j] = 0.0;
m[i][i] = 1.0;
}
}
• Variable-sized matrices are handled through pointers to elements, but accessed using the array syntax:
double **mat = (double **)xmalloc(rows * sizeof(double *));
for (i = 0; i < rows; i++)
mat[i] = (double *)xmalloc(cols * sizeof(double));
• N dimensions can also be mapped into a flat structure using multiplication.

Tables

• Rows of different-typed fields
• Often implemented as arrays of structures
static struct tok icmp2str[] = {
{ ICMP_SOURCEQUENCH,    "source quench" },
{ ICMP_ECHO,            "echo request" },
[...]
{ 0,                NULL }
};
• Can be accessed using a pointer (type-safe)
static struct user {
uid_t uid;
char *name;
long count;
} *users;

/* [...] */

struct user *usr, *usrs;

for (usr = usrs, n = nusers; --n >= 0 && usr->count; usr++) {
printf("%5ld", (long)SIZE(usr->space));
if (count)
printf("\t%5ld", (long)usr->count);
printf("\t%-8s", usr->name);
if (unused)
printf("\t%5ld\t%5ld\t%5ld",
SIZE(usr->spc30), SIZE(usr->spc60),
SIZE(usr->spc90));
printf("\n");
}
free(usrs);
}
• Accessing with an integer index is not type safe (why?)

Stacks

• Often implemented as an ADT
#define STACKMAX 32
static int opstack[STACKMAX];
static int opsp;

PushOp(op)
int op;
{
if (opsp==STACKMAX) {strcpy(dispstr,"stack error");  entered=3;}
else opstack[opsp++]=op;
}

int PopOp()
{
if (opsp==0) {
strcpy(dispstr,"stack error");
entered=3;
return(kNOP);
} else
return(opstack[--opsp]);
}

int isopempty()
{
return( opsp ? 0 : 1 );
}
• Also implemented in-line:
de_stack[--stack_top] = '0' + c % 8;
de_stack[--stack_top] = '0' + (c / 8) % 8;
de_stack[--stack_top] = '0' + c / 64;
Problem with overflow-underflow checking.

Queues

• Found when different systems are connected together (real world connection?)
• Window system - user applications
• OS network packets
• Mail messages
• Network interface cards
• Disk drives
• Serial communication
• Printers
• Efficiently implemented with a ring buffer and two indices
volatile int        rnd_tail;
[...]
volatile rnd_event_t    rnd_events[RND_EVENTQSIZE];
• Code must ensure pointers wrap around. Two approaches
• Conditional:
#define hist_bump(h)    ((++(h) == NUM_HISTORY) ? ((h) = 0) : 0)

for (i = history_tail; i != history_head; hist_bump(i))
DrawPoints (old_pixmaps[i], bit_0_gc, xp, n);
• Modulo arithmetic:

Maps

• Trivial map, integer-indexed:
static const int        mon_lengths[2][MONSPERYEAR] = {
{ 312831303130313130313031 },
{ 312931303130313130313031 }
};
• More sophisticated: key/value pairs
• The following map is used to dispatch functions based on the name of the command:
#define F_NEEDARG       0x01                    /* needs an argument */
#define F_OFFOK         0x02                    /* can turn off */
static struct key {
char *name;                             /* name */
void (*f) __P((struct info *));         /* function */
int flags;
} keys[] = {
{ "all",        f_all,          0 },
{ "cbreak",     f_cbreak,       F_OFFOK },
{ "cols",       f_columns,      F_NEEDARG },
{ "columns",    f_columns,      F_NEEDARG },
{ "cooked",     f_sane,         0 },
{ "dec",        f_dec,          0 },
{ "everything", f_everything,   0 },
{ "extproc",    f_extproc,      F_OFFOK },
/* [...] */
{ "size",       f_size,         0 },
{ "speed",      f_speed,        0 },
{ "tty",        f_tty,          0 },
};

static int c_key __P((const void *, const void *));

static int
c_key(a, b)
const void *a, *b;
{

return (strcmp(((struct key *)a)->name, ((struct key *)b)->name));
}

int
ksearch(argvp, ip)
char ***argvp;
struct info *ip;
{
char *name;
struct key *kp, tmp;

name = **argvp;
if (*name == '-') {
ip->off = 1;
++name;
} else
ip->off = 0;

tmp.name = name;
if (!(kp = (struct key *)bsearch(&tmp, keys,
sizeof(keys)/sizeof(struct key), sizeof(struct key), c_key)))
return (0);
if (!(kp->flags & F_OFFOK) && ip->off) {
warnx("illegal option -- %s", name);
usage();
}
if (kp->flags & F_NEEDARG && !(ip->arg = *++*argvp)) {
warnx("option requires an argument -- %s", name);
usage();
}
kp->f(ip);
return (1);
}

void
f_all(ip)
struct info *ip;
{
print(&ip->t, &ip->win, ip->ldisc, BSD);
}

Hash Tables

• Construct integer array index out of the (non-integer) key
static
Hash(char *string, int len)
{
int h;

h = 0;
while (len--)
h = (h << 3) ^ *string++;
if (h < 0)
return -h;
return h;
}
• Hash function, converts key into an integer
• A modulo function limits the integer into the size of a table
• A scheme for handling collisions must be provided (unless this is a perfect hash function)
Atom
MakeAtom(string, len, makeit)
char *string;
unsigned len;
int makeit;
{
AtomListPtr a;
int         hash;
int         h;
int         r;

hash = Hash (string, len);
if (hashTable)
{
if (hashTable[h])
{
if (hashTable[h]->hash == hash && hashTable[h]->len == len &&
NameEqual (hashTable[h]->name, string, len))
{
return hashTable[h]->atom;
}
r = (hash % rehash) | 1;
for (;;)
{
h += r;
if (h >= hashSize)
h -= hashSize;
if (!hashTable[h])
break;
if (hashTable[h]->hash == hash && hashTable[h]->len == len &&
NameEqual (hashTable[h]->name, string, len))
{
return hashTable[h]->atom;
}
}
}
}

Sets

• Small sets are sometimes represented as collection of bits
• Elements are added using binary-or
pbitvec[j/BITS_PER_LONG] |= ((unsigned long)1 << (j % BITS_PER_LONG));
• Element presence is implemented using binary-and
#define FD_ISSET(n, p) \
((p)->fds_bits[(n)/NFDBITS] & (1 << ((n) % NFDBITS)))
• Element removal is implemented using binary-and of the complement
#define FD_CLR(n, p) \
((p)->fds_bits[(n)/NFDBITS] &= ~(1 << ((n) % NFDBITS)))
• Set intersection is implemented using binary and
#define XFD_ANDSET(dst,b1,b2) \
(dst)->fds_bits[0] = ((b1)->fds_bits[0] & (b2)->fds_bits[0]); \
• Set union is implemented using binary or
• Larger sets are sometimes represented as an array of values
resourceQuarks[q >> 3] |= 1 << (q & 0x7);

• Properties:
• Efficient addition and removal from any position
• Linear traversal to access arbitrary elements
• Efficient dynamic expansion
• List representation:
struct host_list {
struct host_list *next;
} *hosts;
• Traversal:
int
{
struct host_list *hp;

if (!hosts)
return(0);

for (hp = hosts; hp != NULL; hp = hp->next) {
return(1);
}
return(0);
}
void
{
struct host_list *hp;

if (!(hp = (struct host_list *)malloc(sizeof(struct host_list)))) {
err(1"malloc");
/* NOTREACHED */
}
hp->next = hosts;
hosts = hp;
}

• Uses two pointers:
struct queue {
struct queue *q_next, *q_prev;
};
• Adding and removing elements can be done given only a single element pointer
struct queue *next;

elem->q_next = next;
next->q_prev = elem;

• Last element points to the head
• Can not be traversed using a simple for loop
• Typically the code looks for an element to occur again
void
prlex(FILE *fp, struct wordent *sp0)
{
struct wordent *sp = sp0->next;

for (;;) {
(void) fprintf(fp, "%s", vis_str(sp->word));
sp = sp->next;
if (sp == sp0)
break;
if (sp->word[0] != '\n')
(void) fputc(' ', fp);
}
}

Trees

• Single path from each node to the trees root
• Applications
• Data organization
• Searching
• Sorting
• Language processing
• Graphics
• Compression
• Typically implemented using pointers to nodes:
typedef struct tree_s {
tree_t      data;
struct tree_s   *left, *right;
short       bal;
}
tree;
• Processing algorithms are also recursive:
tree_t
tree_srch(tree **ppr_tree, int (*pfi_compare)(), tree_t p_user)
{
register int    i_comp;

if (*ppr_tree) {
i_comp = (*pfi_compare)(p_user, (**ppr_tree).data);
if (i_comp > 0)
return (tree_srch(&(**ppr_tree).right, pfi_compare, p_user))
if (i_comp < 0)
return (tree_srch(&(**ppr_tree).left, pfi_compare, p_user))
/* not higher, not lower... this must be the one.
*/
return ((**ppr_tree).data)
}
*/
return (NULL)
}