An efficient implementation of tables in C

by Fred von Mame


Oftentimes I need to use tabular data in my program. Runtime initializations are expensive, and writing tables that are kept in-sync with each other often hard.

Here’s a simple example. Consider the the following table representing US coin names and values:

typedef struct {
   char *Name;
   unsigned short Count;
} Coin;

typedef enum {
   COIN_PENNY,
   COIN_NICKEL,
   COIN_DIME,
   COIN_QUARTER,
   COIN_HALF,
} Coins;

static Coin CoinInfo[] = {
   { "penny", 1 },
   { "nickel", 5 },
   { "dime", 10 },
   { "quarter", 25 },
   { "half-dollar", 50 },
};

Now accessing information about each coin is very easy:

Coin *c = &CoinInfo[COIN_DIME];
printf("You owe me %d cents (or a %s)\n", c->Count, c->Name);

The problem with that approach is that the it can be tricky to keep both the enum (used as in indexer for the info array) and the array itself in sync, especially as they get bigger.

Here’s a trick to help with that. We start by defining a set of macros to help us with it:

#define _E(e, ...) e
#define _V(e, ...) { __VA_ARGS__ }

#define ENUM(name, vm) \
   typedef enum {\
      vm(_E)\
   } name
#define TABLE(type, name, vm) \
   static type name[] = {\
      vm(_V)\
   }

Next, we describe the table data:

typedef struct {
   char *Name;
   unsigned int BusinessDay : 1;
} Weekday;

Then we define the data accessor (enum identifier) and the data itself. Notice how they are both defined alongside each other, so they are always in sync:

#define WEEK(_m) \
   _m(WEEKDAY_SUNDAY,      "Sunday",       0), \
   _m(WEEKDAY_MONDAY,      "Monday",       1), \
   _m(WEEKDAY_TUESDAY,     "Tuesday",      1), \
   _m(WEEKDAY_WEDNESDAY,   "Wednesday",    1), \
   _m(WEEKDAY_THURSDAY,    "Thursday",     1), \
   _m(WEEKDAY_FRIDAY,      "Friday",       1), \
   _m(WEEKDAY_SATURDAY,    "Saturday",     0), \

Notice how the above statement is merely a definition. It’s a preprocessor macro that will be compiled into nothingness in the generated code, unless it’s referenced from somewhere, and that’s what we’ll do next. We’ll declare both the enum and the array:

ENUM(Weekdays, WEEK);
TABLE(Weekday, WeekdayInfo, WEEK);

And that’s it. Putting it all together, we have:

#define _E(e, ...) e
#define _V(e, ...) { __VA_ARGS__ }

#define ENUM(name, vm) \
   typedef enum {\
      vm(_E)\
   } name
#define TABLE(type, name, vm) \
   static type name[] = {\
     vm(_V)\
   }
typedef struct {
   char *Name;
   unsigned int BusinessDay : 1;
} Weekday;

#define WEEK(_m) \
   _m(WEEKDAY_SUNDAY,      "Sunday",       0), \
   _m(WEEKDAY_MONDAY,      "Monday",       1), \
   _m(WEEKDAY_TUESDAY,     "Tuesday",      1), \
   _m(WEEKDAY_WEDNESDAY,   "Wednesday",    1), \
   _m(WEEKDAY_THURSDAY,    "Thursday",     1), \
   _m(WEEKDAY_FRIDAY,      "Friday",       1), \
   _m(WEEKDAY_SATURDAY,    "Saturday",     0), \

ENUM(Weekdays, WEEK);
TABLE(Weekday, WeekdayInfo, WEEK);

When it runs, the preprocessor will transform that into:

typedef struct {
   char *Name;
   unsigned int BusinessDay : 1;
} Weekday;
typedef enum {
   WEEKDAY_SUNDAY,
   WEEKDAY_MONDAY,
   WEEKDAY_TUESDAY,
   WEEKDAY_WEDNESDAY,
   WEEKDAY_THURSDAY,
   WEEKDAY_FRIDAY,
   WEEKDAY_SATURDAY,
} Weekdays;
static Weekday WeekdayInfo[] = {
   { "Sunday", 0 },
   { "Monday", 1 },
   { "Tuesday", 1 },
   { "Wednesday", 1 },
   { "Thursday", 1 },
   { "Friday", 1 },
   { "Saturday", 0 },
};

And now, from the compiler point-of-view (post-preprocessor), it is as if the enum and the table were declared separately!

Now if we inspect the generated assembly for the code above, it is comforting to see that the table is indeed staticaly initialized into the read-only data segment:

.file "test.c"
   .section .rdata,"dr"
LC0:
   .ascii "Sunday\0"
LC1:
   .ascii "Monday\0"
LC2:
   .ascii "Tuesday\0"
LC3:
   .ascii "Wednesday\0"
LC4:
   .ascii "Thursday\0"
LC5:
   .ascii "Friday\0"
LC6:
   .ascii "Saturday\0"
   .data
   .align 32
_WeekdayInfo:
   .long LC0
   .byte 0
   .space 3
   .long LC1
   .byte 1
   .space 3
   .long LC2
   .byte 1
   .space 3
   .long LC3
   .byte 1
   .space 3
   .long LC4
   .byte 1
   .space 3
   .long LC5
   .byte 1
   .space 3
   .long LC6
   .byte 0
   .space 3

And this is why other languages are still to match C in simplicity, elegance and power.