Index: tokenizer.c
===================================================================
--- tokenizer.c	(revision 527)
+++ tokenizer.c	(working copy)
@@ -35,26 +35,32 @@
 #endif
 
 #ifdef TEST
-#include <string.h>
 #include <ctype.h>
 #endif
 
+#include <string.h>
+#include <stdio.h> /* printf() */
 #include "tokenizer.h"
 #include "stdlib.h"
 
 static char const *ptr, *nextptr;
 
 #define MAX_NUMLEN 6
+/* !!NOTE: binary search requires the keywords array to be sorted in ascending strcmp() order */
+#define BINARY_SEARCH 0
 
 struct keyword_token {
   const char *keyword;
   int token;
+  int len; /* strlen(keyword); filled in at run time by tokenizer_init() */
 };
 
 static ubasic_token current_token = TOKENIZER_ERROR;
 static int current_line = 0;
 
-static const struct keyword_token keywords[] = {
+/* TOKEN_COUNT does not include NULL terminator. The table is deliberately not const: tokenizer_init() writes each entry's len field. */
+#define TOKEN_COUNT 139
+static struct keyword_token keywords[TOKEN_COUNT+1] = {
   {"<>",          			  TOKENIZER_NE},
   {"<=",          			  TOKENIZER_LE},
   {">=",          			  TOKENIZER_GE},
@@ -256,6 +262,9 @@
 {
   struct keyword_token const *kt;
   int i;
+#if BINARY_SEARCH
+  int low,high,mid,c;
+#endif
 
   DEBUG_PRINTF("get_next_token(): '%s'\n", ptr);
 
@@ -269,17 +278,17 @@
   if(isdigit(*ptr)) {
     for(i = 0; i < (MAX_NUMLEN+1); ++i) {
       if(!isdigit(ptr[i])) {
-	if(i > 0) {
-	  nextptr = ptr + i;
-	  return TOKENIZER_NUMBER;
-	} else {
-	  DEBUG_PRINTF("get_next_token: error due to too short number\n");
-	  return TOKENIZER_ERROR;
-	}
+        if(i > 0) {
+          nextptr = ptr + i;
+          return TOKENIZER_NUMBER;
+        } else {
+          DEBUG_PRINTF("get_next_token: error due to too short number\n");
+          return TOKENIZER_ERROR;
+        }
       }
       if(!isdigit(ptr[i])) {
-	DEBUG_PRINTF("get_next_token: error due to malformed number\n");
-	return TOKENIZER_ERROR;
+        DEBUG_PRINTF("get_next_token: error due to malformed number\n");
+        return TOKENIZER_ERROR;
       }
     }
     DEBUG_PRINTF("get_next_token: error due to too long number\n");
@@ -315,15 +324,39 @@
     ++nextptr;
     return TOKENIZER_STRING;
   } else {
+#if BINARY_SEARCH
+    low = 0;
+    high = TOKEN_COUNT - 1;
+    while(low <= high) {
+      mid = (low + high) / 2;
+      kt = &keywords[mid];
+      c = strncmp(ptr, kt->keyword, kt->len);
+      if (c<0) {
+        high = mid - 1;
+      } else if (c>0) {
+        low = mid + 1;
+      } else {
+        /* matched: step forward while the following (sorted) keyword also matches the input */
+        while((kt+1)->keyword && !strncmp(ptr, (kt+1)->keyword, (kt+1)->len))
+          ++kt;
+        nextptr = ptr + kt->len;
+        if (kt->token == TOKENIZER_REM) { /* same rest-of-line skip as the linear search */
+          while(*nextptr != 0 && *nextptr != '\r' && *nextptr != '\n') ++nextptr;
+        }
+        return kt->token; /* found; only return on an actual match */
+      }
+    }
+#else
     for(kt = keywords; kt->keyword != NULL; ++kt) {
-      if(strncmp(ptr, kt->keyword, strlen(kt->keyword)) == 0) {
-	nextptr = ptr + strlen(kt->keyword);
+      if(strncmp(ptr, kt->keyword, kt->len) == 0) {
+        nextptr = ptr + kt->len;
         if (kt->token == TOKENIZER_REM) {
            while(*nextptr != 0 && *nextptr != '\r' && *nextptr != '\n') ++nextptr;
         }
-	return kt->token;
+        return kt->token;
       }
     }
+#endif
   }
 
   if((*ptr >= 'a' && *ptr <= 'z') || (*ptr >= 'A' && *ptr <= 'Z')) {
@@ -339,6 +372,20 @@
 tokenizer_init(const char *program)
 {
   ptr = program;
+  struct keyword_token *kt;
+  int count = 0;
+  /* Cache every keyword's length once so get_next_token() can compare without
+     calling strlen(). The cast allows writing len: the table must be in writable memory. */
+  for(kt = (struct keyword_token *)keywords; kt->keyword != NULL; ++kt) {
+    kt->len = (int)strlen(kt->keyword);
+    ++count;
+  }
+#if DEBUG
+  if(count!=TOKEN_COUNT) { /* TOKEN_COUNT no longer matches the number of table entries */
+    printf("TOKEN COUNT ERROR:%d, should be %d!!!\n",TOKEN_COUNT,count);
+    exit(1); /* non-zero status: this is a failure, not a clean exit */
+  }
+#endif
   current_line = 1;
   current_token = get_next_token();
 }
