phunix/commands/i386/asmconv/parse_bas.c

/*	parse_bas.c - parse BCC AS assembly		Author: Kees J. Bot
 *								13 Nov 1994
 */
#define nil 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "asmconv.h"
#include "token.h"
#include "asm86.h"
#include "languages.h"

typedef struct mnemonic {	/* BAS mnemonics translation table. */
	char		*name;
	opcode_t	opcode;
	optype_t	optype;
} mnemonic_t;

static mnemonic_t mnemtab[] = {			/* This array is sorted. */
	{ ".align",	DOT_ALIGN,	PSEUDO },
	{ ".ascii",	DOT_ASCII,	PSEUDO },
	{ ".asciz",	DOT_ASCIZ,	PSEUDO },
	{ ".assert",	DOT_ASSERT,	PSEUDO },
	{ ".base",	DOT_BASE,	PSEUDO },
	{ ".blkb",	DOT_SPACE,	PSEUDO },
	{ ".bss",	DOT_BSS,	PSEUDO },
	{ ".byte",	DOT_DATA1,	PSEUDO },
	{ ".comm",	DOT_COMM,	PSEUDO },
	{ ".data",	DOT_DATA,	PSEUDO },
	{ ".define",	DOT_DEFINE,	PSEUDO },
	{ ".end",	DOT_END,	PSEUDO },
	{ ".even",	DOT_ALIGN,	PSEUDO },
	{ ".extern",	DOT_EXTERN,	PSEUDO },
	{ ".file",	DOT_FILE,	PSEUDO },
	{ ".globl",	DOT_DEFINE,	PSEUDO },
	{ ".lcomm",	DOT_LCOMM,	PSEUDO },
	{ ".line",	DOT_LINE,	PSEUDO },
	{ ".list",	DOT_LIST,	PSEUDO },
	{ ".long",	DOT_DATA4,	PSEUDO },
	{ ".nolist",	DOT_NOLIST,	PSEUDO },
	{ ".rom",	DOT_ROM,	PSEUDO },
	{ ".space",	DOT_SPACE,	PSEUDO },
	{ ".symb",	DOT_SYMB,	PSEUDO },
	{ ".text",	DOT_TEXT,	PSEUDO },
	{ ".use16",	DOT_USE16,	PSEUDO },
	{ ".use32",	DOT_USE32,	PSEUDO },
	{ ".word",	DOT_DATA2,	PSEUDO },
	{ ".zerob",	DOT_SPACE,	PSEUDO },
	{ ".zerow",	DOT_SPACE,	PSEUDO },
	{ "aaa",	AAA,		WORD },
	{ "aad",	AAD,		WORD },
	{ "aam",	AAM,		WORD },
	{ "aas",	AAS,		WORD },
	{ "adc",	ADC,		WORD },
	{ "add",	ADD,		WORD },
	{ "and",	AND,		WORD },
	{ "arpl",	ARPL,		WORD },
	{ "bc",		JB,		JUMP },
	{ "beq",	JE,		JUMP },
	{ "bge",	JGE,		JUMP },
	{ "bgt",	JG,		JUMP },
	{ "bhi",	JA,		JUMP },
	{ "bhis",	JAE,		JUMP },
	{ "ble",	JLE,		JUMP },
	{ "blo",	JB,		JUMP },
	{ "blos",	JBE,		JUMP },
	{ "blt",	JL,		JUMP },
	{ "bnc",	JAE,		JUMP },
	{ "bne",	JNE,		JUMP },
	{ "bound",	BOUND,		WORD },
	{ "br",		JMP,		JUMP },
	{ "bsf",	BSF,		WORD },
	{ "bsr",	BSR,		WORD },
	{ "bswap",	BSWAP,		WORD },
	{ "bt",		BT,		WORD },
	{ "btc",	BTC,		WORD },
	{ "btr",	BTR,		WORD },
	{ "bts",	BTS,		WORD },
	{ "bz",		JE,		JUMP },
	{ "call",	CALL,		JUMP },
	{ "callf",	CALLF,		JUMP },
	{ "cbw",	CBW,		WORD },
	{ "cdq",	CWD,		WORD },
	{ "clc",	CLC,		WORD },
	{ "cld",	CLD,		WORD },
	{ "cli",	CLI,		WORD },
	{ "clts",	CLTS,		WORD },
	{ "cmc",	CMC,		WORD },
	{ "cmp",	CMP,		WORD },
	{ "cmps",	CMPS,		WORD },
	{ "cmpsb",	CMPS,		BYTE },
	{ "cmpxchg",	CMPXCHG,	WORD },
	{ "cwd",	CWD,		WORD },
	{ "cwde",	CBW,		WORD },
	{ "daa",	DAA,		WORD },
	{ "das",	DAS,		WORD },
	{ "dd",		DOT_DATA4,	PSEUDO },
	{ "dec",	DEC,		WORD },
	{ "div",	DIV,		WORD },
	{ "enter",	ENTER,		WORD },
	{ "export",	DOT_DEFINE,	PSEUDO },
	{ "f2xm1",	F2XM1,		WORD },
	{ "fabs",	FABS,		WORD },
	{ "fadd",	FADD,		WORD },
	{ "faddd",	FADDD,		WORD },
	{ "faddp",	FADDP,		WORD },
	{ "fadds",	FADDS,		WORD },
	{ "fbld",	FBLD,		WORD },
	{ "fbstp",	FBSTP,		WORD },
	{ "fchs",	FCHS,		WORD },
	{ "fclex",	FCLEX,		WORD },
	{ "fcomd",	FCOMD,		WORD },
	{ "fcompd",	FCOMPD,		WORD },
	{ "fcompp",	FCOMPP,		WORD },
	{ "fcomps",	FCOMPS,		WORD },
	{ "fcoms",	FCOMS,		WORD },
	{ "fcos",	FCOS,		WORD },
	{ "fdecstp",	FDECSTP,	WORD },
	{ "fdivd",	FDIVD,		WORD },
	{ "fdivp",	FDIVP,		WORD },
	{ "fdivrd",	FDIVRD,		WORD },
	{ "fdivrp",	FDIVRP,		WORD },
	{ "fdivrs",	FDIVRS,		WORD },
	{ "fdivs",	FDIVS,		WORD },
	{ "ffree",	FFREE,		WORD },
	{ "fiaddl",	FIADDL,		WORD },
	{ "fiadds",	FIADDS,		WORD },
	{ "ficom",	FICOM,		WORD },
	{ "ficomp",	FICOMP,		WORD },
	{ "fidivl",	FIDIVL,		WORD },
	{ "fidivrl",	FIDIVRL,	WORD },
	{ "fidivrs",	FIDIVRS,	WORD },
	{ "fidivs",	FIDIVS,		WORD },
	{ "fildl",	FILDL,		WORD },
	{ "fildq",	FILDQ,		WORD },
	{ "filds",	FILDS,		WORD },
	{ "fimull",	FIMULL,		WORD },
	{ "fimuls",	FIMULS,		WORD },
	{ "fincstp",	FINCSTP,	WORD },
	{ "finit",	FINIT,		WORD },
	{ "fistl",	FISTL,		WORD },
	{ "fistp",	FISTP,		WORD },
	{ "fists",	FISTS,		WORD },
	{ "fisubl",	FISUBL,		WORD },
	{ "fisubrl",	FISUBRL,	WORD },
	{ "fisubrs",	FISUBRS,	WORD },
	{ "fisubs",	FISUBS,		WORD },
	{ "fld1",	FLD1,		WORD },
	{ "fldcw",	FLDCW,		WORD },
	{ "fldd",	FLDD,		WORD },
	{ "fldenv",	FLDENV,		WORD },
	{ "fldl2e",	FLDL2E,		WORD },
	{ "fldl2t",	FLDL2T,		WORD },
	{ "fldlg2",	FLDLG2,		WORD },
	{ "fldln2",	FLDLN2,		WORD },
	{ "fldpi",	FLDPI,		WORD },
	{ "flds",	FLDS,		WORD },
	{ "fldx",	FLDX,		WORD },
	{ "fldz",	FLDZ,		WORD },
	{ "fmuld",	FMULD,		WORD },
	{ "fmulp",	FMULP,		WORD },
	{ "fmuls",	FMULS,		WORD },
	{ "fnop",	FNOP,		WORD },
	{ "fpatan",	FPATAN,		WORD },
	{ "fprem",	FPREM,		WORD },
	{ "fprem1",	FPREM1,		WORD },
	{ "fptan",	FPTAN,		WORD },
	{ "frndint",	FRNDINT,	WORD },
	{ "frstor",	FRSTOR,		WORD },
	{ "fsave",	FSAVE,		WORD },
	{ "fscale",	FSCALE,		WORD },
	{ "fsin",	FSIN,		WORD },
	{ "fsincos",	FSINCOS,	WORD },
	{ "fsqrt",	FSQRT,		WORD },
	{ "fstcw",	FSTCW,		WORD },
	{ "fstd",	FSTD,		WORD },
	{ "fstenv",	FSTENV,		WORD },
	{ "fstpd",	FSTPD,		WORD },
	{ "fstps",	FSTPS,		WORD },
	{ "fstpx",	FSTPX,		WORD },
	{ "fsts",	FSTS,		WORD },
	{ "fstsw",	FSTSW,		WORD },
	{ "fsubd",	FSUBD,		WORD },
	{ "fsubp",	FSUBP,		WORD },
	{ "fsubpr",	FSUBPR,		WORD },
	{ "fsubrd",	FSUBRD,		WORD },
	{ "fsubrs",	FSUBRS,		WORD },
	{ "fsubs",	FSUBS,		WORD },
	{ "ftst",	FTST,		WORD },
	{ "fucom",	FUCOM,		WORD },
	{ "fucomp",	FUCOMP,		WORD },
	{ "fucompp",	FUCOMPP,	WORD },
	{ "fxam",	FXAM,		WORD },
	{ "fxch",	FXCH,		WORD },
	{ "fxtract",	FXTRACT,	WORD },
	{ "fyl2x",	FYL2X,		WORD },
	{ "fyl2xp1",	FYL2XP1,	WORD },
	{ "hlt",	HLT,		WORD },
	{ "idiv",	IDIV,		WORD },
	{ "imul",	IMUL,		WORD },
	{ "in",		IN,		WORD },
	{ "inb",	IN,		BYTE },
	{ "inc",	INC,		WORD },
	{ "ins",	INS,		WORD },
	{ "insb",	INS,		BYTE },
	{ "int",	INT,		WORD },
	{ "into",	INTO,		JUMP },
	{ "invd",	INVD,		WORD },
	{ "invlpg",	INVLPG,		WORD },
	{ "iret",	IRET,		JUMP },
	{ "iretd",	IRETD,		JUMP },
	{ "j",		JMP,		JUMP },
	{ "ja",		JA,		JUMP },
	{ "jae",	JAE,		JUMP },
	{ "jb",		JB,		JUMP },
	{ "jbe",	JBE,		JUMP },
	{ "jc",		JB,		JUMP },
	{ "jcxz",	JCXZ,		JUMP },
	{ "je",		JE,		JUMP },
	{ "jecxz",	JCXZ,		JUMP },
	{ "jeq",	JE,		JUMP },
	{ "jg",		JG,		JUMP },
	{ "jge",	JGE,		JUMP },
	{ "jgt",	JG,		JUMP },
	{ "jhi",	JA,		JUMP },
	{ "jhis",	JAE,		JUMP },
	{ "jl",		JL,		JUMP },
	{ "jle",	JLE,		JUMP },
	{ "jlo",	JB,		JUMP },
	{ "jlos",	JBE,		JUMP },
	{ "jlt",	JL,		JUMP },
	{ "jmp",	JMP,		JUMP },
	{ "jmpf",	JMPF,		JUMP },
	{ "jna",	JBE,		JUMP },
	{ "jnae",	JB,		JUMP },
	{ "jnb",	JAE,		JUMP },
	{ "jnbe",	JA,		JUMP },
	{ "jnc",	JAE,		JUMP },
	{ "jne",	JNE,		JUMP },
	{ "jng",	JLE,		JUMP },
	{ "jnge",	JL,		JUMP },
	{ "jnl",	JGE,		JUMP },
	{ "jnle",	JG,		JUMP },
	{ "jno",	JNO,		JUMP },
	{ "jnp",	JNP,		JUMP },
	{ "jns",	JNS,		JUMP },
	{ "jnz",	JNE,		JUMP },
	{ "jo",		JO,		JUMP },
	{ "jp",		JP,		JUMP },
	{ "js",		JS,		JUMP },
	{ "jz",		JE,		JUMP },
	{ "lahf",	LAHF,		WORD },
	{ "lar",	LAR,		WORD },
	{ "lds",	LDS,		WORD },
	{ "lea",	LEA,		WORD },
	{ "leave",	LEAVE,		WORD },
	{ "les",	LES,		WORD },
	{ "lfs",	LFS,		WORD },
	{ "lgdt",	LGDT,		WORD },
	{ "lgs",	LGS,		WORD },
	{ "lidt",	LIDT,		WORD },
	{ "lldt",	LLDT,		WORD },
	{ "lmsw",	LMSW,		WORD },
	{ "lock",	LOCK,		WORD },
	{ "lods",	LODS,		WORD },
	{ "lodsb",	LODS,		BYTE },
	{ "loop",	LOOP,		JUMP },
	{ "loope",	LOOPE,		JUMP },
	{ "loopne",	LOOPNE,		JUMP },
	{ "loopnz",	LOOPNE,		JUMP },
	{ "loopz",	LOOPE,		JUMP },
	{ "lsl",	LSL,		WORD },
	{ "lss",	LSS,		WORD },
	{ "ltr",	LTR,		WORD },
	{ "mov",	MOV,		WORD },
	{ "movs",	MOVS,		WORD },
	{ "movsb",	MOVS,		BYTE },
	{ "movsx",	MOVSX,		WORD },
	{ "movzx",	MOVZX,		WORD },
	{ "mul",	MUL,		WORD },
	{ "neg",	NEG,		WORD },
	{ "nop",	NOP,		WORD },
	{ "not",	NOT,		WORD },
	{ "or",		OR,		WORD },
	{ "out",	OUT,		WORD },
	{ "outb",	OUT,		BYTE },
	{ "outs",	OUTS,		WORD },
	{ "outsb",	OUTS,		BYTE },
	{ "pop",	POP,		WORD },
	{ "popa",	POPA,		WORD },
	{ "popad",	POPA,		WORD },
	{ "popf",	POPF,		WORD },
	{ "popfd",	POPF,		WORD },
	{ "push",	PUSH,		WORD },
	{ "pusha",	PUSHA,		WORD },
	{ "pushad",	PUSHA,		WORD },
	{ "pushf",	PUSHF,		WORD },
	{ "pushfd",	PUSHF,		WORD },
	{ "rcl",	RCL,		WORD },
	{ "rcr",	RCR,		WORD },
	{ "ret",	RET,		JUMP },
	{ "retf",	RETF,		JUMP },
	{ "rol",	ROL,		WORD },
	{ "ror",	ROR,		WORD },
	{ "sahf",	SAHF,		WORD },
	{ "sal",	SAL,		WORD },
	{ "sar",	SAR,		WORD },
	{ "sbb",	SBB,		WORD },
	{ "scas",	SCAS,		WORD },
	{ "seta",	SETA,		BYTE },
	{ "setae",	SETAE,		BYTE },
	{ "setb",	SETB,		BYTE },
	{ "setbe",	SETBE,		BYTE },
	{ "sete",	SETE,		BYTE },
	{ "setg",	SETG,		BYTE },
	{ "setge",	SETGE,		BYTE },
	{ "setl",	SETL,		BYTE },
	{ "setna",	SETBE,		BYTE },
	{ "setnae",	SETB,		BYTE },
	{ "setnb",	SETAE,		BYTE },
	{ "setnbe",	SETA,		BYTE },
	{ "setne",	SETNE,		BYTE },
	{ "setng",	SETLE,		BYTE },
	{ "setnge",	SETL,		BYTE },
	{ "setnl",	SETGE,		BYTE },
	{ "setnle",	SETG,		BYTE },
	{ "setno",	SETNO,		BYTE },
	{ "setnp",	SETNP,		BYTE },
	{ "setns",	SETNS,		BYTE },
	{ "seto",	SETO,		BYTE },
	{ "setp",	SETP,		BYTE },
	{ "sets",	SETS,		BYTE },
	{ "setz",	SETE,		BYTE },
	{ "sgdt",	SGDT,		WORD },
	{ "shl",	SHL,		WORD },
	{ "shld",	SHLD,		WORD },
	{ "shr",	SHR,		WORD },
	{ "shrd",	SHRD,		WORD },
	{ "sidt",	SIDT,		WORD },
	{ "sldt",	SLDT,		WORD },
	{ "smsw",	SMSW,		WORD },
	{ "stc",	STC,		WORD },
	{ "std",	STD,		WORD },
	{ "sti",	STI,		WORD },
	{ "stos",	STOS,		WORD },
	{ "stosb",	STOS,		BYTE },
	{ "str",	STR,		WORD },
	{ "sub",	SUB,		WORD },
	{ "test",	TEST,		WORD },
	{ "verr",	VERR,		WORD },
	{ "verw",	VERW,		WORD },
	{ "wait",	WAIT,		WORD },
	{ "wbinvd",	WBINVD,		WORD },
	{ "xadd",	XADD,		WORD },
	{ "xchg",	XCHG,		WORD },
	{ "xlat",	XLAT,		WORD },
	{ "xor",	XOR,		WORD },
};

void bas_parse_init(char *file)
/* Prepare parsing of an BAS assembly file. */
{
	tok_init(file, '!');
}

static void zap(void)
/* An error, zap the rest of the line. */
{
	token_t *t;

	while ((t= get_token(0))->type != T_EOF && t->symbol != ';')
		skip_token(1);
}

static mnemonic_t *search_mnem(char *name)
/* Binary search for a mnemonic.  (That's why the table is sorted.) */
{
	int low, mid, high;
	int cmp;
	mnemonic_t *m;

	low= 0;
	high= arraysize(mnemtab)-1;
	while (low <= high) {
		mid= (low + high) / 2;
		m= &mnemtab[mid];

		if ((cmp= strcmp(name, m->name)) == 0) return m;

		if (cmp < 0) high= mid-1; else low= mid+1;
	}
	return nil;
}

static expression_t *bas_get_C_expression(int *pn)
/* Read a "C-like" expression.  Note that we don't worry about precedence,
 * the expression is printed later like it is read.  If the target language
 * does not have all the operators (like ~) then this has to be repaired by
 * changing the source file.  (No problem, you still have one source file
 * to maintain, not two.)
 */
{
	expression_t *e, *a1, *a2;
	token_t *t;

	if ((t= get_token(*pn))->symbol == '(') {
		/* ( expr ): grouping. */
		(*pn)++;
		if ((a1= bas_get_C_expression(pn)) == nil) return nil;
		if (get_token(*pn)->symbol != ')') {
			parse_err(1, t, "missing )\n");
			del_expr(a1);
			return nil;
		}
		(*pn)++;
		e= new_expr();
		e->operator= '[';
		e->middle= a1;
	} else
	if (t->type == T_WORD || t->type == T_STRING) {
		/* Label, number, or string. */
		e= new_expr();
		e->operator= t->type == T_WORD ? 'W' : 'S';
		e->name= allocate(nil, (t->len+1) * sizeof(e->name[0]));
		memcpy(e->name, t->name, t->len+1);
		e->len= t->len;
		(*pn)++;
	} else
	if (t->symbol == '+' || t->symbol == '-' || t->symbol == '~') {
		/* Unary operator. */
		(*pn)++;
		if ((a1= bas_get_C_expression(pn)) == nil) return nil;
		e= new_expr();
		e->operator= t->symbol;
		e->middle= a1;
	} else
	if (t->symbol == '$' && get_token(*pn + 1)->type == T_WORD) {
		/* A hexadecimal number. */
		t= get_token(*pn + 1);
		e= new_expr();
		e->operator= 'W';
		e->name= allocate(nil, (t->len+3) * sizeof(e->name[0]));
		strcpy(e->name, "0x");
		memcpy(e->name+2, t->name, t->len+1);
		e->len= t->len+2;
		(*pn)+= 2;
	} else {
		parse_err(1, t, "expression syntax error\n");
		return nil;
	}

	switch ((t= get_token(*pn))->symbol) {
	case '+':
	case '-':
	case '*':
	case '/':
	case '%':
	case '&':
	case '|':
	case '^':
	case S_LEFTSHIFT:
	case S_RIGHTSHIFT:
		(*pn)++;
		a1= e;
		if ((a2= bas_get_C_expression(pn)) == nil) {
			del_expr(a1);
			return nil;
		}
		e= new_expr();
		e->operator= t->symbol;
		e->left= a1;
		e->right= a2;
	}
	return e;
}

/* We want to know the sizes of the first two operands. */
static optype_t optypes[2];
static int op_idx;

static expression_t *bas_get_operand(int *pn)
/* Get something like: [memory], offset[base+index*scale], or simpler. */
{
	expression_t *e, *offset, *base, *index;
	token_t *t;
	int c;
	optype_t optype;

	/* Prefixed by 'byte', 'word' or 'dword'? */
	if ((t= get_token(*pn))->type == T_WORD && (
		strcmp(t->name, "byte") == 0
		|| strcmp(t->name, "word") == 0
		|| strcmp(t->name, "dword") == 0)
	) {
		switch (t->name[0]) {
		case 'b':	optype= BYTE; break;
		case 'w':	optype= use16() ? WORD : OWORD; break;
		case 'd':	optype= use32() ? WORD : OWORD; break;
		}
		if (op_idx < arraysize(optypes)) optypes[op_idx++]= optype;
		(*pn)++;

		/* It may even be "byte ptr"... */
		if ((t= get_token(*pn))->type == T_WORD
					&& strcmp(t->name, "ptr") == 0) {
			(*pn)++;
		}
	}

	/* Is it [memory]? */
	if (get_token(*pn)->symbol == '['
		&& ((t= get_token(*pn + 1))->type != T_WORD
			|| !isregister(t->name))
	) {
		/* A memory dereference. */
		(*pn)++;
		if ((offset= bas_get_C_expression(pn)) == nil) return nil;
		if (get_token(*pn)->symbol != ']') {
			parse_err(1, t, "operand syntax error\n");
			del_expr(offset);
			return nil;
		}
		(*pn)++;
		e= new_expr();
		e->operator= '(';
		e->middle= offset;
		return e;
	}

	/* #something? *something? */
	if ((c= get_token(*pn)->symbol) == '#' || c == '*') {
		/* '#' and '*' are often used to introduce some constant. */
		(*pn)++;
	}

	/* Offset? */
	if (get_token(*pn)->symbol != '[') {
		/* There is an offset. */
		if ((offset= bas_get_C_expression(pn)) == nil) return nil;
	} else {
		/* No offset. */
		offset= nil;
	}

	/* [base]? [base+? base-? */
	c= 0;
	if (get_token(*pn)->symbol == '['
		&& (t= get_token(*pn + 1))->type == T_WORD
		&& isregister(t->name)
		&& ((c= get_token(*pn + 2)->symbol) == ']' || c=='+' || c=='-')
	) {
		/* A base register expression. */
		base= new_expr();
		base->operator= 'B';
		base->name= copystr(t->name);
		(*pn)+= c == ']' ? 3 : 2;
	} else {
		/* No base register expression. */
		base= nil;
	}

	/* +offset]? -offset]? */
	if (offset == nil
		&& (c == '+' || c == '-')
		&& (t= get_token(*pn + 1))->type == T_WORD
		&& !isregister(t->name)
	) {
		(*pn)++;
		if ((offset= bas_get_C_expression(pn)) == nil) return nil;
		if (get_token(*pn)->symbol != ']') {
			parse_err(1, t, "operand syntax error\n");
			del_expr(offset);
			del_expr(base);
			return nil;
		}
		(*pn)++;
		c= 0;
	}

	/* [index*scale]? +index*scale]? */
	if (c == '+' || get_token(*pn)->symbol == '[') {
		/* An index most likely. */
		token_t *m= nil;

		if (!(		/* This must be true: */
			(t= get_token(*pn + 1))->type == T_WORD
			&& isregister(t->name)
			&& (get_token(*pn + 2)->symbol == ']' || (
				get_token(*pn + 2)->symbol == '*'
				&& (m= get_token(*pn + 3))->type == T_WORD
				&& strchr("1248", m->name[0]) != nil
				&& m->name[1] == 0
				&& get_token(*pn + 4)->symbol == ']'
			))
		)) {
			/* Alas it isn't */
			parse_err(1, t, "operand syntax error\n");
			del_expr(offset);
			del_expr(base);
			return nil;
		}
		/* Found an index. */
		index= new_expr();
		index->operator= m == nil ? '1' : m->name[0];
		index->name= copystr(t->name);
		(*pn)+= (m == nil ? 3 : 5);
	} else {
		/* No index. */
		index= nil;
	}

	if (base == nil && index == nil) {
		/* Return a lone offset as is. */
		e= offset;

		/* Lone registers tell operand size. */
		if (offset->operator == 'W' && isregister(offset->name)) {
			switch (isregister(offset->name)) {
			case 1:	optype= BYTE; break;
			case 2:	optype= use16() ? WORD : OWORD; break;
			case 4:	optype= use32() ? WORD : OWORD; break;
			}
			if (op_idx < arraysize(optypes))
				optypes[op_idx++]= optype;
		}
	} else {
		e= new_expr();
		e->operator= 'O';
		e->left= offset;
		e->middle= base;
		e->right= index;
	}
	return e;
}

static expression_t *bas_get_oplist(int *pn)
/* Get a comma (or colon for jmpf and callf) separated list of instruction
 * operands.
 */
{
	expression_t *e, *o1, *o2;
	token_t *t;

	if ((e= bas_get_operand(pn)) == nil) return nil;

	if ((t= get_token(*pn))->symbol == ',' || t->symbol == ':') {
		o1= e;
		(*pn)++;
		if ((o2= bas_get_oplist(pn)) == nil) {
			del_expr(o1);
			return nil;
		}
		e= new_expr();
		e->operator= ',';
		e->left= o1;
		e->right= o2;
	}
	return e;
}

static asm86_t *bas_get_statement(void)
/* Get a pseudo op or machine instruction with arguments. */
{
	token_t *t= get_token(0);
	asm86_t *a;
	mnemonic_t *m;
	int n;
	int prefix_seen;


	assert(t->type == T_WORD);

	if (strcmp(t->name, ".sect") == 0) {
		/* .sect .text etc.  Accept only four segment names. */
		skip_token(1);
		t= get_token(0);
		if (t->type != T_WORD || (
			strcmp(t->name, ".text") != 0
			&& strcmp(t->name, ".rom") != 0
			&& strcmp(t->name, ".data") != 0
			&& strcmp(t->name, ".bss") != 0
			&& strcmp(t->name, ".end") != 0
		)) {
			parse_err(1, t, "weird section name to .sect\n");
			return nil;
		}
	}
	a= new_asm86();

	/* Process instruction prefixes. */
	for (prefix_seen= 0;; prefix_seen= 1) {
		if (strcmp(t->name, "rep") == 0
			|| strcmp(t->name, "repe") == 0
			|| strcmp(t->name, "repne") == 0
			|| strcmp(t->name, "repz") == 0
			|| strcmp(t->name, "repnz") == 0
		) {
			if (a->rep != ONCE) {
				parse_err(1, t,
					"can't have more than one rep\n");
			}
			switch (t->name[3]) {
			case 0:		a->rep= REP;	break;
			case 'e':
			case 'z':	a->rep= REPE;	break;
			case 'n':	a->rep= REPNE;	break;
			}
		} else
		if (strcmp(t->name, "seg") == 0
					&& get_token(1)->type == T_WORD) {
			if (a->seg != DEFSEG) {
				parse_err(1, t,
				"can't have more than one segment prefix\n");
			}
			switch (get_token(1)->name[0]) {
			case 'c':	a->seg= CSEG;	break;
			case 'd':	a->seg= DSEG;	break;
			case 'e':	a->seg= ESEG;	break;
			case 'f':	a->seg= FSEG;	break;
			case 'g':	a->seg= GSEG;	break;
			case 's':	a->seg= SSEG;	break;
			}
			skip_token(1);
		} else
		if (!prefix_seen) {
			/* No prefix here, get out! */
			break;
		} else {
			/* No more prefixes, next must be an instruction. */
			if (t->type != T_WORD
				|| (m= search_mnem(t->name)) == nil
				|| m->optype == PSEUDO
			) {
				parse_err(1, t,
		"machine instruction expected after instruction prefix\n");
				del_asm86(a);
				return nil;
			}
			break;
		}

		/* Skip the prefix and extra newlines. */
		do {
			skip_token(1);
		} while ((t= get_token(0))->symbol == ';');
	}

	/* All the readahead being done upsets the line counter. */
	a->line= t->line;

	/* Read a machine instruction or pseudo op. */
	if ((m= search_mnem(t->name)) == nil) {
		parse_err(1, t, "unknown instruction '%s'\n", t->name);
		del_asm86(a);
		return nil;
	}
	a->opcode= m->opcode;
	a->optype= m->optype;
	if (a->opcode == CBW || a->opcode == CWD) {
		a->optype= (strcmp(t->name, "cbw") == 0
		    || strcmp(t->name, "cwd") == 0) == use16() ? WORD : OWORD;
	}
	for (op_idx= 0; op_idx < arraysize(optypes); op_idx++)
		optypes[op_idx]= m->optype;
	op_idx= 0;

	n= 1;
	if (get_token(1)->symbol != ';'
				&& (a->args= bas_get_oplist(&n)) == nil) {
		del_asm86(a);
		return nil;
	}

	if (m->optype == WORD) {
		/* Does one of the operands overide the optype? */
		for (op_idx= 0; op_idx < arraysize(optypes); op_idx++) {
			if (optypes[op_idx] != m->optype)
				a->optype= optypes[op_idx];
		}
	}

	if (get_token(n)->symbol != ';') {
		parse_err(1, t, "garbage at end of instruction\n");
		del_asm86(a);
		return nil;
	}
	switch (a->opcode) {
	case DOT_ALIGN:
		/* Restrict .align to have a single numeric argument, some
		 * assemblers think of the argument as a power of two, so
		 * we need to be able to change the value.
		 */
		if (strcmp(t->name, ".even") == 0 && a->args == nil) {
			/* .even becomes .align 2. */
			expression_t *e;
			a->args= e= new_expr();
			e->operator= 'W';
			e->name= copystr("2");
			e->len= 2;
		}
		if (a->args == nil || a->args->operator != 'W'
					|| !isanumber(a->args->name)) {
			parse_err(1, t,
			  ".align is restricted to one numeric argument\n");
			del_asm86(a);
			return nil;
		}
		break;
	case MOVSX:
	case MOVZX:
		/* Types of both operands tell the instruction type. */
		a->optype= optypes[0];
		if (optypes[1] == BYTE) {
			a->opcode= a->opcode == MOVSX ? MOVSXB : MOVZXB;
		}
		break;
	case SAL:
	case SAR:
	case SHL:
	case SHR:
	case RCL:
	case RCR:
	case ROL:
	case ROR:
		/* Only the first operand tells the operand size. */
		a->optype= optypes[0];
		break;
	default:;
	}
	skip_token(n+1);
	return a;
}

asm86_t *bas_get_instruction(void)
{
	asm86_t *a= nil;
	expression_t *e;
	token_t *t;

	while ((t= get_token(0))->symbol == ';')
		skip_token(1);

	if (t->type == T_EOF) return nil;

	if (t->symbol == '#') {
		/* Preprocessor line and file change. */

		if ((t= get_token(1))->type != T_WORD || !isanumber(t->name)
			|| get_token(2)->type != T_STRING
		) {
			parse_err(1, t, "file not preprocessed?\n");
			zap();
		} else {
			set_file(get_token(2)->name,
				strtol(get_token(1)->name, nil, 0) - 1);

			/* GNU CPP adds extra cruft, simply zap the line. */
			zap();
		}
		a= bas_get_instruction();
	} else
	if (t->type == T_WORD && get_token(1)->symbol == ':') {
		/* A label definition. */
		a= new_asm86();
		a->line= t->line;
		a->opcode= DOT_LABEL;
		a->optype= PSEUDO;
		a->args= e= new_expr();
		e->operator= ':';
		e->name= copystr(t->name);
		skip_token(2);
	} else
	if (t->type == T_WORD && get_token(1)->symbol == '=') {
		int n= 2;

		if ((e= bas_get_C_expression(&n)) == nil) {
			zap();
			a= bas_get_instruction();
		} else
		if (get_token(n)->symbol != ';') {
			parse_err(1, t, "garbage after assignment\n");
			zap();
			a= bas_get_instruction();
		} else {
			a= new_asm86();
			a->line= t->line;
			a->opcode= DOT_EQU;
			a->optype= PSEUDO;
			a->args= new_expr();
			a->args->operator= '=';
			a->args->name= copystr(t->name);
			a->args->middle= e;
			skip_token(n+1);
		}
	} else
	if (t->type == T_WORD && get_token(1)->type == T_WORD
				&& strcmp(get_token(1)->name, "lcomm") == 0) {
		/* Local common block definition. */
		int n= 2;

		if ((e= bas_get_C_expression(&n)) == nil) {
			zap();
			a= bas_get_instruction();
		} else
		if (get_token(n)->symbol != ';') {
			parse_err(1, t, "garbage after lcomm\n");
			zap();
			a= bas_get_instruction();
		} else {
			a= new_asm86();
			a->line= t->line;
			a->opcode= DOT_LCOMM;
			a->optype= PSEUDO;
			a->args= new_expr();
			a->args->operator= ',';
			a->args->right= e;
			a->args->left= e= new_expr();
			e->operator= 'W';
			e->name= copystr(t->name);
			e->len= strlen(e->name)+1;
			skip_token(n+1);
		}
	} else
	if (t->type == T_WORD) {
		if ((a= bas_get_statement()) == nil) {
			zap();
			a= bas_get_instruction();
		}
	} else {
		parse_err(1, t, "syntax error\n");
		zap();
		a= bas_get_instruction();
	}
	if (a->optype == OWORD) {
		a->optype= WORD;
		a->oaz|= OPZ;
	}
	return a;
}