zxdump/main.c

468 lines
11 KiB
C
Raw Normal View History

2023-09-10 22:31:41 -04:00
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <inttypes.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
2023-09-11 19:10:48 -04:00
#include <endian.h>
2023-09-10 22:31:41 -04:00
#include <errno.h>
2023-09-13 00:09:23 -04:00
/*
 * Character-code classification helpers.
 *
 * Every function-like macro below may evaluate its argument more than once,
 * so pass a plain variable rather than an expression with side effects.
 */

/* Number of entries in zx_charset; codes 0 .. ZX_CHARSET_LEN-1 index it. */
#define ZX_CHARSET_LEN 64

/*
 * True when c is a valid index into zx_charset.  The comparison is strict:
 * code ZX_CHARSET_LEN (0x40) is the first low token, not a glyph.
 */
#define ZX_CHAR_LOW(c) \
    ((c) < ZX_CHARSET_LEN)

/* Inverse-video range: glyph index is (c - ZX_CHAR_INVERSE_START). */
#define ZX_CHAR_INVERSE_START 0x80
#define ZX_CHAR_INVERSE_END   0xbf
#define ZX_CHAR_INVERSE(c) \
    ((c) >= ZX_CHAR_INVERSE_START && (c) <= ZX_CHAR_INVERSE_END)

/* Low keyword tokens: zx_tokens_low index is (c - ZX_CHAR_TOKEN_LOW_START). */
#define ZX_CHAR_TOKEN_LOW_START 0x40
#define ZX_CHAR_TOKEN_LOW_END   0x42
#define ZX_CHAR_TOKEN_LOW(c) \
    ((c) >= ZX_CHAR_TOKEN_LOW_START && (c) <= ZX_CHAR_TOKEN_LOW_END)

/* Code that terminates a line of text. */
#define ZX_CHAR_NEWLINE(c) \
    ((c) == 0x76)

/*
 * High keyword tokens: zx_tokens index is (c - ZX_CHAR_TOKEN_HIGH_START).
 * Only the lower bound is tested; callers pass 8-bit codes, for which the
 * upper bound ZX_CHAR_TOKEN_HIGH_END always holds.
 */
#define ZX_CHAR_TOKEN_HIGH_START 0xc0
#define ZX_CHAR_TOKEN_HIGH_END   0xff
#define ZX_CHAR_TOKEN_HIGH(c) \
    ((c) >= ZX_CHAR_TOKEN_HIGH_START)

/* True for any keyword token, low or high. */
#define ZX_CHAR_TOKEN(c) \
    (ZX_CHAR_TOKEN_LOW(c) || ZX_CHAR_TOKEN_HIGH(c))

/* Marker codes that zx_dump_basic treats as the start of an inline number. */
#define ZX_CHAR_TOKEN_INTEGRAL(c) \
    ((c) == 0x0e)
#define ZX_CHAR_TOKEN_FLOAT(c) \
    ((c) == 0x7e)
2023-09-11 18:32:47 -04:00
/*
 * Header that precedes each BASIC line in the input.  zx_dump_basic reads it
 * straight from the file, so both fields hold raw on-disk byte order.
 */
typedef struct _zx_basic_line {
    uint16_t num; /* line number; the reader decodes it with be16toh() */
    uint16_t len; /* length of the line body in bytes; decoded with le16toh() */
} zx_basic_line;
2023-09-13 00:43:08 -04:00
/*
 * Coarse lexical class of a character code, used by zx_dump_basic to decide
 * where to insert a space when printing a listing.
 */
enum zx_basic_token_type {
    ZX_BASIC_TOKEN_UNKNOWN = 0, /* code that fits no other class */
    ZX_BASIC_TOKEN_ALNUM   = 1, /* glyph that is a letter A-Z or a digit 0-9 */
    ZX_BASIC_TOKEN_QUOTE   = 2, /* not produced by the classifier in this file */
    ZX_BASIC_TOKEN_SYMBOL  = 3, /* any other glyph, or a non-alphabetic keyword */
    ZX_BASIC_TOKEN_WORD    = 4  /* keyword whose text starts with a letter */
};
2023-09-12 11:47:15 -04:00
/* Number of bytes zx_dump_basic skips at the start of the input before it
 * reads the first line header. */
#define ZX_BASIC_STATE_SIZE 116
/* Sentinel: zx_dump_basic stops when a header's decoded line number and
 * length both equal this value. */
#define ZX_BASIC_LINE_LAST 0x7676
2023-09-13 00:09:23 -04:00
/*
 * Unicode code point emitted for each directly printable character code,
 * indexed by code (0 .. ZX_CHARSET_LEN-1).  The leading comment on each row
 * gives the code of the row's first entry.
 *
 * NOTE(review): codes 0x09 and 0x0a map to '.', presumably as placeholders
 * for glyphs with no chosen Unicode equivalent -- confirm.
 */
static uint32_t zx_charset[ZX_CHARSET_LEN] = {
    /* 0x00 */ 0x0020, 0x2598, 0x259d, 0x2580, 0x2596, 0x258c, 0x259e, 0x259b,
    /* 0x08 */ 0x2592, '.',    '.',    '"',    0x00a3, '$',    ':',    '?',
    /* 0x10 */ '(',    ')',    '>',    '<',    '=',    '+',    '-',    '*',
    /* 0x18 */ '/',    ';',    ',',    '.',    '0',    '1',    '2',    '3',
    /* 0x20 */ '4',    '5',    '6',    '7',    '8',    '9',    'A',    'B',
    /* 0x28 */ 'C',    'D',    'E',    'F',    'G',    'H',    'I',    'J',
    /* 0x30 */ 'K',    'L',    'M',    'N',    'O',    'P',    'Q',    'R',
    /* 0x38 */ 'S',    'T',    'U',    'V',    'W',    'X',    'Y',    'Z',
};
2023-09-11 18:32:47 -04:00
/*
 * Keyword text for the three low token codes, indexed by
 * (code - ZX_CHAR_TOKEN_LOW_START).
 */
static char *zx_tokens_low[3] = {
    [0] = "RND",
    [1] = "INKEY$",
    [2] = "PI",
};
2023-09-10 22:31:41 -04:00
2023-09-11 18:32:47 -04:00
/*
 * Keyword text for the high token codes, indexed by
 * (code - ZX_CHAR_TOKEN_HIGH_START).  The leading comment on each row gives
 * the index of the row's first entry.  Index 3 has no text and is NULL.
 */
static char *zx_tokens[64] = {
    /*  0 */ "\"\"",  "AT",     "TAB",   NULL,
    /*  4 */ "CODE",  "VAL",    "LEN",   "SIN",
    /*  8 */ "COS",   "TAN",    "ASN",   "ACS",
    /* 12 */ "ATN",   "LN",     "EXP",   "INT",
    /* 16 */ "SQR",   "SGN",    "ABS",   "PEEK",
    /* 20 */ "USR",   "STR$",   "CHR$",  "NOT",
    /* 24 */ "**",    "OR",     "AND",   "<=",
    /* 28 */ ">=",    "<>",     "THEN",  "TO",
    /* 32 */ "STEP",  "LPRINT", "LLIST", "STOP",
    /* 36 */ "SLOW",  "FAST",   "NEW",   "SCROLL",
    /* 40 */ "CONT",  "DIM",    "REM",   "FOR",
    /* 44 */ "GOTO",  "GOSUB",  "INPUT", "LOAD",
    /* 48 */ "LIST",  "LET",    "PAUSE", "NEXT",
    /* 52 */ "POKE",  "PRINT",  "PLOT",  "RUN",
    /* 56 */ "SAVE",  "RAND",   "IF",    "CLS",
    /* 60 */ "UNPLOT", "CLEAR", "RETURN", "COPY",
};
2023-09-10 22:31:41 -04:00
/*
 * Encode one code point as UTF-8.
 *
 * buf receives the encoded bytes and must have room for 4 of them.
 * Returns the number of bytes written (1 to 4).  The value is not validated:
 * anything above 0xffff is emitted in the 4-byte form with the bits that do
 * not fit discarded.
 */
static inline size_t utf8_encode(uint8_t *buf, uint32_t codepoint) {
    if (codepoint <= 0x7f) {
        buf[0] = (uint8_t)codepoint;
        return 1;
    }

    if (codepoint <= 0x7ff) {
        buf[0] = 0xc0 | ((codepoint >> 6) & 0x1f);
        buf[1] = 0x80 | (codepoint & 0x3f);
        return 2;
    }

    if (codepoint <= 0xffff) {
        buf[0] = 0xe0 | ((codepoint >> 12) & 0x0f);
        buf[1] = 0x80 | ((codepoint >> 6) & 0x3f);
        buf[2] = 0x80 | (codepoint & 0x3f);
        return 3;
    }

    buf[0] = 0xf0 | ((codepoint >> 18) & 0x07);
    buf[1] = 0x80 | ((codepoint >> 12) & 0x3f);
    buf[2] = 0x80 | ((codepoint >> 6) & 0x3f);
    buf[3] = 0x80 | (codepoint & 0x3f);
    return 4;
}
2023-09-11 18:32:47 -04:00
/*
 * Write the glyph for character code c to stdout as UTF-8.
 *
 * c is used directly as an index into zx_charset, so the caller must pass a
 * value below ZX_CHARSET_LEN.  When inverse is non-zero the glyph is wrapped
 * in the escape sequences "ESC[7m" and "ESC[27m".
 *
 * Returns 0 on success, -1 if any write to stdout fails.
 */
static inline int zx_putchar(uint8_t c, int inverse) {
    uint8_t utf8[4];
    size_t nbytes = utf8_encode(utf8, zx_charset[c]);

    if (inverse && fputs("\033[7m", stdout) < 0) {
        return -1;
    }

    if (fwrite(utf8, nbytes, 1, stdout) < 1) {
        return -1;
    }

    if (inverse && fputs("\033[27m", stdout) < 0) {
        return -1;
    }

    return 0;
}
2023-09-13 00:09:23 -04:00
/* Maximum number of input bytes rendered on one hexdump row. */
#define ZX_HEXDUMP_STRIDE_LINE 16
/* Number of bytes per space-separated group in the hex column of a row. */
#define ZX_HEXDUMP_STRIDE_GROUP 2
2023-09-11 18:32:47 -04:00
static ssize_t hexdump_line(off_t offset, void *buf, size_t len, int tty) {
2023-09-10 22:31:41 -04:00
size_t i;
if (printf("%08zx: ", offset) < 0) {
goto error_io;
}
2023-09-11 02:08:32 -04:00
for (i=0; i<len; i++) {
2023-09-13 00:09:23 -04:00
if (i > 0 && (i % ZX_HEXDUMP_STRIDE_GROUP) == 0) {
2023-09-10 22:31:41 -04:00
if (putchar(' ') < 0) {
goto error_io;
}
}
2023-09-11 02:08:32 -04:00
if (printf("%02x", ((uint8_t *)buf)[offset+i]) < 0) {
goto error_io;
2023-09-10 22:31:41 -04:00
}
}
2023-09-11 18:32:47 -04:00
if (fputs(" ", stdout) < 0) {
2023-09-10 22:31:41 -04:00
goto error_io;
}
2023-09-11 02:08:32 -04:00
for (i=0; i<len; i++) {
2023-09-10 22:31:41 -04:00
uint8_t c = ((uint8_t *)buf)[offset+i];
2023-09-11 02:08:32 -04:00
if (c <= 0x3f) {
2023-09-11 18:32:47 -04:00
if (zx_putchar(c, 0) < 0) {
2023-09-10 22:59:40 -04:00
goto error_io;
2023-09-10 22:31:41 -04:00
}
} else if (c >= 0xa0 && c <= 0xbf) {
2023-09-11 18:32:47 -04:00
if (zx_putchar(c - 0xa0, tty) < 0) {
goto error_io;
}
2023-09-10 22:31:41 -04:00
} else {
if (putchar('.') < 0) {
goto error_io;
2023-09-10 22:31:41 -04:00
}
}
}
if (putchar('\n') < 0) {
goto error_io;
}
return fflush(stdout);
error_io:
return -1;
}
2023-09-11 19:10:48 -04:00
static ssize_t zx_dump_hex(int fd) {
2023-09-10 22:31:41 -04:00
void *buf;
2023-09-11 18:32:47 -04:00
ssize_t total = 0;
2023-09-11 13:50:24 -04:00
int tty = isatty(fileno(stdout));
2023-09-10 22:31:41 -04:00
struct stat st;
if (fstat(fd, &st) < 0) {
goto error_fstat;
}
if ((buf = malloc(st.st_blksize)) == NULL) {
goto error_malloc;
}
while (1) {
ssize_t len, i;
2023-09-11 02:08:32 -04:00
off_t offset = 0;
2023-09-10 22:31:41 -04:00
if ((len = read(fd, buf, st.st_blksize)) < 0) {
goto error_read;
} else if (len == 0) {
break;
}
2023-09-13 00:09:23 -04:00
for (i=0; i<len; i+=ZX_HEXDUMP_STRIDE_LINE) {
2023-09-10 22:31:41 -04:00
size_t left = len - i,
2023-09-13 00:09:23 -04:00
linesz = left < ZX_HEXDUMP_STRIDE_LINE? left: ZX_HEXDUMP_STRIDE_LINE;
2023-09-10 22:31:41 -04:00
2023-09-11 18:32:47 -04:00
if (hexdump_line(offset, buf, linesz, tty) < 0) {
goto error_hexdump_line;
2023-09-10 22:31:41 -04:00
}
offset += linesz;
2023-09-11 02:08:32 -04:00
total += linesz;
2023-09-10 22:31:41 -04:00
}
}
free(buf);
2023-09-11 02:08:32 -04:00
return total;
2023-09-10 22:31:41 -04:00
2023-09-11 18:32:47 -04:00
error_hexdump_line:
error_read:
free(buf);
error_malloc:
error_fstat:
return -1;
}
static inline int zx_print(uint8_t c, int tty) {
if (ZX_CHAR_LOW(c)) {
return zx_putchar(c, 0);
} else if (ZX_CHAR_TOKEN_LOW(c)) {
return fputs(zx_tokens_low[c - ZX_CHAR_TOKEN_LOW_START], stdout);
2023-09-11 19:10:48 -04:00
} else if (ZX_CHAR_NEWLINE(c)) {
return putchar('\n');
2023-09-11 18:32:47 -04:00
} else if (ZX_CHAR_INVERSE(c)) {
2023-09-11 19:10:48 -04:00
return zx_putchar(c - ZX_CHAR_INVERSE_START, tty);
2023-09-11 18:32:47 -04:00
} else if (ZX_CHAR_TOKEN_HIGH(c)) {
return fputs(zx_tokens[c - ZX_CHAR_TOKEN_HIGH_START], stdout);
}
2023-09-11 19:10:48 -04:00
return 0;
2023-09-11 18:32:47 -04:00
}
2023-09-13 00:43:08 -04:00
static inline enum zx_basic_token_type zx_basic_token_type_get(uint8_t b) {
if (ZX_CHAR_LOW(b)) {
uint32_t codepoint = zx_charset[b];
if ((codepoint >= 'A' && codepoint <= 'Z')
|| (codepoint >= '0' && codepoint <= '9')) {
return ZX_BASIC_TOKEN_ALNUM;
} else {
return ZX_BASIC_TOKEN_SYMBOL;
}
} else if (ZX_CHAR_INVERSE(b)) {
return zx_basic_token_type_get(b - ZX_CHAR_INVERSE_START);
} else if (ZX_CHAR_TOKEN_LOW(b)) {
return ZX_BASIC_TOKEN_WORD;
} else if (ZX_CHAR_TOKEN_HIGH(b)) {
char *token = zx_tokens[b-ZX_CHAR_TOKEN_HIGH_START];
if (token[0] >= 'A' && token[0] <= 'Z') {
return ZX_BASIC_TOKEN_WORD;
} else {
return ZX_BASIC_TOKEN_SYMBOL;
}
}
return ZX_BASIC_TOKEN_UNKNOWN;
}
2023-09-11 19:10:48 -04:00
static ssize_t zx_dump_basic(int fd) {
2023-09-11 18:32:47 -04:00
void *buf;
ssize_t total = 0;
2023-09-11 19:10:48 -04:00
int tty = isatty(fileno(stdout));
struct stat st;
2023-09-11 18:32:47 -04:00
if (fstat(fd, &st) < 0) {
goto error_fstat;
}
if ((buf = malloc(st.st_blksize)) == NULL) {
goto error_malloc;
}
2023-09-11 21:24:02 -04:00
if (lseek(fd, ZX_BASIC_STATE_SIZE, SEEK_CUR) < 0) {
2023-09-11 19:10:48 -04:00
goto error_io;
}
2023-09-11 18:32:47 -04:00
while (1) {
2023-09-11 19:10:48 -04:00
ssize_t readlen, len, i;
zx_basic_line line;
2023-09-11 20:47:33 -04:00
uint8_t last = 0xc0;
2023-09-11 18:32:47 -04:00
2023-09-11 19:10:48 -04:00
if ((readlen = read(fd, &line, sizeof(line))) < 0) {
goto error_io;
} else if (readlen == 0) {
2023-09-11 18:32:47 -04:00
break;
}
2023-09-11 20:47:33 -04:00
len = le16toh(line.len);
2023-09-12 11:47:15 -04:00
if (be16toh(line.num) == len && len == ZX_BASIC_LINE_LAST) {
2023-09-11 20:47:33 -04:00
break;
}
2023-09-11 19:10:48 -04:00
if (printf("%d", (int)be16toh(line.num)) < 0) {
goto error_io;
}
if (read(fd, buf, len) < 0) {
goto error_io;
}
2023-09-11 18:32:47 -04:00
for (i=0; i<len; i++) {
2023-09-11 19:10:48 -04:00
uint8_t c = ((uint8_t *)buf)[i];
2023-09-13 00:43:08 -04:00
enum zx_basic_token_type type = zx_basic_token_type_get(c),
type_last = zx_basic_token_type_get(last);
int space = 0;
if (type == ZX_BASIC_TOKEN_ALNUM) {
if (type_last == ZX_BASIC_TOKEN_WORD) {
space = 1;
2023-09-11 19:10:48 -04:00
}
2023-09-13 00:43:08 -04:00
} else if (type == ZX_BASIC_TOKEN_SYMBOL) {
space = 0;
} else if (type == ZX_BASIC_TOKEN_WORD) {
space = 1;
2023-09-11 20:47:33 -04:00
} else if (ZX_CHAR_TOKEN_INTEGRAL(c) || ZX_CHAR_TOKEN_FLOAT(c)) {
2023-09-11 19:29:03 -04:00
i += 5;
}
2023-09-13 00:43:08 -04:00
if (space && putchar(' ') < 0) {
goto error_io;
}
2023-09-11 19:10:48 -04:00
if (zx_print(c, tty) < 0) {
goto error_io;
}
last = c;
}
2023-09-11 18:32:47 -04:00
2023-09-11 19:10:48 -04:00
if (!ZX_CHAR_NEWLINE(((uint8_t *)buf)[i-1])) {
if (putchar('\n') < 0) {
goto error_io;
}
2023-09-11 18:32:47 -04:00
}
}
free(buf);
return total;
2023-09-11 19:10:48 -04:00
error_io:
2023-09-10 22:31:41 -04:00
free(buf);
error_malloc:
error_fstat:
return -1;
}
2023-09-11 18:32:47 -04:00
/*
 * Print an optional printf-style message followed by the usage line to
 * stderr, then terminate the process with exit status 1.
 *
 * prog     program name shown in the usage line
 * message  format string for the leading message, or NULL for none
 */
static void usage(const char *prog, char *message, ...) {
    if (message != NULL) {
        va_list ap;

        va_start(ap, message);
        vfprintf(stderr, message, ap);
        va_end(ap);

        fputc('\n', stderr);
    }

    fprintf(stderr, "usage: %s [-b] [file]\n", prog);

    exit(1);
}
2023-09-13 00:09:23 -04:00
/* Initial value of the option bitmask built by main(): no options set. */
#define ZX_DUMP_FLAGS_NONE 0
/* Set by the -b option: main() calls zx_dump_basic instead of zx_dump_hex. */
#define ZX_DUMP_FLAGS_BASIC (1 << 0)
2023-09-10 22:31:41 -04:00
int main(int argc, char **argv) {
2023-09-11 18:32:47 -04:00
int fd,
2023-09-11 19:10:48 -04:00
flags = ZX_DUMP_FLAGS_NONE,
2023-09-11 18:32:47 -04:00
argn;
while (1) {
int c;
if ((c = getopt(argc, argv, "b")) < 0) {
break;
}
2023-09-10 22:31:41 -04:00
2023-09-11 18:32:47 -04:00
switch (c) {
case 'b':
2023-09-11 19:10:48 -04:00
flags |= ZX_DUMP_FLAGS_BASIC;
2023-09-11 18:32:47 -04:00
break;
case '?':
default:
usage(argv[0], NULL);
}
}
argn = argc - optind;
if (argn == 0) {
2023-09-10 22:31:41 -04:00
fd = fileno(stdin);
2023-09-11 18:32:47 -04:00
} else if (argn == 1) {
if ((fd = open(argv[optind], O_RDONLY)) < 0) {
fprintf(stderr, "%s: %s: %s\n", argv[0], argv[optind], strerror(errno));
2023-09-10 22:31:41 -04:00
goto error_open;
}
} else {
2023-09-11 18:32:47 -04:00
usage(argv[0], "Too many arguments provided");
2023-09-10 22:31:41 -04:00
}
2023-09-11 19:10:48 -04:00
if (flags & ZX_DUMP_FLAGS_BASIC) {
if (zx_dump_basic(fd) < 0) {
fprintf(stderr, "%s: %s: %s\n",
argv[0],
(argn == 1)? argv[optind]: "(stdin)",
strerror(errno));
2023-09-10 22:31:41 -04:00
2023-09-11 19:10:48 -04:00
goto error_dump;
}
} else {
if (zx_dump_hex(fd) < 0) {
fprintf(stderr, "%s: %s: %s\n",
argv[0],
(argn == 1)? argv[optind]: "(stdin)",
strerror(errno));
goto error_dump;
}
2023-09-10 22:31:41 -04:00
}
2023-09-11 18:32:47 -04:00
if (argn == 1) {
2023-09-10 22:31:41 -04:00
close(fd);
}
return 0;
2023-09-11 19:10:48 -04:00
error_dump:
2023-09-11 18:32:47 -04:00
if (argn == 1) {
2023-09-10 22:31:41 -04:00
close(fd);
}
error_open:
return 1;
}