From c2ecaf87e462fd6600ce5ee6a29cb352de042714 Mon Sep 17 00:00:00 2001 From: Philip Wittamore Date: Wed, 24 Sep 2025 22:26:00 +0200 Subject: update --- src/LYCharUtils.c | 3419 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3419 insertions(+) create mode 100644 src/LYCharUtils.c (limited to 'src/LYCharUtils.c') diff --git a/src/LYCharUtils.c b/src/LYCharUtils.c new file mode 100644 index 0000000..0013989 --- /dev/null +++ b/src/LYCharUtils.c @@ -0,0 +1,3419 @@ +/* + * $LynxId: LYCharUtils.c,v 1.137 2021/10/24 00:47:08 tom Exp $ + * + * Functions associated with LYCharSets.c and the Lynx version of HTML.c - FM + * ========================================================================== + */ +#include +#include + +#define Lynx_HTML_Handler +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Used for nested lists. - FM + */ +int OL_CONTINUE = -29999; /* flag for whether CONTINUE is set */ +int OL_VOID = -29998; /* flag for whether a count is set */ + /* + * Return the number of occurrences of the character ch in the + * NUL-terminated string value. + */ +static size_t count_char(const char *value, int ch) +{ + const char *found; + size_t result = 0; + + while ((*value != '\0') && (found = StrChr(value, ch)) != NULL) { + ++result; + value = (found + 1); /* resume the search just past this match */ + } + return result; +} + +/* + * This function converts any ampersands in a pre-allocated string to "&amp;". + * If brackets is TRUE, it also converts any angle-brackets to "&lt;" or "&gt;". + * + * When nothing needs escaping the string is left untouched. Otherwise a new + * buffer is allocated, the old one is released with FREE(), and *in_out is + * updated to point at the escaped copy. Allocation failure is reported + * through outofmem().
+ */ +void LYEntify(char **in_out, + int brackets) +{ + char *source = *in_out; + char *target; + char *result = NULL; + size_t count_AMPs = 0; + size_t count_LTs = 0; + size_t count_GTs = 0; + +#ifdef CJK_EX + enum _state { + S_text, + S_esc, + S_dollar, + S_paren, + S_nonascii_text, + S_dollar_paren + } state = S_text; + int in_sjis = 0; +#endif + + if (non_empty(source)) { + count_AMPs = count_char(*in_out, '&'); + if (brackets) { + count_LTs = count_char(*in_out, '<'); + count_GTs = count_char(*in_out, '>'); + } + + if (count_AMPs != 0 || count_LTs != 0 || count_GTs != 0) { + /* Each '&' grows by 4 bytes ("&amp;") and each '<' or '>' by 3 ("&lt;" or "&gt;"); the final 1 is for the terminating NUL. */ + target = typecallocn(char, + (strlen(*in_out) + + (4 * count_AMPs) + + (3 * count_LTs) + + (3 * count_GTs) + 1)); + + if ((result = target) == NULL) + outofmem(__FILE__, "LYEntify"); + + for (source = *in_out; *source; source++) { +#ifdef CJK_EX + if (IS_CJK_TTY) { /* Track ESC-introduced sequences: in every state except S_text the current byte is copied verbatim and never entity-escaped. The byte patterns look like ISO-2022 style designations (cf. RFC 1468) -- TODO confirm. */ + switch (state) { + case S_text: + if (*source == '\033') { + state = S_esc; + *target++ = *source; + continue; + } + break; + + case S_esc: + if (*source == '$') { + state = S_dollar; + } else if (*source == '(') { + state = S_paren; + } else { + state = S_text; + } + *target++ = *source; + continue; + + case S_dollar: + if (*source == '@' || *source == 'B' || *source == 'A') { + state = S_nonascii_text; + } else if (*source == '(') { + state = S_dollar_paren; + } else { + state = S_text; + } + *target++ = *source; + continue; + + case S_dollar_paren: + if (*source == 'C') { + state = S_nonascii_text; + } else { + state = S_text; + } + *target++ = *source; + continue; + + case S_paren: + if (*source == 'B' || *source == 'J' || *source == 'T') { + state = S_text; + } else if (*source == 'I') { + state = S_nonascii_text; + } else if (*source == '\033') { + state = S_esc; + } + *target++ = *source; + continue; + + case S_nonascii_text: /* stays in this state, copying raw bytes, until the next ESC */ + if (*source == '\033') + state = S_esc; + *target++ = *source; + continue; + + default: + break; + } + if (*(source + 1) != '\0' && + (IS_EUC(UCH(*source), UCH(*(source + 1))) || 
+ IS_SJIS(UCH(*source), UCH(*(source + 1)), in_sjis) || + IS_BIG5(UCH(*source), UCH(*(source + 1))))) { + *target++ = *source++; /* copy both bytes of the recognized pair as-is, so the second byte is never examined for escaping */ + *target++ = *source; + continue; + } + } +#endif + switch (*source) { + case '&': + *target++ = '&'; + *target++ = 'a'; + *target++ = 'm'; + *target++ = 'p'; + *target++ = ';'; + break; + case '<': /* when brackets is FALSE both bracket cases drop through to the verbatim copy under default */ + if (brackets) { + *target++ = '&'; + *target++ = 'l'; + *target++ = 't'; + *target++ = ';'; + break; + } + /* FALLTHRU */ + case '>': + if (brackets) { + *target++ = '&'; + *target++ = 'g'; + *target++ = 't'; + *target++ = ';'; + break; + } + /* FALLTHRU */ + default: + *target++ = *source; + break; + } + } + *target = '\0'; + FREE(*in_out); + *in_out = result; + } + } +} + +/* + * Callers to LYEntifyTitle/LYEntifyValue do not look at the 'target' param. + * Optimize things a little by avoiding the memory allocation if not needed, + * as is usually the case. + * + * Returns TRUE if source contains any of '<', '&' or '>'. When CJK_EX is + * defined and IS_CJK_TTY is true, the presence of an ESC character alone + * also yields TRUE. + */ +static BOOL MustEntify(const char *source) +{ + BOOL result; + +#ifdef CJK_EX + if (IS_CJK_TTY && StrChr(source, '\033') != 0) { + result = TRUE; + } else +#endif + { + size_t length = strlen(source); + size_t reject = strcspn(source, "<&>"); + + result = (BOOL) (length != reject); + } + + return result; +} + +/* + * Wrappers for LYEntify() which do not assume that the source was allocated, + * e.g., output from gettext(). 
+ */ /* + * LYEntifyTitle: when MustEntify(source) is TRUE, copy source into *target + * with StrAllocCopy(), escape '&', '<' and '>' in that copy by calling + * LYEntify(target, TRUE), and return *target. Otherwise return source + * itself; *target is not touched on that path. + */ +const char *LYEntifyTitle(char **target, const char *source) +{ + const char *result = 0; + + if (MustEntify(source)) { + StrAllocCopy(*target, source); + LYEntify(target, TRUE); + result = *target; + } else { + result = source; + } + return result; +} + /* + * LYEntifyValue: same as LYEntifyTitle, except that it calls + * LYEntify(target, FALSE), so only '&' is escaped and angle brackets are + * copied unchanged. + */ +const char *LYEntifyValue(char **target, const char *source) +{ + const char *result = 0; + + if (MustEntify(source)) { + StrAllocCopy(*target, source); + LYEntify(target, FALSE); + result = *target; + } else { + result = source; + } + return result; +} + +/* + * This function trims characters <= that of a space (32), + * including HT_NON_BREAK_SPACE (1) and HT_EN_SPACE (2), + * but not ESC, from the heads of strings. - FM + * + * The remaining text is moved down in place, so str keeps its address. + */ +void LYTrimHead(char *str) +{ + const char *s = str; + + if (isEmpty(s)) + return; + + while (*s && WHITE(*s) && UCH(*s) != UCH(CH_ESC)) /* S/390 -- gil -- 1669 */ + s++; + if (s > str) { /* something was skipped: shift the rest to the front and re-terminate */ + char *ns = str; + + while (*s) { + *ns++ = *s++; + } + *ns = '\0'; + } +} + +/* + * This function trims characters <= that of a space (32), + * including HT_NON_BREAK_SPACE (1), HT_EN_SPACE (2), and + * ESC from the tails of strings. - FM + * + * Unlike LYTrimHead() there is no exclusion for ESC here: every trailing + * character for which WHITE() holds is overwritten with NUL. + */ +void LYTrimTail(char *str) +{ + int i; + + if (isEmpty(str)) + return; + + i = (int) strlen(str) - 1; /* index of the last character; the length is narrowed to int */ + while (i >= 0) { + if (WHITE(str[i])) + str[i] = '\0'; + else + break; + i--; + } +} + +/* + * This function should receive a pointer to the start + * of a comment. It returns a pointer to the end ('>') + * character of comment, or its best guess if the comment + * is invalid. - FM + */ +char *LYFindEndOfComment(char *str) +{ + char *cp, *cp1; + enum comment_state { + start1, + start2, + end1, + end2 + } state; + + if (str == NULL) + /* + * We got NULL, so return NULL. - FM + */ + return NULL; + + if (StrNCmp(str, "