Merge "Upgrade to mksh R56b."

am: a1899ee519 Change-Id: Iec6d19cc20406d06df71398b6f6479513dfe3f4b
2017-09-25 19:42:34 +00:00 · 2017-09-25 19:42:34 +00:00 · e1c2b662fd
commit e1c2b662fd
parent 0a685140c5 a1899ee519
24 changed files with 2668 additions and 1384 deletions
--- a/Android.mk
+++ b/Android.mk
@ -32,48 +32,89 @@ MKSH_SRC_FILES := \

 MKSH_INCLUDES := $(LOCAL_PATH)/src

+# Compiler flags...
 MKSH_CFLAGS += \
    -Wno-deprecated-declarations \
    -fno-asynchronous-unwind-tables \
    -fno-strict-aliasing \
    -fstack-protector -fwrapv \

-# ...and CPPFLAGS.
+# ...various options we choose...
 MKSH_CFLAGS += \
-    -DDEBUG_LEAKS -DMKSH_ASSUME_UTF8 \
+    -DDEBUG_LEAKS \
+    -DMKSH_ASSUME_UTF8 \
    -DMKSH_DONT_EMIT_IDSTRING \
+    -DKSH_VERSIONNAME_VENDOR_EXT=\"\ Android\" \
+
+# ...and the defines from Build.sh.
+MKSH_CFLAGS += \
    -DMKSH_BUILDSH \
-    -D_GNU_SOURCE -DSETUID_CAN_FAIL_WITH_EAGAIN \
-    -DHAVE_ATTRIBUTE_BOUNDED=0 -DHAVE_ATTRIBUTE_FORMAT=1 \
+    -D_GNU_SOURCE \
+    -DSETUID_CAN_FAIL_WITH_EAGAIN \
+    -DHAVE_STRING_POOLING=1 \
+    -DHAVE_ATTRIBUTE_BOUNDED=1 \
+    -DHAVE_ATTRIBUTE_FORMAT=1 \
    -DHAVE_ATTRIBUTE_NORETURN=1 \
    -DHAVE_ATTRIBUTE_PURE=1 \
    -DHAVE_ATTRIBUTE_UNUSED=1 \
-    -DHAVE_ATTRIBUTE_USED=1 -DHAVE_SYS_TIME_H=1 -DHAVE_TIME_H=1 \
-    -DHAVE_BOTH_TIME_H=1 -DHAVE_SYS_BSDTYPES_H=0 \
-    -DHAVE_SYS_FILE_H=1 -DHAVE_SYS_MKDEV_H=0 -DHAVE_SYS_MMAN_H=1 \
-    -DHAVE_SYS_PARAM_H=1 -DHAVE_SYS_RESOURCE_H=1 \
-    -DHAVE_SYS_SELECT_H=1 -DHAVE_SYS_SYSMACROS_H=1 \
-    -DHAVE_BSTRING_H=0 -DHAVE_GRP_H=1 -DHAVE_IO_H=0 -DHAVE_LIBGEN_H=1 \
-    -DHAVE_LIBUTIL_H=0 -DHAVE_PATHS_H=1 -DHAVE_STDINT_H=1 \
-    -DHAVE_STRINGS_H=1 -DHAVE_TERMIOS_H=1 -DHAVE_ULIMIT_H=0 \
-    -DHAVE_VALUES_H=0 -DHAVE_CAN_INTTYPES=1 -DHAVE_CAN_UCBINTS=1 \
-    -DHAVE_CAN_INT8TYPE=1 -DHAVE_CAN_UCBINT8=1 -DHAVE_RLIM_T=1 \
+    -DHAVE_ATTRIBUTE_USED=1 \
+    -DHAVE_SYS_TIME_H=1 \
+    -DHAVE_TIME_H=1 \
+    -DHAVE_BOTH_TIME_H=1 \
+    -DHAVE_SYS_BSDTYPES_H=0 \
+    -DHAVE_SYS_FILE_H=1 \
+    -DHAVE_SYS_MKDEV_H=0 \
+    -DHAVE_SYS_MMAN_H=1 \
+    -DHAVE_SYS_PARAM_H=1 \
+    -DHAVE_SYS_RESOURCE_H=1 \
+    -DHAVE_SYS_SELECT_H=1 \
+    -DHAVE_SYS_SYSMACROS_H=1 \
+    -DHAVE_BSTRING_H=0 \
+    -DHAVE_GRP_H=1 \
+    -DHAVE_IO_H=0 \
+    -DHAVE_LIBGEN_H=1 \
+    -DHAVE_LIBUTIL_H=0 \
+    -DHAVE_PATHS_H=1 \
+    -DHAVE_STDINT_H=1 \
+    -DHAVE_STRINGS_H=1 \
+    -DHAVE_TERMIOS_H=1 \
+    -DHAVE_ULIMIT_H=0 \
+    -DHAVE_VALUES_H=0 \
+    -DHAVE_CAN_INTTYPES=1 \
+    -DHAVE_CAN_UCBINTS=1 \
+    -DHAVE_CAN_INT8TYPE=1 \
+    -DHAVE_CAN_UCBINT8=1 \
+    -DHAVE_RLIM_T=1 \
    -DHAVE_SIG_T=1 \
-    -DHAVE_STRING_POOLING=1 \
-    -DHAVE_SYS_ERRLIST=0 -DHAVE_SYS_SIGNAME=1 \
-    -DHAVE_SYS_SIGLIST=1 -DHAVE_FLOCK=1 -DHAVE_LOCK_FCNTL=1 \
+    -DHAVE_SYS_ERRLIST=0 \
+    -DHAVE_SYS_SIGNAME=1 \
+    -DHAVE_SYS_SIGLIST=1 \
+    -DHAVE_FLOCK=1 \
+    -DHAVE_LOCK_FCNTL=1 \
    -DHAVE_GETRUSAGE=1 \
    -DHAVE_GETSID=1 \
    -DHAVE_GETTIMEOFDAY=1 \
-    -DHAVE_ISSETUGID=0 \
    -DHAVE_KILLPG=1 \
-    -DHAVE_MEMMOVE=1 -DHAVE_MKNOD=0 -DHAVE_MMAP=1 -DHAVE_NICE=1 \
-    -DHAVE_REVOKE=0 -DHAVE_SETLOCALE_CTYPE=0 \
-    -DHAVE_LANGINFO_CODESET=0 -DHAVE_SELECT=1 -DHAVE_SETRESUGID=1 \
-    -DHAVE_SETGROUPS=1 -DHAVE_STRERROR=1 -DHAVE_STRSIGNAL=0 \
-    -DHAVE_STRLCPY=1 -DHAVE_FLOCK_DECL=1 -DHAVE_REVOKE_DECL=1 \
-    -DHAVE_SYS_ERRLIST_DECL=0 -DHAVE_SYS_SIGLIST_DECL=1 \
-    -DHAVE_PERSISTENT_HISTORY=0 -DMKSH_BUILD_R=551
+    -DHAVE_MEMMOVE=1 \
+    -DHAVE_MKNOD=0 \
+    -DHAVE_MMAP=1 \
+    -DHAVE_FTRUNCATE=1 \
+    -DHAVE_NICE=1 \
+    -DHAVE_REVOKE=0 \
+    -DHAVE_SETLOCALE_CTYPE=1 \
+    -DHAVE_LANGINFO_CODESET=1 \
+    -DHAVE_SELECT=1 \
+    -DHAVE_SETRESUGID=1 \
+    -DHAVE_SETGROUPS=1 \
+    -DHAVE_STRERROR=1 \
+    -DHAVE_STRSIGNAL=0 \
+    -DHAVE_STRLCPY=1 \
+    -DHAVE_FLOCK_DECL=1 \
+    -DHAVE_REVOKE_DECL=1 \
+    -DHAVE_SYS_ERRLIST_DECL=0 \
+    -DHAVE_SYS_SIGLIST_DECL=1 \
+    -DHAVE_PERSISTENT_HISTORY=0 \
+    -DMKSH_BUILD_R=562 \

 LOCAL_SRC_FILES := $(MKSH_SRC_FILES)

--- a/Android.patch.txt
+++ b/Android.patch.txt
@ -0,0 +1,48 @@
+--- mksh-R56b/funcs.c	2017-05-05 15:53:55.000000000 -0700
+++ src/funcs.c	2017-09-22 16:19:44.327000462 -0700
+@@ -103,7 +103,9 @@
+ 	{Tsgbreak, c_brkcont},
+ 	{T__builtin, c_builtin},
+ 	{Tbuiltin, c_builtin},
+#if !defined(__ANDROID__)
+ 	{Tbcat, c_cat},
+#endif
+ 	{Tcd, c_cd},
+ 	/* dash compatibility hack */
+ 	{"chdir", c_cd},
+@@ -126,7 +128,9 @@
+ 	{"pwd", c_pwd},
+ 	{Tread, c_read},
+ 	{Tdsgreadonly, c_typeset},
+#if !defined(__ANDROID__)
+ 	{"!realpath", c_realpath},
+#endif
+ 	{"~rename", c_rename},
+ 	{"*=return", c_exitreturn},
+ 	{Tsgset, c_set},
+@@ -160,8 +164,10 @@
+ 	{"~printf", c_printf},
+ #endif
+ #if HAVE_SELECT
+#if !defined(__ANDROID__)
+ 	{"sleep", c_sleep},
+ #endif
+#endif
+ #ifdef __MirBSD__
+ 	/* alias to "true" for historical reasons */
+ 	{"domainname", c_true},
+--- mksh-R56b/main.c	2017-04-28 04:14:14.000000000 -0700
+++ src/main.c	2017-09-22 15:58:14.134149037 -0700
+@@ -410,6 +410,12 @@
+ 		}
+ 	}
+ 
+	/* override default PATH regardless of environment */
+#ifdef MKSH_DEFPATH_OVERRIDE
+	vp = global(TPATH);
+	setstr(vp, MKSH_DEFPATH_OVERRIDE, KSH_RETURN_ERROR);
+#endif
+
+ 	/* for security */
+ 	typeset(TinitIFS, 0, 0, 0, 0);
+ 
--- a/src/Build.sh
+++ b/src/Build.sh
@ -1,5 +1,5 @@
 #!/bin/sh
-srcversion='$MirOS: src/bin/mksh/Build.sh,v 1.716 2017/04/12 18:33:22 tg Exp $'
+srcversion='$MirOS: src/bin/mksh/Build.sh,v 1.727 2017/08/29 13:38:28 tg Exp $'
 #-
 # Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
 #		2011, 2012, 2013, 2014, 2015, 2016, 2017
@ -53,6 +53,16 @@ alll=qwertyuiopasdfghjklzxcvbnm
 alln=0123456789
 alls=______________________________________________________________

+case `echo a | tr '\201' X` in
+X)
+	# EBCDIC build system
+	lfcr='\n\r'
+	;;
+*)
+	lfcr='\012\015'
+	;;
+esac
+
 genopt_die() {
 	if test -n "$1"; then
 		echo >&2 "E: $*"
@ -425,7 +435,7 @@ ac_header() {
 		na=0
 	fi
 	hf=$1; shift
-	hv=`echo "$hf" | tr -d '\012\015' | tr -c $alll$allu$alln $alls`
+	hv=`echo "$hf" | tr -d "$lfcr" | tr -c $alll$allu$alln $alls`
 	echo "/* NeXTstep bug workaround */" >x
 	for i
 	do
@ -496,6 +506,7 @@ last=
 tfn=
 legacy=0
 textmode=0
+ebcdic=false

 for i
 do
@ -519,6 +530,9 @@ do
 	:-c)
 		last=c
 		;;
+	:-E)
+		ebcdic=true
+		;;
 	:-G)
 		echo "$me: Do not call me with '-G'!" >&2
 		exit 1
@ -603,6 +617,10 @@ else
 	add_cppflags -DMKSH_LEGACY_MODE
 fi

+if $ebcdic; then
+	add_cppflags -DMKSH_EBCDIC
+fi
+
 if test $textmode = 0; then
 	check_categories="$check_categories shell:textmode-no shell:binmode-yes"
 else
@ -765,7 +783,9 @@ GNU/kFreeBSD)
 	add_cppflags -DSETUID_CAN_FAIL_WITH_EAGAIN
 	;;
 Haiku)
-	add_cppflags -DMKSH_ASSUME_UTF8; HAVE_ISSET_MKSH_ASSUME_UTF8=1
+	add_cppflags -DMKSH_ASSUME_UTF8
+	HAVE_ISSET_MKSH_ASSUME_UTF8=1
+	HAVE_ISOFF_MKSH_ASSUME_UTF8=0
 	;;
 Harvey)
 	add_cppflags -D_POSIX_SOURCE
@ -773,11 +793,14 @@ Harvey)
 	add_cppflags -D_BSD_EXTENSION
 	add_cppflags -D_SUSV2_SOURCE
 	add_cppflags -D_GNU_SOURCE
-	add_cppflags -DMKSH_ASSUME_UTF8; HAVE_ISSET_MKSH_ASSUME_UTF8=1
+	add_cppflags -DMKSH_ASSUME_UTF8
+	HAVE_ISSET_MKSH_ASSUME_UTF8=1
+	HAVE_ISOFF_MKSH_ASSUME_UTF8=0
 	add_cppflags -DMKSH_NO_CMDLINE_EDITING
 	add_cppflags -DMKSH__NO_SETEUGID
 	oswarn=' and will currently not work'
 	add_cppflags -DMKSH_UNEMPLOYED
+	add_cppflags -DMKSH_NOPROSPECTOFWORK
 	# these taken from Harvey-OS github and need re-checking
 	add_cppflags -D_setjmp=setjmp -D_longjmp=longjmp
 	: "${HAVE_CAN_NO_EH_FRAME=0}"
@ -826,7 +849,9 @@ Minix3)
 MirBSD)
 	;;
 MSYS_*)
-	add_cppflags -DMKSH_ASSUME_UTF8=0; HAVE_ISSET_MKSH_ASSUME_UTF8=1
+	add_cppflags -DMKSH_ASSUME_UTF8=0
+	HAVE_ISSET_MKSH_ASSUME_UTF8=1
+	HAVE_ISOFF_MKSH_ASSUME_UTF8=1
 	# almost same as CYGWIN* (from RT|Chatzilla)
 	: "${HAVE_SETLOCALE_CTYPE=0}"
 	# broken on this OE (from ir0nh34d)
@ -860,7 +885,9 @@ OpenBSD)
 	: "${HAVE_SETLOCALE_CTYPE=0}"
 	;;
 OS/2)
-	add_cppflags -DMKSH_ASSUME_UTF8=0; HAVE_ISSET_MKSH_ASSUME_UTF8=1
+	add_cppflags -DMKSH_ASSUME_UTF8=0
+	HAVE_ISSET_MKSH_ASSUME_UTF8=1
+	HAVE_ISOFF_MKSH_ASSUME_UTF8=1
 	HAVE_TERMIOS_H=0
 	HAVE_MKNOD=0	# setmode() incompatible
 	oswarn="; it is being ported"
@ -894,6 +921,16 @@ the mksh-os2 porter.
 ] incompatibilities with $y.
 "
 	;;
+OS/390)
+	add_cppflags -DMKSH_ASSUME_UTF8=0
+	HAVE_ISSET_MKSH_ASSUME_UTF8=1
+	HAVE_ISOFF_MKSH_ASSUME_UTF8=1
+	: "${CC=xlc}"
+	: "${SIZE=: size}"
+	add_cppflags -DMKSH_FOR_Z_OS
+	add_cppflags -D_ALL_SOURCE
+	oswarn='; EBCDIC support is incomplete'
+	;;
 OSF1)
 	HAVE_SIG_T=0	# incompatible
 	add_cppflags -D_OSF_SOURCE
@ -907,7 +944,9 @@ Plan9)
 	add_cppflags -D_LIMITS_EXTENSION
 	add_cppflags -D_BSD_EXTENSION
 	add_cppflags -D_SUSV2_SOURCE
-	add_cppflags -DMKSH_ASSUME_UTF8; HAVE_ISSET_MKSH_ASSUME_UTF8=1
+	add_cppflags -DMKSH_ASSUME_UTF8
+	HAVE_ISSET_MKSH_ASSUME_UTF8=1
+	HAVE_ISOFF_MKSH_ASSUME_UTF8=0
 	add_cppflags -DMKSH_NO_CMDLINE_EDITING
 	add_cppflags -DMKSH__NO_SETEUGID
 	oswarn=' and will currently not work'
@ -1047,7 +1086,7 @@ $e $bi$me: Scanning for functions... please ignore any errors.$ao
 # - LLVM+clang defines __GNUC__ too
 # - nwcc defines __GNUC__ too
 CPP="$CC -E"
-$e ... which compiler seems to be used
+$e ... which compiler type seems to be used
 cat >conftest.c <<'EOF'
 const char *
 #if defined(__ICC) || defined(__INTEL_COMPILER)
@ -1297,7 +1336,7 @@ unknown)
 	# huh?
 	;;
 esac
-$e "$bi==> which compiler seems to be used...$ao $ui$ct$etd$ao"
+$e "$bi==> which compiler type seems to be used...$ao $ui$ct$etd$ao"
 rmf conftest.c conftest.o conftest a.out* a.exe* conftest.exe* vv.out

 #
@ -1392,8 +1431,16 @@ watcom)
 	DOWARN=-Wc,-we
 	;;
 xlc)
-	save_NOWARN=-qflag=i:e
-	DOWARN=-qflag=i:i
+	case $TARGET_OS in
+	OS/390)
+		save_NOWARN=-qflag=e
+		DOWARN=-qflag=i
+		;;
+	*)
+		save_NOWARN=-qflag=i:e
+		DOWARN=-qflag=i:i
+		;;
+	esac
 	;;
 *)
 	test x"$save_NOWARN" = x"" && save_NOWARN=-Wno-error
@ -1563,10 +1610,24 @@ tendra)
 	ac_flags 1 extansi -Xa
 	;;
 xlc)
-	ac_flags 1 rodata "-qro -qroconst -qroptr"
-	ac_flags 1 rtcheck -qcheck=all
-	#ac_flags 1 rtchkc -qextchk	# reported broken
-	ac_flags 1 wformat "-qformat=all -qformat=nozln"
+	case $TARGET_OS in
+	OS/390)
+		# On IBM z/OS, the following are warnings by default:
+		# CCN3296: #include file <foo.h> not found.
+		# CCN3944: Attribute "__foo__" is not supported and is ignored.
+		# CCN3963: The attribute "foo" is not a valid variable attribute and is ignored.
+		ac_flags 1 halton '-qhaltonmsg=CCN3296 -qhaltonmsg=CCN3944 -qhaltonmsg=CCN3963'
+		# CCN3290: Unknown macro name FOO on #undef directive.
+		# CCN4108: The use of keyword '__attribute__' is non-portable.
+		ac_flags 1 supprss '-qsuppress=CCN3290 -qsuppress=CCN4108'
+		;;
+	*)
+		ac_flags 1 rodata '-qro -qroconst -qroptr'
+		ac_flags 1 rtcheck -qcheck=all
+		#ac_flags 1 rtchkc -qextchk	# reported broken
+		ac_flags 1 wformat '-qformat=all -qformat=nozln'
+		;;
+	esac
 	#ac_flags 1 wp64 -qwarn64	# too verbose for now
 	;;
 esac
@ -1705,6 +1766,10 @@ ac_ifcpp 'ifdef MKSH_NOPROSPECTOFWORK' isset_MKSH_NOPROSPECTOFWORK '' \
    check_categories="$check_categories arge nojsig"
 ac_ifcpp 'ifdef MKSH_ASSUME_UTF8' isset_MKSH_ASSUME_UTF8 '' \
    'if the default UTF-8 mode is specified' && : "${HAVE_SETLOCALE_CTYPE=0}"
+ac_ifcpp 'if !MKSH_ASSUME_UTF8' isoff_MKSH_ASSUME_UTF8 \
+    isset_MKSH_ASSUME_UTF8 0 \
+    'if the default UTF-8 mode is disabled' && \
+    check_categories="$check_categories noutf8"
 #ac_ifcpp 'ifdef MKSH_DISABLE_DEPRECATED' isset_MKSH_DISABLE_DEPRECATED '' \
 #    "if deprecated features are to be omitted" && \
 #    check_categories="$check_categories nodeprecated"
@ -2025,6 +2090,11 @@ ac_test mmap lock_fcntl 0 'for mmap and munmap' <<-'EOF'
 	    munmap(NULL, 0)); }
 EOF

+ac_test ftruncate mmap 0 'for ftruncate' <<-'EOF'
+	#include <unistd.h>
+	int main(void) { return (ftruncate(0, 0)); }
+EOF
+
 ac_test nice <<-'EOF'
 	#include <unistd.h>
 	int main(void) { return (nice(4)); }
@ -2179,8 +2249,8 @@ EOF
 # other checks
 #
 fd='if to use persistent history'
-ac_cache PERSISTENT_HISTORY || case $HAVE_MMAP$HAVE_FLOCK$HAVE_LOCK_FCNTL in
-11*|101) fv=1 ;;
+ac_cache PERSISTENT_HISTORY || case $HAVE_FTRUNCATE$HAVE_MMAP$HAVE_FLOCK$HAVE_LOCK_FCNTL in
+111*|1101) fv=1 ;;
 esac
 test 1 = $fv || check_categories="$check_categories no-histfile"
 ac_testdone
@ -2339,7 +2409,7 @@ addsrcs '!' HAVE_STRLCPY strlcpy.c
 addsrcs USE_PRINTF_BUILTIN printf.c
 test 1 = "$USE_PRINTF_BUILTIN" && add_cppflags -DMKSH_PRINTF_BUILTIN
 test 1 = "$HAVE_CAN_VERB" && CFLAGS="$CFLAGS -verbose"
-add_cppflags -DMKSH_BUILD_R=551
+add_cppflags -DMKSH_BUILD_R=562

 $e $bi$me: Finished configuration testing, now producing output.$ao

@ -2366,8 +2436,8 @@ cat >test.sh <<-EOF
 	set -A check_categories -- $check_categories
 	pflag='$curdir/$mkshexe'
 	sflag='$srcdir/check.t'
-	usee=0 Pflag=0 Sflag=0 uset=0 vflag=1 xflag=0
-	while getopts "C:e:fPp:QSs:t:v" ch; do case \$ch {
+	usee=0 useU=0 Pflag=0 Sflag=0 uset=0 vflag=1 xflag=0
+	while getopts "C:e:fPp:QSs:t:U:v" ch; do case \$ch {
 	(C)	check_categories[\${#check_categories[*]}]=\$OPTARG ;;
 	(e)	usee=1; eflag=\$OPTARG ;;
 	(f)	check_categories[\${#check_categories[*]}]=fastbox ;;
@ -2380,6 +2450,7 @@ cat >test.sh <<-EOF
 	(+S)	Sflag=0 ;;
 	(s)	sflag=\$OPTARG ;;
 	(t)	uset=1; tflag=\$OPTARG ;;
+	(U)	useU=1; Uflag=\$OPTARG ;;
 	(v)	vflag=1 ;;
 	(+v)	vflag=0 ;;
 	(*)	xflag=1 ;;
@ -2387,6 +2458,9 @@ cat >test.sh <<-EOF
 	done
 	shift \$((OPTIND - 1))
 	set -A args -- '$srcdir/check.pl' -p "\$pflag"
+	if $ebcdic; then
+		args[\${#args[*]}]=-E
+	fi
 	x=
 	for y in "\${check_categories[@]}"; do
 		x=\$x,\$y
@ -2404,6 +2478,10 @@ cat >test.sh <<-EOF
 		args[\${#args[*]}]=-t
 		args[\${#args[*]}]=\$tflag
 	fi
+	if (( useU )); then
+		args[\${#args[*]}]=-U
+		args[\${#args[*]}]=\$Uflag
+	fi
 	(( vflag )) && args[\${#args[*]}]=-v
 	(( xflag )) && args[\${#args[*]}]=-x	# force usage by synerr
 	if [[ -n \$TMPDIR && -d \$TMPDIR/. ]]; then
@ -2647,7 +2725,7 @@ MKSH_A4PB			force use of arc4random_pushb
 MKSH_ASSUME_UTF8		(0=disabled, 1=enabled; default: unset)
 MKSH_BINSHPOSIX			if */sh or */-sh, enable set -o posix
 MKSH_BINSHREDUCED		if */sh or */-sh, enable set -o sh
-MKSH_CLS_STRING			"\033[;H\033[J"
+MKSH_CLS_STRING			KSH_ESC_STRING "[;H" KSH_ESC_STRING "[J"
 MKSH_DEFAULT_EXECSHELL		"/bin/sh" (do not change)
 MKSH_DEFAULT_PROFILEDIR		"/etc" (do not change)
 MKSH_DEFAULT_TMPDIR		"/tmp" (do not change)
--- a/src/check.pl
+++ b/src/check.pl
@ -1,8 +1,8 @@
-# $MirOS: src/bin/mksh/check.pl,v 1.42 2015/11/29 17:05:00 tg Exp $
+# $MirOS: src/bin/mksh/check.pl,v 1.49 2017/05/05 21:17:31 tg Exp $
 # $OpenBSD: th,v 1.1 2013/12/02 20:39:44 millert Exp $
 #-
 # Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011,
-#		2012, 2013, 2014, 2015
+#		2012, 2013, 2014, 2015, 2017
 #	mirabilos <m@mirbsd.org>
 #
 # Provided that these terms and disclaimer and all copyright notices
@ -78,9 +78,9 @@
 #					the test harness).
 #					CYGWIN is set to nodosfilewarning.
 #					ENV is set to /nonexistant.
-#					PATHSEP is set to either : or ;.
 #					__progname is set to the -p argument.
 #					__perlname is set to $^X (perlexe).
+#					@utflocale@ is substituted from -U.
 #	file-setup		mps	Used to create files, directories
 #					and symlinks. First word is either
 #					file, dir or symlink; second word is
@ -153,9 +153,15 @@
 #	p	tag takes parameters (used with m).
 #	s	tag can be used several times.

+# require Config only if it exists
 # pull EINTR from POSIX.pm or Errno.pm if they exist
 # otherwise just skip it
 BEGIN {
+	eval {
+		require Config;
+		import Config;
+		1;
+	};
 	$EINTR = 0;
 	eval {
 		require POSIX;
@ -172,7 +178,6 @@ BEGIN {
 };

 use Getopt::Std;
-use Config;

 $os = defined $^O ? $^O : 'unknown';

@ -180,7 +185,7 @@ $os = defined $^O ? $^O : 'unknown';

 $Usage = <<EOF ;
 Usage: $prog [-Pv] [-C cat] [-e e=v] [-p prog] [-s fn] [-T dir] \
-       [-t tmo] name ...
+       [-t tmo] [-U lcl] name ...
 	-C c	Specify the comma separated list of categories the program
 		belongs to (see category field).
 	-e e=v	Set the environment variable e to v for all tests
@ -193,6 +198,7 @@ Usage: $prog [-Pv] [-C cat] [-e e=v] [-p prog] [-s fn] [-T dir] \
 		scaned for test files (which end in .t).
 	-T dir	Use dir instead of /tmp to hold temporary files
 	-t t	Use t as default time limit for tests (default is unlimited)
+	-U lcl	Use lcl as UTF-8 locale (e.g. C.UTF-8) instead of the default
 	-v	Verbose mode: print reason test failed.
 	name	specifies the name of the test(s) to run; if none are
 		specified, all tests are run.
@ -241,7 +247,7 @@ $nxpassed = 0;

 %known_tests = ();

-if (!getopts('C:e:Pp:s:T:t:v')) {
+if (!getopts('C:Ee:Pp:s:T:t:U:v')) {
    print STDERR $Usage;
    exit 1;
 }
@ -250,8 +256,10 @@ die "$prog: no program specified (use -p)\n" if !defined $opt_p;
 die "$prog: no test set specified (use -s)\n" if !defined $opt_s;
 $test_prog = $opt_p;
 $verbose = defined $opt_v && $opt_v;
+$is_ebcdic = defined $opt_E && $opt_E;
 $test_set = $opt_s;
 $temp_base = $opt_T || "/tmp";
+$utflocale = $opt_U || (($os eq "hpux") ? "en_US.utf8" : "en_US.UTF-8");
 if (defined $opt_t) {
    die "$prog: bad -t argument (should be number > 0): $opt_t\n"
 	if $opt_t !~ /^\d+$/ || $opt_t <= 0;
@ -259,6 +267,14 @@ if (defined $opt_t) {
 }
 $program_kludge = defined $opt_P ? $opt_P : 0;

+if ($is_ebcdic) {
+	$categories{'shell:ebcdic-yes'} = 1;
+	$categories{'shell:ascii-no'} = 1;
+} else {
+	$categories{'shell:ebcdic-no'} = 1;
+	$categories{'shell:ascii-yes'} = 1;
+}
+
 if (defined $opt_C) {
    foreach $c (split(',', $opt_C)) {
 	$c =~ s/\s+//;
@ -281,12 +297,24 @@ foreach $env (('HOME', 'LD_LIBRARY_PATH', 'LOCPATH', 'LOGNAME',
 }
 $new_env{'CYGWIN'} = 'nodosfilewarning';
 $new_env{'ENV'} = '/nonexistant';
-$new_env{'PATHSEP'} = $os eq 'os2' ? ';' : ':';
+
 if (($os eq 'VMS') || ($Config{perlpath} =~ m/$Config{_exe}$/i)) {
 	$new_env{'__perlname'} = $Config{perlpath};
 } else {
 	$new_env{'__perlname'} = $Config{perlpath} . $Config{_exe};
 }
+$new_env{'__perlname'} = $^X if ($new_env{'__perlname'} eq '') and -f $^X and -x $^X;
+if ($new_env{'__perlname'} eq '') {
+	foreach $pathelt (split /:/,$ENV{'PATH'}) {
+		chomp($pathelt = `pwd`) if $pathelt eq '';
+		my $x = $pathelt . '/' . $^X;
+		next unless -f $x and -x $x;
+		$new_env{'__perlname'} = $x;
+		last;
+	}
+}
+$new_env{'__perlname'} = $^X if ($new_env{'__perlname'} eq '');
+
 if (defined $opt_e) {
    # XXX need a way to allow many -e arguments...
    if ($opt_e =~ /^([a-zA-Z_]\w*)(|=(.*))$/) {
@ -866,38 +894,50 @@ first_diff
 	    $char = 1;
 	}
    }
-    return "first difference: line $lineno, char $char (wanted '"
-	. &format_char($ce) . "', got '"
-	. &format_char($cg) . "'";
+    return "first difference: line $lineno, char $char (wanted " .
+	&format_char($ce) . ", got " . &format_char($cg);
 }

 sub
 format_char
 {
-    local($ch, $s);
+    local($ch, $s, $q);

    $ch = ord($_[0]);
+    $q = "'";
+
+    if ($is_ebcdic) {
+	if ($ch == 0x15) {
+		return $q . '\n' . $q;
+	} elsif ($ch == 0x16) {
+		return $q . '\b' . $q;
+	} elsif ($ch == 0x05) {
+		return $q . '\t' . $q;
+	} elsif ($ch < 64 || $ch == 255) {
+		return sprintf("X'%02X'", $ch);
+	}
+	return sprintf("'%c' (X'%02X')", $ch, $ch);
+    }
+
+    $s = sprintf("0x%02X (", $ch);
    if ($ch == 10) {
-	return '\n';
+	return $s . $q . '\n' . $q . ')';
    } elsif ($ch == 13) {
-	return '\r';
+	return $s . $q . '\r' . $q . ')';
    } elsif ($ch == 8) {
-	return '\b';
+	return $s . $q . '\b' . $q . ')';
    } elsif ($ch == 9) {
-	return '\t';
+	return $s . $q . '\t' . $q . ')';
    } elsif ($ch > 127) {
-	$ch -= 127;
-	$s = "M-";
-    } else {
-	$s = '';
+	$ch -= 128;
+	$s .= "M-";
    }
    if ($ch < 32) {
-	$s .= '^';
-	$ch += ord('@');
+	return sprintf("%s^%c)", $s, $ch + ord('@'));
    } elsif ($ch == 127) {
-	return $s . "^?";
+	return $s . "^?)";
    }
-    return $s . sprintf("%c", $ch);
+    return sprintf("%s'%c')", $s, $ch);
 }

 sub
@ -1159,6 +1199,8 @@ read_test
 	    print STDERR "$prog:$test{':long-name'}: env-setup field doesn't start and end with the same character\n";
 	    return undef;
 	}
+
+	$test{'env-setup'} =~ s/\@utflocale\@/$utflocale/g;
    }
    if (defined $test{'expected-exit'}) {
 	local($val) = $test{'expected-exit'};
--- a/src/check.t
+++ b/src/check.t
@ -1,4 +1,4 @@
-# $MirOS: src/bin/mksh/check.t,v 1.775 2017/04/12 17:38:41 tg Exp $
+# $MirOS: src/bin/mksh/check.t,v 1.797 2017/08/29 13:38:29 tg Exp $
 # -*- mode: sh -*-
 #-
 # Copyright © 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
@ -30,40 +30,62 @@
 # (2013/12/02 20:39:44) http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/regress/bin/ksh/?sortby=date

 expected-stdout:
-	@(#)MIRBSD KSH R55 2017/04/12
+	@(#)MIRBSD KSH R56 2017/08/29
 description:
-	Check version of shell.
+	Check base version of full shell
 stdin:
-	echo $KSH_VERSION
+	echo ${KSH_VERSION%%' +'*}
 name: KSH_VERSION
-category: !shell:legacy-yes,!shell:textmode-yes
+category: !shell:legacy-yes
 ---
 expected-stdout:
-	@(#)LEGACY KSH R55 2017/04/12
+	@(#)LEGACY KSH R56 2017/08/29
 description:
-	Check version of legacy shell.
+	Check base version of legacy shell
 stdin:
-	echo $KSH_VERSION
+	echo ${KSH_VERSION%%' +'*}
 name: KSH_VERSION-legacy
-category: !shell:legacy-no,!shell:textmode-yes
+category: !shell:legacy-no
 ---
-expected-stdout:
-	@(#)MIRBSD KSH R55 2017/04/12 +TEXTMODE
+name: KSH_VERSION-ascii
 description:
-	Check version of shell.
+	Check that the shell version tag does not include EBCDIC
+category: !shell:ebcdic-yes
 stdin:
-	echo $KSH_VERSION
+	for x in $KSH_VERSION; do
+		[[ $x = '+EBCDIC' ]] && exit 1
+	done
+	exit 0
+---
+name: KSH_VERSION-ebcdic
+description:
+	Check that the shell version tag includes EBCDIC
+category: !shell:ebcdic-no
+stdin:
+	for x in $KSH_VERSION; do
+		[[ $x = '+EBCDIC' ]] && exit 0
+	done
+	exit 1
+---
+name: KSH_VERSION-binmode
+description:
+	Check that the shell version tag does not include TEXTMODE
+category: !shell:textmode-yes
+stdin:
+	for x in $KSH_VERSION; do
+		[[ $x = '+TEXTMODE' ]] && exit 1
+	done
+	exit 0
+---
 name: KSH_VERSION-textmode
-category: !shell:legacy-yes,!shell:textmode-no
---
-expected-stdout:
-	@(#)LEGACY KSH R55 2017/04/12 +TEXTMODE
 description:
-	Check version of legacy shell.
+	Check that the shell version tag includes TEXTMODE
+category: !shell:textmode-no
 stdin:
-	echo $KSH_VERSION
-name: KSH_VERSION-legacy-textmode
-category: !shell:legacy-no,!shell:textmode-no
+	for x in $KSH_VERSION; do
+		[[ $x = '+TEXTMODE' ]] && exit 0
+	done
+	exit 1
 ---
 name: selftest-1
 description:
@ -1334,7 +1356,7 @@ name: cd-pe
 description:
 	Check package for cd -Pe
 need-pass: no
-# the mv command fails on Cygwin
+# the mv command fails on Cygwin and z/OS
 # Hurd aborts the testsuite (permission denied)
 # QNX does not find subdir to cd into
 category: !os:cygwin,!os:gnu,!os:msys,!os:nto,!os:os390,!nosymlink
@ -1355,7 +1377,7 @@ file-setup: file 644 "x"
 	cd -P$1 subdir
 	echo 2=$?,${PWD#$bwd/}
 	cd $bwd
-	chmod 755 renamed
+	chmod 755 noread renamed 2>/dev/null
 	rm -rf noread link renamed
 stdin:
 	export TSHELL="$__progname"
@ -1944,15 +1966,12 @@ expected-stdout:
 name: eglob-bad-1
 description:
 	Check that globbing isn't done when glob has syntax error
-file-setup: file 644 "abcx"
-file-setup: file 644 "abcz"
-file-setup: file 644 "bbc"
+category: !os:cygwin,!os:msys,!os:os2
+file-setup: file 644 "@(a[b|)c]foo"
 stdin:
-	echo !([*)*
-	echo +(a|b[)*
+	echo @(a[b|)c]*
 expected-stdout:
-	!([*)*
-	+(a|b[)*
+	@(a[b|)c]*
 ---
 name: eglob-bad-2
 description:
@ -2039,9 +2058,11 @@ stdin:
 	case foo in *(a|b[)) echo yes;; *) echo no;; esac
 	case foo in *(a|b[)|f*) echo yes;; *) echo no;; esac
 	case '*(a|b[)' in *(a|b[)) echo yes;; *) echo no;; esac
+	case 'aab[b[ab[a' in *(a|b[)) echo yes;; *) echo no;; esac
 expected-stdout:
 	no
 	yes
+	no
 	yes
 ---
 name: eglob-trim-1
@ -2305,6 +2326,7 @@ expected-stdout:
 name: eglob-utf8-1
 description:
 	UTF-8 mode differences for eglobbing
+category: !shell:ebcdic-yes
 stdin:
 	s=blöd
 	set +U
@ -2336,17 +2358,26 @@ expected-stdout:
 ---
 name: glob-bad-1
 description:
-	Check that globbing isn't done when glob has syntax error
+	Check that [ matches itself if it's not a valid bracket expr
+	but does not prevent globbing, while backslash-escaping does
 file-setup: dir 755 "[x"
 file-setup: file 644 "[x/foo"
 stdin:
 	echo [*
 	echo *[x
 	echo [x/*
+	:>'ab[x'
+	:>'a[a-z][x'
+	echo a[a-z][*
+	echo a[a-z]*
+	echo a[a\-z]*
 expected-stdout:
-	[*
-	*[x
+	[x
+	[x
 	[x/foo
+	ab[x
+	ab[x
+	a[a-z]*
 ---
 name: glob-bad-2
 description:
@ -2365,6 +2396,18 @@ expected-stdout:
 	dir/abc
 	dir/abc
 ---
+name: glob-bad-3
+description:
+	Check that the slash is parsed before the glob
+stdin:
+	mkdir a 'a[b'
+	(cd 'a[b'; echo ok >'c]d')
+	echo nok >abd
+	echo fail >a/d
+	cat a[b/c]d
+expected-stdout:
+	ok
+---
 name: glob-range-1
 description:
 	Test range matching
@ -2373,24 +2416,31 @@ file-setup: file 644 "abc"
 file-setup: file 644 "bbc"
 file-setup: file 644 "cbc"
 file-setup: file 644 "-bc"
+file-setup: file 644 "!bc"
+file-setup: file 644 "^bc"
+file-setup: file 644 "+bc"
+file-setup: file 644 ",bc"
+file-setup: file 644 "0bc"
+file-setup: file 644 "1bc"
 stdin:
 	echo [ab-]*
 	echo [-ab]*
 	echo [!-ab]*
 	echo [!ab]*
 	echo []ab]*
-	:>'./!bc'
-	:>'./^bc'
 	echo [^ab]*
-	echo [!ab]*
+	echo [+--]*
+	echo [--1]*
+
 expected-stdout:
 	-bc abc bbc
 	-bc abc bbc
-	cbc
-	-bc cbc
+	!bc +bc ,bc 0bc 1bc ^bc cbc
+	!bc +bc ,bc -bc 0bc 1bc ^bc cbc
 	abc bbc
 	^bc abc bbc
-	!bc -bc ^bc cbc
+	+bc ,bc -bc
+	-bc 0bc 1bc
 ---
 name: glob-range-2
 description:
@ -2408,7 +2458,7 @@ description:
 # breaks on Mac OSX (HFS+ non-standard Unicode canonical decomposition)
 # breaks on Cygwin 1.7 (files are now UTF-16 or something)
 # breaks on QNX 6.4.1 (says RT)
-category: !os:cygwin,!os:darwin,!os:msys,!os:nto,!os:os2
+category: !os:cygwin,!os:darwin,!os:msys,!os:nto,!os:os2,!os:os390
 need-pass: no
 file-setup: file 644 "aÂc"
 stdin:
@ -2435,10 +2485,32 @@ file-setup: file 644 "cbc"
 file-setup: file 644 "dbc"
 file-setup: file 644 "ebc"
 file-setup: file 644 "-bc"
+file-setup: file 644 "@bc"
 stdin:
 	echo [a-c-e]*
+	echo [a--@]*
 expected-stdout:
 	-bc abc bbc cbc ebc
+	@bc
+---
+name: glob-word-1
+description:
+	Check BSD word boundary matches
+stdin:
+	t() { [[ $1 = *[[:\<:]]bar[[:\>:]]* ]]; echo =$?; }
+	t 'foo bar baz'
+	t 'foobar baz'
+	t 'foo barbaz'
+	t 'bar'
+	t '_bar'
+	t 'bar_'
+expected-stdout:
+	=0
+	=1
+	=1
+	=0
+	=1
+	=1
 ---
 name: glob-trim-1
 description:
@ -2695,6 +2767,7 @@ expected-stdout:
 name: heredoc-10
 description:
 	Check direct here document assignment
+category: !shell:ebcdic-yes
 stdin:
 	x=u
 	va=<<EOF
@ -2747,6 +2820,62 @@ expected-stdout:
 	} |
 	| vapp1^vapp2^ |
 ---
+name: heredoc-10-ebcdic
+description:
+	Check direct here document assignment
+category: !shell:ebcdic-no
+stdin:
+	x=u
+	va=<<EOF
+	=a $x \x7C=
+	EOF
+	vb=<<'EOF'
+	=b $x \x7C=
+	EOF
+	function foo {
+		vc=<<-EOF
+			=c $x \x7C=
+		EOF
+	}
+	fnd=$(typeset -f foo)
+	print -r -- "$fnd"
+	function foo {
+		echo blub
+	}
+	foo
+	eval "$fnd"
+	foo
+	# rather nonsensical, but…
+	vd=<<<"=d $x \x7C="
+	ve=<<<'=e $x \x7C='
+	vf=<<<$'=f $x \x7C='
+	# now check
+	print -r -- "| va={$va} vb={$vb} vc={$vc} vd={$vd} ve={$ve} vf={$vf} |"
+	# check append
+	v=<<-EOF
+		vapp1
+	EOF
+	v+=<<-EOF
+		vapp2
+	EOF
+	print -r -- "| ${v//$'\n'/^} |"
+expected-stdout:
+	function foo {
+		vc=<<-EOF 
+	=c $x \x7C=
+	EOF
+	
+	} 
+	blub
+	| va={=a u \x7C=
+	} vb={=b $x \x7C=
+	} vc={=c u \x7C=
+	} vd={=d u \x7C=
+	} ve={=e $x \x7C=
+	} vf={=f $x @=
+	} |
+	| vapp1^vapp2^ |
+---
 name: heredoc-11
 description:
 	Check here documents with no or empty delimiter
@ -5034,18 +5163,34 @@ expected-stdout:
 	2 :10/8,16: .
 	3 :10/10,16: .
 ---
-name: integer-base-check-numeric-from
+name: integer-base-check-numeric-from-1
 description:
-	Check behaviour for base one to 36, and that 37 degrades to 10
+	Check behaviour for base one
+category: !shell:ebcdic-yes
 stdin:
 	echo 1:$((1#1))0.
+expected-stdout:
+	1:490.
+---
+name: integer-base-check-numeric-from-1-ebcdic
+description:
+	Check behaviour for base one
+category: !shell:ebcdic-no
+stdin:
+	echo 1:$((1#1))0.
+expected-stdout:
+	1:2410.
+---
+name: integer-base-check-numeric-from-2
+description:
+	Check behaviour for base two to 36, and that 37 degrades to 10
+stdin:
 	i=1
 	while (( ++i <= 37 )); do
 		eval 'echo '$i':$(('$i'#10)).'
 	done
 	echo 37:$($__progname -c 'echo $((37#10))').$?:
 expected-stdout:
-	1:490.
 	2:2.
 	3:3.
 	4:4.
@ -5084,18 +5229,41 @@ expected-stdout:
 	37:10.
 	37:10.0:
 ---
-name: integer-base-check-numeric-to
+name: integer-base-check-numeric-to-1
 description:
-	Check behaviour for base one to 36, and that 37 degrades to 10
+	Check behaviour for base one
+category: !shell:ebcdic-yes
 stdin:
-	i=0
+	i=1
+	typeset -Uui$i x=0x40
+	eval "typeset -i10 y=$x"
+	print $i:$x.$y.
+expected-stdout:
+	1:1#@.64.
+---
+name: integer-base-check-numeric-to-1-ebcdic
+description:
+	Check behaviour for base one
+category: !shell:ebcdic-no
+stdin:
+	i=1
+	typeset -Uui$i x=0x7C
+	eval "typeset -i10 y=$x"
+	print $i:$x.$y.
+expected-stdout:
+	1:1#@.124.
+---
+name: integer-base-check-numeric-to-2
+description:
+	Check behaviour for base two to 36, and that 37 degrades to 10
+stdin:
+	i=1
 	while (( ++i <= 37 )); do
 		typeset -Uui$i x=0x40
 		eval "typeset -i10 y=$x"
 		print $i:$x.$y.
 	done
 expected-stdout:
-	1:1#@.64.
 	2:2#1000000.64.
 	3:3#2101.64.
 	4:4#1000.64.
@ -6738,6 +6906,13 @@ expected-exit: e != 0
 expected-stderr-pattern:
 	/read[ -]?only/
 ---
+name: readonly-5
+description:
+	Ensure readonly is idempotent
+stdin:
+	readonly x=1
+	readonly x
+---
 name: syntax-1
 description:
 	Check that lone ampersand is a syntax error
@ -6871,6 +7046,48 @@ expected-stdout:
 	y1-
 	x2-3- z1-
 ---
+name: exec-modern-korn-shell
+description:
+	Check that exec can execute any command that makes it
+	through syntax and parser
+stdin:
+	print '#!'"$__progname"'\necho tf' >lq
+	chmod +x lq
+	PATH=$PWD
+	exec 2>&1
+	foo() { print two; }
+	print =1
+	(exec print one)
+	print =2
+	(exec foo)
+	print =3
+	(exec ls)
+	print =4
+	(exec lq)
+expected-stdout-pattern:
+	/=1\none\n=2\ntwo\n=3\n.*: ls: not found\n=4\ntf\n/
+---
+name: exec-ksh88
+description:
+	Check that exec only executes after a PATH search
+arguments: !-o!posix!
+stdin:
+	print '#!'"$__progname"'\necho tf' >lq
+	chmod +x lq
+	PATH=$PWD
+	exec 2>&1
+	foo() { print two; }
+	print =1
+	(exec print one)
+	print =2
+	(exec foo)
+	print =3
+	(exec ls)
+	print =4
+	(exec lq)
+expected-stdout-pattern:
+	/=1\n.*: print: not found\n=2\n.*: foo: not found\n=3\n.*: ls: not found\n=4\ntf\n/
+---
 name: xxx-what-do-you-call-this-1
 stdin:
 	echo "${foo:-"a"}*"
@ -8233,7 +8450,7 @@ description:
 	multibyte character of the shell input (with -c, from standard
 	input, as file, or as eval argument), but nowhere else
 # breaks on Mac OSX (HFS+ non-standard Unicode canonical decomposition)
-category: !os:darwin
+category: !os:darwin,!shell:ebcdic-yes
 stdin:
 	mkdir foo
 	print '#!/bin/sh\necho ohne' >foo/fnord
@ -8310,11 +8527,10 @@ expected-stdout:
 expected-stderr-pattern:
 	/(Unrecognized character .... ignored at \..t4 line 1)*/
 ---
-name: utf8opt-1a
+name: utf8opt-1
 description:
 	Check that the utf8-mode flag is not set at non-interactive startup
-category: !os:hpux
-env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8!
+env-setup: !PS1=!PS2=!LC_CTYPE=@utflocale@!
 stdin:
 	if [[ $- = *U* ]]; then
 		echo is set
@ -8324,51 +8540,15 @@ stdin:
 expected-stdout:
 	is not set
 ---
-name: utf8opt-1b
-description:
-	Check that the utf8-mode flag is not set at non-interactive startup
-category: os:hpux
-env-setup: !PS1=!PS2=!LC_CTYPE=en_US.utf8!
-stdin:
-	if [[ $- = *U* ]]; then
-		echo is set
-	else
-		echo is not set
-	fi
-expected-stdout:
-	is not set
---
-name: utf8opt-2a
+name: utf8opt-2
 description:
 	Check that the utf8-mode flag is set at interactive startup.
-	-DMKSH_ASSUME_UTF8=0 => expected failure, please ignore
-	-DMKSH_ASSUME_UTF8=1 => not expected, please investigate
-	-UMKSH_ASSUME_UTF8 => not expected, but if your OS is old,
-	 try passing HAVE_SETLOCALE_CTYPE=0 to Build.sh
+	If your OS is old, try passing HAVE_SETLOCALE_CTYPE=0 to Build.sh
 need-pass: no
-category: !os:hpux,!os:msys,!os:os2
+category: !noutf8
 need-ctty: yes
 arguments: !-i!
-env-setup: !PS1=!PS2=!LC_CTYPE=en_US.UTF-8!
-stdin:
-	if [[ $- = *U* ]]; then
-		echo is set
-	else
-		echo is not set
-	fi
-expected-stdout:
-	is set
-expected-stderr-pattern:
-	/(# )*/
---
-name: utf8opt-2b
-description:
-	Check that the utf8-mode flag is set at interactive startup
-	Expected failure if -DMKSH_ASSUME_UTF8=0
-category: os:hpux
-need-ctty: yes
-arguments: !-i!
-env-setup: !PS1=!PS2=!LC_CTYPE=en_US.utf8!
+env-setup: !PS1=!PS2=!LC_CTYPE=@utflocale@!
 stdin:
 	if [[ $- = *U* ]]; then
 		echo is set
@ -9348,6 +9528,7 @@ expected-stdout:
 name: varexpand-special-hash
 description:
 	Check special ${var@x} expansion for x=hash
+category: !shell:ebcdic-yes
 stdin:
 	typeset -i8 foo=10
 	bar=baz
@ -9356,9 +9537,22 @@ stdin:
 expected-stdout:
 	9B15FBFB CFBDD32B 00000000 .
 ---
+name: varexpand-special-hash-ebcdic
+description:
+	Check special ${var@x} expansion for x=hash
+category: !shell:ebcdic-no
+stdin:
+	typeset -i8 foo=10
+	bar=baz
+	unset baz
+	print ${foo@#} ${bar@#} ${baz@#} .
+expected-stdout:
+	016AE33D 9769C4AF 00000000 .
+---
 name: varexpand-special-quote
 description:
 	Check special ${var@Q} expansion for quoted strings
+category: !shell:faux-ebcdic
 stdin:
 	set +U
 	i=x
@ -9378,6 +9572,29 @@ expected-stdout:
 	typeset v='a b'
 	typeset w=$'c\nd\240e\u20ACf'
 ---
+name: varexpand-special-quote-faux-EBCDIC
+description:
+	Check special ${var@Q} expansion for quoted strings
+category: shell:faux-ebcdic
+stdin:
+	set +U
+	i=x
+	j=a\ b
+	k=$'c
+	d\xA0''eâ‚¬f'
+	print -r -- "<i=$i j=$j k=$k>"
+	s="u=${i@Q} v=${j@Q} w=${k@Q}"
+	print -r -- "s=\"$s\""
+	eval "$s"
+	typeset -p u v w
+expected-stdout:
+	<i=x j=a b k=c
+	d eâ‚¬f>
+	s="u=x v='a b' w=$'c\nd e\u20ACf'"
+	typeset u=x
+	typeset v='a b'
+	typeset w=$'c\nd e\u20ACf'
+---
 name: varexpand-null-1
 description:
 	Ensure empty strings expand emptily
@ -9718,7 +9935,7 @@ stdin:
 	    $'\J\K\L\M\N\O\P\Q\R\S\T\U1\V\W\X\Y\Z\[\\\]\^\_\`\a\b\d\e' \
 	    $'\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u1\v\w\x1\y\z\{\|\}\~ $x' \
 	    $'\u20acd' $'\U20acd' $'\x123' $'fn\x0rd' $'\0234' $'\234' \
-	    $'\2345' $'\ca' $'\c!' $'\c?' $'\c€' $'a\
+	    $'\2345' $'\ca' $'\c!' $'\c?' $'\câ€¦' $'a\
 	b' | {
 		# integer-base-one-3As
 		typeset -Uui16 -Z11 pos=0
@ -9760,7 +9977,7 @@ expected-stdout:
 	00000050  68 69 6A 6B 6C 6D 0A 6F - 70 71 0D 73 09 01 0B 77  |hijklm.opq.s...w|
 	00000060  01 79 7A 7B 7C 7D 7E 20 - 24 78 0A E2 82 AC 64 0A  |.yz{|}~ $x....d.|
 	00000070  EF BF BD 0A C4 A3 0A 66 - 6E 0A 13 34 0A 9C 0A 9C  |.......fn..4....|
-	00000080  35 0A 01 0A 01 0A 7F 0A - 02 82 AC 0A 61 0A 62 0A  |5...........a.b.|
+	00000080  35 0A 01 0A 01 0A 7F 0A - 82 80 A6 0A 61 0A 62 0A  |5...........a.b.|
 ---
 name: dollar-quotes-in-heredocs-strings
 description:
@ -10391,6 +10608,7 @@ expected-stdout:
 name: integer-base-one-5A
 description:
 	Check to see that we’re NUL and Unicode safe
+category: !shell:ebcdic-yes
 stdin:
 	set +U
 	print 'a\0b\xfdz' >x
@ -10401,6 +10619,20 @@ stdin:
 expected-stdout:
 	16#61 16#0 16#62 16#FD 16#7A .
 ---
+name: integer-base-one-5E
+description:
+	Check to see that we’re NUL and Unicode safe
+category: !shell:ebcdic-no
+stdin:
+	set +U
+	print 'a\0b\xfdz' >x
+	read -a y <x
+	set -U
+	typeset -Uui16 y
+	print ${y[*]} .
+expected-stdout:
+	16#81 16#0 16#82 16#FD 16#A9 .
+---
 name: integer-base-one-5W
 description:
 	Check to see that we’re NUL and Unicode safe
@ -11486,19 +11718,19 @@ expected-stdout:
 		echo $(true) $((1+ 2)) ${  :;} ${| REPLY=x;}
 	}
 	inline_COMSUB_EXPRSUB_FUNSUB_VALSUB() {
-		\echo $(\true ) $((1+ 2)) ${ : ;} ${|REPLY=x ;} 
+		\echo $(\true ) $((1+ 2)) ${ \: ;} ${|REPLY=x ;} 
 	} 
 	function comsub_COMSUB_EXPRSUB_FUNSUB_VALSUB { x=$(
 		echo $(true) $((1+ 2)) ${  :;} ${| REPLY=x;}
 	); }
 	function comsub_COMSUB_EXPRSUB_FUNSUB_VALSUB {
-		x=$(\echo $(\true ) $((1+ 2)) ${ : ;} ${|REPLY=x ;} ) 
+		x=$(\echo $(\true ) $((1+ 2)) ${ \: ;} ${|REPLY=x ;} ) 
 	} 
 	function reread_COMSUB_EXPRSUB_FUNSUB_VALSUB { x=$((
 		echo $(true) $((1+ 2)) ${  :;} ${| REPLY=x;}
 	)|tr u x); }
 	function reread_COMSUB_EXPRSUB_FUNSUB_VALSUB {
-		x=$( ( \echo $(\true ) $((1+ 2)) ${ : ;} ${|REPLY=x ;} ) | \tr u x ) 
+		x=$( ( \echo $(\true ) $((1+ 2)) ${ \: ;} ${|REPLY=x ;} ) | \tr u x ) 
 	} 
 	inline_QCHAR_OQUOTE_CQUOTE() {
 		echo fo\ob\"a\`r\'b\$az
@ -12498,12 +12730,23 @@ expected-stdout:
 name: echo-test-1
 description:
 	Test what the echo builtin does (mksh)
+category: !shell:ebcdic-yes
 stdin:
 	echo -n 'foo\x40bar'
 	echo -e '\tbaz'
 expected-stdout:
 	foo@bar	baz
 ---
+name: echo-test-1-ebcdic
+description:
+	Test what the echo builtin does (mksh)
+category: !shell:ebcdic-no
+stdin:
+	echo -n 'foo\x7Cbar'
+	echo -e '\tbaz'
+expected-stdout:
+	foo@bar	baz
+---
 name: echo-test-2
 description:
 	Test what the echo builtin does (POSIX)
@ -12534,7 +12777,7 @@ expected-stdout:
 name: echo-test-3-normal
 description:
 	Test what the echo builtin does, and test a compatibility flag.
-category: !mnbsdash
+category: !mnbsdash,!shell:ebcdic-yes
 stdin:
 	"$__progname" -c 'echo -n 1=\\x40$1; echo -e \\x2E' -- foo bar
 	"$__progname" -o posix -c 'echo -n 2=\\x40$1; echo -e \\x2E' -- foo bar
@ -12544,6 +12787,19 @@ expected-stdout:
 	2=\x40foo-e \x2E
 	3=\x40foo-e \x2E
 ---
+name: echo-test-3-ebcdic
+description:
+	Test what the echo builtin does, and test a compatibility flag.
+category: !mnbsdash,!shell:ebcdic-no
+stdin:
+	"$__progname" -c 'echo -n 1=\\x7C$1; echo -e \\x4B' -- foo bar
+	"$__progname" -o posix -c 'echo -n 2=\\x7C$1; echo -e \\x4B' -- foo bar
+	"$__progname" -o sh -c 'echo -n 3=\\x7C$1; echo -e \\x4B' -- foo bar
+expected-stdout:
+	1=@foo.
+	2=\x7Cfoo-e \x4B
+	3=\x7Cfoo-e \x4B
+---
 name: utilities-getopts-1
 description:
 	getopts sets OPTIND correctly for unparsed option
@ -12979,6 +13235,7 @@ name: duffs-device
 description:
 	Check that the compiler did not optimise-break them
 	(lex.c has got a similar one in SHEREDELIM)
+category: !shell:faux-ebcdic,!shell:ebcdic-yes
 stdin:
 	set +U
 	s=
@ -12991,6 +13248,38 @@ stdin:
 expected-stdout:
 	typeset s=$'\001\002\003\004\005\006\a\b\t\n\v\f\r\016\017\020\021\022\023\024\025\026\027\030\031\032\E\034\035\036\037 !"#$%&\047()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377\u00A0\u20AC\uFFFD\357\277\276\357\277\277\360\220\200\200.'
 ---
+name: duffs-device-ebcdic
+description:
+	Check that the compiler did not optimise-break them
+category: !shell:ebcdic-no
+stdin:
+	set +U
+	s=
+	typeset -i1 i=0
+	while (( ++i < 256 )); do
+		s+=${i#1#}
+	done
+	#s+=$'\xC2\xA0\xE2\x82\xAC\xEF\xBF\xBD\xEF\xBF\xBE\xEF\xBF\xBF\xF0\x90\x80\x80.' #XXX
+	typeset -p s
+expected-stdout:
+	typeset s=$'\001\002\003\004\t\006\007\010\011\012\v\f\r\016\017\020\021\022\023\024\n\b\027\030\031\032\033\034\035\036\037\040\041\042\043\044\045\046\E\050\051\052\053\054\055\056\a\060\061\062\063\064\065\066\067\070\071\072\073\074\075\076\077  âäàáãåçñ¢.<(+|&éêëèíîïìß!$*);^-/ÂÄÀÁÃÅÇÑ¦,%_>?øÉÊËÈÍÎÏÌ`:#@\175="Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ¤µ~stuvwxyz¡¿Ð[Þ®¬£¥·©§¶¼½¾Ý¨¯]´×{ABCDEFGHIôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ\377'
+---
+name: duffs-device-faux-EBCDIC
+description:
+	Check that the compiler did not optimise-break them
+category: shell:faux-ebcdic
+stdin:
+	set +U
+	s=
+	typeset -i1 i=0
+	while (( ++i < 256 )); do
+		s+=${i#1#}
+	done
+	s+=$'\xC2\xA0\xE2\x82\xAC\xEF\xBF\xBD\xEF\xBF\xBE\xEF\xBF\xBF\xF0\x90\x80\x80.'
+	typeset -p s
+expected-stdout:
+	typeset s=$'\001\002\003\004\005\006\a\b\t\n\v\f\r\016\017\020\021\022\023\024\025\026\027\030\031\032\E\034\035\036\037 !"#$%&\047()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237 ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ\u00A0\u20AC\uFFFDï¿¾ï¿¿ð\220\200\200.'
+---
 name: stateptr-underflow
 description:
 	This check overflows an Xrestpos stored in a short in R40
--- a/src/dot.mkshrc
+++ b/src/dot.mkshrc
@ -1,5 +1,5 @@
 # $Id$
-# $MirOS: src/bin/mksh/dot.mkshrc,v 1.114 2017/03/19 22:31:26 tg Exp $
+# $MirOS: src/bin/mksh/dot.mkshrc,v 1.121 2017/08/08 21:10:21 tg Exp $
 #-
 # Copyright (c) 2002, 2003, 2004, 2006, 2007, 2008, 2009, 2010,
 #		2011, 2012, 2013, 2014, 2015, 2016, 2017
@ -56,6 +56,13 @@ smores() (
 	done
 )

+# customise your favourite editor here; the first one found is used
+for EDITOR in "${EDITOR:-}" jupp jstar mcedit ed vi; do	# a preset EDITOR value is the first candidate
+	EDITOR=$(\\builtin whence -p "$EDITOR") || EDITOR=	# replace the name by what whence -p prints, or empty if it fails
+	[[ -n $EDITOR && -x $EDITOR ]] && break	# stop at the first non-empty result that passes the -x test
+	EDITOR=	# candidate rejected; if all are, EDITOR stays empty for the EDITOR:=/bin/ed default below
+done
+
 \\builtin alias ls=ls l='ls -F' la='l -a' ll='l -l' lo='l -alo'
 \: "${HOSTNAME:=$(\\builtin ulimit -c 0; \\builtin print -r -- $(hostname \
    2>/dev/null))}${EDITOR:=/bin/ed}${TERM:=vt100}${USER:=$(\\builtin ulimit \
@ -73,6 +80,7 @@ fi
 \: "${MKSH:=$(\\builtin whence -p mksh)}${MKSH:=/bin/mksh}"
 \\builtin export MKSH

+# prompts
 PS4='[$EPOCHREALTIME] '; PS1='#'; (( USER_ID )) && PS1='$'; PS1=$'\001\r''${|
 	\\builtin typeset e=$?

@ -86,6 +94,8 @@ PS4='[$EPOCHREALTIME] '; PS1='#'; (( USER_ID )) && PS1='$'; PS1=$'\001\r''${|

 	\\builtin return $e
 } '"$PS1 "
+
+# utilities
 \\builtin alias doch='sudo mksh -c "$(\\builtin fc -ln -1)"'
 \\builtin command -v rot13 >/dev/null || \\builtin alias rot13='tr \
    abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ \
@ -99,42 +109,47 @@ elif \\builtin command -v hexdump >/dev/null; then
 	}
 else
 	function hd {
-		\\builtin typeset -Uui16 -Z11 pos=0
-		\\builtin typeset -Uui16 -Z5 hv=2147483647
-		\\builtin typeset dasc line i
-		\\builtin set +U
-
-		\\builtin cat "$@" | if \\builtin read -arN -1 line; then
-			\\builtin typeset -i1 'line[*]'
-			i=0
-			while (( i < ${#line[*]} )); do
-				hv=${line[i++]}
-				if (( (pos & 15) == 0 )); then
-					(( pos )) && \
-					    \\builtin print -r -- "$dasc|"
-					\\builtin print -nr "${pos#16#}  "
-					dasc=' |'
-				fi
-				\\builtin print -nr "${hv#16#} "
-				#XXX EBCDIC, but we need [[:print:]] to fix this
-				if (( (hv < 32) || (hv > 126) )); then
-					dasc+=.
-				else
-					dasc+=${line[i-1]#1#}
-				fi
-				(( (pos++ & 15) == 7 )) && \
-				    \\builtin print -nr -- '- '
-			done
-			while (( pos & 15 )); do
-				\\builtin print -nr '   '
-				(( (pos++ & 15) == 7 )) && \
-				    \\builtin print -nr -- '- '
-			done
-			(( hv == 2147483647 )) || \\builtin print -r -- "$dasc|"
-		fi
+		\\builtin cat "$@" | hd_mksh "$@"
 	}
 fi

+# NUL-safe and EBCDIC-safe hexdump (from stdin): per 16-octet row, prints the offset, the octets in hex and a |text| column
+function hd_mksh {
+	\\builtin typeset -Uui16 -Z11 pos=0	# running octet offset, shown at the start of every row
+	\\builtin typeset -Uui16 -Z5 hv=2147483647	# current octet value; the initial value doubles as a nothing-processed-yet sentinel (tested on the last line)
+	\\builtin typeset dasc dn line i
+	\\builtin set +U	# utf8-mode off (octets rather than characters)
+
+	while \\builtin read -arn 512 line; do	# next chunk of stdin into array line; presumably at most 512 octets per read (confirm against the read builtin)
+		\\builtin typeset -i1 'line[*]'	# base-1 integers, so that stripping the 1# prefix below gives the raw character back
+		i=0
+		while (( i < ${#line[*]} )); do
+			dn=	# stays empty for a NUL octet, which then takes the dot branch below
+			(( (hv = line[i++]) != 0 )) && dn=${line[i-1]#1#}
+			if (( (pos & 15) == 0 )); then	# first octet of a row: finish the previous row, then emit the new offset
+				(( pos )) && \
+				    \\builtin print -r -- "$dasc|"
+				\\builtin print -nr "${pos#16#}  "
+				dasc=' |'
+			fi
+			\\builtin print -nr "${hv#16#} "
+			if [[ $dn = [[:print:]] ]]; then	# printable characters go into the text column verbatim, everything else as a dot
+				dasc+=$dn
+			else
+				dasc+=.
+			fi
+			(( (pos++ & 15) == 7 )) && \
+			    \\builtin print -nr -- '- '
+		done
+	done
+	while (( pos & 15 )); do	# pad a short final row so its text column lines up with full rows
+		\\builtin print -nr '   '
+		(( (pos++ & 15) == 7 )) && \
+		    \\builtin print -nr -- '- '
+	done
+	(( hv == 2147483647 )) || \\builtin print -r -- "$dasc|"	# flush the last row unless hv still holds the sentinel
+}
+
 # Berkeley C shell compatible dirs, popd, and pushd functions
 # Z shell compatible chpwd() hook, used to update DIRSTACK[0]
 DIRSTACKBASE=$(\\builtin realpath ~/. 2>/dev/null || \
@ -483,6 +498,7 @@ function enable {
 	i_func[nfunc++]=setenv
 	i_func[nfunc++]=smores
 	i_func[nfunc++]=hd
+	i_func[nfunc++]=hd_mksh
 	i_func[nfunc++]=chpwd
 	i_func[nfunc++]=cd
 	i_func[nfunc++]=cd_csh
@ -588,6 +604,11 @@ function enable {

 \: place customisations below this line

+# some defaults follow — you are supposed to adjust these to your
+# liking; by default we add ~/.etc/bin and ~/bin (whichever exist)
+# to $PATH, set $SHELL to mksh, set some defaults for man and less
+# and show a few more possible things for users to begin moving in
+
 for p in ~/.etc/bin ~/bin; do
 	[[ -d $p/. ]] || \\builtin continue
 	[[ $PATHSEP$PATH$PATHSEP = *"$PATHSEP$p$PATHSEP"* ]] || \
--- a/src/edit.c
+++ b/src/edit.c
--- a/src/eval.c
+++ b/src/eval.c
@ -23,7 +23,7 @@

 #include "sh.h"

-__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.201 2017/04/06 01:59:54 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/eval.c,v 1.215 2017/08/28 23:27:51 tg Exp $");

 /*
 * string expansion
@ -65,6 +65,12 @@ typedef struct {
 #define IFS_IWS		3	/* beginning of word, ignore IFS WS */
 #define IFS_QUOTE	4	/* beg.w/quote, become IFS_WORD unless "$@" */

+#define STYPE_CHAR	0xFF	/* low byte: the substitution operator character */
+#define STYPE_DBL	0x100	/* flag: ':' prefix or doubled operator (##, %%, //) */
+#define STYPE_AT	0x200	/* flag: ${var@x} form, x being the command char */
+#define STYPE_SINGLE	0x2FF	/* STYPE_CHAR | STYPE_AT, i.e. stype without STYPE_DBL */
+#define STYPE_MASK	0x300	/* STYPE_DBL | STYPE_AT, i.e. both flag bits */
+
 static int varsub(Expand *, const char *, const char *, int *, int *);
 static int comsub(Expand *, const char *, int);
 static char *valsub(struct op *, Area *);
@ -277,18 +283,18 @@ expand(
 		switch (type) {
 		case XBASE:
 			/* original prefixed string */
-			c = *sp++;
+			c = ord(*sp++);
 			switch (c) {
 			case EOS:
 				c = 0;
 				break;
 			case CHAR:
-				c = *sp++;
+				c = ord(*sp++);
 				break;
 			case QCHAR:
 				/* temporary quote */
 				quote |= 2;
-				c = *sp++;
+				c = ord(*sp++);
 				break;
 			case OQUOTE:
 				if (word != IFS_WORD)
@ -314,21 +320,21 @@ expand(
 					case COMASUB:
 					case COMSUB:
 						*dp++ = '(';
-						c = ')';
+						c = ord(')');
 						break;
 					case FUNASUB:
 					case FUNSUB:
 					case VALSUB:
 						*dp++ = '{';
 						*dp++ = c == VALSUB ? '|' : ' ';
-						c = '}';
+						c = ord('}');
 						break;
 					}
 					while (*sp != '\0') {
 						Xcheck(ds, dp);
 						*dp++ = *sp++;
 					}
-					if (c == '}')
+					if (c == ord('}'))
 						*dp++ = ';';
 					*dp++ = c;
 				} else {
@ -429,12 +435,12 @@ expand(
 					/* skip qualifier(s) */
 					if (stype)
 						sp += slen;
-					switch (stype & 0x17F) {
-					case 0x100 | '#':
+					switch (stype & STYPE_SINGLE) {
+					case ord('#') | STYPE_AT:
 						x.str = shf_smprintf("%08X",
 						    (unsigned int)hash(str_val(st->var)));
 						break;
-					case 0x100 | 'Q': {
+					case ord('Q') | STYPE_AT: {
 						struct shf shf;

 						shf_sopen(NULL, 0, SHF_WR|SHF_DYNAMIC, &shf);
@ -442,7 +448,7 @@ expand(
 						x.str = shf_sclose(&shf);
 						break;
 					    }
-					case '0': {
+					case ord('0'): {
 						char *beg, *mid, *end, *stg;
 						mksh_ari_t from = 0, num = -1, flen, finc = 0;

@ -450,13 +456,13 @@ expand(
 						mid = beg + (wdscan(sp, ADELIM) - sp);
 						stg = beg + (wdscan(sp, CSUBST) - sp);
 						mid[-2] = EOS;
-						if (mid[-1] == /*{*/'}') {
+						if (ord(mid[-1]) == ord(/*{*/ '}')) {
 							sp += mid - beg - 1;
 							end = NULL;
 						} else {
 							end = mid +
 							    (wdscan(mid, ADELIM) - mid);
-							if (end[-1] != /*{*/ '}')
+							if (ord(end[-1]) != ord(/*{*/ '}'))
 								/* more than max delimiters */
 								goto unwind_substsyn;
 							end[-2] = EOS;
@ -489,8 +495,8 @@ expand(
 						strndupx(x.str, beg, num, ATEMP);
 						goto do_CSUBST;
 					    }
-					case 0x100 | '/':
-					case '/': {
+					case ord('/') | STYPE_AT:
+					case ord('/'): {
 						char *s, *p, *d, *sbeg, *end;
 						char *pat = NULL, *rrep = null;
 						char fpat = 0, *tpat1, *tpat2;
@ -500,18 +506,18 @@ expand(
 						p = s + (wdscan(sp, ADELIM) - sp);
 						d = s + (wdscan(sp, CSUBST) - sp);
 						p[-2] = EOS;
-						if (p[-1] == /*{*/'}')
+						if (ord(p[-1]) == ord(/*{*/ '}'))
 							d = NULL;
 						else
 							d[-2] = EOS;
 						sp += (d ? d : p) - s - 1;
-						if (!(stype & 0x180) &&
+						if (!(stype & STYPE_MASK) &&
 						    s[0] == CHAR &&
-						    (s[1] == '#' || s[1] == '%'))
+						    ctype(s[1], C_SUB2))
 							fpat = s[1];
 						wpat = s + (fpat ? 2 : 0);
 						wrep = d ? p : NULL;
-						if (!(stype & 0x100)) {
+						if (!(stype & STYPE_AT)) {
 							rrep = wrep ? evalstr(wrep,
 							    DOTILDE | DOSCALAR) :
 							    null;
@ -531,21 +537,21 @@ expand(
 							 */
 							goto no_repl;
 						}
-						if ((stype & 0x180) &&
+						if ((stype & STYPE_MASK) &&
 						    gmatchx(null, pat, false)) {
 							/*
 							 * pattern matches empty
 							 * string => don't loop
 							 */
-							stype &= ~0x180;
+							stype &= ~STYPE_MASK;
 						}

 						/* first see if we have any match at all */
-						if (fpat == '#') {
+						if (ord(fpat) == ord('#')) {
 							/* anchor at the beginning */
 							tpat1 = shf_smprintf("%s%c*", pat, MAGIC);
 							tpat2 = tpat1;
-						} else if (fpat == '%') {
+						} else if (ord(fpat) == ord('%')) {
 							/* anchor at the end */
 							tpat1 = shf_smprintf("%c*%s", MAGIC, pat);
 							tpat2 = pat;
@ -563,7 +569,7 @@ expand(
 							goto end_repl;
 						end = strnul(s);
 						/* now anchor the beginning of the match */
-						if (fpat != '#')
+						if (ord(fpat) != ord('#'))
 							while (sbeg <= end) {
 								if (gmatchx(sbeg, tpat2, false))
 									break;
@ -572,7 +578,7 @@ expand(
 							}
 						/* now anchor the end of the match */
 						p = end;
-						if (fpat != '%')
+						if (ord(fpat) != ord('%'))
 							while (p >= sbeg) {
 								bool gotmatch;

@ -587,7 +593,7 @@ expand(
 						strndupx(end, sbeg, p - sbeg, ATEMP);
 						record_match(end);
 						afree(end, ATEMP);
-						if (stype & 0x100) {
+						if (stype & STYPE_AT) {
 							if (rrep != null)
 								afree(rrep, ATEMP);
 							rrep = wrep ? evalstr(wrep,
@ -600,11 +606,11 @@ expand(
 						sbeg = d + (sbeg - s) + strlen(rrep);
 						afree(s, ATEMP);
 						s = d;
-						if (stype & 0x100) {
+						if (stype & STYPE_AT) {
 							afree(tpat1, ATEMP);
 							afree(pat, ATEMP);
 							goto again_search;
-						} else if (stype & 0x80)
+						} else if (stype & STYPE_DBL)
 							goto again_repl;
 end_repl:
 						afree(tpat1, ATEMP);
@ -616,8 +622,8 @@ expand(
 						afree(ws, ATEMP);
 						goto do_CSUBST;
 					    }
-					case '#':
-					case '%':
+					case ord('#'):
+					case ord('%'):
 						/* ! DOBLANK,DOBRACE */
 						f = (f & DONTRUNCOMMAND) |
 						    DOPAT | DOTILDE |
@ -631,10 +637,10 @@ expand(
 						 */
 						if (!Flag(FSH)) {
 							*dp++ = MAGIC;
-							*dp++ = 0x80 | '@';
+							*dp++ = ord(0x80 | '@');
 						}
 						break;
-					case '=':
+					case ord('='):
 						/*
 						 * Tilde expansion for string
 						 * variables in POSIX mode is
@ -658,7 +664,7 @@ expand(
 						f &= ~(DOBLANK|DOGLOB|DOBRACE);
 						tilde_ok = 1;
 						break;
-					case '?':
+					case ord('?'):
 						if (*sp == CSUBST)
 							errorf("%s: parameter null or not set",
 							    st->var->name);
@ -692,9 +698,9 @@ expand(
 				f = st->f;
 				if (f & DOBLANK)
 					doblank--;
-				switch (st->stype & 0x17F) {
-				case '#':
-				case '%':
+				switch (st->stype & STYPE_SINGLE) {
+				case ord('#'):
+				case ord('%'):
 					if (!Flag(FSH)) {
 						/* Append end-pattern */
 						*dp++ = MAGIC;
@ -724,7 +730,7 @@ expand(
 						doblank++;
 					st = st->prev;
 					continue;
-				case '=':
+				case ord('='):
 					/*
 					 * Restore our position and substitute
 					 * the value of st->var (may not be
@ -757,17 +763,17 @@ expand(
 					st = st->prev;
 					word = quote || (!*x.str && (f & DOSCALAR)) ? IFS_WORD : IFS_IWS;
 					continue;
-				case '?':
+				case ord('?'):
 					dp = Xrestpos(ds, dp, st->base);

 					errorf(Tf_sD_s, st->var->name,
 					    debunk(dp, dp, strlen(dp) + 1));
 					break;
-				case '0':
-				case 0x100 | '/':
-				case '/':
-				case 0x100 | '#':
-				case 0x100 | 'Q':
+				case ord('0'):
+				case ord('/') | STYPE_AT:
+				case ord('/'):
+				case ord('#') | STYPE_AT:
+				case ord('Q') | STYPE_AT:
 					dp = Xrestpos(ds, dp, st->base);
 					type = XSUB;
 					word = quote || (!*x.str && (f & DOSCALAR)) ? IFS_WORD : IFS_IWS;
@ -845,7 +851,7 @@ expand(
 						doblank--;
 					continue;
 				}
-				c = ifs0;
+				c = ord(ifs0);
 				if ((f & DOHEREDOC)) {
 					/* pseudo-field-split reliably */
 					if (c == 0)
@ -891,10 +897,7 @@ expand(
 				--newlines;
 			} else {
 				while ((c = shf_getc(x.u.shf)) == 0 ||
-#ifdef MKSH_WITH_TEXTMODE
-				       c == '\r' ||
-#endif
-				       c == '\n') {
+				    ctype(c, C_NL)) {
 #ifdef MKSH_WITH_TEXTMODE
 					if (c == '\r') {
 						c = shf_getc(x.u.shf);
@ -999,11 +1002,11 @@ expand(
 			tilde_ok <<= 1;
 			/* mark any special second pass chars */
 			if (!quote)
-				switch (c) {
-				case '[':
-				case '!':
-				case '-':
-				case ']':
+				switch (ord(c)) {
+				case ord('['):
+				case ord('!'):
+				case ord('-'):
+				case ord(']'):
 					/*
 					 * For character classes - doesn't hurt
 					 * to have magic !,-,]s outside of
@ -1011,28 +1014,29 @@ expand(
 					 */
 					if (f & (DOPAT | DOGLOB)) {
 						fdo |= DOMAGIC;
-						if (c == '[')
+						if (c == ord('['))
 							fdo |= f & DOGLOB;
 						*dp++ = MAGIC;
 					}
 					break;
-				case '*':
-				case '?':
+				case ord('*'):
+				case ord('?'):
 					if (f & (DOPAT | DOGLOB)) {
 						fdo |= DOMAGIC | (f & DOGLOB);
 						*dp++ = MAGIC;
 					}
 					break;
-				case '{':
-				case '}':
-				case ',':
-					if ((f & DOBRACE) && (c == '{' /*}*/ ||
+				case ord('{'):
+				case ord('}'):
+				case ord(','):
+					if ((f & DOBRACE) &&
+					    (ord(c) == ord('{' /*}*/) ||
 					    (fdo & DOBRACE))) {
 						fdo |= DOBRACE|DOMAGIC;
 						*dp++ = MAGIC;
 					}
 					break;
-				case '=':
+				case ord('='):
 					/* Note first unquoted = for ~ */
 					if (!(f & DOTEMP) && (!Flag(FPOSIX) ||
 					    (f & DOASNTILDE)) && !saw_eq) {
@ -1040,13 +1044,13 @@ expand(
 						tilde_ok = 1;
 					}
 					break;
-				case ':':
+				case ord(':'):
 					/* : */
 					/* Note unquoted : for ~ */
 					if (!(f & DOTEMP) && (f & DOASNTILDE))
 						tilde_ok = 1;
 					break;
-				case '~':
+				case ord('~'):
 					/*
 					 * tilde_ok is reset whenever
 					 * any of ' " $( $(( ${ } are seen.
@ -1118,7 +1122,7 @@ varsub(Expand *xp, const char *sp, const char *word,
 	struct tbl *vp;
 	bool zero_ok = false;

-	if ((stype = sp[0]) == '\0')
+	if ((stype = ord(sp[0])) == '\0')
 		/* Bad variable name */
 		return (-1);

@ -1128,20 +1132,20 @@ varsub(Expand *xp, const char *sp, const char *word,
 	 * ${#var}, string length (-U: characters, +U: octets) or array size
 	 * ${%var}, string width (-U: screen columns, +U: octets)
 	 */
-	c = sp[1];
-	if (stype == '%' && c == '\0')
+	c = ord(sp[1]);
+	if (stype == ord('%') && c == '\0')
 		return (-1);
-	if ((stype == '#' || stype == '%') && c != '\0') {
+	if (ctype(stype, C_SUB2) && c != '\0') {
 		/* Can't have any modifiers for ${#...} or ${%...} */
 		if (*word != CSUBST)
 			return (-1);
 		sp++;
 		/* Check for size of array */
-		if ((p = cstrchr(sp, '[')) && (p[1] == '*' || p[1] == '@') &&
-		    p[2] == ']') {
+		if ((p = cstrchr(sp, '[')) && (ord(p[1]) == ord('*') ||
+		    ord(p[1]) == ord('@')) && ord(p[2]) == ord(']')) {
 			int n = 0;

-			if (stype != '#')
+			if (stype != ord('#'))
 				return (-1);
 			vp = global(arrayname(sp));
 			if (vp->flag & (ISSET|ARRAY))
@ -1150,14 +1154,14 @@ varsub(Expand *xp, const char *sp, const char *word,
 				if (vp->flag & ISSET)
 					n++;
 			c = n;
-		} else if (c == '*' || c == '@') {
-			if (stype != '#')
+		} else if (c == ord('*') || c == ord('@')) {
+			if (stype != ord('#'))
 				return (-1);
 			c = e->loc->argc;
 		} else {
 			p = str_val(global(sp));
 			zero_ok = p != null;
-			if (stype == '#')
+			if (stype == ord('#'))
 				c = utflen(p);
 			else {
 				/* partial utf_mbswidth reimplementation */
@ -1171,7 +1175,7 @@ varsub(Expand *xp, const char *sp, const char *word,
 					if (!UTFMODE || (len = utf_mbtowc(&wc,
 					    s)) == (size_t)-1)
 						/* not UTFMODE or not UTF-8 */
-						wc = (unsigned char)(*s++);
+						wc = rtt2asc(*s++);
 					else
 						/* UTFMODE and UTF-8 */
 						s += len;
@ -1192,11 +1196,11 @@ varsub(Expand *xp, const char *sp, const char *word,
 		xp->str = shf_smprintf(Tf_d, c);
 		return (XSUB);
 	}
-	if (stype == '!' && c != '\0' && *word == CSUBST) {
+	if (stype == ord('!') && c != '\0' && *word == CSUBST) {
 		sp++;
-		if ((p = cstrchr(sp, '[')) && (p[1] == '*' || p[1] == '@') &&
-		    p[2] == ']') {
-			c = '!';
+		if ((p = cstrchr(sp, '[')) && (ord(p[1]) == ord('*') ||
+		    ord(p[1]) == ord('@')) && ord(p[2]) == ord(']')) {
+			c = ord('!');
 			stype = 0;
 			goto arraynames;
 		}
@ -1209,43 +1213,46 @@ varsub(Expand *xp, const char *sp, const char *word,

 	/* Check for qualifiers in word part */
 	stype = 0;
-	c = word[slen + 0] == CHAR ? word[slen + 1] : 0;
-	if (c == ':') {
+	c = word[slen + 0] == CHAR ? ord(word[slen + 1]) : 0;
+	if (c == ord(':')) {
 		slen += 2;
-		stype = 0x80;
-		c = word[slen + 0] == CHAR ? word[slen + 1] : 0;
+		stype = STYPE_DBL;
+		c = word[slen + 0] == CHAR ? ord(word[slen + 1]) : 0;
 	}
-	if (!stype && c == '/') {
+	if (!stype && c == ord('/')) {
 		slen += 2;
 		stype = c;
-		if (word[slen] == ADELIM && word[slen + 1] == c) {
+		if (word[slen] == ADELIM &&
+		    ord(word[slen + 1]) == (unsigned int)c) {
 			slen += 2;
-			stype |= 0x80;
+			stype |= STYPE_DBL;
 		}
-	} else if (stype == 0x80 && (c == ' ' || c == '0')) {
-		stype |= '0';
-	} else if (ctype(c, C_SUBOP1)) {
+	} else if (stype == STYPE_DBL && (c == ord(' ') || c == ord('0'))) {
+		stype |= ord('0');
+	} else if (ctype(c, C_SUB1)) {
 		slen += 2;
 		stype |= c;
-	} else if (ksh_issubop2(c)) {
+	} else if (ctype(c, C_SUB2)) {
 		/* Note: ksh88 allows :%, :%%, etc */
 		slen += 2;
 		stype = c;
-		if (word[slen + 0] == CHAR && c == word[slen + 1]) {
-			stype |= 0x80;
+		if (word[slen + 0] == CHAR &&
+		    ord(word[slen + 1]) == (unsigned int)c) {
+			stype |= STYPE_DBL;
 			slen += 2;
 		}
-	} else if (c == '@') {
+	} else if (c == ord('@')) {
 		/* @x where x is command char */
-		switch (c = word[slen + 2] == CHAR ? word[slen + 3] : 0) {
-		case '#':
-		case '/':
-		case 'Q':
+		switch (c = ord(word[slen + 2]) == CHAR ?
+		    ord(word[slen + 3]) : 0) {
+		case ord('#'):
+		case ord('/'):
+		case ord('Q'):
 			break;
 		default:
 			return (-1);
 		}
-		stype |= 0x100 | c;
+		stype |= STYPE_AT | c;
 		slen += 4;
 	} else if (stype)
 		/* : is not ok */
@ -1253,51 +1260,51 @@ varsub(Expand *xp, const char *sp, const char *word,
 	if (!stype && *word != CSUBST)
 		return (-1);

-	c = sp[0];
-	if (c == '*' || c == '@') {
-		switch (stype & 0x17F) {
+	c = ord(sp[0]);
+	if (c == ord('*') || c == ord('@')) {
+		switch (stype & STYPE_SINGLE) {
 		/* can't assign to a vector */
-		case '=':
+		case ord('='):
 		/* can't trim a vector (yet) */
-		case '%':
-		case '#':
-		case '?':
-		case '0':
-		case 0x100 | '/':
-		case '/':
-		case 0x100 | '#':
-		case 0x100 | 'Q':
+		case ord('%'):
+		case ord('#'):
+		case ord('?'):
+		case ord('0'):
+		case ord('/') | STYPE_AT:
+		case ord('/'):
+		case ord('#') | STYPE_AT:
+		case ord('Q') | STYPE_AT:
 			return (-1);
 		}
 		if (e->loc->argc == 0) {
 			xp->str = null;
 			xp->var = global(sp);
-			state = c == '@' ? XNULLSUB : XSUB;
+			state = c == ord('@') ? XNULLSUB : XSUB;
 		} else {
 			xp->u.strv = (const char **)e->loc->argv + 1;
 			xp->str = *xp->u.strv++;
 			/* $@ */
-			xp->split = tobool(c == '@');
+			xp->split = tobool(c == ord('@'));
 			state = XARG;
 		}
 		/* POSIX 2009? */
 		zero_ok = true;
-	} else if ((p = cstrchr(sp, '[')) && (p[1] == '*' || p[1] == '@') &&
-	    p[2] == ']') {
+	} else if ((p = cstrchr(sp, '[')) && (ord(p[1]) == ord('*') ||
+	    ord(p[1]) == ord('@')) && ord(p[2]) == ord(']')) {
 		XPtrV wv;

-		switch (stype & 0x17F) {
+		switch (stype & STYPE_SINGLE) {
 		/* can't assign to a vector */
-		case '=':
+		case ord('='):
 		/* can't trim a vector (yet) */
-		case '%':
-		case '#':
-		case '?':
-		case '0':
-		case 0x100 | '/':
-		case '/':
-		case 0x100 | '#':
-		case 0x100 | 'Q':
+		case ord('%'):
+		case ord('#'):
+		case ord('?'):
+		case ord('0'):
+		case ord('/') | STYPE_AT:
+		case ord('/'):
+		case ord('#') | STYPE_AT:
+		case ord('Q') | STYPE_AT:
 			return (-1);
 		}
 		c = 0;
@ -1307,45 +1314,45 @@ varsub(Expand *xp, const char *sp, const char *word,
 		for (; vp; vp = vp->u.array) {
 			if (!(vp->flag&ISSET))
 				continue;
-			XPput(wv, c == '!' ? shf_smprintf(Tf_lu,
+			XPput(wv, c == ord('!') ? shf_smprintf(Tf_lu,
 			    arrayindex(vp)) :
 			    str_val(vp));
 		}
 		if (XPsize(wv) == 0) {
 			xp->str = null;
-			state = p[1] == '@' ? XNULLSUB : XSUB;
+			state = ord(p[1]) == ord('@') ? XNULLSUB : XSUB;
 			XPfree(wv);
 		} else {
 			XPput(wv, 0);
 			xp->u.strv = (const char **)XPptrv(wv);
 			xp->str = *xp->u.strv++;
 			/* ${foo[@]} */
-			xp->split = tobool(p[1] == '@');
+			xp->split = tobool(ord(p[1]) == ord('@'));
 			state = XARG;
 		}
 	} else {
 		xp->var = global(sp);
 		xp->str = str_val(xp->var);
 		/* can't assign things like $! or $1 */
-		if ((stype & 0x17F) == '=' && !*xp->str &&
+		if ((stype & STYPE_SINGLE) == ord('=') && !*xp->str &&
 		    ctype(*sp, C_VAR1 | C_DIGIT))
 			return (-1);
 		state = XSUB;
 	}

-	c = stype & 0x7F;
+	c = stype & STYPE_CHAR;
 	/* test the compiler's code generator */
-	if (((stype < 0x100) && (ksh_issubop2(c) ||
-	    (((stype & 0x80) ? *xp->str == '\0' : xp->str == null) &&
+	if ((!(stype & STYPE_AT) && (ctype(c, C_SUB2) ||
+	    (((stype & STYPE_DBL) ? *xp->str == '\0' : xp->str == null) &&
 	    (state != XARG || (ifs0 || xp->split ?
 	    (xp->u.strv[0] == NULL) : !hasnonempty(xp->u.strv))) ?
-	    c == '=' || c == '-' || c == '?' : c == '+'))) ||
-	    stype == (0x80 | '0') || stype == (0x100 | '#') ||
-	    stype == (0x100 | 'Q') || (stype & 0x7F) == '/')
+	    ctype(c, C_EQUAL | C_MINUS | C_QUEST) : c == ord('+')))) ||
+	    stype == (ord('0') | STYPE_DBL) || stype == (ord('#') | STYPE_AT) ||
+	    stype == (ord('Q') | STYPE_AT) || (stype & STYPE_CHAR) == ord('/'))
 		/* expand word instead of variable value */
 		state = XBASE;
 	if (Flag(FNOUNSET) && xp->str == null && !zero_ok &&
-	    (ksh_issubop2(c) || (state != XBASE && c != '+')))
+	    (ctype(c, C_SUB2) || (state != XBASE && c != ord('+'))))
 		errorf(Tf_parm, sp);
 	*stypep = stype;
 	*slenp = slen;
@ -1408,7 +1415,7 @@ comsub(Expand *xp, const char *cp, int fn)
 			if (!herein(io, &name)) {
 				xp->str = name;
 				/* as $(…) requires, trim trailing newlines */
-				name += strlen(name);
+				name = strnul(name);
 				while (name > xp->str && name[-1] == '\n')
 					--name;
 				*name = '\0';
@ -1483,8 +1490,8 @@ trimsub(char *str, char *pat, int how)
 	char *end = strnul(str);
 	char *p, c;

-	switch (how & 0xFF) {
-	case '#':
+	switch (how & (STYPE_CHAR | STYPE_DBL)) {
+	case ord('#'):
 		/* shortest match at beginning */
 		for (p = str; p <= end; p += utf_ptradj(p)) {
 			c = *p; *p = '\0';
@ -1496,7 +1503,7 @@ trimsub(char *str, char *pat, int how)
 			*p = c;
 		}
 		break;
-	case '#'|0x80:
+	case ord('#') | STYPE_DBL:
 		/* longest match at beginning */
 		for (p = end; p >= str; p--) {
 			c = *p; *p = '\0';
@ -1508,7 +1515,7 @@ trimsub(char *str, char *pat, int how)
 			*p = c;
 		}
 		break;
-	case '%':
+	case ord('%'):
 		/* shortest match at end */
 		p = end;
 		while (p >= str) {
@ -1516,7 +1523,7 @@ trimsub(char *str, char *pat, int how)
 				goto trimsub_match;
 			if (UTFMODE) {
 				char *op = p;
-				while ((p-- > str) && ((*p & 0xC0) == 0x80))
+				while ((p-- > str) && ((rtt2asc(*p) & 0xC0) == 0x80))
 					;
 				if ((p < str) || (p + utf_ptradj(p) != op))
 					p = op - 1;
@ -1524,7 +1531,7 @@ trimsub(char *str, char *pat, int how)
 				--p;
 		}
 		break;
-	case '%'|0x80:
+	case ord('%') | STYPE_DBL:
 		/* longest match at end */
 		for (p = str; p <= end; p++)
 			if (gmatchx(p, pat, false)) {
@ -1555,7 +1562,7 @@ glob(char *cp, XPtrV *wp, bool markdirs)
 		XPput(*wp, debunk(cp, cp, strlen(cp) + 1));
 	else
 		qsort(XPptrv(*wp) + oldsize, XPsize(*wp) - oldsize,
-		    sizeof(void *), xstrcmp);
+		    sizeof(void *), ascpstrcmp);
 }

 #define GF_NONE		0
@ -1658,7 +1665,7 @@ globit(XString *xs,	/* dest string */
 		*np++ = '\0';
 	} else {
 		odirsep = '\0'; /* keep gcc quiet */
-		se = sp + strlen(sp);
+		se = strnul(sp);
 	}


@ -1669,10 +1676,10 @@ globit(XString *xs,	/* dest string */
 	 * directory isn't readable - if no globbing is needed, only execute
 	 * permission should be required (as per POSIX)).
 	 */
-	if (!has_globbing(sp, se)) {
+	if (!has_globbing(sp)) {
 		XcheckN(*xs, xp, se - sp + 1);
 		debunk(xp, sp, Xnleft(*xs, xp));
-		xp += strlen(xp);
+		xp = strnul(xp);
 		*xpp = xp;
 		globit(xs, xpp, np, wp, check);
 	} else {
@ -1701,9 +1708,8 @@ globit(XString *xs,	/* dest string */
 			XcheckN(*xs, xp, len);
 			memcpy(xp, name, len);
 			*xpp = xp + len - 1;
-			globit(xs, xpp, np, wp,
-				(check & GF_MARKDIR) | GF_GLOBBED
-				| (np ? GF_EXCHECK : GF_NONE));
+			globit(xs, xpp, np, wp, (check & GF_MARKDIR) |
+			    GF_GLOBBED | (np ? GF_EXCHECK : GF_NONE));
 			xp = Xstring(*xs, xp) + prefix_len;
 		}
 		closedir(dirp);
@ -1728,7 +1734,7 @@ debunk(char *dp, const char *sp, size_t dlen)
 		memmove(dp, sp, s - sp);
 		for (d = dp + (s - sp); *s && (d - dp < (ssize_t)dlen); s++)
 			if (!ISMAGIC(*s) || !(*++s & 0x80) ||
-			    !vstrchr("*+?@! ", *s & 0x7f))
+			    !ctype(*s & 0x7F, C_PATMO | C_SPC))
 				*d++ = *s;
 			else {
 				/* extended pattern operators: *+?@! */
@ -1857,7 +1863,7 @@ alt_expand(XPtrV *wp, char *start, char *exp_start, char *end, int fdo)
 	char *p = exp_start;

 	/* search for open brace */
-	while ((p = strchr(p, MAGIC)) && p[1] != '{' /*}*/)
+	while ((p = strchr(p, MAGIC)) && ord(p[1]) != ord('{' /*}*/))
 		p += 2;
 	brace_start = p;

@ -1868,9 +1874,9 @@ alt_expand(XPtrV *wp, char *start, char *exp_start, char *end, int fdo)
 		p += 2;
 		while (*p && count) {
 			if (ISMAGIC(*p++)) {
-				if (*p == '{' /*}*/)
+				if (ord(*p) == ord('{' /*}*/))
 					++count;
-				else if (*p == /*{*/ '}')
+				else if (ord(*p) == ord(/*{*/ '}'))
 					--count;
 				else if (*p == ',' && count == 1)
 					comma = p;
@ -1902,9 +1908,9 @@ alt_expand(XPtrV *wp, char *start, char *exp_start, char *end, int fdo)
 	count = 1;
 	for (p = brace_start + 2; p != brace_end; p++) {
 		if (ISMAGIC(*p)) {
-			if (*++p == '{' /*}*/)
+			if (ord(*++p) == ord('{' /*}*/))
 				++count;
-			else if ((*p == /*{*/ '}' && --count == 0) ||
+			else if ((ord(*p) == ord(/*{*/ '}') && --count == 0) ||
 			    (*p == ',' && count == 1)) {
 				char *news;
 				int l1, l2, l3;
--- a/src/exec.c
+++ b/src/exec.c
@ -23,7 +23,7 @@

 #include "sh.h"

-__RCSID("$MirOS: src/bin/mksh/exec.c,v 1.196 2017/04/12 16:46:21 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/exec.c,v 1.199 2017/08/07 21:16:31 tg Exp $");

 #ifndef MKSH_DEFAULT_EXECSHELL
 #define MKSH_DEFAULT_EXECSHELL	MKSH_UNIXROOT "/bin/sh"
@ -554,6 +554,9 @@ comexec(struct op *t, struct tbl * volatile tp, const char **ap,
 				}
 			ap += builtin_opt.optind;
 			flags |= XEXEC;
+			/* POSuX demands ksh88-like behaviour here */
+			if (Flag(FPOSIX))
+				fcflags = FC_PATH;
 		} else if (tp->val.f == c_command) {
 			bool saw_p = false;

@ -885,7 +888,9 @@ scriptexec(struct op *tp, const char **ap)
 #ifndef MKSH_SMALL
 	if ((fd = binopen2(tp->str, O_RDONLY)) >= 0) {
 		unsigned char *cp;
+#ifndef MKSH_EBCDIC
 		unsigned short m;
+#endif
 		ssize_t n;

 #if defined(__OS2__) && defined(MKSH_WITH_TEXTMODE)
@ -905,7 +910,7 @@ scriptexec(struct op *tp, const char **ap)
 		    (buf[2] == 0xBF)) ? 3 : 0);

 		/* scan for newline or NUL (end of buffer) */
-		while (*cp && *cp != '\n')
+		while (!ctype(*cp, C_NL | C_NUL))
 			++cp;
 		/* if the shebang line is longer than MAXINTERP, bail out */
 		if (!*cp)
@ -920,13 +925,13 @@ scriptexec(struct op *tp, const char **ap)
 			cp += 2;
 #ifdef __OS2__
 		else if (!strncmp(cp, Textproc, 7) &&
-		    (cp[7] == ' ' || cp[7] == '\t'))
+		    ctype(cp[7], C_BLANK))
 			cp += 8;
 #endif
 		else
 			goto noshebang;
 		/* skip whitespace before shell name */
-		while (*cp == ' ' || *cp == '\t')
+		while (ctype(*cp, C_BLANK))
 			++cp;
 		/* just whitespace on the line? */
 		if (*cp == '\0')
@ -934,13 +939,13 @@ scriptexec(struct op *tp, const char **ap)
 		/* no, we actually found an interpreter name */
 		sh = (char *)cp;
 		/* look for end of shell/interpreter name */
-		while (*cp != ' ' && *cp != '\t' && *cp != '\0')
+		while (!ctype(*cp, C_BLANK | C_NUL))
 			++cp;
 		/* any arguments? */
 		if (*cp) {
 			*cp++ = '\0';
 			/* skip spaces before arguments */
-			while (*cp == ' ' || *cp == '\t')
+			while (ctype(*cp, C_BLANK))
 				++cp;
 			/* pass it all in ONE argument (historic reasons) */
 			if (*cp)
@ -959,6 +964,7 @@ scriptexec(struct op *tp, const char **ap)
 #endif
 		goto nomagic;
 noshebang:
+#ifndef MKSH_EBCDIC
 		m = buf[0] << 8 | buf[1];
 		if (m == 0x7F45 && buf[2] == 'L' && buf[3] == 'F')
 			errorf("%s: not executable: %d-bit ELF file", tp->str,
@ -977,6 +983,7 @@ scriptexec(struct op *tp, const char **ap)
 		    buf[4] == 'Z') || (m == /* 7zip */ 0x377A) ||
 		    (m == /* gzip */ 0x1F8B) || (m == /* .Z */ 0x1F9D))
 			errorf("%s: not executable: magic %04X", tp->str, m);
+#endif
 #ifdef __OS2__
 		cp = _getext(tp->str);
 		if (cp && (!stricmp(cp, ".cmd") || !stricmp(cp, ".bat"))) {
@ -1337,7 +1344,7 @@ search_path(const char *name, const char *lpath,
 	while (sp != NULL) {
 		xp = Xstring(xs, xp);
 		if (!(p = cstrchr(sp, MKSH_PATHSEPC)))
-			p = sp + strlen(sp);
+			p = strnul(sp);
 		if (p != sp) {
 			XcheckN(xs, xp, p - sp);
 			memcpy(xp, sp, p - sp);
--- a/src/expr.c
+++ b/src/expr.c
@ -23,7 +23,7 @@

 #include "sh.h"

-__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.93 2017/04/02 16:47:41 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/expr.c,v 1.100 2017/08/07 21:38:55 tg Exp $");

 #define EXPRTOK_DEFNS
 #include "exprtok.h"
@ -558,9 +558,9 @@ exprtoken(Expr_state *es)

 	/* skip whitespace */
 skip_spaces:
-	while ((c = *cp), ksh_isspace(c))
+	while (ctype(ord((c = *cp)), C_SPACE))
 		++cp;
-	if (es->tokp == es->expression && c == '#') {
+	if (es->tokp == es->expression && c == ord('#')) {
 		/* expression begins with # */
 		/* switch to unsigned */
 		es->natural = true;
@ -571,11 +571,11 @@ exprtoken(Expr_state *es)

 	if (c == '\0')
 		es->tok = END;
-	else if (ksh_isalphx(c)) {
+	else if (ctype(c, C_ALPHX)) {
 		do {
-			c = *++cp;
-		} while (ksh_isalnux(c));
-		if (c == '[') {
+			c = ord(*++cp);
+		} while (ctype(c, C_ALNUX));
+		if (c == ord('[')) {
 			size_t len;

 			len = array_ref_len(cp);
@ -617,9 +617,9 @@ exprtoken(Expr_state *es)
 		tvar[c] = '\0';
 		goto process_tvar;
 #endif
-	} else if (ksh_isdigit(c)) {
-		while (c != '_' && (ksh_isalnux(c) || c == '#'))
-			c = *cp++;
+	} else if (ctype(c, C_DIGIT)) {
+		while (ctype(c, C_ALNUM | C_HASH))
+			c = ord(*cp++);
 		strndupx(tvar, es->tokp, --cp - es->tokp, ATEMP);
 process_tvar:
 		es->val = tempvar("");
@ -633,7 +633,7 @@ exprtoken(Expr_state *es)
 	} else {
 		int i, n0;

-		for (i = 0; (n0 = opname[i][0]); i++)
+		for (i = 0; (n0 = ord(opname[i][0])); i++)
 			if (c == n0 && strncmp(cp, opname[i],
 			    (size_t)oplen[i]) == 0) {
 				es->tok = (enum token)i;
@ -772,8 +772,7 @@ utf_ptradj(const char *src)
 {
 	register size_t n;

-	if (!UTFMODE ||
-	    *(const unsigned char *)(src) < 0xC2 ||
+	if (!UTFMODE || rtt2asc(*src) < 0xC2 ||
 	    (n = utf_mbtowc(NULL, src)) == (size_t)-1)
 		n = 1;
 	return (n);
@ -791,7 +790,7 @@ utf_mbtowc(unsigned int *dst, const char *src)
 	const unsigned char *s = (const unsigned char *)src;
 	unsigned int c, wc;

-	if ((wc = *s++) < 0x80) {
+	if ((wc = ord(rtt2asc(*s++))) < 0x80) {
 out:
 		if (dst != NULL)
 			*dst = wc;
@ -805,7 +804,7 @@ utf_mbtowc(unsigned int *dst, const char *src)

 	if (wc < 0xE0) {
 		wc = (wc & 0x1F) << 6;
-		if (((c = *s++) & 0xC0) != 0x80)
+		if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
 			goto ilseq;
 		wc |= c & 0x3F;
 		goto out;
@ -813,11 +812,11 @@ utf_mbtowc(unsigned int *dst, const char *src)

 	wc = (wc & 0x0F) << 12;

-	if (((c = *s++) & 0xC0) != 0x80)
+	if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
 		goto ilseq;
 	wc |= (c & 0x3F) << 6;

-	if (((c = *s++) & 0xC0) != 0x80)
+	if (((c = ord(rtt2asc(*s++))) & 0xC0) != 0x80)
 		goto ilseq;
 	wc |= c & 0x3F;

@ -834,18 +833,18 @@ utf_wctomb(char *dst, unsigned int wc)
 	unsigned char *d;

 	if (wc < 0x80) {
-		*dst = wc;
+		*dst = asc2rtt(wc);
 		return (1);
 	}

 	d = (unsigned char *)dst;
 	if (wc < 0x0800)
-		*d++ = (wc >> 6) | 0xC0;
+		*d++ = asc2rtt((wc >> 6) | 0xC0);
 	else {
-		*d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0;
-		*d++ = ((wc >> 6) & 0x3F) | 0x80;
+		*d++ = asc2rtt(((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0);
+		*d++ = asc2rtt(((wc >> 6) & 0x3F) | 0x80);
 	}
-	*d++ = (wc & 0x3F) | 0x80;
+	*d++ = asc2rtt((wc & 0x3F) | 0x80);
 	return ((char *)d - dst);
 }

@ -873,7 +872,7 @@ ksh_access(const char *fn, int mode)
 }

 #ifndef MIRBSD_BOOTFLOPPY
-/* From: X11/xc/programs/xterm/wcwidth.c,v 1.9 */
+/* From: X11/xc/programs/xterm/wcwidth.c,v 1.10 */

 struct mb_ucsrange {
 	unsigned short beg;
@ -884,8 +883,8 @@ static int mb_ucsbsearch(const struct mb_ucsrange arr[], size_t elems,
    unsigned int val) MKSH_A_PURE;

 /*
- * Generated from the Unicode Character Database, Version 9.0.0, by
- * MirOS: contrib/code/Snippets/eawparse,v 1.3 2014/11/16 12:16:24 tg Exp $
+ * Generated from the Unicode Character Database, Version 10.0.0, by
+ * MirOS: contrib/code/Snippets/eawparse,v 1.10 2017/07/12 22:47:26 tg Exp $
 */

 static const struct mb_ucsrange mb_ucs_combining[] = {
@ -941,6 +940,7 @@ static const struct mb_ucsrange mb_ucs_combining[] = {
 	{ 0x0AC7, 0x0AC8 },
 	{ 0x0ACD, 0x0ACD },
 	{ 0x0AE2, 0x0AE3 },
+	{ 0x0AFA, 0x0AFF },
 	{ 0x0B01, 0x0B01 },
 	{ 0x0B3C, 0x0B3C },
 	{ 0x0B3F, 0x0B3F },
@ -963,7 +963,8 @@ static const struct mb_ucsrange mb_ucs_combining[] = {
 	{ 0x0CC6, 0x0CC6 },
 	{ 0x0CCC, 0x0CCD },
 	{ 0x0CE2, 0x0CE3 },
-	{ 0x0D01, 0x0D01 },
+	{ 0x0D00, 0x0D01 },
+	{ 0x0D3B, 0x0D3C },
 	{ 0x0D41, 0x0D44 },
 	{ 0x0D4D, 0x0D4D },
 	{ 0x0D62, 0x0D63 },
@ -1048,7 +1049,7 @@ static const struct mb_ucsrange mb_ucs_combining[] = {
 	{ 0x1CED, 0x1CED },
 	{ 0x1CF4, 0x1CF4 },
 	{ 0x1CF8, 0x1CF9 },
-	{ 0x1DC0, 0x1DF5 },
+	{ 0x1DC0, 0x1DF9 },
 	{ 0x1DFB, 0x1DFF },
 	{ 0x200B, 0x200F },
 	{ 0x202A, 0x202E },
@ -1136,14 +1137,16 @@ static const struct mb_ucsrange mb_ucs_fullwidth[] = {
 	{ 0x2B1B, 0x2B1C },
 	{ 0x2B50, 0x2B50 },
 	{ 0x2B55, 0x2B55 },
-	{ 0x2E80, 0x303E },
-	{ 0x3040, 0xA4CF },
+	{ 0x2E80, 0x3029 },
+	{ 0x302E, 0x303E },
+	{ 0x3040, 0x3098 },
+	{ 0x309B, 0xA4CF },
 	{ 0xA960, 0xA97F },
 	{ 0xAC00, 0xD7A3 },
 	{ 0xF900, 0xFAFF },
 	{ 0xFE10, 0xFE19 },
 	{ 0xFE30, 0xFE6F },
-	{ 0xFF00, 0xFF60 },
+	{ 0xFF01, 0xFF60 },
 	{ 0xFFE0, 0xFFE6 }
 };

--- a/src/funcs.c
+++ b/src/funcs.c
@ -38,7 +38,7 @@
 #endif
 #endif

-__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.340 2017/04/12 17:46:29 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/funcs.c,v 1.350 2017/05/05 22:53:28 tg Exp $");

 #if HAVE_KILLPG
 /*
@ -751,11 +751,15 @@ do_whence(const char **wp, int fcflags, bool vflag, bool iscommand)
 bool
 valid_alias_name(const char *cp)
 {
+	if (ord(*cp) == ord('-'))
+		return (false);
+	if (ord(cp[0]) == ord('[') && ord(cp[1]) == ord('[') && !cp[2])
+		return (false);
 	while (*cp)
-		if (!ksh_isalias(*cp))
-			return (false);
-		else
+		if (ctype(*cp, C_ALIAS))
 			++cp;
+		else
+			return (false);
 	return (true);
 }

@ -764,7 +768,7 @@ c_alias(const char **wp)
 {
 	struct table *t = &aliases;
 	int rv = 0, prefix = 0;
-	bool rflag = false, tflag, Uflag = false, pflag = false;
+	bool rflag = false, tflag, Uflag = false, pflag = false, chkalias;
 	uint32_t xflag = 0;
 	int optc;

@ -809,12 +813,13 @@ c_alias(const char **wp)
 	wp += builtin_opt.optind;

 	if (!(builtin_opt.info & GI_MINUSMINUS) && *wp &&
-	    (wp[0][0] == '-' || wp[0][0] == '+') && wp[0][1] == '\0') {
+	    ctype(wp[0][0], C_MINUS | C_PLUS) && wp[0][1] == '\0') {
 		prefix = wp[0][0];
 		wp++;
 	}

 	tflag = t == &taliases;
+	chkalias = t == &aliases;

 	/* "hash -r" means reset all the tracked aliases.. */
 	if (rflag) {
@ -857,7 +862,7 @@ c_alias(const char **wp)
 			strndupx(xalias, alias, val++ - alias, ATEMP);
 			alias = xalias;
 		}
-		if (!valid_alias_name(alias) || *alias == '-') {
+		if (chkalias && !valid_alias_name(alias)) {
 			bi_errorf(Tinvname, alias, Talias);
 			afree(xalias, ATEMP);
 			return (1);
@ -1072,8 +1077,7 @@ c_kill(const char **wp)
 	int i, n, rv, sig;

 	/* assume old style options if -digits or -UPPERCASE */
-	if ((p = wp[1]) && *p == '-' && (ksh_isdigit(p[1]) ||
-	    ksh_isupper(p[1]))) {
+	if ((p = wp[1]) && *p == '-' && ctype(p[1], C_DIGIT | C_UPPER)) {
 		if (!(t = gettrap(p + 1, false, false))) {
 			bi_errorf(Tbad_sig_s, p + 1);
 			return (1);
@ -1422,9 +1426,9 @@ c_umask(const char **wp)
 	} else {
 		mode_t new_umask;

-		if (ksh_isdigit(*cp)) {
+		if (ctype(*cp, C_DIGIT)) {
 			new_umask = 0;
-			while (*cp >= ord('0') && *cp <= ord('7')) {
+			while (ctype(*cp, C_OCTAL)) {
 				new_umask = new_umask * 8 + ksh_numdig(*cp);
 				++cp;
 			}
@ -1462,7 +1466,7 @@ c_umask(const char **wp)
 				if (!positions)
 					/* default is a */
 					positions = 0111;
-				if (!vstrchr("=+-", op = *cp))
+				if (!ctype((op = *cp), C_EQUAL | C_MINUS | C_PLUS))
 					break;
 				cp++;
 				new_val = 0;
@ -1503,7 +1507,7 @@ c_umask(const char **wp)
 				if (*cp == ',') {
 					positions = 0;
 					cp++;
-				} else if (!vstrchr("=+-", *cp))
+				} else if (!ctype(*cp, C_EQUAL | C_MINUS | C_PLUS))
 					break;
 			}
 			if (*cp) {
@ -1585,7 +1589,7 @@ c_wait(const char **wp)
 	return (rv);
 }

-static char REPLY[] = "REPLY";
+static const char REPLY[] = "REPLY";
 int
 c_read(const char **wp)
 {
@ -2300,8 +2304,9 @@ c_unset(const char **wp)
 			size_t n;

 			n = strlen(id);
-			if (n > 3 && id[n-3] == '[' && id[n-2] == '*' &&
-			    id[n-1] == ']') {
+			if (n > 3 && ord(id[n - 3]) == ord('[') &&
+			    ord(id[n - 2]) == ord('*') &&
+			    ord(id[n - 1]) == ord(']')) {
 				strndupx(cp, id, n - 3, ATEMP);
 				id = cp;
 				optc = 3;
@ -3350,7 +3355,7 @@ set_ulimit(const struct limits *l, const char *v, int how)
 		 * If this causes problems, will have to add parameter to
 		 * evaluate() to control if unset params are 0 or an error.
 		 */
-		if (!rval && !ksh_isdigit(v[0])) {
+		if (!rval && !ctype(v[0], C_DIGIT)) {
 			bi_errorf("invalid %s limit: %s", l->name, v);
 			return (1);
 		}
--- a/src/histrap.c
+++ b/src/histrap.c
@ -3,7 +3,7 @@

 /*-
 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
- *		 2011, 2012, 2014, 2015, 2016
+ *		 2011, 2012, 2014, 2015, 2016, 2017
 *	mirabilos <m@mirbsd.org>
 *
 * Provided that these terms and disclaimer and all copyright notices
@ -27,7 +27,7 @@
 #include <sys/file.h>
 #endif

-__RCSID("$MirOS: src/bin/mksh/histrap.c,v 1.160 2017/04/08 01:07:16 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/histrap.c,v 1.166 2017/08/07 23:25:09 tg Exp $");

 Trap sigtraps[ksh_NSIG + 1];
 static struct sigaction Sigact_ign;
@ -629,7 +629,7 @@ histsave(int *lnp, const char *cmd, int svmode, bool ignoredups)
 	if (svmode == HIST_FLUSH)
 		return;

-	ccp = cmd + strlen(cmd);
+	ccp = strnul(cmd);
 	while (ccp > cmd && ccp[-1] == '\n')
 		--ccp;
 	strndupx(c, cmd, ccp - cmd, APERM);
@ -714,26 +714,66 @@ histsave(int *lnp, const char *cmd, int svmode, bool ignoredups)

 #if HAVE_PERSISTENT_HISTORY
 static const unsigned char sprinkle[2] = { HMAGIC1, HMAGIC2 };
-#endif

-void
-hist_init(Source *s)
+/*
+ * Copy the whole content of srcfd into the history file (histfd),
+ * starting at offset 0 of both, then cut histfd to the copied size.
+ *
+ * srcfd: descriptor to copy from; its size is taken via lseek()
+ *
+ * Returns 0 on success, 1 if seeking, allocating the copy buffer,
+ * reading, writing or truncating fails, or if srcfd hits EOF before
+ * the size measured up front has been read.
+ *
+ * NOTE(review): the caller appears to open histfd with O_APPEND; POSIX
+ * makes every write() on such a descriptor go to end-of-file, ignoring
+ * the lseek() to offset 0 below. Confirm the data really lands at the
+ * start of the file before the ftruncate().
+ */
+static int
+hist_persist_back(int srcfd)
+{
+	off_t tot, mis;
+	ssize_t n, w;
+	char *buf, *cp;
+	int rv = 0;
+#define MKSH_HS_BUFSIZ 4096
+
+	/* measure the source, then rewind both descriptors */
+	if ((tot = lseek(srcfd, (off_t)0, SEEK_END)) < 0 ||
+	    lseek(srcfd, (off_t)0, SEEK_SET) < 0 ||
+	    lseek(histfd, (off_t)0, SEEK_SET) < 0)
+		return (1);
+
+	if ((buf = malloc_osfunc(MKSH_HS_BUFSIZ)) == NULL)
+		return (1);
+
+	/* mis counts the bytes that still have to be read */
+	mis = tot;
+	while (mis > 0) {
+		if ((n = blocking_read(srcfd, (cp = buf),
+		    MKSH_HS_BUFSIZ)) == -1) {
+			if (errno == EINTR) {
+				intrcheck();
+				continue;
+			}
+			goto copy_error;
+		}
+		/*
+		 * EOF although bytes are still missing: the source got
+		 * shorter under us; fail instead of looping forever
+		 */
+		if (n == 0)
+			goto copy_error;
+		mis -= n;
+		/* write out the chunk, resuming after short writes */
+		while (n) {
+			if (intrsig)
+				goto has_intrsig;
+			if ((w = write(histfd, cp, n)) != -1) {
+				n -= w;
+				cp += w;
+				continue;
+			}
+			if (errno == EINTR) {
+				/* a pending intrsig is handled the same way */
+ has_intrsig:
+				intrcheck();
+				continue;
+			}
+			goto copy_error;
+		}
+	}
+	/* drop whatever the history file held beyond the copied data */
+	if (ftruncate(histfd, tot)) {
+ copy_error:
+		rv = 1;
+	}
+	free_osfunc(buf);
+	return (rv);
+}
+
+static void
+hist_persist_init(void)
 {
-#if HAVE_PERSISTENT_HISTORY
 	unsigned char *base;
 	int lines, fd;
-	enum { hist_init_first, hist_init_retry, hist_init_restore } hs;
-#endif
+	enum { hist_init_first, hist_init_retry, hist_use_it } hs;

-	histsave(NULL, NULL, HIST_DISCARD, true);
-
-	if (Flag(FTALKING) == 0)
-		return;
-
-	hstarted = true;
-	hist_source = s;
-
-#if HAVE_PERSISTENT_HISTORY
 	if (((hname = str_val(global("HISTFILE"))) == NULL) || !*hname) {
 		hname = NULL;
 		return;
@ -745,17 +785,16 @@ hist_init(Source *s)
 	/* we have a file and are interactive */
 	if ((fd = binopen3(hname, O_RDWR | O_CREAT | O_APPEND, 0600)) < 0)
 		return;
-
-	histfd = savefd(fd);
+	if ((histfd = savefd(fd)) < 0)
+		return;
 	if (histfd != fd)
 		close(fd);

 	mksh_lockfd(histfd);

 	histfsize = lseek(histfd, (off_t)0, SEEK_END);
-	if (histfsize > MKSH_MAXHISTFSIZE || hs == hist_init_restore) {
+	if (histfsize > MKSH_MAXHISTFSIZE) {
 		/* we ignore too large files but still append to them */
-		/* we also don't need to re-read after truncation */
 		goto hist_init_tail;
 	} else if (histfsize > 2) {
 		/* we have some data, check its validity */
@ -781,6 +820,7 @@ hist_init(Source *s)
 			if ((fd = binopen3(nhname, O_RDWR | O_CREAT | O_TRUNC |
 			    O_EXCL, 0600)) < 0) {
 				/* just don't truncate then, meh. */
+				hs = hist_use_it;
 				goto hist_trunc_dont;
 			}
 			if (fstat(histfd, &sb) >= 0 &&
@ -795,28 +835,26 @@ hist_init(Source *s)
 			hp = history;
 			while (hp < histptr) {
 				if (!writehistline(fd,
-				    s->line - (histptr - hp), *hp))
+				    hist_source->line - (histptr - hp), *hp))
 					goto hist_trunc_abort;
 				++hp;
 			}
-			/* now unlock, close both, rename, rinse, repeat */
+			/* now transfer back */
+			if (!hist_persist_back(fd)) {
+				/* success! */
+				hs = hist_use_it;
+			}
+ hist_trunc_abort:
+			/* remove temporary file */
 			close(fd);
 			fd = -1;
-			hist_finish();
-			if (rename(nhname, hname) < 0) {
- hist_trunc_abort:
-				if (fd != -1)
-					close(fd);
-				unlink(nhname);
-				if (fd != -1)
-					goto hist_trunc_dont;
-				/* darn! restore histfd and pray */
-			}
-			hs = hist_init_restore;
+			unlink(nhname);
+			/* use whatever is in the file now */
 hist_trunc_dont:
 			afree(nhname, ATEMP);
-			if (hs == hist_init_restore)
-				goto retry;
+			if (hs == hist_use_it)
+				goto hist_trunc_done;
+			goto hist_init_fail;
 		}
 	} else if (histfsize != 0) {
 		/* negative or too small... */
@ -840,9 +878,26 @@ hist_init(Source *s)
 			return;
 		}
 	}
+ hist_trunc_done:
 	histfsize = lseek(histfd, (off_t)0, SEEK_END);
 hist_init_tail:
 	mksh_unlkfd(histfd);
+}
+#endif
+
+/*
+ * History setup: always calls histsave() in HIST_DISCARD mode; only
+ * when the FTALKING flag is set does it mark history as started,
+ * record s as the history source and, if the build has persistent
+ * history, run hist_persist_init().
+ */
+void
+hist_init(Source *s)
+{
+	histsave(NULL, NULL, HIST_DISCARD, true);
+
+	if (Flag(FTALKING) != 0) {
+		hstarted = true;
+		/* must be set before hist_persist_init() runs */
+		hist_source = s;
+#if HAVE_PERSISTENT_HISTORY
+		hist_persist_init();
+#endif
+	}
+}

@ -909,10 +964,11 @@ writehistfile(int lno, const char *cmd)
 	mksh_lockfd(histfd);
 	sizenow = lseek(histfd, (off_t)0, SEEK_END);
 	if (sizenow < histfsize) {
-		/* the file has shrunk; give up */
-		goto bad;
-	}
-	if (
+		/* the file has shrunk; trust it just appending the new data */
+		/* well, for now, anyway… since mksh strdups all into memory */
+		/* we can use a nicer approach some time later… */
+		;
+	} else if (
 		/* ignore changes when the file is too large */
 		sizenow <= MKSH_MAXHISTFSIZE
 	    &&
@ -1114,7 +1170,7 @@ gettrap(const char *cs, bool igncase, bool allsigs)

 	/* signal number (1..ksh_NSIG) or 0? */

-	if (ksh_isdigit(*cs))
+	if (ctype(*cs, C_DIGIT))
 		return ((getn(cs, &i) && 0 <= i && i < ksh_NSIG) ?
 		    (&sigtraps[i]) : NULL);

--- a/src/jobs.c
+++ b/src/jobs.c
@ -23,7 +23,7 @@

 #include "sh.h"

-__RCSID("$MirOS: src/bin/mksh/jobs.c,v 1.121 2016/07/25 00:04:44 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/jobs.c,v 1.124 2017/08/08 14:30:10 tg Exp $");

 #if HAVE_KILLPG
 #define mksh_killpg		killpg
@ -39,14 +39,27 @@ __RCSID("$MirOS: src/bin/mksh/jobs.c,v 1.121 2016/07/25 00:04:44 tg Exp $");
 #define PSTOPPED	3

 typedef struct proc Proc;
-struct proc {
-	Proc *next;		/* next process in pipeline (if any) */
-	pid_t pid;		/* process id */
+/* to take alignment into consideration */
+struct proc_dummy {
+	Proc *next;
+	pid_t pid;
 	int state;
-	int status;		/* wait status */
+	int status;
+	char command[128];
+};
+/* real structure */
+struct proc {
+	/* next process in pipeline (if any) */
+	Proc *next;
+	/* process id of this Unix process in the job */
+	pid_t pid;
+	/* one of the four P… above */
+	int state;
+	/* wait status */
+	int status;
 	/* process command string from vistree */
-	char command[256 - (ALLOC_OVERHEAD + sizeof(Proc *) +
-	    sizeof(pid_t) + 2 * sizeof(int))];
+	char command[256 - (ALLOC_OVERHEAD +
+	    offsetof(struct proc_dummy, command[0]))];
 };

 /* Notify/print flag - j_print() argument */
@ -1009,8 +1022,14 @@ j_notify(void)
 	}
 	for (j = job_list; j; j = tmp) {
 		tmp = j->next;
-		if (j->flags & JF_REMOVE)
-			remove_job(j, "notify");
+		if (j->flags & JF_REMOVE) {
+			if (j == async_job || (j->flags & JF_KNOWN)) {
+				j->flags = (j->flags & ~JF_REMOVE) | JF_ZOMBIE;
+				j->job = -1;
+				nzombie++;
+			} else
+				remove_job(j, "notify");
+		}
 	}
 	shf_flush(shl_out);
 #ifndef MKSH_NOPROSPECTOFWORK
@ -1651,7 +1670,7 @@ j_lookup(const char *cp, int *ecodep)
 	size_t len;
 	int job = 0;

-	if (ksh_isdigit(*cp) && getn(cp, &job)) {
+	if (ctype(*cp, C_DIGIT) && getn(cp, &job)) {
 		/* Look for last_proc->pid (what $! returns) first... */
 		for (j = job_list; j != NULL; j = j->next)
 			if (j->last_proc && j->last_proc->pid == job)
--- a/src/lex.c
+++ b/src/lex.c
@ -23,7 +23,7 @@

 #include "sh.h"

-__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.234 2017/04/06 01:59:55 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.239 2017/05/05 22:53:29 tg Exp $");

 /*
 * states while lexing word
@ -131,7 +131,7 @@ getsc_i(void)
 }

 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
-#define getsc getsc_i
+#define getsc()		ord(getsc_i())
 #else
 static int getsc_r(int);

@ -141,7 +141,7 @@ getsc_r(int c)
 	o_getsc_r(c);
 }

-#define getsc()		getsc_r(o_getsc())
+#define getsc()		ord(getsc_r(o_getsc()))
 #endif

 #define STATE_BSIZE	8
@ -220,11 +220,11 @@ yylex(int cf)
 	} else {
 		/* normal lexing */
 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
-		while ((c = getsc()) == ' ' || c == '\t')
+		while (ctype((c = getsc()), C_BLANK))
 			;
 		if (c == '#') {
 			ignore_backslash_newline++;
-			while ((c = getsc()) != '\0' && c != '\n')
+			while (!ctype((c = getsc()), C_NUL | C_LF))
 				;
 			ignore_backslash_newline--;
 		}
@ -245,30 +245,30 @@ yylex(int cf)
 	while (!((c = getsc()) == 0 ||
 	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
 		if (state == SBASE &&
-		    subshell_nesting_type == /*{*/ '}' &&
-		    c == /*{*/ '}')
+		    subshell_nesting_type == ord(/*{*/ '}') &&
+		    c == ord(/*{*/ '}'))
 			/* possibly end ${ :;} */
 			break;
 		Xcheck(ws, wp);
 		switch (state) {
 		case SADELIM:
-			if (c == '(')
+			if (c == ord('('))
 				statep->nparen++;
-			else if (c == ')')
+			else if (c == ord(')'))
 				statep->nparen--;
-			else if (statep->nparen == 0 && (c == /*{*/ '}' ||
+			else if (statep->nparen == 0 && (c == ord(/*{*/ '}') ||
 			    c == (int)statep->ls_adelim.delimiter)) {
 				*wp++ = ADELIM;
 				*wp++ = c;
-				if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
+				if (c == ord(/*{*/ '}') || --statep->ls_adelim.num == 0)
 					POP_STATE();
-				if (c == /*{*/ '}')
+				if (c == ord(/*{*/ '}'))
 					POP_STATE();
 				break;
 			}
 			/* FALLTHROUGH */
 		case SBASE:
-			if (c == '[' && (cf & CMDASN)) {
+			if (c == ord('[') && (cf & CMDASN)) {
 				/* temporary */
 				*wp = EOS;
 				if (is_wdvarname(Xstring(ws, wp), false)) {
@ -301,10 +301,9 @@ yylex(int cf)
 			}
 			/* FALLTHROUGH */
 Sbase1:		/* includes *(...|...) pattern (*+?@!) */
-			if (c == '*' || c == '@' || c == '+' || c == '?' ||
-			    c == '!') {
+			if (ctype(c, C_PATMO)) {
 				c2 = getsc();
-				if (c2 == '(' /*)*/ ) {
+				if (c2 == ord('(' /*)*/)) {
 					*wp++ = OPAT;
 					*wp++ = c;
 					PUSH_STATE(SPATTERN);
@ -315,7 +314,7 @@ yylex(int cf)
 			/* FALLTHROUGH */
 Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
 			switch (c) {
-			case '\\':
+			case ord('\\'):
 getsc_qchar:
 				if ((c = getsc())) {
 					/* trailing \ is lost */
@ -323,7 +322,7 @@ yylex(int cf)
 					*wp++ = c;
 				}
 				break;
-			case '\'':
+			case ord('\''):
 open_ssquote_unless_heredoc:
 				if ((cf & HEREDOC))
 					goto store_char;
@ -331,12 +330,12 @@ yylex(int cf)
 				ignore_backslash_newline++;
 				PUSH_STATE(SSQUOTE);
 				break;
-			case '"':
+			case ord('"'):
 open_sdquote:
 				*wp++ = OQUOTE;
 				PUSH_STATE(SDQUOTE);
 				break;
-			case '$':
+			case ord('$'):
 				/*
 				 * processing of dollar sign belongs into
 				 * Subst, except for those which can open
@ -345,9 +344,9 @@ yylex(int cf)
 subst_dollar_ex:
 				c = getsc();
 				switch (c) {
-				case '"':
+				case ord('"'):
 					goto open_sdquote;
-				case '\'':
+				case ord('\''):
 					goto open_sequote;
 				default:
 					goto SubstS;
@ -359,15 +358,16 @@ yylex(int cf)

 Subst:
 			switch (c) {
-			case '\\':
+			case ord('\\'):
 				c = getsc();
 				switch (c) {
-				case '"':
+				case ord('"'):
 					if ((cf & HEREDOC))
 						goto heredocquote;
 					/* FALLTHROUGH */
-				case '\\':
-				case '$': case '`':
+				case ord('\\'):
+				case ord('$'):
+				case ord('`'):
 store_qchar:
 					*wp++ = QCHAR;
 					*wp++ = c;
@ -385,12 +385,12 @@ yylex(int cf)
 					break;
 				}
 				break;
-			case '$':
+			case ord('$'):
 				c = getsc();
 SubstS:
-				if (c == '(') /*)*/ {
+				if (c == ord('(' /*)*/)) {
 					c = getsc();
-					if (c == '(') /*)*/ {
+					if (c == ord('(' /*)*/)) {
 						*wp++ = EXPRSUB;
 						PUSH_SRETRACE(SASPAREN);
 						statep->nparen = 2;
@ -407,8 +407,8 @@ yylex(int cf)
 						memcpy(wp, sp, cz);
 						wp += cz;
 					}
-				} else if (c == '{') /*}*/ {
-					if ((c = getsc()) == '|') {
+				} else if (c == ord('{' /*}*/)) {
+					if ((c = getsc()) == ord('|')) {
 						/*
 						 * non-subenvironment
 						 * value substitution
@ -425,15 +425,15 @@ yylex(int cf)
 					}
 					ungetsc(c);
 					*wp++ = OSUBST;
-					*wp++ = '{'; /*}*/
+					*wp++ = '{' /*}*/;
 					wp = get_brace_var(&ws, wp);
 					c = getsc();
 					/* allow :# and :% (ksh88 compat) */
-					if (c == ':') {
+					if (c == ord(':')) {
 						*wp++ = CHAR;
 						*wp++ = c;
 						c = getsc();
-						if (c == ':') {
+						if (c == ord(':')) {
 							*wp++ = CHAR;
 							*wp++ = '0';
 							*wp++ = ADELIM;
@ -444,10 +444,9 @@ yylex(int cf)
 							statep->ls_adelim.num = 1;
 							statep->nparen = 0;
 							break;
-						} else if (ksh_isdigit(c) ||
-						    c == '('/*)*/ || c == ' ' ||
+						} else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) ||
 						    /*XXX what else? */
-						    c == '$') {
+						    c == '(' /*)*/) {
 							/* substring subst. */
 							if (c != ' ') {
 								*wp++ = CHAR;
@ -466,7 +465,7 @@ yylex(int cf)
 parse_adelim_slash:
 						*wp++ = CHAR;
 						*wp++ = c;
-						if ((c = getsc()) == '/') {
+						if ((c = getsc()) == ord('/')) {
 							*wp++ = c2;
 							*wp++ = c;
 						} else
@ -480,7 +479,7 @@ yylex(int cf)
 					} else if (c == '@') {
 						c2 = getsc();
 						ungetsc(c2);
-						if (c2 == '/') {
+						if (c2 == ord('/')) {
 							c2 = CHAR;
 							goto parse_adelim_slash;
 						}
@ -489,7 +488,7 @@ yylex(int cf)
 					 * If this is a trim operation,
 					 * treat (,|,) specially in STBRACE.
 					 */
-					if (ksh_issubop2(c)) {
+					if (ctype(c, C_SUB2)) {
 						ungetsc(c);
 						if (Flag(FSH))
 							PUSH_STATE(STBRACEBOURNE);
@ -503,14 +502,14 @@ yylex(int cf)
 						else
 							PUSH_STATE(SBRACE);
 					}
-				} else if (ksh_isalphx(c)) {
+				} else if (ctype(c, C_ALPHX)) {
 					*wp++ = OSUBST;
 					*wp++ = 'X';
 					do {
 						Xcheck(ws, wp);
 						*wp++ = c;
 						c = getsc();
-					} while (ksh_isalnux(c));
+					} while (ctype(c, C_ALNUX));
 					*wp++ = '\0';
 					*wp++ = CSUBST;
 					*wp++ = 'X';
@ -529,7 +528,7 @@ yylex(int cf)
 					ungetsc(c);
 				}
 				break;
-			case '`':
+			case ord('`'):
 subst_gravis:
 				PUSH_STATE(SBQUOTE);
 				*wp++ = COMASUB;
@ -573,11 +572,11 @@ yylex(int cf)
 			break;

 		case SEQUOTE:
-			if (c == '\'') {
+			if (c == ord('\'')) {
 				POP_STATE();
 				*wp++ = CQUOTE;
 				ignore_backslash_newline--;
-			} else if (c == '\\') {
+			} else if (c == ord('\\')) {
 				if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
 					c2 = getsc();
 				if (c2 == 0)
@ -605,7 +604,7 @@ yylex(int cf)
 			break;

 		case SSQUOTE:
-			if (c == '\'') {
+			if (c == ord('\'')) {
 				POP_STATE();
 				if ((cf & HEREDOC) || state == SQBRACE)
 					goto store_char;
@ -618,7 +617,7 @@ yylex(int cf)
 			break;

 		case SDQUOTE:
-			if (c == '"') {
+			if (c == ord('"')) {
 				POP_STATE();
 				*wp++ = CQUOTE;
 			} else
@ -627,15 +626,15 @@ yylex(int cf)

 		/* $(( ... )) */
 		case SASPAREN:
-			if (c == '(')
+			if (c == ord('('))
 				statep->nparen++;
-			else if (c == ')') {
+			else if (c == ord(')')) {
 				statep->nparen--;
 				if (statep->nparen == 1) {
 					/* end of EXPRSUB */
 					POP_SRETRACE();

-					if ((c2 = getsc()) == /*(*/ ')') {
+					if ((c2 = getsc()) == ord(/*(*/ ')')) {
 						cz = strlen(sp) - 2;
 						XcheckN(ws, wp, cz);
 						memcpy(wp, sp + 1, cz);
@ -667,7 +666,7 @@ yylex(int cf)
 			goto Sbase2;

 		case SQBRACE:
-			if (c == '\\') {
+			if (c == ord('\\')) {
 				/*
 				 * perform POSIX "quote removal" if the back-
 				 * slash is "special", i.e. same cases as the
@ -676,26 +675,26 @@ yylex(int cf)
 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
 				 * emitted (in heredocquote:)
 				 */
-				if ((c = getsc()) == '"' || c == '\\' ||
-				    c == '$' || c == '`' || c == /*{*/'}')
+				if ((c = getsc()) == ord('"') || c == ord('\\') ||
+				    ctype(c, C_DOLAR | C_GRAVE) || c == ord(/*{*/ '}'))
 					goto store_qchar;
 				goto heredocquote;
 			}
 			goto common_SQBRACE;

 		case SBRACE:
-			if (c == '\'')
+			if (c == ord('\''))
 				goto open_ssquote_unless_heredoc;
-			else if (c == '\\')
+			else if (c == ord('\\'))
 				goto getsc_qchar;
 common_SQBRACE:
-			if (c == '"')
+			if (c == ord('"'))
 				goto open_sdquote;
-			else if (c == '$')
+			else if (c == ord('$'))
 				goto subst_dollar_ex;
-			else if (c == '`')
+			else if (c == ord('`'))
 				goto subst_gravis;
-			else if (c != /*{*/ '}')
+			else if (c != ord(/*{*/ '}'))
 				goto store_char;
 			POP_STATE();
 			*wp++ = CSUBST;
@ -704,16 +703,16 @@ yylex(int cf)

 		/* Same as SBASE, except (,|,) treated specially */
 		case STBRACEKORN:
-			if (c == '|')
+			if (c == ord('|'))
 				*wp++ = SPAT;
-			else if (c == '(') {
+			else if (c == ord('(')) {
 				*wp++ = OPAT;
 				/* simile for @ */
 				*wp++ = ' ';
 				PUSH_STATE(SPATTERN);
 			} else /* FALLTHROUGH */
 		case STBRACEBOURNE:
-			  if (c == /*{*/ '}') {
+			  if (c == ord(/*{*/ '}')) {
 				POP_STATE();
 				*wp++ = CSUBST;
 				*wp++ = /*{*/ '}';
@ -722,20 +721,20 @@ yylex(int cf)
 			break;

 		case SBQUOTE:
-			if (c == '`') {
+			if (c == ord('`')) {
 				*wp++ = 0;
 				POP_STATE();
-			} else if (c == '\\') {
+			} else if (c == ord('\\')) {
 				switch (c = getsc()) {
 				case 0:
 					/* trailing \ is lost */
 					break;
-				case '$':
-				case '`':
-				case '\\':
+				case ord('$'):
+				case ord('`'):
+				case ord('\\'):
 					*wp++ = c;
 					break;
-				case '"':
+				case ord('"'):
 					if (statep->ls_bool) {
 						*wp++ = c;
 						break;
@ -756,10 +755,10 @@ yylex(int cf)

 		/* LETEXPR: (( ... )) */
 		case SLETPAREN:
-			if (c == /*(*/ ')') {
+			if (c == ord(/*(*/ ')')) {
 				if (statep->nparen > 0)
 					--statep->nparen;
-				else if ((c2 = getsc()) == /*(*/ ')') {
+				else if ((c2 = getsc()) == ord(/*(*/ ')')) {
 					c = 0;
 					*wp++ = CQUOTE;
 					goto Done;
@ -780,10 +779,10 @@ yylex(int cf)
 					s->start = s->str = s->u.freeme = dp;
 					s->next = source;
 					source = s;
-					ungetsc('('/*)*/);
-					return ('('/*)*/);
+					ungetsc('(' /*)*/);
+					return (ord('(' /*)*/));
 				}
-			} else if (c == '(')
+			} else if (c == ord('('))
 				/*
 				 * parentheses inside quotes and
 				 * backslashes are lost, but AT&T ksh
@ -799,26 +798,26 @@ yylex(int cf)
 			 * $ and `...` are not to be treated specially
 			 */
 			switch (c) {
-			case '\\':
+			case ord('\\'):
 				if ((c = getsc())) {
 					/* trailing \ is lost */
 					*wp++ = QCHAR;
 					*wp++ = c;
 				}
 				break;
-			case '\'':
+			case ord('\''):
 				goto open_ssquote_unless_heredoc;
-			case '$':
-				if ((c2 = getsc()) == '\'') {
+			case ord('$'):
+				if ((c2 = getsc()) == ord('\'')) {
 open_sequote:
 					*wp++ = OQUOTE;
 					ignore_backslash_newline++;
 					PUSH_STATE(SEQUOTE);
 					statep->ls_bool = false;
 					break;
-				} else if (c2 == '"') {
+				} else if (c2 == ord('"')) {
 					/* FALLTHROUGH */
-			case '"':
+			case ord('"'):
 					PUSH_SRETRACE(SHEREDQUOTE);
 					break;
 				}
@ -832,7 +831,7 @@ yylex(int cf)

 		/* " in << or <<- delimiter */
 		case SHEREDQUOTE:
-			if (c != '"')
+			if (c != ord('"'))
 				goto Subst;
 			POP_SRETRACE();
 			dp = strnul(sp) - 1;
@ -845,10 +844,10 @@ yylex(int cf)
 			while ((c = *dp++)) {
 				if (c == '\\') {
 					switch ((c = *dp++)) {
-					case '\\':
-					case '"':
-					case '$':
-					case '`':
+					case ord('\\'):
+					case ord('"'):
+					case ord('$'):
+					case ord('`'):
 						break;
 					default:
 						*wp++ = CHAR;
@ -866,12 +865,12 @@ yylex(int cf)

 		/* in *(...|...) pattern (*+?@!) */
 		case SPATTERN:
-			if (c == /*(*/ ')') {
+			if (c == ord(/*(*/ ')')) {
 				*wp++ = CPAT;
 				POP_STATE();
-			} else if (c == '|') {
+			} else if (c == ord('|')) {
 				*wp++ = SPAT;
-			} else if (c == '(') {
+			} else if (c == ord('(')) {
 				*wp++ = OPAT;
 				/* simile for @ */
 				*wp++ = ' ';
@ -894,14 +893,14 @@ yylex(int cf)
 	dp = Xstring(ws, wp);
 	if (state == SBASE && (
 	    (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
-	    c == '<' || c == '>') && ((c2 = Xlength(ws, wp)) == 0 ||
-	    (c2 == 2 && dp[0] == CHAR && ksh_isdigit(dp[1])))) {
+	    ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
+	    (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);

 		iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;

 		if (c == '&') {
-			if ((c2 = getsc()) != '>') {
+			if ((c2 = getsc()) != ord('>')) {
 				ungetsc(c2);
 				goto no_iop;
 			}
@ -912,22 +911,22 @@ yylex(int cf)

 		c2 = getsc();
 		/* <<, >>, <> are ok, >< is not */
-		if (c == c2 || (c == '<' && c2 == '>')) {
+		if (c == c2 || (c == ord('<') && c2 == ord('>'))) {
 			iop->ioflag |= c == c2 ?
-			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
+			    (c == ord('>') ? IOCAT : IOHERE) : IORDWR;
 			if (iop->ioflag == IOHERE) {
-				if ((c2 = getsc()) == '-')
+				if ((c2 = getsc()) == ord('-'))
 					iop->ioflag |= IOSKIP;
-				else if (c2 == '<')
+				else if (c2 == ord('<'))
 					iop->ioflag |= IOHERESTR;
 				else
 					ungetsc(c2);
 			}
-		} else if (c2 == '&')
-			iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0);
+		} else if (c2 == ord('&'))
+			iop->ioflag |= IODUP | (c == ord('<') ? IORDUP : 0);
 		else {
-			iop->ioflag |= c == '>' ? IOWRITE : IOREAD;
-			if (c == '>' && c2 == '|')
+			iop->ioflag |= c == ord('>') ? IOWRITE : IOREAD;
+			if (c == ord('>') && c2 == ord('|'))
 				iop->ioflag |= IOCLOB;
 			else
 				ungetsc(c2);
@ -948,29 +947,30 @@ yylex(int cf)
 		/* free word */
 		Xfree(ws, wp);
 		/* no word, process LEX1 character */
-		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
+		if ((c == ord('|')) || (c == ord('&')) || (c == ord(';')) ||
+		    (c == ord('(' /*)*/))) {
 			if ((c2 = getsc()) == c)
-				c = (c == ';') ? BREAK :
-				    (c == '|') ? LOGOR :
-				    (c == '&') ? LOGAND :
-				    /* c == '(' ) */ MDPAREN;
-			else if (c == '|' && c2 == '&')
+				c = (c == ord(';')) ? BREAK :
+				    (c == ord('|')) ? LOGOR :
+				    (c == ord('&')) ? LOGAND :
+				    /* c == ord('(' )) */ MDPAREN;
+			else if (c == ord('|') && c2 == ord('&'))
 				c = COPROC;
-			else if (c == ';' && c2 == '|')
+			else if (c == ord(';') && c2 == ord('|'))
 				c = BRKEV;
-			else if (c == ';' && c2 == '&')
+			else if (c == ord(';') && c2 == ord('&'))
 				c = BRKFT;
 			else
 				ungetsc(c2);
 #ifndef MKSH_SMALL
 			if (c == BREAK) {
-				if ((c2 = getsc()) == '&')
+				if ((c2 = getsc()) == ord('&'))
 					c = BRKEV;
 				else
 					ungetsc(c2);
 			}
 #endif
-		} else if (c == '\n') {
+		} else if (c == ord('\n')) {
 			if (cf & HEREDELIM)
 				ungetsc(c);
 			else {
@ -1025,7 +1025,7 @@ yylex(int cf)

 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
 		    (!(cf & ESACONLY) || p->val.i == ESAC ||
-		    p->val.i == /*{*/ '}')) {
+		    p->val.i == ord(/*{*/ '}'))) {
 			afree(yylval.cp, ATEMP);
 			return (p->val.i);
 		}
@ -1038,7 +1038,7 @@ yylex(int cf)
 			const char *cp = source->str;

 			/* prefer POSIX but not Korn functions over aliases */
-			while (*cp == ' ' || *cp == '\t')
+			while (ctype(*cp, C_BLANK))
 				/*
 				 * this is like getsc() without skipping
 				 * over Source boundaries (including not
@ -1136,7 +1136,7 @@ readhere(struct ioword *iop)
 	if (!*eofp) {
 		/* end of here document marker, what to do? */
 		switch (c) {
-		case /*(*/ ')':
+		case ord(/*(*/ ')'):
 			if (!subshell_nesting_type)
 				/*-
 				 * not allowed outside $(...) or (...)
@ -1151,7 +1151,7 @@ readhere(struct ioword *iop)
 			 * Allow EOF here to commands without trailing
 			 * newlines (mksh -c '...') will work as well.
 			 */
-		case '\n':
+		case ord('\n'):
 			/* Newline terminates here document marker */
 			goto heredoc_found_terminator;
 		}
@ -1233,7 +1233,7 @@ getsc_uu(void)
 	Source *s = source;
 	int c;

-	while ((c = *s->str++) == 0) {
+	while ((c = ord(*s->str++)) == 0) {
 		/* return 0 for EOF by default */
 		s->str = NULL;
 		switch (s->type) {
@ -1275,7 +1275,7 @@ getsc_uu(void)
 				source->flags |= s->flags & SF_ALIAS;
 				s = source;
 			} else if (*s->u.tblp->val.s &&
-			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
+			    ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
 				/* pop source stack */
 				source = s = s->next;
 				/*
@ -1435,7 +1435,7 @@ getsc_line(Source *s)
 	} else if (interactive && cur_prompt == PS1) {
 check_for_sole_return:
 		cp = Xstring(s->xs, xp);
-		while (*cp && ctype(*cp, C_IFSWS))
+		while (ctype(*cp, C_IFSWS))
 			++cp;
 		if (!*cp) {
 			histsave(&s->line, NULL, HIST_FLUSH, true);
@ -1528,7 +1528,7 @@ pprompt(const char *cp, int ntruncate)
 	for (; *cp; cp++) {
 		if (indelimit && *cp != delimiter)
 			;
-		else if (*cp == '\n' || *cp == '\r') {
+		else if (ctype(*cp, C_CR | C_LF)) {
 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
 			columns = 0;
 		} else if (*cp == '\t') {
@ -1538,7 +1538,7 @@ pprompt(const char *cp, int ntruncate)
 				columns--;
 		} else if (*cp == delimiter)
 			indelimit = !indelimit;
-		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
+		else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
 			const char *cp2;
 			columns += utf_widthadj(cp, &cp2);
 			if (doprint && (indelimit ||
@ -1580,39 +1580,39 @@ get_brace_var(XString *wsp, char *wp)

 				c2 = getsc();
 				ungetsc(c2);
-				if (c2 != /*{*/ '}') {
+				if (ord(c2) != ord(/*{*/ '}')) {
 					ungetsc(c);
 					goto out;
 				}
 			}
 			goto ps_common;
 		case PS_SAW_BANG:
-			switch (c) {
-			case '@':
-			case '#':
-			case '-':
-			case '?':
+			switch (ord(c)) {
+			case ord('@'):
+			case ord('#'):
+			case ord('-'):
+			case ord('?'):
 				goto out;
 			}
 			goto ps_common;
 		case PS_INITIAL:
-			switch (c) {
-			case '%':
+			switch (ord(c)) {
+			case ord('%'):
 				state = PS_SAW_PERCENT;
 				goto next;
-			case '#':
+			case ord('#'):
 				state = PS_SAW_HASH;
 				goto next;
-			case '!':
+			case ord('!'):
 				state = PS_SAW_BANG;
 				goto next;
 			}
 			/* FALLTHROUGH */
 		case PS_SAW_PERCENT:
 ps_common:
-			if (ksh_isalphx(c))
+			if (ctype(c, C_ALPHX))
 				state = PS_IDENT;
-			else if (ksh_isdigit(c))
+			else if (ctype(c, C_DIGIT))
 				state = PS_NUMBER;
 			else if (ctype(c, C_VAR1))
 				state = PS_VAR1;
@ -1620,14 +1620,15 @@ get_brace_var(XString *wsp, char *wp)
 				goto out;
 			break;
 		case PS_IDENT:
-			if (!ksh_isalnux(c)) {
-				if (c == '[') {
+			if (!ctype(c, C_ALNUX)) {
+				if (ord(c) == ord('[')) {
 					char *tmp, *p;

 					if (!arraysub(&tmp))
 						yyerror("missing ]");
 					*wp++ = c;
-					for (p = tmp; *p; ) {
+					p = tmp;
+					while (*p) {
 						Xcheck(*wsp, wp);
 						*wp++ = *p++;
 					}
@ -1640,7 +1641,7 @@ get_brace_var(XString *wsp, char *wp)
 next:
 			break;
 		case PS_NUMBER:
-			if (!ksh_isdigit(c))
+			if (!ctype(c, C_DIGIT))
 				goto out;
 			break;
 		case PS_VAR1:
@ -1675,9 +1676,9 @@ arraysub(char **strp)
 		c = getsc();
 		Xcheck(ws, wp);
 		*wp++ = c;
-		if (c == '[')
+		if (ord(c) == ord('['))
 			depth++;
-		else if (c == ']')
+		else if (ord(c) == ord(']'))
 			depth--;
 	} while (depth > 0 && c && c != '\n');

@ -1756,19 +1757,19 @@ yyskiputf8bom(void)
 {
 	int c;

-	if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
+	if (rtt2asc((c = o_getsc_u())) != 0xEF) {
 		ungetsc_i(c);
 		return;
 	}
-	if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
+	if (rtt2asc((c = o_getsc_u())) != 0xBB) {
 		ungetsc_i(c);
-		ungetsc_i(0xEF);
+		ungetsc_i(asc2rtt(0xEF));
 		return;
 	}
-	if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
+	if (rtt2asc((c = o_getsc_u())) != 0xBF) {
 		ungetsc_i(c);
-		ungetsc_i(0xBB);
-		ungetsc_i(0xEF);
+		ungetsc_i(asc2rtt(0xBB));
+		ungetsc_i(asc2rtt(0xEF));
 		return;
 	}
 	UTFMODE |= 8;
--- a/src/main.c
+++ b/src/main.c
@ -34,7 +34,7 @@
 #include <locale.h>
 #endif

-__RCSID("$MirOS: src/bin/mksh/main.c,v 1.332 2017/04/12 16:01:45 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/main.c,v 1.342 2017/04/28 11:13:47 tg Exp $");

 extern char **environ;

@ -236,6 +236,11 @@ main_init(int argc, const char *argv[], Source **sp, struct block **lp)
 	ssize_t k;
 #endif

+#if defined(MKSH_EBCDIC) || defined(MKSH_FAUX_EBCDIC)
+	ebcdic_init();
+#endif
+	set_ifs(TC_IFSWS);
+
 #ifdef __OS2__
 	for (i = 0; i < 3; ++i)
 		if (!isatty(i))
@ -333,8 +338,6 @@ main_init(int argc, const char *argv[], Source **sp, struct block **lp)

 	initvar();

-	initctypes();
-
 	inittraps();

 	coproc_init();
@ -409,12 +412,12 @@ main_init(int argc, const char *argv[], Source **sp, struct block **lp)

 	/* override default PATH regardless of environment */
 #ifdef MKSH_DEFPATH_OVERRIDE
-	 vp = global(TPATH);
-	 setstr(vp, MKSH_DEFPATH_OVERRIDE, KSH_RETURN_ERROR);
+	vp = global(TPATH);
+	setstr(vp, MKSH_DEFPATH_OVERRIDE, KSH_RETURN_ERROR);
 #endif

 	/* for security */
-	typeset("IFS= \t\n", 0, 0, 0, 0);
+	typeset(TinitIFS, 0, 0, 0, 0);

 	/* assign default shell variable values */
 	typeset("PATHSEP=" MKSH_PATHSEPS, 0, 0, 0, 0);
@ -497,7 +500,7 @@ main_init(int argc, const char *argv[], Source **sp, struct block **lp)
 		if (!(s->start = s->str = argv[argi++]))
 			errorf(Tf_optfoo, "", "", 'c', Treq_arg);
 		while (*s->str) {
-			if (*s->str != ' ' && ctype(*s->str, C_QUOTE))
+			if (ctype(*s->str, C_QUOTE))
 				break;
 			s->str++;
 		}
@ -1554,7 +1557,7 @@ check_fd(const char *name, int mode, const char **emsgp)
 		goto illegal_fd_name;
 	if (name[0] == 'p')
 		return (coproc_getfd(mode, emsgp));
-	if (!ksh_isdigit(name[0])) {
+	if (!ctype(name[0], C_DIGIT)) {
 illegal_fd_name:
 		if (emsgp)
 			*emsgp = "illegal file descriptor name";
@ -1893,7 +1896,7 @@ tnamecmp(const void *p1, const void *p2)
 	const struct tbl *a = *((const struct tbl * const *)p1);
 	const struct tbl *b = *((const struct tbl * const *)p2);

-	return (strcmp(a->name, b->name));
+	return (ascstrcmp(a->name, b->name));
 }

 struct tbl **
--- a/src/misc.c
+++ b/src/misc.c
@ -5,6 +5,8 @@
 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
 *		 2011, 2012, 2013, 2014, 2015, 2016, 2017
 *	mirabilos <m@mirbsd.org>
+ * Copyright (c) 2015
+ *	Daniel Richard G. <skunk@iSKUNK.ORG>
 *
 * Provided that these terms and disclaimer and all copyright notices
 * are retained or reproduced in an accompanying document, permission
@ -30,7 +32,7 @@
 #include <grp.h>
 #endif

-__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.255 2017/04/12 16:46:22 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/misc.c,v 1.279 2017/08/07 21:39:25 tg Exp $");

 #define KSH_CHVT_FLAG
 #ifdef MKSH_SMALL
@ -47,7 +49,8 @@ unsigned char chtypes[UCHAR_MAX + 1];
 static const unsigned char *pat_scan(const unsigned char *,
    const unsigned char *, bool) MKSH_A_PURE;
 static int do_gmatch(const unsigned char *, const unsigned char *,
-    const unsigned char *, const unsigned char *) MKSH_A_PURE;
+    const unsigned char *, const unsigned char *,
+    const unsigned char *) MKSH_A_PURE;
 static const unsigned char *gmatch_cclass(const unsigned char *, unsigned char)
    MKSH_A_PURE;
 #ifdef KSH_CHVT_CODE
@ -68,37 +71,6 @@ static int make_path(const char *, const char *, char **, XString *, int *);
 #define DO_SETUID(func, argvec) func argvec
 #endif

-/*
- * Fast character classes
- */
-void
-setctypes(const char *s, int t)
-{
-	if (t & C_IFS) {
-		unsigned int i = 0;
-
-		while (++i <= UCHAR_MAX)
-			chtypes[i] &= ~C_IFS;
-		/* include '\0' in C_IFS */
-		chtypes[0] |= C_IFS;
-	}
-	while (*s != 0)
-		chtypes[(unsigned char)*s++] |= t;
-}
-
-void
-initctypes(void)
-{
-	setctypes(letters_uc, C_ALPHX);
-	setctypes(letters_lc, C_ALPHX);
-	chtypes['_'] |= C_ALPHX;
-	setctypes("0123456789", C_DIGIT);
-	setctypes(TC_LEX1, C_LEX1);
-	setctypes("*@#!$-?", C_VAR1);
-	setctypes(TC_IFSWS, C_IFSWS);
-	setctypes("=-+?", C_SUBOP1);
-	setctypes("\t\n \"#$&'()*;<=>?[\\]`|", C_QUOTE);
-}

 /* called from XcheckN() to grow buffer */
 char *
@ -147,7 +119,7 @@ option(const char *n)
 {
 	size_t i = 0;

-	if ((n[0] == '-' || n[0] == '+') && n[1] && !n[2])
+	if (ctype(n[0], C_MINUS | C_PLUS) && n[1] && !n[2])
 		while (i < NELEM(options)) {
 			if (OFC(i) == n[1])
 				return (i);
@ -299,6 +271,11 @@ change_flag(enum sh_flag f, int what, bool newset)
 	} else if ((f == FPOSIX || f == FSH) && newval) {
 		/* Turning on -o posix or -o sh? */
 		Flag(FBRACEEXPAND) = 0;
+		/* Turning on -o posix? */
+		if (f == FPOSIX) {
+			/* C locale required for compliance */
+			UTFMODE = 0;
+		}
 	} else if (f == FTALKING) {
 		/* Changing interactive flag? */
 		if ((what == OF_CMDLINE || what == OF_SET) && procpid == kshpid)
@ -483,7 +460,7 @@ parse_args(const char **argv,
 		}
 	}
 	if (!(go.info & GI_MINUSMINUS) && argv[go.optind] &&
-	    (argv[go.optind][0] == '-' || argv[go.optind][0] == '+') &&
+	    ctype(argv[go.optind][0], C_MINUS | C_PLUS) &&
 	    argv[go.optind][1] == '\0') {
 		/* lone - clears -v and -x flags */
 		if (argv[go.optind][0] == '-') {
@ -512,7 +489,7 @@ parse_args(const char **argv,
 		for (i = go.optind; argv[i]; i++)
 			;
 		qsort(&argv[go.optind], i - go.optind, sizeof(void *),
-		    xstrcmp);
+		    ascpstrcmp);
 	}
 	if (arrayset)
 		go.optind += set_array(array, tobool(arrayset > 0),
@ -533,7 +510,7 @@ getn(const char *s, int *ai)

 	do {
 		c = *s++;
-	} while (ksh_isspace(c));
+	} while (ctype(c, C_SPACE));

 	switch (c) {
 	case '-':
@ -545,7 +522,7 @@ getn(const char *s, int *ai)
 	}

 	do {
-		if (!ksh_isdigit(c))
+		if (!ctype(c, C_DIGIT))
 			/* not numeric */
 			return (0);
 		if (num.u > 214748364U)
@ -585,7 +562,7 @@ simplify_gmatch_pattern(const unsigned char *sp)
 	sp = cp;
 simplify_gmatch_pat1a:
 	dp = cp;
-	se = sp + strlen((const void *)sp);
+	se = strnul(sp);
 	while ((c = *sp++)) {
 		if (!ISMAGIC(c)) {
 			*dp++ = c;
@ -657,29 +634,30 @@ gmatchx(const char *s, const char *p, bool isfile)
 	if (s == NULL || p == NULL)
 		return (0);

-	se = s + strlen(s);
-	pe = p + strlen(p);
+	pe = strnul(p);
 	/*
 	 * isfile is false iff no syntax check has been done on
-	 * the pattern. If check fails, just to a strcmp().
+	 * the pattern. If check fails, just do a strcmp().
 	 */
-	if (!isfile && !has_globbing(p, pe)) {
+	if (!isfile && !has_globbing(p)) {
 		size_t len = pe - p + 1;
 		char tbuf[64];
 		char *t = len <= sizeof(tbuf) ? tbuf : alloc(len, ATEMP);
 		debunk(t, p, len);
 		return (!strcmp(t, s));
 	}
+	se = strnul(s);

 	/*
 	 * since the do_gmatch() engine sucks so much, we must do some
 	 * pattern simplifications
 	 */
 	pnew = simplify_gmatch_pattern((const unsigned char *)p);
-	pe = pnew + strlen(pnew);
+	pe = strnul(pnew);

 	rv = do_gmatch((const unsigned char *)s, (const unsigned char *)se,
-	    (const unsigned char *)pnew, (const unsigned char *)pe);
+	    (const unsigned char *)pnew, (const unsigned char *)pe,
+	    (const unsigned char *)s);
 	afree(pnew, ATEMP);
 	return (rv);
 }
@ -690,7 +668,7 @@ gmatchx(const char *s, const char *p, bool isfile)
 * Syntax errors are:
 *	- [ with no closing ]
 *	- imbalanced $(...) expression
- *	- [...] and *(...) not nested (eg, [a$(b|]c), *(a[b|c]d))
+ *	- [...] and *(...) not nested (eg, @(a[b|)c], *(a[b|c]d))
 */
 /*XXX
 * - if no magic,
@ -701,76 +679,101 @@ gmatchx(const char *s, const char *p, bool isfile)
 *	return ?
 * - return ?
 */
-int
-has_globbing(const char *xp, const char *xpe)
+/*
+ * Scan the pattern pat, in which a byte satisfying ISMAGIC() marks the
+ * following byte as special. Returns true only if at least one glob
+ * construct (*, ?, a terminated bracket expression or an opening
+ * extglob pattern) was seen and every opened extglob pattern was closed
+ * again; a MAGIC byte directly followed by NUL yields false.
+ */
+bool
+has_globbing(const char *pat)
 {
-	const unsigned char *p = (const unsigned char *) xp;
-	const unsigned char *pe = (const unsigned char *) xpe;
-	int c;
-	int nest = 0, bnest = 0;
+	unsigned char c, subc;
 	bool saw_glob = false;
-	/* inside [...] */
-	bool in_bracket = false;
+	unsigned int nest = 0;
+	const unsigned char *p = (const unsigned char *)pat;
+	const unsigned char *s;

-	for (; p < pe; p++) {
-		if (!ISMAGIC(*p))
+	while ((c = *p++)) {
+		/* regular character? ok. */
+		if (!ISMAGIC(c))
 			continue;
-		if ((c = *++p) == '*' || c == '?')
+		/* MAGIC + NUL? abort. */
+		if (!(c = *p++))
+			return (false);
+		/* some specials */
+		if (ord(c) == ord('*') || ord(c) == ord('?')) {
+			/* easy glob, accept */
 			saw_glob = true;
-		else if (c == '[') {
-			if (!in_bracket) {
-				saw_glob = true;
-				in_bracket = true;
-				if (ISMAGIC(p[1]) && p[2] == '!')
-					p += 2;
-				if (ISMAGIC(p[1]) && p[2] == ']')
-					p += 2;
+		} else if (ord(c) == ord('[')) {
+			/* bracket expression; eat negation and initial ] */
+			if (ISMAGIC(p[0]) && ord(p[1]) == ord('!'))
+				p += 2;
+			if (ISMAGIC(p[0]) && ord(p[1]) == ord(']'))
+				p += 2;
+			/* check next string part */
+			s = p;
+			while ((c = *s++)) {
+				/* regular chars are ok */
+				if (!ISMAGIC(c))
+					continue;
+				/* MAGIC + NUL cannot happen */
+				if (!(c = *s++))
+					return (false);
+				/* terminating bracket? */
+				if (ord(c) == ord(']')) {
+					/* accept and continue */
+					p = s;
+					saw_glob = true;
+					break;
+				}
+				/* sub-bracket expressions */
+				if (ord(c) == ord('[') && (
+				    /* collating element? */
+				    ord(*s) == ord('.') ||
+				    /* equivalence class? */
+				    ord(*s) == ord('=') ||
+				    /* character class? */
+				    ord(*s) == ord(':'))) {
+					/* must stop with exactly the same c */
+					subc = *s++;
+					/* arbitrarily many chars in betwixt */
+					while ((c = *s++))
+						/* but only this sequence... */
+						if (c == subc && ISMAGIC(*s) &&
+						    ord(s[1]) == ord(']')) {
+							/* accept, terminate */
+							s += 2;
+							break;
+						}
+					/* EOS without: reject bracket expr */
+					if (!c)
+						break;
+					/* continue; */
+				}
+				/* anything else just goes on */
 			}
-			/*XXX Do we need to check ranges here? POSIX Q */
-		} else if (c == ']') {
-			if (in_bracket) {
-				if (bnest)
-					/* [a*(b]) */
-					return (0);
-				in_bracket = false;
-			}
-		} else if ((c & 0x80) && vstrchr("*+?@! ", c & 0x7f)) {
+		} else if ((c & 0x80) && ctype(c & 0x7F, C_PATMO | C_SPC)) {
+			/* opening pattern */
 			saw_glob = true;
-			if (in_bracket)
-				bnest++;
-			else
-				nest++;
-		} else if (c == '|') {
-			if (in_bracket && !bnest)
-				/* *(a[foo|bar]) */
-				return (0);
-		} else if (c == /*(*/ ')') {
-			if (in_bracket) {
-				if (!bnest--)
-					/* *(a[b)c] */
-					return (0);
-			} else if (nest)
-				nest--;
+			++nest;
+		} else if (ord(c) == ord(/*(*/ ')')) {
+			/* closing pattern */
+			if (nest)
+				--nest;
 		}
-		/*
-		 * else must be a MAGIC-MAGIC, or MAGIC-!,
-		 * MAGIC--, MAGIC-], MAGIC-{, MAGIC-, MAGIC-}
-		 */
 	}
-	return (saw_glob && !in_bracket && !nest);
+	return (saw_glob && !nest);
 }

 /* Function must return either 0 or 1 (assumed by code for 0x80|'!') */
 static int
 do_gmatch(const unsigned char *s, const unsigned char *se,
-    const unsigned char *p, const unsigned char *pe)
+    const unsigned char *p, const unsigned char *pe,
+    const unsigned char *smin)
 {
-	unsigned char sc, pc;
+	unsigned char sc, pc, sl = 0;
 	const unsigned char *prest, *psub, *pnext;
 	const unsigned char *srest;

 	if (s == NULL || p == NULL)
 		return (0);
+	/*
+	 * sl holds the subject byte preceding s; it stays NUL when s is
+	 * at smin or outside (smin, se]. The word-begin/word-end checks
+	 * in the bracket case compare it against the current byte sc.
+	 */
+	if (s > smin && s <= se)
+		sl = s[-1];
 	while (p < pe) {
 		pc = *p++;
 		sc = s < se ? *s : '\0';
@ -778,15 +781,39 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
 		if (!ISMAGIC(pc)) {
 			if (sc != pc)
 				return (0);
+			sl = sc;
 			continue;
 		}
-		switch (*p++) {
-		case '[':
+		switch (ord(*p++)) {
+		case ord('['):
+			/* BSD cclass extension? */
+			if (ISMAGIC(p[0]) && ord(p[1]) == ord('[') &&
+			    ord(p[2]) == ord(':') &&
+			    ctype((pc = p[3]), C_ANGLE) &&
+			    ord(p[4]) == ord(':') &&
+			    ISMAGIC(p[5]) && ord(p[6]) == ord(']') &&
+			    ISMAGIC(p[7]) && ord(p[8]) == ord(']')) {
+				/* zero-length match */
+				--s;
+				p += 9;
+				/* word begin? */
+				if (ord(pc) == ord('<') &&
+				    !ctype(sl, C_ALNUX) &&
+				    ctype(sc, C_ALNUX))
+					break;
+				/* word end? */
+				if (ord(pc) == ord('>') &&
+				    ctype(sl, C_ALNUX) &&
+				    !ctype(sc, C_ALNUX))
+					break;
+				/* neither */
+				return (0);
+			}
 			if (sc == 0 || (p = gmatch_cclass(p, sc)) == NULL)
 				return (0);
 			break;

-		case '?':
+		case ord('?'):
 			if (sc == 0)
 				return (0);
 			if (UTFMODE) {
@ -795,39 +822,39 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
 			}
 			break;

-		case '*':
+		case ord('*'):
 			if (p == pe)
 				return (1);
 			s--;
 			do {
-				if (do_gmatch(s, se, p, pe))
+				if (do_gmatch(s, se, p, pe, smin))
 					return (1);
 			} while (s++ < se);
 			return (0);

 		/**
-		 * [*+?@!](pattern|pattern|..)
+		 * [+*?@!](pattern|pattern|..)
 		 * This is also needed for ${..%..}, etc.
 		 */

 		/* matches one or more times */
-		case 0x80|'+':
+		case 0x80|ord('+'):
 		/* matches zero or more times */
-		case 0x80|'*':
+		case 0x80|ord('*'):
 			if (!(prest = pat_scan(p, pe, false)))
 				return (0);
 			s--;
 			/* take care of zero matches */
-			if (p[-1] == (0x80 | '*') &&
-			    do_gmatch(s, se, prest, pe))
+			if (ord(p[-1]) == (0x80 | ord('*')) &&
+			    do_gmatch(s, se, prest, pe, smin))
 				return (1);
 			for (psub = p; ; psub = pnext) {
 				pnext = pat_scan(psub, pe, true);
 				for (srest = s; srest <= se; srest++) {
-					if (do_gmatch(s, srest, psub, pnext - 2) &&
-					    (do_gmatch(srest, se, prest, pe) ||
-					    (s != srest && do_gmatch(srest,
-					    se, p - 2, pe))))
+					if (do_gmatch(s, srest, psub, pnext - 2, smin) &&
+					    (do_gmatch(srest, se, prest, pe, smin) ||
+					    (s != srest &&
+					    do_gmatch(srest, se, p - 2, pe, smin))))
 						return (1);
 				}
 				if (pnext == prest)
@ -836,24 +863,24 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
 			return (0);

 		/* matches zero or once */
-		case 0x80|'?':
+		case 0x80|ord('?'):
 		/* matches one of the patterns */
-		case 0x80|'@':
+		case 0x80|ord('@'):
 		/* simile for @ */
-		case 0x80|' ':
+		case 0x80|ord(' '):
 			if (!(prest = pat_scan(p, pe, false)))
 				return (0);
 			s--;
 			/* Take care of zero matches */
-			if (p[-1] == (0x80 | '?') &&
-			    do_gmatch(s, se, prest, pe))
+			if (ord(p[-1]) == (0x80 | ord('?')) &&
+			    do_gmatch(s, se, prest, pe, smin))
 				return (1);
 			for (psub = p; ; psub = pnext) {
 				pnext = pat_scan(psub, pe, true);
 				srest = prest == pe ? se : s;
 				for (; srest <= se; srest++) {
-					if (do_gmatch(s, srest, psub, pnext - 2) &&
-					    do_gmatch(srest, se, prest, pe))
+					if (do_gmatch(s, srest, psub, pnext - 2, smin) &&
+					    do_gmatch(srest, se, prest, pe, smin))
 						return (1);
 				}
 				if (pnext == prest)
@ -862,7 +889,7 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
 			return (0);

 		/* matches none of the patterns */
-		case 0x80|'!':
+		case 0x80|ord('!'):
 			if (!(prest = pat_scan(p, pe, false)))
 				return (0);
 			s--;
@ -872,7 +899,7 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
 				for (psub = p; ; psub = pnext) {
 					pnext = pat_scan(psub, pe, true);
 					if (do_gmatch(s, srest, psub,
-					    pnext - 2)) {
+					    pnext - 2, smin)) {
 						matched = 1;
 						break;
 					}
@ -880,7 +907,7 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
 						break;
 				}
 				if (!matched &&
-				    do_gmatch(srest, se, prest, pe))
+				    do_gmatch(srest, se, prest, pe, smin))
 					return (1);
 			}
 			return (0);
@ -890,55 +917,245 @@ do_gmatch(const unsigned char *s, const unsigned char *se,
 				return (0);
 			break;
 		}
+		sl = sc;
 	}
 	return (s == se);
 }

-static const unsigned char *
-gmatch_cclass(const unsigned char *p, unsigned char sub)
-{
-	unsigned char c, d;
-	bool notp, found = false;
-	const unsigned char *orig_p = p;
+/*
+ * Table of named character classes usable as [:name:] inside a bracket
+ * expression; gmatch_cclass() walks it linearly with strcmp() until it
+ * hits the entry whose name is NULL.
+ */
+/*XXX this is a prime example for bsearch or a const hashtable */
+static const struct cclass {
+	/* class name as written between [: and :] */
+	const char *name;
+	/* class mask handed to ctype() together with the subject byte */
+	uint32_t value;
+} cclasses[] = {
+	/* POSIX */
+	{ "alnum",	C_ALNUM	},
+	{ "alpha",	C_ALPHA	},
+	{ "blank",	C_BLANK	},
+	{ "cntrl",	C_CNTRL	},
+	{ "digit",	C_DIGIT	},
+	{ "graph",	C_GRAPH	},
+	{ "lower",	C_LOWER	},
+	{ "print",	C_PRINT	},
+	{ "punct",	C_PUNCT	},
+	{ "space",	C_SPACE	},
+	{ "upper",	C_UPPER	},
+	{ "xdigit",	C_SEDEC	},
+	/* BSD */
+	/* "<" and ">" are handled inline */
+	/* GNU bash */
+	{ "ascii",	C_ASCII	},
+	{ "word",	C_ALNUX	},
+	/* mksh */
+	{ "sh_alias",	C_ALIAS	},
+	{ "sh_edq",	C_EDQ	},
+	{ "sh_ifs",	C_IFS	},
+	{ "sh_ifsws",	C_IFSWS	},
+	{ "sh_nl",	C_NL	},
+	{ "sh_quote",	C_QUOTE	},
+	/* sentinel */
+	{ NULL,		0	}
+};

-	if ((notp = tobool(ISMAGIC(*p) && *++p == '!')))
-		p++;
-	do {
-		c = *p++;
+/*
+ * Match the single subject byte sc against the bracket expression whose
+ * body starts at pat (do_gmatch() passes the position just past the
+ * opening bracket and never calls this with sc == NUL).
+ *
+ * Returns a pointer just past the terminating MAGIC ] if the expression
+ * accepts sc (taking a leading MAGIC ! negation into account), NULL if
+ * it does not. If the pattern ends before a terminating bracket is
+ * found, the expression is invalid: pat is returned when sc is '[' so
+ * that the opening bracket matches literally, NULL otherwise.
+ *
+ * Sub-bracket substrings are copied into ATEMP storage, debunk()ed and
+ * freed again before the next element is examined.
+ */
+static const unsigned char *
+gmatch_cclass(const unsigned char *pat, unsigned char sc)
+{
+	unsigned char c, subc, lc;
+	const unsigned char *p = pat, *s;
+	bool found = false;
+	bool negated = false;
+	char *subp;
+
+	/* check for negation */
+	if (ISMAGIC(p[0]) && ord(p[1]) == ord('!')) {
+		p += 2;
+		negated = true;
+	}
+	/* make initial ] non-MAGIC */
+	if (ISMAGIC(p[0]) && ord(p[1]) == ord(']'))
+		++p;
+	/* iterate over bracket expression, debunk()ing on the fly */
+	while ((c = *p++)) {
+ nextc:
+		/* non-regular character? */
 		if (ISMAGIC(c)) {
-			c = *p++;
-			if ((c & 0x80) && !ISMAGIC(c)) {
-				/* extended pattern matching: *+?@! */
-				c &= 0x7F;
-				/* XXX the ( char isn't handled as part of [] */
-				if (c == ' ')
-					/* simile for @: plain (..) */
-					c = '(' /*)*/;
+			/* MAGIC + NUL cannot happen */
+			if (!(c = *p++))
+				break;
+			/* terminating bracket? */
+			if (ord(c) == ord(']')) {
+				/* accept and return */
+				return (found != negated ? p : NULL);
+			}
+			/* sub-bracket expressions */
+			if (ord(c) == ord('[') && (
+			    /* collating element? */
+			    ord(*p) == ord('.') ||
+			    /* equivalence class? */
+			    ord(*p) == ord('=') ||
+			    /* character class? */
+			    ord(*p) == ord(':'))) {
+				/* must stop with exactly the same c */
+				subc = *p++;
+				/* save away start of substring */
+				s = p;
+				/* arbitrarily many chars in betwixt */
+				while ((c = *p++))
+					/* but only this sequence... */
+					if (c == subc && ISMAGIC(*p) &&
+					    ord(p[1]) == ord(']')) {
+						/* accept, terminate */
+						p += 2;
+						break;
+					}
+				/* EOS without: reject bracket expr */
+				if (!c)
+					break;
+				/* debunk substring */
+				strndupx(subp, s, p - s - 3, ATEMP);
+				debunk(subp, subp, p - s - 3 + 1);
+ cclass_common:
+				/* whither subexpression */
+				if (ord(subc) == ord(':')) {
+					const struct cclass *cls = cclasses;
+
+					/* search for name in cclass list */
+					while (cls->name)
+						if (!strcmp(subp, cls->name)) {
+							/* found, match? */
+							if (ctype(sc,
+							    cls->value))
+								found = true;
+							/* break either way */
+							break;
+						} else
+							++cls;
+					/* that's all here */
+					afree(subp, ATEMP);
+					continue;
+				}
+				/* collating element or equivalence class */
+				/* Note: latter are treated as former */
+				if (ctype(subp[0], C_ASCII) && !subp[1])
+					/* [.a.] where a is one ASCII char */
+					c = subp[0];
+				else
+					/* force no match */
+					c = 0;
+				/* no longer needed */
+				afree(subp, ATEMP);
+			} else if (!ISMAGIC(c) && (c & 0x80)) {
+				/* 0x80|' ' is plain (...) */
+				if ((c &= 0x7F) != ' ') {
+					/* check single match NOW */
+					if (sc == c)
+						found = true;
+					/* next character is (...) */
+				}
+				c = '(' /*)*/;
 			}
 		}
-		if (c == '\0')
-			/* No closing ] - act as if the opening [ was quoted */
-			return (sub == '[' ? orig_p : NULL);
-		if (ISMAGIC(p[0]) && p[1] == '-' &&
-		    (!ISMAGIC(p[2]) || p[3] != ']')) {
-			/* MAGIC- */
-			p += 2;
-			d = *p++;
-			if (ISMAGIC(d)) {
-				d = *p++;
-				if ((d & 0x80) && !ISMAGIC(d))
-					d &= 0x7f;
-			}
-			/* POSIX says this is an invalid expression */
-			if (c > d)
-				return (NULL);
-		} else
-			d = c;
-		if (c == sub || (c <= sub && sub <= d))
-			found = true;
-	} while (!(ISMAGIC(p[0]) && p[1] == ']'));
+		/* range expression? */
+		if (!(ISMAGIC(p[0]) && ord(p[1]) == ord('-') &&
+		    /* not terminating bracket? */
+		    (!ISMAGIC(p[2]) || ord(p[3]) != ord(']')))) {
+			/* no, check single match */
+			if (sc == c)
+				/* note: sc is never NUL */
+				found = true;
+			/* do the next "first" character */
+			continue;
+		}
+		/* save lower range bound */
+		lc = c;
+		/* skip over the range operator */
+		p += 2;
+		/* do the same shit as above... almost */
+		subc = 0;
+		if (!(c = *p++))
+			break;
+		/* non-regular character? */
+		if (ISMAGIC(c)) {
+			/* MAGIC + NUL cannot happen */
+			if (!(c = *p++))
+				break;
+			/* sub-bracket expressions */
+			if (ord(c) == ord('[') && (
+			    /* collating element? */
+			    ord(*p) == ord('.') ||
+			    /* equivalence class? */
+			    ord(*p) == ord('=') ||
+			    /* character class? */
+			    ord(*p) == ord(':'))) {
+				/* must stop with exactly the same c */
+				subc = *p++;
+				/* save away start of substring */
+				s = p;
+				/* arbitrarily many chars in betwixt */
+				while ((c = *p++))
+					/* but only this sequence... */
+					if (c == subc && ISMAGIC(*p) &&
+					    ord(p[1]) == ord(']')) {
+						/* accept, terminate */
+						p += 2;
+						break;
+					}
+				/* EOS without: reject bracket expr */
+				if (!c)
+					break;
+				/* debunk substring */
+				strndupx(subp, s, p - s - 3, ATEMP);
+				debunk(subp, subp, p - s - 3 + 1);
+				/* whither subexpression */
+				if (ord(subc) == ord(':')) {
+					/* oops, not a range */

-	return ((found != notp) ? p+2 : NULL);
+					/* match single previous char */
+					if (lc && (sc == lc))
+						found = true;
+					/* match hyphen-minus */
+					if (ord(sc) == ord('-'))
+						found = true;
+					/* handle cclass common part */
+					goto cclass_common;
+				}
+				/* collating element or equivalence class */
+				/* Note: latter are treated as former */
+				if (ctype(subp[0], C_ASCII) && !subp[1])
+					/* [.a.] where a is one ASCII char */
+					c = subp[0];
+				else
+					/* force no match */
+					c = 0;
+				/* no longer needed */
+				afree(subp, ATEMP);
+				/* other meaning below */
+				subc = 0;
+			} else if (c == (0x80 | ' ')) {
+				/* 0x80|' ' is plain (...) */
+				c = '(' /*)*/;
+			} else if (!ISMAGIC(c) && (c & 0x80)) {
+				c &= 0x7F;
+				subc = '(' /*)*/;
+			}
+		}
+		/* now do the actual range match check */
+		if (lc != 0 /* && c != 0 */ &&
+		    asciibetical(lc) <= asciibetical(sc) &&
+		    asciibetical(sc) <= asciibetical(c))
+			found = true;
+		/* forced next character? */
+		if (subc) {
+			c = subc;
+			goto nextc;
+		}
+		/* otherwise, just go on with the pattern string */
+	}
+	/* if we broke here, the bracket expression was invalid */
+	if (ord(sc) == ord('['))
+		/* initial opening bracket as literal match */
+		return (pat);
+	/* or rather no match */
+	return (NULL);
 }

 /* Look for next ) or | (if match_sep) in *(foo|bar) pattern */
@ -953,16 +1170,30 @@ pat_scan(const unsigned char *p, const unsigned char *pe, bool match_sep)
 		if ((*++p == /*(*/ ')' && nest-- == 0) ||
 		    (*p == '|' && match_sep && nest == 0))
 			return (p + 1);
-		if ((*p & 0x80) && vstrchr("*+?@! ", *p & 0x7f))
+		if ((*p & 0x80) && ctype(*p & 0x7F, C_PATMO | C_SPC))
 			nest++;
 	}
 	return (NULL);
 }

 int
-xstrcmp(const void *p1, const void *p2)
+ascstrcmp(const void *s1, const void *s2)	/* strcmp in asciibetical() order */
 {
-	return (strcmp(*(const char * const *)p1, *(const char * const *)p2));
+	const uint8_t *cp1 = s1, *cp2 = s2;	/* walk both as unsigned octets */
+
+	while (*cp1 == *cp2) {			/* skip the common prefix */
+		if (*cp1++ == '\0')		/* NUL reached in both: equal */
+			return (0);
+		++cp2;
+	}	/* first differing octets decide, ranked via asciibetical() */
+	return ((int)asciibetical(*cp1) - (int)asciibetical(*cp2));
+}
+
+int
+ascpstrcmp(const void *pstr1, const void *pstr2)	/* args: ptrs to strings */
+{	/* dereference each argument once, then defer to ascstrcmp() */
+	return (ascstrcmp(*(const char * const *)pstr1,
+	    *(const char * const *)pstr2));
 }

 /* Initialise a Getopt structure */
@ -1032,7 +1263,7 @@ ksh_getopt(const char **argv, Getopt *go, const char *optionsp)
 		go->info |= flag == '-' ? GI_MINUS : GI_PLUS;
 	}
 	go->p++;
-	if (c == '?' || c == ':' || c == ';' || c == ',' || c == '#' ||
+	if (ctype(c, C_QUEST | C_COLON | C_HASH) || c == ';' || c == ',' ||
 	    !(o = cstrchr(optionsp, c))) {
 		if (optionsp[0] == ':') {
 			go->buf[0] = c;
@ -1086,13 +1317,14 @@ ksh_getopt(const char **argv, Getopt *go, const char *optionsp)
 		 * argument is missing.
 		 */
 		if (argv[go->optind - 1][go->p]) {
-			if (ksh_isdigit(argv[go->optind - 1][go->p])) {
+			if (ctype(argv[go->optind - 1][go->p], C_DIGIT)) {
 				go->optarg = argv[go->optind - 1] + go->p;
 				go->p = 0;
 			} else
 				go->optarg = NULL;
 		} else {
-			if (argv[go->optind] && ksh_isdigit(argv[go->optind][0])) {
+			if (argv[go->optind] &&
+			    ctype(argv[go->optind][0], C_DIGIT)) {
 				go->optarg = argv[go->optind++];
 				go->p = 0;
 			} else
@ -1115,8 +1347,8 @@ print_value_quoted(struct shf *shf, const char *s)
 	bool inquote = true;

 	/* first, check whether any quotes are needed */
-	while ((c = *p++) >= 32)
-		if (ctype(c, C_QUOTE))
+	while (rtt2asc(c = *p++) >= 32)
+		if (ctype(c, C_QUOTE | C_SPC))
 			inquote = false;

 	p = (const unsigned char *)s;
@ -1154,6 +1386,7 @@ print_value_quoted(struct shf *shf, const char *s)
 		shf_putc('$', shf);
 		shf_putc('\'', shf);
 		while ((c = *p) != 0) {
+#ifndef MKSH_EBCDIC
 			if (c >= 0xC2) {
 				n = utf_mbtowc(&wc, (const char *)p);
 				if (n != (size_t)-1) {
@ -1162,10 +1395,11 @@ print_value_quoted(struct shf *shf, const char *s)
 					continue;
 				}
 			}
+#endif
 			++p;
 			switch (c) {
 			/* see unbksl() in this file for comments */
-			case 7:
+			case KSH_BEL:
 				c = 'a';
 				if (0)
 					/* FALLTHROUGH */
@ -1189,11 +1423,11 @@ print_value_quoted(struct shf *shf, const char *s)
 				  c = 't';
 				if (0)
 					/* FALLTHROUGH */
-			case 11:
+			case KSH_VTAB:
 				  c = 'v';
 				if (0)
 					/* FALLTHROUGH */
-			case '\033':
+			case KSH_ESC:
 				/* take E not e because \e is \ in *roff */
 				  c = 'E';
 				/* FALLTHROUGH */
@ -1203,7 +1437,12 @@ print_value_quoted(struct shf *shf, const char *s)
 				if (0)
 					/* FALLTHROUGH */
 			default:
-				  if (c < 32 || c > 0x7E) {
+#if defined(MKSH_EBCDIC) || defined(MKSH_FAUX_EBCDIC)
+				  if (ksh_isctrl(c))
+#else
+				  if (!ctype(c, C_PRINT))
+#endif
+				    {
 					/* FALLTHROUGH */
 			case '\'':
 					shf_fprintf(shf, "\\%03o", c);
@ -2154,13 +2393,7 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
 	fc = (*fg)();
 	switch (fc) {
 	case 'a':
-		/*
-		 * according to the comments in pdksh, \007 seems
-		 * to be more portable than \a (due to HP-UX cc,
-		 * Ultrix cc, old pcc, etc.) so we avoid the escape
-		 * sequence altogether in mksh and assume ASCII
-		 */
-		wc = 7;
+		wc = KSH_BEL;
 		break;
 	case 'b':
 		wc = '\b';
@ -2169,11 +2402,11 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
 		if (!cstyle)
 			goto unknown_escape;
 		c = (*fg)();
-		wc = CTRL(c);
+		wc = ksh_toctrl(c);
 		break;
 	case 'E':
 	case 'e':
-		wc = 033;
+		wc = KSH_ESC;
 		break;
 	case 'f':
 		wc = '\f';
@ -2188,8 +2421,7 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
 		wc = '\t';
 		break;
 	case 'v':
-		/* assume ASCII here as well */
-		wc = 11;
+		wc = KSH_VTAB;
 		break;
 	case '1':
 	case '2':
@ -2212,7 +2444,7 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
 		wc = 0;
 		i = 3;
 		while (i--)
-			if ((c = (*fg)()) >= ord('0') && c <= ord('7'))
+			if (ctype((c = (*fg)()), C_OCTAL))
 				wc = (wc << 3) + ksh_numdig(c);
 			else {
 				(*fp)(c);
@ -2240,17 +2472,17 @@ unbksl(bool cstyle, int (*fg)(void), void (*fp)(int))
 		n = 0;
 		while (n < i || i == -1) {
 			wc <<= 4;
-			if ((c = (*fg)()) >= ord('0') && c <= ord('9'))
-				wc += ksh_numdig(c);
-			else if (c >= ord('A') && c <= ord('F'))
-				wc += ksh_numuc(c) + 10;
-			else if (c >= ord('a') && c <= ord('f'))
-				wc += ksh_numlc(c) + 10;
-			else {
+			if (!ctype((c = (*fg)()), C_SEDEC)) {
 				wc >>= 4;
 				(*fp)(c);
 				break;
 			}
+			if (ctype(c, C_DIGIT))
+				wc += ksh_numdig(c);
+			else if (ctype(c, C_UPPER))
+				wc += ksh_numuc(c) + 10;
+			else
+				wc += ksh_numlc(c) + 10;
 			++n;
 		}
 		if (!n)
--- a/src/mksh.1
+++ b/src/mksh.1
@ -1,4 +1,4 @@
-.\" $MirOS: src/bin/mksh/mksh.1,v 1.442 2017/04/12 18:30:58 tg Exp $
+.\" $MirOS: src/bin/mksh/mksh.1,v 1.451 2017/08/16 21:40:14 tg Exp $
 .\" $OpenBSD: ksh.1,v 1.160 2015/07/04 13:27:04 feinerer Exp $
 .\"-
 .\" Copyright © 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
@ -76,7 +76,7 @@
 .\" with -mandoc, it might implement .Mx itself, but we want to
 .\" use our own definition. And .Dd must come *first*, always.
 .\"
-.Dd $Mdocdate: April 12 2017 $
+.Dd $Mdocdate: August 16 2017 $
 .\"
 .\" Check which macro package we use, and do other -mdoc setup.
 .\"
@ -2091,7 +2091,7 @@ this hack; it's derived from the original
 which did print the delimiter character so you were out of luck
 if you did not have any non-printing characters.
 .Pp
-Since Backslashes and other special characters may be
+Since backslashes and other special characters may be
 interpreted by the shell, to set
 .Ev PS1
 either escape the backslash itself
@ -2106,7 +2106,7 @@ in reverse video
 .Pq colour would work, too ,
 in the prompt string:
 .Bd -literal -offset indent
-x=$(print \e\e001)
+x=$(print \e\e001) # otherwise unused char
 PS1="$x$(print \e\er)$x$(tput so)$x\e$PWD$x$(tput se)$x\*(Gt "
 .Ed
 .Pp
@ -3066,11 +3066,13 @@ Without arguments,
 .Ic alias
 lists all aliases.
 For any name without a value, the existing alias is listed.
-Any name with a value defines an alias (see
+Any name with a value defines an alias; see
 .Sx Aliases
-above).
-.Li \&[A\-Za\-z0\-9_!%,@\-]
-are valid in names except they may not begin with a hyphen-minus.
+above.
+.Li \&[][A\-Za\-z0\-9_!%,.@:\-]
+are valid in names, except they may not begin with a hyphen-minus, and
+.Ic \&[[
+is not a valid alias name.
 .Pp
 When listing aliases, one of two formats is used.
 Normally, aliases are listed as
@ -3162,7 +3164,8 @@ other trailing character will be processed afterwards.
 .Pp
 Control characters may be written using caret notation
 i.e. \*(haX represents Ctrl-X.
-Note that although only two prefix characters (usually ESC and \*(haX)
+The caret itself can be escaped by a backslash, which also escapes itself.
+Note that although only three prefix characters (usually ESC, \*(haX and NUL)
 are supported, some multi-character sequences can be supported.
 .Pp
 The following default bindings show how the arrow keys, the home, end and
@ -4305,9 +4308,11 @@ Automatically enabled if the basename of the shell invocation begins with
 .Dq sh
 and this autodetection feature is compiled in
 .Pq not in MirBSD .
-As a side effect, setting this flag turns off
+As a side effect, setting this flag turns off the
 .Ic braceexpand
-mode, which can be turned back on manually, and
+and
+.Ic utf8\-mode
+flags, which can be turned back on manually, and
 .Ic sh
 mode (unless both are enabled at the same time).
 .It Fl o Ic sh
@ -5382,6 +5387,11 @@ only lists signal names, all in one line.
 .Ic getopts
 does not accept options with a leading
 .Ql + .
+.It
+.Ic exec
+skips builtins, functions and other commands and uses a
+.Ev PATH
+search to determine the utility to execute.
 .El
 .Ss SH mode
 Compatibility mode; intended for use with legacy scripts that
@ -5537,7 +5547,7 @@ Emacs key bindings:
 .No INTR Pq \*(haC ,
 .No \*(haG
 .Xc
-Abort the current command, empty the line buffer and
+Abort the current command, save it to the history, empty the line buffer and
 set the exit state to interrupted.
 .It auto\-insert: Op Ar n
 Simply causes the character to appear as literal input.
@ -5572,7 +5582,8 @@ Uppercase the first ASCII character in the next
 words, leaving the cursor past the end of the last word.
 .It clear\-screen: \*(ha[\*(haL
 Prints a compile-time configurable sequence to clear the screen and home
-the cursor, redraws the entire prompt and the currently edited input line.
+the cursor, redraws the last line of the prompt string and the currently
+edited input line.
 The default sequence works for almost all standard terminals.
 .It comment: \*(ha[#
 If the current line does not begin with a comment character, one is added at
@ -6434,7 +6445,7 @@ Undo all changes that have been made to the current line.
 They move as expected, both in insert and command mode.
 .It Ar intr No and Ar quit
 The interrupt and quit terminal characters cause the current line to be
-deleted and a new prompt to be printed.
+moved to the history and a new prompt to be printed.
 .El
 .Sh FILES
 .Bl -tag -width XetcXsuid_profile -compact
@ -6584,7 +6595,7 @@ and
 .An Michael Rendell .
 The effort of several projects, such as Debian and OpenBSD, and other
 contributors including our users, to improve the shell is appreciated.
-See the documentation, web site and CVS for details.
+See the documentation, website and source code (CVS) for details.
 .Pp
 .Nm mksh\-os2
 is developed by
@ -6594,6 +6605,10 @@ is developed by
 is developed by
 .An Michael Langguth Aq Mt lan@scalaris.com .
 .Pp
+.Nm mksh Ns / Ns Tn z/OS
+is contributed by
+.An Daniel Richard G. Aq Mt skunk@iSKUNK.ORG .
+.Pp
 The BSD daemon is Copyright \(co Marshall Kirk McKusick.
 The complete legalese is at:
 .Pa http://www.mirbsd.org/TaC\-mksh.txt
@ -6633,12 +6648,14 @@ supports only the
 locale.
 .Nm mksh Ns 's
 .Ic utf8\-mode
+.Em must
+be disabled in POSIX mode, and it
 only supports the Unicode BMP (Basic Multilingual Plane) and maps
 raw octets into the U+EF80..U+EFFF wide character range; compare
 .Sx Arithmetic expressions .
 The following
 .Tn POSIX
-.Nm sh
+.Nm sh Ns -compatible
 code toggles the
 .Ic utf8\-mode
 option dependent on the current
@ -6680,7 +6697,7 @@ for the in-memory portion of the history is slow, should use
 .Xr memmove 3 .
 .Pp
 This document attempts to describe
-.Nm mksh\ R55
+.Nm mksh\ R56
 and up,
 .\" with vendor patches from insert-your-name-here,
 compiled without any options impacting functionality, such as
@ -6881,3 +6898,9 @@ commands starting with what was already entered.
 .Nm
 separates the shortcuts: Cursor Up goes up one command
 and PgUp searches the history as described above.
+.Ss "My question is not answered here!"
+Check
+.Pa http://www.mirbsd.org/mksh\-faq.htm
+which contains a collection of frequently asked questions about
+.Nm
+in general, for packagers, etc., while the questions above are aimed at users.
--- a/src/os2.c
+++ b/src/os2.c
@ -1,6 +1,8 @@
 /*-
 * Copyright (c) 2015
 *	KO Myung-Hun <komh@chollian.net>
+ * Copyright (c) 2017
+ *	mirabilos <m@mirbsd.org>
 *
 * Provided that these terms and disclaimer and all copyright notices
 * are retained or reproduced in an accompanying document, permission
@ -28,7 +30,7 @@
 #include <unistd.h>
 #include <process.h>

-__RCSID("$MirOS: src/bin/mksh/os2.c,v 1.1 2017/04/02 15:00:44 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/os2.c,v 1.2 2017/04/29 22:04:29 tg Exp $");

 static char *remove_trailing_dots(char *);
 static int access_stat_ex(int (*)(), const char *, void *);
@ -247,9 +249,9 @@ setextlibpath(const char *name, const char *val)
 static char *
 remove_trailing_dots(char *name)
 {
-	char *p;
+	char *p = strnul(name);

-	for (p = name + strlen(name); --p > name && *p == '.'; )
+	while (--p > name && *p == '.')
 		/* nothing */;

 	if (*p != '.' && *p != '/' && *p != '\\' && *p != ':')
--- a/src/sh.h
+++ b/src/sh.h
@ -112,6 +112,13 @@
 #include <wchar.h>
 #endif

+/* monkey-patch known-bad offsetof versions to quell a warning */
+#if (defined(__KLIBC__) || defined(__dietlibc__)) && \
+    ((defined(__GNUC__) && (__GNUC__ > 3)) || defined(__NWCC__))
+#undef offsetof
+#define offsetof(s, e)		__builtin_offsetof(s, e)
+#endif
+
 #undef __attribute__
 #if HAVE_ATTRIBUTE_BOUNDED
 #define MKSH_A_BOUNDED(x,y,z)	__attribute__((__bounded__(x, y, z)))
@ -175,9 +182,9 @@
 #endif

 #ifdef EXTERN
-__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.808 2017/04/12 17:38:46 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/sh.h,v 1.841 2017/08/29 13:38:31 tg Exp $");
 #endif
-#define MKSH_VERSION "R55 2017/04/12"
+#define MKSH_VERSION "R56 2017/08/29"

 /* arithmetic types: C implementation */
 #if !HAVE_CAN_INTTYPES
@ -257,6 +264,23 @@ typedef MKSH_TYPEDEF_SSIZE_T ssize_t;

 #ifndef MKSH_INCLUDES_ONLY

+/* EBCDIC fun */
+
+/* see the large comment in shf.c for an EBCDIC primer */
+
+#if defined(MKSH_FOR_Z_OS) && defined(__MVS__) && defined(__IBMC__) && defined(__CHARSET_LIB)
+# if !__CHARSET_LIB && !defined(MKSH_EBCDIC)
+#  error "Please compile with Build.sh -E for EBCDIC!"
+# endif
+# if __CHARSET_LIB && defined(MKSH_EBCDIC)
+#  error "Please compile without -E argument to Build.sh for ASCII!"
+# endif
+# if __CHARSET_LIB && !defined(_ENHANCED_ASCII_EXT)
+   /* go all-out on ASCII */
+#  define _ENHANCED_ASCII_EXT 0xFFFFFFFF
+# endif
+#endif
+
 /* extra types */

 /* getrusage does not exist on OS/2 kLIBC */
@ -349,6 +373,8 @@ struct rusage {
 #define ksh_NSIG (_SIGMAX + 1)
 #elif defined(NSIG_MAX)
 #define ksh_NSIG (NSIG_MAX)
+#elif defined(MKSH_FOR_Z_OS)
+#define ksh_NSIG 40
 #else
 # error Please have your platform define NSIG.
 #endif
@ -487,6 +513,23 @@ extern int __cdecl setegid(gid_t);
 #define ISTRIP		0
 #endif

+#ifdef MKSH_EBCDIC
+#define KSH_BEL		'\a'
+#define KSH_ESC		047
+#define KSH_ESC_STRING	"\047"
+#define KSH_VTAB	'\v'
+#else
+/*
+ * According to the comments in pdksh, \007 seems to be more portable
+ * than \a (HP-UX cc, Ultrix cc, old pcc, etc.) so we avoid the escape
+ * sequence if ASCII can be assumed.
+ */
+#define KSH_BEL		7
+#define KSH_ESC		033
+#define KSH_ESC_STRING	"\033"
+#define KSH_VTAB	11
+#endif
+

 /* some useful #defines */
 #ifdef EXTERN
@ -498,16 +541,22 @@ extern int __cdecl setegid(gid_t);
 #endif

 /* define bit in flag */
-#define BIT(i)		(1 << (i))
+#define BIT(i)		(1U << (i))
 #define NELEM(a)	(sizeof(a) / sizeof((a)[0]))

 /*
 * Make MAGIC a char that might be printed to make bugs more obvious, but
 * not a char that is used often. Also, can't use the high bit as it causes
 * portability problems (calling strchr(x, 0x80 | 'x') is error prone).
+ *
+ * MAGIC can be followed by MAGIC (to escape the octet itself) or one of:
+ * ' !)*,-?[]{|}' 0x80|' !*+?@' (probably… hysteric raisins abound)
+ *
+ * The |0x80 is likely unsafe on EBCDIC :( though the listed chars are
+ * low-bit7 at least on cp1047 so YMMV
 */
-#define MAGIC		(7)	/* prefix for *?[!{,} during expand */
-#define ISMAGIC(c)	((unsigned char)(c) == MAGIC)
+#define MAGIC		KSH_BEL	/* prefix for *?[!{,} during expand */
+#define ISMAGIC(c)	(ord(c) == ord(MAGIC))

 EXTERN const char *safe_prompt; /* safe prompt if PS1 substitution fails */

@ -521,17 +570,21 @@ EXTERN const char *safe_prompt; /* safe prompt if PS1 substitution fails */
 #else
 #define KSH_VERSIONNAME_TEXTMODE	""
 #endif
+#ifdef MKSH_EBCDIC
+#define KSH_VERSIONNAME_EBCDIC		" +EBCDIC"
+#else
+#define KSH_VERSIONNAME_EBCDIC		""
+#endif
 #ifndef KSH_VERSIONNAME_VENDOR_EXT
 #define KSH_VERSIONNAME_VENDOR_EXT	""
 #endif
 EXTERN const char initvsn[] E_INIT("KSH_VERSION=@(#)" KSH_VERSIONNAME_ISLEGACY \
-    " KSH " MKSH_VERSION KSH_VERSIONNAME_TEXTMODE KSH_VERSIONNAME_VENDOR_EXT);
+    " KSH " MKSH_VERSION KSH_VERSIONNAME_EBCDIC KSH_VERSIONNAME_TEXTMODE \
+    KSH_VERSIONNAME_VENDOR_EXT);
 #define KSH_VERSION	(initvsn + /* "KSH_VERSION=@(#)" */ 16)

 EXTERN const char digits_uc[] E_INIT("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ");
 EXTERN const char digits_lc[] E_INIT("0123456789abcdefghijklmnopqrstuvwxyz");
-#define letters_uc (digits_uc + 10)
-#define letters_lc (digits_lc + 10)

 /*
 * Evil hack for const correctness due to API brokenness
@ -585,15 +638,12 @@ char *ucstrstr(char *, const char *);
 #endif

 #if defined(DEBUG) || defined(__COVERITY__)
-#define mkssert(e)	do { if (!(e)) exit(255); } while (/* CONSTCOND */ 0)
 #ifndef DEBUG_LEAKS
 #define DEBUG_LEAKS
 #endif
-#else
-#define mkssert(e)	do { } while (/* CONSTCOND */ 0)
 #endif

-#if (!defined(MKSH_BUILDMAKEFILE4BSD) && !defined(MKSH_BUILDSH)) || (MKSH_BUILD_R != 551)
+#if (!defined(MKSH_BUILDMAKEFILE4BSD) && !defined(MKSH_BUILDSH)) || (MKSH_BUILD_R != 562)
 #error Must run Build.sh to compile this.
 extern void thiswillneverbedefinedIhope(void);
 int
@ -605,7 +655,7 @@ im_sorry_dave(void)
 #endif

 /* use this ipv strchr(s, 0) but no side effects in s! */
-#define strnul(s)	((s) + strlen(s))
+#define strnul(s)	((s) + strlen((const void *)(s)))

 #define utf_ptradjx(src, dst) do {					\
 	(dst) = (src) + utf_ptradj(src);				\
@ -621,7 +671,7 @@ im_sorry_dave(void)
 #else
 /* be careful to evaluate arguments only once! */
 #define strdupx(d, s, ap) do {						\
-	const char *strdup_src = (s);					\
+	const char *strdup_src = (const void *)(s);			\
 	char *strdup_dst = NULL;					\
 									\
 	if (strdup_src != NULL) {					\
@ -632,7 +682,7 @@ im_sorry_dave(void)
 	(d) = strdup_dst;						\
 } while (/* CONSTCOND */ 0)
 #define strndupx(d, s, n, ap) do {					\
-	const char *strdup_src = (s);					\
+	const char *strdup_src = (const void *)(s);			\
 	char *strdup_dst = NULL;					\
 									\
 	if (strdup_src != NULL) {					\
@ -753,8 +803,8 @@ enum sh_flag {
 struct sretrace_info;
 struct yyrecursive_state;

-EXTERN struct sretrace_info *retrace_info E_INIT(NULL);
-EXTERN int subshell_nesting_type E_INIT(0);
+EXTERN struct sretrace_info *retrace_info;
+EXTERN int subshell_nesting_type;

 extern struct env {
 	ALLOC_ITEM alloc_INT;	/* internal, do not touch */
@ -865,8 +915,8 @@ EXTERN char null[] E_INIT("");
 EXTERN const char T4spaces[] E_INIT("    ");
 #define T1space (Treal_sp2 + 5)
 #define Tcolsp (Tf_sD_ + 2)
-EXTERN const char TC_LEX1[] E_INIT("|&;<>() \t\n");
-#define TC_IFSWS (TC_LEX1 + 7)
+#define TC_IFSWS (TinitIFS + 4)
+EXTERN const char TinitIFS[] E_INIT("IFS= \t\n");
 EXTERN const char TFCEDIT_dollaru[] E_INIT("${FCEDIT:-/bin/ed} $_");
 #define Tspdollaru (TFCEDIT_dollaru + 18)
 EXTERN const char Tsgdot[] E_INIT("*=.");
@ -1026,8 +1076,8 @@ EXTERN const char T_devtty[] E_INIT("/dev/tty");
 #define T4spaces "    "
 #define T1space " "
 #define Tcolsp ": "
-#define TC_LEX1 "|&;<>() \t\n"
 #define TC_IFSWS " \t\n"
+#define TinitIFS "IFS= \t\n"
 #define TFCEDIT_dollaru "${FCEDIT:-/bin/ed} $_"
 #define Tspdollaru " $_"
 #define Tsgdot "*=."
@ -1277,7 +1327,7 @@ enum tmout_enum {
 	TMOUT_LEAVING		/* have timed out */
 };
 EXTERN unsigned int ksh_tmout;
-EXTERN enum tmout_enum ksh_tmout_state E_INIT(TMOUT_EXECUTING);
+EXTERN enum tmout_enum ksh_tmout_state;

 /* For "You have stopped jobs" message */
 EXTERN bool really_exit;
@ -1285,39 +1335,178 @@ EXTERN bool really_exit;
 /*
 * fast character classes
 */
-#define C_ALPHX	 BIT(0)		/* A-Za-z_ */
-#define C_DIGIT	 BIT(1)		/* 0-9 */
-#define C_LEX1	 BIT(2)		/* \t \n\0|&;<>() */
-#define C_VAR1	 BIT(3)		/* *@#!$-? */
-#define C_IFSWS	 BIT(4)		/* \t \n (IFS white space) */
-#define C_SUBOP1 BIT(5)		/* "=-+?" */
-#define C_QUOTE	 BIT(6)		/* \t\n "#$&'()*;<=>?[\]`| (needing quoting) */
-#define C_IFS	 BIT(7)		/* $IFS */

-extern unsigned char chtypes[];
+/* internal types, do not reference */

-#define ctype(c, t)	tobool(chtypes[(unsigned char)(c)] & (t))
-#define ord(c)		((int)(unsigned char)(c))
-#define ksh_issubop2(c)	tobool((c) == ord('#') || (c) == ord('%'))
-#define ksh_isalias(c)	(ctype((c), C_ALPHX | C_DIGIT) || (c) == ord('!') || \
-			    (c) == ord('%') || (c) == ord(',') || \
-			    (c) == ord('@') || (c) == ord('-'))
-#define ksh_isalpha(c)	(ctype((c), C_ALPHX) && (c) != ord('_'))
-#define ksh_isalphx(c)	ctype((c), C_ALPHX)
-#define ksh_isalnux(c)	ctype((c), C_ALPHX | C_DIGIT)
-#define ksh_isdigit(c)	ctype((c), C_DIGIT)
-#define ksh_islower(c)	(((c) >= 'a') && ((c) <= 'z'))
-#define ksh_isupper(c)	(((c) >= 'A') && ((c) <= 'Z'))
-#define ksh_tolower(c)	(ksh_isupper(c) ? (c) - 'A' + 'a' : (c))
-#define ksh_toupper(c)	(ksh_islower(c) ? (c) - 'a' + 'A' : (c))
-#define ksh_isdash(s)	(((s)[0] == '-') && ((s)[1] == '\0'))
-#define ksh_isspace(c)	((((c) >= 0x09) && ((c) <= 0x0D)) || ((c) == 0x20))
-#define ksh_eq(c,u,l)	(((c) | 0x20) == (l))
-#define ksh_numdig(c)	((c) - ord('0'))
-#define ksh_numuc(c)	((c) - ord('A'))
-#define ksh_numlc(c)	((c) - ord('a'))
+/* initially empty — filled at runtime from $IFS */
+#define CiIFS	BIT(0)
+#define CiCNTRL	BIT(1)	/* \x01‥\x08\x0E‥\x1F\x7F	*/
+#define CiUPPER	BIT(2)	/* A‥Z				*/
+#define CiLOWER	BIT(3)	/* a‥z				*/
+#define CiHEXLT	BIT(4)	/* A‥Fa‥f			*/
+#define CiOCTAL	BIT(5)	/* 0‥7				*/
+#define CiQCL	BIT(6)	/* &();|			*/
+#define CiALIAS	BIT(7)	/* !,.@				*/
+#define CiQCX	BIT(8)	/* *[\\				*/
+#define CiVAR1	BIT(9)	/* !*@				*/
+#define CiQCM	BIT(10)	/* /^~				*/
+#define CiDIGIT	BIT(11)	/* 89				*/
+#define CiQC	BIT(12)	/* "'				*/
+#define CiSPX	BIT(13)	/* \x0B\x0C			*/
+#define CiCURLY	BIT(14)	/* {}				*/
+#define CiANGLE	BIT(15)	/* <>				*/
+#define CiNUL	BIT(16)	/* \x00				*/
+#define CiTAB	BIT(17)	/* \x09				*/
+#define CiNL	BIT(18)	/* \x0A				*/
+#define CiCR	BIT(19)	/* \x0D				*/
+#define CiSP	BIT(20)	/* \x20				*/
+#define CiHASH	BIT(21)	/* #				*/
+#define CiSS	BIT(22)	/* $				*/
+#define CiPERCT	BIT(23)	/* %				*/
+#define CiPLUS	BIT(24)	/* +				*/
+#define CiMINUS	BIT(25)	/* -				*/
+#define CiCOLON	BIT(26)	/* :				*/
+#define CiEQUAL	BIT(27)	/* =				*/
+#define CiQUEST	BIT(28)	/* ?				*/
+#define CiBRACK	BIT(29)	/* ]				*/
+#define CiUNDER	BIT(30)	/* _				*/
+#define CiGRAVE	BIT(31)	/* `				*/
+/* out of space, but one for *@ would make sense, possibly others */

-EXTERN int ifs0 E_INIT(' ');	/* for "$*" */
+/* compile-time initialised, ASCII only */
+extern const uint32_t tpl_ctypes[128];
+/* run-time, contains C_IFS as well, full 2⁸ octet range */
+EXTERN uint32_t ksh_ctypes[256];
+/* first octet of $IFS, for concatenating "$*" */
+EXTERN char ifs0;
+
+/* external types */
+
+/* !%,-.0‥9:@A‥Z[]_a‥z	valid characters in alias names */
+#define C_ALIAS	(CiALIAS | CiBRACK | CiCOLON | CiDIGIT | CiLOWER | CiMINUS | CiOCTAL | CiPERCT | CiUNDER | CiUPPER)
+/* 0‥9A‥Za‥z		alphanumerical */
+#define C_ALNUM	(CiDIGIT | CiLOWER | CiOCTAL | CiUPPER)
+/* 0‥9A‥Z_a‥z		alphanumerical plus underscore (“word character”) */
+#define C_ALNUX	(CiDIGIT | CiLOWER | CiOCTAL | CiUNDER | CiUPPER)
+/* A‥Za‥z		alphabetical (upper plus lower) */
+#define C_ALPHA	(CiLOWER | CiUPPER)
+/* A‥Z_a‥z		alphabetical plus underscore (identifier lead) */
+#define C_ALPHX	(CiLOWER | CiUNDER | CiUPPER)
+/* \x01‥\x7F		7-bit ASCII except NUL */
+#define C_ASCII (CiALIAS | CiANGLE | CiBRACK | CiCNTRL | CiCOLON | CiCR | CiCURLY | CiDIGIT | CiEQUAL | CiGRAVE | CiHASH | CiLOWER | CiMINUS | CiNL | CiOCTAL | CiPERCT | CiPLUS | CiQC | CiQCL | CiQCM | CiQCX | CiQUEST | CiSP | CiSPX | CiSS | CiTAB | CiUNDER | CiUPPER)
+/* \x09\x20		tab and space */
+#define C_BLANK	(CiSP | CiTAB)
+/* \x09\x20"'		separator for completion */
+#define C_CFS	(CiQC | CiSP | CiTAB)
+/* \x00‥\x1F\x7F	POSIX control characters */
+#define C_CNTRL	(CiCNTRL | CiCR | CiNL | CiNUL | CiSPX | CiTAB)
+/* 0‥9			decimal digits */
+#define C_DIGIT	(CiDIGIT | CiOCTAL)
+/* &();`|			editor x_locate_word() command */
+#define C_EDCMD	(CiGRAVE | CiQCL)
+/* \x09\x0A\x20"&'():;<=>`|	editor non-word characters */
+#define C_EDNWC	(CiANGLE | CiCOLON | CiEQUAL | CiGRAVE | CiNL | CiQC | CiQCL | CiSP | CiTAB)
+/* "#$&'()*:;<=>?[\\`{|}	editor quotes for tab completion */
+#define C_EDQ	(CiANGLE | CiCOLON | CiCURLY | CiEQUAL | CiGRAVE | CiHASH | CiQC | CiQCL | CiQCX | CiQUEST | CiSS)
+/* !‥~			POSIX graphical (alphanumerical plus punctuation) */
+#define C_GRAPH	(C_PUNCT | CiDIGIT | CiLOWER | CiOCTAL | CiUPPER)
+/* A‥Fa‥f		hex letter */
+#define C_HEXLT	CiHEXLT
+/* \x00 + $IFS		IFS whitespace, IFS non-whitespace, NUL */
+#define C_IFS	(CiIFS | CiNUL)
+/* \x09\x0A\x20		IFS whitespace */
+#define C_IFSWS	(CiNL | CiSP | CiTAB)
+/* \x09\x0A\x20&();<>|	(for the lexer) */
+#define C_LEX1	(CiANGLE | CiNL | CiQCL | CiSP | CiTAB)
+/* a‥z			lowercase letters */
+#define C_LOWER	CiLOWER
+/* not alnux or dollar	separator for motion */
+#define C_MFS	(CiALIAS | CiANGLE | CiBRACK | CiCNTRL | CiCOLON | CiCR | CiCURLY | CiEQUAL | CiGRAVE | CiHASH | CiMINUS | CiNL | CiNUL | CiPERCT | CiPLUS | CiQC | CiQCL | CiQCM | CiQCX | CiQUEST | CiSP | CiSPX | CiTAB)
+/* 0‥7			octal digit */
+#define C_OCTAL	CiOCTAL
+/* !*+?@		pattern magical operator, except space */
+#define C_PATMO	(CiPLUS | CiQUEST | CiVAR1)
+/* \x20‥~		POSIX printable characters (graph plus space) */
+#define C_PRINT	(C_GRAPH | CiSP)
+/* !"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~	POSIX punctuation */
+#define C_PUNCT	(CiALIAS | CiANGLE | CiBRACK | CiCOLON | CiCURLY | CiEQUAL | CiGRAVE | CiHASH | CiMINUS | CiPERCT | CiPLUS | CiQC | CiQCL | CiQCM | CiQCX | CiQUEST | CiSS | CiUNDER)
+/* \x09\x0A"#$&'()*;<=>?[\\]`|	characters requiring quoting, minus space */
+#define C_QUOTE	(CiANGLE | CiBRACK | CiEQUAL | CiGRAVE | CiHASH | CiNL | CiQC | CiQCL | CiQCX | CiQUEST | CiSS | CiTAB)
+/* 0‥9A‥Fa‥f		hexadecimal digit */
+#define C_SEDEC	(CiDIGIT | CiHEXLT | CiOCTAL)
+/* \x09‥\x0D\x20	POSIX space class */
+#define C_SPACE	(CiCR | CiNL | CiSP | CiSPX | CiTAB)
+/* +-=?			substitution operations with word */
+#define C_SUB1	(CiEQUAL | CiMINUS | CiPLUS | CiQUEST)
+/* #%			substitution operations with pattern */
+#define C_SUB2	(CiHASH | CiPERCT)
+/* A‥Z			uppercase letters */
+#define C_UPPER	CiUPPER
+/* !#$*-?@		substitution parameters, other than positional */
+#define C_VAR1	(CiHASH | CiMINUS | CiQUEST | CiSS | CiVAR1)
+
+/* individual chars you might like */
+#define C_ANGLE	CiANGLE		/* <>	angle brackets */
+#define C_COLON	CiCOLON		/* :	colon */
+#define C_CR	CiCR		/* \x0D	ASCII carriage return */
+#define C_DOLAR	CiSS		/* $	dollar sign */
+#define C_EQUAL	CiEQUAL		/* =	equals sign */
+#define C_GRAVE	CiGRAVE		/* `	accent gravis */
+#define C_HASH	CiHASH		/* #	hash sign */
+#define C_LF	CiNL		/* \x0A	ASCII line feed */
+#define C_MINUS	CiMINUS		/* -	hyphen-minus */
+#ifdef MKSH_WITH_TEXTMODE
+#define C_NL	(CiNL | CiCR)	/* 	CR or LF under OS/2 TEXTMODE */
+#else
+#define C_NL	CiNL		/* 	LF only like under Unix */
+#endif
+#define C_NUL	CiNUL		/* \x00	ASCII NUL */
+#define C_PLUS	CiPLUS		/* +	plus sign */
+#define C_QC	CiQC		/* "'	quote characters */
+#define C_QUEST	CiQUEST		/* ?	question mark */
+#define C_SPC	CiSP		/* \x20	ASCII space */
+#define C_TAB	CiTAB		/* \x09	ASCII horizontal tabulator */
+#define C_UNDER	CiUNDER		/* _	underscore */
+
+/* identity transform of octet */
+#define ord(c)		((unsigned int)(unsigned char)(c))
+#if defined(MKSH_EBCDIC) || defined(MKSH_FAUX_EBCDIC)
+EXTERN unsigned short ebcdic_map[256];
+EXTERN unsigned char ebcdic_rtt_toascii[256];
+EXTERN unsigned char ebcdic_rtt_fromascii[256];
+extern void ebcdic_init(void);
+/* one-way to-ascii-or-high conversion, for POSIX locale ordering */
+#define asciibetical(c)	((unsigned int)ebcdic_map[(unsigned char)(c)])
+/* two-way round-trip conversion, for general use */
+#define rtt2asc(c)	ebcdic_rtt_toascii[(unsigned char)(c)]
+#define asc2rtt(c)	ebcdic_rtt_fromascii[(unsigned char)(c)]
+/* case-independent char comparison */
+#define ksh_eq(c,u,l)	(ord(c) == ord(u) || ord(c) == ord(l))
+#else
+#define asciibetical(c)	ord(c)
+#define rtt2asc(c)	((unsigned char)(c))
+#define asc2rtt(c)	((unsigned char)(c))
+#define ksh_eq(c,u,l)	((ord(c) | 0x20) == ord(l))
+#endif
+/* control character foo */
+#ifdef MKSH_EBCDIC
+#define ksh_isctrl(c)	(ord(c) < 0x40 || ord(c) == 0xFF)
+#else
+#define ksh_isctrl(c)	((ord(c) & 0x7F) < 0x20 || (c) == 0x7F)
+#endif
+/* new fast character classes */
+#define ctype(c,t)	tobool(ksh_ctypes[ord(c)] & (t))
+/* helper functions */
+#define ksh_isdash(s)	tobool(ord((s)[0]) == '-' && ord((s)[1]) == '\0')
+/* invariant distance even in EBCDIC */
+#define ksh_tolower(c)	(ctype(c, C_UPPER) ? (c) - 'A' + 'a' : (c))
+#define ksh_toupper(c)	(ctype(c, C_LOWER) ? (c) - 'a' + 'A' : (c))
+/* strictly speaking rtt2asc() here, but this works even in EBCDIC */
+#define ksh_numdig(c)	(ord(c) - ord('0'))
+#define ksh_numuc(c)	(rtt2asc(c) - rtt2asc('A'))
+#define ksh_numlc(c)	(rtt2asc(c) - rtt2asc('a'))
+#define ksh_toctrl(c)	asc2rtt(ord(c) == ord('?') ? 0x7F : rtt2asc(c) & 0x9F)
+#define ksh_unctrl(c)	asc2rtt(rtt2asc(c) ^ 0x40U)

 /* Argument parsing for built-in commands and getopts command */

@ -1990,12 +2179,77 @@ typedef union {

 #define HERES		10	/* max number of << in line */

-#undef CTRL
-#define	CTRL(x)		((x) == '?' ? 0x7F : (x) & 0x1F)	/* ASCII */
-#define	UNCTRL(x)	((x) ^ 0x40)				/* ASCII */
-#define	ISCTRL(x)	(((signed char)((uint8_t)(x) + 1)) < 33)
+#ifdef MKSH_EBCDIC
+#define CTRL_AT	(0x00U)
+#define CTRL_A	(0x01U)
+#define CTRL_B	(0x02U)
+#define CTRL_C	(0x03U)
+#define CTRL_D	(0x37U)
+#define CTRL_E	(0x2DU)
+#define CTRL_F	(0x2EU)
+#define CTRL_G	(0x2FU)
+#define CTRL_H	(0x16U)
+#define CTRL_I	(0x05U)
+#define CTRL_J	(0x15U)
+#define CTRL_K	(0x0BU)
+#define CTRL_L	(0x0CU)
+#define CTRL_M	(0x0DU)
+#define CTRL_N	(0x0EU)
+#define CTRL_O	(0x0FU)
+#define CTRL_P	(0x10U)
+#define CTRL_Q	(0x11U)
+#define CTRL_R	(0x12U)
+#define CTRL_S	(0x13U)
+#define CTRL_T	(0x3CU)
+#define CTRL_U	(0x3DU)
+#define CTRL_V	(0x32U)
+#define CTRL_W	(0x26U)
+#define CTRL_X	(0x18U)
+#define CTRL_Y	(0x19U)
+#define CTRL_Z	(0x3FU)
+#define CTRL_BO	(0x27U)
+#define CTRL_BK	(0x1CU)
+#define CTRL_BC	(0x1DU)
+#define CTRL_CA	(0x1EU)
+#define CTRL_US	(0x1FU)
+#define CTRL_QM	(0x07U)
+#else
+#define CTRL_AT	(0x00U)
+#define CTRL_A	(0x01U)
+#define CTRL_B	(0x02U)
+#define CTRL_C	(0x03U)
+#define CTRL_D	(0x04U)
+#define CTRL_E	(0x05U)
+#define CTRL_F	(0x06U)
+#define CTRL_G	(0x07U)
+#define CTRL_H	(0x08U)
+#define CTRL_I	(0x09U)
+#define CTRL_J	(0x0AU)
+#define CTRL_K	(0x0BU)
+#define CTRL_L	(0x0CU)
+#define CTRL_M	(0x0DU)
+#define CTRL_N	(0x0EU)
+#define CTRL_O	(0x0FU)
+#define CTRL_P	(0x10U)
+#define CTRL_Q	(0x11U)
+#define CTRL_R	(0x12U)
+#define CTRL_S	(0x13U)
+#define CTRL_T	(0x14U)
+#define CTRL_U	(0x15U)
+#define CTRL_V	(0x16U)
+#define CTRL_W	(0x17U)
+#define CTRL_X	(0x18U)
+#define CTRL_Y	(0x19U)
+#define CTRL_Z	(0x1AU)
+#define CTRL_BO	(0x1BU)
+#define CTRL_BK	(0x1CU)
+#define CTRL_BC	(0x1DU)
+#define CTRL_CA	(0x1EU)
+#define CTRL_US	(0x1FU)
+#define CTRL_QM	(0x7FU)
+#endif

-#define IDENT		64
+#define IDENT	64

 EXTERN Source *source;		/* yyparse/yylex source */
 EXTERN YYSTYPE yylval;		/* result from yylex */
@ -2273,8 +2527,6 @@ void DF(const char *, ...)
    MKSH_A_FORMAT(__printf__, 1, 2);
 #endif
 /* misc.c */
-void setctypes(const char *, int);
-void initctypes(void);
 size_t option(const char *) MKSH_A_PURE;
 char *getoptions(void);
 void change_flag(enum sh_flag, int, bool);
@ -2282,8 +2534,9 @@ void change_xtrace(unsigned char, bool);
 int parse_args(const char **, int, bool *);
 int getn(const char *, int *);
 int gmatchx(const char *, const char *, bool);
-int has_globbing(const char *, const char *) MKSH_A_PURE;
-int xstrcmp(const void *, const void *) MKSH_A_PURE;
+bool has_globbing(const char *) MKSH_A_PURE;
+int ascstrcmp(const void *, const void *) MKSH_A_PURE;
+int ascpstrcmp(const void *, const void *) MKSH_A_PURE;
 void ksh_getopt_reset(Getopt *, int);
 int ksh_getopt(const char **, Getopt *, const char *);
 void print_value_quoted(struct shf *, const char *);
@ -2346,6 +2599,7 @@ char *shf_smprintf(const char *, ...)
    MKSH_A_FORMAT(__printf__, 1, 2);
 ssize_t shf_vfprintf(struct shf *, const char *, va_list)
    MKSH_A_FORMAT(__printf__, 2, 0);
+void set_ifs(const char *);
 /* syn.c */
 void initkeywords(void);
 struct op *compile(Source *, bool, bool);
@ -2483,7 +2737,7 @@ extern int tty_init_fd(void);	/* initialise tty_fd, tty_devtty */
 #define mksh_abspath(s)			__extension__({			\
 	const char *mksh_abspath_s = (s);				\
 	(mksh_cdirsep(mksh_abspath_s[0]) ||				\
-	    (ksh_isalpha(mksh_abspath_s[0]) &&				\
+	    (ctype(mksh_abspath_s[0], C_ALPHA) &&			\
 	    mksh_abspath_s[1] == ':'));					\
 })
 #define mksh_cdirsep(c)			__extension__({			\
@ -2492,15 +2746,15 @@ extern int tty_init_fd(void);	/* initialise tty_fd, tty_devtty */
 })
 #define mksh_sdirsep(s)			__extension__({			\
 	const char *mksh_sdirsep_s = (s);				\
-	((char *)((ksh_isalphx(mksh_sdirsep_s[0]) &&			\
+	((char *)((ctype(mksh_sdirsep_s[0], C_ALPHA) &&			\
 	    mksh_sdirsep_s[1] == ':' &&					\
 	    !mksh_cdirsep(mksh_sdirsep_s[2])) ?				\
 	    (mksh_sdirsep_s + 1) : strpbrk(mksh_sdirsep_s, "/\\")));	\
 })
 #define mksh_vdirsep(s)			(mksh_sdirsep((s)) != NULL)
 #else
-#define mksh_abspath(s)			((s)[0] == '/')
-#define mksh_cdirsep(c)			((c) == '/')
+#define mksh_abspath(s)			(ord((s)[0]) == ord('/'))
+#define mksh_cdirsep(c)			(ord(c) == ord('/'))
 #define mksh_sdirsep(s)			strchr((s), '/')
 #define mksh_vdirsep(s)			vstrchr((s), '/')
 #endif
--- a/src/shf.c
+++ b/src/shf.c
@ -4,6 +4,8 @@
 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011,
 *		 2012, 2013, 2015, 2016, 2017
 *	mirabilos <m@mirbsd.org>
+ * Copyright (c) 2015
+ *	Daniel Richard G. <skunk@iSKUNK.ORG>
 *
 * Provided that these terms and disclaimer and all copyright notices
 * are retained or reproduced in an accompanying document, permission
@ -25,7 +27,7 @@

 #include "sh.h"

-__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.79 2017/04/12 17:08:49 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/shf.c,v 1.95 2017/05/05 22:45:58 tg Exp $");

 /* flags to shf_emptybuf() */
 #define EB_READSW	0x01	/* about to switch to reading */
@ -874,11 +876,11 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args)
 				flags |= FL_SIZET;
 				continue;
 			}
-			if (ksh_isdigit(c)) {
+			if (ctype(c, C_DIGIT)) {
 				bool overflowed = false;

 				tmp = ksh_numdig(c);
-				while (c = *fmt++, ksh_isdigit(c))
+				while (ctype((c = *fmt++), C_DIGIT))
 					if (notok2mul(2147483647, tmp, 10))
 						overflowed = true;
 					else
@ -899,7 +901,7 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args)
 			/* nasty format */
 			break;

-		if (ksh_isupper(c)) {
+		if (ctype(c, C_UPPER)) {
 			flags |= FL_UPPER;
 			c = ksh_tolower(c);
 		}
@ -1029,8 +1031,7 @@ shf_vfprintf(struct shf *shf, const char *fmt, va_list args)
 			if (!(flags & FL_RIGHT)) {
 				/* skip past sign or 0x when padding with 0 */
 				if ((flags & FL_ZERO) && (flags & FL_NUMBER)) {
-					if (*s == '+' || *s == '-' ||
-					    *s == ' ') {
+					if (ctype(*s, C_SPC | C_PLUS | C_MINUS)) {
 						shf_putc(*s, shf);
 						s++;
 						precision--;
@ -1158,3 +1159,163 @@ cstrerror(int errnum)
 	}
 }
 #endif
+
+/* fast character classes: const template with one class word per 7-bit code 0x00..0x7F; set_ifs() loads it into ksh_ctypes */
+const uint32_t tpl_ctypes[128] = {
+	/* 0x00 */
+	CiNUL,		CiCNTRL,	CiCNTRL,	CiCNTRL,
+	CiCNTRL,	CiCNTRL,	CiCNTRL,	CiCNTRL,
+	CiCNTRL,	CiTAB,		CiNL,		CiSPX,
+	CiSPX,		CiCR,		CiCNTRL,	CiCNTRL,
+	/* 0x10 */
+	CiCNTRL,	CiCNTRL,	CiCNTRL,	CiCNTRL,
+	CiCNTRL,	CiCNTRL,	CiCNTRL,	CiCNTRL,
+	CiCNTRL,	CiCNTRL,	CiCNTRL,	CiCNTRL,
+	CiCNTRL,	CiCNTRL,	CiCNTRL,	CiCNTRL,
+	/* 0x20 */
+	CiSP,		CiALIAS | CiVAR1,	CiQC,	CiHASH,
+	CiSS,		CiPERCT,	CiQCL,		CiQC,
+	CiQCL,		CiQCL,		CiQCX | CiVAR1,	CiPLUS,
+	CiALIAS,	CiMINUS,	CiALIAS,	CiQCM,
+	/* 0x30 */
+	CiOCTAL,	CiOCTAL,	CiOCTAL,	CiOCTAL,
+	CiOCTAL,	CiOCTAL,	CiOCTAL,	CiOCTAL,
+	CiDIGIT,	CiDIGIT,	CiCOLON,	CiQCL,
+	CiANGLE,	CiEQUAL,	CiANGLE,	CiQUEST,
+	/* 0x40 */
+	CiALIAS | CiVAR1,	CiUPPER | CiHEXLT,
+	CiUPPER | CiHEXLT,	CiUPPER | CiHEXLT,
+	CiUPPER | CiHEXLT,	CiUPPER | CiHEXLT,
+	CiUPPER | CiHEXLT,	CiUPPER,
+	CiUPPER,	CiUPPER,	CiUPPER,	CiUPPER,
+	CiUPPER,	CiUPPER,	CiUPPER,	CiUPPER,
+	/* 0x50 */
+	CiUPPER,	CiUPPER,	CiUPPER,	CiUPPER,
+	CiUPPER,	CiUPPER,	CiUPPER,	CiUPPER,
+	CiUPPER,	CiUPPER,	CiUPPER,	CiQCX | CiBRACK,
+	CiQCX,		CiBRACK,	CiQCM,		CiUNDER,
+	/* 0x60 */
+	CiGRAVE,		CiLOWER | CiHEXLT,
+	CiLOWER | CiHEXLT,	CiLOWER | CiHEXLT,
+	CiLOWER | CiHEXLT,	CiLOWER | CiHEXLT,
+	CiLOWER | CiHEXLT,	CiLOWER,
+	CiLOWER,	CiLOWER,	CiLOWER,	CiLOWER,
+	CiLOWER,	CiLOWER,	CiLOWER,	CiLOWER,
+	/* 0x70 */
+	CiLOWER,	CiLOWER,	CiLOWER,	CiLOWER,
+	CiLOWER,	CiLOWER,	CiLOWER,	CiLOWER,
+	CiLOWER,	CiLOWER,	CiLOWER,	CiCURLY,
+	CiQCL,		CiCURLY,	CiQCM,		CiCNTRL
+};
+
+void
+set_ifs(const char *s)	/* rebuild ksh_ctypes from tpl_ctypes, then flag every octet of s with CiIFS */
+{
+#if defined(MKSH_EBCDIC) || defined(MKSH_FAUX_EBCDIC)
+	int i = 256;
+
+	memset(ksh_ctypes, 0, sizeof(ksh_ctypes));	/* octets left unmapped below keep no class bits */
+	while (i--)
+		if (ebcdic_map[i] < 0x80U)	/* only octets whose mapped value indexes the 128-entry template */
+			ksh_ctypes[i] = tpl_ctypes[ebcdic_map[i]];
+#else
+	memcpy(ksh_ctypes, tpl_ctypes, sizeof(tpl_ctypes));	/* overwrites the leading entries, dropping bits OR-ed in by an earlier call */
+	memset((char *)ksh_ctypes + sizeof(tpl_ctypes), '\0',
+	    sizeof(ksh_ctypes) - sizeof(tpl_ctypes));	/* zero everything past the template-sized prefix */
+#endif
+	ifs0 = *s;	/* first octet of s; NUL when s is the empty string */
+	while (*s)
+		ksh_ctypes[ord(*s++)] |= CiIFS;	/* add the IFS bit on top of the template classes */
+}
+
+#if defined(MKSH_EBCDIC) || defined(MKSH_FAUX_EBCDIC)
+#include <locale.h>
+
+/*
+ * Many headaches with EBCDIC:
+ * 1. There are numerous EBCDIC variants, and it is not feasible for us
+ *    to support them all. But we can support the EBCDIC code pages that
+ *    contain all (most?) of the characters in ASCII, and these
+ *    usually tend to agree on the code points assigned to the ASCII
+ *    subset. If you need a representative example, look at EBCDIC 1047,
+ *    which is first among equals in the IBM MVS development
+ *    environment: https://en.wikipedia.org/wiki/EBCDIC_1047
+ *    Unfortunately, the square brackets are not consistently mapped,
+ *    and for certain reasons, we need an unambiguous bijective
+ *    mapping between EBCDIC and "extended ASCII".
+ * 2. Character ranges that are contiguous in ASCII, like the letters
+ *    in [A-Z], are broken up into segments (i.e. [A-IJ-RS-Z]), so we
+ *    can't implement e.g. islower() as { return c >= 'a' && c <= 'z'; }
+ *    because it will also return true for a handful of extraneous
+ *    characters (like the plus-minus sign at 0x8F in EBCDIC 1047, a
+ *    little after 'i'). But at least '_' is not one of these.
+ * 3. The normal [0-9A-Za-z] characters are at codepoints beyond 0x80.
+ *    Not only do they require all 8 bits instead of 7, if chars are
+ *    signed, they will have negative integer values! Something like
+ *    (c - 'A') could actually become (c + 63)! Use the ord() macro to
+ *    ensure you're getting a value in [0, 255].
+ * 4. '\n' is actually NL (0x15, U+0085) instead of LF (0x25, U+000A).
+ *    EBCDIC has a proper newline character instead of "emulating" one
+ *    with line feeds, although this is mapped to LF for our purposes.
+ * 5. Note that it is possible to compile programs in ASCII mode on IBM
+ *    mainframe systems, using the -qascii option to the XL C compiler.
+ *    We can determine the build mode by looking at __CHARSET_LIB:
+ *    0 == EBCDIC, 1 == ASCII
+ */
+
+void
+ebcdic_init(void)	/* fills ebcdic_rtt_toascii, ebcdic_rtt_fromascii and ebcdic_map; exits with status 255 on an unusable mapping */
+{
+	int i = 256;
+	unsigned char t;
+	bool mapcache[256];	/* mapcache[t] is set once some octet has mapped to value t */
+
+	while (i--)
+		ebcdic_rtt_toascii[i] = i;	/* start from the identity table 0x00..0xFF */
+	memset(ebcdic_rtt_fromascii, 0xFF, sizeof(ebcdic_rtt_fromascii));	/* placeholder; entries are assigned in the loop below */
+	setlocale(LC_ALL, "");
+#ifdef MKSH_EBCDIC
+	if (__etoa_l(ebcdic_rtt_toascii, 256) != 256) {	/* NOTE(review): presumably converts the table in place and returns the count converted; confirm against the z/OS docs */
+		write(2, "mksh: could not map EBCDIC to ASCII\n", 36);	/* 36 == length of the message */
+		exit(255);
+	}
+#endif
+
+	memset(mapcache, 0, sizeof(mapcache));
+	i = 256;
+	while (i--) {
+		t = ebcdic_rtt_toascii[i];
+		/* ensure unique round-trip capable mapping */
+		if (mapcache[t]) {
+			write(2, "mksh: duplicate EBCDIC to ASCII mapping\n", 40);	/* 40 == length of the message */
+			exit(255);
+		}
+		/*
+		 * since there are 256 input octets, this also ensures
+		 * the other mapping direction is completely filled
+		 */
+		mapcache[t] = true;
+		/* fill the complete round-trip map */
+		ebcdic_rtt_fromascii[t] = i;
+		/*
+		 * Only use the converted value if it's in the range
+		 * [0x00; 0x7F], which I checked; the "extended ASCII"
+		 * characters can be any encoding, not just Latin1,
+		 * and the C1 control characters other than NEL are
+		 * hopeless, but we map EBCDIC NEL to ASCII LF so we
+		 * cannot even use C1 NEL.
+		 * If ever we map to Unicode, bump the table width to
+		 * an unsigned int, and or the raw unconverted EBCDIC
+		 * values with 0x01000000 instead.
+		 */
+		if (t < 0x80U)
+			ebcdic_map[i] = (unsigned short)ord(t);	/* 7-bit result: store the converted value */
+		else
+			ebcdic_map[i] = (unsigned short)(0x100U | ord(i));	/* otherwise store the raw octet tagged with bit 0x100 */
+	}
+	if (ebcdic_rtt_toascii[0] || ebcdic_rtt_fromascii[0] || ebcdic_map[0]) {	/* entry 0 must be 0 in all three tables */
+		write(2, "mksh: NUL not at position 0\n", 28);	/* 28 == length of the message */
+		exit(255);
+	}
+}
+#endif
--- a/src/signames.inc
+++ b/src/signames.inc
@ -1,31 +0,0 @@
-		{ "ABRT", 6 },
-		{ "FPE", 8 },
-		{ "ILL", 4 },
-		{ "INT", 2 },
-		{ "SEGV", 11 },
-		{ "TERM", 15 },
-		{ "ALRM", 14 },
-		{ "BUS", 7 },
-		{ "CHLD", 17 },
-		{ "CONT", 18 },
-		{ "HUP", 1 },
-		{ "KILL", 9 },
-		{ "PIPE", 13 },
-		{ "QUIT", 3 },
-		{ "STOP", 19 },
-		{ "TSTP", 20 },
-		{ "TTIN", 21 },
-		{ "TTOU", 22 },
-		{ "USR1", 10 },
-		{ "USR2", 12 },
-		{ "POLL", 29 },
-		{ "PROF", 27 },
-		{ "SYS", 31 },
-		{ "TRAP", 5 },
-		{ "URG", 23 },
-		{ "VTALRM", 26 },
-		{ "XCPU", 24 },
-		{ "XFSZ", 25 },
-		{ "WINCH", 28 },
-		{ "PWR", 30 },
-		{ "STKFLT", 16 },
--- a/src/syn.c
+++ b/src/syn.c
@ -23,7 +23,7 @@

 #include "sh.h"

-__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.120 2017/04/06 01:59:57 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/syn.c,v 1.124 2017/05/05 22:53:31 tg Exp $");

 struct nesting_state {
 	int start_token;	/* token than began nesting (eg, FOR) */
@ -91,7 +91,7 @@ yyparse(bool doalias)
 	c = tpeek(0);
 	if (c == 0 && !outtree)
 		outtree = newtp(TEOF);
-	else if (c != '\n' && c != 0)
+	else if (!ctype(c, C_LF | C_NUL))
 		syntaxerr(NULL);
 }

@ -330,7 +330,7 @@ get_command(int cf, int sALIAS)
 					XPput(args, yylval.cp);
 				break;

-			case '(' /*)*/:
+			case ord('(' /*)*/):
 				if (XPsize(args) == 0 && XPsize(vars) == 1 &&
 				    is_wdvarassign(yylval.cp)) {
 					char *tcp;
@ -373,7 +373,7 @@ get_command(int cf, int sALIAS)
 					    XPsize(vars) != 0)
 						syntaxerr(NULL);
 					ACCEPT;
-					musthave(/*(*/')', 0);
+					musthave(/*(*/ ')', 0);
 					t = function_body(XPptrv(args)[0],
 					    sALIAS, false);
 				}
@ -386,18 +386,18 @@ get_command(int cf, int sALIAS)
 Leave:
 		break;

-	case '(': /*)*/ {
+	case ord('(' /*)*/): {
 		int subshell_nesting_type_saved;
 Subshell:
 		subshell_nesting_type_saved = subshell_nesting_type;
-		subshell_nesting_type = ')';
-		t = nested(TPAREN, '(', ')', sALIAS);
+		subshell_nesting_type = ord(')');
+		t = nested(TPAREN, ord('('), ord(')'), sALIAS);
 		subshell_nesting_type = subshell_nesting_type_saved;
 		break;
 	    }

-	case '{': /*}*/
-		t = nested(TBRACE, '{', '}', sALIAS);
+	case ord('{' /*}*/):
+		t = nested(TBRACE, ord('{'), ord('}'), sALIAS);
 		break;

 	case MDPAREN:
@ -407,8 +407,8 @@ get_command(int cf, int sALIAS)
 		switch (token(LETEXPR)) {
 		case LWORD:
 			break;
-		case '(': /*)*/
-			c = '(';
+		case ord('(' /*)*/):
+			c = ord('(');
 			goto Subshell;
 		default:
 			syntaxerr(NULL);
@ -554,8 +554,8 @@ dogroup(int sALIAS)
 	 */
 	if (c == DO)
 		c = DONE;
-	else if (c == '{')
-		c = '}';
+	else if (c == ord('{'))
+		c = ord('}');
 	else
 		syntaxerr(NULL);
 	list = c_list(sALIAS, true);
@ -610,8 +610,8 @@ caselist(int sALIAS)
 	/* A {...} can be used instead of in...esac for case statements */
 	if (c == IN)
 		c = ESAC;
-	else if (c == '{')
-		c = '}';
+	else if (c == ord('{'))
+		c = ord('}');
 	else
 		syntaxerr(NULL);
 	t = tl = NULL;
@ -636,17 +636,18 @@ casepart(int endtok, int sALIAS)
 	XPinit(ptns, 16);
 	t = newtp(TPAT);
 	/* no ALIAS here */
-	if (token(CONTIN | KEYWORD) != '(')
+	if (token(CONTIN | KEYWORD) != ord('('))
 		REJECT;
 	do {
 		switch (token(0)) {
 		case LWORD:
 			break;
-		case '}':
+		case ord('}'):
 		case ESAC:
 			if (symbol != endtok) {
 				strdupx(yylval.cp,
-				    symbol == '}' ? Tcbrace : Tesac, ATEMP);
+				    symbol == ord('}') ? Tcbrace : Tesac,
+				    ATEMP);
 				break;
 			}
 			/* FALLTHROUGH */
@ -658,23 +659,23 @@ casepart(int endtok, int sALIAS)
 	REJECT;
 	XPput(ptns, NULL);
 	t->vars = (char **)XPclose(ptns);
-	musthave(')', 0);
+	musthave(ord(')'), 0);

 	t->left = c_list(sALIAS, true);

 	/* initialise to default for ;; or omitted */
-	t->u.charflag = ';';
+	t->u.charflag = ord(';');
 	/* SUSv4 requires the ;; except in the last casepart */
 	if ((tpeek(CONTIN|KEYWORD|sALIAS)) != endtok)
 		switch (symbol) {
 		default:
 			syntaxerr(NULL);
 		case BRKEV:
-			t->u.charflag = '|';
+			t->u.charflag = ord('|');
 			if (0)
 				/* FALLTHROUGH */
 		case BRKFT:
-			t->u.charflag = '&';
+			  t->u.charflag = ord('&');
 			/* FALLTHROUGH */
 		case BREAK:
 			/* initialised above, but we need to eat the token */
@ -697,10 +698,10 @@ function_body(char *name, int sALIAS,
 	 * only allow [a-zA-Z_0-9] but this allows more as old pdkshs
 	 * have allowed more; the following were never allowed:
 	 *	NUL TAB NL SP " $ & ' ( ) ; < = > \ ` |
-	 * C_QUOTE covers all but adds # * ? [ ]
+	 * C_QUOTE|C_SPC covers all but adds # * ? [ ]
 	 */
 	for (p = sname; *p; p++)
-		if (ctype(*p, C_QUOTE))
+		if (ctype(*p, C_QUOTE | C_SPC))
 			yyerror(Tinvname, sname, Tfunction);

 	/*
@ -710,14 +711,14 @@ function_body(char *name, int sALIAS,
 	 * only accepts an open-brace.
 	 */
 	if (ksh_func) {
-		if (tpeek(CONTIN|KEYWORD|sALIAS) == '(' /*)*/) {
+		if (tpeek(CONTIN|KEYWORD|sALIAS) == ord('(' /*)*/)) {
 			/* function foo () { //}*/
 			ACCEPT;
-			musthave(')', 0);
+			musthave(ord(/*(*/ ')'), 0);
 			/* degrade to POSIX function */
 			ksh_func = false;
 		}
-		musthave('{' /*}*/, CONTIN|KEYWORD|sALIAS);
+		musthave(ord('{' /*}*/), CONTIN|KEYWORD|sALIAS);
 		REJECT;
 	}

@ -809,8 +810,8 @@ static const struct tokeninfo {
 	{ "in",		IN,	true },
 	{ Tfunction,	FUNCTION, true },
 	{ Ttime,	TIME,	true },
-	{ "{",		'{',	true },
-	{ Tcbrace,	'}',	true },
+	{ "{",		ord('{'), true },
+	{ Tcbrace,	ord('}'), true },
 	{ "!",		BANG,	true },
 	{ "[[",		DBRACKET, true },
 	/* Lexical tokens (0[EOF], LWORD and REDIR handled specially) */
@ -822,7 +823,7 @@ static const struct tokeninfo {
 	{ "((",		MDPAREN, false },
 	{ "|&",		COPROC,	false },
 	/* and some special cases... */
-	{ "newline",	'\n',	false },
+	{ "newline",	ord('\n'), false },
 	{ NULL,		0,	false }
 };

@ -997,9 +998,9 @@ dbtestp_isa(Test_env *te, Test_meta meta)
 		ret = (uqword && !strcmp(yylval.cp,
 		    dbtest_tokens[(int)TM_NOT])) ? TO_NONNULL : TO_NONOP;
 	else if (meta == TM_OPAREN)
-		ret = c == '(' /*)*/ ? TO_NONNULL : TO_NONOP;
+		ret = c == ord('(') /*)*/ ? TO_NONNULL : TO_NONOP;
 	else if (meta == TM_CPAREN)
-		ret = c == /*(*/ ')' ? TO_NONNULL : TO_NONOP;
+		ret = c == /*(*/ ord(')') ? TO_NONNULL : TO_NONOP;
 	else if (meta == TM_UNOP || meta == TM_BINOP) {
 		if (meta == TM_BINOP && c == REDIR &&
 		    (yylval.iop->ioflag == IOREAD ||
@ -1079,7 +1080,7 @@ parse_usec(const char *s, struct timeval *tv)

 	tv->tv_sec = 0;
 	/* parse integral part */
-	while (ksh_isdigit(*s)) {
+	while (ctype(*s, C_DIGIT)) {
 		tt.tv_sec = tv->tv_sec * 10 + ksh_numdig(*s++);
 		/*XXX this overflow check maybe UB */
 		if (tt.tv_sec / 10 != tv->tv_sec) {
@ -1101,14 +1102,14 @@ parse_usec(const char *s, struct timeval *tv)

 	/* parse decimal fraction */
 	i = 100000;
-	while (ksh_isdigit(*s)) {
+	while (ctype(*s, C_DIGIT)) {
 		tv->tv_usec += i * ksh_numdig(*s++);
 		if (i == 1)
 			break;
 		i /= 10;
 	}
 	/* check for junk after fractional part */
-	while (ksh_isdigit(*s))
+	while (ctype(*s, C_DIGIT))
 		++s;
 	if (*s) {
 		errno = EINVAL;
@ -1133,11 +1134,11 @@ yyrecursive(int subtype)
 	int stok, etok;

 	if (subtype != COMSUB) {
-		stok = '{';
-		etok = '}';
+		stok = ord('{');
+		etok = ord('}');
 	} else {
-		stok = '(';
-		etok = ')';
+		stok = ord('(');
+		etok = ord(')');
 	}

 	ys = alloc(sizeof(struct yyrecursive_state), ATEMP);
--- a/src/tree.c
+++ b/src/tree.c
@ -23,7 +23,7 @@

 #include "sh.h"

-__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.89 2017/04/12 16:46:23 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/tree.c,v 1.93 2017/05/05 22:53:32 tg Exp $");

 #define INDENT	8

@ -329,34 +329,34 @@ wdvarput(struct shf *shf, const char *wp, int quotelevel, int opmode)
 		case EOS:
 			return (--wp);
 		case ADELIM:
-			if (*wp == /*{*/'}') {
+			if (ord(*wp) == ord(/*{*/ '}')) {
 				++wp;
 				goto wdvarput_csubst;
 			}
 			/* FALLTHROUGH */
 		case CHAR:
-			c = *wp++;
+			c = ord(*wp++);
 			shf_putc(c, shf);
 			break;
 		case QCHAR:
-			c = *wp++;
+			c = ord(*wp++);
 			if (opmode & WDS_TPUTS)
 				switch (c) {
-				case '\n':
+				case ord('\n'):
 					if (quotelevel == 0) {
-						c = '\'';
+						c = ord('\'');
 						shf_putc(c, shf);
-						shf_putc('\n', shf);
+						shf_putc(ord('\n'), shf);
 					}
 					break;
 				default:
 					if (quotelevel == 0)
 						/* FALLTHROUGH */
-				case '"':
-				case '`':
-				case '$':
-				case '\\':
-					  shf_putc('\\', shf);
+				case ord('"'):
+				case ord('`'):
+				case ord('$'):
+				case ord('\\'):
+					  shf_putc(ord('\\'), shf);
 					break;
 				}
 			shf_putc(c, shf);
@ -365,7 +365,7 @@ wdvarput(struct shf *shf, const char *wp, int quotelevel, int opmode)
 		case COMSUB:
 			shf_puts("$(", shf);
 			cs = ")";
-			if (*wp == '(' /*)*/)
+			if (ord(*wp) == ord('(' /*)*/))
 				shf_putc(' ', shf);
 pSUB:
 			while ((c = *wp++) != 0)
@ -374,11 +374,11 @@ wdvarput(struct shf *shf, const char *wp, int quotelevel, int opmode)
 			break;
 		case FUNASUB:
 		case FUNSUB:
-			c = ' ';
+			c = ord(' ');
 			if (0)
 				/* FALLTHROUGH */
 		case VALSUB:
-			  c = '|';
+			  c = ord('|');
 			shf_putc('$', shf);
 			shf_putc('{', shf);
 			shf_putc(c, shf);
@ -403,14 +403,14 @@ wdvarput(struct shf *shf, const char *wp, int quotelevel, int opmode)
 			break;
 		case OSUBST:
 			shf_putc('$', shf);
-			if (*wp++ == '{')
+			if (ord(*wp++) == ord('{'))
 				shf_putc('{', shf);
 			while ((c = *wp++) != 0)
 				shf_putc(c, shf);
 			wp = wdvarput(shf, wp, 0, opmode);
 			break;
 		case CSUBST:
-			if (*wp++ == '}') {
+			if (ord(*wp++) == ord('}')) {
 wdvarput_csubst:
 				shf_putc('}', shf);
 			}
@ -420,11 +420,11 @@ wdvarput(struct shf *shf, const char *wp, int quotelevel, int opmode)
 			shf_putc('(', shf);
 			break;
 		case SPAT:
-			c = '|';
+			c = ord('|');
 			if (0)
 				/* FALLTHROUGH */
 		case CPAT:
-			  c = /*(*/ ')';
+			  c = ord(/*(*/ ')');
 			shf_putc(c, shf);
 			break;
 		}
@ -467,39 +467,39 @@ vfptreef(struct shf *shf, int indent, const char *fmt, va_list va)
 {
 	int c;

-	while ((c = *fmt++)) {
+	while ((c = ord(*fmt++))) {
 		if (c == '%') {
-			switch ((c = *fmt++)) {
-			case 'c':
+			switch ((c = ord(*fmt++))) {
+			case ord('c'):
 				/* character (octet, probably) */
 				shf_putchar(va_arg(va, int), shf);
 				break;
-			case 's':
+			case ord('s'):
 				/* string */
 				shf_puts(va_arg(va, char *), shf);
 				break;
-			case 'S':
+			case ord('S'):
 				/* word */
 				wdvarput(shf, va_arg(va, char *), 0, WDS_TPUTS);
 				break;
-			case 'd':
+			case ord('d'):
 				/* signed decimal */
 				shf_fprintf(shf, Tf_d, va_arg(va, int));
 				break;
-			case 'u':
+			case ord('u'):
 				/* unsigned decimal */
 				shf_fprintf(shf, "%u", va_arg(va, unsigned int));
 				break;
-			case 'T':
+			case ord('T'):
 				/* format tree */
 				ptree(va_arg(va, struct op *), indent, shf);
 				goto dont_trash_prevent_semicolon;
-			case ';':
+			case ord(';'):
 				/* newline or ; */
-			case 'N':
+			case ord('N'):
 				/* newline or space */
 				if (shf->flags & SHF_STRING) {
-					if (c == ';' && !prevent_semicolon)
+					if (c == ord(';') && !prevent_semicolon)
 						shf_putc(';', shf);
 					shf_putc(' ', shf);
 				} else {
@ -515,7 +515,7 @@ vfptreef(struct shf *shf, int indent, const char *fmt, va_list va)
 						shf_putc(' ', shf);
 				}
 				break;
-			case 'R':
+			case ord('R'):
 				/* I/O redirection */
 				pioact(shf, va_arg(va, struct ioword *));
 				break;
@ -613,7 +613,7 @@ wdscan(const char *wp, int c)
 		case ADELIM:
 			if (c == ADELIM && nest == 0)
 				return (wp + 1);
-			if (*wp == /*{*/'}')
+			if (ord(*wp) == ord(/*{*/ '}'))
 				goto wdscan_csubst;
 			/* FALLTHROUGH */
 		case CHAR:
@ -795,20 +795,20 @@ vistree(char *dst, size_t sz, struct op *t)
 			*dst++ = *cp++;
 		goto vist_loop;
 	}
-	if (--sz == 0 || (c = (unsigned char)(*cp++)) == 0)
+	if (--sz == 0 || (c = ord(*cp++)) == 0)
 		/* NUL or not enough free space */
 		goto vist_out;
-	if (ISCTRL(c & 0x7F)) {
+	if (ksh_isctrl(c)) {
 		/* C0 or C1 control character or DEL */
 		if (--sz == 0)
 			/* not enough free space for two chars */
 			goto vist_out;
-		*dst++ = (c & 0x80) ? '$' : '^';
-		c = UNCTRL(c & 0x7F);
-	} else if (UTFMODE && c > 0x7F) {
+		*dst++ = '^';
+		c = ksh_unctrl(c);
+	} else if (UTFMODE && rtt2asc(c) > 0x7F) {
 		/* better not try to display broken multibyte chars */
 		/* also go easy on the Unicode: no U+FFFD here */
-		c = '?';
+		c = ord('?');
 	}
 	*dst++ = c;
 	goto vist_loop;
@ -822,10 +822,10 @@ vistree(char *dst, size_t sz, struct op *t)
 void
 dumpchar(struct shf *shf, int c)
 {
-	if (ISCTRL(c & 0x7F)) {
+	if (ksh_isctrl(c)) {
 		/* C0 or C1 control character or DEL */
-		shf_putc((c & 0x80) ? '$' : '^', shf);
-		c = UNCTRL(c & 0x7F);
+		shf_putc('^', shf);	/* marker written ahead of the ksh_unctrl() form of c */
+		c = ksh_unctrl(c);	/* the replaced value is emitted by the shf_putc() after this block */
 	}
 	shf_putc(c, shf);
 }
@ -842,7 +842,7 @@ dumpwdvar_i(struct shf *shf, const char *wp, int quotelevel)
 			shf_puts("EOS", shf);
 			return (--wp);
 		case ADELIM:
-			if (*wp == /*{*/'}') {
+			if (ord(*wp) == ord(/*{*/ '}')) {
 				shf_puts(/*{*/ "]ADELIM(})", shf);
 				return (wp + 1);
 			}
@ -855,9 +855,9 @@ dumpwdvar_i(struct shf *shf, const char *wp, int quotelevel)
 			break;
 		case QCHAR:
 			shf_puts("QCHAR<", shf);
-			c = *wp++;
-			if (quotelevel == 0 ||
-			    (c == '"' || c == '`' || c == '$' || c == '\\'))
+			c = ord(*wp++);
+			if (quotelevel == 0 || c == ord('"') ||
+			    c == ord('\\') || ctype(c, C_DOLAR | C_GRAVE))
 				shf_putc('\\', shf);
 			dumpchar(shf, c);
 			goto closeandout;
--- a/src/var.c
+++ b/src/var.c
@ -28,7 +28,7 @@
 #include <sys/sysctl.h>
 #endif

-__RCSID("$MirOS: src/bin/mksh/var.c,v 1.214 2017/04/02 16:47:43 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/var.c,v 1.220 2017/07/26 23:02:28 tg Exp $");

 /*-
 * Variables
@ -183,7 +183,7 @@ array_index_calc(const char *n, bool *arrayp, uint32_t *valp)
 	*arrayp = false;
 redo_from_ref:
 	p = skip_varname(n, false);
-	if (innermost_refflag == SRF_NOP && (p != n) && ksh_isalphx(n[0])) {
+	if (innermost_refflag == SRF_NOP && (p != n) && ctype(n[0], C_ALPHX)) {
 		struct tbl *vp;
 		char *vn;

@ -204,7 +204,7 @@ array_index_calc(const char *n, bool *arrayp, uint32_t *valp)
 	}
 	innermost_refflag = SRF_NOP;

-	if (p != n && *p == '[' && (len = array_ref_len(p))) {
+	if (p != n && ord(*p) == ord('[') && (len = array_ref_len(p))) {
 		char *sub, *tmp;
 		mksh_ari_t rval;

@ -249,14 +249,14 @@ isglobal(const char *n, bool docreate)
 	vn = array_index_calc(n, &array, &val);
 	h = hash(vn);
 	c = (unsigned char)vn[0];
-	if (!ksh_isalphx(c)) {
+	if (!ctype(c, C_ALPHX)) {
 		if (array)
 			errorf(Tbadsubst);
 		vp = vtemp;
 		vp->flag = DEFINED;
 		vp->type = 0;
 		vp->areap = ATEMP;
-		if (ksh_isdigit(c)) {
+		if (ctype(c, C_DIGIT)) {
 			if (getn(vn, &c)) {
 				/* main.c:main_init() says 12 */
 				shf_snprintf(vp->name, 12, Tf_d, c);
@ -339,7 +339,7 @@ local(const char *n, bool copy)
 	 */
 	vn = array_index_calc(n, &array, &val);
 	h = hash(vn);
-	if (!ksh_isalphx(*vn)) {
+	if (!ctype(*vn, C_ALPHX)) {
 		vp = vtemp;
 		vp->flag = DEFINED|RDONLY;
 		vp->type = 0;
@ -414,9 +414,11 @@ str_val(struct tbl *vp)

 			*(s = strbuf) = '1';
 			s[1] = '#';
-			if (!UTFMODE || ((n & 0xFF80) == 0xEF80))
+			if (!UTFMODE)
+				s[2] = (unsigned char)n;
+			else if ((n & 0xFF80) == 0xEF80)
 				/* OPTU-16 -> raw octet */
-				s[2] = n & 0xFF;
+				s[2] = asc2rtt(n & 0xFF);
 			else
 				sz = utf_wctomb(s + 2, n);
 			s[2 + sz] = '\0';
@ -464,7 +466,7 @@ setstr(struct tbl *vq, const char *s, int error_ok)
 #ifndef MKSH_SMALL
 			/* debugging */
 			if (s >= vq->val.s &&
-			    s <= vq->val.s + strlen(vq->val.s)) {
+			    s <= strnul(vq->val.s)) {
 				internal_errorf(
 				    "setstr: %s=%s: assigning to self",
 				    vq->name, s);
@ -532,7 +534,7 @@ getint(struct tbl *vp, mksh_ari_u *nump, bool arith)

 	do {
 		c = (unsigned char)*s++;
-	} while (ksh_isspace(c));
+	} while (ctype(c, C_SPACE));

 	switch (c) {
 	case '-':
@ -549,7 +551,7 @@ getint(struct tbl *vp, mksh_ari_u *nump, bool arith)
 			base = 16;
 			++s;
 			goto getint_c_style_base;
-		} else if (Flag(FPOSIX) && ksh_isdigit(s[0]) &&
+		} else if (Flag(FPOSIX) && ctype(s[0], C_DIGIT) &&
 		    !(vp->flag & ZEROFIL)) {
 			/* interpret as octal (deprecated) */
 			base = 8;
@ -577,7 +579,7 @@ getint(struct tbl *vp, mksh_ari_u *nump, bool arith)
 					 * the same as 1#\x80 does, thus is
 					 * not round-tripping correctly XXX)
 					 */
-					wc = 0xEF00 + *(const unsigned char *)s;
+					wc = 0xEF00 + rtt2asc(*s);
 				nump->u = (mksh_uari_t)wc;
 				return (1);
 			} else if (base > 36)
@ -586,11 +588,11 @@ getint(struct tbl *vp, mksh_ari_u *nump, bool arith)
 			have_base = true;
 			continue;
 		}
-		if (ksh_isdigit(c))
+		if (ctype(c, C_DIGIT))
 			c = ksh_numdig(c);
-		else if (ksh_isupper(c))
+		else if (ctype(c, C_UPPER))
 			c = ksh_numuc(c) + 10;
-		else if (ksh_islower(c))
+		else if (ctype(c, C_LOWER))
 			c = ksh_numlc(c) + 10;
 		else
 			return (-1);
@ -670,7 +672,7 @@ formatstr(struct tbl *vp, const char *s)
 			qq = utf_skipcols(s, slen, &slen);

 			/* strip trailing spaces (AT&T uses qq[-1] == ' ') */
-			while (qq > s && ksh_isspace(qq[-1])) {
+			while (qq > s && ctype(qq[-1], C_SPACE)) {
 				--qq;
 				--slen;
 			}
@ -700,7 +702,7 @@ formatstr(struct tbl *vp, const char *s)
 			    "%.*s", slen, s);
 		} else {
 			/* strip leading spaces/zeros */
-			while (ksh_isspace(*s))
+			while (ctype(*s, C_SPACE))
 				s++;
 			if (vp->flag & ZEROFIL)
 				while (*s == '0')
@ -778,7 +780,7 @@ typeset(const char *var, uint32_t set, uint32_t clr, int field, int base)
 		/* no variable name given */
 		return (NULL);
 	}
-	if (*val == '[') {
+	if (ord(*val) == ord('[')) {
 		if (new_refflag != SRF_NOP)
 			errorf(Tf_sD_s, var,
 			    "reference variable can't be an array");
@ -796,18 +798,18 @@ typeset(const char *var, uint32_t set, uint32_t clr, int field, int base)
 			size_t i;

 			for (i = 1; i < len - 1; i++)
-				if (!ksh_isdigit(val[i]))
+				if (!ctype(val[i], C_DIGIT))
 					return (NULL);
 		}
 		val += len;
 	}
-	if (val[0] == '=') {
+	if (ord(val[0]) == ord('=')) {
 		strndupx(tvar, var, val - var, ATEMP);
 		++val;
 	} else if (set & IMPORT) {
 		/* environment invalid variable name or no assignment */
 		return (NULL);
-	} else if (val[0] == '+' && val[1] == '=') {
+	} else if (ord(val[0]) == ord('+') && ord(val[1]) == ord('=')) {
 		strndupx(tvar, var, val - var, ATEMP);
 		val += 2;
 		vappend = true;
@ -820,8 +822,9 @@ typeset(const char *var, uint32_t set, uint32_t clr, int field, int base)
 		val = NULL;
 		/* handle foo[*] => foo (whole array) mapping for R39b */
 		len = strlen(tvar);
-		if (len > 3 && tvar[len - 3] == '[' && tvar[len - 2] == '*' &&
-		    tvar[len - 1] == ']')
+		if (len > 3 && ord(tvar[len - 3]) == ord('[') &&
+		    ord(tvar[len - 2]) == ord('*') &&
+		    ord(tvar[len - 1]) == ord(']'))
 			tvar[len - 3] = '\0';
 	}

@ -845,7 +848,7 @@ typeset(const char *var, uint32_t set, uint32_t clr, int field, int base)

 			if (!(c = (unsigned char)qval[0]))
 				goto nameref_empty;
-			else if (ksh_isdigit(c) && getn(qval, &c))
+			else if (ctype(c, C_DIGIT) && getn(qval, &c))
 				goto nameref_rhs_checked;
 			else if (qval[1] == '\0') switch (c) {
 			case '$':
@ -858,7 +861,7 @@ typeset(const char *var, uint32_t set, uint32_t clr, int field, int base)
 nameref_empty:
 			errorf(Tf_sD_s, var, "empty nameref target");
 		}
-		len = (*ccp == '[') ? array_ref_len(ccp) : 0;
+		len = (ord(*ccp) == ord('[')) ? array_ref_len(ccp) : 0;
 		if (ccp[len]) {
 			/*
 			 * works for cases "no array", "valid array with
@ -914,12 +917,12 @@ typeset(const char *var, uint32_t set, uint32_t clr, int field, int base)
 	vpbase = (vp->flag & ARRAY) ? global(arrayname(tvar)) : vp;

 	/*
-	 * only allow export flag to be set; AT&T ksh allows any
-	 * attribute to be changed which means it can be truncated or
-	 * modified (-L/-R/-Z/-i)
+	 * only allow export and readonly flag to be set; AT&T ksh
+	 * allows any attribute to be changed which means it can be
+	 * truncated or modified (-L/-R/-Z/-i)
 	 */
 	if ((vpbase->flag & RDONLY) &&
-	    (val || clr || (set & ~EXPORT)))
+	    (val || clr || (set & ~(EXPORT | RDONLY))))
 		/* XXX check calls - is error here ok by POSIX? */
 		errorfx(2, Tf_ro, tvar);
 	afree(tvar, ATEMP);
@ -1064,11 +1067,11 @@ skip_varname(const char *s, bool aok)
 {
 	size_t alen;

-	if (s && ksh_isalphx(*s)) {
+	if (s && ctype(*s, C_ALPHX)) {
 		do {
 			++s;
-		} while (ksh_isalnux(*s));
-		if (aok && *s == '[' && (alen = array_ref_len(s)))
+		} while (ctype(*s, C_ALNUX));
+		if (aok && ord(*s) == ord('[') && (alen = array_ref_len(s)))
 			s += alen;
 	}
 	return (s);
@ -1080,11 +1083,11 @@ skip_wdvarname(const char *s,
    /* skip array de-reference? */
    bool aok)
 {
-	if (s[0] == CHAR && ksh_isalphx(s[1])) {
+	if (s[0] == CHAR && ctype(s[1], C_ALPHX)) {
 		do {
 			s += 2;
-		} while (s[0] == CHAR && ksh_isalnux(s[1]));
-		if (aok && s[0] == CHAR && s[1] == '[') {
+		} while (s[0] == CHAR && ctype(s[1], C_ALNUX));
+		if (aok && s[0] == CHAR && ord(s[1]) == ord('[')) {
 			/* skip possible array de-reference */
 			const char *p = s;
 			char c;
@ -1095,9 +1098,9 @@ skip_wdvarname(const char *s,
 					break;
 				c = p[1];
 				p += 2;
-				if (c == '[')
+				if (ord(c) == ord('['))
 					depth++;
-				else if (c == ']' && --depth == 0) {
+				else if (ord(c) == ord(']') && --depth == 0) {
 					s = p;
 					break;
 				}
@ -1307,8 +1310,7 @@ setspec(struct tbl *vp)
 		return;
 #endif
 	case V_IFS:
-		setctypes(s = str_val(vp), C_IFS);
-		ifs0 = *s;
+		set_ifs(str_val(vp));
 		return;
 	case V_PATH:
 		afree(path, APERM);
@ -1436,8 +1438,7 @@ unsetspec(struct tbl *vp)
 		return;
 #endif
 	case V_IFS:
-		setctypes(TC_IFSWS, C_IFS);
-		ifs0 = ' ';
+		set_ifs(TC_IFSWS);
 		break;
 	case V_PATH:
 		afree(path, APERM);
@ -1527,8 +1528,8 @@ array_ref_len(const char *cp)
 	char c;
 	int depth = 0;

-	while ((c = *s++) && (c != ']' || --depth))
-		if (c == '[')
+	while ((c = *s++) && (ord(c) != ord(']') || --depth))
+		if (ord(c) == ord('['))
 			depth++;
 	if (!c)
 		return (0);
@ -1600,17 +1601,18 @@ set_array(const char *var, bool reset, const char **vals)
 	}
 	while ((ccp = vals[i])) {
 #if 0 /* temporarily taken out due to regression */
-		if (*ccp == '[') {
+		if (ord(*ccp) == ord('[')) {
 			int level = 0;

 			while (*ccp) {
-				if (*ccp == ']' && --level == 0)
+				if (ord(*ccp) == ord(']') && --level == 0)
 					break;
-				if (*ccp == '[')
+				if (ord(*ccp) == ord('['))
 					++level;
 				++ccp;
 			}
-			if (*ccp == ']' && level == 0 && ccp[1] == '=') {
+			if (ord(*ccp) == ord(']') && level == 0 &&
+			    ord(ccp[1]) == ord('=')) {
 				strndupx(cp, vals[i] + 1, ccp - (vals[i] + 1),
 				    ATEMP);
 				evaluate(substitute(cp, 0), (mksh_ari_t *)&j,