changeset 64972:d2c2a51cc7e5

Merge with Jul2017 branch.
author Sjoerd Mullender <sjoerd@acm.org>
date Fri, 20 Oct 2017 20:19:04 +0200 (2017-10-20)
parents 5633d013012c (current diff) e04a93ced123 (diff)
children 2e1bfee1461d
files MonetDB.spec configure.ag monetdb5/modules/atoms/str.c
diffstat 3 files changed, 2219 insertions(+), 738 deletions(-) [+]
line wrap: on
line diff
--- a/MonetDB.spec
+++ b/MonetDB.spec
@@ -970,7 +970,7 @@ fi
 	--enable-monetdb5=yes \
 	--enable-netcdf=no \
 	--enable-odbc=yes \
-	--enable-optimize=yes \
+	--enable-optimize=no \
 	--enable-profile=no \
 	--enable-pyintegration=%{?with_pyintegration:yes}%{!?with_pyintegration:no} \
 	--enable-rintegration=%{?with_rintegration:yes}%{!?with_rintegration:no} \
--- a/configure.ag
+++ b/configure.ag
@@ -326,19 +326,19 @@ AC_ARG_ENABLE([debug],
 	[AS_HELP_STRING([--enable-debug],
 		[enable full debugging (default=yes for development sources)])],
 	[enable_debug=$enableval],
-	[enable_debug=def_$dft_debug])
+	[enable_debug=$dft_debug])
 
 AC_ARG_ENABLE([assert],
 	[AS_HELP_STRING([--enable-assert],
 		[enable assertions in the code (default=yes for development sources)])],
 	[enable_assert=$enableval],
-	[enable_assert=def_$dft_assert])
+	[enable_assert=$dft_assert])
 
 AC_ARG_ENABLE([optimize],
 	[AS_HELP_STRING([--enable-optimize],
 		[enable extra optimization (default=no)])],
 	[enable_optimize=$enableval],
-	[enable_optimize=def_$dft_optimize])
+	[enable_optimize=$dft_optimize])
 
 AC_ARG_ENABLE([strict],
 	[AS_HELP_STRING([--enable-strict],
@@ -362,7 +362,7 @@ dft_profile=$need_profile
 AC_ARG_ENABLE([profile],
 	[AS_HELP_STRING([--enable-profile], [enable profiling (default=no)])],
 	[enable_profile=$enableval],
-	[enable_profile=def_$dft_profile])
+	[enable_profile=$dft_profile])
 
 need_instrument=no
 dft_instrument=$need_instrument
@@ -370,7 +370,7 @@ AC_ARG_ENABLE([instrument],
 	[AS_HELP_STRING([--enable-instrument],
 		[enable instrument (default=no)])],
 	[enable_instrument=$enableval],
-	[enable_instrument=def_$dft_instrument])
+	[enable_instrument=$dft_instrument])
 
 # RIPEMD160 is patent free, academic and European, but unfortunately
 # can't use it by default, as that would exclude JDBC usage (Java
--- a/monetdb5/modules/atoms/str.c
+++ b/monetdb5/modules/atoms/str.c
@@ -96,51 +96,10 @@
  * high-performance hash-lookup (all code inlined).
  */
 
-/* This table was generated from the Unicode 5.0.0 spec. The table is
- * generated by using the codes for conversion to lower case and for
- * conversion to title case and upper case. A few code points have
- * been moved in order to get reasonable conversions (if two code
- * points are converted to the same value, the first one in this table
- * wins).  The code points that have been interchanged are:
- * U+0345 (COMBINING GREEK YPOGEGRAMMENI) / U+03B9 (GREEK SMALL LETTER IOTA) <-> U+0399 (GREEK CAPITAL LETTER IOTA)
- * U+00B5 (MICRO SIGN) / U+03BC (GREEK SMALL LETTER MU) <-> U+039C (GREEK CAPITAL LETTER MU)
- * U+03C2 (GREEK SMALL LETTER FINAL SIGMA) / U+03C3 (GREEK SMALL LETTER SIGMA) <-> U+3A3 (GREEK CAPITAL LETTER SIGMA)
- *
- * In addition, there are a few code points where there are different
- * versions for upper case and title case.  These had to be switched
- * around a little so that the mappings are done sensibly.
- *
- * The following combinations are included in this order:
- * lower case <-> title case
- * lower case <-  upper case
- * upper case  -> title case
- * The conversion title case -> upper case was removed
- *
- * The relevant code points are:
- * U+01C4 (LATIN CAPITAL LETTER DZ WITH CARON)
- * U+01C5 (LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON)
- * U+01C6 (LATIN SMALL LETTER DZ WITH CARON)
- * U+01C7 (LATIN CAPITAL LETTER LJ)
- * U+01C8 (LATIN CAPITAL LETTER L WITH SMALL LETTER J)
- * U+01C9 (LATIN SMALL LETTER LJ)
- * U+01CA (LATIN CAPITAL LETTER NJ)
- * U+01CB (LATIN CAPITAL LETTER N WITH SMALL LETTER J)
- * U+01CC (LATIN SMALL LETTER NJ)
- * U+01F1 (LATIN CAPITAL LETTER DZ)
- * U+01F2 (LATIN CAPITAL LETTER D WITH SMALL LETTER Z)
- * U+01F3 (LATIN SMALL LETTER DZ)
- *
- * The script used was basically:
-(cut -d\; -f1,14 UnicodeData.txt | sed -n 's/\(.*\);\(..*\)/\2;\1/p'
- cut -d\; -f1,15 UnicodeData.txt | grep -v ';$'
- cut -d\; -f1,13 UnicodeData.txt | grep -v ';$'
-) | grep -v '^\([^ ]*\);\1$' | sort -t\; -u | sed 's/\(.*\);\(.*\)/{0x\1,0x\2,},/'
- * with some hand munging afterward.  The data file is UnicodeData.txt
- * from http://www.unicode.org/.
- */
+/* These tables were generated from the Unicode 10.0.0 spec. */
 struct UTF8_lower_upper {
-	unsigned int lower, upper;
-} UTF8_lower_upper[] = {
+	unsigned int from, to;
+} UTF8_toUpper[] = { /* code points with non-null uppercase conversion */
 	{ 0x0061, 0x0041, },
 	{ 0x0062, 0x0042, },
 	{ 0x0063, 0x0043, },
@@ -150,10 +109,8 @@ struct UTF8_lower_upper {
 	{ 0x0067, 0x0047, },
 	{ 0x0068, 0x0048, },
 	{ 0x0069, 0x0049, },
-	{ 0x0069, 0x0130, },
 	{ 0x006A, 0x004A, },
 	{ 0x006B, 0x004B, },
-	{ 0x006B, 0x212A, },
 	{ 0x006C, 0x004C, },
 	{ 0x006D, 0x004D, },
 	{ 0x006E, 0x004E, },
@@ -169,14 +126,13 @@ struct UTF8_lower_upper {
 	{ 0x0078, 0x0058, },
 	{ 0x0079, 0x0059, },
 	{ 0x007A, 0x005A, },
-	{ 0x03BC, 0x039C, },
+	{ 0x00B5, 0x039C, },
 	{ 0x00E0, 0x00C0, },
 	{ 0x00E1, 0x00C1, },
 	{ 0x00E2, 0x00C2, },
 	{ 0x00E3, 0x00C3, },
 	{ 0x00E4, 0x00C4, },
 	{ 0x00E5, 0x00C5, },
-	{ 0x00E5, 0x212B, },
 	{ 0x00E6, 0x00C6, },
 	{ 0x00E7, 0x00C7, },
 	{ 0x00E8, 0x00C8, },
@@ -286,15 +242,12 @@ struct UTF8_lower_upper {
 	{ 0x01B9, 0x01B8, },
 	{ 0x01BD, 0x01BC, },
 	{ 0x01BF, 0x01F7, },
-	{ 0x01C6, 0x01C5, },
+	{ 0x01C5, 0x01C4, },
 	{ 0x01C6, 0x01C4, },
-	{ 0x01C4, 0x01C5, },
-	{ 0x01C9, 0x01C8, },
+	{ 0x01C8, 0x01C7, },
 	{ 0x01C9, 0x01C7, },
-	{ 0x01C7, 0x01C8, },
-	{ 0x01CC, 0x01CB, },
+	{ 0x01CB, 0x01CA, },
 	{ 0x01CC, 0x01CA, },
-	{ 0x01CA, 0x01CB, },
 	{ 0x01CE, 0x01CD, },
 	{ 0x01D0, 0x01CF, },
 	{ 0x01D2, 0x01D1, },
@@ -313,9 +266,8 @@ struct UTF8_lower_upper {
 	{ 0x01EB, 0x01EA, },
 	{ 0x01ED, 0x01EC, },
 	{ 0x01EF, 0x01EE, },
-	{ 0x01F3, 0x01F2, },
+	{ 0x01F2, 0x01F1, },
 	{ 0x01F3, 0x01F1, },
-	{ 0x01F1, 0x01F2, },
 	{ 0x01F5, 0x01F4, },
 	{ 0x01F9, 0x01F8, },
 	{ 0x01FB, 0x01FA, },
@@ -347,36 +299,54 @@ struct UTF8_lower_upper {
 	{ 0x0231, 0x0230, },
 	{ 0x0233, 0x0232, },
 	{ 0x023C, 0x023B, },
+	{ 0x023F, 0x2C7E, },
+	{ 0x0240, 0x2C7F, },
 	{ 0x0242, 0x0241, },
 	{ 0x0247, 0x0246, },
 	{ 0x0249, 0x0248, },
 	{ 0x024B, 0x024A, },
 	{ 0x024D, 0x024C, },
 	{ 0x024F, 0x024E, },
+	{ 0x0250, 0x2C6F, },
+	{ 0x0251, 0x2C6D, },
+	{ 0x0252, 0x2C70, },
 	{ 0x0253, 0x0181, },
 	{ 0x0254, 0x0186, },
 	{ 0x0256, 0x0189, },
 	{ 0x0257, 0x018A, },
 	{ 0x0259, 0x018F, },
 	{ 0x025B, 0x0190, },
+	{ 0x025C, 0xA7AB, },
 	{ 0x0260, 0x0193, },
+	{ 0x0261, 0xA7AC, },
 	{ 0x0263, 0x0194, },
+	{ 0x0265, 0xA78D, },
+	{ 0x0266, 0xA7AA, },
 	{ 0x0268, 0x0197, },
 	{ 0x0269, 0x0196, },
+	{ 0x026A, 0xA7AE, },
 	{ 0x026B, 0x2C62, },
+	{ 0x026C, 0xA7AD, },
 	{ 0x026F, 0x019C, },
+	{ 0x0271, 0x2C6E, },
 	{ 0x0272, 0x019D, },
 	{ 0x0275, 0x019F, },
 	{ 0x027D, 0x2C64, },
 	{ 0x0280, 0x01A6, },
 	{ 0x0283, 0x01A9, },
+	{ 0x0287, 0xA7B1, },
 	{ 0x0288, 0x01AE, },
 	{ 0x0289, 0x0244, },
 	{ 0x028A, 0x01B1, },
 	{ 0x028B, 0x01B2, },
 	{ 0x028C, 0x0245, },
 	{ 0x0292, 0x01B7, },
-	{ 0x03B9, 0x0399, },
+	{ 0x029D, 0xA7B2, },
+	{ 0x029E, 0xA7B0, },
+	{ 0x0345, 0x0399, },
+	{ 0x0371, 0x0370, },
+	{ 0x0373, 0x0372, },
+	{ 0x0377, 0x0376, },
 	{ 0x037B, 0x03FD, },
 	{ 0x037C, 0x03FE, },
 	{ 0x037D, 0x03FF, },
@@ -392,25 +362,23 @@ struct UTF8_lower_upper {
 	{ 0x03B6, 0x0396, },
 	{ 0x03B7, 0x0397, },
 	{ 0x03B8, 0x0398, },
-	{ 0x03B8, 0x03F4, },
-	{ 0x0345, 0x0399, },
+	{ 0x03B9, 0x0399, },
 	{ 0x03BA, 0x039A, },
 	{ 0x03BB, 0x039B, },
-	{ 0x00B5, 0x039C, },
+	{ 0x03BC, 0x039C, },
 	{ 0x03BD, 0x039D, },
 	{ 0x03BE, 0x039E, },
 	{ 0x03BF, 0x039F, },
 	{ 0x03C0, 0x03A0, },
 	{ 0x03C1, 0x03A1, },
+	{ 0x03C2, 0x03A3, },
 	{ 0x03C3, 0x03A3, },
-	{ 0x03C2, 0x03A3, },
 	{ 0x03C4, 0x03A4, },
 	{ 0x03C5, 0x03A5, },
 	{ 0x03C6, 0x03A6, },
 	{ 0x03C7, 0x03A7, },
 	{ 0x03C8, 0x03A8, },
 	{ 0x03C9, 0x03A9, },
-	{ 0x03C9, 0x2126, },
 	{ 0x03CA, 0x03AA, },
 	{ 0x03CB, 0x03AB, },
 	{ 0x03CC, 0x038C, },
@@ -420,6 +388,7 @@ struct UTF8_lower_upper {
 	{ 0x03D1, 0x0398, },
 	{ 0x03D5, 0x03A6, },
 	{ 0x03D6, 0x03A0, },
+	{ 0x03D7, 0x03CF, },
 	{ 0x03D9, 0x03D8, },
 	{ 0x03DB, 0x03DA, },
 	{ 0x03DD, 0x03DC, },
@@ -435,6 +404,7 @@ struct UTF8_lower_upper {
 	{ 0x03F0, 0x039A, },
 	{ 0x03F1, 0x03A1, },
 	{ 0x03F2, 0x03F9, },
+	{ 0x03F3, 0x037F, },
 	{ 0x03F5, 0x0395, },
 	{ 0x03F8, 0x03F7, },
 	{ 0x03FB, 0x03FA, },
@@ -572,6 +542,20 @@ struct UTF8_lower_upper {
 	{ 0x050F, 0x050E, },
 	{ 0x0511, 0x0510, },
 	{ 0x0513, 0x0512, },
+	{ 0x0515, 0x0514, },
+	{ 0x0517, 0x0516, },
+	{ 0x0519, 0x0518, },
+	{ 0x051B, 0x051A, },
+	{ 0x051D, 0x051C, },
+	{ 0x051F, 0x051E, },
+	{ 0x0521, 0x0520, },
+	{ 0x0523, 0x0522, },
+	{ 0x0525, 0x0524, },
+	{ 0x0527, 0x0526, },
+	{ 0x0529, 0x0528, },
+	{ 0x052B, 0x052A, },
+	{ 0x052D, 0x052C, },
+	{ 0x052F, 0x052E, },
 	{ 0x0561, 0x0531, },
 	{ 0x0562, 0x0532, },
 	{ 0x0563, 0x0533, },
@@ -610,6 +594,22 @@ struct UTF8_lower_upper {
 	{ 0x0584, 0x0554, },
 	{ 0x0585, 0x0555, },
 	{ 0x0586, 0x0556, },
+	{ 0x13F8, 0x13F0, },
+	{ 0x13F9, 0x13F1, },
+	{ 0x13FA, 0x13F2, },
+	{ 0x13FB, 0x13F3, },
+	{ 0x13FC, 0x13F4, },
+	{ 0x13FD, 0x13F5, },
+	{ 0x1C80, 0x0412, },
+	{ 0x1C81, 0x0414, },
+	{ 0x1C82, 0x041E, },
+	{ 0x1C83, 0x0421, },
+	{ 0x1C84, 0x0422, },
+	{ 0x1C85, 0x0422, },
+	{ 0x1C86, 0x042A, },
+	{ 0x1C87, 0x0462, },
+	{ 0x1C88, 0xA64A, },
+	{ 0x1D79, 0xA77D, },
 	{ 0x1D7D, 0x2C63, },
 	{ 0x1E01, 0x1E00, },
 	{ 0x1E03, 0x1E02, },
@@ -732,6 +732,9 @@ struct UTF8_lower_upper {
 	{ 0x1EF5, 0x1EF4, },
 	{ 0x1EF7, 0x1EF6, },
 	{ 0x1EF9, 0x1EF8, },
+	{ 0x1EFB, 0x1EFA, },
+	{ 0x1EFD, 0x1EFC, },
+	{ 0x1EFF, 0x1EFE, },
 	{ 0x1F00, 0x1F08, },
 	{ 0x1F01, 0x1F09, },
 	{ 0x1F02, 0x1F0A, },
@@ -926,6 +929,7 @@ struct UTF8_lower_upper {
 	{ 0x2C68, 0x2C67, },
 	{ 0x2C6A, 0x2C69, },
 	{ 0x2C6C, 0x2C6B, },
+	{ 0x2C73, 0x2C72, },
 	{ 0x2C76, 0x2C75, },
 	{ 0x2C81, 0x2C80, },
 	{ 0x2C83, 0x2C82, },
@@ -977,6 +981,9 @@ struct UTF8_lower_upper {
 	{ 0x2CDF, 0x2CDE, },
 	{ 0x2CE1, 0x2CE0, },
 	{ 0x2CE3, 0x2CE2, },
+	{ 0x2CEC, 0x2CEB, },
+	{ 0x2CEE, 0x2CED, },
+	{ 0x2CF3, 0x2CF2, },
 	{ 0x2D00, 0x10A0, },
 	{ 0x2D01, 0x10A1, },
 	{ 0x2D02, 0x10A2, },
@@ -1015,6 +1022,186 @@ struct UTF8_lower_upper {
 	{ 0x2D23, 0x10C3, },
 	{ 0x2D24, 0x10C4, },
 	{ 0x2D25, 0x10C5, },
+	{ 0x2D27, 0x10C7, },
+	{ 0x2D2D, 0x10CD, },
+	{ 0xA641, 0xA640, },
+	{ 0xA643, 0xA642, },
+	{ 0xA645, 0xA644, },
+	{ 0xA647, 0xA646, },
+	{ 0xA649, 0xA648, },
+	{ 0xA64B, 0xA64A, },
+	{ 0xA64D, 0xA64C, },
+	{ 0xA64F, 0xA64E, },
+	{ 0xA651, 0xA650, },
+	{ 0xA653, 0xA652, },
+	{ 0xA655, 0xA654, },
+	{ 0xA657, 0xA656, },
+	{ 0xA659, 0xA658, },
+	{ 0xA65B, 0xA65A, },
+	{ 0xA65D, 0xA65C, },
+	{ 0xA65F, 0xA65E, },
+	{ 0xA661, 0xA660, },
+	{ 0xA663, 0xA662, },
+	{ 0xA665, 0xA664, },
+	{ 0xA667, 0xA666, },
+	{ 0xA669, 0xA668, },
+	{ 0xA66B, 0xA66A, },
+	{ 0xA66D, 0xA66C, },
+	{ 0xA681, 0xA680, },
+	{ 0xA683, 0xA682, },
+	{ 0xA685, 0xA684, },
+	{ 0xA687, 0xA686, },
+	{ 0xA689, 0xA688, },
+	{ 0xA68B, 0xA68A, },
+	{ 0xA68D, 0xA68C, },
+	{ 0xA68F, 0xA68E, },
+	{ 0xA691, 0xA690, },
+	{ 0xA693, 0xA692, },
+	{ 0xA695, 0xA694, },
+	{ 0xA697, 0xA696, },
+	{ 0xA699, 0xA698, },
+	{ 0xA69B, 0xA69A, },
+	{ 0xA723, 0xA722, },
+	{ 0xA725, 0xA724, },
+	{ 0xA727, 0xA726, },
+	{ 0xA729, 0xA728, },
+	{ 0xA72B, 0xA72A, },
+	{ 0xA72D, 0xA72C, },
+	{ 0xA72F, 0xA72E, },
+	{ 0xA733, 0xA732, },
+	{ 0xA735, 0xA734, },
+	{ 0xA737, 0xA736, },
+	{ 0xA739, 0xA738, },
+	{ 0xA73B, 0xA73A, },
+	{ 0xA73D, 0xA73C, },
+	{ 0xA73F, 0xA73E, },
+	{ 0xA741, 0xA740, },
+	{ 0xA743, 0xA742, },
+	{ 0xA745, 0xA744, },
+	{ 0xA747, 0xA746, },
+	{ 0xA749, 0xA748, },
+	{ 0xA74B, 0xA74A, },
+	{ 0xA74D, 0xA74C, },
+	{ 0xA74F, 0xA74E, },
+	{ 0xA751, 0xA750, },
+	{ 0xA753, 0xA752, },
+	{ 0xA755, 0xA754, },
+	{ 0xA757, 0xA756, },
+	{ 0xA759, 0xA758, },
+	{ 0xA75B, 0xA75A, },
+	{ 0xA75D, 0xA75C, },
+	{ 0xA75F, 0xA75E, },
+	{ 0xA761, 0xA760, },
+	{ 0xA763, 0xA762, },
+	{ 0xA765, 0xA764, },
+	{ 0xA767, 0xA766, },
+	{ 0xA769, 0xA768, },
+	{ 0xA76B, 0xA76A, },
+	{ 0xA76D, 0xA76C, },
+	{ 0xA76F, 0xA76E, },
+	{ 0xA77A, 0xA779, },
+	{ 0xA77C, 0xA77B, },
+	{ 0xA77F, 0xA77E, },
+	{ 0xA781, 0xA780, },
+	{ 0xA783, 0xA782, },
+	{ 0xA785, 0xA784, },
+	{ 0xA787, 0xA786, },
+	{ 0xA78C, 0xA78B, },
+	{ 0xA791, 0xA790, },
+	{ 0xA793, 0xA792, },
+	{ 0xA797, 0xA796, },
+	{ 0xA799, 0xA798, },
+	{ 0xA79B, 0xA79A, },
+	{ 0xA79D, 0xA79C, },
+	{ 0xA79F, 0xA79E, },
+	{ 0xA7A1, 0xA7A0, },
+	{ 0xA7A3, 0xA7A2, },
+	{ 0xA7A5, 0xA7A4, },
+	{ 0xA7A7, 0xA7A6, },
+	{ 0xA7A9, 0xA7A8, },
+	{ 0xA7B5, 0xA7B4, },
+	{ 0xA7B7, 0xA7B6, },
+	{ 0xAB53, 0xA7B3, },
+	{ 0xAB70, 0x13A0, },
+	{ 0xAB71, 0x13A1, },
+	{ 0xAB72, 0x13A2, },
+	{ 0xAB73, 0x13A3, },
+	{ 0xAB74, 0x13A4, },
+	{ 0xAB75, 0x13A5, },
+	{ 0xAB76, 0x13A6, },
+	{ 0xAB77, 0x13A7, },
+	{ 0xAB78, 0x13A8, },
+	{ 0xAB79, 0x13A9, },
+	{ 0xAB7A, 0x13AA, },
+	{ 0xAB7B, 0x13AB, },
+	{ 0xAB7C, 0x13AC, },
+	{ 0xAB7D, 0x13AD, },
+	{ 0xAB7E, 0x13AE, },
+	{ 0xAB7F, 0x13AF, },
+	{ 0xAB80, 0x13B0, },
+	{ 0xAB81, 0x13B1, },
+	{ 0xAB82, 0x13B2, },
+	{ 0xAB83, 0x13B3, },
+	{ 0xAB84, 0x13B4, },
+	{ 0xAB85, 0x13B5, },
+	{ 0xAB86, 0x13B6, },
+	{ 0xAB87, 0x13B7, },
+	{ 0xAB88, 0x13B8, },
+	{ 0xAB89, 0x13B9, },
+	{ 0xAB8A, 0x13BA, },
+	{ 0xAB8B, 0x13BB, },
+	{ 0xAB8C, 0x13BC, },
+	{ 0xAB8D, 0x13BD, },
+	{ 0xAB8E, 0x13BE, },
+	{ 0xAB8F, 0x13BF, },
+	{ 0xAB90, 0x13C0, },
+	{ 0xAB91, 0x13C1, },
+	{ 0xAB92, 0x13C2, },
+	{ 0xAB93, 0x13C3, },
+	{ 0xAB94, 0x13C4, },
+	{ 0xAB95, 0x13C5, },
+	{ 0xAB96, 0x13C6, },
+	{ 0xAB97, 0x13C7, },
+	{ 0xAB98, 0x13C8, },
+	{ 0xAB99, 0x13C9, },
+	{ 0xAB9A, 0x13CA, },
+	{ 0xAB9B, 0x13CB, },
+	{ 0xAB9C, 0x13CC, },
+	{ 0xAB9D, 0x13CD, },
+	{ 0xAB9E, 0x13CE, },
+	{ 0xAB9F, 0x13CF, },
+	{ 0xABA0, 0x13D0, },
+	{ 0xABA1, 0x13D1, },
+	{ 0xABA2, 0x13D2, },
+	{ 0xABA3, 0x13D3, },
+	{ 0xABA4, 0x13D4, },
+	{ 0xABA5, 0x13D5, },
+	{ 0xABA6, 0x13D6, },
+	{ 0xABA7, 0x13D7, },
+	{ 0xABA8, 0x13D8, },
+	{ 0xABA9, 0x13D9, },
+	{ 0xABAA, 0x13DA, },
+	{ 0xABAB, 0x13DB, },
+	{ 0xABAC, 0x13DC, },
+	{ 0xABAD, 0x13DD, },
+	{ 0xABAE, 0x13DE, },
+	{ 0xABAF, 0x13DF, },
+	{ 0xABB0, 0x13E0, },
+	{ 0xABB1, 0x13E1, },
+	{ 0xABB2, 0x13E2, },
+	{ 0xABB3, 0x13E3, },
+	{ 0xABB4, 0x13E4, },
+	{ 0xABB5, 0x13E5, },
+	{ 0xABB6, 0x13E6, },
+	{ 0xABB7, 0x13E7, },
+	{ 0xABB8, 0x13E8, },
+	{ 0xABB9, 0x13E9, },
+	{ 0xABBA, 0x13EA, },
+	{ 0xABBB, 0x13EB, },
+	{ 0xABBC, 0x13EC, },
+	{ 0xABBD, 0x13ED, },
+	{ 0xABBE, 0x13EE, },
+	{ 0xABBF, 0x13EF, },
 	{ 0xFF41, 0xFF21, },
 	{ 0xFF42, 0xFF22, },
 	{ 0xFF43, 0xFF23, },
@@ -1081,42 +1268,1547 @@ struct UTF8_lower_upper {
 	{ 0x1044D, 0x10425, },
 	{ 0x1044E, 0x10426, },
 	{ 0x1044F, 0x10427, },
+	{ 0x104D8, 0x104B0, },
+	{ 0x104D9, 0x104B1, },
+	{ 0x104DA, 0x104B2, },
+	{ 0x104DB, 0x104B3, },
+	{ 0x104DC, 0x104B4, },
+	{ 0x104DD, 0x104B5, },
+	{ 0x104DE, 0x104B6, },
+	{ 0x104DF, 0x104B7, },
+	{ 0x104E0, 0x104B8, },
+	{ 0x104E1, 0x104B9, },
+	{ 0x104E2, 0x104BA, },
+	{ 0x104E3, 0x104BB, },
+	{ 0x104E4, 0x104BC, },
+	{ 0x104E5, 0x104BD, },
+	{ 0x104E6, 0x104BE, },
+	{ 0x104E7, 0x104BF, },
+	{ 0x104E8, 0x104C0, },
+	{ 0x104E9, 0x104C1, },
+	{ 0x104EA, 0x104C2, },
+	{ 0x104EB, 0x104C3, },
+	{ 0x104EC, 0x104C4, },
+	{ 0x104ED, 0x104C5, },
+	{ 0x104EE, 0x104C6, },
+	{ 0x104EF, 0x104C7, },
+	{ 0x104F0, 0x104C8, },
+	{ 0x104F1, 0x104C9, },
+	{ 0x104F2, 0x104CA, },
+	{ 0x104F3, 0x104CB, },
+	{ 0x104F4, 0x104CC, },
+	{ 0x104F5, 0x104CD, },
+	{ 0x104F6, 0x104CE, },
+	{ 0x104F7, 0x104CF, },
+	{ 0x104F8, 0x104D0, },
+	{ 0x104F9, 0x104D1, },
+	{ 0x104FA, 0x104D2, },
+	{ 0x104FB, 0x104D3, },
+	{ 0x10CC0, 0x10C80, },
+	{ 0x10CC1, 0x10C81, },
+	{ 0x10CC2, 0x10C82, },
+	{ 0x10CC3, 0x10C83, },
+	{ 0x10CC4, 0x10C84, },
+	{ 0x10CC5, 0x10C85, },
+	{ 0x10CC6, 0x10C86, },
+	{ 0x10CC7, 0x10C87, },
+	{ 0x10CC8, 0x10C88, },
+	{ 0x10CC9, 0x10C89, },
+	{ 0x10CCA, 0x10C8A, },
+	{ 0x10CCB, 0x10C8B, },
+	{ 0x10CCC, 0x10C8C, },
+	{ 0x10CCD, 0x10C8D, },
+	{ 0x10CCE, 0x10C8E, },
+	{ 0x10CCF, 0x10C8F, },
+	{ 0x10CD0, 0x10C90, },
+	{ 0x10CD1, 0x10C91, },
+	{ 0x10CD2, 0x10C92, },
+	{ 0x10CD3, 0x10C93, },
+	{ 0x10CD4, 0x10C94, },
+	{ 0x10CD5, 0x10C95, },
+	{ 0x10CD6, 0x10C96, },
+	{ 0x10CD7, 0x10C97, },
+	{ 0x10CD8, 0x10C98, },
+	{ 0x10CD9, 0x10C99, },
+	{ 0x10CDA, 0x10C9A, },
+	{ 0x10CDB, 0x10C9B, },
+	{ 0x10CDC, 0x10C9C, },
+	{ 0x10CDD, 0x10C9D, },
+	{ 0x10CDE, 0x10C9E, },
+	{ 0x10CDF, 0x10C9F, },
+	{ 0x10CE0, 0x10CA0, },
+	{ 0x10CE1, 0x10CA1, },
+	{ 0x10CE2, 0x10CA2, },
+	{ 0x10CE3, 0x10CA3, },
+	{ 0x10CE4, 0x10CA4, },
+	{ 0x10CE5, 0x10CA5, },
+	{ 0x10CE6, 0x10CA6, },
+	{ 0x10CE7, 0x10CA7, },
+	{ 0x10CE8, 0x10CA8, },
+	{ 0x10CE9, 0x10CA9, },
+	{ 0x10CEA, 0x10CAA, },
+	{ 0x10CEB, 0x10CAB, },
+	{ 0x10CEC, 0x10CAC, },
+	{ 0x10CED, 0x10CAD, },
+	{ 0x10CEE, 0x10CAE, },
+	{ 0x10CEF, 0x10CAF, },
+	{ 0x10CF0, 0x10CB0, },
+	{ 0x10CF1, 0x10CB1, },
+	{ 0x10CF2, 0x10CB2, },
+	{ 0x118C0, 0x118A0, },
+	{ 0x118C1, 0x118A1, },
+	{ 0x118C2, 0x118A2, },
+	{ 0x118C3, 0x118A3, },
+	{ 0x118C4, 0x118A4, },
+	{ 0x118C5, 0x118A5, },
+	{ 0x118C6, 0x118A6, },
+	{ 0x118C7, 0x118A7, },
+	{ 0x118C8, 0x118A8, },
+	{ 0x118C9, 0x118A9, },
+	{ 0x118CA, 0x118AA, },
+	{ 0x118CB, 0x118AB, },
+	{ 0x118CC, 0x118AC, },
+	{ 0x118CD, 0x118AD, },
+	{ 0x118CE, 0x118AE, },
+	{ 0x118CF, 0x118AF, },
+	{ 0x118D0, 0x118B0, },
+	{ 0x118D1, 0x118B1, },
+	{ 0x118D2, 0x118B2, },
+	{ 0x118D3, 0x118B3, },
+	{ 0x118D4, 0x118B4, },
+	{ 0x118D5, 0x118B5, },
+	{ 0x118D6, 0x118B6, },
+	{ 0x118D7, 0x118B7, },
+	{ 0x118D8, 0x118B8, },
+	{ 0x118D9, 0x118B9, },
+	{ 0x118DA, 0x118BA, },
+	{ 0x118DB, 0x118BB, },
+	{ 0x118DC, 0x118BC, },
+	{ 0x118DD, 0x118BD, },
+	{ 0x118DE, 0x118BE, },
+	{ 0x118DF, 0x118BF, },
+	{ 0x1E922, 0x1E900, },
+	{ 0x1E923, 0x1E901, },
+	{ 0x1E924, 0x1E902, },
+	{ 0x1E925, 0x1E903, },
+	{ 0x1E926, 0x1E904, },
+	{ 0x1E927, 0x1E905, },
+	{ 0x1E928, 0x1E906, },
+	{ 0x1E929, 0x1E907, },
+	{ 0x1E92A, 0x1E908, },
+	{ 0x1E92B, 0x1E909, },
+	{ 0x1E92C, 0x1E90A, },
+	{ 0x1E92D, 0x1E90B, },
+	{ 0x1E92E, 0x1E90C, },
+	{ 0x1E92F, 0x1E90D, },
+	{ 0x1E930, 0x1E90E, },
+	{ 0x1E931, 0x1E90F, },
+	{ 0x1E932, 0x1E910, },
+	{ 0x1E933, 0x1E911, },
+	{ 0x1E934, 0x1E912, },
+	{ 0x1E935, 0x1E913, },
+	{ 0x1E936, 0x1E914, },
+	{ 0x1E937, 0x1E915, },
+	{ 0x1E938, 0x1E916, },
+	{ 0x1E939, 0x1E917, },
+	{ 0x1E93A, 0x1E918, },
+	{ 0x1E93B, 0x1E919, },
+	{ 0x1E93C, 0x1E91A, },
+	{ 0x1E93D, 0x1E91B, },
+	{ 0x1E93E, 0x1E91C, },
+	{ 0x1E93F, 0x1E91D, },
+	{ 0x1E940, 0x1E91E, },
+	{ 0x1E941, 0x1E91F, },
+	{ 0x1E942, 0x1E920, },
+	{ 0x1E943, 0x1E921, },
+}, UTF8_toLower[] = { /* code points with non-null lowercase conversion */
+	{ 0x0041, 0x0061, },
+	{ 0x0042, 0x0062, },
+	{ 0x0043, 0x0063, },
+	{ 0x0044, 0x0064, },
+	{ 0x0045, 0x0065, },
+	{ 0x0046, 0x0066, },
+	{ 0x0047, 0x0067, },
+	{ 0x0048, 0x0068, },
+	{ 0x0049, 0x0069, },
+	{ 0x004A, 0x006A, },
+	{ 0x004B, 0x006B, },
+	{ 0x004C, 0x006C, },
+	{ 0x004D, 0x006D, },
+	{ 0x004E, 0x006E, },
+	{ 0x004F, 0x006F, },
+	{ 0x0050, 0x0070, },
+	{ 0x0051, 0x0071, },
+	{ 0x0052, 0x0072, },
+	{ 0x0053, 0x0073, },
+	{ 0x0054, 0x0074, },
+	{ 0x0055, 0x0075, },
+	{ 0x0056, 0x0076, },
+	{ 0x0057, 0x0077, },
+	{ 0x0058, 0x0078, },
+	{ 0x0059, 0x0079, },
+	{ 0x005A, 0x007A, },
+	{ 0x00C0, 0x00E0, },
+	{ 0x00C1, 0x00E1, },
+	{ 0x00C2, 0x00E2, },
+	{ 0x00C3, 0x00E3, },
+	{ 0x00C4, 0x00E4, },
+	{ 0x00C5, 0x00E5, },
+	{ 0x00C6, 0x00E6, },
+	{ 0x00C7, 0x00E7, },
+	{ 0x00C8, 0x00E8, },
+	{ 0x00C9, 0x00E9, },
+	{ 0x00CA, 0x00EA, },
+	{ 0x00CB, 0x00EB, },
+	{ 0x00CC, 0x00EC, },
+	{ 0x00CD, 0x00ED, },
+	{ 0x00CE, 0x00EE, },
+	{ 0x00CF, 0x00EF, },
+	{ 0x00D0, 0x00F0, },
+	{ 0x00D1, 0x00F1, },
+	{ 0x00D2, 0x00F2, },
+	{ 0x00D3, 0x00F3, },
+	{ 0x00D4, 0x00F4, },
+	{ 0x00D5, 0x00F5, },
+	{ 0x00D6, 0x00F6, },
+	{ 0x00D8, 0x00F8, },
+	{ 0x00D9, 0x00F9, },
+	{ 0x00DA, 0x00FA, },
+	{ 0x00DB, 0x00FB, },
+	{ 0x00DC, 0x00FC, },
+	{ 0x00DD, 0x00FD, },
+	{ 0x00DE, 0x00FE, },
+	{ 0x0100, 0x0101, },
+	{ 0x0102, 0x0103, },
+	{ 0x0104, 0x0105, },
+	{ 0x0106, 0x0107, },
+	{ 0x0108, 0x0109, },
+	{ 0x010A, 0x010B, },
+	{ 0x010C, 0x010D, },
+	{ 0x010E, 0x010F, },
+	{ 0x0110, 0x0111, },
+	{ 0x0112, 0x0113, },
+	{ 0x0114, 0x0115, },
+	{ 0x0116, 0x0117, },
+	{ 0x0118, 0x0119, },
+	{ 0x011A, 0x011B, },
+	{ 0x011C, 0x011D, },
+	{ 0x011E, 0x011F, },
+	{ 0x0120, 0x0121, },
+	{ 0x0122, 0x0123, },
+	{ 0x0124, 0x0125, },
+	{ 0x0126, 0x0127, },
+	{ 0x0128, 0x0129, },
+	{ 0x012A, 0x012B, },
+	{ 0x012C, 0x012D, },
+	{ 0x012E, 0x012F, },
+	{ 0x0130, 0x0069, },
+	{ 0x0132, 0x0133, },
+	{ 0x0134, 0x0135, },
+	{ 0x0136, 0x0137, },
+	{ 0x0139, 0x013A, },
+	{ 0x013B, 0x013C, },
+	{ 0x013D, 0x013E, },
+	{ 0x013F, 0x0140, },
+	{ 0x0141, 0x0142, },
+	{ 0x0143, 0x0144, },
+	{ 0x0145, 0x0146, },
+	{ 0x0147, 0x0148, },
+	{ 0x014A, 0x014B, },
+	{ 0x014C, 0x014D, },
+	{ 0x014E, 0x014F, },
+	{ 0x0150, 0x0151, },
+	{ 0x0152, 0x0153, },
+	{ 0x0154, 0x0155, },
+	{ 0x0156, 0x0157, },
+	{ 0x0158, 0x0159, },
+	{ 0x015A, 0x015B, },
+	{ 0x015C, 0x015D, },
+	{ 0x015E, 0x015F, },
+	{ 0x0160, 0x0161, },
+	{ 0x0162, 0x0163, },
+	{ 0x0164, 0x0165, },
+	{ 0x0166, 0x0167, },
+	{ 0x0168, 0x0169, },
+	{ 0x016A, 0x016B, },
+	{ 0x016C, 0x016D, },
+	{ 0x016E, 0x016F, },
+	{ 0x0170, 0x0171, },
+	{ 0x0172, 0x0173, },
+	{ 0x0174, 0x0175, },
+	{ 0x0176, 0x0177, },
+	{ 0x0178, 0x00FF, },
+	{ 0x0179, 0x017A, },
+	{ 0x017B, 0x017C, },
+	{ 0x017D, 0x017E, },
+	{ 0x0181, 0x0253, },
+	{ 0x0182, 0x0183, },
+	{ 0x0184, 0x0185, },
+	{ 0x0186, 0x0254, },
+	{ 0x0187, 0x0188, },
+	{ 0x0189, 0x0256, },
+	{ 0x018A, 0x0257, },
+	{ 0x018B, 0x018C, },
+	{ 0x018E, 0x01DD, },
+	{ 0x018F, 0x0259, },
+	{ 0x0190, 0x025B, },
+	{ 0x0191, 0x0192, },
+	{ 0x0193, 0x0260, },
+	{ 0x0194, 0x0263, },
+	{ 0x0196, 0x0269, },
+	{ 0x0197, 0x0268, },
+	{ 0x0198, 0x0199, },
+	{ 0x019C, 0x026F, },
+	{ 0x019D, 0x0272, },
+	{ 0x019F, 0x0275, },
+	{ 0x01A0, 0x01A1, },
+	{ 0x01A2, 0x01A3, },
+	{ 0x01A4, 0x01A5, },
+	{ 0x01A6, 0x0280, },
+	{ 0x01A7, 0x01A8, },
+	{ 0x01A9, 0x0283, },
+	{ 0x01AC, 0x01AD, },
+	{ 0x01AE, 0x0288, },
+	{ 0x01AF, 0x01B0, },
+	{ 0x01B1, 0x028A, },
+	{ 0x01B2, 0x028B, },
+	{ 0x01B3, 0x01B4, },
+	{ 0x01B5, 0x01B6, },
+	{ 0x01B7, 0x0292, },
+	{ 0x01B8, 0x01B9, },
+	{ 0x01BC, 0x01BD, },
+	{ 0x01C4, 0x01C6, },
+	{ 0x01C5, 0x01C6, },
+	{ 0x01C7, 0x01C9, },
+	{ 0x01C8, 0x01C9, },
+	{ 0x01CA, 0x01CC, },
+	{ 0x01CB, 0x01CC, },
+	{ 0x01CD, 0x01CE, },
+	{ 0x01CF, 0x01D0, },
+	{ 0x01D1, 0x01D2, },
+	{ 0x01D3, 0x01D4, },
+	{ 0x01D5, 0x01D6, },
+	{ 0x01D7, 0x01D8, },
+	{ 0x01D9, 0x01DA, },
+	{ 0x01DB, 0x01DC, },
+	{ 0x01DE, 0x01DF, },
+	{ 0x01E0, 0x01E1, },
+	{ 0x01E2, 0x01E3, },
+	{ 0x01E4, 0x01E5, },
+	{ 0x01E6, 0x01E7, },
+	{ 0x01E8, 0x01E9, },
+	{ 0x01EA, 0x01EB, },
+	{ 0x01EC, 0x01ED, },
+	{ 0x01EE, 0x01EF, },
+	{ 0x01F1, 0x01F3, },
+	{ 0x01F2, 0x01F3, },
+	{ 0x01F4, 0x01F5, },
+	{ 0x01F6, 0x0195, },
+	{ 0x01F7, 0x01BF, },
+	{ 0x01F8, 0x01F9, },
+	{ 0x01FA, 0x01FB, },
+	{ 0x01FC, 0x01FD, },
+	{ 0x01FE, 0x01FF, },
+	{ 0x0200, 0x0201, },
+	{ 0x0202, 0x0203, },
+	{ 0x0204, 0x0205, },
+	{ 0x0206, 0x0207, },
+	{ 0x0208, 0x0209, },
+	{ 0x020A, 0x020B, },
+	{ 0x020C, 0x020D, },
+	{ 0x020E, 0x020F, },
+	{ 0x0210, 0x0211, },
+	{ 0x0212, 0x0213, },
+	{ 0x0214, 0x0215, },
+	{ 0x0216, 0x0217, },
+	{ 0x0218, 0x0219, },
+	{ 0x021A, 0x021B, },
+	{ 0x021C, 0x021D, },
+	{ 0x021E, 0x021F, },
+	{ 0x0220, 0x019E, },
+	{ 0x0222, 0x0223, },
+	{ 0x0224, 0x0225, },
+	{ 0x0226, 0x0227, },
+	{ 0x0228, 0x0229, },
+	{ 0x022A, 0x022B, },
+	{ 0x022C, 0x022D, },
+	{ 0x022E, 0x022F, },
+	{ 0x0230, 0x0231, },
+	{ 0x0232, 0x0233, },
+	{ 0x023A, 0x2C65, },
+	{ 0x023B, 0x023C, },
+	{ 0x023D, 0x019A, },
+	{ 0x023E, 0x2C66, },
+	{ 0x0241, 0x0242, },
+	{ 0x0243, 0x0180, },
+	{ 0x0244, 0x0289, },
+	{ 0x0245, 0x028C, },
+	{ 0x0246, 0x0247, },
+	{ 0x0248, 0x0249, },
+	{ 0x024A, 0x024B, },
+	{ 0x024C, 0x024D, },
+	{ 0x024E, 0x024F, },
+	{ 0x0370, 0x0371, },
+	{ 0x0372, 0x0373, },
+	{ 0x0376, 0x0377, },
+	{ 0x037F, 0x03F3, },
+	{ 0x0386, 0x03AC, },
+	{ 0x0388, 0x03AD, },
+	{ 0x0389, 0x03AE, },
+	{ 0x038A, 0x03AF, },
+	{ 0x038C, 0x03CC, },
+	{ 0x038E, 0x03CD, },
+	{ 0x038F, 0x03CE, },
+	{ 0x0391, 0x03B1, },
+	{ 0x0392, 0x03B2, },
+	{ 0x0393, 0x03B3, },
+	{ 0x0394, 0x03B4, },
+	{ 0x0395, 0x03B5, },
+	{ 0x0396, 0x03B6, },
+	{ 0x0397, 0x03B7, },
+	{ 0x0398, 0x03B8, },
+	{ 0x0399, 0x03B9, },
+	{ 0x039A, 0x03BA, },
+	{ 0x039B, 0x03BB, },
+	{ 0x039C, 0x03BC, },
+	{ 0x039D, 0x03BD, },
+	{ 0x039E, 0x03BE, },
+	{ 0x039F, 0x03BF, },
+	{ 0x03A0, 0x03C0, },
+	{ 0x03A1, 0x03C1, },
+	{ 0x03A3, 0x03C3, },
+	{ 0x03A4, 0x03C4, },
+	{ 0x03A5, 0x03C5, },
+	{ 0x03A6, 0x03C6, },
+	{ 0x03A7, 0x03C7, },
+	{ 0x03A8, 0x03C8, },
+	{ 0x03A9, 0x03C9, },
+	{ 0x03AA, 0x03CA, },
+	{ 0x03AB, 0x03CB, },
+	{ 0x03CF, 0x03D7, },
+	{ 0x03D8, 0x03D9, },
+	{ 0x03DA, 0x03DB, },
+	{ 0x03DC, 0x03DD, },
+	{ 0x03DE, 0x03DF, },
+	{ 0x03E0, 0x03E1, },
+	{ 0x03E2, 0x03E3, },
+	{ 0x03E4, 0x03E5, },
+	{ 0x03E6, 0x03E7, },
+	{ 0x03E8, 0x03E9, },
+	{ 0x03EA, 0x03EB, },
+	{ 0x03EC, 0x03ED, },
+	{ 0x03EE, 0x03EF, },
+	{ 0x03F4, 0x03B8, },
+	{ 0x03F7, 0x03F8, },
+	{ 0x03F9, 0x03F2, },
+	{ 0x03FA, 0x03FB, },
+	{ 0x03FD, 0x037B, },
+	{ 0x03FE, 0x037C, },
+	{ 0x03FF, 0x037D, },
+	{ 0x0400, 0x0450, },
+	{ 0x0401, 0x0451, },
+	{ 0x0402, 0x0452, },
+	{ 0x0403, 0x0453, },
+	{ 0x0404, 0x0454, },
+	{ 0x0405, 0x0455, },
+	{ 0x0406, 0x0456, },
+	{ 0x0407, 0x0457, },
+	{ 0x0408, 0x0458, },
+	{ 0x0409, 0x0459, },
+	{ 0x040A, 0x045A, },
+	{ 0x040B, 0x045B, },
+	{ 0x040C, 0x045C, },
+	{ 0x040D, 0x045D, },
+	{ 0x040E, 0x045E, },
+	{ 0x040F, 0x045F, },
+	{ 0x0410, 0x0430, },
+	{ 0x0411, 0x0431, },
+	{ 0x0412, 0x0432, },
+	{ 0x0413, 0x0433, },
+	{ 0x0414, 0x0434, },
+	{ 0x0415, 0x0435, },
+	{ 0x0416, 0x0436, },
+	{ 0x0417, 0x0437, },
+	{ 0x0418, 0x0438, },
+	{ 0x0419, 0x0439, },
+	{ 0x041A, 0x043A, },
+	{ 0x041B, 0x043B, },
+	{ 0x041C, 0x043C, },
+	{ 0x041D, 0x043D, },
+	{ 0x041E, 0x043E, },
+	{ 0x041F, 0x043F, },
+	{ 0x0420, 0x0440, },
+	{ 0x0421, 0x0441, },
+	{ 0x0422, 0x0442, },
+	{ 0x0423, 0x0443, },
+	{ 0x0424, 0x0444, },
+	{ 0x0425, 0x0445, },
+	{ 0x0426, 0x0446, },
+	{ 0x0427, 0x0447, },
+	{ 0x0428, 0x0448, },
+	{ 0x0429, 0x0449, },
+	{ 0x042A, 0x044A, },
+	{ 0x042B, 0x044B, },
+	{ 0x042C, 0x044C, },
+	{ 0x042D, 0x044D, },
+	{ 0x042E, 0x044E, },
+	{ 0x042F, 0x044F, },
+	{ 0x0460, 0x0461, },
+	{ 0x0462, 0x0463, },
+	{ 0x0464, 0x0465, },
+	{ 0x0466, 0x0467, },
+	{ 0x0468, 0x0469, },
+	{ 0x046A, 0x046B, },
+	{ 0x046C, 0x046D, },
+	{ 0x046E, 0x046F, },
+	{ 0x0470, 0x0471, },
+	{ 0x0472, 0x0473, },
+	{ 0x0474, 0x0475, },
+	{ 0x0476, 0x0477, },
+	{ 0x0478, 0x0479, },
+	{ 0x047A, 0x047B, },
+	{ 0x047C, 0x047D, },
+	{ 0x047E, 0x047F, },
+	{ 0x0480, 0x0481, },
+	{ 0x048A, 0x048B, },
+	{ 0x048C, 0x048D, },
+	{ 0x048E, 0x048F, },
+	{ 0x0490, 0x0491, },
+	{ 0x0492, 0x0493, },
+	{ 0x0494, 0x0495, },
+	{ 0x0496, 0x0497, },
+	{ 0x0498, 0x0499, },
+	{ 0x049A, 0x049B, },
+	{ 0x049C, 0x049D, },
+	{ 0x049E, 0x049F, },
+	{ 0x04A0, 0x04A1, },
+	{ 0x04A2, 0x04A3, },
+	{ 0x04A4, 0x04A5, },
+	{ 0x04A6, 0x04A7, },
+	{ 0x04A8, 0x04A9, },
+	{ 0x04AA, 0x04AB, },
+	{ 0x04AC, 0x04AD, },
+	{ 0x04AE, 0x04AF, },
+	{ 0x04B0, 0x04B1, },
+	{ 0x04B2, 0x04B3, },
+	{ 0x04B4, 0x04B5, },
+	{ 0x04B6, 0x04B7, },
+	{ 0x04B8, 0x04B9, },
+	{ 0x04BA, 0x04BB, },
+	{ 0x04BC, 0x04BD, },
+	{ 0x04BE, 0x04BF, },
+	{ 0x04C0, 0x04CF, },
+	{ 0x04C1, 0x04C2, },
+	{ 0x04C3, 0x04C4, },
+	{ 0x04C5, 0x04C6, },
+	{ 0x04C7, 0x04C8, },
+	{ 0x04C9, 0x04CA, },
+	{ 0x04CB, 0x04CC, },
+	{ 0x04CD, 0x04CE, },
+	{ 0x04D0, 0x04D1, },
+	{ 0x04D2, 0x04D3, },
+	{ 0x04D4, 0x04D5, },
+	{ 0x04D6, 0x04D7, },
+	{ 0x04D8, 0x04D9, },
+	{ 0x04DA, 0x04DB, },
+	{ 0x04DC, 0x04DD, },
+	{ 0x04DE, 0x04DF, },
+	{ 0x04E0, 0x04E1, },
+	{ 0x04E2, 0x04E3, },
+	{ 0x04E4, 0x04E5, },
+	{ 0x04E6, 0x04E7, },
+	{ 0x04E8, 0x04E9, },
+	{ 0x04EA, 0x04EB, },
+	{ 0x04EC, 0x04ED, },
+	{ 0x04EE, 0x04EF, },
+	{ 0x04F0, 0x04F1, },
+	{ 0x04F2, 0x04F3, },
+	{ 0x04F4, 0x04F5, },
+	{ 0x04F6, 0x04F7, },
+	{ 0x04F8, 0x04F9, },
+	{ 0x04FA, 0x04FB, },
+	{ 0x04FC, 0x04FD, },
+	{ 0x04FE, 0x04FF, },
+	{ 0x0500, 0x0501, },
+	{ 0x0502, 0x0503, },
+	{ 0x0504, 0x0505, },
+	{ 0x0506, 0x0507, },
+	{ 0x0508, 0x0509, },
+	{ 0x050A, 0x050B, },
+	{ 0x050C, 0x050D, },
+	{ 0x050E, 0x050F, },
+	{ 0x0510, 0x0511, },
+	{ 0x0512, 0x0513, },
+	{ 0x0514, 0x0515, },
+	{ 0x0516, 0x0517, },
+	{ 0x0518, 0x0519, },
+	{ 0x051A, 0x051B, },
+	{ 0x051C, 0x051D, },
+	{ 0x051E, 0x051F, },
+	{ 0x0520, 0x0521, },
+	{ 0x0522, 0x0523, },
+	{ 0x0524, 0x0525, },
+	{ 0x0526, 0x0527, },
+	{ 0x0528, 0x0529, },
+	{ 0x052A, 0x052B, },
+	{ 0x052C, 0x052D, },
+	{ 0x052E, 0x052F, },
+	{ 0x0531, 0x0561, },
+	{ 0x0532, 0x0562, },
+	{ 0x0533, 0x0563, },
+	{ 0x0534, 0x0564, },
+	{ 0x0535, 0x0565, },
+	{ 0x0536, 0x0566, },
+	{ 0x0537, 0x0567, },
+	{ 0x0538, 0x0568, },
+	{ 0x0539, 0x0569, },
+	{ 0x053A, 0x056A, },
+	{ 0x053B, 0x056B, },
+	{ 0x053C, 0x056C, },
+	{ 0x053D, 0x056D, },
+	{ 0x053E, 0x056E, },
+	{ 0x053F, 0x056F, },
+	{ 0x0540, 0x0570, },
+	{ 0x0541, 0x0571, },
+	{ 0x0542, 0x0572, },
+	{ 0x0543, 0x0573, },
+	{ 0x0544, 0x0574, },
+	{ 0x0545, 0x0575, },
+	{ 0x0546, 0x0576, },
+	{ 0x0547, 0x0577, },
+	{ 0x0548, 0x0578, },
+	{ 0x0549, 0x0579, },
+	{ 0x054A, 0x057A, },
+	{ 0x054B, 0x057B, },
+	{ 0x054C, 0x057C, },
+	{ 0x054D, 0x057D, },
+	{ 0x054E, 0x057E, },
+	{ 0x054F, 0x057F, },
+	{ 0x0550, 0x0580, },
+	{ 0x0551, 0x0581, },
+	{ 0x0552, 0x0582, },
+	{ 0x0553, 0x0583, },
+	{ 0x0554, 0x0584, },
+	{ 0x0555, 0x0585, },
+	{ 0x0556, 0x0586, },
+	{ 0x10A0, 0x2D00, },
+	{ 0x10A1, 0x2D01, },
+	{ 0x10A2, 0x2D02, },
+	{ 0x10A3, 0x2D03, },
+	{ 0x10A4, 0x2D04, },
+	{ 0x10A5, 0x2D05, },
+	{ 0x10A6, 0x2D06, },
+	{ 0x10A7, 0x2D07, },
+	{ 0x10A8, 0x2D08, },
+	{ 0x10A9, 0x2D09, },
+	{ 0x10AA, 0x2D0A, },
+	{ 0x10AB, 0x2D0B, },
+	{ 0x10AC, 0x2D0C, },
+	{ 0x10AD, 0x2D0D, },
+	{ 0x10AE, 0x2D0E, },
+	{ 0x10AF, 0x2D0F, },
+	{ 0x10B0, 0x2D10, },
+	{ 0x10B1, 0x2D11, },
+	{ 0x10B2, 0x2D12, },
+	{ 0x10B3, 0x2D13, },
+	{ 0x10B4, 0x2D14, },
+	{ 0x10B5, 0x2D15, },
+	{ 0x10B6, 0x2D16, },
+	{ 0x10B7, 0x2D17, },
+	{ 0x10B8, 0x2D18, },
+	{ 0x10B9, 0x2D19, },
+	{ 0x10BA, 0x2D1A, },
+	{ 0x10BB, 0x2D1B, },
+	{ 0x10BC, 0x2D1C, },
+	{ 0x10BD, 0x2D1D, },
+	{ 0x10BE, 0x2D1E, },
+	{ 0x10BF, 0x2D1F, },
+	{ 0x10C0, 0x2D20, },
+	{ 0x10C1, 0x2D21, },
+	{ 0x10C2, 0x2D22, },
+	{ 0x10C3, 0x2D23, },
+	{ 0x10C4, 0x2D24, },
+	{ 0x10C5, 0x2D25, },
+	{ 0x10C7, 0x2D27, },
+	{ 0x10CD, 0x2D2D, },
+	{ 0x13A0, 0xAB70, },
+	{ 0x13A1, 0xAB71, },
+	{ 0x13A2, 0xAB72, },
+	{ 0x13A3, 0xAB73, },
+	{ 0x13A4, 0xAB74, },
+	{ 0x13A5, 0xAB75, },
+	{ 0x13A6, 0xAB76, },
+	{ 0x13A7, 0xAB77, },
+	{ 0x13A8, 0xAB78, },
+	{ 0x13A9, 0xAB79, },
+	{ 0x13AA, 0xAB7A, },
+	{ 0x13AB, 0xAB7B, },
+	{ 0x13AC, 0xAB7C, },
+	{ 0x13AD, 0xAB7D, },
+	{ 0x13AE, 0xAB7E, },
+	{ 0x13AF, 0xAB7F, },
+	{ 0x13B0, 0xAB80, },
+	{ 0x13B1, 0xAB81, },
+	{ 0x13B2, 0xAB82, },
+	{ 0x13B3, 0xAB83, },
+	{ 0x13B4, 0xAB84, },
+	{ 0x13B5, 0xAB85, },
+	{ 0x13B6, 0xAB86, },
+	{ 0x13B7, 0xAB87, },
+	{ 0x13B8, 0xAB88, },
+	{ 0x13B9, 0xAB89, },
+	{ 0x13BA, 0xAB8A, },
+	{ 0x13BB, 0xAB8B, },
+	{ 0x13BC, 0xAB8C, },
+	{ 0x13BD, 0xAB8D, },
+	{ 0x13BE, 0xAB8E, },
+	{ 0x13BF, 0xAB8F, },
+	{ 0x13C0, 0xAB90, },
+	{ 0x13C1, 0xAB91, },
+	{ 0x13C2, 0xAB92, },
+	{ 0x13C3, 0xAB93, },
+	{ 0x13C4, 0xAB94, },
+	{ 0x13C5, 0xAB95, },
+	{ 0x13C6, 0xAB96, },
+	{ 0x13C7, 0xAB97, },
+	{ 0x13C8, 0xAB98, },
+	{ 0x13C9, 0xAB99, },
+	{ 0x13CA, 0xAB9A, },
+	{ 0x13CB, 0xAB9B, },
+	{ 0x13CC, 0xAB9C, },
+	{ 0x13CD, 0xAB9D, },
+	{ 0x13CE, 0xAB9E, },
+	{ 0x13CF, 0xAB9F, },
+	{ 0x13D0, 0xABA0, },
+	{ 0x13D1, 0xABA1, },
+	{ 0x13D2, 0xABA2, },
+	{ 0x13D3, 0xABA3, },
+	{ 0x13D4, 0xABA4, },
+	{ 0x13D5, 0xABA5, },
+	{ 0x13D6, 0xABA6, },
+	{ 0x13D7, 0xABA7, },
+	{ 0x13D8, 0xABA8, },
+	{ 0x13D9, 0xABA9, },
+	{ 0x13DA, 0xABAA, },
+	{ 0x13DB, 0xABAB, },
+	{ 0x13DC, 0xABAC, },
+	{ 0x13DD, 0xABAD, },
+	{ 0x13DE, 0xABAE, },
+	{ 0x13DF, 0xABAF, },
+	{ 0x13E0, 0xABB0, },
+	{ 0x13E1, 0xABB1, },
+	{ 0x13E2, 0xABB2, },
+	{ 0x13E3, 0xABB3, },
+	{ 0x13E4, 0xABB4, },
+	{ 0x13E5, 0xABB5, },
+	{ 0x13E6, 0xABB6, },
+	{ 0x13E7, 0xABB7, },
+	{ 0x13E8, 0xABB8, },
+	{ 0x13E9, 0xABB9, },
+	{ 0x13EA, 0xABBA, },
+	{ 0x13EB, 0xABBB, },
+	{ 0x13EC, 0xABBC, },
+	{ 0x13ED, 0xABBD, },
+	{ 0x13EE, 0xABBE, },
+	{ 0x13EF, 0xABBF, },
+	{ 0x13F0, 0x13F8, },
+	{ 0x13F1, 0x13F9, },
+	{ 0x13F2, 0x13FA, },
+	{ 0x13F3, 0x13FB, },
+	{ 0x13F4, 0x13FC, },
+	{ 0x13F5, 0x13FD, },
+	{ 0x1E00, 0x1E01, },
+	{ 0x1E02, 0x1E03, },
+	{ 0x1E04, 0x1E05, },
+	{ 0x1E06, 0x1E07, },
+	{ 0x1E08, 0x1E09, },
+	{ 0x1E0A, 0x1E0B, },
+	{ 0x1E0C, 0x1E0D, },
+	{ 0x1E0E, 0x1E0F, },
+	{ 0x1E10, 0x1E11, },
+	{ 0x1E12, 0x1E13, },
+	{ 0x1E14, 0x1E15, },
+	{ 0x1E16, 0x1E17, },
+	{ 0x1E18, 0x1E19, },
+	{ 0x1E1A, 0x1E1B, },
+	{ 0x1E1C, 0x1E1D, },
+	{ 0x1E1E, 0x1E1F, },
+	{ 0x1E20, 0x1E21, },
+	{ 0x1E22, 0x1E23, },
+	{ 0x1E24, 0x1E25, },
+	{ 0x1E26, 0x1E27, },
+	{ 0x1E28, 0x1E29, },
+	{ 0x1E2A, 0x1E2B, },
+	{ 0x1E2C, 0x1E2D, },
+	{ 0x1E2E, 0x1E2F, },
+	{ 0x1E30, 0x1E31, },
+	{ 0x1E32, 0x1E33, },
+	{ 0x1E34, 0x1E35, },
+	{ 0x1E36, 0x1E37, },
+	{ 0x1E38, 0x1E39, },
+	{ 0x1E3A, 0x1E3B, },
+	{ 0x1E3C, 0x1E3D, },
+	{ 0x1E3E, 0x1E3F, },
+	{ 0x1E40, 0x1E41, },
+	{ 0x1E42, 0x1E43, },
+	{ 0x1E44, 0x1E45, },
+	{ 0x1E46, 0x1E47, },
+	{ 0x1E48, 0x1E49, },
+	{ 0x1E4A, 0x1E4B, },
+	{ 0x1E4C, 0x1E4D, },
+	{ 0x1E4E, 0x1E4F, },
+	{ 0x1E50, 0x1E51, },
+	{ 0x1E52, 0x1E53, },
+	{ 0x1E54, 0x1E55, },
+	{ 0x1E56, 0x1E57, },
+	{ 0x1E58, 0x1E59, },
+	{ 0x1E5A, 0x1E5B, },
+	{ 0x1E5C, 0x1E5D, },
+	{ 0x1E5E, 0x1E5F, },
+	{ 0x1E60, 0x1E61, },
+	{ 0x1E62, 0x1E63, },
+	{ 0x1E64, 0x1E65, },
+	{ 0x1E66, 0x1E67, },
+	{ 0x1E68, 0x1E69, },
+	{ 0x1E6A, 0x1E6B, },
+	{ 0x1E6C, 0x1E6D, },
+	{ 0x1E6E, 0x1E6F, },
+	{ 0x1E70, 0x1E71, },
+	{ 0x1E72, 0x1E73, },
+	{ 0x1E74, 0x1E75, },
+	{ 0x1E76, 0x1E77, },
+	{ 0x1E78, 0x1E79, },
+	{ 0x1E7A, 0x1E7B, },
+	{ 0x1E7C, 0x1E7D, },
+	{ 0x1E7E, 0x1E7F, },
+	{ 0x1E80, 0x1E81, },
+	{ 0x1E82, 0x1E83, },
+	{ 0x1E84, 0x1E85, },
+	{ 0x1E86, 0x1E87, },
+	{ 0x1E88, 0x1E89, },
+	{ 0x1E8A, 0x1E8B, },
+	{ 0x1E8C, 0x1E8D, },
+	{ 0x1E8E, 0x1E8F, },
+	{ 0x1E90, 0x1E91, },
+	{ 0x1E92, 0x1E93, },
+	{ 0x1E94, 0x1E95, },
+	{ 0x1E9E, 0x00DF, },
+	{ 0x1EA0, 0x1EA1, },
+	{ 0x1EA2, 0x1EA3, },
+	{ 0x1EA4, 0x1EA5, },
+	{ 0x1EA6, 0x1EA7, },
+	{ 0x1EA8, 0x1EA9, },
+	{ 0x1EAA, 0x1EAB, },
+	{ 0x1EAC, 0x1EAD, },
+	{ 0x1EAE, 0x1EAF, },
+	{ 0x1EB0, 0x1EB1, },
+	{ 0x1EB2, 0x1EB3, },
+	{ 0x1EB4, 0x1EB5, },
+	{ 0x1EB6, 0x1EB7, },
+	{ 0x1EB8, 0x1EB9, },
+	{ 0x1EBA, 0x1EBB, },
+	{ 0x1EBC, 0x1EBD, },
+	{ 0x1EBE, 0x1EBF, },
+	{ 0x1EC0, 0x1EC1, },
+	{ 0x1EC2, 0x1EC3, },
+	{ 0x1EC4, 0x1EC5, },
+	{ 0x1EC6, 0x1EC7, },
+	{ 0x1EC8, 0x1EC9, },
+	{ 0x1ECA, 0x1ECB, },
+	{ 0x1ECC, 0x1ECD, },
+	{ 0x1ECE, 0x1ECF, },
+	{ 0x1ED0, 0x1ED1, },
+	{ 0x1ED2, 0x1ED3, },
+	{ 0x1ED4, 0x1ED5, },
+	{ 0x1ED6, 0x1ED7, },
+	{ 0x1ED8, 0x1ED9, },
+	{ 0x1EDA, 0x1EDB, },
+	{ 0x1EDC, 0x1EDD, },
+	{ 0x1EDE, 0x1EDF, },
+	{ 0x1EE0, 0x1EE1, },
+	{ 0x1EE2, 0x1EE3, },
+	{ 0x1EE4, 0x1EE5, },
+	{ 0x1EE6, 0x1EE7, },
+	{ 0x1EE8, 0x1EE9, },
+	{ 0x1EEA, 0x1EEB, },
+	{ 0x1EEC, 0x1EED, },
+	{ 0x1EEE, 0x1EEF, },
+	{ 0x1EF0, 0x1EF1, },
+	{ 0x1EF2, 0x1EF3, },
+	{ 0x1EF4, 0x1EF5, },
+	{ 0x1EF6, 0x1EF7, },
+	{ 0x1EF8, 0x1EF9, },
+	{ 0x1EFA, 0x1EFB, },
+	{ 0x1EFC, 0x1EFD, },
+	{ 0x1EFE, 0x1EFF, },
+	{ 0x1F08, 0x1F00, },
+	{ 0x1F09, 0x1F01, },
+	{ 0x1F0A, 0x1F02, },
+	{ 0x1F0B, 0x1F03, },
+	{ 0x1F0C, 0x1F04, },
+	{ 0x1F0D, 0x1F05, },
+	{ 0x1F0E, 0x1F06, },
+	{ 0x1F0F, 0x1F07, },
+	{ 0x1F18, 0x1F10, },
+	{ 0x1F19, 0x1F11, },
+	{ 0x1F1A, 0x1F12, },
+	{ 0x1F1B, 0x1F13, },
+	{ 0x1F1C, 0x1F14, },
+	{ 0x1F1D, 0x1F15, },
+	{ 0x1F28, 0x1F20, },
+	{ 0x1F29, 0x1F21, },
+	{ 0x1F2A, 0x1F22, },
+	{ 0x1F2B, 0x1F23, },
+	{ 0x1F2C, 0x1F24, },
+	{ 0x1F2D, 0x1F25, },
+	{ 0x1F2E, 0x1F26, },
+	{ 0x1F2F, 0x1F27, },
+	{ 0x1F38, 0x1F30, },
+	{ 0x1F39, 0x1F31, },
+	{ 0x1F3A, 0x1F32, },
+	{ 0x1F3B, 0x1F33, },
+	{ 0x1F3C, 0x1F34, },
+	{ 0x1F3D, 0x1F35, },
+	{ 0x1F3E, 0x1F36, },
+	{ 0x1F3F, 0x1F37, },
+	{ 0x1F48, 0x1F40, },
+	{ 0x1F49, 0x1F41, },
+	{ 0x1F4A, 0x1F42, },
+	{ 0x1F4B, 0x1F43, },
+	{ 0x1F4C, 0x1F44, },
+	{ 0x1F4D, 0x1F45, },
+	{ 0x1F59, 0x1F51, },
+	{ 0x1F5B, 0x1F53, },
+	{ 0x1F5D, 0x1F55, },
+	{ 0x1F5F, 0x1F57, },
+	{ 0x1F68, 0x1F60, },
+	{ 0x1F69, 0x1F61, },
+	{ 0x1F6A, 0x1F62, },
+	{ 0x1F6B, 0x1F63, },
+	{ 0x1F6C, 0x1F64, },
+	{ 0x1F6D, 0x1F65, },
+	{ 0x1F6E, 0x1F66, },
+	{ 0x1F6F, 0x1F67, },
+	{ 0x1F88, 0x1F80, },
+	{ 0x1F89, 0x1F81, },
+	{ 0x1F8A, 0x1F82, },
+	{ 0x1F8B, 0x1F83, },
+	{ 0x1F8C, 0x1F84, },
+	{ 0x1F8D, 0x1F85, },
+	{ 0x1F8E, 0x1F86, },
+	{ 0x1F8F, 0x1F87, },
+	{ 0x1F98, 0x1F90, },
+	{ 0x1F99, 0x1F91, },
+	{ 0x1F9A, 0x1F92, },
+	{ 0x1F9B, 0x1F93, },
+	{ 0x1F9C, 0x1F94, },
+	{ 0x1F9D, 0x1F95, },
+	{ 0x1F9E, 0x1F96, },
+	{ 0x1F9F, 0x1F97, },
+	{ 0x1FA8, 0x1FA0, },
+	{ 0x1FA9, 0x1FA1, },
+	{ 0x1FAA, 0x1FA2, },
+	{ 0x1FAB, 0x1FA3, },
+	{ 0x1FAC, 0x1FA4, },
+	{ 0x1FAD, 0x1FA5, },
+	{ 0x1FAE, 0x1FA6, },
+	{ 0x1FAF, 0x1FA7, },
+	{ 0x1FB8, 0x1FB0, },
+	{ 0x1FB9, 0x1FB1, },
+	{ 0x1FBA, 0x1F70, },
+	{ 0x1FBB, 0x1F71, },
+	{ 0x1FBC, 0x1FB3, },
+	{ 0x1FC8, 0x1F72, },
+	{ 0x1FC9, 0x1F73, },
+	{ 0x1FCA, 0x1F74, },
+	{ 0x1FCB, 0x1F75, },
+	{ 0x1FCC, 0x1FC3, },
+	{ 0x1FD8, 0x1FD0, },
+	{ 0x1FD9, 0x1FD1, },
+	{ 0x1FDA, 0x1F76, },
+	{ 0x1FDB, 0x1F77, },
+	{ 0x1FE8, 0x1FE0, },
+	{ 0x1FE9, 0x1FE1, },
+	{ 0x1FEA, 0x1F7A, },
+	{ 0x1FEB, 0x1F7B, },
+	{ 0x1FEC, 0x1FE5, },
+	{ 0x1FF8, 0x1F78, },
+	{ 0x1FF9, 0x1F79, },
+	{ 0x1FFA, 0x1F7C, },
+	{ 0x1FFB, 0x1F7D, },
+	{ 0x1FFC, 0x1FF3, },
+	{ 0x2126, 0x03C9, },
+	{ 0x212A, 0x006B, },
+	{ 0x212B, 0x00E5, },
+	{ 0x2132, 0x214E, },
+	{ 0x2160, 0x2170, },
+	{ 0x2161, 0x2171, },
+	{ 0x2162, 0x2172, },
+	{ 0x2163, 0x2173, },
+	{ 0x2164, 0x2174, },
+	{ 0x2165, 0x2175, },
+	{ 0x2166, 0x2176, },
+	{ 0x2167, 0x2177, },
+	{ 0x2168, 0x2178, },
+	{ 0x2169, 0x2179, },
+	{ 0x216A, 0x217A, },
+	{ 0x216B, 0x217B, },
+	{ 0x216C, 0x217C, },
+	{ 0x216D, 0x217D, },
+	{ 0x216E, 0x217E, },
+	{ 0x216F, 0x217F, },
+	{ 0x2183, 0x2184, },
+	{ 0x24B6, 0x24D0, },
+	{ 0x24B7, 0x24D1, },
+	{ 0x24B8, 0x24D2, },
+	{ 0x24B9, 0x24D3, },
+	{ 0x24BA, 0x24D4, },
+	{ 0x24BB, 0x24D5, },
+	{ 0x24BC, 0x24D6, },
+	{ 0x24BD, 0x24D7, },
+	{ 0x24BE, 0x24D8, },
+	{ 0x24BF, 0x24D9, },
+	{ 0x24C0, 0x24DA, },
+	{ 0x24C1, 0x24DB, },
+	{ 0x24C2, 0x24DC, },
+	{ 0x24C3, 0x24DD, },
+	{ 0x24C4, 0x24DE, },
+	{ 0x24C5, 0x24DF, },
+	{ 0x24C6, 0x24E0, },
+	{ 0x24C7, 0x24E1, },
+	{ 0x24C8, 0x24E2, },
+	{ 0x24C9, 0x24E3, },
+	{ 0x24CA, 0x24E4, },
+	{ 0x24CB, 0x24E5, },
+	{ 0x24CC, 0x24E6, },
+	{ 0x24CD, 0x24E7, },
+	{ 0x24CE, 0x24E8, },
+	{ 0x24CF, 0x24E9, },
+	{ 0x2C00, 0x2C30, },
+	{ 0x2C01, 0x2C31, },
+	{ 0x2C02, 0x2C32, },
+	{ 0x2C03, 0x2C33, },
+	{ 0x2C04, 0x2C34, },
+	{ 0x2C05, 0x2C35, },
+	{ 0x2C06, 0x2C36, },
+	{ 0x2C07, 0x2C37, },
+	{ 0x2C08, 0x2C38, },
+	{ 0x2C09, 0x2C39, },
+	{ 0x2C0A, 0x2C3A, },
+	{ 0x2C0B, 0x2C3B, },
+	{ 0x2C0C, 0x2C3C, },
+	{ 0x2C0D, 0x2C3D, },
+	{ 0x2C0E, 0x2C3E, },
+	{ 0x2C0F, 0x2C3F, },
+	{ 0x2C10, 0x2C40, },
+	{ 0x2C11, 0x2C41, },
+	{ 0x2C12, 0x2C42, },
+	{ 0x2C13, 0x2C43, },
+	{ 0x2C14, 0x2C44, },
+	{ 0x2C15, 0x2C45, },
+	{ 0x2C16, 0x2C46, },
+	{ 0x2C17, 0x2C47, },
+	{ 0x2C18, 0x2C48, },
+	{ 0x2C19, 0x2C49, },
+	{ 0x2C1A, 0x2C4A, },
+	{ 0x2C1B, 0x2C4B, },
+	{ 0x2C1C, 0x2C4C, },
+	{ 0x2C1D, 0x2C4D, },
+	{ 0x2C1E, 0x2C4E, },
+	{ 0x2C1F, 0x2C4F, },
+	{ 0x2C20, 0x2C50, },
+	{ 0x2C21, 0x2C51, },
+	{ 0x2C22, 0x2C52, },
+	{ 0x2C23, 0x2C53, },
+	{ 0x2C24, 0x2C54, },
+	{ 0x2C25, 0x2C55, },
+	{ 0x2C26, 0x2C56, },
+	{ 0x2C27, 0x2C57, },
+	{ 0x2C28, 0x2C58, },
+	{ 0x2C29, 0x2C59, },
+	{ 0x2C2A, 0x2C5A, },
+	{ 0x2C2B, 0x2C5B, },
+	{ 0x2C2C, 0x2C5C, },
+	{ 0x2C2D, 0x2C5D, },
+	{ 0x2C2E, 0x2C5E, },
+	{ 0x2C60, 0x2C61, },
+	{ 0x2C62, 0x026B, },
+	{ 0x2C63, 0x1D7D, },
+	{ 0x2C64, 0x027D, },
+	{ 0x2C67, 0x2C68, },
+	{ 0x2C69, 0x2C6A, },
+	{ 0x2C6B, 0x2C6C, },
+	{ 0x2C6D, 0x0251, },
+	{ 0x2C6E, 0x0271, },
+	{ 0x2C6F, 0x0250, },
+	{ 0x2C70, 0x0252, },
+	{ 0x2C72, 0x2C73, },
+	{ 0x2C75, 0x2C76, },
+	{ 0x2C7E, 0x023F, },
+	{ 0x2C7F, 0x0240, },
+	{ 0x2C80, 0x2C81, },
+	{ 0x2C82, 0x2C83, },
+	{ 0x2C84, 0x2C85, },
+	{ 0x2C86, 0x2C87, },
+	{ 0x2C88, 0x2C89, },
+	{ 0x2C8A, 0x2C8B, },
+	{ 0x2C8C, 0x2C8D, },
+	{ 0x2C8E, 0x2C8F, },
+	{ 0x2C90, 0x2C91, },
+	{ 0x2C92, 0x2C93, },
+	{ 0x2C94, 0x2C95, },
+	{ 0x2C96, 0x2C97, },
+	{ 0x2C98, 0x2C99, },
+	{ 0x2C9A, 0x2C9B, },
+	{ 0x2C9C, 0x2C9D, },
+	{ 0x2C9E, 0x2C9F, },
+	{ 0x2CA0, 0x2CA1, },
+	{ 0x2CA2, 0x2CA3, },
+	{ 0x2CA4, 0x2CA5, },
+	{ 0x2CA6, 0x2CA7, },
+	{ 0x2CA8, 0x2CA9, },
+	{ 0x2CAA, 0x2CAB, },
+	{ 0x2CAC, 0x2CAD, },
+	{ 0x2CAE, 0x2CAF, },
+	{ 0x2CB0, 0x2CB1, },
+	{ 0x2CB2, 0x2CB3, },
+	{ 0x2CB4, 0x2CB5, },
+	{ 0x2CB6, 0x2CB7, },
+	{ 0x2CB8, 0x2CB9, },
+	{ 0x2CBA, 0x2CBB, },
+	{ 0x2CBC, 0x2CBD, },
+	{ 0x2CBE, 0x2CBF, },
+	{ 0x2CC0, 0x2CC1, },
+	{ 0x2CC2, 0x2CC3, },
+	{ 0x2CC4, 0x2CC5, },
+	{ 0x2CC6, 0x2CC7, },
+	{ 0x2CC8, 0x2CC9, },
+	{ 0x2CCA, 0x2CCB, },
+	{ 0x2CCC, 0x2CCD, },
+	{ 0x2CCE, 0x2CCF, },
+	{ 0x2CD0, 0x2CD1, },
+	{ 0x2CD2, 0x2CD3, },
+	{ 0x2CD4, 0x2CD5, },
+	{ 0x2CD6, 0x2CD7, },
+	{ 0x2CD8, 0x2CD9, },
+	{ 0x2CDA, 0x2CDB, },
+	{ 0x2CDC, 0x2CDD, },
+	{ 0x2CDE, 0x2CDF, },
+	{ 0x2CE0, 0x2CE1, },
+	{ 0x2CE2, 0x2CE3, },
+	{ 0x2CEB, 0x2CEC, },
+	{ 0x2CED, 0x2CEE, },
+	{ 0x2CF2, 0x2CF3, },
+	{ 0xA640, 0xA641, },
+	{ 0xA642, 0xA643, },
+	{ 0xA644, 0xA645, },
+	{ 0xA646, 0xA647, },
+	{ 0xA648, 0xA649, },
+	{ 0xA64A, 0xA64B, },
+	{ 0xA64C, 0xA64D, },
+	{ 0xA64E, 0xA64F, },
+	{ 0xA650, 0xA651, },
+	{ 0xA652, 0xA653, },
+	{ 0xA654, 0xA655, },
+	{ 0xA656, 0xA657, },
+	{ 0xA658, 0xA659, },
+	{ 0xA65A, 0xA65B, },
+	{ 0xA65C, 0xA65D, },
+	{ 0xA65E, 0xA65F, },
+	{ 0xA660, 0xA661, },
+	{ 0xA662, 0xA663, },
+	{ 0xA664, 0xA665, },
+	{ 0xA666, 0xA667, },
+	{ 0xA668, 0xA669, },
+	{ 0xA66A, 0xA66B, },
+	{ 0xA66C, 0xA66D, },
+	{ 0xA680, 0xA681, },
+	{ 0xA682, 0xA683, },
+	{ 0xA684, 0xA685, },
+	{ 0xA686, 0xA687, },
+	{ 0xA688, 0xA689, },
+	{ 0xA68A, 0xA68B, },
+	{ 0xA68C, 0xA68D, },
+	{ 0xA68E, 0xA68F, },
+	{ 0xA690, 0xA691, },
+	{ 0xA692, 0xA693, },
+	{ 0xA694, 0xA695, },
+	{ 0xA696, 0xA697, },
+	{ 0xA698, 0xA699, },
+	{ 0xA69A, 0xA69B, },
+	{ 0xA722, 0xA723, },
+	{ 0xA724, 0xA725, },
+	{ 0xA726, 0xA727, },
+	{ 0xA728, 0xA729, },
+	{ 0xA72A, 0xA72B, },
+	{ 0xA72C, 0xA72D, },
+	{ 0xA72E, 0xA72F, },
+	{ 0xA732, 0xA733, },
+	{ 0xA734, 0xA735, },
+	{ 0xA736, 0xA737, },
+	{ 0xA738, 0xA739, },
+	{ 0xA73A, 0xA73B, },
+	{ 0xA73C, 0xA73D, },
+	{ 0xA73E, 0xA73F, },
+	{ 0xA740, 0xA741, },
+	{ 0xA742, 0xA743, },
+	{ 0xA744, 0xA745, },
+	{ 0xA746, 0xA747, },
+	{ 0xA748, 0xA749, },
+	{ 0xA74A, 0xA74B, },
+	{ 0xA74C, 0xA74D, },
+	{ 0xA74E, 0xA74F, },
+	{ 0xA750, 0xA751, },
+	{ 0xA752, 0xA753, },
+	{ 0xA754, 0xA755, },
+	{ 0xA756, 0xA757, },
+	{ 0xA758, 0xA759, },
+	{ 0xA75A, 0xA75B, },
+	{ 0xA75C, 0xA75D, },
+	{ 0xA75E, 0xA75F, },
+	{ 0xA760, 0xA761, },
+	{ 0xA762, 0xA763, },
+	{ 0xA764, 0xA765, },
+	{ 0xA766, 0xA767, },
+	{ 0xA768, 0xA769, },
+	{ 0xA76A, 0xA76B, },
+	{ 0xA76C, 0xA76D, },
+	{ 0xA76E, 0xA76F, },
+	{ 0xA779, 0xA77A, },
+	{ 0xA77B, 0xA77C, },
+	{ 0xA77D, 0x1D79, },
+	{ 0xA77E, 0xA77F, },
+	{ 0xA780, 0xA781, },
+	{ 0xA782, 0xA783, },
+	{ 0xA784, 0xA785, },
+	{ 0xA786, 0xA787, },
+	{ 0xA78B, 0xA78C, },
+	{ 0xA78D, 0x0265, },
+	{ 0xA790, 0xA791, },
+	{ 0xA792, 0xA793, },
+	{ 0xA796, 0xA797, },
+	{ 0xA798, 0xA799, },
+	{ 0xA79A, 0xA79B, },
+	{ 0xA79C, 0xA79D, },
+	{ 0xA79E, 0xA79F, },
+	{ 0xA7A0, 0xA7A1, },
+	{ 0xA7A2, 0xA7A3, },
+	{ 0xA7A4, 0xA7A5, },
+	{ 0xA7A6, 0xA7A7, },
+	{ 0xA7A8, 0xA7A9, },
+	{ 0xA7AA, 0x0266, },
+	{ 0xA7AB, 0x025C, },
+	{ 0xA7AC, 0x0261, },
+	{ 0xA7AD, 0x026C, },
+	{ 0xA7AE, 0x026A, },
+	{ 0xA7B0, 0x029E, },
+	{ 0xA7B1, 0x0287, },
+	{ 0xA7B2, 0x029D, },
+	{ 0xA7B3, 0xAB53, },
+	{ 0xA7B4, 0xA7B5, },
+	{ 0xA7B6, 0xA7B7, },
+	{ 0xFF21, 0xFF41, },
+	{ 0xFF22, 0xFF42, },
+	{ 0xFF23, 0xFF43, },
+	{ 0xFF24, 0xFF44, },
+	{ 0xFF25, 0xFF45, },
+	{ 0xFF26, 0xFF46, },
+	{ 0xFF27, 0xFF47, },
+	{ 0xFF28, 0xFF48, },
+	{ 0xFF29, 0xFF49, },
+	{ 0xFF2A, 0xFF4A, },
+	{ 0xFF2B, 0xFF4B, },
+	{ 0xFF2C, 0xFF4C, },
+	{ 0xFF2D, 0xFF4D, },
+	{ 0xFF2E, 0xFF4E, },
+	{ 0xFF2F, 0xFF4F, },
+	{ 0xFF30, 0xFF50, },
+	{ 0xFF31, 0xFF51, },
+	{ 0xFF32, 0xFF52, },
+	{ 0xFF33, 0xFF53, },
+	{ 0xFF34, 0xFF54, },
+	{ 0xFF35, 0xFF55, },
+	{ 0xFF36, 0xFF56, },
+	{ 0xFF37, 0xFF57, },
+	{ 0xFF38, 0xFF58, },
+	{ 0xFF39, 0xFF59, },
+	{ 0xFF3A, 0xFF5A, },
+	{ 0x10400, 0x10428, },
+	{ 0x10401, 0x10429, },
+	{ 0x10402, 0x1042A, },
+	{ 0x10403, 0x1042B, },
+	{ 0x10404, 0x1042C, },
+	{ 0x10405, 0x1042D, },
+	{ 0x10406, 0x1042E, },
+	{ 0x10407, 0x1042F, },
+	{ 0x10408, 0x10430, },
+	{ 0x10409, 0x10431, },
+	{ 0x1040A, 0x10432, },
+	{ 0x1040B, 0x10433, },
+	{ 0x1040C, 0x10434, },
+	{ 0x1040D, 0x10435, },
+	{ 0x1040E, 0x10436, },
+	{ 0x1040F, 0x10437, },
+	{ 0x10410, 0x10438, },
+	{ 0x10411, 0x10439, },
+	{ 0x10412, 0x1043A, },
+	{ 0x10413, 0x1043B, },
+	{ 0x10414, 0x1043C, },
+	{ 0x10415, 0x1043D, },
+	{ 0x10416, 0x1043E, },
+	{ 0x10417, 0x1043F, },
+	{ 0x10418, 0x10440, },
+	{ 0x10419, 0x10441, },
+	{ 0x1041A, 0x10442, },
+	{ 0x1041B, 0x10443, },
+	{ 0x1041C, 0x10444, },
+	{ 0x1041D, 0x10445, },
+	{ 0x1041E, 0x10446, },
+	{ 0x1041F, 0x10447, },
+	{ 0x10420, 0x10448, },
+	{ 0x10421, 0x10449, },
+	{ 0x10422, 0x1044A, },
+	{ 0x10423, 0x1044B, },
+	{ 0x10424, 0x1044C, },
+	{ 0x10425, 0x1044D, },
+	{ 0x10426, 0x1044E, },
+	{ 0x10427, 0x1044F, },
+	{ 0x104B0, 0x104D8, },
+	{ 0x104B1, 0x104D9, },
+	{ 0x104B2, 0x104DA, },
+	{ 0x104B3, 0x104DB, },
+	{ 0x104B4, 0x104DC, },
+	{ 0x104B5, 0x104DD, },
+	{ 0x104B6, 0x104DE, },
+	{ 0x104B7, 0x104DF, },
+	{ 0x104B8, 0x104E0, },
+	{ 0x104B9, 0x104E1, },
+	{ 0x104BA, 0x104E2, },
+	{ 0x104BB, 0x104E3, },
+	{ 0x104BC, 0x104E4, },
+	{ 0x104BD, 0x104E5, },
+	{ 0x104BE, 0x104E6, },
+	{ 0x104BF, 0x104E7, },
+	{ 0x104C0, 0x104E8, },
+	{ 0x104C1, 0x104E9, },
+	{ 0x104C2, 0x104EA, },
+	{ 0x104C3, 0x104EB, },
+	{ 0x104C4, 0x104EC, },
+	{ 0x104C5, 0x104ED, },
+	{ 0x104C6, 0x104EE, },
+	{ 0x104C7, 0x104EF, },
+	{ 0x104C8, 0x104F0, },
+	{ 0x104C9, 0x104F1, },
+	{ 0x104CA, 0x104F2, },
+	{ 0x104CB, 0x104F3, },
+	{ 0x104CC, 0x104F4, },
+	{ 0x104CD, 0x104F5, },
+	{ 0x104CE, 0x104F6, },
+	{ 0x104CF, 0x104F7, },
+	{ 0x104D0, 0x104F8, },
+	{ 0x104D1, 0x104F9, },
+	{ 0x104D2, 0x104FA, },
+	{ 0x104D3, 0x104FB, },
+	{ 0x10C80, 0x10CC0, },
+	{ 0x10C81, 0x10CC1, },
+	{ 0x10C82, 0x10CC2, },
+	{ 0x10C83, 0x10CC3, },
+	{ 0x10C84, 0x10CC4, },
+	{ 0x10C85, 0x10CC5, },
+	{ 0x10C86, 0x10CC6, },
+	{ 0x10C87, 0x10CC7, },
+	{ 0x10C88, 0x10CC8, },
+	{ 0x10C89, 0x10CC9, },
+	{ 0x10C8A, 0x10CCA, },
+	{ 0x10C8B, 0x10CCB, },
+	{ 0x10C8C, 0x10CCC, },
+	{ 0x10C8D, 0x10CCD, },
+	{ 0x10C8E, 0x10CCE, },
+	{ 0x10C8F, 0x10CCF, },
+	{ 0x10C90, 0x10CD0, },
+	{ 0x10C91, 0x10CD1, },
+	{ 0x10C92, 0x10CD2, },
+	{ 0x10C93, 0x10CD3, },
+	{ 0x10C94, 0x10CD4, },
+	{ 0x10C95, 0x10CD5, },
+	{ 0x10C96, 0x10CD6, },
+	{ 0x10C97, 0x10CD7, },
+	{ 0x10C98, 0x10CD8, },
+	{ 0x10C99, 0x10CD9, },
+	{ 0x10C9A, 0x10CDA, },
+	{ 0x10C9B, 0x10CDB, },
+	{ 0x10C9C, 0x10CDC, },
+	{ 0x10C9D, 0x10CDD, },
+	{ 0x10C9E, 0x10CDE, },
+	{ 0x10C9F, 0x10CDF, },
+	{ 0x10CA0, 0x10CE0, },
+	{ 0x10CA1, 0x10CE1, },
+	{ 0x10CA2, 0x10CE2, },
+	{ 0x10CA3, 0x10CE3, },
+	{ 0x10CA4, 0x10CE4, },
+	{ 0x10CA5, 0x10CE5, },
+	{ 0x10CA6, 0x10CE6, },
+	{ 0x10CA7, 0x10CE7, },
+	{ 0x10CA8, 0x10CE8, },
+	{ 0x10CA9, 0x10CE9, },
+	{ 0x10CAA, 0x10CEA, },
+	{ 0x10CAB, 0x10CEB, },
+	{ 0x10CAC, 0x10CEC, },
+	{ 0x10CAD, 0x10CED, },
+	{ 0x10CAE, 0x10CEE, },
+	{ 0x10CAF, 0x10CEF, },
+	{ 0x10CB0, 0x10CF0, },
+	{ 0x10CB1, 0x10CF1, },
+	{ 0x10CB2, 0x10CF2, },
+	{ 0x118A0, 0x118C0, },
+	{ 0x118A1, 0x118C1, },
+	{ 0x118A2, 0x118C2, },
+	{ 0x118A3, 0x118C3, },
+	{ 0x118A4, 0x118C4, },
+	{ 0x118A5, 0x118C5, },
+	{ 0x118A6, 0x118C6, },
+	{ 0x118A7, 0x118C7, },
+	{ 0x118A8, 0x118C8, },
+	{ 0x118A9, 0x118C9, },
+	{ 0x118AA, 0x118CA, },
+	{ 0x118AB, 0x118CB, },
+	{ 0x118AC, 0x118CC, },
+	{ 0x118AD, 0x118CD, },
+	{ 0x118AE, 0x118CE, },
+	{ 0x118AF, 0x118CF, },
+	{ 0x118B0, 0x118D0, },
+	{ 0x118B1, 0x118D1, },
+	{ 0x118B2, 0x118D2, },
+	{ 0x118B3, 0x118D3, },
+	{ 0x118B4, 0x118D4, },
+	{ 0x118B5, 0x118D5, },
+	{ 0x118B6, 0x118D6, },
+	{ 0x118B7, 0x118D7, },
+	{ 0x118B8, 0x118D8, },
+	{ 0x118B9, 0x118D9, },
+	{ 0x118BA, 0x118DA, },
+	{ 0x118BB, 0x118DB, },
+	{ 0x118BC, 0x118DC, },
+	{ 0x118BD, 0x118DD, },
+	{ 0x118BE, 0x118DE, },
+	{ 0x118BF, 0x118DF, },
+	{ 0x1E900, 0x1E922, },
+	{ 0x1E901, 0x1E923, },
+	{ 0x1E902, 0x1E924, },
+	{ 0x1E903, 0x1E925, },
+	{ 0x1E904, 0x1E926, },
+	{ 0x1E905, 0x1E927, },
+	{ 0x1E906, 0x1E928, },
+	{ 0x1E907, 0x1E929, },
+	{ 0x1E908, 0x1E92A, },
+	{ 0x1E909, 0x1E92B, },
+	{ 0x1E90A, 0x1E92C, },
+	{ 0x1E90B, 0x1E92D, },
+	{ 0x1E90C, 0x1E92E, },
+	{ 0x1E90D, 0x1E92F, },
+	{ 0x1E90E, 0x1E930, },
+	{ 0x1E90F, 0x1E931, },
+	{ 0x1E910, 0x1E932, },
+	{ 0x1E911, 0x1E933, },
+	{ 0x1E912, 0x1E934, },
+	{ 0x1E913, 0x1E935, },
+	{ 0x1E914, 0x1E936, },
+	{ 0x1E915, 0x1E937, },
+	{ 0x1E916, 0x1E938, },
+	{ 0x1E917, 0x1E939, },
+	{ 0x1E918, 0x1E93A, },
+	{ 0x1E919, 0x1E93B, },
+	{ 0x1E91A, 0x1E93C, },
+	{ 0x1E91B, 0x1E93D, },
+	{ 0x1E91C, 0x1E93E, },
+	{ 0x1E91D, 0x1E93F, },
+	{ 0x1E91E, 0x1E940, },
+	{ 0x1E91F, 0x1E941, },
+	{ 0x1E920, 0x1E942, },
+	{ 0x1E921, 0x1E943, },
 };
 
-#define UTF8_CONVERSIONS (sizeof(UTF8_lower_upper) / sizeof(UTF8_lower_upper[0]))
+static BAT *UTF8_toUpperFrom = NULL, *UTF8_toUpperTo = NULL,	/* filled by strPrelude from the from/to members of UTF8_toUpper[] */
+	*UTF8_toLowerFrom = NULL, *UTF8_toLowerTo = NULL;	/* filled by strPrelude from the from/to members of UTF8_toLower[] */
+
+#ifndef NDEBUG	/* debug builds only: structural UTF-8 check called by the UTF8_str* helpers below */
+static void
+UTF8_assert(const char *s)	/* trips assert(0) on a bad lead or continuation byte; returns normally otherwise */
+{
+	int c;
 
-static BAT *UTF8_upperBat = NULL, *UTF8_lowerBat = NULL;
+	if (s == NULL)
+		return;					/* nothing to check */
+	if (*s == '\200' && s[1] == '\0')
+		return;					/* str_nil */
+	while ((c = *s++) != '\0') {
+		if ((c & 0x80) == 0)	/* 0xxxxxxx: single-byte sequence */
+			continue;
+		if ((*s++ & 0xC0) != 0x80)	/* second byte must look like 10xxxxxx */
+			assert(0);
+		if ((c & 0xE0) == 0xC0)	/* 110xxxxx: two-byte sequence */
+			continue;
+		if ((*s++ & 0xC0) != 0x80)	/* third byte must look like 10xxxxxx */
+			assert(0);
+		if ((c & 0xF0) == 0xE0)	/* 1110xxxx: three-byte sequence */
+			continue;
+		if ((*s++ & 0xC0) != 0x80)	/* fourth byte must look like 10xxxxxx */
+			assert(0);
+		if ((c & 0xF8) == 0xF0)	/* 11110xxx: four-byte sequence */
+			continue;
+		assert(0);				/* lead byte matches none of the four patterns */
+	}
+}
+#else
+#define UTF8_assert(s)		((void) 0)	/* expands to a no-op when NDEBUG is defined */
+#endif
 
 str
 strPrelude(void *ret)
 {
 	(void) ret;
-	if (UTF8_upperBat == NULL) {
-		int i = UTF8_CONVERSIONS;
+	if (UTF8_toUpperFrom == NULL) {	/* build the four lookup BATs only while they do not exist yet */
+		size_t i;
 
-		UTF8_upperBat = COLnew(0, TYPE_int, UTF8_CONVERSIONS, TRANSIENT);
-		UTF8_lowerBat = COLnew(0, TYPE_int, UTF8_CONVERSIONS, TRANSIENT);
-		if (UTF8_upperBat == NULL || UTF8_lowerBat == NULL)
+		UTF8_toUpperFrom = COLnew(0, TYPE_int, 1500, TRANSIENT);	/* 1500 replaces the old table-size argument; NOTE(review): presumably only an initial capacity - confirm */
+		UTF8_toUpperTo = COLnew(0, TYPE_int, 1500, TRANSIENT);
+		UTF8_toLowerFrom = COLnew(0, TYPE_int, 1500, TRANSIENT);
+		UTF8_toLowerTo = COLnew(0, TYPE_int, 1500, TRANSIENT);
+		if (UTF8_toUpperFrom == NULL || UTF8_toUpperTo == NULL ||
+			UTF8_toLowerFrom == NULL || UTF8_toLowerTo == NULL) {
 			goto bailout;
+		}
 
-		while (--i >= 0) {
-			if (BUNappend(UTF8_upperBat, &UTF8_lower_upper[i].upper, FALSE) != GDK_SUCCEED ||
-				BUNappend(UTF8_lowerBat, &UTF8_lower_upper[i].lower, FALSE) != GDK_SUCCEED) {
+		for (i = 0; i < sizeof(UTF8_toUpper) / sizeof(UTF8_toUpper[0]); i++) {	/* append from/to in lockstep so equal positions belong together */
+			if (BUNappend(UTF8_toUpperFrom, &UTF8_toUpper[i].from, FALSE) != GDK_SUCCEED ||
+				BUNappend(UTF8_toUpperTo, &UTF8_toUpper[i].to, FALSE) != GDK_SUCCEED)
 				goto bailout;
-			}
 		}
-		if (BBPrename(UTF8_upperBat->batCacheid, "monet_unicode_toupper") != 0 ||
-			BBPrename(UTF8_lowerBat->batCacheid, "monet_unicode_tolower") != 0) {
+
+		for (i = 0; i < sizeof(UTF8_toLower) / sizeof(UTF8_toLower[0]); i++) {	/* same for the to-lower table */
+			if (BUNappend(UTF8_toLowerFrom, &UTF8_toLower[i].from, FALSE) != GDK_SUCCEED ||
+				BUNappend(UTF8_toLowerTo, &UTF8_toLower[i].to, FALSE) != GDK_SUCCEED)
+				goto bailout;
+		}
+
+		if (BBPrename(UTF8_toUpperFrom->batCacheid, "monet_unicode_upper_from") != 0 ||	/* give each BAT a fixed name; any failure aborts the setup */
+			BBPrename(UTF8_toUpperTo->batCacheid, "monet_unicode_upper_to") != 0 ||
+			BBPrename(UTF8_toLowerFrom->batCacheid, "monet_unicode_lower_from") != 0 ||
+			BBPrename(UTF8_toLowerTo->batCacheid, "monet_unicode_lower_to") != 0) {
 			goto bailout;
 		}
 	}
 	return MAL_SUCCEED;
 
   bailout:
-	BBPreclaim(UTF8_upperBat);
-	BBPreclaim(UTF8_lowerBat);
-	UTF8_upperBat = NULL;
-	UTF8_lowerBat = NULL;
+	BBPreclaim(UTF8_toUpperFrom);	/* NOTE(review): some of these may still be NULL here - confirm BBPreclaim accepts NULL */
+	BBPreclaim(UTF8_toUpperTo);
+	BBPreclaim(UTF8_toLowerFrom);
+	BBPreclaim(UTF8_toLowerTo);
+	UTF8_toUpperFrom = NULL;	/* reset all four so the NULL guard above triggers a full rebuild on the next call */
+	UTF8_toUpperTo = NULL;
+	UTF8_toLowerFrom = NULL;
+	UTF8_toLowerTo = NULL;
 	throw(MAL, "str.prelude", GDK_EXCEPTION);
 }
 
@@ -1124,95 +2816,71 @@ str
 strEpilogue(void *ret)
 {
 	(void) ret;
-	if (UTF8_upperBat)
-		BBPunfix(UTF8_upperBat->batCacheid);
-	if (UTF8_lowerBat)
-		BBPunfix(UTF8_lowerBat->batCacheid);
-	UTF8_upperBat = UTF8_lowerBat = NULL;
+	if (UTF8_toUpperFrom)	/* unfix each lookup BAT that exists; pointers that are still NULL are skipped */
+		BBPunfix(UTF8_toUpperFrom->batCacheid);
+	if (UTF8_toUpperTo)
+		BBPunfix(UTF8_toUpperTo->batCacheid);
+	if (UTF8_toLowerFrom)
+		BBPunfix(UTF8_toLowerFrom->batCacheid);
+	if (UTF8_toLowerTo)
+		BBPunfix(UTF8_toLowerTo->batCacheid);
+	UTF8_toUpperFrom = NULL;	/* forget the BATs so that strPrelude's NULL test rebuilds them */
+	UTF8_toUpperTo = NULL;
+	UTF8_toLowerFrom = NULL;
+	UTF8_toLowerTo = NULL;
 	return MAL_SUCCEED;
 }
 
-/* Get the last char in (X2), and #bytes it takes, but do not decrease the pos in (X2)
- * The ELSE IF conditions are computed by comparing the left most byte with the
- * (mask-bits - 1). The '-1' is to use '>' i.s.o. '>='.
- * See gdk_atoms.c for UTF-8 encoding, especially, definitions of the mask-bits */
+/* Decode the last code point of the SZ2-byte buffer (X2) into (X1) and its length in bytes into (SZ),
+ * without moving (X2); yields int_nil and 0 if no lead byte fits in SZ2 bytes.  See gdk_atoms.c for UTF-8 encoding */
 #define UTF8_LASTCHAR(X1, SZ, X2, SZ2)				\
 	do {											\
-		if (*((X2)+SZ2-1) < 0x80) {					\
-			(X1) = *((X2)+SZ2-1);					\
+		if ((SZ2) >= 1 && ((X2)[(SZ2)-1] & 0x80) == 0) {	\
+			(X1) = (X2)[(SZ2)-1];					\
 			(SZ) = 1;								\
-		} else if (*((X2)+SZ2-2) > 0xBF) {			\
-			(X1)  = (*((X2)+SZ2-2) & 0x1F) << 6;	\
-			(X1) |= (*((X2)+SZ2-1) & 0x3F);			\
+		} else if ((SZ2) >= 2 && ((X2)[(SZ2)-2] & 0xE0) == 0xC0) {	\
+			(X1)  = ((X2)[(SZ2)-2] & 0x1F) << 6;	\
+			(X1) |= ((X2)[(SZ2)-1] & 0x3F);			\
 			(SZ) = 2;								\
-		} else if (*((X2)+SZ2-3) > 0xDF) {			\
-			(X1)  = (*((X2)+SZ2-3) & 0x0F) << 12;	\
-			(X1) |= (*((X2)+SZ2-2) & 0x3F) << 6;	\
-			(X1) |= (*((X2)+SZ2-1) & 0x3F);			\
+		} else if ((SZ2) >= 3 && ((X2)[(SZ2)-3] & 0xF0) == 0xE0) {	\
+			(X1)  = ((X2)[(SZ2)-3] & 0x0F) << 12;	\
+			(X1) |= ((X2)[(SZ2)-2] & 0x3F) << 6;	\
+			(X1) |= ((X2)[(SZ2)-1] & 0x3F);			\
 			(SZ) = 3;								\
-		} else if (*((X2)+SZ2-4) > 0xEF) {			\
-			(X1)  = (*((X2)+SZ2-4) & 0x07) << 18;	\
-			(X1) |= (*((X2)+SZ2-3) & 0x3F) << 12;	\
-			(X1) |= (*((X2)+SZ2-2) & 0x3F) << 6;	\
-			(X1) |= (*((X2)+SZ2-1) & 0x3F);			\
+		} else if ((SZ2) >= 4 && ((X2)[(SZ2)-4] & 0xF8) == 0xF0) {	\
+			(X1)  = ((X2)[(SZ2)-4] & 0x07) << 18;	\
+			(X1) |= ((X2)[(SZ2)-3] & 0x3F) << 12;	\
+			(X1) |= ((X2)[(SZ2)-2] & 0x3F) << 6;	\
+			(X1) |= ((X2)[(SZ2)-1] & 0x3F);			\
 			(SZ) = 4;								\
-		} else if (*((X2)+SZ2-5) > 0xF7) {			\
-			(X1)  = (*((X2)+SZ2-5) & 0x03) << 24;	\
-			(X1) |= (*((X2)+SZ2-4) & 0x3F) << 18;	\
-			(X1) |= (*((X2)+SZ2-3) & 0x3F) << 12;	\
-			(X1) |= (*((X2)+SZ2-2) & 0x3F) << 6;	\
-			(X1) |= (*((X2)+SZ2-1) & 0x3F);			\
-			(SZ) = 5;								\
-		} else if (*((X2)+SZ2-6) > 0xFB) {			\
-			(X1)  = (*((X2)+SZ2-6) & 0x01) << 30;	\
-			(X1) |= (*((X2)+SZ2-5) & 0x3F) << 24;	\
-			(X1) |= (*((X2)+SZ2-4) & 0x3F) << 18;	\
-			(X1) |= (*((X2)+SZ2-3) & 0x3F) << 12;	\
-			(X1) |= (*((X2)+SZ2-2) & 0x3F) << 6;	\
-			(X1) |= (*((X2)+SZ2-1) & 0x3F);			\
-			(SZ) = 6;								\
 		} else {									\
 			(X1) = int_nil;							\
 			(SZ) = 0;								\
 		}											\
 	} while (0)
 
-/* Get the first char in (X2), and #bytes it takes, but do not increase the pos in (X2) */
+/* Get the first char in (X2), and #bytes it takes, but do not
+ * increase the pos in (X2) */
 #define UTF8_NEXTCHAR(X1, SZ, X2)				\
 	do {										\
-		if (*(X2) < 0x80) {						\
-			(X1) = *(X2);						\
+		if (((X2)[0] & 0x80) == 0) {			\
+			(X1) = (X2)[0];						\
 			(SZ) = 1;							\
-		} else if (*(X2) < 0xE0) {				\
-			(X1)  = ( *(X2)   & 0x1F) << 6;		\
-			(X1) |= (*((X2)+1) & 0x3F);			\
+		} else if (((X2)[0] & 0xE0) == 0xC0) {	\
+			(X1)  = ((X2)[0] & 0x1F) << 6;		\
+			(X1) |= ((X2)[1] & 0x3F);			\
 			(SZ) = 2;							\
-		} else if (*(X2) < 0xF0) {				\
-			(X1)  = ( *(X2)   & 0x0F) << 12;	\
-			(X1) |= (*((X2)+1) & 0x3F) << 6;	\
-			(X1) |= (*((X2)+2) & 0x3F);			\
+		} else if (((X2)[0] & 0xF0) == 0xE0) {	\
+			(X1)  = ((X2)[0] & 0x0F) << 12;		\
+			(X1) |= ((X2)[1] & 0x3F) << 6;		\
+			(X1) |= ((X2)[2] & 0x3F);			\
 			(SZ) = 3;							\
-		} else if (*(X2) < 0xF8) {				\
-			(X1)  = ( *(X2)   & 0x07) << 18;	\
-			(X1) |= (*((X2)+1) & 0x3F) << 12;	\
-			(X1) |= (*((X2)+2) & 0x3F) << 6;	\
-			(X1) |= (*((X2)+3) & 0x3F);			\
+		} else if (((X2)[0] & 0xF8) == 0xF0) {	\
+			(X1)  = ((X2)[0] & 0x07) << 18;		\
+			(X1) |= ((X2)[1] & 0x3F) << 12;		\
+			(X1) |= ((X2)[2] & 0x3F) << 6;		\
+			(X1) |= ((X2)[3] & 0x3F);			\
 			(SZ) = 4;							\
-		} else if (*(X2) < 0xFC) {				\
-			(X1)  = ( *(X2)   & 0x03) << 24;	\
-			(X1) |= (*((X2)+1) & 0x3F) << 18;	\
-			(X1) |= (*((X2)+2) & 0x3F) << 12;	\
-			(X1) |= (*((X2)+3) & 0x3F) << 6;	\
-			(X1) |= (*((X2)+4) & 0x3F);			\
-			(SZ) = 5;							\
-		} else if (*(X2) < 0xFE) {				\
-			(X1)  = ( *(X2)   & 0x01) << 30;	\
-			(X1) |= (*((X2)+1) & 0x3F) << 24;	\
-			(X1) |= (*((X2)+2) & 0x3F) << 18;	\
-			(X1) |= (*((X2)+3) & 0x3F) << 12;	\
-			(X1) |= (*((X2)+4) & 0x3F) << 6;	\
-			(X1) |= (*((X2)+5) & 0x3F);			\
-			(SZ) = 6;							\
 		} else {								\
 			(X1) = int_nil;						\
 			(SZ) = 0;							\
@@ -1222,39 +2890,24 @@ strEpilogue(void *ret)
 /* Get the first char in (X2), and #bytes it takes */
 #define UTF8_GETCHAR_SZ(X1, SZ, X2)				\
 	do {										\
-		if (*(X2) < 0x80) {						\
+		if ((*(X2) & 0x80) == 0) {				\
 			(X1) = *(X2)++;						\
 			(SZ) = 1;							\
-		} else if (*(X2) < 0xE0) {				\
+		} else if ((*(X2) & 0xE0) == 0xC0) {	\
 			(X1)  = (*(X2)++ & 0x1F) << 6;		\
 			(X1) |= (*(X2)++ & 0x3F);			\
 			(SZ) = 2;							\
-		} else if (*(X2) < 0xF0) {				\
+		} else if ((*(X2) & 0xF0) == 0xE0) {	\
 			(X1)  = (*(X2)++ & 0x0F) << 12;		\
 			(X1) |= (*(X2)++ & 0x3F) << 6;		\
 			(X1) |= (*(X2)++ & 0x3F);			\
 			(SZ) = 3;							\
-		} else if (*(X2) < 0xF8) {				\
+		} else if ((*(X2) & 0xF8) == 0xF0) {	\
 			(X1)  = (*(X2)++ & 0x07) << 18;		\
 			(X1) |= (*(X2)++ & 0x3F) << 12;		\
 			(X1) |= (*(X2)++ & 0x3F) << 6;		\
 			(X1) |= (*(X2)++ & 0x3F);			\
 			(SZ) = 4;							\
-		} else if (*(X2) < 0xFC) {				\
-			(X1)  = (*(X2)++ & 0x03) << 24;		\
-			(X1) |= (*(X2)++ & 0x3F) << 18;		\
-			(X1) |= (*(X2)++ & 0x3F) << 12;		\
-			(X1) |= (*(X2)++ & 0x3F) << 6;		\
-			(X1) |= (*(X2)++ & 0x3F);			\
-			(SZ) = 5;							\
-		} else if (*(X2) < 0xFE) {				\
-			(X1)  = (*(X2)++ & 0x01) << 30;		\
-			(X1) |= (*(X2)++ & 0x3F) << 24;		\
-			(X1) |= (*(X2)++ & 0x3F) << 18;		\
-			(X1) |= (*(X2)++ & 0x3F) << 12;		\
-			(X1) |= (*(X2)++ & 0x3F) << 6;		\
-			(X1) |= (*(X2)++ & 0x3F);			\
-			(SZ) = 6;							\
 		} else {								\
 			(X1) = int_nil;						\
 			(SZ) = 0;							\
@@ -1263,200 +2916,119 @@ strEpilogue(void *ret)
 
 #define UTF8_GETCHAR(X1, X2)					\
 	do {										\
-		if (*(X2) < 0x80) {						\
+		if ((*(X2) & 0x80) == 0) {	/* 0xxxxxxx: 1 byte */	\
 			(X1) = *(X2)++;						\
-		} else if (*(X2) < 0xE0) {				\
+		} else if ((*(X2) & 0xE0) == 0xC0) {	/* 110xxxxx: 2 bytes */	\
 			(X1)  = (*(X2)++ & 0x1F) << 6;		\
 			(X1) |= (*(X2)++ & 0x3F);			\
-		} else if (*(X2) < 0xF0) {				\
+		} else if ((*(X2) & 0xF0) == 0xE0) {	/* 1110xxxx: 3 bytes */	\
 			(X1)  = (*(X2)++ & 0x0F) << 12;		\
 			(X1) |= (*(X2)++ & 0x3F) << 6;		\
 			(X1) |= (*(X2)++ & 0x3F);			\
-		} else if (*(X2) < 0xF8) {				\
+		} else if ((*(X2) & 0xF8) == 0xF0) {	/* 11110xxx: 4 bytes; any other lead byte falls through to int_nil without advancing (X2) */	\
 			(X1)  = (*(X2)++ & 0x07) << 18;		\
 			(X1) |= (*(X2)++ & 0x3F) << 12;		\
 			(X1) |= (*(X2)++ & 0x3F) << 6;		\
 			(X1) |= (*(X2)++ & 0x3F);			\
-		} else if (*(X2) < 0xFC) {				\
-			(X1)  = (*(X2)++ & 0x03) << 24;		\
-			(X1) |= (*(X2)++ & 0x3F) << 18;		\
-			(X1) |= (*(X2)++ & 0x3F) << 12;		\
-			(X1) |= (*(X2)++ & 0x3F) << 6;		\
-			(X1) |= (*(X2)++ & 0x3F);			\
-		} else if (*(X2) < 0xFE) {				\
-			(X1)  = (*(X2)++ & 0x01) << 30;		\
-			(X1) |= (*(X2)++ & 0x3F) << 24;		\
-			(X1) |= (*(X2)++ & 0x3F) << 18;		\
-			(X1) |= (*(X2)++ & 0x3F) << 12;		\
-			(X1) |= (*(X2)++ & 0x3F) << 6;		\
-			(X1) |= (*(X2)++ & 0x3F);			\
 		} else {								\
 			(X1) = int_nil;						\
 		}										\
 	} while (0)
 
-#define UTF8_PUTCHAR(X1,X2)												\
-	do {																\
-		if ((X1) < 0 || (SIZEOF_INT > 4 && (int) (X1) >= 0x80000000)) {	\
-			*(X2)++ = '\200';											\
-		} else if ((X1) < 0x80) {										\
-			*(X2)++ = (X1);												\
-		} else if ((X1) < 0x800) {										\
-			*(X2)++ = 0xC0 | ((X1) >> 6);								\
-			*(X2)++ = 0x80 | ((X1) & 0x3F);								\
-		} else if ((X1) < 0x10000) {									\
-			*(X2)++ = 0xE0 | ((X1) >> 12);								\
-			*(X2)++ = 0x80 | (((X1) >> 6) & 0x3F);						\
-			*(X2)++ = 0x80 | ((X1) & 0x3F);								\
-		} else if ((X1) < 0x200000) {									\
-			*(X2)++ = 0xF0 | ((X1) >> 18);								\
-			*(X2)++ = 0x80 | (((X1) >> 12) & 0x3F);						\
-			*(X2)++ = 0x80 | (((X1) >> 6) & 0x3F);						\
-			*(X2)++ = 0x80 | ((X1) & 0x3F);								\
-		} else if ((X1) < 0x4000000) {									\
-			*(X2)++ = 0xF8 | ((X1) >> 24);								\
-			*(X2)++ = 0x80 | (((X1) >> 18) & 0x3F);						\
-			*(X2)++ = 0x80 | (((X1) >> 12) & 0x3F);						\
-			*(X2)++ = 0x80 | (((X1) >> 6) & 0x3F);						\
-			*(X2)++ = 0x80 | ((X1) & 0x3F);								\
-		} else /* if ((X1) < 0x80000000) */ {							\
-			*(X2)++ = 0xFC | ((X1) >> 30);								\
-			*(X2)++ = 0x80 | (((X1) >> 24) & 0x3F);						\
-			*(X2)++ = 0x80 | (((X1) >> 18) & 0x3F);						\
-			*(X2)++ = 0x80 | (((X1) >> 12) & 0x3F);						\
-			*(X2)++ = 0x80 | (((X1) >> 6) & 0x3F);						\
-			*(X2)++ = 0x80 | ((X1) & 0x3F);								\
-		}																\
+#define UTF8_PUTCHAR(X1,X2)	/* append code point (X1) as UTF-8 at (X2), advancing (X2) by 1 to 4 bytes */	\
+	do {											\
+		if ((X1) < 0 || (X1) > 0x10FFFF) {			\
+			goto illegal;	/* the enclosing function must define an "illegal" label */	\
+		} else if ((X1) <= 0x7F) {	/* 1 byte */	\
+			*(X2)++ = (X1);							\
+		} else if ((X1) <= 0x7FF) {	/* 2 bytes */	\
+			*(X2)++ = 0xC0 | ((X1) >> 6);			\
+			*(X2)++ = 0x80 | ((X1) & 0x3F);			\
+		} else if ((X1) <= 0xFFFF) {	/* 3 bytes */	\
+			*(X2)++ = 0xE0 | ((X1) >> 12);			\
+			*(X2)++ = 0x80 | (((X1) >> 6) & 0x3F);	\
+			*(X2)++ = 0x80 | ((X1) & 0x3F);			\
+		} else {	/* 4 bytes: up to 0x10FFFF, checked above */	\
+			*(X2)++ = 0xF0 | ((X1) >> 18);			\
+			*(X2)++ = 0x80 | (((X1) >> 12) & 0x3F);	\
+			*(X2)++ = 0x80 | (((X1) >> 6) & 0x3F);	\
+			*(X2)++ = 0x80 | ((X1) & 0x3F);			\
+		}											\
 	} while (0)
 
-static inline int
-UTF8_strlen(const char *val)
+static inline size_t
+UTF8_strlen(const char *s)	/* number of code points in the NUL-terminated string s */
 {
-	const unsigned char *s = (const unsigned char *) val;
-	int pos = 0;
+	size_t pos = 0;
+
+	UTF8_assert(s);	/* validity is only checked in debug builds */
+
+	if (GDK_STRNIL(s))
+		return 1;	/* the nil string is reported as length 1 */
 
 	while (*s) {
-		int c = *s++;
-
-		pos++;
-		if (c < 0xC0)
-			continue;
-		if (*s++ < 0x80)
-			return int_nil;
-		if (c < 0xE0)
-			continue;
-		if (*s++ < 0x80)
-			return int_nil;
-		if (c < 0xF0)
-			continue;
-		if (*s++ < 0x80)
-			return int_nil;
-		if (c < 0xF8)
-			continue;
-		if (*s++ < 0x80)
-			return int_nil;
-		if (c < 0xFC)
-			continue;
-		if (*s++ < 0x80)
-			return int_nil;
+		/* just count leading bytes of encoded code points; only works
+		 * for correctly encoded UTF-8 */
+		pos += (*s++ & 0xC0) != 0x80;	/* adds 1 for every byte that is not a 10xxxxxx continuation byte */
 	}
 	return pos;
 }
 
 static inline int
-UTF8_strpos(const char *val, const char *end)
+UTF8_strpos(const char *s, const char *end)	/* number of code points that start in [s, end); -1 if s lies beyond end */
 {
-	const unsigned char *s = (const unsigned char *) val;
 	int pos = 0;
 
-	if (s > (unsigned char *) end) {
+	UTF8_assert(s);	/* validity is only checked in debug builds */
+
+	if (s > end) {
 		return -1;
 	}
-	while (s < (unsigned char *) end) {
-		int c = *s++;
-
-		pos++;
-		if (c == 0)
-			return -1;
-		if (c < 0xC0)
-			continue;
-		if (*s++ < 0x80)
-			return -1;
-		if (c < 0xE0)
-			continue;
-		if (*s++ < 0x80)
-			return -1;
-		if (c < 0xF0)
-			continue;
-		if (*s++ < 0x80)
-			return -1;
-		if (c < 0xF8)
-			continue;
-		if (*s++ < 0x80)
-			return -1;
-		if (c < 0xFC)
-			continue;
-		if (*s++ < 0x80)
-			return -1;
+	while (s < end) {	/* bounded by end only: a NUL byte before end no longer stops the scan */
+		/* just count leading bytes of encoded code points; only works
+		 * for correctly encoded UTF-8 */
+		pos += (*s++ & 0xC0) != 0x80;
 	}
 	return pos;
 }
 
 static inline str
-UTF8_strtail(const char *val, int pos)
+UTF8_strtail(const char *s, int pos)	/* pointer to the code point with 0-based index pos, or to the terminating NUL if s is shorter */
 {
-	const unsigned char *s = (const unsigned char *) val;
-
-	while (*s && pos-- > 0) {
-		int c = *s++;
-
-		if (c < 0xC0)
-			continue;
-		if (*s++ < 0x80)
-			return NULL;
-		if (c < 0xE0)
-			continue;
-		if (*s++ < 0x80)
-			return NULL;
-		if (c < 0xF0)
-			continue;
-		if (*s++ < 0x80)
-			return NULL;
-		if (c < 0xF8)
-			continue;
-		if (*s++ < 0x80)
-			return NULL;
-		if (c < 0xFC)
-			continue;
-		if (*s++ < 0x80)
-			return NULL;
+	UTF8_assert(s);	/* validity is only checked in debug builds */
+	while (*s) {
+		if ((*s & 0xC0) != 0x80) {	/* at the first byte of a code point */
+			if (pos <= 0)
+				break;	/* enough code points skipped (immediately when pos <= 0) */
+			pos--;
+		}
+		s++;	/* continuation bytes are stepped over without counting */
 	}
 	return (str) s;
 }
 
 static str
-convertCase(BAT *from, BAT *to, str *res, const char *s, const char *malfunc)
+convertCase(BAT *from, BAT *to, str *res, const char *src, const char *malfunc)	/* *res receives a newly allocated copy of src with each code point found in `from` replaced by the value at the same position in `to` */
 {
 	BATiter toi = bat_iterator(to);
 	BATiter fromi = bat_iterator(from);
-	size_t len = strlen(s);
-	unsigned char *dst;
-	const unsigned char *src = (const unsigned char *) s;
-	const unsigned char *end = (const unsigned char *) (src + len);
+	size_t len = strlen(src);	/* input length in bytes; also the initial output capacity */
+	char *dst;
+	const char *end = src + len;
 	BUN UTF8_CONV_r;
-	int lower_to_upper = from == UTF8_lowerBat;
+	int lower_to_upper = from == UTF8_toUpperFrom;	/* direction, needed only by the ASCII fast path */
 
-	if (strNil(s)) {
+	if (strNil(src)) {
 		*res = GDKstrdup(str_nil);
 	} else {
 		*res = GDKmalloc(len + 1);
 		if (*res != NULL) {
-			dst = (unsigned char *) *res;
+			dst = *res;
 			while (src < end) {
 				int c;
 
 				UTF8_GETCHAR(c, src);
-				if (c < 0x80) {
+				if (0 <= c && c <= 0x7F) {	/* c is a decoded code point, not a byte: testing bit 7 alone would also match U+0100, U+0101, ... and skip their lookup */
 					/* for ASCII characters we don't need to do a hash
 					 * lookup */
 					if (lower_to_upper) {
@@ -1467,25 +3039,26 @@ convertCase(BAT *from, BAT *to, str *res
 							c += 'a' - 'A';
 					}
 				} else {
+					/* use hash, even though BAT is sorted */
 					HASHfnd_int(UTF8_CONV_r, fromi, &c);
 					if (UTF8_CONV_r != BUN_NONE)
 						c = *(int*) BUNtloc(toi, UTF8_CONV_r);
 				}
-				if (dst + 6 > (unsigned char *) *res + len) {
+				if (dst + 4 > *res + len) {
 					/* not guaranteed to fit, so allocate more space;
 					 * also allocate enough for the rest of the
 					 * source */
-					size_t off = dst - (unsigned char *) *res;
+					size_t off = dst - *res;
 
-					dst = GDKrealloc(*res, (len += 6 + (end - src)) + 1);
+					dst = GDKrealloc(*res, (len += 4 + (end - src)) + 1);
 					if (dst == NULL) {
 						/* if realloc fails, original buffer is still
 						 * allocated, so free it */
 						GDKfree(*res);
 						goto hashfnd_failed;
 					}
-					*res = (char *) dst;
-					dst = (unsigned char *) *res + off;
+					*res = dst;
+					dst = *res + off;
 				}
 				UTF8_PUTCHAR(c, dst);
 			}
@@ -1496,6 +3069,12 @@ convertCase(BAT *from, BAT *to, str *res
 		return MAL_SUCCEED;
   hashfnd_failed:
 	throw(MAL, malfunc, SQLSTATE(HY001) MAL_MALLOC_FAIL);
+  illegal:
+	/* only reached from UTF8_PUTCHAR inside the conversion loop, where
+	 * *res is a live allocation: release it so it is not leaked */
+	GDKfree(*res);
+	*res = NULL;
+	throw(MAL, malfunc, SQLSTATE(42000) "Illegal Unicode code point");
 }
 
 /*
@@ -1545,13 +3120,16 @@ STRlike(const char *s, const char *pat, 
 }
 
 str
-STRlikewrap(bit *ret, const str *s, const str *pat, const str *esc){
-	*ret = STRlike(*s,*pat,*esc);
+STRlikewrap(bit *ret, const str *s, const str *pat, const str *esc)
+{
+	*ret = STRlike(*s, *pat, *esc);
 	return MAL_SUCCEED;
 }
+
 str
-STRlikewrap2(bit *ret, const str *s, const str *pat){
-	*ret = STRlike(*s,*pat,0);
+STRlikewrap2(bit *ret, const str *s, const str *pat)
+{
+	*ret = STRlike(*s, *pat, NULL);
 	return MAL_SUCCEED;
 }
 
@@ -1567,9 +3145,6 @@ STRtostr(str *res, const str *src)
 	return MAL_SUCCEED;
 }
 
-/*
- * The concatenate operator requires a type in most cases.
- */
 str
 STRConcat(str *res, const str *val1, const str *val2)
 {
@@ -1596,7 +3171,7 @@ STRConcat(str *res, const str *val1, con
 str
 STRLength(int *res, const str *arg1)
 {
-	int l;
+	size_t l;
 	const char *s = *arg1;
 
 	if (strNil(s)) {
@@ -1604,8 +3179,10 @@ STRLength(int *res, const str *arg1)
 		return MAL_SUCCEED;
 	}
 	l =  UTF8_strlen(s);
-	assert(l <INT_MAX);
-	*res = l;
+	assert(l < INT_MAX);
+	if (l > INT_MAX)
+		l = INT_MAX;
+	*res = (int) l;
 	return MAL_SUCCEED;
 }
 
@@ -1615,7 +3192,7 @@ STRBytes(int *res, const str *arg1)
 	size_t l;
 
 	l = strlen(*arg1);
-	assert(l <INT_MAX);
+	assert(l < INT_MAX);
 	*res = (int) l;
 	return MAL_SUCCEED;
 }
@@ -1630,19 +3207,14 @@ STRTail(str *res, const str *arg1, const
 		*res = GDKstrdup(str_nil);
 	} else {
 		if (off < 0) {
-			int len = UTF8_strlen(s);
+			size_t len = UTF8_strlen(s);
 
-			if (len == int_nil) {
-				*res = GDKstrdup(str_nil);
-				if (*res == NULL)
-					throw(MAL, "str.tail", SQLSTATE(HY001) MAL_MALLOC_FAIL);
-				return MAL_SUCCEED;
-			}
-			off = len + off;
+			assert(len <= INT_MAX);
+			off += (int) len;
 			if (off < 0)
 				off = 0;
 		}
-		*res = (char *) GDKstrdup(UTF8_strtail(s, off));
+		*res = GDKstrdup(UTF8_strtail(s, off));
 	}
 	if (*res == NULL)
 		throw(MAL, "str.tail", SQLSTATE(HY001) MAL_MALLOC_FAIL);
@@ -1652,7 +3224,8 @@ STRTail(str *res, const str *arg1, const
 str
 STRSubString(str *res, const str *arg1, const int *offset, const int *length)
 {
-	int len, off = *offset, l = *length;
+	size_t len;
+	int off = *offset, l = *length;
 	const char *s = *arg1;
 
 	if (strNil(s) || off == int_nil || l == int_nil) {
@@ -1663,13 +3236,10 @@ STRSubString(str *res, const str *arg1, 
 	}
 	if (off < 0) {
 		len = UTF8_strlen(s);
-		if (len == int_nil) {
-			*res = GDKstrdup(str_nil);
-			if (*res == NULL)
-				throw(MAL, "str.substring", SQLSTATE(HY001) MAL_MALLOC_FAIL);
-			return MAL_SUCCEED;
-		}
-		off = len + off;
+		assert(len <= INT_MAX);
+		if (len > INT_MAX)
+			len = INT_MAX;
+		off += (int) len;
 		if (off < 0) {
 			l += off;
 			off = 0;
@@ -1683,7 +3253,7 @@ STRSubString(str *res, const str *arg1, 
 		return MAL_SUCCEED;
 	}
 	s = UTF8_strtail(s, off);
-	len = (int) (UTF8_strtail(s, l) - s);
+	len = (size_t) (UTF8_strtail(s, l) - s);
 	*res = GDKmalloc(len + 1);
 	if (*res == NULL)
 		throw(MAL, "str.substring", SQLSTATE(HY001) MAL_MALLOC_FAIL);
@@ -1695,23 +3265,35 @@ STRSubString(str *res, const str *arg1, 
 str
 STRFromWChr(str *res, const int *c)
 {
-	str s = *res = GDKmalloc(7);
+	str s;
 
+	if (*c == int_nil) {
+		*res = GDKstrdup(str_nil);
+		if (*res == NULL)
+			throw(MAL, "str.unicode", SQLSTATE(HY001) MAL_MALLOC_FAIL);
+		return MAL_SUCCEED;
+	}
+
+	s = *res = GDKmalloc(5);
 	if (*res == NULL)
 		throw(MAL, "str.unicode", SQLSTATE(HY001) MAL_MALLOC_FAIL);
 	UTF8_PUTCHAR(*c, s);
 	*s = 0;
 	return MAL_SUCCEED;
+  illegal:
+	GDKfree(*res);
+	*res = NULL;
+	throw(MAL, "str.unicode", SQLSTATE(42000) "Illegal Unicode code point");
 }
 
+/* return the Unicode code point of arg1 at position at */
 str
 STRWChrAt(int *res, const str *arg1, const int *at)
 {
 /* 64bit: should have lng arg */
 	const char *s = *arg1;
-	const unsigned char *u;
 
-	if (strNil(*arg1) || *at == int_nil || *at < 0) {
+	if (strNil(s) || *at == int_nil || *at < 0) {
 		*res = int_nil;
 		return MAL_SUCCEED;
 	}
@@ -1720,15 +3302,14 @@ STRWChrAt(int *res, const str *arg1, con
 		*res = int_nil;
 		return MAL_SUCCEED;
 	}
-	u = (const unsigned char *) s;
-	UTF8_GETCHAR(*res, u);
+	UTF8_GETCHAR(*res, s);
 	return MAL_SUCCEED;
 }
 
+/* returns whether arg1 starts with arg2 */
 str
 STRPrefix(bit *res, const str *arg1, const str *arg2)
 {
-	size_t pl, i;
 	const char *s = *arg1;
 	const char *prefix = *arg2;
 
@@ -1736,25 +3317,15 @@ STRPrefix(bit *res, const str *arg1, con
 		*res = bit_nil;
 		return MAL_SUCCEED;
 	}
-	pl = strlen(prefix);
-	if (strlen(s) < pl) {
-		*res = 0;
-		return MAL_SUCCEED;
-	}
-	*res = 1;
-	for (i = 0; i < pl; i++) {
-		if (s[i] != prefix[i]) {
-			*res = 0;
-			break;
-		}
-	}
+	*res = strncmp(s, prefix, strlen(prefix)) == 0;
 	return MAL_SUCCEED;
 }
 
+/* returns whether arg1 ends with arg2 */
 str
 STRSuffix(bit *res, const str *arg1, const str *arg2)
 {
-	size_t i, sl, sul;
+	size_t sl, sul;
 	const char *s = *arg1;
 	const char *suffix = *arg2;
 
@@ -1765,37 +3336,30 @@ STRSuffix(bit *res, const str *arg1, con
 	sl = strlen(s);
 	sul = strlen(suffix);
 
-	if (sl < sul) {
+	if (sl < sul)
 		*res = 0;
-		return MAL_SUCCEED;
-	}
-	*res = 1;
-	for (i = 0; i < sul; i++) {
-		if (s[sl - 1 - i] != suffix[sul - 1 - i]) {
-			*res = 0;
-			break;
-		}
-	}
+	else
+		*res = strcmp(s + sl - sul, suffix) == 0;
 	return MAL_SUCCEED;
 }
 
 str
 STRLower(str *res, const str *arg1)
 {
-	return convertCase(UTF8_upperBat, UTF8_lowerBat, res, *arg1, "str.lower");
+	return convertCase(UTF8_toLowerFrom, UTF8_toLowerTo, res, *arg1, "str.lower");
 }
 
 str
 STRUpper(str *res, const str *arg1)
 {
-	return convertCase(UTF8_lowerBat, UTF8_upperBat, res, *arg1, "str.upper");
+	return convertCase(UTF8_toUpperFrom, UTF8_toUpperTo, res, *arg1, "str.upper");
 }
 
+/* find first occurrence of needle in haystack */
 str
 STRstrSearch(int *res, const str *haystack, const str *needle)
 {
 /* 64bit: should return lng */
-	char *p;
 	const char *s = *haystack;
 	const char *s2 = *needle;
 
@@ -1803,20 +3367,19 @@ STRstrSearch(int *res, const str *haysta
 		*res = int_nil;
 		return MAL_SUCCEED;
 	}
-	if ((p = strstr(s, s2)) != 0)
-		*res = UTF8_strpos(s, p);
+	if ((s2 = strstr(s, s2)) != NULL)
+		*res = UTF8_strpos(s, s2);
 	else
 		*res = -1;
 	return MAL_SUCCEED;
 }
 
+/* find last occurrence of arg2 in arg1 */
 str
 STRReverseStrSearch(int *res, const str *arg1, const str *arg2)
 {
 /* 64bit: should return lng */
 	size_t len, slen;
-	const char *p, *q;
-	size_t i;
 	const char *s = *arg1;
 	const char *s2 = *arg2;
 
@@ -1827,13 +3390,15 @@ STRReverseStrSearch(int *res, const str 
 	*res = -1;
 	len = strlen(s);
 	slen = strlen(s2);
-	for (p = s + len - slen; p >= s; p--) {
-		for (i = 0, q = p; i < slen && *q == s2[i]; i++, q++)
-			;
-		if (i == slen) {
-			*res = UTF8_strpos(s, p);
-			break;
-		}
+	*res = -1;					/* changed if found */
+	if (len >= slen) {
+		const char *p = s + len - slen;
+		do {
+			if (strncmp(p, s2, slen) == 0) {
+				*res = UTF8_strpos(s, p);
+				break;
+			}
+		} while (p-- > s);
 	}
 	return MAL_SUCCEED;
 }
@@ -1848,19 +3413,19 @@ STRsplitpart(str *res, str *haystack, st
 	const char *s2 = *needle;
 
 	if (strNil(s) || *field == int_nil) {
-		*res = GDKstrdup("");
+		*res = GDKstrdup(str_nil);
 		if (*res == NULL)
 			throw(MAL, "str.splitpart", SQLSTATE(HY001) MAL_MALLOC_FAIL);
 		return MAL_SUCCEED;
 	}
 
 	if (*field <= 0) {
-		throw(MAL, "str.splitpart", "field position must be greater than zero");
+		throw(MAL, "str.splitpart", SQLSTATE(42000) "field position must be greater than zero");
 	}
 
 	len = strlen(s2);
 
-	while ((p = strstr(s, s2)) != 0 && f > 1) {
+	while ((p = strstr(s, s2)) != NULL && f > 1) {
 		s = p + len;
 		f--;
 	}
@@ -1872,257 +3437,312 @@ STRsplitpart(str *res, str *haystack, st
 		return MAL_SUCCEED;
 	}
 
-	if (p == 0) {
+	if (p == NULL) {
 		len = strlen(s);
-	} else if ((p = strstr(s, s2)) != 0) {
+	} else {
 		len = (size_t) (p - s);
-	} else {
-		len = strlen(s);
 	}
 
-	if (len == 0) {
-		*res = GDKstrdup("");
-		if (*res == NULL)
-			throw(MAL, "str.splitpart", SQLSTATE(HY001) MAL_MALLOC_FAIL);
-		return MAL_SUCCEED;
-	}
-	*res = GDKmalloc(len + 1);
+	*res = GDKstrndup(s, len);
 	if (*res == NULL)
 		throw(MAL, "str.splitpart", SQLSTATE(HY001) MAL_MALLOC_FAIL);
-	strncpy(*res, s, len);
-	(*res)[len] = 0;
-	return MAL_SUCCEED;
-}
-
-str
-STRStrip(str *res, const str *arg1)
-{
-	const char *start = *arg1;
-	const char *s;
-	size_t len;
-
-	while (GDKisspace(*start))
-		start++;
-
-	/* Remove the trailing spaces.  Make sure not to pass the start */
-	/* pointer in case a string only contains spaces.		*/
-	s = start + strlen(start);
-	while (s > start && GDKisspace(*(s - 1)))
-		s--;
-
-	len = s - start + 1;
-	*res = GDKmalloc(len);
-	if (*res == NULL)
-		throw(MAL, "str.trim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
-	memcpy(*res, start, len - 1);
-	(*res)[len - 1] = '\0';
 	return MAL_SUCCEED;
 }
 
-/* Remove the longest string containing only characters from 'arg2' from the start of 'arg1'
- * 
- * Example: trim('zzzytrimzyxyyz', 'xyz')
- * Result: trim
- */
-str
-STRStrip2(str *res, const str *arg1, const str *arg2)
+/* returns number of bytes to remove from left to strip the codepoints in rm */
+static size_t
+lstrip(const char *s, size_t len, const int *rm, size_t nrm)
 {
-	const char *s = *arg1, *s2 = *arg2;
-	const unsigned char *u = NULL;
-	int *toRm = NULL; /* candidate list of to be removed characters, converted to INT */
-	int i = 0, rm_cnt = UTF8_strlen(s2);
-	size_t len = strlen(*arg1);
+	int c;
+	size_t i, n, skip = 0;
+
+	while (len > 0) {
+		UTF8_NEXTCHAR(c, n, s);
+		assert(n > 0 && n <= len);
+		for (i = 0; i < nrm; i++) {
+			if (rm[i] == c) {
+				s += n;
+				skip += n;
+				len -= n;
+				break;
+			}
+		}
+		if (i == nrm)
+			break;
+	}
+	return skip;
+}
+
+/* returns the resulting length of s after stripping codepoints in rm
+ * from the right */
+static size_t
+rstrip(const char *s, size_t len, const int *rm, size_t nrm)
+{
+	int c;
+	size_t i, n;
 
-	toRm = GDKmalloc(sizeof(int) * rm_cnt);
-	if (toRm == NULL)
-		throw(MAL, "str.trim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
-	u = (const unsigned char *) s2;
-	for (i = 0; i < rm_cnt; i++)
-		UTF8_GETCHAR(toRm[i], u);
-	/* Just a sanity check that all bytes of s2 are consumed */
-	if (u[0] != '\0') {
-		GDKfree(toRm);
-		throw(MAL, "str.trim", "Invalid UTF-8 string %s", *arg2);
+	while (len > 0) {
+		UTF8_LASTCHAR(c, n, s, len);
+		assert(n > 0 && n <= len);
+		for (i = 0; i < nrm; i++) {
+			if (rm[i] == c) {
+				len -= n;
+				break;
+			}
+		}
+		if (i == nrm)
+			break;
 	}
+	return len;
+}
 
-	if (strNil(s)) {
+const int whitespace[] = {
+	' ',						/* space */
+	'\t',						/* tab (character tabulation) */
+	'\n',						/* line feed */
+	'\r',						/* carriage return */
+	'\f',						/* form feed */
+	'\v',						/* vertical tab (line tabulation) */
+/* below the code points that have the Unicode Zs (space separator) property */
+	0x00A0,						/* no-break space */
+	0x1680,						/* ogham space mark */
+	0x2000,						/* en quad */
+	0x2001,						/* em quad */
+	0x2002,						/* en space */
+	0x2003,						/* em space */
+	0x2004,						/* three-per-em space */
+	0x2005,						/* four-per-em space */
+	0x2006,						/* six-per-em space */
+	0x2007,						/* figure space */
+	0x2008,						/* punctuation space */
+	0x2009,						/* thin space */
+	0x200A,						/* hair space */
+	0x202F,						/* narrow no-break space */
+	0x205F,						/* medium mathematical space */
+	0x3000,						/* ideographic space */
+};
+#define NSPACES		(sizeof(whitespace) / sizeof(whitespace[0]))
+
+/* remove all whitespace from either side of arg1 */
+str
+STRStrip(str *res, const str *arg1)
+{
+	const char *s = *arg1;
+	size_t len;
+	size_t n;
+
+	if (GDK_STRNIL(s)) {
 		*res = GDKstrdup(str_nil);
 	} else {
-		int c = 0, sz = 0;
-		const unsigned char *v = NULL;
-
-		/* trim left */
-		u = (const unsigned char *) s;
-		do {
-			UTF8_NEXTCHAR(c, sz, u);
-
-			for (i = 0; i < rm_cnt; i++) {
-				if (toRm[i] == c) {
-					u += sz;
-					break;
-				}
-			}
-		} while (i < rm_cnt);
-		/* trim right */
-		v = (const unsigned char *) s;
-		do {
-			UTF8_LASTCHAR(c, sz, v, len);
-
-			for (i = 0; i < rm_cnt; i++) {
-				if (toRm[i] == c) {
-					len -= sz;
-					break;
-				}
-			}
-		} while (i < rm_cnt);
-		*res = GDKstrndup((const char*)u, len - ((const char*)u - s));
+		len = strlen(s);
+		n = lstrip(s, len, whitespace, NSPACES);
+		s += n;
+		len -= n;
+		n = rstrip(s, len, whitespace, NSPACES);
+		*res = GDKstrndup(s, n);
 	}
-
-	GDKfree(toRm);
 	if (*res == NULL)
-		throw(MAL, "str.ltrim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
+		throw(MAL, "str.trim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
 	return MAL_SUCCEED;
 }
 
+/* remove all whitespace from the start (left) of arg1 */
 str
 STRLtrim(str *res, const str *arg1)
 {
 	const char *s = *arg1;
-	if (strNil(s)) {
+	size_t len;
+	size_t n;
+
+	if (GDK_STRNIL(s)) {
 		*res = GDKstrdup(str_nil);
 	} else {
-		while (GDKisspace(*s))
-			s++;
-		*res = GDKstrdup(s);
+		len = strlen(s);
+		n = lstrip(s, len, whitespace, NSPACES);
+		*res = GDKstrndup(s + n, len - n);
 	}
 	if (*res == NULL)
 		throw(MAL, "str.ltrim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
 	return MAL_SUCCEED;
 }
 
-/* Remove the longest string containing only characters from 'arg2' from the start of 'arg1'
- * 
- * Example: ltrim('zzzytrim', 'xyz')
- * Result: trim
- */
-str
-STRLtrim2(str *res, const str *arg1, const str *arg2)
-{
-	const char *s = *arg1, *s2 = *arg2;
-	const unsigned char *u = NULL;
-	int *toRm = NULL; /* candidate list of to be removed characters, converted to INT */
-	int i = 0, rm_cnt = UTF8_strlen(s2);
-
-	toRm = GDKmalloc(sizeof(int) * rm_cnt);
-	if (toRm == NULL)
-		throw(MAL, "str.ltrim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
-	u = (const unsigned char *) s2;
-	for (i = 0; i < rm_cnt; i++)
-		UTF8_GETCHAR(toRm[i], u);
-	/* Just a sanity check that all bytes of s2 are consumed */
-	if (u[0] != '\0') {
-		GDKfree(toRm);
-		throw(MAL, "str.ltrim", "Invalid UTF-8 string %s", *arg2);
-	}
-
-	if (strNil(s)) {
-		*res = GDKstrdup(str_nil);
-	} else {
-		int c = 0, sz = 0;
-
-		u = (const unsigned char *) s;
-		do {
-			UTF8_NEXTCHAR(c, sz, u);
-
-			for (i = 0; i < rm_cnt; i++) {
-				if (toRm[i] == c) {
-					u += sz;
-					break;
-				}
-			}
-		} while (i < rm_cnt);
-		*res = GDKstrdup((const char*)u);
-	}
-
-	GDKfree(toRm);
-	if (*res == NULL)
-		throw(MAL, "str.ltrim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
-	return MAL_SUCCEED;
-}
-
+/* remove all whitespace from the end (right) of arg1 */
 str
 STRRtrim(str *res, const str *arg1)
 {
 	const char *s = *arg1;
-	size_t len = strlen(*arg1);
+	size_t len;
+	size_t n;
 
-	if (strNil(s)) {
+	if (GDK_STRNIL(s)) {
 		*res = GDKstrdup(str_nil);
 	} else {
-		while (len > 0 && GDKisspace(s[len - 1]))
-			len--;
-		*res = GDKmalloc(len + 1);
-		if (*res != NULL) {
-			memcpy(*res, s, len);
-			(*res)[len] = '\0';
-		}
+		len = strlen(s);
+		n = rstrip(s, len, whitespace, NSPACES);
+		*res = GDKstrndup(s, n);
 	}
 	if (*res == NULL)
 		throw(MAL, "str.rtrim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
 	return MAL_SUCCEED;
 }
 
-/* Remove the longest string containing only characters from 'arg2' from the end of 'arg1'
- * 
- * Example: rtrim('trimxxxxxxxxx', 'xyz')
- * Result: trim
- */
-str
-STRRtrim2(str *res, const str *arg1, const str *arg2)
+/* return a list of codepoints in s */
+static int *
+trimchars(const char *s, size_t *n)
 {
-	const char *s = *arg1, *s2 = *arg2;
-	const unsigned char *u = NULL;
-	int *toRm = NULL;
-	int i = 0, rm_cnt = UTF8_strlen(*arg2);
-	size_t len = strlen(*arg1);
+	size_t len = 0;
+	int *chars = GDKmalloc(strlen(s) * sizeof(int));
+	int c;
+
+	if (chars == NULL)
+		return NULL;
 
-	toRm = GDKmalloc(sizeof(int) * rm_cnt);
-	if (toRm == NULL)
-		throw(MAL, "str.rtrim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
-	u = (const unsigned char *) s2;
-	for (i = 0; i < rm_cnt; i++)
-		UTF8_GETCHAR(toRm[i], u);
-	/* Just a sanity check that all bytes of arg2 are consumed */
-	if (u[0] != '\0') {
-		GDKfree(toRm);
-		throw(MAL, "str.rtrim", "Invalid UTF-8 string %s", *arg2);
+	while (*s) {
+		UTF8_GETCHAR(c, s);
+		assert(c != int_nil);
+		chars[len++] = c;
 	}
+	*n = len;
+	return chars;
+}
 
-	if (strNil(s)) {
+/* remove the longest string containing only characters from arg2 from
+ * either side of arg1 */
+str
+STRStrip2(str *res, const str *arg1, const str *arg2)
+{
+	const char *s = *arg1;
+	size_t len;
+	size_t n;
+	size_t nchars;
+	int *chars;
+
+	if (GDK_STRNIL(s)) {
 		*res = GDKstrdup(str_nil);
 	} else {
-		int c = 0, sz = 0;
-		u = (unsigned char *) s;
-		do {
-			UTF8_LASTCHAR(c, sz, u, len);
+		chars = trimchars(*arg2, &nchars);
+		if (chars == NULL)
+			throw(MAL, "str.trim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
+		len = strlen(s);
+		n = lstrip(s, len, chars, nchars);
+		s += n;
+		len -= n;
+		n = rstrip(s, len, chars, nchars);
+		GDKfree(chars);
+		*res = GDKstrndup(s, n);
+	}
+	if (*res == NULL)
+		throw(MAL, "str.trim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
+	return MAL_SUCCEED;
+}
 
-			for (i = 0; i < rm_cnt; i++) {
-				if (toRm[i] == c) {
-					len -= sz;
-					break;
-				}
-			}
-		} while (i < rm_cnt);
-		*res = GDKstrndup(s, len);
+/* remove the longest string containing only characters from arg2 from
+ * the start (left) of arg1 */
+str
+STRLtrim2(str *res, const str *arg1, const str *arg2)
+{
+	const char *s = *arg1;
+	size_t len;
+	size_t n;
+	size_t nchars;
+	int *chars;
+
+	if (GDK_STRNIL(s)) {
+		*res = GDKstrdup(str_nil);
+	} else {
+		chars = trimchars(*arg2, &nchars);
+		if (chars == NULL)
+			throw(MAL, "str.trim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
+		len = strlen(s);
+		n = lstrip(s, len, chars, nchars);
+		*res = GDKstrndup(s + n, len - n);
 	}
-
-	GDKfree(toRm);
 	if (*res == NULL)
 		throw(MAL, "str.ltrim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
 	return MAL_SUCCEED;
 }
 
-/* Fill up 'arg1' to lenth 'len' by prepending whitespaces.
+/* remove the longest string containing only characters from arg2 from
+ * the end (right) of arg1 */
+str
+STRRtrim2(str *res, const str *arg1, const str *arg2)
+{
+	const char *s = *arg1;
+	size_t len;
+	size_t n;
+	size_t nchars;
+	int *chars;
+
+	if (GDK_STRNIL(s)) {
+		*res = GDKstrdup(str_nil);
+	} else {
+		chars = trimchars(*arg2, &nchars);
+		if (chars == NULL)
+			throw(MAL, "str.trim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
+		len = strlen(s);
+		n = rstrip(s, len, chars, nchars);
+		*res = GDKstrndup(s, n);
+	}
+	if (*res == NULL)
+		throw(MAL, "str.rtrim", SQLSTATE(HY001) MAL_MALLOC_FAIL);
+	return MAL_SUCCEED;
+}
+
+static char *
+pad(const char *s, const char *pad, int len, int left)
+{
+	size_t slen, padlen, repeats, residual, i;
+	char *res;
+
+	if (GDK_STRNIL(s) || GDK_STRNIL(pad) || len == int_nil)
+		return GDKstrdup(str_nil);
+
+	if (len < 0)
+		len = 0;
+
+	slen = UTF8_strlen(s);
+
+	if (slen > (size_t) len) {
+		/* truncate */
+		pad = UTF8_strtail(s, len);
+		return GDKstrndup(s, pad - s);
+	}
+
+	padlen = UTF8_strlen(pad);
+	if (slen == (size_t) len || padlen == 0) {
+		/* nothing to do (no padding if there is no pad string) */
+		return GDKstrdup(s);
+	}
+
+	repeats = ((size_t) len - slen) / padlen;
+	residual = ((size_t) len - slen) % padlen;
+	if (residual > 0)
+		residual = (size_t) (UTF8_strtail(pad, (int) residual) - pad);
+	padlen = strlen(pad);
+	slen = strlen(s);
+	res = GDKmalloc(slen + repeats * padlen + residual + 1);
+	if (res == NULL)
+		return NULL;
+	if (left) {
+		for (i = 0; i < repeats; i++)
+			memcpy(res + i * padlen, pad, padlen);
+		if (residual > 0)
+			memcpy(res + repeats * padlen, pad, residual);
+		if (slen > 0)
+			memcpy(res + repeats * padlen + residual, s, slen);
+	} else {
+		if (slen > 0)
+			memcpy(res, s, slen);
+		for (i = 0; i < repeats; i++)
+			memcpy(res + slen + i * padlen, pad, padlen);
+		if (residual > 0)
+			memcpy(res + slen + repeats * padlen, pad, residual);
+	}
+	res[repeats * padlen + residual + slen] = 0;
+	return res;
+}
+
+/* Fill up 'arg1' to length 'len' by prepending whitespaces.
  * If 'arg1' is already longer than 'len', then it's truncated on the right
  * (NB: this is the PostgreSQL definition).
  *
@@ -2132,36 +3752,13 @@ STRRtrim2(str *res, const str *arg1, con
 str
 STRLpad(str *res, const str *arg1, const int *len)
 {
-	const char *s = *arg1;
-	int pad_cnt = *len - UTF8_strlen(s); /* #whitespaces to be prepended */
-
-	if (pad_cnt == 0) {
-		*res = GDKstrdup(s);
-	} else if (pad_cnt < 0) { /* truncate */
-		s = UTF8_strtail(s, *len);
-		*res = GDKstrndup(*arg1, s - *arg1);
-	} else { /* pad_cnt > 0: fill */
-		int i = 0;
-		size_t s_len = strlen(s),
-			res_len = pad_cnt + s_len;
-		char *r = GDKmalloc(res_len+1);
-
-		if (r == NULL)
-			throw(MAL, "str.lpad", SQLSTATE(HY001) MAL_MALLOC_FAIL);
-		for (i = 0; i < pad_cnt; i++) {
-			r[i] = ' ';
-		}
-		memcpy(r + pad_cnt, s, s_len);
-		r[res_len] = '\0';
-		*res = r;
-	}
-
+	*res = pad(*arg1, " ", *len, 1);
 	if (*res == NULL)
 		throw(MAL, "str.lpad", SQLSTATE(HY001) MAL_MALLOC_FAIL);
 	return MAL_SUCCEED;
 }
 
-/* Fill up 'arg1' to lenth 'len' by appending whitespaces.
+/* Fill up 'arg1' to length 'len' by appending whitespaces.
  * If 'arg1' is already longer than 'len', then it's truncated (on the right)
  * (NB: this is the PostgreSQL definition).
  *
@@ -2171,36 +3768,13 @@ STRLpad(str *res, const str *arg1, const
 str
 STRRpad(str *res, const str *arg1, const int *len)
 {
-	const char *s = *arg1;
-	int pad_cnt = *len - UTF8_strlen(s); /* #whitespaces to be appended */
-
-	if (pad_cnt == 0) {
-		*res = GDKstrdup(s);
-	} else if (pad_cnt < 0) { /* truncate */
-		s = UTF8_strtail(s, *len);
-		*res = GDKstrndup(*arg1, s - *arg1);
-	} else { /* pad_cnt > 0: fill */
-		size_t i = 0,
-			   s_len = strlen(s),
-			   res_len = pad_cnt + s_len;
-		char *r = GDKmalloc(res_len+1);
-
-		if (r == NULL)
-			throw(MAL, "str.lpad", SQLSTATE(HY001) MAL_MALLOC_FAIL);
-		memcpy(r, s, s_len);
-		for (i = s_len; i < res_len; i++) {
-			r[i] = ' ';
-		}
-		r[res_len] = '\0';
-		*res = r;
-	}
-
+	*res = pad(*arg1, " ", *len, 0);
 	if (*res == NULL)
-		throw(MAL, "str.lpad", SQLSTATE(HY001) MAL_MALLOC_FAIL);
+		throw(MAL, "str.rpad", SQLSTATE(HY001) MAL_MALLOC_FAIL);
 	return MAL_SUCCEED;
 }
 
-/* Fill up 'arg1' to lenth 'len' by prepending characters from 'arg2'
+/* Fill up 'arg1' to length 'len' by prepending characters from 'arg2'
  * If 'arg1' is already longer than 'len', then it's truncated on the right
  * (NB: this is the PostgreSQL definition).
  *
@@ -2210,62 +3784,16 @@ STRRpad(str *res, const str *arg1, const
 str
 STRLpad2(str *res, const str *arg1, const int *len, const str *arg2)
 {
-	const char *s = *arg1;
-	int pad_cnt = *len - UTF8_strlen(s); /* #chars to be prepended */
-
-	if (pad_cnt == 0) {
-		*res = GDKstrdup(s);
-	} else if (pad_cnt < 0) { /* truncate */
-		s = UTF8_strtail(s, *len);
-		*res = GDKstrndup(*arg1, s - *arg1);
-	} else { /* pad_cnt > 0: fill */
-		const char *s2 = *arg2, *s2_tmp = *arg2;
-		char *r = NULL;
-		const unsigned char *u = NULL;
-		int i, c, sz, s2_cnt, nr_repeat, nr_residual;
-		size_t s_len, s2_len, repeat_len, residual_len, res_len;
+	if (**arg2 == 0)
+		throw(MAL, "str.lpad", SQLSTATE(42000) ILLEGAL_ARGUMENT ": pad string is empty");
 
-		i = 0;
-		c = 0;
-		sz = 0;
-		s2_cnt = UTF8_strlen(s2);
-		if (s2_cnt == 0)
-			throw(MAL, "str.lpad", ILLEGAL_ARGUMENT ": pad string is empty");
-		nr_repeat = pad_cnt / s2_cnt;
-		nr_residual = pad_cnt % s2_cnt;
-		s_len = strlen(s);
-		s2_len = strlen(s2);
-		repeat_len = s2_len * nr_repeat;
-		residual_len = 0;
-		res_len = s_len + repeat_len;
-		u = (const unsigned char *) s2_tmp;
-		for (i = 0; i < nr_residual; i++) {
-			UTF8_GETCHAR_SZ(c, sz, u);
-			residual_len += sz;
-		}
-		res_len += residual_len;
-		r = GDKmalloc(res_len+1);
-		if (r == NULL)
-			throw(MAL, "str.lpad", SQLSTATE(HY001) MAL_MALLOC_FAIL);
-		for (i = 0; i < pad_cnt; i++) {
-			r[i] = ' ';
-		}
-
-		for (i = 0; i < nr_repeat; i++) {
-			memcpy(r + s2_len*i, s2, s2_len);
-		}
-		memcpy(r + repeat_len, s2, residual_len);
-		memcpy(r + repeat_len + residual_len, s, s_len);
-		r[res_len] = '\0';
-		*res = r;
-	}
-
+	*res = pad(*arg1, *arg2, *len, 1);
 	if (*res == NULL)
 		throw(MAL, "str.lpad", SQLSTATE(HY001) MAL_MALLOC_FAIL);
 	return MAL_SUCCEED;
 }
 
-/* Fill up 'arg1' to lenth 'len' by appending characters from 'arg2'
+/* Fill up 'arg1' to length 'len' by appending characters from 'arg2'
  * If 'arg1' is already longer than 'len', then it's truncated (on the right)
  * (NB: this is the PostgreSQL definition).
  *
@@ -2275,59 +3803,12 @@ STRLpad2(str *res, const str *arg1, cons
 str
 STRRpad2(str *res, const str *arg1, const int *len, const str *arg2)
 {
-	const char *s = *arg1;
-	int pad_cnt = *len - UTF8_strlen(s); /* #chars to be appended */
-
-	if (pad_cnt == 0) {
-		*res = GDKstrdup(s);
-	} else if (pad_cnt < 0) { /* truncate */
-		s = UTF8_strtail(s, *len);
-		*res = GDKstrndup(*arg1, s - *arg1);
-	} else { /* pad_cnt > 0: fill */
-		const char *s2 = *arg2, *s2_tmp = *arg2;
-		char *r = NULL;
-		const unsigned char *u = NULL;
-		int i, c, sz, s2_cnt, nr_repeat, nr_residual;
-		size_t s_len, s2_len, repeat_len, residual_len, res_len;
+	if (**arg2 == 0)
+		throw(MAL, "str.rpad", SQLSTATE(42000) ILLEGAL_ARGUMENT ": pad string is empty");
 
-		i = 0;
-		c = 0;
-		sz = 0;
-		s2_cnt = UTF8_strlen(s2);
-		if (s2_cnt == 0)
-			throw(MAL, "str.rpad", ILLEGAL_ARGUMENT ": pad string is empty");
-		nr_repeat = pad_cnt / s2_cnt;
-		nr_residual = pad_cnt % s2_cnt;
-		s_len = strlen(s);
-		s2_len = strlen(s2);
-		repeat_len = s2_len * nr_repeat;
-		residual_len = 0;
-		res_len = s_len + repeat_len;
-
-		u = (const unsigned char *)s2_tmp;
-		for (i = 0; i < nr_residual; i++) {
-			UTF8_GETCHAR_SZ(c, sz, u);
-			residual_len += sz;
-		}
-		res_len += residual_len;
-		r = GDKmalloc(res_len+1);
-		if (r == NULL)
-			throw(MAL, "str.lpad", SQLSTATE(HY001) MAL_MALLOC_FAIL);
-		for (i = 0; i < pad_cnt; i++) {
-			r[i] = ' ';
-		}
-
-		memcpy(r, s, s_len);
-		for (i = 0; i < nr_repeat; i++) {
-			memcpy(r + s_len + s2_len*i, s2, s2_len);
-		}
-		memcpy(r + s_len + repeat_len, s2, residual_len);
-		r[res_len] = '\0';
-		*res = r;
-	}
-
+	*res = pad(*arg1, *arg2, *len, 0);
 	if (*res == NULL)
-		throw(MAL, "str.lpad", SQLSTATE(HY001) MAL_MALLOC_FAIL);
+		throw(MAL, "str.rpad", SQLSTATE(HY001) MAL_MALLOC_FAIL);
 	return MAL_SUCCEED;
 }
 
@@ -2453,7 +3934,7 @@ STRinsert(str *ret, const str *s, const 
 			throw(MAL, "str.insert", SQLSTATE(HY001) MAL_MALLOC_FAIL);
 		}
 		if (*l < 0)
-			throw(MAL, "str.insert", ILLEGAL_ARGUMENT);
+			throw(MAL, "str.insert", SQLSTATE(42000) ILLEGAL_ARGUMENT);
 		if (strt < 0) {
 			if ((size_t) -strt <= l1)
 				strt = (int) (l1 + strt);