diff --git a/3rd/str/.editorconfig b/3rd/str/.editorconfig
new file mode 100644
index 0000000..76bd3a3
--- /dev/null
+++ b/3rd/str/.editorconfig
@@ -0,0 +1,11 @@
+root = true
+
+[*]
+indent_style = tab
+indent_size = 4
+trim_trailing_whitespace = true
+insert_final_newline = true
+end_of_line = lf
+
+[Makefile]
+indent_size = 8
diff --git a/3rd/str/.gitignore b/3rd/str/.gitignore
new file mode 100644
index 0000000..c740f5e
--- /dev/null
+++ b/3rd/str/.gitignore
@@ -0,0 +1,4 @@
+test
+flto-test
+*.bak
+tools/gen-char-class
diff --git a/3rd/str/LICENSE b/3rd/str/LICENSE
new file mode 100644
index 0000000..60be582
--- /dev/null
+++ b/3rd/str/LICENSE
@@ -0,0 +1,30 @@
+BSD 3-Clause License
+
+Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/3rd/str/Makefile b/3rd/str/Makefile
new file mode 100644
index 0000000..acc3214
--- /dev/null
+++ b/3rd/str/Makefile
@@ -0,0 +1,51 @@
+# flags
+CC_WARN := -Wall -Wextra -Werror=implicit-function-declaration -Wformat -Werror=format-security
+
+ifeq ($(CC),musl-gcc)
+# musl is ISO 10646 compliant but doesn't define __STDC_ISO_10646__
+CC_EXTRA := -D__STDC_ISO_10646__=201706L
+else
+# sanitisers only work for non-musl builds
+CC_SAN := -fsanitize=address -fsanitize=leak -fsanitize=undefined -fsanitize-address-use-after-scope
+endif
+
+test: CFLAGS := -ggdb -std=c11 -pipe $(CC_WARN) $(CC_EXTRA) -fno-omit-frame-pointer $(CC_SAN)
+flto-test: CFLAGS := -s -O2 -pipe -std=c11 $(CC_WARN) $(CC_EXTRA) -flto -march=native -mtune=native
+tools: CFLAGS := -s -O2 -pipe -std=c11 $(CC_WARN) $(CC_EXTRA)
+
+# str library source files
+SRC := str.c str.h str_test.c
+
+# all
+.PHONY: all
+all: tools test flto-test
+
+.PHONY: clean
+clean: clean-test clean-tools
+
+# test
+test: $(SRC)
+ $(CC) $(CFLAGS) -o $@ $(filter %.c,$^)
+ ./$@
+
+flto-test: $(SRC)
+ $(CC) $(CFLAGS) -o $@ $(filter %.c,$^)
+ ./$@
+
+.PHONY: clean-test
+clean-test:
+ rm -f test flto-test
+
+# tools
+GEN_CHAR_CLASS := tools/gen-char-class
+
+.PHONY: tools
+tools: $(GEN_CHAR_CLASS)
+
+# gen-char-class
+$(GEN_CHAR_CLASS): tools/gen_char_class.c
+ $(CC) $(CFLAGS) -o $@ $(filter %.c,$^)
+
+.PHONY: clean-tools
+clean-tools:
+ rm -f $(GEN_CHAR_CLASS)
diff --git a/3rd/str/README.md b/3rd/str/README.md
new file mode 100644
index 0000000..2ea794f
--- /dev/null
+++ b/3rd/str/README.md
@@ -0,0 +1,440 @@
+# str: yet another string library for C language.
+
+[License: BSD 3-Clause](https://opensource.org/licenses/BSD-3-Clause)
+
+## Motivation
+
+Bored with developing the same functionality over and over again, unsatisfied
+with existing libraries, so decided to make the right one, once and forever. 🙂
+
+## Features
+
+* Handles both C and binary strings;
+* Light-weight references to strings: cheap to create, copy, or pass by value;
+* Support for copy and move semantics, although not enforceable by the C language;
+* String composition functions writing to memory, file descriptors, or file streams;
+* Can be compiled using `gcc` or `clang`, and linked with `libc` or `musl`.
+
+## Installation
+Just clone the project and copy (or symlink) the files `str.h` and `str.c` into your project,
+but please respect the [license](LICENSE).
+
+## Code Examples
+
+String composition:
+
+```C
+str s = str_null;
+
+str_join(&s, str_lit(", "),
+ str_lit("Here"),
+ str_lit("there"),
+ str_lit("and everywhere"));
+
+str_cat(&s, s, str_lit("..."));
+
+assert(str_eq(s, str_lit("Here, there, and everywhere...")));
+str_free(s);
+```
+
+Same as above, but writing to a file:
+
+```C
+FILE* const stream = fopen(...);
+
+int err = str_join(stream, str_lit(", "),
+ str_lit("Here"),
+ str_lit("there"),
+ str_lit("and everywhere..."));
+
+if(err != 0) { /* handle the error */ }
+```
+
+[Discussion](https://news.ycombinator.com/item?id=25212864) on Hacker News.
+
+## User Guide
+
+_**Disclaimer:** This is the good old C language, not C++ or Rust, so nothing can be enforced
+on the language level, and certain discipline is required to make sure there is no corrupt
+or leaked memory resulting from using this library._
+
+A string is represented by the type `str` that maintains a pointer to some memory containing the
+actual string, and the length of the string. Objects of type `str` are small enough (a struct
+of a `const char*` and a `size_t`) to be cheap to create, copy (pass by value), and move. The
+`str` structure should be treated as opaque (i.e., do not attempt to directly access or modify
+the fields in this structure). The strings are assumed to be immutable, like those in Java or
+Go, but only by means of `const char*` pointers, so it is actually possible to modify such a
+string, although the required type cast to `char*` offers at least some (mostly psychological)
+protection from changing the string by mistake.
+
+This library focusses only on handling strings, not gradually composing them like
+[StringBuffer](https://docs.oracle.com/javase/7/docs/api/java/lang/StringBuffer.html)
+class in Java.
+
+All string objects must be initialised before use. Uninitialised objects will cause
+undefined behaviour. Use the provided constructors, or `str_null` for empty strings.
+
+There are two kinds of `str` objects: those actually owning the memory they point to, and
+non-owning references. This property can be queried using `str_is_owner` and `str_is_ref`
+functions, otherwise such objects are indistinguishable.
+
+Non-owning string objects are safe to copy and assign to each other, as long as the memory
+they refer to is valid. They do not need to be freed. `str_free` is a no-op for reference
+objects. A reference object can be cheaply created from a C string, a string literal,
+or from a range of bytes.
+
+Owning objects require special treatment, in particular:
+* It is a good idea to have only one owning object per each allocated string, but such
+a string can have many references to its underlying string, as long as those references do not
+outlive the owning object.
+Sometimes this rule may be relaxed for code clarity, like in the above example where
+the owning object is passed directly to a function, but only if the function does not
+store or release the object. When in doubt pass such an object via `str_ref`.
+* Direct assignments (like `s2 = s1;`) to owning objects will certainly leak memory, use
+`str_assign` function instead. In fact, this function can assign to any string object,
+owning or not, so it can be used everywhere, just to avoid any doubt.
+* There is no automatic memory management in C, so every owning object must be released at
+some point using either `str_free` or `str_clear` function. String objects on the stack
+can also be declared as `str_auto` (or `const str_auto`) for automatic cleanup when the variable
+goes out of scope.
+* An owning object can be moved to another location by using `str_move` function. The
+function resets its source object to an empty string.
+* Object ownership can be passed over to another object by using `str_pass` function. The
+function sets its source to a non-owning reference to the original string.
+
+It is technically possible to create a reference to a string that is not
+null-terminated. The library accepts strings without null-terminators, but every new string
+allocated by the library is guaranteed to be null-terminated.
+
+### String Construction
+
+A string object can be constructed from any C string, string literal, or a range of bytes.
+The provided constructors are computationally cheap to apply. Depending on the constructor,
+the new object can either own the actual string it refers to, or be a non-owning reference.
+Constructors themselves do not allocate any memory. Importantly, constructors are the only
+functions in this library that return a string object, while others only assign their results
+through a pointer to a pre-existing string. This makes constructors suitable for initialisation
+of new string objects. In all other situations one should combine construction with assignment,
+for example:
+`str_assign(&dest, str_acquire_chars(buff, n));`
+
+### String Object Properties
+
+Querying a property of a string object (like the length of the string via `str_len`) is a
+cheap operation.
+
+### Assigning, Moving, and Passing String Objects
+
+C language does not allow for operator overloading, so this library provides a function
+`str_assign` that takes a string object and assigns it to the destination object, freeing
+any memory owned by the destination. It is generally recommended to use this function
+everywhere outside object initialisation.
+
+An existing object can be moved over to another location via `str_move` function.
+The function resets the source object to `str_null` to guarantee the correct move semantics.
+The value returned by `str_move` may be either used to initialise a new object, or
+assigned to an existing object using `str_assign`.
+
+An existing object can also be passed over to another location via `str_pass` function. The function
+sets the source object to be a non-owning reference to the original string, otherwise the semantics
+and usage are the same as `str_move`.
+
+### String Composition and Generic Destination
+
+String composition [functions](#string-composition) can write their results to different
+destinations, depending on the _type_ of their `dest` parameter:
+
+* `str*`: result is assigned to the string object;
+* `int`: result is written to the file descriptor;
+* `FILE*`: result is written to the file stream.
+
+The composition functions return 0 on success, or the value of `errno` as retrieved at the point
+of failure (including `ENOMEM` on memory allocation error).
+
+### Detailed Example
+
+Just to make things more clear, here is the same code as in the example above, but with comments:
+```C
+// declare a variable and initialise it with an empty string; could also be declared as "str_auto"
+// to avoid explicit call to str_free() below.
+str s = str_null;
+
+// join the given string literals around the separator (second parameter),
+// storing the result in object "s" (first parameter); in this example we do not check
+// the return values of the composition functions, thus ignoring memory allocation failures,
+// which is probably not the best idea in general.
+str_join(&s, str_lit(", "),
+ str_lit("Here"),
+ str_lit("there"),
+ str_lit("and everywhere"));
+
+// create a new string concatenating "s" and a literal; the function only modifies its
+// destination object "s" after the result is computed, also freeing the destination
+// before the assignment, so it is safe to use "s" as both a parameter and a destination.
+// note: we pass a copy of the owning object "s" as the second parameter, and here it is
+// safe to do so because this particular function does not modify its arguments.
+str_cat(&s, s, str_lit("..."));
+
+// check that we have got the expected result
+assert(str_eq(s, str_lit("Here, there, and everywhere...")));
+
+// finally, free the memory allocated for the string
+str_free(s);
+```
+
+There are some useful [code snippets](snippets.md) provided to assist with writing code using
+this library.
+
+## API brief
+
+`typedef struct { ... } str;`
+The string object.
+
+#### String Properties
+
+`size_t str_len(const str s)`
+Returns the number of bytes in the string referenced by the object.
+
+`const char* str_ptr(const str s)`
+Returns a pointer to the first byte of the string referenced by the object. The pointer is never NULL.
+
+`const char* str_end(const str s)`
+Returns a pointer to the next byte past the end of the string referenced by the object.
+The pointer is never NULL, but it is not guaranteed to point to any valid byte or location.
+For C strings it points to the terminating null character. For any given string `s` the following
+condition is always satisfied: `str_end(s) == str_ptr(s) + str_len(s)`.
+
+`bool str_is_empty(const str s)`
+Returns "true" for empty strings.
+
+`bool str_is_owner(const str s)`
+Returns "true" if the string object is the owner of the memory it references.
+
+`bool str_is_ref(const str s)`
+Returns "true" if the string object does not own the memory it references.
+
+#### String Construction
+
+`str_null`
+Empty string constant.
+
+`str str_lit(s)`
+Constructs a non-owning object from a string literal. Implemented as a macro.
+
+`str str_ref(s)`
+Constructs a non-owning object from either a null-terminated C string, or another `str` object.
+Implemented as a macro.
+
+`str str_ref_chars(const char* const s, const size_t n)`
+Constructs a non-owning object referencing the given range of bytes.
+
+`str str_acquire_chars(const char* const s, const size_t n)`
+Constructs an owning object for the specified range of bytes. The pointer `s` should be safe
+to pass to `free(3)` function.
+
+`str str_acquire(const char* const s)`
+Constructs an owning object from the given C string. The string should be safe to pass to
+`free(3)` function.
+
+`str str_move(str* const ps)`
+Saves the given object to a temporary, resets the source object to `str_null`, and then
+returns the saved object.
+
+`str str_pass(str* const ps)`
+Saves the given object to a temporary, sets the source object to be a non-owning reference to the
+original string, and then returns the saved object.
+
+#### String Deallocation
+
+`void str_free(const str s)`
+Deallocates any memory held by the owning string object. No-op for references. After a call to
+this function the string object is in unknown and unusable state.
+
+String objects on the stack can also be declared as `str_auto` instead of `str` to deallocate
+any memory held by the string when the variable goes out of scope.
+
+#### String Modification
+
+`void str_assign(str* const ps, const str s)`
+Assigns the object `s` to the object pointed to by `ps`. Any memory owned by the target
+object is freed before the assignment.
+
+`void str_clear(str* const ps)`
+Sets the target object to `str_null` after freeing any memory owned by the target.
+
+`void str_swap(str* const s1, str* const s2)`
+Swaps two string objects.
+
+`int str_from_file(str* const dest, const char* const file_name)`
+Reads the entire file (of up to 64MB by default, configurable via `STR_MAX_FILE_SIZE`) into
+the destination string. Returns 0 on success, or the value of `errno` on error.
+
+#### String Comparison
+
+`int str_cmp(const str s1, const str s2)`
+Lexicographically compares the two string objects, with usual semantics.
+
+`bool str_eq(const str s1, const str s2)`
+Returns "true" if the two strings match exactly.
+
+`int str_cmp_ci(const str s1, const str s2)`
+Case-insensitive comparison of two strings, implemented using `strncasecmp(3)`.
+
+`bool str_eq_ci(const str s1, const str s2)`
+Returns "true" if the two strings match case-insensitively.
+
+`bool str_has_prefix(const str s, const str prefix)`
+Tests if the given string `s` starts with the specified prefix.
+
+`bool str_has_suffix(const str s, const str suffix)`
+Tests if the given string `s` ends with the specified suffix.
+
+#### String Composition
+
+`int str_cpy(dest, const str src)`
+Copies the source string referenced by `src` to the
+[generic](#string-composition-and-generic-destination) destination `dest`. Returns 0 on success,
+or the value of `errno` on failure.
+
+`int str_cat_range(dest, const str* src, size_t count)`
+Concatenates `count` strings from the array starting at address `src`, and writes
+the result to the [generic](#string-composition-and-generic-destination) destination `dest`.
+Returns 0 on success, or the value of `errno` on failure.
+
+`int str_cat(dest, ...)`
+Concatenates a variable list of `str` arguments, and writes the result to the
+[generic](#string-composition-and-generic-destination) destination `dest`.
+Returns 0 on success, or the value of `errno` on failure.
+
+`int str_join_range(dest, const str sep, const str* src, size_t count)`
+Joins around `sep` the `count` strings from the array starting at address `src`, and writes
+the result to the [generic](#string-composition-and-generic-destination) destination `dest`.
+Returns 0 on success, or the value of `errno` on failure.
+
+`int str_join(dest, const str sep, ...)`
+Joins a variable list of `str` arguments around `sep` delimiter, and writes the result to the
+[generic](#string-composition-and-generic-destination) destination `dest`.
+Returns 0 on success, or the value of `errno` on failure.
+
+#### Searching and Sorting
+
+`bool str_partition(const str src, const str patt, str* const prefix, str* const suffix)`
+Splits the string `src` on the first match of `patt`, assigning a reference to the part
+of the string before the match to the `prefix` object, and the part after the match to the
+`suffix` object. Returns `true` if a match has been found, or `false` otherwise, also
+setting `prefix` to reference the entire `src` string, and clearing the `suffix` object.
+Empty pattern `patt` never matches.
+
+`void str_sort_range(const str_cmp_func cmp, str* const array, const size_t count)`
+Sorts the given array of `str` objects using the given comparison function. A number
+of typically used comparison functions is also provided:
+* `str_order_asc` (ascending sort)
+* `str_order_desc` (descending sort)
+* `str_order_asc_ci` (ascending case-insensitive sort)
+* `str_order_desc_ci` (descending case-insensitive sort)
+
+`const str* str_search_range(const str key, const str* const array, const size_t count)`
+Binary search for the given key. The input array must be sorted using `str_order_asc`.
+Returns a pointer to the string matching the key, or NULL.
+
+`size_t str_partition_range(bool (*pred)(const str), str* const array, const size_t count)`
+Reorders the string objects in the given range in such a way that all elements for which
+the predicate `pred` returns "true" precede the elements for which predicate `pred`
+returns "false". Returns the number of preceding objects.
+
+`size_t str_unique_range(str* const array, const size_t count)`
+Reorders the string objects in the given range in such a way that there are two partitions:
+one where each object is unique within the input range, and another partition with all the
+remaining objects. The unique partition is stored at the beginning of the array, and is
+sorted in ascending order, followed by the partition with all remaining objects.
+Returns the number of unique objects.
+
+#### UNICODE support
+
+`for_each_codepoint(var_name, src_string)`
+A macro that expands to a loop iterating over the given string `src_string` (of type `str`) by UTF-32
+code points. On each iteration the variable `var_name` (of type `char32_t`) is assigned
+the value of the next valid UTF-32 code point from the source string. Upon exit from the loop the
+variable has one of the following values:
+* `CPI_END_OF_STRING`: the iteration has reached the end of source string;
+* `CPI_ERR_INCOMPLETE_SEQ`: an incomplete byte sequence has been detected;
+* `CPI_ERR_INVALID_ENCODING`: an invalid byte sequence has been detected.
+
+The source string is expected to be encoded in the _current program locale_, as set by the most
+recent call to `setlocale(3)`.
+
+Usage pattern:
+```c
+#include <uchar.h>
+...
+str s = ...
+...
+char32_t c; // variable to receive UTF-32 values on each iteration
+
+for_each_codepoint(c, s)
+{
+ /* process c */
+}
+
+if(c != CPI_END_OF_STRING)
+{
+ /* handle error */
+}
+```
+
+#### Tokeniser
+
+Tokeniser interface provides functionality similar to `strtok(3)` function. The tokeniser
+is fully re-entrant with no hidden state, and its input string is not modified while being
+parsed.
+
+##### Typical usage:
+```C
+// declare and initialise tokeniser state
+str_tok_state state;
+
+str_tok_init(&state, source_string, delimiter_set);
+
+// object to receive tokens
+str token = str_null;
+
+// token iterator
+while(str_tok(&token, &state))
+{
+ /* process "token" */
+}
+```
+
+##### Tokeniser API
+
+`void str_tok_init(str_tok_state* const state, const str src, const str delim_set)`
+Initialises tokeniser state with the given source string and delimiter set. The delimiter set
+is treated as bytes, _not_ as UNICODE code points encoded in UTF-8.
+
+`bool str_tok(str* const dest, str_tok_state* const state)`
+Retrieves the next token and stores it in the `dest` object. Returns `true` if the token has
+been read, or `false` if the end of input has been reached. Retrieved token is always
+a reference to a slice of the source string.
+
+`void str_tok_delim(str_tok_state* const state, const str delim_set)`
+Changes the delimiter set associated with the given tokeniser state. The delimiter set is
+treated as bytes, _not_ as UNICODE code points encoded in UTF-8.
+
+## Tools
+
+All the tools are located in `tools/` directory. Currently, there are the following tools:
+
+* `file-to-str`: The script takes a file (text or binary) and a C variable name, and
+writes to `stdout` C source code where the variable (of type `str`) is defined
+and initialised with the content of the file.
+
+* `gen-char-class`: Generates character classification functions that do the same as their
+`isw*()` counterparts under the current locale as specified by `LC_ALL` environment variable.
+Run `tools/gen-char-class --help` for further details, or `tools/gen-char-class --space`
+to see an example of its output.
+
+## Project Status
+The library requires at least a C11 compiler. So far it has been tested on Linux Mint versions
+from 19.3 to 22.0, with `gcc` versions from 9.5.0 to 13.2.0 (with either `libc` or `musl`),
+and `clang` versions up to 18.1.3; it is also reported to work on ALT Linux 9.1 for Elbrus, with
+`lcc` version 1.25.09.
diff --git a/3rd/str/snippets.md b/3rd/str/snippets.md
new file mode 100644
index 0000000..8528890
--- /dev/null
+++ b/3rd/str/snippets.md
@@ -0,0 +1,63 @@
+### Code Examples
+
+Here I provide various (hopefully, useful) functions and code examples that are not included in the
+main library. Some examples use non-POSIX and/or compiler-specific features that may or may
+not be suitable for a particular project. Also, these snippets were tested while being developed,
+but they may break in the future as the library evolves.
+
+##### `void str_sprintf(str* const dest, const char* fmt, ...)`
+
+Probably the simplest implementation utilising non-POSIX `asprintf(3)` function:
+```C
+#define _GNU_SOURCE
+
+#include "str.h"
+
+#define str_sprintf(dest, fmt, ...) \
+({ \
+ char* ___p; \
+ const int ___n = asprintf(&___p, (fmt), ##__VA_ARGS__); \
+ str_assign((dest), str_acquire_chars(___p, ___n)); \
+})
+```
+This code does not check for errors. A more standard-conforming implementation would probably go
+through `open_memstream(3)` function.
+
+##### `int str_from_int(str* const dest, const int val)`
+```C
+int str_from_int(str* const dest, const int val)
+{
+ char buff[256]; // of some "big enough" size
+
+ return str_cpy(dest, str_ref_chars(buff, snprintf(buff, sizeof(buff), "%d", val)));
+}
+```
+
+This code can also be used as a template for other functions converting from `double`, `struct tm`, etc.
+
+##### `int str_append(str* const dest, ...)`
+```C
+#define str_append(dest, ...) \
+ ({ str* const ___p = (dest); str_cat(___p, *___p, ##__VA_ARGS__); })
+```
+Test case and usage example:
+```C
+ str s = str_lit("zzz");
+
+ assert(str_append(&s, str_lit(" "), str_lit("aaa")) == 0);
+ assert(str_eq(s, str_lit("zzz aaa")));
+
+ str_free(s);
+```
+
+##### Using `str` objects with `printf` family of functions
+
+Since a string object is not guaranteed to refer to a null-terminated string it should be formatted
+with explicitly specified length, for example:
+```C
+ str s = ...
+
+ printf("%.*s\n", (int)str_len(s), str_ptr(s));
+```
+_Note:_ The maximum length of the string is limited to `INT_MAX` bytes, and formatting will stop
+at the first null byte within the string.
diff --git a/3rd/str.c b/3rd/str/str.c
similarity index 100%
rename from 3rd/str.c
rename to 3rd/str/str.c
diff --git a/3rd/str.h b/3rd/str/str.h
similarity index 100%
rename from 3rd/str.h
rename to 3rd/str/str.h
diff --git a/3rd/str/str_test.c b/3rd/str/str_test.c
new file mode 100644
index 0000000..f877916
--- /dev/null
+++ b/3rd/str/str_test.c
@@ -0,0 +1,907 @@
+/*
+BSD 3-Clause License
+
+Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define _POSIX_C_SOURCE 200809L
+
+#include "str.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <locale.h>
+#include <errno.h>
+
+// make sure assert is always enabled
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
+#include <assert.h>
+
+#define passed printf("passed: %s\n", __func__)
+
+// str_lit() must produce a 3-byte, non-owning reference equal to the literal.
+static void test_str_lit(void)
+{
+	const str lit = str_lit("ZZZ");
+
+	// length first, then ownership, then content
+	assert(str_len(lit) == 3);
+	assert(str_is_ref(lit));
+	assert(!str_is_owner(lit));
+	assert(str_eq(lit, str_lit("ZZZ")));
+	passed;
+}
+
+// str_cpy() into a str* must leave an owning, null-terminated copy in the destination.
+static void test_str_cpy(void)
+{
+	str_auto copy = str_null;	// released automatically at scope exit
+
+	assert(str_cpy(&copy, str_lit("ZZZ")) == 0);
+
+	// the destination now owns a null-terminated duplicate of the source
+	assert(str_len(copy) == 3);
+	assert(!str_is_ref(copy));
+	assert(str_is_owner(copy));
+	assert(str_eq(copy, str_lit("ZZZ")));
+	assert(*str_end(copy) == 0);
+	passed;
+}
+
+// str_clear() on an owning string must leave an empty, non-owning object behind.
+static void test_str_clear(void)
+{
+	str victim = str_null;
+
+	// start from an owning, null-terminated string
+	assert(str_cpy(&victim, str_lit("ZZZ")) == 0);
+	assert(str_len(victim) == 3);
+	assert(str_is_owner(victim));
+	assert(*str_end(victim) == 0);
+
+	str_clear(&victim);
+
+	// after clearing: empty and non-owning
+	assert(str_is_empty(victim));
+	assert(str_is_ref(victim));
+	passed;
+}
+
+// str_move() must transfer ownership and reset the source to an empty string.
+static void test_str_move(void)
+{
+	str src = str_null;
+
+	assert(str_cpy(&src, str_lit("ZZZ")) == 0);
+
+	str dst = str_move(&src);
+
+	// the source is left empty and non-owning...
+	assert(str_is_empty(src));
+	assert(str_is_ref(src));
+	// ...while the destination owns the original content
+	assert(str_is_owner(dst));
+	assert(str_eq(dst, str_lit("ZZZ")));
+	str_free(dst);
+	passed;
+}
+
+// str_pass() must hand ownership over while the source keeps referencing the string.
+static void test_str_pass(void)
+{
+	str src = str_null;
+
+	assert(str_cpy(&src, str_lit("ZZZ")) == 0);
+
+	str dst = str_pass(&src);
+
+	// the source is demoted to a non-owning reference to the same content...
+	assert(str_is_ref(src));
+	assert(str_eq(src, str_lit("ZZZ")));
+	// ...and the destination is now the owner
+	assert(str_is_owner(dst));
+	assert(str_eq(dst, str_lit("ZZZ")));
+	str_free(dst);
+	passed;
+}
+
+// str_ref() must yield a non-owning reference from a literal, a str, or a C string pointer.
+static void test_str_ref(void)
+{
+	// from a string literal
+	str ref = str_ref("ZZZ");
+
+	assert(str_len(ref) == 3);
+	assert(str_is_ref(ref));
+
+	// from another str object
+	ref = str_ref(ref);
+	assert(str_is_ref(ref));
+	assert(str_eq(ref, str_lit("ZZZ")));
+
+	// from a C string pointer
+	const char* const cstr = "ZZZ";
+
+	ref = str_ref(cstr);
+	assert(str_is_ref(ref));
+	assert(str_eq(ref, str_lit("ZZZ")));
+	passed;
+}
+
+// str_cmp() ordering against itself, shorter/longer strings, and the empty string.
+static void test_str_cmp(void)
+{
+	const str_auto base = str_lit("zzz");
+
+	assert(str_cmp(base, base) == 0);
+	assert(str_cmp(base, str_lit("zzz")) == 0);
+	assert(str_cmp(base, str_lit("zz")) > 0);
+	assert(str_cmp(base, str_lit("zzzz")) < 0);
+	assert(str_cmp(base, str_null) > 0);
+	assert(str_cmp(str_null, base) < 0);
+	assert(str_cmp(str_null, str_null) == 0);
+	assert(str_eq(base, str_lit("zzz")));
+
+	passed;
+}
+
+// str_cmp_ci() / str_eq_ci() must order strings the same way regardless of letter case.
+static void test_str_cmp_ci(void)
+{
+	const str base = str_lit("zzz");
+
+	assert(str_cmp_ci(base, base) == 0);
+	assert(str_cmp_ci(base, str_lit("zzz")) == 0);
+	assert(str_cmp_ci(base, str_lit("zz")) > 0);
+	assert(str_cmp_ci(base, str_lit("zzzz")) < 0);
+	assert(str_cmp_ci(base, str_null) > 0);
+	assert(str_cmp_ci(str_null, base) < 0);
+	assert(str_cmp_ci(str_null, str_null) == 0);
+	// upper-case counterparts must give the same results
+	assert(str_cmp_ci(base, str_lit("ZZZ")) == 0);
+	assert(str_cmp_ci(base, str_lit("ZZ")) > 0);
+	assert(str_cmp_ci(base, str_lit("ZZZZ")) < 0);
+	assert(str_eq_ci(base, str_lit("ZZZ")));
+	passed;
+}
+
+// str_acquire() must wrap a heap-allocated C string in an owning object.
+static void test_str_acquire(void)
+{
+	str_auto owned = str_acquire(strdup("ZZZ"));	// cleaned up when the variable leaves scope
+
+	assert(str_is_owner(owned));
+	assert(str_eq(owned, str_lit("ZZZ")));
+	assert(*str_end(owned) == 0);
+
+	passed;
+}
+
+// str_cat() into a str*: regular concatenation, then concatenation of empty strings only.
+static void test_str_cat(void)
+{
+	str out = str_null;
+
+	assert(str_cat(&out, str_lit("AAA"), str_lit("BBB"), str_lit("CCC")) == 0);
+
+	assert(str_eq(out, str_lit("AAABBBCCC")));
+	assert(str_is_owner(out));
+	assert(*str_end(out) == 0);
+
+	// concatenating nothing but empty strings simply clears the target string
+	assert(str_cat(&out, str_null, str_null, str_null) == 0);
+	assert(str_is_empty(out));
+	assert(str_is_ref(out));
+
+	passed;
+}
+
+// str_join() into a str*: every combination of empty and non-empty parts around "_".
+static void test_str_join(void)
+{
+	str out = str_null;
+
+	// all parts non-empty
+	assert(str_join(&out, str_lit("_"), str_lit("AAA"), str_lit("BBB"), str_lit("CCC")) == 0);
+	assert(str_eq(out, str_lit("AAA_BBB_CCC")));
+	assert(str_is_owner(out));
+	assert(*str_end(out) == 0);
+
+	// empty first part
+	assert(str_join(&out, str_lit("_"), str_null, str_lit("BBB"), str_lit("CCC")) == 0);
+	assert(str_eq(out, str_lit("_BBB_CCC")));
+	assert(str_is_owner(out));
+	assert(*str_end(out) == 0);
+
+	// empty middle part
+	assert(str_join(&out, str_lit("_"), str_lit("AAA"), str_null, str_lit("CCC")) == 0);
+	assert(str_eq(out, str_lit("AAA__CCC")));
+	assert(str_is_owner(out));
+	assert(*str_end(out) == 0);
+
+	// empty last part
+	assert(str_join(&out, str_lit("_"), str_lit("AAA"), str_lit("BBB"), str_null) == 0);
+	assert(str_eq(out, str_lit("AAA_BBB_")));
+	assert(str_is_owner(out));
+	assert(*str_end(out) == 0);
+
+	// all parts empty: only the separators remain
+	assert(str_join(&out, str_lit("_"), str_null, str_null, str_null) == 0);
+	assert(str_eq(out, str_lit("__")));
+	assert(str_is_owner(out));
+	assert(*str_end(out) == 0);
+
+	// joining with an empty separator and no parts simply clears the target string
+	assert(str_join(&out, str_null) == 0);
+	assert(str_is_empty(out));
+	assert(str_is_ref(out));
+
+	passed;
+}
+
+// The README example: the destination is also passed as an input to str_join() and str_cat().
+static void test_composition(void)
+{
+	str_auto text = str_lit(", ");	// starts out as the separator
+
+	// in both calls the destination doubles as one of the source arguments
+	assert(str_join(&text, text, str_lit("Here"), str_lit("there"), str_lit("and everywhere")) == 0);
+	assert(str_cat(&text, text, str_lit("...")) == 0);
+
+	assert(str_eq(text, str_lit("Here, there, and everywhere...")));
+	assert(str_is_owner(text));
+	assert(*str_end(text) == 0);
+	passed;
+}
+
+// Case-sensitive sorting of an array of strings, in both directions.
+static void test_sort(void)
+{
+	str items[] = { str_lit("z"), str_lit("zzz"), str_lit("aaa"), str_lit("bbb") };
+	const size_t count = sizeof(items)/sizeof(items[0]);
+
+	// ascending order
+	str_sort_range(str_order_asc, items, count);
+	assert(str_eq(items[0], str_lit("aaa")));
+	assert(str_eq(items[1], str_lit("bbb")));
+	assert(str_eq(items[2], str_lit("z")));
+	assert(str_eq(items[3], str_lit("zzz")));
+
+	// descending order
+	str_sort_range(str_order_desc, items, count);
+	assert(str_eq(items[0], str_lit("zzz")));
+	assert(str_eq(items[1], str_lit("z")));
+	assert(str_eq(items[2], str_lit("bbb")));
+	assert(str_eq(items[3], str_lit("aaa")));
+	passed;
+}
+
+// Case-insensitive sorting in both directions; strings equal ignoring case are only compared ignoring case.
+static void test_sort_ci(void)
+{
+	str items[] = { str_lit("ZZZ"), str_lit("zzz"), str_lit("aaa"), str_lit("AAA") };
+	const size_t count = sizeof(items)/sizeof(items[0]);
+
+	// ascending order
+	str_sort_range(str_order_asc_ci, items, count);
+	assert(str_eq_ci(items[0], str_lit("aaa")));
+	assert(str_eq_ci(items[1], str_lit("aaa")));
+	assert(str_eq_ci(items[2], str_lit("zzz")));
+	assert(str_eq_ci(items[3], str_lit("zzz")));
+
+	// descending order
+	str_sort_range(str_order_desc_ci, items, count);
+	assert(str_eq_ci(items[0], str_lit("zzz")));
+	assert(str_eq_ci(items[1], str_lit("zzz")));
+	assert(str_eq_ci(items[2], str_lit("aaa")));
+	assert(str_eq_ci(items[3], str_lit("aaa")));
+	passed;
+}
+
+// str_search_range() on a sorted array: finds each element, returns NULL for a missing one.
+static void test_search(void)
+{
+	str items[] = { str_lit("z"), str_lit("zzz"), str_lit("aaa"), str_lit("bbb") };
+	const size_t count = sizeof(items)/sizeof(items[0]);
+
+	str_sort_range(str_order_asc, items, count);
+
+	// every element must be found at its own position
+	for(size_t i = 0; i < count; ++i)
+		assert(str_search_range(items[i], items, count) == &items[i]);
+
+	assert(str_search_range(str_lit("xxx"), items, count) == NULL);
+
+	passed;
+}
+
+// str_has_prefix(): positive and negative cases.
+static void test_prefix(void)
+{
+	const str subject = str_lit("abcd");
+
+	// matching prefixes, from the empty string up to the whole subject
+	assert(str_has_prefix(subject, str_null));
+	assert(str_has_prefix(subject, str_lit("a")));
+	assert(str_has_prefix(subject, str_lit("ab")));
+	assert(str_has_prefix(subject, str_lit("abc")));
+	assert(str_has_prefix(subject, str_lit("abcd")));
+
+	assert(!str_has_prefix(subject, str_lit("zzz")));
+	assert(!str_has_prefix(subject, str_lit("abcde")));
+	passed;
+}
+
+// str_has_suffix(): positive and negative cases.
+static void test_suffix(void)
+{
+	const str subject = str_lit("abcd");
+
+	// matching suffixes, from the empty string up to the whole subject
+	assert(str_has_suffix(subject, str_null));
+	assert(str_has_suffix(subject, str_lit("d")));
+	assert(str_has_suffix(subject, str_lit("cd")));
+	assert(str_has_suffix(subject, str_lit("bcd")));
+	assert(str_has_suffix(subject, str_lit("abcd")));
+
+	assert(!str_has_suffix(subject, str_lit("zzz")));
+	assert(!str_has_suffix(subject, str_lit("_abcd")));
+	passed;
+}
+
+// str_cpy() to a file descriptor: the bytes must be readable back from the temporary file.
+static void test_cpy_to_fd(void)
+{
+	static const char expected[] = "ZZZ";
+	char readback[32];
+	FILE* const tmp = tmpfile();
+
+	assert(tmp != NULL);
+	assert(str_cpy(fileno(tmp), str_lit("ZZZ")) == 0);
+
+	// read the data back through the stream and compare
+	rewind(tmp);
+	assert(fread(readback, 1, sizeof(readback), tmp) == sizeof(expected) - 1);
+	assert(memcmp(readback, expected, sizeof(expected) - 1) == 0);
+
+	fclose(tmp);
+	passed;
+}
+
+// str_cpy() to a FILE stream: the bytes must be readable back from the temporary file.
+static void test_cpy_to_stream(void)
+{
+	static const char expected[] = "ZZZ";
+	char readback[32];
+	FILE* const tmp = tmpfile();
+
+	assert(tmp != NULL);
+	assert(str_cpy(tmp, str_lit("ZZZ")) == 0);
+
+	// flush the stream before reading the data back
+	assert(fflush(tmp) == 0);
+	rewind(tmp);
+	assert(fread(readback, 1, sizeof(readback), tmp) == sizeof(expected) - 1);
+	assert(memcmp(readback, expected, sizeof(expected) - 1) == 0);
+
+	fclose(tmp);
+	passed;
+}
+
+// str_cat_range() to a file descriptor: empty strings in the range contribute nothing to the output.
+static void test_cat_range_to_fd(void)
+{
+	static const char expected[] = "aaabbbcccddd";
+	const size_t expected_len = sizeof(expected) - 1;
+
+	const str parts[] = {
+		str_lit("aaa"),
+		str_lit("bbb"),
+		str_null,
+		str_lit("ccc"),
+		str_lit("ddd"),
+		str_null,
+		str_null
+	};
+
+	const size_t count = sizeof(parts)/sizeof(parts[0]);
+	FILE* const tmp = tmpfile();
+	char readback[32];
+
+	assert(tmp != NULL);
+	assert(str_cat_range(fileno(tmp), parts, count) == 0);
+
+	// read everything back and compare with the expected concatenation
+	rewind(tmp);
+
+	assert(fread(readback, 1, sizeof(readback), tmp) == expected_len);
+	assert(memcmp(readback, expected, expected_len) == 0);
+
+	fclose(tmp);
+	passed;
+}
+
+// str_cat_range() to a file descriptor with a large number of input strings.
+static void test_cat_large_range_to_fd(void)
+{
+	// prepare data: one decimal number per string, each followed by a newline
+	const size_t n = 100000;
+	str* const items = calloc(n, sizeof(str));
+
+	assert(items != NULL);
+
+	char digits[100];
+
+	for(unsigned k = 0; k < n; ++k)
+		assert(str_cpy(&items[k], str_ref_chars(digits, sprintf(digits, "%u\n", k))) == 0);
+
+	// write to file
+	FILE* const tmp = tmpfile();
+
+	assert(tmp != NULL);
+	assert(str_cat_range(fileno(tmp), items, n) == 0);
+
+	// release input data
+	for(unsigned k = 0; k < n; ++k)
+		str_free(items[k]);
+
+	free(items);
+
+	// validate: the k-th line must hold the number k
+	rewind(tmp);
+
+	char* line = NULL;
+	size_t cap = 0;
+	int expected = 0;
+
+	while(getline(&line, &cap, tmp) >= 0)
+		assert(atoi(line) == expected++);
+
+	// every line has been seen
+	assert(expected == (int)n);
+
+	// all done
+	fclose(tmp);
+	free(line);
+	passed;
+}
+
+// str_cat_range() to a FILE stream: empty strings in the range contribute nothing to the output.
+static void test_cat_range_to_stream(void)
+{
+	static const char expected[] = "aaabbbcccddd";
+	const size_t expected_len = sizeof(expected) - 1;
+
+	const str parts[] = {
+		str_lit("aaa"),
+		str_lit("bbb"),
+		str_null,
+		str_lit("ccc"),
+		str_lit("ddd"),
+		str_null,
+		str_null
+	};
+
+	const size_t count = sizeof(parts)/sizeof(parts[0]);
+	FILE* const tmp = tmpfile();
+	char readback[32];
+
+	assert(tmp != NULL);
+	assert(str_cat_range(tmp, parts, count) == 0);
+
+	// flush the stream, then read everything back and compare
+	assert(fflush(tmp) == 0);
+	rewind(tmp);
+
+	assert(fread(readback, 1, sizeof(readback), tmp) == expected_len);
+	assert(memcmp(readback, expected, expected_len) == 0);
+
+	fclose(tmp);
+	passed;
+}
+
+// str_join() to a file descriptor: the joined text must be readable back from the temporary file.
+static void test_join_to_fd(void)
+{
+	static const char expected[] = "aaa_bbb_ccc";
+	const size_t expected_len = sizeof(expected) - 1;
+	char readback[32];
+	FILE* const tmp = tmpfile();
+
+	assert(tmp != NULL);
+	assert(str_join(fileno(tmp), str_lit("_"), str_lit("aaa"), str_lit("bbb"), str_lit("ccc")) == 0);
+
+	// read everything back and compare
+	rewind(tmp);
+
+	assert(fread(readback, 1, sizeof(readback), tmp) == expected_len);
+	assert(memcmp(readback, expected, expected_len) == 0);
+
+	fclose(tmp);
+	passed;
+}
+
+// str_join_range() to a file descriptor with a large number of input strings.
+static void test_join_large_range_to_fd(void)
+{
+	// prepare data: one decimal number per string; the newlines come from the separator
+	const size_t n = 100000;
+	str* const items = calloc(n, sizeof(str));
+
+	assert(items != NULL);
+
+	char digits[100];
+
+	for(unsigned k = 0; k < n; ++k)
+		assert(str_cpy(&items[k], str_ref_chars(digits, sprintf(digits, "%u", k))) == 0);
+
+	// write to file
+	FILE* const tmp = tmpfile();
+
+	assert(tmp != NULL);
+	assert(str_join_range(fileno(tmp), str_lit("\n"), items, n) == 0);
+
+	// release input data
+	for(unsigned k = 0; k < n; ++k)
+		str_free(items[k]);
+
+	free(items);
+
+	// validate: the k-th line must hold the number k
+	rewind(tmp);
+
+	char* line = NULL;
+	size_t cap = 0;
+	int expected = 0;
+
+	while(getline(&line, &cap, tmp) >= 0)
+		assert(atoi(line) == expected++);
+
+	// every line has been seen
+	assert(expected == (int)n);
+
+	// all done
+	fclose(tmp);
+	free(line);
+	passed;
+}
+
+// str_join() to a FILE stream: the joined text must be readable back from the temporary file.
+static void test_join_to_stream(void)
+{
+	static const char expected[] = "aaa_bbb_ccc";
+	const size_t expected_len = sizeof(expected) - 1;
+	char readback[32];
+	FILE* const tmp = tmpfile();
+
+	assert(tmp != NULL);
+	assert(str_join(tmp, str_lit("_"), str_lit("aaa"), str_lit("bbb"), str_lit("ccc")) == 0);
+
+	// flush the stream, then read everything back and compare
+	assert(fflush(tmp) == 0);
+	rewind(tmp);
+
+	assert(fread(readback, 1, sizeof(readback), tmp) == expected_len);
+	assert(memcmp(readback, expected, expected_len) == 0);
+
+	fclose(tmp);
+	passed;
+}
+
+// Predicate for the partitioning test: true for strings of length below 2.
+static bool part_pred(const str s) { const size_t len = str_len(s); return len < 2; }
+
+// str_partition_range(): moves the strings matching the predicate to the front, returning their number.
+static void test_partition_range(void)
+{
+	str items[] = { str_lit("aaa"), str_lit("a"), str_lit("aaaa"), str_lit("z") };
+	const size_t count = sizeof(items)/sizeof(items[0]);
+
+	assert(str_partition_range(part_pred, items, 1) == 0);
+	assert(str_partition_range(part_pred, items, count) == 2);
+	assert(str_eq(items[0], str_lit("a")));
+	assert(str_eq(items[1], str_lit("z")));
+	assert(str_partition_range(part_pred, items, 1) == 1);
+
+	items[0] = str_lit("?");
+	items[2] = str_lit("*");
+	assert(str_partition_range(part_pred, items, count) == 3);
+	assert(str_eq(items[0], str_lit("?")));
+	assert(str_eq(items[1], str_lit("z")));
+	assert(str_eq(items[2], str_lit("*")));
+	assert(str_eq(items[3], str_lit("aaa")));
+
+	// degenerate ranges: a null array, and a zero count
+	assert(str_partition_range(part_pred, NULL, 42) == 0);
+	assert(str_partition_range(part_pred, items, 0) == 0);
+
+	passed;
+}
+
+// str_unique_range(): returns the number of distinct strings, which end up at the front of the array.
+static void test_unique_range(void)
+{
+	str items[] = {
+		str_lit("zzz"),
+		str_lit("aaa"),
+		str_lit("zzz"),
+		str_lit("bbb"),
+		str_lit("aaa"),
+		str_lit("ccc"),
+		str_lit("ccc"),
+		str_lit("aaa"),
+		str_lit("ccc"),
+		str_lit("zzz")
+	};
+
+	// duplicates are dropped; the remaining strings come out in ascending order
+	assert(str_unique_range(items, sizeof(items)/sizeof(items[0])) == 4);
+	assert(str_eq(items[0], str_lit("aaa")));
+	assert(str_eq(items[1], str_lit("bbb")));
+	assert(str_eq(items[2], str_lit("ccc")));
+	assert(str_eq(items[3], str_lit("zzz")));
+	passed;
+}
+
+// str_from_file(): reads a whole file into an owned string, or returns an errno value on failure.
+static void test_from_file(void)
+{
+	str_auto path = str_null;
+	// the file name is "tmp_" followed by the name of this function
+	assert(str_cat(&path, str_lit("tmp_"), str_ref_chars(__func__, sizeof(__func__) - 1)) == 0);
+
+	FILE* const out = fopen(str_ptr(path), "w");
+
+	assert(out);
+	assert(str_join(out, str_lit(" "), str_lit("aaa"), str_lit("bbb"), str_lit("ccc")) == 0);
+	assert(fclose(out) == 0);
+
+	str_auto content = str_null;
+
+	assert(str_from_file(&content, str_ptr(path)) == 0);
+	unlink(str_ptr(path));
+	assert(str_eq(content, str_lit("aaa bbb ccc")));
+	assert(str_is_owner(content));
+
+	// test errors
+	assert(str_from_file(&content, ".") == EISDIR);
+	assert(str_from_file(&content, "/dev/null") == EOPNOTSUPP);
+	assert(str_from_file(&content, "does-not-exist") == ENOENT);
+
+	passed;
+}
+
+#ifdef __STDC_UTF_32__
+
+// for_each_codepoint(): iterates over a UTF-8 encoded string one code point at a time.
+static void test_codepoint_iterator(void)
+{
+	const str src = str_lit(u8"жёлтый"); // means "yellow" in Russian
+	static const char32_t src32[] = { U'ж', U'ё', U'л', U'т', U'ы', U'й' };
+	size_t i = 0;
+	char32_t c;
+
+	for_each_codepoint(c, src)
+	{
+		assert(i < sizeof(src32)/sizeof(src32[0]));
+		assert(c == src32[i++]);
+	}
+
+	assert(c == CPI_END_OF_STRING);	// the loop variable holds the end marker after the loop
+	assert(i == sizeof(src32)/sizeof(src32[0]));
+
+	// empty string iteration: the body must never run, the end marker must still be set
+	c = 0;
+
+	for_each_codepoint(c, str_null)
+		assert(0);
+
+	assert(c == CPI_END_OF_STRING);
+	passed;
+}
+
+#endif // ifdef __STDC_UTF_32__
+
+// str_tok(): splits a string into non-empty tokens separated by any of the delimiter characters.
+static void test_tok(void)
+{
+	// one test case: the input, the delimiter set, and the expected tokens
+	typedef struct
+	{
+		const str input, delims;
+		const unsigned count;
+		const str expected[3];
+	} test_case;
+
+	static const test_case cases[] =
+	{
+		{
+			str_lit("a,b,c"),
+			str_lit(","),
+			3,
+			{ str_lit("a"), str_lit("b"), str_lit("c") }
+		},
+		{
+			str_lit(",,a,b,,c,"),
+			str_lit(","),
+			3,
+			{ str_lit("a"), str_lit("b"), str_lit("c") }
+		},
+		{
+			str_lit("aaa;=~bbb~,=ccc="),
+			str_lit(",;=~"),
+			3,
+			{ str_lit("aaa"), str_lit("bbb"), str_lit("ccc") }
+		},
+		{
+			str_lit(""),
+			str_lit(","),
+			0,
+			{ }
+		},
+		{
+			str_lit(""),
+			str_lit(""),
+			0,
+			{ }
+		},
+		{
+			str_lit(",.;,.;;.,;.,"),
+			str_lit(",.;"),
+			0,
+			{ }
+		},
+		{
+			str_lit("aaa,bbb,ccc"),
+			str_lit(""),
+			1,
+			{ str_lit("aaa,bbb,ccc") }
+		},
+		{
+			str_lit("aaa,bbb,ccc"),
+			str_lit(";-="),
+			1,
+			{ str_lit("aaa,bbb,ccc") }
+		}
+	};
+
+	for(size_t i = 0; i < sizeof(cases)/sizeof(cases[0]); ++i)
+	{
+		const test_case* const tc = &cases[i];
+
+		unsigned seen = 0;
+		str token = str_null;
+		str_tok_state state;
+
+		str_tok_init(&state, tc->input, tc->delims);
+
+		// each token must be expected, and must match the expectation at its position
+		while(str_tok(&token, &state))
+		{
+			assert(seen < tc->count);
+			assert(str_eq(token, tc->expected[seen]));
+
+			++seen;
+		}
+
+		// no expected token may be missing
+		assert(seen == tc->count);
+	}
+
+	passed;
+}
+
+// str_partition(): splits a string into the parts before and after a pattern.
+static void test_partition(void)
+{
+	typedef struct
+	{
+		const bool res;	// expected return value
+		const str src, patt, pref, suff;	// input, pattern, expected prefix, expected suffix
+	} test_data;
+
+	static const test_data t[] =
+	{
+		{ true, str_lit("...abc..."), str_lit("abc"), str_lit("..."), str_lit("...") },
+		{ true, str_lit("......abc"), str_lit("abc"), str_lit("......"), str_null },
+		{ true, str_lit("abc......"), str_lit("abc"), str_null, str_lit("......") },
+		// single-character pattern
+		{ true, str_lit("...a..."), str_lit("a"), str_lit("..."), str_lit("...") },
+		{ true, str_lit("......a"), str_lit("a"), str_lit("......"), str_null },
+		{ true, str_lit("a......"), str_lit("a"), str_null, str_lit("......") },
+		// empty pattern and/or empty source: no match
+		{ false, str_lit("zzz"), str_null, str_lit("zzz"), str_null },
+		{ false, str_null, str_lit("zzz"), str_null, str_null },
+		{ false, str_null, str_null, str_null, str_null },
+		// mismatches, a partial match followed by a full one, and a multi-byte UTF-8 pattern
+		{ false, str_lit("...zzz..."), str_lit("xxx"), str_lit("...zzz..."), str_null },
+		{ false, str_lit("...xxz..."), str_lit("xxx"), str_lit("...xxz..."), str_null },
+		{ true, str_lit("...xxz...xxx."), str_lit("xxx"), str_lit("...xxz..."), str_lit(".") },
+		{ true, str_lit(u8"...цифры___"), str_lit(u8"цифры"), str_lit("..."), str_lit("___") }
+	};
+
+	for(unsigned i = 0; i < sizeof(t)/sizeof(t[0]); ++i)
+	{
+		str pref = str_lit("???"), suff = str_lit("???");	// placeholders that every call must overwrite
+
+		assert(str_partition(t[i].src, t[i].patt, &pref, &suff) == t[i].res);
+		assert(str_eq(pref, t[i].pref));
+		assert(str_eq(suff, t[i].suff));
+	}
+
+	passed;
+}
+
+int main(void)
+{
+	// the tests that do not depend on the locale, in execution order
+	static void (* const tests[])(void) = {
+		test_str_lit,
+		test_str_cpy,
+		test_str_clear,
+		test_str_move,
+		test_str_pass,
+		test_str_ref,
+		test_str_cmp,
+		test_str_cmp_ci,
+		test_str_acquire,
+		test_str_cat,
+		test_str_join,
+		test_composition,
+		test_sort,
+		test_sort_ci,
+		test_search,
+		test_prefix,
+		test_suffix,
+		test_cpy_to_fd,
+		test_cpy_to_stream,
+		test_cat_range_to_fd,
+		test_cat_large_range_to_fd,
+		test_cat_range_to_stream,
+		test_join_to_fd,
+		test_join_large_range_to_fd,
+		test_join_to_stream,
+		test_partition_range,
+		test_unique_range,
+		test_from_file,
+		test_tok,
+		test_partition
+	};
+	for(size_t i = 0; i < sizeof(tests)/sizeof(tests[0]); ++i) tests[i]();
+#ifdef __STDC_UTF_32__
+	assert(setlocale(LC_ALL, "C.UTF-8"));	// the code point iterator test runs under a UTF-8 locale
+	test_codepoint_iterator();
+#endif
+	return puts("OK.") < 0;
+}
diff --git a/3rd/str/tools/file-to-str b/3rd/str/tools/file-to-str
new file mode 100755
index 0000000..82366af
--- /dev/null
+++ b/3rd/str/tools/file-to-str
@@ -0,0 +1,30 @@
+#!/bin/sh
+# Write to stdout a C source text defining a constant `str` named VAR-NAME with the bytes of FILE.
+die() {
+ echo >&2 "$@"
+ exit 1
+}
+# validate the arguments: exactly two, the first one naming a regular file
+[ $# -eq 2 ] || die "Usage: $(basename "$0") FILE VAR-NAME"
+[ -f "$1" ] || die "$0: file \"$1\" does not exist, or is not a file."
+# from here on, exit as soon as a command fails
+set -e
+# file header and the opening of the byte array
+cat << EOF
+// AUTOMATICALLY GENERATED FILE - DO NOT EDIT
+
+// source file: $1
+
+#include "str.h"
+
+static
+const char _bytes[] = {
+EOF
+# the bytes of the file as hexadecimal literals, 12 per output line
+od -v -w12 -A n -t x1 "$1" | sed -E 's/\<([[:xdigit:]]{2})\>/0x\1,/g'
+# the terminating null byte (excluded from the string length), and the variable definition
+cat << EOF
+ 0x00 };
+
+const str $2 = (const str){ _bytes, _ref_info(sizeof(_bytes) - 1) };
+EOF
diff --git a/3rd/str/tools/gen_char_class.c b/3rd/str/tools/gen_char_class.c
new file mode 100644
index 0000000..9c829d1
--- /dev/null
+++ b/3rd/str/tools/gen_char_class.c
@@ -0,0 +1,209 @@
+/*
+BSD 3-Clause License
+
+Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <locale.h>
+#include <errno.h>
+#include <wctype.h>
+
+// platform checks
+#ifndef __STDC_ISO_10646__
+#error "this platform does not support UNICODE (__STDC_ISO_10646__ is not defined)"
+#endif
+
+#if __SIZEOF_WCHAR_T__ < 4 || __SIZEOF_WINT_T__ < 4
+#error "this platform does not have a usable wchar_t (both sizeof(wchar_t) and sizeof(wint_t) should be at least 4)"
+#endif
+
+// i/o helpers
+// Print `msg` and the description of the current errno value to stderr, then exit with status 1.
+static __attribute__((noinline, noreturn)) void die(const char* const msg)
+{
+	perror(msg);
+	exit(1);
+}
+// do_printf: printf() to stdout; terminates the program via die() when printf() reports an error
+#define do_printf(fmt, ...) \
+ do { \
+ if(printf(fmt, ##__VA_ARGS__) < 0) \
+ die("error writing output"); \
+ } while(0)
+// do_write: write the char array `str`, less its last byte, to stdout; die() on a short write
+#define do_write(str) \
+ do { \
+ if(fwrite((str), 1, sizeof(str) - 1, stdout) != sizeof(str) - 1) \
+ die("error writing output"); \
+ } while(0)
+
+// character classification function, with the signature shared by the isw*() functions
+typedef int (*selector)(wint_t wc);
+
+// option parser
+// Print the usage message to stderr and exit with status 1.
+static __attribute__((noreturn)) void usage_exit(void)
+{
+	static const char text[] =
+		"Usage: gen-char-class SELECTOR\n"
+		" Generate a character classification C function that does the same as its\n"
+		" isw*() counterpart under the current locale as specified by LC_ALL\n"
+		" environment variable. SELECTOR specifies the classification function\n"
+		" to generate, it must be any one of:\n"
+		" --alnum -> use iswalnum()\n"
+		" --alpha -> use iswalpha()\n"
+		" --blank -> use iswblank()\n"
+		" --cntrl -> use iswcntrl()\n"
+		" --digit -> use iswdigit()\n"
+		" --graph -> use iswgraph()\n"
+		" --lower -> use iswlower()\n"
+		" --print -> use iswprint()\n"
+		" --punct -> use iswpunct()\n"
+		" --space -> use iswspace()\n"
+		" --upper -> use iswupper()\n"
+		" --xdigit -> use iswxdigit()\n";
+
+	fputs(text, stderr);
+	exit(1);
+}
+
+// the selected classification function; assigned by read_opts()
+static selector fn;
+
+// the name of the selected character class, without the "isw" prefix; assigned by read_opts()
+static const char* fn_name;
+
+// the value of the LC_ALL environment variable, or NULL when it is not set; assigned by main()
+static const char* loc;
+
+// Parse the command line: its only argument selects the classification function (sets `fn` and `fn_name`).
+static void read_opts(int argc, char* const argv[])
+{
+	static const struct { const char* opt; const char* name; selector func; } table[] = {
+		{ "--alnum", "alnum", iswalnum },
+		{ "--alpha", "alpha", iswalpha },
+		{ "--blank", "blank", iswblank },
+		{ "--cntrl", "cntrl", iswcntrl },
+		{ "--digit", "digit", iswdigit },
+		{ "--graph", "graph", iswgraph },
+		{ "--lower", "lower", iswlower },
+		{ "--print", "print", iswprint },
+		{ "--punct", "punct", iswpunct },
+		{ "--space", "space", iswspace },
+		{ "--upper", "upper", iswupper },
+		{ "--xdigit", "xdigit", iswxdigit }
+	};
+	if(argc != 2)
+		usage_exit();
+
+	for(size_t i = 0; i < sizeof(table)/sizeof(table[0]); ++i)
+		if(strcmp(argv[1], table[i].opt) == 0)
+		{
+			fn = table[i].func;
+			fn_name = table[i].name;
+			return;
+		}
+
+	if(strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0)
+		usage_exit();
+	fprintf(stderr, "unknown option: \"%s\"\n", argv[1]);
+	exit(1);
+}
+
+// range printing
+// Print one `case` label covering the code points from `first` to `last`, both inclusive.
+static void print_range(const wint_t first, const wint_t last)
+{
+	if(first != last)
+		do_printf("\t\tcase 0x%.2X ... 0x%.2X:\n", first, last);
+	else
+		do_printf("\t\tcase 0x%.2X:\n", first);
+}
+
+// header/footer
+// printf() format for the opening part of the generated function; arguments: locale name, class name
+static const char header[] =
+ "/* LC_ALL = \"%s\" */\n"
+ "bool is_%s(const char32_t c)\n"
+ "{\n"
+ " switch(c)\n"
+ " {\n";
+
+// closing part of the generated function, written after all the `case` labels
+static const char footer[] =
+ " return true;\n"
+ " default:\n"
+ " return false;\n"
+ " }\n"
+ "}\n";
+
+// main; UTF32_MAX_CHAR is the last code point that main() tests
+#define UTF32_MAX_CHAR 0x10ffff
+
+// Print a C function with one `case` label per maximal run of code points accepted by the selected isw*().
+int main(int argc, char* const argv[])
+{
+	read_opts(argc, argv);
+	loc = getenv("LC_ALL");
+	if(loc != NULL && setlocale(LC_ALL, loc) == NULL)
+		die("cannot change current locale");
+
+	errno = 0;
+	do_printf(header, loc ? loc : "", fn_name);
+
+	wint_t run_start = 0;
+	bool in_run = false;
+
+	for(wint_t c = 0; c <= UTF32_MAX_CHAR; ++c)
+	{
+		const bool match = (fn(c) != 0);
+
+		if(match == in_run)
+			continue;
+
+		if(match)
+			run_start = c;
+		else
+			print_range(run_start, c - 1);
+
+		in_run = match;
+	}
+
+	if(in_run)
+		print_range(run_start, UTF32_MAX_CHAR);
+
+	do_write(footer);
+	if(fflush(stdout))
+		die("error writing output");
+	return 0;
+}