finish cleanup
This commit is contained in:
11
3rd/str/.editorconfig
Normal file
11
3rd/str/.editorconfig
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
root = true
|
||||||
|
|
||||||
|
[*]
|
||||||
|
indent_style = tab
|
||||||
|
indent_size = 4
|
||||||
|
trim_trailing_whitespace = true
|
||||||
|
insert_final_newline = true
|
||||||
|
end_of_line = lf
|
||||||
|
|
||||||
|
[Makefile]
|
||||||
|
indent_size = 8
|
||||||
4
3rd/str/.gitignore
vendored
Normal file
4
3rd/str/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
test
|
||||||
|
flto-test
|
||||||
|
*.bak
|
||||||
|
tools/gen-char-class
|
||||||
30
3rd/str/LICENSE
Normal file
30
3rd/str/LICENSE
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
BSD 3-Clause License
|
||||||
|
|
||||||
|
Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer in the documentation
|
||||||
|
and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the copyright holder nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
51
3rd/str/Makefile
Normal file
51
3rd/str/Makefile
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
# flags
|
||||||
|
CC_WARN := -Wall -Wextra -Werror=implicit-function-declaration -Wformat -Werror=format-security
|
||||||
|
|
||||||
|
ifeq ($(CC),musl-gcc)
|
||||||
|
# musl is ISO 10646 compliant but doesn't define __STDC_ISO_10646__
|
||||||
|
CC_EXTRA := -D__STDC_ISO_10646__=201706L
|
||||||
|
else
|
||||||
|
# sanitisers only work for non-musl builds
|
||||||
|
CC_SAN := -fsanitize=address -fsanitize=leak -fsanitize=undefined -fsanitize-address-use-after-scope
|
||||||
|
endif
|
||||||
|
|
||||||
|
test: CFLAGS := -ggdb -std=c11 -pipe $(CC_WARN) $(CC_EXTRA) -fno-omit-frame-pointer $(CC_SAN)
|
||||||
|
flto-test: CFLAGS := -s -O2 -pipe -std=c11 $(CC_WARN) $(CC_EXTRA) -flto -march=native -mtune=native
|
||||||
|
tools: CFLAGS := -s -O2 -pipe -std=c11 $(CC_WARN) $(CC_EXTRA)
|
||||||
|
|
||||||
|
# str library source files
|
||||||
|
SRC := str.c str.h str_test.c
|
||||||
|
|
||||||
|
# all
|
||||||
|
.PHONY: all
|
||||||
|
all: tools test flto-test
|
||||||
|
|
||||||
|
.PHONY: clean
|
||||||
|
clean: clean-test clean-tools
|
||||||
|
|
||||||
|
# test
|
||||||
|
test: $(SRC)
|
||||||
|
$(CC) $(CFLAGS) -o $@ $(filter %.c,$^)
|
||||||
|
./$@
|
||||||
|
|
||||||
|
flto-test: $(SRC)
|
||||||
|
$(CC) $(CFLAGS) -o $@ $(filter %.c,$^)
|
||||||
|
./$@
|
||||||
|
|
||||||
|
.PHONY: clean-test
|
||||||
|
clean-test:
|
||||||
|
rm -f test flto-test
|
||||||
|
|
||||||
|
# tools
|
||||||
|
GEN_CHAR_CLASS := tools/gen-char-class
|
||||||
|
|
||||||
|
.PHONY: tools
|
||||||
|
tools: $(GEN_CHAR_CLASS)
|
||||||
|
|
||||||
|
# gen-char-class
|
||||||
|
$(GEN_CHAR_CLASS): tools/gen_char_class.c
|
||||||
|
$(CC) $(CFLAGS) -o $@ $(filter %.c,$^)
|
||||||
|
|
||||||
|
.PHONY: clean-tools
|
||||||
|
clean-tools:
|
||||||
|
rm -f $(GEN_CHAR_CLASS)
|
||||||
440
3rd/str/README.md
Normal file
440
3rd/str/README.md
Normal file
@@ -0,0 +1,440 @@
|
|||||||
|
# str: yet another string library for C language.
|
||||||
|
|
||||||
|
[License: BSD 3-Clause](https://opensource.org/licenses/BSD-3-Clause)
|
||||||
|
|
||||||
|
## Motivation
|
||||||
|
|
||||||
|
Bored with developing the same functionality over and over again, unsatisfied
|
||||||
|
with existing libraries, so decided to make the right one, once and forever. 🙂
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
* Handles both C and binary strings;
|
||||||
|
* Light-weight references to strings: cheap to create, copy, or pass by value;
|
||||||
|
* Support for copy and move semantics, although not enforceable by the C language;
|
||||||
|
* String composition functions writing to memory, file descriptors, or file streams;
|
||||||
|
* Can be compiled using `gcc` or `clang`, and linked with `libc` or `musl`.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
Just clone the project and copy (or symlink) the files `str.h` and `str.c` into your project,
|
||||||
|
but please respect the [license](LICENSE).
|
||||||
|
|
||||||
|
## Code Examples
|
||||||
|
|
||||||
|
String composition:
|
||||||
|
|
||||||
|
```C
|
||||||
|
str s = str_null;
|
||||||
|
|
||||||
|
str_join(&s, str_lit(", "),
|
||||||
|
str_lit("Here"),
|
||||||
|
str_lit("there"),
|
||||||
|
str_lit("and everywhere"));
|
||||||
|
|
||||||
|
str_cat(&s, s, str_lit("..."));
|
||||||
|
|
||||||
|
assert(str_eq(s, str_lit("Here, there, and everywhere...")));
|
||||||
|
str_free(s);
|
||||||
|
```
|
||||||
|
|
||||||
|
Same as above, but writing to a file:
|
||||||
|
|
||||||
|
```C
|
||||||
|
FILE* const stream = fopen(...);
|
||||||
|
|
||||||
|
int err = str_join(stream, str_lit(", "),
|
||||||
|
str_lit("Here"),
|
||||||
|
str_lit("there"),
|
||||||
|
str_lit("and everywhere..."));
|
||||||
|
|
||||||
|
if(err != 0) { /* handle the error */ }
|
||||||
|
```
|
||||||
|
|
||||||
|
[Discussion](https://news.ycombinator.com/item?id=25212864) on Hacker News.
|
||||||
|
|
||||||
|
## User Guide
|
||||||
|
|
||||||
|
_**Disclaimer:** This is the good old C language, not C++ or Rust, so nothing can be enforced
|
||||||
|
on the language level, and certain discipline is required to make sure there is no corrupt
|
||||||
|
or leaked memory resulting from using this library._
|
||||||
|
|
||||||
|
A string is represented by the type `str` that maintains a pointer to some memory containing the
|
||||||
|
actual string, and the length of the string. Objects of type `str` are small enough (a struct
|
||||||
|
of a `const char*` and a `size_t`) to be cheap to create, copy (pass by value), and move. The
|
||||||
|
`str` structure should be treated as opaque (i.e., do not attempt to directly access or modify
|
||||||
|
the fields in this structure). The strings are assumed to be immutable, like those in Java or
|
||||||
|
Go, but only by means of `const char*` pointers, so it is actually possible to modify such a
|
||||||
|
string, although the required type cast to `char*` offers at least some (mostly psychological)
|
||||||
|
protection from changing the string by mistake.
|
||||||
|
|
||||||
|
This library focusses only on handling strings, not gradually composing them like
|
||||||
|
[StringBuffer](https://docs.oracle.com/javase/7/docs/api/java/lang/StringBuffer.html)
|
||||||
|
class in Java.
|
||||||
|
|
||||||
|
All string objects must be initialised before use. Uninitialised objects will cause
|
||||||
|
undefined behaviour. Use the provided constructors, or `str_null` for empty strings.
|
||||||
|
|
||||||
|
There are two kinds of `str` objects: those actually owning the memory they point to, and
|
||||||
|
non-owning references. This property can be queried using `str_is_owner` and `str_is_ref`
|
||||||
|
functions, otherwise such objects are indistinguishable.
|
||||||
|
|
||||||
|
Non-owning string objects are safe to copy and assign to each other, as long as the memory
|
||||||
|
they refer to is valid. They do not need to be freed. `str_free` is a no-op for reference
|
||||||
|
objects. A reference object can be cheaply created from a C string, a string literal,
|
||||||
|
or from a range of bytes.
|
||||||
|
|
||||||
|
Owning objects require special treatment, in particular:
|
||||||
|
* It is a good idea to have only one owning object per each allocated string, but such
|
||||||
|
a string can have many references to its underlying string, as long as those references do not
|
||||||
|
outlive the owning object.
|
||||||
|
Sometimes this rule may be relaxed for code clarity, like in the above example where
|
||||||
|
the owning object is passed directly to a function, but only if the function does not
|
||||||
|
store or release the object. When in doubt pass such an object via `str_ref`.
|
||||||
|
* Direct assignments (like `s2 = s1;`) to owning objects will certainly leak memory, use
|
||||||
|
`str_assign` function instead. In fact, this function can assign to any string object,
|
||||||
|
owning or not, so it can be used everywhere, just to avoid any doubt.
|
||||||
|
* There is no automatic memory management in C, so every owning object must be released at
|
||||||
|
some point using either `str_free` or `str_clear` function. String objects on the stack
|
||||||
|
can also be declared as `str_auto` (or `const str_auto`) for automatic cleanup when the variable
|
||||||
|
goes out of scope.
|
||||||
|
* An owning object can be moved to another location by using `str_move` function. The
|
||||||
|
function resets its source object to an empty string.
|
||||||
|
* Object ownership can be passed over to another object by using `str_pass` function. The
|
||||||
|
function sets its source to a non-owning reference to the original string.
|
||||||
|
|
||||||
|
It is technically possible to create a reference to a string that is not
|
||||||
|
null-terminated. The library accepts strings without null-terminators, but every new string
|
||||||
|
allocated by the library is guaranteed to be null-terminated.
|
||||||
|
|
||||||
|
### String Construction
|
||||||
|
|
||||||
|
A string object can be constructed from any C string, string literal, or a range of bytes.
|
||||||
|
The provided constructors are computationally cheap to apply. Depending on the constructor,
|
||||||
|
the new object can either own the actual string it refers to, or be a non-owning reference.
|
||||||
|
Constructors themselves do not allocate any memory. Importantly, constructors are the only
|
||||||
|
functions in this library that return a string object, while others only assign their results
|
||||||
|
through a pointer to a pre-existing string. This makes constructors suitable for initialisation
|
||||||
|
of new string objects. In all other situations one should combine construction with assignment,
|
||||||
|
for example:<br>
|
||||||
|
`str_assign(&dest, str_acquire_chars(buff, n));`
|
||||||
|
|
||||||
|
### String Object Properties
|
||||||
|
|
||||||
|
Querying a property of a string object (like the length of the string via `str_len`) is a
|
||||||
|
cheap operation.
|
||||||
|
|
||||||
|
### Assigning, Moving, and Passing String Objects
|
||||||
|
|
||||||
|
C language does not allow for operator overloading, so this library provides a function
|
||||||
|
`str_assign` that takes a string object and assigns it to the destination object, freeing
|
||||||
|
any memory owned by the destination. It is generally recommended to use this function
|
||||||
|
everywhere outside object initialisation.
|
||||||
|
|
||||||
|
An existing object can be moved over to another location via `str_move` function.
|
||||||
|
The function resets the source object to `str_null` to guarantee the correct move semantics.
|
||||||
|
The value returned by `str_move` may be either used to initialise a new object, or
|
||||||
|
assigned to an existing object using `str_assign`.
|
||||||
|
|
||||||
|
An existing object can also be passed over to another location via `str_pass` function. The function
|
||||||
|
sets the source object to be a non-owning reference to the original string, otherwise the semantics
|
||||||
|
and usage are the same as for `str_move`.
|
||||||
|
|
||||||
|
### String Composition and Generic Destination
|
||||||
|
|
||||||
|
String composition [functions](#string-composition) can write their results to different
|
||||||
|
destinations, depending on the _type_ of their `dest` parameter:
|
||||||
|
|
||||||
|
* `str*`: result is assigned to the string object;
|
||||||
|
* `int`: result is written to the file descriptor;
|
||||||
|
* `FILE*`: result is written to the file stream.
|
||||||
|
|
||||||
|
The composition functions return 0 on success, or the value of `errno` as retrieved at the point
|
||||||
|
of failure (including `ENOMEM` on memory allocation error).
|
||||||
|
|
||||||
|
### Detailed Example
|
||||||
|
|
||||||
|
Just to make things more clear, here is the same code as in the example above, but with comments:
|
||||||
|
```C
|
||||||
|
// declare a variable and initialise it with an empty string; could also be declared as "str_auto"
|
||||||
|
// to avoid explicit call to str_free() below.
|
||||||
|
str s = str_null;
|
||||||
|
|
||||||
|
// join the given string literals around the separator (second parameter),
|
||||||
|
// storing the result in object "s" (first parameter); in this example we do not check
|
||||||
|
// the return values of the composition functions, thus ignoring memory allocation failures,
|
||||||
|
// which is probably not the best idea in general.
|
||||||
|
str_join(&s, str_lit(", "),
|
||||||
|
str_lit("Here"),
|
||||||
|
str_lit("there"),
|
||||||
|
str_lit("and everywhere"));
|
||||||
|
|
||||||
|
// create a new string concatenating "s" and a literal; the function only modifies its
|
||||||
|
// destination object "s" after the result is computed, also freeing the destination
|
||||||
|
// before the assignment, so it is safe to use "s" as both a parameter and a destination.
|
||||||
|
// note: we pass a copy of the owning object "s" as the second parameter, and here it is
|
||||||
|
// safe to do so because this particular function does not modify its arguments.
|
||||||
|
str_cat(&s, s, str_lit("..."));
|
||||||
|
|
||||||
|
// check that we have got the expected result
|
||||||
|
assert(str_eq(s, str_lit("Here, there, and everywhere...")));
|
||||||
|
|
||||||
|
// finally, free the memory allocated for the string
|
||||||
|
str_free(s);
|
||||||
|
```
|
||||||
|
|
||||||
|
There are some useful [code snippets](snippets.md) provided to assist with writing code using
|
||||||
|
this library.
|
||||||
|
|
||||||
|
## API brief
|
||||||
|
|
||||||
|
`typedef struct { ... } str;`<br>
|
||||||
|
The string object.
|
||||||
|
|
||||||
|
#### String Properties
|
||||||
|
|
||||||
|
`size_t str_len(const str s)`<br>
|
||||||
|
Returns the number of bytes in the string referenced by the object.
|
||||||
|
|
||||||
|
`const char* str_ptr(const str s)`<br>
|
||||||
|
Returns a pointer to the first byte of the string referenced by the object. The pointer is never NULL.
|
||||||
|
|
||||||
|
`const char* str_end(const str s)`<br>
|
||||||
|
Returns a pointer to the next byte past the end of the string referenced by the object.
|
||||||
|
The pointer is never NULL, but it is not guaranteed to point to any valid byte or location.
|
||||||
|
For C strings it points to the terminating null character. For any given string `s` the following
|
||||||
|
condition is always satisfied: `str_end(s) == str_ptr(s) + str_len(s)`.
|
||||||
|
|
||||||
|
`bool str_is_empty(const str s)`<br>
|
||||||
|
Returns "true" for empty strings.
|
||||||
|
|
||||||
|
`bool str_is_owner(const str s)`<br>
|
||||||
|
Returns "true" if the string object is the owner of the memory it references.
|
||||||
|
|
||||||
|
`bool str_is_ref(const str s)`<br>
|
||||||
|
Returns "true" if the string object does not own the memory it references.
|
||||||
|
|
||||||
|
#### String Construction
|
||||||
|
|
||||||
|
`str_null`<br>
|
||||||
|
Empty string constant.
|
||||||
|
|
||||||
|
`str str_lit(s)`<br>
|
||||||
|
Constructs a non-owning object from a string literal. Implemented as a macro.
|
||||||
|
|
||||||
|
`str str_ref(s)`<br>
|
||||||
|
Constructs a non-owning object from either a null-terminated C string, or another `str` object.
|
||||||
|
Implemented as a macro.
|
||||||
|
|
||||||
|
`str str_ref_chars(const char* const s, const size_t n)`<br>
|
||||||
|
Constructs a non-owning object referencing the given range of bytes.
|
||||||
|
|
||||||
|
`str str_acquire_chars(const char* const s, const size_t n)`<br>
|
||||||
|
Constructs an owning object for the specified range of bytes. The pointer `s` should be safe
|
||||||
|
to pass to `free(3)` function.
|
||||||
|
|
||||||
|
`str str_acquire(const char* const s)`<br>
|
||||||
|
Constructs an owning object from the given C string. The string should be safe to pass to
|
||||||
|
`free(3)` function.
|
||||||
|
|
||||||
|
`str str_move(str* const ps)`<br>
|
||||||
|
Saves the given object to a temporary, resets the source object to `str_null`, and then
|
||||||
|
returns the saved object.
|
||||||
|
|
||||||
|
`str str_pass(str* const ps)`<br>
|
||||||
|
Saves the given object to a temporary, sets the source object to be a non-owning reference to the
|
||||||
|
original string, and then returns the saved object.
|
||||||
|
|
||||||
|
#### String Deallocation
|
||||||
|
|
||||||
|
`void str_free(const str s)`<br>
|
||||||
|
Deallocates any memory held by the owning string object. No-op for references. After a call to
|
||||||
|
this function the string object is in an unknown and unusable state.
|
||||||
|
|
||||||
|
String objects on the stack can also be declared as `str_auto` instead of `str` to deallocate
|
||||||
|
any memory held by the string when the variable goes out of scope.
|
||||||
|
|
||||||
|
#### String Modification
|
||||||
|
|
||||||
|
`void str_assign(str* const ps, const str s)`<br>
|
||||||
|
Assigns the object `s` to the object pointed to by `ps`. Any memory owned by the target
|
||||||
|
object is freed before the assignment.
|
||||||
|
|
||||||
|
`void str_clear(str* const ps)`<br>
|
||||||
|
Sets the target object to `str_null` after freeing any memory owned by the target.
|
||||||
|
|
||||||
|
`void str_swap(str* const s1, str* const s2)`<br>
|
||||||
|
Swaps two string objects.
|
||||||
|
|
||||||
|
`int str_from_file(str* const dest, const char* const file_name)`<br>
|
||||||
|
Reads the entire file (of up to 64MB by default, configurable via `STR_MAX_FILE_SIZE`) into
|
||||||
|
the destination string. Returns 0 on success, or the value of `errno` on error.
|
||||||
|
|
||||||
|
#### String Comparison
|
||||||
|
|
||||||
|
`int str_cmp(const str s1, const str s2)`<br>
|
||||||
|
Lexicographically compares the two string objects, with usual semantics.
|
||||||
|
|
||||||
|
`bool str_eq(const str s1, const str s2)`<br>
|
||||||
|
Returns "true" if the two strings match exactly.
|
||||||
|
|
||||||
|
`int str_cmp_ci(const str s1, const str s2)`<br>
|
||||||
|
Case-insensitive comparison of two strings, implemented using `strncasecmp(3)`.
|
||||||
|
|
||||||
|
`bool str_eq_ci(const str s1, const str s2)`<br>
|
||||||
|
Returns "true" if the two strings match case-insensitively.
|
||||||
|
|
||||||
|
`bool str_has_prefix(const str s, const str prefix)`<br>
|
||||||
|
Tests if the given string `s` starts with the specified prefix.
|
||||||
|
|
||||||
|
`bool str_has_suffix(const str s, const str suffix)`<br>
|
||||||
|
Tests if the given string `s` ends with the specified suffix.
|
||||||
|
|
||||||
|
#### String Composition
|
||||||
|
|
||||||
|
`int str_cpy(dest, const str src)`<br>
|
||||||
|
Copies the source string referenced by `src` to the
|
||||||
|
[generic](#string-composition-and-generic-destination) destination `dest`. Returns 0 on success,
|
||||||
|
or the value of `errno` on failure.
|
||||||
|
|
||||||
|
`int str_cat_range(dest, const str* src, size_t count)`<br>
|
||||||
|
Concatenates `count` strings from the array starting at address `src`, and writes
|
||||||
|
the result to the [generic](#string-composition-and-generic-destination) destination `dest`.
|
||||||
|
Returns 0 on success, or the value of `errno` on failure.
|
||||||
|
|
||||||
|
`int str_cat(dest, ...)`<br>
|
||||||
|
Concatenates a variable list of `str` arguments, and writes the result to the
|
||||||
|
[generic](#string-composition-and-generic-destination) destination `dest`.
|
||||||
|
Returns 0 on success, or the value of `errno` on failure.
|
||||||
|
|
||||||
|
`int str_join_range(dest, const str sep, const str* src, size_t count)`<br>
|
||||||
|
Joins around `sep` the `count` strings from the array starting at address `src`, and writes
|
||||||
|
the result to the [generic](#string-composition-and-generic-destination) destination `dest`.
|
||||||
|
Returns 0 on success, or the value of `errno` on failure.
|
||||||
|
|
||||||
|
`int str_join(dest, const str sep, ...)`<br>
|
||||||
|
Joins a variable list of `str` arguments around `sep` delimiter, and writes the result to the
|
||||||
|
[generic](#string-composition-and-generic-destination) destination `dest`.
|
||||||
|
Returns 0 on success, or the value of `errno` on failure.
|
||||||
|
|
||||||
|
#### Searching and Sorting
|
||||||
|
|
||||||
|
`bool str_partition(const str src, const str patt, str* const prefix, str* const suffix)`<br>
|
||||||
|
Splits the string `src` on the first match of `patt`, assigning a reference to the part
|
||||||
|
of the string before the match to the `prefix` object, and the part after the match to the
|
||||||
|
`suffix` object. Returns `true` if a match has been found, or `false` otherwise, also
|
||||||
|
setting `prefix` to reference the entire `src` string, and clearing the `suffix` object.
|
||||||
|
Empty pattern `patt` never matches.
|
||||||
|
|
||||||
|
`void str_sort_range(const str_cmp_func cmp, str* const array, const size_t count)`<br>
|
||||||
|
Sorts the given array of `str` objects using the given comparison function. A number
|
||||||
|
of typically used comparison functions is also provided:
|
||||||
|
* `str_order_asc` (ascending sort)
|
||||||
|
* `str_order_desc` (descending sort)
|
||||||
|
* `str_order_asc_ci` (ascending case-insensitive sort)
|
||||||
|
* `str_order_desc_ci` (descending case-insensitive sort)
|
||||||
|
|
||||||
|
`const str* str_search_range(const str key, const str* const array, const size_t count)`<br>
|
||||||
|
Binary search for the given key. The input array must be sorted using `str_order_asc`.
|
||||||
|
Returns a pointer to the string matching the key, or NULL.
|
||||||
|
|
||||||
|
`size_t str_partition_range(bool (*pred)(const str), str* const array, const size_t count)`<br>
|
||||||
|
Reorders the string objects in the given range in such a way that all elements for which
|
||||||
|
the predicate `pred` returns "true" precede the elements for which predicate `pred`
|
||||||
|
returns "false". Returns the number of preceding objects.
|
||||||
|
|
||||||
|
`size_t str_unique_range(str* const array, const size_t count)`<br>
|
||||||
|
Reorders the string objects in the given range in such a way that there are two partitions:
|
||||||
|
one where each object is unique within the input range, and another partition with all the
|
||||||
|
remaining objects. The unique partition is stored at the beginning of the array, and is
|
||||||
|
sorted in ascending order, followed by the partition with all remaining objects.
|
||||||
|
Returns the number of unique objects.
|
||||||
|
|
||||||
|
#### UNICODE support
|
||||||
|
|
||||||
|
`for_each_codepoint(var_name, src_string)`<br>
|
||||||
|
A macro that expands to a loop iterating over the given string `src_string` (of type `str`) by UTF-32
|
||||||
|
code points. On each iteration the variable `var_name` (of type `char32_t`) is assigned
|
||||||
|
the value of the next valid UTF-32 code point from the source string. Upon exit from the loop the
|
||||||
|
variable has one of the following values:
|
||||||
|
* `CPI_END_OF_STRING`: the iteration has reached the end of source string;
|
||||||
|
* `CPI_ERR_INCOMPLETE_SEQ`: an incomplete byte sequence has been detected;
|
||||||
|
* `CPI_ERR_INVALID_ENCODING`: an invalid byte sequence has been detected.
|
||||||
|
|
||||||
|
The source string is expected to be encoded in the _current program locale_, as set by the most
|
||||||
|
recent call to `setlocale(3)`.
|
||||||
|
|
||||||
|
Usage pattern:
|
||||||
|
```c
|
||||||
|
#include <uchar.h>
|
||||||
|
...
|
||||||
|
str s = ...
|
||||||
|
...
|
||||||
|
char32_t c; // variable to receive UTF-32 values on each iteration
|
||||||
|
|
||||||
|
for_each_codepoint(c, s)
|
||||||
|
{
|
||||||
|
/* process c */
|
||||||
|
}
|
||||||
|
|
||||||
|
if(c != CPI_END_OF_STRING)
|
||||||
|
{
|
||||||
|
/* handle error */
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Tokeniser
|
||||||
|
|
||||||
|
Tokeniser interface provides functionality similar to `strtok(3)` function. The tokeniser
|
||||||
|
is fully re-entrant with no hidden state, and its input string is not modified while being
|
||||||
|
parsed.
|
||||||
|
|
||||||
|
##### Typical usage:
|
||||||
|
```C
|
||||||
|
// declare and initialise tokeniser state
|
||||||
|
str_tok_state state;
|
||||||
|
|
||||||
|
str_tok_init(&state, source_string, delimiter_set);
|
||||||
|
|
||||||
|
// object to receive tokens
|
||||||
|
str token = str_null;
|
||||||
|
|
||||||
|
// token iterator
|
||||||
|
while(str_tok(&token, &state))
|
||||||
|
{
|
||||||
|
/* process "token" */
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Tokeniser API
|
||||||
|
|
||||||
|
`void str_tok_init(str_tok_state* const state, const str src, const str delim_set)`<br>
|
||||||
|
Initialises tokeniser state with the given source string and delimiter set. The delimiter set
|
||||||
|
is treated as bytes, _not_ as UNICODE code points encoded in UTF-8.
|
||||||
|
|
||||||
|
`bool str_tok(str* const dest, str_tok_state* const state)`<br>
|
||||||
|
Retrieves the next token and stores it in the `dest` object. Returns `true` if the token has
|
||||||
|
been read, or `false` if the end of input has been reached. Retrieved token is always
|
||||||
|
a reference to a slice of the source string.
|
||||||
|
|
||||||
|
`void str_tok_delim(str_tok_state* const state, const str delim_set)`<br>
|
||||||
|
Changes the delimiter set associated with the given tokeniser state. The delimiter set is
|
||||||
|
treated as bytes, _not_ as UNICODE code points encoded in UTF-8.
|
||||||
|
|
||||||
|
## Tools
|
||||||
|
|
||||||
|
All the tools are located in `tools/` directory. Currently, there are the following tools:
|
||||||
|
|
||||||
|
* `file-to-str`: The script takes a file (text or binary) and a C variable name, and
|
||||||
|
writes to `stdout` C source code where the variable (of type `str`) is defined
|
||||||
|
and initialised with the content of the file.
|
||||||
|
|
||||||
|
* `gen-char-class`: Generates character classification functions that do the same as their
|
||||||
|
`isw*()` counterparts under the current locale as specified by `LC_ALL` environment variable.
|
||||||
|
Run `tools/gen-char-class --help` for further details, or `tools/gen-char-class --space`
|
||||||
|
to see an example of its output.
|
||||||
|
|
||||||
|
## Project Status
|
||||||
|
The library requires at least a C11 compiler. So far it has been tested on Linux Mint versions
|
||||||
|
from 19.3 to 22.0, with `gcc` versions from 9.5.0 to 13.2.0 (with either `libc` or `musl`),
|
||||||
|
and `clang` versions up to 18.1.3; it is also reported to work on ALT Linux 9.1 for Elbrus, with
|
||||||
|
`lcc` version 1.25.09.
|
||||||
63
3rd/str/snippets.md
Normal file
63
3rd/str/snippets.md
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
### Code Examples
|
||||||
|
|
||||||
|
Here I provide various (hopefully, useful) functions and code examples that are not included in the
|
||||||
|
main library. Some examples use non-POSIX and/or compiler-specific features that may or may
|
||||||
|
not be suitable for a particular project. Also, these snippets were tested while being developed,
|
||||||
|
but they may break in the future as the library evolves.
|
||||||
|
|
||||||
|
##### `void str_sprintf(str* const dest, const char* fmt, ...)`
|
||||||
|
|
||||||
|
Probably the simplest implementation utilising non-POSIX `asprintf(3)` function:
|
||||||
|
```C
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
|
||||||
|
#include "str.h"
|
||||||
|
|
||||||
|
#define str_sprintf(dest, fmt, ...) \
|
||||||
|
({ \
|
||||||
|
char* ___p; \
|
||||||
|
const int ___n = asprintf(&___p, (fmt), ##__VA_ARGS__); \
|
||||||
|
str_assign((dest), str_acquire_chars(___p, ___n)); \
|
||||||
|
})
|
||||||
|
```
|
||||||
|
This code does not check for errors. A more standard-conforming implementation would probably go
|
||||||
|
through `open_memstream(3)` function.
|
||||||
|
|
||||||
|
##### `int str_from_int(str* const dest, const int val)`
|
||||||
|
```C
|
||||||
|
int str_from_int(str* const dest, const int val)
|
||||||
|
{
|
||||||
|
char buff[256]; // of some "big enough" size
|
||||||
|
|
||||||
|
return str_cpy(dest, str_ref_chars(buff, snprintf(buff, sizeof(buff), "%d", val)));
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This code can also be used as a template for other functions converting from `double`, `struct tm`, etc.
|
||||||
|
|
||||||
|
##### `int str_append(str* const dest, ...)`
|
||||||
|
```C
|
||||||
|
#define str_append(dest, ...) \
|
||||||
|
({ str* const ___p = (dest); str_cat(___p, *___p, ##__VA_ARGS__); })
|
||||||
|
```
|
||||||
|
Test case and usage example:
|
||||||
|
```C
|
||||||
|
str s = str_lit("zzz");
|
||||||
|
|
||||||
|
assert(str_append(&s, str_lit(" "), str_lit("aaa")) == 0);
|
||||||
|
assert(str_eq(s, str_lit("zzz aaa")));
|
||||||
|
|
||||||
|
str_free(s);
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Using `str` objects with `printf` family of functions
|
||||||
|
|
||||||
|
Since a string object is not guaranteed to refer to a null-terminated string it should be formatted
|
||||||
|
with explicitly specified length, for example:
|
||||||
|
```C
|
||||||
|
str s = ...
|
||||||
|
|
||||||
|
printf("%.*s\n", (int)str_len(s), str_ptr(s));
|
||||||
|
```
|
||||||
|
_Note:_ The maximum length of the string is limited to `INT_MAX` bytes, and formatting will stop
|
||||||
|
at the first null byte within the string.
|
||||||
907
3rd/str/str_test.c
Normal file
907
3rd/str/str_test.c
Normal file
@@ -0,0 +1,907 @@
|
|||||||
|
/*
|
||||||
|
BSD 3-Clause License
|
||||||
|
|
||||||
|
Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer in the documentation
|
||||||
|
and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the copyright holder nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define _POSIX_C_SOURCE 200809L
|
||||||
|
|
||||||
|
#include "str.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <locale.h>
|
||||||
|
|
||||||
|
// make sure assert is always enabled
|
||||||
|
#ifdef NDEBUG
|
||||||
|
#undef NDEBUG
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
// Statement-like marker placed at the end of each test: prints
// "passed: <name of the enclosing function>" to stdout via __func__.
#define passed printf("passed: %s\n", __func__)
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_str_lit(void)
|
||||||
|
{
|
||||||
|
const str s = str_lit("ZZZ");
|
||||||
|
|
||||||
|
assert(str_len(s) == 3);
|
||||||
|
assert(str_is_ref(s));
|
||||||
|
assert(!str_is_owner(s));
|
||||||
|
assert(str_eq(s, str_lit("ZZZ")));
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_str_cpy(void)
|
||||||
|
{
|
||||||
|
str_auto s = str_null;
|
||||||
|
|
||||||
|
assert(str_cpy(&s, str_lit("ZZZ")) == 0);
|
||||||
|
|
||||||
|
assert(str_len(s) == 3);
|
||||||
|
assert(!str_is_ref(s));
|
||||||
|
assert(str_is_owner(s));
|
||||||
|
assert(str_eq(s, str_lit("ZZZ")));
|
||||||
|
assert(*str_end(s) == 0);
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_str_clear(void)
|
||||||
|
{
|
||||||
|
str s = str_null;
|
||||||
|
|
||||||
|
assert(str_cpy(&s, str_lit("ZZZ")) == 0);
|
||||||
|
|
||||||
|
assert(str_len(s) == 3);
|
||||||
|
assert(str_is_owner(s));
|
||||||
|
assert(*str_end(s) == 0);
|
||||||
|
|
||||||
|
str_clear(&s);
|
||||||
|
|
||||||
|
assert(str_is_empty(s));
|
||||||
|
assert(str_is_ref(s));
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_str_move(void)
|
||||||
|
{
|
||||||
|
str s1 = str_null;
|
||||||
|
|
||||||
|
assert(str_cpy(&s1, str_lit("ZZZ")) == 0);
|
||||||
|
|
||||||
|
str s2 = str_move(&s1);
|
||||||
|
|
||||||
|
assert(str_is_empty(s1));
|
||||||
|
assert(str_is_ref(s1));
|
||||||
|
|
||||||
|
assert(str_is_owner(s2));
|
||||||
|
assert(str_eq(s2, str_lit("ZZZ")));
|
||||||
|
|
||||||
|
str_free(s2);
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_str_pass(void)
|
||||||
|
{
|
||||||
|
str s1 = str_null;
|
||||||
|
|
||||||
|
assert(str_cpy(&s1, str_lit("ZZZ")) == 0);
|
||||||
|
|
||||||
|
str s2 = str_pass(&s1);
|
||||||
|
|
||||||
|
assert(str_is_ref(s1));
|
||||||
|
assert(str_eq(s1, str_lit("ZZZ")));
|
||||||
|
|
||||||
|
assert(str_is_owner(s2));
|
||||||
|
assert(str_eq(s2, str_lit("ZZZ")));
|
||||||
|
|
||||||
|
str_free(s2);
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_str_ref(void)
|
||||||
|
{
|
||||||
|
str s = str_ref("ZZZ");
|
||||||
|
|
||||||
|
assert(str_len(s) == 3);
|
||||||
|
assert(str_is_ref(s));
|
||||||
|
|
||||||
|
s = str_ref(s);
|
||||||
|
|
||||||
|
assert(str_is_ref(s));
|
||||||
|
assert(str_eq(s, str_lit("ZZZ")));
|
||||||
|
|
||||||
|
const char* const p = "ZZZ";
|
||||||
|
|
||||||
|
s = str_ref(p);
|
||||||
|
|
||||||
|
assert(str_is_ref(s));
|
||||||
|
assert(str_eq(s, str_lit("ZZZ")));
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_str_cmp(void)
|
||||||
|
{
|
||||||
|
const str_auto s = str_lit("zzz");
|
||||||
|
|
||||||
|
assert(str_cmp(s, s) == 0);
|
||||||
|
assert(str_cmp(s, str_lit("zzz")) == 0);
|
||||||
|
assert(str_cmp(s, str_lit("zz")) > 0);
|
||||||
|
assert(str_cmp(s, str_lit("zzzz")) < 0);
|
||||||
|
assert(str_cmp(s, str_null) > 0);
|
||||||
|
assert(str_cmp(str_null, s) < 0);
|
||||||
|
assert(str_cmp(str_null, str_null) == 0);
|
||||||
|
assert(str_eq(s, str_lit("zzz")));
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_str_cmp_ci(void)
|
||||||
|
{
|
||||||
|
const str s = str_lit("zzz");
|
||||||
|
|
||||||
|
assert(str_cmp_ci(s, s) == 0);
|
||||||
|
assert(str_cmp_ci(s, str_lit("zzz")) == 0);
|
||||||
|
assert(str_cmp_ci(s, str_lit("zz")) > 0);
|
||||||
|
assert(str_cmp_ci(s, str_lit("zzzz")) < 0);
|
||||||
|
assert(str_cmp_ci(s, str_null) > 0);
|
||||||
|
assert(str_cmp_ci(str_null, s) < 0);
|
||||||
|
assert(str_cmp_ci(str_null, str_null) == 0);
|
||||||
|
assert(str_cmp_ci(s, str_lit("ZZZ")) == 0);
|
||||||
|
assert(str_cmp_ci(s, str_lit("ZZ")) > 0);
|
||||||
|
assert(str_cmp_ci(s, str_lit("ZZZZ")) < 0);
|
||||||
|
assert(str_eq_ci(s, str_lit("ZZZ")));
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_str_acquire(void)
|
||||||
|
{
|
||||||
|
str_auto s = str_acquire(strdup("ZZZ"));
|
||||||
|
|
||||||
|
assert(str_is_owner(s));
|
||||||
|
assert(str_eq(s, str_lit("ZZZ")));
|
||||||
|
assert(*str_end(s) == 0);
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_str_cat(void)
|
||||||
|
{
|
||||||
|
str s = str_null;
|
||||||
|
|
||||||
|
assert(str_cat(&s, str_lit("AAA"), str_lit("BBB"), str_lit("CCC")) == 0);
|
||||||
|
|
||||||
|
assert(str_eq(s, str_lit("AAABBBCCC")));
|
||||||
|
assert(str_is_owner(s));
|
||||||
|
assert(*str_end(s) == 0);
|
||||||
|
|
||||||
|
assert(str_cat(&s, str_null, str_null, str_null) == 0); // this simply clears the target string
|
||||||
|
|
||||||
|
assert(str_is_empty(s));
|
||||||
|
assert(str_is_ref(s));
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_str_join(void)
|
||||||
|
{
|
||||||
|
str s = str_null;
|
||||||
|
|
||||||
|
assert(str_join(&s, str_lit("_"), str_lit("AAA"), str_lit("BBB"), str_lit("CCC")) == 0);
|
||||||
|
|
||||||
|
assert(str_eq(s, str_lit("AAA_BBB_CCC")));
|
||||||
|
assert(str_is_owner(s));
|
||||||
|
assert(*str_end(s) == 0);
|
||||||
|
|
||||||
|
assert(str_join(&s, str_lit("_"), str_null, str_lit("BBB"), str_lit("CCC")) == 0);
|
||||||
|
|
||||||
|
assert(str_eq(s, str_lit("_BBB_CCC")));
|
||||||
|
assert(str_is_owner(s));
|
||||||
|
assert(*str_end(s) == 0);
|
||||||
|
|
||||||
|
assert(str_join(&s, str_lit("_"), str_lit("AAA"), str_null, str_lit("CCC")) == 0);
|
||||||
|
|
||||||
|
assert(str_eq(s, str_lit("AAA__CCC")));
|
||||||
|
assert(str_is_owner(s));
|
||||||
|
assert(*str_end(s) == 0);
|
||||||
|
|
||||||
|
assert(str_join(&s, str_lit("_"), str_lit("AAA"), str_lit("BBB"), str_null) == 0);
|
||||||
|
|
||||||
|
assert(str_eq(s, str_lit("AAA_BBB_")));
|
||||||
|
assert(str_is_owner(s));
|
||||||
|
assert(*str_end(s) == 0);
|
||||||
|
|
||||||
|
assert(str_join(&s, str_lit("_"), str_null, str_null, str_null) == 0);
|
||||||
|
|
||||||
|
assert(str_eq(s, str_lit("__")));
|
||||||
|
assert(str_is_owner(s));
|
||||||
|
assert(*str_end(s) == 0);
|
||||||
|
|
||||||
|
assert(str_join(&s, str_null) == 0); // this simply clears the target string
|
||||||
|
|
||||||
|
assert(str_is_empty(s));
|
||||||
|
assert(str_is_ref(s));
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_composition(void)
|
||||||
|
{
|
||||||
|
str_auto s = str_lit(", ");
|
||||||
|
|
||||||
|
assert(str_join(&s, s, str_lit("Here"), str_lit("there"), str_lit("and everywhere")) == 0);
|
||||||
|
assert(str_cat(&s, s, str_lit("...")) == 0);
|
||||||
|
|
||||||
|
assert(str_eq(s, str_lit("Here, there, and everywhere...")));
|
||||||
|
assert(str_is_owner(s));
|
||||||
|
assert(*str_end(s) == 0);
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_sort(void)
|
||||||
|
{
|
||||||
|
str src[] = { str_lit("z"), str_lit("zzz"), str_lit("aaa"), str_lit("bbb") };
|
||||||
|
|
||||||
|
str_sort_range(str_order_asc, src, sizeof(src)/sizeof(src[0]));
|
||||||
|
|
||||||
|
assert(str_eq(src[0], str_lit("aaa")));
|
||||||
|
assert(str_eq(src[1], str_lit("bbb")));
|
||||||
|
assert(str_eq(src[2], str_lit("z")));
|
||||||
|
assert(str_eq(src[3], str_lit("zzz")));
|
||||||
|
|
||||||
|
str_sort_range(str_order_desc, src, sizeof(src)/sizeof(src[0]));
|
||||||
|
|
||||||
|
assert(str_eq(src[0], str_lit("zzz")));
|
||||||
|
assert(str_eq(src[1], str_lit("z")));
|
||||||
|
assert(str_eq(src[2], str_lit("bbb")));
|
||||||
|
assert(str_eq(src[3], str_lit("aaa")));
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_sort_ci(void)
|
||||||
|
{
|
||||||
|
str src[] = { str_lit("ZZZ"), str_lit("zzz"), str_lit("aaa"), str_lit("AAA") };
|
||||||
|
|
||||||
|
str_sort_range(str_order_asc_ci, src, sizeof(src)/sizeof(src[0]));
|
||||||
|
|
||||||
|
assert(str_eq_ci(src[0], str_lit("aaa")));
|
||||||
|
assert(str_eq_ci(src[1], str_lit("aaa")));
|
||||||
|
assert(str_eq_ci(src[2], str_lit("zzz")));
|
||||||
|
assert(str_eq_ci(src[3], str_lit("zzz")));
|
||||||
|
|
||||||
|
str_sort_range(str_order_desc_ci, src, sizeof(src)/sizeof(src[0]));
|
||||||
|
|
||||||
|
assert(str_eq_ci(src[0], str_lit("zzz")));
|
||||||
|
assert(str_eq_ci(src[1], str_lit("zzz")));
|
||||||
|
assert(str_eq_ci(src[2], str_lit("aaa")));
|
||||||
|
assert(str_eq_ci(src[3], str_lit("aaa")));
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_search(void)
|
||||||
|
{
|
||||||
|
str src[] = { str_lit("z"), str_lit("zzz"), str_lit("aaa"), str_lit("bbb") };
|
||||||
|
const size_t count = sizeof(src)/sizeof(src[0]);
|
||||||
|
|
||||||
|
str_sort_range(str_order_asc, src, count);
|
||||||
|
|
||||||
|
assert(str_search_range(src[0], src, count) == &src[0]);
|
||||||
|
assert(str_search_range(src[1], src, count) == &src[1]);
|
||||||
|
assert(str_search_range(src[2], src, count) == &src[2]);
|
||||||
|
assert(str_search_range(src[3], src, count) == &src[3]);
|
||||||
|
assert(str_search_range(str_lit("xxx"), src, count) == NULL);
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_prefix(void)
|
||||||
|
{
|
||||||
|
const str s = str_lit("abcd");
|
||||||
|
|
||||||
|
assert(str_has_prefix(s, str_null));
|
||||||
|
assert(str_has_prefix(s, str_lit("a")));
|
||||||
|
assert(str_has_prefix(s, str_lit("ab")));
|
||||||
|
assert(str_has_prefix(s, str_lit("abc")));
|
||||||
|
assert(str_has_prefix(s, str_lit("abcd")));
|
||||||
|
|
||||||
|
assert(!str_has_prefix(s, str_lit("zzz")));
|
||||||
|
assert(!str_has_prefix(s, str_lit("abcde")));
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_suffix(void)
|
||||||
|
{
|
||||||
|
const str s = str_lit("abcd");
|
||||||
|
|
||||||
|
assert(str_has_suffix(s, str_null));
|
||||||
|
assert(str_has_suffix(s, str_lit("d")));
|
||||||
|
assert(str_has_suffix(s, str_lit("cd")));
|
||||||
|
assert(str_has_suffix(s, str_lit("bcd")));
|
||||||
|
assert(str_has_suffix(s, str_lit("abcd")));
|
||||||
|
|
||||||
|
assert(!str_has_suffix(s, str_lit("zzz")));
|
||||||
|
assert(!str_has_suffix(s, str_lit("_abcd")));
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_cpy_to_fd(void)
|
||||||
|
{
|
||||||
|
FILE* const tmp = tmpfile();
|
||||||
|
|
||||||
|
assert(tmp != NULL);
|
||||||
|
assert(str_cpy(fileno(tmp), str_lit("ZZZ")) == 0);
|
||||||
|
|
||||||
|
rewind(tmp);
|
||||||
|
|
||||||
|
char buff[32];
|
||||||
|
|
||||||
|
assert(fread(buff, 1, sizeof(buff), tmp) == 3);
|
||||||
|
assert(memcmp(buff, "ZZZ", 3) == 0);
|
||||||
|
|
||||||
|
fclose(tmp);
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_cpy_to_stream(void)
|
||||||
|
{
|
||||||
|
FILE* const tmp = tmpfile();
|
||||||
|
|
||||||
|
assert(tmp != NULL);
|
||||||
|
assert(str_cpy(tmp, str_lit("ZZZ")) == 0);
|
||||||
|
|
||||||
|
assert(fflush(tmp) == 0);
|
||||||
|
rewind(tmp);
|
||||||
|
|
||||||
|
char buff[32];
|
||||||
|
|
||||||
|
assert(fread(buff, 1, sizeof(buff), tmp) == 3);
|
||||||
|
assert(memcmp(buff, "ZZZ", 3) == 0);
|
||||||
|
|
||||||
|
fclose(tmp);
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_cat_range_to_fd(void)
|
||||||
|
{
|
||||||
|
const str src[] = {
|
||||||
|
str_lit("aaa"),
|
||||||
|
str_lit("bbb"),
|
||||||
|
str_null,
|
||||||
|
str_lit("ccc"),
|
||||||
|
str_lit("ddd"),
|
||||||
|
str_null,
|
||||||
|
str_null
|
||||||
|
};
|
||||||
|
|
||||||
|
const size_t num_items = sizeof(src)/sizeof(src[0]);
|
||||||
|
|
||||||
|
FILE* const tmp = tmpfile();
|
||||||
|
|
||||||
|
assert(tmp != NULL);
|
||||||
|
assert(str_cat_range(fileno(tmp), src, num_items) == 0);
|
||||||
|
|
||||||
|
rewind(tmp);
|
||||||
|
|
||||||
|
const char res[] = "aaabbbcccddd";
|
||||||
|
const size_t len = sizeof(res) - 1;
|
||||||
|
char buff[32];
|
||||||
|
|
||||||
|
assert(fread(buff, 1, sizeof(buff), tmp) == len);
|
||||||
|
assert(memcmp(buff, res, len) == 0);
|
||||||
|
|
||||||
|
fclose(tmp);
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_cat_large_range_to_fd(void)
|
||||||
|
{
|
||||||
|
// prepare data
|
||||||
|
const size_t n = 100000;
|
||||||
|
str* const src = calloc(n, sizeof(str));
|
||||||
|
|
||||||
|
assert(src != NULL);
|
||||||
|
|
||||||
|
char buff[100];
|
||||||
|
|
||||||
|
for(unsigned i = 0; i < n; i++)
|
||||||
|
assert(str_cpy(&src[i], str_ref_chars(buff, sprintf(buff, "%u\n", i))) == 0);
|
||||||
|
|
||||||
|
// write to file
|
||||||
|
FILE* const tmp = tmpfile();
|
||||||
|
|
||||||
|
assert(tmp != NULL);
|
||||||
|
assert(str_cat_range(fileno(tmp), src, n) == 0);
|
||||||
|
|
||||||
|
// clear input data
|
||||||
|
for(unsigned i = 0; i < n; ++i)
|
||||||
|
str_free(src[i]);
|
||||||
|
|
||||||
|
free(src);
|
||||||
|
|
||||||
|
// validate
|
||||||
|
rewind(tmp);
|
||||||
|
|
||||||
|
char* line = NULL;
|
||||||
|
size_t cap = 0;
|
||||||
|
ssize_t len;
|
||||||
|
int i = 0;
|
||||||
|
|
||||||
|
while((len = getline(&line, &cap, tmp)) >= 0)
|
||||||
|
assert(atoi(line) == i++);
|
||||||
|
|
||||||
|
assert(i == (int)n);
|
||||||
|
|
||||||
|
// all done
|
||||||
|
fclose(tmp);
|
||||||
|
free(line);
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_cat_range_to_stream(void)
|
||||||
|
{
|
||||||
|
const str src[] = {
|
||||||
|
str_lit("aaa"),
|
||||||
|
str_lit("bbb"),
|
||||||
|
str_null,
|
||||||
|
str_lit("ccc"),
|
||||||
|
str_lit("ddd"),
|
||||||
|
str_null,
|
||||||
|
str_null
|
||||||
|
};
|
||||||
|
|
||||||
|
const size_t num_items = sizeof(src)/sizeof(src[0]);
|
||||||
|
|
||||||
|
FILE* const tmp = tmpfile();
|
||||||
|
|
||||||
|
assert(tmp != NULL);
|
||||||
|
assert(str_cat_range(tmp, src, num_items) == 0);
|
||||||
|
|
||||||
|
assert(fflush(tmp) == 0);
|
||||||
|
rewind(tmp);
|
||||||
|
|
||||||
|
const char res[] = "aaabbbcccddd";
|
||||||
|
const size_t len = sizeof(res) - 1;
|
||||||
|
char buff[32];
|
||||||
|
|
||||||
|
assert(fread(buff, 1, sizeof(buff), tmp) == len);
|
||||||
|
assert(memcmp(buff, res, len) == 0);
|
||||||
|
|
||||||
|
fclose(tmp);
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_join_to_fd(void)
|
||||||
|
{
|
||||||
|
FILE* const tmp = tmpfile();
|
||||||
|
|
||||||
|
assert(tmp != NULL);
|
||||||
|
assert(str_join(fileno(tmp), str_lit("_"), str_lit("aaa"), str_lit("bbb"), str_lit("ccc")) == 0);
|
||||||
|
|
||||||
|
rewind(tmp);
|
||||||
|
|
||||||
|
const char res[] = "aaa_bbb_ccc";
|
||||||
|
const size_t len = sizeof(res) - 1;
|
||||||
|
char buff[32];
|
||||||
|
|
||||||
|
assert(fread(buff, 1, sizeof(buff), tmp) == len);
|
||||||
|
assert(memcmp(buff, res, len) == 0);
|
||||||
|
|
||||||
|
fclose(tmp);
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_join_large_range_to_fd(void)
|
||||||
|
{
|
||||||
|
// prepare data
|
||||||
|
const size_t n = 100000;
|
||||||
|
str* const src = calloc(n, sizeof(str));
|
||||||
|
|
||||||
|
assert(src != NULL);
|
||||||
|
|
||||||
|
char buff[100];
|
||||||
|
|
||||||
|
for(unsigned i = 0; i < n; i++)
|
||||||
|
assert(str_cpy(&src[i], str_ref_chars(buff, sprintf(buff, "%u", i))) == 0);
|
||||||
|
|
||||||
|
// write to file
|
||||||
|
FILE* const tmp = tmpfile();
|
||||||
|
|
||||||
|
assert(tmp != NULL);
|
||||||
|
assert(str_join_range(fileno(tmp), str_lit("\n"), src, n) == 0);
|
||||||
|
|
||||||
|
// clear input data
|
||||||
|
for(unsigned i = 0; i < n; ++i)
|
||||||
|
str_free(src[i]);
|
||||||
|
|
||||||
|
free(src);
|
||||||
|
|
||||||
|
// validate
|
||||||
|
rewind(tmp);
|
||||||
|
|
||||||
|
char* line = NULL;
|
||||||
|
size_t cap = 0;
|
||||||
|
ssize_t len;
|
||||||
|
int i = 0;
|
||||||
|
|
||||||
|
while((len = getline(&line, &cap, tmp)) >= 0)
|
||||||
|
assert(atoi(line) == i++);
|
||||||
|
|
||||||
|
assert(i == (int)n);
|
||||||
|
|
||||||
|
// all done
|
||||||
|
fclose(tmp);
|
||||||
|
free(line);
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_join_to_stream(void)
|
||||||
|
{
|
||||||
|
FILE* const tmp = tmpfile();
|
||||||
|
|
||||||
|
assert(tmp != NULL);
|
||||||
|
assert(str_join(tmp, str_lit("_"), str_lit("aaa"), str_lit("bbb"), str_lit("ccc")) == 0);
|
||||||
|
|
||||||
|
assert(fflush(tmp) == 0);
|
||||||
|
rewind(tmp);
|
||||||
|
|
||||||
|
const char res[] = "aaa_bbb_ccc";
|
||||||
|
const size_t len = sizeof(res) - 1;
|
||||||
|
char buff[32];
|
||||||
|
|
||||||
|
assert(fread(buff, 1, sizeof(buff), tmp) == len);
|
||||||
|
assert(memcmp(buff, res, len) == 0);
|
||||||
|
|
||||||
|
fclose(tmp);
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
bool part_pred(const str s) { return str_len(s) < 2; }
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_partition_range(void)
|
||||||
|
{
|
||||||
|
str src[] = { str_lit("aaa"), str_lit("a"), str_lit("aaaa"), str_lit("z") };
|
||||||
|
|
||||||
|
assert(str_partition_range(part_pred, src, 1) == 0);
|
||||||
|
|
||||||
|
assert(str_partition_range(part_pred, src, sizeof(src)/sizeof(src[0])) == 2);
|
||||||
|
assert(str_eq(src[0], str_lit("a")));
|
||||||
|
assert(str_eq(src[1], str_lit("z")));
|
||||||
|
assert(str_partition_range(part_pred, src, 1) == 1);
|
||||||
|
|
||||||
|
src[0] = str_lit("?");
|
||||||
|
src[2] = str_lit("*");
|
||||||
|
|
||||||
|
assert(str_partition_range(part_pred, src, sizeof(src)/sizeof(src[0])) == 3);
|
||||||
|
assert(str_eq(src[0], str_lit("?")));
|
||||||
|
assert(str_eq(src[1], str_lit("z")));
|
||||||
|
assert(str_eq(src[2], str_lit("*")));
|
||||||
|
assert(str_eq(src[3], str_lit("aaa")));
|
||||||
|
|
||||||
|
assert(str_partition_range(part_pred, NULL, 42) == 0);
|
||||||
|
assert(str_partition_range(part_pred, src, 0) == 0);
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_unique_range(void)
|
||||||
|
{
|
||||||
|
str src[] = {
|
||||||
|
str_lit("zzz"),
|
||||||
|
str_lit("aaa"),
|
||||||
|
str_lit("zzz"),
|
||||||
|
str_lit("bbb"),
|
||||||
|
str_lit("aaa"),
|
||||||
|
str_lit("ccc"),
|
||||||
|
str_lit("ccc"),
|
||||||
|
str_lit("aaa"),
|
||||||
|
str_lit("ccc"),
|
||||||
|
str_lit("zzz")
|
||||||
|
};
|
||||||
|
|
||||||
|
assert(str_unique_range(src, sizeof(src)/sizeof(src[0])) == 4);
|
||||||
|
assert(str_eq(src[0], str_lit("aaa")));
|
||||||
|
assert(str_eq(src[1], str_lit("bbb")));
|
||||||
|
assert(str_eq(src[2], str_lit("ccc")));
|
||||||
|
assert(str_eq(src[3], str_lit("zzz")));
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_from_file(void)
|
||||||
|
{
|
||||||
|
str_auto fname = str_null;
|
||||||
|
|
||||||
|
assert(str_cat(&fname, str_lit("tmp_"), str_ref_chars(__func__, sizeof(__func__) - 1)) == 0);
|
||||||
|
|
||||||
|
FILE* const stream = fopen(str_ptr(fname), "w");
|
||||||
|
|
||||||
|
assert(stream);
|
||||||
|
assert(str_join(stream, str_lit(" "), str_lit("aaa"), str_lit("bbb"), str_lit("ccc")) == 0);
|
||||||
|
assert(fclose(stream) == 0);
|
||||||
|
|
||||||
|
str_auto res = str_null;
|
||||||
|
|
||||||
|
assert(str_from_file(&res, str_ptr(fname)) == 0);
|
||||||
|
unlink(str_ptr(fname));
|
||||||
|
assert(str_eq(res, str_lit("aaa bbb ccc")));
|
||||||
|
assert(str_is_owner(res));
|
||||||
|
|
||||||
|
// test errors
|
||||||
|
assert(str_from_file(&res, ".") == EISDIR);
|
||||||
|
assert(str_from_file(&res, "/dev/null") == EOPNOTSUPP);
|
||||||
|
assert(str_from_file(&res, "does-not-exist") == ENOENT);
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef __STDC_UTF_32__
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_codepoint_iterator(void)
|
||||||
|
{
|
||||||
|
const str src = str_lit(u8"жёлтый"); // means "yellow" in Russian
|
||||||
|
static const char32_t src32[] = { U'ж', U'ё', U'л', U'т', U'ы', U'й' };
|
||||||
|
size_t i = 0;
|
||||||
|
char32_t c;
|
||||||
|
|
||||||
|
for_each_codepoint(c, src)
|
||||||
|
{
|
||||||
|
assert(i < sizeof(src32)/sizeof(src32[0]));
|
||||||
|
assert(c == src32[i++]);
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(c == CPI_END_OF_STRING);
|
||||||
|
assert(i == sizeof(src32)/sizeof(src32[0]));
|
||||||
|
|
||||||
|
// empty string iteration
|
||||||
|
c = 0;
|
||||||
|
|
||||||
|
for_each_codepoint(c, str_null)
|
||||||
|
assert(0);
|
||||||
|
|
||||||
|
assert(c == CPI_END_OF_STRING);
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // ifdef __STDC_UTF_32__
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_tok(void)
|
||||||
|
{
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
const str src, delim;
|
||||||
|
const unsigned n_tok;
|
||||||
|
const str tok[3];
|
||||||
|
} test_data;
|
||||||
|
|
||||||
|
static const test_data t[] =
|
||||||
|
{
|
||||||
|
{
|
||||||
|
str_lit("a,b,c"),
|
||||||
|
str_lit(","),
|
||||||
|
3,
|
||||||
|
{ str_lit("a"), str_lit("b"), str_lit("c") }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
str_lit(",,a,b,,c,"),
|
||||||
|
str_lit(","),
|
||||||
|
3,
|
||||||
|
{ str_lit("a"), str_lit("b"), str_lit("c") }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
str_lit("aaa;=~bbb~,=ccc="),
|
||||||
|
str_lit(",;=~"),
|
||||||
|
3,
|
||||||
|
{ str_lit("aaa"), str_lit("bbb"), str_lit("ccc") }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
str_lit(""),
|
||||||
|
str_lit(","),
|
||||||
|
0,
|
||||||
|
{ }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
str_lit(""),
|
||||||
|
str_lit(""),
|
||||||
|
0,
|
||||||
|
{ }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
str_lit(",.;,.;;.,;.,"),
|
||||||
|
str_lit(",.;"),
|
||||||
|
0,
|
||||||
|
{ }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
str_lit("aaa,bbb,ccc"),
|
||||||
|
str_lit(""),
|
||||||
|
1,
|
||||||
|
{ str_lit("aaa,bbb,ccc") }
|
||||||
|
},
|
||||||
|
{
|
||||||
|
str_lit("aaa,bbb,ccc"),
|
||||||
|
str_lit(";-="),
|
||||||
|
1,
|
||||||
|
{ str_lit("aaa,bbb,ccc") }
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
for(unsigned i = 0; i < sizeof(t)/sizeof(t[0]); ++i)
|
||||||
|
{
|
||||||
|
unsigned tok_count = 0;
|
||||||
|
|
||||||
|
str tok = str_null;
|
||||||
|
str_tok_state state;
|
||||||
|
|
||||||
|
str_tok_init(&state, t[i].src, t[i].delim);
|
||||||
|
|
||||||
|
while(str_tok(&tok, &state))
|
||||||
|
{
|
||||||
|
// printf("%u-%u: \"%.*s\" %zu\n",
|
||||||
|
// i, tok_count, (int)str_len(tok), str_ptr(tok), str_len(tok));
|
||||||
|
// fflush(stdout);
|
||||||
|
|
||||||
|
assert(tok_count < t[i].n_tok);
|
||||||
|
assert(str_eq(tok, t[i].tok[tok_count]));
|
||||||
|
|
||||||
|
++tok_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(tok_count == t[i].n_tok);
|
||||||
|
}
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void test_partition(void)
|
||||||
|
{
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
const bool res;
|
||||||
|
const str src, patt, pref, suff;
|
||||||
|
} test_data;
|
||||||
|
|
||||||
|
static const test_data t[] =
|
||||||
|
{
|
||||||
|
{ true, str_lit("...abc..."), str_lit("abc"), str_lit("..."), str_lit("...") },
|
||||||
|
{ true, str_lit("......abc"), str_lit("abc"), str_lit("......"), str_null },
|
||||||
|
{ true, str_lit("abc......"), str_lit("abc"), str_null, str_lit("......") },
|
||||||
|
|
||||||
|
{ true, str_lit("...a..."), str_lit("a"), str_lit("..."), str_lit("...") },
|
||||||
|
{ true, str_lit("......a"), str_lit("a"), str_lit("......"), str_null },
|
||||||
|
{ true, str_lit("a......"), str_lit("a"), str_null, str_lit("......") },
|
||||||
|
|
||||||
|
{ false, str_lit("zzz"), str_null, str_lit("zzz"), str_null },
|
||||||
|
{ false, str_null, str_lit("zzz"), str_null, str_null },
|
||||||
|
{ false, str_null, str_null, str_null, str_null },
|
||||||
|
|
||||||
|
{ false, str_lit("...zzz..."), str_lit("xxx"), str_lit("...zzz..."), str_null },
|
||||||
|
{ false, str_lit("...xxz..."), str_lit("xxx"), str_lit("...xxz..."), str_null },
|
||||||
|
{ true, str_lit("...xxz...xxx."), str_lit("xxx"), str_lit("...xxz..."), str_lit(".") },
|
||||||
|
{ true, str_lit(u8"...цифры___"), str_lit(u8"цифры"), str_lit("..."), str_lit("___") }
|
||||||
|
};
|
||||||
|
|
||||||
|
for(unsigned i = 0; i < sizeof(t)/sizeof(t[0]); ++i)
|
||||||
|
{
|
||||||
|
str pref = str_lit("???"), suff = str_lit("???");
|
||||||
|
|
||||||
|
assert(str_partition(t[i].src, t[i].patt, &pref, &suff) == t[i].res);
|
||||||
|
assert(str_eq(pref, t[i].pref));
|
||||||
|
assert(str_eq(suff, t[i].suff));
|
||||||
|
}
|
||||||
|
|
||||||
|
passed;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(void)
|
||||||
|
{
|
||||||
|
// tests
|
||||||
|
test_str_lit();
|
||||||
|
test_str_cpy();
|
||||||
|
test_str_clear();
|
||||||
|
test_str_move();
|
||||||
|
test_str_pass();
|
||||||
|
test_str_ref();
|
||||||
|
test_str_cmp();
|
||||||
|
test_str_cmp_ci();
|
||||||
|
test_str_acquire();
|
||||||
|
test_str_cat();
|
||||||
|
test_str_join();
|
||||||
|
test_composition();
|
||||||
|
test_sort();
|
||||||
|
test_sort_ci();
|
||||||
|
test_search();
|
||||||
|
test_prefix();
|
||||||
|
test_suffix();
|
||||||
|
test_cpy_to_fd();
|
||||||
|
test_cpy_to_stream();
|
||||||
|
test_cat_range_to_fd();
|
||||||
|
test_cat_large_range_to_fd();
|
||||||
|
test_cat_range_to_stream();
|
||||||
|
test_join_to_fd();
|
||||||
|
test_join_large_range_to_fd();
|
||||||
|
test_join_to_stream();
|
||||||
|
test_partition_range();
|
||||||
|
test_unique_range();
|
||||||
|
test_from_file();
|
||||||
|
test_tok();
|
||||||
|
test_partition();
|
||||||
|
|
||||||
|
#ifdef __STDC_UTF_32__
|
||||||
|
assert(setlocale(LC_ALL, "C.UTF-8"));
|
||||||
|
|
||||||
|
test_codepoint_iterator();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return puts("OK.") < 0;
|
||||||
|
}
|
||||||
30
3rd/str/tools/file-to-str
Executable file
30
3rd/str/tools/file-to-str
Executable file
@@ -0,0 +1,30 @@
|
|||||||
|
#!/bin/sh

# Writes to stdout a C source file that embeds the content of FILE as
# a byte array, and defines a `const str` variable VAR-NAME referring to it.

# Print the arguments to stderr and exit with status 1.
die() {
echo >&2 "$@"
exit 1
}

# exactly two arguments are required, the first one naming a regular file
if [ $# -ne 2 ]; then
die "Usage: $(basename "$0") FILE VAR-NAME"
fi

if [ ! -f "$1" ]; then
die "$0: file \"$1\" does not exist, or is not a file."
fi

set -e

# file header and the opening of the byte array
cat << EOF
// AUTOMATICALLY GENERATED FILE - DO NOT EDIT

// source file: $1

#include "str.h"

static
const char _bytes[] = {
EOF

# hex dump of the file, 12 bytes per line, each byte rewritten as "0xNN,"
od -v -w12 -A n -t x1 "$1" | sed -E 's/\<([[:xdigit:]]{2})\>/0x\1,/g'

# terminating zero byte (excluded from the string length) and the variable itself
cat << EOF
0x00 };

const str $2 = (const str){ _bytes, _ref_info(sizeof(_bytes) - 1) };
EOF
|
||||||
209
3rd/str/tools/gen_char_class.c
Normal file
209
3rd/str/tools/gen_char_class.c
Normal file
@@ -0,0 +1,209 @@
|
|||||||
|
/*
|
||||||
|
BSD 3-Clause License
|
||||||
|
|
||||||
|
Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice, this
|
||||||
|
list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer in the documentation
|
||||||
|
and/or other materials provided with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the copyright holder nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <locale.h>
|
||||||
|
#include <wctype.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
// platform checks
|
||||||
|
#ifndef __STDC_ISO_10646__
|
||||||
|
#error "this platform does not support UNICODE (__STDC_ISO_10646__ is not defined)"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if __SIZEOF_WCHAR_T__ < 4 || __SIZEOF_WINT_T__ < 4
|
||||||
|
#error "this platform does not have a usable wchar_t (both sizeof(wchar_t) and sizeof(wint_t) should be at least 4)"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// i/o helpers
|
||||||
|
// Report a fatal error and terminate.
// Prints `msg` followed by the description of the current errno value
// (via perror()) to stderr, then exits with status 1. Never returns.
// The attribute keyword is spelled `__attribute__`, as on usage_exit() below.
static __attribute__((noinline, noreturn))
void die(const char* const msg)
{
	perror(msg);
	exit(1);
}
|
||||||
|
|
||||||
|
// printf() wrapper: a negative return value from printf() is treated
// as a fatal output error and reported through die().
#define do_printf(fmt, ...) \
	do { \
		if(printf(fmt, ##__VA_ARGS__) < 0) \
			die("error writing output"); \
	} while(0)
|
||||||
|
|
||||||
|
// Write a character array (without its last byte, i.e. the null terminator
// of a string literal) to stdout; a short write is a fatal error reported
// through die(). The argument must be an array, as its size is taken
// with sizeof.
#define do_write(str) \
	do { \
		if(fwrite((str), 1, sizeof(str) - 1, stdout) != sizeof(str) - 1) \
			die("error writing output"); \
	} while(0)
|
||||||
|
|
||||||
|
// char type selector (isw*() functions)
|
||||||
|
// pointer to a function taking a wide character and returning an int;
// callers in this file treat a non-zero result as "character belongs to the class"
typedef int (*selector)(wint_t wc);
|
||||||
|
|
||||||
|
// option parser
|
||||||
|
// Print the usage text to stderr and exit with status 1. Never returns.
static __attribute__((noreturn))
void usage_exit(void)
{
	static const char help_text[] =
		"Usage: gen-char-class SELECTOR\n"
		" Generate a character classification C function that does the same as its\n"
		" isw*() counterpart under the current locale as specified by LC_ALL\n"
		" environment variable. SELECTOR specifies the classification function\n"
		" to generate, it must be any one of:\n"
		" --alnum -> use iswalnum()\n"
		" --alpha -> use iswalpha()\n"
		" --blank -> use iswblank()\n"
		" --cntrl -> use iswcntrl()\n"
		" --digit -> use iswdigit()\n"
		" --graph -> use iswgraph()\n"
		" --lower -> use iswlower()\n"
		" --print -> use iswprint()\n"
		" --punct -> use iswpunct()\n"
		" --space -> use iswspace()\n"
		" --upper -> use iswupper()\n"
		" --xdigit -> use iswxdigit()\n";

	fputs(help_text, stderr);
	exit(1);
}
|
||||||
|
|
||||||
|
static
|
||||||
|
selector fn;
|
||||||
|
|
||||||
|
static
|
||||||
|
const char* fn_name;
|
||||||
|
|
||||||
|
static
|
||||||
|
const char* loc;
|
||||||
|
|
||||||
|
#define ARG(name) \
|
||||||
|
if(strcmp(argv[1], "--" #name) == 0) { \
|
||||||
|
fn = isw ## name; fn_name = #name; \
|
||||||
|
return; \
|
||||||
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void read_opts(int argc, char* const argv[])
|
||||||
|
{
|
||||||
|
if(argc != 2)
|
||||||
|
usage_exit();
|
||||||
|
|
||||||
|
ARG(alnum)
|
||||||
|
ARG(alpha)
|
||||||
|
ARG(blank)
|
||||||
|
ARG(cntrl)
|
||||||
|
ARG(digit)
|
||||||
|
ARG(graph)
|
||||||
|
ARG(lower)
|
||||||
|
ARG(print)
|
||||||
|
ARG(punct)
|
||||||
|
ARG(space)
|
||||||
|
ARG(upper)
|
||||||
|
ARG(xdigit)
|
||||||
|
|
||||||
|
if(strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0)
|
||||||
|
usage_exit();
|
||||||
|
|
||||||
|
fprintf(stderr, "unknown option: \"%s\"\n", argv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef ARG
|
||||||
|
|
||||||
|
// range printing

// Emit one `case` label of the generated switch statement to stdout:
// a single-value label when first == last, otherwise a `first ... last`
// range label. Values are printed in upper-case hex, at least 2 digits.
static
void print_range(const wint_t first, const wint_t last)
{
	// %X requires an `unsigned int` argument, while the underlying type of
	// wint_t is implementation-defined; convert explicitly so the format
	// specifier and the argument types always agree.
	const unsigned lo = (unsigned)first;
	const unsigned hi = (unsigned)last;

	if(lo == hi)
		do_printf("\t\tcase 0x%.2X:\n", lo);
	else
		do_printf("\t\tcase 0x%.2X ... 0x%.2X:\n", lo, hi);
}
|
||||||
|
|
||||||
|
// header/footer

// printf() template for the opening of the generated function. It takes two
// string arguments, in this order: the locale name and the class name that
// is appended to "is_" to form the function name.
static const char header[] =
	"/* LC_ALL = \"%s\" */\n"
	"bool is_%s(const char32_t c)\n"
	"{\n switch(c)\n {\n";
|
||||||
|
|
||||||
|
// Closing part of the generated function: the shared `return true` for all
// emitted case labels, the `default` branch, and the closing braces.
// Kept as an array (not a pointer) because it is written with do_write(),
// which takes its length with sizeof.
static const char footer[] =
	" return true;\n"
	" default:\n return false;\n"
	" }\n}\n";
|
||||||
|
|
||||||
|
// main
|
||||||
|
#define UTF32_MAX_CHAR 0x10ffff
|
||||||
|
|
||||||
|
int main(int argc, char* const argv[])
|
||||||
|
{
|
||||||
|
read_opts(argc, argv);
|
||||||
|
|
||||||
|
loc = getenv("LC_ALL");
|
||||||
|
|
||||||
|
if(loc && !setlocale(LC_ALL, loc))
|
||||||
|
die("cannot change current locale");
|
||||||
|
|
||||||
|
errno = 0;
|
||||||
|
do_printf(header, loc ? loc : "", fn_name);
|
||||||
|
|
||||||
|
wint_t first = 0;
|
||||||
|
bool in_range = false;
|
||||||
|
|
||||||
|
for(wint_t c = 0; c <= UTF32_MAX_CHAR; ++c)
|
||||||
|
{
|
||||||
|
const bool match = (fn(c) != 0);
|
||||||
|
|
||||||
|
if(in_range && !match)
|
||||||
|
print_range(first, c - 1);
|
||||||
|
else if(!in_range && match)
|
||||||
|
first = c;
|
||||||
|
|
||||||
|
in_range = match;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(in_range)
|
||||||
|
print_range(first, UTF32_MAX_CHAR);
|
||||||
|
|
||||||
|
do_write(footer);
|
||||||
|
|
||||||
|
if(fflush(stdout))
|
||||||
|
die("error writing output");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user