write a dynamic file reader and remove str library
This commit is contained in:
@@ -1,11 +0,0 @@
|
||||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = tab
|
||||
indent_size = 4
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
end_of_line = lf
|
||||
|
||||
[Makefile]
|
||||
indent_size = 8
|
||||
4
3rd/str/.gitignore
vendored
4
3rd/str/.gitignore
vendored
@@ -1,4 +0,0 @@
|
||||
test
|
||||
flto-test
|
||||
*.bak
|
||||
tools/gen-char-class
|
||||
@@ -1,30 +0,0 @@
|
||||
BSD 3-Clause License
|
||||
|
||||
Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
# flags
|
||||
CC_WARN := -Wall -Wextra -Werror=implicit-function-declaration -Wformat -Werror=format-security
|
||||
|
||||
ifeq ($(CC),musl-gcc)
|
||||
# musl is ISO 10646 compliant but doesn't define __STDC_ISO_10646__
|
||||
CC_EXTRA := -D__STDC_ISO_10646__=201706L
|
||||
else
|
||||
# sanitisers only work for non-musl builds
|
||||
CC_SAN := -fsanitize=address -fsanitize=leak -fsanitize=undefined -fsanitize-address-use-after-scope
|
||||
endif
|
||||
|
||||
test: CFLAGS := -ggdb -std=c11 -pipe $(CC_WARN) $(CC_EXTRA) -fno-omit-frame-pointer $(CC_SAN)
|
||||
flto-test: CFLAGS := -s -O2 -pipe -std=c11 $(CC_WARN) $(CC_EXTRA) -flto -march=native -mtune=native
|
||||
tools: CFLAGS := -s -O2 -pipe -std=c11 $(CC_WARN) $(CC_EXTRA)
|
||||
|
||||
# str library source files
|
||||
SRC := str.c str.h str_test.c
|
||||
|
||||
# all
|
||||
.PHONY: all
|
||||
all: tools test flto-test
|
||||
|
||||
.PHONY: clean
|
||||
clean: clean-test clean-tools
|
||||
|
||||
# test
|
||||
test: $(SRC)
|
||||
$(CC) $(CFLAGS) -o $@ $(filter %.c,$^)
|
||||
./$@
|
||||
|
||||
flto-test: $(SRC)
|
||||
$(CC) $(CFLAGS) -o $@ $(filter %.c,$^)
|
||||
./$@
|
||||
|
||||
.PHONY: clean-test
|
||||
clean-test:
|
||||
rm -f test flto-test
|
||||
|
||||
# tools
|
||||
GEN_CHAR_CLASS := tools/gen-char-class
|
||||
|
||||
.PHONY: tools
|
||||
tools: $(GEN_CHAR_CLASS)
|
||||
|
||||
# gen-char-class
|
||||
$(GEN_CHAR_CLASS): tools/gen_char_class.c
|
||||
$(CC) $(CFLAGS) -o $@ $(filter %.c,$^)
|
||||
|
||||
.PHONY: clean-tools
|
||||
clean-tools:
|
||||
rm -f $(GEN_CHAR_CLASS)
|
||||
@@ -1,440 +0,0 @@
|
||||
# str: yet another string library for C language.
|
||||
|
||||
[License: BSD 3-Clause](https://opensource.org/licenses/BSD-3-Clause)
|
||||
|
||||
## Motivation
|
||||
|
||||
Bored with developing the same functionality over and over again, unsatisfied
|
||||
with existing libraries, so decided to make the right one, once and forever. 🙂
|
||||
|
||||
## Features
|
||||
|
||||
* Handles both C and binary strings;
|
||||
* Light-weight references to strings: cheap to create, copy, or pass by value;
|
||||
* Support for copy and move semantics, although not enforceable by the C language;
|
||||
* String composition functions writing to memory, file descriptors, or file streams;
|
||||
* Can be compiled using `gcc` or `clang`, and linked with `libc` or `musl`.
|
||||
|
||||
## Installation
|
||||
Just clone the project and copy (or symlink) the files `str.h` and `str.c` into your project,
|
||||
but please respect the [license](LICENSE).
|
||||
|
||||
## Code Examples
|
||||
|
||||
String composition:
|
||||
|
||||
```C
|
||||
str s = str_null;
|
||||
|
||||
str_join(&s, str_lit(", "),
|
||||
str_lit("Here"),
|
||||
str_lit("there"),
|
||||
str_lit("and everywhere"));
|
||||
|
||||
str_cat(&s, s, str_lit("..."));
|
||||
|
||||
assert(str_eq(s, str_lit("Here, there, and everywhere...")));
|
||||
str_free(s);
|
||||
```
|
||||
|
||||
Same as above, but writing to a file:
|
||||
|
||||
```C
|
||||
FILE* const stream = fopen(...);
|
||||
|
||||
int err = str_join(stream, str_lit(", "),
|
||||
str_lit("Here"),
|
||||
str_lit("there"),
|
||||
str_lit("and everywhere..."));
|
||||
|
||||
if(err != 0) { /* handle the error */ }
|
||||
```
|
||||
|
||||
[Discussion](https://news.ycombinator.com/item?id=25212864) on Hacker News.
|
||||
|
||||
## User Guide
|
||||
|
||||
_**Disclaimer:** This is the good old C language, not C++ or Rust, so nothing can be enforced
|
||||
on the language level, and certain discipline is required to make sure there is no corrupt
|
||||
or leaked memory resulting from using this library._
|
||||
|
||||
A string is represented by the type `str` that maintains a pointer to some memory containing the
|
||||
actual string, and the length of the string. Objects of type `str` are small enough (a struct
|
||||
of a `const char*` and a `size_t`) to be cheap to create, copy (pass by value), and move. The
|
||||
`str` structure should be treated as opaque (i.e., do not attempt to directly access or modify
|
||||
the fields in this structure). The strings are assumed to be immutable, like those in Java or
|
||||
Go, but only by means of `const char*` pointers, so it is actually possible to modify such a
|
||||
string, although the required type cast to `char*` offers at least some (mostly psychological)
|
||||
protection from changing the string by mistake.
|
||||
|
||||
This library focusses only on handling strings, not gradually composing them like
|
||||
[StringBuffer](https://docs.oracle.com/javase/7/docs/api/java/lang/StringBuffer.html)
|
||||
class in Java.
|
||||
|
||||
All string objects must be initialised before use. Uninitialised objects will cause
|
||||
undefined behaviour. Use the provided constructors, or `str_null` for empty strings.
|
||||
|
||||
There are two kinds of `str` objects: those actually owning the memory they point to, and
|
||||
non-owning references. This property can be queried using `str_is_owner` and `str_is_ref`
|
||||
functions, otherwise such objects are indistinguishable.
|
||||
|
||||
Non-owning string objects are safe to copy and assign to each other, as long as the memory
|
||||
they refer to is valid. They do not need to be freed. `str_free` is a no-op for reference
|
||||
objects. A reference object can be cheaply created from a C string, a string literal,
|
||||
or from a range of bytes.
|
||||
|
||||
Owning objects require special treatment, in particular:
|
||||
* It is a good idea to have only one owning object per each allocated string, but such
|
||||
a string can have many references to its underlying string, as long as those references do not
|
||||
outlive the owning object.
|
||||
Sometimes this rule may be relaxed for code clarity, like in the above example where
|
||||
the owning object is passed directly to a function, but only if the function does not
|
||||
store or release the object. When in doubt pass such an object via `str_ref`.
|
||||
* Direct assignments (like `s2 = s1;`) to owning objects will certainly leak memory, use
|
||||
`str_assign` function instead. In fact, this function can assign to any string object,
|
||||
owning or not, so it can be used everywhere, just to avoid any doubt.
|
||||
* There is no automatic memory management in C, so every owning object must be released at
|
||||
some point using either `str_free` or `str_clear` function. String objects on the stack
|
||||
can also be declared as `str_auto` (or `const str_auto`) for automatic cleanup when the variable
|
||||
goes out of scope.
|
||||
* An owning object can be moved to another location by using `str_move` function. The
|
||||
function resets its source object to an empty string.
|
||||
* Object ownership can be passed over to another object by using `str_pass` function. The
|
||||
function sets its source to a non-owning reference to the original string.
|
||||
|
||||
It is technically possible to create a reference to a string that is not
|
||||
null-terminated. The library accepts strings without null-terminators, but every new string
|
||||
allocated by the library is guaranteed to be null-terminated.
|
||||
|
||||
### String Construction
|
||||
|
||||
A string object can be constructed from any C string, string literal, or a range of bytes.
|
||||
The provided constructors are computationally cheap to apply. Depending on the constructor,
|
||||
the new object can either own the actual string it refers to, or be a non-owning reference.
|
||||
Constructors themselves do not allocate any memory. Importantly, constructors are the only
|
||||
functions in this library that return a string object, while others only assign their results
|
||||
through a pointer to a pre-existing string. This makes constructors suitable for initialisation
|
||||
of new string objects. In all other situations one should combine construction with assignment,
|
||||
for example:<br>
|
||||
`str_assign(&dest, str_acquire_chars(buff, n));`
|
||||
|
||||
### String Object Properties
|
||||
|
||||
Querying a property of a string object (like the length of the string via `str_len`) is a
|
||||
cheap operation.
|
||||
|
||||
### Assigning, Moving, and Passing String Objects
|
||||
|
||||
C language does not allow for operator overloading, so this library provides a function
|
||||
`str_assign` that takes a string object and assigns it to the destination object, freeing
|
||||
any memory owned by the destination. It is generally recommended to use this function
|
||||
everywhere outside object initialisation.
|
||||
|
||||
An existing object can be moved over to another location via `str_move` function.
|
||||
The function resets the source object to `str_null` to guarantee the correct move semantics.
|
||||
The value returned by `str_move` may be either used to initialise a new object, or
|
||||
assigned to an existing object using `str_assign`.
|
||||
|
||||
An existing object can also be passed over to another location via `str_pass` function. The function
|
||||
sets the source object to be a non-owning reference to the original string, otherwise the semantics
|
||||
and usage is the same as `str_move`.
|
||||
|
||||
### String Composition and Generic Destination
|
||||
|
||||
String composition [functions](#string-composition) can write their results to different
|
||||
destinations, depending on the _type_ of their `dest` parameter:
|
||||
|
||||
* `str*`: result is assigned to the string object;
|
||||
* `int`: result is written to the file descriptor;
|
||||
* `FILE*`: result is written to the file stream.
|
||||
|
||||
The composition functions return 0 on success, or the value of `errno` as retrieved at the point
|
||||
of failure (including `ENOMEM` on memory allocation error).
|
||||
|
||||
### Detailed Example
|
||||
|
||||
Just to make things more clear, here is the same code as in the example above, but with comments:
|
||||
```C
|
||||
// declare a variable and initialise it with an empty string; could also be declared as "str_auto"
|
||||
// to avoid explicit call to str_free() below.
|
||||
str s = str_null;
|
||||
|
||||
// join the given string literals around the separator (second parameter),
|
||||
// storing the result in object "s" (first parameter); in this example we do not check
|
||||
// the return values of the composition functions, thus ignoring memory allocation failures,
|
||||
// which is probably not the best idea in general.
|
||||
str_join(&s, str_lit(", "),
|
||||
str_lit("Here"),
|
||||
str_lit("there"),
|
||||
str_lit("and everywhere"));
|
||||
|
||||
// create a new string concatenating "s" and a literal; the function only modifies its
|
||||
// destination object "s" after the result is computed, also freeing the destination
|
||||
// before the assignment, so it is safe to use "s" as both a parameter and a destination.
|
||||
// note: we pass a copy of the owning object "s" as the second parameter, and here it is
|
||||
// safe to do so because this particular function does not modify its arguments.
|
||||
str_cat(&s, s, str_lit("..."));
|
||||
|
||||
// check that we have got the expected result
|
||||
assert(str_eq(s, str_lit("Here, there, and everywhere...")));
|
||||
|
||||
// finally, free the memory allocated for the string
|
||||
str_free(s);
|
||||
```
|
||||
|
||||
There are some useful [code snippets](snippets.md) provided to assist with writing code using
|
||||
this library.
|
||||
|
||||
## API brief
|
||||
|
||||
`typedef struct { ... } str;`<br>
|
||||
The string object.
|
||||
|
||||
#### String Properties
|
||||
|
||||
`size_t str_len(const str s)`<br>
|
||||
Returns the number of bytes in the string referenced by the object.
|
||||
|
||||
`const char* str_ptr(const str s)`<br>
|
||||
Returns a pointer to the first byte of the string referenced by the object. The pointer is never NULL.
|
||||
|
||||
`const char* str_end(const str s)`<br>
|
||||
Returns a pointer to the next byte past the end of the string referenced by the object.
|
||||
The pointer is never NULL, but it is not guaranteed to point to any valid byte or location.
|
||||
For C strings it points to the terminating null character. For any given string `s` the following
|
||||
condition is always satisfied: `str_end(s) == str_ptr(s) + str_len(s)`.
|
||||
|
||||
`bool str_is_empty(const str s)`<br>
|
||||
Returns "true" for empty strings.
|
||||
|
||||
`bool str_is_owner(const str s)`<br>
|
||||
Returns "true" if the string object is the owner of the memory it references.
|
||||
|
||||
`bool str_is_ref(const str s)`<br>
|
||||
Returns "true" if the string object does not own the memory it references.
|
||||
|
||||
#### String Construction
|
||||
|
||||
`str_null`<br>
|
||||
Empty string constant.
|
||||
|
||||
`str str_lit(s)`<br>
|
||||
Constructs a non-owning object from a string literal. Implemented as a macro.
|
||||
|
||||
`str str_ref(s)`<br>
|
||||
Constructs a non-owning object from either a null-terminated C string, or another `str` object.
|
||||
Implemented as a macro.
|
||||
|
||||
`str str_ref_chars(const char* const s, const size_t n)`<br>
|
||||
Constructs a non-owning object referencing the given range of bytes.
|
||||
|
||||
`str str_acquire_chars(const char* const s, const size_t n)`<br>
|
||||
Constructs an owning object for the specified range of bytes. The pointer `s` should be safe
|
||||
to pass to `free(3)` function.
|
||||
|
||||
`str str_acquire(const char* const s)`<br>
|
||||
Constructs an owning object from the given C string. The string should be safe to pass to
|
||||
`free(3)` function.
|
||||
|
||||
`str str_move(str* const ps)`<br>
|
||||
Saves the given object to a temporary, resets the source object to `str_null`, and then
|
||||
returns the saved object.
|
||||
|
||||
`str str_pass(str* const ps)`<br>
|
||||
Saves the given object to a temporary, sets the source object to be a non-owning reference to the
|
||||
original string, and then returns the saved object.
|
||||
|
||||
#### String Deallocation
|
||||
|
||||
`void str_free(const str s)`<br>
|
||||
Deallocates any memory held by the owning string object. No-op for references. After a call to
|
||||
this function the string object is in unknown and unusable state.
|
||||
|
||||
String objects on the stack can also be declared as `str_auto` instead of `str` to deallocate
|
||||
any memory held by the string when the variable goes out of scope.
|
||||
|
||||
#### String Modification
|
||||
|
||||
`void str_assign(str* const ps, const str s)`<br>
|
||||
Assigns the object `s` to the object pointed to by `ps`. Any memory owned by the target
|
||||
object is freed before the assignment.
|
||||
|
||||
`void str_clear(str* const ps)`<br>
|
||||
Sets the target object to `str_null` after freeing any memory owned by the target.
|
||||
|
||||
`void str_swap(str* const s1, str* const s2)`<br>
|
||||
Swaps two string objects.
|
||||
|
||||
`int str_from_file(str* const dest, const char* const file_name)`<br>
|
||||
Reads the entire file (of up to 64MB by default, configurable via `STR_MAX_FILE_SIZE`) into
|
||||
the destination string. Returns 0 on success, or the value of `errno` on error.
|
||||
|
||||
#### String Comparison
|
||||
|
||||
`int str_cmp(const str s1, const str s2)`<br>
|
||||
Lexicographically compares the two string objects, with usual semantics.
|
||||
|
||||
`bool str_eq(const str s1, const str s2)`<br>
|
||||
Returns "true" if the two strings match exactly.
|
||||
|
||||
`int str_cmp_ci(const str s1, const str s2)`<br>
|
||||
Case-insensitive comparison of two strings, implemented using `strncasecmp(3)`.
|
||||
|
||||
`bool str_eq_ci(const str s1, const str s2)`<br>
|
||||
Returns "true" if the two strings match case-insensitively.
|
||||
|
||||
`bool str_has_prefix(const str s, const str prefix)`<br>
|
||||
Tests if the given string `s` starts with the specified prefix.
|
||||
|
||||
`bool str_has_suffix(const str s, const str suffix)`<br>
|
||||
Tests if the given string `s` ends with the specified suffix.
|
||||
|
||||
#### String Composition
|
||||
|
||||
`int str_cpy(dest, const str src)`<br>
|
||||
Copies the source string referenced by `src` to the
|
||||
[generic](#string-composition-and-generic-destination) destination `dest`. Returns 0 on success,
|
||||
or the value of `errno` on failure.
|
||||
|
||||
`int str_cat_range(dest, const str* src, size_t count)`<br>
|
||||
Concatenates `count` strings from the array starting at address `src`, and writes
|
||||
the result to the [generic](#string-composition-and-generic-destination) destination `dest`.
|
||||
Returns 0 on success, or the value of `errno` on failure.
|
||||
|
||||
`int str_cat(dest, ...)`<br>
|
||||
Concatenates a variable list of `str` arguments, and writes the result to the
|
||||
[generic](#string-composition-and-generic-destination) destination `dest`.
|
||||
Returns 0 on success, or the value of `errno` on failure.
|
||||
|
||||
`int str_join_range(dest, const str sep, const str* src, size_t count)`<br>
|
||||
Joins around `sep` the `count` strings from the array starting at address `src`, and writes
|
||||
the result to the [generic](#string-composition-and-generic-destination) destination `dest`.
|
||||
Returns 0 on success, or the value of `errno` on failure.
|
||||
|
||||
`int str_join(dest, const str sep, ...)`<br>
|
||||
Joins a variable list of `str` arguments around `sep` delimiter, and writes the result to the
|
||||
[generic](#string-composition-and-generic-destination) destination `dest`.
|
||||
Returns 0 on success, or the value of `errno` on failure.
|
||||
|
||||
#### Searching and Sorting
|
||||
|
||||
`bool str_partition(const str src, const str patt, str* const prefix, str* const suffix)`<br>
|
||||
Splits the string `src` on the first match of `patt`, assigning a reference to the part
|
||||
of the string before the match to the `prefix` object, and the part after the match to the
|
||||
`suffix` object. Returns `true` if a match has been found, or `false` otherwise, also
|
||||
setting `prefix` to reference the entire `src` string, and clearing the `suffix` object.
|
||||
Empty pattern `patt` never matches.
|
||||
|
||||
`void str_sort_range(const str_cmp_func cmp, str* const array, const size_t count)`<br>
|
||||
Sorts the given array of `str` objects using the given comparison function. A number
|
||||
of typically used comparison functions is also provided:
|
||||
* `str_order_asc` (ascending sort)
|
||||
* `str_order_desc` (descending sort)
|
||||
* `str_order_asc_ci` (ascending case-insensitive sort)
|
||||
* `str_order_desc_ci` (descending case-insensitive sort)
|
||||
|
||||
`const str* str_search_range(const str key, const str* const array, const size_t count)`<br>
|
||||
Binary search for the given key. The input array must be sorted using `str_order_asc`.
|
||||
Returns a pointer to the string matching the key, or NULL.
|
||||
|
||||
`size_t str_partition_range(bool (*pred)(const str), str* const array, const size_t count)`<br>
|
||||
Reorders the string objects in the given range in such a way that all elements for which
|
||||
the predicate `pred` returns "true" precede the elements for which predicate `pred`
|
||||
returns "false". Returns the number of preceding objects.
|
||||
|
||||
`size_t str_unique_range(str* const array, const size_t count)`<br>
|
||||
Reorders the string objects in the given range in such a way that there are two partitions:
|
||||
one where each object is unique within the input range, and another partition with all the
|
||||
remaining objects. The unique partition is stored at the beginning of the array, and is
|
||||
sorted in ascending order, followed by the partition with all remaining objects.
|
||||
Returns the number of unique objects.
|
||||
|
||||
#### UNICODE support
|
||||
|
||||
`for_each_codepoint(var_name, src_string)`<br>
|
||||
A macro that expands to a loop iterating over the given string `src_string` (of type `str`) by UTF-32
|
||||
code points. On each iteration the variable `var_name` (of type `char32_t`) is assigned
|
||||
the value of the next valid UTF-32 code point from the source string. Upon exit from the loop the
|
||||
variable has one of the following values:
|
||||
* `CPI_END_OF_STRING`: the iteration has reached the end of source string;
|
||||
* `CPI_ERR_INCOMPLETE_SEQ`: an incomplete byte sequence has been detected;
|
||||
* `CPI_ERR_INVALID_ENCODING`: an invalid byte sequence has been detected.
|
||||
|
||||
The source string is expected to be encoded in the _current program locale_, as set by the most
|
||||
recent call to `setlocale(3)`.
|
||||
|
||||
Usage pattern:
|
||||
```c
|
||||
#include <uchar.h>
|
||||
...
|
||||
str s = ...
|
||||
...
|
||||
char32_t c; // variable to receive UTF-32 values on each iteration
|
||||
|
||||
for_each_codepoint(c, s)
|
||||
{
|
||||
/* process c */
|
||||
}
|
||||
|
||||
if(c != CPI_END_OF_STRING)
|
||||
{
|
||||
/* handle error */
|
||||
}
|
||||
```
|
||||
|
||||
#### Tokeniser
|
||||
|
||||
Tokeniser interface provides functionality similar to `strtok(3)` function. The tokeniser
|
||||
is fully re-entrant with no hidden state, and its input string is not modified while being
|
||||
parsed.
|
||||
|
||||
##### Typical usage:
|
||||
```C
|
||||
// declare and initialise tokeniser state
|
||||
str_tok_state state;
|
||||
|
||||
str_tok_init(&state, source_string, delimiter_set);
|
||||
|
||||
// object to receive tokens
|
||||
str token = str_null;
|
||||
|
||||
// token iterator
|
||||
while(str_tok(&token, &state))
|
||||
{
|
||||
/* process "token" */
|
||||
}
|
||||
```
|
||||
|
||||
##### Tokeniser API
|
||||
|
||||
`void str_tok_init(str_tok_state* const state, const str src, const str delim_set)`<br>
|
||||
Initialises tokeniser state with the given source string and delimiter set. The delimiter set
|
||||
is treated as bytes, _not_ as UNICODE code points encoded in UTF-8.
|
||||
|
||||
`bool str_tok(str* const dest, str_tok_state* const state)`<br>
|
||||
Retrieves the next token and stores it in the `dest` object. Returns `true` if the token has
|
||||
been read, or `false` if the end of input has been reached. Retrieved token is always
|
||||
a reference to a slice of the source string.
|
||||
|
||||
`void str_tok_delim(str_tok_state* const state, const str delim_set)`<br>
|
||||
Changes the delimiter set associated with the given tokeniser state. The delimiter set is
|
||||
treated as bytes, _not_ as UNICODE code points encoded in UTF-8.
|
||||
|
||||
## Tools
|
||||
|
||||
All the tools are located in `tools/` directory. Currently, there are the following tools:
|
||||
|
||||
* `file-to-str`: The script takes a file (text or binary) and a C variable name, and
|
||||
writes to `stdout` C source code where the variable (of type `str`) is defined
|
||||
and initialised with the content of the file.
|
||||
|
||||
* `gen-char-class`: Generates character classification functions that do the same as their
|
||||
`isw*()` counterparts under the current locale as specified by `LC_ALL` environment variable.
|
||||
Run `tools/gen-char-class --help` for further details, or `tools/gen-char-class --space`
|
||||
to see an example of its output.
|
||||
|
||||
## Project Status
|
||||
The library requires at least a C11 compiler. So far it has been tested on Linux Mint versions
|
||||
from 19.3 to 22.0, with `gcc` versions from 9.5.0 to 13.2.0 (with either `libc` or `musl`),
|
||||
and `clang` versions up to 18.1.3; it is also reported to work on ALT Linux 9.1 for Elbrus, with
|
||||
`lcc` version 1.25.09.
|
||||
@@ -1,63 +0,0 @@
|
||||
### Code Examples
|
||||
|
||||
Here I provide various (hopefully, useful) functions and code examples that are not included in the
|
||||
main library. Some examples use non-POSIX and/or compiler-specific features that may or may
|
||||
not be suitable for a particular project. Also, these snippets were tested while being developed,
|
||||
but they may break in the future as the library evolves.
|
||||
|
||||
##### `void str_sprintf(str* const dest, const char* fmt, ...)`
|
||||
|
||||
Probably the simplest implementation utilising non-POSIX `asprintf(3)` function:
|
||||
```C
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include "str.h"
|
||||
|
||||
#define str_sprintf(dest, fmt, ...) \
|
||||
({ \
|
||||
char* ___p; \
|
||||
const int ___n = asprintf(&___p, (fmt), ##__VA_ARGS__); \
|
||||
str_assign((dest), str_acquire_chars(___p, ___n)); \
|
||||
})
|
||||
```
|
||||
This code does not check for errors. A more standard-conforming implementation would probably go
|
||||
through `open_memstream(3)` function.
|
||||
|
||||
##### `int str_from_int(str* const dest, const int val)`
|
||||
```C
|
||||
int str_from_int(str* const dest, const int val)
|
||||
{
|
||||
char buff[256]; // of some "big enough" size
|
||||
|
||||
return str_cpy(dest, str_ref_chars(buff, snprintf(buff, sizeof(buff), "%d", val)));
|
||||
}
|
||||
```
|
||||
|
||||
This code can also be used as a template for other functions converting from `double`, `struct tm`, etc.
|
||||
|
||||
##### `int str_append(str* const dest, ...)`
|
||||
```C
|
||||
#define str_append(dest, ...) \
|
||||
({ str* const ___p = (dest); str_cat(___p, *___p, ##__VA_ARGS__); })
|
||||
```
|
||||
Test case and usage example:
|
||||
```C
|
||||
str s = str_lit("zzz");
|
||||
|
||||
assert(str_append(&s, str_lit(" "), str_lit("aaa")) == 0);
|
||||
assert(str_eq(s, str_lit("zzz aaa")));
|
||||
|
||||
str_free(s);
|
||||
```
|
||||
|
||||
##### Using `str` objects with `printf` family of functions
|
||||
|
||||
Since a string object is not guaranteed to refer to a null-terminated string it should be formatted
|
||||
with explicitly specified length, for example:
|
||||
```C
|
||||
str s = ...
|
||||
|
||||
printf("%.*s\n", (int)str_len(s), str_ptr(s));
|
||||
```
|
||||
_Note:_ The maximum length of the string is limited to `INT_MAX` bytes, and formatting will stop
|
||||
at the first null byte within the string.
|
||||
839
3rd/str/str.c
839
3rd/str/str.c
@@ -1,839 +0,0 @@
|
||||
/*
|
||||
BSD 3-Clause License
|
||||
|
||||
Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define _DEFAULT_SOURCE // for strncasecmp()
|
||||
#define _XOPEN_SOURCE 500 // for IOV_MAX
|
||||
|
||||
#include "str.h"
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
// Append to destination and return the end pointer.
//
// Copies `n` bytes from `src` to `dest` (memcpy semantics: the regions must
// not overlap) and returns a pointer to the byte just past the copied data,
// so that consecutive appends can be chained.
//
// The end pointer is computed through a `char*` cast: arithmetic directly on
// `void*` is a GNU extension and is not valid ISO C.
static inline
void* mem_append(void* dest, const void* src, const size_t n)
{
	return (char*)memcpy(dest, src, n) + n;
}
|
||||
|
||||
// string deallocation
|
||||
void str_free(const str s)
|
||||
{
|
||||
if(str_is_owner(s))
|
||||
free((void*)s.ptr);
|
||||
}
|
||||
|
||||
// version of str_free() for str_auto macro
|
||||
void str_free_auto(const str* const ps)
|
||||
{
|
||||
if(ps)
|
||||
str_free(*ps);
|
||||
}
|
||||
|
||||
// memory allocation helpers
// ALLOC(n): malloc() wrapper implemented as a GNU statement expression; it
// evaluates to the allocated pointer, or makes the *enclosing function*
// return ENOMEM when the allocation fails.
#define ALLOC(n) \
	({ \
		void* const alloc_res_ = malloc(n); \
		if(alloc_res_ == NULL) \
			return ENOMEM; \
		alloc_res_; \
	})
|
||||
|
||||
// REALLOC(p, n): realloc() wrapper implemented as a GNU statement expression;
// it evaluates to the resized pointer, or makes the *enclosing function*
// return ENOMEM when the reallocation fails.
#define REALLOC(p, n) \
	({ \
		void* const realloc_res_ = realloc((p), (n)); \
		if(realloc_res_ == NULL) \
			return ENOMEM; \
		realloc_res_; \
	})
|
||||
|
||||
|
||||
// errno checker: re-evaluates `expr` for as long as it yields a negative
// value with errno == EINTR; any other errno value is returned from the
// *enclosing function*. The trailing `while(0)` expects the caller's `;`.
#define RETURN_ON_ERROR(expr) \
	while((expr) < 0) \
		do { \
			const int saved_errno_ = errno; \
			if(saved_errno_ != EINTR) \
				return saved_errno_; \
		} while(0)
|
||||
|
||||
// swap
|
||||
void str_swap(str* const s1, str* const s2)
|
||||
{
|
||||
const str tmp = *s1;
|
||||
|
||||
*s1 = *s2;
|
||||
*s2 = tmp;
|
||||
}
|
||||
|
||||
// empty string
|
||||
// Constant pointer (external linkage) to an empty, null-terminated string literal.
// NOTE(review): presumably the target of empty `str` objects, so that str_ptr()
// never yields NULL -- confirm against str.h.
const char* const str_empty_string = "";
|
||||
|
||||
// string comparison ---------------------------------------------------------------------
|
||||
// compare two strings lexicographically
|
||||
int str_cmp(const str s1, const str s2)
|
||||
{
|
||||
const size_t n1 = str_len(s1), n2 = str_len(s2);
|
||||
|
||||
// either string may be missing a null terminator, hence "memcmp"
|
||||
const int res = memcmp(str_ptr(s1), str_ptr(s2), (n1 < n2) ? n1 : n2);
|
||||
|
||||
if(res != 0 || n1 == n2)
|
||||
return res;
|
||||
|
||||
return (n1 < n2) ? -1 : 1;
|
||||
}
|
||||
|
||||
// case-insensitive comparison
|
||||
int str_cmp_ci(const str s1, const str s2)
|
||||
{
|
||||
const size_t n1 = str_len(s1), n2 = str_len(s2);
|
||||
|
||||
// either string may be missing a null terminator, hence "strNcasecmp"
|
||||
const int res = strncasecmp(str_ptr(s1), str_ptr(s2), (n1 < n2) ? n1 : n2);
|
||||
|
||||
if(res != 0 || n1 == n2)
|
||||
return res;
|
||||
|
||||
return (n1 < n2) ? -1 : 1;
|
||||
}
|
||||
|
||||
// test for prefix
|
||||
bool str_has_prefix(const str s, const str prefix)
|
||||
{
|
||||
const size_t n = str_len(prefix);
|
||||
|
||||
return (n == 0)
|
||||
|| (str_len(s) >= n && memcmp(str_ptr(s), str_ptr(prefix), n) == 0);
|
||||
}
|
||||
|
||||
// test for suffix
|
||||
bool str_has_suffix(const str s, const str suffix)
|
||||
{
|
||||
const size_t n = str_len(suffix);
|
||||
|
||||
return (n == 0)
|
||||
|| (str_len(s) >= n && memcmp(str_end(s) - n, str_ptr(suffix), n) == 0);
|
||||
}
|
||||
|
||||
// string constructors -----------------------------------------------------------------
|
||||
// create a reference to the given range of chars
|
||||
str str_ref_chars(const char* const s, const size_t n)
|
||||
{
|
||||
return (s && n > 0) ? ((str){ s, str_ref_info(n) }) : str_null;
|
||||
}
|
||||
|
||||
str str_ref_from_ptr(const char* const s)
|
||||
{
|
||||
return s ? str_ref_chars(s, strlen(s)) : str_null;
|
||||
}
|
||||
|
||||
// take ownership of the given range of chars
|
||||
str str_acquire_chars(const char* const s, const size_t n)
|
||||
{
|
||||
if(!s)
|
||||
return str_null;
|
||||
|
||||
if(n == 0)
|
||||
{
|
||||
free((void*)s);
|
||||
return str_null;
|
||||
}
|
||||
|
||||
return (str){ s, str_owner_info(n) };
|
||||
}
|
||||
|
||||
// take ownership of the given C string
|
||||
str str_acquire(const char* const s)
|
||||
{
|
||||
return s ? str_acquire_chars(s, strlen(s)) : str_null;
|
||||
}
|
||||
|
||||
// allocate a copy of the given string
|
||||
int str_dup_impl(str* const dest, const str s)
|
||||
{
|
||||
const size_t n = str_len(s);
|
||||
|
||||
if(n == 0)
|
||||
str_clear(dest);
|
||||
else
|
||||
{
|
||||
char* const p = memcpy(ALLOC(n + 1), str_ptr(s), n);
|
||||
|
||||
p[n] = 0;
|
||||
str_assign(dest, str_acquire_chars(p, n));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifndef STR_MAX_FILE_SIZE
|
||||
#define STR_MAX_FILE_SIZE (64 * 1024 * 1024 - 1)
|
||||
#endif
|
||||
|
||||
static
|
||||
int get_file_size(const int fd, off_t* const size)
|
||||
{
|
||||
// stat the file
|
||||
struct stat info;
|
||||
|
||||
RETURN_ON_ERROR(fstat(fd, &info));
|
||||
|
||||
*size = info.st_size;
|
||||
|
||||
// only regular files are allowed
|
||||
switch(info.st_mode & S_IFMT)
|
||||
{
|
||||
case S_IFREG:
|
||||
return (info.st_size > STR_MAX_FILE_SIZE) ? EFBIG : 0;
|
||||
case S_IFDIR:
|
||||
return EISDIR;
|
||||
default:
|
||||
return EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
// read up to `*psize` bytes from `fd` into `p`, stopping early at end-of-file.
// On success returns 0 and stores the number of bytes actually read in `*psize`;
// on a read error returns the errno value (EINTR is retried).
static
int read_from_fd(const int fd, void* p, off_t* const psize)
{
	char* const base = p;
	const size_t want = (size_t)*psize;
	size_t got = 0;
	ssize_t n;

	do
	{
		RETURN_ON_ERROR(n = read(fd, base + got, want - got));

		got += (size_t)n;
	} while(n > 0 && got < want);

	*psize = (off_t)got;
	return 0;
}
|
||||
|
||||
static
|
||||
int read_from_fd_cont(const int fd, void* p, off_t* const psize)
|
||||
{
|
||||
const void* end = p + *psize;
|
||||
void *buf = p;
|
||||
ssize_t n;
|
||||
ssize_t nread = 0;
|
||||
|
||||
do
|
||||
{
|
||||
RETURN_ON_ERROR(n = read(fd, p, end - p));
|
||||
|
||||
p += n;
|
||||
nread += n;
|
||||
|
||||
// pre-emptively realloc, even though we could potentially be at the end
|
||||
if (p == end) {
|
||||
*psize *= 2;
|
||||
buf = REALLOC(buf, *psize);
|
||||
p = buf + nread;
|
||||
end = buf + *psize;
|
||||
}
|
||||
|
||||
} while(n > 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
int str_from_fd(const int fd, const off_t size, str* const dest)
|
||||
{
|
||||
if(size == 0)
|
||||
{
|
||||
str_clear(dest);
|
||||
return 0;
|
||||
}
|
||||
|
||||
char* buff = ALLOC(size + 1);
|
||||
off_t n = size;
|
||||
const int err = read_from_fd(fd, buff, &n);
|
||||
|
||||
if(err != 0)
|
||||
{
|
||||
free(buff);
|
||||
return err;
|
||||
}
|
||||
|
||||
if(n == 0)
|
||||
{
|
||||
free(buff);
|
||||
str_clear(dest);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(n < size)
|
||||
{
|
||||
char* const p = realloc(buff, n + 1);
|
||||
|
||||
if(!p)
|
||||
{
|
||||
free(buff);
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
buff = p;
|
||||
}
|
||||
|
||||
buff[n] = 0;
|
||||
str_assign(dest, str_acquire_chars(buff, n));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static
|
||||
int str_from_stream_cont(const int fd, str* const dest, int *nmax)
|
||||
{
|
||||
const size_t start_size = 8192;
|
||||
|
||||
char* buff = ALLOC(start_size + 1);
|
||||
off_t n = start_size;
|
||||
const int err = read_from_fd_cont(fd, buff, &n);
|
||||
|
||||
if(err != 0)
|
||||
{
|
||||
free(buff);
|
||||
return err;
|
||||
}
|
||||
|
||||
if(n == 0)
|
||||
{
|
||||
free(buff);
|
||||
str_clear(dest);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(n < start_size)
|
||||
{
|
||||
char* const p = realloc(buff, n + 1);
|
||||
|
||||
if(!p)
|
||||
{
|
||||
free(buff);
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
buff = p;
|
||||
}
|
||||
|
||||
buff[n] = '\0';
|
||||
str_assign(dest, str_acquire_chars(buff, n));
|
||||
return 0;
|
||||
}
|
||||
|
||||
// load the whole regular file `file_name` into `dest`.
// Returns 0 on success or an errno-style code from open(), get_file_size()
// or str_from_fd(); the descriptor is closed on every path.
int str_from_file(str* const dest, const char* const file_name)
{
	int fd;

	RETURN_ON_ERROR(fd = open(file_name, O_CLOEXEC | O_RDONLY));

	off_t size = 0;
	const int size_err = get_file_size(fd, &size);
	const int result = (size_err != 0) ? size_err : str_from_fd(fd, size, dest);

	close(fd);
	return result;
}
|
||||
|
||||
// read from the file `file_name` into `dest` without relying on its reported size.
//   nread - in: maximum number of bytes to read; 0 (or a NULL pointer) reads
//           until end-of-file. out: on success, the number of bytes actually
//           read (capped at INT_MAX); left untouched on failure.
// Returns 0 on success, EINVAL for a negative limit, or an errno-style code
// from open() or the underlying reader.
int str_from_stream(str* const dest, const char* const file_name, int *nread)
{
	const int nmax = nread ? *nread : 0;

	// a negative limit would otherwise be turned into an allocation size
	if(nmax < 0)
		return EINVAL;

	int fd;

	RETURN_ON_ERROR(fd = open(file_name, O_CLOEXEC | O_RDONLY));

	int ignored = 0;
	const int err = (nmax == 0)
		? str_from_stream_cont(fd, dest, &ignored)
		: str_from_fd(fd, nmax, dest);

	close(fd);

	// report the real byte count, taken from the resulting string itself
	if(err == 0 && nread)
	{
		const size_t len = str_len(*dest);

		*nread = (len > INT_MAX) ? INT_MAX : (int)len;
	}

	return err;
}
|
||||
|
||||
// string composition -----------------------------------------------------------------------
|
||||
// append string
|
||||
static inline
|
||||
char* append_str(char* p, const str s)
|
||||
{
|
||||
return mem_append(p, str_ptr(s), str_len(s));
|
||||
}
|
||||
|
||||
static
|
||||
size_t total_length(const str* src, size_t count)
|
||||
{
|
||||
size_t sum = 0;
|
||||
|
||||
for(; count > 0; --count)
|
||||
sum += str_len(*src++);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
// concatenate strings
|
||||
int str_cat_range_impl(str* const dest, const str* src, size_t count)
|
||||
{
|
||||
if(!src)
|
||||
{
|
||||
str_clear(dest);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// calculate total length
|
||||
const size_t num = total_length(src, count);
|
||||
|
||||
if(num == 0)
|
||||
{
|
||||
str_clear(dest);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// allocate
|
||||
char* const buff = ALLOC(num + 1);
|
||||
|
||||
// copy bytes
|
||||
char* p = buff;
|
||||
|
||||
for(; count > 0; --count)
|
||||
p = append_str(p, *src++);
|
||||
|
||||
// null-terminate and assign
|
||||
*p = 0;
|
||||
str_assign(dest, str_acquire_chars(buff, num));
|
||||
return 0;
|
||||
}
|
||||
|
||||
// writing to file descriptor
|
||||
int str_cpy_to_fd(const int fd, const str s)
|
||||
{
|
||||
size_t n = str_len(s);
|
||||
const void* p = str_ptr(s);
|
||||
|
||||
while(n > 0)
|
||||
{
|
||||
ssize_t m;
|
||||
|
||||
RETURN_ON_ERROR(m = write(fd, p, n));
|
||||
|
||||
n -= m;
|
||||
p += m;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// writing to byte stream
|
||||
int str_cpy_to_stream(FILE* const stream, const str s)
|
||||
{
|
||||
const size_t n = str_len(s);
|
||||
|
||||
return (n > 0 && fwrite(str_ptr(s), 1, n, stream) < n) ? EIO : 0;
|
||||
}
|
||||
|
||||
// write iovec: keeps calling writev() until all `nv` entries have been written.
// The array is modified in place to track partial writes.
// Returns 0 on success or the errno value of a failed writev().
static
int write_iovec(const int fd, struct iovec* pv, unsigned nv)
{
	while(nv > 0)
	{
		ssize_t written;

		RETURN_ON_ERROR(written = writev(fd, pv, nv));

		// skip the entries that went out completely
		while(nv > 0 && written >= (ssize_t)pv->iov_len)
		{
			written -= (ssize_t)pv->iov_len;
			++pv;
			--nv;
		}

		// trim the written head off a partially written entry
		if(nv > 0)
		{
			pv->iov_base += written;
			pv->iov_len -= written;
		}
	}

	return 0;
}
|
||||
|
||||
// concatenate to file descriptor
|
||||
static
|
||||
struct iovec* vec_append(struct iovec* const pv, const str s)
|
||||
{
|
||||
*pv = (struct iovec){ (void*)str_ptr(s), str_len(s) };
|
||||
|
||||
return pv + 1;
|
||||
}
|
||||
|
||||
static
|
||||
struct iovec* vec_append_nonempty(struct iovec* const pv, const str s)
|
||||
{
|
||||
return str_is_empty(s) ? pv : vec_append(pv, s);
|
||||
}
|
||||
|
||||
int str_cat_range_to_fd(const int fd, const str* src, size_t count)
|
||||
{
|
||||
if(!src)
|
||||
return 0;
|
||||
|
||||
struct iovec v[IOV_MAX];
|
||||
|
||||
while(count > 0)
|
||||
{
|
||||
struct iovec* p = vec_append_nonempty(v, *src++);
|
||||
|
||||
while(--count > 0 && p < v + IOV_MAX)
|
||||
p = vec_append_nonempty(p, *src++);
|
||||
|
||||
const size_t n = p - v;
|
||||
|
||||
if(n == 0)
|
||||
break;
|
||||
|
||||
const int ret = write_iovec(fd, v, n);
|
||||
|
||||
if(ret != 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// write the concatenation of `count` strings to `stream`, stopping at the
// first error. Returns 0 on success (or for NULL `src`), otherwise that error.
int str_cat_range_to_stream(FILE* const stream, const str* src, size_t count)
{
	if(!src)
		return 0;

	for(size_t i = 0; i < count; ++i)
	{
		const int err = str_cpy(stream, src[i]);

		if(err != 0)
			return err;
	}

	return 0;
}
|
||||
|
||||
// join strings
|
||||
int str_join_range_impl(str* const dest, const str sep, const str* src, size_t count)
|
||||
{
|
||||
// test for simple cases
|
||||
if(str_is_empty(sep))
|
||||
return str_cat_range(dest, src, count);
|
||||
|
||||
if(!src || count == 0)
|
||||
{
|
||||
str_clear(dest);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(count == 1)
|
||||
return str_cpy(dest, *src);
|
||||
|
||||
// calculate total length
|
||||
const size_t num = total_length(src, count) + str_len(sep) * (count - 1);
|
||||
|
||||
// allocate
|
||||
char* const buff = ALLOC(num + 1);
|
||||
|
||||
// copy bytes
|
||||
char* p = append_str(buff, *src++);
|
||||
|
||||
while(--count > 0)
|
||||
p = append_str(append_str(p, sep), *src++);
|
||||
|
||||
// null-terminate and assign
|
||||
*p = 0;
|
||||
str_assign(dest, str_acquire_chars(buff, num));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int str_join_range_to_fd(const int fd, const str sep, const str* src, size_t count)
|
||||
{
|
||||
if(str_is_empty(sep))
|
||||
return str_cat_range(fd, src, count);
|
||||
|
||||
if(!src || count == 0)
|
||||
return 0;
|
||||
|
||||
if(count == 1)
|
||||
return str_cpy(fd, *src);
|
||||
|
||||
struct iovec v[IOV_MAX];
|
||||
|
||||
struct iovec* p = vec_append_nonempty(v, *src++);
|
||||
|
||||
for(--count; count > 0; p = v)
|
||||
{
|
||||
p = vec_append_nonempty(vec_append(p, sep), *src++);
|
||||
|
||||
while(--count > 0 && p < v + IOV_MAX - 1)
|
||||
p = vec_append_nonempty(vec_append(p, sep), *src++);
|
||||
|
||||
const size_t n = p - v;
|
||||
|
||||
if(n == 0)
|
||||
break;
|
||||
|
||||
const int ret = write_iovec(fd, v, n);
|
||||
|
||||
if(ret != 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// write `count` strings from `src`, joined around `sep`, to `stream`,
// stopping at the first error. Returns 0 on success, otherwise that error.
int str_join_range_to_stream(FILE* const stream, const str sep, const str* src, size_t count)
{
	if(str_is_empty(sep))
		return str_cat_range(stream, src, count);

	if(!src || count == 0)
		return 0;

	int err = str_cpy(stream, src[0]);

	// every following string is preceded by the separator
	for(size_t i = 1; i < count && err == 0; ++i)
		err = str_cat(stream, sep, src[i]);

	return err;
}
|
||||
|
||||
// searching and sorting --------------------------------------------------------------------
|
||||
// string partitioning
|
||||
bool str_partition(const str src, const str patt, str* const prefix, str* const suffix)
|
||||
{
|
||||
const size_t patt_len = str_len(patt);
|
||||
|
||||
if(patt_len > 0 && !str_is_empty(src))
|
||||
{
|
||||
const char* s = memmem(str_ptr(src), str_len(src), str_ptr(patt), patt_len);
|
||||
|
||||
if(s)
|
||||
{
|
||||
if(prefix)
|
||||
str_assign(prefix, str_ref_chars(str_ptr(src), s - str_ptr(src)));
|
||||
|
||||
if(suffix)
|
||||
{
|
||||
s += patt_len;
|
||||
str_assign(suffix, str_ref_chars(s, str_end(src) - s));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if(prefix)
|
||||
str_assign(prefix, str_ref(src));
|
||||
|
||||
if(suffix)
|
||||
str_clear(suffix);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// comparison functions
|
||||
int str_order_asc(const void* const s1, const void* const s2)
|
||||
{
|
||||
return str_cmp(*(const str*)s1, *(const str*)s2);
|
||||
}
|
||||
|
||||
int str_order_desc(const void* const s1, const void* const s2)
|
||||
{
|
||||
return -str_cmp(*(const str*)s1, *(const str*)s2);
|
||||
}
|
||||
|
||||
int str_order_asc_ci(const void* const s1, const void* const s2)
|
||||
{
|
||||
return str_cmp_ci(*(const str*)s1, *(const str*)s2);
|
||||
}
|
||||
|
||||
int str_order_desc_ci(const void* const s1, const void* const s2)
|
||||
{
|
||||
return -str_cmp_ci(*(const str*)s1, *(const str*)s2);
|
||||
}
|
||||
|
||||
// sorting
|
||||
void str_sort_range(const str_cmp_func cmp, str* const array, const size_t count)
|
||||
{
|
||||
if(array && count > 1)
|
||||
qsort(array, count, sizeof(array[0]), cmp);
|
||||
}
|
||||
|
||||
// searching
|
||||
const str* str_search_range(const str key, const str* const array, const size_t count)
|
||||
{
|
||||
if(!array || count == 0)
|
||||
return NULL;
|
||||
|
||||
if(count == 1)
|
||||
return str_eq(key, array[0]) ? array : NULL;
|
||||
|
||||
return bsearch(&key, array, count, sizeof(str), str_order_asc);
|
||||
}
|
||||
|
||||
// partitioning
|
||||
size_t str_partition_range(bool (*pred)(const str), str* const array, const size_t count)
|
||||
{
|
||||
if(!array)
|
||||
return 0;
|
||||
|
||||
const str* const end = array + count;
|
||||
str* p = array;
|
||||
|
||||
while(p < end && pred(*p))
|
||||
++p;
|
||||
|
||||
for(str* s = p + 1; s < end; ++s)
|
||||
if(pred(*s))
|
||||
str_swap(p++, s);
|
||||
|
||||
return p - array;
|
||||
}
|
||||
|
||||
// unique partitioning
|
||||
size_t str_unique_range(str* const array, const size_t count)
|
||||
{
|
||||
if(!array || count == 0)
|
||||
return 0;
|
||||
|
||||
if(count == 1)
|
||||
return 1;
|
||||
|
||||
str_sort_range(str_order_asc, array, count);
|
||||
|
||||
const str* const end = array + count;
|
||||
str* p = array;
|
||||
|
||||
for(str* s = array + 1; s < end; ++s)
|
||||
if(!str_eq(*p, *s) && (++p < s))
|
||||
str_swap(p, s);
|
||||
|
||||
return p + 1 - array;
|
||||
}
|
||||
|
||||
// string iterator function
|
||||
#ifdef __STDC_UTF_32__
|
||||
|
||||
// decode the next code point and advance the iterator past it.
// Returns the code point, or one of CPI_END_OF_STRING, CPI_ERR_INVALID_ENCODING,
// CPI_ERR_INCOMPLETE_SEQ (the iterator is not advanced in those cases).
// For mbrtoc32() result codes see https://en.cppreference.com/w/c/string/multibyte/mbrtoc32
char32_t str_cp_iterator_next(str_cp_iterator* const it)
{
	if(it->curr >= it->end)
		return CPI_END_OF_STRING;

	char32_t cp;
	const size_t len = mbrtoc32(&cp, it->curr, it->end - it->curr, &it->state);

	if(len == 0)
	{
		// null character (U+0000) is allowed; it occupies one byte
		++it->curr;
		return 0;
	}

	// -1: encoding error, -3: surrogate pair detected
	if(len == (size_t)-1 || len == (size_t)-3)
		return CPI_ERR_INVALID_ENCODING;

	// -2: incomplete sequence
	if(len == (size_t)-2)
		return CPI_ERR_INCOMPLETE_SEQ;

	it->curr += len;
	return cp;
}
|
||||
|
||||
#endif // ifdef __STDC_UTF_32__
|
||||
|
||||
// tokeniser
|
||||
static inline
|
||||
bool is_delim(const str_tok_state* const state, const char c)
|
||||
{
|
||||
return state->bits[(unsigned char)c >> 3] & (1 << (c & 0x7));
|
||||
}
|
||||
|
||||
static inline
|
||||
void set_bit(str_tok_state* const state, const char c)
|
||||
{
|
||||
state->bits[(unsigned char)c >> 3] |= (1 << (c & 0x7));
|
||||
}
|
||||
|
||||
// replace the tokeniser's delimiter set with the chars of `delim_set`
void str_tok_delim(str_tok_state* const state, const str delim_set)
{
	memset(state->bits, 0, sizeof(state->bits));

	const char* const chars = str_ptr(delim_set);
	const size_t count = str_len(delim_set);

	for(size_t i = 0; i < count; ++i)
		set_bit(state, chars[i]);
}
|
||||
|
||||
// initialise the tokeniser state to scan `src` using the delimiters in `delim_set`
void str_tok_init(str_tok_state* const state, const str src, const str delim_set)
{
	str_tok_delim(state, delim_set);

	state->src = str_ptr(src);
	state->end = str_end(src);
}
|
||||
|
||||
// extract the next token: on success `dest` becomes a reference to the token
// and true is returned; when no token is left `dest` is cleared and false is returned
bool str_tok(str* const dest, str_tok_state* const state)
{
	const char* const limit = state->end;
	const char* tok = state->src;

	// token start: skip the leading delimiters
	for(; tok < limit; ++tok)
		if(!is_delim(state, *tok))
			break;

	if(tok == limit)
	{
		str_clear(dest);
		return false;
	}

	// token end: the next delimiter or the end of the input
	const char* stop = tok + 1;

	for(; stop < limit; ++stop)
		if(is_delim(state, *stop))
			break;

	state->src = stop;	// the next call resumes here
	str_assign(dest, str_ref_chars(tok, stop - tok));

	return true;
}
|
||||
296
3rd/str/str.h
296
3rd/str/str.h
@@ -1,296 +0,0 @@
|
||||
/*
|
||||
BSD 3-Clause License
|
||||
|
||||
Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// string type ----------------------------------------------------------------------------
|
||||
typedef struct
|
||||
{
|
||||
const char* ptr;
|
||||
size_t info;
|
||||
} str;
|
||||
|
||||
// NULL string
|
||||
#define str_null ((str){ 0, 0 })
|
||||
|
||||
// helper macros
|
||||
#define str_ref_info(n) ((n) << 1)
|
||||
#define str_owner_info(n) (str_ref_info(n) | 1)
|
||||
|
||||
// string properties ----------------------------------------------------------------------
|
||||
// length of the string
|
||||
static inline
|
||||
size_t str_len(const str s) { return s.info >> 1; }
|
||||
|
||||
// pointer to the string
|
||||
static inline
|
||||
const char* str_ptr(const str s)
|
||||
{
|
||||
extern const char* const str_empty_string;
|
||||
|
||||
return s.ptr ? s.ptr : str_empty_string;
|
||||
}
|
||||
|
||||
// end of the string
|
||||
static inline
|
||||
const char* str_end(const str s) { return str_ptr(s) + str_len(s); }
|
||||
|
||||
// test if the string is empty
|
||||
static inline
|
||||
bool str_is_empty(const str s) { return str_len(s) == 0; }
|
||||
|
||||
// test if the string is allocated on the heap
|
||||
static inline
|
||||
bool str_is_owner(const str s) { return (s.info & 1) != 0; }
|
||||
|
||||
// test if the string is a reference
|
||||
static inline
|
||||
bool str_is_ref(const str s) { return !str_is_owner(s); }
|
||||
|
||||
// string memory control -------------------------------------------------------------------
|
||||
// free memory allocated for the string
|
||||
void str_free(const str s);
|
||||
|
||||
// automatic cleanup
|
||||
void str_free_auto(const str* const ps);
|
||||
|
||||
#define str_auto str __attribute__((cleanup(str_free_auto)))
|
||||
|
||||
// string movements -----------------------------------------------------------------------
|
||||
// free target string, then assign the new value to it
|
||||
static inline
|
||||
void str_assign(str* const ps, const str s) { str_free(*ps); *ps = s; }
|
||||
|
||||
// move the string, resetting the source to str_null
|
||||
static inline
|
||||
str str_move(str* const ps) { const str t = *ps; *ps = str_null; return t; }
|
||||
|
||||
// pass ownership of the string
|
||||
static inline
|
||||
str str_pass(str* const ps) { const str t = *ps; ps->info &= ~(size_t)1; return t; }
|
||||
|
||||
// swap two string objects
|
||||
void str_swap(str* const s1, str* const s2);
|
||||
|
||||
// string helpers --------------------------------------------------------------------------
|
||||
// reset the string to str_null
|
||||
static inline
|
||||
void str_clear(str* const ps) { str_assign(ps, str_null); }
|
||||
|
||||
// compare two strings lexicographically
|
||||
int str_cmp(const str s1, const str s2);
|
||||
|
||||
// test if two strings match
|
||||
static inline
|
||||
bool str_eq(const str s1, const str s2) { return str_cmp(s1, s2) == 0; }
|
||||
|
||||
// case-insensitive comparison
|
||||
int str_cmp_ci(const str s1, const str s2);
|
||||
|
||||
// case-insensitive match
|
||||
static inline
|
||||
bool str_eq_ci(const str s1, const str s2) { return str_cmp_ci(s1, s2) == 0; }
|
||||
|
||||
// test for prefix
|
||||
bool str_has_prefix(const str s, const str prefix);
|
||||
|
||||
// test for suffix
|
||||
bool str_has_suffix(const str s, const str suffix);
|
||||
|
||||
// string composition ------------------------------------------------------------------
|
||||
// implementation helpers
|
||||
int str_dup_impl(str* const dest, const str s);
|
||||
int str_cpy_to_fd(const int fd, const str s);
|
||||
int str_cpy_to_stream(FILE* const stream, const str s);
|
||||
|
||||
// copy string
|
||||
#define str_cpy(dest, src) \
|
||||
_Generic((dest), \
|
||||
str*: str_dup_impl, \
|
||||
int: str_cpy_to_fd, \
|
||||
FILE*: str_cpy_to_stream \
|
||||
)((dest), (src))
|
||||
|
||||
// implementation helpers
|
||||
int str_cat_range_impl(str* const dest, const str* src, size_t count);
|
||||
int str_cat_range_to_fd(const int fd, const str* src, size_t count);
|
||||
int str_cat_range_to_stream(FILE* const stream, const str* src, size_t count);
|
||||
|
||||
// concatenate range of strings
|
||||
#define str_cat_range(dest, src, count) \
|
||||
_Generic((dest), \
|
||||
str*: str_cat_range_impl, \
|
||||
int: str_cat_range_to_fd, \
|
||||
FILE*: str_cat_range_to_stream \
|
||||
)((dest), (src), (count))
|
||||
|
||||
// concatenate string arguments
|
||||
#define str_cat(dest, ...) \
|
||||
({ \
|
||||
const str args[] = { __VA_ARGS__ }; \
|
||||
str_cat_range((dest), args, sizeof(args)/sizeof(args[0])); \
|
||||
})
|
||||
|
||||
// implementation helpers
|
||||
int str_join_range_impl(str* const dest, const str sep, const str* src, size_t count);
|
||||
int str_join_range_to_fd(const int fd, const str sep, const str* src, size_t count);
|
||||
int str_join_range_to_stream(FILE* const stream, const str sep, const str* src, size_t count);
|
||||
|
||||
// join strings around the separator
|
||||
#define str_join_range(dest, sep, src, count) \
|
||||
_Generic((dest), \
|
||||
str*: str_join_range_impl, \
|
||||
int: str_join_range_to_fd, \
|
||||
FILE*: str_join_range_to_stream \
|
||||
)((dest), (sep), (src), (count))
|
||||
|
||||
// join string arguments around the separator
|
||||
#define str_join(dest, sep, ...) \
|
||||
({ \
|
||||
const str args[] = { __VA_ARGS__ }; \
|
||||
str_join_range((dest), (sep), args, sizeof(args)/sizeof(args[0])); \
|
||||
})
|
||||
|
||||
// constructors ----------------------------------------------------------------------------
|
||||
// string reference from a string literal
|
||||
#define str_lit(s) ((str){ "" s, str_ref_info(sizeof(s) - 1) })
|
||||
|
||||
static inline
|
||||
str str_ref_impl(const str s) { return (str){ s.ptr, s.info & ~(size_t)1 }; }
|
||||
|
||||
str str_ref_from_ptr(const char* const s);
|
||||
|
||||
// string reference from anything
|
||||
#define str_ref(s) \
|
||||
_Generic((s), \
|
||||
str: str_ref_impl, \
|
||||
char*: str_ref_from_ptr, \
|
||||
const char*: str_ref_from_ptr \
|
||||
)(s)
|
||||
|
||||
// create a reference to the given range of chars
|
||||
str str_ref_chars(const char* const s, const size_t n);
|
||||
|
||||
// take ownership of the given range of chars
|
||||
str str_acquire_chars(const char* const s, const size_t n);
|
||||
|
||||
// take ownership of the given string
|
||||
str str_acquire(const char* const s);
|
||||
|
||||
// string from file
|
||||
int str_from_file(str* const dest, const char* const file_name);
|
||||
|
||||
// read at most *nread bytes from the file and write the number of bytes actually read back to *nread; *nread == 0 (or a NULL nread) reads until the end of the stream (EOF).
|
||||
int str_from_stream(str* const dest, const char* const file_name, int *nread);
|
||||
|
||||
// searching and sorting --------------------------------------------------------------------
|
||||
// string partitioning (substring search)
|
||||
bool str_partition(const str src, const str patt, str* const prefix, str* const suffix);
|
||||
|
||||
// comparison functions
|
||||
typedef int (*str_cmp_func)(const void*, const void*);
|
||||
|
||||
int str_order_asc(const void* const s1, const void* const s2);
|
||||
int str_order_desc(const void* const s1, const void* const s2);
|
||||
int str_order_asc_ci(const void* const s1, const void* const s2);
|
||||
int str_order_desc_ci(const void* const s1, const void* const s2);
|
||||
|
||||
// sort array of strings
|
||||
void str_sort_range(const str_cmp_func cmp, str* const array, const size_t count);
|
||||
|
||||
// searching
|
||||
const str* str_search_range(const str key, const str* const array, const size_t count);
|
||||
|
||||
// partitioning
|
||||
size_t str_partition_range(bool (*pred)(const str), str* const array, const size_t count);
|
||||
|
||||
// unique partitioning
|
||||
size_t str_unique_range(str* const array, const size_t count);
|
||||
|
||||
// UTF-32 codepoint iterator ----------------------------------------------------------------
|
||||
#ifdef __STDC_UTF_32__
|
||||
#include <uchar.h>
|
||||
|
||||
// iterator
|
||||
#define for_each_codepoint(var, src) \
|
||||
for_each_cp((var), (src), CAT1(inner_it_, __COUNTER__))
|
||||
|
||||
// iterator error codes
|
||||
#define CPI_END_OF_STRING ((char32_t)-1)
|
||||
#define CPI_ERR_INCOMPLETE_SEQ ((char32_t)-2)
|
||||
#define CPI_ERR_INVALID_ENCODING ((char32_t)-3)
|
||||
|
||||
// implementation
|
||||
#define for_each_cp(var, src, it) \
|
||||
for(str_cp_iterator it = str_make_cp_iterator(src); (var = str_cp_iterator_next(&it)) <= 0x10FFFFu;)
|
||||
|
||||
#define CAT1(x, y) CAT2(x, y)
|
||||
#define CAT2(x, y) x ## y
|
||||
|
||||
typedef struct
|
||||
{
|
||||
const char* curr;
|
||||
const char* const end;
|
||||
mbstate_t state;
|
||||
} str_cp_iterator;
|
||||
|
||||
static inline
|
||||
str_cp_iterator str_make_cp_iterator(const str s)
|
||||
{
|
||||
return (str_cp_iterator){ .curr = str_ptr(s), .end = str_end(s) };
|
||||
}
|
||||
|
||||
char32_t str_cp_iterator_next(str_cp_iterator* const it);
|
||||
|
||||
#endif // ifdef __STDC_UTF_32__
|
||||
|
||||
// tokeniser --------------------------------------------------------------------------------
|
||||
typedef struct
|
||||
{
|
||||
unsigned char bits[32]; // 256 / 8
|
||||
const char *src, *end;
|
||||
} str_tok_state;
|
||||
|
||||
void str_tok_init(str_tok_state* const state, const str src, const str delim_set);
|
||||
bool str_tok(str* const dest, str_tok_state* const state);
|
||||
void str_tok_delim(str_tok_state* const state, const str delim_set);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -1,907 +0,0 @@
|
||||
/*
|
||||
BSD 3-Clause License
|
||||
|
||||
Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define _POSIX_C_SOURCE 200809L
|
||||
|
||||
#include "str.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <locale.h>
|
||||
|
||||
// make sure assert is always enabled
|
||||
#ifdef NDEBUG
|
||||
#undef NDEBUG
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#define passed printf("passed: %s\n", __func__)
|
||||
|
||||
static
|
||||
void test_str_lit(void)
|
||||
{
|
||||
const str s = str_lit("ZZZ");
|
||||
|
||||
assert(str_len(s) == 3);
|
||||
assert(str_is_ref(s));
|
||||
assert(!str_is_owner(s));
|
||||
assert(str_eq(s, str_lit("ZZZ")));
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_str_cpy(void)
|
||||
{
|
||||
str_auto s = str_null;
|
||||
|
||||
assert(str_cpy(&s, str_lit("ZZZ")) == 0);
|
||||
|
||||
assert(str_len(s) == 3);
|
||||
assert(!str_is_ref(s));
|
||||
assert(str_is_owner(s));
|
||||
assert(str_eq(s, str_lit("ZZZ")));
|
||||
assert(*str_end(s) == 0);
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_str_clear(void)
|
||||
{
|
||||
str s = str_null;
|
||||
|
||||
assert(str_cpy(&s, str_lit("ZZZ")) == 0);
|
||||
|
||||
assert(str_len(s) == 3);
|
||||
assert(str_is_owner(s));
|
||||
assert(*str_end(s) == 0);
|
||||
|
||||
str_clear(&s);
|
||||
|
||||
assert(str_is_empty(s));
|
||||
assert(str_is_ref(s));
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_str_move(void)
|
||||
{
|
||||
str s1 = str_null;
|
||||
|
||||
assert(str_cpy(&s1, str_lit("ZZZ")) == 0);
|
||||
|
||||
str s2 = str_move(&s1);
|
||||
|
||||
assert(str_is_empty(s1));
|
||||
assert(str_is_ref(s1));
|
||||
|
||||
assert(str_is_owner(s2));
|
||||
assert(str_eq(s2, str_lit("ZZZ")));
|
||||
|
||||
str_free(s2);
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_str_pass(void)
|
||||
{
|
||||
str s1 = str_null;
|
||||
|
||||
assert(str_cpy(&s1, str_lit("ZZZ")) == 0);
|
||||
|
||||
str s2 = str_pass(&s1);
|
||||
|
||||
assert(str_is_ref(s1));
|
||||
assert(str_eq(s1, str_lit("ZZZ")));
|
||||
|
||||
assert(str_is_owner(s2));
|
||||
assert(str_eq(s2, str_lit("ZZZ")));
|
||||
|
||||
str_free(s2);
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_str_ref(void)
|
||||
{
|
||||
str s = str_ref("ZZZ");
|
||||
|
||||
assert(str_len(s) == 3);
|
||||
assert(str_is_ref(s));
|
||||
|
||||
s = str_ref(s);
|
||||
|
||||
assert(str_is_ref(s));
|
||||
assert(str_eq(s, str_lit("ZZZ")));
|
||||
|
||||
const char* const p = "ZZZ";
|
||||
|
||||
s = str_ref(p);
|
||||
|
||||
assert(str_is_ref(s));
|
||||
assert(str_eq(s, str_lit("ZZZ")));
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_str_cmp(void)
|
||||
{
|
||||
const str_auto s = str_lit("zzz");
|
||||
|
||||
assert(str_cmp(s, s) == 0);
|
||||
assert(str_cmp(s, str_lit("zzz")) == 0);
|
||||
assert(str_cmp(s, str_lit("zz")) > 0);
|
||||
assert(str_cmp(s, str_lit("zzzz")) < 0);
|
||||
assert(str_cmp(s, str_null) > 0);
|
||||
assert(str_cmp(str_null, s) < 0);
|
||||
assert(str_cmp(str_null, str_null) == 0);
|
||||
assert(str_eq(s, str_lit("zzz")));
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_str_cmp_ci(void)
|
||||
{
|
||||
const str s = str_lit("zzz");
|
||||
|
||||
assert(str_cmp_ci(s, s) == 0);
|
||||
assert(str_cmp_ci(s, str_lit("zzz")) == 0);
|
||||
assert(str_cmp_ci(s, str_lit("zz")) > 0);
|
||||
assert(str_cmp_ci(s, str_lit("zzzz")) < 0);
|
||||
assert(str_cmp_ci(s, str_null) > 0);
|
||||
assert(str_cmp_ci(str_null, s) < 0);
|
||||
assert(str_cmp_ci(str_null, str_null) == 0);
|
||||
assert(str_cmp_ci(s, str_lit("ZZZ")) == 0);
|
||||
assert(str_cmp_ci(s, str_lit("ZZ")) > 0);
|
||||
assert(str_cmp_ci(s, str_lit("ZZZZ")) < 0);
|
||||
assert(str_eq_ci(s, str_lit("ZZZ")));
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_str_acquire(void)
|
||||
{
|
||||
str_auto s = str_acquire(strdup("ZZZ"));
|
||||
|
||||
assert(str_is_owner(s));
|
||||
assert(str_eq(s, str_lit("ZZZ")));
|
||||
assert(*str_end(s) == 0);
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_str_cat(void)
|
||||
{
|
||||
str s = str_null;
|
||||
|
||||
assert(str_cat(&s, str_lit("AAA"), str_lit("BBB"), str_lit("CCC")) == 0);
|
||||
|
||||
assert(str_eq(s, str_lit("AAABBBCCC")));
|
||||
assert(str_is_owner(s));
|
||||
assert(*str_end(s) == 0);
|
||||
|
||||
assert(str_cat(&s, str_null, str_null, str_null) == 0); // this simply clears the target string
|
||||
|
||||
assert(str_is_empty(s));
|
||||
assert(str_is_ref(s));
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_str_join(void)
|
||||
{
|
||||
str s = str_null;
|
||||
|
||||
assert(str_join(&s, str_lit("_"), str_lit("AAA"), str_lit("BBB"), str_lit("CCC")) == 0);
|
||||
|
||||
assert(str_eq(s, str_lit("AAA_BBB_CCC")));
|
||||
assert(str_is_owner(s));
|
||||
assert(*str_end(s) == 0);
|
||||
|
||||
assert(str_join(&s, str_lit("_"), str_null, str_lit("BBB"), str_lit("CCC")) == 0);
|
||||
|
||||
assert(str_eq(s, str_lit("_BBB_CCC")));
|
||||
assert(str_is_owner(s));
|
||||
assert(*str_end(s) == 0);
|
||||
|
||||
assert(str_join(&s, str_lit("_"), str_lit("AAA"), str_null, str_lit("CCC")) == 0);
|
||||
|
||||
assert(str_eq(s, str_lit("AAA__CCC")));
|
||||
assert(str_is_owner(s));
|
||||
assert(*str_end(s) == 0);
|
||||
|
||||
assert(str_join(&s, str_lit("_"), str_lit("AAA"), str_lit("BBB"), str_null) == 0);
|
||||
|
||||
assert(str_eq(s, str_lit("AAA_BBB_")));
|
||||
assert(str_is_owner(s));
|
||||
assert(*str_end(s) == 0);
|
||||
|
||||
assert(str_join(&s, str_lit("_"), str_null, str_null, str_null) == 0);
|
||||
|
||||
assert(str_eq(s, str_lit("__")));
|
||||
assert(str_is_owner(s));
|
||||
assert(*str_end(s) == 0);
|
||||
|
||||
assert(str_join(&s, str_null) == 0); // this simply clears the target string
|
||||
|
||||
assert(str_is_empty(s));
|
||||
assert(str_is_ref(s));
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_composition(void)
|
||||
{
|
||||
str_auto s = str_lit(", ");
|
||||
|
||||
assert(str_join(&s, s, str_lit("Here"), str_lit("there"), str_lit("and everywhere")) == 0);
|
||||
assert(str_cat(&s, s, str_lit("...")) == 0);
|
||||
|
||||
assert(str_eq(s, str_lit("Here, there, and everywhere...")));
|
||||
assert(str_is_owner(s));
|
||||
assert(*str_end(s) == 0);
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_sort(void)
|
||||
{
|
||||
str src[] = { str_lit("z"), str_lit("zzz"), str_lit("aaa"), str_lit("bbb") };
|
||||
|
||||
str_sort_range(str_order_asc, src, sizeof(src)/sizeof(src[0]));
|
||||
|
||||
assert(str_eq(src[0], str_lit("aaa")));
|
||||
assert(str_eq(src[1], str_lit("bbb")));
|
||||
assert(str_eq(src[2], str_lit("z")));
|
||||
assert(str_eq(src[3], str_lit("zzz")));
|
||||
|
||||
str_sort_range(str_order_desc, src, sizeof(src)/sizeof(src[0]));
|
||||
|
||||
assert(str_eq(src[0], str_lit("zzz")));
|
||||
assert(str_eq(src[1], str_lit("z")));
|
||||
assert(str_eq(src[2], str_lit("bbb")));
|
||||
assert(str_eq(src[3], str_lit("aaa")));
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_sort_ci(void)
|
||||
{
|
||||
str src[] = { str_lit("ZZZ"), str_lit("zzz"), str_lit("aaa"), str_lit("AAA") };
|
||||
|
||||
str_sort_range(str_order_asc_ci, src, sizeof(src)/sizeof(src[0]));
|
||||
|
||||
assert(str_eq_ci(src[0], str_lit("aaa")));
|
||||
assert(str_eq_ci(src[1], str_lit("aaa")));
|
||||
assert(str_eq_ci(src[2], str_lit("zzz")));
|
||||
assert(str_eq_ci(src[3], str_lit("zzz")));
|
||||
|
||||
str_sort_range(str_order_desc_ci, src, sizeof(src)/sizeof(src[0]));
|
||||
|
||||
assert(str_eq_ci(src[0], str_lit("zzz")));
|
||||
assert(str_eq_ci(src[1], str_lit("zzz")));
|
||||
assert(str_eq_ci(src[2], str_lit("aaa")));
|
||||
assert(str_eq_ci(src[3], str_lit("aaa")));
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_search(void)
|
||||
{
|
||||
str src[] = { str_lit("z"), str_lit("zzz"), str_lit("aaa"), str_lit("bbb") };
|
||||
const size_t count = sizeof(src)/sizeof(src[0]);
|
||||
|
||||
str_sort_range(str_order_asc, src, count);
|
||||
|
||||
assert(str_search_range(src[0], src, count) == &src[0]);
|
||||
assert(str_search_range(src[1], src, count) == &src[1]);
|
||||
assert(str_search_range(src[2], src, count) == &src[2]);
|
||||
assert(str_search_range(src[3], src, count) == &src[3]);
|
||||
assert(str_search_range(str_lit("xxx"), src, count) == NULL);
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_prefix(void)
|
||||
{
|
||||
const str s = str_lit("abcd");
|
||||
|
||||
assert(str_has_prefix(s, str_null));
|
||||
assert(str_has_prefix(s, str_lit("a")));
|
||||
assert(str_has_prefix(s, str_lit("ab")));
|
||||
assert(str_has_prefix(s, str_lit("abc")));
|
||||
assert(str_has_prefix(s, str_lit("abcd")));
|
||||
|
||||
assert(!str_has_prefix(s, str_lit("zzz")));
|
||||
assert(!str_has_prefix(s, str_lit("abcde")));
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_suffix(void)
|
||||
{
|
||||
const str s = str_lit("abcd");
|
||||
|
||||
assert(str_has_suffix(s, str_null));
|
||||
assert(str_has_suffix(s, str_lit("d")));
|
||||
assert(str_has_suffix(s, str_lit("cd")));
|
||||
assert(str_has_suffix(s, str_lit("bcd")));
|
||||
assert(str_has_suffix(s, str_lit("abcd")));
|
||||
|
||||
assert(!str_has_suffix(s, str_lit("zzz")));
|
||||
assert(!str_has_suffix(s, str_lit("_abcd")));
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_cpy_to_fd(void)
|
||||
{
|
||||
FILE* const tmp = tmpfile();
|
||||
|
||||
assert(tmp != NULL);
|
||||
assert(str_cpy(fileno(tmp), str_lit("ZZZ")) == 0);
|
||||
|
||||
rewind(tmp);
|
||||
|
||||
char buff[32];
|
||||
|
||||
assert(fread(buff, 1, sizeof(buff), tmp) == 3);
|
||||
assert(memcmp(buff, "ZZZ", 3) == 0);
|
||||
|
||||
fclose(tmp);
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_cpy_to_stream(void)
|
||||
{
|
||||
FILE* const tmp = tmpfile();
|
||||
|
||||
assert(tmp != NULL);
|
||||
assert(str_cpy(tmp, str_lit("ZZZ")) == 0);
|
||||
|
||||
assert(fflush(tmp) == 0);
|
||||
rewind(tmp);
|
||||
|
||||
char buff[32];
|
||||
|
||||
assert(fread(buff, 1, sizeof(buff), tmp) == 3);
|
||||
assert(memcmp(buff, "ZZZ", 3) == 0);
|
||||
|
||||
fclose(tmp);
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_cat_range_to_fd(void)
|
||||
{
|
||||
const str src[] = {
|
||||
str_lit("aaa"),
|
||||
str_lit("bbb"),
|
||||
str_null,
|
||||
str_lit("ccc"),
|
||||
str_lit("ddd"),
|
||||
str_null,
|
||||
str_null
|
||||
};
|
||||
|
||||
const size_t num_items = sizeof(src)/sizeof(src[0]);
|
||||
|
||||
FILE* const tmp = tmpfile();
|
||||
|
||||
assert(tmp != NULL);
|
||||
assert(str_cat_range(fileno(tmp), src, num_items) == 0);
|
||||
|
||||
rewind(tmp);
|
||||
|
||||
const char res[] = "aaabbbcccddd";
|
||||
const size_t len = sizeof(res) - 1;
|
||||
char buff[32];
|
||||
|
||||
assert(fread(buff, 1, sizeof(buff), tmp) == len);
|
||||
assert(memcmp(buff, res, len) == 0);
|
||||
|
||||
fclose(tmp);
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_cat_large_range_to_fd(void)
|
||||
{
|
||||
// prepare data
|
||||
const size_t n = 100000;
|
||||
str* const src = calloc(n, sizeof(str));
|
||||
|
||||
assert(src != NULL);
|
||||
|
||||
char buff[100];
|
||||
|
||||
for(unsigned i = 0; i < n; i++)
|
||||
assert(str_cpy(&src[i], str_ref_chars(buff, sprintf(buff, "%u\n", i))) == 0);
|
||||
|
||||
// write to file
|
||||
FILE* const tmp = tmpfile();
|
||||
|
||||
assert(tmp != NULL);
|
||||
assert(str_cat_range(fileno(tmp), src, n) == 0);
|
||||
|
||||
// clear input data
|
||||
for(unsigned i = 0; i < n; ++i)
|
||||
str_free(src[i]);
|
||||
|
||||
free(src);
|
||||
|
||||
// validate
|
||||
rewind(tmp);
|
||||
|
||||
char* line = NULL;
|
||||
size_t cap = 0;
|
||||
ssize_t len;
|
||||
int i = 0;
|
||||
|
||||
while((len = getline(&line, &cap, tmp)) >= 0)
|
||||
assert(atoi(line) == i++);
|
||||
|
||||
assert(i == (int)n);
|
||||
|
||||
// all done
|
||||
fclose(tmp);
|
||||
free(line);
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_cat_range_to_stream(void)
|
||||
{
|
||||
const str src[] = {
|
||||
str_lit("aaa"),
|
||||
str_lit("bbb"),
|
||||
str_null,
|
||||
str_lit("ccc"),
|
||||
str_lit("ddd"),
|
||||
str_null,
|
||||
str_null
|
||||
};
|
||||
|
||||
const size_t num_items = sizeof(src)/sizeof(src[0]);
|
||||
|
||||
FILE* const tmp = tmpfile();
|
||||
|
||||
assert(tmp != NULL);
|
||||
assert(str_cat_range(tmp, src, num_items) == 0);
|
||||
|
||||
assert(fflush(tmp) == 0);
|
||||
rewind(tmp);
|
||||
|
||||
const char res[] = "aaabbbcccddd";
|
||||
const size_t len = sizeof(res) - 1;
|
||||
char buff[32];
|
||||
|
||||
assert(fread(buff, 1, sizeof(buff), tmp) == len);
|
||||
assert(memcmp(buff, res, len) == 0);
|
||||
|
||||
fclose(tmp);
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_join_to_fd(void)
|
||||
{
|
||||
FILE* const tmp = tmpfile();
|
||||
|
||||
assert(tmp != NULL);
|
||||
assert(str_join(fileno(tmp), str_lit("_"), str_lit("aaa"), str_lit("bbb"), str_lit("ccc")) == 0);
|
||||
|
||||
rewind(tmp);
|
||||
|
||||
const char res[] = "aaa_bbb_ccc";
|
||||
const size_t len = sizeof(res) - 1;
|
||||
char buff[32];
|
||||
|
||||
assert(fread(buff, 1, sizeof(buff), tmp) == len);
|
||||
assert(memcmp(buff, res, len) == 0);
|
||||
|
||||
fclose(tmp);
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_join_large_range_to_fd(void)
|
||||
{
|
||||
// prepare data
|
||||
const size_t n = 100000;
|
||||
str* const src = calloc(n, sizeof(str));
|
||||
|
||||
assert(src != NULL);
|
||||
|
||||
char buff[100];
|
||||
|
||||
for(unsigned i = 0; i < n; i++)
|
||||
assert(str_cpy(&src[i], str_ref_chars(buff, sprintf(buff, "%u", i))) == 0);
|
||||
|
||||
// write to file
|
||||
FILE* const tmp = tmpfile();
|
||||
|
||||
assert(tmp != NULL);
|
||||
assert(str_join_range(fileno(tmp), str_lit("\n"), src, n) == 0);
|
||||
|
||||
// clear input data
|
||||
for(unsigned i = 0; i < n; ++i)
|
||||
str_free(src[i]);
|
||||
|
||||
free(src);
|
||||
|
||||
// validate
|
||||
rewind(tmp);
|
||||
|
||||
char* line = NULL;
|
||||
size_t cap = 0;
|
||||
ssize_t len;
|
||||
int i = 0;
|
||||
|
||||
while((len = getline(&line, &cap, tmp)) >= 0)
|
||||
assert(atoi(line) == i++);
|
||||
|
||||
assert(i == (int)n);
|
||||
|
||||
// all done
|
||||
fclose(tmp);
|
||||
free(line);
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_join_to_stream(void)
|
||||
{
|
||||
FILE* const tmp = tmpfile();
|
||||
|
||||
assert(tmp != NULL);
|
||||
assert(str_join(tmp, str_lit("_"), str_lit("aaa"), str_lit("bbb"), str_lit("ccc")) == 0);
|
||||
|
||||
assert(fflush(tmp) == 0);
|
||||
rewind(tmp);
|
||||
|
||||
const char res[] = "aaa_bbb_ccc";
|
||||
const size_t len = sizeof(res) - 1;
|
||||
char buff[32];
|
||||
|
||||
assert(fread(buff, 1, sizeof(buff), tmp) == len);
|
||||
assert(memcmp(buff, res, len) == 0);
|
||||
|
||||
fclose(tmp);
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
bool part_pred(const str s) { return str_len(s) < 2; }
|
||||
|
||||
static
|
||||
void test_partition_range(void)
|
||||
{
|
||||
str src[] = { str_lit("aaa"), str_lit("a"), str_lit("aaaa"), str_lit("z") };
|
||||
|
||||
assert(str_partition_range(part_pred, src, 1) == 0);
|
||||
|
||||
assert(str_partition_range(part_pred, src, sizeof(src)/sizeof(src[0])) == 2);
|
||||
assert(str_eq(src[0], str_lit("a")));
|
||||
assert(str_eq(src[1], str_lit("z")));
|
||||
assert(str_partition_range(part_pred, src, 1) == 1);
|
||||
|
||||
src[0] = str_lit("?");
|
||||
src[2] = str_lit("*");
|
||||
|
||||
assert(str_partition_range(part_pred, src, sizeof(src)/sizeof(src[0])) == 3);
|
||||
assert(str_eq(src[0], str_lit("?")));
|
||||
assert(str_eq(src[1], str_lit("z")));
|
||||
assert(str_eq(src[2], str_lit("*")));
|
||||
assert(str_eq(src[3], str_lit("aaa")));
|
||||
|
||||
assert(str_partition_range(part_pred, NULL, 42) == 0);
|
||||
assert(str_partition_range(part_pred, src, 0) == 0);
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_unique_range(void)
|
||||
{
|
||||
str src[] = {
|
||||
str_lit("zzz"),
|
||||
str_lit("aaa"),
|
||||
str_lit("zzz"),
|
||||
str_lit("bbb"),
|
||||
str_lit("aaa"),
|
||||
str_lit("ccc"),
|
||||
str_lit("ccc"),
|
||||
str_lit("aaa"),
|
||||
str_lit("ccc"),
|
||||
str_lit("zzz")
|
||||
};
|
||||
|
||||
assert(str_unique_range(src, sizeof(src)/sizeof(src[0])) == 4);
|
||||
assert(str_eq(src[0], str_lit("aaa")));
|
||||
assert(str_eq(src[1], str_lit("bbb")));
|
||||
assert(str_eq(src[2], str_lit("ccc")));
|
||||
assert(str_eq(src[3], str_lit("zzz")));
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_from_file(void)
|
||||
{
|
||||
str_auto fname = str_null;
|
||||
|
||||
assert(str_cat(&fname, str_lit("tmp_"), str_ref_chars(__func__, sizeof(__func__) - 1)) == 0);
|
||||
|
||||
FILE* const stream = fopen(str_ptr(fname), "w");
|
||||
|
||||
assert(stream);
|
||||
assert(str_join(stream, str_lit(" "), str_lit("aaa"), str_lit("bbb"), str_lit("ccc")) == 0);
|
||||
assert(fclose(stream) == 0);
|
||||
|
||||
str_auto res = str_null;
|
||||
|
||||
assert(str_from_file(&res, str_ptr(fname)) == 0);
|
||||
unlink(str_ptr(fname));
|
||||
assert(str_eq(res, str_lit("aaa bbb ccc")));
|
||||
assert(str_is_owner(res));
|
||||
|
||||
// test errors
|
||||
assert(str_from_file(&res, ".") == EISDIR);
|
||||
assert(str_from_file(&res, "/dev/null") == EOPNOTSUPP);
|
||||
assert(str_from_file(&res, "does-not-exist") == ENOENT);
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
#ifdef __STDC_UTF_32__
|
||||
|
||||
static
|
||||
void test_codepoint_iterator(void)
|
||||
{
|
||||
const str src = str_lit(u8"жёлтый"); // means "yellow" in Russian
|
||||
static const char32_t src32[] = { U'ж', U'ё', U'л', U'т', U'ы', U'й' };
|
||||
size_t i = 0;
|
||||
char32_t c;
|
||||
|
||||
for_each_codepoint(c, src)
|
||||
{
|
||||
assert(i < sizeof(src32)/sizeof(src32[0]));
|
||||
assert(c == src32[i++]);
|
||||
}
|
||||
|
||||
assert(c == CPI_END_OF_STRING);
|
||||
assert(i == sizeof(src32)/sizeof(src32[0]));
|
||||
|
||||
// empty string iteration
|
||||
c = 0;
|
||||
|
||||
for_each_codepoint(c, str_null)
|
||||
assert(0);
|
||||
|
||||
assert(c == CPI_END_OF_STRING);
|
||||
passed;
|
||||
}
|
||||
|
||||
#endif // ifdef __STDC_UTF_32__
|
||||
|
||||
static
|
||||
void test_tok(void)
|
||||
{
|
||||
typedef struct
|
||||
{
|
||||
const str src, delim;
|
||||
const unsigned n_tok;
|
||||
const str tok[3];
|
||||
} test_data;
|
||||
|
||||
static const test_data t[] =
|
||||
{
|
||||
{
|
||||
str_lit("a,b,c"),
|
||||
str_lit(","),
|
||||
3,
|
||||
{ str_lit("a"), str_lit("b"), str_lit("c") }
|
||||
},
|
||||
{
|
||||
str_lit(",,a,b,,c,"),
|
||||
str_lit(","),
|
||||
3,
|
||||
{ str_lit("a"), str_lit("b"), str_lit("c") }
|
||||
},
|
||||
{
|
||||
str_lit("aaa;=~bbb~,=ccc="),
|
||||
str_lit(",;=~"),
|
||||
3,
|
||||
{ str_lit("aaa"), str_lit("bbb"), str_lit("ccc") }
|
||||
},
|
||||
{
|
||||
str_lit(""),
|
||||
str_lit(","),
|
||||
0,
|
||||
{ }
|
||||
},
|
||||
{
|
||||
str_lit(""),
|
||||
str_lit(""),
|
||||
0,
|
||||
{ }
|
||||
},
|
||||
{
|
||||
str_lit(",.;,.;;.,;.,"),
|
||||
str_lit(",.;"),
|
||||
0,
|
||||
{ }
|
||||
},
|
||||
{
|
||||
str_lit("aaa,bbb,ccc"),
|
||||
str_lit(""),
|
||||
1,
|
||||
{ str_lit("aaa,bbb,ccc") }
|
||||
},
|
||||
{
|
||||
str_lit("aaa,bbb,ccc"),
|
||||
str_lit(";-="),
|
||||
1,
|
||||
{ str_lit("aaa,bbb,ccc") }
|
||||
}
|
||||
};
|
||||
|
||||
for(unsigned i = 0; i < sizeof(t)/sizeof(t[0]); ++i)
|
||||
{
|
||||
unsigned tok_count = 0;
|
||||
|
||||
str tok = str_null;
|
||||
str_tok_state state;
|
||||
|
||||
str_tok_init(&state, t[i].src, t[i].delim);
|
||||
|
||||
while(str_tok(&tok, &state))
|
||||
{
|
||||
// printf("%u-%u: \"%.*s\" %zu\n",
|
||||
// i, tok_count, (int)str_len(tok), str_ptr(tok), str_len(tok));
|
||||
// fflush(stdout);
|
||||
|
||||
assert(tok_count < t[i].n_tok);
|
||||
assert(str_eq(tok, t[i].tok[tok_count]));
|
||||
|
||||
++tok_count;
|
||||
}
|
||||
|
||||
assert(tok_count == t[i].n_tok);
|
||||
}
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
static
|
||||
void test_partition(void)
|
||||
{
|
||||
typedef struct
|
||||
{
|
||||
const bool res;
|
||||
const str src, patt, pref, suff;
|
||||
} test_data;
|
||||
|
||||
static const test_data t[] =
|
||||
{
|
||||
{ true, str_lit("...abc..."), str_lit("abc"), str_lit("..."), str_lit("...") },
|
||||
{ true, str_lit("......abc"), str_lit("abc"), str_lit("......"), str_null },
|
||||
{ true, str_lit("abc......"), str_lit("abc"), str_null, str_lit("......") },
|
||||
|
||||
{ true, str_lit("...a..."), str_lit("a"), str_lit("..."), str_lit("...") },
|
||||
{ true, str_lit("......a"), str_lit("a"), str_lit("......"), str_null },
|
||||
{ true, str_lit("a......"), str_lit("a"), str_null, str_lit("......") },
|
||||
|
||||
{ false, str_lit("zzz"), str_null, str_lit("zzz"), str_null },
|
||||
{ false, str_null, str_lit("zzz"), str_null, str_null },
|
||||
{ false, str_null, str_null, str_null, str_null },
|
||||
|
||||
{ false, str_lit("...zzz..."), str_lit("xxx"), str_lit("...zzz..."), str_null },
|
||||
{ false, str_lit("...xxz..."), str_lit("xxx"), str_lit("...xxz..."), str_null },
|
||||
{ true, str_lit("...xxz...xxx."), str_lit("xxx"), str_lit("...xxz..."), str_lit(".") },
|
||||
{ true, str_lit(u8"...цифры___"), str_lit(u8"цифры"), str_lit("..."), str_lit("___") }
|
||||
};
|
||||
|
||||
for(unsigned i = 0; i < sizeof(t)/sizeof(t[0]); ++i)
|
||||
{
|
||||
str pref = str_lit("???"), suff = str_lit("???");
|
||||
|
||||
assert(str_partition(t[i].src, t[i].patt, &pref, &suff) == t[i].res);
|
||||
assert(str_eq(pref, t[i].pref));
|
||||
assert(str_eq(suff, t[i].suff));
|
||||
}
|
||||
|
||||
passed;
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
// tests
|
||||
test_str_lit();
|
||||
test_str_cpy();
|
||||
test_str_clear();
|
||||
test_str_move();
|
||||
test_str_pass();
|
||||
test_str_ref();
|
||||
test_str_cmp();
|
||||
test_str_cmp_ci();
|
||||
test_str_acquire();
|
||||
test_str_cat();
|
||||
test_str_join();
|
||||
test_composition();
|
||||
test_sort();
|
||||
test_sort_ci();
|
||||
test_search();
|
||||
test_prefix();
|
||||
test_suffix();
|
||||
test_cpy_to_fd();
|
||||
test_cpy_to_stream();
|
||||
test_cat_range_to_fd();
|
||||
test_cat_large_range_to_fd();
|
||||
test_cat_range_to_stream();
|
||||
test_join_to_fd();
|
||||
test_join_large_range_to_fd();
|
||||
test_join_to_stream();
|
||||
test_partition_range();
|
||||
test_unique_range();
|
||||
test_from_file();
|
||||
test_tok();
|
||||
test_partition();
|
||||
|
||||
#ifdef __STDC_UTF_32__
|
||||
assert(setlocale(LC_ALL, "C.UTF-8"));
|
||||
|
||||
test_codepoint_iterator();
|
||||
#endif
|
||||
|
||||
return puts("OK.") < 0;
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
#!/bin/sh

# Writes to stdout a C source file that embeds the bytes of FILE (plus a
# trailing zero byte) in a static array and defines a constant `str` named
# VAR-NAME that refers to them.

# Print all arguments to stderr and terminate with status 1.
die() {
	echo >&2 "$@"
	exit 1
}

# exactly two arguments are required, the first being an existing regular file
if [ $# -ne 2 ]; then
	die "Usage: $(basename "$0") FILE VAR-NAME"
fi

if [ ! -f "$1" ]; then
	die "$0: file \"$1\" does not exist, or is not a file."
fi

set -e

# file header and the opening of the byte array
cat << EOF
// AUTOMATICALLY GENERATED FILE - DO NOT EDIT

// source file: $1

#include "str.h"

static
const char _bytes[] = {
EOF

# hex dump of the file, 12 bytes per line, each byte rewritten as "0xNN,"
od -v -w12 -A n -t x1 "$1" | sed -E 's/\<([[:xdigit:]]{2})\>/0x\1,/g'

# terminating zero byte and the definition of the string object
cat << EOF
0x00 };

const str $2 = (const str){ _bytes, _ref_info(sizeof(_bytes) - 1) };
EOF
|
||||
@@ -1,209 +0,0 @@
|
||||
/*
|
||||
BSD 3-Clause License
|
||||
|
||||
Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the copyright holder nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <locale.h>
|
||||
#include <wctype.h>
|
||||
#include <errno.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
// platform checks
|
||||
#ifndef __STDC_ISO_10646__
|
||||
#error "this platform does not support UNICODE (__STDC_ISO_10646__ is not defined)"
|
||||
#endif
|
||||
|
||||
#if __SIZEOF_WCHAR_T__ < 4 || __SIZEOF_WINT_T__ < 4
|
||||
#error "this platform does not have a usable wchar_t (both sizeof(wchar_t) and sizeof(wint_t) should be at least 4)"
|
||||
#endif
|
||||
|
||||
// i/o helpers
|
||||
// Report `msg` via perror() and terminate the process with status 1.
static __attribute__((noinline, noreturn))
void die(const char* const msg)
{
	perror(msg);
	exit(1);
}

// printf() to stdout that terminates the program on a write error.
#define do_printf(fmt, ...)	\
	do {	\
		const int _rc = printf(fmt, ##__VA_ARGS__);	\
		if(_rc < 0)	\
			die("error writing output");	\
	} while(0)

// Write a character array (without its last byte, i.e. the null terminator
// of a string literal or string array) to stdout; terminates the program on
// a short write.
#define do_write(text)	\
	do {	\
		const size_t _len = sizeof(text) - 1;	\
		if(fwrite((text), 1, _len, stdout) != _len)	\
			die("error writing output");	\
	} while(0)
|
||||
|
||||
// char type selector (isw*() functions)
|
||||
typedef int (*selector)(wint_t wc);
|
||||
|
||||
// option parser
|
||||
// Print the usage text to stderr and terminate with status 1.
static __attribute__((noreturn))
void usage_exit(void)
{
	static const char help_text[] =
		"Usage: gen-char-class SELECTOR\n"
		" Generate a character classification C function that does the same as its\n"
		" isw*() counterpart under the current locale as specified by LC_ALL\n"
		" environment variable. SELECTOR specifies the classification function\n"
		" to generate, it must be any one of:\n"
		" --alnum -> use iswalnum()\n"
		" --alpha -> use iswalpha()\n"
		" --blank -> use iswblank()\n"
		" --cntrl -> use iswcntrl()\n"
		" --digit -> use iswdigit()\n"
		" --graph -> use iswgraph()\n"
		" --lower -> use iswlower()\n"
		" --print -> use iswprint()\n"
		" --punct -> use iswpunct()\n"
		" --space -> use iswspace()\n"
		" --upper -> use iswupper()\n"
		" --xdigit -> use iswxdigit()\n";

	// the text is written without its null terminator
	fwrite(help_text, 1, sizeof(help_text) - 1, stderr);
	exit(1);
}
|
||||
|
||||
static
|
||||
selector fn;
|
||||
|
||||
static
|
||||
const char* fn_name;
|
||||
|
||||
static
|
||||
const char* loc;
|
||||
|
||||
#define ARG(name) \
|
||||
if(strcmp(argv[1], "--" #name) == 0) { \
|
||||
fn = isw ## name; fn_name = #name; \
|
||||
return; \
|
||||
}
|
||||
|
||||
static
|
||||
void read_opts(int argc, char* const argv[])
|
||||
{
|
||||
if(argc != 2)
|
||||
usage_exit();
|
||||
|
||||
ARG(alnum)
|
||||
ARG(alpha)
|
||||
ARG(blank)
|
||||
ARG(cntrl)
|
||||
ARG(digit)
|
||||
ARG(graph)
|
||||
ARG(lower)
|
||||
ARG(print)
|
||||
ARG(punct)
|
||||
ARG(space)
|
||||
ARG(upper)
|
||||
ARG(xdigit)
|
||||
|
||||
if(strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0)
|
||||
usage_exit();
|
||||
|
||||
fprintf(stderr, "unknown option: \"%s\"\n", argv[1]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#undef ARG
|
||||
|
||||
// range printing
|
||||
// Emit one `case` label for the inclusive code point range [first, last]:
// a single-value label when both ends are equal, otherwise a range label of
// the form "case A ... B:".
static
void print_range(const wint_t first, const wint_t last)
{
	// %X requires an unsigned int argument, while the underlying type of
	// wint_t is implementation-defined, hence the explicit conversion;
	// the values passed here never exceed 0x10FFFF
	const unsigned lo = (unsigned)first;
	const unsigned hi = (unsigned)last;

	if(lo == hi)
		do_printf("\t\tcase 0x%.2X:\n", lo);
	else
		do_printf("\t\tcase 0x%.2X ... 0x%.2X:\n", lo, hi);
}
|
||||
|
||||
// header/footer
|
||||
// Opening part of the generated function; used as a printf format with two
// string arguments: the locale name and the class name.
static const char header[] =
	"/* LC_ALL = \"%s\" */\n"
	"bool is_%s(const char32_t c)\n"
	"{\n"
	" switch(c)\n"
	" {\n";

// Closing part of the generated function; written verbatim after the last
// `case` label.
static const char footer[] =
	" return true;\n"
	" default:\n"
	" return false;\n"
	" }\n"
	"}\n";
|
||||
|
||||
// main
|
||||
#define UTF32_MAX_CHAR 0x10ffff
|
||||
|
||||
int main(int argc, char* const argv[])
|
||||
{
|
||||
read_opts(argc, argv);
|
||||
|
||||
loc = getenv("LC_ALL");
|
||||
|
||||
if(loc && !setlocale(LC_ALL, loc))
|
||||
die("cannot change current locale");
|
||||
|
||||
errno = 0;
|
||||
do_printf(header, loc ? loc : "", fn_name);
|
||||
|
||||
wint_t first = 0;
|
||||
bool in_range = false;
|
||||
|
||||
for(wint_t c = 0; c <= UTF32_MAX_CHAR; ++c)
|
||||
{
|
||||
const bool match = (fn(c) != 0);
|
||||
|
||||
if(in_range && !match)
|
||||
print_range(first, c - 1);
|
||||
else if(!in_range && match)
|
||||
first = c;
|
||||
|
||||
in_range = match;
|
||||
}
|
||||
|
||||
if(in_range)
|
||||
print_range(first, UTF32_MAX_CHAR);
|
||||
|
||||
do_write(footer);
|
||||
|
||||
if(fflush(stdout))
|
||||
die("error writing output");
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user