diff --git a/3rd/str/.editorconfig b/3rd/str/.editorconfig deleted file mode 100644 index 76bd3a3..0000000 --- a/3rd/str/.editorconfig +++ /dev/null @@ -1,11 +0,0 @@ -root = true - -[*] -indent_style = tab -indent_size = 4 -trim_trailing_whitespace = true -insert_final_newline = true -end_of_line = lf - -[Makefile] -indent_size = 8 diff --git a/3rd/str/.gitignore b/3rd/str/.gitignore deleted file mode 100644 index c740f5e..0000000 --- a/3rd/str/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -test -flto-test -*.bak -tools/gen-char-class diff --git a/3rd/str/LICENSE b/3rd/str/LICENSE deleted file mode 100644 index 60be582..0000000 --- a/3rd/str/LICENSE +++ /dev/null @@ -1,30 +0,0 @@ -BSD 3-Clause License - -Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/3rd/str/Makefile b/3rd/str/Makefile deleted file mode 100644 index acc3214..0000000 --- a/3rd/str/Makefile +++ /dev/null @@ -1,51 +0,0 @@ -# flags -CC_WARN := -Wall -Wextra -Werror=implicit-function-declaration -Wformat -Werror=format-security - -ifeq ($(CC),musl-gcc) -# musl is ISO 10646 compliant but doesn't define __STDC_ISO_10646__ -CC_EXTRA := -D__STDC_ISO_10646__=201706L -else -# sanitisers only work for non-musl builds -CC_SAN := -fsanitize=address -fsanitize=leak -fsanitize=undefined -fsanitize-address-use-after-scope -endif - -test: CFLAGS := -ggdb -std=c11 -pipe $(CC_WARN) $(CC_EXTRA) -fno-omit-frame-pointer $(CC_SAN) -flto-test: CFLAGS := -s -O2 -pipe -std=c11 $(CC_WARN) $(CC_EXTRA) -flto -march=native -mtune=native -tools: CFLAGS := -s -O2 -pipe -std=c11 $(CC_WARN) $(CC_EXTRA) - -# str library source files -SRC := str.c str.h str_test.c - -# all -.PHONY: all -all: tools test flto-test - -.PHONY: clean -clean: clean-test clean-tools - -# test -test: $(SRC) - $(CC) $(CFLAGS) -o $@ $(filter %.c,$^) - ./$@ - -flto-test: $(SRC) - $(CC) $(CFLAGS) -o $@ $(filter %.c,$^) - ./$@ - -.PHONY: clean-test -clean-test: - rm -f test flto-test - -# tools -GEN_CHAR_CLASS := tools/gen-char-class - -.PHONY: tools -tools: $(GEN_CHAR_CLASS) - -# gen-char-class -$(GEN_CHAR_CLASS): tools/gen_char_class.c - $(CC) $(CFLAGS) -o $@ $(filter %.c,$^) - -.PHONY: clean-tools -clean-tools: - rm -f $(GEN_CHAR_CLASS) diff --git a/3rd/str/README.md 
b/3rd/str/README.md deleted file mode 100644 index 2ea794f..0000000 --- a/3rd/str/README.md +++ /dev/null @@ -1,440 +0,0 @@ -# str: yet another string library for C language. - -[![License: BSD 3 Clause](https://img.shields.io/badge/License-BSD_3--Clause-yellow.svg)](https://opensource.org/licenses/BSD-3-Clause) - -## Motivation - -Bored with developing the same functionality over and over again, unsatisfied -with existing libraries, so decided to make the right one, once and forever. ๐Ÿ™‚ - -## Features - -* Handles both C and binary strings; -* Light-weight references to strings: cheap to create, copy, or pass by value; -* Support for copy and move semantics, although not enforceable by the C language; -* String composition functions writing to memory, file descriptors, or file streams; -* Can be compiled using `gcc` or `clang`, and linked with `libc` or `musl`. - -## Installation -Just clone the project and copy (or symlink) the files `str.h` and `str.c` into your project, -but please respect the [license](LICENSE). - -## Code Examples - -String composition: - -```C -str s = str_null; - -str_join(&s, str_lit(", "), - str_lit("Here"), - str_lit("there"), - str_lit("and everywhere")); - -str_cat(&s, s, str_lit("...")); - -assert(str_eq(s, str_lit("Here, there, and everywhere..."))); -str_free(s); -``` - -Same as above, but writing to a file: - -```C -FILE* const stream = fopen(...); - -int err = str_join(stream, str_lit(", "), - str_lit("Here"), - str_lit("there"), - str_lit("and everywhere...")); - -if(err != 0) { /* handle the error */ } -``` - -[Discussion](https://news.ycombinator.com/item?id=25212864) on Hacker News. 
- -## User Guide - -_**Disclaimer:** This is the good old C language, not C++ or Rust, so nothing can be enforced -on the language level, and certain discipline is required to make sure there is no corrupt -or leaked memory resulting from using this library._ - -A string is represented by the type `str` that maintains a pointer to some memory containing the -actual string, and the length of the string. Objects of type `str` are small enough (a struct -of a `const char*` and a `size_t`) to be cheap to create, copy (pass by value), and move. The -`str` structure should be treated as opaque (i.e., do not attempt to directly access or modify -the fields in this structure). The strings are assumed to be immutable, like those in Java or -Go, but only by means of `const char*` pointers, so it is actually possible to modify such a -string, although the required type cast to `char*` offers at least some (mostly psychological) -protection from changing the string by mistake. - -This library focusses only on handling strings, not gradually composing them like -[StringBuffer](https://docs.oracle.com/javase/7/docs/api/java/lang/StringBuffer.html) -class in Java. - -All string objects must be initialised before use. Uninitialised objects will cause -undefined behaviour. Use the provided constructors, or `str_null` for empty strings. - -There are two kinds of `str` objects: those actually owning the memory they point to, and -non-owning references. This property can be queried using `str_is_owner` and `str_is_ref` -functions, otherwise such objects are indistinguishable. - -Non-owning string objects are safe to copy and assign to each other, as long as the memory -they refer to is valid. They do not need to be freed. `str_free` is a no-op for reference -objects. A reference object can be cheaply created from a C string, a string literal, -or from a range of bytes. 
- -Owning objects require special treatment, in particular: -* It is a good idea to have only one owning object per each allocated string, but such -a string can have many references to its underlying string, as long as those references do not -outlive the owning object. -Sometimes this rule may be relaxed for code clarity, like in the above example where -the owning object is passed directly to a function, but only if the function does not -store or release the object. When in doubt pass such an object via `str_ref`. -* Direct assignments (like `s2 = s1;`) to owning objects will certainly leak memory, use -`str_assign` function instead. In fact, this function can assign to any string object, -owning or not, so it can be used everywhere, just to avoid any doubt. -* There is no automatic memory management in C, so every owning object must be released at -some point using either `str_free` or `str_clear` function. String objects on the stack -can also be declared as `str_auto` (or `const str_auto`) for automatic cleanup when the variable -goes out of scope. -* An owning object can be moved to another location by using `str_move` function. The -function resets its source object to an empty string. -* Object ownership can be passed over to another object by using `str_pass` function. The -function sets its source to a non-owning reference to the original string. - -It is technically possible to create a reference to a string that is not -null-terminated. The library accepts strings without null-terminators, but every new string -allocated by the library is guaranteed to be null-terminated. - -### String Construction - -A string object can be constructed form any C string, string literal, or a range of bytes. -The provided constructors are computationally cheap to apply. Depending on the constructor, -the new object can either own the actual string it refers to, or be a non-owning reference. -Constructors themselves do not allocate any memory. 
Importantly, constructors are the only -functions in this library that return a string object, while others only assign their results -through a pointer to a pre-existing string. This makes constructors suitable for initialisation -of new string objects. In all other situations one should combine construction with assignment, -for example:
-`str_assign(&dest, str_acquire_chars(buff, n));` - -### String Object Properties - -Querying a property of a string object (like the length of the string via `str_len`) is a -cheap operation. - -### Assigning, Moving, and Passing String Objects - -C language does not allow for operator overloading, so this library provides a function -`str_assign` that takes a string object and assigns it to the destination object, freeing -any memory owned by the destination. It is generally recommended to use this function -everywhere outside object initialisation. - -An existing object can be moved over to another location via `str_move` function. -The function resets the source object to `str_null` to guarantee the correct move semantics. -The value returned by `str_move` may be either used to initialise a new object, or -assigned to an existing object using `str_assign`. - -An existing object can also be passed over to another location via `str_pass` function. The function -sets the source object to be a non-owning reference to the original string, otherwise the semantics -and usage is the same as `str_move`. - -### String Composition and Generic Destination - -String composition [functions](#string-composition) can write their results to different -destinations, depending on the _type_ of their `dest` parameter: - -* `str*`: result is assigned to the string object; -* `int`: result is written to the file descriptor; -* `FILE*` result is written to the file stream. - -The composition functions return 0 on success, or the value of `errno` as retrieved at the point -of failure (including `ENOMEM` on memory allocation error). - -### Detailed Example - -Just to make things more clear, here is the same code as in the example above, but with comments: -```C -// declare a variable and initialise it with an empty string; could also be declared as "str_auto" -// to avoid explicit call to str_free() below. 
-str s = str_null; - -// join the given string literals around the separator (second parameter), -// storing the result in object "s" (first parameter); in this example we do not check -// the return values of the composition functions, thus ignoring memory allocation failures, -// which is probably not the best idea in general. -str_join(&s, str_lit(", "), - str_lit("Here"), - str_lit("there"), - str_lit("and everywhere")); - -// create a new string concatenating "s" and a literal; the function only modifies its -// destination object "s" after the result is computed, also freeing the destination -// before the assignment, so it is safe to use "s" as both a parameter and a destination. -// note: we pass a copy of the owning object "s" as the second parameter, and here it is -// safe to do so because this particular function does not modify its arguments. -str_cat(&s, s, str_lit("...")); - -// check that we have got the expected result -assert(str_eq(s, str_lit("Here, there, and everywhere..."))); - -// finally, free the memory allocated for the string -str_free(s); -``` - -There are some useful [code snippets](snippets.md) provided to assist with writing code using -this library. - -## API brief - -`typedef struct { ... } str;`
-The string object. - -#### String Properties - -`size_t str_len(const str s)`
-Returns the number of bytes in the string referenced by the object. - -`const char* str_ptr(const str s)`
-Returns a pointer to the first byte of the string referenced by the object. The pointer is never NULL. - -`const char* str_end(const str s)`
-Returns a pointer to the next byte past the end of the string referenced by the object. -The pointer is never NULL, but it is not guaranteed to point to any valid byte or location. -For C strings it points to the terminating null character. For any given string `s` the following -condition is always satisfied: `str_end(s) == str_ptr(s) + str_len(s)`. - -`bool str_is_empty(const str s)`
-Returns "true" for empty strings. - -`bool str_is_owner(const str s)`
-Returns "true" if the string object is the owner of the memory it references. - -`bool str_is_ref(const str s)`
-Returns "true" if the string object does not own the memory it references. - -#### String Construction - -`str_null`
-Empty string constant. - -`str str_lit(s)`
-Constructs a non-owning object from a string literal. Implemented as a macro. - -`str str_ref(s)`
-Constructs a non-owning object from either a null-terminated C string, or another `str` object. -Implemented as a macro. - -`str str_ref_chars(const char* const s, const size_t n)`
-Constructs a non-owning object referencing the given range of bytes. - -`str str_acquire_chars(const char* const s, const size_t n)`
-Constructs an owning object for the specified range of bytes. The pointer `s` should be safe -to pass to `free(3)` function. - -`str str_acquire(const char* const s)`
-Constructs an owning object from the given C string. The string should be safe to pass to -`free(3)` function. - -`str str_move(str* const ps)`
-Saves the given object to a temporary, resets the source object to `str_null`, and then -returns the saved object. - -`str str_pass(str* const ps)`
-Saves the given object to a temporary, sets the source object to be a non-owning reference to the -original string, and then returns the saved object. - -#### String Deallocation - -`void str_free(const str s)`
-Deallocates any memory held by the owning string object. No-op for references. After a call to -this function the string object is in unknown and unusable state. - -String objects on the stack can also be declared as `str_auto` instead of `str` to deallocate -any memory held by the string when the variable goes out of scope. - -#### String Modification - -`void str_assign(str* const ps, const str s)`
-Assigns the object `s` to the object pointed to by `ps`. Any memory owned by the target -object is freed before the assignment. - -`void str_clear(str* const ps)`
-Sets the target object to `str_null` after freeing any memory owned by the target. - -`void str_swap(str* const s1, str* const s2)`
-Swaps two string objects. - -`int str_from_file(str* const dest, const char* const file_name)`
-Reads the entire file (of up to 64MB by default, configurable via `STR_MAX_FILE_SIZE`) into -the destination string. Returns 0 on success, or the value of `errno` on error. - -#### String Comparison - -`int str_cmp(const str s1, const str s2)`
-Lexicographically compares the two string objects, with usual semantics. - -`bool str_eq(const str s1, const str s2)`
-Returns "true" if the two strings match exactly. - -`int str_cmp_ci(const str s1, const str s2)`
-Case-insensitive comparison of two strings, implemented using `strncasecmp(3)`. - -`bool str_eq_ci(const str s1, const str s2`
-Returns "true" is the two strings match case-insensitively. - -`bool str_has_prefix(const str s, const str prefix)`
-Tests if the given string `s` starts with the specified prefix. - -`bool str_has_suffix(const str s, const str suffix)`
-Tests if the given string `s` ends with the specified suffix. - -#### String Composition - -`int str_cpy(dest, const str src)`
-Copies the source string referenced by `src` to the -[generic](#string-composition-and-generic-destination) destination `dest`. Returns 0 on success, -or the value of `errno` on failure. - -`int str_cat_range(dest, const str* src, size_t count)`
-Concatenates `count` strings from the array starting at address `src`, and writes -the result to the [generic](#string-composition-and-generic-destination) destination `dest`. -Returns 0 on success, or the value of `errno` on failure. - -`int str_cat(dest, ...)`
-Concatenates a variable list of `str` arguments, and writes the result to the -[generic](#string-composition-and-generic-destination) destination `dest`. -Returns 0 on success, or the value of `errno` on failure. - -`int str_join_range(dest, const str sep, const str* src, size_t count)`
-Joins around `sep` the `count` strings from the array starting at address `src`, and writes -the result to the [generic](#string-composition-and-generic-destination) destination `dest`. -Returns 0 on success, or the value of `errno` on failure. - -`int str_join(dest, const str sep, ...)`
-Joins a variable list of `str` arguments around `sep` delimiter, and writes the result to the -[generic](#string-composition-and-generic-destination) destination `dest`. -Returns 0 on success, or the value of `errno` on failure. - -#### Searching and Sorting - -`bool str_partition(const str src, const str patt, str* const prefix, str* const suffix)`
-Splits the string `src` on the first match of `patt`, assigning a reference to the part -of the string before the match to the `prefix` object, and the part after the match to the -`suffix` object. Returns `true` if a match has been found, or `false` otherwise, also -setting `prefix` to reference the entire `src` string, and clearing the `suffix` object. -Empty pattern `patt` never matches. - -`void str_sort_range(const str_cmp_func cmp, str* const array, const size_t count)`
-Sorts the given array of `str` objects using the given comparison function. A number -of typically used comparison functions is also provided: -* `str_order_asc` (ascending sort) -* `str_order_desc` (descending sort) -* `str_order_asc_ci` (ascending case-insensitive sort) -* `str_order_desc_ci` (descending case-insensitive sort) - -`const str* str_search_range(const str key, const str* const array, const size_t count)`
-Binary search for the given key. The input array must be sorted using `str_order_asc`. -Returns a pointer to the string matching the key, or NULL. - -`size_t str_partition_range(bool (*pred)(const str), str* const array, const size_t count)`
-Reorders the string objects in the given range in such a way that all elements for which -the predicate `pred` returns "true" precede the elements for which predicate `pred` -returns "false". Returns the number of preceding objects. - -`size_t str_unique_range(str* const array, const size_t count)`
-Reorders the string objects in the given range in such a way that there are two partitions: -one where each object is unique within the input range, and another partition with all the -remaining objects. The unique partition is stored at the beginning of the array, and is -sorted in ascending order, followed by the partition with all remaining objects. -Returns the number of unique objects. - -#### UNICODE support - -`for_each_codepoint(var_name, src_string)`
-A macro that expands to a loop iterating over the given string `src_string` (of type `str`) by UTF-32 -code points. On each iteration the variable `var_name` (of type `char32_t`) is assigned -the value of the next valid UTF-32 code point from the source string. Upon exit from the loop the -variable has one on the following values: -* `CPI_END_OF_STRING`: the iteration has reached the end of source string; -* `CPI_ERR_INCOMPLETE_SEQ`: an incomplete byte sequence has been detected; -* `CPI_ERR_INVALID_ENCODING`: an invalid byte sequence has been detected. - -The source string is expected to be encoded in the _current program locale_, as set by the most -recent call to `setlocale(3)`. - -Usage pattern: -```c -#include -... -str s = ... -... -char32_t c; // variable to receive UTF-32 values on each iteration - -for_each_codepoint(c, s) -{ - /* process c */ -} - -if(c != CPI_END_OF_STRING) -{ - /* handle error */ -} -``` - -#### Tokeniser - -Tokeniser interface provides functionality similar to `strtok(3)` function. The tokeniser -is fully re-entrant with no hidden state, and its input string is not modified while being -parsed. - -##### Typical usage: -```C -// declare and initialise tokeniser state -str_tok_state state; - -str_tok_init(&state, source_string, delimiter_set); - -// object to receive tokens -str token = str_null; - -// token iterator -while(str_tok(&token, &state)) -{ - /* process "token" */ -} -``` - -##### Tokeniser API - -`void str_tok_init(str_tok_state* const state, const str src, const str delim_set)`
-Initialises tokeniser state with the given source string and delimiter set. The delimiter set -is treated as bytes, _not_ as UNICODE code points encoded in UTF-8. - -`bool str_tok(str* const dest, str_tok_state* const state)`
-Retrieves the next token and stores it in the `dest` object. Returns `true` if the token has -been read, or `false` if the end of input has been reached. Retrieved token is always -a reference to a slice of the source string. - -`void str_tok_delim(str_tok_state* const state, const str delim_set)`
-Changes the delimiter set associated with the given tokeniser state. The delimiter set is -treated as bytes, _not_ as UNICODE code points encoded in UTF-8. - -## Tools - -All the tools are located in `tools/` directory. Currently, there are the following tools: - -* `file-to-str`: The script takes a file (text or binary) and a C variable name, and -writes to `stdout` C source code where the variable (of type `str`) is defined -and initialised with the content of the file. - -* `gen-char-class`: Generates character classification functions that do the same as their -`isw*()` counterparts under the current locale as specified by `LC_ALL` environment variable. -Run `tools/gen-char-class --help` for further details, or `tools/gen-char-class --space` -to see an example of its output. - -## Project Status -The library requires at least a C11 compiler. So far has been tested on Linux Mint versions -from 19.3 to 22.0, with `gcc` versions from 9.5.0 to 13.2.0 (with either `libc` or `musl`), -and `clang` versions up to 18.1.3; it is also reported to work on ALT Linux 9.1 for Elbrus, with -`lcc` version 1.25.09. diff --git a/3rd/str/snippets.md b/3rd/str/snippets.md deleted file mode 100644 index 8528890..0000000 --- a/3rd/str/snippets.md +++ /dev/null @@ -1,63 +0,0 @@ -### Code Examples - -Here I provide various (hopefully, useful) functions and code examples that are not included into the -main library. Some examples use non-POSIX and/or compiler-specific features that may or may -not be suitable for a particular project. Also, these snippets were tested while being developed, -but they may break in the future as the library evolves. - -##### `void str_sprintf(str* const dest, const char* fmt, ...)` - -Probably the simplest implementation utilising non-POSIX `asprintf(3)` function: -```C -#define _GNU_SOURCE - -#include "str.h" - -#define str_sprintf(dest, fmt, ...) 
\ -({ \ - char* ___p; \ - const int ___n = asprintf(&___p, (fmt), ##__VA_ARGS__); \ - str_assign((dest), str_acquire_chars(___p, ___n)); \ -}) -``` -This code does not check for errors. A more standard-conforming implementation would probably go -through `open_memstream(3)` function. - -##### `int str_from_int(str* const dest, const int val)` -```C -int str_from_int(str* const dest, const int val) -{ - char buff[256]; // of some "big enough" size - - return str_cpy(dest, str_ref_chars(buff, snprintf(buff, sizeof(buff), "%d", val))); -} -``` - -This code can also be used as a template for other functions converting from `double`, `struct tm`, etc. - -##### `int str_append(str* const dest, ...)` -```C -#define str_append(dest, ...) \ - ({ str* const ___p = (dest); str_cat(___p, *___p, ##__VA_ARGS__); }) -``` -Test case and usage example: -```C - str s = str_lit("zzz"); - - assert(str_append(&s, str_lit(" "), str_lit("aaa")) == 0); - assert(str_eq(s, str_lit("zzz aaa"))); - - str_free(s); -``` - -##### Using `str` objects with `printf` family of functions - -Since a string object is not guaranteed to refer to a null-terminated string it should be formatted -with explicitly specified length, for example: -```C - str s = ... - - printf("%.*s\n", (int)str_len(s), str_ptr(s)); -``` -_Note:_ The maximum length of the string is limited to `INT_MAX` bytes, and formatting will stop -at the first null byte within the string. diff --git a/3rd/str/str.c b/3rd/str/str.c deleted file mode 100644 index 37c9754..0000000 --- a/3rd/str/str.c +++ /dev/null @@ -1,839 +0,0 @@ -/* -BSD 3-Clause License - -Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#define _DEFAULT_SOURCE // for strncasecmp() -#define _XOPEN_SOURCE 500 // for IOV_MAX - -#include "str.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// append to destination and return the end pointer -static inline -void* mem_append(void* dest, const void* src, const size_t n) -{ - return memcpy(dest, src, n) + n; -} - -// string deallocation -void str_free(const str s) -{ - if(str_is_owner(s)) - free((void*)s.ptr); -} - -// version of str_free() for str_auto macro -void str_free_auto(const str* const ps) -{ - if(ps) - str_free(*ps); -} - -// memory allocation helpers -#define ALLOC(n) \ -({ \ - void* const ___p = malloc(n); \ - if(!___p) return ENOMEM; \ - ___p; \ -}) - -#define REALLOC(p, n) \ - ({ \ - void* const ___p = realloc((p), (n)); \ - if(!___p) return ENOMEM; \ - ___p; \ -}) - - -// errno checker -#define RETURN_ON_ERROR(expr) \ - while((expr) < 0) do { const int __err = errno; if(__err != EINTR) return __err; } while(0) - -// swap -void str_swap(str* const s1, str* const s2) -{ - const str tmp = *s1; - - *s1 = *s2; - *s2 = tmp; -} - -// empty string -const char* const str_empty_string = ""; - -// string comparison --------------------------------------------------------------------- -// compare two strings lexicographically -int str_cmp(const str s1, const str s2) -{ - const size_t n1 = str_len(s1), n2 = str_len(s2); - - // either string may be missing a null terminator, hence "memcmp" - const int res = memcmp(str_ptr(s1), str_ptr(s2), (n1 < n2) ? n1 : n2); - - if(res != 0 || n1 == n2) - return res; - - return (n1 < n2) ? -1 : 1; -} - -// case-insensitive comparison -int str_cmp_ci(const str s1, const str s2) -{ - const size_t n1 = str_len(s1), n2 = str_len(s2); - - // either string may be missing a null terminator, hence "strNcasecmp" - const int res = strncasecmp(str_ptr(s1), str_ptr(s2), (n1 < n2) ? n1 : n2); - - if(res != 0 || n1 == n2) - return res; - - return (n1 < n2) ? 
-1 : 1; -} - -// test for prefix -bool str_has_prefix(const str s, const str prefix) -{ - const size_t n = str_len(prefix); - - return (n == 0) - || (str_len(s) >= n && memcmp(str_ptr(s), str_ptr(prefix), n) == 0); -} - -// test for suffix -bool str_has_suffix(const str s, const str suffix) -{ - const size_t n = str_len(suffix); - - return (n == 0) - || (str_len(s) >= n && memcmp(str_end(s) - n, str_ptr(suffix), n) == 0); -} - -// string constructors ----------------------------------------------------------------- -// create a reference to the given range of chars -str str_ref_chars(const char* const s, const size_t n) -{ - return (s && n > 0) ? ((str){ s, str_ref_info(n) }) : str_null; -} - -str str_ref_from_ptr(const char* const s) -{ - return s ? str_ref_chars(s, strlen(s)) : str_null; -} - -// take ownership of the given range of chars -str str_acquire_chars(const char* const s, const size_t n) -{ - if(!s) - return str_null; - - if(n == 0) - { - free((void*)s); - return str_null; - } - - return (str){ s, str_owner_info(n) }; -} - -// take ownership of the given C string -str str_acquire(const char* const s) -{ - return s ? str_acquire_chars(s, strlen(s)) : str_null; -} - -// allocate a copy of the given string -int str_dup_impl(str* const dest, const str s) -{ - const size_t n = str_len(s); - - if(n == 0) - str_clear(dest); - else - { - char* const p = memcpy(ALLOC(n + 1), str_ptr(s), n); - - p[n] = 0; - str_assign(dest, str_acquire_chars(p, n)); - } - - return 0; -} - -#ifndef STR_MAX_FILE_SIZE -#define STR_MAX_FILE_SIZE (64 * 1024 * 1024 - 1) -#endif - -static -int get_file_size(const int fd, off_t* const size) -{ - // stat the file - struct stat info; - - RETURN_ON_ERROR(fstat(fd, &info)); - - *size = info.st_size; - - // only regular files are allowed - switch(info.st_mode & S_IFMT) - { - case S_IFREG: - return (info.st_size > STR_MAX_FILE_SIZE) ? 
EFBIG : 0; - case S_IFDIR: - return EISDIR; - default: - return EOPNOTSUPP; - } -} - -static -int read_from_fd(const int fd, void* p, off_t* const psize) -{ - const void* const end = p + *psize; - ssize_t n; - - do - { - RETURN_ON_ERROR(n = read(fd, p, end - p)); - - p += n; - } while(n > 0 && p < end); - - *psize -= end - p; - return 0; -} - -static -int read_from_fd_cont(const int fd, void* p, off_t* const psize) -{ - const void* end = p + *psize; - void *buf = p; - ssize_t n; - ssize_t nread = 0; - - do - { - RETURN_ON_ERROR(n = read(fd, p, end - p)); - - p += n; - nread += n; - - // pre-emptively realloc, even though we could potentially be at the end - if (p == end) { - *psize *= 2; - buf = REALLOC(buf, *psize); - p = buf + nread; - end = buf + *psize; - } - - } while(n > 0); - - return 0; -} - - -static -int str_from_fd(const int fd, const off_t size, str* const dest) -{ - if(size == 0) - { - str_clear(dest); - return 0; - } - - char* buff = ALLOC(size + 1); - off_t n = size; - const int err = read_from_fd(fd, buff, &n); - - if(err != 0) - { - free(buff); - return err; - } - - if(n == 0) - { - free(buff); - str_clear(dest); - return 0; - } - - if(n < size) - { - char* const p = realloc(buff, n + 1); - - if(!p) - { - free(buff); - return ENOMEM; - } - - buff = p; - } - - buff[n] = 0; - str_assign(dest, str_acquire_chars(buff, n)); - return 0; -} - -static -int str_from_stream_cont(const int fd, str* const dest, int *nmax) -{ - const size_t start_size = 8192; - - char* buff = ALLOC(start_size + 1); - off_t n = start_size; - const int err = read_from_fd_cont(fd, buff, &n); - - if(err != 0) - { - free(buff); - return err; - } - - if(n == 0) - { - free(buff); - str_clear(dest); - return 0; - } - - if(n < start_size) - { - char* const p = realloc(buff, n + 1); - - if(!p) - { - free(buff); - return ENOMEM; - } - - buff = p; - } - - buff[n] = '\0'; - str_assign(dest, str_acquire_chars(buff, n)); - return 0; -} - -int str_from_file(str* const dest, const char* const 
file_name) -{ - int fd; - - RETURN_ON_ERROR(fd = open(file_name, O_CLOEXEC | O_RDONLY)); - - off_t size = 0; - int err = get_file_size(fd, &size); - - if(err == 0) - err = str_from_fd(fd, size, dest); - - close(fd); - return err; -} - -int str_from_stream(str* const dest, const char* const file_name, int *nread) -{ - int fd; - - RETURN_ON_ERROR(fd = open(file_name, O_CLOEXEC | O_RDONLY)); - - int nmax = nread ? *nread : 0; - - off_t chunk_size = 4096; - int err = 0; - if (nmax == 0) - err = str_from_stream_cont(fd, dest, &nmax); - else - err = str_from_fd(fd, nmax, dest); - - if (nread) - *nread = nmax; - - close(fd); - return err; -} - -// string composition ----------------------------------------------------------------------- -// append string -static inline -char* append_str(char* p, const str s) -{ - return mem_append(p, str_ptr(s), str_len(s)); -} - -static -size_t total_length(const str* src, size_t count) -{ - size_t sum = 0; - - for(; count > 0; --count) - sum += str_len(*src++); - - return sum; -} - -// concatenate strings -int str_cat_range_impl(str* const dest, const str* src, size_t count) -{ - if(!src) - { - str_clear(dest); - return 0; - } - - // calculate total length - const size_t num = total_length(src, count); - - if(num == 0) - { - str_clear(dest); - return 0; - } - - // allocate - char* const buff = ALLOC(num + 1); - - // copy bytes - char* p = buff; - - for(; count > 0; --count) - p = append_str(p, *src++); - - // null-terminate and assign - *p = 0; - str_assign(dest, str_acquire_chars(buff, num)); - return 0; -} - -// writing to file descriptor -int str_cpy_to_fd(const int fd, const str s) -{ - size_t n = str_len(s); - const void* p = str_ptr(s); - - while(n > 0) - { - ssize_t m; - - RETURN_ON_ERROR(m = write(fd, p, n)); - - n -= m; - p += m; - } - - return 0; -} - -// writing to byte stream -int str_cpy_to_stream(FILE* const stream, const str s) -{ - const size_t n = str_len(s); - - return (n > 0 && fwrite(str_ptr(s), 1, n, stream) < n) ? 
EIO : 0; -} - -// write iovec -static -int write_iovec(const int fd, struct iovec* pv, unsigned nv) -{ - while(nv > 0) - { - ssize_t n; - - RETURN_ON_ERROR(n = writev(fd, pv, nv)); - - // discard items already written - for(; nv > 0; ++pv, --nv) - { - if(n < (ssize_t)pv->iov_len) - { - pv->iov_base += n; - pv->iov_len -= n; - break; - } - - n -= (ssize_t)pv->iov_len; - } - } - - return 0; -} - -// concatenate to file descriptor -static -struct iovec* vec_append(struct iovec* const pv, const str s) -{ - *pv = (struct iovec){ (void*)str_ptr(s), str_len(s) }; - - return pv + 1; -} - -static -struct iovec* vec_append_nonempty(struct iovec* const pv, const str s) -{ - return str_is_empty(s) ? pv : vec_append(pv, s); -} - -int str_cat_range_to_fd(const int fd, const str* src, size_t count) -{ - if(!src) - return 0; - - struct iovec v[IOV_MAX]; - - while(count > 0) - { - struct iovec* p = vec_append_nonempty(v, *src++); - - while(--count > 0 && p < v + IOV_MAX) - p = vec_append_nonempty(p, *src++); - - const size_t n = p - v; - - if(n == 0) - break; - - const int ret = write_iovec(fd, v, n); - - if(ret != 0) - return ret; - } - - return 0; -} - -int str_cat_range_to_stream(FILE* const stream, const str* src, size_t count) -{ - if(!src) - return 0; - - int err = 0; - - for(; count > 0 && err == 0; --count) - err = str_cpy(stream, *src++); - - return err; -} - -// join strings -int str_join_range_impl(str* const dest, const str sep, const str* src, size_t count) -{ - // test for simple cases - if(str_is_empty(sep)) - return str_cat_range(dest, src, count); - - if(!src || count == 0) - { - str_clear(dest); - return 0; - } - - if(count == 1) - return str_cpy(dest, *src); - - // calculate total length - const size_t num = total_length(src, count) + str_len(sep) * (count - 1); - - // allocate - char* const buff = ALLOC(num + 1); - - // copy bytes - char* p = append_str(buff, *src++); - - while(--count > 0) - p = append_str(append_str(p, sep), *src++); - - // null-terminate and 
assign - *p = 0; - str_assign(dest, str_acquire_chars(buff, num)); - return 0; -} - -int str_join_range_to_fd(const int fd, const str sep, const str* src, size_t count) -{ - if(str_is_empty(sep)) - return str_cat_range(fd, src, count); - - if(!src || count == 0) - return 0; - - if(count == 1) - return str_cpy(fd, *src); - - struct iovec v[IOV_MAX]; - - struct iovec* p = vec_append_nonempty(v, *src++); - - for(--count; count > 0; p = v) - { - p = vec_append_nonempty(vec_append(p, sep), *src++); - - while(--count > 0 && p < v + IOV_MAX - 1) - p = vec_append_nonempty(vec_append(p, sep), *src++); - - const size_t n = p - v; - - if(n == 0) - break; - - const int ret = write_iovec(fd, v, n); - - if(ret != 0) - return ret; - } - - return 0; -} - -int str_join_range_to_stream(FILE* const stream, const str sep, const str* src, size_t count) -{ - if(str_is_empty(sep)) - return str_cat_range(stream, src, count); - - if(!src || count == 0) - return 0; - - int err = str_cpy(stream, *src++); - - while(--count > 0 && err == 0) - err = str_cat(stream, sep, *src++); - - return err; -} - -// searching and sorting -------------------------------------------------------------------- -// string partitioning -bool str_partition(const str src, const str patt, str* const prefix, str* const suffix) -{ - const size_t patt_len = str_len(patt); - - if(patt_len > 0 && !str_is_empty(src)) - { - const char* s = memmem(str_ptr(src), str_len(src), str_ptr(patt), patt_len); - - if(s) - { - if(prefix) - str_assign(prefix, str_ref_chars(str_ptr(src), s - str_ptr(src))); - - if(suffix) - { - s += patt_len; - str_assign(suffix, str_ref_chars(s, str_end(src) - s)); - } - - return true; - } - } - - if(prefix) - str_assign(prefix, str_ref(src)); - - if(suffix) - str_clear(suffix); - - return false; -} - -// comparison functions -int str_order_asc(const void* const s1, const void* const s2) -{ - return str_cmp(*(const str*)s1, *(const str*)s2); -} - -int str_order_desc(const void* const s1, const void* 
const s2) -{ - return -str_cmp(*(const str*)s1, *(const str*)s2); -} - -int str_order_asc_ci(const void* const s1, const void* const s2) -{ - return str_cmp_ci(*(const str*)s1, *(const str*)s2); -} - -int str_order_desc_ci(const void* const s1, const void* const s2) -{ - return -str_cmp_ci(*(const str*)s1, *(const str*)s2); -} - -// sorting -void str_sort_range(const str_cmp_func cmp, str* const array, const size_t count) -{ - if(array && count > 1) - qsort(array, count, sizeof(array[0]), cmp); -} - -// searching -const str* str_search_range(const str key, const str* const array, const size_t count) -{ - if(!array || count == 0) - return NULL; - - if(count == 1) - return str_eq(key, array[0]) ? array : NULL; - - return bsearch(&key, array, count, sizeof(str), str_order_asc); -} - -// partitioning -size_t str_partition_range(bool (*pred)(const str), str* const array, const size_t count) -{ - if(!array) - return 0; - - const str* const end = array + count; - str* p = array; - - while(p < end && pred(*p)) - ++p; - - for(str* s = p + 1; s < end; ++s) - if(pred(*s)) - str_swap(p++, s); - - return p - array; -} - -// unique partitioning -size_t str_unique_range(str* const array, const size_t count) -{ - if(!array || count == 0) - return 0; - - if(count == 1) - return 1; - - str_sort_range(str_order_asc, array, count); - - const str* const end = array + count; - str* p = array; - - for(str* s = array + 1; s < end; ++s) - if(!str_eq(*p, *s) && (++p < s)) - str_swap(p, s); - - return p + 1 - array; -} - -// string iterator function -#ifdef __STDC_UTF_32__ - -char32_t str_cp_iterator_next(str_cp_iterator* const it) -{ - if(it->curr >= it->end) - return CPI_END_OF_STRING; - - char32_t c; - const size_t n = mbrtoc32(&c, it->curr, it->end - it->curr, &it->state); - - switch(n) // see https://en.cppreference.com/w/c/string/multibyte/mbrtoc32 - { - case 0: // null character (U+0000) is allowed - ++it->curr; - return 0; - case (size_t)-1: // encoding error - case (size_t)-3: // 
surrogate pair detected - return CPI_ERR_INVALID_ENCODING; - case (size_t)-2: // incomplete sequence - return CPI_ERR_INCOMPLETE_SEQ; - default: // ok - it->curr += n; - return c; - } -} - -#endif // ifdef __STDC_UTF_32__ - -// tokeniser -static inline -bool is_delim(const str_tok_state* const state, const char c) -{ - return state->bits[(unsigned char)c >> 3] & (1 << (c & 0x7)); -} - -static inline -void set_bit(str_tok_state* const state, const char c) -{ - state->bits[(unsigned char)c >> 3] |= (1 << (c & 0x7)); -} - -void str_tok_delim(str_tok_state* const state, const str delim_set) -{ - memset(state->bits, 0, sizeof(state->bits)); - - const char* const end = str_end(delim_set); - - for(const char* s = str_ptr(delim_set); s < end; ++s) - set_bit(state, *s); -} - -void str_tok_init(str_tok_state* const state, const str src, const str delim_set) -{ - state->src = str_ptr(src); - state->end = str_end(src); - - str_tok_delim(state, delim_set); -} - -bool str_tok(str* const dest, str_tok_state* const state) -{ - // token start - const char* begin = state->src; - - while(begin < state->end && is_delim(state, *begin)) - ++begin; - - if(begin == state->end) - { - str_clear(dest); - return false; - } - - // token end - const char* end = begin + 1; - - while(end < state->end && !is_delim(state, *end)) - ++end; - - state->src = end; - str_assign(dest, str_ref_chars(begin, end - begin)); - - return true; -} diff --git a/3rd/str/str.h b/3rd/str/str.h deleted file mode 100644 index 55708fd..0000000 --- a/3rd/str/str.h +++ /dev/null @@ -1,296 +0,0 @@ -/* -BSD 3-Clause License - -Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -// string type ---------------------------------------------------------------------------- -typedef struct -{ - const char* ptr; - size_t info; -} str; - -// NULL string -#define str_null ((str){ 0, 0 }) - -// helper macros -#define str_ref_info(n) ((n) << 1) -#define str_owner_info(n) (str_ref_info(n) | 1) - -// string properties ---------------------------------------------------------------------- -// length of the string -static inline -size_t str_len(const str s) { return s.info >> 1; } - -// pointer to the string -static inline -const char* str_ptr(const str s) -{ - extern const char* const str_empty_string; - - return s.ptr ? 
s.ptr : str_empty_string; -} - -// end of the string -static inline -const char* str_end(const str s) { return str_ptr(s) + str_len(s); } - -// test if the string is empty -static inline -bool str_is_empty(const str s) { return str_len(s) == 0; } - -// test if the string is allocated on the heap -static inline -bool str_is_owner(const str s) { return (s.info & 1) != 0; } - -// test if the string is a reference -static inline -bool str_is_ref(const str s) { return !str_is_owner(s); } - -// string memory control ------------------------------------------------------------------- -// free memory allocated for the string -void str_free(const str s); - -// automatic cleanup -void str_free_auto(const str* const ps); - -#define str_auto str __attribute__((cleanup(str_free_auto))) - -// string movements ----------------------------------------------------------------------- -// free target string, then assign the new value to it -static inline -void str_assign(str* const ps, const str s) { str_free(*ps); *ps = s; } - -// move the string, resetting the source to str_null -static inline -str str_move(str* const ps) { const str t = *ps; *ps = str_null; return t; } - -// pass ownership of the string -static inline -str str_pass(str* const ps) { const str t = *ps; ps->info &= ~(size_t)1; return t; } - -// swap two string objects -void str_swap(str* const s1, str* const s2); - -// string helpers -------------------------------------------------------------------------- -// reset the string to str_null -static inline -void str_clear(str* const ps) { str_assign(ps, str_null); } - -// compare two strings lexicographically -int str_cmp(const str s1, const str s2); - -// test if two strings match -static inline -bool str_eq(const str s1, const str s2) { return str_cmp(s1, s2) == 0; } - -// case-insensitive comparison -int str_cmp_ci(const str s1, const str s2); - -// case-insensitive match -static inline -bool str_eq_ci(const str s1, const str s2) { return str_cmp_ci(s1, s2) == 0; } 
- -// test for prefix -bool str_has_prefix(const str s, const str prefix); - -// test for suffix -bool str_has_suffix(const str s, const str suffix); - -// string composition ------------------------------------------------------------------ -// implementation helpers -int str_dup_impl(str* const dest, const str s); -int str_cpy_to_fd(const int fd, const str s); -int str_cpy_to_stream(FILE* const stream, const str s); - -// copy string -#define str_cpy(dest, src) \ - _Generic((dest), \ - str*: str_dup_impl, \ - int: str_cpy_to_fd, \ - FILE*: str_cpy_to_stream \ - )((dest), (src)) - -// implementation helpers -int str_cat_range_impl(str* const dest, const str* src, size_t count); -int str_cat_range_to_fd(const int fd, const str* src, size_t count); -int str_cat_range_to_stream(FILE* const stream, const str* src, size_t count); - -// concatenate range of strings -#define str_cat_range(dest, src, count) \ - _Generic((dest), \ - str*: str_cat_range_impl, \ - int: str_cat_range_to_fd, \ - FILE*: str_cat_range_to_stream \ - )((dest), (src), (count)) - -// concatenate string arguments -#define str_cat(dest, ...) \ -({ \ - const str args[] = { __VA_ARGS__ }; \ - str_cat_range((dest), args, sizeof(args)/sizeof(args[0])); \ -}) - -// implementation helpers -int str_join_range_impl(str* const dest, const str sep, const str* src, size_t count); -int str_join_range_to_fd(const int fd, const str sep, const str* src, size_t count); -int str_join_range_to_stream(FILE* const stream, const str sep, const str* src, size_t count); - -// join strings around the separator -#define str_join_range(dest, sep, src, count) \ - _Generic((dest), \ - str*: str_join_range_impl, \ - int: str_join_range_to_fd, \ - FILE*: str_join_range_to_stream \ - )((dest), (sep), (src), (count)) - -// join string arguments around the separator -#define str_join(dest, sep, ...) 
\ -({ \ - const str args[] = { __VA_ARGS__ }; \ - str_join_range((dest), (sep), args, sizeof(args)/sizeof(args[0])); \ -}) - -// constructors ---------------------------------------------------------------------------- -// string reference from a string literal -#define str_lit(s) ((str){ "" s, str_ref_info(sizeof(s) - 1) }) - -static inline -str str_ref_impl(const str s) { return (str){ s.ptr, s.info & ~(size_t)1 }; } - -str str_ref_from_ptr(const char* const s); - -// string reference from anything -#define str_ref(s) \ - _Generic((s), \ - str: str_ref_impl, \ - char*: str_ref_from_ptr, \ - const char*: str_ref_from_ptr \ - )(s) - -// create a reference to the given range of chars -str str_ref_chars(const char* const s, const size_t n); - -// take ownership of the given range of chars -str str_acquire_chars(const char* const s, const size_t n); - -// take ownership of the given string -str str_acquire(const char* const s); - -// string from file -int str_from_file(str* const dest, const char* const file_name); - -// read maximum nread bytes from file, write bytes read. 0 reads until EOS. 
-int str_from_stream(str* const dest, const char* const file_name, int *nread); - -// searching and sorting -------------------------------------------------------------------- -// string partitioning (substring search) -bool str_partition(const str src, const str patt, str* const prefix, str* const suffix); - -// comparison functions -typedef int (*str_cmp_func)(const void*, const void*); - -int str_order_asc(const void* const s1, const void* const s2); -int str_order_desc(const void* const s1, const void* const s2); -int str_order_asc_ci(const void* const s1, const void* const s2); -int str_order_desc_ci(const void* const s1, const void* const s2); - -// sort array of strings -void str_sort_range(const str_cmp_func cmp, str* const array, const size_t count); - -// searching -const str* str_search_range(const str key, const str* const array, const size_t count); - -// partitioning -size_t str_partition_range(bool (*pred)(const str), str* const array, const size_t count); - -// unique partitioning -size_t str_unique_range(str* const array, const size_t count); - -// UTF-32 codepoint iterator ---------------------------------------------------------------- -#ifdef __STDC_UTF_32__ -#include - -// iterator -#define for_each_codepoint(var, src) \ - for_each_cp((var), (src), CAT1(inner_it_, __COUNTER__)) - -// iterator error codes -#define CPI_END_OF_STRING ((char32_t)-1) -#define CPI_ERR_INCOMPLETE_SEQ ((char32_t)-2) -#define CPI_ERR_INVALID_ENCODING ((char32_t)-3) - -// implementation -#define for_each_cp(var, src, it) \ - for(str_cp_iterator it = str_make_cp_iterator(src); (var = str_cp_iterator_next(&it)) <= 0x10FFFFu;) - -#define CAT1(x, y) CAT2(x, y) -#define CAT2(x, y) x ## y - -typedef struct -{ - const char* curr; - const char* const end; - mbstate_t state; -} str_cp_iterator; - -static inline -str_cp_iterator str_make_cp_iterator(const str s) -{ - return (str_cp_iterator){ .curr = str_ptr(s), .end = str_end(s) }; -} - -char32_t 
str_cp_iterator_next(str_cp_iterator* const it); - -#endif // ifdef __STDC_UTF_32__ - -// tokeniser -------------------------------------------------------------------------------- -typedef struct -{ - unsigned char bits[32]; // 256 / 8 - const char *src, *end; -} str_tok_state; - -void str_tok_init(str_tok_state* const state, const str src, const str delim_set); -bool str_tok(str* const dest, str_tok_state* const state); -void str_tok_delim(str_tok_state* const state, const str delim_set); - -#ifdef __cplusplus -} -#endif diff --git a/3rd/str/str_test.c b/3rd/str/str_test.c deleted file mode 100644 index f877916..0000000 --- a/3rd/str/str_test.c +++ /dev/null @@ -1,907 +0,0 @@ -/* -BSD 3-Clause License - -Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#define _POSIX_C_SOURCE 200809L - -#include "str.h" - -#include -#include -#include -#include -#include -#include - -// make sure assert is always enabled -#ifdef NDEBUG -#undef NDEBUG -#endif - -#include - -#define passed printf("passed: %s\n", __func__) - -static -void test_str_lit(void) -{ - const str s = str_lit("ZZZ"); - - assert(str_len(s) == 3); - assert(str_is_ref(s)); - assert(!str_is_owner(s)); - assert(str_eq(s, str_lit("ZZZ"))); - - passed; -} - -static -void test_str_cpy(void) -{ - str_auto s = str_null; - - assert(str_cpy(&s, str_lit("ZZZ")) == 0); - - assert(str_len(s) == 3); - assert(!str_is_ref(s)); - assert(str_is_owner(s)); - assert(str_eq(s, str_lit("ZZZ"))); - assert(*str_end(s) == 0); - - passed; -} - -static -void test_str_clear(void) -{ - str s = str_null; - - assert(str_cpy(&s, str_lit("ZZZ")) == 0); - - assert(str_len(s) == 3); - assert(str_is_owner(s)); - assert(*str_end(s) == 0); - - str_clear(&s); - - assert(str_is_empty(s)); - assert(str_is_ref(s)); - - passed; -} - -static -void test_str_move(void) -{ - str s1 = str_null; - - assert(str_cpy(&s1, str_lit("ZZZ")) == 0); - - str s2 = str_move(&s1); - - assert(str_is_empty(s1)); - assert(str_is_ref(s1)); - - assert(str_is_owner(s2)); - assert(str_eq(s2, str_lit("ZZZ"))); - - str_free(s2); - passed; -} - -static -void test_str_pass(void) -{ - str s1 = str_null; - - assert(str_cpy(&s1, str_lit("ZZZ")) == 0); - - str s2 = str_pass(&s1); - - assert(str_is_ref(s1)); - 
assert(str_eq(s1, str_lit("ZZZ"))); - - assert(str_is_owner(s2)); - assert(str_eq(s2, str_lit("ZZZ"))); - - str_free(s2); - passed; -} - -static -void test_str_ref(void) -{ - str s = str_ref("ZZZ"); - - assert(str_len(s) == 3); - assert(str_is_ref(s)); - - s = str_ref(s); - - assert(str_is_ref(s)); - assert(str_eq(s, str_lit("ZZZ"))); - - const char* const p = "ZZZ"; - - s = str_ref(p); - - assert(str_is_ref(s)); - assert(str_eq(s, str_lit("ZZZ"))); - - passed; -} - -static -void test_str_cmp(void) -{ - const str_auto s = str_lit("zzz"); - - assert(str_cmp(s, s) == 0); - assert(str_cmp(s, str_lit("zzz")) == 0); - assert(str_cmp(s, str_lit("zz")) > 0); - assert(str_cmp(s, str_lit("zzzz")) < 0); - assert(str_cmp(s, str_null) > 0); - assert(str_cmp(str_null, s) < 0); - assert(str_cmp(str_null, str_null) == 0); - assert(str_eq(s, str_lit("zzz"))); - - passed; -} - -static -void test_str_cmp_ci(void) -{ - const str s = str_lit("zzz"); - - assert(str_cmp_ci(s, s) == 0); - assert(str_cmp_ci(s, str_lit("zzz")) == 0); - assert(str_cmp_ci(s, str_lit("zz")) > 0); - assert(str_cmp_ci(s, str_lit("zzzz")) < 0); - assert(str_cmp_ci(s, str_null) > 0); - assert(str_cmp_ci(str_null, s) < 0); - assert(str_cmp_ci(str_null, str_null) == 0); - assert(str_cmp_ci(s, str_lit("ZZZ")) == 0); - assert(str_cmp_ci(s, str_lit("ZZ")) > 0); - assert(str_cmp_ci(s, str_lit("ZZZZ")) < 0); - assert(str_eq_ci(s, str_lit("ZZZ"))); - - passed; -} - -static -void test_str_acquire(void) -{ - str_auto s = str_acquire(strdup("ZZZ")); - - assert(str_is_owner(s)); - assert(str_eq(s, str_lit("ZZZ"))); - assert(*str_end(s) == 0); - - passed; -} - -static -void test_str_cat(void) -{ - str s = str_null; - - assert(str_cat(&s, str_lit("AAA"), str_lit("BBB"), str_lit("CCC")) == 0); - - assert(str_eq(s, str_lit("AAABBBCCC"))); - assert(str_is_owner(s)); - assert(*str_end(s) == 0); - - assert(str_cat(&s, str_null, str_null, str_null) == 0); // this simply clears the target string - - assert(str_is_empty(s)); - 
assert(str_is_ref(s)); - - passed; -} - -static -void test_str_join(void) -{ - str s = str_null; - - assert(str_join(&s, str_lit("_"), str_lit("AAA"), str_lit("BBB"), str_lit("CCC")) == 0); - - assert(str_eq(s, str_lit("AAA_BBB_CCC"))); - assert(str_is_owner(s)); - assert(*str_end(s) == 0); - - assert(str_join(&s, str_lit("_"), str_null, str_lit("BBB"), str_lit("CCC")) == 0); - - assert(str_eq(s, str_lit("_BBB_CCC"))); - assert(str_is_owner(s)); - assert(*str_end(s) == 0); - - assert(str_join(&s, str_lit("_"), str_lit("AAA"), str_null, str_lit("CCC")) == 0); - - assert(str_eq(s, str_lit("AAA__CCC"))); - assert(str_is_owner(s)); - assert(*str_end(s) == 0); - - assert(str_join(&s, str_lit("_"), str_lit("AAA"), str_lit("BBB"), str_null) == 0); - - assert(str_eq(s, str_lit("AAA_BBB_"))); - assert(str_is_owner(s)); - assert(*str_end(s) == 0); - - assert(str_join(&s, str_lit("_"), str_null, str_null, str_null) == 0); - - assert(str_eq(s, str_lit("__"))); - assert(str_is_owner(s)); - assert(*str_end(s) == 0); - - assert(str_join(&s, str_null) == 0); // this simply clears the target string - - assert(str_is_empty(s)); - assert(str_is_ref(s)); - - passed; -} - -static -void test_composition(void) -{ - str_auto s = str_lit(", "); - - assert(str_join(&s, s, str_lit("Here"), str_lit("there"), str_lit("and everywhere")) == 0); - assert(str_cat(&s, s, str_lit("...")) == 0); - - assert(str_eq(s, str_lit("Here, there, and everywhere..."))); - assert(str_is_owner(s)); - assert(*str_end(s) == 0); - - passed; -} - -static -void test_sort(void) -{ - str src[] = { str_lit("z"), str_lit("zzz"), str_lit("aaa"), str_lit("bbb") }; - - str_sort_range(str_order_asc, src, sizeof(src)/sizeof(src[0])); - - assert(str_eq(src[0], str_lit("aaa"))); - assert(str_eq(src[1], str_lit("bbb"))); - assert(str_eq(src[2], str_lit("z"))); - assert(str_eq(src[3], str_lit("zzz"))); - - str_sort_range(str_order_desc, src, sizeof(src)/sizeof(src[0])); - - assert(str_eq(src[0], str_lit("zzz"))); - 
assert(str_eq(src[1], str_lit("z"))); - assert(str_eq(src[2], str_lit("bbb"))); - assert(str_eq(src[3], str_lit("aaa"))); - - passed; -} - -static -void test_sort_ci(void) -{ - str src[] = { str_lit("ZZZ"), str_lit("zzz"), str_lit("aaa"), str_lit("AAA") }; - - str_sort_range(str_order_asc_ci, src, sizeof(src)/sizeof(src[0])); - - assert(str_eq_ci(src[0], str_lit("aaa"))); - assert(str_eq_ci(src[1], str_lit("aaa"))); - assert(str_eq_ci(src[2], str_lit("zzz"))); - assert(str_eq_ci(src[3], str_lit("zzz"))); - - str_sort_range(str_order_desc_ci, src, sizeof(src)/sizeof(src[0])); - - assert(str_eq_ci(src[0], str_lit("zzz"))); - assert(str_eq_ci(src[1], str_lit("zzz"))); - assert(str_eq_ci(src[2], str_lit("aaa"))); - assert(str_eq_ci(src[3], str_lit("aaa"))); - - passed; -} - -static -void test_search(void) -{ - str src[] = { str_lit("z"), str_lit("zzz"), str_lit("aaa"), str_lit("bbb") }; - const size_t count = sizeof(src)/sizeof(src[0]); - - str_sort_range(str_order_asc, src, count); - - assert(str_search_range(src[0], src, count) == &src[0]); - assert(str_search_range(src[1], src, count) == &src[1]); - assert(str_search_range(src[2], src, count) == &src[2]); - assert(str_search_range(src[3], src, count) == &src[3]); - assert(str_search_range(str_lit("xxx"), src, count) == NULL); - - passed; -} - -static -void test_prefix(void) -{ - const str s = str_lit("abcd"); - - assert(str_has_prefix(s, str_null)); - assert(str_has_prefix(s, str_lit("a"))); - assert(str_has_prefix(s, str_lit("ab"))); - assert(str_has_prefix(s, str_lit("abc"))); - assert(str_has_prefix(s, str_lit("abcd"))); - - assert(!str_has_prefix(s, str_lit("zzz"))); - assert(!str_has_prefix(s, str_lit("abcde"))); - - passed; -} - -static -void test_suffix(void) -{ - const str s = str_lit("abcd"); - - assert(str_has_suffix(s, str_null)); - assert(str_has_suffix(s, str_lit("d"))); - assert(str_has_suffix(s, str_lit("cd"))); - assert(str_has_suffix(s, str_lit("bcd"))); - assert(str_has_suffix(s, str_lit("abcd"))); 
- - assert(!str_has_suffix(s, str_lit("zzz"))); - assert(!str_has_suffix(s, str_lit("_abcd"))); - - passed; -} - -static -void test_cpy_to_fd(void) -{ - FILE* const tmp = tmpfile(); - - assert(tmp != NULL); - assert(str_cpy(fileno(tmp), str_lit("ZZZ")) == 0); - - rewind(tmp); - - char buff[32]; - - assert(fread(buff, 1, sizeof(buff), tmp) == 3); - assert(memcmp(buff, "ZZZ", 3) == 0); - - fclose(tmp); - passed; -} - -static -void test_cpy_to_stream(void) -{ - FILE* const tmp = tmpfile(); - - assert(tmp != NULL); - assert(str_cpy(tmp, str_lit("ZZZ")) == 0); - - assert(fflush(tmp) == 0); - rewind(tmp); - - char buff[32]; - - assert(fread(buff, 1, sizeof(buff), tmp) == 3); - assert(memcmp(buff, "ZZZ", 3) == 0); - - fclose(tmp); - passed; -} - -static -void test_cat_range_to_fd(void) -{ - const str src[] = { - str_lit("aaa"), - str_lit("bbb"), - str_null, - str_lit("ccc"), - str_lit("ddd"), - str_null, - str_null - }; - - const size_t num_items = sizeof(src)/sizeof(src[0]); - - FILE* const tmp = tmpfile(); - - assert(tmp != NULL); - assert(str_cat_range(fileno(tmp), src, num_items) == 0); - - rewind(tmp); - - const char res[] = "aaabbbcccddd"; - const size_t len = sizeof(res) - 1; - char buff[32]; - - assert(fread(buff, 1, sizeof(buff), tmp) == len); - assert(memcmp(buff, res, len) == 0); - - fclose(tmp); - passed; -} - -static -void test_cat_large_range_to_fd(void) -{ - // prepare data - const size_t n = 100000; - str* const src = calloc(n, sizeof(str)); - - assert(src != NULL); - - char buff[100]; - - for(unsigned i = 0; i < n; i++) - assert(str_cpy(&src[i], str_ref_chars(buff, sprintf(buff, "%u\n", i))) == 0); - - // write to file - FILE* const tmp = tmpfile(); - - assert(tmp != NULL); - assert(str_cat_range(fileno(tmp), src, n) == 0); - - // clear input data - for(unsigned i = 0; i < n; ++i) - str_free(src[i]); - - free(src); - - // validate - rewind(tmp); - - char* line = NULL; - size_t cap = 0; - ssize_t len; - int i = 0; - - while((len = getline(&line, &cap, 
tmp)) >= 0) - assert(atoi(line) == i++); - - assert(i == (int)n); - - // all done - fclose(tmp); - free(line); - passed; -} - -static -void test_cat_range_to_stream(void) -{ - const str src[] = { - str_lit("aaa"), - str_lit("bbb"), - str_null, - str_lit("ccc"), - str_lit("ddd"), - str_null, - str_null - }; - - const size_t num_items = sizeof(src)/sizeof(src[0]); - - FILE* const tmp = tmpfile(); - - assert(tmp != NULL); - assert(str_cat_range(tmp, src, num_items) == 0); - - assert(fflush(tmp) == 0); - rewind(tmp); - - const char res[] = "aaabbbcccddd"; - const size_t len = sizeof(res) - 1; - char buff[32]; - - assert(fread(buff, 1, sizeof(buff), tmp) == len); - assert(memcmp(buff, res, len) == 0); - - fclose(tmp); - passed; -} - -static -void test_join_to_fd(void) -{ - FILE* const tmp = tmpfile(); - - assert(tmp != NULL); - assert(str_join(fileno(tmp), str_lit("_"), str_lit("aaa"), str_lit("bbb"), str_lit("ccc")) == 0); - - rewind(tmp); - - const char res[] = "aaa_bbb_ccc"; - const size_t len = sizeof(res) - 1; - char buff[32]; - - assert(fread(buff, 1, sizeof(buff), tmp) == len); - assert(memcmp(buff, res, len) == 0); - - fclose(tmp); - passed; -} - -static -void test_join_large_range_to_fd(void) -{ - // prepare data - const size_t n = 100000; - str* const src = calloc(n, sizeof(str)); - - assert(src != NULL); - - char buff[100]; - - for(unsigned i = 0; i < n; i++) - assert(str_cpy(&src[i], str_ref_chars(buff, sprintf(buff, "%u", i))) == 0); - - // write to file - FILE* const tmp = tmpfile(); - - assert(tmp != NULL); - assert(str_join_range(fileno(tmp), str_lit("\n"), src, n) == 0); - - // clear input data - for(unsigned i = 0; i < n; ++i) - str_free(src[i]); - - free(src); - - // validate - rewind(tmp); - - char* line = NULL; - size_t cap = 0; - ssize_t len; - int i = 0; - - while((len = getline(&line, &cap, tmp)) >= 0) - assert(atoi(line) == i++); - - assert(i == (int)n); - - // all done - fclose(tmp); - free(line); - passed; -} - -static -void 
test_join_to_stream(void) -{ - FILE* const tmp = tmpfile(); - - assert(tmp != NULL); - assert(str_join(tmp, str_lit("_"), str_lit("aaa"), str_lit("bbb"), str_lit("ccc")) == 0); - - assert(fflush(tmp) == 0); - rewind(tmp); - - const char res[] = "aaa_bbb_ccc"; - const size_t len = sizeof(res) - 1; - char buff[32]; - - assert(fread(buff, 1, sizeof(buff), tmp) == len); - assert(memcmp(buff, res, len) == 0); - - fclose(tmp); - passed; -} - -static -bool part_pred(const str s) { return str_len(s) < 2; } - -static -void test_partition_range(void) -{ - str src[] = { str_lit("aaa"), str_lit("a"), str_lit("aaaa"), str_lit("z") }; - - assert(str_partition_range(part_pred, src, 1) == 0); - - assert(str_partition_range(part_pred, src, sizeof(src)/sizeof(src[0])) == 2); - assert(str_eq(src[0], str_lit("a"))); - assert(str_eq(src[1], str_lit("z"))); - assert(str_partition_range(part_pred, src, 1) == 1); - - src[0] = str_lit("?"); - src[2] = str_lit("*"); - - assert(str_partition_range(part_pred, src, sizeof(src)/sizeof(src[0])) == 3); - assert(str_eq(src[0], str_lit("?"))); - assert(str_eq(src[1], str_lit("z"))); - assert(str_eq(src[2], str_lit("*"))); - assert(str_eq(src[3], str_lit("aaa"))); - - assert(str_partition_range(part_pred, NULL, 42) == 0); - assert(str_partition_range(part_pred, src, 0) == 0); - - passed; -} - -static -void test_unique_range(void) -{ - str src[] = { - str_lit("zzz"), - str_lit("aaa"), - str_lit("zzz"), - str_lit("bbb"), - str_lit("aaa"), - str_lit("ccc"), - str_lit("ccc"), - str_lit("aaa"), - str_lit("ccc"), - str_lit("zzz") - }; - - assert(str_unique_range(src, sizeof(src)/sizeof(src[0])) == 4); - assert(str_eq(src[0], str_lit("aaa"))); - assert(str_eq(src[1], str_lit("bbb"))); - assert(str_eq(src[2], str_lit("ccc"))); - assert(str_eq(src[3], str_lit("zzz"))); - - passed; -} - -static -void test_from_file(void) -{ - str_auto fname = str_null; - - assert(str_cat(&fname, str_lit("tmp_"), str_ref_chars(__func__, sizeof(__func__) - 1)) == 0); - - FILE* 
const stream = fopen(str_ptr(fname), "w"); - - assert(stream); - assert(str_join(stream, str_lit(" "), str_lit("aaa"), str_lit("bbb"), str_lit("ccc")) == 0); - assert(fclose(stream) == 0); - - str_auto res = str_null; - - assert(str_from_file(&res, str_ptr(fname)) == 0); - unlink(str_ptr(fname)); - assert(str_eq(res, str_lit("aaa bbb ccc"))); - assert(str_is_owner(res)); - - // test errors - assert(str_from_file(&res, ".") == EISDIR); - assert(str_from_file(&res, "/dev/null") == EOPNOTSUPP); - assert(str_from_file(&res, "does-not-exist") == ENOENT); - - passed; -} - -#ifdef __STDC_UTF_32__ - -static -void test_codepoint_iterator(void) -{ - const str src = str_lit(u8"ะถั‘ะปั‚ั‹ะน"); // means "yellow" in Russian - static const char32_t src32[] = { U'ะถ', U'ั‘', U'ะป', U'ั‚', U'ั‹', U'ะน' }; - size_t i = 0; - char32_t c; - - for_each_codepoint(c, src) - { - assert(i < sizeof(src32)/sizeof(src32[0])); - assert(c == src32[i++]); - } - - assert(c == CPI_END_OF_STRING); - assert(i == sizeof(src32)/sizeof(src32[0])); - - // empty string iteration - c = 0; - - for_each_codepoint(c, str_null) - assert(0); - - assert(c == CPI_END_OF_STRING); - passed; -} - -#endif // ifdef __STDC_UTF_32__ - -static -void test_tok(void) -{ - typedef struct - { - const str src, delim; - const unsigned n_tok; - const str tok[3]; - } test_data; - - static const test_data t[] = - { - { - str_lit("a,b,c"), - str_lit(","), - 3, - { str_lit("a"), str_lit("b"), str_lit("c") } - }, - { - str_lit(",,a,b,,c,"), - str_lit(","), - 3, - { str_lit("a"), str_lit("b"), str_lit("c") } - }, - { - str_lit("aaa;=~bbb~,=ccc="), - str_lit(",;=~"), - 3, - { str_lit("aaa"), str_lit("bbb"), str_lit("ccc") } - }, - { - str_lit(""), - str_lit(","), - 0, - { } - }, - { - str_lit(""), - str_lit(""), - 0, - { } - }, - { - str_lit(",.;,.;;.,;.,"), - str_lit(",.;"), - 0, - { } - }, - { - str_lit("aaa,bbb,ccc"), - str_lit(""), - 1, - { str_lit("aaa,bbb,ccc") } - }, - { - str_lit("aaa,bbb,ccc"), - str_lit(";-="), - 1, - { 
str_lit("aaa,bbb,ccc") } - } - }; - - for(unsigned i = 0; i < sizeof(t)/sizeof(t[0]); ++i) - { - unsigned tok_count = 0; - - str tok = str_null; - str_tok_state state; - - str_tok_init(&state, t[i].src, t[i].delim); - - while(str_tok(&tok, &state)) - { -// printf("%u-%u: \"%.*s\" %zu\n", -// i, tok_count, (int)str_len(tok), str_ptr(tok), str_len(tok)); -// fflush(stdout); - - assert(tok_count < t[i].n_tok); - assert(str_eq(tok, t[i].tok[tok_count])); - - ++tok_count; - } - - assert(tok_count == t[i].n_tok); - } - - passed; -} - -static -void test_partition(void) -{ - typedef struct - { - const bool res; - const str src, patt, pref, suff; - } test_data; - - static const test_data t[] = - { - { true, str_lit("...abc..."), str_lit("abc"), str_lit("..."), str_lit("...") }, - { true, str_lit("......abc"), str_lit("abc"), str_lit("......"), str_null }, - { true, str_lit("abc......"), str_lit("abc"), str_null, str_lit("......") }, - - { true, str_lit("...a..."), str_lit("a"), str_lit("..."), str_lit("...") }, - { true, str_lit("......a"), str_lit("a"), str_lit("......"), str_null }, - { true, str_lit("a......"), str_lit("a"), str_null, str_lit("......") }, - - { false, str_lit("zzz"), str_null, str_lit("zzz"), str_null }, - { false, str_null, str_lit("zzz"), str_null, str_null }, - { false, str_null, str_null, str_null, str_null }, - - { false, str_lit("...zzz..."), str_lit("xxx"), str_lit("...zzz..."), str_null }, - { false, str_lit("...xxz..."), str_lit("xxx"), str_lit("...xxz..."), str_null }, - { true, str_lit("...xxz...xxx."), str_lit("xxx"), str_lit("...xxz..."), str_lit(".") }, - { true, str_lit(u8"...ั†ะธั„ั€ั‹___"), str_lit(u8"ั†ะธั„ั€ั‹"), str_lit("..."), str_lit("___") } - }; - - for(unsigned i = 0; i < sizeof(t)/sizeof(t[0]); ++i) - { - str pref = str_lit("???"), suff = str_lit("???"); - - assert(str_partition(t[i].src, t[i].patt, &pref, &suff) == t[i].res); - assert(str_eq(pref, t[i].pref)); - assert(str_eq(suff, t[i].suff)); - } - - passed; -} - -int 
main(void) -{ - // tests - test_str_lit(); - test_str_cpy(); - test_str_clear(); - test_str_move(); - test_str_pass(); - test_str_ref(); - test_str_cmp(); - test_str_cmp_ci(); - test_str_acquire(); - test_str_cat(); - test_str_join(); - test_composition(); - test_sort(); - test_sort_ci(); - test_search(); - test_prefix(); - test_suffix(); - test_cpy_to_fd(); - test_cpy_to_stream(); - test_cat_range_to_fd(); - test_cat_large_range_to_fd(); - test_cat_range_to_stream(); - test_join_to_fd(); - test_join_large_range_to_fd(); - test_join_to_stream(); - test_partition_range(); - test_unique_range(); - test_from_file(); - test_tok(); - test_partition(); - -#ifdef __STDC_UTF_32__ - assert(setlocale(LC_ALL, "C.UTF-8")); - - test_codepoint_iterator(); -#endif - - return puts("OK.") < 0; -} diff --git a/3rd/str/tools/file-to-str b/3rd/str/tools/file-to-str deleted file mode 100755 index 82366af..0000000 --- a/3rd/str/tools/file-to-str +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/sh - -die() { - echo >&2 "$@" - exit 1 -} - -[ $# -eq 2 ] || die "Usage: $(basename "$0") FILE VAR-NAME" -[ -f "$1" ] || die "$0: file \"$1\" does not exist, or is not a file." - -set -e - -cat << EOF -// AUTOMATICALLY GENERATED FILE - DO NOT EDIT - -// source file: $1 - -#include "str.h" - -static -const char _bytes[] = { -EOF - -od -v -w12 -A n -t x1 "$1" | sed -E 's/\<([[:xdigit:]]{2})\>/0x\1,/g' - -cat << EOF - 0x00 }; - -const str $2 = (const str){ _bytes, _ref_info(sizeof(_bytes) - 1) }; -EOF diff --git a/3rd/str/tools/gen_char_class.c b/3rd/str/tools/gen_char_class.c deleted file mode 100644 index 9c829d1..0000000 --- a/3rd/str/tools/gen_char_class.c +++ /dev/null @@ -1,209 +0,0 @@ -/* -BSD 3-Clause License - -Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include -#include -#include -#include -#include -#include -#include - -// platform checks -#ifndef __STDC_ISO_10646__ -#error "this platform does not support UNICODE (__STDC_ISO_10646__ is not defined)" -#endif - -#if __SIZEOF_WCHAR_T__ < 4 || __SIZEOF_WINT_T__ < 4 -#error "this platform does not have a usable wchar_t (both sizeof(wchar_t) and sizeof(wint_t) should be at least 4)" -#endif - -// i/o helpers -static __attribute((noinline, noreturn)) -void die(const char* const msg) -{ - perror(msg); - exit(1); -} - -#define do_printf(fmt, ...) \ - do { \ - if(printf(fmt, ##__VA_ARGS__) < 0) \ - die("error writing output"); \ - } while(0) - -#define do_write(str) \ - do { \ - if(fwrite((str), 1, sizeof(str) - 1, stdout) != sizeof(str) - 1) \ - die("error writing output"); \ - } while(0) - -// char type selector (isw*() functions) -typedef int (*selector)(wint_t wc); - -// option parser -static __attribute__((noreturn)) -void usage_exit(void) -{ - static const char usage[] = - "Usage: gen-char-class SELECTOR\n" - " Generate a character classification C function that does the same as its\n" - " isw*() counterpart under the current locale as specified by LC_ALL\n" - " environment variable. 
SELECTOR specifies the classification function\n" - " to generate, it must be any one of:\n" - " --alnum -> use iswalnum()\n" - " --alpha -> use iswalpha()\n" - " --blank -> use iswblank()\n" - " --cntrl -> use iswcntrl()\n" - " --digit -> use iswdigit()\n" - " --graph -> use iswgraph()\n" - " --lower -> use iswlower()\n" - " --print -> use iswprint()\n" - " --punct -> use iswpunct()\n" - " --space -> use iswspace()\n" - " --upper -> use iswupper()\n" - " --xdigit -> use iswxdigit()\n"; - - fputs(usage, stderr); - exit(1); -} - -static -selector fn; - -static -const char* fn_name; - -static -const char* loc; - -#define ARG(name) \ - if(strcmp(argv[1], "--" #name) == 0) { \ - fn = isw ## name; fn_name = #name; \ - return; \ - } - -static -void read_opts(int argc, char* const argv[]) -{ - if(argc != 2) - usage_exit(); - - ARG(alnum) - ARG(alpha) - ARG(blank) - ARG(cntrl) - ARG(digit) - ARG(graph) - ARG(lower) - ARG(print) - ARG(punct) - ARG(space) - ARG(upper) - ARG(xdigit) - - if(strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) - usage_exit(); - - fprintf(stderr, "unknown option: \"%s\"\n", argv[1]); - exit(1); -} - -#undef ARG - -// range printing -static -void print_range(const wint_t first, const wint_t last) -{ - if(first == last) - do_printf("\t\tcase 0x%.2X:\n", first); - else - do_printf("\t\tcase 0x%.2X ... 0x%.2X:\n", first, last); -} - -// header/footer -static -const char header[] = - "/* LC_ALL = \"%s\" */\n" - "bool is_%s(const char32_t c)\n" - "{\n" - " switch(c)\n" - " {\n"; - -static -const char footer[] = - " return true;\n" - " default:\n" - " return false;\n" - " }\n" - "}\n"; - -// main -#define UTF32_MAX_CHAR 0x10ffff - -int main(int argc, char* const argv[]) -{ - read_opts(argc, argv); - - loc = getenv("LC_ALL"); - - if(loc && !setlocale(LC_ALL, loc)) - die("cannot change current locale"); - - errno = 0; - do_printf(header, loc ? 
loc : "", fn_name); - - wint_t first = 0; - bool in_range = false; - - for(wint_t c = 0; c <= UTF32_MAX_CHAR; ++c) - { - const bool match = (fn(c) != 0); - - if(in_range && !match) - print_range(first, c - 1); - else if(!in_range && match) - first = c; - - in_range = match; - } - - if(in_range) - print_range(first, UTF32_MAX_CHAR); - - do_write(footer); - - if(fflush(stdout)) - die("error writing output"); - - return 0; -} diff --git a/CMakeLists.txt b/CMakeLists.txt index dc40a17..c9b8382 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,7 +46,7 @@ add_library(hiload SHARED src/memory.c # dependencies - src/str.c + src/string/hi_string.c src/logger/sc_log.c ) diff --git a/scripts/copy_dependencies.sh b/scripts/copy_dependencies.sh index fc54610..c167f76 100644 --- a/scripts/copy_dependencies.sh +++ b/scripts/copy_dependencies.sh @@ -1,7 +1,5 @@ #!/usr/bin/env bash -cp --verbose "3rd/str/str.h" "src/" -cp --verbose "3rd/str/str.c" "src/" cp --verbose "3rd/sc/array/sc_array.h" "src/array/" cp --verbose "3rd/sc/logger/sc_log.c" "src/logger/" cp --verbose "3rd/sc/logger/sc_log.h" "src/logger/" diff --git a/src/files.c b/src/files.c index bf8a4fd..3762ab2 100644 --- a/src/files.c +++ b/src/files.c @@ -1,25 +1,16 @@ #include "files.h" #include "logger.h" -#include "str.h" #include "types.h" +#include "string/hi_string.h" -HiloadResult read_file_to_str(str *s, const char *filename) { +const char* hi_file_to_str_dyn(const char *filename) { - int copied = str_from_file(s, filename); - if (copied != 0) { + size_t n = 0; + const char *s = hi_string_from_file_dyn(filename, 0, 0); + if (!s) { sc_log_error("Failed to read file: %s\n", filename); - return HILOAD_FAIL; + return 0; } - return HILOAD_OK; -} - -HiloadResult read_stream_to_str(str *s, const char *filename) { - int nread = 0; - int copied = str_from_stream(s, filename, &nread); - if (copied != 0) { - sc_log_error("Failed to read file: %s\n", filename); - return HILOAD_FAIL; - } - return HILOAD_OK; + return s; } 
diff --git a/src/files.h b/src/files.h index 0d80531..0744de0 100644 --- a/src/files.h +++ b/src/files.h @@ -1,10 +1,8 @@ #ifndef FILES_H_ #define FILES_H_ -#include "str.h" #include "types.h" -HiloadResult read_file_to_str(str *s, const char *); -HiloadResult read_stream_to_str(str *s, const char *); +const char* hi_file_to_str_dyn(const char *filename); #endif // FILES_H_ diff --git a/src/memory.c b/src/memory.c index 8c08f37..1b62c4e 100644 --- a/src/memory.c +++ b/src/memory.c @@ -32,16 +32,16 @@ HiloadResult memory_find_pointer(uptr ptr, } HiloadResult read_memory_maps_self(struct sc_array_memreg *regions) { - str memory_str = str_null; - sc_array_clear(regions); sc_array_init(regions); - HiloadResult res = read_stream_to_str(&memory_str, "/proc/self/maps"); - if (res == HILOAD_FAIL) + const char* maps_str = hi_file_to_str_dyn("/proc/self/maps"); + if (!maps_str) return HILOAD_FAIL; - char *strptr = (char *)str_ptr(memory_str); + sc_log_debug("/proc/self/maps:\n%s", maps_str); + + char *strptr = (char *)maps_str; char *line = strtok(strptr, "\n"); while (line) { MemoryRegion reg = {0}; @@ -80,7 +80,7 @@ HiloadResult read_memory_maps_self(struct sc_array_memreg *regions) { line = strtok(NULL, "\n"); } - str_free(memory_str); + free((void*)maps_str); return HILOAD_OK; } diff --git a/src/memory.h b/src/memory.h index 8fc54a4..067ef3b 100644 --- a/src/memory.h +++ b/src/memory.h @@ -2,7 +2,6 @@ #define MEMORY_H_ #include "array.h" -#include "str.h" #include "types.h" #include diff --git a/src/string/hi_string.c b/src/string/hi_string.c new file mode 100644 index 0000000..4ef88c3 --- /dev/null +++ b/src/string/hi_string.c @@ -0,0 +1,68 @@ +#include "hi_string.h" + +#include +#include +#include + +const char *hi_string_from_file_dyn(const char *filename, size_t *nread, + size_t nmax) { + + FILE *f = fopen(filename, "r"); + if (!f) { + perror("Failed to open file"); + return 0; + } + + // if nmax is set, use that as the chunk size and don't reallocate after + bool 
reallocate = true; + off_t chunk_size = 4096; + + if (nmax > 0) { + chunk_size = nmax; + reallocate = false; + } + + char *buf = malloc(chunk_size); + char *end = buf + chunk_size; + size_t total_read = 0; + char *p = buf; + size_t n = 0; + do { + n = fread(p, 1, end - p, f); + total_read += n; + p += n; + + if (p == end && reallocate) { + chunk_size *= 2; + char *new_buf = realloc(buf, chunk_size); + if (!new_buf) { + perror("Couldn't realloc memory"); + free(buf); + return 0; + } + buf = new_buf; + p = buf + total_read; + end = buf + chunk_size; + } + } while (n > 0); + + // If we didn't happen to read just one byte less than the max length, + // then reallocate again to shrink the memory to fit what's read + if (p != (end - 1)) { + char *new_buf = realloc(buf, total_read + 1); + if (!new_buf) { + perror("Couldn't realloc memory"); + free(buf); + return 0; + } + buf = new_buf; + } + + buf[total_read] = '\0'; + + if (nread) + *nread = total_read; + + fclose(f); + return buf; +} diff --git a/src/string/hi_string.h b/src/string/hi_string.h new file mode 100644 index 0000000..6954537 --- /dev/null +++ b/src/string/hi_string.h @@ -0,0 +1,21 @@ +#ifndef HI_STRING_H_ +#define HI_STRING_H_ + +#include + +/*** +* @brief Copy file content to a null terminated string, allocating memory while reading. +* +* This doesn't assume it can read file size, so it allocates memory in chunks +* (default 4096 bytes) and keeps reading until 0 bytes are read. If nmax is non-zero, instead that amount of bytes is allocated +* and that is read. +* +* In either case, the string is reallocated to match the length before returning. +* @param filename +* @param nread if not null, this will have the total amount read in bytes +* @param nmax if not 0, this amount of memory in bytes is read and allocated +***/ +const char *hi_string_from_file_dyn(const char *filename, size_t *nread, + size_t nmax); + +#endif // HI_STRING_H_