diff --git a/src/str.c b/src/str.c deleted file mode 100644 index 3b41849..0000000 --- a/src/str.c +++ /dev/null @@ -1,768 +0,0 @@ -/* -BSD 3-Clause License - -Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#define _DEFAULT_SOURCE // for strncasecmp() -#define _XOPEN_SOURCE 500 // for IOV_MAX - -#include "str.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// append to destination and return the end pointer -static inline void *mem_append(void *dest, const void *src, const size_t n) { - return memcpy(dest, src, n) + n; -} - -// string deallocation -void str_free(const str s) { - if (str_is_owner(s)) - free((void *)s.ptr); -} - -// version of str_free() for str_auto macro -void str_free_auto(const str *const ps) { - if (ps) - str_free(*ps); -} - -// memory allocation helpers -#define ALLOC(n) \ - ({ \ - void *const ___p = malloc(n); \ - if (!___p) \ - return ENOMEM; \ - ___p; \ - }) - -#define REALLOC(p, n) \ - ({ \ - void *const ___p = realloc((p), (n)); \ - if (!___p) \ - return ENOMEM; \ - ___p; \ - }) - -// errno checker -#define RETURN_ON_ERROR(expr) \ - while ((expr) < 0) \ - do { \ - const int __err = errno; \ - if (__err != EINTR) \ - return __err; \ - } while (0) - -// swap -void str_swap(str *const s1, str *const s2) { - const str tmp = *s1; - - *s1 = *s2; - *s2 = tmp; -} - -// empty string -const char *const str_empty_string = ""; - -// string comparison -// --------------------------------------------------------------------- compare -// two strings lexicographically -int str_cmp(const str s1, const str s2) { - const size_t n1 = str_len(s1), n2 = str_len(s2); - - // either string may be missing a null terminator, hence "memcmp" - const int res = memcmp(str_ptr(s1), str_ptr(s2), (n1 < n2) ? n1 : n2); - - if (res != 0 || n1 == n2) - return res; - - return (n1 < n2) ? -1 : 1; -} - -// case-insensitive comparison -int str_cmp_ci(const str s1, const str s2) { - const size_t n1 = str_len(s1), n2 = str_len(s2); - - // either string may be missing a null terminator, hence "strNcasecmp" - const int res = strncasecmp(str_ptr(s1), str_ptr(s2), (n1 < n2) ? n1 : n2); - - if (res != 0 || n1 == n2) - return res; - - return (n1 < n2) ? -1 : 1; -} - -// test for prefix -bool str_has_prefix(const str s, const str prefix) { - const size_t n = str_len(prefix); - - return (n == 0) || - (str_len(s) >= n && memcmp(str_ptr(s), str_ptr(prefix), n) == 0); -} - -// test for suffix -bool str_has_suffix(const str s, const str suffix) { - const size_t n = str_len(suffix); - - return (n == 0) || - (str_len(s) >= n && memcmp(str_end(s) - n, str_ptr(suffix), n) == 0); -} - -// string constructors -// ----------------------------------------------------------------- create a -// reference to the given range of chars -str str_ref_chars(const char *const s, const size_t n) { - return (s && n > 0) ? ((str){s, str_ref_info(n)}) : str_null; -} - -str str_ref_from_ptr(const char *const s) { - return s ? str_ref_chars(s, strlen(s)) : str_null; -} - -// take ownership of the given range of chars -str str_acquire_chars(const char *const s, const size_t n) { - if (!s) - return str_null; - - if (n == 0) { - free((void *)s); - return str_null; - } - - return (str){s, str_owner_info(n)}; -} - -// take ownership of the given C string -str str_acquire(const char *const s) { - return s ? str_acquire_chars(s, strlen(s)) : str_null; -} - -// allocate a copy of the given string -int str_dup_impl(str *const dest, const str s) { - const size_t n = str_len(s); - - if (n == 0) - str_clear(dest); - else { - char *const p = memcpy(ALLOC(n + 1), str_ptr(s), n); - - p[n] = 0; - str_assign(dest, str_acquire_chars(p, n)); - } - - return 0; -} - -#ifndef STR_MAX_FILE_SIZE -#define STR_MAX_FILE_SIZE (64 * 1024 * 1024 - 1) -#endif - -static int get_file_size(const int fd, off_t *const size) { - // stat the file - struct stat info; - - RETURN_ON_ERROR(fstat(fd, &info)); - - *size = info.st_size; - - // only regular files are allowed - switch (info.st_mode & S_IFMT) { - case S_IFREG: - return (info.st_size > STR_MAX_FILE_SIZE) ? EFBIG : 0; - case S_IFDIR: - return EISDIR; - default: - return EOPNOTSUPP; - } -} - -static int read_from_fd(const int fd, void *p, off_t *const psize) { - const void *const end = p + *psize; - ssize_t n; - - do { - RETURN_ON_ERROR(n = read(fd, p, end - p)); - - p += n; - } while (n > 0 && p < end); - - *psize -= end - p; - return 0; -} - -static int read_from_fd_cont(const int fd, void *p, off_t *const psize) { - const void *end = p + *psize; - void *buf = p; - ssize_t n; - ssize_t nread = 0; - - do { - RETURN_ON_ERROR(n = read(fd, p, end - p)); - - p += n; - nread += n; - - // pre-emptively realloc, even though we could potentially be at the end - if (p == end) { - *psize *= 2; - buf = REALLOC(buf, *psize); - p = buf + nread; - end = buf + *psize; - } - - } while (n > 0); - - return 0; -} - -static int str_from_fd(const int fd, const off_t size, str *const dest) { - if (size == 0) { - str_clear(dest); - return 0; - } - - char *buff = ALLOC(size + 1); - off_t n = size; - const int err = read_from_fd(fd, buff, &n); - - if (err != 0) { - free(buff); - return err; - } - - if (n == 0) { - free(buff); - str_clear(dest); - return 0; - } - - if (n < size) { - char *const p = realloc(buff, n + 1); - - if (!p) { - free(buff); - return ENOMEM; - } - - buff = p; - } - - buff[n] = 0; - str_assign(dest, str_acquire_chars(buff, n)); - return 0; -} - -static int str_from_stream_cont(const int fd, str *const dest, int *nmax) { - const size_t start_size = 8192; - - char *buff = ALLOC(start_size + 1); - off_t n = start_size; - const int err = read_from_fd_cont(fd, buff, &n); - - if (err != 0) { - free(buff); - return err; - } - - if (n == 0) { - free(buff); - str_clear(dest); - return 0; - } - - if (n < start_size) { - char *const p = realloc(buff, n + 1); - - if (!p) { - free(buff); - return ENOMEM; - } - - buff = p; - } - - buff[n] = '\0'; - str_assign(dest, str_acquire_chars(buff, n)); - return 0; -} - -int str_from_file(str *const dest, const char *const file_name) { - int fd; - - RETURN_ON_ERROR(fd = open(file_name, O_CLOEXEC | O_RDONLY)); - - off_t size = 0; - int err = get_file_size(fd, &size); - - if (err == 0) - err = str_from_fd(fd, size, dest); - - close(fd); - return err; -} - -int str_from_stream(str *const dest, const char *const file_name, int *nread) { - int fd; - - RETURN_ON_ERROR(fd = open(file_name, O_CLOEXEC | O_RDONLY)); - - int nmax = nread ? *nread : 0; - - off_t chunk_size = 4096; - int err = 0; - if (nmax == 0) - err = str_from_stream_cont(fd, dest, &nmax); - else - err = str_from_fd(fd, nmax, dest); - - if (nread) - *nread = nmax; - - close(fd); - return err; -} - -// string composition -// ----------------------------------------------------------------------- -// append string -static inline char *append_str(char *p, const str s) { - return mem_append(p, str_ptr(s), str_len(s)); -} - -static size_t total_length(const str *src, size_t count) { - size_t sum = 0; - - for (; count > 0; --count) - sum += str_len(*src++); - - return sum; -} - -// concatenate strings -int str_cat_range_impl(str *const dest, const str *src, size_t count) { - if (!src) { - str_clear(dest); - return 0; - } - - // calculate total length - const size_t num = total_length(src, count); - - if (num == 0) { - str_clear(dest); - return 0; - } - - // allocate - char *const buff = ALLOC(num + 1); - - // copy bytes - char *p = buff; - - for (; count > 0; --count) - p = append_str(p, *src++); - - // null-terminate and assign - *p = 0; - str_assign(dest, str_acquire_chars(buff, num)); - return 0; -} - -// writing to file descriptor -int str_cpy_to_fd(const int fd, const str s) { - size_t n = str_len(s); - const void *p = str_ptr(s); - - while (n > 0) { - ssize_t m; - - RETURN_ON_ERROR(m = write(fd, p, n)); - - n -= m; - p += m; - } - - return 0; -} - -// writing to byte stream -int str_cpy_to_stream(FILE *const stream, const str s) { - const size_t n = str_len(s); - - return (n > 0 && fwrite(str_ptr(s), 1, n, stream) < n) ? EIO : 0; -} - -// write iovec -static int write_iovec(const int fd, struct iovec *pv, unsigned nv) { - while (nv > 0) { - ssize_t n; - - RETURN_ON_ERROR(n = writev(fd, pv, nv)); - - // discard items already written - for (; nv > 0; ++pv, --nv) { - if (n < (ssize_t)pv->iov_len) { - pv->iov_base += n; - pv->iov_len -= n; - break; - } - - n -= (ssize_t)pv->iov_len; - } - } - - return 0; -} - -// concatenate to file descriptor -static struct iovec *vec_append(struct iovec *const pv, const str s) { - *pv = (struct iovec){(void *)str_ptr(s), str_len(s)}; - - return pv + 1; -} - -static struct iovec *vec_append_nonempty(struct iovec *const pv, const str s) { - return str_is_empty(s) ? pv : vec_append(pv, s); -} - -int str_cat_range_to_fd(const int fd, const str *src, size_t count) { - if (!src) - return 0; - - struct iovec v[IOV_MAX]; - - while (count > 0) { - struct iovec *p = vec_append_nonempty(v, *src++); - - while (--count > 0 && p < v + IOV_MAX) - p = vec_append_nonempty(p, *src++); - - const size_t n = p - v; - - if (n == 0) - break; - - const int ret = write_iovec(fd, v, n); - - if (ret != 0) - return ret; - } - - return 0; -} - -int str_cat_range_to_stream(FILE *const stream, const str *src, size_t count) { - if (!src) - return 0; - - int err = 0; - - for (; count > 0 && err == 0; --count) - err = str_cpy(stream, *src++); - - return err; -} - -// join strings -int str_join_range_impl(str *const dest, const str sep, const str *src, - size_t count) { - // test for simple cases - if (str_is_empty(sep)) - return str_cat_range(dest, src, count); - - if (!src || count == 0) { - str_clear(dest); - return 0; - } - - if (count == 1) - return str_cpy(dest, *src); - - // calculate total length - const size_t num = total_length(src, count) + str_len(sep) * (count - 1); - - // allocate - char *const buff = ALLOC(num + 1); - - // copy bytes - char *p = append_str(buff, *src++); - - while (--count > 0) - p = append_str(append_str(p, sep), *src++); - - // null-terminate and assign - *p = 0; - str_assign(dest, str_acquire_chars(buff, num)); - return 0; -} - -int str_join_range_to_fd(const int fd, const str sep, const str *src, - size_t count) { - if (str_is_empty(sep)) - return str_cat_range(fd, src, count); - - if (!src || count == 0) - return 0; - - if (count == 1) - return str_cpy(fd, *src); - - struct iovec v[IOV_MAX]; - - struct iovec *p = vec_append_nonempty(v, *src++); - - for (--count; count > 0; p = v) { - p = vec_append_nonempty(vec_append(p, sep), *src++); - - while (--count > 0 && p < v + IOV_MAX - 1) - p = vec_append_nonempty(vec_append(p, sep), *src++); - - const size_t n = p - v; - - if (n == 0) - break; - - const int ret = write_iovec(fd, v, n); - - if (ret != 0) - return ret; - } - - return 0; -} - -int str_join_range_to_stream(FILE *const stream, const str sep, const str *src, - size_t count) { - if (str_is_empty(sep)) - return str_cat_range(stream, src, count); - - if (!src || count == 0) - return 0; - - int err = str_cpy(stream, *src++); - - while (--count > 0 && err == 0) - err = str_cat(stream, sep, *src++); - - return err; -} - -// searching and sorting -// -------------------------------------------------------------------- string -// partitioning -bool str_partition(const str src, const str patt, str *const prefix, - str *const suffix) { - const size_t patt_len = str_len(patt); - - if (patt_len > 0 && !str_is_empty(src)) { - const char *s = memmem(str_ptr(src), str_len(src), str_ptr(patt), patt_len); - - if (s) { - if (prefix) - str_assign(prefix, str_ref_chars(str_ptr(src), s - str_ptr(src))); - - if (suffix) { - s += patt_len; - str_assign(suffix, str_ref_chars(s, str_end(src) - s)); - } - - return true; - } - } - - if (prefix) - str_assign(prefix, str_ref(src)); - - if (suffix) - str_clear(suffix); - - return false; -} - -// comparison functions -int str_order_asc(const void *const s1, const void *const s2) { - return str_cmp(*(const str *)s1, *(const str *)s2); -} - -int str_order_desc(const void *const s1, const void *const s2) { - return -str_cmp(*(const str *)s1, *(const str *)s2); -} - -int str_order_asc_ci(const void *const s1, const void *const s2) { - return str_cmp_ci(*(const str *)s1, *(const str *)s2); -} - -int str_order_desc_ci(const void *const s1, const void *const s2) { - return -str_cmp_ci(*(const str *)s1, *(const str *)s2); -} - -// sorting -void str_sort_range(const str_cmp_func cmp, str *const array, - const size_t count) { - if (array && count > 1) - qsort(array, count, sizeof(array[0]), cmp); -} - -// searching -const str *str_search_range(const str key, const str *const array, - const size_t count) { - if (!array || count == 0) - return NULL; - - if (count == 1) - return str_eq(key, array[0]) ? array : NULL; - - return bsearch(&key, array, count, sizeof(str), str_order_asc); -} - -// partitioning -size_t str_partition_range(bool (*pred)(const str), str *const array, - const size_t count) { - if (!array) - return 0; - - const str *const end = array + count; - str *p = array; - - while (p < end && pred(*p)) - ++p; - - for (str *s = p + 1; s < end; ++s) - if (pred(*s)) - str_swap(p++, s); - - return p - array; -} - -// unique partitioning -size_t str_unique_range(str *const array, const size_t count) { - if (!array || count == 0) - return 0; - - if (count == 1) - return 1; - - str_sort_range(str_order_asc, array, count); - - const str *const end = array + count; - str *p = array; - - for (str *s = array + 1; s < end; ++s) - if (!str_eq(*p, *s) && (++p < s)) - str_swap(p, s); - - return p + 1 - array; -} - -// string iterator function -#ifdef __STDC_UTF_32__ - -char32_t str_cp_iterator_next(str_cp_iterator *const it) { - if (it->curr >= it->end) - return CPI_END_OF_STRING; - - char32_t c; - const size_t n = mbrtoc32(&c, it->curr, it->end - it->curr, &it->state); - - switch (n) // see https://en.cppreference.com/w/c/string/multibyte/mbrtoc32 - { - case 0: // null character (U+0000) is allowed - ++it->curr; - return 0; - case (size_t)-1: // encoding error - case (size_t)-3: // surrogate pair detected - return CPI_ERR_INVALID_ENCODING; - case (size_t)-2: // incomplete sequence - return CPI_ERR_INCOMPLETE_SEQ; - default: // ok - it->curr += n; - return c; - } -} - -#endif // ifdef __STDC_UTF_32__ - -// tokeniser -static inline bool is_delim(const str_tok_state *const state, const char c) { - return state->bits[(unsigned char)c >> 3] & (1 << (c & 0x7)); -} - -static inline void set_bit(str_tok_state *const state, const char c) { - state->bits[(unsigned char)c >> 3] |= (1 << (c & 0x7)); -} - -void str_tok_delim(str_tok_state *const state, const str delim_set) { - memset(state->bits, 0, sizeof(state->bits)); - - const char *const end = str_end(delim_set); - - for (const char *s = str_ptr(delim_set); s < end; ++s) - set_bit(state, *s); -} - -void str_tok_init(str_tok_state *const state, const str src, - const str delim_set) { - state->src = str_ptr(src); - state->end = str_end(src); - - str_tok_delim(state, delim_set); -} - -bool str_tok(str *const dest, str_tok_state *const state) { - // token start - const char *begin = state->src; - - while (begin < state->end && is_delim(state, *begin)) - ++begin; - - if (begin == state->end) { - str_clear(dest); - return false; - } - - // token end - const char *end = begin + 1; - - while (end < state->end && !is_delim(state, *end)) - ++end; - - state->src = end; - str_assign(dest, str_ref_chars(begin, end - begin)); - - return true; -} diff --git a/src/str.h b/src/str.h deleted file mode 100644 index 29bd19b..0000000 --- a/src/str.h +++ /dev/null @@ -1,315 +0,0 @@ -/* -BSD 3-Clause License - -Copyright (c) 2020,2021,2022,2023,2024 Maxim Konakov and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#pragma once - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -// string type -// ---------------------------------------------------------------------------- -typedef struct { - const char *ptr; - size_t info; -} str; - -// NULL string -#define str_null ((str){0, 0}) - -// helper macros -#define str_ref_info(n) ((n) << 1) -#define str_owner_info(n) (str_ref_info(n) | 1) - -// string properties -// ---------------------------------------------------------------------- length -// of the string -static inline size_t str_len(const str s) { return s.info >> 1; } - -// pointer to the string -static inline const char *str_ptr(const str s) { - extern const char *const str_empty_string; - - return s.ptr ? s.ptr : str_empty_string; -} - -// end of the string -static inline const char *str_end(const str s) { - return str_ptr(s) + str_len(s); -} - -// test if the string is empty -static inline bool str_is_empty(const str s) { return str_len(s) == 0; } - -// test if the string is allocated on the heap -static inline bool str_is_owner(const str s) { return (s.info & 1) != 0; } - -// test if the string is a reference -static inline bool str_is_ref(const str s) { return !str_is_owner(s); } - -// string memory control -// ------------------------------------------------------------------- free -// memory allocated for the string -void str_free(const str s); - -// automatic cleanup -void str_free_auto(const str *const ps); - -#define str_auto str __attribute__((cleanup(str_free_auto))) - -// string movements -// ----------------------------------------------------------------------- free -// target string, then assign the new value to it -static inline void str_assign(str *const ps, const str s) { - str_free(*ps); - *ps = s; -} - -// move the string, resetting the source to str_null -static inline str str_move(str *const ps) { - const str t = *ps; - *ps = str_null; - return t; -} - -// pass ownership of the string -static inline str str_pass(str *const ps) { - const str t = *ps; - ps->info &= ~(size_t)1; - return t; -} - -// swap two string objects -void str_swap(str *const s1, str *const s2); - -// string helpers -// -------------------------------------------------------------------------- -// reset the string to str_null -static inline void str_clear(str *const ps) { str_assign(ps, str_null); } - -// compare two strings lexicographically -int str_cmp(const str s1, const str s2); - -// test if two strings match -static inline bool str_eq(const str s1, const str s2) { - return str_cmp(s1, s2) == 0; -} - -// case-insensitive comparison -int str_cmp_ci(const str s1, const str s2); - -// case-insensitive match -static inline bool str_eq_ci(const str s1, const str s2) { - return str_cmp_ci(s1, s2) == 0; -} - -// test for prefix -bool str_has_prefix(const str s, const str prefix); - -// test for suffix -bool str_has_suffix(const str s, const str suffix); - -// string composition -// ------------------------------------------------------------------ -// implementation helpers -int str_dup_impl(str *const dest, const str s); -int str_cpy_to_fd(const int fd, const str s); -int str_cpy_to_stream(FILE *const stream, const str s); - -// copy string -#define str_cpy(dest, src) \ - _Generic((dest), \ - str*: str_dup_impl, \ - int: str_cpy_to_fd, \ - FILE*: str_cpy_to_stream \ - )((dest), (src)) - -// implementation helpers -int str_cat_range_impl(str *const dest, const str *src, size_t count); -int str_cat_range_to_fd(const int fd, const str *src, size_t count); -int str_cat_range_to_stream(FILE *const stream, const str *src, size_t count); - -// concatenate range of strings -#define str_cat_range(dest, src, count) \ - _Generic((dest), \ - str*: str_cat_range_impl, \ - int: str_cat_range_to_fd, \ - FILE*: str_cat_range_to_stream \ - )((dest), (src), (count)) - -// concatenate string arguments -#define str_cat(dest, ...) \ - ({ \ - const str args[] = {__VA_ARGS__}; \ - str_cat_range((dest), args, sizeof(args) / sizeof(args[0])); \ - }) - -// implementation helpers -int str_join_range_impl(str *const dest, const str sep, const str *src, - size_t count); -int str_join_range_to_fd(const int fd, const str sep, const str *src, - size_t count); -int str_join_range_to_stream(FILE *const stream, const str sep, const str *src, - size_t count); - -// join strings around the separator -#define str_join_range(dest, sep, src, count) \ - _Generic((dest), \ - str*: str_join_range_impl, \ - int: str_join_range_to_fd, \ - FILE*: str_join_range_to_stream \ - )((dest), (sep), (src), (count)) - -// join string arguments around the separator -#define str_join(dest, sep, ...) \ - ({ \ - const str args[] = {__VA_ARGS__}; \ - str_join_range((dest), (sep), args, sizeof(args) / sizeof(args[0])); \ - }) - -// constructors -// ---------------------------------------------------------------------------- -// string reference from a string literal -#define str_lit(s) ((str){"" s, str_ref_info(sizeof(s) - 1)}) - -static inline str str_ref_impl(const str s) { - return (str){s.ptr, s.info & ~(size_t)1}; -} - -str str_ref_from_ptr(const char *const s); - -// string reference from anything -#define str_ref(s) \ - _Generic((s), \ - str: str_ref_impl, \ - char*: str_ref_from_ptr, \ - const char*: str_ref_from_ptr \ - )(s) - -// create a reference to the given range of chars -str str_ref_chars(const char *const s, const size_t n); - -// take ownership of the given range of chars -str str_acquire_chars(const char *const s, const size_t n); - -// take ownership of the given string -str str_acquire(const char *const s); - -// string from file -int str_from_file(str *const dest, const char *const file_name); - -// read maximum nread bytes from file, write bytes read. 0 reads until EOS. -int str_from_stream(str *const dest, const char *const file_name, int *nread); - -// searching and sorting -// -------------------------------------------------------------------- string -// partitioning (substring search) -bool str_partition(const str src, const str patt, str *const prefix, - str *const suffix); - -// comparison functions -typedef int (*str_cmp_func)(const void *, const void *); - -int str_order_asc(const void *const s1, const void *const s2); -int str_order_desc(const void *const s1, const void *const s2); -int str_order_asc_ci(const void *const s1, const void *const s2); -int str_order_desc_ci(const void *const s1, const void *const s2); - -// sort array of strings -void str_sort_range(const str_cmp_func cmp, str *const array, - const size_t count); - -// searching -const str *str_search_range(const str key, const str *const array, - const size_t count); - -// partitioning -size_t str_partition_range(bool (*pred)(const str), str *const array, - const size_t count); - -// unique partitioning -size_t str_unique_range(str *const array, const size_t count); - -// UTF-32 codepoint iterator -// ---------------------------------------------------------------- -#ifdef __STDC_UTF_32__ -#include - -// iterator -#define for_each_codepoint(var, src) \ - for_each_cp((var), (src), CAT1(inner_it_, __COUNTER__)) - -// iterator error codes -#define CPI_END_OF_STRING ((char32_t)-1) -#define CPI_ERR_INCOMPLETE_SEQ ((char32_t)-2) -#define CPI_ERR_INVALID_ENCODING ((char32_t)-3) - -// implementation -#define for_each_cp(var, src, it) \ - for (str_cp_iterator it = str_make_cp_iterator(src); \ - (var = str_cp_iterator_next(&it)) <= 0x10FFFFu;) - -#define CAT1(x, y) CAT2(x, y) -#define CAT2(x, y) x##y - -typedef struct { - const char *curr; - const char *const end; - mbstate_t state; -} str_cp_iterator; - -static inline str_cp_iterator str_make_cp_iterator(const str s) { - return (str_cp_iterator){.curr = str_ptr(s), .end = str_end(s)}; -} - -char32_t str_cp_iterator_next(str_cp_iterator *const it); - -#endif // ifdef __STDC_UTF_32__ - -// tokeniser -// -------------------------------------------------------------------------------- -typedef struct { - unsigned char bits[32]; // 256 / 8 - const char *src, *end; -} str_tok_state; - -void str_tok_init(str_tok_state *const state, const str src, - const str delim_set); -bool str_tok(str *const dest, str_tok_state *const state); -void str_tok_delim(str_tok_state *const state, const str delim_set); - -#ifdef __cplusplus -} -#endif