From 65082c26e7314a2d91dbb0dc9dd25a00570cec73 Mon Sep 17 00:00:00 2001 From: "Dustin J. Mitchell" Date: Sun, 23 Jan 2022 23:58:47 +0000 Subject: [PATCH] improved TCString support --- binding-tests/.gitignore | 1 + binding-tests/Makefile | 4 +- binding-tests/replica.cpp | 6 ++ binding-tests/string.cpp | 29 ++++++++ binding-tests/task.cpp | 10 --- lib/src/replica.rs | 17 ++--- lib/src/string.rs | 138 +++++++++++++++++++++++++++++++------- lib/src/task.rs | 32 ++------- lib/taskchampion.h | 22 +++++- 9 files changed, 183 insertions(+), 76 deletions(-) create mode 100644 binding-tests/string.cpp diff --git a/binding-tests/.gitignore b/binding-tests/.gitignore index fb93ddffb..aa82f31b4 100644 --- a/binding-tests/.gitignore +++ b/binding-tests/.gitignore @@ -1,2 +1,3 @@ *.o doctest +test-db diff --git a/binding-tests/Makefile b/binding-tests/Makefile index 121d942cc..26f621596 100644 --- a/binding-tests/Makefile +++ b/binding-tests/Makefile @@ -3,14 +3,16 @@ INC=-I ../lib LIB=-L ../target/debug RPATH=-Wl,-rpath,../target/debug -TESTS = replica.cpp uuid.cpp task.cpp +TESTS = replica.cpp string.cpp uuid.cpp task.cpp .PHONY: all test all: test test: doctest + @rm -rf test-db @./doctest --no-version --no-intro + @rm -rf test-db %.o: %.cpp ../lib/taskchampion.h $(CXX) $(INC) -c $< -o $@ diff --git a/binding-tests/replica.cpp b/binding-tests/replica.cpp index 4e1053d8c..18364a653 100644 --- a/binding-tests/replica.cpp +++ b/binding-tests/replica.cpp @@ -8,6 +8,12 @@ TEST_CASE("creating an in-memory TCReplica does not crash") { tc_replica_free(rep); } +TEST_CASE("creating an on-disk TCReplica does not crash") { + TCReplica *rep = tc_replica_new(tc_string_new("test-db")); + CHECK(tc_replica_error(rep) == NULL); + tc_replica_free(rep); +} + TEST_CASE("undo on an empty in-memory TCReplica does nothing") { TCReplica *rep = tc_replica_new(NULL); CHECK(tc_replica_error(rep) == NULL); diff --git a/binding-tests/string.cpp b/binding-tests/string.cpp new file mode 100644 index 000000000..dd726e862 --- /dev/null +++ b/binding-tests/string.cpp @@ -0,0 +1,29 @@ +#include +#include "doctest.h" +#include "taskchampion.h" + +TEST_CASE("creating borrowed strings does not crash") { + TCString *s = tc_string_new("abcdef"); + tc_string_free(s); +} + +TEST_CASE("creating cloned strings does not crash") { + char *abcdef = strdup("abcdef"); + TCString *s = tc_string_clone(abcdef); + free(abcdef); + CHECK(strcmp(tc_string_content(s), "abcdef") == 0); + tc_string_free(s); +} + +TEST_CASE("strings echo back their content") { + TCString *s = tc_string_new("abcdef"); + CHECK(strcmp(tc_string_content(s), "abcdef") == 0); + tc_string_free(s); +} + +TEST_CASE("tc_string_content returns NULL for strings containing embedded NULs") { + TCString *s = tc_string_clone_with_len("ab\0de", 5); + REQUIRE(s != NULL); + CHECK(tc_string_content(s) == NULL); + tc_string_free(s); +} diff --git a/binding-tests/task.cpp b/binding-tests/task.cpp index 0592b6b23..9d82767ca 100644 --- a/binding-tests/task.cpp +++ b/binding-tests/task.cpp @@ -23,13 +23,3 @@ TEST_CASE("creating a Task does not crash") { tc_replica_free(rep); } - -TEST_CASE("undo on an empty in-memory TCReplica does nothing") { - TCReplica *rep = tc_replica_new(NULL); - CHECK(tc_replica_error(rep) == NULL); - int rv = tc_replica_undo(rep); - CHECK(rv == 0); - CHECK(tc_replica_error(rep) == NULL); - tc_replica_free(rep); -} - diff --git a/lib/src/replica.rs b/lib/src/replica.rs index 81a387ca5..c137b105a 100644 --- a/lib/src/replica.rs +++ b/lib/src/replica.rs @@ -1,12 +1,8 @@ use crate::{status::TCStatus, string::TCString, task::TCTask}; use libc::c_char; -use std::ffi::{CStr, CString, OsStr}; -use std::path::PathBuf; +use std::ffi::CString; use taskchampion::{Replica, StorageConfig}; -// TODO: unix-only -use std::os::unix::ffi::OsStrExt; - /// A replica represents an instance of a user's task data, providing an easy interface /// for querying and modifying that data. pub struct TCReplica { @@ -50,14 +46,15 @@ where /// /// TCReplicas are not threadsafe. #[no_mangle] -pub extern "C" fn tc_replica_new<'a>(path: *const c_char) -> *mut TCReplica { +pub extern "C" fn tc_replica_new<'a>(path: *mut TCString) -> *mut TCReplica { let storage_res = if path.is_null() { StorageConfig::InMemory.into_storage() } else { - let path: &'a [u8] = unsafe { CStr::from_ptr(path) }.to_bytes(); - let path: &OsStr = OsStr::from_bytes(path); - let path: PathBuf = path.to_os_string().into(); - StorageConfig::OnDisk { taskdb_dir: path }.into_storage() + let path = TCString::from_arg(path); + StorageConfig::OnDisk { + taskdb_dir: path.to_path_buf(), + } + .into_storage() }; let storage = match storage_res { diff --git a/lib/src/string.rs b/lib/src/string.rs index 39e17be13..3f9086d0e 100644 --- a/lib/src/string.rs +++ b/lib/src/string.rs @@ -1,46 +1,134 @@ -use std::ffi::{CStr, CString, NulError}; - -// thinking: -// - TCString ownership always taken when passed in -// - TCString ownership always falls to C when passed out -// - accept that bytes must be copied to get owned string -// - Can we do this with an enum of some sort? +use std::ffi::{CStr, CString, OsStr}; +use std::os::unix::ffi::OsStrExt; +use std::path::PathBuf; /// TCString supports passing strings into and out of the TaskChampion API. -pub struct TCString(CString); +/// +/// Unless specified otherwise, functions in this API take ownership of a TCString when it appears +/// as a function argument, and transfer ownership to the caller when the TCString appears as a +/// return value or otput argument. +pub enum TCString<'a> { + CString(CString), + CStr(&'a CStr), + String(String), +} -impl TCString { - /// Take a TCString from C as an argument. - pub(crate) fn from_arg(tcstring: *mut TCString) -> Self { +impl<'a> TCString<'a> { + /// Take a TCString from C as an argument. C callers generally expect TC functions to take + /// ownership of a string, which is what this function does. + pub(crate) fn from_arg(tcstring: *mut TCString<'a>) -> Self { debug_assert!(!tcstring.is_null()); *(unsafe { Box::from_raw(tcstring) }) } + /// Borrow a TCString from C as an argument. + pub(crate) fn from_arg_ref(tcstring: *mut TCString<'a>) -> &'a mut Self { + debug_assert!(!tcstring.is_null()); + unsafe { &mut *tcstring } + } + /// Get a regular Rust &str for this value. pub(crate) fn as_str(&self) -> Result<&str, std::str::Utf8Error> { - self.0.as_c_str().to_str() + match self { + TCString::CString(cstring) => cstring.as_c_str().to_str(), + TCString::CStr(cstr) => cstr.to_str(), + TCString::String(string) => Ok(string.as_ref()), + } } - /// Construct a *mut TCString from a string for returning to C. - pub(crate) fn return_string(string: impl Into>) -> Result<*mut TCString, NulError> { - let tcstring = TCString(CString::new(string)?); - Ok(Box::into_raw(Box::new(tcstring))) + pub(crate) fn as_bytes(&self) -> &[u8] { + match self { + TCString::CString(cstring) => cstring.as_bytes(), + TCString::CStr(cstr) => cstr.to_bytes(), + TCString::String(string) => string.as_bytes(), + } + } + + pub(crate) fn to_path_buf(&self) -> PathBuf { + // TODO: this is UNIX-specific. + let path: &OsStr = OsStr::from_bytes(self.as_bytes()); + path.to_os_string().into() + } + + /// Convert this to a return value for handing off to C. + pub(crate) fn return_val(self) -> *mut TCString<'a> { + Box::into_raw(Box::new(self)) } } -#[no_mangle] -pub extern "C" fn tc_string_new(cstr: *const libc::c_char) -> *mut TCString { - let cstring = unsafe { CStr::from_ptr(cstr) }.into(); - Box::into_raw(Box::new(TCString(cstring))) +impl<'a> From for TCString<'a> { + fn from(string: String) -> TCString<'a> { + TCString::String(string) + } } -#[no_mangle] -pub extern "C" fn tc_string_content(string: *mut TCString) -> *const libc::c_char { - debug_assert!(!string.is_null()); - let string: &CString = unsafe { &(*string).0 }; - string.as_ptr() +impl<'a> From<&str> for TCString<'a> { + fn from(string: &str) -> TCString<'a> { + TCString::String(string.to_string()) + } } +/// Create a new TCString referencing the given C string. The C string must remain valid until +/// after the TCString is freed. It's typically easiest to ensure this by using a static string. +#[no_mangle] +pub extern "C" fn tc_string_new(cstr: *const libc::c_char) -> *mut TCString<'static> { + let cstr: &CStr = unsafe { CStr::from_ptr(cstr) }; + TCString::CStr(cstr).return_val() +} + +/// Create a new TCString by cloning the content of the given C string. +#[no_mangle] +pub extern "C" fn tc_string_clone(cstr: *const libc::c_char) -> *mut TCString<'static> { + let cstr: &CStr = unsafe { CStr::from_ptr(cstr) }; + TCString::CString(cstr.into()).return_val() +} + +/// Create a new TCString containing the given string with the given length. This allows creation +/// of strings containing embedded NUL characters. If the given string is not valid UTF-8, this +/// function will return NULL. +#[no_mangle] +pub extern "C" fn tc_string_clone_with_len( + buf: *const libc::c_char, + len: usize, +) -> *mut TCString<'static> { + let slice = unsafe { std::slice::from_raw_parts(buf as *const u8, len) }; + let vec = slice.to_vec(); + if let Ok(string) = String::from_utf8(vec) { + TCString::String(string).return_val() + } else { + std::ptr::null_mut() + } +} + +/// Get the content of the string as a regular C string. The given string must not be NULL. The +/// returned value may be NULL if the string contains NUL bytes. +/// This function does _not_ take ownership of the TCString. +#[no_mangle] +pub extern "C" fn tc_string_content(tcstring: *mut TCString) -> *const libc::c_char { + let tcstring = TCString::from_arg_ref(tcstring); + // if we have a String, we need to consume it and turn it into + // a CString. + if let TCString::String(string) = tcstring { + // TODO: get rid of this clone + match CString::new(string.clone()) { + Ok(cstring) => { + *tcstring = TCString::CString(cstring); + } + Err(_) => { + // TODO: could recover the underlying String + return std::ptr::null(); + } + } + } + + match tcstring { + TCString::CString(cstring) => cstring.as_ptr(), + TCString::String(_) => unreachable!(), // just converted this + TCString::CStr(cstr) => cstr.as_ptr(), + } +} + +/// Free a TCString. #[no_mangle] pub extern "C" fn tc_string_free(string: *mut TCString) { debug_assert!(!string.is_null()); diff --git a/lib/src/task.rs b/lib/src/task.rs index 781b5d3d0..ba129caa4 100644 --- a/lib/src/task.rs +++ b/lib/src/task.rs @@ -15,28 +15,6 @@ impl TCTask { } } -/// Utility function to allow using `?` notation to return an error value. -fn wrap<'a, T, F>(task: *const TCTask, f: F, err_value: T) -> T -where - F: FnOnce(&Task) -> anyhow::Result, -{ - let task: &'a Task = task_ref(task); - match f(task) { - Ok(v) => v, - Err(e) => { - /* - let error = e.to_string(); - let error = match CString::new(error.as_bytes()) { - Ok(e) => e, - Err(_) => CString::new("(invalid error message)".as_bytes()).unwrap(), - }; - */ - //task.error = Some(error); - err_value - } - } -} - /// Utility function to safely convert *const TCTask into &Task fn task_ref(task: *const TCTask) -> &'static Task { debug_assert!(!task.is_null()); @@ -66,12 +44,10 @@ pub extern "C" fn tc_task_get_status<'a>(task: *const TCTask) -> TCStatus { /// Get a task's description, or NULL if the task cannot be represented as a C string (e.g., if it /// contains embedded NUL characters). #[no_mangle] -pub extern "C" fn tc_task_get_description<'a>(task: *const TCTask) -> *mut TCString { - wrap( - task, - |task| Ok(TCString::return_string(task.get_description())?), - std::ptr::null_mut(), - ) +pub extern "C" fn tc_task_get_description<'a>(task: *const TCTask) -> *mut TCString<'static> { + let task = task_ref(task); + let descr: TCString = task.get_description().into(); + descr.return_val() } /* TODO diff --git a/lib/taskchampion.h b/lib/taskchampion.h index 5d9691d70..445916baf 100644 --- a/lib/taskchampion.h +++ b/lib/taskchampion.h @@ -19,6 +19,10 @@ enum TCStatus { struct TCReplica; /// TCString supports passing strings into and out of the TaskChampion API. +/// +/// Unless specified otherwise, functions in this API take ownership of a TCString when it appears +/// as a function argument, and transfer ownership to the caller when the TCString appears as a +/// return value or otput argument. struct TCString; /// A task, as publicly exposed by this library. @@ -46,7 +50,7 @@ extern const uintptr_t TC_UUID_STRING_BYTES; /// Returns NULL on error; see tc_replica_error. /// /// TCReplicas are not threadsafe. -TCReplica *tc_replica_new(const char *path); +TCReplica *tc_replica_new(TCString *path); /// Create a new task. The task must not already exist. /// @@ -67,10 +71,24 @@ const char *tc_replica_error(TCReplica *rep); /// Free a TCReplica. void tc_replica_free(TCReplica *rep); +/// Create a new TCString referencing the given C string. The C string must remain valid until +/// after the TCString is freed. It's typically easiest to ensure this by using a static string. TCString *tc_string_new(const char *cstr); -const char *tc_string_content(TCString *string); +/// Create a new TCString by cloning the content of the given C string. +TCString *tc_string_clone(const char *cstr); +/// Create a new TCString containing the given string with the given length. This allows creation +/// of strings containing embedded NUL characters. If the given string is not valid UTF-8, this +/// function will return NULL. +TCString *tc_string_clone_with_len(const char *buf, uintptr_t len); + +/// Get the content of the string as a regular C string. The given string must not be NULL. The +/// returned value may be NULL if the string contains NUL bytes. +/// This function does _not_ take ownership of the TCString. +const char *tc_string_content(TCString *tcstring); + +/// Free a TCString. void tc_string_free(TCString *string); /// Get a task's UUID.