libco: Update to latest version from ares (#4282)

This also removes the LIBCO_MPROTECT setting, I don't know why I enabled
that but it prevented putting the context swap code in the text section.
This commit is contained in:
Ziemas
2026-06-03 08:51:07 +02:00
committed by GitHub
parent fa2f652d2f
commit e00bc479f4
19 changed files with 220 additions and 546 deletions
+35 -5
View File
@@ -1,7 +1,37 @@
set(CMAKE_C_STANDARD 11)
add_library(libco STATIC libco.c)
set(LIBCO_SOURCES
libco.c
)
# Attach the architecture-specific backend that matches the target CPU.
# libco.c pulls the backend in with #include, and every source except libco.c
# is flagged HEADER_FILE_ONLY further down, so this choice only decides which
# backend file is listed on the target (e.g. for IDE project views).
#
# The expansion is quoted so an empty CMAKE_SYSTEM_PROCESSOR still yields a
# valid two-argument string(TOLOWER) call.
string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" LOWERCASE_CMAKE_SYSTEM_PROCESSOR)
add_library(libco STATIC ${LIBCO_SOURCES})

# if(MATCHES) performs an unanchored regex search. The 32-bit x86 pattern is
# anchored on both ends because "x86" is also a substring of "x86_64", which
# would otherwise be caught here before the amd64 branch is ever tested.
if(LOWERCASE_CMAKE_SYSTEM_PROCESSOR MATCHES "^(i[3-6]86|x86)$")
  target_sources(libco PRIVATE x86.c)
elseif(LOWERCASE_CMAKE_SYSTEM_PROCESSOR MATCHES "(x64|x86_64|amd64|e2k)")
  target_sources(libco PRIVATE amd64.c)
elseif(LOWERCASE_CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64(le)?")
  target_sources(libco PRIVATE ppc64v2.c)
elseif(LOWERCASE_CMAKE_SYSTEM_PROCESSOR MATCHES "(riscv)")
  target_sources(libco PRIVATE riscv.c)
elseif(LOWERCASE_CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64|arm64)")
  target_sources(libco PRIVATE aarch64.c)
elseif(LOWERCASE_CMAKE_SYSTEM_PROCESSOR MATCHES "(arm)")
  target_sources(libco PRIVATE arm.c)
# 64-bit PowerPC was handled above; accept the same two spellings here
# ("powerpc" does not contain the substring "ppc").
elseif(LOWERCASE_CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)")
  target_sources(libco PRIVATE ppc.c)
# NOTE(review): OS_WINDOWS is not set anywhere in this file; CMake's built-in
# WIN32 is accepted as well so the fiber backend is still chosen if the
# enclosing project does not define OS_WINDOWS.
elseif(OS_WINDOWS OR WIN32)
  target_sources(libco PRIVATE fiber.c)
else()
  target_sources(libco PRIVATE sjlj.c)
endif()
# Headers are listed on the target so they appear next to the sources.
target_sources(libco PRIVATE libco.h settings.h)

# Consumers include the library through the parent directory.
target_include_directories(libco INTERFACE ..)

# libco is built from C sources, so the warning suppression is gated on the
# C compiler id rather than the C++ one.
target_compile_options(libco PRIVATE $<$<C_COMPILER_ID:Clang,AppleClang>:-Wno-strict-prototypes>)

# Only libco.c is compiled: mark every attached source as header-only (keeping
# it visible and grouped by directory), then re-enable compilation for libco.c.
# set_source_files_properties() takes file names, so the target name is not
# passed to it.
get_target_property(libco_SOURCES libco SOURCES)
source_group(TREE "${CMAKE_CURRENT_SOURCE_DIR}" FILES ${libco_SOURCES})
set_source_files_properties(${libco_SOURCES} PROPERTIES HEADER_FILE_ONLY TRUE)
set_source_files_properties(libco.c PROPERTIES HEADER_FILE_ONLY FALSE)
Generated Vendored
+14 -4
View File
@@ -1,7 +1,17 @@
ISC License (ISC)
----------------------------------------------------------------------
ares
Copyright byuu and the higan team
Copyright (c) 2004-2025 ares team, Near et al
Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
----------------------------------------------------------------------
-29
View File
@@ -1,29 +0,0 @@
# libco
libco is a cooperative multithreading library written in C89.
Although cooperative multithreading is limited to a single CPU core, it scales substantially better than preemptive multithreading.
For applications that need 100,000 or more context switches per second, the kernel overhead involved in preemptive multithreading can end up becoming the bottleneck in the application. libco can easily scale to 10,000,000 or more context switches per second.
Ideal use cases include servers (HTTP, RDBMS) and emulators (CPU cores, etc.)
It currently includes backends for:
* x86 CPUs
* amd64 CPUs
* PowerPC CPUs
* PowerPC64 ELFv1 CPUs
* PowerPC64 ELFv2 CPUs
* ARM 32-bit CPUs
* ARM 64-bit (AArch64) CPUs
* POSIX platforms (setjmp)
* Windows platforms (fibers)
See [doc/targets.md] for details.
See [doc/usage.md] for documentation.
## License
libco is released under the ISC license.
+2 -2
View File
@@ -10,7 +10,7 @@
extern "C" {
#endif
static thread_local uintptr_t co_active_buffer[64];
static thread_local alignas(16) uintptr_t co_active_buffer[64];
static thread_local cothread_t co_active_handle = 0;
static void (*co_swap)(cothread_t, cothread_t) = 0;
@@ -19,7 +19,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0;
#else
section(text)
#endif
static const uint32_t co_swap_function[1024] = {
const uint32_t co_swap_function[1024] = {
0x910003f0, /* mov x16,sp */
0xa9007830, /* stp x16,x30,[x1] */
0xa9407810, /* ldp x16,x30,[x0] */
Generated Vendored
+3 -3
View File
@@ -9,7 +9,7 @@
extern "C" {
#endif
static thread_local long long co_active_buffer[64];
static thread_local alignas(16) long long co_active_buffer[64];
static thread_local cothread_t co_active_handle = 0;
static void (*co_swap)(cothread_t, cothread_t) = 0;
@@ -20,7 +20,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0;
#endif
#ifdef _WIN32
/* ABI: Win64 */
static const unsigned char co_swap_function[4096] = {
const unsigned char co_swap_function[4096] = {
0x48, 0x89, 0x22, /* mov [rdx],rsp */
0x48, 0x8b, 0x21, /* mov rsp,[rcx] */
0x58, /* pop rax */
@@ -87,7 +87,7 @@ static void (*co_swap)(cothread_t, cothread_t) = 0;
}
#else
/* ABI: SystemV */
static const unsigned char co_swap_function[4096] = {
const unsigned char co_swap_function[4096] = {
0x48, 0x89, 0x26, /* mov [rsi],rsp */
0x48, 0x8b, 0x27, /* mov rsp,[rdi] */
0x58, /* pop rax */
-4
View File
@@ -1,4 +0,0 @@
test_args
test_serialization
test_timing
*.o
-8
View File
@@ -1,8 +0,0 @@
:: Builds the three libco test programs with the cc/c++ drivers found on PATH.
:: libco.c is compiled once and linked into every test executable.
cc -O3 -fomit-frame-pointer -I../.. -o libco.o -c ../../libco.c
:: test_timing
c++ -O3 -fomit-frame-pointer -I../.. -c test_timing.cpp
c++ -O3 -fomit-frame-pointer -o test_timing libco.o test_timing.o
:: test_args
c++ -O3 -fomit-frame-pointer -I../.. -c test_args.cpp
c++ -O3 -fomit-frame-pointer -o test_args libco.o test_args.o
:: test_serialization
c++ -O3 -fomit-frame-pointer -I../.. -c test_serialization.cpp
c++ -O3 -fomit-frame-pointer -o test_serialization libco.o test_serialization.o
:: remove the intermediate object files; the leading @ hides the command echo
@del *.o
-8
View File
@@ -1,8 +0,0 @@
# Builds the three libco test programs.
# libco.c is compiled once; each test is then compiled and linked against it.
FLAGS="-O3 -fomit-frame-pointer"

cc $FLAGS -I../.. -o libco.o -c ../../libco.c
for name in test_timing test_args test_serialization; do
  c++ $FLAGS -I../.. -c $name.cpp
  c++ $FLAGS -o $name libco.o $name.o
done

# drop the intermediate object files
rm -f *.o
-6
View File
@@ -1,6 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <libco.h>
-76
View File
@@ -1,76 +0,0 @@
/*****
* cothread parameterized function example
*****
* entry point to cothreads cannot take arguments.
* this is due to portability issues: each processor,
* operating system, programming language and compiler
* can use different parameter passing methods, so
* arguments to the cothread entry points were omitted.
*
* however, the behavior can easily be simulated by use
* of a specialized co_switch to set global parameters to
* be used as function arguments.
*
* in this way, with a bit of extra red tape, one gains
* even more flexibility than would be possible with a
* fixed argument list entry point, such as void (*)(void*),
* as any number of arguments can be used.
*
* this also eliminates race conditions where a pointer
* passed to co_create may have changed or become invalidated
* before call to co_switch, as said pointer would now be set
* when calling co_switch, instead.
*****/
#include "test.h"
cothread_t thread[3];
//staging area for the values handed to a cothread through the
//three-argument co_switch; co_entrypoint reads them on entry
namespace co_arg {
  int param_x = 0;
  int param_y = 0;
}
//overload of co_switch that publishes two arguments in co_arg before doing
//the regular single-argument switch; the name is arbitrary (co_init or
//similar would work just as well)
void co_switch(cothread_t thread, int param_x, int param_y) {
  co_arg::param_y = param_y;
  co_arg::param_x = param_x;
  co_switch(thread);
}
//cothread entry point: takes a private copy of the staged arguments, prints
//them, yields to the main cothread, then prints them again after resuming
void co_entrypoint() {
  //copies live on this cothread's own stack
  const int x = co_arg::param_x;
  const int y = co_arg::param_y;

  printf("co_entrypoint(%d, %d)\n", x, y);
  co_switch(thread[0]);

  //by now co_arg::param_x/param_y have been overwritten by later
  //co_switch(cothread_t, int, int) calls; the local copies are unaffected
  printf("co_entrypoint(%d, %d)\n", x, y);
  co_switch(thread[0]);

  //control must never fall off the end of a cothread entry point
  throw;
}
int main() {
printf("cothread parameterized function example\n\n");
thread[0] = co_active();
thread[1] = co_create(65536, co_entrypoint);
thread[2] = co_create(65536, co_entrypoint);
//use specialized co_switch(cothread_t, int, int) for initial co_switch call
co_switch(thread[1], 1, 2);
co_switch(thread[2], 4, 8);
//after first call, entry point arguments have been initialized, standard
//co_switch(cothread_t) can be used from now on
co_switch(thread[2]);
co_switch(thread[1]);
printf("\ndone\n");
#if defined(_MSC_VER) || defined(__DJGPP__)
getch();
#endif
return 0;
}
-117
View File
@@ -1,117 +0,0 @@
#include "test.h"
#include <stdint.h>
#include <sys/mman.h>
//handles of the execution contexts used by the test
namespace Thread {
  cothread_t host = nullptr;
  cothread_t cpu = nullptr;
  cothread_t apu = nullptr;
}

//storage for the snapshots main() takes of the two cothread memory blocks
namespace Buffer {
  uint8_t cpu[65536] = {};
  uint8_t apu[65536] = {};
}

//base of the mapping that backs both cothreads
namespace Memory {
  uint8_t* buffer = nullptr;
}
//first of the two cooperating contexts; Enter is the cothread entry point,
//main/sub/leaf form a call chain that yields from different stack depths
struct CPU {
  static void Enter();
  void main();
  void sub();
  void leaf();
} cpu;
//second of the two cooperating contexts; same shape as CPU
struct APU {
  static void Enter();
  void main();
  void sub();
  void leaf();
} apu;
//cothread entry point: never returns, keeps re-running the CPU main routine
void CPU::Enter() {
  for(;;) cpu.main();
}

//the printed digits throughout this file mark the expected order of execution
void CPU::main() {
  printf("2\n");
  sub();
}

void CPU::sub() {
  co_switch(Thread::apu);
  printf("4\n");
  leaf();
}

void CPU::leaf() {
  //lives on the cothread stack and is printed only after several switches
  int x = 42;
  co_switch(Thread::host);
  printf("6\n");
  co_switch(Thread::apu);
  printf("8 (%d)\n", x);
  co_switch(Thread::host);
}
//cothread entry point: never returns, keeps re-running the APU main routine
void APU::Enter() {
  for(;;) apu.main();
}

void APU::main() {
  printf("3\n");
  sub();
}

void APU::sub() {
  co_switch(Thread::cpu);
  printf("7\n");
  leaf();
}

//hands control straight back to the CPU cothread
void APU::leaf() {
  co_switch(Thread::cpu);
}
//serialization round trip: runs two cothreads inside one fixed-address
//mapping, snapshots their memory blocks, lets them run on, then re-derives
//them, restores the snapshots and resumes from the snapshot point
//returns 0 on success, 1 if serialization is unsupported or mmap fails
auto main() -> int {
  if(!co_serializable()) {
    printf("This implementation does not support serialization\n");
    return 1;
  }

  //one 64KiB block per cothread
  const size_t size = 2 * 65536;

  //NOTE(review): the address is fixed presumably so that pointers stored
  //inside a snapshot are still valid when it is copied back - confirm
  void* mapping = mmap(
    (void*)0x10'0000'0000, size,
    PROT_READ | PROT_WRITE | PROT_EXEC,
    MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0
  );
  if(mapping == MAP_FAILED) {
    //previously the failure value was dereferenced on the next line
    perror("mmap");
    return 1;
  }
  Memory::buffer = (uint8_t*)mapping;
  Memory::buffer[0] = 42;
  printf("%p (%u)\n", (void*)Memory::buffer, Memory::buffer[0]);

  Thread::host = co_active();
  Thread::cpu = co_derive((void*)(Memory::buffer + 0 * 65536), 65536, CPU::Enter);
  Thread::apu = co_derive((void*)(Memory::buffer + 1 * 65536), 65536, APU::Enter);

  printf("1\n");
  co_switch(Thread::cpu);

  printf("5\n");
  //snapshot both cothreads while they are suspended
  memcpy(Buffer::cpu, Thread::cpu, 65536);
  memcpy(Buffer::apu, Thread::apu, 65536);
  co_switch(Thread::cpu);

  //start over with freshly derived cothreads in the same memory
  Thread::cpu = nullptr;
  Thread::apu = nullptr;
  Thread::cpu = co_derive((void*)(Memory::buffer + 0 * 65536), 65536, CPU::Enter);
  Thread::apu = co_derive((void*)(Memory::buffer + 1 * 65536), 65536, APU::Enter);

  printf("9\n");
  //restore the snapshots over the new cothreads and resume from there
  memcpy(Thread::cpu, Buffer::cpu, 65536);
  memcpy(Thread::apu, Buffer::apu, 65536);
  co_switch(Thread::cpu);

  Thread::cpu = nullptr;
  Thread::apu = nullptr;
  //unmap exactly what was mapped; the old call passed 0x900000000, which is
  //not the address handed to mmap above
  munmap(Memory::buffer, size);
  Memory::buffer = nullptr;
  return 0;
}
-52
View File
@@ -1,52 +0,0 @@
#include "test.h"
enum { Iterations = 500000000 };
//state shared between main() and the timed routines
namespace thread {
  cothread_t x = nullptr;    //context running main()
  cothread_t y = nullptr;    //context running co_timingtest()
  volatile int counter = 0;  //bumped once per call or switch
}
//cothread body for the switch benchmark: count, yield back to thread::x, repeat
void co_timingtest() {
  while(true) {
    ++thread::counter;
    co_switch(thread::x);
  }
}

//baseline for the benchmark: a plain function call doing the same increment
void sub_timingtest() {
  ++thread::counter;
}
//times Iterations plain function calls against Iterations co_switch round
//trips and prints both timings plus their ratio
//returns 0 on success, 1 if the benchmark cothread could not be created
int main() {
  printf("context-switching timing test\n\n");
  //clock() yields clock_t ticks, so the values are kept as clock_t and
  //subtracted directly instead of going through time_t and difftime()
  clock_t start, end;
  int i;

  start = clock();
  for(thread::counter = 0, i = 0; i < Iterations; i++) {
    sub_timingtest();
  }
  end = clock();

  const double t1 = (double)(end - start);
  //the count in the message is derived from Iterations so the two cannot drift apart
  printf("%2.3f seconds per %d million subroutine calls (%d iterations)\n", t1 / CLOCKS_PER_SEC, Iterations / 1000000, thread::counter);

  thread::x = co_active();
  thread::y = co_create(65536, co_timingtest);
  if(!thread::y) {
    //co_create signals failure with a null handle
    printf("co_create failed\n");
    return 1;
  }

  start = clock();
  for(thread::counter = 0, i = 0; i < Iterations; i++) {
    co_switch(thread::y);
  }
  end = clock();

  co_delete(thread::y);

  const double t2 = (double)(end - start);
  printf("%2.3f seconds per %d million co_switch calls (%d iterations)\n", t2 / CLOCKS_PER_SEC, Iterations / 1000000, thread::counter);

  printf("co_switch skew = %fx\n\n", t2 / t1);
  return 0;
}
-68
View File
@@ -1,68 +0,0 @@
# Supported targets
In the following lists, supported targets are only those that have been tested
and confirmed working. It is quite possible that libco will work on more
processors, compilers and operating systems than those listed below.
The "Overhead" is the cost of switching co-routines, as compared to an ordinary
C function call.
## libco.x86
* **Overhead:** ~5x
* **Supported processor(s):** 32-bit x86
* **Supported compiler(s):** any
* **Supported operating system(s):**
* Windows
* Mac OS X
* Linux
* BSD
## libco.amd64
* **Overhead:** ~10x (Windows), ~6x (all other platforms)
* **Supported processor(s):** 64-bit amd64
* **Supported compiler(s):** any
* **Supported operating system(s):**
* Windows
* Mac OS X
* Linux
* BSD
## libco.ppc
* **Overhead:** ~20x
* **Supported processor(s):** 32-bit PowerPC, 64-bit PowerPC
* **Supported compiler(s):** GNU GCC
* **Supported operating system(s):**
* Mac OS X
* Linux
* BSD
* Playstation 3
**Note:** this module contains compiler flags to enable/disable FPU and Altivec
support.
## libco.fiber
This uses Windows' "fibers" API.
* **Overhead:** ~15x
* **Supported processor(s):** Processor independent
* **Supported compiler(s):** any
* **Supported operating system(s):**
* Windows
## libco.sjlj
This uses the C standard library's `setjmp`/`longjmp` APIs.
* **Overhead:** ~30x
* **Supported processor(s):** Processor independent
* **Supported compiler(s):** any
* **Supported operating system(s):**
* Mac OS X
* Linux
* BSD
* Solaris
## libco.ucontext
This uses the POSIX "ucontext" API.
* **Overhead:** ***~300x***
* **Supported processor(s):** Processor independent
* **Supported compiler(s):** any
* **Supported operating system(s):**
* Linux
* BSD
-150
View File
@@ -1,150 +0,0 @@
# License
libco is released under the ISC license.
# Foreword
libco is a cross-platform, permissively licensed implementation of
cooperative-multithreading; a feature that is sorely lacking from the ISO C/C++
standard.
The library is designed for maximum speed and portability, and not for safety or
features. If safety or extra functionality is desired, a wrapper API can easily
be written to encapsulate all library functions.
Executing any operation that is listed below as not permitted results in
undefined behavior: it may work anyway, it may cause undesired or unknown
behavior, or it may crash the program entirely.
The goal of this library was to simplify the base API as much as possible,
implementing only that which cannot be implemented using pure C. Additional
functionality after this would only complicate ports of this library to new
platforms.
# Porting
This document is included as a reference for porting libco. Please submit any
ports you create to me, so that libco can become more useful. Please note that
since libco is permissively licensed, you must submit your code as a work of the
public domain in order for it to be included in the official distribution.
Full credit will be given in the source code of the official release. Please
do not bother submitting code to me under any other license -- including GPL,
LGPL, BSD or CC -- I am not interested in creating a library with multiple
different licenses depending on which targets are used.
Note that there are a variety of compile-time options in `settings.h`,
so if you want to use libco on a platform where it is not supported by default,
you may be able to configure the implementation appropriately without having
to make a whole new port.
# Synopsis
```c
typedef void* cothread_t;
cothread_t co_active();
cothread_t co_create(unsigned int heapsize, void (*coentry)(void));
void co_delete(cothread_t cothread);
void co_switch(cothread_t cothread);
```
# Usage
## cothread_t
```c
typedef void* cothread_t;
```
Handle to cothread.
Handle must be of type `void*`.
A value of null (0) indicates an uninitialized or invalid handle, whereas a
non-zero value indicates a valid handle. A valid handle is backed by execution
state that can be resumed with `co_switch()`.
## co_active
```c
cothread_t co_active();
```
Return handle to current cothread.
Note that the handle is valid even if the function is called from a non-cothread
context. To achieve this, we save the execution state in an internal buffer,
instead of using the user-provided memory. Since this handle is valid, it can
be used to co_switch to this context from another cothread. In multi-threaded
applications, make sure to not switch non-cothread context across CPU cores,
to prevent any possible conflicts with the OS scheduler.
## co_derive
```c
cothread_t co_derive(void* memory,
unsigned int heapsize,
void (*coentry)(void));
```
Initializes new cothread.
This function is identical to `co_create`, only it attempts to use the provided
memory instead of allocating new memory on the heap. Please note that certain
implementations (currently only Windows Fibers) cannot be created using existing
memory, and as such, this function will fail.
## co_create
```c
cothread_t co_create(unsigned int heapsize,
void (*coentry)(void));
```
Create new cothread.
`heapsize` is the amount of memory allocated for the cothread stack, specified
in bytes. This is unfortunately impossible to make fully portable. It is
recommended to specify sizes using `n * sizeof(void*)`. It is better to err
on the side of caution and allocate more memory than will be needed to ensure
compatibility with other platforms, within reason. A typical heapsize for a
32-bit architecture is ~1MB.
When the new cothread is first called, program execution jumps to coentry.
This function does not take any arguments, due to portability issues with
passing function arguments. However, arguments can be simulated by the use
of global variables, which can be set before the first call to each cothread.
`coentry()` must not return, and should end with an appropriate `co_switch()`
statement. Behavior is undefined if entry point returns normally.
Library is responsible for allocating cothread stack memory, to free
the user from needing to allocate special memory capable of being used
as program stack memory on platforms where this is required.
User is always responsible for deleting cothreads with `co_delete()`.
Return value of `null` (0) indicates cothread creation failed.
## co_delete
```c
void co_delete(cothread_t cothread);
```
Delete specified cothread.
`null` (0) or invalid cothread handle is not allowed.
Passing handle of active cothread to this function is not allowed.
Passing handle of primary cothread is not allowed.
## co_serializable
```c
int co_serializable(void);
```
Returns non-zero if the implementation keeps the entire coroutine state in the
buffer passed to `co_derive()`. That is, if `co_serializable()` returns
non-zero, and if your cothread does not modify the heap or any process-wide
state, then you can "snapshot" the cothread's state by taking a copy of the
buffer originally passed to `co_derive()`, and "restore" a previous state
by copying the snapshot back into the buffer it came from.
## co_switch
```c
void co_switch(cothread_t cothread);
```
Switch to specified cothread.
`null` (0) or invalid cothread handle is not allowed.
Passing handle of active cothread to this function is not allowed.
Generated Vendored
+4 -8
View File
@@ -1,11 +1,3 @@
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wparentheses"
/* placing code in section(text) does not mark it executable with Clang. */
#undef LIBCO_MPROTECT
#define LIBCO_MPROTECT
#endif
#if defined(__clang__) || defined(__GNUC__)
#if defined(__i386__)
#include "x86.c"
@@ -19,6 +11,8 @@
#include "ppc64v2.c"
#elif defined(_ARCH_PPC) && !defined(__LITTLE_ENDIAN__)
#include "ppc.c"
#elif defined(__riscv)
#include "riscv.c"
#elif defined(_WIN32)
#include "fiber.c"
#else
@@ -29,6 +23,8 @@
#include "x86.c"
#elif defined(_M_AMD64)
#include "amd64.c"
#elif defined(_M_ARM64)
#include "aarch64.c"
#else
#include "fiber.c"
#endif
+2 -1
View File
@@ -40,7 +40,8 @@ void swap_context(struct ppc64_context* read, struct ppc64_context* write);
__asm__(
".text\n"
".align 4\n"
".type swap_context @function\n"
".globl swap_context\n"
".type swap_context, @function\n"
"swap_context:\n"
".cfi_startproc\n"
Generated Vendored
+150
View File
@@ -0,0 +1,150 @@
#define LIBCO_C
#include "libco.h"
#include "settings.h"
#include <stdlib.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
static thread_local uint64_t co_active_buffer[64];
static thread_local cothread_t co_active_handle = 0;
/* Integer register store/load mnemonics, chosen by register width (XLEN):
   word accesses on RV32, doubleword accesses on RV64. */
#if __riscv_xlen == 32
#define I_STORE "sw"
#define I_LOAD "lw"
#elif __riscv_xlen == 64
#define I_STORE "sd"
#define I_LOAD "ld"
#else
#error Unsupported RISC-V XLEN
#endif
/* Floating-point register store/load mnemonics, chosen by FP register width
   (FLEN). When __riscv_flen is not defined both macros expand to "#", so each
   floating-point line in co_swap begins with '#'.
   NOTE(review): this relies on the assembler treating '#' as the start of a
   line comment - confirm for the toolchains in use. */
#if !defined(__riscv_flen)
#define F_STORE "#"
#define F_LOAD "#"
#elif __riscv_flen == 32
#define F_STORE "fsw"
#define F_LOAD "flw"
#elif __riscv_flen == 64
#define F_STORE "fsd"
#define F_LOAD "fld"
#else
#error Unsupported RISC-V FLEN
#endif
/* Context switch.
   Stores ra, sp, s0-s11 and (through F_STORE) fs0-fs11 into the buffer
   addressed by a1, loads the same registers from the buffer addressed by a0,
   then executes `ret`, which jumps to the ra that was just loaded.
   Every slot is 8 bytes apart regardless of XLEN/FLEN: slots 0-13 hold the
   integer registers, slots 14-25 the floating-point registers.
   The function is declared naked and consists of this asm statement only.
   NOTE(review): a0/a1 are assumed to carry `active` and `previous` (first and
   second integer argument registers of the RISC-V calling convention). */
__attribute__((naked))
static void co_swap(cothread_t active, cothread_t previous) {
__asm__(
/* save the outgoing context to (a1) */
I_STORE " ra, 0 *8(a1)\n"
I_STORE " sp, 1 *8(a1)\n"
I_STORE " s0, 2 *8(a1)\n"
I_STORE " s1, 3 *8(a1)\n"
I_STORE " s2, 4 *8(a1)\n"
I_STORE " s3, 5 *8(a1)\n"
I_STORE " s4, 6 *8(a1)\n"
I_STORE " s5, 7 *8(a1)\n"
I_STORE " s6, 8 *8(a1)\n"
I_STORE " s7, 9 *8(a1)\n"
I_STORE " s8, 10*8(a1)\n"
I_STORE " s9, 11*8(a1)\n"
I_STORE " s10, 12*8(a1)\n"
I_STORE " s11, 13*8(a1)\n"
F_STORE " fs0, 14*8(a1)\n"
F_STORE " fs1, 15*8(a1)\n"
F_STORE " fs2, 16*8(a1)\n"
F_STORE " fs3, 17*8(a1)\n"
F_STORE " fs4, 18*8(a1)\n"
F_STORE " fs5, 19*8(a1)\n"
F_STORE " fs6, 20*8(a1)\n"
F_STORE " fs7, 21*8(a1)\n"
F_STORE " fs8, 22*8(a1)\n"
F_STORE " fs9, 23*8(a1)\n"
F_STORE " fs10, 24*8(a1)\n"
F_STORE " fs11, 25*8(a1)\n"
/* load the incoming context from (a0) */
I_LOAD " ra, 0 *8(a0)\n"
I_LOAD " sp, 1 *8(a0)\n"
I_LOAD " s0, 2 *8(a0)\n"
I_LOAD " s1, 3 *8(a0)\n"
I_LOAD " s2, 4 *8(a0)\n"
I_LOAD " s3, 5 *8(a0)\n"
I_LOAD " s4, 6 *8(a0)\n"
I_LOAD " s5, 7 *8(a0)\n"
I_LOAD " s6, 8 *8(a0)\n"
I_LOAD " s7, 9 *8(a0)\n"
I_LOAD " s8, 10*8(a0)\n"
I_LOAD " s9, 11*8(a0)\n"
I_LOAD " s10, 12*8(a0)\n"
I_LOAD " s11, 13*8(a0)\n"
F_LOAD " fs0, 14*8(a0)\n"
F_LOAD " fs1, 15*8(a0)\n"
F_LOAD " fs2, 16*8(a0)\n"
F_LOAD " fs3, 17*8(a0)\n"
F_LOAD " fs4, 18*8(a0)\n"
F_LOAD " fs5, 19*8(a0)\n"
F_LOAD " fs6, 20*8(a0)\n"
F_LOAD " fs7, 21*8(a0)\n"
F_LOAD " fs8, 22*8(a0)\n"
F_LOAD " fs9, 23*8(a0)\n"
F_LOAD " fs10, 24*8(a0)\n"
F_LOAD " fs11, 25*8(a0)\n"
/* continue at the loaded return address */
"ret\n"
);
}
/* First code a new cothread runs: co_derive() stores this function's address
   in the context's ra slot. Fetches the user entry point from slot 3 of the
   context buffer and calls it; aborts if that function ever returns. */
static void co_entrypoint(cothread_t handle) {
  const uint64_t* context = (const uint64_t*)handle;
  void (*user_entry)(void) = (void (*)(void))(uintptr_t)context[3];

  user_entry();
  abort();  /* reached only if the cothread entry point returns */
}
/* Returns the handle of the currently running cothread. On first use the
   handle is pointed at co_active_buffer, which then holds the saved state of
   the calling context. */
cothread_t co_active() {
  if(co_active_handle == 0) {
    co_active_handle = &co_active_buffer;
  }
  return co_active_handle;
}
/* Turns caller-provided memory into a cothread.
   memory:     block that holds both the saved registers (at its start) and
               the stack (growing down from its end); null is returned as-is
   size:       size of the block in bytes; rounded down to a multiple of 16
               to obtain the offset of the initial stack pointer
   entrypoint: function the cothread runs when it is first switched to
   Returns the handle, which is the same address as `memory`. */
cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void)) {
  uint64_t* context = (uint64_t*)memory;
  uint64_t* stack_top;

  if(!co_active_handle) co_active_handle = &co_active_buffer;
  if(!context) return context;

  stack_top = (uint64_t*)((uint8_t*)context + (size & ~15u));

  /* slot layout matches the offsets used by co_swap */
  *(uintptr_t*)&context[0] = (uintptr_t)co_entrypoint;  /* ra (return address) */
  *(uintptr_t*)&context[1] = (uintptr_t)stack_top;      /* sp (stack pointer) */
  *(uintptr_t*)&context[2] = (uintptr_t)stack_top;      /* s0 (frame pointer) */
  *(uintptr_t*)&context[3] = (uintptr_t)entrypoint;     /* s1 (entry point) */

  return context;
}
/* Allocates `size` bytes with malloc() and derives a cothread from them.
   Returns a null handle when the allocation fails. */
cothread_t co_create(unsigned int size, void (*entrypoint)(void)) {
  void* block = malloc(size);
  return block ? co_derive(block, size, entrypoint) : (cothread_t)0;
}
/* Releases a cothread by passing its handle to free(); co_create() obtains
   that memory from malloc(). */
void co_delete(cothread_t handle) {
free(handle);
}
/* Makes `handle` the active cothread and transfers control to it. The state
   of the cothread that was active until now is saved into its own buffer by
   co_swap(). */
void co_switch(cothread_t handle) {
  cothread_t outgoing = co_active_handle;
  co_active_handle = handle;
  co_swap(handle, outgoing);
}
/* Always returns 1 (non-zero) for this backend. */
int co_serializable() {
return 1;
}
#ifdef __cplusplus
}
#endif
+9 -4
View File
@@ -3,7 +3,7 @@
/*[amd64, arm, ppc, x86]:
by default, co_swap_function is marked as a text (code) section
if not supported, uncomment the below line to use mprotect instead */
#define LIBCO_MPROTECT
/* #define LIBCO_MPROTECT */
/*[amd64]:
Win64 only: provides a substantial speed-up, but will thrash XMM regs
@@ -24,17 +24,22 @@
#endif
#if __STDC_VERSION__ >= 201112L
#define alignas(bytes) _Alignas(bytes)
#include <stdalign.h>
#else
#define alignas(bytes)
#endif
#if defined(_MSC_VER)
#define section(name) __declspec(allocate("." #name))
#pragma section(".text")
#define section(name) __declspec(allocate(".text"))
#elif defined(__APPLE__)
#define section(name) __attribute__((section("__TEXT,__" #name)))
#else
#elif defined(__GNUC__) && !defined(__clang__)
/* the # is treated as comment token by the GNU assembler. */
/* this is a hack to avoid a warning about changed section attributes. */
#define section(name) __attribute__((section("." #name "#")))
#else
#define section(name) __attribute__((section("." #name)))
#endif
/* if defined(LIBCO_C) */
Generated Vendored
+1 -1
View File
@@ -27,7 +27,7 @@ static void (fastcall *co_swap)(cothread_t, cothread_t) = 0;
section(text)
#endif
/* ABI: fastcall */
static const unsigned char co_swap_function[4096] = {
const unsigned char co_swap_function[4096] = {
0x89, 0x22, /* mov [edx],esp */
0x8b, 0x21, /* mov esp,[ecx] */
0x58, /* pop eax */