Skip to content

Commit 5e06fff

Browse files
authored
Move several str methods from C to applevel (#556)
Now we have enough machinery around to move most str methods to _str.spy. I decided to leave `str_eq`/`str_ne` in C mostly for performance reasons: I admit it's just a gut feeling, but I suppose that `==` is the most common operation on strings.
2 parents 5f19217 + 4c9834e commit 5e06fff

12 files changed

Lines changed: 226 additions & 243 deletions

File tree

spy/libspy/include/spy/str.h

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -95,42 +95,19 @@ spy_unsafe$_alloc_StrObject$impl(int32_t length) {
9595
return spy_unsafe$gc_ptr___str$StrObject_from_addr(spy_str_alloc((size_t)length));
9696
}
9797

98-
spy_StrObject *WASM_EXPORT(spy_str_add)(spy_StrObject *a, spy_StrObject *b);
99-
100-
spy_StrObject *WASM_EXPORT(spy_str_replace)(
101-
spy_StrObject *original,
102-
spy_StrObject *old,
103-
spy_StrObject *new_str
104-
);
105-
106-
spy_StrObject *WASM_EXPORT(spy_str_mul)(spy_StrObject *a, int32_t b);
107-
10898
bool WASM_EXPORT(spy_str_eq)(spy_StrObject *a, spy_StrObject *b);
10999

110100
static inline bool
111101
spy_str_ne(spy_StrObject *a, spy_StrObject *b) {
112102
return !spy_str_eq(a, b);
113103
}
114104

115-
// XXX: should we introduce a separate type Char?
116-
spy_StrObject *WASM_EXPORT(spy_str_getitem)(spy_StrObject *s, int32_t i);
117-
118-
int32_t WASM_EXPORT(spy_str_len)(spy_StrObject *s);
119-
120-
spy_StrObject *WASM_EXPORT(spy_str_repr)(spy_StrObject *s);
121-
122105
int32_t WASM_EXPORT(spy_str_hash)(spy_StrObject *s);
123106

124-
#define spy_operator$str_add spy_str_add
125-
#define spy_operator$str_mul spy_str_mul
126107
#define spy_operator$str_eq spy_str_eq
127108
#define spy_operator$str_ne spy_str_ne
128109
#define spy_operator$str_to_complex128 spy_str_to_complex128
129-
#define spy_builtins$str$replace spy_str_replace
130-
#define spy_builtins$str$__getitem__ spy_str_getitem
131-
#define spy_builtins$str$__len__ spy_str_len
132110
#define spy_builtins$str$__str__ spy_str_identity
133-
#define spy_builtins$str$__repr__ spy_str_repr
134111

135112
static inline spy_StrObject *
136113
spy_str_identity(spy_StrObject *s) {

spy/libspy/src/str.c

Lines changed: 0 additions & 165 deletions
Original file line numberDiff line numberDiff line change
@@ -30,178 +30,13 @@ spy_str_alloc(size_t length) {
3030
return res;
3131
}
3232

33-
spy_StrObject *
34-
spy_str_add(spy_StrObject *a, spy_StrObject *b) {
35-
size_t l = a->length + b->length;
36-
spy_StrObject *res = spy_str_alloc(l);
37-
char *buf = (char *)spy_StrObject_UTF8(res);
38-
memcpy(buf, spy_StrObject_UTF8(a), a->length);
39-
memcpy(buf + a->length, spy_StrObject_UTF8(b), b->length);
40-
return res;
41-
}
42-
43-
spy_StrObject *
44-
spy_str_replace(spy_StrObject *original, spy_StrObject *old, spy_StrObject *new_str) {
45-
size_t orig_len = original->length;
46-
size_t old_len = old->length;
47-
size_t new_len = new_str->length;
48-
49-
if (old_len == 0) {
50-
// when old is empty (old_len == 0), insert new_str before each byte and after the last one
51-
size_t result_len = orig_len + (orig_len + 1) * new_len;
52-
spy_StrObject *res = spy_str_alloc(result_len);
53-
char *buf = (char *)spy_StrObject_UTF8(res);
54-
for (size_t i = 0; i < orig_len; i++) {
55-
memcpy(buf, spy_StrObject_UTF8(new_str), new_len);
56-
buf += new_len;
57-
buf[0] = spy_StrObject_UTF8(original)[i];
58-
buf++;
59-
}
60-
memcpy(buf, spy_StrObject_UTF8(new_str), new_len);
61-
return res;
62-
}
63-
64-
// First pass -> count occurrences
65-
size_t count = 0;
66-
const char *p = (const char *)spy_StrObject_UTF8(original);
67-
const char *end = p + orig_len;
68-
while (p <= end - old_len) {
69-
if (memcmp(p, spy_StrObject_UTF8(old), old_len) == 0) {
70-
count++;
71-
p += old_len;
72-
} else {
73-
p++;
74-
}
75-
}
76-
77-
if (count == 0) {
78-
// Return a copy of the original string when no occurrences are found
79-
spy_StrObject *res = spy_str_alloc(orig_len);
80-
memcpy((char *)spy_StrObject_UTF8(res), spy_StrObject_UTF8(original), orig_len);
81-
return res;
82-
}
83-
84-
// Second pass -> build the result
85-
size_t result_len = orig_len + count * (new_len - old_len);
86-
spy_StrObject *res = spy_str_alloc(result_len);
87-
char *buf = (char *)spy_StrObject_UTF8(res);
88-
p = (const char *)spy_StrObject_UTF8(original);
89-
while (p <= end - old_len) {
90-
if (memcmp(p, spy_StrObject_UTF8(old), old_len) == 0) {
91-
memcpy(buf, spy_StrObject_UTF8(new_str), new_len);
92-
buf += new_len;
93-
p += old_len;
94-
} else {
95-
*buf++ = *p++;
96-
}
97-
}
98-
// Copy remaining bytes
99-
size_t remaining = end - p;
100-
memcpy(buf, p, remaining);
101-
return res;
102-
}
103-
104-
spy_StrObject *
105-
spy_str_mul(spy_StrObject *a, int32_t b) {
106-
size_t l = a->length * b;
107-
spy_StrObject *res = spy_str_alloc(l);
108-
char *buf = (char *)spy_StrObject_UTF8(res);
109-
for (int i = 0; i < b; i++) {
110-
memcpy(buf, spy_StrObject_UTF8(a), a->length);
111-
buf += a->length;
112-
}
113-
return res;
114-
}
115-
11633
bool
11734
spy_str_eq(spy_StrObject *a, spy_StrObject *b) {
11835
if (a->length != b->length)
11936
return false;
12037
return memcmp(spy_StrObject_UTF8(a), spy_StrObject_UTF8(b), a->length) == 0;
12138
}
12239

123-
spy_StrObject *
124-
spy_str_getitem(spy_StrObject *s, int32_t i) {
125-
// XXX this is wrong: it should return a code point
126-
size_t l = s->length;
127-
if (i < 0) {
128-
i += l;
129-
}
130-
if (i >= l || i < 0) {
131-
spy_panic("IndexError", "string index out of bound", __FILE__, __LINE__);
132-
return NULL;
133-
}
134-
spy_StrObject *res = spy_str_alloc(1);
135-
char *buf = (char *)spy_StrObject_UTF8(res);
136-
buf[0] = spy_StrObject_UTF8(s)[i];
137-
return res;
138-
}
139-
140-
int32_t
141-
spy_str_len(spy_StrObject *s) {
142-
return (int32_t)s->length;
143-
}
144-
145-
spy_StrObject *
146-
spy_str_repr(spy_StrObject *s) {
147-
// Choose quote character: use double quotes if string contains ' but not "
148-
char quote = '\'';
149-
for (size_t i = 0; i < s->length; i++) {
150-
if (spy_StrObject_UTF8(s)[i] == '\'') {
151-
quote = '"';
152-
}
153-
if (spy_StrObject_UTF8(s)[i] == '"') {
154-
quote = '\'';
155-
break;
156-
}
157-
}
158-
159-
// First pass: calculate the output length
160-
size_t out_len = 2; // for the surrounding quotes
161-
for (size_t i = 0; i < s->length; i++) {
162-
unsigned char c = (unsigned char)spy_StrObject_UTF8(s)[i];
163-
if (c == '\\' || c == quote) {
164-
out_len += 2;
165-
} else if (c == '\n' || c == '\r' || c == '\t') {
166-
out_len += 2;
167-
} else if (c < 0x20) {
168-
out_len += 4; // \xNN
169-
} else {
170-
out_len += 1;
171-
}
172-
}
173-
174-
// Second pass: fill the buffer
175-
spy_StrObject *res = spy_str_alloc(out_len);
176-
char *buf = (char *)spy_StrObject_UTF8(res);
177-
*buf++ = quote;
178-
for (size_t i = 0; i < s->length; i++) {
179-
unsigned char c = (unsigned char)spy_StrObject_UTF8(s)[i];
180-
if (c == '\\') {
181-
*buf++ = '\\';
182-
*buf++ = '\\';
183-
} else if (c == quote) {
184-
*buf++ = '\\';
185-
*buf++ = quote;
186-
} else if (c == '\n') {
187-
*buf++ = '\\';
188-
*buf++ = 'n';
189-
} else if (c == '\r') {
190-
*buf++ = '\\';
191-
*buf++ = 'r';
192-
} else if (c == '\t') {
193-
*buf++ = '\\';
194-
*buf++ = 't';
195-
} else if (c < 0x20) {
196-
buf += sprintf(buf, "\\x%02x", c);
197-
} else {
198-
*buf++ = c;
199-
}
200-
}
201-
*buf++ = quote;
202-
return res;
203-
}
204-
20540
int32_t
20641
spy_str_hash(spy_StrObject *s) {
20742
if (s->hash != 0)

spy/tests/compiler/test_str.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,19 @@ def foo() -> str:
2020
""")
2121
assert mod.foo() == "hello àèìòù"
2222

23+
def test_empty_str_as_StrObject(self):
24+
src = """
25+
from unsafe import _str_to_StrObject
26+
from _str import StrObject
27+
28+
def get_length(s: str) -> i32:
29+
data = _str_to_StrObject(s)
30+
utf8 = data.utf8
31+
return data.length
32+
"""
33+
mod = self.compile(src)
34+
assert mod.get_length("") == 0
35+
2336
def test_add(self):
2437
mod = self.compile("""
2538
def foo() -> str:

spy/tests/test_cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -294,8 +294,8 @@ def test_build_and_execute(self, capfd):
294294
def test_execute_pyodide(self):
295295
# pyodide under node cannot access /tmp/, so we cannot try to execute
296296
# files which we wrote to self.tmpdir. Instead, let's try to execute
297-
# examples/hello.spy
298-
hello_spy = spy.ROOT.dirpath().join("examples", "hello.spy")
297+
# examples/1_high_level/hello.spy
298+
hello_spy = spy.ROOT.dirpath().join("examples", "1_high_level", "hello.spy")
299299
assert hello_spy.exists()
300300
res, stdout = self.run_external(PYODIDE_EXE, hello_spy)
301301
assert stdout == "Hello world!\n"

spy/tests/test_doppler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ def `test::add[i32]::impl`(x: i32, y: i32) -> i32:
340340
return x + y
341341
342342
def `test::add[str]::impl`(x: str, y: str) -> str:
343-
return `operator::str_add`(x, y)
343+
return `_str::methods::__add__`(x, y)
344344
""")
345345

346346
def test_store_outer_var(self):

spy/tests/test_libspy.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,6 @@ def test_str(self):
4747
#
4848
ptr_W = ll.call("mk_W")
4949
assert ll.read_str(ptr_W) == (5, 0, b"world")
50-
#
51-
ptr_HW = ll.call("spy_str_add", ptr_H, ptr_W)
52-
assert ll.read_str(ptr_HW) == (11, 0, b"hello world")
5350

5451
def test_debug_log(self):
5552
src = r"""

spy/vm/function.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,12 @@ def is_pure(self) -> bool:
241241

242242
_pure_fqns = {
243243
FQN("builtins::type::__new__"),
244+
FQN("_str::methods::__add__"),
245+
FQN("_str::methods::__mul__"),
246+
FQN("_str::methods::__getitem__"),
247+
FQN("_str::methods::__len__"),
248+
FQN("_str::methods::__repr__"),
249+
FQN("_str::methods::replace"),
244250
}
245251

246252
def compute_inner_ns(self, args_w: Sequence[W_Object]) -> FQN:

spy/vm/modules/operator/binop.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,6 @@
189189
MM.register("!=", num_t, "complex128", OP.w_complex128_ne)
190190

191191
# str ops
192-
MM.register("+", "str", "str", OP.w_str_add)
193-
MM.register("*", "str", "i32", OP.w_str_mul)
194192
MM.register("==", "str", "str", OP.w_str_eq)
195193
MM.register("!=", "str", "str", OP.w_str_ne)
196194

spy/vm/modules/operator/opimpl_str.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,6 @@
1010
from spy.vm.vm import SPyVM
1111

1212

13-
@OP.builtin_func
14-
def w_str_add(vm: "SPyVM", w_a: W_Str, w_b: W_Str) -> W_Str:
15-
assert isinstance(w_a, W_Str)
16-
assert isinstance(w_b, W_Str)
17-
ptr_c = vm.ll.call("spy_str_add", w_a.ptr, w_b.ptr)
18-
return W_Str.from_ptr(vm, ptr_c)
19-
20-
21-
@OP.builtin_func
22-
def w_str_mul(vm: "SPyVM", w_a: W_Str, w_b: W_I32) -> W_Str:
23-
assert isinstance(w_a, W_Str)
24-
assert isinstance(w_b, W_I32)
25-
ptr_c = vm.ll.call("spy_str_mul", w_a.ptr, w_b.value)
26-
return W_Str.from_ptr(vm, ptr_c)
27-
28-
2913
@OP.builtin_func
3014
def w_str_eq(vm: "SPyVM", w_a: W_Str, w_b: W_Str) -> W_Bool:
3115
assert isinstance(w_a, W_Str)

spy/vm/modules/unsafe/ptr.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ def __init__(
223223
if addr == 0:
224224
assert length == 0
225225
else:
226-
assert length >= 1
226+
assert length >= 0
227227
self.w_T = w_T
228228
self.addr = fixedint.Int32(addr)
229229
self.length = fixedint.Int32(length)

0 commit comments

Comments
 (0)