v / thirdparty / builtins / compiler_builtins.c
165 lines · 155 sloc · 5.11 KB · eeaaff218bbe695c655a6380963e2f82deadcb94
Raw
1// Compiler runtime builtins needed for cross-compilation from macOS to Linux.
2// When cross-compiling with clang, 128-bit integer operations (e.g. in mbedtls bignum.c)
3// generate calls to __udivti3/__umodti3 which are normally provided by libgcc or compiler-rt.
4// Since the linuxroot sysroot doesn't include these libraries, we provide minimal
5// implementations here.
6
7#ifdef __SIZEOF_INT128__
8
9typedef unsigned __int128 tu_int;
10typedef unsigned long long du_int;
11
12typedef union {
13 tu_int all;
14 struct {
15#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
16 du_int low;
17 du_int high;
18#else
19 du_int high;
20 du_int low;
21#endif
22 } s;
23} utwords;
24
25static inline unsigned __clzdi2(du_int a) {
26 unsigned n = 0;
27 if (a == 0) return 64;
28 if ((a & 0xFFFFFFFF00000000ULL) == 0) { n += 32; a <<= 32; }
29 if ((a & 0xFFFF000000000000ULL) == 0) { n += 16; a <<= 16; }
30 if ((a & 0xFF00000000000000ULL) == 0) { n += 8; a <<= 8; }
31 if ((a & 0xF000000000000000ULL) == 0) { n += 4; a <<= 4; }
32 if ((a & 0xC000000000000000ULL) == 0) { n += 2; a <<= 2; }
33 if ((a & 0x8000000000000000ULL) == 0) { n += 1; }
34 return n;
35}
36
37tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
38 const unsigned n_udword_bits = sizeof(du_int) * 8;
39 const unsigned n_utword_bits = sizeof(tu_int) * 8;
40 utwords n, d, q, r;
41 unsigned sr;
42 n.all = a;
43 d.all = b;
44
45 // Special cases
46 if (n.s.high == 0) {
47 if (d.s.high == 0) {
48 if (rem) *rem = n.s.low % d.s.low;
49 return n.s.low / d.s.low;
50 }
51 if (rem) *rem = n.s.low;
52 return 0;
53 }
54 // n.s.high != 0
55 if (d.s.low == 0) {
56 if (d.s.high == 0) {
57 // Division by zero (undefined behavior, just return 0)
58 if (rem) *rem = 0;
59 return 0;
60 }
61 if (n.s.low == 0) {
62 if (rem) {
63 r.s.high = n.s.high % d.s.high;
64 r.s.low = 0;
65 *rem = r.all;
66 }
67 return n.s.high / d.s.high;
68 }
69 // n.s.high != 0 && n.s.low != 0 && d.s.low == 0 && d.s.high != 0
70 if ((d.s.high & (d.s.high - 1)) == 0) {
71 // d is a power of 2
72 if (rem) {
73 r.s.low = n.s.low;
74 r.s.high = n.s.high & (d.s.high - 1);
75 *rem = r.all;
76 }
77 return n.s.high >> __clzdi2(d.s.high);
78 }
79 sr = __clzdi2(d.s.high) - __clzdi2(n.s.high);
80 if (sr > n_udword_bits - 2) {
81 if (rem) *rem = n.all;
82 return 0;
83 }
84 ++sr;
85 // 1 <= sr <= n_udword_bits - 1
86 q.s.low = 0;
87 q.s.high = n.s.low << (n_udword_bits - sr);
88 r.s.high = n.s.high >> sr;
89 r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
90 goto shift_loop;
91 }
92 // d.s.low != 0
93 if (d.s.high == 0) {
94 if ((d.s.low & (d.s.low - 1)) == 0) {
95 if (rem) *rem = n.s.low & (d.s.low - 1);
96 if (d.s.low == 1) return n.all;
97 sr = __builtin_ctzll(d.s.low);
98 q.s.high = n.s.high >> sr;
99 q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
100 return q.all;
101 }
102 sr = 1 + n_udword_bits + __clzdi2(d.s.low) - __clzdi2(n.s.high);
103 if (sr == n_udword_bits) {
104 q.s.low = 0;
105 q.s.high = 0;
106 r.s.high = 0;
107 r.s.low = n.s.high;
108 } else if (sr < n_udword_bits) {
109 q.s.low = 0;
110 q.s.high = n.s.low << (n_udword_bits - sr);
111 r.s.high = n.s.high >> sr;
112 r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
113 } else {
114 q.s.low = n.s.low << (n_utword_bits - sr);
115 q.s.high = (n.s.high << (n_utword_bits - sr)) | (n.s.low >> (sr - n_udword_bits));
116 r.s.high = 0;
117 r.s.low = n.s.high >> (sr - n_udword_bits);
118 }
119 goto shift_loop;
120 }
121 // d.s.high != 0 && d.s.low != 0
122 sr = __clzdi2(d.s.high) - __clzdi2(n.s.high);
123 if (sr > n_udword_bits - 1) {
124 if (rem) *rem = n.all;
125 return 0;
126 }
127 ++sr;
128 q.s.low = 0;
129 if (sr == n_udword_bits) {
130 q.s.high = 0;
131 r.s.high = 0;
132 r.s.low = n.s.high;
133 } else {
134 r.s.high = n.s.high >> sr;
135 r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
136 q.s.high = n.s.low << (n_udword_bits - sr);
137 }
138
139shift_loop:
140 // The main shift-and-subtract loop
141 for (; sr > 0; --sr) {
142 r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1));
143 r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1));
144 q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1));
145 q.s.low = (q.s.low << 1);
146 // Compute (r - d) and conditionally subtract
147 const long long s = (long long)(d.all - r.all - 1) >> (n_utword_bits - 1);
148 q.s.low |= s & 1;
149 r.all -= d.all & s;
150 }
151 if (rem) *rem = r.all;
152 return q.all;
153}
154
155tu_int __udivti3(tu_int a, tu_int b) {
156 return __udivmodti4(a, b, 0);
157}
158
159tu_int __umodti3(tu_int a, tu_int b) {
160 tu_int r;
161 __udivmodti4(a, b, &r);
162 return r;
163}
164
165#endif // __SIZEOF_INT128__
166