Nugget
Loading...
Searching...
No Matches
gte-precision.c
Go to the documentation of this file.
1// Precision tests: 44-bit MAC overflow detection, division table
2// spot-checks, RTPS IR3/FLAG.22 sf=0 anomaly.
3// These target the exact behaviors that cause subtle game glitches
4// when emulated imprecisely.
5
6// ==========================================================================
7// 44-bit MAC overflow detection (FLAG bits 25-30)
8// ==========================================================================
9// The GTE accumulator is 44 bits wide. Overflow is detected per-addition
10// in the chain, not on the final result. Two overflows that cancel out
11// will still both be flagged.
12
13// MAC1 positive overflow (FLAG.30): accumulated sum exceeds +0x7FFFFFFFFFF
14CESTER_TEST(prec_mac1_positive_overflow, gte_tests,
15 // MVMVA with large matrix and large vector, sf=0 (no shift)
16 // R11=0x7FFF, V0.X=0x7FFF -> R11*VX = 0x3FFF0001
17 // With TR=0x7FFFFFFF and sf=0: TRX<<12 + R11*VX + R12*VY + R13*VZ
18 // TRX<<12 = 0x7FFFFFFF000 (43 bits) + 0x3FFF0001 = overflows 44-bit
19 cop2_putc(0, 0x00007fff); // R11=0x7FFF, R12=0
20 cop2_putc(1, 0x00000000);
21 cop2_putc(2, 0x00000000);
22 cop2_putc(3, 0x00000000);
23 cop2_putc(4, 0);
24 cop2_putc(5, 0x7fffffff); // TRX = max positive 32-bit
25 cop2_putc(6, 0);
26 cop2_putc(7, 0);
27 cop2_put(0, (0 << 16) | 0x7fff); // VX=0x7FFF, VY=0
28 cop2_put(1, 0);
31 uint32_t flag = gte_read_flag();
32 uint32_t f30 = (flag >> 30) & 1;
33 ramsyscall_printf("MAC1 pos overflow: FLAG=0x%08x F30=%u\n", flag, f30);
35)
36
37// MAC1 negative overflow (FLAG.27)
38CESTER_TEST(prec_mac1_negative_overflow, gte_tests,
39 cop2_putc(0, 0x00007fff); // R11=0x7FFF
40 cop2_putc(1, 0x00000000);
41 cop2_putc(2, 0x00000000);
42 cop2_putc(3, 0x00000000);
43 cop2_putc(4, 0);
44 cop2_putc(5, 0x80000000); // TRX = min negative 32-bit
45 cop2_putc(6, 0);
46 cop2_putc(7, 0);
47 cop2_put(0, (0 << 16) | 0x8000); // VX=-0x8000 (negative)
48 cop2_put(1, 0);
51 uint32_t flag = gte_read_flag();
52 uint32_t f27 = (flag >> 27) & 1;
53 ramsyscall_printf("MAC1 neg overflow: FLAG=0x%08x F27=%u\n", flag, f27);
55)
56
57// MAC2 overflow (FLAG.29 positive, FLAG.26 negative)
58CESTER_TEST(prec_mac2_overflow, gte_tests,
59 cop2_putc(0, 0x00000000);
60 cop2_putc(1, 0x7fff0000); // R21=0x7FFF (high16 of R13R21), R13=0
61 cop2_putc(2, 0x00000000);
62 cop2_putc(3, 0x00000000);
63 cop2_putc(4, 0);
64 cop2_putc(5, 0);
65 cop2_putc(6, 0x7fffffff); // TRY = max
66 cop2_putc(7, 0);
67 cop2_put(0, (0 << 16) | 0x7fff);
68 cop2_put(1, 0);
71 uint32_t flag = gte_read_flag();
72 uint32_t f29 = (flag >> 29) & 1;
73 ramsyscall_printf("MAC2 pos overflow: FLAG=0x%08x F29=%u\n", flag, f29);
75)
76
77// MAC3 overflow (FLAG.28 positive, FLAG.25 negative)
78CESTER_TEST(prec_mac3_overflow, gte_tests,
79 cop2_putc(0, 0x00000000);
80 cop2_putc(1, 0x00000000);
81 cop2_putc(2, 0x00000000);
82 cop2_putc(3, 0x00007fff); // R31=0x7FFF (low16 of R31R32), R32=0
83 cop2_putc(4, 0);
84 cop2_putc(5, 0);
85 cop2_putc(6, 0);
86 cop2_putc(7, 0x7fffffff); // TRZ = max
87 cop2_put(0, (0 << 16) | 0x7fff);
88 cop2_put(1, 0);
91 uint32_t flag = gte_read_flag();
92 uint32_t f28 = (flag >> 28) & 1;
93 ramsyscall_printf("MAC3 pos overflow: FLAG=0x%08x F28=%u\n", flag, f28);
95)
96
97// Two overflows that cancel: both positive and negative overflow
98// should be flagged even if the final result is in range
99CESTER_TEST(prec_mac_double_overflow, gte_tests,
100 // Use OP (cross product) sf=0 with values that cause intermediate
101 // overflow in both directions during the subtract
102 // MAC1 = R22*IR3 - R33*IR2
103 // Make R22*IR3 overflow positive, then R33*IR2 brings it back
104 cop2_putc(0, 0x00000000);
105 cop2_putc(2, 0x00007fff); // R22=0x7FFF
106 cop2_putc(4, 0x7fff); // R33=0x7FFF
107 cop2_put(9, 0);
108 cop2_put(10, 0x7fff); // IR2
109 cop2_put(11, 0x7fff); // IR3
111 cop2_cmd(COP2_OP_CP(0, 0)); // sf=0
112 int32_t mac1;
114 cop2_get(25, mac1);
115 flag = gte_read_flag();
116 ramsyscall_printf("double overflow: MAC1=%d FLAG=0x%08x\n", mac1, flag);
117 // R22*IR3 = 0x7FFF*0x7FFF = 0x3FFF0001 (fits in 44-bit)
118 // Then subtract R33*IR2 = 0x7FFF*0x7FFF = 0x3FFF0001
119 // Result = 0, but check if intermediate overflow flagged
122)
123
124// ==========================================================================
125// Division table spot-checks
126// ==========================================================================
127// The UNR table has 257 entries. Test specific H/SZ3 pairs that exercise
128// known table entries and verify exact quotients.
129
130// Helper: run RTPS with given H and SZ3 (via VZ), return quotient via SX
131// Uses VX=0x1000, OFX=0 so SX = VX * (H/SZ3) = 0x1000 * quotient >> 16
132// Actually simpler: set IR1=0x1000 before RTPS, read MAC0 for DQA path,
133// or just check SX directly.
134
135// H/SZ3 = 1/1: quotient should be near 0x10000 (1.0 in 0.16 fixed)
136CESTER_TEST(prec_div_1_over_1, gte_tests,
139 cop2_putc(24, 0); // OFX=0
140 cop2_putc(25, 0);
141 cop2_putc(26, 1); // H=1
142 cop2_putc(27, 0);
143 cop2_putc(28, 0);
144 cop2_put(0, (0 << 16) | 0x1000); // VX=0x1000, VY=0
145 cop2_put(1, 1); // VZ=1 -> SZ3=1
150 flag = gte_read_flag();
151 int16_t sx = (int16_t)(sxy2 & 0xffff);
152 ramsyscall_printf("div 1/1: SX=%d FLAG=0x%08x\n", sx, flag);
153 // H=1, SZ3=1 -> H >= SZ3*2? 1 >= 2? No -> no overflow
154 // quotient = ((H*0x20000/SZ3)+1)/2 = 0x10000 (1.0), below the 0x1FFFF cap.
155 // SX = IR1 * quotient >> 16 = 0x1000 * 0x10000 >> 16 = 0x1000
156 // Then saturated to 0x3FF
157 uint32_t f17 = (flag >> 17) & 1;
158 cester_assert_uint_eq(0, f17); // no division overflow
159)
160
161// H/SZ3 = 100/1000: quotient = 0.1 in fixed point
162CESTER_TEST(prec_div_100_over_1000, gte_tests,
164 gte_set_translation(0, 0, 0);
165 cop2_putc(24, 0);
166 cop2_putc(25, 0);
167 cop2_putc(26, 100); // H=100
168 cop2_putc(27, 0);
169 cop2_putc(28, 0);
170 cop2_put(0, (0 << 16) | 1000); // VX=1000
171 cop2_put(1, 1000); // VZ=1000
173 cop2_cmd(COP2_RTPS(1, 0));
175 cop2_get(14, sxy2);
176 int16_t sx = (int16_t)(sxy2 & 0xffff);
177 ramsyscall_printf("div 100/1000: SX=%d\n", sx);
178 // SX = 1000 * (100/1000) = 100 (roughly, depends on table rounding)
180)
181
182// The documented corner case: H=0xF015, SZ3=0x780B -> 0x20000 saturates to 0x1FFFF
183CESTER_TEST(prec_div_corner_f015_780b, gte_tests,
185 gte_set_translation(0, 0, 0);
186 cop2_putc(24, 0);
187 cop2_putc(25, 0);
188 cop2_putc(26, 0xf015); // H
189 cop2_putc(27, 0);
190 cop2_putc(28, 0);
191 cop2_put(0, (0 << 16) | 1); // VX=1 (minimal to see quotient effect)
192 cop2_put(1, 0x780b); // VZ = 0x780B
194 cop2_cmd(COP2_RTPS(1, 0));
196 cop2_get(14, sxy2);
197 flag = gte_read_flag();
198 int16_t sx = (int16_t)(sxy2 & 0xffff);
199 ramsyscall_printf("div F015/780B: SX=%d FLAG=0x%08x\n", sx, flag);
200 // This should NOT set FLAG.17 (division overflow)
201 uint32_t f17 = (flag >> 17) & 1;
203)
204
205// Large H, small SZ3 (exactly at the overflow threshold): H=0xFFFE, SZ3=0x7FFF
206CESTER_TEST(prec_div_large_h, gte_tests,
208 gte_set_translation(0, 0, 0);
209 cop2_putc(24, 0);
210 cop2_putc(25, 0);
211 cop2_putc(26, 0xfffe); // H near max
212 cop2_putc(27, 0);
213 cop2_putc(28, 0);
214 cop2_put(0, (0 << 16) | 1);
215 cop2_put(1, 0x7fff); // SZ3=0x7FFF -> H >= SZ3*2? 0xFFFE >= 0xFFFE -> yes, overflow
217 cop2_cmd(COP2_RTPS(1, 0));
219 flag = gte_read_flag();
220 uint32_t f17 = (flag >> 17) & 1;
221 ramsyscall_printf("div large H: FLAG=0x%08x F17=%u\n", flag, f17);
222 cester_assert_uint_eq(1, f17); // H >= SZ3*2 is true (equal counts)
223)
224
225// SZ3=1 with moderate H (quotient near max)
226CESTER_TEST(prec_div_sz3_one, gte_tests,
228 gte_set_translation(0, 0, 0);
229 cop2_putc(24, 0);
230 cop2_putc(25, 0);
231 cop2_putc(26, 1); // H=1
232 cop2_putc(27, 0);
233 cop2_putc(28, 0);
234 cop2_put(0, (0 << 16) | 1);
235 cop2_put(1, 1); // SZ3=1
237 cop2_cmd(COP2_RTPS(1, 0));
239 int32_t ir1;
240 cop2_get(14, sxy2);
242 flag = gte_read_flag();
243 int16_t sx = (int16_t)(sxy2 & 0xffff);
244 ramsyscall_printf("div SZ3=1: SX=%d IR1=%d FLAG=0x%08x\n", sx, ir1, flag);
245 // H/SZ3 = 1/1 -> quotient = 0x10000 (1.0), below the 0x1FFFF cap
246 // SX = IR1 * 0x10000 >> 16 = 1 * 0x10000 >> 16 = 1
248)
249
250// ==========================================================================
251// RTPS IR3/FLAG.22 anomaly with sf=0
252// ==========================================================================
253// psx-spx: "When using RTP with sf=0, the IR3 saturation flag (FLAG.22)
254// gets set only if MAC3 SAR 12 exceeds -8000h..+7FFFh, although IR3 is
255// saturated when MAC3 exceeds -8000h..+7FFFh."
256//
257// Need MAC3 that is out of [-0x8000, 0x7FFF] range (so IR3 saturates)
258// but MAC3 >> 12 is in range (so FLAG.22 should NOT be set).
259
260CESTER_TEST(prec_rtps_sf0_ir3_flag_anomaly, gte_tests,
262 // TRZ such that MAC3 is just over 0x7FFF but MAC3>>12 is in range
263 // With identity rotation and VZ=0: MAC3 = TRZ << 12 (sf=0, no shift)
264 // More precisely, with sf=0 the formula is: MAC3 = TRZ*0x1000 + R3x*V
265 // and the sum is not shifted right by 12 afterwards:
266 // sf=0: A3 returns the raw 44-bit value without >>12
267 // MAC3 = TRZ<<12 + R31*VX + R32*VY + R33*VZ (no shift applied)
268 // With identity: MAC3 = TRZ<<12 + VZ*0x1000
269 // We want MAC3 > 0x7FFF (IR3 saturates) but MAC3>>12 in [-0x8000,0x7FFF]
270 // MAC3 = 0x8000 -> MAC3>>12 = 8 (in range) -> FLAG.22 NOT set but IR3 saturated
271 cop2_putc(5, 0);
272 cop2_putc(6, 0);
273 cop2_putc(7, 0); // TRZ = 0
274 cop2_putc(24, 0);
275 cop2_putc(25, 0);
276 cop2_putc(26, 200);
277 cop2_putc(27, 0);
278 cop2_putc(28, 0);
279 // VZ = 8 -> MAC3 = 0 + 0x1000*8 = 0x8000 (just over 0x7FFF)
280 cop2_put(0, 0x00000000);
281 cop2_put(1, 8);
283 cop2_cmd(COP2_RTPS(0, 0)); // sf=0
284 int32_t mac3;
286 cop2_get(27, mac3);
287 cop2_get(11, ir3);
288 flag = gte_read_flag();
289 uint32_t f22 = (flag >> 22) & 1;
290 ramsyscall_printf("sf=0 anomaly: MAC3=%d IR3=0x%04x FLAG=0x%08x F22=%u\n",
291 mac3, ir3 & 0xffff, flag, f22);
292 // MAC3 = 0x8000 = 32768, which is above 0x7FFF, so it is outside the
293 // [-0x8000, 0x7FFF] range for IR3. IR3 should saturate to 0x7FFF.
294 // MAC3 >> 12 = 0x8000 >> 12 = 8 -> in range -> FLAG.22 should NOT be set.
295 // This is the anomaly: IR3 saturated but FLAG.22 not set.
297 cester_assert_uint_eq(0x7fff, ir3);
299 uint32_t f17 = (flag >> 17) & 1;
301)
302
303// Stronger test: MAC3 = 0x10000 -> well above 0x7FFF, but >>12 = 16 (in range)
304CESTER_TEST(prec_rtps_sf0_ir3_flag_strong, gte_tests,
306 cop2_putc(5, 0);
307 cop2_putc(6, 0);
308 cop2_putc(7, 0);
309 cop2_putc(24, 0);
310 cop2_putc(25, 0);
311 cop2_putc(26, 200);
312 cop2_putc(27, 0);
313 cop2_putc(28, 0);
314 // VZ = 16 -> MAC3 = 0x1000 * 16 = 0x10000 (65536, way above 0x7FFF)
315 cop2_put(0, 0x00000000);
316 cop2_put(1, 16);
319 int32_t mac3;
323 flag = gte_read_flag();
324 uint32_t f22 = (flag >> 22) & 1;
325 ramsyscall_printf("sf=0 strong: MAC3=%d IR3=0x%04x FLAG=0x%08x F22=%u\n",
326 mac3, ir3 & 0xffff, flag, f22);
327 // MAC3 = 0x10000 -> IR3 saturated to 0x7FFF
329 // MAC3 >> 12 = 0x10000 >> 12 = 16 -> in range -> FLAG.22 NOT set
331)
332
333// Counter-test: MAC3 >> 12 exceeds range -> FLAG.22 SHOULD be set
334CESTER_TEST(prec_rtps_sf0_ir3_flag_set, gte_tests,
336 cop2_putc(5, 0);
337 cop2_putc(6, 0);
338 cop2_putc(7, 8); // TRZ = 8, so MAC3 = 8<<12 + VZ*0x1000
339 cop2_putc(24, 0);
340 cop2_putc(25, 0);
341 cop2_putc(26, 200);
342 cop2_putc(27, 0);
343 cop2_putc(28, 0);
344 // VZ = 0x7FF0 -> MAC3 = 8*4096 + 0x7FF0*0x1000 = 0x8000 + 0x7FF0000 = 0x7FF8000
345 // MAC3 >> 12 = 0x7FF8 -> in range? 0x7FF8 < 0x7FFF -> yes, still in range
346 // Need TRZ large enough: TRZ = 0x7FFF -> MAC3 = 0x7FFF<<12 = 0x7FFF000
347 // MAC3>>12 = 0x7FFF -> at boundary. With VZ=1: MAC3 = 0x7FFF000 + 0x1000 = 0x8000000
348 // MAC3>>12 = 0x8000 -> OUT of range -> FLAG.22 should be set
349 cop2_putc(7, 0x7fff);
350 cop2_put(0, 0x00000000);
351 cop2_put(1, 1);
353 cop2_cmd(COP2_RTPS(0, 0));
354 int32_t mac3;
356 cop2_get(27, mac3);
357 cop2_get(11, ir3);
358 flag = gte_read_flag();
359 uint32_t f22 = (flag >> 22) & 1;
360 ramsyscall_printf("sf=0 flag set: MAC3=%d IR3=0x%04x FLAG=0x%08x F22=%u\n",
361 mac3, ir3 & 0xffff, flag, f22);
362 // MAC3>>12 = 0x8000 -> exceeds 0x7FFF -> FLAG.22 SHOULD be set
364)
#define COP2_OP_CP(sf, lm)
Definition cop2.h:136
#define cop2_cmd(op)
Definition cop2.h:175
#define COP2_V_V0
Definition cop2.h:70
#define cop2_put(reg, val)
Definition cop2.h:182
#define COP2_MX_RT
Definition cop2.h:64
#define cop2_putc(reg, val)
Definition cop2.h:196
#define COP2_RTPS(sf, lm)
Definition cop2.h:129
#define COP2_MVMVA(sf, mx, v, cv, lm)
Definition cop2.h:145
#define cop2_get(reg, dest)
Definition cop2.h:189
#define COP2_CV_TR
Definition cop2.h:76
int32_t mac1
Definition gte-depthcue.c:116
CESTER_TEST(prec_mac1_positive_overflow, gte_tests, cop2_putc(0, 0x00007fff);cop2_putc(1, 0x00000000);cop2_putc(2, 0x00000000);cop2_putc(3, 0x00000000);cop2_putc(4, 0);cop2_putc(5, 0x7fffffff);cop2_putc(6, 0);cop2_putc(7, 0);cop2_put(0,(0<< 16)|0x7fff);cop2_put(1, 0);gte_clear_flag();cop2_cmd(COP2_MVMVA(0, COP2_MX_RT, COP2_V_V0, COP2_CV_TR, 0));uint32_t flag=gte_read_flag();uint32_t f30=(flag > > 30) &1;ramsyscall_printf("MAC1 pos overflow: FLAG=0x%08x F30=%u\n", flag, f30);cester_assert_uint_eq(1, f30);) CESTER_TEST(prec_mac1_negative_overflow
cester_assert_int_eq(1, sx)
gte_set_identity_rotation()
uint32_t sxy2
Definition gte-precision.c:148
uint32_t f17
Definition gte-precision.c:157
ramsyscall_printf("MAC1 neg overflow: FLAG=0x%08x F27=%u\n", flag, f27)
int32_t ir1
Definition gte-precision.c:239
uint32_t f27
Definition gte-precision.c:52
gte_tests
Definition gte-precision.c:38
uint32_t flag
Definition gte-precision.c:51
int16_t sx
Definition gte-precision.c:151
cester_assert_uint_eq(1, f27)
gte_set_translation(0, 0, 0)
gte_clear_flag()
uint32_t f28
Definition gte-precision.c:92
uint32_t ir3
Definition gte-precision.c:320
int32_t mac3
Definition gte-precision.c:319
uint32_t f22
Definition gte-precision.c:324
void uint32_t(classId, spec)