Nugget
Loading...
Searching...
No Matches
probe-common.h
Go to the documentation of this file.
1/*
2
3MIT License
4
5Copyright (c) 2026 PCSX-Redux authors
6
7Permission is hereby granted, free of charge, to any person obtaining a copy
8of this software and associated documentation files (the "Software"), to deal
9in the Software without restriction, including without limitation the rights
10to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11copies of the Software, and to permit persons to whom the Software is
12furnished to do so, subject to the following conditions:
13
14The above copyright notice and this permission notice shall be included in all
15copies or substantial portions of the Software.
16
17THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23SOFTWARE.
24
25*/
26
27#pragma once
28
29// Shared bare-metal helpers for the 573 VRAM test binaries.
30//
31// Each test binary is a standalone PS-EXE that initializes the GPU itself
32// (no PSYQo runtime, no snitch harness), runs one topic's worth of probes,
33// prints RESULT lines over Unirom's TTY, and idles. We deliberately stay
34// raw because the whole point of the suite is to characterize what the
35// silicon does with edge-case GP0/GP1 inputs without any library helpfully
36// clamping them on the way through.
37
38#include <stdint.h>
39
40#include "common/hardware/dma.h"
41#include "common/hardware/gpu.h"
43#include "common/hardware/irq.h"
45
46// Bring the GPU into a known polled-FIFO state. Modeled on the reset()
47// in src/mips/tests/gpu/gpu.c so subsequent transfers do not hang on
48// bits that only advance under DMA.
49static inline void probeReset(void) {
50 IMASK = 0;
51 IREG = 0;
52 for (unsigned i = 0; i < 7; i++) {
53 DMA_CTRL[i].CHCR = 0;
54 DMA_CTRL[i].BCR = 0;
55 DMA_CTRL[i].MADR = 0;
56 }
57 DPCR = 0x800;
59 DICR = dicr;
60 DICR = 0;
61
62 // GP1(0x00): full GPU reset.
63 GPU_STATUS = 0x00000000;
64
65 // Restore a sane retail-shaped display mode. None of these settings are
66 // load-bearing for the probe itself - we just want the GPU out of any
67 // weird mode Unirom may have left it in.
68 struct DisplayModeConfig config = {
70 .vResolution = VR_240,
71 .videoMode = VM_NTSC,
72 .colorDepth = CD_15BITS,
73 .videoInterlace = VI_OFF,
74 .hResolutionExtended = HRE_NORMAL,
75 };
76 setDisplayMode(&config);
77 setHorizontalRange(0, 0xa00);
78 setVerticalRange(16, 255);
79 setDisplayArea(0, 0);
80 setDrawingArea(0, 0, 320, 240);
81
82 // GP1(0x04, 1): DMA direction = FIFO. This is what unblocks the
83 // status bits that gate VRAM transfers when we are writing GPU_DATA
84 // from the CPU. Without this the GP0(0xA0)/GP0(0xC0) handshake hangs.
85 sendGPUStatus(0x04000001);
86}
87
// GP1(0x09) - on retail this is "Texture Disable"; on arcade boards with a
// second VRAM bank it is repurposed as the upper-bank gate. The polarity is
// what we are characterizing, so the helper deliberately does not name the
// bit "enable" or "disable" - caller passes the literal value.
//
// Only the low 8 bits of |value| are forwarded as the command parameter.
static inline void gp1_09(uint32_t value) {
    const uint32_t param = value & 0xff;
    sendGPUStatus(0x09000000 | param);
}
93
// GP1(0x01): clear the GPU command FIFO. Note this only flushes the FIFO
// behind GP0; it does NOT reset the GPU's internal register state. For
// inter-iteration cleanup that returns the GPU to a fully known state,
// prefer gpuFullResetWithGate() below, which does a real GP1(0x00).
static inline void resetCommandBuffer(void) {
    sendGPUStatus(0x01000000);
}
99
// Full GPU reset between test iterations. GP1 (port 1) is unbuffered so
// GP1(0x00) reaches the GPU immediately and clears every internal register
// (texpage, drawing area, mode bits, etc) without touching VRAM contents.
// We then restore just the two settings every test relies on:
//   - GP1(0x04, 1) FIFO mode so polled CPU transfers advance bit 25
//   - GP1(0x09)    bank gate at the requested polarity
// This is faster than full probeReset() and avoids re-touching display
// timing registers that don't matter for the test outcome.
//
// Only the low 8 bits of |gate_value| are sent with GP1(0x09).
static inline void gpuFullResetWithGate(uint32_t gate_value) {
    const uint32_t gate = gate_value & 0xff;

    sendGPUStatus(0x00000000);         // GP1(0x00): full reset
    sendGPUStatus(0x04000001);         // GP1(0x04): FIFO mode
    sendGPUStatus(0x09000000 | gate);  // GP1(0x09): bank gate
}
113
114// Per psx-spx GPU section "Masking for COPY Commands parameters":
115// Xsiz_eff = ((Xsiz - 1) AND 3FFh) + 1 ;range 1..1024
116// Ysiz_eff = ((Ysiz - 1) AND 1FFh) + 1 ;range 1..512
117// These give the actual number of pixels/rows the GPU will transfer for
118// GP0(0xA0) CPU->VRAM, GP0(0xC0) VRAM->CPU, and GP0(0x80) VRAM->VRAM.
119// Sending more than this in the data phase overflows into the command
120// stream and crashes the GPU. Sending fewer stalls the GPU forever
121// waiting for the rest. Use these helpers to send exactly the right
122// number of words.
// Effective copy width: ((w - 1) AND 3FFh) + 1, always in 1..1024.
// The subtraction is done in unsigned arithmetic so that every int input,
// including INT_MIN, is well-defined (the low 10 bits are all that matter).
static inline int copyWidthEff(int w) {
    const unsigned masked = ((unsigned)w - 1u) & 0x3ffu;
    return (int)(masked + 1u);
}
// Effective copy height: ((h - 1) AND 1FFh) + 1, always in 1..512.
// The subtraction is done in unsigned arithmetic so that every int input,
// including INT_MIN, is well-defined (the low 9 bits are all that matter).
static inline int copyHeightEff(int h) {
    const unsigned masked = ((unsigned)h - 1u) & 0x1ffu;
    return (int)(masked + 1u);
}
125
// Per psx-spx, fast-fill (GP0 0x02) has different masking:
//   Ysiz_eff = Ysiz AND 1FFh   ;range 0..1FFh, 0 = NO FILL
// So multiples of 512 are silently rejected (Ysiz=0 effective).
//
// Returns the effective row count for a requested height |h|; a return
// value of 0 means "no fill at all".
static inline int fastFillHeightEff(int h) {
    return h & 0x1ff;
}
133
134// Multi-word GPU commands need exactly one waitGPU() at the start; bit 26
135// goes LOW after the first word and only returns high after the entire
136// transfer completes, so polling between sub-words hangs forever. This
137// matches the pattern in src/mips/tests/gpu/gpu.c::sendOnePolygon.
138//
139// All coordinate fields are passed through unmodified - we do NOT mask to
140// 16/9 bits because the masking behavior is part of what we are observing.
141
142// Write a single 16-bit pixel at (x, y). GP0(0xA0) consumes one full 32-bit
143// word per (1, 1) transfer; we set width=2 so the second pixel is filled
144// with a recognizable padding value, then do not care about it.
145static inline void writePixel(int16_t x, int16_t y, uint16_t value) {
146 waitGPU();
147 GPU_DATA = 0xa0000000;
148 GPU_DATA = ((uint32_t)(uint16_t)y << 16) | (uint32_t)(uint16_t)x;
149 GPU_DATA = ((uint32_t)(uint16_t)1 << 16) | (uint32_t)(uint16_t)2;
150 GPU_DATA = (uint32_t)value | ((uint32_t)0xdead << 16);
151}
152
153// Read a single 16-bit pixel at (x, y). After the GP0(0xC0) header words
154// we wait for status bit 27 ("Ready to send VRAM to CPU") before reading -
155// bit 26 is "ready to take a command word" which goes high BEFORE any
156// data is actually in the readback FIFO.
157static inline uint16_t readPixel(int16_t x, int16_t y) {
158 waitGPU();
159 GPU_DATA = 0xc0000000;
160 GPU_DATA = ((uint32_t)(uint16_t)y << 16) | (uint32_t)(uint16_t)x;
161 GPU_DATA = ((uint32_t)(uint16_t)1 << 16) | (uint32_t)(uint16_t)2;
162 while ((GPU_STATUS & 0x08000000) == 0) {
163 }
164 uint32_t word = GPU_DATA;
165 return (uint16_t)(word & 0xffff);
166}
167
168// Read N pixels from a horizontal strip starting at (x, y). |w| must be
169// even (so the readback word count is an integer). |dst| receives the
170// raw 16-bit words.
171static inline void readStrip(int16_t x, int16_t y, int16_t w, uint16_t* dst) {
172 waitGPU();
173 GPU_DATA = 0xc0000000;
174 GPU_DATA = ((uint32_t)(uint16_t)y << 16) | (uint32_t)(uint16_t)x;
175 GPU_DATA = ((uint32_t)(uint16_t)1 << 16) | (uint32_t)(uint16_t)w;
176 int words = (w + 1) >> 1;
177 for (int i = 0; i < words; i++) {
178 while ((GPU_STATUS & 0x08000000) == 0) {
179 }
180 uint32_t word = GPU_DATA;
181 dst[i * 2] = (uint16_t)(word & 0xffff);
182 if (i * 2 + 1 < w) dst[i * 2 + 1] = (uint16_t)(word >> 16);
183 }
184}
185
186// Pace large streamed payloads so the CPU does not outrun the GPU's
187// command-FIFO drain rate. Bit 25 ("DMA / Data Request") under DMA
188// direction = 1 (FIFO mode) means "FIFO has room". Without pacing,
189// transfers of more than a few hundred words deadlock or drop writes.
190static inline void streamPace(int idx) {
191 if ((idx & 7) == 0) {
192 while ((GPU_STATUS & 0x02000000) == 0) {
193 }
194 }
195}
196
197// Fill a rectangular region with a single 16-bit value via GP0(0xA0). Slow
198// but works at any (Y, H) including across Y=512 - we deliberately do NOT
199// use GP0(0x02) fast-fill here because that command's behavior is itself
200// what other tests are characterizing, and we need a known-good fill in
201// our test setup. Width must be even.
202static inline void fillRectViaUpload(int16_t x, int16_t y, int16_t w, int16_t h,
203 uint16_t value) {
204 waitGPU();
205 GPU_DATA = 0xa0000000;
206 GPU_DATA = ((uint32_t)(uint16_t)y << 16) | (uint32_t)(uint16_t)x;
207 GPU_DATA = ((uint32_t)(uint16_t)h << 16) | (uint32_t)(uint16_t)w;
208 uint32_t doubled = (uint32_t)value | ((uint32_t)value << 16);
209 int words = ((int)w * (int)h) >> 1;
210 for (int i = 0; i < words; i++) {
211 streamPace(i);
212 GPU_DATA = doubled;
213 }
214}
215
// Fill the full Y=0..1023 range of a vertical column |w| pixels wide
// starting at |x|. GP0(0xA0) silently caps height at 511 rows on at least
// some configurations (matches the limit spicyjpeg observed for GP0(0x02)
// fast-fill), so the column is written as four 256-row uploads at
// Y = 0, 256, 512 and 768, each well under that cap.
static inline void fillColumn(int16_t x, int16_t w, uint16_t value) {
    for (int16_t y = 0; y < 1024; y += 256) {
        fillRectViaUpload(x, y, w, 256, value);
    }
}
226
// Compute a cheap 32-bit hash of a row of |n| 16-bit pixels. Useful for
// quick "is this row what we expected" checks without dumping every byte.
//
// FNV-1a style: start from the FNV offset basis, then for each pixel XOR
// it in and multiply by the FNV prime. Each step consumes a whole 16-bit
// pixel rather than a byte. For n <= 0 the initial value is returned and
// |row| is never dereferenced.
static inline uint32_t hashRow(const uint16_t* row, int n) {
    uint32_t hash = 0x811c9dc5u;  // FNV-1a init
    for (const uint16_t* px = row; n > 0; n--, px++) {
        hash = (hash ^ *px) * 0x01000193u;
    }
    return hash;
}
237
// Lightweight pass/fail accounting. Each test binary records observations
// through PROBE_PASS / PROBE_FAIL / PROBE_INFO, then calls
// probeStatsSummary() at the end. We do not abort on failure -
// characterization tests should record every observation, not stop at the
// first surprise.
typedef struct {
    int passed;  // observations recorded via PROBE_PASS
    int failed;  // observations recorded via PROBE_FAIL
    int info;    // observations recorded via PROBE_INFO
} ProbeStats;
247
248static inline void probeStatsInit(ProbeStats* s) {
249 s->passed = 0;
250 s->failed = 0;
251 s->info = 0;
252}
253
// Record a passing observation: increments (stats)->passed and prints a
// line of the form "PASS <formatted message>" through ramsyscall_printf.
// |fmt| must be a string literal, since it is concatenated with the prefix
// and the trailing newline at compile time.
#define PROBE_PASS(stats, fmt, ...)                         \
    do {                                                    \
        (stats)->passed++;                                  \
        ramsyscall_printf("PASS " fmt "\n", ##__VA_ARGS__); \
    } while (0)
259
// Record a failing observation: increments (stats)->failed and prints a
// line of the form "FAIL <formatted message>" through ramsyscall_printf.
// Execution continues afterwards. |fmt| must be a string literal, since it
// is concatenated with the prefix and the trailing newline at compile time.
#define PROBE_FAIL(stats, fmt, ...)                         \
    do {                                                    \
        (stats)->failed++;                                  \
        ramsyscall_printf("FAIL " fmt "\n", ##__VA_ARGS__); \
    } while (0)
265
// Record an informational observation: increments (stats)->info and prints
// a line of the form "INFO <formatted message>" through ramsyscall_printf.
// |fmt| must be a string literal, since it is concatenated with the prefix
// and the trailing newline at compile time.
#define PROBE_INFO(stats, fmt, ...)                         \
    do {                                                    \
        (stats)->info++;                                    \
        ramsyscall_printf("INFO " fmt "\n", ##__VA_ARGS__); \
    } while (0)
271
// Print a line of the form "RESULT <formatted message>" through
// ramsyscall_printf. Unlike PROBE_PASS / PROBE_FAIL / PROBE_INFO this takes
// no stats argument and updates no counter. |fmt| must be a string literal.
#define PROBE_RESULT(fmt, ...)                                \
    do {                                                      \
        ramsyscall_printf("RESULT " fmt "\n", ##__VA_ARGS__); \
    } while (0)
276
277static inline void probeStatsSummary(const ProbeStats* s, const char* name) {
278 ramsyscall_printf("SUMMARY name=%s passed=%d failed=%d info=%d\n", name, s->passed,
279 s->failed, s->info);
280 // Marker line for log-capture tools (psxup.py looks for this exact string
281 // to terminate its read loop).
282 ramsyscall_printf("=== Done ===\n");
283}
uint32_t dicr
Definition cester-cop0.c:98
@ VM_NTSC
Definition gpu.h:47
@ HRE_NORMAL
Definition gpu.h:62
@ VR_240
Definition gpu.h:42
@ HR_320
Definition gpu.h:36
@ VI_OFF
Definition gpu.h:57
@ CD_15BITS
Definition gpu.h:52
int n
Definition dcache.c:225
ramsyscall_printf("=== e01_kseg1_reads_no_fill ===\n")
sendGPUStatus(0)
#define DMA_CTRL
Definition dma.h:36
int i
Definition gte-regio.c:287
#define DPCR
Definition hwregs.h:49
#define GPU_DATA
Definition hwregs.h:52
#define IMASK
Definition hwregs.h:47
#define GPU_STATUS
Definition hwregs.h:53
#define DICR
Definition hwregs.h:50
#define IREG
Definition hwregs.h:46
char * s
Definition string.c:48
Definition gpu.h:66
enum HResolution hResolution
Definition gpu.h:67
Definition probe-common.h:242
int passed
Definition probe-common.h:243
int info
Definition probe-common.h:245
int failed
Definition probe-common.h:244
static int value
Definition syscalls.h:534
void int(code1, code2)
void uint32_t(classId, spec)