Nugget
Loading...
Searching...
No Matches
gte-kernels.hh
Go to the documentation of this file.
1/*
2
3MIT License
4
5Copyright (c) 2023 PCSX-Redux authors
6
7Permission is hereby granted, free of charge, to any person obtaining a copy
8of this software and associated documentation files (the "Software"), to deal
9in the Software without restriction, including without limitation the rights
10to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11copies of the Software, and to permit persons to whom the Software is
12furnished to do so, subject to the following conditions:
13
14The above copyright notice and this permission notice shall be included in all
15copies or substantial portions of the Software.
16
17THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23SOFTWARE.
24
25*/
26
27#pragma once
28
29#include <stdint.h>
30
31namespace psyqo {
32
33namespace GTE {
34
46namespace Kernels {
47
// Shift factor: Unshifted (no change) or Shifted (>> 12).
// The enumerator value is used directly as a bit when a command word is built.
enum SF : unsigned { Unshifted = 0, Shifted = 1 };
// Low limit: Unlimited (-2^15) or Limited (0).
// The enumerator value is used directly as a bit when a command word is built.
enum LM : unsigned { Unlimited = 0, Limited = 1 };
52
53// Coordinate and Perspective Transformation
54
// RTPS - Perspective Transformation (single)
//   pers(([rt]·[v0]) >> 12 + [tr]) -> sxy2
// 14 cycles
static inline void rtps() {
    asm volatile("cop2 0x0180001");
}
59
// RTPT - Perspective Transformation (triple)
//   pers(([rt]·[v0]) >> 12 + [tr]) -> sxy0
//   pers(([rt]·[v1]) >> 12 + [tr]) -> sxy1
//   pers(([rt]·[v2]) >> 12 + [tr]) -> sxy2
// 22 cycles
static inline void rtpt() {
    asm volatile("cop2 0x0280030");
}
66
67// Depth Queuing
68
// DPCL - Depth Cue Color Light (the mnemonic is also seen written as DCPL)
//   (1 - dp)·[rgb·sv] + dp·[fc] -> rgb, lv, sv
// 8 cycles
static inline void dpcl() {
    asm volatile("cop2 0x0680029");
}
73
// DPCS - Depth Cueing (single)
//   (1 - dp)·[rgb] + dp·[fc] -> rgb, lv, sv
// 8 cycles
static inline void dpcs() {
    asm volatile("cop2 0x0780010");
}
78
// DPCT - Depth Cueing (triple)
//   (1 - dp)·[rgb0] + dp·[fc] -> rgb0, lv, sv
//   (1 - dp)·[rgb1] + dp·[fc] -> rgb1, lv, sv
//   (1 - dp)·[rgb2] + dp·[fc] -> rgb2, lv, sv
// 17 cycles
static inline void dpct() {
    asm volatile("cop2 0x0f8002a");
}
85
// INTPL - Interpolation of a vector and far color
//   (1 - dp)·[sv] + dp·[fc] -> rgb2, lv, sv
// 8 cycles
static inline void intpl() {
    asm volatile("cop2 0x0980011");
}
90
91// Termwise Vector Square
92// [sv.x² >> 12, sv.y² >> 12, sv.z² >> 12] -> lv, sv
93// 5 cycles
94template <SF sf = Shifted>
95static inline void sqr() {
96 if constexpr (sf == Shifted) {
97 asm volatile("cop2 0x0a80428");
98 } else {
99 asm volatile("cop2 0x0a00428");
100 }
101}
102
103// Light Source Calculations
104
// NCS - Normal color (single)
//   limit(([ll]·[v0]) >> 12) -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> rgb2
// 14 cycles
static inline void ncs() {
    asm volatile("cop2 0x0c8041e");
}
110
// NCT - Normal color (triple)
//   limit(([ll]·[v0]) >> 12) -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> rgb0
//   limit(([ll]·[v1]) >> 12) -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> rgb1
//   limit(([ll]·[v2]) >> 12) -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> rgb2
// 30 cycles
static inline void nct() {
    asm volatile("cop2 0x0d80420");
}
120
// NCDS - Normal color depth cue (single vector)
//   limit(([ll]·[v0]) >> 12) -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   (1 - dp)·[rgb·sv] + dp·[fc] -> rgb2
// 19 cycles
static inline void ncds() {
    asm volatile("cop2 0x0e80413");
}
127
// NCDT - Normal color depth cue (triple vectors)
//   limit(([ll]·[v0]) >> 12) -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   (1 - dp)·[rgb·sv] + dp·[fc] -> rgb0
//   limit(([ll]·[v1]) >> 12) -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   (1 - dp)·[rgb·sv] + dp·[fc] -> rgb1
//   limit(([ll]·[v2]) >> 12) -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   (1 - dp)·[rgb·sv] + dp·[fc] -> rgb2
// 44 cycles
static inline void ncdt() {
    asm volatile("cop2 0x0f80416");
}
140
// NCCS - Normal Color Color (single vector)
//   limit(([ll]·[v0]) >> 12) -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   [rgb·sv] -> rgb2
// 17 cycles
static inline void nccs() {
    asm volatile("cop2 0x0108041b");
}
147
// NCCT - Normal Color Color (triple vector)
//   limit(([ll]·[v0]) >> 12) -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   [rgb·sv] -> rgb0
//   limit(([ll]·[v1]) >> 12) -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   [rgb·sv] -> rgb1
//   limit(([ll]·[v2]) >> 12) -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   [rgb·sv] -> rgb2
// 39 cycles
static inline void ncct() {
    asm volatile("cop2 0x0118043f");
}
160
// CDP - Color Depth Cue
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   (1 - dp)·[rgb·sv] + dp·[fc] -> rgb2
// 13 cycles
static inline void cdp() {
    asm volatile("cop2 0x01280414");
}
166
// CC - Color Color
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   [rgb·sv] -> rgb2
// 11 cycles
static inline void cc() {
    asm volatile("cop2 0x0138041c");
}
172
// NCLIP - Normal clipping
//   sx0*sy1 + sx1*sy2 + sx2*sy0 - sx0*sy2 - sx1*sy0 - sx2*sy1 -> opz
// This is the determinant of the 2x2 matrix
//   [sx1 - sx0, sy1 - sy0]
//   [sx2 - sx0, sy2 - sy0]
// 8 cycles
static inline void nclip() {
    asm volatile("cop2 0x01400006");
}
180
181// Z Average
182
// AVSZ3 - Average of three Z values (for Triangles)
//   zsf3 * (sz0 + sz1 + sz2) -> otz
// 5 cycles
static inline void avsz3() {
    asm volatile("cop2 0x0158002d");
}
187
// AVSZ4 - Average of four Z values (for Quads)
//   zsf4 * (sz0 + sz1 + sz2 + sz3) -> otz
// 6 cycles
static inline void avsz4() {
    asm volatile("cop2 0x0168002e");
}
192
193// Cross Product (improperly named Outer Product in Sony's lingo)
194// rt.22 * ir3 - rt.33 * ir2 -> ir1
195// rt.33 * ir1 - rt.11 * ir3 -> ir2
196// rt.11 * ir2 - rt.22 * ir1 -> ir3
197// 6 cycles
198template <SF sf = Shifted>
199static inline void cp() {
200 if constexpr (sf == Shifted) {
201 asm volatile("cop2 0x0178000c");
202 } else {
203 asm volatile("cop2 0x0170000c");
204 }
205}
206
207// General Interpolation
208
209// General purpose interpolation
210// dp·[sv] -> lv, sv
211// 5 cycles
212template <SF sf = Shifted>
213static inline void gpf() {
214 if constexpr (sf == Shifted) {
215 asm volatile("cop2 0x0198003d");
216 } else {
217 asm volatile("cop2 0x0190003d");
218 }
219}
220
221// General purpose interpolation with base
222// [lv] + dp·[sv] -> lv, sv
223// 5 cycles
224template <SF sf = Shifted>
225static inline void gpl() {
226 if constexpr (sf == Shifted) {
227 asm volatile("cop2 0x01a8003e");
228 } else {
229 asm volatile("cop2 0x01a0003e");
230 }
231}
232
233// All of the MVMVA operations take 8 cycles to complete.
234// The MVMVA operation is the basis for the matrix math operations.
235// The functions defined right underneath are simply aliases. They
236// are provided for convenience, as programmers may know them from
237// the original PS1 SDK documentation, but using the MVMVA operation
238// directly may actually be more readable.
239
// Multiplication Matrix selector:
//   RT - Rotation, LL - Light Source Direction, LC - Light Source Color
// The numeric value is what gets encoded into the command word.
enum class MX : unsigned { RT = 0, LL = 1, LC = 2 };
// Multiplication Vector selector: V0, V1, V2 or IR.
// The numeric value is what gets encoded into the command word.
enum class MV : unsigned { V0 = 0, V1 = 1, V2 = 2, IR = 3 };
// Translation Vector selector:
//   TR - Translation, BK - Back Color, FC - Far Color, Zero - no translation
// The numeric value is what gets encoded into the command word.
enum class TV : unsigned { TR = 0, BK = 1, FC = 2, Zero = 3 };
246
247// Multiply vector by matrix and add vector
248template <MX mx, MV v, TV cv = TV::Zero, SF sf = Shifted, LM lm = Unlimited>
249void mvmva() {
250 constexpr uint32_t op =
251 (4 << 20) | (sf << 19) | (uint32_t(mx) << 17) | (uint32_t(v) << 15) | (uint32_t(cv) << 13) | (lm << 10) | 18;
252 asm volatile("cop2 %0" : : "i"(op));
253}
254
255// Coordinate Conversion, Light Source Calculations
256// ([rt]·[v0]) >> 12 + [tr] -> lv, sv
257static inline void rt() { mvmva<MX::RT, MV::V0, TV::TR>(); }
258// limit(([ll]·[v0]) >> 12) -> lv, sv
259static inline void ll() { mvmva<MX::LL, MV::V0, TV::Zero, SF::Shifted, LM::Limited>(); }
260// limit(([lc]·[sv]) >> 12) + [bk] -> lv, sv
261static inline void lc() { mvmva<MX::LC, MV::IR, TV::BK, SF::Shifted, LM::Limited>(); }
262// [rt]·[sv] -> lv
263static inline void rtir_sf0() { mvmva<MX::RT, MV::IR, TV::Zero, SF::Unshifted>(); }
264
265// General Matrix Operations
266// ([rt]·[v0]) >> 12 -> lv, sv
267static inline void rtv0() { mvmva<MX::RT, MV::V0, TV::Zero>(); }
268// ([rt]·[v1]) >> 12 -> lv, sv
269static inline void rtv1() { mvmva<MX::RT, MV::V1, TV::Zero>(); }
270// ([rt]·[v2]) >> 12 -> lv, sv
271static inline void rtv2() { mvmva<MX::RT, MV::V2, TV::Zero>(); }
272// ([rt]·[sv]) >> 12 -> lv, sv
273static inline void rtir() { mvmva<MX::RT, MV::IR, TV::Zero>(); }
274// ([rt]·[v0]) >> 12 + [tr] -> lv, sv
275static inline void rtv0tr() { mvmva<MX::RT, MV::V0, TV::TR>(); }
276// ([rt]·[v1]) >> 12 + [tr] -> lv, sv
277static inline void rtv1tr() { mvmva<MX::RT, MV::V1, TV::TR>(); }
278// ([rt]·[v2]) >> 12 + [tr] -> lv, sv
279static inline void rtv2tr() { mvmva<MX::RT, MV::V2, TV::TR>(); }
280// ([rt]·[sv]) >> 12 + [tr] -> lv, sv
281static inline void rtirtr() { mvmva<MX::RT, MV::IR, TV::TR>(); }
282// ([rt]·[v0]) >> 12 + [bk] -> lv, sv
283static inline void rtv0bk() { mvmva<MX::RT, MV::V0, TV::BK>(); }
284// ([rt]·[v1]) >> 12 + [bk] -> lv, sv
285static inline void rtv1bk() { mvmva<MX::RT, MV::V1, TV::BK>(); }
286// ([rt]·[v2]) >> 12 + [bk] -> lv, sv
287static inline void rtv2bk() { mvmva<MX::RT, MV::V2, TV::BK>(); }
288// ([rt]·[sv]) >> 12 + [bk] -> lv, sv
289static inline void rtirbk() { mvmva<MX::RT, MV::IR, TV::BK>(); }
290// ([rt]·[v0]) >> 12 + [fc] -> lv, sv
291static inline void rtv0fc() { mvmva<MX::RT, MV::V0, TV::FC>(); }
292// ([rt]·[v1]) >> 12 + [fc] -> lv, sv
293static inline void rtv1fc() { mvmva<MX::RT, MV::V1, TV::FC>(); }
294// ([rt]·[v2]) >> 12 + [fc] -> lv, sv
295static inline void rtv2fc() { mvmva<MX::RT, MV::V2, TV::FC>(); }
296// ([rt]·[sv]) >> 12 + [fc] -> lv, sv
297static inline void rtirfc() { mvmva<MX::RT, MV::IR, TV::FC>(); }
298// ([ll]·[v0]) >> 12 -> lv, sv
299static inline void llv0() { mvmva<MX::LL, MV::V0, TV::Zero>(); }
300// ([ll]·[v1]) >> 12 -> lv, sv
301static inline void llv1() { mvmva<MX::LL, MV::V1, TV::Zero>(); }
302// ([ll]·[v2]) >> 12 -> lv, sv
303static inline void llv2() { mvmva<MX::LL, MV::V2, TV::Zero>(); }
304// ([ll]·[sv]) >> 12 -> lv, sv
305static inline void llir() { mvmva<MX::LL, MV::IR, TV::Zero>(); }
306// ([ll]·[v0]) >> 12 + [tr] -> lv, sv
307static inline void llv0tr() { mvmva<MX::LL, MV::V0, TV::TR>(); }
308// ([ll]·[v1]) >> 12 + [tr] -> lv, sv
309static inline void llv1tr() { mvmva<MX::LL, MV::V1, TV::TR>(); }
310// ([ll]·[v2]) >> 12 + [tr] -> lv, sv
311static inline void llv2tr() { mvmva<MX::LL, MV::V2, TV::TR>(); }
312// ([ll]·[sv]) >> 12 + [tr] -> lv, sv
313static inline void llirtr() { mvmva<MX::LL, MV::IR, TV::TR>(); }
314// ([ll]·[v0]) >> 12 + [bk] -> lv, sv
315static inline void llv0bk() { mvmva<MX::LL, MV::V0, TV::BK>(); }
316// ([ll]·[v1]) >> 12 + [bk] -> lv, sv
317static inline void llv1bk() { mvmva<MX::LL, MV::V1, TV::BK>(); }
318// ([ll]·[v2]) >> 12 + [bk] -> lv, sv
319static inline void llv2bk() { mvmva<MX::LL, MV::V2, TV::BK>(); }
320// ([ll]·[sv]) >> 12 + [bk] -> lv, sv
321static inline void llirbk() { mvmva<MX::LL, MV::IR, TV::BK>(); }
322// ([ll]·[v0]) >> 12 + [fc] -> lv, sv
323static inline void llv0fc() { mvmva<MX::LL, MV::V0, TV::FC>(); }
324// ([ll]·[v1]) >> 12 + [fc] -> lv, sv
325static inline void llv1fc() { mvmva<MX::LL, MV::V1, TV::FC>(); }
326// ([ll]·[v2]) >> 12 + [fc] -> lv, sv
327static inline void llv2fc() { mvmva<MX::LL, MV::V2, TV::FC>(); }
328// ([ll]·[sv]) >> 12 + [fc] -> lv, sv
329static inline void llirfc() { mvmva<MX::LL, MV::IR, TV::FC>(); }
330// ([lc]·[v0]) >> 12 -> lv, sv
331static inline void lcv0() { mvmva<MX::LC, MV::V0, TV::Zero>(); }
332// ([lc]·[v1]) >> 12 -> lv, sv
333static inline void lcv1() { mvmva<MX::LC, MV::V1, TV::Zero>(); }
334// ([lc]·[v2]) >> 12 -> lv, sv
335static inline void lcv2() { mvmva<MX::LC, MV::V2, TV::Zero>(); }
336// ([lc]·[sv]) >> 12 -> lv, sv
337static inline void lcir() { mvmva<MX::LC, MV::IR, TV::Zero>(); }
338// ([lc]·[v0]) >> 12 + [tr] -> lv, sv
339static inline void lcv0tr() { mvmva<MX::LC, MV::V0, TV::TR>(); }
340// ([lc]·[v1]) >> 12 + [tr] -> lv, sv
341static inline void lcv1tr() { mvmva<MX::LC, MV::V1, TV::TR>(); }
342// ([lc]·[v2]) >> 12 + [tr] -> lv, sv
343static inline void lcv2tr() { mvmva<MX::LC, MV::V2, TV::TR>(); }
344// ([lc]·[sv]) >> 12 + [tr] -> lv, sv
345static inline void lcirtr() { mvmva<MX::LC, MV::IR, TV::TR>(); }
346// ([lc]·[v0]) >> 12 + [bk] -> lv, sv
347static inline void lcv0bk() { mvmva<MX::LC, MV::V0, TV::BK>(); }
348// ([lc]·[v1]) >> 12 + [bk] -> lv, sv
349static inline void lcv1bk() { mvmva<MX::LC, MV::V1, TV::BK>(); }
350// ([lc]·[v2]) >> 12 + [bk] -> lv, sv
351static inline void lcv2bk() { mvmva<MX::LC, MV::V2, TV::BK>(); }
352// ([lc]·[sv]) >> 12 + [bk] -> lv, sv
353static inline void lcirbk() { mvmva<MX::LC, MV::IR, TV::BK>(); }
354// ([lc]·[v0]) >> 12 + [fc] -> lv, sv
355static inline void lcv0fc() { mvmva<MX::LC, MV::V0, TV::FC>(); }
356// ([lc]·[v1]) >> 12 + [fc] -> lv, sv
357static inline void lcv1fc() { mvmva<MX::LC, MV::V1, TV::FC>(); }
358// ([lc]·[v2]) >> 12 + [fc] -> lv, sv
359static inline void lcv2fc() { mvmva<MX::LC, MV::V2, TV::FC>(); }
360// ([lc]·[sv]) >> 12 + [fc] -> lv, sv
361static inline void lcirfc() { mvmva<MX::LC, MV::IR, TV::FC>(); }
362
363} // namespace Kernels
364
365} // namespace GTE
366
367} // namespace psyqo
LM
Definition gte-kernels.hh:51
@ Limited
Definition gte-kernels.hh:51
@ Unlimited
Definition gte-kernels.hh:51
SF
Definition gte-kernels.hh:49
@ Shifted
Definition gte-kernels.hh:49
@ Unshifted
Definition gte-kernels.hh:49
TV
Definition gte-kernels.hh:245
void mvmva()
Definition gte-kernels.hh:249
MV
Definition gte-kernels.hh:243
MX
Definition gte-kernels.hh:241
Definition cdrom-loader.hh:39
void uint32_t(classId, spec)