More perf tweaks to sprite rendering

This commit is contained in:
David Guillen Fandos 2023-08-04 01:30:53 +02:00
parent 4822923b90
commit 3858694aad

108
video.cc
View file

@ -45,7 +45,7 @@ typedef struct {
typedef void (* bitmap_render_function)( typedef void (* bitmap_render_function)(
u32 start, u32 end, void *dest_ptr, const u16 *pal); u32 start, u32 end, void *dest_ptr, const u16 *pal);
typedef void (* tile_render_function)( typedef void (* tile_render_function)(
u32 layer, u32 start, u32 end, void *dest_ptr); u32 layer, u32 start, u32 end, void *dest_ptr, const u16 *pal);
typedef void (*conditional_render_function)( typedef void (*conditional_render_function)(
u32 start, u32 end, u16 *scanline, u32 enable_flags); u32 start, u32 end, u16 *scanline, u32 enable_flags);
@ -144,12 +144,11 @@ void video_reload_counters()
template<typename dsttype, rendtype rdtype, bool is8bpp, bool isbase, bool hflip> template<typename dsttype, rendtype rdtype, bool is8bpp, bool isbase, bool hflip>
static inline void render_part_tile_Nbpp(u32 bg_comb, u32 px_comb, static inline void render_part_tile_Nbpp(u32 bg_comb, u32 px_comb,
dsttype *dest_ptr, u32 start, u32 end, u16 tile, dsttype *dest_ptr, u32 start, u32 end, u16 tile,
const u8 *tile_base, int vertical_pixel_flip const u8 *tile_base, int vertical_pixel_flip, const u16 *paltbl
) { ) {
// Seek to the specified tile, using the tile number and size. // Seek to the specified tile, using the tile number and size.
// tile_base already points to the right tile-line vertical offset // tile_base already points to the right tile-line vertical offset
const u8 *tile_ptr = &tile_base[(tile & 0x3FF) * (is8bpp ? 64 : 32)]; const u8 *tile_ptr = &tile_base[(tile & 0x3FF) * (is8bpp ? 64 : 32)];
const u16 *paltbl = &palette_ram_converted[0];
u16 bgcolor = paltbl[0]; u16 bgcolor = paltbl[0];
// On vertical flip, apply the mirror offset // On vertical flip, apply the mirror offset
@ -240,12 +239,13 @@ static inline void render_tile_Nbpp(
} }
} else { } else {
u16 tilepal = (tile >> 12) << 4; u16 tilepal = (tile >> 12) << 4;
const u16 *subpal = &paltbl[tilepal];
u32 tilepix = eswap32(*(u32*)tile_ptr); u32 tilepix = eswap32(*(u32*)tile_ptr);
for (u32 i = 0; i < 8; i++, dest_ptr++) { for (u32 i = 0; i < 8; i++, dest_ptr++) {
u8 pval = (hflip ? (tilepix >> ((7-i)*4)) : (tilepix >> (i*4))) & 0xF; u8 pval = (hflip ? (tilepix >> ((7-i)*4)) : (tilepix >> (i*4))) & 0xF;
if (pval) { if (pval) {
if (rdtype == FULLCOLOR) if (rdtype == FULLCOLOR)
*dest_ptr = paltbl[tilepal | pval]; *dest_ptr = subpal[pval];
else if (rdtype == INDXCOLOR) else if (rdtype == INDXCOLOR)
*dest_ptr = px_comb | tilepal | pval; *dest_ptr = px_comb | tilepal | pval;
else if (rdtype == STCKCOLOR) else if (rdtype == STCKCOLOR)
@ -264,7 +264,7 @@ static inline void render_tile_Nbpp(
template<typename stype, rendtype rdtype, bool isbase, bool is8bpp> template<typename stype, rendtype rdtype, bool isbase, bool is8bpp>
static void render_scanline_text(u32 layer, static void render_scanline_text(u32 layer,
u32 start, u32 end, void *scanline) u32 start, u32 end, void *scanline, const u16 * paltbl)
{ {
u32 bg_control = read_ioreg(REG_BGxCNT(layer)); u32 bg_control = read_ioreg(REG_BGxCNT(layer));
u16 vcount = read_ioreg(REG_VCOUNT); u16 vcount = read_ioreg(REG_VCOUNT);
@ -273,7 +273,6 @@ static void render_scanline_text(u32 layer,
u32 hoffset = (start + read_ioreg(REG_BGxHOFS(layer))) % 512; u32 hoffset = (start + read_ioreg(REG_BGxHOFS(layer))) % 512;
u32 voffset = (vcount + read_ioreg(REG_BGxVOFS(layer))) % 512; u32 voffset = (vcount + read_ioreg(REG_BGxVOFS(layer))) % 512;
stype *dest_ptr = ((stype*)scanline) + start; stype *dest_ptr = ((stype*)scanline) + start;
const u16 * paltbl = &palette_ram_converted[0];
u32 i; u32 i;
// Calculate combine masks. These store 2 bits of info: 1st and 2nd target. // Calculate combine masks. These store 2 bits of info: 1st and 2nd target.
@ -349,10 +348,10 @@ static void render_scanline_text(u32 layer,
u16 tile = eswap16(*map_ptr++); u16 tile = eswap16(*map_ptr++);
if (tile & 0x400) // Tile horizontal flip if (tile & 0x400) // Tile horizontal flip
render_part_tile_Nbpp<stype, rdtype, is8bpp, isbase, true>( render_part_tile_Nbpp<stype, rdtype, is8bpp, isbase, true>(
bg_comb, px_comb, dest_ptr, tile_hoff, stop, tile, tile_base, vflip_off); bg_comb, px_comb, dest_ptr, tile_hoff, stop, tile, tile_base, vflip_off, paltbl);
else else
render_part_tile_Nbpp<stype, rdtype, is8bpp, isbase, false>( render_part_tile_Nbpp<stype, rdtype, is8bpp, isbase, false>(
bg_comb, px_comb, dest_ptr, tile_hoff, stop, tile, tile_base, vflip_off); bg_comb, px_comb, dest_ptr, tile_hoff, stop, tile, tile_base, vflip_off, paltbl);
dest_ptr += todraw; dest_ptr += todraw;
end -= todraw; end -= todraw;
@ -407,10 +406,10 @@ static void render_scanline_text(u32 layer,
u16 tile = eswap16(*map_ptr++); u16 tile = eswap16(*map_ptr++);
if (tile & 0x400) // Tile horizontal flip if (tile & 0x400) // Tile horizontal flip
render_part_tile_Nbpp<stype, rdtype, is8bpp, isbase, true>( render_part_tile_Nbpp<stype, rdtype, is8bpp, isbase, true>(
bg_comb, px_comb, dest_ptr, 0, end, tile, tile_base, vflip_off); bg_comb, px_comb, dest_ptr, 0, end, tile, tile_base, vflip_off, paltbl);
else else
render_part_tile_Nbpp<stype, rdtype, is8bpp, isbase, false>( render_part_tile_Nbpp<stype, rdtype, is8bpp, isbase, false>(
bg_comb, px_comb, dest_ptr, 0, end, tile, tile_base, vflip_off); bg_comb, px_comb, dest_ptr, 0, end, tile, tile_base, vflip_off, paltbl);
} }
} }
} }
@ -418,7 +417,7 @@ static void render_scanline_text(u32 layer,
template<typename dsttype, rendtype rdtype, bool isbase> template<typename dsttype, rendtype rdtype, bool isbase>
static inline void render_pixel_8bpp(u32 layer, static inline void render_pixel_8bpp(u32 layer,
dsttype *dest_ptr, u32 px, u32 py, u32 bg_comb, u32 px_comb, dsttype *dest_ptr, u32 px, u32 py, u32 bg_comb, u32 px_comb,
const u8 *tile_base, const u8 *map_base, u32 map_size const u8 *tile_base, const u8 *map_base, u32 map_size, const u16 *pal
) { ) {
// Pitch represents the log2(number of tiles per row) (from 16 to 128) // Pitch represents the log2(number of tiles per row) (from 16 to 128)
u32 map_pitch = map_size + 4; u32 map_pitch = map_size + 4;
@ -432,7 +431,7 @@ static inline void render_pixel_8bpp(u32 layer,
// Alhpa mode stacks previous value (unless rendering the first layer) // Alhpa mode stacks previous value (unless rendering the first layer)
if (pval) { if (pval) {
if (rdtype == FULLCOLOR) if (rdtype == FULLCOLOR)
*dest_ptr = palette_ram_converted[pval]; *dest_ptr = pal[pval];
else if (rdtype == INDXCOLOR) else if (rdtype == INDXCOLOR)
*dest_ptr = pval | px_comb; // Add combine flags *dest_ptr = pval | px_comb; // Add combine flags
else if (rdtype == STCKCOLOR) else if (rdtype == STCKCOLOR)
@ -442,17 +441,17 @@ static inline void render_pixel_8bpp(u32 layer,
else if (isbase) { else if (isbase) {
// Transparent pixel, but we are base layer, so render background. // Transparent pixel, but we are base layer, so render background.
if (rdtype == FULLCOLOR) if (rdtype == FULLCOLOR)
*dest_ptr = palette_ram_converted[0]; *dest_ptr = pal[0];
else else
*dest_ptr = 0 | bg_comb; // Just backdrop color and combine flags *dest_ptr = 0 | bg_comb; // Just backdrop color and combine flags
} }
} }
template<typename dsttype, rendtype rdtype> template<typename dsttype, rendtype rdtype>
static inline void render_bdrop_pixel_8bpp(dsttype *dest_ptr, u32 bg_comb) { static inline void render_bdrop_pixel_8bpp(dsttype *dest_ptr, u32 bg_comb, u16 bgcol) {
// Alhpa mode stacks previous value (unless rendering the first layer) // Alhpa mode stacks previous value (unless rendering the first layer)
if (rdtype == FULLCOLOR) if (rdtype == FULLCOLOR)
*dest_ptr = palette_ram_converted[0]; *dest_ptr = bgcol;
else else
*dest_ptr = 0 | bg_comb; *dest_ptr = 0 | bg_comb;
} }
@ -463,7 +462,8 @@ static inline void render_bdrop_pixel_8bpp(dsttype *dest_ptr, u32 bg_comb) {
template <typename dsttype, rendtype rdtype, bool isbase, bool wrap, bool rotate> template <typename dsttype, rendtype rdtype, bool isbase, bool wrap, bool rotate>
static inline void render_affine_background( static inline void render_affine_background(
u32 layer, u32 start, u32 cnt, const u8 *map_base, u32 layer, u32 start, u32 cnt, const u8 *map_base,
u32 map_size, const u8 *tile_base, dsttype *dst_ptr) { u32 map_size, const u8 *tile_base, dsttype *dst_ptr,
const u16* pal) {
// Backdrop and current layer combine bits. // Backdrop and current layer combine bits.
u32 bg_comb = color_flags(5); u32 bg_comb = color_flags(5);
@ -488,7 +488,7 @@ static inline void render_affine_background(
// Lookup pixel and draw it. // Lookup pixel and draw it.
render_pixel_8bpp<dsttype, rdtype, isbase>( render_pixel_8bpp<dsttype, rdtype, isbase>(
layer, dst_ptr++, pixel_x, pixel_y, bg_comb, px_comb, layer, dst_ptr++, pixel_x, pixel_y, bg_comb, px_comb,
tile_base, map_base, map_size); tile_base, map_base, map_size, pal);
source_x += dx; source_y += dy; // Move to the next pixel source_x += dx; source_y += dy; // Move to the next pixel
} }
@ -499,11 +499,12 @@ static inline void render_affine_background(
u32 pixel_x = (u32)(source_x >> 8) & (width_height-1); u32 pixel_x = (u32)(source_x >> 8) & (width_height-1);
render_pixel_8bpp<dsttype, rdtype, isbase>( render_pixel_8bpp<dsttype, rdtype, isbase>(
layer, dst_ptr++, pixel_x, pixel_y, bg_comb, px_comb, layer, dst_ptr++, pixel_x, pixel_y, bg_comb, px_comb,
tile_base, map_base, map_size); tile_base, map_base, map_size, pal);
source_x += dx; // Only moving in the X direction. source_x += dx; // Only moving in the X direction.
} }
} }
} else { } else {
u16 bgcol = pal[0];
if (rotate) { if (rotate) {
// Draw backdrop pixels if necessary until we reach the background edge. // Draw backdrop pixels if necessary until we reach the background edge.
while (cnt) { while (cnt) {
@ -516,7 +517,7 @@ static inline void render_affine_background(
// Draw a backdrop pixel if we are the base layer. // Draw a backdrop pixel if we are the base layer.
if (isbase) if (isbase)
render_bdrop_pixel_8bpp<dsttype, rdtype>(dst_ptr, bg_comb); render_bdrop_pixel_8bpp<dsttype, rdtype>(dst_ptr, bg_comb, bgcol);
dst_ptr++; dst_ptr++;
source_x += dx; source_y += dy; source_x += dx; source_y += dy;
@ -534,7 +535,7 @@ static inline void render_affine_background(
// Lookup pixel and draw it. // Lookup pixel and draw it.
render_pixel_8bpp<dsttype, rdtype, isbase>( render_pixel_8bpp<dsttype, rdtype, isbase>(
layer, dst_ptr++, pixel_x, pixel_y, bg_comb, px_comb, layer, dst_ptr++, pixel_x, pixel_y, bg_comb, px_comb,
tile_base, map_base, map_size); tile_base, map_base, map_size, pal);
// Move to the next pixel, update coords accordingly // Move to the next pixel, update coords accordingly
cnt--; cnt--;
@ -553,7 +554,7 @@ static inline void render_affine_background(
break; break;
if (isbase) if (isbase)
render_bdrop_pixel_8bpp<dsttype, rdtype>(dst_ptr, bg_comb); render_bdrop_pixel_8bpp<dsttype, rdtype>(dst_ptr, bg_comb, bgcol);
dst_ptr++; dst_ptr++;
source_x += dx; source_x += dx;
@ -567,7 +568,7 @@ static inline void render_affine_background(
render_pixel_8bpp<dsttype, rdtype, isbase>( render_pixel_8bpp<dsttype, rdtype, isbase>(
layer, dst_ptr++, pixel_x, pixel_y, bg_comb, px_comb, layer, dst_ptr++, pixel_x, pixel_y, bg_comb, px_comb,
tile_base, map_base, map_size); tile_base, map_base, map_size, pal);
source_x += dx; source_x += dx;
cnt--; cnt--;
@ -579,7 +580,7 @@ static inline void render_affine_background(
// Only necessary for the base layer, otherwise we can safely finish. // Only necessary for the base layer, otherwise we can safely finish.
if (isbase) if (isbase)
while (cnt--) while (cnt--)
render_bdrop_pixel_8bpp<dsttype, rdtype>(dst_ptr++, bg_comb); render_bdrop_pixel_8bpp<dsttype, rdtype>(dst_ptr++, bg_comb, bgcol);
} }
} }
@ -590,7 +591,7 @@ static inline void render_affine_background(
template<typename dsttype, rendtype rdtype, bool isbase> template<typename dsttype, rendtype rdtype, bool isbase>
static void render_scanline_affine(u32 layer, static void render_scanline_affine(u32 layer,
u32 start, u32 end, void *scanline) u32 start, u32 end, void *scanline, const u16 *pal)
{ {
u32 bg_control = read_ioreg(REG_BGxCNT(layer)); u32 bg_control = read_ioreg(REG_BGxCNT(layer));
u32 map_size = (bg_control >> 14) & 0x03; u32 map_size = (bg_control >> 14) & 0x03;
@ -612,17 +613,17 @@ static void render_scanline_affine(u32 layer,
if (has_wrap) { if (has_wrap) {
if (has_rotation) if (has_rotation)
render_affine_background<dsttype, rdtype, isbase, true, true>( render_affine_background<dsttype, rdtype, isbase, true, true>(
layer, start, end - start, map_base, map_size, tile_base, dest_ptr); layer, start, end - start, map_base, map_size, tile_base, dest_ptr, pal);
else else
render_affine_background<dsttype, rdtype, isbase, true, false>( render_affine_background<dsttype, rdtype, isbase, true, false>(
layer, start, end - start, map_base, map_size, tile_base, dest_ptr); layer, start, end - start, map_base, map_size, tile_base, dest_ptr, pal);
} else { } else {
if (has_rotation) if (has_rotation)
render_affine_background<dsttype, rdtype, isbase, false, true>( render_affine_background<dsttype, rdtype, isbase, false, true>(
layer, start, end - start, map_base, map_size, tile_base, dest_ptr); layer, start, end - start, map_base, map_size, tile_base, dest_ptr, pal);
else else
render_affine_background<dsttype, rdtype, isbase, false, false>( render_affine_background<dsttype, rdtype, isbase, false, false>(
layer, start, end - start, map_base, map_size, tile_base, dest_ptr); layer, start, end - start, map_base, map_size, tile_base, dest_ptr, pal);
} }
} }
@ -781,7 +782,7 @@ typedef struct {
// tile_offset points to the VRAM offset where the data lives. // tile_offset points to the VRAM offset where the data lives.
template<typename dsttype, rendtype rdtype, bool is8bpp, bool hflip> template<typename dsttype, rendtype rdtype, bool is8bpp, bool hflip>
static inline void render_obj_part_tile_Nbpp(u32 px_comb, static inline void render_obj_part_tile_Nbpp(u32 px_comb,
dsttype *dest_ptr, u32 start, u32 end, u32 tile_offset, u16 palette dsttype *dest_ptr, u32 start, u32 end, u32 tile_offset, u16 palette, const u16 *pal
) { ) {
// Note that the last VRAM bank wrap around, hence the offset aliasing // Note that the last VRAM bank wrap around, hence the offset aliasing
const u8* tile_ptr = &vram[0x10000 + (tile_offset & 0x7FFF)]; const u8* tile_ptr = &vram[0x10000 + (tile_offset & 0x7FFF)];
@ -795,7 +796,7 @@ static inline void render_obj_part_tile_Nbpp(u32 px_comb,
// Alhpa mode stacks previous value // Alhpa mode stacks previous value
if (pval) { if (pval) {
if (rdtype == FULLCOLOR) if (rdtype == FULLCOLOR)
*dest_ptr = palette_ram_converted[pval | 0x100]; *dest_ptr = pal[pval];
else if (rdtype == INDXCOLOR) else if (rdtype == INDXCOLOR)
*dest_ptr = pval | px_comb | 0x100; // Add combine flags *dest_ptr = pval | px_comb | 0x100; // Add combine flags
else if (rdtype == STCKCOLOR) { else if (rdtype == STCKCOLOR) {
@ -816,10 +817,11 @@ static inline void render_obj_part_tile_Nbpp(u32 px_comb,
u32 selb = hflip ? (3-i/2) : i/2; u32 selb = hflip ? (3-i/2) : i/2;
u32 seln = hflip ? ((i & 1) ^ 1) : (i & 1); u32 seln = hflip ? ((i & 1) ^ 1) : (i & 1);
u8 pval = (tile_ptr[selb] >> (seln * 4)) & 0xF; u8 pval = (tile_ptr[selb] >> (seln * 4)) & 0xF;
const u16 *subpal = &pal[palette];
if (pval) { if (pval) {
u8 colidx = pval | palette; u8 colidx = pval | palette;
if (rdtype == FULLCOLOR) if (rdtype == FULLCOLOR)
*dest_ptr = palette_ram_converted[colidx | 0x100]; *dest_ptr = subpal[pval];
else if (rdtype == INDXCOLOR) else if (rdtype == INDXCOLOR)
*dest_ptr = colidx | px_comb | 0x100; *dest_ptr = colidx | px_comb | 0x100;
else if (rdtype == STCKCOLOR) { else if (rdtype == STCKCOLOR) {
@ -838,7 +840,7 @@ static inline void render_obj_part_tile_Nbpp(u32 px_comb,
// Same as above but optimized for full tiles // Same as above but optimized for full tiles
template<typename dsttype, rendtype rdtype, bool is8bpp, bool hflip> template<typename dsttype, rendtype rdtype, bool is8bpp, bool hflip>
static inline void render_obj_tile_Nbpp(u32 px_comb, static inline void render_obj_tile_Nbpp(u32 px_comb,
dsttype *dest_ptr, u32 tile_offset, u16 palette dsttype *dest_ptr, u32 tile_offset, u16 palette, const u16 *pal
) { ) {
const u8* tile_ptr = &vram[0x10000 + (tile_offset & 0x7FFF)]; const u8* tile_ptr = &vram[0x10000 + (tile_offset & 0x7FFF)];
@ -849,7 +851,7 @@ static inline void render_obj_tile_Nbpp(u32 px_comb,
u8 pval = hflip ? (tilepix >> (24 - i*8)) : (tilepix >> (i*8)); u8 pval = hflip ? (tilepix >> (24 - i*8)) : (tilepix >> (i*8));
if (pval) { if (pval) {
if (rdtype == FULLCOLOR) if (rdtype == FULLCOLOR)
*dest_ptr = palette_ram_converted[pval | 0x100]; *dest_ptr = pal[pval];
else if (rdtype == INDXCOLOR) else if (rdtype == INDXCOLOR)
*dest_ptr = pval | px_comb | 0x100; // Add combine flags *dest_ptr = pval | px_comb | 0x100; // Add combine flags
else if (rdtype == STCKCOLOR) { else if (rdtype == STCKCOLOR) {
@ -867,10 +869,11 @@ static inline void render_obj_tile_Nbpp(u32 px_comb,
u32 tilepix = eswap32(*(u32*)tile_ptr); u32 tilepix = eswap32(*(u32*)tile_ptr);
for (u32 i = 0; i < 8; i++, dest_ptr++) { for (u32 i = 0; i < 8; i++, dest_ptr++) {
u8 pval = (hflip ? (tilepix >> ((7-i)*4)) : (tilepix >> (i*4))) & 0xF; u8 pval = (hflip ? (tilepix >> ((7-i)*4)) : (tilepix >> (i*4))) & 0xF;
const u16 *subpal = &pal[palette];
if (pval) { if (pval) {
u8 colidx = pval | palette; u8 colidx = pval | palette;
if (rdtype == FULLCOLOR) if (rdtype == FULLCOLOR)
*dest_ptr = palette_ram_converted[colidx | 0x100]; *dest_ptr = subpal[pval];
else if (rdtype == INDXCOLOR) else if (rdtype == INDXCOLOR)
*dest_ptr = colidx | px_comb | 0x100; *dest_ptr = colidx | px_comb | 0x100;
else if (rdtype == STCKCOLOR) { else if (rdtype == STCKCOLOR) {
@ -891,7 +894,8 @@ static inline void render_obj_tile_Nbpp(u32 px_comb,
// cnt is the maximum number of pixels to draw, honoring window, obj width, etc. // cnt is the maximum number of pixels to draw, honoring window, obj width, etc.
template <typename stype, rendtype rdtype, bool is8bpp, bool hflip> template <typename stype, rendtype rdtype, bool is8bpp, bool hflip>
static void render_object( static void render_object(
s32 delta_x, u32 cnt, stype *dst_ptr, u32 tile_offset, u32 px_comb, u16 palette s32 delta_x, u32 cnt, stype *dst_ptr, u32 tile_offset, u32 px_comb, u16 palette,
const u16* palptr
) { ) {
// Tile size in bytes for each mode // Tile size in bytes for each mode
const u32 tile_bsize = is8bpp ? tile_size_8bpp : tile_size_4bpp; const u32 tile_bsize = is8bpp ? tile_size_8bpp : tile_size_4bpp;
@ -911,7 +915,7 @@ static void render_object(
u32 residual = 8 - tile_off; // Pixel count to complete the first tile u32 residual = 8 - tile_off; // Pixel count to complete the first tile
u32 maxpix = MIN(residual, cnt); u32 maxpix = MIN(residual, cnt);
render_obj_part_tile_Nbpp<stype, rdtype, is8bpp, hflip>( render_obj_part_tile_Nbpp<stype, rdtype, is8bpp, hflip>(
px_comb, dst_ptr, tile_off, tile_off + maxpix, tile_offset, palette); px_comb, dst_ptr, tile_off, tile_off + maxpix, tile_offset, palette, palptr);
// Move to the next tile // Move to the next tile
tile_offset += tile_size_off; tile_offset += tile_size_off;
@ -928,7 +932,7 @@ static void render_object(
s32 num_tiles = cnt / 8; s32 num_tiles = cnt / 8;
while (num_tiles--) { while (num_tiles--) {
// Render full tiles // Render full tiles
render_obj_tile_Nbpp<stype, rdtype, is8bpp, hflip>(px_comb, dst_ptr, tile_offset, palette); render_obj_tile_Nbpp<stype, rdtype, is8bpp, hflip>(px_comb, dst_ptr, tile_offset, palette, palptr);
tile_offset += tile_size_off; tile_offset += tile_size_off;
dst_ptr += 8; dst_ptr += 8;
} }
@ -937,7 +941,7 @@ static void render_object(
cnt = cnt % 8; cnt = cnt % 8;
if (cnt) if (cnt)
render_obj_part_tile_Nbpp<stype, rdtype, is8bpp, hflip>( render_obj_part_tile_Nbpp<stype, rdtype, is8bpp, hflip>(
px_comb, dst_ptr, 0, cnt, tile_offset, palette); px_comb, dst_ptr, 0, cnt, tile_offset, palette, palptr);
} }
@ -1062,7 +1066,8 @@ static void render_affine_object(
// Renders a single sprite on the current scanline // Renders a single sprite on the current scanline
template <typename stype, rendtype rdtype, bool is8bpp> template <typename stype, rendtype rdtype, bool is8bpp>
inline static void render_sprite( inline static void render_sprite(
const t_sprite *obji, bool is_affine, u32 start, u32 end, stype *scanline, u32 pxcomb const t_sprite *obji, bool is_affine, u32 start, u32 end, stype *scanline,
u32 pxcomb, const u16* palptr
) { ) {
s32 vcount = read_ioreg(REG_VCOUNT); s32 vcount = read_ioreg(REG_VCOUNT);
bool obj1dmap = read_ioreg(REG_DISPCNT) & 0x40; bool obj1dmap = read_ioreg(REG_DISPCNT) & 0x40;
@ -1118,17 +1123,17 @@ inline static void render_sprite(
if (hflip) if (hflip)
render_object<stype, rdtype, is8bpp, true>( render_object<stype, rdtype, is8bpp, true>(
obj_x_offset, max_draw, &scanline[start], tile_offset, pxcomb, pal); obj_x_offset, max_draw, &scanline[start], tile_offset, pxcomb, pal, palptr);
else else
render_object<stype, rdtype, is8bpp, false>( render_object<stype, rdtype, is8bpp, false>(
obj_x_offset, max_draw, &scanline[start], tile_offset, pxcomb, pal); obj_x_offset, max_draw, &scanline[start], tile_offset, pxcomb, pal, palptr);
} }
} }
// Renders objects on a scanline for a given priority. // Renders objects on a scanline for a given priority.
template <typename stype, rendtype rdtype> template <typename stype, rendtype rdtype>
static void render_scanline_objs( static void render_scanline_objs(
u32 priority, u32 start, u32 end, void *raw_ptr u32 priority, u32 start, u32 end, void *raw_ptr, const u16* palptr
) { ) {
stype *scanline = (stype*)raw_ptr; stype *scanline = (stype*)raw_ptr;
s32 vcount = read_ioreg(REG_VCOUNT); s32 vcount = read_ioreg(REG_VCOUNT);
@ -1196,10 +1201,10 @@ static void render_scanline_objs(
if (is_8bpp) if (is_8bpp)
render_sprite<stype, rdtype, true>( render_sprite<stype, rdtype, true>(
&obji, is_affine, start, end, scanline, pxcomb); &obji, is_affine, start, end, scanline, pxcomb, palptr);
else else
render_sprite<stype, rdtype, false>( render_sprite<stype, rdtype, false>(
&obji, is_affine, start, end, scanline, pxcomb); &obji, is_affine, start, end, scanline, pxcomb, palptr);
} }
} }
@ -1549,9 +1554,11 @@ void render_layers(u32 start, u32 end, dsttype *dst_ptr, u32 enabled_layers) {
// Optimization: skip blending mode if no blending can happen to this layer // Optimization: skip blending mode if no blending can happen to this layer
if (objmode == STCKCOLOR && can_skip_blend) if (objmode == STCKCOLOR && can_skip_blend)
render_scanline_objs<dsttype, INDXCOLOR>(layer & 0x3, start, end, dst_ptr); render_scanline_objs<dsttype, INDXCOLOR>(
layer & 0x3, start, end, dst_ptr, &palette_ram_converted[0x100]);
else else
render_scanline_objs<dsttype, objmode>(layer & 0x3, start, end, dst_ptr); render_scanline_objs<dsttype, objmode>(
layer & 0x3, start, end, dst_ptr, &palette_ram_converted[0x100]);
base_done = 1; base_done = 1;
} }
@ -1576,7 +1583,7 @@ void render_layers(u32 start, u32 end, dsttype *dst_ptr, u32 enabled_layers) {
render_scanline_affine<dsttype, INDXCOLOR, true>, render_scanline_affine<dsttype, INDXCOLOR, true>,
render_scanline_affine<dsttype, INDXCOLOR, false>, render_scanline_affine<dsttype, INDXCOLOR, false>,
}; };
rdfns[fnidx](layer, start, end, dst_ptr); rdfns[fnidx](layer, start, end, dst_ptr, palette_ram_converted);
} else { } else {
static const tile_render_function rdfns[8] = { static const tile_render_function rdfns[8] = {
render_scanline_text<dsttype, bgmode, true, false>, render_scanline_text<dsttype, bgmode, true, false>,
@ -1588,7 +1595,7 @@ void render_layers(u32 start, u32 end, dsttype *dst_ptr, u32 enabled_layers) {
render_scanline_affine<dsttype, bgmode, true>, render_scanline_affine<dsttype, bgmode, true>,
render_scanline_affine<dsttype, bgmode, false>, render_scanline_affine<dsttype, bgmode, false>,
}; };
rdfns[fnidx](layer, start, end, dst_ptr); rdfns[fnidx](layer, start, end, dst_ptr, palette_ram_converted);
} }
base_done = 1; base_done = 1;
@ -1744,7 +1751,8 @@ static void render_conditional_bitmap(
if(current_layer & 0x04) if(current_layer & 0x04)
{ {
if(enable_flags & 0x10) if(enable_flags & 0x10)
render_scanline_objs<u16, FULLCOLOR>(current_layer & 3, start, end, scanline); render_scanline_objs<u16, FULLCOLOR>(
current_layer & 3, start, end, scanline, &palette_ram_converted[0x100]);
} }
else else
{ {
@ -1800,7 +1808,7 @@ static void render_windowobj_pass(u16 *scanline, u32 start, u32 end)
// WinObj-mode to a temporary buffer and performs a "copy-mode" render. // WinObj-mode to a temporary buffer and performs a "copy-mode" render.
// In this mode, we copy pixels from the temp buffer to the final buffer // In this mode, we copy pixels from the temp buffer to the final buffer
// whenever an object pixel is rendered. // whenever an object pixel is rendered.
render_scanline_objs<u16, PIXCOPY>(4, start, end, scanline); render_scanline_objs<u16, PIXCOPY>(4, start, end, scanline, NULL);
// TODO: Evaluate whether it's better to render the whole line and copy, // TODO: Evaluate whether it's better to render the whole line and copy,
// or render subsegments and copy as we go (depends on the pixel/obj count) // or render subsegments and copy as we go (depends on the pixel/obj count)