xenocara/driver/xf86-video-ati/src/radeon_render.c
2006-11-26 20:00:15 +00:00

1050 lines
28 KiB
C

/*
* Copyright 2004 Eric Anholt
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Eric Anholt <anholt@FreeBSD.org>
* Hui Yu <hyu@ati.com>
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <string.h>
#ifdef USE_XAA
#include "dixstruct.h"
#include "xaa.h"
#include "xaalocal.h"
#ifndef RENDER_GENERIC_HELPER
#define RENDER_GENERIC_HELPER
/* Describes how one Render compositing operator is programmed into the
 * blend unit.  One entry per operator lives in RadeonBlendOp[].
 */
struct blendinfo {
/* Non-zero when the operator's blend factors read destination alpha.
 * RadeonGetBlendCntl() consults this to rewire the source factor when the
 * destination format has no alpha channel.
 */
Bool dst_alpha;
/* Non-zero when the operator's blend factors read source alpha.
 * NOTE(review): not consulted anywhere in the visible part of this file.
 */
Bool src_alpha;
/* A RADEON_SRC_BLEND_* value OR'ed with a RADEON_DST_BLEND_* value.
 * 0 marks an operator that is not supported.
 */
CARD32 blend_cntl;
};
/* Blend programming for each compositing operator, indexed by the operator
 * code passed to RadeonGetBlendCntl().
 *
 * The first part of blend_cntl corresponds to Fa from the render "protocol"
 * document, and the second part to Fb.
 *
 * An all-zero entry means "not supported"; RadeonGetBlendCntl() returns 0
 * for it.  The position of every entry matters: do not insert or remove
 * rows without keeping the indices noted below intact.
 */
static const struct blendinfo RadeonBlendOp[] = {
/* Index 0: Clear */
{0, 0, RADEON_SRC_BLEND_GL_ZERO |
RADEON_DST_BLEND_GL_ZERO},
/* Index 1: Src */
{0, 0, RADEON_SRC_BLEND_GL_ONE |
RADEON_DST_BLEND_GL_ZERO},
/* Index 2: Dst */
{0, 0, RADEON_SRC_BLEND_GL_ZERO |
RADEON_DST_BLEND_GL_ONE},
/* Index 3: Over */
{0, 1, RADEON_SRC_BLEND_GL_ONE |
RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
/* Index 4: OverReverse */
{1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
RADEON_DST_BLEND_GL_ONE},
/* Index 5: In */
{1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA |
RADEON_DST_BLEND_GL_ZERO},
/* Index 6: InReverse */
{0, 1, RADEON_SRC_BLEND_GL_ZERO |
RADEON_DST_BLEND_GL_SRC_ALPHA},
/* Index 7: Out */
{1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
RADEON_DST_BLEND_GL_ZERO},
/* Index 8: OutReverse */
{0, 1, RADEON_SRC_BLEND_GL_ZERO |
RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
/* Index 9: Atop */
{1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA |
RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
/* Index 10: AtopReverse */
{1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
RADEON_DST_BLEND_GL_SRC_ALPHA},
/* Index 11: Xor */
{1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA |
RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
/* Index 12: Add */
{0, 0, RADEON_SRC_BLEND_GL_ONE |
RADEON_DST_BLEND_GL_ONE},
/* Index 13: Saturate */
{1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
RADEON_DST_BLEND_GL_ONE},
/* Indices 14-15: filler so the Disjoint group starts at index 16
 * (presumably unassigned operator codes -- confirm against render.h).
 */
{0, 0, 0},
{0, 0, 0},
/* Index 16: DisjointClear */
{0, 0, RADEON_SRC_BLEND_GL_ZERO |
RADEON_DST_BLEND_GL_ZERO},
/* Index 17: DisjointSrc */
{0, 0, RADEON_SRC_BLEND_GL_ONE |
RADEON_DST_BLEND_GL_ZERO},
/* Index 18: DisjointDst */
{0, 0, RADEON_SRC_BLEND_GL_ZERO |
RADEON_DST_BLEND_GL_ONE},
/* Index 19: DisjointOver unsupported */
{0, 0, 0},
/* Index 20: DisjointOverReverse */
{1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
RADEON_DST_BLEND_GL_ONE},
/* Index 21: DisjointIn unsupported */
{0, 0, 0},
/* Index 22: DisjointInReverse unsupported */
{0, 0, 0},
/* Index 23: DisjointOut.
 * NOTE(review): this slot was historically labelled "unsupported" but it
 * carries a non-zero blend_cntl, so RadeonGetBlendCntl() treats it as
 * supported.  Confirm which of the two was intended.
 */
{1, 1, RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE |
RADEON_DST_BLEND_GL_ZERO},
/* Index 24: DisjointOutReverse unsupported */
{0, 0, 0},
/* Index 25: DisjointAtop unsupported */
{0, 0, 0},
/* Index 26: DisjointAtopReverse unsupported */
{0, 0, 0},
/* Index 27: DisjointXor unsupported */
{0, 0, 0},
/* Indices 28-31: filler so the Conjoint group starts at index 32
 * (presumably unassigned operator codes -- confirm against render.h).
 */
{0, 0, 0},
{0, 0, 0},
{0, 0, 0},
{0, 0, 0},
/* Index 32: ConjointClear */
{0, 0, RADEON_SRC_BLEND_GL_ZERO |
RADEON_DST_BLEND_GL_ZERO},
/* Index 33: ConjointSrc */
{0, 0, RADEON_SRC_BLEND_GL_ONE |
RADEON_DST_BLEND_GL_ZERO},
/* Index 34: ConjointDst */
{0, 0, RADEON_SRC_BLEND_GL_ZERO |
RADEON_DST_BLEND_GL_ONE},
};
/* Number of entries in RadeonBlendOp[]; operator codes at or above this
 * value are rejected by RadeonGetBlendCntl().
 */
#define RadeonOpMax (sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
/* Note on texture formats:
 * TXFORMAT_Y8 expands to (Y,Y,Y,1). TXFORMAT_I8 expands to (I,I,I,I)
 * The RADEON and R200 TXFORMATS we use are the same on r100/r200.
 */
/* Zero-terminated list of picture formats usable as a texture source.
 * Every format listed here has a matching case in RadeonGetTextureFormat().
 * NOTE(review): the consumer of this list is outside the visible part of the
 * file; presumably it is handed to XAA as the supported-format table.
 */
static CARD32 RADEONTextureFormats[] = {
PICT_a8r8g8b8,
PICT_a8,
PICT_x8r8g8b8,
PICT_r5g6b5,
PICT_a1r5g5b5,
PICT_x1r5g5b5,
0
};
/* Zero-terminated list of picture formats usable as a render destination.
 * Every format listed here has a matching case in RadeonGetColorFormat().
 * NOTE(review): the consumer of this list is outside the visible part of the
 * file; presumably it is handed to XAA as the supported-format table.
 */
static CARD32 RADEONDstFormats[] = {
PICT_a8r8g8b8,
PICT_x8r8g8b8,
PICT_r5g6b5,
PICT_a1r5g5b5,
PICT_x1r5g5b5,
0
};
/* Map a Render picture format onto the RADEON_TXFORMAT_* bits used to
 * sample it as a texture.  Formats that carry an alpha channel also get
 * RADEON_TXFORMAT_ALPHA_IN_MAP.
 *
 * Returns 0 for any format that is not handled.
 */
static CARD32
RadeonGetTextureFormat(CARD32 format)
{
    CARD32 hwFormat = 0;

    if (format == PICT_a8r8g8b8)
        hwFormat = RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
    else if (format == PICT_a8)
        hwFormat = RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP;
    else if (format == PICT_x8r8g8b8)
        hwFormat = RADEON_TXFORMAT_ARGB8888;
    else if (format == PICT_r5g6b5)
        hwFormat = RADEON_TXFORMAT_RGB565;
    else if (format == PICT_a1r5g5b5)
        hwFormat = RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP;
    else if (format == PICT_x1r5g5b5)
        hwFormat = RADEON_TXFORMAT_ARGB1555;

    return hwFormat;
}
/* Map a Render picture format onto the RADEON_COLOR_FORMAT_* value used
 * when it is the render destination.  The alpha and non-alpha variants of a
 * layout share one hardware format.
 *
 * Returns 0 for any format that is not handled.
 */
static CARD32
RadeonGetColorFormat(CARD32 format)
{
    if (format == PICT_a8r8g8b8 || format == PICT_x8r8g8b8)
        return RADEON_COLOR_FORMAT_ARGB8888;

    if (format == PICT_r5g6b5)
        return RADEON_COLOR_FORMAT_RGB565;

    if (format == PICT_a1r5g5b5 || format == PICT_x1r5g5b5)
        return RADEON_COLOR_FORMAT_ARGB1555;

    return 0;
}
/* Look up the RADEON_RB3D_BLENDCNTL value for compositing operator `op`
 * when drawing into a destination of format `dstFormat`.
 *
 * Returns 0 if the operator is out of range or marked unsupported in
 * RadeonBlendOp[].
 */
static CARD32
RadeonGetBlendCntl(CARD8 op, CARD32 dstFormat)
{
    const struct blendinfo *entry;
    CARD32 cntl, srcFactor, fixedFactor;

    if (op >= RadeonOpMax)
        return 0;

    entry = &RadeonBlendOp[op];
    cntl = entry->blend_cntl;
    if (cntl == 0)
        return 0;

    /* Nothing to adjust unless the operator reads destination alpha and
     * the destination format has none.
     */
    if (!entry->dst_alpha || PICT_FORMAT_A(dstFormat))
        return cntl;

    /* If there's no destination alpha channel, we need to wire the blending
     * to treat the alpha channel as always 1.
     */
    srcFactor = cntl & RADEON_SRC_BLEND_MASK;
    if (srcFactor == RADEON_SRC_BLEND_GL_DST_ALPHA)
        fixedFactor = RADEON_SRC_BLEND_GL_ONE;
    else if (srcFactor == RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA ||
             srcFactor == RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE)
        fixedFactor = RADEON_SRC_BLEND_GL_ZERO;
    else
        return cntl;

    return (cntl & ~RADEON_SRC_BLEND_MASK) | fixedFactor;
}
/* Return the raw bit pattern of a float as a 32-bit word, so vertex data
 * can be written through integer register/ring writes.
 */
static __inline__ CARD32 F_TO_DW(float val)
{
    CARD32 bits = 0;

    /* Byte-wise copy: reinterprets the storage without converting the value. */
    memcpy(&bits, &val, sizeof(val));
    return bits;
}
/* Compute floor(log2(val)) for positive val.
 *
 * Callers use the result directly as a shift count, so non-positive input
 * yields 0 instead of -1: a negative shift count is undefined behaviour.
 * Negative values are also rejected up front, because right-shifting a
 * negative int may never reach zero and would loop forever.
 */
static __inline__ int
ATILog2(int val)
{
    unsigned int rest;
    int bits = 0;

    if (val <= 0)
        return 0;

    for (rest = (unsigned int)val >> 1; rest != 0; rest >>= 1)
        ++bits;

    return bits;
}
/* Removal callback registered with xf86AllocateOffscreenLinear() by
 * AllocateLinear().  The private pointer stored with the area is the driver
 * info record; forget the render-texture area recorded there.
 */
static void
RemoveLinear (FBLinearPtr linear)
{
    ((RADEONInfoPtr)(linear->devPrivate.ptr))->RenderTex = NULL;
}
/* Housekeeping hook installed by AllocateLinear().  Frees the
 * render-texture area once the current time has passed RenderTimeout, and
 * unhooks itself as soon as no area is held.
 */
static void
RenderCallback (ScrnInfoPtr pScrn)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);

    if (info->RenderTex != NULL) {
        /* Area still within its grace period: keep it and stay hooked. */
        if (currentTime.milliseconds <= info->RenderTimeout)
            return;

        xf86FreeOffscreenLinear(info->RenderTex);
        info->RenderTex = NULL;
    }

    /* No area left to watch. */
    info->RenderCallback = NULL;
}
/* Make sure info->RenderTex is an offscreen linear area of at least
 * sizeNeeded bytes, reusing or resizing the current one when possible.
 * Every call also pushes RenderTimeout 30000 ms into the future and
 * (re)installs RenderCallback.
 *
 * Returns TRUE when an area of sufficient size is available.
 */
static Bool
AllocateLinear (
    ScrnInfoPtr pScrn,
    int sizeNeeded
){
    RADEONInfoPtr info = RADEONPTR(pScrn);
    int bytesPerPixel = info->CurrentLayout.bitsPerPixel / 8;
    int pixelsNeeded;

    info->RenderTimeout = currentTime.milliseconds + 30000;
    info->RenderCallback = RenderCallback;

    /* XAA allocates in units of pixels at the screen bpp, so convert the
     * byte count, rounding up.
     */
    pixelsNeeded = (sizeNeeded + bytesPerPixel - 1) / bytesPerPixel;

    if (info->RenderTex != NULL) {
        /* Big enough already, or growable in place. */
        if (info->RenderTex->size >= pixelsNeeded ||
            xf86ResizeOffscreenLinear(info->RenderTex, pixelsNeeded))
            return TRUE;

        /* Could not grow it: drop it and allocate from scratch below. */
        xf86FreeOffscreenLinear(info->RenderTex);
        info->RenderTex = NULL;
    }

    info->RenderTex = xf86AllocateOffscreenLinear(pScrn->pScreen,
                                                  pixelsNeeded, 32,
                                                  NULL, RemoveLinear, info);
    if (info->RenderTex == NULL)
        return FALSE;

    return TRUE;
}
#if X_BYTE_ORDER == X_BIG_ENDIAN
/* Program RADEON_SURFACE_CNTL so that both non-surface apertures byte-swap
 * in units matching a texel of tex_bytepp bytes (1, 2 or 4).
 *
 * Returns FALSE, without touching the register, for any other texel size.
 */
static Bool RADEONSetupRenderByteswap(ScrnInfoPtr pScrn, int tex_bytepp)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    unsigned char *RADEONMMIO = info->MMIO;   /* name required by OUTREG() */
    CARD32 swapBits;

    /* Set up byte swapping for the framebuffer aperture as needed */
    if (tex_bytepp == 1) {
        swapBits = 0;
    } else if (tex_bytepp == 2) {
        swapBits = RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP;
    } else if (tex_bytepp == 4) {
        swapBits = RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP;
    } else {
        xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: Don't know what to do for "
                   "tex_bytepp == %d!\n", __func__, tex_bytepp);
        return FALSE;
    }

    /* Start from the mode's saved value with every swap bit cleared. */
    OUTREG(RADEON_SURFACE_CNTL,
           (info->ModeReg.surface_cntl &
            ~(RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP |
              RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP)) |
           swapBits);
    return TRUE;
}
/* Undo RADEONSetupRenderByteswap(): write the mode's saved surface_cntl
 * value back to RADEON_SURFACE_CNTL.
 */
static void RADEONRestoreByteswap(RADEONInfoPtr info)
{
    unsigned char *RADEONMMIO = info->MMIO;   /* name required by OUTREG() */
    CARD32 savedCntl = info->ModeReg.surface_cntl;

    OUTREG(RADEON_SURFACE_CNTL, savedCntl);
}
#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */
#endif /* RENDER_GENERIC_HELPER */
/* Everything below is generated for exactly one acceleration backend,
 * selected by defining either ACCEL_MMIO or ACCEL_CP before this point.
 * NOTE(review): the RENDER_GENERIC_HELPER guard above and the trailing
 * #undef FUNC_NAME suggest this file is included once per backend -- confirm
 * against the including file.
 */
#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
#error Cannot define both MMIO and CP acceleration!
#endif
/* FUNC_NAME_CAT(prefix,suffix) pastes two tokens into one identifier.  The
 * ANSI form uses the ## operator; the fallback relies on an empty comment
 * between the tokens for traditional preprocessors.
 */
#if !defined(UNIXCPP) || defined(ANSICPP)
#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
#else
#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
#endif
/* FUNC_NAME(prefix) appends the backend suffix (MMIO or CP) to a function
 * name, so each backend gets its own set of symbols.
 */
#ifdef ACCEL_MMIO
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
#else
#ifdef ACCEL_CP
#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
#else
#error No accel type defined!
#endif
#endif
/*
 * Copy a CPU-side picture into the offscreen render-texture area and
 * program the RADEON_PP_* texture unit 0 registers to sample from it.
 *
 *   format     Render picture format of the source pixels
 *   src        first source scanline
 *   src_pitch  bytes between source scanlines
 *   width      picture width in pixels (1..2048)
 *   height     picture height in pixels (1..2048)
 *   flags      XAA render flags
 *
 * Returns FALSE (so the caller can fall back) when XAA_RENDER_REPEAT is
 * requested, when a dimension is zero or exceeds 2048, when byte swapping
 * cannot be set up, or when no offscreen memory is available.
 */
static Bool FUNC_NAME(R100SetupTexture)(
    ScrnInfoPtr pScrn,
    CARD32 format,
    CARD8 *src,
    int src_pitch,
    unsigned int width,
    unsigned int height,
    int flags)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    CARD8 *dst;
    CARD32 tex_size = 0, txformat;
    int dst_pitch, offset, size, tex_bytepp;
#ifdef ACCEL_CP
    CARD32 buf_pitch, dst_pitch_off;
    int x, y;
    unsigned int hpass;
    CARD8 *tmp_dst;
#endif
    ACCEL_PREAMBLE();

    /* render repeat is broken - fix in stable tree by falling back */
    if (flags & XAA_RENDER_REPEAT)
        return FALSE;

    /* An empty picture would make the (dimension - 1) size fields below
     * wrap around, so refuse it before touching any hardware state.
     */
    if ((width == 0) || (height == 0))
        return FALSE;

    if ((width > 2048) || (height > 2048))
        return FALSE;

    txformat = RadeonGetTextureFormat(format);
    tex_bytepp = PICT_FORMAT_BPP(format) >> 3;

#ifndef ACCEL_CP
#if X_BYTE_ORDER == X_BIG_ENDIAN
    if (!RADEONSetupRenderByteswap(pScrn, tex_bytepp)) {
        xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: RADEONSetupRenderByteswap() "
                   "failed!\n", __func__);
        return FALSE;
    }
#endif
#endif

    /* Destination scanlines are padded to a multiple of 64 bytes. */
    dst_pitch = (width * tex_bytepp + 63) & ~63;
    size = dst_pitch * height;

    if (!AllocateLinear(pScrn, size)) {
#ifndef ACCEL_CP
#if X_BYTE_ORDER == X_BIG_ENDIAN
        /* Do not leave the aperture byte-swapped on the failure path. */
        RADEONRestoreByteswap(info);
#endif
#endif
        return FALSE;
    }

    /* The repeat branch is currently unreachable because of the early
     * return above; it is kept for when repeat support is re-enabled.
     */
    if (flags & XAA_RENDER_REPEAT) {
        txformat |= ATILog2(width) << RADEON_TXFORMAT_WIDTH_SHIFT;
        txformat |= ATILog2(height) << RADEON_TXFORMAT_HEIGHT_SHIFT;
    } else {
        tex_size = ((height - 1) << 16) | (width - 1);
        txformat |= RADEON_TXFORMAT_NON_POWER2;
    }

    /* RenderTex->offset is in screen pixels; convert it to bytes. */
    offset = info->RenderTex->offset * pScrn->bitsPerPixel / 8;
    dst = (CARD8*)(info->FB + offset);

    /* Upload texture to card. */
#ifdef ACCEL_CP
    RADEONHostDataParams( pScrn, dst, dst_pitch, tex_bytepp, &dst_pitch_off, &x, &y );

    /* Each pass reports the number of scanlines it covers in hpass;
     * presumably RADEONHostDataBlit() decrements height through the
     * pointer until nothing is left.
     */
    while ( height )
    {
        tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width,
                                      dst_pitch_off, &buf_pitch,
                                      x, &y, &height, &hpass );
        RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src,
                                    hpass, buf_pitch, src_pitch );
        src += hpass * src_pitch;
    }

    RADEON_PURGE_CACHE();
    RADEON_WAIT_UNTIL_IDLE();
#else
    if (info->accel->NeedToSync)
        info->accel->Sync(pScrn);

    while (height--) {
        memcpy(dst, src, width * tex_bytepp);
        src += src_pitch;
        dst += dst_pitch;
    }

#if X_BYTE_ORDER == X_BIG_ENDIAN
    RADEONRestoreByteswap(info);
#endif
#endif /* ACCEL_CP */

    BEGIN_ACCEL(5);
    OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
    OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, tex_size);
    OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, dst_pitch - 32);
    OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, offset + info->fbLocation +
                                        pScrn->fbOffset);
    OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, RADEON_MAG_FILTER_LINEAR |
                                        RADEON_MIN_FILTER_LINEAR |
                                        RADEON_CLAMP_S_WRAP |
                                        RADEON_CLAMP_T_WRAP);
    FINISH_ACCEL();

    return TRUE;
}
/*
 * Prepare the 3D engine for compositing a solid colour through an alpha
 * mask that lives in CPU memory (RADEON_PP_* register set).
 *
 *   op                      compositing operator
 *   red/green/blue/alpha    16-bit colour components; only the top 8 bits
 *                           of each are used
 *   maskFormat              picture format of the mask
 *   dstFormat               picture format of the destination
 *   alphaPtr/alphaPitch     mask pixels and their scanline stride in bytes
 *   width/height            mask size in pixels
 *   flags                   XAA render flags
 *
 * Returns FALSE when the operator is unsupported or the mask cannot be
 * uploaded.
 */
static Bool
FUNC_NAME(R100SetupForCPUToScreenAlphaTexture) (
    ScrnInfoPtr pScrn,
    int op,
    CARD16 red,
    CARD16 green,
    CARD16 blue,
    CARD16 alpha,
    CARD32 maskFormat,
    CARD32 dstFormat,
    CARD8 *alphaPtr,
    int alphaPitch,
    int width,
    int height,
    int flags
)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    CARD32 colorformat, srccolor, blend_cntl;
    ACCEL_PREAMBLE();

    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
    if (blend_cntl == 0)
        return FALSE;

    if (!info->XInited3D)
        RADEONInit3DEngine(pScrn);

    if (!FUNC_NAME(R100SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch,
                                     width, height, flags))
        return FALSE;

    colorformat = RadeonGetColorFormat(dstFormat);

    /* Pack the top byte of each component as A:R:G:B, one byte each.
     * Widen to CARD32 before shifting: the CARD16 operands promote to
     * signed int, and shifting alpha into bit 31 of a signed int would be
     * undefined behaviour.
     */
    srccolor = ((CARD32)(alpha & 0xff00) << 16) |
               ((CARD32)(red & 0xff00) << 8) |
               (CARD32)(green & 0xff00) |
               (CARD32)(blue >> 8);

    BEGIN_ACCEL(7);
    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
                                  RADEON_TEX_BLEND_0_ENABLE);
    /* The solid colour goes in as the texture factor and is combined
     * with the mask's alpha for both the colour and alpha outputs.
     */
    OUT_ACCEL_REG(RADEON_PP_TFACTOR_0, srccolor);
    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, RADEON_COLOR_ARG_A_TFACTOR_COLOR |
                                        RADEON_COLOR_ARG_B_T0_ALPHA);
    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, RADEON_ALPHA_ARG_A_TFACTOR_ALPHA |
                                        RADEON_ALPHA_ARG_B_T0_ALPHA);
    OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
                                     RADEON_SE_VTX_FMT_ST0);
    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
    FINISH_ACCEL();

    return TRUE;
}
/*
 * Prepare the 3D engine for compositing a CPU-memory picture onto the
 * screen (RADEON_PP_* register set).
 *
 *   op                  compositing operator
 *   srcFormat           picture format of the source
 *   dstFormat           picture format of the destination
 *   texPtr/texPitch     source pixels and their scanline stride in bytes
 *   width/height        source size in pixels
 *   flags               XAA render flags
 *
 * Returns FALSE when the operator is unsupported or the source cannot be
 * uploaded.
 */
static Bool
FUNC_NAME(R100SetupForCPUToScreenTexture) (
    ScrnInfoPtr pScrn,
    int op,
    CARD32 srcFormat,
    CARD32 dstFormat,
    CARD8 *texPtr,
    int texPitch,
    int width,
    int height,
    int flags
)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    CARD32 rb3dCntl, colorCombine, blendBits;
    ACCEL_PREAMBLE();

    blendBits = RadeonGetBlendCntl(op, dstFormat);
    if (blendBits == 0)
        return FALSE;

    if (!info->XInited3D)
        RADEONInit3DEngine(pScrn);

    if (!FUNC_NAME(R100SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width,
                                     height, flags))
        return FALSE;

    rb3dCntl = RadeonGetColorFormat(dstFormat) | RADEON_ALPHA_BLEND_ENABLE;

    /* An a8 source contributes no colour: feed zero instead of the texel. */
    colorCombine = (srcFormat == PICT_a8) ? RADEON_COLOR_ARG_C_ZERO
                                          : RADEON_COLOR_ARG_C_T0_COLOR;

    BEGIN_ACCEL(6);
    OUT_ACCEL_REG(RADEON_RB3D_CNTL, rb3dCntl);
    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
                                  RADEON_TEX_BLEND_0_ENABLE);
    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, colorCombine);
    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, RADEON_ALPHA_ARG_C_T0_ALPHA);
    OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
                                     RADEON_SE_VTX_FMT_ST0);
    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendBits);
    FINISH_ACCEL();

    return TRUE;
}
/*
 * Draw one textured rectangle with the state prepared by the matching
 * SetupForCPUToScreen* call.  The rectangle covers width x height pixels at
 * (dstx, dsty) and samples the texture starting at (srcx, srcy).
 *
 * Four vertices are emitted as a triangle fan, each as
 * (x, y, s, t), in the order top-left, top-right, bottom-right, bottom-left.
 */
static void
FUNC_NAME(R100SubsequentCPUToScreenTexture) (
    ScrnInfoPtr pScrn,
    int dstx,
    int dsty,
    int srcx,
    int srcy,
    int width,
    int height
)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    CARD32 fboffset, colorpitch;
    float left, top, right, bottom;
    float texLeft, texTop, texRight, texBottom;
    float vtx[16];
    int shift, v;
    ACCEL_PREAMBLE();

    /* Note: we can't simply set up the 3D surface at the same location as the
     * front buffer, because the 2048x2048 limit on coordinates may be smaller
     * than the (MergedFB) screen.  Instead the colour buffer origin is moved
     * close to the destination and only the remainder is kept as vertex
     * coordinates.
     * Can't use arbitrary offsets for color tiling
     */
    if (!info->tilingEnabled) {
        /* Linear: origin at the destination pixel rounded down to 16 bytes. */
        shift = (pScrn->bitsPerPixel >> 4);
        fboffset = (info->fbLocation + pScrn->fbOffset +
                    ((pScrn->displayWidth * dsty + dstx) << shift)) & ~15;
        left = ((dstx << shift) % 16) >> shift;
        top = 0.0;
    } else {
        /* can't play tricks with x coordinate, or could we - tiling is disabled anyway in that case */
        fboffset = info->fbLocation + pScrn->fbOffset +
                   (pScrn->displayWidth * (dsty & ~15) * (pScrn->bitsPerPixel >> 3));
        left = dstx;
        top = (dsty % 16);
    }

    right = width + left;
    bottom = height + top;
    texLeft = srcx;
    texRight = srcx + width;
    texTop = srcy;
    texBottom = srcy + height;

    /* Vertex stream, in emission order. */
    vtx[0] = left;   vtx[1] = top;     vtx[2] = texLeft;   vtx[3] = texTop;
    vtx[4] = right;  vtx[5] = top;     vtx[6] = texRight;  vtx[7] = texTop;
    vtx[8] = right;  vtx[9] = bottom;  vtx[10] = texRight; vtx[11] = texBottom;
    vtx[12] = left;  vtx[13] = bottom; vtx[14] = texLeft;  vtx[15] = texBottom;

    colorpitch = pScrn->displayWidth |
        ((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0);

#ifdef ACCEL_CP
    /* 2 register writes (4 dwords) + packet header + 2 control dwords +
     * 16 vertex dwords + 1 register write (2 dwords) = 25.
     */
    BEGIN_RING(25);
    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);

    OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, 17));
    /* RADEON_SE_VTX_FMT */
    OUT_RING(RADEON_CP_VC_FRMT_XY |
             RADEON_CP_VC_FRMT_ST0);
    /* SE_VF_CNTL */
    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
             RADEON_CP_VC_CNTL_PRIM_WALK_RING |
             RADEON_CP_VC_CNTL_MAOS_ENABLE |
             RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
             (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));

    for (v = 0; v < 16; v++) {
        OUT_RING(F_TO_DW(vtx[v]));
    }

    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
    ADVANCE_RING();
#else
    /* 3 setup writes + 16 vertex dwords + 1 wait = 20. */
    BEGIN_ACCEL(20);
    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);

    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, RADEON_VF_PRIM_TYPE_TRIANGLE_FAN |
                                     RADEON_VF_PRIM_WALK_DATA |
                                     RADEON_VF_RADEON_MODE |
                                     (4 << RADEON_VF_NUM_VERTICES_SHIFT));

    for (v = 0; v < 16; v++) {
        OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(vtx[v]));
    }

    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
    FINISH_ACCEL();
#endif
}
/*
 * Copy a CPU-side picture into the offscreen render-texture area and
 * program the R200_PP_* texture unit 0 registers to sample from it.
 *
 *   format     Render picture format of the source pixels
 *   src        first source scanline
 *   src_pitch  bytes between source scanlines
 *   width      picture width in pixels (1..2048)
 *   height     picture height in pixels (1..2048)
 *   flags      XAA render flags
 *
 * Returns FALSE (so the caller can fall back) when XAA_RENDER_REPEAT is
 * requested, when a dimension is zero or exceeds 2048, when byte swapping
 * cannot be set up, or when no offscreen memory is available.
 */
static Bool FUNC_NAME(R200SetupTexture)(
    ScrnInfoPtr pScrn,
    CARD32 format,
    CARD8 *src,
    int src_pitch,
    unsigned int width,
    unsigned int height,
    int flags)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    CARD8 *dst;
    CARD32 tex_size = 0, txformat;
    int dst_pitch, offset, size, tex_bytepp;
#ifdef ACCEL_CP
    CARD32 buf_pitch, dst_pitch_off;
    int x, y;
    unsigned int hpass;
    CARD8 *tmp_dst;
#endif
    ACCEL_PREAMBLE();

    /* render repeat is broken - fix in stable tree by falling back */
    if (flags & XAA_RENDER_REPEAT)
        return FALSE;

    /* An empty picture would make the (dimension - 1) size fields below
     * wrap around, so refuse it before touching any hardware state.
     */
    if ((width == 0) || (height == 0))
        return FALSE;

    if ((width > 2048) || (height > 2048))
        return FALSE;

    txformat = RadeonGetTextureFormat(format);
    tex_bytepp = PICT_FORMAT_BPP(format) >> 3;

#ifndef ACCEL_CP
#if X_BYTE_ORDER == X_BIG_ENDIAN
    if (!RADEONSetupRenderByteswap(pScrn, tex_bytepp)) {
        xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "%s: RADEONSetupRenderByteswap() "
                   "failed!\n", __func__);
        return FALSE;
    }
#endif
#endif

    /* Destination scanlines are padded to a multiple of 64 bytes. */
    dst_pitch = (width * tex_bytepp + 63) & ~63;
    size = dst_pitch * height;

    if (!AllocateLinear(pScrn, size)) {
#ifndef ACCEL_CP
#if X_BYTE_ORDER == X_BIG_ENDIAN
        /* Do not leave the aperture byte-swapped on the failure path. */
        RADEONRestoreByteswap(info);
#endif
#endif
        return FALSE;
    }

    /* The repeat branch is currently unreachable because of the early
     * return above; it is kept for when repeat support is re-enabled.
     */
    if (flags & XAA_RENDER_REPEAT) {
        txformat |= ATILog2(width) << R200_TXFORMAT_WIDTH_SHIFT;
        txformat |= ATILog2(height) << R200_TXFORMAT_HEIGHT_SHIFT;
    } else {
        tex_size = ((height - 1) << 16) | (width - 1);
        txformat |= RADEON_TXFORMAT_NON_POWER2;
    }

    /* RenderTex->offset is in screen pixels; convert it to bytes. */
    offset = info->RenderTex->offset * pScrn->bitsPerPixel / 8;
    dst = (CARD8*)(info->FB + offset);

    /* Upload texture to card. */
#ifdef ACCEL_CP
    RADEONHostDataParams( pScrn, dst, dst_pitch, tex_bytepp, &dst_pitch_off, &x, &y );

    /* Each pass reports the number of scanlines it covers in hpass;
     * presumably RADEONHostDataBlit() decrements height through the
     * pointer until nothing is left.
     */
    while ( height )
    {
        tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width,
                                      dst_pitch_off, &buf_pitch,
                                      x, &y, &height, &hpass );
        RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src,
                                    hpass, buf_pitch, src_pitch );
        src += hpass * src_pitch;
    }

    RADEON_PURGE_CACHE();
    RADEON_WAIT_UNTIL_IDLE();
#else
    if (info->accel->NeedToSync)
        info->accel->Sync(pScrn);

    while (height--) {
        memcpy(dst, src, width * tex_bytepp);
        src += src_pitch;
        dst += dst_pitch;
    }

#if X_BYTE_ORDER == X_BIG_ENDIAN
    RADEONRestoreByteswap(info);
#endif
#endif /* ACCEL_CP */

    BEGIN_ACCEL(6);
    OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
    OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
    OUT_ACCEL_REG(R200_PP_TXSIZE_0, tex_size);
    OUT_ACCEL_REG(R200_PP_TXPITCH_0, dst_pitch - 32);
    OUT_ACCEL_REG(R200_PP_TXOFFSET_0, offset + info->fbLocation +
                                      pScrn->fbOffset);
    OUT_ACCEL_REG(R200_PP_TXFILTER_0, R200_MAG_FILTER_NEAREST |
                                      R200_MIN_FILTER_NEAREST |
                                      R200_CLAMP_S_WRAP |
                                      R200_CLAMP_T_WRAP);
    FINISH_ACCEL();

    return TRUE;
}
/*
 * Prepare the 3D engine for compositing a solid colour through an alpha
 * mask that lives in CPU memory (R200_PP_* register set).
 *
 *   op                      compositing operator
 *   red/green/blue/alpha    16-bit colour components; only the top 8 bits
 *                           of each are used
 *   maskFormat              picture format of the mask
 *   dstFormat               picture format of the destination
 *   alphaPtr/alphaPitch     mask pixels and their scanline stride in bytes
 *   width/height            mask size in pixels
 *   flags                   XAA render flags
 *
 * Returns FALSE when the operator is unsupported or the mask cannot be
 * uploaded.
 */
static Bool
FUNC_NAME(R200SetupForCPUToScreenAlphaTexture) (
    ScrnInfoPtr pScrn,
    int op,
    CARD16 red,
    CARD16 green,
    CARD16 blue,
    CARD16 alpha,
    CARD32 maskFormat,
    CARD32 dstFormat,
    CARD8 *alphaPtr,
    int alphaPitch,
    int width,
    int height,
    int flags
)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    CARD32 colorformat, srccolor, blend_cntl;
    ACCEL_PREAMBLE();

    blend_cntl = RadeonGetBlendCntl(op, dstFormat);
    if (blend_cntl == 0)
        return FALSE;

    if (!info->XInited3D)
        RADEONInit3DEngine(pScrn);

    if (!FUNC_NAME(R200SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch,
                                     width, height, flags))
        return FALSE;

    colorformat = RadeonGetColorFormat(dstFormat);

    /* Pack the top byte of each component as A:R:G:B, one byte each.
     * Widen to CARD32 before shifting: the CARD16 operands promote to
     * signed int, and shifting alpha into bit 31 of a signed int would be
     * undefined behaviour.
     */
    srccolor = ((CARD32)(alpha & 0xff00) << 16) |
               ((CARD32)(red & 0xff00) << 8) |
               (CARD32)(green & 0xff00) |
               (CARD32)(blue >> 8);

    BEGIN_ACCEL(10);
    OUT_ACCEL_REG(RADEON_RB3D_CNTL, colorformat | RADEON_ALPHA_BLEND_ENABLE);
    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
                                  RADEON_TEX_BLEND_0_ENABLE);
    /* The solid colour goes in as the texture factor and is combined
     * with the mask's alpha for both the colour and alpha outputs.
     */
    OUT_ACCEL_REG(R200_PP_TFACTOR_0, srccolor);
    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, R200_TXC_ARG_A_TFACTOR_COLOR |
                                      R200_TXC_ARG_B_R0_ALPHA);
    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, R200_TXC_OUTPUT_REG_R0);
    OUT_ACCEL_REG(R200_PP_TXABLEND_0, R200_TXA_ARG_A_TFACTOR_ALPHA |
                                      R200_TXA_ARG_B_R0_ALPHA);
    OUT_ACCEL_REG(R200_PP_TXABLEND2_0, R200_TXA_OUTPUT_REG_R0);
    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, 0);
    OUT_ACCEL_REG(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blend_cntl);
    FINISH_ACCEL();

    return TRUE;
}
/*
 * Prepare the 3D engine for compositing a CPU-memory picture onto the
 * screen (R200_PP_* register set).
 *
 *   op                  compositing operator
 *   srcFormat           picture format of the source
 *   dstFormat           picture format of the destination
 *   texPtr/texPitch     source pixels and their scanline stride in bytes
 *   width/height        source size in pixels
 *   flags               XAA render flags
 *
 * Returns FALSE when the operator is unsupported or the source cannot be
 * uploaded.
 */
static Bool
FUNC_NAME(R200SetupForCPUToScreenTexture) (
    ScrnInfoPtr pScrn,
    int op,
    CARD32 srcFormat,
    CARD32 dstFormat,
    CARD8 *texPtr,
    int texPitch,
    int width,
    int height,
    int flags
)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    CARD32 rb3dCntl, colorCombine, blendBits;
    ACCEL_PREAMBLE();

    blendBits = RadeonGetBlendCntl(op, dstFormat);
    if (blendBits == 0)
        return FALSE;

    if (!info->XInited3D)
        RADEONInit3DEngine(pScrn);

    if (!FUNC_NAME(R200SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width,
                                     height, flags))
        return FALSE;

    rb3dCntl = RadeonGetColorFormat(dstFormat) | RADEON_ALPHA_BLEND_ENABLE;

    /* An a8 source contributes no colour: feed zero instead of the texel. */
    colorCombine = (srcFormat == PICT_a8) ? R200_TXC_ARG_C_ZERO
                                          : R200_TXC_ARG_C_R0_COLOR;

    BEGIN_ACCEL(9);
    OUT_ACCEL_REG(RADEON_RB3D_CNTL, rb3dCntl);
    OUT_ACCEL_REG(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE |
                                  RADEON_TEX_BLEND_0_ENABLE);
    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, colorCombine);
    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, R200_TXC_OUTPUT_REG_R0);
    OUT_ACCEL_REG(R200_PP_TXABLEND_0, R200_TXA_ARG_C_R0_ALPHA);
    OUT_ACCEL_REG(R200_PP_TXABLEND2_0, R200_TXA_OUTPUT_REG_R0);
    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, 0);
    OUT_ACCEL_REG(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendBits);
    FINISH_ACCEL();

    return TRUE;
}
/*
 * Draw one textured rectangle with the state prepared by the matching
 * SetupForCPUToScreen* call.  The rectangle covers width x height pixels at
 * (dstx, dsty) and samples the texture starting at (srcx, srcy).
 *
 * Four vertices are emitted, each as (x, y, s, t), in the order top-left,
 * top-right, bottom-right, bottom-left.  The CP path submits them as a
 * triangle fan, the MMIO path as a quad list.
 */
static void
FUNC_NAME(R200SubsequentCPUToScreenTexture) (
    ScrnInfoPtr pScrn,
    int dstx,
    int dsty,
    int srcx,
    int srcy,
    int width,
    int height
)
{
    RADEONInfoPtr info = RADEONPTR(pScrn);
    CARD32 fboffset, colorpitch;
    float left, top, right, bottom;
    float texLeft, texTop, texRight, texBottom;
    float vtx[16];
    int shift, v;
    ACCEL_PREAMBLE();

    /* Note: we can't simply set up the 3D surface at the same location as the
     * front buffer, because the 2048x2048 limit on coordinates may be smaller
     * than the (MergedFB) screen, and some apps may draw offscreen pictures
     * beyond that limit.  Instead the colour buffer origin is moved close to
     * the destination and only the remainder is kept as vertex coordinates.
     * Can't use arbitrary offsets for color tiling
     */
    if (!info->tilingEnabled) {
        /* Linear: origin at the destination pixel rounded down to 16 bytes. */
        shift = (pScrn->bitsPerPixel >> 4);
        fboffset = (info->fbLocation + pScrn->fbOffset +
                    ((pScrn->displayWidth * dsty + dstx) << shift)) & ~15;
        left = ((dstx << shift) % 16) >> shift;
        top = 0.0;
    } else {
        /* can't play tricks with x coordinate, or could we - tiling is disabled anyway in that case */
        fboffset = info->fbLocation + pScrn->fbOffset +
                   (pScrn->displayWidth * (dsty & ~15) * (pScrn->bitsPerPixel >> 3));
        left = dstx;
        top = (dsty % 16);
    }

    right = width + left;
    bottom = height + top;
    texLeft = srcx;
    texRight = srcx + width;
    texTop = srcy;
    texBottom = srcy + height;

    /* Vertex stream, in emission order. */
    vtx[0] = left;   vtx[1] = top;     vtx[2] = texLeft;   vtx[3] = texTop;
    vtx[4] = right;  vtx[5] = top;     vtx[6] = texRight;  vtx[7] = texTop;
    vtx[8] = right;  vtx[9] = bottom;  vtx[10] = texRight; vtx[11] = texBottom;
    vtx[12] = left;  vtx[13] = bottom; vtx[14] = texLeft;  vtx[15] = texBottom;

    colorpitch = pScrn->displayWidth |
        ((info->tilingEnabled && (dsty <= pScrn->virtualY)) ? RADEON_COLOR_TILE_ENABLE : 0);

#ifdef ACCEL_CP
    /* 2 register writes (4 dwords) + packet header + 1 control dword +
     * 16 vertex dwords + 1 register write (2 dwords) = 24.
     */
    BEGIN_RING(24);
    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);

    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, 16));
    /* RADEON_SE_VF_CNTL */
    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
             RADEON_CP_VC_CNTL_PRIM_WALK_RING |
             (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));

    for (v = 0; v < 16; v++) {
        OUT_RING(F_TO_DW(vtx[v]));
    }

    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
    ADVANCE_RING();
#else
    /* 3 setup writes + 16 vertex dwords + 1 wait = 20. */
    BEGIN_ACCEL(20);
    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, fboffset);

    OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
                                      RADEON_VF_PRIM_WALK_DATA |
                                      4 << RADEON_VF_NUM_VERTICES_SHIFT));

    for (v = 0; v < 16; v++) {
        OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, F_TO_DW(vtx[v]));
    }

    OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
    FINISH_ACCEL();
#endif
}
#undef FUNC_NAME
#endif /* USE_XAA */