xenocara/xserver/miext/rootless/accel/rlBlt.c

409 lines
9.6 KiB
C

/*
* Accelerated rootless blit
*/
/*
* This code is largely copied from fbBlt.c.
*
* Copyright © 1998 Keith Packard
* Copyright (c) 2002 Apple Computer, Inc. All Rights Reserved.
* Copyright (c) 2003 Torrey T. Lyons. All Rights Reserved.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
#ifdef HAVE_DIX_CONFIG_H
#include <dix-config.h>
#endif
#include <stddef.h> /* For NULL */
#include <string.h>
#include "fb.h"
#include "rootlessCommon.h"
#include "rlAccel.h"
#define InitializeShifts(sx,dx,ls,rs) { \
if (sx != dx) { \
if (sx > dx) { \
ls = sx - dx; \
rs = FB_UNIT - ls; \
} else { \
rs = dx - sx; \
ls = FB_UNIT - rs; \
} \
} \
}
void
rlBlt (FbBits *srcLine,
FbStride srcStride,
int srcX,
ScreenPtr pDstScreen,
FbBits *dstLine,
FbStride dstStride,
int dstX,
int width,
int height,
int alu,
FbBits pm,
int bpp,
Bool reverse,
Bool upsidedown)
{
FbBits *src, *dst;
int leftShift, rightShift;
FbBits startmask, endmask;
FbBits bits, bits1;
int n, nmiddle;
Bool destInvarient;
int startbyte, endbyte;
FbDeclareMergeRop ();
#ifdef FB_24BIT
if (bpp == 24 && !FbCheck24Pix (pm))
{
fbBlt24 (srcLine, srcStride, srcX, dstLine, dstStride, dstX,
width, height, alu, pm, reverse, upsidedown);
return;
}
#endif
if (alu == GXcopy && pm == FB_ALLONES && !reverse &&
!(srcX & 7) && !(dstX & 7) && !(width & 7)) {
int i;
CARD8 *src = (CARD8 *) srcLine;
CARD8 *dst = (CARD8 *) dstLine;
srcStride *= sizeof(FbBits);
dstStride *= sizeof(FbBits);
width >>= 3;
src += (srcX >> 3);
dst += (dstX >> 3);
if (!upsidedown)
for (i = 0; i < height; i++)
memcpy(dst + i * dstStride, src + i * srcStride, width);
else
for (i = height - 1; i >= 0; i--)
memcpy(dst + i * dstStride, src + i * srcStride, width);
return;
}
FbInitializeMergeRop(alu, pm);
destInvarient = FbDestInvarientMergeRop();
if (upsidedown)
{
srcLine += (height - 1) * (srcStride);
dstLine += (height - 1) * (dstStride);
srcStride = -srcStride;
dstStride = -dstStride;
}
FbMaskBitsBytes (dstX, width, destInvarient, startmask, startbyte,
nmiddle, endmask, endbyte);
/*
* Beginning of the rootless acceleration code
*/
if (!startmask && !endmask && alu == GXcopy &&
height * nmiddle * sizeof(*dst) > rootless_CopyBytes_threshold)
{
if (pm == FB_ALLONES && SCREENREC(pDstScreen)->imp->CopyBytes)
{
SCREENREC(pDstScreen)->imp->CopyBytes(
nmiddle * sizeof(*dst), height,
(char *) srcLine + (srcX >> 3),
srcStride * sizeof (*src),
(char *) dstLine + (dstX >> 3),
dstStride * sizeof (*dst));
return;
}
/* FIXME: the pm test here isn't super-wonderful - just because
we don't care about the top eight bits doesn't necessarily
mean we want them set to 255. But doing this does give a
factor of two performance improvement when copying from a
pixmap to a window, which is pretty common.. */
else if (bpp == 32 && sizeof(FbBits) == 4 &&
pm == 0x00FFFFFFUL && !reverse &&
SCREENREC(pDstScreen)->imp->CompositePixels)
{
/* need to copy XRGB to ARGB. */
void *src[2], *dest[2];
unsigned int src_rowbytes[2], dest_rowbytes[2];
unsigned int fn;
src[0] = (char *) srcLine + (srcX >> 3);
src[1] = NULL;
src_rowbytes[0] = srcStride * sizeof(*src);
src_rowbytes[1] = 0;
dest[0] = (char *) dstLine + (dstX >> 3);
dest[1] = dest[0];
dest_rowbytes[0] = dstStride * sizeof(*dst);
dest_rowbytes[1] = dest_rowbytes[0];
fn = RL_COMPOSITE_FUNCTION(RL_COMPOSITE_SRC, RL_DEPTH_ARGB8888,
RL_DEPTH_NIL, RL_DEPTH_ARGB8888);
if (SCREENREC(pDstScreen)->imp->CompositePixels(
nmiddle, height,
fn, src, src_rowbytes,
NULL, 0, dest, dest_rowbytes) == Success)
{
return;
}
}
}
/* End of the rootless acceleration code */
if (reverse)
{
srcLine += ((srcX + width - 1) >> FB_SHIFT) + 1;
dstLine += ((dstX + width - 1) >> FB_SHIFT) + 1;
srcX = (srcX + width - 1) & FB_MASK;
dstX = (dstX + width - 1) & FB_MASK;
}
else
{
srcLine += srcX >> FB_SHIFT;
dstLine += dstX >> FB_SHIFT;
srcX &= FB_MASK;
dstX &= FB_MASK;
}
if (srcX == dstX)
{
while (height--)
{
src = srcLine;
srcLine += srcStride;
dst = dstLine;
dstLine += dstStride;
if (reverse)
{
if (endmask)
{
bits = *--src;
--dst;
FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
}
n = nmiddle;
if (destInvarient)
{
while (n--)
*--dst = FbDoDestInvarientMergeRop(*--src);
}
else
{
while (n--)
{
bits = *--src;
--dst;
*dst = FbDoMergeRop (bits, *dst);
}
}
if (startmask)
{
bits = *--src;
--dst;
FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
}
}
else
{
if (startmask)
{
bits = *src++;
FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
dst++;
}
n = nmiddle;
if (destInvarient)
{
#if 0
/*
* This provides some speedup on screen->screen blts
* over the PCI bus, usually about 10%. But fb
* isn't usually used for this operation...
*/
if (_ca2 + 1 == 0 && _cx2 == 0)
{
FbBits t1, t2, t3, t4;
while (n >= 4)
{
t1 = *src++;
t2 = *src++;
t3 = *src++;
t4 = *src++;
*dst++ = t1;
*dst++ = t2;
*dst++ = t3;
*dst++ = t4;
n -= 4;
}
}
#endif
while (n--)
*dst++ = FbDoDestInvarientMergeRop(*src++);
}
else
{
while (n--)
{
bits = *src++;
*dst = FbDoMergeRop (bits, *dst);
dst++;
}
}
if (endmask)
{
bits = *src;
FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
}
}
}
}
else
{
if (srcX > dstX)
{
leftShift = srcX - dstX;
rightShift = FB_UNIT - leftShift;
}
else
{
rightShift = dstX - srcX;
leftShift = FB_UNIT - rightShift;
}
while (height--)
{
src = srcLine;
srcLine += srcStride;
dst = dstLine;
dstLine += dstStride;
bits1 = 0;
if (reverse)
{
if (srcX < dstX)
bits1 = *--src;
if (endmask)
{
bits = FbScrRight(bits1, rightShift);
if (FbScrRight(endmask, leftShift))
{
bits1 = *--src;
bits |= FbScrLeft(bits1, leftShift);
}
--dst;
FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
}
n = nmiddle;
if (destInvarient)
{
while (n--)
{
bits = FbScrRight(bits1, rightShift);
bits1 = *--src;
bits |= FbScrLeft(bits1, leftShift);
--dst;
*dst = FbDoDestInvarientMergeRop(bits);
}
}
else
{
while (n--)
{
bits = FbScrRight(bits1, rightShift);
bits1 = *--src;
bits |= FbScrLeft(bits1, leftShift);
--dst;
*dst = FbDoMergeRop(bits, *dst);
}
}
if (startmask)
{
bits = FbScrRight(bits1, rightShift);
if (FbScrRight(startmask, leftShift))
{
bits1 = *--src;
bits |= FbScrLeft(bits1, leftShift);
}
--dst;
FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
}
}
else
{
if (srcX > dstX)
bits1 = *src++;
if (startmask)
{
bits = FbScrLeft(bits1, leftShift);
if (FbScrLeft(startmask, rightShift))
{
bits1 = *src++;
bits |= FbScrRight(bits1, rightShift);
}
FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
dst++;
}
n = nmiddle;
if (destInvarient)
{
while (n--)
{
bits = FbScrLeft(bits1, leftShift);
bits1 = *src++;
bits |= FbScrRight(bits1, rightShift);
*dst = FbDoDestInvarientMergeRop(bits);
dst++;
}
}
else
{
while (n--)
{
bits = FbScrLeft(bits1, leftShift);
bits1 = *src++;
bits |= FbScrRight(bits1, rightShift);
*dst = FbDoMergeRop(bits, *dst);
dst++;
}
}
if (endmask)
{
bits = FbScrLeft(bits1, leftShift);
if (FbScrLeft(endmask, rightShift))
{
bits1 = *src;
bits |= FbScrRight(bits1, rightShift);
}
FbDoRightMaskByteMergeRop (dst, bits, endbyte, endmask);
}
}
}
}
}