409 lines
9.6 KiB
C
409 lines
9.6 KiB
C
/*
 * Accelerated rootless blit
 */
/*
 * This code is largely copied from fbBlt.c.
 *
 * Copyright © 1998 Keith Packard
 * Copyright (c) 2002 Apple Computer, Inc. All Rights Reserved.
 * Copyright (c) 2003 Torrey T. Lyons. All Rights Reserved.
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Keith Packard not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission.  Keith Packard makes no
 * representations about the suitability of this software for any purpose.  It
 * is provided "as is" without express or implied warranty.
 *
 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */
|
|
|
|
#ifdef HAVE_DIX_CONFIG_H
|
|
#include <dix-config.h>
|
|
#endif
|
|
|
|
#include <stddef.h> /* For NULL */
|
|
#include <string.h>
|
|
#include "fb.h"
|
|
#include "rootlessCommon.h"
|
|
#include "rlAccel.h"
|
|
|
|
/*
 * InitializeShifts(sx, dx, ls, rs)
 *
 * Compute the pair of complementary shift counts needed to realign source
 * bits at bit offset `sx` with a destination at bit offset `dx`:
 *
 *   sx > dx:  ls = sx - dx,  rs = FB_UNIT - ls
 *   sx < dx:  rs = dx - sx,  ls = FB_UNIT - rs
 *   sx == dx: ls and rs are left untouched.
 *
 * `ls` and `rs` must be assignable lvalues.  `sx` and `dx` are evaluated
 * more than once, so they must be free of side effects.  Every argument is
 * parenthesized so that expression arguments (e.g. `x & FB_MASK`) expand
 * with the intended precedence.
 */
#define InitializeShifts(sx,dx,ls,rs) { \
    if ((sx) != (dx)) { \
	if ((sx) > (dx)) { \
	    (ls) = (sx) - (dx); \
	    (rs) = FB_UNIT - (ls); \
	} else { \
	    (rs) = (dx) - (sx); \
	    (ls) = FB_UNIT - (rs); \
	} \
    } \
}
|
|
|
|
void
|
|
rlBlt (FbBits *srcLine,
|
|
FbStride srcStride,
|
|
int srcX,
|
|
|
|
ScreenPtr pDstScreen,
|
|
FbBits *dstLine,
|
|
FbStride dstStride,
|
|
int dstX,
|
|
|
|
int width,
|
|
int height,
|
|
|
|
int alu,
|
|
FbBits pm,
|
|
int bpp,
|
|
|
|
Bool reverse,
|
|
Bool upsidedown)
|
|
{
|
|
FbBits *src, *dst;
|
|
int leftShift, rightShift;
|
|
FbBits startmask, endmask;
|
|
FbBits bits, bits1;
|
|
int n, nmiddle;
|
|
Bool destInvarient;
|
|
int startbyte, endbyte;
|
|
FbDeclareMergeRop ();
|
|
|
|
#ifdef FB_24BIT
|
|
if (bpp == 24 && !FbCheck24Pix (pm))
|
|
{
|
|
fbBlt24 (srcLine, srcStride, srcX, dstLine, dstStride, dstX,
|
|
width, height, alu, pm, reverse, upsidedown);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
if (alu == GXcopy && pm == FB_ALLONES && !reverse &&
|
|
!(srcX & 7) && !(dstX & 7) && !(width & 7)) {
|
|
int i;
|
|
CARD8 *src = (CARD8 *) srcLine;
|
|
CARD8 *dst = (CARD8 *) dstLine;
|
|
|
|
srcStride *= sizeof(FbBits);
|
|
dstStride *= sizeof(FbBits);
|
|
width >>= 3;
|
|
src += (srcX >> 3);
|
|
dst += (dstX >> 3);
|
|
|
|
if (!upsidedown)
|
|
for (i = 0; i < height; i++)
|
|
memcpy(dst + i * dstStride, src + i * srcStride, width);
|
|
else
|
|
for (i = height - 1; i >= 0; i--)
|
|
memcpy(dst + i * dstStride, src + i * srcStride, width);
|
|
|
|
return;
|
|
}
|
|
|
|
FbInitializeMergeRop(alu, pm);
|
|
destInvarient = FbDestInvarientMergeRop();
|
|
if (upsidedown)
|
|
{
|
|
srcLine += (height - 1) * (srcStride);
|
|
dstLine += (height - 1) * (dstStride);
|
|
srcStride = -srcStride;
|
|
dstStride = -dstStride;
|
|
}
|
|
FbMaskBitsBytes (dstX, width, destInvarient, startmask, startbyte,
|
|
nmiddle, endmask, endbyte);
|
|
|
|
/*
|
|
* Beginning of the rootless acceleration code
|
|
*/
|
|
if (!startmask && !endmask && alu == GXcopy &&
|
|
height * nmiddle * sizeof(*dst) > rootless_CopyBytes_threshold)
|
|
{
|
|
if (pm == FB_ALLONES && SCREENREC(pDstScreen)->imp->CopyBytes)
|
|
{
|
|
SCREENREC(pDstScreen)->imp->CopyBytes(
|
|
nmiddle * sizeof(*dst), height,
|
|
(char *) srcLine + (srcX >> 3),
|
|
srcStride * sizeof (*src),
|
|
(char *) dstLine + (dstX >> 3),
|
|
dstStride * sizeof (*dst));
|
|
return;
|
|
}
|
|
|
|
/* FIXME: the pm test here isn't super-wonderful - just because
|
|
we don't care about the top eight bits doesn't necessarily
|
|
mean we want them set to 255. But doing this does give a
|
|
factor of two performance improvement when copying from a
|
|
pixmap to a window, which is pretty common.. */
|
|
|
|
else if (bpp == 32 && sizeof(FbBits) == 4 &&
|
|
pm == 0x00FFFFFFUL && !reverse &&
|
|
SCREENREC(pDstScreen)->imp->CompositePixels)
|
|
{
|
|
/* need to copy XRGB to ARGB. */
|
|
|
|
void *src[2], *dest[2];
|
|
unsigned int src_rowbytes[2], dest_rowbytes[2];
|
|
unsigned int fn;
|
|
|
|
src[0] = (char *) srcLine + (srcX >> 3);
|
|
src[1] = NULL;
|
|
src_rowbytes[0] = srcStride * sizeof(*src);
|
|
src_rowbytes[1] = 0;
|
|
|
|
dest[0] = (char *) dstLine + (dstX >> 3);
|
|
dest[1] = dest[0];
|
|
dest_rowbytes[0] = dstStride * sizeof(*dst);
|
|
dest_rowbytes[1] = dest_rowbytes[0];
|
|
|
|
fn = RL_COMPOSITE_FUNCTION(RL_COMPOSITE_SRC, RL_DEPTH_ARGB8888,
|
|
RL_DEPTH_NIL, RL_DEPTH_ARGB8888);
|
|
|
|
if (SCREENREC(pDstScreen)->imp->CompositePixels(
|
|
nmiddle, height,
|
|
fn, src, src_rowbytes,
|
|
NULL, 0, dest, dest_rowbytes) == Success)
|
|
{
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
/* End of the rootless acceleration code */
|
|
|
|
if (reverse)
|
|
{
|
|
srcLine += ((srcX + width - 1) >> FB_SHIFT) + 1;
|
|
dstLine += ((dstX + width - 1) >> FB_SHIFT) + 1;
|
|
srcX = (srcX + width - 1) & FB_MASK;
|
|
dstX = (dstX + width - 1) & FB_MASK;
|
|
}
|
|
else
|
|
{
|
|
srcLine += srcX >> FB_SHIFT;
|
|
dstLine += dstX >> FB_SHIFT;
|
|
srcX &= FB_MASK;
|
|
dstX &= FB_MASK;
|
|
}
|
|
if (srcX == dstX)
|
|
{
|
|
while (height--)
|
|
{
|
|
src = srcLine;
|
|
srcLine += srcStride;
|
|
dst = dstLine;
|
|
dstLine += dstStride;
|
|
if (reverse)
|
|
{
|
|
if (endmask)
|
|
{
|
|
bits = *--src;
|
|
--dst;
|
|
FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
|
|
}
|
|
n = nmiddle;
|
|
if (destInvarient)
|
|
{
|
|
while (n--)
|
|
*--dst = FbDoDestInvarientMergeRop(*--src);
|
|
}
|
|
else
|
|
{
|
|
while (n--)
|
|
{
|
|
bits = *--src;
|
|
--dst;
|
|
*dst = FbDoMergeRop (bits, *dst);
|
|
}
|
|
}
|
|
if (startmask)
|
|
{
|
|
bits = *--src;
|
|
--dst;
|
|
FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (startmask)
|
|
{
|
|
bits = *src++;
|
|
FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
|
|
dst++;
|
|
}
|
|
n = nmiddle;
|
|
if (destInvarient)
|
|
{
|
|
#if 0
|
|
/*
|
|
* This provides some speedup on screen->screen blts
|
|
* over the PCI bus, usually about 10%. But fb
|
|
* isn't usually used for this operation...
|
|
*/
|
|
if (_ca2 + 1 == 0 && _cx2 == 0)
|
|
{
|
|
FbBits t1, t2, t3, t4;
|
|
while (n >= 4)
|
|
{
|
|
t1 = *src++;
|
|
t2 = *src++;
|
|
t3 = *src++;
|
|
t4 = *src++;
|
|
*dst++ = t1;
|
|
*dst++ = t2;
|
|
*dst++ = t3;
|
|
*dst++ = t4;
|
|
n -= 4;
|
|
}
|
|
}
|
|
#endif
|
|
while (n--)
|
|
*dst++ = FbDoDestInvarientMergeRop(*src++);
|
|
}
|
|
else
|
|
{
|
|
while (n--)
|
|
{
|
|
bits = *src++;
|
|
*dst = FbDoMergeRop (bits, *dst);
|
|
dst++;
|
|
}
|
|
}
|
|
if (endmask)
|
|
{
|
|
bits = *src;
|
|
FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (srcX > dstX)
|
|
{
|
|
leftShift = srcX - dstX;
|
|
rightShift = FB_UNIT - leftShift;
|
|
}
|
|
else
|
|
{
|
|
rightShift = dstX - srcX;
|
|
leftShift = FB_UNIT - rightShift;
|
|
}
|
|
while (height--)
|
|
{
|
|
src = srcLine;
|
|
srcLine += srcStride;
|
|
dst = dstLine;
|
|
dstLine += dstStride;
|
|
|
|
bits1 = 0;
|
|
if (reverse)
|
|
{
|
|
if (srcX < dstX)
|
|
bits1 = *--src;
|
|
if (endmask)
|
|
{
|
|
bits = FbScrRight(bits1, rightShift);
|
|
if (FbScrRight(endmask, leftShift))
|
|
{
|
|
bits1 = *--src;
|
|
bits |= FbScrLeft(bits1, leftShift);
|
|
}
|
|
--dst;
|
|
FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
|
|
}
|
|
n = nmiddle;
|
|
if (destInvarient)
|
|
{
|
|
while (n--)
|
|
{
|
|
bits = FbScrRight(bits1, rightShift);
|
|
bits1 = *--src;
|
|
bits |= FbScrLeft(bits1, leftShift);
|
|
--dst;
|
|
*dst = FbDoDestInvarientMergeRop(bits);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
while (n--)
|
|
{
|
|
bits = FbScrRight(bits1, rightShift);
|
|
bits1 = *--src;
|
|
bits |= FbScrLeft(bits1, leftShift);
|
|
--dst;
|
|
*dst = FbDoMergeRop(bits, *dst);
|
|
}
|
|
}
|
|
if (startmask)
|
|
{
|
|
bits = FbScrRight(bits1, rightShift);
|
|
if (FbScrRight(startmask, leftShift))
|
|
{
|
|
bits1 = *--src;
|
|
bits |= FbScrLeft(bits1, leftShift);
|
|
}
|
|
--dst;
|
|
FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (srcX > dstX)
|
|
bits1 = *src++;
|
|
if (startmask)
|
|
{
|
|
bits = FbScrLeft(bits1, leftShift);
|
|
if (FbScrLeft(startmask, rightShift))
|
|
{
|
|
bits1 = *src++;
|
|
bits |= FbScrRight(bits1, rightShift);
|
|
}
|
|
FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
|
|
dst++;
|
|
}
|
|
n = nmiddle;
|
|
if (destInvarient)
|
|
{
|
|
while (n--)
|
|
{
|
|
bits = FbScrLeft(bits1, leftShift);
|
|
bits1 = *src++;
|
|
bits |= FbScrRight(bits1, rightShift);
|
|
*dst = FbDoDestInvarientMergeRop(bits);
|
|
dst++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
while (n--)
|
|
{
|
|
bits = FbScrLeft(bits1, leftShift);
|
|
bits1 = *src++;
|
|
bits |= FbScrRight(bits1, rightShift);
|
|
*dst = FbDoMergeRop(bits, *dst);
|
|
dst++;
|
|
}
|
|
}
|
|
if (endmask)
|
|
{
|
|
bits = FbScrLeft(bits1, leftShift);
|
|
if (FbScrLeft(endmask, rightShift))
|
|
{
|
|
bits1 = *src;
|
|
bits |= FbScrRight(bits1, rightShift);
|
|
}
|
|
FbDoRightMaskByteMergeRop (dst, bits, endbyte, endmask);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|