xenocara/xserver/miext/rootless/accel/rlBlt.c

/*
 * Accelerated rootless blit
 */
/*
 * This code is largely copied from fbBlt.c.
 *
 * Copyright © 1998 Keith Packard
 * Copyright (c) 2002 Apple Computer, Inc. All Rights Reserved.
 * Copyright (c) 2003 Torrey T. Lyons. All Rights Reserved.
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Keith Packard not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission.  Keith Packard makes no
 * representations about the suitability of this software for any purpose.  It
 * is provided "as is" without express or implied warranty.
 *
 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

#ifdef HAVE_DIX_CONFIG_H
#include <dix-config.h>
#endif

#include "fb.h"
#include "rootlessCommon.h"
#include "rlAccel.h"


/*
 * rlBlt --
 *	Copy a width x height rectangle of bits from the source scanlines to
 *	the destination scanlines, merging with the destination according to
 *	the raster op `alu' and the plane mask `pm'.
 *
 *	srcLine, dstLine     first scanline of source / destination, addressed
 *			     in FbBits words
 *	srcStride, dstStride distance between scanlines, in FbBits words
 *	srcX, dstX, width    horizontal start positions and span width, in bits
 *	height		     number of scanlines to process
 *	pDstScreen	     screen whose SCREENREC()->imp callbacks (CopyBytes,
 *			     CompositePixels) are tried for acceleration
 *	alu, pm		     raster op and plane mask fed to the merge-rop macros
 *	bpp		     bits per pixel; selects the 24bpp helper and the
 *			     32bpp XRGB->ARGB accelerated path
 *	reverse		     walk each scanline right to left
 *	upsidedown	     walk the scanlines bottom to top
 *
 *	When the span is made of whole words only, the op is GXcopy and the
 *	copy is larger than rootless_CopyBytes_threshold, the work is handed to
 *	the rootless implementation callbacks if they are available.  In every
 *	other case (or if CompositePixels does not return Success) the generic
 *	word-at-a-time loops copied from fbBlt.c do the copy.
 */
void
rlBlt (FbBits   *srcLine,
       FbStride	srcStride,
       int	srcX,

       ScreenPtr pDstScreen,
       FbBits   *dstLine,
       FbStride dstStride,
       int	dstX,

       int	width,
       int	height,

       int	alu,
       FbBits	pm,
       int	bpp,

       Bool	reverse,
       Bool	upsidedown)
{
    FbBits  *src, *dst;
    int	    leftShift, rightShift;
    FbBits  startmask, endmask;
    FbBits  bits, bits1;
    int	    n, nmiddle;
    Bool    destInvarient;
    int	    startbyte, endbyte;
    FbDeclareMergeRop ();

#ifdef FB_24BIT
    /* 24bpp with a plane mask that fails FbCheck24Pix goes to fbBlt24. */
    if (bpp == 24 && !FbCheck24Pix (pm))
    {
	fbBlt24 (srcLine, srcStride, srcX, dstLine, dstStride, dstX,
		 width, height, alu, pm, reverse, upsidedown);
	return;
    }
#endif
    FbInitializeMergeRop(alu, pm);
    destInvarient = FbDestInvarientMergeRop();
    if (upsidedown)
    {
	/* Start at the last scanline and step backwards through memory. */
	srcLine += (height - 1) * (srcStride);
	dstLine += (height - 1) * (dstStride);
	srcStride = -srcStride;
	dstStride = -dstStride;
    }
    /*
     * Split the destination span into an optional partial leading word
     * (startmask/startbyte), nmiddle whole words, and an optional partial
     * trailing word (endmask/endbyte).
     */
    FbMaskBitsBytes (dstX, width, destInvarient, startmask, startbyte,
		     nmiddle, endmask, endbyte);

    /*
     * Beginning of the rootless acceleration code
     *
     * Only whole-word GXcopy spans above the size threshold are offered to
     * the implementation callbacks.
     *
     * NOTE(review): when upsidedown is set, srcStride/dstStride are negative
     * at this point; confirm the callbacks accept the resulting row-byte
     * values.
     */
    if (!startmask && !endmask && alu == GXcopy &&
        height * nmiddle * sizeof(*dst) > rootless_CopyBytes_threshold)
    {
	if (pm == FB_ALLONES && SCREENREC(pDstScreen)->imp->CopyBytes)
	{
	    /* srcX/dstX are in bits; >> 3 turns them into byte offsets. */
	    SCREENREC(pDstScreen)->imp->CopyBytes(
                            nmiddle * sizeof(*dst), height,
                            (char *) srcLine + (srcX >> 3),
                            srcStride * sizeof (*src),
                            (char *) dstLine + (dstX >> 3),
                            dstStride * sizeof (*dst));
	    return;
	}

	/* FIXME: the pm test here isn't super-wonderful - just because
	   we don't care about the top eight bits doesn't necessarily
	   mean we want them set to 255. But doing this does give a
	   factor of two performance improvement when copying from a
	   pixmap to a window, which is pretty common.. */

	else if (bpp == 32 && sizeof(FbBits) == 4 &&
                 pm == 0x00FFFFFFUL && !reverse &&
                 SCREENREC(pDstScreen)->imp->CompositePixels)
	{
	    /* need to copy XRGB to ARGB. */

	    /*
	     * These arrays are deliberately NOT called src/dst: a local
	     * `void *src[2]' would shadow the FbBits *src above, and
	     * sizeof(*src) would then be the size of a pointer rather than
	     * of an FbBits word, giving a wrong source row-byte count
	     * wherever pointers are wider than FbBits.
	     */
	    void *srcPtrs[2], *dstPtrs[2];
	    unsigned int src_rowbytes[2], dest_rowbytes[2];
            unsigned int fn;

	    srcPtrs[0] = (char *) srcLine + (srcX >> 3);
	    srcPtrs[1] = NULL;
	    src_rowbytes[0] = srcStride * sizeof(FbBits);
	    src_rowbytes[1] = 0;

	    dstPtrs[0] = (char *) dstLine + (dstX >> 3);
	    dstPtrs[1] = dstPtrs[0];
	    dest_rowbytes[0] = dstStride * sizeof(FbBits);
	    dest_rowbytes[1] = dest_rowbytes[0];

	    fn = RL_COMPOSITE_FUNCTION(RL_COMPOSITE_SRC, RL_DEPTH_ARGB8888,
                                       RL_DEPTH_NIL, RL_DEPTH_ARGB8888);

	    /*
	     * With bpp == 32 and 4-byte FbBits, nmiddle words is also the
	     * span width in pixels.  Fall through to the generic code if the
	     * callback does not report Success.
	     */
            if (SCREENREC(pDstScreen)->imp->CompositePixels(
                                nmiddle, height,
                                fn, srcPtrs, src_rowbytes,
                                NULL, 0, dstPtrs, dest_rowbytes) == Success)
            {
                return;
            }
	}
    }
    /* End of the rootless acceleration code */

    if (reverse)
    {
	/*
	 * Point one word past the last word of the span; the loops below
	 * pre-decrement before every access.
	 */
	srcLine += ((srcX + width - 1) >> FB_SHIFT) + 1;
	dstLine += ((dstX + width - 1) >> FB_SHIFT) + 1;
	srcX = (srcX + width - 1) & FB_MASK;
	dstX = (dstX + width - 1) & FB_MASK;
    }
    else
    {
	/* Advance to the first word of the span, keep the bit offset. */
	srcLine += srcX >> FB_SHIFT;
	dstLine += dstX >> FB_SHIFT;
	srcX &= FB_MASK;
	dstX &= FB_MASK;
    }
    if (srcX == dstX)
    {
	/* Source and destination share the same bit offset: no shifting. */
	while (height--)
	{
	    src = srcLine;
	    srcLine += srcStride;
	    dst = dstLine;
	    dstLine += dstStride;
	    if (reverse)
	    {
		/* Right to left: trailing partial word, middle, leading. */
		if (endmask)
		{
		    bits = *--src;
		    --dst;
		    FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
		}
		n = nmiddle;
		if (destInvarient)
		{
		    while (n--)
			*--dst = FbDoDestInvarientMergeRop(*--src);
		}
		else
		{
		    while (n--)
		    {
			bits = *--src;
			--dst;
			*dst = FbDoMergeRop (bits, *dst);
		    }
		}
		if (startmask)
		{
		    bits = *--src;
		    --dst;
		    FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
		}
	    }
	    else
	    {
		/* Left to right: leading partial word, middle, trailing. */
		if (startmask)
		{
		    bits = *src++;
		    FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
		    dst++;
		}
		n = nmiddle;
		if (destInvarient)
		{
#if 0
		    /*
		     * This provides some speedup on screen->screen blts
		     * over the PCI bus, usually about 10%.  But fb
		     * isn't usually used for this operation...
		     */
		    if (_ca2 + 1 == 0 && _cx2 == 0)
		    {
			FbBits	t1, t2, t3, t4;
			while (n >= 4)
			{
			    t1 = *src++;
			    t2 = *src++;
			    t3 = *src++;
			    t4 = *src++;
			    *dst++ = t1;
			    *dst++ = t2;
			    *dst++ = t3;
			    *dst++ = t4;
			    n -= 4;
			}
		    }
#endif
		    while (n--)
			*dst++ = FbDoDestInvarientMergeRop(*src++);
		}
		else
		{
		    while (n--)
		    {
			bits = *src++;
			*dst = FbDoMergeRop (bits, *dst);
			dst++;
		    }
		}
		if (endmask)
		{
		    bits = *src;
		    FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
		}
	    }
	}
    }
    else
    {
	/*
	 * Different bit offsets: every destination word is assembled from
	 * two neighbouring source words.  leftShift + rightShift == FB_UNIT.
	 */
	if (srcX > dstX)
	{
	    leftShift = srcX - dstX;
	    rightShift = FB_UNIT - leftShift;
	}
	else
	{
	    rightShift = dstX - srcX;
	    leftShift = FB_UNIT - rightShift;
	}
	while (height--)
	{
	    src = srcLine;
	    srcLine += srcStride;
	    dst = dstLine;
	    dstLine += dstStride;

	    /* bits1 carries the most recently fetched source word. */
	    bits1 = 0;
	    if (reverse)
	    {
		if (srcX < dstX)
		    bits1 = *--src;
		if (endmask)
		{
		    bits = FbScrRight(bits1, rightShift);
		    /*
		     * Fetch another source word only if the mask still
		     * covers bits that come from it.
		     */
		    if (FbScrRight(endmask, leftShift))
		    {
			bits1 = *--src;
			bits |= FbScrLeft(bits1, leftShift);
		    }
		    --dst;
		    FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
		}
		n = nmiddle;
		if (destInvarient)
		{
		    while (n--)
		    {
			bits = FbScrRight(bits1, rightShift);
			bits1 = *--src;
			bits |= FbScrLeft(bits1, leftShift);
			--dst;
			*dst = FbDoDestInvarientMergeRop(bits);
		    }
		}
		else
		{
		    while (n--)
		    {
			bits = FbScrRight(bits1, rightShift);
			bits1 = *--src;
			bits |= FbScrLeft(bits1, leftShift);
			--dst;
			*dst = FbDoMergeRop(bits, *dst);
		    }
		}
		if (startmask)
		{
		    bits = FbScrRight(bits1, rightShift);
		    if (FbScrRight(startmask, leftShift))
		    {
			bits1 = *--src;
			bits |= FbScrLeft(bits1, leftShift);
		    }
		    --dst;
		    FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
		}
	    }
	    else
	    {
		if (srcX > dstX)
		    bits1 = *src++;
		if (startmask)
		{
		    bits = FbScrLeft(bits1, leftShift);
		    bits1 = *src++;
		    bits |= FbScrRight(bits1, rightShift);
		    FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
		    dst++;
		}
		n = nmiddle;
		if (destInvarient)
		{
		    while (n--)
		    {
			bits = FbScrLeft(bits1, leftShift);
			bits1 = *src++;
			bits |= FbScrRight(bits1, rightShift);
			*dst = FbDoDestInvarientMergeRop(bits);
			dst++;
		    }
		}
		else
		{
		    while (n--)
		    {
			bits = FbScrLeft(bits1, leftShift);
			bits1 = *src++;
			bits |= FbScrRight(bits1, rightShift);
			*dst = FbDoMergeRop(bits, *dst);
			dst++;
		    }
		}
		if (endmask)
		{
		    bits = FbScrLeft(bits1, leftShift);
		    /*
		     * Fetch another source word only if the mask still
		     * covers bits that come from it.
		     */
		    if (FbScrLeft(endmask, rightShift))
		    {
			bits1 = *src;
			bits |= FbScrRight(bits1, rightShift);
		    }
		    FbDoRightMaskByteMergeRop (dst, bits, endbyte, endmask);
		}
	    }
	}
    }
}
Importing xserver from X.Org 7.2RC2 2006-11-26 11:13:41 -07:00			`/*`
			`* Accelerated rootless blit`
			`*/`
			`/*`
			`* This code is largely copied from fbBlt.c.`
			`*`
			`* Copyright © 1998 Keith Packard`
			`* Copyright (c) 2002 Apple Computer, Inc. All Rights Reserved.`
			`* Copyright (c) 2003 Torrey T. Lyons. All Rights Reserved.`
			`*`
			`* Permission to use, copy, modify, distribute, and sell this software and its`
			`* documentation for any purpose is hereby granted without fee, provided that`
			`* the above copyright notice appear in all copies and that both that`
			`* copyright notice and this permission notice appear in supporting`
			`* documentation, and that the name of Keith Packard not be used in`
			`* advertising or publicity pertaining to distribution of the software without`
			`* specific, written prior permission. Keith Packard makes no`
			`* representations about the suitability of this software for any purpose. It`
			`* is provided "as is" without express or implied warranty.`
			`*`
			`* KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,`
			`* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO`
			`* EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR`
			`* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,`
			`* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER`
			`* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR`
			`* PERFORMANCE OF THIS SOFTWARE.`
			`*/`

			`#ifdef HAVE_DIX_CONFIG_H`
			`#include <dix-config.h>`
			`#endif`

			`#include "fb.h"`
			`#include "rootlessCommon.h"`
			`#include "rlAccel.h"`


			`void`
			`rlBlt (FbBits *srcLine,`
			`FbStride srcStride,`
			`int srcX,`

			`ScreenPtr pDstScreen,`
			`FbBits *dstLine,`
			`FbStride dstStride,`
			`int dstX,`

			`int width,`
			`int height,`

			`int alu,`
			`FbBits pm,`
			`int bpp,`

			`Bool reverse,`
			`Bool upsidedown)`
			`{`
			`FbBits src, dst;`
			`int leftShift, rightShift;`
			`FbBits startmask, endmask;`
			`FbBits bits, bits1;`
			`int n, nmiddle;`
			`Bool destInvarient;`
			`int startbyte, endbyte;`
			`FbDeclareMergeRop ();`

			`#ifdef FB_24BIT`
			`if (bpp == 24 && !FbCheck24Pix (pm))`
			`{`
			`fbBlt24 (srcLine, srcStride, srcX, dstLine, dstStride, dstX,`
			`width, height, alu, pm, reverse, upsidedown);`
			`return;`
			`}`
			`#endif`
			`FbInitializeMergeRop(alu, pm);`
			`destInvarient = FbDestInvarientMergeRop();`
			`if (upsidedown)`
			`{`
			`srcLine += (height - 1) * (srcStride);`
			`dstLine += (height - 1) * (dstStride);`
			`srcStride = -srcStride;`
			`dstStride = -dstStride;`
			`}`
			`FbMaskBitsBytes (dstX, width, destInvarient, startmask, startbyte,`
			`nmiddle, endmask, endbyte);`

			`/*`
			`* Beginning of the rootless acceleration code`
			`*/`
			`if (!startmask && !endmask && alu == GXcopy &&`
			`height * nmiddle * sizeof(*dst) > rootless_CopyBytes_threshold)`
			`{`
			`if (pm == FB_ALLONES && SCREENREC(pDstScreen)->imp->CopyBytes)`
			`{`
			`SCREENREC(pDstScreen)->imp->CopyBytes(`
			`nmiddle * sizeof(*dst), height,`
			`(char *) srcLine + (srcX >> 3),`
			`srcStride * sizeof (*src),`
			`(char *) dstLine + (dstX >> 3),`
			`dstStride * sizeof (*dst));`
			`return;`
			`}`

			`/* FIXME: the pm test here isn't super-wonderful - just because`
			`we don't care about the top eight bits doesn't necessarily`
			`mean we want them set to 255. But doing this does give a`
			`factor of two performance improvement when copying from a`
			`pixmap to a window, which is pretty common.. */`

			`else if (bpp == 32 && sizeof(FbBits) == 4 &&`
			`pm == 0x00FFFFFFUL && !reverse &&`
			`SCREENREC(pDstScreen)->imp->CompositePixels)`
			`{`
			`/* need to copy XRGB to ARGB. */`

			`void src[2], dest[2];`
			`unsigned int src_rowbytes[2], dest_rowbytes[2];`
			`unsigned int fn;`

			`src[0] = (char *) srcLine + (srcX >> 3);`
			`src[1] = NULL;`
			`src_rowbytes[0] = srcStride * sizeof(*src);`
			`src_rowbytes[1] = 0;`

			`dest[0] = (char *) dstLine + (dstX >> 3);`
			`dest[1] = dest[0];`
			`dest_rowbytes[0] = dstStride * sizeof(*dst);`
			`dest_rowbytes[1] = dest_rowbytes[0];`

			`fn = RL_COMPOSITE_FUNCTION(RL_COMPOSITE_SRC, RL_DEPTH_ARGB8888,`
			`RL_DEPTH_NIL, RL_DEPTH_ARGB8888);`

			`if (SCREENREC(pDstScreen)->imp->CompositePixels(`
			`nmiddle, height,`
			`fn, src, src_rowbytes,`
			`NULL, 0, dest, dest_rowbytes) == Success)`
			`{`
			`return;`
			`}`
			`}`
			`}`
			`/* End of the rootless acceleration code */`

			`if (reverse)`
			`{`
			`srcLine += ((srcX + width - 1) >> FB_SHIFT) + 1;`
			`dstLine += ((dstX + width - 1) >> FB_SHIFT) + 1;`
			`srcX = (srcX + width - 1) & FB_MASK;`
			`dstX = (dstX + width - 1) & FB_MASK;`
			`}`
			`else`
			`{`
			`srcLine += srcX >> FB_SHIFT;`
			`dstLine += dstX >> FB_SHIFT;`
			`srcX &= FB_MASK;`
			`dstX &= FB_MASK;`
			`}`
			`if (srcX == dstX)`
			`{`
			`while (height--)`
			`{`
			`src = srcLine;`
			`srcLine += srcStride;`
			`dst = dstLine;`
			`dstLine += dstStride;`
			`if (reverse)`
			`{`
			`if (endmask)`
			`{`
			`bits = *--src;`
			`--dst;`
			`FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);`
			`}`
			`n = nmiddle;`
			`if (destInvarient)`
			`{`
			`while (n--)`
			`--dst = FbDoDestInvarientMergeRop(--src);`
			`}`
			`else`
			`{`
			`while (n--)`
			`{`
			`bits = *--src;`
			`--dst;`
			`dst = FbDoMergeRop (bits, dst);`
			`}`
			`}`
			`if (startmask)`
			`{`
			`bits = *--src;`
			`--dst;`
			`FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);`
			`}`
			`}`
			`else`
			`{`
			`if (startmask)`
			`{`
			`bits = *src++;`
			`FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);`
			`dst++;`
			`}`
			`n = nmiddle;`
			`if (destInvarient)`
			`{`
			`#if 0`
			`/*`
			`* This provides some speedup on screen->screen blts`
			`* over the PCI bus, usually about 10%. But fb`
			`* isn't usually used for this operation...`
			`*/`
			`if (_ca2 + 1 == 0 && _cx2 == 0)`
			`{`
			`FbBits t1, t2, t3, t4;`
			`while (n >= 4)`
			`{`
			`t1 = *src++;`
			`t2 = *src++;`
			`t3 = *src++;`
			`t4 = *src++;`
			`*dst++ = t1;`
			`*dst++ = t2;`
			`*dst++ = t3;`
			`*dst++ = t4;`
			`n -= 4;`
			`}`
			`}`
			`#endif`
			`while (n--)`
			`dst++ = FbDoDestInvarientMergeRop(src++);`
			`}`
			`else`
			`{`
			`while (n--)`
			`{`
			`bits = *src++;`
			`dst = FbDoMergeRop (bits, dst);`
			`dst++;`
			`}`
			`}`
			`if (endmask)`
			`{`
			`bits = *src;`
			`FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);`
			`}`
			`}`
			`}`
			`}`
			`else`
			`{`
			`if (srcX > dstX)`
			`{`
			`leftShift = srcX - dstX;`
			`rightShift = FB_UNIT - leftShift;`
			`}`
			`else`
			`{`
			`rightShift = dstX - srcX;`
			`leftShift = FB_UNIT - rightShift;`
			`}`
			`while (height--)`
			`{`
			`src = srcLine;`
			`srcLine += srcStride;`
			`dst = dstLine;`
			`dstLine += dstStride;`

			`bits1 = 0;`
			`if (reverse)`
			`{`
			`if (srcX < dstX)`
			`bits1 = *--src;`
			`if (endmask)`
			`{`
			`bits = FbScrRight(bits1, rightShift);`
			`if (FbScrRight(endmask, leftShift))`
			`{`
			`bits1 = *--src;`
			`bits \|= FbScrLeft(bits1, leftShift);`
			`}`
			`--dst;`
			`FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);`
			`}`
			`n = nmiddle;`
			`if (destInvarient)`
			`{`
			`while (n--)`
			`{`
			`bits = FbScrRight(bits1, rightShift);`
			`bits1 = *--src;`
			`bits \|= FbScrLeft(bits1, leftShift);`
			`--dst;`
			`*dst = FbDoDestInvarientMergeRop(bits);`
			`}`
			`}`
			`else`
			`{`
			`while (n--)`
			`{`
			`bits = FbScrRight(bits1, rightShift);`
			`bits1 = *--src;`
			`bits \|= FbScrLeft(bits1, leftShift);`
			`--dst;`
			`dst = FbDoMergeRop(bits, dst);`
			`}`
			`}`
			`if (startmask)`
			`{`
			`bits = FbScrRight(bits1, rightShift);`
			`if (FbScrRight(startmask, leftShift))`
			`{`
			`bits1 = *--src;`
			`bits \|= FbScrLeft(bits1, leftShift);`
			`}`
			`--dst;`
			`FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);`
			`}`
			`}`
			`else`
			`{`
			`if (srcX > dstX)`
			`bits1 = *src++;`
			`if (startmask)`
			`{`
			`bits = FbScrLeft(bits1, leftShift);`
			`bits1 = *src++;`
			`bits \|= FbScrRight(bits1, rightShift);`
			`FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);`
			`dst++;`
			`}`
			`n = nmiddle;`
			`if (destInvarient)`
			`{`
			`while (n--)`
			`{`
			`bits = FbScrLeft(bits1, leftShift);`
			`bits1 = *src++;`
			`bits \|= FbScrRight(bits1, rightShift);`
			`*dst = FbDoDestInvarientMergeRop(bits);`
			`dst++;`
			`}`
			`}`
			`else`
			`{`
			`while (n--)`
			`{`
			`bits = FbScrLeft(bits1, leftShift);`
			`bits1 = *src++;`
			`bits \|= FbScrRight(bits1, rightShift);`
			`dst = FbDoMergeRop(bits, dst);`
			`dst++;`
			`}`
			`}`
			`if (endmask)`
			`{`
			`bits = FbScrLeft(bits1, leftShift);`
			`if (FbScrLeft(endmask, rightShift))`
			`{`
			`bits1 = *src;`
			`bits \|= FbScrRight(bits1, rightShift);`
			`}`
			`FbDoRightMaskByteMergeRop (dst, bits, endbyte, endmask);`
			`}`
			`}`
			`}`
			`}`
			`}`