409 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			C
		
	
	
	
			
		
		
	
	
			409 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			C
		
	
	
	
| /*
 | |
|  * Accelerated rootless blit
 | |
|  */
 | |
| /*
 | |
|  * This code is largely copied from fbBlt.c.
 | |
|  *
 | |
|  * Copyright © 1998 Keith Packard
 | |
|  * Copyright (c) 2002 Apple Computer, Inc. All Rights Reserved.
 | |
|  * Copyright (c) 2003 Torrey T. Lyons. All Rights Reserved.
 | |
|  *
 | |
|  * Permission to use, copy, modify, distribute, and sell this software and its
 | |
|  * documentation for any purpose is hereby granted without fee, provided that
 | |
|  * the above copyright notice appear in all copies and that both that
 | |
|  * copyright notice and this permission notice appear in supporting
 | |
|  * documentation, and that the name of Keith Packard not be used in
 | |
|  * advertising or publicity pertaining to distribution of the software without
 | |
|  * specific, written prior permission.  Keith Packard makes no
 | |
|  * representations about the suitability of this software for any purpose.  It
 | |
|  * is provided "as is" without express or implied warranty.
 | |
|  *
 | |
|  * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 | |
|  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 | |
|  * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 | |
|  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 | |
|  * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 | |
|  * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 | |
|  * PERFORMANCE OF THIS SOFTWARE.
 | |
|  */
 | |
| 
 | |
| #ifdef HAVE_DIX_CONFIG_H
 | |
| #include <dix-config.h>
 | |
| #endif
 | |
| 
 | |
| #include <stddef.h> /* For NULL */
 | |
| #include <string.h>
 | |
| #include "fb.h"
 | |
| #include "rootlessCommon.h"
 | |
| #include "rlAccel.h"
 | |
| 
 | |
/*
 * InitializeShifts --
 *
 *	Derive the pair of complementary shift counts used to realign
 *	source bits with destination bits.
 *
 *	sx, dx	source and destination bit offsets (expressions allowed)
 *	ls, rs	lvalues that receive the left and right shift counts;
 *		on return ls + rs == FB_UNIT
 *
 *	When sx == dx no shifting is required and ls/rs are left untouched.
 *
 *	Every argument is parenthesized so that callers may pass compound
 *	expressions (e.g. "x & FB_MASK") without operator-precedence
 *	surprises.  Arguments are evaluated more than once, so they must not
 *	have side effects.  The brace-block form is kept so that existing
 *	call sites (with or without a trailing semicolon) keep compiling.
 */
#define InitializeShifts(sx,dx,ls,rs) { \
    if ((sx) != (dx)) { \
	if ((sx) > (dx)) { \
	    (ls) = (sx) - (dx); \
	    (rs) = FB_UNIT - (ls); \
	} else { \
	    (rs) = (dx) - (sx); \
	    (ls) = FB_UNIT - (rs); \
	} \
    } \
}
 | |
| 
 | |
| void
 | |
| rlBlt (FbBits   *srcLine,
 | |
|        FbStride	srcStride,
 | |
|        int	srcX,
 | |
| 
 | |
|        ScreenPtr pDstScreen,
 | |
|        FbBits   *dstLine,
 | |
|        FbStride dstStride,
 | |
|        int	dstX,
 | |
| 
 | |
|        int	width,
 | |
|        int	height,
 | |
| 
 | |
|        int	alu,
 | |
|        FbBits	pm,
 | |
|        int	bpp,
 | |
| 
 | |
|        Bool	reverse,
 | |
|        Bool	upsidedown)
 | |
| {
 | |
|     FbBits  *src, *dst;
 | |
|     int	    leftShift, rightShift;
 | |
|     FbBits  startmask, endmask;
 | |
|     FbBits  bits, bits1;
 | |
|     int	    n, nmiddle;
 | |
|     Bool    destInvarient;
 | |
|     int	    startbyte, endbyte;
 | |
|     FbDeclareMergeRop ();
 | |
| 
 | |
| #ifdef FB_24BIT
 | |
|     if (bpp == 24 && !FbCheck24Pix (pm))
 | |
|     {
 | |
| 	fbBlt24 (srcLine, srcStride, srcX, dstLine, dstStride, dstX,
 | |
| 		 width, height, alu, pm, reverse, upsidedown);
 | |
| 	return;
 | |
|     }
 | |
| #endif
 | |
| 
 | |
|     if (alu == GXcopy && pm == FB_ALLONES && !reverse &&
 | |
|             !(srcX & 7) && !(dstX & 7) && !(width & 7)) {
 | |
|         int i;
 | |
|         CARD8 *src = (CARD8 *) srcLine;
 | |
|         CARD8 *dst = (CARD8 *) dstLine;
 | |
|         
 | |
|         srcStride *= sizeof(FbBits);
 | |
|         dstStride *= sizeof(FbBits);
 | |
|         width >>= 3;
 | |
|         src += (srcX >> 3);
 | |
|         dst += (dstX >> 3);
 | |
| 
 | |
|         if (!upsidedown)
 | |
|             for (i = 0; i < height; i++)
 | |
|                 memcpy(dst + i * dstStride, src + i * srcStride, width);
 | |
|         else
 | |
|             for (i = height - 1; i >= 0; i--)
 | |
|                 memcpy(dst + i * dstStride, src + i * srcStride, width);
 | |
| 
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     FbInitializeMergeRop(alu, pm);
 | |
|     destInvarient = FbDestInvarientMergeRop();
 | |
|     if (upsidedown)
 | |
|     {
 | |
| 	srcLine += (height - 1) * (srcStride);
 | |
| 	dstLine += (height - 1) * (dstStride);
 | |
| 	srcStride = -srcStride;
 | |
| 	dstStride = -dstStride;
 | |
|     }
 | |
|     FbMaskBitsBytes (dstX, width, destInvarient, startmask, startbyte,
 | |
| 		     nmiddle, endmask, endbyte);
 | |
| 
 | |
|     /*
 | |
|      * Beginning of the rootless acceleration code
 | |
|      */
 | |
|     if (!startmask && !endmask && alu == GXcopy &&
 | |
|         height * nmiddle * sizeof(*dst) > rootless_CopyBytes_threshold)
 | |
|     {
 | |
| 	if (pm == FB_ALLONES && SCREENREC(pDstScreen)->imp->CopyBytes)
 | |
| 	{
 | |
| 	    SCREENREC(pDstScreen)->imp->CopyBytes(
 | |
|                             nmiddle * sizeof(*dst), height,
 | |
|                             (char *) srcLine + (srcX >> 3),
 | |
|                             srcStride * sizeof (*src),
 | |
|                             (char *) dstLine + (dstX >> 3),
 | |
|                             dstStride * sizeof (*dst));
 | |
| 	    return;
 | |
| 	}
 | |
| 
 | |
| 	/* FIXME: the pm test here isn't super-wonderful - just because
 | |
| 	   we don't care about the top eight bits doesn't necessarily
 | |
| 	   mean we want them set to 255. But doing this does give a
 | |
| 	   factor of two performance improvement when copying from a
 | |
| 	   pixmap to a window, which is pretty common.. */
 | |
| 
 | |
| 	else if (bpp == 32 && sizeof(FbBits) == 4 &&
 | |
|                  pm == 0x00FFFFFFUL && !reverse &&
 | |
|                  SCREENREC(pDstScreen)->imp->CompositePixels)
 | |
| 	{
 | |
| 	    /* need to copy XRGB to ARGB. */
 | |
| 
 | |
| 	    void *src[2], *dest[2];
 | |
| 	    unsigned int src_rowbytes[2], dest_rowbytes[2];
 | |
|             unsigned int fn;
 | |
| 
 | |
| 	    src[0] = (char *) srcLine + (srcX >> 3);
 | |
| 	    src[1] = NULL;
 | |
| 	    src_rowbytes[0] = srcStride * sizeof(*src);
 | |
| 	    src_rowbytes[1] = 0;
 | |
| 
 | |
| 	    dest[0] = (char *) dstLine + (dstX >> 3);
 | |
| 	    dest[1] = dest[0];
 | |
| 	    dest_rowbytes[0] = dstStride * sizeof(*dst);
 | |
| 	    dest_rowbytes[1] = dest_rowbytes[0];
 | |
| 
 | |
| 	    fn = RL_COMPOSITE_FUNCTION(RL_COMPOSITE_SRC, RL_DEPTH_ARGB8888,
 | |
|                                        RL_DEPTH_NIL, RL_DEPTH_ARGB8888);
 | |
| 
 | |
|             if (SCREENREC(pDstScreen)->imp->CompositePixels(
 | |
|                                 nmiddle, height,
 | |
|                                 fn, src, src_rowbytes,
 | |
|                                 NULL, 0, dest, dest_rowbytes) == Success)
 | |
|             {
 | |
|                 return;
 | |
|             }
 | |
| 	}
 | |
|     }
 | |
|     /* End of the rootless acceleration code */
 | |
| 
 | |
|     if (reverse)
 | |
|     {
 | |
| 	srcLine += ((srcX + width - 1) >> FB_SHIFT) + 1;
 | |
| 	dstLine += ((dstX + width - 1) >> FB_SHIFT) + 1;
 | |
| 	srcX = (srcX + width - 1) & FB_MASK;
 | |
| 	dstX = (dstX + width - 1) & FB_MASK;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
| 	srcLine += srcX >> FB_SHIFT;
 | |
| 	dstLine += dstX >> FB_SHIFT;
 | |
| 	srcX &= FB_MASK;
 | |
| 	dstX &= FB_MASK;
 | |
|     }
 | |
|     if (srcX == dstX)
 | |
|     {
 | |
| 	while (height--)
 | |
| 	{
 | |
| 	    src = srcLine;
 | |
| 	    srcLine += srcStride;
 | |
| 	    dst = dstLine;
 | |
| 	    dstLine += dstStride;
 | |
| 	    if (reverse)
 | |
| 	    {
 | |
| 		if (endmask)
 | |
| 		{
 | |
| 		    bits = *--src;
 | |
| 		    --dst;
 | |
| 		    FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
 | |
| 		}
 | |
| 		n = nmiddle;
 | |
| 		if (destInvarient)
 | |
| 		{
 | |
| 		    while (n--)
 | |
| 			*--dst = FbDoDestInvarientMergeRop(*--src);
 | |
| 		}
 | |
| 		else
 | |
| 		{
 | |
| 		    while (n--)
 | |
| 		    {
 | |
| 			bits = *--src;
 | |
| 			--dst;
 | |
| 			*dst = FbDoMergeRop (bits, *dst);
 | |
| 		    }
 | |
| 		}
 | |
| 		if (startmask)
 | |
| 		{
 | |
| 		    bits = *--src;
 | |
| 		    --dst;
 | |
| 		    FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
 | |
| 		}
 | |
| 	    }
 | |
| 	    else
 | |
| 	    {
 | |
| 		if (startmask)
 | |
| 		{
 | |
| 		    bits = *src++;
 | |
| 		    FbDoLeftMaskByteMergeRop(dst, bits, startbyte, startmask);
 | |
| 		    dst++;
 | |
| 		}
 | |
| 		n = nmiddle;
 | |
| 		if (destInvarient)
 | |
| 		{
 | |
| #if 0
 | |
| 		    /*
 | |
| 		     * This provides some speedup on screen->screen blts
 | |
| 		     * over the PCI bus, usually about 10%.  But fb
 | |
| 		     * isn't usually used for this operation...
 | |
| 		     */
 | |
| 		    if (_ca2 + 1 == 0 && _cx2 == 0)
 | |
| 		    {
 | |
| 			FbBits	t1, t2, t3, t4;
 | |
| 			while (n >= 4)
 | |
| 			{
 | |
| 			    t1 = *src++;
 | |
| 			    t2 = *src++;
 | |
| 			    t3 = *src++;
 | |
| 			    t4 = *src++;
 | |
| 			    *dst++ = t1;
 | |
| 			    *dst++ = t2;
 | |
| 			    *dst++ = t3;
 | |
| 			    *dst++ = t4;
 | |
| 			    n -= 4;
 | |
| 			}
 | |
| 		    }
 | |
| #endif
 | |
| 		    while (n--)
 | |
| 			*dst++ = FbDoDestInvarientMergeRop(*src++);
 | |
| 		}
 | |
| 		else
 | |
| 		{
 | |
| 		    while (n--)
 | |
| 		    {
 | |
| 			bits = *src++;
 | |
| 			*dst = FbDoMergeRop (bits, *dst);
 | |
| 			dst++;
 | |
| 		    }
 | |
| 		}
 | |
| 		if (endmask)
 | |
| 		{
 | |
| 		    bits = *src;
 | |
| 		    FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
 | |
| 		}
 | |
| 	    }
 | |
| 	}
 | |
|     }
 | |
|     else
 | |
|     {
 | |
| 	if (srcX > dstX)
 | |
| 	{
 | |
| 	    leftShift = srcX - dstX;
 | |
| 	    rightShift = FB_UNIT - leftShift;
 | |
| 	}
 | |
| 	else
 | |
| 	{
 | |
| 	    rightShift = dstX - srcX;
 | |
| 	    leftShift = FB_UNIT - rightShift;
 | |
| 	}
 | |
| 	while (height--)
 | |
| 	{
 | |
| 	    src = srcLine;
 | |
| 	    srcLine += srcStride;
 | |
| 	    dst = dstLine;
 | |
| 	    dstLine += dstStride;
 | |
| 	    
 | |
| 	    bits1 = 0;
 | |
| 	    if (reverse)
 | |
| 	    {
 | |
| 		if (srcX < dstX)
 | |
| 		    bits1 = *--src;
 | |
| 		if (endmask)
 | |
| 		{
 | |
| 		    bits = FbScrRight(bits1, rightShift);
 | |
| 		    if (FbScrRight(endmask, leftShift))
 | |
| 		    {
 | |
| 			bits1 = *--src;
 | |
| 			bits |= FbScrLeft(bits1, leftShift);
 | |
| 		    }
 | |
| 		    --dst;
 | |
| 		    FbDoRightMaskByteMergeRop(dst, bits, endbyte, endmask);
 | |
| 		}
 | |
| 		n = nmiddle;
 | |
| 		if (destInvarient)
 | |
| 		{
 | |
| 		    while (n--)
 | |
| 		    {
 | |
| 			bits = FbScrRight(bits1, rightShift);
 | |
| 			bits1 = *--src;
 | |
| 			bits |= FbScrLeft(bits1, leftShift);
 | |
| 			--dst;
 | |
| 			*dst = FbDoDestInvarientMergeRop(bits);
 | |
| 		    }
 | |
| 		}
 | |
| 		else
 | |
| 		{
 | |
| 		    while (n--)
 | |
| 		    {
 | |
| 			bits = FbScrRight(bits1, rightShift);
 | |
| 			bits1 = *--src;
 | |
| 			bits |= FbScrLeft(bits1, leftShift);
 | |
| 			--dst;
 | |
| 			*dst = FbDoMergeRop(bits, *dst);
 | |
| 		    }
 | |
| 		}
 | |
| 		if (startmask)
 | |
| 		{
 | |
| 		    bits = FbScrRight(bits1, rightShift);
 | |
| 		    if (FbScrRight(startmask, leftShift))
 | |
| 		    {
 | |
| 			bits1 = *--src;
 | |
| 			bits |= FbScrLeft(bits1, leftShift);
 | |
| 		    }
 | |
| 		    --dst;
 | |
| 		    FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
 | |
| 		}
 | |
| 	    }
 | |
| 	    else
 | |
| 	    {
 | |
| 		if (srcX > dstX)
 | |
| 		    bits1 = *src++;
 | |
| 		if (startmask)
 | |
| 		{
 | |
| 		    bits = FbScrLeft(bits1, leftShift); 
 | |
| 		    if (FbScrLeft(startmask, rightShift))
 | |
| 		    {
 | |
| 			bits1 = *src++;
 | |
| 			bits |= FbScrRight(bits1, rightShift);
 | |
| 		    }
 | |
| 		    FbDoLeftMaskByteMergeRop (dst, bits, startbyte, startmask);
 | |
| 		    dst++;
 | |
| 		}
 | |
| 		n = nmiddle;
 | |
| 		if (destInvarient)
 | |
| 		{
 | |
| 		    while (n--)
 | |
| 		    {
 | |
| 			bits = FbScrLeft(bits1, leftShift);
 | |
| 			bits1 = *src++;
 | |
| 			bits |= FbScrRight(bits1, rightShift);
 | |
| 			*dst = FbDoDestInvarientMergeRop(bits);
 | |
| 			dst++;
 | |
| 		    }
 | |
| 		}
 | |
| 		else
 | |
| 		{
 | |
| 		    while (n--)
 | |
| 		    {
 | |
| 			bits = FbScrLeft(bits1, leftShift);
 | |
| 			bits1 = *src++;
 | |
| 			bits |= FbScrRight(bits1, rightShift);
 | |
| 			*dst = FbDoMergeRop(bits, *dst);
 | |
| 			dst++;
 | |
| 		    }
 | |
| 		}
 | |
| 		if (endmask)
 | |
| 		{
 | |
| 		    bits = FbScrLeft(bits1, leftShift);
 | |
| 		    if (FbScrLeft(endmask, rightShift))
 | |
| 		    {
 | |
| 			bits1 = *src;
 | |
| 			bits |= FbScrRight(bits1, rightShift);
 | |
| 		    }
 | |
| 		    FbDoRightMaskByteMergeRop (dst, bits, endbyte, endmask);
 | |
| 		}
 | |
| 	    }
 | |
| 	}
 | |
|     }
 | |
| }
 |