Commit ecfeb4aa authored by Siarhei Siamashka
Browse files

G2D: Hardware acceleration for XCopyArea (initially 32bpp only)



Wrap the CreateGC function to add a hook for the CopyArea operation, which
can be accelerated using G2D for buffers located inside the visible
part of the framebuffer. In the future we may also try to ensure
that DRI2 buffers are copied using G2D instead of the CPU in case
we hit the fallback path and can't avoid this copy.

Benchmark using "x11perf -scroll500 -copywinwin500":

=== ShadowFB (software rendering) ===

   3000 reps @   2.0308 msec (   492.0/sec): Scroll 500x500 pixels
   3000 reps @   1.9741 msec (   507.0/sec): Scroll 500x500 pixels
   3000 reps @   1.9826 msec (   504.0/sec): Scroll 500x500 pixels
   3000 reps @   1.9830 msec (   504.0/sec): Scroll 500x500 pixels
   3000 reps @   1.9965 msec (   501.0/sec): Scroll 500x500 pixels
  15000 trep @   1.9934 msec (   502.0/sec): Scroll 500x500 pixels

   1600 reps @   3.3054 msec (   303.0/sec): Copy 500x500 from window to window
   1600 reps @   3.3179 msec (   301.0/sec): Copy 500x500 from window to window
   1600 reps @   3.2263 msec (   310.0/sec): Copy 500x500 from window to window
   1600 reps @   3.2491 msec (   308.0/sec): Copy 500x500 from window to window
   1600 reps @   3.2357 msec (   309.0/sec): Copy 500x500 from window to window
   8000 trep @   3.2669 msec (   306.0/sec): Copy 500x500 from window to window

=== G2D (hardware acceleration) ===

   3000 reps @   2.1949 msec (   456.0/sec): Scroll 500x500 pixels
   3000 reps @   2.1929 msec (   456.0/sec): Scroll 500x500 pixels
   3000 reps @   2.1923 msec (   456.0/sec): Scroll 500x500 pixels
   3000 reps @   2.1889 msec (   457.0/sec): Scroll 500x500 pixels
   3000 reps @   2.1941 msec (   456.0/sec): Scroll 500x500 pixels
  15000 trep @   2.1926 msec (   456.0/sec): Scroll 500x500 pixels

   2800 reps @   1.8114 msec (   552.0/sec): Copy 500x500 from window to window
   2800 reps @   1.8103 msec (   552.0/sec): Copy 500x500 from window to window
   2800 reps @   1.8160 msec (   551.0/sec): Copy 500x500 from window to window
   2800 reps @   1.8099 msec (   553.0/sec): Copy 500x500 from window to window
   2800 reps @   1.8126 msec (   552.0/sec): Copy 500x500 from window to window
  14000 trep @   1.8120 msec (   552.0/sec): Copy 500x500 from window to window

CPU usage remains low when running this test with G2D acceleration enabled.
Signed-off-by: Siarhei Siamashka <siarhei.siamashka@gmail.com>
parent 8e6dd003
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#include "config.h" #include "config.h"
#endif #endif
#include <pixman.h>
#include "xorgVersion.h" #include "xorgVersion.h"
#include "xf86_OSproc.h" #include "xf86_OSproc.h"
#include "xf86.h" #include "xf86.h"
...@@ -67,7 +69,8 @@ xCopyWindowProc(DrawablePtr pSrcDrawable, ...@@ -67,7 +69,8 @@ xCopyWindowProc(DrawablePtr pSrcDrawable,
fbGetDrawable(pDstDrawable, dst, dstStride, dstBpp, dstXoff, dstYoff); fbGetDrawable(pDstDrawable, dst, dstStride, dstBpp, dstXoff, dstYoff);
if (srcBpp == 32 && dstBpp == 32 && if (srcBpp == 32 && dstBpp == 32 &&
disp->framebuffer_addr == src && disp->framebuffer_addr == dst && disp->framebuffer_addr == (void *)src &&
disp->framebuffer_addr == (void *)dst &&
(dy + srcYoff != dstYoff || dx + srcXoff + 1 >= dstXoff)) (dy + srcYoff != dstYoff || dx + srcXoff + 1 >= dstXoff))
{ {
while (nbox--) { while (nbox--) {
...@@ -127,6 +130,118 @@ xCopyWindow(WindowPtr pWin, DDXPointRec ptOldOrg, RegionPtr prgnSrc) ...@@ -127,6 +130,118 @@ xCopyWindow(WindowPtr pWin, DDXPointRec ptOldOrg, RegionPtr prgnSrc)
fbValidateDrawable(&pWin->drawable); fbValidateDrawable(&pWin->drawable);
} }
/*****************************************************************************/
/*
 * Per-box copy callback handed to miDoCopy() by xCopyArea().
 *
 * Copies each of the 'nbox' rectangles in 'pbox' from pSrcDrawable to
 * pDstDrawable.  Box coordinates are destination relative; the matching
 * source rectangle is found by adding (dx, dy).
 *
 * Three back ends are tried, in this order:
 *   1. G2D hardware blit, when both drawables are 32bpp and both resolve to
 *      the framebuffer address known to the display layer.
 *   2. pixman_blt(), when the copy direction is the plain top-to-bottom,
 *      left-to-right one.
 *   3. fbBlt(), for reversed/upside-down copies and for anything pixman
 *      declines to handle.
 *
 * 'bitplane' and 'closure' belong to the callback signature and are not
 * used here.
 */
static void
xCopyNtoN(DrawablePtr pSrcDrawable,
          DrawablePtr pDstDrawable,
          GCPtr pGC,
          BoxPtr pbox,
          int nbox,
          int dx,
          int dy,
          Bool reverse, Bool upsidedown, Pixel bitplane, void *closure)
{
    CARD8 alu = pGC ? pGC->alu : GXcopy;
    FbBits pm = pGC ? fbGetGCPrivate(pGC)->pm : FB_ALLONES;
    FbBits *src;
    FbStride srcStride;
    int srcBpp;
    int srcXoff, srcYoff;
    FbBits *dst;
    FbStride dstStride;
    int dstBpp;
    int dstXoff, dstYoff;
    ScreenPtr pScreen = pDstDrawable->pScreen;
    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
    sunxi_disp_t *disp = SUNXI_DISP(pScrn);
    Bool use_g2d;

    fbGetDrawable(pSrcDrawable, src, srcStride, srcBpp, srcXoff, srcYoff);
    fbGetDrawable(pDstDrawable, dst, dstStride, dstBpp, dstXoff, dstYoff);

    /*
     * G2D is only eligible for 32bpp framebuffer-to-framebuffer copies (the
     * same bpp test xCopyWindowProc applies before using the a8r8g8b8 blit).
     * The last term rejects same-row copies whose source starts more than
     * one pixel to the left of the destination.
     * NOTE(review): presumably the hardware cannot handle that overlap
     * direction -- confirm against the G2D blit implementation.
     */
    use_g2d = srcBpp == 32 && dstBpp == 32 &&
              disp->framebuffer_addr == (void *)src &&
              disp->framebuffer_addr == (void *)dst &&
              (dy + srcYoff != dstYoff || dx + srcXoff + 1 >= dstXoff);

    while (nbox--) {
        int src_x = pbox->x1 + dx + srcXoff;
        int src_y = pbox->y1 + dy + srcYoff;
        int dst_x = pbox->x1 + dstXoff;
        int dst_y = pbox->y1 + dstYoff;
        int w = pbox->x2 - pbox->x1;
        int h = pbox->y2 - pbox->y1;
        Bool done = FALSE;

        if (use_g2d) {
            sunxi_g2d_blit_a8r8g8b8(disp, dst_x, dst_y, src_x, src_y, w, h);
            done = TRUE;
        }
        else if (!reverse && !upsidedown) {
            /*
             * pixman_blt() returns FALSE without touching the destination
             * when it has no implementation for this request; in that case
             * the box must still be copied, so fall through to fbBlt().
             */
            if (pixman_blt((uint32_t *) src, (uint32_t *) dst,
                           srcStride, dstStride, srcBpp, dstBpp,
                           src_x, src_y, dst_x, dst_y, w, h))
                done = TRUE;
        }

        if (!done) {
            /* fbBlt() takes horizontal positions and widths in bits. */
            fbBlt(src + src_y * srcStride,
                  srcStride,
                  src_x * srcBpp,
                  dst + dst_y * dstStride,
                  dstStride,
                  dst_x * dstBpp,
                  w * dstBpp,
                  h, alu, pm, dstBpp, reverse, upsidedown);
        }
        pbox++;
    }

    fbFinishAccess(pDstDrawable);
    fbFinishAccess(pSrcDrawable);
}
/*
 * CopyArea hook installed into the GC ops table by xCreateGC().
 *
 * A plain copy (GXcopy raster op, all planes enabled) between two 32bpp
 * drawables is routed through miDoCopy() with xCopyNtoN() as the per-box
 * copy callback.  Every other request is handed to fbCopyArea() unchanged.
 *
 * Returns whatever region pointer the chosen implementation returns.
 */
static RegionPtr
xCopyArea(DrawablePtr pSrcDrawable,
          DrawablePtr pDstDrawable,
          GCPtr pGC,
          int xIn, int yIn, int widthSrc, int heightSrc, int xOut, int yOut)
{
    CARD8 rop = GXcopy;
    FbBits planemask = FB_ALLONES;

    /* Without a GC the defaults above (plain copy, all planes) apply. */
    if (pGC) {
        rop = pGC->alu;
        planemask = fbGetGCPrivate(pGC)->pm;
    }

    /* Anything that is not a straight 32bpp -> 32bpp copy goes to fb. */
    if (planemask != FB_ALLONES ||
        rop != GXcopy ||
        pSrcDrawable->bitsPerPixel != pDstDrawable->bitsPerPixel ||
        pSrcDrawable->bitsPerPixel != 32)
    {
        return fbCopyArea(pSrcDrawable, pDstDrawable, pGC,
                          xIn, yIn, widthSrc, heightSrc, xOut, yOut);
    }

    /* Last two arguments: bitplane and closure, both unused by xCopyNtoN. */
    return miDoCopy(pSrcDrawable, pDstDrawable, pGC,
                    xIn, yIn, widthSrc, heightSrc, xOut, yOut,
                    xCopyNtoN, 0, 0);
}
/*
 * CreateGC replacement installed by SunxiG2D_Init().
 *
 * Lets fbCreateGC() set up the GC, then points the GC at a per-screen copy
 * of its ops table in which CopyArea is replaced by xCopyArea().  The copy
 * is created lazily from the first GC's ops and shared by all later GCs;
 * SunxiG2D_Close() releases it.
 *
 * Returns FALSE only if fbCreateGC() fails.  If the shared ops table cannot
 * be allocated, the GC keeps the ops fbCreateGC() installed (so it works,
 * just without the CopyArea hook) and allocation is retried for the next GC.
 *
 * NOTE(review): this calls fbCreateGC() directly rather than the CreateGC
 * pointer saved in the SunxiG2D private -- confirm that bypassing any other
 * wrapper is intended.
 */
static Bool
xCreateGC(GCPtr pGC)
{
    ScreenPtr pScreen = pGC->pScreen;
    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
    SunxiG2D *self = SUNXI_G2D(pScrn);

    if (!fbCreateGC(pGC))
        return FALSE;

    if (!self->pGCOps) {
        GCOps *ops = malloc(sizeof *ops);

        if (!ops)
            return TRUE; /* out of memory: leave the stock ops in place */

        memcpy(ops, pGC->ops, sizeof *ops);
        /* Add our own hook for CopyArea function */
        ops->CopyArea = xCopyArea;
        /* Publish only once the table is fully initialised. */
        self->pGCOps = ops;
    }

    pGC->ops = self->pGCOps;
    return TRUE;
}
/*****************************************************************************/
SunxiG2D *SunxiG2D_Init(ScreenPtr pScreen) SunxiG2D *SunxiG2D_Init(ScreenPtr pScreen)
{ {
...@@ -151,6 +266,10 @@ SunxiG2D *SunxiG2D_Init(ScreenPtr pScreen) ...@@ -151,6 +266,10 @@ SunxiG2D *SunxiG2D_Init(ScreenPtr pScreen)
private->CopyWindow = pScreen->CopyWindow; private->CopyWindow = pScreen->CopyWindow;
pScreen->CopyWindow = xCopyWindow; pScreen->CopyWindow = xCopyWindow;
/* Wrap the current CreateGC function */
private->CreateGC = pScreen->CreateGC;
pScreen->CreateGC = xCreateGC;
return private; return private;
} }
...@@ -160,4 +279,9 @@ void SunxiG2D_Close(ScreenPtr pScreen) ...@@ -160,4 +279,9 @@ void SunxiG2D_Close(ScreenPtr pScreen)
SunxiG2D *private = SUNXI_G2D(pScrn); SunxiG2D *private = SUNXI_G2D(pScrn);
pScreen->CopyWindow = private->CopyWindow; pScreen->CopyWindow = private->CopyWindow;
pScreen->CreateGC = private->CreateGC;
if (private->pGCOps) {
free(private->pGCOps);
}
} }
...@@ -25,7 +25,10 @@ ...@@ -25,7 +25,10 @@
#define SUNXI_X_G2D_H #define SUNXI_X_G2D_H
typedef struct { typedef struct {
GCOps *pGCOps;
CopyWindowProcPtr CopyWindow; CopyWindowProcPtr CopyWindow;
CreateGCProcPtr CreateGC;
} SunxiG2D; } SunxiG2D;
SunxiG2D *SunxiG2D_Init(ScreenPtr pScreen); SunxiG2D *SunxiG2D_Init(ScreenPtr pScreen);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment