+/*
+ * radeonfb_prim_imageblit - draw a monochrome image by streaming its bits
+ * to the 2D engine through the HOST_DATA registers.
+ *
+ * @rinfo: device state; supplies the cached register values, the base
+ *         DP_GUI_MASTER_CNTL bits and the MMIO mapping
+ * @image: source image; dx/dy give the destination origin, width/height
+ *         the extent, and data points at the source bits
+ * @fg:    value programmed into DP_SRC_FRGD_CLR
+ * @bg:    value programmed into DP_SRC_BKGD_CLR
+ *
+ * The source is consumed as height rows of ((width * depth) + 7) / 8
+ * bytes laid out back to back, and is pushed to the engine as 32-bit
+ * words. Only the bytes that belong to the image are read; a trailing
+ * partial word is zero-padded before being written.
+ */
+static void radeonfb_prim_imageblit(struct radeonfb_info *rinfo,
+				    const struct fb_image *image,
+				    u32 fg, u32 bg)
+{
+	unsigned int src_bytes, dwords, tail;
+	const u32 *bits;
+
+	/* Mono fg/bg expansion of host-supplied data, plain source copy */
+	radeonfb_set_creg(rinfo, DP_GUI_MASTER_CNTL, &rinfo->dp_gui_mc_cache,
+			  rinfo->dp_gui_mc_base	|
+			  GMC_BRUSH_NONE |
+			  GMC_SRC_DATATYPE_MONO_FG_BG |
+			  ROP3_S |
+			  GMC_BYTE_ORDER_MSB_TO_LSB |
+			  DP_SRC_SOURCE_HOST_DATA);
+	radeonfb_set_creg(rinfo, DP_CNTL, &rinfo->dp_cntl_cache,
+			  DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM);
+	radeonfb_set_creg(rinfo, DP_SRC_FRGD_CLR, &rinfo->dp_src_fg_cache, fg);
+	radeonfb_set_creg(rinfo, DP_SRC_BKGD_CLR, &rinfo->dp_src_bg_cache, bg);
+
+	radeon_fifo_wait(rinfo, 1);
+	OUTREG(DST_Y_X, (image->dy << 16) | image->dx);
+
+	/* Ensure the dst cache is flushed and the engine idle before
+	 * issuing the operation.
+	 *
+	 * This works around engine lockups on some cards
+	 */
+#if FLUSH_CACHE_WORKAROUND
+	radeon_fifo_wait(rinfo, 2);
+	OUTREG(DSTCACHE_CTLSTAT, RB2D_DC_FLUSH_ALL);
+	OUTREG(WAIT_UNTIL, (WAIT_2D_IDLECLEAN | WAIT_DMA_GUI_IDLE));
+#endif
+
+	/* X here pads width to a multiple of 32 and uses the clipper to
+	 * adjust the result. Is that really necessary ? Things seem to
+	 * work ok for me without that and the doco doesn't seem to imply
+	 * there is such a restriction.
+	 *
+	 * Reserve a FIFO entry for this write as well: the waits above
+	 * only account for DST_Y_X and the optional workaround writes.
+	 */
+	radeon_fifo_wait(rinfo, 1);
+	OUTREG(DST_WIDTH_HEIGHT, (image->width << 16) | image->height);
+
+	/* Split the source into whole 32-bit words plus 0..3 leftover
+	 * bytes, so that nothing past the end of the image data is read.
+	 */
+	src_bytes = (((image->width * image->depth) + 7) / 8) * image->height;
+	dwords = src_bytes / 4;
+	tail = src_bytes % 4;
+	bits = (const u32 *)(image->data);
+
+	/* Bulk path: reserve eight FIFO entries and fill them in one go */
+	while (dwords >= 8) {
+		radeon_fifo_wait(rinfo, 8);
+#if BITS_PER_LONG == 64
+		__raw_writeq(*((const u64 *)(bits)), rinfo->mmio_base + HOST_DATA0);
+		__raw_writeq(*((const u64 *)(bits+2)), rinfo->mmio_base + HOST_DATA2);
+		__raw_writeq(*((const u64 *)(bits+4)), rinfo->mmio_base + HOST_DATA4);
+		__raw_writeq(*((const u64 *)(bits+6)), rinfo->mmio_base + HOST_DATA6);
+		bits += 8;
+#else
+		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA0);
+		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA1);
+		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA2);
+		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA3);
+		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA4);
+		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA5);
+		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA6);
+		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA7);
+#endif
+		dwords -= 8;
+	}
+
+	/* Remaining whole words, one FIFO entry at a time */
+	while (dwords--) {
+		radeon_fifo_wait(rinfo, 1);
+		__raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA0);
+	}
+
+	/* Leftover bytes: copy them into a zeroed word at the same byte
+	 * offsets a direct 32-bit load would have produced, then send it.
+	 */
+	if (tail) {
+		const unsigned char *src = (const unsigned char *)bits;
+		u32 last = 0;
+		unsigned int i;
+
+		for (i = 0; i < tail; i++)
+			((unsigned char *)&last)[i] = src[i];
+
+		radeon_fifo_wait(rinfo, 1);
+		__raw_writel(last, rinfo->mmio_base + HOST_DATA0);
+	}
+}
+