blitting. This is used by drivers that don't provide their own
          (accelerated) version.
 
+config FB_CFB_REV_PIXELS_IN_BYTE
+       bool
+       depends on FB
+       default n
+       ---help---
+         Allow generic frame-buffer functions to work on displays with 1, 2
+         and 4 bits per pixel depths that have the order of pixels within a
+         byte opposite to the order of bytes within a long.
+
 config FB_SYS_FILLRECT
        tristate
        depends on FB
 
 
 static void
 bitcpy(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src,
-       int src_idx, int bits, unsigned n)
+       int src_idx, int bits, unsigned n, u32 bswapmask)
 {
        unsigned long first, last;
        int const shift = dst_idx-src_idx;
        int left, right;
 
-       first = FB_SHIFT_HIGH(~0UL, dst_idx);
-       last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
+       first = fb_shifted_pixels_mask_long(dst_idx, bswapmask);
+       last = ~fb_shifted_pixels_mask_long((dst_idx+n) % bits, bswapmask);
 
        if (!shift) {
                // Same alignment for source and dest
 
 static void
 bitcpy_rev(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src,
-               int src_idx, int bits, unsigned n)
+               int src_idx, int bits, unsigned n, u32 bswapmask)
 {
        unsigned long first, last;
        int shift;
 
        shift = dst_idx-src_idx;
 
-       first = FB_SHIFT_LOW(~0UL, bits - 1 - dst_idx);
-       last = ~(FB_SHIFT_LOW(~0UL, bits - 1 - ((dst_idx-n) % bits)));
+       first = fb_shifted_pixels_mask_long(bits - 1 - dst_idx, bswapmask);
+       last = ~fb_shifted_pixels_mask_long(bits - 1 - ((dst_idx-n) % bits), bswapmask);
 
        if (!shift) {
                // Same alignment for source and dest
        unsigned long __iomem *dst = NULL, *src = NULL;
        int bits = BITS_PER_LONG, bytes = bits >> 3;
        int dst_idx = 0, src_idx = 0, rev_copy = 0;
+       u32 bswapmask = fb_compute_bswapmask(p);
 
        if (p->state != FBINFO_STATE_RUNNING)
                return;
                        src += src_idx >> (ffs(bits) - 1);
                        src_idx &= (bytes - 1);
                        bitcpy_rev(dst, dst_idx, src, src_idx, bits,
-                               width*p->var.bits_per_pixel);
+                               width*p->var.bits_per_pixel, bswapmask);
                }
        } else {
                while (height--) {
                        src += src_idx >> (ffs(bits) - 1);
                        src_idx &= (bytes - 1);
                        bitcpy(dst, dst_idx, src, src_idx, bits,
-                               width*p->var.bits_per_pixel);
+                               width*p->var.bits_per_pixel, bswapmask);
                        dst_idx += bits_per_line;
                        src_idx += bits_per_line;
                }
 
      */
 
 static void
-bitfill_aligned(unsigned long __iomem *dst, int dst_idx, unsigned long pat, unsigned n, int bits)
+bitfill_aligned(unsigned long __iomem *dst, int dst_idx, unsigned long pat,
+               unsigned n, int bits, u32 bswapmask)
 {
        unsigned long first, last;
 
        if (!n)
                return;
 
-       first = FB_SHIFT_HIGH(~0UL, dst_idx);
-       last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
+       first = fb_shifted_pixels_mask_long(dst_idx, bswapmask);
+       last = ~fb_shifted_pixels_mask_long((dst_idx+n) % bits, bswapmask);
 
        if (dst_idx+n <= bits) {
                // Single word
      *  Aligned pattern invert using 32/64-bit memory accesses
      */
 static void
-bitfill_aligned_rev(unsigned long __iomem *dst, int dst_idx, unsigned long pat, unsigned n, int bits)
+bitfill_aligned_rev(unsigned long __iomem *dst, int dst_idx, unsigned long pat,
+               unsigned n, int bits, u32 bswapmask)
 {
        unsigned long val = pat, dat;
        unsigned long first, last;
        if (!n)
                return;
 
-       first = FB_SHIFT_HIGH(~0UL, dst_idx);
-       last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
+       first = fb_shifted_pixels_mask_long(dst_idx, bswapmask);
+       last = ~fb_shifted_pixels_mask_long((dst_idx+n) % bits, bswapmask);
 
        if (dst_idx+n <= bits) {
                // Single word
        if (p->fbops->fb_sync)
                p->fbops->fb_sync(p);
        if (!left) {
+               u32 bswapmask = fb_compute_bswapmask(p);
                void (*fill_op32)(unsigned long __iomem *dst, int dst_idx,
-                                 unsigned long pat, unsigned n, int bits) = NULL;
+                                 unsigned long pat, unsigned n, int bits,
+                                 u32 bswapmask) = NULL;
 
                switch (rect->rop) {
                case ROP_XOR:
                while (height--) {
                        dst += dst_idx >> (ffs(bits) - 1);
                        dst_idx &= (bits - 1);
-                       fill_op32(dst, dst_idx, pat, width*bpp, bits);
+                       fill_op32(dst, dst_idx, pat, width*bpp, bits, bswapmask);
                        dst_idx += p->fix.line_length*8;
                }
        } else {
 
 #include <linux/string.h>
 #include <linux/fb.h>
 #include <asm/types.h>
+#include "fb_draw.h"
 
 #define DEBUG
 
        u32 null_bits = 32 - bpp;
        u32 *palette = (u32 *) p->pseudo_palette;
        const u8 *src = image->data;
+       u32 bswapmask = fb_compute_bswapmask(p);
 
        dst2 = (u32 __iomem *) dst1;
        for (i = image->height; i--; ) {
                val = 0;
                
                if (start_index) {
-                       u32 start_mask = ~(FB_SHIFT_HIGH(~(u32)0, start_index));
+                       u32 start_mask = ~fb_shifted_pixels_mask_u32(start_index, bswapmask);
                        val = FB_READL(dst) & start_mask;
                        shift = start_index;
                }
                        else
                                color = *src;
                        color <<= FB_LEFT_POS(bpp);
-                       val |= FB_SHIFT_HIGH(color, shift);
+                       val |= FB_SHIFT_HIGH(color, shift ^ bswapmask);
                        if (shift >= null_bits) {
                                FB_WRITEL(val, dst++);
        
                        src++;
                }
                if (shift) {
-                       u32 end_mask = FB_SHIFT_HIGH(~(u32)0, shift);
+                       u32 end_mask = fb_shifted_pixels_mask_u32(shift, bswapmask);
 
                        FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
                }
        u32 spitch = (image->width+7)/8;
        const u8 *src = image->data, *s;
        u32 i, j, l;
-       
+       u32 bswapmask = fb_compute_bswapmask(p);
+
        dst2 = (u32 __iomem *) dst1;
        fgcolor <<= FB_LEFT_POS(bpp);
        bgcolor <<= FB_LEFT_POS(bpp);
 
                /* write leading bits */
                if (start_index) {
-                       u32 start_mask = ~(FB_SHIFT_HIGH(~(u32)0,start_index));
+                       u32 start_mask = ~fb_shifted_pixels_mask_u32(start_index, bswapmask);
                        val = FB_READL(dst) & start_mask;
                        shift = start_index;
                }
                while (j--) {
                        l--;
                        color = (*s & (1 << l)) ? fgcolor : bgcolor;
-                       val |= FB_SHIFT_HIGH(color, shift);
+                       val |= FB_SHIFT_HIGH(color, shift ^ bswapmask);
                        
                        /* Did the bitshift spill bits to the next long? */
                        if (shift >= null_bits) {
 
                /* write trailing bits */
                if (shift) {
-                       u32 end_mask = FB_SHIFT_HIGH(~(u32)0, shift);
+                       u32 end_mask = fb_shifted_pixels_mask_u32(shift, bswapmask);
 
                        FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
                }
 
 #define _FB_DRAW_H
 
 #include <asm/types.h>
+#include <linux/fb.h>
 
     /*
      *  Compose two values, using a bitmask as decision value
     }
 }
 #endif
+
+#ifdef CONFIG_FB_CFB_REV_PIXELS_IN_BYTE
+
+/*
+ * fb_shifted_pixels_mask_u32 - 32-bit pixel mask starting at a bit offset
+ * @index:     bit offset at which the mask starts
+ * @bswapmask: value from fb_compute_bswapmask(); 0 selects the plain mask
+ *
+ * With a zero @bswapmask the result is FB_SHIFT_HIGH(~(u32)0, index).
+ * Otherwise the mask is assembled from two parts: a partial byte mask that
+ * is shifted in the FB_SHIFT_LOW direction by the in-byte part of @index,
+ * and an all-ones mask covering everything past that byte.
+ */
+static inline u32 fb_shifted_pixels_mask_u32(u32 index, u32 bswapmask)
+{
+       u32 mask;
+
+       if (!bswapmask)
+               return FB_SHIFT_HIGH(~(u32)0, index);
+
+       /* Start from one full byte; u32 avoids shifting a signed int. */
+       mask = (u32)0xff << FB_LEFT_POS(8);
+       /* Drop the bits of that byte that lie before the in-byte offset. */
+       mask = FB_SHIFT_LOW(mask, index & (bswapmask)) & mask;
+       /* Move the partial byte to the byte that contains index. */
+       mask = FB_SHIFT_HIGH(mask, index & ~(bswapmask));
+
+       /*
+        * The remaining bytes are covered by an all-ones mask. Shifting a
+        * 32-bit value by 32 or more is undefined in C on every
+        * architecture, not only on x86, so skip the term whenever it
+        * would be shifted out completely.
+        */
+       if (index + bswapmask < 32)
+               mask |= FB_SHIFT_HIGH(~(u32)0,
+                               (index + bswapmask) & ~(bswapmask));
+       return mask;
+}
+
+/*
+ * fb_shifted_pixels_mask_long - unsigned long pixel mask starting at a bit offset
+ * @index:     bit offset at which the mask starts
+ * @bswapmask: value from fb_compute_bswapmask(); 0 selects the plain mask
+ *
+ * With a zero @bswapmask the result is FB_SHIFT_HIGH(~0UL, index).
+ * Otherwise the mask is assembled from two parts: a partial byte mask that
+ * is shifted in the FB_SHIFT_LOW direction by the in-byte part of @index,
+ * and an all-ones mask covering everything past that byte.
+ */
+static inline unsigned long fb_shifted_pixels_mask_long(u32 index, u32 bswapmask)
+{
+       unsigned long mask;
+
+       if (!bswapmask)
+               return FB_SHIFT_HIGH(~0UL, index);
+
+       /*
+        * Start from one full byte.
+        * NOTE(review): 0xff is an int and FB_LEFT_POS is defined outside
+        * this file; confirm this yields the intended byte when unsigned
+        * long is 64 bits wide.
+        */
+       mask = 0xff << FB_LEFT_POS(8);
+       /* Drop the bits of that byte that lie before the in-byte offset. */
+       mask = FB_SHIFT_LOW(mask, index & (bswapmask)) & mask;
+       /* Move the partial byte to the byte that contains index. */
+       mask = FB_SHIFT_HIGH(mask, index & ~(bswapmask));
+
+       /*
+        * The remaining bytes are covered by an all-ones mask. Shifting an
+        * unsigned long by BITS_PER_LONG or more is undefined in C on every
+        * architecture, not only on x86, so skip the term whenever it
+        * would be shifted out completely.
+        */
+       if (index + bswapmask < BITS_PER_LONG)
+               mask |= FB_SHIFT_HIGH(~0UL,
+                               (index + bswapmask) & ~(bswapmask));
+       return mask;
+}
+
+
+/*
+ * fb_compute_bswapmask - derive the in-byte pixel swap mask for a framebuffer
+ * @info: framebuffer whose var.bits_per_pixel and var.nonstd are examined
+ *
+ * Returns 8 - bits_per_pixel when the depth is below 8 and
+ * FB_NONSTD_REV_PIX_IN_B is set in var.nonstd, otherwise 0.
+ */
+static inline u32 fb_compute_bswapmask(struct fb_info *info)
+{
+       unsigned depth = info->var.bits_per_pixel;
+
+       if (depth >= 8)
+               return 0;
+       if (!(info->var.nonstd & FB_NONSTD_REV_PIX_IN_B))
+               return 0;
+
+       /*
+        * Reversed order of pixel layout in bytes
+        * works only for 1, 2 and 4 bpp
+        */
+       return 8 - depth;
+}
+
+#else /* CONFIG_FB_CFB_REV_PIXELS_IN_BYTE */
+
+/*
+ * Variants used when CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not defined: the
+ * mask helpers reduce to a plain FB_SHIFT_HIGH of an all-ones value (the
+ * bswapmask argument is ignored) and fb_compute_bswapmask() always
+ * evaluates to 0.
+ */
+#define fb_shifted_pixels_mask_u32(i, b) FB_SHIFT_HIGH(~(u32)0, (i))
+#define fb_shifted_pixels_mask_long(i, b) FB_SHIFT_HIGH(~0UL, (i))
+#define fb_compute_bswapmask(...) 0
+
+#endif  /* CONFIG_FB_CFB_REV_PIXELS_IN_BYTE */
+
 #endif /* FB_DRAW_H */
 
 };
 
 #define FB_NONSTD_HAM          1       /* Hold-And-Modify (HAM)        */
+#define FB_NONSTD_REV_PIX_IN_B 2       /* order of pixels in each byte is reversed */
 
 #define FB_ACTIVATE_NOW                0       /* set values immediately (or vbl)*/
 #define FB_ACTIVATE_NXTOPEN    1       /* activate on next open        */