Allegro.cc - Online Community

Allegro.cc Forums » Programming Questions » Converting Between Bit Depth (ASM gurus click here)

This thread is locked; no one can reply to it. rss feed Print
Converting Between Bit Depth (ASM gurus click here)
ImLeftFooted
Member #3,935
October 2003
avatar

I wrote a routine that will convert between any whole integer bit depth from 1 to 24. The routine works without flaw...

But it is slow :(. I'm looking for help from a guru.

The target is the iPhone ARM processor.

#SelectExpand
1void convertImage(int srcBpp, unsigned char *src, int srcSize, 2 int destBpp, unsigned char *dest, int destSize, 3 int lineStart, int destLineWidth, int linePadding) 4{ 5 PIXEL_PACK_TABLE(Src, srcBpp) 6 PIXEL_PACK_TABLE(Dest, destBpp) 7 8 if(srcSize < 1 || destSize < 1) 9 return; 10 11 if(destLineWidth < 1) 12 destLineWidth = 1; 13 14 if(srcBpp < 6 && destBpp > 5) { 15 16 convertImageBW2COL(srcBpp, src, srcSize, destBpp, dest, destSize, lineStart, destLineWidth, linePadding); 17 return; 18 } 19 else if(srcBpp > 5 && destBpp < 6) { 20 21 //convertImageCOL2BW(srcBpp, src, srcSize, destBpp, dest, destSize, lineStart, destLineWidth, linePadding); 22 //return; 23 } 24 else if(srcBpp < 6 && destBpp < 6) { 25 26 //convertImageBW2BW(srcBpp, src, srcSize, destBpp, dest, destSize, lineStart, destLineWidth, linePadding); 27 //return; 28 } 29 30 // Max unsigned char number for convience. 31 const unsigned char max = 0xff; 32 33 // current color index 34 // 0 is red, 1 is green, 2 is blue 35 int col = 0; 36 37 // number of bits from src and dest. 38 int srcOffset = 0; 39 int destOffset = 0; 40 41 // dest index 42 int destI = 0; 43 44 unsigned char byte = 0; 45 int srcCol; 46 47 // srcI is src index 48 for(int srcI = 0; ; ) { 49 50 // TODO fix all the bit shifts to cast back to unsigned char correctly. 51 // do the same on the server. 52 53 srcCol = 54 // Put the color component against the left wall 55 (unsigned char)( ( (unsigned char)((src[srcI] << srcOffset) & 0xffu) 56 57 // Now move it over to the right wall 58 >> (8 - sizesSrc[col]) 59 ) & 0xffu ); 60 61 srcCol |= 62 // Move bits that were cut off before to the right edge 63 // and binary or them with srcCol. 
64 (unsigned char)( (src[srcI + 1] >> (8 - ((sizesSrc[col] + srcOffset) - 8))) & 0xffu 65 ); 66 /* 67 if(sizesSrc[col] + srcOffset >= 8) 68 ++srcI; 69 */ 70 srcI += (((sizesSrc[col] + srcOffset) & 8) >> 3); 71 72 if(srcI >= srcSize) 73 break; 74 75 byte |= 76 // We multiply color by new max value and divide by old max value. 77 // This converts src[srcI] to the new bpp 78 (unsigned char)( ( (unsigned char)( ( (unsigned char)(int(srcCol) * MAX_FOR_COLOR(sizesDest[col]) / MAX_FOR_COLOR(sizesSrc[col])) 79 // We push the data to the left of the byte, making room for 80 // the next pixel inside the same byte. 81 << (8 - sizesDest[col]) 82 ) & 0xffu ) 83 // Now we shift _right_ in case another pixel is already using this part of this byte. 84 >> destOffset 85 ) & 0xffu ); 86 87 // If this color component fills up the byte... 88 if(sizesDest[col] + destOffset >= 8) { 89 90 if(destI >= destSize) 91 break; 92 93 dest[destI] = byte; 94 byte = 0; 95 96 // Move to the next byte 97 destI++; 98 99 if(0 == (((destI + linePadding + lineStart)) % (destLineWidth))) { 100 101 destI += linePadding; 102 } 103 104 // Here we do the same as before, except... 105 byte |= 106 (unsigned char)( ( (unsigned char)( ( (unsigned char)(int(src[srcI]) * MAX_FOR_COLOR(sizesDest[col]) / MAX_FOR_COLOR(sizesSrc[col])) 107 << (8 - sizesDest[col]) 108 ) & 0xffu ) 109 // ...now we move _left_ to get only the piece that was cut off in the first operation 110 // the piece will be stuffed in the left part of the byte. 
111 << (8 - destOffset) 112 ) & 0xffu ); 113 } 114 115 // Increment offset by how many bits we've inserted 116 // Decrement offset if offset >= 8 117 destOffset += sizesDest[col]; 118 destOffset = destOffset % 8; 119 120 // Increment offset by how many bits we've read 121 // Increment srcI if we've gone past our 8 bits 122 // Decrement offset if offset >= 8 123 srcOffset += sizesSrc[col]; 124 srcOffset = srcOffset % 8; 125 126 // We move on to the next color component, wrapping back to 127 // the first one if needed. 128 col = ++col % 3; 129 } 130}

Here is the macro definition.

#SelectExpand
// The biggest number a color component of `bpp` bits can contain
// (0 for 0 bits, 255 for 8 bits). The expansion is self-contained and
// fully parenthesized; it no longer needs a local named `max` in scope.
#define MAX_FOR_COLOR(bpp) ((unsigned char)(~(0xffu << (bpp)) & 0xffu))

/* Begin pixel packing code */
// Our variable encoding scheme accepts all whole integers from
// 24 bpp to 1 bpp. PIXEL_PACK_TABLE declares, in the current scope,
//
//   int sizes<name>[3]                     bits for red, green, blue
//   int &rSize<name>, &gSize<name>, &bSize<name>   aliases of the entries
//
// Every component gets bpp / 3 bits; a remainder of 1 gives blue one extra
// bit and a remainder of 2 gives green and blue one extra bit each
// (24 -> 8/8/8, 16 -> 5/5/6, 8 -> 2/3/3, 2 -> 0/1/1, 1 -> 0/0/1).
// A bpp outside 1..24 leaves all three sizes at 0 instead of uninitialized.
// `bpp` is evaluated exactly once. The expansion ends in a closed block, so
// it may be used with or without a trailing semicolon.
#define PIXEL_PACK_TABLE(name, bpp) \
int sizes##name[3] = { 0, 0, 0 }; \
int &rSize##name = sizes##name[0]; \
int &gSize##name = sizes##name[1]; \
int &bSize##name = sizes##name[2]; \
{ \
    const int pixelPackBpp_ = (bpp); \
    if(pixelPackBpp_ >= 1 && pixelPackBpp_ <= 24) { \
        sizes##name[0] = pixelPackBpp_ / 3; \
        sizes##name[1] = pixelPackBpp_ / 3 + ((pixelPackBpp_ % 3 == 2) ? 1 : 0); \
        sizes##name[2] = pixelPackBpp_ / 3 + ((pixelPackBpp_ % 3 != 0) ? 1 : 0); \
    } \
}

edit

Here is the final solution.

#SelectExpand
1int convertImage(int srcBpp, unsigned char *src, int srcSize, 2 int destBpp, unsigned char *dest, int destSize, 3 int lineStart, int destLineWidth, int linePadding) 4{ 5 PIXEL_PACK_TABLE(Src, srcBpp) 6 PIXEL_PACK_TABLE(Dest, destBpp) 7 8 // current color index 9 // 0 is red, 1 is green, 2 is blue 10 int col = 0; 11 12 // number of bits from src and dest. 13 int srcOffset = 0; 14 int destOffset = 0; 15 16 // dest index 17 int destI = 0; 18 19 unsigned char byte = 0; 20 int srcCol; 21 22 int destLineWidthIncremented = destLineWidth; 23 int srcI = 0; 24 25 int diff1[3] = 26 { 27 sizesDest[0] - sizesSrc[0], 28 sizesDest[1] - sizesSrc[1], 29 sizesDest[2] - sizesSrc[2] 30 }; 31 32 int diff2[3] = 33 { 34 8 - (sizesDest[0] - sizesSrc[0]), 35 8 - (sizesDest[1] - sizesSrc[1]), 36 8 - (sizesDest[2] - sizesSrc[2]), 37 }; 38 39 for(int col = 0; col < 3; col++) { 40 41 if(sizesSrc[col] > sizesDest[col]) { 42 43 swap(diff1[col], diff2[col]); 44 45 diff1[col] = -diff1[col]; 46 diff2[col] = -diff2[col]; 47 } 48 } 49 50 // srcI is src index 51 for( ;; ) { 52 53 // TODO fix all the bit shifts to cast back to unsigned char correctly. 54 // do the same on the server. 55 56 srcCol = 57 58 // Put the color component against the left wall 59 (unsigned char)( ( (unsigned char)((src[srcI] << srcOffset) & 0xffu) 60 61 // Now move it over to the right wall 62 >> (8 - sizesSrc[col]) 63 ) & 0xffu ); 64 65 srcCol |= 66 // Move bits that were cut off before to the right edge 67 // and binary or them with srcCol. 68 (unsigned char)( (src[srcI + 1] >> (8 - ((sizesSrc[col] + srcOffset) - 8))) & 0xffu 69 ); 70 /* 71 if(sizesSrc[col] + srcOffset >= 8) 72 ++srcI; 73 */ 74 srcI += (((sizesSrc[col] + srcOffset) & 8) >> 3); 75 76 if(srcI >= srcSize || destI >= destSize) 77 break; 78 79 byte |= 80 // We multiply color by new max value and divide by old max value. 
81 // This converts src[srcI] to the new bpp 82 (unsigned char)( ( (unsigned char)( ( (unsigned char)( ((srcCol << (diff1[col])) & 0xffu) | ((srcCol >> (diff2[col])) & 0xffu ) ) 83 // We push the data to the left of the byte, making room for 84 // the next pixel inside the same byte. 85 << (8 - sizesDest[col]) 86 ) & 0xffu ) 87 // Now we shift _right_ in case another pixel is already using this part of this byte. 88 >> destOffset 89 ) & 0xffu ); 90 91 // If this color component fills up the byte... 92 if(sizesDest[col] + destOffset >= 8) { 93 94 dest[destI] = byte; 95 byte = 0; 96 97 // Move to the next byte 98 destI++; 99 100 if(destI + linePadding + lineStart >= destLineWidthIncremented) { 101 102 destI += linePadding; 103 destLineWidthIncremented += destLineWidth; 104 } 105 106 // Here we do the same as before, except... 107 byte |= 108 (unsigned char)( ( (unsigned char)( ( (unsigned char)( ((srcCol << (diff1[col])) & 0xffu) | ((srcCol >> (diff2[col])) & 0xffu ) ) 109 << (8 - sizesDest[col]) 110 ) & 0xffu ) 111 // ...now we move _left_ to get only the piece that was cut off in the first operation 112 // the piece will be stuffed in the left part of the byte. 113 << (8 - destOffset) 114 ) & 0xffu ); 115 } 116 117 // Increment offset by how many bits we've inserted 118 // Decrement offset if offset >= 8 119 destOffset += sizesDest[col]; 120 destOffset = (destOffset < 8 ? destOffset : destOffset - 8); 121 122 // Increment offset by how many bits we've read 123 // Increment srcI if we've gone past our 8 bits 124 // Decrement offset if offset >= 8 125 srcOffset += sizesSrc[col]; 126 srcOffset = (srcOffset < 8 ? srcOffset : srcOffset - 8); 127 128 // We move on to the next color component, wrapping back to 129 // the first one if needed. 130 col = (col+1 < 3 ? col+1 : 0); 131 } 132 133 return min(destSize, destI); 134}

Audric
Member #907
January 2001

Can you compare the performances in a "trivial" case where you would hard-code the shifts to work just for this case (ex: 888 to 666)
If the improvement is not noticeable, it means the code inside this function is as good as can be.

Bob
Free Market Evangelist
September 2000
avatar

Those divides can't be good for performance. You can replace the multiply-then-divide sequences with bitwise ANDs, since you know that color components always occupy an integer number of bits.

There are also quality issues with this converter. For example, converting pure white from 565 into 888 will not result in pure white, but an "almost white" shade of gray.

You'll want to write something like:

    new_color = (old_color << shift) | (old_color >> (8 - shift)).

Or something like that.

Finally, you should change the routine to perform the conversion on 8, 16 or 32-bit types natively instead of always working on 1 byte at a time. That will get rid of most of the if statements.

--
- Bob
[ -- All my signature links are 404 -- ]

ImLeftFooted
Member #3,935
October 2003
avatar

#SelectExpand
1int convertImage(int srcBpp, unsigned char *src, int srcSize, 2 int destBpp, unsigned char *dest, int destSize, 3 int lineStart, int destLineWidth, int linePadding) 4{ 5 PIXEL_PACK_TABLE(Src, srcBpp) 6 PIXEL_PACK_TABLE(Dest, destBpp) 7 8 if(srcBpp < 6 && destBpp > 5) { 9 10 convertImageBW2COL(srcBpp, src, srcSize, destBpp, dest, destSize, lineStart, destLineWidth, linePadding); 11 return 0; 12 } 13 else if(srcBpp > 5 && destBpp < 6) { 14 15 //convertImageCOL2BW(srcBpp, src, srcSize, destBpp, dest, destSize, lineStart, destLineWidth, linePadding); 16 //return; 17 } 18 else if(srcBpp < 6 && destBpp < 6) { 19 20 //convertImageBW2BW(srcBpp, src, srcSize, destBpp, dest, destSize, lineStart, destLineWidth, linePadding); 21 //return; 22 } 23 24 // Max unsigned char number for convience. 25 const unsigned char max = 0xff; 26 27 // current color index 28 // 0 is red, 1 is green, 2 is blue 29 int col = 0; 30 31 // number of bits from src and dest. 32 int srcOffset = 0; 33 int destOffset = 0; 34 35 // dest index 36 int destI = 0; 37 38 unsigned char byte = 0; 39 int srcCol; 40 41 //[03:10] <Planck_> One way to avoid division might be to do a fixed-point multiply 42 //[03:12] <Planck_> Instead of x * A / B, precalculate (say) C = 256*A/B and compute (x*C) >> 8. 43 //[03:14] <Planck_> Since it looks like there are are 3 values of C you need here. 44 45 //[03:24] <Planck_> To minimize the rounding problem, you can shift C upward 8 bits. So it's like a floating point number with 8 bits of precision 46 //[03:24] <Planck_> You just need to make sure to shift it down again after you multiply 47 //[03:24] <ddustin> hmm 48 //[03:25] <Planck_> So when you calculate (srcCol * C) >> 8, it's really srcCol * (256 * A / B) / 256. 49 //[03:26] <Planck_> So the maximum error is 1/256th 50 //[03:26] <Planck_> 1/256th of srcCol, I mean. 
51 52 int C[3] = 53 { 54 0xff * MAX_FOR_COLOR(sizesDest[0]) / MAX_FOR_COLOR(sizesSrc[0]), 55 0xff * MAX_FOR_COLOR(sizesDest[1]) / MAX_FOR_COLOR(sizesSrc[1]), 56 0xff * MAX_FOR_COLOR(sizesDest[2]) / MAX_FOR_COLOR(sizesSrc[2]) 57 }; 58 59 int destLineWidthIncremented = destLineWidth; 60 int srcI = 0; 61 62 // srcI is src index 63 for( ;; ) { 64 65 // TODO fix all the bit shifts to cast back to unsigned char correctly. 66 // do the same on the server. 67 68 srcCol = 69 // Put the color component against the left wall 70 (unsigned char)( ( (unsigned char)((src[srcI] << srcOffset) & 0xffu) 71 72 // Now move it over to the right wall 73 >> (8 - sizesSrc[col]) 74 ) & 0xffu ); 75 76 srcCol |= 77 // Move bits that were cut off before to the right edge 78 // and binary or them with srcCol. 79 (unsigned char)( (src[srcI + 1] >> (8 - ((sizesSrc[col] + srcOffset) - 8))) & 0xffu 80 ); 81 /* 82 if(sizesSrc[col] + srcOffset >= 8) 83 ++srcI; 84 */ 85 srcI += (((sizesSrc[col] + srcOffset) & 8) >> 3); 86 87 if(srcI >= srcSize) 88 break; 89 90 byte |= 91 // We multiply color by new max value and divide by old max value. 92 // This converts src[srcI] to the new bpp 93 (unsigned char)( ( (unsigned char)( ( (unsigned char)((srcCol * C[col]) >> 8) 94 // We push the data to the left of the byte, making room for 95 // the next pixel inside the same byte. 96 << (8 - sizesDest[col]) 97 ) & 0xffu ) 98 // Now we shift _right_ in case another pixel is already using this part of this byte. 99 >> destOffset 100 ) & 0xffu ); 101 102 // If this color component fills up the byte... 103 if(sizesDest[col] + destOffset >= 8) { 104 105 if(destI >= destSize) 106 break; 107 108 dest[destI] = byte; 109 byte = 0; 110 111 // Move to the next byte 112 destI++; 113 114 if(destI + linePadding + lineStart >= destLineWidthIncremented) { 115 116 destI += linePadding; 117 destLineWidthIncremented += destLineWidth; 118 } 119 120 // Here we do the same as before, except... 
121 byte |= 122 (unsigned char)( ( (unsigned char)( ( (unsigned char)((srcCol * C[col]) >> 8) 123 << (8 - sizesDest[col]) 124 ) & 0xffu ) 125 // ...now we move _left_ to get only the piece that was cut off in the first operation 126 // the piece will be stuffed in the left part of the byte. 127 << (8 - destOffset) 128 ) & 0xffu ); 129 } 130 131 // Increment offset by how many bits we've inserted 132 // Decrement offset if offset >= 8 133 destOffset += sizesDest[col]; 134 destOffset = (destOffset < 8 ? destOffset : destOffset - 8); 135 136 // Increment offset by how many bits we've read 137 // Increment srcI if we've gone past our 8 bits 138 // Decrement offset if offset >= 8 139 srcOffset += sizesSrc[col]; 140 srcOffset = (srcOffset < 8 ? srcOffset : srcOffset - 8); 141 142 // We move on to the next color component, wrapping back to 143 // the first one if needed. 144 col = (col+1 < 3 ? col+1 : 0); 145 } 146 147 return min(destSize, destI); 148}

Here is the implementation I'm using now.

Is this faster than new_color = (old_color << shift) | (old_color >> (8 - shift))?

Quote:

Finally, you should change the routine to perform the conversion on 8, 16 or 32-bit types natively instead of always working on 1 byte at a time.

The way the function is used it doesn't get a large advantage from that.

Quote:

That will get rid of most of the if statements.

Apparently ARM has 'conditional instructions' which leads me to believe if statements with a single instruction inside are optimized. I noted replacing col = ++col % 3 with a branch had a decent improvement in performance.

edit created destLineWidthIncremented variable to remove a multiplication.
edit What should 'shift' be?

I can't quite work out how that works.

10000001 should become 0111. We're binary ORing only twice from two digits and must produce three digits.

If shift is 3 we get

10000001 << 3 | 10000001 >> 5
00001000      | 00000100

00001100

If shift is 2 we get

10000001 << 2 | 10000001 >> 6
00000100      | 00000010

00000110

Hm, let's try with gray

10000000 << 4 | 10000000 >> 4
00001000      | 00000000

00001000

Okay this is correct but why use '4'?

Maybe this is oldBpp - newBpp?

Let's try 7 -> 5 bpp

1000001 << 2 | 1000001 >> 6
0000100      | 0000001

0000101

Holy crap that works! I don't really get why though... Ah spoke too soon.

Let's try reversing it

1000001 >> 2 | 1000001 << 6
0010000      | 1000000

1010000 & 0011111 =
0010000

This is close but doesn't feel right. Let's make it a bit more white.

1000010 >> 2 | 1000010 << 6
0010000      | 0000000

0010000

Hm....

1000100 >> 2 | 1000100 << 6
0010001      | 0000000

0010001

Yes this would appear correct...
I don't understand the theory behind this though.

edit
I can't get it to work.
Here is how I modified the code. The result looks like it's just black.

    byte |=
    // We multiply color by new max value and divide by old max value.
    // This converts src[srcI] to the new bpp
    (unsigned char)( ( (unsigned char)( ( (unsigned char)( ((srcCol << (sizesSrc[col] - sizesDest[col])) & 0xffu) | ((srcCol >> (8 - (sizesSrc[col] - sizesDest[col]))) & 0xffu ) )
                       // We push the data to the left of the byte, making room for
                       // the next pixel inside the same byte.
                       << (8 - sizesDest[col])
                       )  & 0xffu )
              // Now we shift _right_ in case another pixel is already using this part of this byte.
              >> destOffset
              ) & 0xffu );

edit, I just flipped shift directions a bunch until it started working. Now it works and has nothing but bitwise and addition! Woo!

I still wouldn't mind an explanation on this trick :). Here is the code as it is now.

#SelectExpand
1int convertImage(int srcBpp, unsigned char *src, int srcSize, 2 int destBpp, unsigned char *dest, int destSize, 3 int lineStart, int destLineWidth, int linePadding) 4{ 5 PIXEL_PACK_TABLE(Src, srcBpp) 6 PIXEL_PACK_TABLE(Dest, destBpp) 7 8 if(srcBpp < 6 && destBpp > 5) { 9 10 convertImageBW2COL(srcBpp, src, srcSize, destBpp, dest, destSize, lineStart, destLineWidth, linePadding); 11 return 0; 12 } 13 else if(srcBpp > 5 && destBpp < 6) { 14 15 //convertImageCOL2BW(srcBpp, src, srcSize, destBpp, dest, destSize, lineStart, destLineWidth, linePadding); 16 //return; 17 } 18 else if(srcBpp < 6 && destBpp < 6) { 19 20 //convertImageBW2BW(srcBpp, src, srcSize, destBpp, dest, destSize, lineStart, destLineWidth, linePadding); 21 //return; 22 } 23 24 // Max unsigned char number for convience. 25 const unsigned char max = 0xff; 26 27 (void)max; 28 29 // current color index 30 // 0 is red, 1 is green, 2 is blue 31 int col = 0; 32 33 // number of bits from src and dest. 34 int srcOffset = 0; 35 int destOffset = 0; 36 37 // dest index 38 int destI = 0; 39 40 unsigned char byte = 0; 41 int srcCol; 42 43 int destLineWidthIncremented = destLineWidth; 44 int srcI = 0; 45 46 int diff1[3] = 47 { 48 sizesDest[0] - sizesSrc[0], 49 sizesDest[1] - sizesSrc[1], 50 sizesDest[2] - sizesSrc[2] 51 }; 52 53 int diff2[3] = 54 { 55 8 - (sizesDest[0] - sizesSrc[0]), 56 8 - (sizesDest[1] - sizesSrc[1]), 57 8 - (sizesDest[2] - sizesSrc[2]), 58 }; 59 60 for(int col = 0; col < 3; col++) { 61 62 if(sizesSrc[col] > sizesDest[col]) { 63 64 swap(diff1[col], diff2[col]); 65 66 diff1[col] = -diff1[col]; 67 diff2[col] = -diff2[col]; 68 } 69 } 70 71 // srcI is src index 72 for( ;; ) { 73 74 // TODO fix all the bit shifts to cast back to unsigned char correctly. 75 // do the same on the server. 
76 77 srcCol = 78 79 // Put the color component against the left wall 80 (unsigned char)( ( (unsigned char)((src[srcI] << srcOffset) & 0xffu) 81 82 // Now move it over to the right wall 83 >> (8 - sizesSrc[col]) 84 ) & 0xffu ); 85 86 srcCol |= 87 // Move bits that were cut off before to the right edge 88 // and binary or them with srcCol. 89 (unsigned char)( (src[srcI + 1] >> (8 - ((sizesSrc[col] + srcOffset) - 8))) & 0xffu 90 ); 91 /* 92 if(sizesSrc[col] + srcOffset >= 8) 93 ++srcI; 94 */ 95 srcI += (((sizesSrc[col] + srcOffset) & 8) >> 3); 96 97 if(srcI >= srcSize || destI >= destSize) 98 break; 99 100 byte |= 101 // We multiply color by new max value and divide by old max value. 102 // This converts src[srcI] to the new bpp 103 (unsigned char)( ( (unsigned char)( ( (unsigned char)( ((srcCol << (diff1[col])) & 0xffu) | ((srcCol >> (diff2[col])) & 0xffu ) ) 104 // We push the data to the left of the byte, making room for 105 // the next pixel inside the same byte. 106 << (8 - sizesDest[col]) 107 ) & 0xffu ) 108 // Now we shift _right_ in case another pixel is already using this part of this byte. 109 >> destOffset 110 ) & 0xffu ); 111 112 // If this color component fills up the byte... 113 if(sizesDest[col] + destOffset >= 8) { 114 115 dest[destI] = byte; 116 byte = 0; 117 118 // Move to the next byte 119 destI++; 120 121 if(destI + linePadding + lineStart >= destLineWidthIncremented) { 122 123 destI += linePadding; 124 destLineWidthIncremented += destLineWidth; 125 } 126 127 // Here we do the same as before, except... 128 byte |= 129 (unsigned char)( ( (unsigned char)( ( (unsigned char)( ((srcCol << (diff1[col])) & 0xffu) | ((srcCol >> (diff2[col])) & 0xffu ) ) 130 << (8 - sizesDest[col]) 131 ) & 0xffu ) 132 // ...now we move _left_ to get only the piece that was cut off in the first operation 133 // the piece will be stuffed in the left part of the byte. 
134 << (8 - destOffset) 135 ) & 0xffu ); 136 } 137 138 // Increment offset by how many bits we've inserted 139 // Decrement offset if offset >= 8 140 destOffset += sizesDest[col]; 141 destOffset = (destOffset < 8 ? destOffset : destOffset - 8); 142 143 // Increment offset by how many bits we've read 144 // Increment srcI if we've gone past our 8 bits 145 // Decrement offset if offset >= 8 146 srcOffset += sizesSrc[col]; 147 srcOffset = (srcOffset < 8 ? srcOffset : srcOffset - 8); 148 149 // We move on to the next color component, wrapping back to 150 // the first one if needed. 151 col = (col+1 < 3 ? col+1 : 0); 152 } 153 154 return min(destSize, destI); 155}

edit The code only worked for converting 'up' bpp (ie 16 -> 24), now the code works for both ways.

GullRaDriel
Member #3,861
September 2003
avatar

Is it working faster now ?

"Code is like shit - it only smells if it is not yours"
Allegro Wiki, full of examples and articles !!

ImLeftFooted
Member #3,935
October 2003
avatar

Yes much faster. I would be happy to make it even faster though -- if anyone knows of some ASM tricks I could pull.

GullRaDriel
Member #3,861
September 2003
avatar

Why do you keep empty if statements, and a call to (void)max; ?

"Code is like shit - it only smells if it is not yours"
Allegro Wiki, full of examples and articles !!

Vanneto
Member #8,643
May 2007

(void)max; prevents the compiler from spitting out an "unused variable" warning. I'm almost certain that gets optimized down to nothing - the same with empty if-statements. :P

In capitalist America bank robs you.

ImLeftFooted
Member #3,935
October 2003
avatar

Also they only happen once per function call. The real need-for-speed is inside the loop.

Go to: