Here is my compression algorithm, for you smart guys out there, let me know how to improve it, it only works for Blank.gif and it's small as heck. Let me know what you think
using System; using System.IO; using System.Collections.Generic; using System.Linq; using System.Numerics; using System.Threading.Tasks; namespace HelloWorld { class Hello { public static byte[] bytes = System.IO.File.ReadAllBytes(@"E:\data\Blank.gif"); public static byte[] bytes_new; static void Main() { BigInteger number_compress = new BigInteger(bytes); //Console.WriteLine(number_compress); //Console.WriteLine(); int[] one = new int[50]; int[] zero = new int[50]; int[] position = new int[50]; for (int i = 0; i < 50; i++) { Console.WriteLine(i); one = IteratedBitcount(number_compress); zero = IteratedZerocount(number_compress); position = one + zero; BigInteger fac = Factorial(one); BigInteger n = FindN(number_compress, one, position); number_compress = n; } //Console.WriteLine(number_compress); //Console.WriteLine(); BigInteger number_decompress = 0; for (int i = 501; i >= 0; i) { Console.WriteLine(i); number_decompress = FindNumber(number_compress, one, position); number_compress = number_decompress; } //Console.WriteLine(number_decompress); //Console.WriteLine(); bytes_new = number_decompress.ToByteArray(); File.WriteAllBytes(@"E:\data\New_Blank.gif", bytes_new); } static BigInteger FindNumber(BigInteger n, int one, int position) { BigInteger number=0; int k = one; for (int j = position1; j >=0 ; j) { BigInteger mult = j; BigInteger num = 0; for (BigInteger i = 1; i < (BigInteger)k; i++) { mult *= (j  i); } num = (mult / Factorial(k)); if (num <= n) { number = number + BigInteger.Pow(2, j); k; n = n  num; } if (k == 0) break; } return number; } static BigInteger FindN(BigInteger number, int one, int position) { BigInteger findMSB = number; BigInteger test = number; var numBits = (int)Math.Ceiling(BigInteger.Log(2)); BigInteger MSB=1; while (findMSB > 0) { MSB <<= 1; findMSB >>= 1; } MSB >>= 1; BigInteger n = 0; int k = one; int pos = position; while (MSB != 0) { if ((test & MSB) > 0) { int a = pos1; BigInteger mult = a; for (BigInteger i = 1; i < (BigInteger)k; i++) { mult *= (a  i); } n += (mult/Factorial(k)); k; } MSB >>= 1; pos; } return n; } static int IteratedBitcount(BigInteger n) { BigInteger test = n; int count = 0; while (test != 0) { if ((test & 1) == 1) { count++; } test >>= 1; } return count; } static int IteratedZerocount(BigInteger n) { BigInteger test = n; int count = 0; while (test != 0) { if ((test & 1)==0) { count++; } test >>= 1; } return count; } static BigInteger Factorial(int arg) { BigInteger value = 1; for (int index = 2; index <= arg; index++) { //Console.WriteLine(index); value *= index; } return value; } } } 



https://commons.wikimedia.org/wiki/File:Blank.gif




To begin with I want to make IteratedBitcount more efficient




Maybe instead of counting individual bits in the BigInteger, count bits in the byte array using a lookup table.
static int[] bitcountLookup = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; static int GetBitcount(byte[] bytes) { int bitcount = 0; for (int j = 0; j < bytes.Length; j++) bitcount += bitcountLookup[bytes[j]]; return bitcount; } 



I'm a physicist and I'm confused: if you tell me how many bits are in a binary number, how can there be only finitely many binary numbers with that number of bits? The only thing I can think is that you need some maximum number of bits. And 5 bits (which the original post uses) seems fairly low.


"Those four limbs are made for dancing. They're made for dancing madly!"


auroraloose wrote: I'm a physicist and I'm confused: if you tell me how many bits are in a binary number, how can there be only finitely many binary numbers with that number of bits? The only thing I can think is that you need some maximum number of bits. And 5 bits (which the original post uses) seems fairly low. I was a little confused at first too. The total number of bits isn't important because the ranks can keep counting to infinity if you keep increasing the number of bits. The OP used 5 bits total as an example. 



Hmm, the iterated loop is alright, I just happened to encounter a negative value for big integer. The thing that is taking up most of the time is the mult where it needs to run as much as the bit count. If there is a way to expand all the equations it would be great (a)*(a1)*(a2)...(an), so I don't have to loop. I looked up but there doesn't seem to be an easy way for this. Other than that the factorial function also takes a while. If Staphen can think of a way to expand the equations it would be cool :D. Keep in mind you need system.numerics to run BigInteger, this is c# code!




fredreload wrote: Hmm, the iterated loop is alright, I just happened to encounter a negative value for big integer. The thing that is taking up most of the time is the mult where it needs to run as much as the bit count. If there is a way to expand all the equations it would be great (a)*(a1)*(a2)...(an), so I don't have to loop. I looked up but there doesn't seem to be an easy way for this. Other than that the factorial function also takes a while. If Staphen can think of a way to expand the equations it would be cool :D. Keep in mind you need system.numerics to run BigInteger, this is c# code! I would recommend constraining the problem. First of all, you should be able to decompress the data using only the bitcount and the rank. Knowing that, I would decide on how to encode those two values into the compressed result. For instance, you could store one byte for the bitcount and one byte for the rank. This would allow you to store bitcounts up to 255 and ranks up to 255, which puts constraints on the size of the numbers for which you would need to compute factorials. Then you could precompute the results of the factorial function for various values within your constraints and use the precomputed values to decrease the number of multiplication operations that need to be performed. For instance, if you precomputed the value of 128!, you could compute the value of 133! using only five multiplication operations. 133! = 133 * 132 * 131 * 130 * 129 * 128! This may also help you with computing values for mult because of the following relationship. 125 * 124 * 123 * 122 * 121 = 125! / 120! Therefore, for large values of k, precomputing factorials could easily reduce the number of operations to compute both mult and k!. 



Hmm, what would happen if the ranking start from 0 at the bottom, so it starts from 11100, what would become of the formula?




I'm trying to do this in reverse. Assuming 11010 is my data, the n would be 8. 8 in binary is 1000 and it takes n=3 to get down to 0001. Then finally 3 with n=0. The thing is I can't get back up in reverse without recording the information about the number of zero and ones. In this case in order for 0 to become 3 I need to record 2 ones and 0 zeros. For 3 to become 8 I need to record 1 ones and 3 zeros. And then 8 becomes 11010 which is 26. Now if I can find a pattern for n traveling download it would be good but I can't seem to find it. For the data I am testing on, the first few n is, 1, 5, 21, 69, 469, 2135, 16921, 84593. Think you can find a pattern?
00111 0 0001 0 11 0 01011 1 0010 1 01101 2 0100 2 01110 3 1000 3 10011 4 10101 5 10110 6 11001 7 11010 8 11100 9 



using System;
using System.IO; using System.Collections.Generic; using System.Linq; using System.Numerics; using System.Threading.Tasks; using System.Text; namespace HelloWorld { class Hello { public static byte[] bytes = System.IO.File.ReadAllBytes(@"E:\Data\Blank.gif"); public static byte[] bytes_new; static void Main() { int star = 0; StringBuilder sb = new StringBuilder(); /* for (int k = 0; k < bytes.Length; k++) { sb.Append(Convert.ToString(bytes[k], 2)); } sb.Clear(); sb.Append("10101011"); */ BigInteger nb = new BigInteger(bytes.Concat(new byte[] { 0 }).ToArray()); bytes_new = nb.ToByteArray(); for (int i = 0; i < bytes_new.Length; i++) { sb.Append(Convert.ToString(bytes_new, 2).PadLeft(8, '0')); } Console.WriteLine(sb); Console.WriteLine(); int[] one = new int[2000]; int[] zero = new int[2000]; int[] position = new int[2000]; int[] new_one = new int[2000]; int[] new_zero = new int[2000]; int[] new_position = new int[2000]; BigInteger[] n = new BigInteger[2000]; for (int i = 0; i < 2000; i++) { for (int j = 0; j < sb.Length; j++) { char c = sb[j]; if (c == '0') { zero++; } else { one++; } } //one = IteratedBitcount(number_compress); //zero = IteratedZerocount(number_compress); position = one + zero; BigInteger fac = Factorial(one); n = FindN(sb, one, position, fac); Console.WriteLine(sb); sb = ToB(n, sb); Console.WriteLine(position); Console.WriteLine(one); Console.WriteLine(zero); Console.WriteLine(n); Console.WriteLine(); //sb.Append(string.Join("",n.ToByteArray().Select(x => Convert.ToString(x, 2).PadLeft(8, '0')))); if (sb.ToString() == "1"  sb.ToString() == "11") { star = i; break; } } //Console.WriteLine(number_compress); //Console.WriteLine(); for (int i = 0; i < star; i++) { new_one = one  one[i+1]; new_zero = zero  zero[i + 1]; new_position = position  position[i + 1]; } BigInteger number_decompress = 0; for (int i = star; i >= 0; i) { BigInteger fac = Factorial(one); number_decompress = FindNumber(sb, one, position, fac); sb = ToB(number_decompress, sb); //sb.Append(string.Join("",number_decompress.ToByteArray().Select(x => Convert.ToString(x, 2).PadLeft(8, '0')))); Console.WriteLine(sb); Console.WriteLine(); } //Console.WriteLine(number_decompress); Console.WriteLine(); byte[] final = new byte[sb.Length / 8]; for (int i = sb.Length / 8  1; i >= 0; i) { final = Convert.ToByte(sb.ToString(i * 8, 8), 2); } BigInteger b = new BigInteger(final); bytes_new = b.ToByteArray(); //byte[] buffer = Encoding.ASCII.GetBytes(sb.ToString()); File.WriteAllBytes(@"E:\Data\new_small.png", bytes_new); } static StringBuilder ToB(BigInteger n, StringBuilder sb) { sb.Length = 0; sb.Capacity = 0; sb.Clear(); int num = 0; while (BigInteger.Pow(2, num) <= n) { num++; } for (int k = num  1; k >= 0; k) { BigInteger counter = BigInteger.Pow(2, k); if (n >= counter) { sb.Append("1"); n = n  counter; } else { sb.Append("0"); } } return sb; } static string ToBinaryString(BigInteger bigint) { var bytes = bigint.ToByteArray(); var idx = bytes.Length  1; // Create a StringBuilder having appropriate capacity. var base2 = new StringBuilder(bytes.Length * 8); // Convert first byte to binary. var binary = Convert.ToString(bytes[idx], 2); // Ensure leading zero exists if value is positive. if (binary[0] != '0' && bigint.Sign == 1) { base2.Append('0'); } // Append binary string to StringBuilder. base2.Append(binary); // Convert remaining bytes adding leading zeros. for (idx; idx >= 0; idx) { base2.Append(Convert.ToString(bytes[idx], 2).PadLeft(8, '0')); } return base2.ToString(); } static BigInteger FindNumber(StringBuilder sb, int one, int position, BigInteger fac) { BigInteger number = 0; BigInteger n = 0; int k = one; for (int l = 0; l < sb.Length; l++) { if (sb[l] == '1') { n = n + BigInteger.Pow(2, sb.Length  l  1); } } for (int j = position  1; j >= 0; j) { BigInteger mult = j; BigInteger num = 0; for (int i = 1; i < (int)k; i++) { mult *= (j  i); } num = (mult / fac); if (num <= n) { number = number + BigInteger.Pow(2, j); fac /= k; k; n = n  num; } //Console.WriteLine(k); if (k == 0) break; } return number; } static BigInteger FindN(StringBuilder sb, int one, int position, BigInteger fac) { /* BigInteger findMSB = number; BigInteger test = number; BigInteger MSB = 1; while (findMSB > 0) { MSB <<= 1; findMSB >>= 1; } MSB >>= 1; */ BigInteger n = 0; int k = one; int pos = position; for (int i = 0; i < sb.Length; i++) { if (sb == '1') { int a = pos  1; BigInteger mult = a; for (int j = 1; j < k; j++) { mult *= (BigInteger)(a  j); } n += (mult / fac); fac /= k; k; } //Console.WriteLine(k); if (k == 0) break; pos; } return n; } static int IteratedBitcount(BigInteger n) { BigInteger test = n; int count = 0; while (test != 0) { if ((test & 1) == 1) { count++; } test >>= 1; } return count; } static int IteratedZerocount(BigInteger n) { BigInteger test = n; int count = 0; while (test != 0) { if ((test & 1) == 0) { count++; } test >>= 1; } return count; } static BigInteger Factorial(int arg) { BigInteger value = 1; for (int index = 2; index <= arg; index++) { value *= index; } return value; } } } 



The problem appears to be that by successively calculating the rank of the rank down to the lowest value you've created a one way function where different inputs can produce the same result. In which case it's like adding a series of numbers and trying to calculate the original series from just the sum, impossible.




nooneinparticular wrote: The problem appears to be that by successively calculating the rank of the rank down to the lowest value you've created a one way function where different inputs can produce the same result. In which case it's like adding a series of numbers and trying to calculate the original series from just the sum, impossible. Pretty much this, except that it is possible to return if you store the entire series of bitcounts. So it's like you're turning a single number into a series of bitcounts followed by a single rank. And I highly doubt that there is a simple pattern you can derive here either. If there was, then you wouldn't need the polynomial equation in the first place. 



there seems to be a number of magicians on this thread


what the... why am i procrastinating again....


For a number of N digits that has only one zero in it, there are N places the zero can go. If there are two zeroes, the first zero takes up one place, so there are N  1 places the second zero can go. However, some of them are places you already counted when placing the first zero, so you have to account for this. That's as far as I got when I reached the point in the thread where the problem had been solved, and it became uninteresting to me. I don't know immediately why it's a factorial relationship, and I don't care enough to think about it further. 

