// HalfFloatUtils.cs (4.2 KB)
using System.IO;
using System.Runtime.InteropServices;
  2. internal class HalfFloatUtils
  3. {
  4. private static MemoryStream buffer = new MemoryStream(4);
  5. private static BinaryReader bufferReader = new BinaryReader(buffer);
  6. private static BinaryWriter bufferWriter = new BinaryWriter(buffer);
  7. private static uint[] baseTable = new uint[512];
  8. private static uint[] shiftTable = new uint[512];
  9. private static uint[] mantissaTable = new uint[2048];
  10. private static uint[] exponentTable = new uint[64];
  11. private static uint[] offsetTable = new uint[64];
  12. static HalfFloatUtils()
  13. {
  14. for (int i = 0; i < 256; ++i)
  15. {
  16. int e = i - 127;
  17. // very small number (0, -0)
  18. if (e < -27)
  19. {
  20. baseTable[i | 0x000] = 0x0000;
  21. baseTable[i | 0x100] = 0x8000;
  22. shiftTable[i | 0x000] = 24;
  23. shiftTable[i | 0x100] = 24;
  24. // small number (denorm)
  25. }
  26. else if (e < -14)
  27. {
  28. baseTable[i | 0x000] = (uint)(0x0400 >> (-e - 14));
  29. baseTable[i | 0x100] = (uint)((0x0400 >> (-e - 14)) | 0x8000);
  30. shiftTable[i | 0x000] = (uint)(-e - 1);
  31. shiftTable[i | 0x100] = (uint)(-e - 1);
  32. // normal number
  33. }
  34. else if (e <= 15)
  35. {
  36. baseTable[i | 0x000] = (uint)((e + 15) << 10);
  37. baseTable[i | 0x100] = (uint)(((e + 15) << 10) | 0x8000);
  38. shiftTable[i | 0x000] = 13;
  39. shiftTable[i | 0x100] = 13;
  40. // large number (Infinity, -Infinity)
  41. }
  42. else if (e < 128)
  43. {
  44. baseTable[i | 0x000] = 0x7c00;
  45. baseTable[i | 0x100] = 0xfc00;
  46. shiftTable[i | 0x000] = 24;
  47. shiftTable[i | 0x100] = 24;
  48. // stay (NaN, Infinity, -Infinity)
  49. }
  50. else
  51. {
  52. baseTable[i | 0x000] = 0x7c00;
  53. baseTable[i | 0x100] = 0xfc00;
  54. shiftTable[i | 0x000] = 13;
  55. shiftTable[i | 0x100] = 13;
  56. }
  57. }
  58. mantissaTable[0] = 0;
  59. for (int i = 1; i < 1024; ++i)
  60. {
  61. int m = i << 13; // zero pad mantissa bits
  62. int e = 0; // zero exponent
  63. // normalized
  64. while ((m & 0x00800000) == 0)
  65. {
  66. e -= 0x00800000; // decrement exponent
  67. m <<= 1;
  68. }
  69. m &= ~0x00800000; // clear leading 1 bit
  70. e += 0x38800000; // adjust bias
  71. mantissaTable[i] = (uint)(m | e);
  72. }
  73. for (int i = 1024; i < 2048; ++i)
  74. {
  75. mantissaTable[i] = (uint)(0x38000000 + ((i - 1024) << 13));
  76. }
  77. exponentTable[0] = 0;
  78. for (int i = 1; i < 31; ++i)
  79. {
  80. exponentTable[i] = (uint)(i << 23);
  81. }
  82. exponentTable[31] = 0x47800000;
  83. exponentTable[32] = 0x80000000;
  84. for (int i = 33; i < 63; ++i)
  85. {
  86. exponentTable[i] = (uint)(0x80000000 + ((i - 32) << 23));
  87. }
  88. exponentTable[63] = 0xc7800000;
  89. offsetTable[0] = 0;
  90. for (int i = 1; i < 64; ++i)
  91. {
  92. if (i == 32)
  93. {
  94. offsetTable[i] = 0;
  95. }
  96. else
  97. {
  98. offsetTable[i] = 1024;
  99. }
  100. }
  101. }
  102. /**
  103. * round a number to a half float number bits.
  104. * @param {number} num
  105. */
  106. public static ushort roundToFloat16Bits(float num)
  107. {
  108. buffer.Position = 0;
  109. bufferWriter.Write(num);
  110. buffer.Position = 0;
  111. uint f = bufferReader.ReadUInt32();
  112. uint e = (f >> 23) & 0x1ff;
  113. return (ushort)(baseTable[e] + ((f & 0x007fffff) >> (int)shiftTable[e]));
  114. }
  115. /**
  116. * convert a half float number bits to a number.
  117. * @param {number} float16bits - half float number bits
  118. */
  119. private static float convertToNumber(uint float16bits)
  120. {
  121. uint m = float16bits >> 10;
  122. buffer.Position = 0;
  123. bufferWriter.Write(mantissaTable[offsetTable[m] + (float16bits & 0x3ff)] + exponentTable[m]);
  124. buffer.Position = 0;
  125. return bufferReader.ReadSingle();
  126. }
  127. }