Some thoughts on removing debuginfo to reduce Android package size

I previously came across the Redex library (github.com/facebook/re…), which implements a number of dex optimizations; one of them is removing debuginfo from dex files.

// Strips debug entries from the IRCode of a single method.
//
// An entry is erased when should_remove() selects it, or when debug info is
// being force-discarded for this method (m_config.drop_all_dbg_info or
// should_drop_synth) and is_debug_entry() is true for it. For every entry
// that is kept, the loop records in debug_info_empty whether any real debug
// information remains.
//
// NOTE: this listing is an excerpt. The article's copy stops right after the
// loop, so the code that consumes debug_info_empty is not shown here.
Stats StripDebugInfo::run(IRCode& code, bool should_drop_synth) {
  Stats stats;
  ++stats.num_matches;
  bool debug_info_empty = true;
  bool force_discard = m_config.drop_all_dbg_info || should_drop_synth;

  // erase() returns the next iterator, so the iterator is only advanced
  // manually on the "keep" path.
  for (auto it = code.begin(); it != code.end();) {
    const auto& mie = *it;
    if (should_remove(mie, stats) || (force_discard && is_debug_entry(mie))) {
      // Even though force_discard will drop the debug item below, preventing
      // any of the debug entries for :meth to be output, we still want to
      // erase those entries here so that transformations like inlining won't
      // move these entries into a method that does have a debug item.
      it = code.erase(it);
    } else {
      switch (mie.type) {
      case MFLOW_DEBUG:
        // Any debug information op other than an end sequence means
        // we have debug info.
        if (mie.dbgop->opcode() != DBG_END_SEQUENCE) debug_info_empty = false;
        break;
      case MFLOW_POSITION:
        // Any line position entry means we have debug info.
        debug_info_empty = false;
        break;
      default:
        break;
      }
      ++it;
    }
  }

  // ... the remainder of the original function is omitted in this excerpt ...
  return stats;
}

Engineers at Alipay have also written about this (juejin.cn/post/684490…). However, that article does not explain the details clearly, so I did some thinking of my own and recorded it here.

1 what’s debuginfo

The debuginfo is actually a structure in the dex file: debug_info_item, which is referenced from code_item, which in turn sits under class_data_item. Let's open up debug_info_item: what's in there? The first item is the starting line number for the line-number state machine. The second item is the number of parameters of the method. The third item is the list of string IDs of the method's parameter names. From the fourth item onward come the state-machine opcodes of the method's debuginfo. So how are these decoded? For example, take an opcode whose value is 14, i.e. 0x0E (reference code: androidxref.com/9.0.0_r3/xr…). The corresponding address and line deltas are calculated from this value: with DBG_FIRST_SPECIAL = 0x0a, DBG_LINE_BASE = -4 and DBG_LINE_RANGE = 15, the adjusted opcode is 14 - 10 = 4, so the address advances by 4 / 15 = 0 and the line advances by -4 + (4 % 15) = 0. The DBG_END_SEQUENCE opcode that follows marks the end of the sequence.

  // Debug info opcodes and constants.
  //
  // Opcodes below DBG_FIRST_SPECIAL are the explicit state-machine
  // instructions. Every opcode value >= DBG_FIRST_SPECIAL is a "special"
  // opcode that advances both address and line in one byte, using
  // DBG_LINE_BASE and DBG_LINE_RANGE (see the default case of the decoder).
  enum {
    DBG_END_SEQUENCE         = 0x00,
    DBG_ADVANCE_PC           = 0x01,
    DBG_ADVANCE_LINE         = 0x02,
    DBG_START_LOCAL          = 0x03,
    DBG_START_LOCAL_EXTENDED = 0x04,
    DBG_END_LOCAL            = 0x05,
    DBG_RESTART_LOCAL        = 0x06,
    DBG_SET_PROLOGUE_END     = 0x07,
    DBG_SET_EPILOGUE_BEGIN   = 0x08,
    DBG_SET_FILE             = 0x09,
    DBG_FIRST_SPECIAL        = 0x0a,
    DBG_LINE_BASE            = -4,
    DBG_LINE_RANGE           = 15
  };

// Decodes the position (address/line) entries of one debug_info_item stream.
//
// stream               - start of the encoded debug info; nullptr yields false.
// index_to_string_data - callable mapping a string index to the value stored
//                        in entry.source_file_ (used by DBG_SET_FILE).
// position_functor     - called as position_functor(context, entry) for every
//                        special opcode; returning true stops decoding early.
// context              - opaque pointer passed through to position_functor.
//
// Returns false if stream is null; otherwise returns true, either when
// DBG_END_SEQUENCE is reached or when position_functor asks to stop.
template<typename DexDebugNewPosition, typename IndexToStringData>
bool DexFile::DecodeDebugPositionInfo(const uint8_t* stream,
                                      IndexToStringData index_to_string_data,
                                      DexDebugNewPosition position_functor,
                                      void* context) {
  if (stream == nullptr) {
    return false;
  }

  // Header: starting line, parameter count, then one name index per parameter.
  PositionInfo entry = PositionInfo();
  entry.line_ = DecodeUnsignedLeb128(&stream);
  uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
  for (uint32_t i = 0; i < parameters_size; ++i) {
    DecodeUnsignedLeb128P1(&stream);  // Parameter name.
  }

  // State machine: run opcodes until the end marker or an early exit.
  for (;;)  {
    uint8_t opcode = *stream++;
    switch (opcode) {
      case DBG_END_SEQUENCE:
        return true;  // end of stream.
      case DBG_ADVANCE_PC:
        entry.address_ += DecodeUnsignedLeb128(&stream);
        break;
      case DBG_ADVANCE_LINE:
        entry.line_ += DecodeSignedLeb128(&stream);
        break;
      // Local-variable opcodes carry no position data; their operands are
      // decoded only to move the stream past them.
      case DBG_START_LOCAL:
        DecodeUnsignedLeb128(&stream);  // reg.
        DecodeUnsignedLeb128P1(&stream);  // name.
        DecodeUnsignedLeb128P1(&stream);  // descriptor.
        break;
      case DBG_START_LOCAL_EXTENDED:
        DecodeUnsignedLeb128(&stream);  // reg.
        DecodeUnsignedLeb128P1(&stream);  // name.
        DecodeUnsignedLeb128P1(&stream);  // descriptor.
        DecodeUnsignedLeb128P1(&stream);  // signature.
        break;
      case DBG_END_LOCAL:
      case DBG_RESTART_LOCAL:
        DecodeUnsignedLeb128(&stream);  // reg.
        break;
      case DBG_SET_PROLOGUE_END:
        entry.prologue_end_ = true;
        break;
      case DBG_SET_EPILOGUE_BEGIN:
        entry.epilogue_begin_ = true;
        break;
      case DBG_SET_FILE: {
        uint32_t name_idx = DecodeUnsignedLeb128P1(&stream);
        entry.source_file_ = index_to_string_data(name_idx);
        break;
      }
      default: {
        // Special opcode: one byte encodes both an address delta and a line
        // delta, then a position entry is emitted.
        int adjopcode = opcode - DBG_FIRST_SPECIAL;
        entry.address_ += adjopcode / DBG_LINE_RANGE;
        entry.line_ += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
        if (position_functor(context, entry)) {
          return true;  // early exit.
        }
        // The prologue/epilogue flags apply only to the entry just emitted.
        entry.prologue_end_ = false;
        entry.epilogue_begin_ = false;
        break;
      }
    }
  }
}

2 What does the debuginfo do

What does debuginfo do? No need for suspense: debuginfo records the line-number information of the code, which is used for the stack trace printed when our application crashes and by the debugger. When we remove it, the printed stack looks like this:

java.lang.Exception
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown)
at com.aa.concurrent.DefaultThreadFactory$RevisePriorityRunnable.run(Unknown)
at java.lang.Thread.run(Unknown)
Copy the code

This is what happens if you have debuginfo

java.lang.Exception
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1167)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:641)
at com.aa.concurrent.DefaultThreadFactory$RevisePriorityRunnable.run(DefaultThreadFactory.java:46)
at java.lang.Thread.run(Thread.java:919)
Copy the code

So how is the corresponding line number found from the debuginfo? Combine the decoding code above with LineNumForPcCb below. Suppose the decoded position entries are:

entry line entry address
0 0
1 4
2 8

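Suppose we are looking up context->address_ = 8:

  • First callback: entry.address_ = 0, which is less than 8, so context->line_num_ = 0 and the callback returns false (decoding continues).
  • Second callback: entry.address_ = 4, which is less than 8, so context->line_num_ = 1 and the callback returns false (decoding continues).
  • Third callback: entry.address_ = 8, which equals 8, so context->line_num_ = 2 and the callback returns true (decoding stops).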

The line number that you end up with is the starting line number of this method plus 2.

// Position callback used to resolve the line number for a given address.
//
// raw_context points at a LineNumFromPcContext holding the address being
// looked up (address_) and the best line found so far (line_num_).
// Returns true to stop the decoder, false to keep receiving entries.
bool DexFile::LineNumForPcCb(void* raw_context, const PositionInfo& entry) {
  auto* const lookup = static_cast<LineNumFromPcContext*>(raw_context);

  // Entries arrive in ascending address order. Once an entry lies beyond the
  // requested address, the line recorded by the previous callback is the
  // final answer and must not be overwritten.
  const bool past_target = entry.address_ > lookup->address_;
  if (!past_target) {
    lookup->line_num_ = entry.line_;
  }

  // Stop on overshoot or on an exact address match; otherwise keep going.
  return past_target || entry.address_ == lookup->address_;
}
Copy the code

3 Industry-related debuginfo service optimization ideas

As described in the earlier article by the Alipay engineers (juejin.cn/post/684490…), the debuginfo obviously occupies a certain amount of space in the dex. If we delete it we can reduce the package size, but if we delete all of it, the line-number information in exception stack traces is lost. The Alipay article says:

All of the above cases involve stack information, and in scenario 1, calling internal stackTrace objects in the throwable via reflection simply doesn't work and requires a different approach. The initial idea was to try hook Art virtual machine. I searched the source code every day to see the points that could hook, but finally I gave up. One was worried about compatibility problems, and the other was that there were too many hook points, which made me panic. Finally, I tried to modify the dex file directly and reserved a small debugItem so that when the system searched for the line number, the line number of the instruction set was consistent with the line number of the source file. In this way, there was no need to do anything. Any line number reported by monitoring was directly changed into the line number of the instruction set. It can be represented by the following diagram: Originally each method will have a debugInfoItem, each debugInfoItem has an instruction set line number and the mapping relationship between the source file line number, I made the modification is actually very simple, is to delete all the redundant debugInfoItem, Only one debugInfoItem is left, all methods point to the same debugInfoItem, and the instruction set line number in the debugInfoItem is the same as the source file line number, so that no matter what way to look up the line number, get the instruction set line number. In fact, it is not enough to leave a debugInfoItem. To be compatible with all virtual machine search methods, you need to partition the debugInfoItem, and the debugInfoItem table can not be too large. Encountered a pit is on androidO for dex2OAT optimization, will frequently traverse the debugInfoItem, resulting in AOT compilation is relatively slow, and finally through the debugInfoItem partition to solve. Source: author: mPaaS links: https://juejin.cn/post/6844903712201277448 the nuggets copyright owned by the author. 
Commercial reprint please contact the author for authorization, non-commercial reprint please indicate the source.Copy the code

I actually have some questions about this idea

  • (1) How to distinguish overloaded methods
  • (2) How to use methods with different parameters
  • (3) Can items with different instruction numbers be reused
  • (4) How to distinguish the system method from our own method

I don't know how Alipay's approach handles these cases.

3.1 How to solve the above problems

My personal thought: since the most important job of debuginfo is to provide a line number, if all methods reuse a single item, the line number has to become a fairly wide value, such as 16 or 32 bits, that encodes extra information. For example, bits 0-3 could identify the overload, bits 4-7 the number of arguments, and so on. The result is a very large number. Given this number, we decode it and finally recover the real source line number from the mapping file generated at build time.

3.2 Easy dex operation library.

For manipulating dex files I recommend this library: github.com/JesusFreke/… Its code for writing a debug item is shown below for reference.

private int writeDebugItem(@Nonnull DexDataWriter writer,
                               @Nonnull DebugWriter<StringKey, TypeKey> debugWriter,
                               @Nullable Iterable<? extends StringKey> parameterNames,
                               @Nullable Iterable<? extends DebugItem> debugItems) throws IOException {
        int parameterCount = 0;
        int lastNamedParameterIndex = -1;
        if(parameterNames ! =null) {
            parameterCount = Iterables.size(parameterNames);
            int index = 0;
            for (StringKey parameterName: parameterNames) {
                if(parameterName ! =null) { lastNamedParameterIndex = index; } index++; }}if (lastNamedParameterIndex == -1 && (debugItems == null || Iterables.isEmpty(debugItems))) {
            return NO_OFFSET;
        }

        numDebugInfoItems++;

        int debugItemOffset = writer.getPosition();
        int startingLineNumber = 0;

        if(debugItems ! =null) {
            for (org.jf.dexlib2.iface.debug.DebugItem debugItem: debugItems) {
                if (debugItem instanceof LineNumber) {
                    startingLineNumber = ((LineNumber)debugItem).getLineNumber();
                    break;
                }
            }
        }
        writer.writeUleb128(startingLineNumber);

        writer.writeUleb128(parameterCount);
        if(parameterNames ! =null) {
            int index = 0;
            for (StringKey parameterName: parameterNames) {
                if (index == parameterCount) {
                    break;
                }
                index++;
                writer.writeUleb128(stringSection.getNullableItemIndex(parameterName) + 1); }}if(debugItems ! =null) {
            debugWriter.reset(startingLineNumber);

            for(DebugItem debugItem: debugItems) { classSection.writeDebugItem(debugWriter, debugItem); }}// write an END_SEQUENCE opcode, to end the debug item
        writer.write(0);

        return debugItemOffset;
    }
Copy the code

Thanks for reading.