gdb 反汇编disas源码排列问题

发布时间 2023-07-29 17:57:08作者: tsecer

问题

在开发过程中,可能需要查看cpp文件生成的汇编代码来确认一些问题。由于单纯的汇编代码看起来并不太容易捋清楚内部逻辑,所以最好能够把源代码的位置列出来。在gdb的早期版本中,这个功能是通过disas命令的/m修饰符(选项)来实现的。

如果使用过这个选项就会发现,这个功能显示的结果使人非常困惑。正如gdb自己内置帮助文档对该选项的吐槽:

With a /m modifier, source lines are included (if available).
This view is "source centric": the output is in source line order,
regardless of any optimization that is present.  Only the main source file
is displayed, not those of, e.g., any inlined functions.
This modifier hasn't proved useful in practice and is deprecated
in favor of /s.

gdb该功能实现

这个实现其实比较直观,debug信息中包含了linetable_entry结构,这个结构包含了某一行对应的内存地址。gdb读取到[行号,地址]这种pair之后,按照行号进行排序。

这里面还有一个细节:如果相邻两项的行号和地址(pc)都相同,则前一项会被当作重复项跳过,不会被拷贝:

      if (le[i].line == le[i + 1].line && le[i].pc == le[i + 1].pc)
	continue;		/* Ignore duplicates.  */

完整的函数实现:

/* Each item represents a line-->pc (or the reverse) mapping.  This is
   somewhat more wasteful of space than one might wish, but since only
   the files which are actually debugged are read in to core, we don't
   waste much space.

   The mixed source/assembly code quoted below reads only the LINE and
   PC fields of these entries.  */

struct linetable_entry
{
  /* The line number for this entry.  The disassembly code below treats
     a value of 0 as an end-of-function marker and skips such entries.  */
  int line;

  /* True if this PC is a good location to place a breakpoint for LINE.
     Stored as a one-bit flag.  */
  unsigned is_stmt : 1;

  /* The address for this entry.  */
  CORE_ADDR pc;
};

///@file: gdb-10.1\gdb\disasm.c
/* The idea here is to present a source-O-centric view of a
   function to the user.  This means that things are presented
   in source order, with (possibly) out of order assembly
   immediately following.

   N.B. This view is deprecated.

   The line table of SYMTAB is scanned: leading entries whose pc is
   below LOW are skipped, then entries are copied while their pc is
   below HIGH.  (NOTE(review): this presumes the line table is in
   ascending pc order -- confirm against the symtab reader.)  Each
   copied entry becomes a (line, start_pc, end_pc) record; the records
   are sorted by line if any line number was seen to decrease, and
   then each record's source line(s) and instructions are emitted
   through UIOUT.

   GDBARCH and FLAGS are passed through to dump_insns.  If FLAGS has
   DISASSEMBLY_FILENAME set, PRINT_SOURCE_LINES_FILENAME is passed to
   print_source_lines as well.

   If HOW_MANY is non-negative, output stops once the running total of
   dump_insns return values reaches HOW_MANY (presumably a count of
   instructions displayed -- dump_insns is not visible here).  */

static void
do_mixed_source_and_assembly_deprecated
  (struct gdbarch *gdbarch, struct ui_out *uiout,
   struct symtab *symtab,
   CORE_ADDR low, CORE_ADDR high,
   int how_many, gdb_disassembly_flags flags)
{
  int newlines = 0;
  int nlines;
  struct linetable_entry *le;
  struct deprecated_dis_line_entry *mle;
  struct symtab_and_line sal;
  int i;
  int out_of_order = 0;
  int next_line = 0;
  int num_displayed = 0;
  print_source_lines_flags psl_flags = 0;

  gdb_assert (symtab != NULL && SYMTAB_LINETABLE (symtab) != NULL);

  nlines = SYMTAB_LINETABLE (symtab)->nitems;
  le = SYMTAB_LINETABLE (symtab)->item;

  if (flags & DISASSEMBLY_FILENAME)
    psl_flags |= PRINT_SOURCE_LINES_FILENAME;

  /* Scratch array on the stack, sized for the whole line table; at
     most one record per line table entry is stored below, so this is
     an upper bound.  */
  mle = (struct deprecated_dis_line_entry *)
    alloca (nlines * sizeof (struct deprecated_dis_line_entry));

  /* Copy linetable entries for this function into our data
     structure, creating end_pc's and setting out_of_order as
     appropriate.  */

  /* First, skip all the preceding functions.  (The loop body is
     intentionally empty; only I is advanced.)  */

  for (i = 0; i < nlines - 1 && le[i].pc < low; i++);

  /* Now, copy all entries before the end of this function.  The loop
     stops one short of the last entry because each record's end_pc is
     taken from the following entry.  */

  for (; i < nlines - 1 && le[i].pc < high; i++)
    {
      /* An entry is a duplicate only when BOTH its line and its pc
	 match the next entry; the earlier of the two is the one
	 skipped.  */
      if (le[i].line == le[i + 1].line && le[i].pc == le[i + 1].pc)
	continue;		/* Ignore duplicates.  */

      /* Skip any end-of-function markers.  */
      if (le[i].line == 0)
	continue;

      mle[newlines].line = le[i].line;
      /* A line number that decreases from one entry to the next means
	 the entries are not already in source order, so a sort is
	 needed later.  */
      if (le[i].line > le[i + 1].line)
	out_of_order = 1;
      mle[newlines].start_pc = le[i].pc;
      mle[newlines].end_pc = le[i + 1].pc;
      newlines++;
    }

  /* If we're on the last line, and it's part of the function,
     then we need to get the end pc in a special way.  There is no
     following entry to supply end_pc, so the end of the
     symtab_and_line returned by find_pc_line is used instead.  */

  if (i == nlines - 1 && le[i].pc < high)
    {
      mle[newlines].line = le[i].line;
      mle[newlines].start_pc = le[i].pc;
      sal = find_pc_line (le[i].pc, 0);
      mle[newlines].end_pc = sal.end;
      newlines++;
    }

  /* Now, sort mle by line #s (and, then by addresses within lines).
     The sort is skipped when no decreasing line number was seen.  */

  if (out_of_order)
    std::sort (mle, mle + newlines, line_is_less_than);

  /* Now, for each line entry, emit the specified lines (unless
     they have been emitted before), followed by the assembly code
     for that line.  */

  ui_out_emit_list asm_insns_list (uiout, "asm_insns");

  /* These two are held in optionals so that they can be opened when a
     new source line starts and reset explicitly once the last record
     for that line has been dumped.  */
  gdb::optional<ui_out_emit_tuple> outer_tuple_emitter;
  gdb::optional<ui_out_emit_list> inner_list_emitter;

  for (i = 0; i < newlines; i++)
    {
      /* Print out everything from next_line to the current line.
	 Records whose line is below next_line have already had their
	 source printed, so only their instructions are dumped.  */
      if (mle[i].line >= next_line)
	{
	  /* next_line is still 0 only until the first source line has
	     been printed.  */
	  if (next_line != 0)
	    {
	      /* Just one line to print.  */
	      if (next_line == mle[i].line)
		{
		  outer_tuple_emitter.emplace (uiout, "src_and_asm_line");
		  print_source_lines (symtab, next_line, mle[i].line + 1, psl_flags);
		}
	      else
		{
		  /* Several source lines w/o asm instructions associated.
		     Each gets its own tuple and an empty instruction
		     list; both emitters go out of scope at the end of
		     each iteration.  */
		  for (; next_line < mle[i].line; next_line++)
		    {
		      ui_out_emit_tuple tuple_emitter (uiout,
						       "src_and_asm_line");
		      print_source_lines (symtab, next_line, next_line + 1,
					  psl_flags);
		      ui_out_emit_list temp_list_emitter (uiout,
							  "line_asm_insn");
		    }
		  /* Print the last line and leave list open for
		     asm instructions to be added.  */
		  outer_tuple_emitter.emplace (uiout, "src_and_asm_line");
		  print_source_lines (symtab, next_line, mle[i].line + 1, psl_flags);
		}
	    }
	  else
	    {
	      /* First source line printed: start directly at this
		 record's line rather than filling in earlier lines.  */
	      outer_tuple_emitter.emplace (uiout, "src_and_asm_line");
	      print_source_lines (symtab, mle[i].line, mle[i].line + 1, psl_flags);
	    }

	  next_line = mle[i].line + 1;
	  inner_list_emitter.emplace (uiout, "line_asm_insn");
	}

      /* Dump the instructions in [start_pc, end_pc) for this record
	 and add dump_insns' return value to the running total.  */
      num_displayed += dump_insns (gdbarch, uiout,
				   mle[i].start_pc, mle[i].end_pc,
				   how_many, flags, NULL);

      /* When we've reached the end of the mle array, or we've seen the last
         assembly range for this source line, close out the list/tuple.  */
      if (i == (newlines - 1) || mle[i + 1].line > mle[i].line)
	{
	  inner_list_emitter.reset ();
	  outer_tuple_emitter.reset ();
	  uiout->text ("\n");
	}
      /* A negative how_many means no limit.  */
      if (how_many >= 0 && num_displayed >= how_many)
	break;
    }
}

gcc生成的line信息

  • 行号信息
tsecer@harry: cat main.cpp 
#include <string.h>
#include <stdio.h>

struct tsecer
{
    tsecer() // constructor: zeroes the start of arr
    {
        memset(arr, 0, sizeof(0)); // NOTE(review): sizeof(0) is sizeof(int), so only that many bytes are cleared; sizeof(arr) was probably intended
    }

    char arr[100];
};


int main(int argc, const char *argv[]) // demo program: a loop whose body constructs an object and prints
{
    for (int i = 0; i < argc; i++) // runs argc times
    {
        tsecer t; // a fresh object is constructed on every iteration
        printf("hello world\n");
    }

    return 0;
}
tsecer@harry: g++ -g main.cpp 
tsecer@harry: readelf --debug-dump a.out                           
.debug_aranges 节的内容:

 行号语句:
  [0x000000b1]  扩充操作码 2: 设置地址为 0x4005c8
  [0x000000bc]  Special opcode 10: advance Address by 0 to 0x4005c8 and Line by 5 to 6
  [0x000000bd]  Special opcode 175: advance Address by 12 to 0x4005d4 and Line by 2 to 8
  [0x000000be]  Advance PC by constant 17 to 0x4005e5
  [0x000000bf]  Special opcode 76: advance Address by 5 to 0x4005ea and Line by 1 to 9
  [0x000000c0]  Advance PC by 3 to 0x4005ed
  [0x000000c2]  扩充操作码 1: 序列结束

  [0x000000c5]  扩充操作码 2: 设置地址为 0x400587
  [0x000000d0]  Advance Line by 15 to 16
  [0x000000d2]  复制
  [0x000000d3]  Special opcode 216: advance Address by 15 to 0x400596 and Line by 1 to 17
  [0x000000d4]  扩充操作码 4: set Discriminator to 1
  [0x000000d8]  将 is_stmt 设定为 0
  [0x000000d9]  Special opcode 103: advance Address by 7 to 0x40059d and Line by 0 to 17
  [0x000000da]  将 is_stmt 设定为 1
  [0x000000db]  Special opcode 119: advance Address by 8 to 0x4005a5 and Line by 2 to 19
  [0x000000dc]  Special opcode 174: advance Address by 12 to 0x4005b1 and Line by 1 to 20
  [0x000000dd]  Special opcode 142: advance Address by 10 to 0x4005bb and Line by -3 to 17
  [0x000000de]  Special opcode 95: advance Address by 6 to 0x4005c1 and Line by 6 to 23
  [0x000000df]  Special opcode 76: advance Address by 5 to 0x4005c6 and Line by 1 to 24
  [0x000000e0]  Advance PC by 2 to 0x4005c8
  [0x000000e2]  扩充操作码 1: 序列结束

对于典型的循环来说,它生成的机器指令并不是连续的。这个信息在debug中也有体现。注意下面三行中,中间一行有一个行号减去3(从20回到17)的操作:这是因为for语句的自增表达式(i++)以及跳回循环条件判断处的jump指令被放在了循环体对应的机器指令之后,而它们在源代码中属于第17行。

  [0x000000dc]  Special opcode 174: advance Address by 12 to 0x4005b1 and Line by 1 to 20
  [0x000000dd]  Special opcode 142: advance Address by 10 to 0x4005bb and Line by -3 to 17
  [0x000000de]  Special opcode 95: advance Address by 6 to 0x4005c1 and Line by 6 to 23

编译器优化

前面还只是在没有开启优化的时候看到的内容,如果开启了优化,这个输出就更加混乱了。

因为重复的行号会被丢弃,它们会被计算为不属于它的源代码,进而导致每个源代码行号对应的机器指令数量出现错乱。