cprover
linker_script_merge.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Linker Script Merging
4 
5 Author: Kareem Khazem <karkhaz@karkhaz.com>, 2017
6 
7 \*******************************************************************/
8 
9 #include "linker_script_merge.h"
10 
11 #include <algorithm>
12 #include <fstream>
13 #include <iterator>
14 
15 #include <util/arith_tools.h>
16 #include <util/c_types.h>
17 #include <util/cmdline.h>
18 #include <util/expr_initializer.h>
19 #include <util/magic.h>
20 #include <util/pointer_expr.h>
21 #include <util/run.h>
22 #include <util/tempfile.h>
23 
24 #include <json/json_parser.h>
25 
27 
30 
31 #include "compile.h"
32 
34 {
35  if(!cmdline.isset('T'))
36  return 0;
37 
38  auto original_goto_model =
40 
41  if(!original_goto_model.has_value())
42  {
43  log.error() << "Unable to read goto binary for linker script merging"
44  << messaget::eom;
45  return 1;
46  }
47 
48  temporary_filet linker_def_outfile("goto-cc-linker-info", ".json");
49  std::list<irep_idt> linker_defined_symbols;
50  int fail = get_linker_script_data(
51  linker_defined_symbols,
52  original_goto_model->symbol_table,
53  elf_binary,
54  linker_def_outfile());
55  // ignore linker script parsing failures until the code is tested more widely
56  if(fail!=0)
57  return 0;
58 
59  jsont data;
60  fail = parse_json(linker_def_outfile(), log.get_message_handler(), data);
61  if(fail!=0)
62  {
63  log.error() << "Problem parsing linker script JSON data" << messaget::eom;
64  return fail;
65  }
66 
68  if(fail!=0)
69  {
70  log.error() << "Malformed linker-script JSON document" << messaget::eom;
71  data.output(log.error());
72  return fail;
73  }
74 
75  fail=1;
76  linker_valuest linker_values;
77  const auto &pair =
78  original_goto_model->goto_functions.function_map.find(INITIALIZE_FUNCTION);
79  if(pair == original_goto_model->goto_functions.function_map.end())
80  {
81  log.error() << "No " << INITIALIZE_FUNCTION << " found in goto_functions"
82  << messaget::eom;
83  return fail;
84  }
85  fail = ls_data2instructions(
86  data,
87  cmdline.get_value('T'),
88  pair->second.body,
89  original_goto_model->symbol_table,
90  linker_values);
91  if(fail!=0)
92  {
93  log.error() << "Could not add linkerscript defs to " INITIALIZE_FUNCTION
94  << messaget::eom;
95  return fail;
96  }
97 
98  fail=goto_and_object_mismatch(linker_defined_symbols, linker_values);
99  if(fail!=0)
100  return fail;
101 
102  // The keys of linker_values are exactly the elements of
103  // linker_defined_symbols, so iterate over linker_values from now on.
104 
105  fail = pointerize_linker_defined_symbols(*original_goto_model, linker_values);
106 
107  if(fail!=0)
108  {
109  log.error() << "Could not pointerize all linker-defined expressions"
110  << messaget::eom;
111  return fail;
112  }
113 
115  goto_binary,
116  *original_goto_model,
117  cmdline.isset("validate-goto-model"),
119 
120  if(fail!=0)
121  {
122  log.error() << "Could not write linkerscript-augmented binary"
123  << messaget::eom;
124  }
125 
126  return fail;
127 }
128 
130  const std::string &elf_binary,
131  const std::string &goto_binary,
132  const cmdlinet &cmdline,
133  message_handlert &message_handler)
134  : elf_binary(elf_binary),
135  goto_binary(goto_binary),
136  cmdline(cmdline),
137  log(message_handler),
138  replacement_predicates(
140  "address of array's first member",
141  [](const exprt &expr) -> const symbol_exprt & {
142  return to_symbol_expr(
143  to_index_expr(to_address_of_expr(expr).object()).index());
144  },
145  [](const exprt &expr) {
146  return expr.id() == ID_address_of &&
147  expr.type().id() == ID_pointer &&
148 
149  to_address_of_expr(expr).object().id() == ID_index &&
150  to_address_of_expr(expr).object().type().id() ==
151  ID_unsignedbv &&
152 
153  to_index_expr(to_address_of_expr(expr).object())
154  .array()
155  .id() == ID_symbol &&
156  to_index_expr(to_address_of_expr(expr).object())
157  .array()
158  .type()
159  .id() == ID_array &&
160 
161  to_index_expr(to_address_of_expr(expr).object())
162  .index()
163  .id() == ID_constant &&
164  to_index_expr(to_address_of_expr(expr).object())
165  .index()
166  .type()
167  .id() == ID_signedbv;
168  }),
170  "address of array",
171  [](const exprt &expr) -> const symbol_exprt & {
172  return to_symbol_expr(to_address_of_expr(expr).object());
173  },
174  [](const exprt &expr) {
175  return expr.id() == ID_address_of &&
176  expr.type().id() == ID_pointer &&
177 
178  to_address_of_expr(expr).object().id() == ID_symbol &&
179  to_address_of_expr(expr).object().type().id() == ID_array;
180  }),
182  "address of struct",
183  [](const exprt &expr) -> const symbol_exprt & {
184  return to_symbol_expr(to_address_of_expr(expr).object());
185  },
186  [](const exprt &expr) {
187  return expr.id() == ID_address_of &&
188  expr.type().id() == ID_pointer &&
189 
190  to_address_of_expr(expr).object().id() == ID_symbol &&
191  (to_address_of_expr(expr).object().type().id() == ID_struct ||
192  to_address_of_expr(expr).object().type().id() ==
193  ID_struct_tag);
194  }),
196  "array variable",
197  [](const exprt &expr) -> const symbol_exprt & {
198  return to_symbol_expr(expr);
199  },
200  [](const exprt &expr) {
201  return expr.id() == ID_symbol && expr.type().id() == ID_array;
202  }),
204  "pointer (does not need pointerizing)",
205  [](const exprt &expr) -> const symbol_exprt & {
206  return to_symbol_expr(expr);
207  },
208  [](const exprt &expr) {
209  return expr.id() == ID_symbol && expr.type().id() == ID_pointer;
210  })})
211 {}
212 
214  goto_modelt &goto_model,
215  const linker_valuest &linker_values)
216 {
217  const namespacet ns(goto_model.symbol_table);
218 
219  int ret=0;
220  // First, pointerize the actual linker-defined symbols
221  for(const auto &pair : linker_values)
222  {
223  const auto maybe_symbol = goto_model.symbol_table.get_writeable(pair.first);
224  if(!maybe_symbol)
225  continue;
226  symbolt &entry=*maybe_symbol;
227  entry.type=pointer_type(char_type());
228  entry.is_extern=false;
229  entry.value=pair.second.second;
230  }
231 
232  // Next, find all occurrences of linker-defined symbols that are _values_
233  // of some symbol in the symbol table, and pointerize them too
234  for(const auto &pair : goto_model.symbol_table.symbols)
235  {
236  std::list<symbol_exprt> to_pointerize;
237  symbols_to_pointerize(linker_values, pair.second.value, to_pointerize);
238 
239  if(to_pointerize.empty())
240  continue;
241  log.debug() << "Pointerizing the symbol-table value of symbol "
242  << pair.first << messaget::eom;
243  int fail = pointerize_subexprs_of(
244  goto_model.symbol_table.get_writeable_ref(pair.first).value,
245  to_pointerize,
246  linker_values);
247  if(to_pointerize.empty() && fail==0)
248  continue;
249  ret=1;
250  for(const auto &sym : to_pointerize)
251  {
252  log.error() << " Could not pointerize '" << sym.get_identifier()
253  << "' in symbol table entry " << pair.first << ". Pretty:\n"
254  << sym.pretty() << "\n";
255  }
256  log.error() << messaget::eom;
257  }
258 
259  // Finally, pointerize all occurrences of linker-defined symbols in the
260  // goto program
261  for(auto &gf : goto_model.goto_functions.function_map)
262  {
263  goto_programt &program=gf.second.body;
264  for(auto &instruction : program.instructions)
265  {
266  for(exprt *insts : std::list<exprt *>(
267  {&(instruction.code_nonconst()), &(instruction.guard)}))
268  {
269  std::list<symbol_exprt> to_pointerize;
270  symbols_to_pointerize(linker_values, *insts, to_pointerize);
271  if(to_pointerize.empty())
272  continue;
273  log.debug() << "Pointerizing a program expression..." << messaget::eom;
274  int fail = pointerize_subexprs_of(*insts, to_pointerize, linker_values);
275  if(to_pointerize.empty() && fail==0)
276  continue;
277  ret=1;
278  for(const auto &sym : to_pointerize)
279  {
280  log.error() << " Could not pointerize '" << sym.get_identifier()
281  << "' in function " << gf.first << ". Pretty:\n"
282  << sym.pretty() << "\n";
283  log.error().source_location = instruction.source_location;
284  }
285  log.error() << messaget::eom;
286  }
287  }
288  }
289  return ret;
290 }
291 
293  exprt &old_expr,
294  const linker_valuest &linker_values,
295  const symbol_exprt &old_symbol,
296  const irep_idt &ident,
297  const std::string &shape)
298 {
299  auto it=linker_values.find(ident);
300  if(it==linker_values.end())
301  {
302  log.error() << "Could not find a new expression for linker script-defined "
303  << "symbol '" << ident << "'" << messaget::eom;
304  return 1;
305  }
306  symbol_exprt new_expr=it->second.first;
307  new_expr.add_source_location()=old_symbol.source_location();
308  log.debug() << "Pointerizing linker-defined symbol '" << ident
309  << "' of shape <" << shape << ">." << messaget::eom;
310  old_expr=new_expr;
311  return 0;
312 }
313 
315  exprt &expr,
316  std::list<symbol_exprt> &to_pointerize,
317  const linker_valuest &linker_values)
318 {
319  int fail=0, tmp=0;
320  for(auto const &pair : linker_values)
321  for(auto const &pattern : replacement_predicates)
322  {
323  if(!pattern.match(expr))
324  continue;
325  // take a copy, expr will be changed below
326  const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
327  if(pair.first!=inner_symbol.get_identifier())
328  continue;
329  tmp=replace_expr(expr, linker_values, inner_symbol, pair.first,
330  pattern.description());
331  fail=tmp?tmp:fail;
332  auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
333  inner_symbol);
334  if(result==to_pointerize.end())
335  {
336  fail=1;
337  log.error() << "Too many removals of '" << inner_symbol.get_identifier()
338  << "'" << messaget::eom;
339  }
340  else
341  to_pointerize.erase(result);
342  // If we get here, we've just pointerized this expression. That expression
343  // will be a symbol possibly wrapped in some unary junk, but won't contain
344  // other symbols as subexpressions that might need to be pointerized. So
345  // it's safe to bail out here.
346  return fail;
347  }
348 
349  for(auto &op : expr.operands())
350  {
351  tmp = pointerize_subexprs_of(op, to_pointerize, linker_values);
352  fail=tmp?tmp:fail;
353  }
354  return fail;
355 }
356 
358  const linker_valuest &linker_values,
359  const exprt &expr,
360  std::list<symbol_exprt> &to_pointerize) const
361 {
362  for(const auto &op : expr.operands())
363  {
364  if(op.id()!=ID_symbol)
365  continue;
366  const symbol_exprt &sym_exp=to_symbol_expr(op);
367  const auto &pair=linker_values.find(sym_exp.get_identifier());
368  if(pair!=linker_values.end())
369  to_pointerize.push_back(sym_exp);
370  }
371  for(const auto &op : expr.operands())
372  symbols_to_pointerize(linker_values, op, to_pointerize);
373 }
374 
375 #if 0
376 The current implementation of this function is less precise than the
377  commented-out version below. To understand the difference between these
378  implementations, consider the following example:
379 
380 Suppose we have a section in the linker script, 100 bytes long, where the
381 address of the symbol sec_start is the start of the section (value 4096) and the
382 address of sec_end is the end of that section (value 4196).
383 
384 The current implementation synthesizes the goto-version of the following C:
385 
386  char __sec_array[100];
387  char *sec_start=(&__sec_array[0]);
388  char *sec_end=((&__sec_array[0])+100);
389  // Yes, it is 100 not 99. We're pointing to the end of the memory occupied
390  // by __sec_array, not the last element of __sec_array.
391 
392 This is imprecise for the following reason: the actual address of the array and
393 the pointers shall be some random CBMC-internal address, instead of being 4096
394 and 4196. The linker script, on the other hand, would have specified the exact
395 position of the section, and we even know what the actual values of sec_start
396 and sec_end are from the object file (these values are in the `addresses` list
397 of the `data` argument to this function). If the correctness of the code depends
398 on these actual values, then CBMCs model of the code is too imprecise to verify
399 this.
400 
401 The commented-out version of this function below synthesizes the following:
402 
403  char *sec_start=4096;
404  char *sec_end=4196;
405  __CPROVER_allocated_memory(4096, 100);
406 
407 This code has both the actual addresses of the start and end of the section and
408 tells CBMC that the intermediate region is valid. However, the allocated_memory
409 macro does not currently allocate an actual object at the address 4096, so
410 symbolic execution can fail. In particular, the 'allocated memory' is part of
411 __CPROVER_memory, which does not have a bounded size; this means that (for
412 example) calls to memcpy or memset fail, because the first and third arguments
413 do not have know n size. The commented-out implementation should be reinstated
414 once this limitation of __CPROVER_allocated_memory has been fixed.
415 
416 In either case, no other changes to the rest of the code (outside this function)
417 should be necessary. The rest of this file converts expressions containing the
418 linker-defined symbol into pointer types if they were not already, and this is
419 the right behaviour for both implementations.
420 #endif
422  jsont &data,
423  const std::string &linker_script,
424  goto_programt &gp,
425  symbol_tablet &symbol_table,
426  linker_valuest &linker_values)
427 #if 1
428 {
429  goto_programt::instructionst initialize_instructions=gp.instructions;
430  std::map<irep_idt, std::size_t> truncated_symbols;
431  for(auto &d : to_json_array(data["regions"]))
432  {
433  bool has_end=d["has-end-symbol"].is_true();
434 
435  std::ostringstream array_name;
436  array_name << CPROVER_PREFIX << "linkerscript-array_"
437  << d["start-symbol"].value;
438  if(has_end)
439  array_name << "-" << d["end-symbol"].value;
440 
441 
442  // Array symbol_exprt
443  mp_integer array_size = string2integer(d["size"].value);
444  if(array_size > MAX_FLATTENED_ARRAY_SIZE)
445  {
446  log.warning() << "Object section '" << d["section"].value << "' of size "
447  << array_size << " is too large to model. Truncating to "
448  << MAX_FLATTENED_ARRAY_SIZE << " bytes" << messaget::eom;
449  array_size=MAX_FLATTENED_ARRAY_SIZE;
450  if(!has_end)
451  truncated_symbols[d["size-symbol"].value]=MAX_FLATTENED_ARRAY_SIZE;
452  }
453 
454  constant_exprt array_size_expr=from_integer(array_size, size_type());
455  array_typet array_type(char_type(), array_size_expr);
456  symbol_exprt array_expr(array_name.str(), array_type);
457  source_locationt array_loc;
458 
459  array_loc.set_file(linker_script);
460  std::ostringstream array_comment;
461  array_comment << "Object section '" << d["section"].value << "' of size "
462  << array_size << " bytes";
463  array_loc.set_comment(array_comment.str());
464  array_expr.add_source_location()=array_loc;
465 
466  // Array start address
467  index_exprt zero_idx(array_expr, from_integer(0, size_type()));
468  address_of_exprt array_start(zero_idx);
469 
470  // Linker-defined symbol_exprt pointing to start address
471  symbol_exprt start_sym(d["start-symbol"].value, pointer_type(char_type()));
472  linker_values.emplace(
473  d["start-symbol"].value, std::make_pair(start_sym, array_start));
474 
475  // Since the value of the pointer will be a random CBMC address, write a
476  // note about the real address in the object file
477  auto it = std::find_if(
478  to_json_array(data["addresses"]).begin(),
479  to_json_array(data["addresses"]).end(),
480  [&d](const jsont &add) {
481  return add["sym"].value == d["start-symbol"].value;
482  });
483  if(it == to_json_array(data["addresses"]).end())
484  {
485  log.error() << "Start: Could not find address corresponding to symbol '"
486  << d["start-symbol"].value << "' (start of section)"
487  << messaget::eom;
488  return 1;
489  }
490  source_locationt start_loc;
491  start_loc.set_file(linker_script);
492  std::ostringstream start_comment;
493  start_comment << "Pointer to beginning of object section '"
494  << d["section"].value << "'. Original address in object file"
495  << " is " << (*it)["val"].value;
496  start_loc.set_comment(start_comment.str());
497  start_sym.add_source_location()=start_loc;
498 
499  // Instruction for start-address pointer in __CPROVER_initialize
500  code_assignt start_assign(start_sym, array_start);
501  start_assign.add_source_location()=start_loc;
502  goto_programt::instructiont start_assign_i =
503  goto_programt::make_assignment(start_assign, start_loc);
504  initialize_instructions.push_front(start_assign_i);
505 
506  if(has_end) // Same for pointer to end of array
507  {
508  plus_exprt array_end(array_start, array_size_expr);
509 
510  symbol_exprt end_sym(d["end-symbol"].value, pointer_type(char_type()));
511  linker_values.emplace(
512  d["end-symbol"].value, std::make_pair(end_sym, array_end));
513 
514  auto entry = std::find_if(
515  to_json_array(data["addresses"]).begin(),
516  to_json_array(data["addresses"]).end(),
517  [&d](const jsont &add) {
518  return add["sym"].value == d["end-symbol"].value;
519  });
520  if(entry == to_json_array(data["addresses"]).end())
521  {
522  log.debug() << "Could not find address corresponding to symbol '"
523  << d["end-symbol"].value << "' (end of section)"
524  << messaget::eom;
525  return 1;
526  }
527  source_locationt end_loc;
528  end_loc.set_file(linker_script);
529  std::ostringstream end_comment;
530  end_comment << "Pointer to end of object section '" << d["section"].value
531  << "'. Original address in object file"
532  << " is " << (*entry)["val"].value;
533  end_loc.set_comment(end_comment.str());
534  end_sym.add_source_location()=end_loc;
535 
536  code_assignt end_assign(end_sym, array_end);
537  end_assign.add_source_location()=end_loc;
538  goto_programt::instructiont end_assign_i =
539  goto_programt::make_assignment(end_assign, end_loc);
540  initialize_instructions.push_front(end_assign_i);
541  }
542 
543  // Add the array to the symbol table. We don't add the pointer(s) to the
544  // symbol table because they were already there, but declared as extern and
545  // probably with a different type. We change the externness and type in
546  // pointerize_linker_defined_symbols.
547  symbolt array_sym;
548  array_sym.name=array_name.str();
549  array_sym.pretty_name=array_name.str();
550  array_sym.is_lvalue=array_sym.is_static_lifetime=true;
551  array_sym.type=array_type;
552  array_sym.location=array_loc;
553  symbol_table.add(array_sym);
554 
555  // Push the array initialization to the front now, so that it happens before
556  // the initialization of the symbols that point to it.
557  namespacet ns(symbol_table);
558  const auto zi = zero_initializer(array_type, array_loc, ns);
559  CHECK_RETURN(zi.has_value());
560  code_assignt array_assign(array_expr, *zi);
561  array_assign.add_source_location()=array_loc;
562  goto_programt::instructiont array_assign_i =
563  goto_programt::make_assignment(array_assign, array_loc);
564  initialize_instructions.push_front(array_assign_i);
565  }
566 
567  // We've added every linker-defined symbol that marks the start or end of a
568  // region. But there might be other linker-defined symbols with some random
569  // address. These will have been declared extern too, so we need to give them
570  // a value also. Here, we give them the actual value that they have in the
571  // object file, since we're not assigning any object to them.
572  for(const auto &d : to_json_array(data["addresses"]))
573  {
574  auto it=linker_values.find(irep_idt(d["sym"].value));
575  if(it!=linker_values.end())
576  // sym marks the start or end of a region; already dealt with.
577  continue;
578 
579  // Perhaps this is a size symbol for a section whose size we truncated
580  // earlier.
581  irep_idt symbol_value;
582  const auto &pair=truncated_symbols.find(d["sym"].value);
583  if(pair==truncated_symbols.end())
584  symbol_value=d["val"].value;
585  else
586  {
587  log.debug()
588  << "Truncating the value of symbol " << d["sym"].value << " from "
589  << d["val"].value << " to " << MAX_FLATTENED_ARRAY_SIZE
590  << " because it corresponds to the size of a too-large section."
591  << messaget::eom;
593  }
594 
595  source_locationt loc;
596  loc.set_file(linker_script);
597  loc.set_comment("linker script-defined symbol: char *"+
598  d["sym"].value+"="+"(char *)"+id2string(symbol_value)+"u;");
599 
600  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
601 
602  constant_exprt rhs(
604  string2integer(id2string(symbol_value)),
605  unsigned_int_type().get_width()),
607 
608  typecast_exprt rhs_tc(rhs, pointer_type(char_type()));
609 
610  linker_values.emplace(
611  irep_idt(d["sym"].value), std::make_pair(lhs, rhs_tc));
612 
613  code_assignt assign(lhs, rhs_tc);
614  assign.add_source_location()=loc;
615  goto_programt::instructiont assign_i =
616  goto_programt::make_assignment(assign, loc);
617  initialize_instructions.push_front(assign_i);
618  }
619  return 0;
620 }
621 #else
622 {
623  goto_programt::instructionst initialize_instructions=gp.instructions;
624  for(const auto &d : to_json_array(data["regions"]))
625  {
626  unsigned start=safe_string2unsigned(d["start"].value);
627  unsigned size=safe_string2unsigned(d["size"].value);
628  constant_exprt first=from_integer(start, size_type());
629  constant_exprt second=from_integer(size, size_type());
630  const code_typet void_t({}, empty_typet());
632  symbol_exprt(CPROVER_PREFIX "allocated_memory", void_t), {first, second});
633 
634  source_locationt loc;
635  loc.set_file(linker_script);
636  loc.set_comment("linker script-defined region:\n"+d["commt"].value+":\n"+
637  d["annot"].value);
638  f.add_source_location()=loc;
639 
641  i.make_function_call(f);
642  initialize_instructions.push_front(i);
643  }
644 
645  if(!symbol_table.has_symbol(CPROVER_PREFIX "allocated_memory"))
646  {
647  symbolt sym;
648  sym.name=CPROVER_PREFIX "allocated_memory";
649  sym.pretty_name=CPROVER_PREFIX "allocated_memory";
650  sym.is_lvalue=sym.is_static_lifetime=true;
651  const code_typet void_t({}, empty_typet());
652  sym.type=void_t;
653  symbol_table.add(sym);
654  }
655 
656  for(const auto &d : to_json_array(data["addresses"]))
657  {
658  source_locationt loc;
659  loc.set_file(linker_script);
660  loc.set_comment("linker script-defined symbol: char *"+
661  d["sym"].value+"="+"(char *)"+d["val"].value+"u;");
662 
663  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
664 
665  constant_exprt rhs;
667  string2integer(d["val"].value), unsigned_int_type().get_width()));
668  rhs.type()=unsigned_int_type();
669 
670  exprt rhs_tc =
672 
673  linker_values.emplace(
674  irep_idt(d["sym"].value), std::make_pair(lhs, rhs_tc));
675 
676  code_assignt assign(lhs, rhs_tc);
677  assign.add_source_location()=loc;
679  assign_i.make_assignment(assign);
680  initialize_instructions.push_front(assign_i);
681  }
682  return 0;
683 }
684 #endif
685 
687  std::list<irep_idt> &linker_defined_symbols,
688  const symbol_tablet &symbol_table,
689  const std::string &out_file,
690  const std::string &def_out_file)
691 {
692  for(auto const &pair : symbol_table.symbols)
693  {
694  if(
695  pair.second.is_extern && pair.second.value.is_nil() &&
696  pair.second.name != CPROVER_PREFIX "memory")
697  {
698  linker_defined_symbols.push_back(pair.second.name);
699  }
700  }
701 
702  std::ostringstream linker_def_str;
703  std::copy(
704  linker_defined_symbols.begin(),
705  linker_defined_symbols.end(),
706  std::ostream_iterator<irep_idt>(linker_def_str, "\n"));
707  log.debug() << "Linker-defined symbols: [" << linker_def_str.str() << "]\n"
708  << messaget::eom;
709 
710  temporary_filet linker_def_infile("goto-cc-linker-defs", "");
711  std::ofstream linker_def_file(linker_def_infile());
712  linker_def_file << linker_def_str.str();
713  linker_def_file.close();
714 
715  std::vector<std::string> argv=
716  {
717  "ls_parse.py",
718  "--script", cmdline.get_value('T'),
719  "--object", out_file,
720  "--sym-file", linker_def_infile(),
721  "--out-file", def_out_file
722  };
723 
725  argv.push_back("--very-verbose");
727  argv.push_back("--verbose");
728 
729  log.debug() << "RUN:";
730  for(std::size_t i=0; i<argv.size(); i++)
731  log.debug() << " " << argv[i];
732  log.debug() << messaget::eom;
733 
734  int rc = run(argv[0], argv, linker_def_infile(), def_out_file, "");
735  if(rc!=0)
736  log.warning() << "Problem parsing linker script" << messaget::eom;
737 
738  return rc;
739 }
740 
742  const std::list<irep_idt> &linker_defined_symbols,
743  const linker_valuest &linker_values)
744 {
745  int fail=0;
746  for(const auto &sym : linker_defined_symbols)
747  if(linker_values.find(sym)==linker_values.end())
748  {
749  fail=1;
750  log.error() << "Variable '" << sym
751  << "' was declared extern but never given "
752  << "a value, even in a linker script" << messaget::eom;
753  }
754 
755  for(const auto &pair : linker_values)
756  {
757  auto it=std::find(linker_defined_symbols.begin(),
758  linker_defined_symbols.end(), pair.first);
759  if(it==linker_defined_symbols.end())
760  {
761  fail=1;
762  log.error()
763  << "Linker script-defined symbol '" << pair.first << "' was "
764  << "either defined in the C source code, not declared extern in "
765  << "the C source code, or does not appear in the C source code"
766  << messaget::eom;
767  }
768  }
769  return fail;
770 }
771 
773 {
774  if(!data.is_object())
775  return true;
776 
777  const json_objectt &data_object = to_json_object(data);
778  return (
779  !(data_object.find("regions") != data_object.end() &&
780  data_object.find("addresses") != data_object.end() &&
781  data["regions"].is_array() && data["addresses"].is_array() &&
782  std::all_of(
783  to_json_array(data["addresses"]).begin(),
784  to_json_array(data["addresses"]).end(),
785  [](const jsont &j) {
786  if(!j.is_object())
787  return false;
788 
789  const json_objectt &address = to_json_object(j);
790  return address.find("val") != address.end() &&
791  address.find("sym") != address.end() &&
792  address["val"].is_number() && address["sym"].is_string();
793  }) &&
794  std::all_of(
795  to_json_array(data["regions"]).begin(),
796  to_json_array(data["regions"]).end(),
797  [](const jsont &j) {
798  if(!j.is_object())
799  return false;
800 
801  const json_objectt &region = to_json_object(j);
802  return region.find("start") != region.end() &&
803  region.find("size") != region.end() &&
804  region.find("annot") != region.end() &&
805  region.find("commt") != region.end() &&
806  region.find("start-symbol") != region.end() &&
807  region.find("has-end-symbol") != region.end() &&
808  region.find("section") != region.end() &&
809  region["start"].is_number() && region["size"].is_number() &&
810  region["annot"].is_string() &&
811  region["start-symbol"].is_string() &&
812  region["section"].is_string() && region["commt"].is_string() &&
813  ((region["has-end-symbol"].is_true() &&
814  region.find("end-symbol") != region.end() &&
815  region["end-symbol"].is_string()) ||
816  (region["has-end-symbol"].is_false() &&
817  region.find("size-symbol") != region.end() &&
818  region.find("end-symbol") == region.end() &&
819  region["size-symbol"].is_string()));
820  })));
821 }
symbol_table_baset::has_symbol
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
Definition: symbol_table_base.h:87
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
array_name
std::string array_name(const namespacet &ns, const exprt &expr)
Definition: array_name.cpp:19
messaget::M_RESULT
@ M_RESULT
Definition: message.h:170
tempfile.h
typecast_exprt::conditional_cast
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:1874
symbol_tablet
The symbol table.
Definition: symbol_table.h:14
mp_integer
BigInt mp_integer
Definition: smt_terms.h:12
source_locationt::set_comment
void set_comment(const irep_idt &comment)
Definition: source_location.h:140
linker_script_merget::linker_valuest
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
Definition: linker_script_merge.h:84
linker_script_merge.h
Merge linker script-defined symbols into a goto-program.
arith_tools.h
cmdlinet::isset
virtual bool isset(char option) const
Definition: cmdline.cpp:29
address_of_exprt::object
exprt & object()
Definition: pointer_expr.h:350
linker_script_merget::get_linker_script_data
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
Definition: linker_script_merge.cpp:686
__CPROVER_allocated_memory
void __CPROVER_allocated_memory(__CPROVER_size_t address, __CPROVER_size_t extent)
CHECK_RETURN
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
compilet::write_bin_object_file
static bool write_bin_object_file(const std::string &file_name, const goto_modelt &src_goto_model, bool validate_goto_model, message_handlert &message_handler)
Writes the goto functions of src_goto_model to a binary format object file.
Definition: compile.cpp:577
to_index_expr
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1382
message_handlert::get_verbosity
unsigned get_verbosity() const
Definition: message.h:54
symbolt::type
typet type
Type of symbol.
Definition: symbol.h:31
goto_programt::instructionst
std::list< instructiont > instructionst
Definition: goto_program.h:644
data
Definition: kdev_t.h:24
linker_script_merget::add_linker_script_definitions
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
Definition: linker_script_merge.cpp:33
json_objectt::find
iterator find(const std::string &key)
Definition: json.h:356
string2integer
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:54
MAX_FLATTENED_ARRAY_SIZE
const std::size_t MAX_FLATTENED_ARRAY_SIZE
Definition: magic.h:11
file
Definition: kdev_t.h:19
goto_model.h
Symbol Table + CFG.
plus_exprt
The plus expression Associativity is not specified.
Definition: std_expr.h:914
linker_script_merget::ls_data2instructions
int ls_data2instructions(jsont &data, const std::string &linker_script, goto_programt &gp, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into gp's instructions member.
Definition: linker_script_merge.cpp:421
run
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:48
exprt
Base class for all expressions.
Definition: expr.h:54
linker_script_merget::replace_expr
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
Definition: linker_script_merge.cpp:292
goto_modelt
Definition: goto_model.h:26
linker_script_merget::log
messaget log
Definition: linker_script_merge.h:96
irep_idt
dstringt irep_idt
Definition: irep.h:37
to_string
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
Definition: string_constraint.cpp:57
messaget::eom
static eomt eom
Definition: message.h:297
run.h
goto_functionst::function_map
function_mapt function_map
Definition: goto_functions.h:29
to_json_array
json_arrayt & to_json_array(jsont &json)
Definition: json.h:426
symbol_exprt
Expression to hold a symbol (variable)
Definition: std_expr.h:80
jsont
Definition: json.h:27
linker_script_merget::pointerize_subexprs_of
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values)
Definition: linker_script_merge.cpp:314
magic.h
Magic numbers used throughout the codebase.
goto_programt::make_assignment
static instructiont make_assignment(const code_assignt &_code, const source_locationt &l=source_locationt::nil())
Create an assignment instruction.
Definition: goto_program.h:1082
zero_initializer
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Definition: expr_initializer.cpp:317
symbolt::pretty_name
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:52
linker_script_merget::goto_binary
const std::string & goto_binary
Definition: linker_script_merge.h:94
json_objectt
Definition: json.h:300
json_parser.h
replacement_predicatet
Patterns of expressions that should be replaced.
Definition: linker_script_merge.h:28
namespacet
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:91
linker_script_merget::linker_data_is_malformed
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
Definition: linker_script_merge.cpp:772
exprt::type
typet & type()
Return the type of the expression.
Definition: expr.h:82
code_function_callt
codet representation of a function call statement.
Definition: std_code.h:1213
cmdlinet
Definition: cmdline.h:21
expr_initializer.h
Expression Initialization.
messaget::error
mstreamt & error() const
Definition: message.h:399
empty_typet
The empty type.
Definition: std_types.h:51
integer2bvrep
irep_idt integer2bvrep(const mp_integer &src, std::size_t width)
convert an integer to bit-vector representation with given width This uses two's complement for negat...
Definition: arith_tools.cpp:379
symbol_table_baset::get_writeable_ref
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
Definition: symbol_table_base.h:121
id2string
const std::string & id2string(const irep_idt &d)
Definition: irep.h:49
messaget::mstreamt::source_location
source_locationt source_location
Definition: message.h:247
symbol_exprt::get_identifier
const irep_idt & get_identifier() const
Definition: std_expr.h:109
source_locationt::set_file
void set_file(const irep_idt &file)
Definition: source_location.h:95
pointer_expr.h
API to expression classes for Pointers.
INITIALIZE_FUNCTION
#define INITIALIZE_FUNCTION
Definition: static_lifetime_init.h:22
cmdlinet::get_value
std::string get_value(char option) const
Definition: cmdline.cpp:47
compile.h
Compile and link source and object files.
linker_script_merget::cmdline
const cmdlinet & cmdline
Definition: linker_script_merge.h:95
linker_script_merget::symbols_to_pointerize
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
Definition: linker_script_merge.cpp:357
index_exprt::index
exprt & index()
Definition: std_expr.h:1354
pointer_type
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
index_exprt::array
exprt & array()
Definition: std_expr.h:1344
unsigned_int_type
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:44
to_symbol_expr
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:189
code_typet
Base type of functions.
Definition: std_types.h:539
linker_script_merget::elf_binary
const std::string & elf_binary
Definition: linker_script_merge.h:93
irept::id
const irep_idt & id() const
Definition: irep.h:407
message_handlert
Definition: message.h:28
read_goto_binary.h
Read Goto Programs.
safe_string2unsigned
unsigned safe_string2unsigned(const std::string &str, int base)
Definition: string2int.cpp:16
linker_script_merget::linker_script_merget
linker_script_merget(const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
Definition: linker_script_merge.cpp:129
linker_script_merget::goto_and_object_mismatch
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, const linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
Definition: linker_script_merge.cpp:741
char_type
bitvector_typet char_type()
Definition: c_types.cpp:114
source_locationt
Definition: source_location.h:19
symbol_table_baset::add
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
Definition: symbol_table_base.cpp:18
symbol_tablet::get_writeable
virtual symbolt * get_writeable(const irep_idt &name) override
Find a symbol in the symbol table for read-write access.
Definition: symbol_table.h:90
goto_programt::instructions
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:652
symbolt::value
exprt value
Initial value of symbol.
Definition: symbol.h:34
messaget::get_message_handler
message_handlert & get_message_handler()
Definition: message.h:184
json_objectt::end
iterator end()
Definition: json.h:386
array_typet
Arrays with given size.
Definition: std_types.h:763
symbolt::is_extern
bool is_extern
Definition: symbol.h:66
goto_modelt::goto_functions
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:33
to_json_object
json_objectt & to_json_object(jsont &json)
Definition: json.h:444
symbolt::location
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
cmdline.h
symbolt
Symbol table entry.
Definition: symbol.h:28
from_integer
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
symbol_table_baset::symbols
const symbolst & symbols
Read-only field, used to look up symbols given their names.
Definition: symbol_table_base.h:30
CPROVER_PREFIX
#define CPROVER_PREFIX
Definition: cprover_prefix.h:14
symbolt::is_static_lifetime
bool is_static_lifetime
Definition: symbol.h:65
goto_programt
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:71
read_goto_binary
static bool read_goto_binary(const std::string &filename, symbol_tablet &, goto_functionst &, message_handlert &)
Read a goto binary from a file, but do not update config.
Definition: read_goto_binary.cpp:60
linker_script_merget::replacement_predicates
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
Definition: linker_script_merge.h:105
jsont::is_object
bool is_object() const
Definition: json.h:56
to_address_of_expr
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
Definition: pointer_expr.h:378
exprt::operands
operandst & operands()
Definition: expr.h:92
symbolt::is_lvalue
bool is_lvalue
Definition: symbol.h:66
messaget::debug
mstreamt & debug() const
Definition: message.h:429
messaget::M_DEBUG
@ M_DEBUG
Definition: message.h:171
index_exprt
Array index operator.
Definition: std_expr.h:1328
static_lifetime_init.h
address_of_exprt
Operator to return the address of an object.
Definition: pointer_expr.h:341
exprt::add_source_location
source_locationt & add_source_location()
Definition: expr.h:235
typecast_exprt
Semantic type conversion.
Definition: std_expr.h:1866
size_type
unsignedbv_typet size_type()
Definition: c_types.cpp:58
code_assignt
A codet representing an assignment in the program.
Definition: std_code.h:293
constant_exprt
A constant literal expression.
Definition: std_expr.h:2753
messaget::warning
mstreamt & warning() const
Definition: message.h:404
linker_script_merget::pointerize_linker_defined_symbols
int pointerize_linker_defined_symbols(goto_modelt &, const linker_valuest &)
convert the type of linker script-defined symbols to char*
Definition: linker_script_merge.cpp:213
parse_json
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Definition: json_parser.cpp:16
goto_programt::instructiont
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:178
constant_exprt::set_value
void set_value(const irep_idt &value)
Definition: std_expr.h:2766
exprt::source_location
const source_locationt & source_location() const
Definition: expr.h:230
goto_modelt::symbol_table
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:30
c_types.h
symbolt::name
irep_idt name
The unique identifier.
Definition: symbol.h:40
temporary_filet
Definition: tempfile.h:24
jsont::value
std::string value
Definition: json.h:132