From 37d6f666cae62d9d44c5aef4b2185e43635d5091 Mon Sep 17 00:00:00 2001 From: Wei Mi Date: Mon, 12 Nov 2012 15:51:13 +0000 Subject: [PATCH] Initial import of asan from the Google branch This patch imports the initial state of asan as it was in the Google branch. It provides basic infrastructure for asan to instrument memory accesses on the heap, at -O3. Note that it supports neither stack nor global variable protection. The rest of the patches of the set is intended to further improve this base. gcc/ChangeLog * Makefile.in: Add asan.c and its dependencies. * common.opt: Add -faddress-sanitizer option. * invoke.texi: Document the new flag. * passes.c: Add the asan pass. * toplev.c (compile_file): Call asan_finish_file. * asan.c: New file. * asan.h: New file. * tree-pass.h: Declare pass_asan. Co-Authored-By: Diego Novillo Co-Authored-By: Dodji Seketeli From-SVN: r193432 --- gcc/ChangeLog | 13 ++ gcc/Makefile.in | 5 + gcc/asan.c | 404 ++++++++++++++++++++++++++++++++++++++++++++ gcc/asan.h | 26 +++ gcc/common.opt | 4 + gcc/doc/invoke.texi | 13 +- gcc/passes.c | 1 + gcc/toplev.c | 5 + gcc/tree-pass.h | 1 + 9 files changed, 469 insertions(+), 3 deletions(-) create mode 100644 gcc/asan.c create mode 100644 gcc/asan.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d815477a4293..53dfd9817c82 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2012-11-12 Wei Mi + Diego Novillo + Dodji Seketeli + + * Makefile.in: Add asan.c and its dependencies. + * common.opt: Add -faddress-sanitizer option. + * invoke.texi: Document the new flag. + * passes.c: Add the asan pass. + * toplev.c (compile_file): Call asan_finish_file. + * asan.c: New file. + * asan.h: New file. + * tree-pass.h: Declare pass_asan. 
+ 2012-11-12 Tobias Burnus * diagnostic.c (diagnostic_append_note): Also call va_end when diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 24791a4f1708..dde9b50a1ae9 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1349,6 +1349,7 @@ OBJS = \ tracer.o \ trans-mem.o \ tree-affine.o \ + asan.o \ tree-call-cdce.o \ tree-cfg.o \ tree-cfgcleanup.o \ @@ -2207,6 +2208,10 @@ stor-layout.o : stor-layout.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(TREE_H) $(PARAMS_H) $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) $(RTL_H) \ $(GGC_H) $(TM_P_H) $(TARGET_H) langhooks.h $(REGS_H) gt-stor-layout.h \ $(DIAGNOSTIC_CORE_H) $(CGRAPH_H) $(TREE_INLINE_H) $(TREE_DUMP_H) $(GIMPLE_H) +asan.o : asan.c asan.h $(CONFIG_H) pointer-set.h \ + $(SYSTEM_H) $(TREE_H) $(GIMPLE_H) \ + output.h $(DIAGNOSTIC_H) coretypes.h $(TREE_DUMP_H) $(FLAGS_H) \ + tree-pretty-print.h tree-ssa-tail-merge.o: tree-ssa-tail-merge.c \ $(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(BITMAP_H) \ $(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) \ diff --git a/gcc/asan.c b/gcc/asan.c new file mode 100644 index 000000000000..4b07c9646c6a --- /dev/null +++ b/gcc/asan.c @@ -0,0 +1,404 @@ +/* AddressSanitizer, a fast memory error detector. + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Kostya Serebryany + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "tm_p.h" +#include "basic-block.h" +#include "flags.h" +#include "function.h" +#include "tree-inline.h" +#include "gimple.h" +#include "tree-iterator.h" +#include "tree-flow.h" +#include "tree-dump.h" +#include "tree-pass.h" +#include "diagnostic.h" +#include "demangle.h" +#include "langhooks.h" +#include "ggc.h" +#include "cgraph.h" +#include "gimple.h" +#include "asan.h" +#include "gimple-pretty-print.h" + +/* + AddressSanitizer finds out-of-bounds and use-after-free bugs + with <2x slowdown on average. + + The tool consists of two parts: + instrumentation module (this file) and a run-time library. + The instrumentation module adds a run-time check before every memory insn. + For an 8- or 16-byte load accessing address X: + ShadowAddr = (X >> 3) + Offset + ShadowValue = *(char*)ShadowAddr; // *(short*) for 16-byte access. + if (ShadowValue) + __asan_report_load8(X); + For a load of N bytes (N=1, 2 or 4) from address X: + ShadowAddr = (X >> 3) + Offset + ShadowValue = *(char*)ShadowAddr; + if (ShadowValue) + if ((X & 7) + N - 1 >= ShadowValue) + __asan_report_loadN(X); + Stores are instrumented similarly, but using __asan_report_storeN functions. + A call to __asan_init() is inserted into the list of module CTORs. + + The run-time library redefines malloc (so that redzones are inserted around + the allocated memory) and free (so that reuse of free-ed memory is delayed), + and provides the __asan_report* and __asan_init functions. + + Read more: + http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm + + Future work: + The current implementation supports only detection of out-of-bounds and + use-after-free bugs in heap. + In order to support out-of-bounds for stack and globals we will need + to create redzones for stack and global objects and poison them. 
+*/ + +/* The shadow address is computed as (X>>asan_scale) + (1<src; + join_bb = e->dest; + + /* A recap at this point: join_bb is the basic block at whose head + is the gimple statement for which this check expression is being + built. cond_bb is the (possibly new, synthetic) basic block the + end of which will contain the cache-lookup code, and a + conditional that jumps to the cache-miss code or, much more + likely, over to join_bb. */ + + /* Create the bb that contains the crash block. */ + then_bb = create_empty_bb (cond_bb); + make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); + make_single_succ_edge (then_bb, join_bb, EDGE_FALLTHRU); + + /* Mark the pseudo-fallthrough edge from cond_bb to join_bb. */ + e = find_edge (cond_bb, join_bb); + e->flags = EDGE_FALSE_VALUE; + e->count = cond_bb->count; + e->probability = REG_BR_PROB_BASE; + + /* Update dominance info. Note that bb_join's data was + updated by split_block. */ + if (dom_info_available_p (CDI_DOMINATORS)) + { + set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); + set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb); + } + + base_addr = create_tmp_reg (uintptr_type, "__asan_base_addr"); + + seq = NULL; + t = fold_convert_loc (location, uintptr_type, + unshare_expr (base)); + t = force_gimple_operand (t, &stmts, false, NULL_TREE); + gimple_seq_add_seq (&seq, stmts); + g = gimple_build_assign (base_addr, t); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + + /* Build (base_addr >> asan_scale) + (1 << asan_offset_log). 
*/ + + t = build2 (RSHIFT_EXPR, uintptr_type, base_addr, + build_int_cst (uintptr_type, asan_scale)); + t = build2 (PLUS_EXPR, uintptr_type, t, + build2 (LSHIFT_EXPR, uintptr_type, + build_int_cst (uintptr_type, 1), + build_int_cst (uintptr_type, asan_offset_log) + )); + t = build1 (INDIRECT_REF, shadow_type, + build1 (VIEW_CONVERT_EXPR, shadow_ptr_type, t)); + t = force_gimple_operand (t, &stmts, false, NULL_TREE); + gimple_seq_add_seq (&seq, stmts); + shadow_value = create_tmp_reg (shadow_type, "__asan_shadow"); + g = gimple_build_assign (shadow_value, t); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + t = build2 (NE_EXPR, boolean_type_node, shadow_value, + build_int_cst (shadow_type, 0)); + if (size_in_bytes < 8) + { + + /* Slow path for 1-, 2- and 4-byte accesses. + Build ((base_addr & 7) + (size_in_bytes - 1)) >= shadow_value as a boolean, matching the NE_EXPR above that it is ANDed with. */ + + u = build2 (BIT_AND_EXPR, uintptr_type, + base_addr, + build_int_cst (uintptr_type, 7)); + u = build1 (CONVERT_EXPR, shadow_type, u); + u = build2 (PLUS_EXPR, shadow_type, u, + build_int_cst (shadow_type, size_in_bytes - 1)); + u = build2 (GE_EXPR, boolean_type_node, u, shadow_value); + } + else + u = build_int_cst (boolean_type_node, 1); + t = build2 (TRUTH_AND_EXPR, boolean_type_node, t, u); + t = force_gimple_operand (t, &stmts, false, NULL_TREE); + gimple_seq_add_seq (&seq, stmts); + cond = create_tmp_reg (boolean_type_node, "__asan_crash_cond"); + g = gimple_build_assign (cond, t); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + g = gimple_build_cond (NE_EXPR, cond, boolean_false_node, NULL_TREE, + NULL_TREE); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + + /* Generate call to the run-time library (e.g. __asan_report_load8). 
*/ + + gsi = gsi_last_bb (cond_bb); + gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); + seq = NULL; + g = gimple_build_call (report_error_func (is_store, size_in_bytes), + 1, base_addr); + gimple_seq_add_stmt (&seq, g); + + /* Insert the error-report call in the THEN block. */ + + gsi = gsi_start_bb (then_bb); + gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); + + *iter = gsi_start_bb (join_bb); +} + +/* If T represents a memory access, add instrumentation code before ITER. + LOCATION is source code location. + IS_STORE is either 1 (for a store) or 0 (for a load). Only full-width accesses of 1, 2, 4, 8 or 16 bytes whose constant bit position is a multiple of the access size are instrumented; anything else is skipped. */ + +static void +instrument_derefs (gimple_stmt_iterator *iter, tree t, + location_t location, int is_store) +{ + tree type, base; + int size_in_bytes; + + type = TREE_TYPE (t); + if (type == error_mark_node) + return; + switch (TREE_CODE (t)) + { + case ARRAY_REF: + case COMPONENT_REF: + case INDIRECT_REF: + case MEM_REF: + break; + default: + return; + } + size_in_bytes = tree_low_cst (TYPE_SIZE (type), 0) / BITS_PER_UNIT; + if (size_in_bytes != 1 && size_in_bytes != 2 && + size_in_bytes != 4 && size_in_bytes != 8 && size_in_bytes != 16) + return; + { + /* For now just avoid instrumenting bit-field accesses and accesses at a bit position that is not a multiple of the access size. + Fixing it is doable, but expected to be messy. BITPOS is relative to the base object, so only its remainder is tested; requiring it to be zero would skip every field but the first. */ + + HOST_WIDE_INT bitsize, bitpos; + tree offset; + enum machine_mode mode; + int volatilep = 0, unsignedp = 0; + get_inner_reference (t, &bitsize, &bitpos, &offset, + &mode, &unsignedp, &volatilep, false); + if (bitsize != size_in_bytes * BITS_PER_UNIT || bitpos % bitsize != 0) + return; + } + + base = build_addr (t, current_function_decl); + build_check_stmt (base, iter, location, is_store, size_in_bytes); +} + +/* asan: this looks too complex. Can this be done simpler? */ +/* Transform + 1) Memory references. + 2) BUILTIN_ALLOCA calls (TODO: not handled yet). 
+*/ + +static void +transform_statements (void) +{ + basic_block bb; + gimple_stmt_iterator i; + int saved_last_basic_block = last_basic_block; + enum gimple_rhs_class grhs_class; + + FOR_EACH_BB (bb) + { + if (bb->index >= saved_last_basic_block) continue; /* Skip blocks created after the walk started. */ + for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i)) + { + gimple s = gsi_stmt (i); + if (gimple_code (s) != GIMPLE_ASSIGN) + continue; /* Only assignments are examined. */ + instrument_derefs (&i, gimple_assign_lhs (s), + gimple_location (s), 1); /* LHS is checked as a store. */ + instrument_derefs (&i, gimple_assign_rhs1 (s), + gimple_location (s), 0); /* Operands are checked as loads. */ + grhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (s)); + if (grhs_class == GIMPLE_BINARY_RHS) + instrument_derefs (&i, gimple_assign_rhs2 (s), + gimple_location (s), 0); + } + } +} + +/* Module-level instrumentation. + - Insert __asan_init() into the list of CTORs. + - TODO: insert redzones around globals. + */ + +void +asan_finish_file (void) +{ + tree ctor_statements = NULL_TREE; + append_to_statement_list (build_call_expr (asan_init_func (), 0), + &ctor_statements); + cgraph_build_static_cdtor ('I', ctor_statements, + MAX_RESERVED_INIT_PRIORITY - 1); +} + +/* Execute hook of the asan pass: select asan_offset_log for 32- or 64-bit pointers, then instrument the current function. */ + +static unsigned int +asan_instrument (void) +{ + struct gimplify_ctx gctx; + tree uintptr_type = lang_hooks.types.type_for_mode (ptr_mode, true); + int is_64 = tree_low_cst (TYPE_SIZE (uintptr_type), 0) == 64; + asan_offset_log = is_64 ? 
asan_offset_log_64 : asan_offset_log_32; + push_gimplify_context (&gctx); + transform_statements (); + pop_gimplify_context (NULL); + return 0; +} + +static bool +gate_asan (void) +{ + return flag_asan != 0; +} + +struct gimple_opt_pass pass_asan = +{ + { + GIMPLE_PASS, + "asan", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + gate_asan, /* gate */ + asan_instrument, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_NONE, /* tv_id */ + PROP_ssa | PROP_cfg | PROP_gimple_leh,/* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_flow | TODO_verify_stmts + | TODO_update_ssa /* todo_flags_finish */ + } +}; diff --git a/gcc/asan.h b/gcc/asan.h new file mode 100644 index 000000000000..590cf359bd20 --- /dev/null +++ b/gcc/asan.h @@ -0,0 +1,26 @@ +/* AddressSanitizer, a fast memory error detector. + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Kostya Serebryany + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef TREE_ASAN +#define TREE_ASAN + +extern void asan_finish_file(void); + +#endif /* TREE_ASAN */ diff --git a/gcc/common.opt b/gcc/common.opt index f947a72cc2e5..6088d1a7fec1 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -837,6 +837,10 @@ fargument-noalias-anything Common Ignore Does nothing. Preserved for backward compatibility. 
+faddress-sanitizer +Common Report Var(flag_asan) +Enable AddressSanitizer, a memory error detector + fasynchronous-unwind-tables Common Report Var(flag_asynchronous_unwind_tables) Optimization Generate unwind tables that are exact at each instruction boundary diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 9477ffced5a1..ef2e69ad0913 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -354,9 +354,10 @@ Objective-C and Objective-C++ Dialects}. @item Optimization Options @xref{Optimize Options,,Options that Control Optimization}. @gccoptlist{-falign-functions[=@var{n}] -falign-jumps[=@var{n}] @gol --falign-labels[=@var{n}] -falign-loops[=@var{n}] -fassociative-math @gol --fauto-inc-dec -fbranch-probabilities -fbranch-target-load-optimize @gol --fbranch-target-load-optimize2 -fbtr-bb-exclusive -fcaller-saves @gol +-falign-labels[=@var{n}] -falign-loops[=@var{n}] -faddress-sanitizer @gol +-fassociative-math -fauto-inc-dec -fbranch-probabilities @gol +-fbranch-target-load-optimize -fbranch-target-load-optimize2 @gol +-fbtr-bb-exclusive -fcaller-saves @gol -fcheck-data-deps -fcombine-stack-adjustments -fconserve-stack @gol -fcompare-elim -fcprop-registers -fcrossjumping @gol -fcse-follow-jumps -fcse-skip-blocks -fcx-fortran-rules @gol @@ -6848,6 +6849,12 @@ assumptions based on that. The default is @option{-fzero-initialized-in-bss}. +@item -faddress-sanitizer +Enable AddressSanitizer, a fast memory error detector. +Memory access instructions will be instrumented to detect +out-of-bounds and use-after-free bugs. So far only heap bugs will be detected. +See @uref{http://code.google.com/p/address-sanitizer/} for more details. 
+ @item -fmudflap -fmudflapth -fmudflapir @opindex fmudflap @opindex fmudflapth diff --git a/gcc/passes.c b/gcc/passes.c index 67aae52391c9..66a2f74b15ec 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -1456,6 +1456,7 @@ init_optimization_passes (void) NEXT_PASS (pass_split_crit_edges); NEXT_PASS (pass_pre); NEXT_PASS (pass_sink_code); + NEXT_PASS (pass_asan); NEXT_PASS (pass_tree_loop); { struct opt_pass **p = &pass_tree_loop.pass.sub; diff --git a/gcc/toplev.c b/gcc/toplev.c index 5cbb36411de3..3ca0736995e8 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -72,6 +72,7 @@ along with GCC; see the file COPYING3. If not see #include "value-prof.h" #include "alloc-pool.h" #include "tree-mudflap.h" +#include "asan.h" #include "gimple.h" #include "tree-ssa-alias.h" #include "plugin.h" @@ -570,6 +571,10 @@ compile_file (void) if (flag_mudflap) mudflap_finish_file (); + /* File-scope initialization for AddressSanitizer. */ + if (flag_asan) + asan_finish_file (); + output_shared_constant_pool (); output_object_blocks (); finish_tm_clone_pairs (); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 09ec531f27bf..0e618569e650 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -259,6 +259,7 @@ struct register_pass_info extern struct gimple_opt_pass pass_mudflap_1; extern struct gimple_opt_pass pass_mudflap_2; +extern struct gimple_opt_pass pass_asan; extern struct gimple_opt_pass pass_lower_cf; extern struct gimple_opt_pass pass_refactor_eh; extern struct gimple_opt_pass pass_lower_eh; -- 2.47.3