From echristo at apple.com Mon May 10 01:40:04 2010 From: echristo at apple.com (Eric Christopher) Date: Mon, 10 May 2010 06:40:04 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103394 - /llvm-gcc-4.2/trunk/gcc/Makefile.in Message-ID: <20100510064004.905B4312800A@llvm.org> Author: echristo Date: Mon May 10 01:40:04 2010 New Revision: 103394 URL: http://llvm.org/viewvc/llvm-project?rev=103394&view=rev Log: Make sure we can install multiple times in the same directory. Modified: llvm-gcc-4.2/trunk/gcc/Makefile.in Modified: llvm-gcc-4.2/trunk/gcc/Makefile.in URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/Makefile.in?rev=103394&r1=103393&r2=103394&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/Makefile.in (original) +++ llvm-gcc-4.2/trunk/gcc/Makefile.in Mon May 10 01:40:04 2010 @@ -4108,6 +4108,7 @@ -rm -f $(DESTDIR)$(bindir)/$(GCC_INSTALL_NAME)$(exeext) -$(INSTALL_PROGRAM) xgcc$(exeext) $(DESTDIR)$(bindir)/$(GCC_INSTALL_NAME)$(exeext) -rm -f $(DESTDIR)$(bindir)/$(target_noncanonical)-gcc-$(version)$(exeext) + -rm -f $(DESTDIR)$(libsubdir)/libstdc++.dylib $(LN_S) /usr/lib/libstdc++.6.dylib $(DESTDIR)$(libsubdir)/libstdc++.dylib -( cd $(DESTDIR)$(bindir) && \ $(LN) $(GCC_INSTALL_NAME)$(exeext) $(target_noncanonical)-gcc-$(version)$(exeext) ) From baldrick at free.fr Mon May 10 02:17:37 2010 From: baldrick at free.fr (Duncan Sands) Date: Mon, 10 May 2010 07:17:37 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103395 - in /llvm-gcc-4.2/trunk/extras: ./ build-4-mingw32 buildbot-launcher Message-ID: <20100510071737.B06BD312800A@llvm.org> Author: baldrick Date: Mon May 10 02:17:37 2010 New Revision: 103395 URL: http://llvm.org/viewvc/llvm-project?rev=103395&view=rev Log: Patch by Galina Kistanova, adding 2 shell script to the llvm-gcc/extras directory (the same way we have it for DragonEgg): * buildbot-launcher - prepares environment and launches required build script; * build-4-mingw32 - 
cross builds llvm and llvm-gcc for --build=x86_64-apple-darwin10 --host=i686-pc-mingw32 --target=i686-pc-mingw32 Added: llvm-gcc-4.2/trunk/extras/ llvm-gcc-4.2/trunk/extras/build-4-mingw32 llvm-gcc-4.2/trunk/extras/buildbot-launcher Added: llvm-gcc-4.2/trunk/extras/build-4-mingw32 URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/extras/build-4-mingw32?rev=103395&view=auto ============================================================================== --- llvm-gcc-4.2/trunk/extras/build-4-mingw32 (added) +++ llvm-gcc-4.2/trunk/extras/build-4-mingw32 Mon May 10 02:17:37 2010 @@ -0,0 +1,175 @@ +#!/bin/bash + +set -e # Terminate script at the first line that fails. +set -o pipefail # Return the first non-zero pipe command error. +set -x # Print commands as they are executed + +# This script performs an automated cross build on x86_64-apple-darwin10 of +# self-hosted llvm-gcc for i686-pc-mingw32. It assumes the valid native +# cross compiler for i686-pc-mingw32 is in place and available as well as +# cross libraries and headers. + +# --build=x86_64-apple-darwin10 +# --host=i686-pc-mingw32 +# --target=i686-pc-mingw32 + +# The usage: +# Run this build from the build from the build root directory as +# build-4-mingw32 [] [] + +# Expected project tree structure: +# +# +-- ${LLVM_src} +# +-- ${LLVM_GCC_src} +# +-- ${LLVM_obj} +# +-- ${LLVM_GCC_obj} +# +-- ${INSTALL} + +LLVM_src=llvm.src # The LLVM source code root directory name. +LLVM_GCC_src=llvm-gcc.src # The LLVM-GCC source code root directory name. +LLVM_obj=llvm.obj # The LLVM build root directory name. +LLVM_GCC_obj=llvm-gcc.obj # The LLVM-GCC build root directory name. +INSTALL=install # Where the result will be installed. + +# CFLAGS and CXXFLAGS must not be set during the building of cross-tools. +unset CFLAGS +unset CXXFLAGS + +BUILD_ROOT=$PWD # Where build happens. +PRIVATE_INSTALL=${BUILD_ROOT}/${INSTALL} # Where the result will be installed. 
+ +#------------------------------------------------------------------------------ +# Define build steps, parse and validate input parameters +#------------------------------------------------------------------------------ + +# This script supports the following steps: +do_clean=no # Clean up the build directory. +do_configure_llvm=no # Configure LLVM. +do_make_llvm=no # Make LLVM. +do_configure_llvmgcc=no # Configure LLVM-GCC. +do_make_llvmgcc=no # Make LLVM-GCC. +do_install_llvmgcc=no # Install LLVM-GCC. +do_all=no # Runs all steps at once when requested. + +# Set step parameter +if (( $# == 0 )) ; then + do_all=yes +fi +# else +if (( ! $# == 0 )) ; then + # First check that the parameter actually defines a step. + case $1 in + clean | \ + configure_llvm | \ + make_llvm | \ + configure_llvmgcc | \ + make_llvmgcc | \ + install_llvmgcc | \ + all) + eval do_$1=yes # Set the flag for the requested step . + shift # Remove it since it is ours and already processed. + ;; + + *) + # Not our parameter. Pass it as is. + esac +fi + +# Set all steps if do_all requested +if [ "$do_all" == "yes" ] ; then + # Set all steps to yes + do_clean=yes + do_configure_llvm=yes + do_make_llvm=yes + do_configure_llvmgcc=yes + do_make_llvmgcc=yes + do_install_llvmgcc=yes +fi + +#------------------------------------------------------------------------------ +# Step: Clean up. +#------------------------------------------------------------------------------ +if [ "$do_clean" == "yes" ] ; then + + # Remove everything from where we will be installing the result. + rm -rf ${PRIVATE_INSTALL} + mkdir -p ${PRIVATE_INSTALL} + chmod a+rx ${PRIVATE_INSTALL} + +fi + +#------------------------------------------------------------------------------ +# Step: Configure LLVM. +#------------------------------------------------------------------------------ +if [ "$do_configure_llvm" == "yes" ] ; then + + # Remove previous build files if any. 
+ rm -rf ${BUILD_ROOT}/${LLVM_obj} + mkdir -p ${BUILD_ROOT}/${LLVM_obj} + chmod a+rx ${BUILD_ROOT}/${LLVM_obj} + cd ${BUILD_ROOT}/${LLVM_obj} + + ../${LLVM_src}/configure --prefix=${PRIVATE_INSTALL} \ + --build=x86_64-apple-darwin10 --host=i686-pc-mingw32 \ + --target=i686-pc-mingw32 \ + --enable-optimize \ + $@ # Extra args if any + +fi + +#------------------------------------------------------------------------------ +# Step: Make LLVM. +#------------------------------------------------------------------------------ +if [ "$do_make_llvm" == "yes" ] ; then + + cd ${BUILD_ROOT}/${LLVM_obj} + nice -n 20 make ENABLE_OPTIMIZED=1 \ + $@ # Extra args if any, like -j16 for example. + +fi + +#------------------------------------------------------------------------------ +# Step: Configure LLVM-GCC. +#------------------------------------------------------------------------------ +if [ "$do_configure_llvmgcc" == "yes" ] ; then + + # Remove previous build files if any. + rm -rf ${BUILD_ROOT}/${LLVM_GCC_obj} + mkdir -p ${BUILD_ROOT}/${LLVM_GCC_obj} + chmod a+rx ${BUILD_ROOT}/${LLVM_GCC_obj} + cd ${BUILD_ROOT}/${LLVM_GCC_obj} + + ../${LLVM_GCC_src}/configure --prefix=${PRIVATE_INSTALL} \ + --build=x86_64-apple-darwin10 --host=i686-pc-mingw32 \ + --target=i686-pc-mingw32 \ + --program-prefix=llvm- \ + --enable-llvm=${BUILD_ROOT}/${LLVM_obj} \ + --enable-languages=c,c++ \ + --disable-multilib --disable-nls --disable-shared \ + --disable-sjlj-exceptions --disable-__cxa_atexit \ + $@ # Extra args if any + +fi + +#------------------------------------------------------------------------------ +# Step: Make LLVM-GCC. +#------------------------------------------------------------------------------ +if [ "$do_make_llvmgcc" == "yes" ] ; then + + cd ${BUILD_ROOT}/${LLVM_GCC_obj} + # NOTE: Do not build in parallel! It doesn't build. 
+ nice -n 20 make \ + $@ # Extra args if any + +fi + +#------------------------------------------------------------------------------ +# Step: Install LLVM-GCC. +#------------------------------------------------------------------------------ +if [ "$do_install_llvmgcc" == "yes" ] ; then + + cd ${BUILD_ROOT}/${LLVM_GCC_obj} + nice -n 20 make install \ + $@ # Extra args if any + +fi Added: llvm-gcc-4.2/trunk/extras/buildbot-launcher URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/extras/buildbot-launcher?rev=103395&view=auto ============================================================================== --- llvm-gcc-4.2/trunk/extras/buildbot-launcher (added) +++ llvm-gcc-4.2/trunk/extras/buildbot-launcher Mon May 10 02:17:37 2010 @@ -0,0 +1,32 @@ +#!/bin/bash + +set -e # Terminate script at the first line that fails. +set -o pipefail # Return the first non-zero pipe command error. +set -x # Print commands as they are executed + +# The first 4 parameters are consumed by this script and they are: +BUILD_SCRIPT=$1 # Build script name to launch +shift +LLVM_SOURCE=$1 # Directory name where the LLVM source code is. +shift +LLVM_GCC_SOURCE=$1 # Directory name where the LLVM-GCC source code is. +shift +BUILD_DIR=$1 # Path to the build root directory. +shift +# The rest of the parameters will pass through. + +# The build script expects to be run from the build root directory. +cd $BUILD_DIR + +# The build script expects source code directories in certain place with +# certain names. 
+# TODO: Handle relative paths here +if [ "$LLVM_SOURCE" != "llvm.src" ] ; then + ln -sf $BUILD_DIR/$LLVM_SOURCE $BUILD_DIR/llvm.src +fi +if [ "$LLVM_GCC_SOURCE" != "llvm-gcc.src" ] ; then + ln -sf $BUILD_DIR/$LLVM_GCC_SOURCE $BUILD_DIR/llvm-gcc.src +fi + +# Launch the build script with all the remaining parameters +$BUILD_SCRIPT $@ From baldrick at free.fr Mon May 10 02:19:59 2010 From: baldrick at free.fr (Duncan Sands) Date: Mon, 10 May 2010 07:19:59 -0000 Subject: [llvm-commits] [zorg] r103396 - in /zorg/trunk: buildbot/osuosl/master/config/builders.py zorg/buildbot/builders/ScriptedBuilder.py Message-ID: <20100510071959.CD417312800A@llvm.org> Author: baldrick Date: Mon May 10 02:19:59 2010 New Revision: 103396 URL: http://llvm.org/viewvc/llvm-project?rev=103396&view=rev Log: Patch by Galina Kistanova, adding a new buildbot builder ScriptedBuilder to work with the new llvm-gcc build scripts, and updates configuration of the llvm-gcc-x86_64-darwin10-cross-mingw32 buildslave to use ScriptedBuilder. Added: zorg/trunk/zorg/buildbot/builders/ScriptedBuilder.py Modified: zorg/trunk/buildbot/osuosl/master/config/builders.py Modified: zorg/trunk/buildbot/osuosl/master/config/builders.py URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/buildbot/osuosl/master/config/builders.py?rev=103396&r1=103395&r2=103396&view=diff ============================================================================== --- zorg/trunk/buildbot/osuosl/master/config/builders.py (original) +++ zorg/trunk/buildbot/osuosl/master/config/builders.py Mon May 10 02:19:59 2010 @@ -18,6 +18,12 @@ reload(NightlytestBuilder) from zorg.buildbot.builders import NightlytestBuilder +from zorg.buildbot.builders import ScriptedBuilder +reload(ScriptedBuilder) +from zorg.buildbot.builders import ScriptedBuilder + +from buildbot.steps.source import SVN + # Plain LLVM builders. 
def _get_llvm_builders(): return [ @@ -247,21 +253,38 @@ extra_configure_args=['--disable-multilib']), 'category' : 'llvm-gcc.exp' }, - {'name' : "llvm-gcc-x86_64-darwin10-cross-mingw32", - 'slavenames':["kistanova1"], - 'builddir': "llvm-gcc-x86_64-darwin10-cross-mingw32", - 'factory':LLVMGCCBuilder.getLLVMGCCBuildFactory( - 16, build='x86_64-apple-darwin10', - host='i686-pc-mingw32', - target='i686-pc-mingw32', - useTwoStage=False, - extra_configure_args=['--disable-multilib', '--disable-nls', '--disable-shared', - '--disable-sjlj-exceptions', '--disable-__cxa_atexit', - '--with-local-prefix=/tools'], - verbose=True, - env={ 'PATH' : '/cross-tools/bin:/usr/bin:/bin:/usr/sbin:/sbin' }, - ), - 'category':'llvm-gcc'}, + {'name' : "llvm-gcc-x86_64-darwin10-cross-mingw32", + 'slavenames': [ "kistanova1" ], + 'builddir' : "llvm-gcc-x86_64-darwin10-cross-mingw32", + 'factory' : ScriptedBuilder.getScriptedBuildFactory( + source_code = [SVN(name='svn-llvm', + mode='update', baseURL='http://llvm.org/svn/llvm-project/llvm/', + defaultBranch='trunk', + workdir="llvm.src"), + SVN(name='svn-llvm-gcc', + mode='update', baseURL='http://llvm.org/svn/llvm-project/llvm-gcc-4.2/', + defaultBranch='trunk', + workdir="llvm-gcc.src"),], + launcher = 'llvm-gcc.src/extras/buildbot-launcher', + build_script = 'llvm-gcc.src/extras/build-4-mingw32', + extra_args = [], + build_steps = [{'name' : 'configure_llvm', + 'description' : 'Configure LLVM', + 'haltOnFailure' : True }, + {'name' : 'make_llvm', + 'description' : 'Make LLVM', + 'extra_args' : ['-j8'], # Extra step-specific properties + 'haltOnFailure' : True }, + {'name' : 'configure_llvmgcc', + 'description' : 'Configure LLVM-GCC', + 'haltOnFailure' : True }, + {'name' : 'make_llvmgcc', + 'description' : 'Make LLVM-GCC', + 'haltOnFailure' : True }, + {'name' : 'install_llvmgcc', + 'description' : 'Install LLVM-GCC', + 'haltOnFailure' : True },]), + 'category' : 'llvm-gcc' }, {'name' : "clang-i686-linux-selfhost-rel", 'slavenames' : 
["osu8"], Added: zorg/trunk/zorg/buildbot/builders/ScriptedBuilder.py URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/zorg/buildbot/builders/ScriptedBuilder.py?rev=103396&view=auto ============================================================================== --- zorg/trunk/zorg/buildbot/builders/ScriptedBuilder.py (added) +++ zorg/trunk/zorg/buildbot/builders/ScriptedBuilder.py Mon May 10 02:19:59 2010 @@ -0,0 +1,105 @@ +import buildbot +from buildbot.steps.shell import ShellCommand, SetProperty +from buildbot.process.properties import WithProperties + +def getScriptedBuildFactory( + source_code = [], # List of source code check out commands. + launcher = None, # Build script launcher name. + build_script = None, # Build script name or common prefix. + extra_args = [], # Extra args common for all steps. + build_steps = [], # List of step commands. + env = {}): # Environmental variables for all steps. + + # Validate input parameters + if not launcher: + raise ValueError,"Must specify launcher." + if not build_script: + raise ValueError,"Must specify build_script." + + f = buildbot.process.factory.BuildFactory() + + # Determine the build directory. + f.addStep( + buildbot.steps.shell.SetProperty( + name = "get.builddir", + command = ["pwd"], + property = "builddir", + description = "set build dir", + workdir = ".")) + + # Get all the source code we need for this build + for checkout in source_code: + + # Figure out from the source code check out commands where + # llvm and llvm-gcc source code directories are. + if checkout.name == 'svn-llvm': + llvm_src_dir = checkout.args.get('workdir', None) + elif checkout.name == 'svn-llvm-gcc': + llvm_gcc_src_dir = checkout.args.get('workdir', None) + + f.addStep(checkout) + + assert llvm_src_dir, \ + "Cannot retrieve where llvm source code gets checked out to." + assert llvm_gcc_src_dir, \ + "Cannot retrieve where llvm-gcc source code gets checked out to." 
+ + # Run build script for each requested step + for step_params in build_steps: + # TODO: Validate type step_params is dict + + # Handle some of the parameters here. + scripted_step_name = step_params.pop('name', None) + scripted_step_description = step_params.pop('description', None) + scripted_step_descriptionDone = step_params.pop('descriptionDone', None) + scripted_step_extra_args = step_params.pop('extra_args', []) + scripted_step_env = step_params.pop('env', {}) + # The rest will pass through. + + assert 'command' not in step_params, "Command is generated, please do not specify it." + + # scripted_step_extra_args must be a list + if isinstance(scripted_step_extra_args, str): + scripted_step_extra_args = [scripted_step_extra_args] + + # Combine together common env and step-specific env + scripted_step_env.update(env) + step_params['env'] = scripted_step_env + + f.addStep( + ShellCommand( + name = "run.build.step." + scripted_step_name, + description = scripted_step_description, + descriptionDone = scripted_step_descriptionDone, + command = ( + [WithProperties("%(builddir)s/" + launcher)] + + [WithProperties(build_script)] + # Build script to launch + [WithProperties(llvm_src_dir)] + # TODO: Escape spaces and special charactes + [WithProperties(llvm_gcc_src_dir)] + # TODO: Escape spaces and special charactes + [WithProperties("%(builddir)s")] + # TODO: Escape spaces and special charactes + [WithProperties(scripted_step_name)] + # The requested step name + scripted_step_extra_args + # Step-specific extra args + extra_args # Common extra args + ), + **step_params)) + + if len(build_steps) == 0: # If no steps were defined. 
+ + # Run the build_script once + f.addStep( + ShellCommand( + name="run.build.script", + command=( + [WithProperties("%(builddir)s/" + launcher)] + + [WithProperties(build_script)] + # Build script to launch + [WithProperties(llvm_src_dir)] + # TODO: Escape spaces and special charactes + [WithProperties(llvm_gcc_src_dir)] + # TODO: Escape spaces and special charactes + [WithProperties("%(builddir)s")] + # TODO: Escape spaces and special charactes + extra_args # Common extra args + ), + haltOnFailure = True, + description = "Run build script", + workdir = ".", + env = env)) + + return f From kalle.raiskila at nokia.com Mon May 10 02:38:37 2010 From: kalle.raiskila at nokia.com (Kalle Raiskila) Date: Mon, 10 May 2010 07:38:37 -0000 Subject: [llvm-commits] [llvm] r103397 - in /llvm/trunk/tools/bugpoint: ExecutionDriver.cpp ToolRunner.cpp ToolRunner.h Message-ID: <20100510073837.BD14D312800A@llvm.org> Author: kraiskil Date: Mon May 10 02:38:37 2010 New Revision: 103397 URL: http://llvm.org/viewvc/llvm-project?rev=103397&view=rev Log: Add command line option --gcc to bugpoint. Remove sending duplicate of the --gcc-tool-args parameters to gcc. Modified: llvm/trunk/tools/bugpoint/ExecutionDriver.cpp llvm/trunk/tools/bugpoint/ToolRunner.cpp llvm/trunk/tools/bugpoint/ToolRunner.h Modified: llvm/trunk/tools/bugpoint/ExecutionDriver.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/bugpoint/ExecutionDriver.cpp?rev=103397&r1=103396&r2=103397&view=diff ============================================================================== --- llvm/trunk/tools/bugpoint/ExecutionDriver.cpp (original) +++ llvm/trunk/tools/bugpoint/ExecutionDriver.cpp Mon May 10 02:38:37 2010 @@ -118,6 +118,10 @@ cl::desc("..."), cl::ZeroOrMore, cl::PositionalEatsArgs); + cl::opt + GCCBinary("gcc", cl::init("gcc"), + cl::desc("The gcc binary to use. 
(default 'gcc')")); + cl::list GCCToolArgv("gcc-tool-args", cl::Positional, cl::desc("..."), @@ -143,8 +147,8 @@ case AutoPick: InterpreterSel = RunCBE; Interpreter = - AbstractInterpreter::createCBE(getToolName(), Message, &ToolArgv, - &GCCToolArgv); + AbstractInterpreter::createCBE(getToolName(), Message, GCCBinary, + &ToolArgv, &GCCToolArgv); if (!Interpreter) { InterpreterSel = RunJIT; Interpreter = AbstractInterpreter::createJIT(getToolName(), Message, @@ -153,7 +157,8 @@ if (!Interpreter) { InterpreterSel = RunLLC; Interpreter = AbstractInterpreter::createLLC(getToolName(), Message, - &ToolArgv, &GCCToolArgv); + GCCBinary, &ToolArgv, + &GCCToolArgv); } if (!Interpreter) { InterpreterSel = RunLLI; @@ -173,7 +178,8 @@ case RunLLCIA: case LLC_Safe: Interpreter = AbstractInterpreter::createLLC(getToolName(), Message, - &ToolArgv, &GCCToolArgv, + GCCBinary, &ToolArgv, + &GCCToolArgv, InterpreterSel == RunLLCIA); break; case RunJIT: @@ -183,7 +189,8 @@ case RunCBE: case CBE_bug: Interpreter = AbstractInterpreter::createCBE(getToolName(), Message, - &ToolArgv, &GCCToolArgv); + GCCBinary, &ToolArgv, + &GCCToolArgv); break; case Custom: Interpreter = AbstractInterpreter::createCustom(Message, CustomExecCommand); @@ -209,6 +216,7 @@ SafeInterpreterSel = RunLLC; SafeToolArgs.push_back("--relocation-model=pic"); SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message, + GCCBinary, &SafeToolArgs, &GCCToolArgv); } @@ -219,6 +227,7 @@ SafeInterpreterSel = RunLLC; SafeToolArgs.push_back("--relocation-model=pic"); SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message, + GCCBinary, &SafeToolArgs, &GCCToolArgv); } @@ -230,6 +239,7 @@ InterpreterSel != RunCBE) { SafeInterpreterSel = RunCBE; SafeInterpreter = AbstractInterpreter::createCBE(Path.c_str(), Message, + GCCBinary, &SafeToolArgs, &GCCToolArgv); } @@ -239,6 +249,7 @@ SafeInterpreterSel = RunLLC; SafeToolArgs.push_back("--relocation-model=pic"); SafeInterpreter = 
AbstractInterpreter::createLLC(Path.c_str(), Message, + GCCBinary, &SafeToolArgs, &GCCToolArgv); } @@ -251,13 +262,13 @@ case RunLLCIA: SafeToolArgs.push_back("--relocation-model=pic"); SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message, - &SafeToolArgs, + GCCBinary, &SafeToolArgs, &GCCToolArgv, SafeInterpreterSel == RunLLCIA); break; case RunCBE: SafeInterpreter = AbstractInterpreter::createCBE(Path.c_str(), Message, - &SafeToolArgs, + GCCBinary, &SafeToolArgs, &GCCToolArgv); break; case Custom: @@ -271,7 +282,7 @@ } if (!SafeInterpreter) { outs() << Message << "\nExiting.\n"; exit(1); } - gcc = GCC::create(Message, &GCCToolArgv); + gcc = GCC::create(Message, GCCBinary, &GCCToolArgv); if (!gcc) { outs() << Message << "\nExiting.\n"; exit(1); } // If there was an error creating the selected interpreter, quit with error. Modified: llvm/trunk/tools/bugpoint/ToolRunner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/bugpoint/ToolRunner.cpp?rev=103397&r1=103396&r2=103397&view=diff ============================================================================== --- llvm/trunk/tools/bugpoint/ToolRunner.cpp (original) +++ llvm/trunk/tools/bugpoint/ToolRunner.cpp Mon May 10 02:38:37 2010 @@ -413,7 +413,6 @@ std::vector GCCArgs(ArgsForGCC); GCCArgs.insert(GCCArgs.end(), SharedLibs.begin(), SharedLibs.end()); - GCCArgs.insert(GCCArgs.end(), gccArgs.begin(), gccArgs.end()); // Assuming LLC worked, compile the result with GCC and run it. 
return gcc->ExecuteProgram(OutputAsmFile.str(), Args, FileKind, @@ -425,6 +424,7 @@ /// LLC *AbstractInterpreter::createLLC(const char *Argv0, std::string &Message, + const std::string &GCCBinary, const std::vector *Args, const std::vector *GCCArgs, bool UseIntegratedAssembler) { @@ -436,12 +436,12 @@ } Message = "Found llc: " + LLCPath + "\n"; - GCC *gcc = GCC::create(Message, GCCArgs); + GCC *gcc = GCC::create(Message, GCCBinary, GCCArgs); if (!gcc) { errs() << Message << "\n"; exit(1); } - return new LLC(LLCPath, gcc, Args, GCCArgs, UseIntegratedAssembler); + return new LLC(LLCPath, gcc, Args, UseIntegratedAssembler); } //===---------------------------------------------------------------------===// @@ -593,6 +593,7 @@ /// CBE *AbstractInterpreter::createCBE(const char *Argv0, std::string &Message, + const std::string &GCCBinary, const std::vector *Args, const std::vector *GCCArgs) { sys::Path LLCPath = @@ -604,7 +605,7 @@ } Message = "Found llc: " + LLCPath.str() + "\n"; - GCC *gcc = GCC::create(Message, GCCArgs); + GCC *gcc = GCC::create(Message, GCCBinary, GCCArgs); if (!gcc) { errs() << Message << "\n"; exit(1); @@ -852,10 +853,11 @@ /// create - Try to find the `gcc' executable /// GCC *GCC::create(std::string &Message, + const std::string &GCCBinary, const std::vector *Args) { - sys::Path GCCPath = sys::Program::FindProgramByName("gcc"); + sys::Path GCCPath = sys::Program::FindProgramByName(GCCBinary); if (GCCPath.isEmpty()) { - Message = "Cannot find `gcc' in executable directory or PATH!\n"; + Message = "Cannot find `"+ GCCBinary +"' in executable directory or PATH!\n"; return 0; } Modified: llvm/trunk/tools/bugpoint/ToolRunner.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/bugpoint/ToolRunner.h?rev=103397&r1=103396&r2=103397&view=diff ============================================================================== --- llvm/trunk/tools/bugpoint/ToolRunner.h (original) +++ llvm/trunk/tools/bugpoint/ToolRunner.h Mon May 10 02:38:37 2010 @@ -49,6 
+49,7 @@ enum FileType { AsmFile, ObjectFile, CFile }; static GCC *create(std::string &Message, + const std::string &GCCBinary, const std::vector *Args); /// ExecuteProgram - Execute the program specified by "ProgramFile" (which is @@ -87,9 +88,11 @@ class AbstractInterpreter { public: static CBE *createCBE(const char *Argv0, std::string &Message, + const std::string &GCCBinary, const std::vector *Args = 0, const std::vector *GCCArgs = 0); static LLC *createLLC(const char *Argv0, std::string &Message, + const std::string &GCCBinary, const std::vector *Args = 0, const std::vector *GCCArgs = 0, bool UseIntegratedAssembler = false); @@ -187,19 +190,16 @@ class LLC : public AbstractInterpreter { std::string LLCPath; // The path to the LLC executable. std::vector ToolArgs; // Extra args to pass to LLC. - std::vector gccArgs; // Extra args to pass to GCC. GCC *gcc; bool UseIntegratedAssembler; public: LLC(const std::string &llcPath, GCC *Gcc, const std::vector *Args, - const std::vector *GCCArgs, bool useIntegratedAssembler) : LLCPath(llcPath), gcc(Gcc), UseIntegratedAssembler(useIntegratedAssembler) { ToolArgs.clear(); if (Args) ToolArgs = *Args; - if (GCCArgs) gccArgs = *GCCArgs; } ~LLC() { delete gcc; } From kalle.raiskila at nokia.com Mon May 10 02:41:56 2010 From: kalle.raiskila at nokia.com (Kalle Raiskila) Date: Mon, 10 May 2010 10:41:56 +0300 Subject: [llvm-commits] Patch - small improvements to bugpoint In-Reply-To: References: <4BE2BDFE.1050209@nokia.com> Message-ID: <4BE7B8C4.7040102@nokia.com> Committed in 103397. Chris Lattner skrev: > On May 6, 2010, at 6:02 AM, Kalle Raiskila wrote: > >> Hi, >> >> attached is a patch with two small improvements for bugpoint that I have been using a while now. >> >> 1) introduce the --gcc command line option so the user can specify the safe gcc binary to use (e.g. clang, spu-gcc,...). If no --gcc option is given, it defaults to 'gcc', which is current behaviour. 
>> >> 2) the parameters passed in with --gcc-tool-args currently get passed twice to gcc. First when creating the GCC object in bugoint, and then when e.g. the LLC object uses GCC. The patch removes the second route. This would be a cosmetic change, but I frequently am passing hosting C files via the --gcc-tool-args - gcc chokes on a redefinition of the main function. > > Looks good to me. Please commit, or send me (offlist) the info requested in the Developer Policy document if you don't have commit access yet. > > -Chris > From kalle.raiskila at nokia.com Mon May 10 03:13:49 2010 From: kalle.raiskila at nokia.com (Kalle Raiskila) Date: Mon, 10 May 2010 08:13:49 -0000 Subject: [llvm-commits] [llvm] r103399 - in /llvm/trunk: lib/Target/CellSPU/SPUInstrInfo.td test/CodeGen/CellSPU/sub_ops.ll Message-ID: <20100510081349.80670312800A@llvm.org> Author: kraiskil Date: Mon May 10 03:13:49 2010 New Revision: 103399 URL: http://llvm.org/viewvc/llvm-project?rev=103399&view=rev Log: Fix encoding of 'sf' and 'sfh' instructions. 
Added: llvm/trunk/test/CodeGen/CellSPU/sub_ops.ll Modified: llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td Modified: llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td?rev=103399&r1=103398&r2=103399&view=diff ============================================================================== --- llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td (original) +++ llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.td Mon May 10 03:13:49 2010 @@ -655,7 +655,7 @@ def SFHr16: RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), "sfh\t$rT, $rA, $rB", IntegerOp, - [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>; + [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>; def SFHIvec: RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), @@ -670,11 +670,11 @@ def SFvec : RRForm<0b00000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "sf\t$rT, $rA, $rB", IntegerOp, - [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>; def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), "sf\t$rT, $rA, $rB", IntegerOp, - [(set R32C:$rT, (sub R32C:$rA, R32C:$rB))]>; + [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>; def SFIvec: RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), Added: llvm/trunk/test/CodeGen/CellSPU/sub_ops.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/sub_ops.ll?rev=103399&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/CellSPU/sub_ops.ll (added) +++ llvm/trunk/test/CodeGen/CellSPU/sub_ops.ll Mon May 10 03:13:49 2010 @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=cellspu | FileCheck %s + +define i32 @subword( i32 %param1, i32 %param2) { +; Check ordering of registers ret=param1-param2 -> rt=rb-ra +; CHECK-NOT: sf $3, $3, $4 +; CHECK: sf $3, $4, $3 + %1 = sub i32 
%param1, %param2 + ret i32 %1 +} + +define i16 @subhword( i16 %param1, i16 %param2) { +; Check ordering of registers ret=param1-param2 -> rt=rb-ra +; CHECK-NOT: sfh $3, $3, $4 +; CHECK: sfh $3, $4, $3 + %1 = sub i16 %param1, %param2 + ret i16 %1 +} + +define float @subfloat( float %param1, float %param2) { +; Check ordering of registers ret=param1-param2 -> rt=ra-rb +; (yes this is reverse of i32 instruction) +; CHECK-NOT: fs $3, $4, $3 +; CHECK: fs $3, $3, $4 + %1 = fsub float %param1, %param2 + ret float %1 +} From baldrick at free.fr Mon May 10 05:54:39 2010 From: baldrick at free.fr (Duncan Sands) Date: Mon, 10 May 2010 10:54:39 -0000 Subject: [llvm-commits] [dragonegg] r103400 - in /dragonegg/trunk: Makefile gcc_revision_tested_with llvm-backend.cpp Message-ID: <20100510105439.5666B312800A@llvm.org> Author: baldrick Date: Mon May 10 05:54:39 2010 New Revision: 103400 URL: http://llvm.org/viewvc/llvm-project?rev=103400&view=rev Log: New tested gcc-4.6 version. Supporting this version of gcc-4.6 requires knowing the targeted gcc version, since it has diverged from gcc-4.5. Modified: dragonegg/trunk/Makefile dragonegg/trunk/gcc_revision_tested_with dragonegg/trunk/llvm-backend.cpp Modified: dragonegg/trunk/Makefile URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/Makefile?rev=103400&r1=103399&r2=103400&view=diff ============================================================================== --- dragonegg/trunk/Makefile (original) +++ dragonegg/trunk/Makefile Mon May 10 05:54:39 2010 @@ -27,6 +27,9 @@ endif GCC_PLUGIN_DIR:=$(shell $(GCC) -print-file-name=plugin) +GCC_VERSION:=$(shell $(GCC) -dumpversion) +GCC_MAJOR=$(word 1, $(subst ., ,$(GCC_VERSION))) +GCC_MINOR=$(word 2, $(subst ., ,$(GCC_VERSION))) TARGET_TRIPLE:=$(shell $(GCC) -dumpmachine) # NOTE: replace with an informative string when doing a release. 
@@ -47,6 +50,7 @@ CPP_OPTIONS+=$(CPPFLAGS) $(shell $(LLVM_CONFIG) --cppflags) \ -MD -MP \ -DIN_GCC -DREVISION=\"$(REVISION)\" \ + -DGCC_MAJOR=$(GCC_MAJOR) -DGCC_MINOR=$(GCC_MINOR) \ -DTARGET_NAME=\"$(TARGET_TRIPLE)\" \ -I$(SRC_DIR) -I$(GCC_PLUGIN_DIR)/include Modified: dragonegg/trunk/gcc_revision_tested_with URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/gcc_revision_tested_with?rev=103400&r1=103399&r2=103400&view=diff ============================================================================== --- dragonegg/trunk/gcc_revision_tested_with (original) +++ dragonegg/trunk/gcc_revision_tested_with Mon May 10 05:54:39 2010 @@ -1 +1 @@ -158344 +159211 Modified: dragonegg/trunk/llvm-backend.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/llvm-backend.cpp?rev=103400&r1=103399&r2=103400&view=diff ============================================================================== --- dragonegg/trunk/llvm-backend.cpp (original) +++ dragonegg/trunk/llvm-backend.cpp Mon May 10 05:54:39 2010 @@ -92,6 +92,10 @@ #include "llvm-cache.h" } +#if (GCC_MAJOR != 4) +#error Unsupported GCC major version +#endif + // Non-zero if bytecode from PCH is successfully read. int flag_llvm_pch_read; @@ -1892,7 +1896,11 @@ } /// emit_functions - Turn all functions in the compilation unit into LLVM IR. -static void emit_functions(cgraph_node_set set) { +static void emit_functions(cgraph_node_set set +#if (GCC_MINOR > 5) + , varpool_node_set vset ATTRIBUTE_UNUSED +#endif + ) { if (errorcount || sorrycount) return; // Do not process broken code. 
@@ -1947,15 +1955,24 @@ NULL, /* generate_summary */ emit_functions, /* write_summary */ NULL, /* read_summary */ +#if (GCC_MINOR > 5) + NULL, /* write_optimization_summary */ + NULL, /* read_optimization_summary */ +#else NULL, /* function_read_summary */ +#endif NULL, /* stmt_fixup */ - 0, /* TODOs */ + 0, /* function_transform_todo_flags_start */ NULL, /* function_transform */ NULL /* variable_transform */ }; /// emit_variables - Output GCC global variables to the LLVM IR. -static void emit_variables(cgraph_node_set set) { +static void emit_variables(cgraph_node_set set +#if (GCC_MINOR > 5) + , varpool_node_set vset ATTRIBUTE_UNUSED +#endif + ) { if (errorcount || sorrycount) return; // Do not process broken code. @@ -1999,7 +2016,12 @@ NULL, /* generate_summary */ emit_variables, /* write_summary */ NULL, /* read_summary */ +#if (GCC_MINOR > 5) + NULL, /* write_optimization_summary */ + NULL, /* read_optimization_summary */ +#else NULL, /* function_read_summary */ +#endif NULL, /* stmt_fixup */ 0, /* function_transform_todo_flags_start */ NULL, /* function_transform */ @@ -2525,12 +2547,14 @@ pass_info.pos_op = PASS_POS_REPLACE; register_callback (plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); +#if (GCC_MINOR < 6) // Disable any other LTO passes. pass_info.pass = &pass_ipa_null.pass; pass_info.reference_pass_name = "lto_wpa_fixup"; pass_info.ref_pass_instance_number = 0; pass_info.pos_op = PASS_POS_REPLACE; register_callback (plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &pass_info); +#endif // Disable pass_lower_eh_dispatch, which runs after LLVM conversion. 
pass_info.pass = &pass_gimple_null.pass; From baldrick at free.fr Mon May 10 09:04:42 2010 From: baldrick at free.fr (Duncan Sands) Date: Mon, 10 May 2010 14:04:42 -0000 Subject: [llvm-commits] [dragonegg] r103401 - in /dragonegg/trunk: Makefile exports.map llvm-backend.cpp Message-ID: <20100510140442.52B1C312800A@llvm.org> Author: baldrick Date: Mon May 10 09:04:42 2010 New Revision: 103401 URL: http://llvm.org/viewvc/llvm-project?rev=103401&view=rev Log: Reduce the number of relocations when loading the plugin from more than 40000 to less than 2500. First of all, change the default visibility to hidden. However this still leaves gazillions of visible LLVM symbols. The LLVM header inclusions cannot simply be placed in a "visibility hidden" scope, since they include system headers (declaring system symbols hidden causes the plugin to not link), so use a linker script instead. Added: dragonegg/trunk/exports.map Modified: dragonegg/trunk/Makefile dragonegg/trunk/llvm-backend.cpp Modified: dragonegg/trunk/Makefile URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/Makefile?rev=103401&r1=103400&r2=103401&view=diff ============================================================================== --- dragonegg/trunk/Makefile (original) +++ dragonegg/trunk/Makefile Mon May 10 09:04:42 2010 @@ -17,13 +17,13 @@ QUIET:=@ endif -CFLAGS+=-Wall $(shell $(LLVM_CONFIG) --cflags) -CXXFLAGS+=-Wall $(shell $(LLVM_CONFIG) --cxxflags) +CFLAGS+=-Wall $(shell $(LLVM_CONFIG) --cflags) -fvisibility=hidden +CXXFLAGS+=-Wall $(shell $(LLVM_CONFIG) --cxxflags) -fvisibility=hidden ifeq ($(shell uname),Darwin) LOADABLE_MODULE_OPTIONS=-bundle -undefined dynamic_lookup else -LOADABLE_MODULE_OPTIONS=-shared +LOADABLE_MODULE_OPTIONS=-shared -Wl,--version-script=exports.map endif GCC_PLUGIN_DIR:=$(shell $(GCC) -print-file-name=plugin) Added: dragonegg/trunk/exports.map URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/exports.map?rev=103401&view=auto 
============================================================================== --- dragonegg/trunk/exports.map (added) +++ dragonegg/trunk/exports.map Mon May 10 09:04:42 2010 @@ -0,0 +1,7 @@ +{ + global: + plugin_is_GPL_compatible; + plugin_init; + local: + *; +}; Modified: dragonegg/trunk/llvm-backend.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/llvm-backend.cpp?rev=103401&r1=103400&r2=103401&view=diff ============================================================================== --- dragonegg/trunk/llvm-backend.cpp (original) +++ dragonegg/trunk/llvm-backend.cpp Mon May 10 09:04:42 2010 @@ -1564,7 +1564,9 @@ // This plugin's code is licensed under the GPLv2 or later. The LLVM libraries // use the GPL compatible University of Illinois/NCSA Open Source License. +#pragma GCC visibility push(default) int plugin_is_GPL_compatible; // This plugin is GPL compatible. +#pragma GCC visibility pop /// llvm_start_unit - Perform late initialization. This is called by GCC just @@ -2357,8 +2359,9 @@ /// plugin_init - Plugin initialization routine, called by GCC. This is the /// first code executed in the plugin (except for constructors). Configure /// the plugin and setup GCC, taking over optimization and code generation. 
-int plugin_init (struct plugin_name_args *plugin_info, - struct plugin_gcc_version *version) { +#pragma GCC visibility push(default) +int plugin_init(struct plugin_name_args *plugin_info, + struct plugin_gcc_version *version) { const char *plugin_name = plugin_info->base_name; struct register_pass_info pass_info; @@ -2649,3 +2652,4 @@ return 0; } +#pragma GCC visibility pop From daniel at zuster.org Mon May 10 10:29:00 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Mon, 10 May 2010 15:29:00 -0000 Subject: [llvm-commits] [test-suite] r103404 - in /test-suite/trunk: Makefile.programs MultiSource/Benchmarks/Olden/treeadd/Makefile MultiSource/Benchmarks/Olden/treeadd/reference-output.normal MultiSource/Benchmarks/Olden/treeadd/reference-output.small Message-ID: <20100510152900.96ED33128018@llvm.org> Author: ddunbar Date: Mon May 10 10:29:00 2010 New Revision: 103404 URL: http://llvm.org/viewvc/llvm-project?rev=103404&view=rev Log: Revert "add the ability for a makefile to specify a custom reference output," it broke in places. Removed: test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/reference-output.normal test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/reference-output.small Modified: test-suite/trunk/Makefile.programs test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/Makefile Modified: test-suite/trunk/Makefile.programs URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/Makefile.programs?rev=103404&r1=103403&r2=103404&view=diff ============================================================================== --- test-suite/trunk/Makefile.programs (original) +++ test-suite/trunk/Makefile.programs Mon May 10 10:29:00 2010 @@ -73,12 +73,6 @@ RUNTIMELIMIT := 500 endif -# If the program specified a REFERENCE_OUTPUT_FILE, they obviously want to -# USE_REFERENCE_OUTPUT. -ifdef REFERENCE_OUTPUT_FILE -USE_REFERENCE_OUTPUT := 1 -endif - # RUNSAFELY - This program simply runs another program. 
If the program works # correctly, this script has no effect, otherwise it will do things like print a # stack trace of a core dump. It always returns "successful" so that tests will @@ -764,17 +758,10 @@ cp $< $@ else ifdef USE_REFERENCE_OUTPUT - -# If the app wants to USE_REFERENCE_OUTPUT, but hasn't specified a file -# containing the reference output, default to programname.reference_output. -ifndef REFERENCE_OUTPUT_FILE -REFERENCE_OUTPUT_FILE = $(PROJ_SRC_DIR)/%.reference_output -endif - # In this case, we opt out of generating the native output and just # copy it from a reference output $(PROGRAMS_TO_TEST:%=Output/%.out-nat): \ -Output/%.out-nat: $(REFERENCE_OUTPUT_FILE) Output/.dir +Output/%.out-nat: $(PROJ_SRC_DIR)/%.reference_output Output/.dir cp $< $@ endif endif Modified: test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/Makefile URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/Makefile?rev=103404&r1=103403&r2=103404&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/Makefile (original) +++ test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/Makefile Mon May 10 10:29:00 2010 @@ -6,10 +6,8 @@ ifdef SMALL_PROBLEM_SIZE RUN_OPTIONS = 20 -REFERENCE_OUTPUT_FILE = $(PROJ_SRC_DIR)/reference-output.small else RUN_OPTIONS = 22 -REFERENCE_OUTPUT_FILE = $(PROJ_SRC_DIR)/reference-output.normal endif include $(LEVEL)/MultiSource/Makefile.multisrc Removed: test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/reference-output.normal URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/reference-output.normal?rev=103403&view=auto ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/reference-output.normal (original) +++ test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/reference-output.normal 
(removed) @@ -1,5 +0,0 @@ -Treeadd with 22 levels on 4 processors -About to enter TreeAlloc -About to enter TreeAdd -Received result of 4194303 -exit 0 Removed: test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/reference-output.small URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/reference-output.small?rev=103403&view=auto ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/reference-output.small (original) +++ test-suite/trunk/MultiSource/Benchmarks/Olden/treeadd/reference-output.small (removed) @@ -1,5 +0,0 @@ -Treeadd with 20 levels on 4 processors -About to enter TreeAlloc -About to enter TreeAdd -Received result of 1048575 -exit 0 From daniel at zuster.org Mon May 10 10:29:08 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Mon, 10 May 2010 15:29:08 -0000 Subject: [llvm-commits] [test-suite] r103405 - in /test-suite/trunk/MultiSource/Benchmarks/Olden/power: Makefile power.reference_output Message-ID: <20100510152908.BA3ED312800A@llvm.org> Author: ddunbar Date: Mon May 10 10:29:08 2010 New Revision: 103405 URL: http://llvm.org/viewvc/llvm-project?rev=103405&view=rev Log: Revert "the makefiles already support reference outputs, show daniel ", it broke in places. 
Removed: test-suite/trunk/MultiSource/Benchmarks/Olden/power/power.reference_output Modified: test-suite/trunk/MultiSource/Benchmarks/Olden/power/Makefile Modified: test-suite/trunk/MultiSource/Benchmarks/Olden/power/Makefile URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Olden/power/Makefile?rev=103405&r1=103404&r2=103405&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Olden/power/Makefile (original) +++ test-suite/trunk/MultiSource/Benchmarks/Olden/power/Makefile Mon May 10 10:29:08 2010 @@ -4,7 +4,6 @@ CPPFLAGS = -DTORONTO LDFLAGS = -lm FP_TOLERANCE = 0.00001 -USE_REFERENCE_OUTPUT = 1 include $(LEVEL)/MultiSource/Makefile.multisrc Removed: test-suite/trunk/MultiSource/Benchmarks/Olden/power/power.reference_output URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Benchmarks/Olden/power/power.reference_output?rev=103404&view=auto ============================================================================== --- test-suite/trunk/MultiSource/Benchmarks/Olden/power/power.reference_output (original) +++ test-suite/trunk/MultiSource/Benchmarks/Olden/power/power.reference_output (removed) @@ -1,119 +0,0 @@ -Past initialization -Built tree -COMPUTED TREE -TR=0.70, TI=0.14, P0=11599.73, Q0=2349.87 -D TR-0.11, TI=0.02 -TR=0.81, TI=0.16, P0=6397.86, Q0=1288.50 -D TR--0.06, TI=-0.01 -TR=0.76, TI=0.15, P0=9580.62, Q0=1936.90 -D TR-0.06, TI=0.01 -TR=0.81, TI=0.16, P0=6698.68, Q0=1349.63 -D TR--0.05, TI=-0.01 -TR=0.77, TI=0.15, P0=9056.32, Q0=1829.78 -D TR-0.04, TI=0.01 -TR=0.81, TI=0.16, P0=7022.54, Q0=1415.44 -D TR--0.03, TI=-0.01 -TR=0.77, TI=0.16, P0=8705.80, Q0=1758.23 -D TR-0.03, TI=0.01 -TR=0.80, TI=0.16, P0=7261.30, Q0=1463.98 -D TR--0.02, TI=-0.00 -TR=0.78, TI=0.16, P0=8481.17, Q0=1712.41 -D TR-0.02, TI=0.00 -TR=0.80, TI=0.16, P0=7439.02, Q0=1500.13 -D TR--0.02, TI=-0.00 -TR=0.78, TI=0.16, P0=8305.54, Q0=1676.60 -D TR-0.01, TI=0.00 
-TR=0.80, TI=0.16, P0=7565.44, Q0=1525.86 -D TR--0.01, TI=-0.00 -TR=0.78, TI=0.16, P0=8192.23, Q0=1653.51 -D TR-0.01, TI=0.00 -TR=0.79, TI=0.16, P0=7658.60, Q0=1544.82 -D TR--0.01, TI=-0.00 -TR=0.79, TI=0.16, P0=8111.11, Q0=1636.97 -D TR-0.01, TI=0.00 -TR=0.79, TI=0.16, P0=7725.99, Q0=1558.54 -D TR--0.01, TI=-0.00 -TR=0.79, TI=0.16, P0=8052.56, Q0=1625.04 -D TR-0.01, TI=0.00 -TR=0.79, TI=0.16, P0=7774.66, Q0=1568.44 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=8010.15, Q0=1616.40 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7809.89, Q0=1575.62 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7979.58, Q0=1610.18 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7835.31, Q0=1580.79 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7957.55, Q0=1605.69 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7853.63, Q0=1584.52 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7941.66, Q0=1602.45 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7866.84, Q0=1587.22 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7930.20, Q0=1600.12 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7876.37, Q0=1589.16 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7921.95, Q0=1598.44 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7883.24, Q0=1590.55 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7916.00, Q0=1597.23 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7888.19, Q0=1591.56 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7911.70, Q0=1596.35 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7891.76, Q0=1592.29 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7908.61, Q0=1595.72 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7894.33, Q0=1592.81 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7906.38, Q0=1595.27 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7896.19, Q0=1593.19 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7904.77, Q0=1594.94 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7897.41, Q0=1593.44 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7903.70, Q0=1594.72 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7898.33, Q0=1593.63 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7902.89, Q0=1594.56 -D 
TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7899.02, Q0=1593.77 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7902.30, Q0=1594.44 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7899.53, Q0=1593.87 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7901.87, Q0=1594.35 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7899.91, Q0=1593.95 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7901.54, Q0=1594.28 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7900.19, Q0=1594.01 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7901.30, Q0=1594.23 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7900.39, Q0=1594.05 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7901.12, Q0=1594.20 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7900.55, Q0=1594.08 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7900.99, Q0=1594.17 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7900.66, Q0=1594.10 -D TR--0.00, TI=-0.00 -TR=0.79, TI=0.16, P0=7900.89, Q0=1594.15 -D TR-0.00, TI=0.00 -TR=0.79, TI=0.16, P0=7900.75, Q0=1594.12 -exit 0 From csdavec at swan.ac.uk Mon May 10 10:52:26 2010 From: csdavec at swan.ac.uk (David Chisnall) Date: Mon, 10 May 2010 16:52:26 +0100 Subject: [llvm-commits] Make library interface pass for review In-Reply-To: References: <6C17D5AF-F163-4746-8B3E-BD26BC8F0355@swan.ac.uk> Message-ID: On 9 May 2010, at 19:23, Chris Lattner wrote: > > On May 8, 2010, at 1:46 PM, David Chisnall wrote: > >> Hi, >> >> Does anyone object if I commit the attached diff? It's a work-in-progress pass that prepares the bitcode used to compile a shared library for use by the inliner. After running the pass, the resulting bitcode can be linked into code that links against the library and used for inlining and any other related optimisations. >> >> In theory, it strips out anything that depends on globals that are not exposed outside of the module and marks everything as available_externally. In practice, there are almost certainly some cases that I've missed (reports welcome)... >> >> David > > A few requests: > > Please document this in Passes.html Done. 
> +++ lib/Transforms/IPO/MakeRuntimeLibraryInterface.cpp (revision 0) > @@ -0,0 +1,201 @@ > +#include "llvm/Pass.h" > +#include "llvm/Function.h" > +#include "llvm/Module.h" > +#include "llvm/LLVMContext.h" > +#include "llvm/Instructions.h" > +#include "llvm/Constants.h" > +#include "llvm/GlobalVariable.h" > > You need a file header comment, please prune out the redundant includes you don't need. Fixed / added. > +namespace > +{ > + class MakeRuntimeLibraryInterface : public ModulePass > + { > > Please put the { on the same line, conforming to the predominant style. Fixed. > + for (Module::iterator I=M.begin(), E=M.end() ; > + I!=E ; ++I) { > > Please use whitespace like the rest of llvm :). You don't need braces if the for loop contains a single statement. Fixed. > Please remove ProcessGlobals / IteratorMethod until and when they are needed. Ooops. 'when they are needed' is in the past - I thought I'd removed them already. > > + switch (V->getLinkage()) { > + case GlobalValue::ExternalLinkage: > + case GlobalValue::DLLExportLinkage: > + case GlobalValue::ExternalWeakLinkage: > + case GlobalValue::CommonLinkage: > + V->setLinkage(GlobalValue::AvailableExternallyLinkage); > + return true; > + case GlobalValue::AvailableExternallyLinkage: > + // Already done > + case GlobalValue::WeakAnyLinkage: > + case GlobalValue::WeakODRLinkage: > + case GlobalValue::LinkOnceAnyLinkage: > + case GlobalValue::LinkOnceODRLinkage: > + // Copy link-once and weak stuff > + case GlobalValue::AppendingLinkage: > + case GlobalValue::InternalLinkage: > + case GlobalValue::PrivateLinkage: > + case GlobalValue::LinkerPrivateLinkage: > + case GlobalValue::DLLImportLinkage: > + // Ignore internal symbols here. > + return false; > > Use predicates (like hasLocalLinkage) instead of enumerating all of this stuff. Okay. > Also, you should be able to handle linkonce_odr and weak_odr. Probably the _any versions too... those should be visible externally, I think.
Can we just turn all of these into > Since you have several *different* places where you're reasoning about the same linkage types, you should probably write your own predicates as a static inline function in your file, instead of duplicating the logic all over. I currently have three switch blocks, each testing for a different one of these conditions. I think, in fact, that some of this logic was wrong (I said it was a work in progress). The new version now only has two cases: 1) Is something visible externally? 2) Is something that is visible externally something that should not be initialized here? I've factored 1) out into an inline function, because it's used in two places, but I've left 2) inline. > Please resend this for another iteration after you incorporate these changes, Attached. I'm still not 100% convinced that the logic is correct, especially with respect to handling linkonce linkage types. Should they be turned into something else and their initialisers removed, or should they be left as is? I'm now leaving them as-is, but I'm not convinced that this is the right thing to do. David -- This email complies with ISO 3103 -------------- next part -------------- A non-text attachment was scrubbed... Name: MakeRuntimeLibraryInterface.diff Type: application/octet-stream Size: 11175 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100510/7e2841c9/attachment.obj From anton at korobeynikov.info Mon May 10 11:58:56 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Mon, 10 May 2010 20:58:56 +0400 Subject: [llvm-commits] [llvm] r103373 - /llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp In-Reply-To: <20100509055228.4CD06312800A@llvm.org> References: <20100509055228.4CD06312800A@llvm.org> Message-ID: Hello, Nathan > made COFF target dllexport logic apply to all subtargets Why? It does not make any sense to emit something coff-specific on linux/darwin. Please revert.
-- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From anton at korobeynikov.info Mon May 10 11:59:51 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Mon, 10 May 2010 20:59:51 +0400 Subject: [llvm-commits] COFF dllexport path In-Reply-To: References: Message-ID: > looks good to me, please commit! The change in question is invalid: it does not make any sense to emit COFF-specific stuff on ELF/MachO -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From bob.wilson at apple.com Mon May 10 12:14:27 2010 From: bob.wilson at apple.com (Bob Wilson) Date: Mon, 10 May 2010 17:14:27 -0000 Subject: [llvm-commits] [llvm] r103407 - /llvm/trunk/lib/CodeGen/MachineSSAUpdater.cpp Message-ID: <20100510171427.2337D312800A@llvm.org> Author: bwilson Date: Mon May 10 12:14:26 2010 New Revision: 103407 URL: http://llvm.org/viewvc/llvm-project?rev=103407&view=rev Log: Fix PR7096. When a block containing multiple defs is tail duplicated, the SSAUpdater for the value from the first def may see uses of undefined values, because the later defs will not have been updated yet. Modified: llvm/trunk/lib/CodeGen/MachineSSAUpdater.cpp Modified: llvm/trunk/lib/CodeGen/MachineSSAUpdater.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineSSAUpdater.cpp?rev=103407&r1=103406&r2=103407&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineSSAUpdater.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineSSAUpdater.cpp Mon May 10 12:14:26 2010 @@ -329,7 +329,7 @@ /// InstrIsPHI - Check if an instruction is a PHI. 
/// static MachineInstr *InstrIsPHI(MachineInstr *I) { - if (I->isPHI()) + if (I && I->isPHI()) return I; return 0; } From dalej at apple.com Mon May 10 12:26:14 2010 From: dalej at apple.com (Dale Johannesen) Date: Mon, 10 May 2010 10:26:14 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r103366 - in /llvm-gcc-4.2/trunk/gcc: config/darwin-c.c config/t-darwin testsuite/gcc.apple/opt-pragma-1.c testsuite/gcc.apple/opt-pragma-2.c testsuite/gcc.apple/opt-pragma-3.c testsuite/gcc.apple/opt-pragma-4.c testsuite/gcc.apple/opt-pragma-5.c testsuite/gcc.apple/opt-pragma-6.c testsuite/gcc.apple/opt-pragma-7.c In-Reply-To: References: <20100508234321.246CF312800A@llvm.org> Message-ID: On May 9, 2010, at 10:37 AMPDT, Chris Lattner wrote: > On May 9, 2010, at 9:22 AM, Dale Johannesen wrote: > On May 8, 2010, at 4:43 PM, Eric Christopher wrote: >> >>> Author: echristo >>> Date: Sat May 8 18:43:20 2010 >>> New Revision: 103366 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=103366&view=rev >>> Log: >>> Disable optimization pragmas. >>> >>> Fixes rdar://7820636 >> >> There are people using these. You will certainly get complaints. > > They don't work in llvm-gcc and clang doesn't support them. It seems no harm. It may be true that we don't need the functionality if nobody has noticed it doesn't work. But now they silently don't work, and the change gives a warning. This will break projects that build with -Werror. From evan.cheng at apple.com Mon May 10 12:33:49 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 10 May 2010 17:33:49 -0000 Subject: [llvm-commits] [llvm] r103410 - /llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Message-ID: <20100510173349.7D74B312800A@llvm.org> Author: evancheng Date: Mon May 10 12:33:49 2010 New Revision: 103410 URL: http://llvm.org/viewvc/llvm-project?rev=103410&view=rev Log: Re-defined valno is always valno even for partial re-def's. 
Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=103410&r1=103409&r2=103410&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Mon May 10 12:33:49 2010 @@ -432,8 +432,7 @@ // that at this point, there should be exactly one value number in it. assert((PartReDef || interval.containsOneValue()) && "Unexpected 2-addr liveint!"); - unsigned NumVals = interval.getNumValNums(); - SlotIndex DefIndex = interval.getValNumInfo(NumVals-1)->def.getDefIndex(); + SlotIndex DefIndex = interval.getValNumInfo(0)->def.getDefIndex(); SlotIndex RedefIndex = MIIdx.getDefIndex(); if (MO.isEarlyClobber()) RedefIndex = MIIdx.getUseIndex(); From evan.cheng at apple.com Mon May 10 12:34:18 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 10 May 2010 17:34:18 -0000 Subject: [llvm-commits] [llvm] r103411 - in /llvm/trunk/lib/Target/ARM: ARMISelDAGToDAG.cpp ARMISelLowering.cpp NEONPreAllocPass.cpp Message-ID: <20100510173418.AF73D312800A@llvm.org> Author: evancheng Date: Mon May 10 12:34:18 2010 New Revision: 103411 URL: http://llvm.org/viewvc/llvm-project?rev=103411&view=rev Log: Model vld2 / vst2 with reg_sequence. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=103411&r1=103410&r2=103411&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Mon May 10 12:34:18 2010 @@ -172,9 +172,17 @@ char ConstraintCode, std::vector &OutOps); - /// PairDRegs - Insert a pair of double registers into an implicit def to - /// form a quad register. + /// PairDRegs - Form a quad register from a pair of D registers. + /// SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1); + + /// PairQRegs - Form a quad register pair from a pair of Q registers. + /// + SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1); + + /// QuadDRegs - Form a quad register pair from a quad of D registers. + /// + SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); }; } @@ -942,8 +950,8 @@ return NULL; } -/// PairDRegs - Insert a pair of double registers into an implicit def to -/// form a quad register. +/// PairDRegs - Form a quad register from a pair of D registers. +/// SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32); @@ -960,6 +968,29 @@ VT, SDValue(Pair, 0), V1, SubReg1); } +/// PairQRegs - Form a quad register pair from a pair of Q registers.
+/// +SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::QSUBREG_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::QSUBREG_1, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); +} + +/// QuadDRegs - Form a quad register pair from a quad of D registers. +/// +SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::DSUBREG_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::DSUBREG_3, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); +} + /// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type /// for a 64-bit subregister of the vector. static EVT GetNEONSubregVT(EVT VT) { @@ -1028,9 +1059,24 @@ Chain = SDValue(VLd, 2 * NumVecs); // Combine the even and odd subregs to produce the result.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1)); - ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + if (llvm::ModelWithRegSequence()) { + if (NumVecs == 1) { + SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1)); + ReplaceUses(SDValue(N, 0), SDValue(Q, 0)); + } else { + SDValue QQ = SDValue(QuadDRegs(MVT::v4i64, + SDValue(VLd, 0), SDValue(VLd, 1), + SDValue(VLd, 2), SDValue(VLd, 3)), 0); + SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::QSUBREG_0, dl, VT, QQ); + SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::QSUBREG_1, dl, VT, QQ); + ReplaceUses(SDValue(N, 0), Q0); + ReplaceUses(SDValue(N, 1), Q1); + } + } else { + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1)); + ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + } } } else { // Otherwise, quad registers are loaded with two separate instructions, @@ -1118,17 +1164,40 @@ // Quad registers are directly supported for VST1 and VST2, // storing pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + if (llvm::ModelWithRegSequence() && NumVecs == 2) { + // First extract the quad D registers. + SDValue Q0 = N->getOperand(3); + SDValue Q1 = N->getOperand(4); + + // Form a QQ register. + SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0); + + // Now extract the D registers back out. 
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(Vec+3))); + QQ)); Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(Vec+3))); + QQ)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_2, dl, RegVT, + QQ)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_3, dl, RegVT, + QQ)); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4); + } else { + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(Vec+3))); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(Vec+3))); + } + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), + 5 + 2 * NumVecs); } - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), - 5 + 2 * NumVecs); } // Otherwise, quad registers are stored with two separate instructions, Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=103411&r1=103410&r2=103411&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Mon May 10 12:34:18 2010 @@ -266,6 +266,11 @@ addQRTypeForNEON(MVT::v4i32); addQRTypeForNEON(MVT::v2i64); + // Map v4i64 to QQ registers but do not make the type legal for any + // operations. v4i64 is only used for REG_SEQUENCE to load / store quad + // D registers. 
+ addRegisterClass(MVT::v4i64, ARM::QQPRRegisterClass); + // v2f64 is legal so that QR subregs can be extracted as f64 elements, but // neither Neon nor VFP support any arithmetic operations on it. setOperationAction(ISD::FADD, MVT::v2f64, Expand); Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=103411&r1=103410&r2=103411&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Mon May 10 12:34:18 2010 @@ -360,7 +360,7 @@ return false; RegSeq = UseMI; } else { - // Extracting from a Q register. + // Extracting from a Q or QQ register. MachineInstr *DefMI = MRI->getVRegDef(VirtReg); if (!DefMI || !DefMI->isExtractSubreg()) return false; @@ -368,8 +368,11 @@ if (LastSrcReg && LastSrcReg != VirtReg) return false; const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - if (RC != ARM::QPRRegisterClass) - return false; + if (NumRegs == 2) { + if (RC != ARM::QPRRegisterClass) + return false; + } else if (RC != ARM::QQPRRegisterClass) + return false; unsigned SubIdx = DefMI->getOperand(2).getImm(); if (LastSubIdx && LastSubIdx != SubIdx-1) return false; From bob.wilson at apple.com Mon May 10 12:42:49 2010 From: bob.wilson at apple.com (Bob Wilson) Date: Mon, 10 May 2010 10:42:49 -0700 Subject: [llvm-commits] [llvm] r103407 - /llvm/trunk/lib/CodeGen/MachineSSAUpdater.cpp In-Reply-To: <20100510171427.2337D312800A@llvm.org> References: <20100510171427.2337D312800A@llvm.org> Message-ID: <1CAFD0AA-8FAC-4741-AA2F-853D326FCE94@apple.com> I forgot to mention that I was unable to get a testcase to commit for this. I attached a reduced bitcode file to pr7096, but the problem is masked if I convert that file to a .ll file. Apparently it depends on some use-list ordering to expose the problem. 
On May 10, 2010, at 10:14 AM, Bob Wilson wrote: > Author: bwilson > Date: Mon May 10 12:14:26 2010 > New Revision: 103407 > > URL: http://llvm.org/viewvc/llvm-project?rev=103407&view=rev > Log: > Fix PR7096. When a block containing multiple defs is tail duplicated, the > SSAUpdater for the value from the first def may see uses of undefined values, > because the later defs will not have been updated yet. > > Modified: > llvm/trunk/lib/CodeGen/MachineSSAUpdater.cpp > > Modified: llvm/trunk/lib/CodeGen/MachineSSAUpdater.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineSSAUpdater.cpp?rev=103407&r1=103406&r2=103407&view=diff > ============================================================================== > --- llvm/trunk/lib/CodeGen/MachineSSAUpdater.cpp (original) > +++ llvm/trunk/lib/CodeGen/MachineSSAUpdater.cpp Mon May 10 12:14:26 2010 > @@ -329,7 +329,7 @@ > /// InstrIsPHI - Check if an instruction is a PHI. > /// > static MachineInstr *InstrIsPHI(MachineInstr *I) { > - if (I->isPHI()) > + if (I && I->isPHI()) > return I; > return 0; > } > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From clattner at apple.com Mon May 10 12:44:44 2010 From: clattner at apple.com (Chris Lattner) Date: Mon, 10 May 2010 10:44:44 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r103366 - in /llvm-gcc-4.2/trunk/gcc: config/darwin-c.c config/t-darwin testsuite/gcc.apple/opt-pragma-1.c testsuite/gcc.apple/opt-pragma-2.c testsuite/gcc.apple/opt-pragma-3.c testsuite/gcc.apple/opt-pragma-4.c testsuite/gcc.apple/opt-pragma-5.c testsuite/gcc.apple/opt-pragma-6.c testsuite/gcc.apple/opt-pragma-7.c In-Reply-To: References: <20100508234321.246CF312800A@llvm.org> Message-ID: <814D134F-F75D-43CC-9620-6E1D047713E1@apple.com> On May 10, 2010, at 10:26 AM, Dale Johannesen wrote: >> >> They don't work in llvm-gcc and clang doesn't support them. 
It seems no harm. > > It may be true that we don't need the functionality if nobody has noticed it doesn't work. But now they silently don't work, and the change gives a warning. This will break projects that build with -Werror. Producing a warning that we don't support it seems like a good thing. -Chris From clattner at apple.com Mon May 10 12:45:10 2010 From: clattner at apple.com (Chris Lattner) Date: Mon, 10 May 2010 10:45:10 -0700 Subject: [llvm-commits] COFF dllexport path In-Reply-To: References: Message-ID: On May 10, 2010, at 9:59 AM, Anton Korobeynikov wrote: >> looks good to me, please commit! > The change in question is invalid: it does not make any sense to emit > COFF-specific stuff on ELF/MachO It is guarded above to only happen on coff systems. -Chris From clattner at apple.com Mon May 10 12:45:25 2010 From: clattner at apple.com (Chris Lattner) Date: Mon, 10 May 2010 10:45:25 -0700 Subject: [llvm-commits] [llvm] r103373 - /llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp In-Reply-To: References: <20100509055228.4CD06312800A@llvm.org> Message-ID: <6D47C5E0-59B0-4470-BAAE-E9A76D38D26D@apple.com> On May 10, 2010, at 9:58 AM, Anton Korobeynikov wrote: > Hello, Nathan > >> made COFF target dllexport logic apply to all subtargets > Why? It does not make any sense to emit something coff-specific on linux/darwin. > > Please revert. It only happens on COFF. -Chris From anton at korobeynikov.info Mon May 10 12:46:18 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Mon, 10 May 2010 21:46:18 +0400 Subject: [llvm-commits] COFF dllexport path In-Reply-To: References: Message-ID: > It is guarded above to only happen on coff systems. Ah, ok then. Sorry for noise. I'm starting to review all COFF-related stuff submitted recently. 
-- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From criswell at uiuc.edu Mon May 10 12:45:31 2010 From: criswell at uiuc.edu (John Criswell) Date: Mon, 10 May 2010 17:45:31 -0000 Subject: [llvm-commits] [poolalloc] r103413 - /poolalloc/branches/release_26/runtime/FL2Allocator/PoolAllocator.cpp Message-ID: <20100510174531.9470F312800A@llvm.org> Author: criswell Date: Mon May 10 12:45:31 2010 New Revision: 103413 URL: http://llvm.org/viewvc/llvm-project?rev=103413&view=rev Log: Fixed compilation warning and potential functional error in poolrealloc(). Modified: poolalloc/branches/release_26/runtime/FL2Allocator/PoolAllocator.cpp Modified: poolalloc/branches/release_26/runtime/FL2Allocator/PoolAllocator.cpp URL: http://llvm.org/viewvc/llvm-project/poolalloc/branches/release_26/runtime/FL2Allocator/PoolAllocator.cpp?rev=103413&r1=103412&r2=103413&view=diff ============================================================================== --- poolalloc/branches/release_26/runtime/FL2Allocator/PoolAllocator.cpp (original) +++ poolalloc/branches/release_26/runtime/FL2Allocator/PoolAllocator.cpp Mon May 10 12:45:31 2010 @@ -896,6 +896,7 @@ pthread_mutex_lock(&Pool->pool_lock); void* to_return = poolrealloc_internal(Pool, Node, NumBytes); pthread_mutex_unlock(&Pool->pool_lock); + return to_return; } #ifdef USE_DYNCALL From blunted2night at gmail.com Mon May 10 12:48:27 2010 From: blunted2night at gmail.com (Nathan Jeffords) Date: Mon, 10 May 2010 10:48:27 -0700 Subject: [llvm-commits] [llvm] r103373 - /llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp In-Reply-To: <6D47C5E0-59B0-4470-BAAE-E9A76D38D26D@apple.com> References: <20100509055228.4CD06312800A@llvm.org> <6D47C5E0-59B0-4470-BAAE-E9A76D38D26D@apple.com> Message-ID: <9BE0C54DE5FB4BDD80E5D0A3AEAFF06C@N8DEV> -------------------------------------------------- From: "Chris Lattner" Sent: Monday, May 10, 2010 10:45 AM To: "Anton Korobeynikov" Cc: "Nathan 
Jeffords" ; Subject: Re: [llvm-commits] [llvm] r103373 - /llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp > > On May 10, 2010, at 9:58 AM, Anton Korobeynikov wrote: > >> Hello, Nathan >> >>> made COFF target dllexport logic apply to all subtargets >> Why? It does not make any sense to emit something coff-specific on >> linux/darwin. >> >> Please revert. > > It only happens on COFF. > > -Chris I apologize, my commit message was not precise enough. -Nathan From dpatel at apple.com Mon May 10 12:49:04 2010 From: dpatel at apple.com (Devang Patel) Date: Mon, 10 May 2010 17:49:04 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103414 - /llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Message-ID: <20100510174904.E5EBD312800A@llvm.org> Author: dpatel Date: Mon May 10 12:49:04 2010 New Revision: 103414 URL: http://llvm.org/viewvc/llvm-project?rev=103414&view=rev Log: Do not emit variable debug info if debug info for corresponding type is not available. This fixes PR 7104 crash. Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp?rev=103414&r1=103413&r2=103414&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Mon May 10 12:49:04 2010 @@ -459,6 +459,9 @@ DIType Ty = getOrCreateType(type); if (DECL_ARTIFICIAL (decl)) Ty = DebugFactory.CreateArtificialType(Ty); + // If type info is not available then do not emit debug info for this var. 
+ if (!Ty) + return; llvm::DIVariable D = DebugFactory.CreateVariable(Tag, VarScope, Name, getOrCreateFile(Loc.file), @@ -1169,6 +1172,7 @@ // gen_type_die(TYPE_OFFSET_BASETYPE(type), context_die); // gen_type_die(TREE_TYPE(type), context_die); // gen_ptr_to_mbr_type_die(type, context_die); + // PR 7104 break; } From dpatel at apple.com Mon May 10 12:49:40 2010 From: dpatel at apple.com (Devang Patel) Date: Mon, 10 May 2010 17:49:40 -0000 Subject: [llvm-commits] [llvm] r103415 - /llvm/trunk/test/FrontendC++/2010-05-10-Var-DbgInfo.cpp Message-ID: <20100510174940.BE1D6312800A@llvm.org> Author: dpatel Date: Mon May 10 12:49:40 2010 New Revision: 103415 URL: http://llvm.org/viewvc/llvm-project?rev=103415&view=rev Log: Test case for 103414. Added: llvm/trunk/test/FrontendC++/2010-05-10-Var-DbgInfo.cpp Added: llvm/trunk/test/FrontendC++/2010-05-10-Var-DbgInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/FrontendC%2B%2B/2010-05-10-Var-DbgInfo.cpp?rev=103415&view=auto ============================================================================== --- llvm/trunk/test/FrontendC++/2010-05-10-Var-DbgInfo.cpp (added) +++ llvm/trunk/test/FrontendC++/2010-05-10-Var-DbgInfo.cpp Mon May 10 12:49:40 2010 @@ -0,0 +1,43 @@ +// RUN: %llvmgcc -S -O0 -g %s -o /dev/null +// PR 7104 + +struct A { + int Ai; +}; + +struct B : public A {}; +struct C : public B {}; + +const char * f(int C::*){ return ""; } +int f(int B::*) { return 1; } + +struct D : public C {}; + +const char * g(int B::*){ return ""; } +int g(int D::*) { return 1; } + +void test() +{ + int i = f(&A::Ai); + + const char * str = g(&A::Ai); +} + +// conversion of B::* to C::* is better than conversion of A::* to C::* +typedef void (A::*pmfa)(); +typedef void (B::*pmfb)(); +typedef void (C::*pmfc)(); + +struct X { + operator pmfa(); + operator pmfb(); +}; + + +void g(pmfc); + +void test2(X x) +{ + g(x); +} + From sabre at nondot.org Mon May 10 12:49:52 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 10 
May 2010 17:49:52 -0000 Subject: [llvm-commits] [test-suite] r103416 - /test-suite/trunk/Makefile.programs Message-ID: <20100510174952.4D95A312800A@llvm.org> Author: lattner Date: Mon May 10 12:49:52 2010 New Revision: 103416 URL: http://llvm.org/viewvc/llvm-project?rev=103416&view=rev Log: add support for REFERENCE_OUTPUT_FILE back, even if treeadd can't use it. Modified: test-suite/trunk/Makefile.programs Modified: test-suite/trunk/Makefile.programs URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/Makefile.programs?rev=103416&r1=103415&r2=103416&view=diff ============================================================================== --- test-suite/trunk/Makefile.programs (original) +++ test-suite/trunk/Makefile.programs Mon May 10 12:49:52 2010 @@ -73,6 +73,12 @@ RUNTIMELIMIT := 500 endif +# If the program specified a REFERENCE_OUTPUT_FILE, they obviously want to +# USE_REFERENCE_OUTPUT. +ifdef REFERENCE_OUTPUT_FILE +USE_REFERENCE_OUTPUT := 1 +endif + # RUNSAFELY - This program simply runs another program. If the program works # correctly, this script has no effect, otherwise it will do things like print a # stack trace of a core dump. It always returns "successful" so that tests will @@ -181,9 +187,7 @@ DISABLE_DIFFS := 1 endif -ifndef DISABLE_LLC -all:: $(LLCCODEGEN) -else +ifdef DISABLE_LLC DISABLE_LLC_DIFFS = 1 endif @@ -758,10 +762,17 @@ cp $< $@ else ifdef USE_REFERENCE_OUTPUT + +# If the app wants to USE_REFERENCE_OUTPUT, but hasn't specified a file +# containing the reference output, default to programname.reference_output. 
+ifndef REFERENCE_OUTPUT_FILE +REFERENCE_OUTPUT_FILE = $(PROJ_SRC_DIR)/%.reference_output +endif + # In this case, we opt out of generating the native output and just # copy it from a reference output $(PROGRAMS_TO_TEST:%=Output/%.out-nat): \ -Output/%.out-nat: $(PROJ_SRC_DIR)/%.reference_output Output/.dir +Output/%.out-nat: $(REFERENCE_OUTPUT_FILE) Output/.dir cp $< $@ endif endif From blunted2night at gmail.com Mon May 10 13:42:50 2010 From: blunted2night at gmail.com (Nathan Jeffords) Date: Mon, 10 May 2010 11:42:50 -0700 Subject: [llvm-commits] patch to add a modifyFlags function to MCSymbolData Message-ID: I am requesting approval to commit this patch to trunk It adds a trivial function to modify the flags value of MCSymbolData. The function takes the value and a mask, and clears the mask bits before applying the value. -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100510/5ac26010/attachment.html -------------- next part -------------- A non-text attachment was scrubbed... 
Name: MCSymbolData-modifyFlags.patch Type: application/octet-stream Size: 604 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100510/5ac26010/attachment.obj From gohman at apple.com Mon May 10 13:44:04 2010 From: gohman at apple.com (Dan Gohman) Date: Mon, 10 May 2010 11:44:04 -0700 Subject: [llvm-commits] [llvm] r103257 - in /llvm/trunk: include/llvm/LinkAllPasses.h include/llvm/Transforms/Scalar.h lib/Transforms/Scalar/Sink.cpp test/Transforms/Sink/ test/Transforms/Sink/basic.ll test/Transforms/Sink/dg.exp In-Reply-To: References: <20100507154013.E14C0312800A@llvm.org> Message-ID: <37F54FA9-00D8-4E95-9399-91F8B3DBC86E@apple.com> On May 7, 2010, at 1:22 PM, Eli Friedman wrote: > On Fri, May 7, 2010 at 8:40 AM, Dan Gohman wrote: >> Author: djg >> Date: Fri May 7 10:40:13 2010 >> New Revision: 103257 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=103257&view=rev >> Log: >> Add an LLVM IR version of code sinking. This uses the same simple algorithm >> as MachineSink, but it isn't constrained by MachineInstr-level details. > > Interesting... done any benchmarking yet? It helps a whole bunch in one testcase. It doesn't make much difference in a bunch of others. > It looks like this pass could potentially sink allocas out of the > entry block; am I missing something? I believe the isSafeToMove function handles that, with the isSafeToSpeculativelyExecute check. Dan From evan.cheng at apple.com Mon May 10 14:03:57 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 10 May 2010 19:03:57 -0000 Subject: [llvm-commits] [llvm] r103419 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll Message-ID: <20100510190358.115A4312800A@llvm.org> Author: evancheng Date: Mon May 10 14:03:57 2010 New Revision: 103419 URL: http://llvm.org/viewvc/llvm-project?rev=103419&view=rev Log: Be careful with operand promotion. For a binary operation, the source operands may be the same. 
PR7018. rdar://7939869. Added: llvm/trunk/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=103419&r1=103418&r2=103419&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon May 10 14:03:57 2010 @@ -760,12 +760,18 @@ bool Replace1 = false; SDValue N1 = Op.getOperand(1); - SDValue NN1 = PromoteOperand(N1, PVT, Replace1); - if (NN1.getNode() == 0) - return SDValue(); + SDValue NN1; + if (N0 == N1) + NN1 = NN0; + else { + NN1 = PromoteOperand(N1, PVT, Replace1); + if (NN1.getNode() == 0) + return SDValue(); + } AddToWorkList(NN0.getNode()); - AddToWorkList(NN1.getNode()); + if (NN1.getNode()) + AddToWorkList(NN1.getNode()); if (Replace0) ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); Added: llvm/trunk/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll?rev=103419&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll (added) +++ llvm/trunk/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll Mon May 10 14:03:57 2010 @@ -0,0 +1,11 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 +; PR7018 +; rdar://7939869 + +define i32 @CXB30130(i32 %num1, i16* nocapture %num2, float* nocapture %num3, double* nocapture %num4) nounwind ssp { +entry: + %0 = load i16* %num2, align 2 ; [#uses=2] + %1 = mul nsw i16 %0, %0 ; [#uses=1] + store i16 %1, i16* %num2, align 2 + ret i32 undef +} From gohman at apple.com Mon May 10 14:47:21 2010 From: gohman at apple.com (Dan Gohman) Date: Mon, 10 May 2010 19:47:21 -0000 Subject: 
[llvm-commits] [llvm] r103420 - /llvm/trunk/include/llvm/Constants.h Message-ID: <20100510194721.9B17A312800A@llvm.org> Author: djg Date: Mon May 10 14:47:21 2010 New Revision: 103420 URL: http://llvm.org/viewvc/llvm-project?rev=103420&view=rev Log: Delete an obsolete comment. Modified: llvm/trunk/include/llvm/Constants.h Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=103420&r1=103419&r2=103420&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Mon May 10 14:47:21 2010 @@ -691,9 +691,6 @@ // ConstantExpr class, because they will attempt to fold the constant // expression into something simpler if possible. - /// Cast constant expr - /// - /// getAlignOf constant expr - computes the alignment of a type in a target /// independent way (Note: the return type is an i64). static Constant *getAlignOf(const Type* Ty); From gohman at apple.com Mon May 10 15:07:44 2010 From: gohman at apple.com (Dan Gohman) Date: Mon, 10 May 2010 20:07:44 -0000 Subject: [llvm-commits] [llvm] r103422 - /llvm/trunk/lib/Analysis/DebugInfo.cpp Message-ID: <20100510200744.A4B1F312800A@llvm.org> Author: djg Date: Mon May 10 15:07:44 2010 New Revision: 103422 URL: http://llvm.org/viewvc/llvm-project?rev=103422&view=rev Log: Fix whitespace in debug output to be consistent. 
Modified: llvm/trunk/lib/Analysis/DebugInfo.cpp Modified: llvm/trunk/lib/Analysis/DebugInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/DebugInfo.cpp?rev=103422&r1=103421&r2=103422&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/DebugInfo.cpp (original) +++ llvm/trunk/lib/Analysis/DebugInfo.cpp Mon May 10 15:07:44 2010 @@ -466,7 +466,7 @@ if (getLanguage()) OS << " [" << dwarf::LanguageString(getLanguage()) << "] "; - OS << " [" << getDirectory() << "/" << getFilename() << " ]"; + OS << " [" << getDirectory() << "/" << getFilename() << "]"; } /// print - Print type. From eli.friedman at gmail.com Mon May 10 15:12:59 2010 From: eli.friedman at gmail.com (Eli Friedman) Date: Mon, 10 May 2010 13:12:59 -0700 Subject: [llvm-commits] [llvm] r103257 - in /llvm/trunk: include/llvm/LinkAllPasses.h include/llvm/Transforms/Scalar.h lib/Transforms/Scalar/Sink.cpp test/Transforms/Sink/ test/Transforms/Sink/basic.ll test/Transforms/Sink/dg.exp In-Reply-To: <37F54FA9-00D8-4E95-9399-91F8B3DBC86E@apple.com> References: <20100507154013.E14C0312800A@llvm.org> <37F54FA9-00D8-4E95-9399-91F8B3DBC86E@apple.com> Message-ID: On Mon, May 10, 2010 at 11:44 AM, Dan Gohman wrote: > > On May 7, 2010, at 1:22 PM, Eli Friedman wrote: > >> On Fri, May 7, 2010 at 8:40 AM, Dan Gohman wrote: >>> Author: djg >>> Date: Fri May 7 10:40:13 2010 >>> New Revision: 103257 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=103257&view=rev >>> Log: >>> Add an LLVM IR version of code sinking. This uses the same simple algorithm >>> as MachineSink, but it isn't constrained by MachineInstr-level details. >> >> Interesting... done any benchmarking yet? > > It helps a whole bunch in one testcase. It doesn't make much difference in > a bunch of others. > >> It looks like this pass could potentially sink allocas out of the >> entry block; am I missing something?
> > I believe the isSafeToMove function handles that, with the > isSafeToSpeculativelyExecute check. Ah, right, I missed that bit. -Eli From daniel at zuster.org Mon May 10 15:11:56 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Mon, 10 May 2010 20:11:56 -0000 Subject: [llvm-commits] [llvm] r103423 - in /llvm/trunk: Makefile.config.in Makefile.rules autoconf/configure.ac configure include/llvm/Config/config.h.in lib/Support/CommandLine.cpp Message-ID: <20100510201156.B03C5312800A@llvm.org> Author: ddunbar Date: Mon May 10 15:11:56 2010 New Revision: 103423 URL: http://llvm.org/viewvc/llvm-project?rev=103423&view=rev Log: Add new configure option, --disable-timestamps, intended to turn off anything which would mess up binary/object comparisons. Currently: - Disables 'Built on ...' in 'foo --version'. - Disables timestamps from being embedded into .dir files. Modified: llvm/trunk/Makefile.config.in llvm/trunk/Makefile.rules llvm/trunk/autoconf/configure.ac llvm/trunk/configure llvm/trunk/include/llvm/Config/config.h.in llvm/trunk/lib/Support/CommandLine.cpp Modified: llvm/trunk/Makefile.config.in URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/Makefile.config.in?rev=103423&r1=103422&r2=103423&view=diff ============================================================================== --- llvm/trunk/Makefile.config.in (original) +++ llvm/trunk/Makefile.config.in Mon May 10 15:11:56 2010 @@ -270,6 +270,9 @@ # Use -fvisibility-inlines-hidden? ENABLE_VISIBILITY_INLINES_HIDDEN := @ENABLE_VISIBILITY_INLINES_HIDDEN@ +# Do we want to allow timestamping information into builds? +ENABLE_TIMESTAMPS := @ENABLE_TIMESTAMPS@ + # This option tells the Makefiles to produce verbose output. 
# It essentially prints the commands that make is executing #VERBOSE = 1 Modified: llvm/trunk/Makefile.rules URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/Makefile.rules?rev=103423&r1=103422&r2=103423&view=diff ============================================================================== --- llvm/trunk/Makefile.rules (original) +++ llvm/trunk/Makefile.rules Mon May 10 15:11:56 2010 @@ -447,6 +447,14 @@ endif endif +# Support makefile variable to disable any kind of timestamp/non-deterministic +# info from being used in the build. +ifeq ($(ENABLE_TIMESTAMPS),1) + DOTDIR_TIMESTAMP_COMMAND := $(DATE) +else + DOTDIR_TIMESTAMP_COMMAND := echo 'Created.' +endif + ifeq ($(HOST_OS),MingW) # Work around PR4957 CPP.Defines += -D__NO_CTYPE_INLINE @@ -779,7 +787,7 @@ # To create other directories, as needed, and timestamp their creation %/.dir: $(Verb) $(MKDIR) $* > /dev/null - $(Verb) $(DATE) > $@ + $(Verb) $(DOTDIR_TIMESTAMP_COMMAND) > $@ .PRECIOUS: $(ObjDir)/.dir $(LibDir)/.dir $(ToolDir)/.dir $(ExmplDir)/.dir .PRECIOUS: $(LLVMLibDir)/.dir $(LLVMToolDir)/.dir $(LLVMExmplDir)/.dir Modified: llvm/trunk/autoconf/configure.ac URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/autoconf/configure.ac?rev=103423&r1=103422&r2=103423&view=diff ============================================================================== --- llvm/trunk/autoconf/configure.ac (original) +++ llvm/trunk/autoconf/configure.ac Mon May 10 15:11:56 2010 @@ -525,6 +525,20 @@ *) AC_MSG_ERROR([Invalid setting for --enable-shared. Use "yes" or "no"]) ;; esac +dnl Enable embedding timestamp information into build. +AC_ARG_ENABLE(timestamps, + AS_HELP_STRING([--enable-timestamps], + [Enable embedding timestamp information in build (default is YES)]),, + enableval=default) +case "$enableval" in + yes) AC_SUBST(ENABLE_TIMESTAMPS,[1]) ;; + no) AC_SUBST(ENABLE_TIMESTAMPS,[0]) ;; + default) AC_SUBST(ENABLE_TIMESTAMPS,[1]) ;; + *) AC_MSG_ERROR([Invalid setting for --enable-timestamps. 
Use "yes" or "no"]) ;; +esac +AC_DEFINE_UNQUOTED([ENABLE_TIMESTAMPS],$ENABLE_TIMESTAMPS, + [Define if timestamp information (e.g., __DATE___) is allowed]) + dnl Allow specific targets to be specified for building (or not) TARGETS_TO_BUILD="" AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets], Modified: llvm/trunk/configure URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/configure?rev=103423&r1=103422&r2=103423&view=diff ============================================================================== --- llvm/trunk/configure (original) +++ llvm/trunk/configure Mon May 10 15:11:56 2010 @@ -690,6 +690,7 @@ ENABLE_THREADS ENABLE_PIC ENABLE_SHARED +ENABLE_TIMESTAMPS TARGETS_TO_BUILD LLVM_ENUM_TARGETS LLVM_ENUM_ASM_PRINTERS @@ -1410,6 +1411,8 @@ is YES) --enable-shared Build a shared library and link tools against it (default is NO) + --enable-timestamps Enable embedding timestamp information in build + (default is YES) --enable-targets Build specific host targets: all or target1,target2,... Valid targets are: host, x86, x86_64, sparc, powerpc, alpha, arm, mips, spu, @@ -4923,6 +4926,30 @@ { (exit 1); exit 1; }; } ;; esac +# Check whether --enable-timestamps was given. +if test "${enable_timestamps+set}" = set; then + enableval=$enable_timestamps; +else + enableval=default +fi + +case "$enableval" in + yes) ENABLE_TIMESTAMPS=1 + ;; + no) ENABLE_TIMESTAMPS=0 + ;; + default) ENABLE_TIMESTAMPS=1 + ;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-timestamps. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-timestamps. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + +cat >>confdefs.h <<_ACEOF +#define ENABLE_TIMESTAMPS $ENABLE_TIMESTAMPS +_ACEOF + + TARGETS_TO_BUILD="" # Check whether --enable-targets was given. 
if test "${enable_targets+set}" = set; then @@ -11357,7 +11384,7 @@ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <conf$$subs.sed <<_ACEOF +ENABLE_BUILT_CLANG!$ENABLE_BUILT_CLANG$ac_delim OPTIMIZE_OPTION!$OPTIMIZE_OPTION$ac_delim EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim BINUTILS_INCDIR!$BINUTILS_INCDIR$ac_delim @@ -21313,7 +21341,7 @@ LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 94; then + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 Modified: llvm/trunk/include/llvm/Config/config.h.in URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Config/config.h.in?rev=103423&r1=103422&r2=103423&view=diff ============================================================================== --- llvm/trunk/include/llvm/Config/config.h.in (original) +++ llvm/trunk/include/llvm/Config/config.h.in Mon May 10 15:11:56 2010 @@ -24,6 +24,9 @@ /* Define if threads enabled */ #undef ENABLE_THREADS +/* Define if timestamp information (e.g., __DATE___) is allowed */ +#undef ENABLE_TIMESTAMPS + /* Define to 1 if you have the `argz_append' function. 
*/ #undef HAVE_ARGZ_APPEND Modified: llvm/trunk/lib/Support/CommandLine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/CommandLine.cpp?rev=103423&r1=103422&r2=103423&view=diff ============================================================================== --- llvm/trunk/lib/Support/CommandLine.cpp (original) +++ llvm/trunk/lib/Support/CommandLine.cpp Mon May 10 15:11:56 2010 @@ -1170,7 +1170,9 @@ std::string CPU = sys::getHostCPUName(); if (CPU == "generic") CPU = "(unknown)"; OS << ".\n" +#if (ENABLE_TIMESTAMPS == 1) << " Built " << __DATE__ << " (" << __TIME__ << ").\n" +#endif << " Host: " << sys::getHostTriple() << '\n' << " Host CPU: " << CPU << '\n' << '\n' From gohman at apple.com Mon May 10 15:14:02 2010 From: gohman at apple.com (Dan Gohman) Date: Mon, 10 May 2010 20:14:02 -0000 Subject: [llvm-commits] [llvm] r103424 - /llvm/trunk/include/llvm/Support/DOTGraphTraits.h Message-ID: <20100510201402.552A9312800A@llvm.org> Author: djg Date: Mon May 10 15:14:02 2010 New Revision: 103424 URL: http://llvm.org/viewvc/llvm-project?rev=103424&view=rev Log: Add an explicit keyword. Modified: llvm/trunk/include/llvm/Support/DOTGraphTraits.h Modified: llvm/trunk/include/llvm/Support/DOTGraphTraits.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/DOTGraphTraits.h?rev=103424&r1=103423&r2=103424&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/DOTGraphTraits.h (original) +++ llvm/trunk/include/llvm/Support/DOTGraphTraits.h Mon May 10 15:14:02 2010 @@ -36,7 +36,7 @@ } public: - DefaultDOTGraphTraits (bool simple=false) : IsSimple (simple) {} + explicit DefaultDOTGraphTraits(bool simple=false) : IsSimple (simple) {} /// getGraphName - Return the label for the graph as a whole. Printed at the /// top of the graph. 
From greened at obbligato.org Mon May 10 15:24:27 2010 From: greened at obbligato.org (David Greene) Date: Mon, 10 May 2010 20:24:27 -0000 Subject: [llvm-commits] [llvm] r103425 - in /llvm/trunk: include/llvm/PassManager.h lib/VMCore/PassManager.cpp test/Other/2010-05-60-Printer.ll Message-ID: <20100510202427.AE20F312800A@llvm.org> Author: greened Date: Mon May 10 15:24:27 2010 New Revision: 103425 URL: http://llvm.org/viewvc/llvm-project?rev=103425&view=rev Log: Fix PR6875: This includes a patch by Roman Divacky to fix the initial crash. Move the actual addition of passes from *PassManager::add to *PassManager::addImpl. That way, when adding printer passes we won't recurse infinitely. Finally, check to make sure that we are actually adding a FunctionPass to a FunctionPassManager before doing a print before or after it. Immutable passes are strange in this way because they aren't FunctionPasses yet they can be and are added to the FunctionPassManager. Added: llvm/trunk/test/Other/2010-05-60-Printer.ll Modified: llvm/trunk/include/llvm/PassManager.h llvm/trunk/lib/VMCore/PassManager.cpp Modified: llvm/trunk/include/llvm/PassManager.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/PassManager.h?rev=103425&r1=103424&r2=103425&view=diff ============================================================================== --- llvm/trunk/include/llvm/PassManager.h (original) +++ llvm/trunk/include/llvm/PassManager.h Mon May 10 15:24:27 2010 @@ -60,6 +60,9 @@ bool run(Module &M); private: + /// addImpl - Add a pass to the queue of passes to run, without + /// checking whether to add a printer pass. + void addImpl(Pass *P); /// PassManagerImpl_New is the actual class. PassManager is just the /// wraper to publish simple pass manager interface @@ -96,6 +99,10 @@ bool doFinalization(); private: + /// addImpl - Add a pass to the queue of passes to run, without + /// checking whether to add a printer pass. 
+ void addImpl(Pass *P); + FunctionPassManagerImpl *FPM; Module *M; }; Modified: llvm/trunk/lib/VMCore/PassManager.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/PassManager.cpp?rev=103425&r1=103424&r2=103425&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/PassManager.cpp (original) +++ llvm/trunk/lib/VMCore/PassManager.cpp Mon May 10 15:24:27 2010 @@ -275,7 +275,7 @@ addImmutablePass(IP); recordAvailableAnalysis(IP); } else { - P->assignPassManager(activeStack); + P->assignPassManager(activeStack, PMT_FunctionPassManager); } } @@ -418,7 +418,7 @@ addImmutablePass(IP); recordAvailableAnalysis(IP); } else { - P->assignPassManager(activeStack); + P->assignPassManager(activeStack, PMT_ModulePassManager); } } @@ -1270,20 +1270,30 @@ delete FPM; } +/// addImpl - Add a pass to the queue of passes to run, without +/// checking whether to add a printer pass. +void FunctionPassManager::addImpl(Pass *P) { + FPM->add(P); +} + /// add - Add a pass to the queue of passes to run. This passes /// ownership of the Pass to the PassManager. When the /// PassManager_X is destroyed, the pass will be destroyed as well, so /// there is no need to delete the pass. (TODO delete passes.) /// This implies that all passes MUST be allocated with 'new'. void FunctionPassManager::add(Pass *P) { - if (ShouldPrintBeforePass(P)) - add(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") - + P->getPassName() + " ***")); - FPM->add(P); - - if (ShouldPrintAfterPass(P)) - add(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ") - + P->getPassName() + " ***")); + // If this is a not a function pass, don't add a printer for it. 
+ if (P->getPassKind() == PT_Function) + if (ShouldPrintBeforePass(P)) + addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") + + P->getPassName() + " ***")); + + addImpl(P); + + if (P->getPassKind() == PT_Function) + if (ShouldPrintAfterPass(P)) + addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ") + + P->getPassName() + " ***")); } /// run - Execute all of the passes scheduled for execution. Keep @@ -1588,20 +1598,26 @@ delete PM; } +/// addImpl - Add a pass to the queue of passes to run, without +/// checking whether to add a printer pass. +void PassManager::addImpl(Pass *P) { + PM->add(P); +} + /// add - Add a pass to the queue of passes to run. This passes ownership of /// the Pass to the PassManager. When the PassManager is destroyed, the pass /// will be destroyed as well, so there is no need to delete the pass. This /// implies that all passes MUST be allocated with 'new'. void PassManager::add(Pass *P) { if (ShouldPrintBeforePass(P)) - add(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") - + P->getPassName() + " ***")); + addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") + + P->getPassName() + " ***")); - PM->add(P); + addImpl(P); if (ShouldPrintAfterPass(P)) - add(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ") - + P->getPassName() + " ***")); + addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ") + + P->getPassName() + " ***")); } /// run - Execute all of the passes scheduled for execution. Keep track of @@ -1764,7 +1780,7 @@ // [3] Assign manager to manage this new manager. 
This may create // and push new managers into PMS - BBP->assignPassManager(PMS); + BBP->assignPassManager(PMS, PreferredType); // [4] Push new manager into PMS PMS.push(BBP); Added: llvm/trunk/test/Other/2010-05-60-Printer.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/2010-05-60-Printer.ll?rev=103425&view=auto ============================================================================== --- llvm/trunk/test/Other/2010-05-60-Printer.ll (added) +++ llvm/trunk/test/Other/2010-05-60-Printer.ll Mon May 10 15:24:27 2010 @@ -0,0 +1,6 @@ +; RUN: llc -O2 -print-after-all < %s 2>@1 + +define void @tester(){ + ret void +} + From sabre at nondot.org Mon May 10 15:51:06 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 10 May 2010 20:51:06 -0000 Subject: [llvm-commits] [llvm] r103427 - /llvm/trunk/test/Other/2010-05-60-Printer.ll Message-ID: <20100510205106.4F283312800A@llvm.org> Author: lattner Date: Mon May 10 15:51:06 2010 New Revision: 103427 URL: http://llvm.org/viewvc/llvm-project?rev=103427&view=rev Log: fix a pretty obvious typo. We test things before committing them, right? 
Modified: llvm/trunk/test/Other/2010-05-60-Printer.ll Modified: llvm/trunk/test/Other/2010-05-60-Printer.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/2010-05-60-Printer.ll?rev=103427&r1=103426&r2=103427&view=diff ============================================================================== --- llvm/trunk/test/Other/2010-05-60-Printer.ll (original) +++ llvm/trunk/test/Other/2010-05-60-Printer.ll Mon May 10 15:51:06 2010 @@ -1,4 +1,4 @@ -; RUN: llc -O2 -print-after-all < %s 2>@1 +; RUN: llc -O2 -print-after-all < %s 2>&1 define void @tester(){ ret void From sabre at nondot.org Mon May 10 15:53:17 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 10 May 2010 20:53:17 -0000 Subject: [llvm-commits] [llvm] r103428 - in /llvm/trunk: lib/VMCore/AsmWriter.cpp test/Feature/metadata.ll Message-ID: <20100510205317.91451312800A@llvm.org> Author: lattner Date: Mon May 10 15:53:17 2010 New Revision: 103428 URL: http://llvm.org/viewvc/llvm-project?rev=103428&view=rev Log: fix PR7105 by enumerating MDNodes on all @llvm.foo function calls, not just recognized intrinsics. Added: llvm/trunk/test/Feature/metadata.ll Modified: llvm/trunk/lib/VMCore/AsmWriter.cpp Modified: llvm/trunk/lib/VMCore/AsmWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/AsmWriter.cpp?rev=103428&r1=103427&r2=103428&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/AsmWriter.cpp (original) +++ llvm/trunk/lib/VMCore/AsmWriter.cpp Mon May 10 15:53:17 2010 @@ -677,11 +677,16 @@ if (!I->getType()->isVoidTy() && !I->hasName()) CreateFunctionSlot(I); - // Intrinsics can directly use metadata. - if (isa(I)) - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (MDNode *N = dyn_cast_or_null(I->getOperand(i))) - CreateMetadataSlot(N); + // Intrinsics can directly use metadata. 
We allow direct calls to any + // llvm.foo function here, because the target may not be linked into the + // optimizer. + if (const CallInst *CI = dyn_cast(I)) { + if (Function *F = CI->getCalledFunction()) + if (F->getName().startswith("llvm.")) + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (MDNode *N = dyn_cast_or_null(I->getOperand(i))) + CreateMetadataSlot(N); + } // Process metadata attached with this instruction. I->getAllMetadata(MDForInst); Added: llvm/trunk/test/Feature/metadata.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Feature/metadata.ll?rev=103428&view=auto ============================================================================== --- llvm/trunk/test/Feature/metadata.ll (added) +++ llvm/trunk/test/Feature/metadata.ll Mon May 10 15:53:17 2010 @@ -0,0 +1,13 @@ +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis +; PR7105 + +define void @foo() { + call void @llvm.zonk(metadata !1, i64 0, metadata !1) + ret void +} + +declare void @llvm.zonk(metadata, i64, metadata) nounwind readnone + +!named = !{!0} +!0 = metadata !{i8** null} +!1 = metadata !{i8* null} From sabre at nondot.org Mon May 10 15:58:42 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 10 May 2010 20:58:42 -0000 Subject: [llvm-commits] [llvm] r103430 - /llvm/trunk/lib/VMCore/Verifier.cpp Message-ID: <20100510205842.B2CF2312800A@llvm.org> Author: lattner Date: Mon May 10 15:58:42 2010 New Revision: 103430 URL: http://llvm.org/viewvc/llvm-project?rev=103430&view=rev Log: Simplify by using startswith instead of substr. 
Modified: llvm/trunk/lib/VMCore/Verifier.cpp Modified: llvm/trunk/lib/VMCore/Verifier.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Verifier.cpp?rev=103430&r1=103429&r2=103430&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Verifier.cpp (original) +++ llvm/trunk/lib/VMCore/Verifier.cpp Mon May 10 15:58:42 2010 @@ -1152,7 +1152,7 @@ Assert1(CS.arg_size() == FTy->getNumParams(), "Incorrect number of arguments passed to called function!", I); - // Verify that all arguments to the call match the function type... + // Verify that all arguments to the call match the function type. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) Assert3(CS.getArgument(i)->getType() == FTy->getParamType(i), "Call parameter type does not match function signature!", @@ -1180,7 +1180,7 @@ // Verify that there's no metadata unless it's a direct call to an intrinsic. if (!CS.getCalledFunction() || CS.getCalledFunction()->getName().size() < 5 || - CS.getCalledFunction()->getName().substr(0, 5) != "llvm.") { + !CS.getCalledFunction()->getName().startswith("llvm.")) { for (FunctionType::param_iterator PI = FTy->param_begin(), PE = FTy->param_end(); PI != PE; ++PI) Assert1(!PI->get()->isMetadataTy(), From sabre at nondot.org Mon May 10 15:59:18 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 10 May 2010 20:59:18 -0000 Subject: [llvm-commits] [llvm] r103431 - /llvm/trunk/lib/VMCore/Verifier.cpp Message-ID: <20100510205918.CE616312800A@llvm.org> Author: lattner Date: Mon May 10 15:59:18 2010 New Revision: 103431 URL: http://llvm.org/viewvc/llvm-project?rev=103431&view=rev Log: simplify more. 
Modified: llvm/trunk/lib/VMCore/Verifier.cpp Modified: llvm/trunk/lib/VMCore/Verifier.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Verifier.cpp?rev=103431&r1=103430&r2=103431&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/Verifier.cpp (original) +++ llvm/trunk/lib/VMCore/Verifier.cpp Mon May 10 15:59:18 2010 @@ -1179,7 +1179,7 @@ } // Verify that there's no metadata unless it's a direct call to an intrinsic. - if (!CS.getCalledFunction() || CS.getCalledFunction()->getName().size() < 5 || + if (!CS.getCalledFunction() || !CS.getCalledFunction()->getName().startswith("llvm.")) { for (FunctionType::param_iterator PI = FTy->param_begin(), PE = FTy->param_end(); PI != PE; ++PI) From sabre at nondot.org Mon May 10 16:01:47 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 10 May 2010 21:01:47 -0000 Subject: [llvm-commits] [llvm] r103432 - /llvm/trunk/test/Other/2010-05-60-Printer.ll Message-ID: <20100510210147.71302312800A@llvm.org> Author: lattner Date: Mon May 10 16:01:47 2010 New Revision: 103432 URL: http://llvm.org/viewvc/llvm-project?rev=103432&view=rev Log: just remove this, it isn't needed. 
Modified: llvm/trunk/test/Other/2010-05-60-Printer.ll Modified: llvm/trunk/test/Other/2010-05-60-Printer.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/2010-05-60-Printer.ll?rev=103432&r1=103431&r2=103432&view=diff ============================================================================== --- llvm/trunk/test/Other/2010-05-60-Printer.ll (original) +++ llvm/trunk/test/Other/2010-05-60-Printer.ll Mon May 10 16:01:47 2010 @@ -1,4 +1,4 @@ -; RUN: llc -O2 -print-after-all < %s 2>&1 +; RUN: llc -O2 -print-after-all < %s define void @tester(){ ret void From daniel at zuster.org Mon May 10 16:08:27 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Mon, 10 May 2010 21:08:27 -0000 Subject: [llvm-commits] [test-suite] r103433 - /test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_text.c Message-ID: <20100510210827.A1B8E312800A@llvm.org> Author: ddunbar Date: Mon May 10 16:08:27 2010 New Revision: 103433 URL: http://llvm.org/viewvc/llvm-project?rev=103433&view=rev Log: Remove what appears to be an unnecessary include. 
Modified: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_text.c Modified: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_text.c URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_text.c?rev=103433&r1=103432&r2=103433&view=diff ============================================================================== --- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_text.c (original) +++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_text.c Mon May 10 16:08:27 2010 @@ -98,7 +98,6 @@ #include #ifdef C_DARWIN #include -#include #else #ifdef HAVE_MALLOC_H /* tk: FreeBSD-CURRENT doesn't support malloc.h */ #ifndef C_BSD /* BSD now uses stdlib.h */ From clattner at apple.com Mon May 10 16:19:19 2010 From: clattner at apple.com (Chris Lattner) Date: Mon, 10 May 2010 14:19:19 -0700 Subject: [llvm-commits] [llvm] r103425 - in /llvm/trunk: include/llvm/PassManager.h lib/VMCore/PassManager.cpp test/Other/2010-05-60-Printer.ll In-Reply-To: <20100510202427.AE20F312800A@llvm.org> References: <20100510202427.AE20F312800A@llvm.org> Message-ID: On May 10, 2010, at 1:24 PM, David Greene wrote: > URL: http://llvm.org/viewvc/llvm-project?rev=103425&view=rev > Log: > > Fix PR6875: > > This includes a patch by Roman Divacky to fix the initial crash. > > Move the actual addition of passes from *PassManager::add to > *PassManager::addImpl. That way, when adding printer passes we won't > recurse infinitely. > > Finally, check to make sure that we are actually adding a FunctionPass > to a FunctionPassManager before doing a print before or after it. > Immutable passes are strange in this way because they aren't > FunctionPasses yet they can be and are added to the FunctionPassManager. This patch broke the buildbot. Please test all patches before submitting or committing them. > Added: > llvm/trunk/test/Other/2010-05-60-Printer.ll You typo'd the name of the test, please svn mv it to the right name. 
> /// add - Add a pass to the queue of passes to run. This passes > /// ownership of the Pass to the PassManager. When the > /// PassManager_X is destroyed, the pass will be destroyed as well, so > /// there is no need to delete the pass. (TODO delete passes.) > /// This implies that all passes MUST be allocated with 'new'. > void FunctionPassManager::add(Pass *P) { > - if (ShouldPrintBeforePass(P)) > - add(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") > - + P->getPassName() + " ***")); > - FPM->add(P); > - > - if (ShouldPrintAfterPass(P)) > - add(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ") > - + P->getPassName() + " ***")); > + // If this is a not a function pass, don't add a printer for it. > + if (P->getPassKind() == PT_Function) > + if (ShouldPrintBeforePass(P)) > + addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ") > + + P->getPassName() + " ***")); This doesn't seem like a great fix here. You're seriously violating object orientation by doing this. Can't you sink the 'ShouldPrintAfterPass' logic down into the pass manager to the point where it actually adds the passes to the pass managers? You didn't really explain what the actual bug is that you're fixing, but I assume that it has to do with adding printers multiple times? -Chris From sabre at nondot.org Mon May 10 16:23:48 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 10 May 2010 21:23:48 -0000 Subject: [llvm-commits] [llvm] r103434 - /llvm/trunk/test/Other/2010-05-60-Printer.ll Message-ID: <20100510212348.BA5B4312800A@llvm.org> Author: lattner Date: Mon May 10 16:23:48 2010 New Revision: 103434 URL: http://llvm.org/viewvc/llvm-project?rev=103434&view=rev Log: this really is needed. 
:( Modified: llvm/trunk/test/Other/2010-05-60-Printer.ll Modified: llvm/trunk/test/Other/2010-05-60-Printer.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/2010-05-60-Printer.ll?rev=103434&r1=103433&r2=103434&view=diff ============================================================================== --- llvm/trunk/test/Other/2010-05-60-Printer.ll (original) +++ llvm/trunk/test/Other/2010-05-60-Printer.ll Mon May 10 16:23:48 2010 @@ -1,4 +1,4 @@ -; RUN: llc -O2 -print-after-all < %s +; RUN: llc -O2 -print-after-all < %s 2>&1 define void @tester(){ ret void From evan.cheng at apple.com Mon May 10 16:24:55 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 10 May 2010 21:24:55 -0000 Subject: [llvm-commits] [llvm] r103435 - /llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Message-ID: <20100510212455.F1B6C312800A@llvm.org> Author: evancheng Date: Mon May 10 16:24:55 2010 New Revision: 103435 URL: http://llvm.org/viewvc/llvm-project?rev=103435&view=rev Log: Clear RegSequences vector after eliminating REG_SEQUENCE instructions. 
Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=103435&r1=103434&r2=103435&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original) +++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Mon May 10 16:24:55 2010 @@ -1179,5 +1179,6 @@ MI->eraseFromParent(); } + RegSequences.clear(); return true; } From clattner at apple.com Mon May 10 16:26:23 2010 From: clattner at apple.com (Chris Lattner) Date: Mon, 10 May 2010 14:26:23 -0700 Subject: [llvm-commits] [patch] add support for a movi32 of a symbol to the ARM MC asm printer In-Reply-To: References: <7B47D23A-6261-479A-BD52-577214EEBB74@apple.com> Message-ID: <112064A7-D9A3-436A-8709-2AF86CE7DAC3@apple.com> On May 9, 2010, at 7:36 PM, Rafael Espindola wrote: >> Right, this isn't the right way to go here. In general, the MC stuff should use *fewer* instructions than codegen does. Things like "the tail call form of return" etc go away with MC. >> >> The right thing to do here depends on whether the reference to the global ends up with a special relocation type in the relocation entry. Assuming it does, adding new VariantKinds is the right way to go (currently VariantKind is going to be a union of all targets' relocation flags). The best way to handle this is to have isel set these as TargetFlags when it lowers the instruction, then have ARM's MCInstLowering convert the target flags to the VariantKinds. > > Ah, much better. The references do end up as different relocations > (R_ARM_MOVW_ABS_NC and R_ARM_MOVT_ABS). There are not that many > relocation types, so we might be able to have one enum for all > targets. Right. Ok, but please add a comment in MCExpr saying what these are.
Also, since printing syntax is implied by the enum, it's probably best to name it VK_ARM_HI16/VK_ARM_LO16. Please commit with this fix, thanks Rafael! > The attached patch fixes the same issue as before, but now by updating > VariantKind. There are currently no tests for > -enable-arm-mcinst-printer. Should I add one, or is this just something > we expect to make the default in a short(ish) time frame? I would really like for it to finish (so that the old instruction printer can be expunged), but I got half done and then got distracted. I probably won't get back to it for another month or two. I was testing it by just running it over large programs in llvm-test and diff'ing the output with the old instprinter. I don't think it was to the point yet where large programs were close to working, so no tests are really needed :) -Chris From evan.cheng at apple.com Mon May 10 16:25:30 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 10 May 2010 21:25:30 -0000 Subject: [llvm-commits] [llvm] r103436 - /llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp Message-ID: <20100510212530.E31BE312800A@llvm.org> Author: evancheng Date: Mon May 10 16:25:30 2010 New Revision: 103436 URL: http://llvm.org/viewvc/llvm-project?rev=103436&view=rev Log: It's not safe to propagate implicit_def that defines part of a register.
Modified: llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp Modified: llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp?rev=103436&r1=103435&r2=103436&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp (original) +++ llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp Mon May 10 16:25:30 2010 @@ -89,6 +89,8 @@ MachineInstr *MI = &*I; ++I; if (MI->isImplicitDef()) { + if (MI->getOperand(0).getSubReg()) + continue; unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { From evan.cheng at apple.com Mon May 10 16:26:25 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 10 May 2010 21:26:25 -0000 Subject: [llvm-commits] [llvm] r103437 - in /llvm/trunk/lib/Target/ARM: ARMISelDAGToDAG.cpp NEONPreAllocPass.cpp Message-ID: <20100510212625.10CA2312800A@llvm.org> Author: evancheng Date: Mon May 10 16:26:24 2010 New Revision: 103437 URL: http://llvm.org/viewvc/llvm-project?rev=103437&view=rev Log: Model some vld3 instructions with REG_SEQUENCE. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=103437&r1=103436&r2=103437&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Mon May 10 16:26:24 2010 @@ -1044,7 +1044,40 @@ const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; std::vector ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); + if (!llvm::ModelWithRegSequence() || NumVecs < 2) + return VLd; + + assert(NumVecs <= 4); + SDValue V0 = SDValue(VLd, 0); + SDValue V1 = SDValue(VLd, 1); + SDValue RegSeq; + + if (NumVecs == 2) + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + else { + SDValue V2 = SDValue(VLd, 2); + SDValue V3 = (NumVecs == 3) + ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : SDValue(VLd, 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, VT, RegSeq); + ReplaceUses(SDValue(N, 0), D0); + SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, VT, RegSeq); + ReplaceUses(SDValue(N, 1), D1); + + if (NumVecs > 2) { + SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_2, dl, VT, RegSeq); + ReplaceUses(SDValue(N, 2), D2); + } + if (NumVecs > 3) { + SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_3, dl, VT, RegSeq); + ReplaceUses(SDValue(N, 3), D3); + } + ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs)); + return NULL; } EVT RegVT = GetNEONSubregVT(VT); Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=103437&r1=103436&r2=103437&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Mon May 10 16:26:24 2010 @@ -33,7 +33,7 @@ private: bool FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs); + unsigned FirstOpnd, unsigned NumRegs) const; bool PreAllocNEONRegisters(MachineBasicBlock &MBB); }; @@ -338,18 +338,22 @@ return false; } -bool NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs) { - MachineInstr *RegSeq = 0; - unsigned LastSrcReg = 0; - unsigned LastSubIdx = 0; - for (unsigned R = 0; R < NumRegs; ++R) { - MachineOperand &MO = MI->getOperand(FirstOpnd + R); - assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); - unsigned VirtReg = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "expected a virtual register"); - if (MO.isDef()) { +bool +NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, + unsigned FirstOpnd, 
unsigned NumRegs) const { + MachineOperand &FMO = MI->getOperand(FirstOpnd); + assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand"); + unsigned VirtReg = FMO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "expected a virtual register"); + if (FMO.isDef()) { + MachineInstr *RegSeq = 0; + for (unsigned R = 0; R < NumRegs; ++R) { + const MachineOperand &MO = MI->getOperand(FirstOpnd + R); + assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); + unsigned VirtReg = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "expected a virtual register"); // Feeding into a REG_SEQUENCE. if (!MRI->hasOneNonDBGUse(VirtReg)) return false; @@ -359,25 +363,45 @@ if (RegSeq && RegSeq != UseMI) return false; RegSeq = UseMI; - } else { - // Extracting from a Q or QQ register. + } + + // Make sure trailing operands of REG_SEQUENCE are undef. + unsigned NumExps = (RegSeq->getNumOperands() - 1) / 2; + for (unsigned i = NumRegs * 2 + 1; i < NumExps; i += 2) { + const MachineOperand &MO = RegSeq->getOperand(i); + unsigned VirtReg = MO.getReg(); MachineInstr *DefMI = MRI->getVRegDef(VirtReg); - if (!DefMI || !DefMI->isExtractSubreg()) - return false; - VirtReg = DefMI->getOperand(1).getReg(); - if (LastSrcReg && LastSrcReg != VirtReg) + if (!DefMI || !DefMI->isImplicitDef()) return false; - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - if (NumRegs == 2) { - if (RC != ARM::QPRRegisterClass) - return false; - } else if (RC != ARM::QQPRRegisterClass) - return false; - unsigned SubIdx = DefMI->getOperand(2).getImm(); - if (LastSubIdx && LastSubIdx != SubIdx-1) - return false; - LastSubIdx = SubIdx; } + return true; + } + + unsigned LastSrcReg = 0; + unsigned LastSubIdx = 0; + for (unsigned R = 0; R < NumRegs; ++R) { + const MachineOperand &MO = MI->getOperand(FirstOpnd + R); + assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); + unsigned VirtReg = MO.getReg(); + 
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "expected a virtual register"); + // Extracting from a Q or QQ register. + MachineInstr *DefMI = MRI->getVRegDef(VirtReg); + if (!DefMI || !DefMI->isExtractSubreg()) + return false; + VirtReg = DefMI->getOperand(1).getReg(); + if (LastSrcReg && LastSrcReg != VirtReg) + return false; + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + if (NumRegs == 2) { + if (RC != ARM::QPRRegisterClass) + return false; + } else if (RC != ARM::QQPRRegisterClass) + return false; + unsigned SubIdx = DefMI->getOperand(2).getImm(); + if (LastSubIdx && LastSubIdx != SubIdx-1) + return false; + LastSubIdx = SubIdx; } return true; } From daniel at zuster.org Mon May 10 17:45:09 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Mon, 10 May 2010 22:45:09 -0000 Subject: [llvm-commits] [llvm] r103438 - in /llvm/trunk: include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp lib/MC/MCMachOStreamer.cpp Message-ID: <20100510224509.43268312800A@llvm.org> Author: ddunbar Date: Mon May 10 17:45:09 2010 New Revision: 103438 URL: http://llvm.org/viewvc/llvm-project?rev=103438&view=rev Log: MC/Mach-O: Explicitly track atoms, as represented by their defining symbol, for each fragment (not yet used). Modified: llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp llvm/trunk/lib/MC/MCMachOStreamer.cpp Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103438&r1=103437&r2=103438&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Mon May 10 17:45:09 2010 @@ -32,6 +32,7 @@ class MCSection; class MCSectionData; class MCSymbol; +class MCSymbolData; class MCValue; class TargetAsmBackend; @@ -78,6 +79,11 @@ /// Parent - The data for the section this fragment is in. 
MCSectionData *Parent; + /// Atom - The atom this fragment is in, as represented by it's defining + /// symbol. Atom's are only used by backends which set + /// \see MCAsmBackend::hasReliableSymbolDifference(). + MCSymbolData *Atom; + /// @name Assembler Backend Data /// @{ // @@ -110,6 +116,9 @@ MCSectionData *getParent() const { return Parent; } void setParent(MCSectionData *Value) { Parent = Value; } + MCSymbolData *getAtom() const { return Atom; } + void setAtom(MCSymbolData *Value) { Atom = Value; } + unsigned getOrdinal() const { return Ordinal; } void setOrdinal(unsigned Value) { Ordinal = Value; } Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103438&r1=103437&r2=103438&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Mon May 10 17:45:09 2010 @@ -133,17 +133,13 @@ SD->FileSize = Value; } - /// @} - /* *** */ MCFragment::MCFragment() : Kind(FragmentType(~0)) { } MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent) - : Kind(_Kind), - Parent(_Parent), - EffectiveSize(~UINT64_C(0)) + : Kind(_Kind), Parent(_Parent), Atom(0), EffectiveSize(~UINT64_C(0)) { if (Parent) Parent->getFragmentList().push_back(this); Modified: llvm/trunk/lib/MC/MCMachOStreamer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCMachOStreamer.cpp?rev=103438&r1=103437&r2=103438&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCMachOStreamer.cpp (original) +++ llvm/trunk/lib/MC/MCMachOStreamer.cpp Mon May 10 17:45:09 2010 @@ -31,6 +31,9 @@ MCAssembler Assembler; MCSectionData *CurSectionData; + /// Track the current atom for each section. 
+ DenseMap CurrentAtomMap; + private: MCFragment *getCurrentFragment() const { assert(CurSectionData && "No current section!"); @@ -46,10 +49,17 @@ MCDataFragment *getOrCreateDataFragment() const { MCDataFragment *F = dyn_cast_or_null(getCurrentFragment()); if (!F) - F = new MCDataFragment(CurSectionData); + F = createDataFragment(); return F; } + /// Create a new data fragment in the current section. + MCDataFragment *createDataFragment() const { + MCDataFragment *DF = new MCDataFragment(CurSectionData); + DF->setAtom(CurrentAtomMap.lookup(CurSectionData)); + return DF; + } + public: MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, raw_ostream &_OS, MCCodeEmitter *_Emitter) @@ -159,12 +169,23 @@ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); assert(CurSection && "Cannot emit before setting section!"); + MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); + + // Update the current atom map, if necessary. + bool MustCreateFragment = false; + if (Assembler.isSymbolLinkerVisible(&SD)) { + CurrentAtomMap[CurSectionData] = &SD; + + // We have to create a new fragment, fragments cannot span atoms. + MustCreateFragment = true; + } + // FIXME: This is wasteful, we don't necessarily need to create a data // fragment. Instead, we should mark the symbol as pointing into the data // fragment if it exists, otherwise we should just queue the label and set its // fragment pointer when we emit the next fragment. - MCDataFragment *F = getOrCreateDataFragment(); - MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); + MCDataFragment *F = + MustCreateFragment ? 
createDataFragment() : getOrCreateDataFragment(); assert(!SD.getFragment() && "Unexpected fragment on symbol data!"); SD.setFragment(F); SD.setOffset(F->getContents().size()); @@ -302,6 +323,8 @@ MCFragment *F = new MCZeroFillFragment(Size, ByteAlignment, &SectData); SD.setFragment(F); + if (Assembler.isSymbolLinkerVisible(&SD)) + F->setAtom(&SD); Symbol->setSection(*Section); @@ -336,8 +359,10 @@ unsigned MaxBytesToEmit) { if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, - false /* EmitNops */, CurSectionData); + MCFragment *F = new MCAlignFragment(ByteAlignment, Value, ValueSize, + MaxBytesToEmit, /*EmitNops=*/false, + CurSectionData); + F->setAtom(CurrentAtomMap.lookup(CurSectionData)); // Update the maximum alignment on the current section if necessary. if (ByteAlignment > CurSectionData->getAlignment()) @@ -348,8 +373,9 @@ unsigned MaxBytesToEmit) { if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, - true /* EmitNops */, CurSectionData); + MCFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, + /*EmitNops=*/true, CurSectionData); + F->setAtom(CurrentAtomMap.lookup(CurSectionData)); // Update the maximum alignment on the current section if necessary. if (ByteAlignment > CurSectionData->getAlignment()) @@ -358,7 +384,8 @@ void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { - new MCOrgFragment(*Offset, Value, CurSectionData); + MCFragment *F = new MCOrgFragment(*Offset, Value, CurSectionData); + F->setAtom(CurrentAtomMap.lookup(CurSectionData)); } void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { @@ -401,6 +428,7 @@ // are going to often know that we can never fully resolve a fixup. 
if (Assembler.getBackend().MayNeedRelaxation(Inst, AsmFixups)) { MCInstFragment *IF = new MCInstFragment(Inst, CurSectionData); + IF->setAtom(CurrentAtomMap.lookup(CurSectionData)); // Add the fixups and data. // From dpatel at apple.com Mon May 10 17:49:56 2010 From: dpatel at apple.com (Devang Patel) Date: Mon, 10 May 2010 22:49:56 -0000 Subject: [llvm-commits] [llvm] r103439 - in /llvm/trunk: lib/CodeGen/AsmPrinter/DwarfDebug.cpp lib/CodeGen/AsmPrinter/DwarfDebug.h test/DebugInfo/2010-05-10-MultipleCU.ll Message-ID: <20100510224956.229E2312800A@llvm.org> Author: dpatel Date: Mon May 10 17:49:55 2010 New Revision: 103439 URL: http://llvm.org/viewvc/llvm-project?rev=103439&view=rev Log: Enable multiple Compile Units in one module. This means now 'llvm-ld a.bc b.bc' will preserve debug info appropriately. Added: llvm/trunk/test/DebugInfo/2010-05-10-MultipleCU.ll Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=103439&r1=103438&r2=103439&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon May 10 17:49:55 2010 @@ -326,7 +326,7 @@ } DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) - : Asm(A), MMI(Asm->MMI), ModuleCU(0), + : Asm(A), MMI(Asm->MMI), FirstCU(0), AbbreviationsSet(InitAbbreviationsSetSize), CurrentFnDbgScope(0), PrevLabel(NULL) { NextStringPoolNumber = 0; @@ -881,22 +881,23 @@ } else if (Context.isNameSpace()) { DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); ContextDIE->addChild(Die); - } else if (DIE *ContextDIE = ModuleCU->getDIE(Context)) + } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) ContextDIE->addChild(Die); else - ModuleCU->addDie(Die); + 
getCompileUnit(Context)->addDie(Die); } /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) { - DIE *TyDIE = ModuleCU->getDIE(Ty); + CompileUnit *TypeCU = getCompileUnit(Ty); + DIE *TyDIE = TypeCU->getDIE(Ty); if (TyDIE) return TyDIE; // Create new type. TyDIE = new DIE(dwarf::DW_TAG_base_type); - ModuleCU->insertDIE(Ty, TyDIE); + TypeCU->insertDIE(Ty, TyDIE); if (Ty.isBasicType()) constructTypeDIE(*TyDIE, DIBasicType(Ty)); else if (Ty.isCompositeType()) @@ -916,7 +917,8 @@ return; // Check for pre-existence. - DIEEntry *Entry = ModuleCU->getDIEEntry(Ty); + CompileUnit *TypeCU = getCompileUnit(Ty); + DIEEntry *Entry = TypeCU->getDIEEntry(Ty); // If it exists then use the existing value. if (Entry) { Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); @@ -928,7 +930,7 @@ // Set up proxy. Entry = createDIEEntry(Buffer); - ModuleCU->insertDIEEntry(Ty, Entry); + TypeCU->insertDIEEntry(Ty, Entry); Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); } @@ -1123,15 +1125,16 @@ DIArray Elements = CTy->getTypeArray(); // Get an anonymous type for index type. - DIE *IdxTy = ModuleCU->getIndexTyDie(); + CompileUnit *TheCU = getCompileUnit(*CTy); + DIE *IdxTy = TheCU->getIndexTyDie(); if (!IdxTy) { // Construct an anonymous type for index type. IdxTy = new DIE(dwarf::DW_TAG_base_type); addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, dwarf::DW_ATE_signed); - ModuleCU->addDie(IdxTy); - ModuleCU->setIndexTyDie(IdxTy); + TheCU->addDie(IdxTy); + TheCU->setIndexTyDie(IdxTy); } // Add subranges to array type. @@ -1265,7 +1268,8 @@ /// createSubprogramDIE - Create new DIE using SP. 
DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { - DIE *SPDie = ModuleCU->getDIE(SP); + CompileUnit *SPCU = getCompileUnit(SP); + DIE *SPDie = SPCU->getDIE(SP); if (SPDie) return SPDie; @@ -1338,7 +1342,7 @@ addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); // DW_TAG_inlined_subroutine may refer to this DIE. - ModuleCU->insertDIE(SP, SPDie); + SPCU->insertDIE(SP, SPDie); return SPDie; } @@ -1388,7 +1392,8 @@ /// If there are global variables in this scope then create and insert /// DIEs for these variables. DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { - DIE *SPDie = ModuleCU->getDIE(SPNode); + CompileUnit *SPCU = getCompileUnit(SPNode); + DIE *SPDie = SPCU->getDIE(SPNode); assert(SPDie && "Unable to find subprogram DIE!"); DISubprogram SP(SPNode); @@ -1419,7 +1424,7 @@ SPDie = new DIE(dwarf::DW_TAG_subprogram); addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, SPDeclDie); - ModuleCU->addDie(SPDie); + SPCU->addDie(SPDie); } addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, @@ -1508,7 +1513,8 @@ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); DISubprogram InlinedSP = getDISubprogram(DS); - DIE *OriginDIE = ModuleCU->getDIE(InlinedSP); + CompileUnit *TheCU = getCompileUnit(InlinedSP); + DIE *OriginDIE = TheCU->getDIE(InlinedSP); assert(OriginDIE && "Unable to find Origin DIE!"); addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, OriginDIE); @@ -1530,7 +1536,7 @@ I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); DILocation DL(Scope->getInlinedAt()); - addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID()); + addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); return ScopeDIE; @@ -1571,7 +1577,7 @@ if (AbsDIE) { DIScope DS(Scope->getScopeNode()); DISubprogram InlinedSP = getDISubprogram(DS); - DIE *OriginSPDIE = ModuleCU->getDIE(InlinedSP); + DIE 
*OriginSPDIE = getCompileUnit(InlinedSP)->getDIE(InlinedSP); (void) OriginSPDIE; assert(OriginSPDIE && "Unable to find Origin DIE for the SP!"); DIE *AbsDIE = DV->getAbstractVariable()->getDIE(); @@ -1660,8 +1666,9 @@ DICompositeType CATy = getDICompositeType(ATy); if (DIDescriptor(CATy).Verify() && !CATy.getName().empty() && !CATy.isForwardDecl()) { - if (DIEEntry *Entry = ModuleCU->getDIEEntry(CATy)) - ModuleCU->addGlobalType(CATy.getName(), Entry->getEntry()); + CompileUnit *TheCU = getCompileUnit(CATy); + if (DIEEntry *Entry = TheCU->getDIEEntry(CATy)) + TheCU->addGlobalType(CATy.getName(), Entry->getEntry()); } } } @@ -1677,7 +1684,7 @@ ScopeDIE = constructInlinedScopeDIE(Scope); else if (DS.isSubprogram()) { if (Scope->isAbstractScope()) - ScopeDIE = ModuleCU->getDIE(DS); + ScopeDIE = getCompileUnit(DS)->getDIE(DS); else ScopeDIE = updateSubprogramScopeDIE(DS); } @@ -1747,11 +1754,12 @@ /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) { - DIE *NDie = ModuleCU->getDIE(NS); + CompileUnit *TheCU = getCompileUnit(NS); + DIE *NDie = TheCU->getDIE(NS); if (NDie) return NDie; NDie = new DIE(dwarf::DW_TAG_namespace); - ModuleCU->insertDIE(NS, NDie); + TheCU->insertDIE(NS, NDie); if (!NS.getName().empty()) addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName()); addSourceLine(NDie, &NS); @@ -1759,12 +1767,10 @@ return NDie; } +/// constructCompileUnit - Create new CompileUnit for the given +/// metadata node with tag DW_TAG_compile_unit. void DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); - // Use first compile unit marked as isMain as the compile unit for this - // module. 
- if (ModuleCU || !DIUnit.isMain()) - return; StringRef FN = DIUnit.getFilename(); StringRef Dir = DIUnit.getDirectory(); unsigned ID = GetOrCreateSourceID(Dir, FN); @@ -1797,11 +1803,43 @@ addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); - assert(!ModuleCU && - "ModuleCU assigned since the top of constructCompileUnit"); - ModuleCU = new CompileUnit(ID, Die); + CompileUnit *NewCU = new CompileUnit(ID, Die); + if (!FirstCU) + FirstCU = NewCU; + CUMap.insert(std::make_pair(N, NewCU)); +} + +/// getCompielUnit - Get CompileUnit DIE. +CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const { + assert (N && "Invalid DwarfDebug::getCompileUnit argument!"); + DIDescriptor D(N); + const MDNode *CUNode = NULL; + if (D.isCompileUnit()) + CUNode = N; + else if (D.isSubprogram()) + CUNode = DISubprogram(N).getCompileUnit(); + else if (D.isType()) + CUNode = DIType(N).getCompileUnit(); + else if (D.isGlobalVariable()) + CUNode = DIGlobalVariable(N).getCompileUnit(); + else if (D.isVariable()) + CUNode = DIVariable(N).getCompileUnit(); + else if (D.isNameSpace()) + CUNode = DINameSpace(N).getCompileUnit(); + else if (D.isFile()) + CUNode = DIFile(N).getCompileUnit(); + else + return FirstCU; + + DenseMap<const MDNode *, CompileUnit *>::const_iterator I + = CUMap.find(CUNode); + if (I == CUMap.end()) + return FirstCU; + return I->second; } + +/// constructGlobalVariableDIE - Construct global variable DIE. void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { DIGlobalVariable DI_GV(N); @@ -1810,7 +1848,8 @@ return; // Check for pre-existence. - if (ModuleCU->getDIE(DI_GV)) + CompileUnit *TheCU = getCompileUnit(N); + if (TheCU->getDIE(DI_GV)) return; DIE *VariableDie = createGlobalVariableDIE(DI_GV); @@ -1818,7 +1857,7 @@ return; // Add to map. - ModuleCU->insertDIE(N, VariableDie); + TheCU->insertDIE(N, VariableDie); // Add to context owner. 
DIDescriptor GVContext = DI_GV.getContext(); @@ -1837,7 +1876,7 @@ Asm->Mang->getSymbol(DI_GV.getGlobal())); addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - ModuleCU->addDie(VariableSpecDIE); + TheCU->addDie(VariableSpecDIE); } else { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); @@ -1848,23 +1887,25 @@ addToContextOwner(VariableDie, GVContext); // Expose as global. FIXME - need to check external flag. - ModuleCU->addGlobal(DI_GV.getName(), VariableDie); + TheCU->addGlobal(DI_GV.getName(), VariableDie); DIType GTy = DI_GV.getType(); if (GTy.isCompositeType() && !GTy.getName().empty() && !GTy.isForwardDecl()) { - DIEEntry *Entry = ModuleCU->getDIEEntry(GTy); + DIEEntry *Entry = TheCU->getDIEEntry(GTy); assert(Entry && "Missing global type!"); - ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry()); + TheCU->addGlobalType(GTy.getName(), Entry->getEntry()); } return; } +/// construct SubprogramDIE - Construct subprogram DIE. void DwarfDebug::constructSubprogramDIE(const MDNode *N) { DISubprogram SP(N); // Check for pre-existence. - if (ModuleCU->getDIE(N)) + CompileUnit *TheCU = getCompileUnit(N); + if (TheCU->getDIE(N)) return; if (!SP.isDefinition()) @@ -1875,13 +1916,13 @@ DIE *SubprogramDie = createSubprogramDIE(SP); // Add to map. - ModuleCU->insertDIE(N, SubprogramDie); + TheCU->insertDIE(N, SubprogramDie); // Add to context owner. addToContextOwner(SubprogramDie, SP.getContext()); // Expose as global. - ModuleCU->addGlobal(SP.getName(), SubprogramDie); + TheCU->addGlobal(SP.getName(), SubprogramDie); return; } @@ -1955,7 +1996,7 @@ /// endModule - Emit all Dwarf sections that should come after the content. /// void DwarfDebug::endModule() { - if (!ModuleCU) return; + if (!FirstCU) return; // Attach DW_AT_inline attribute with inlined subprogram DIEs. 
for (SmallPtrSet::iterator AI = InlinedSubprogramDIEs.begin(), @@ -1969,7 +2010,7 @@ DIE *SPDie = CI->first; const MDNode *N = dyn_cast_or_null(CI->second); if (!N) continue; - DIE *NDie = ModuleCU->getDIE(N); + DIE *NDie = getCompileUnit(N)->getDIE(N); if (!NDie) continue; addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); } @@ -2030,8 +2071,10 @@ // Emit info into a debug str section. emitDebugStr(); - delete ModuleCU; - ModuleCU = NULL; // Reset for the next Module, if any. + for (DenseMap::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) + delete I->second; + FirstCU = NULL; // Reset for the next Module, if any. } /// findAbstractVariable - Find abstract variable, if any, associated with Var. @@ -2670,14 +2713,18 @@ /// computeSizeAndOffsets - Compute the size and offset of all the DIEs. /// void DwarfDebug::computeSizeAndOffsets() { - // Compute size of compile unit header. - static unsigned Offset = - sizeof(int32_t) + // Length of Compilation Unit Info - sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t); // Pointer Size (in bytes) - - computeSizeAndOffset(ModuleCU->getCUDie(), Offset, true); + unsigned PrevOffset = 0; + for (DenseMap::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + // Compute size of compile unit header. + static unsigned Offset = PrevOffset + + sizeof(int32_t) + // Length of Compilation Unit Info + sizeof(int16_t) + // DWARF version number + sizeof(int32_t) + // Offset Into Abbrev. Section + sizeof(int8_t); // Pointer Size (in bytes) + computeSizeAndOffset(I->second->getCUDie(), Offset, true); + PrevOffset = Offset; + } } /// EmitSectionSym - Switch to the specified MCSection and emit an assembler @@ -2798,37 +2845,41 @@ // Start debug info section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfInfoSection()); - DIE *Die = ModuleCU->getCUDie(); - - // Emit the compile units header. 
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin", - ModuleCU->getID())); - - // Emit size of content not including length itself - unsigned ContentSize = Die->getSize() + - sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t) + // Pointer Size (in bytes) - sizeof(int32_t); // FIXME - extra pad for gdb bug. - - Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); - Asm->EmitInt32(ContentSize); - Asm->OutStreamer.AddComment("DWARF version number"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"), - DwarfAbbrevSectionSym); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getTargetData().getPointerSize()); - - emitDIE(Die); - // FIXME - extra padding for gdb bug. - Asm->OutStreamer.AddComment("4 extra padding bytes for GDB"); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", ModuleCU->getID())); + for (DenseMap::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + DIE *Die = TheCU->getCUDie(); + + // Emit the compile units header. + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin", + TheCU->getID())); + + // Emit size of content not including length itself + unsigned ContentSize = Die->getSize() + + sizeof(int16_t) + // DWARF version number + sizeof(int32_t) + // Offset Into Abbrev. Section + sizeof(int8_t) + // Pointer Size (in bytes) + sizeof(int32_t); // FIXME - extra pad for gdb bug. + + Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); + Asm->EmitInt32(ContentSize); + Asm->OutStreamer.AddComment("DWARF version number"); + Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->OutStreamer.AddComment("Offset Into Abbrev. 
Section"); + Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"), + DwarfAbbrevSectionSym); + Asm->OutStreamer.AddComment("Address Size (in bytes)"); + Asm->EmitInt8(Asm->getTargetData().getPointerSize()); + + emitDIE(Die); + // FIXME - extra padding for gdb bug. + Asm->OutStreamer.AddComment("4 extra padding bytes for GDB"); + Asm->EmitInt8(0); + Asm->EmitInt8(0); + Asm->EmitInt8(0); + Asm->EmitInt8(0); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID())); + } } /// emitAbbreviations - Emit the abbreviation section. @@ -3153,91 +3204,99 @@ /// emitDebugPubNames - Emit visible names into a debug pubnames section. /// void DwarfDebug::emitDebugPubNames() { - // Start the dwarf pubnames section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubNamesSection()); - - Asm->OutStreamer.AddComment("Length of Public Names Info"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubnames_end", ModuleCU->getID()), - Asm->GetTempSymbol("pubnames_begin", ModuleCU->getID()), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", - ModuleCU->getID())); - - Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - - Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", ModuleCU->getID()), - DwarfInfoSectionSym); - - Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", ModuleCU->getID()), - Asm->GetTempSymbol("info_begin", ModuleCU->getID()), - 4); - - const StringMap &Globals = ModuleCU->getGlobals(); - for (StringMap::const_iterator - GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); - DIE *Entity = GI->second; - - Asm->OutStreamer.AddComment("DIE offset"); - Asm->EmitInt32(Entity->getOffset()); + for (DenseMap::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = 
I->second; + // Start the dwarf pubnames section. + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfPubNamesSection()); - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); + Asm->OutStreamer.AddComment("Length of Public Names Info"); + Asm->EmitLabelDifference( + Asm->GetTempSymbol("pubnames_end", TheCU->getID()), + Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", + TheCU->getID())); + + Asm->OutStreamer.AddComment("DWARF Version"); + Asm->EmitInt16(dwarf::DWARF_VERSION); + + Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); + Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()), + DwarfInfoSectionSym); + + Asm->OutStreamer.AddComment("Compilation Unit Length"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()), + Asm->GetTempSymbol("info_begin", TheCU->getID()), + 4); + + const StringMap &Globals = TheCU->getGlobals(); + for (StringMap::const_iterator + GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + DIE *Entity = GI->second; + + Asm->OutStreamer.AddComment("DIE offset"); + Asm->EmitInt32(Entity->getOffset()); + + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("External Name"); + Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); + } + + Asm->OutStreamer.AddComment("End Mark"); + Asm->EmitInt32(0); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", + TheCU->getID())); } - - Asm->OutStreamer.AddComment("End Mark"); - Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", - ModuleCU->getID())); } void DwarfDebug::emitDebugPubTypes() { - // Start the dwarf pubnames section. 
- Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubTypesSection()); - Asm->OutStreamer.AddComment("Length of Public Types Info"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubtypes_end", ModuleCU->getID()), - Asm->GetTempSymbol("pubtypes_begin", ModuleCU->getID()), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin", - ModuleCU->getID())); - - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - - Asm->OutStreamer.AddComment("Offset of Compilation ModuleCU Info"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", ModuleCU->getID()), - DwarfInfoSectionSym); - - Asm->OutStreamer.AddComment("Compilation ModuleCU Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", ModuleCU->getID()), - Asm->GetTempSymbol("info_begin", ModuleCU->getID()), - 4); - - const StringMap &Globals = ModuleCU->getGlobalTypes(); - for (StringMap::const_iterator - GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); - DIE * Entity = GI->second; - - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); - Asm->EmitInt32(Entity->getOffset()); - - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0); - } - - Asm->OutStreamer.AddComment("End Mark"); - Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end", - ModuleCU->getID())); + for (DenseMap::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + // Start the dwarf pubnames section. 
+ Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfPubTypesSection()); + Asm->OutStreamer.AddComment("Length of Public Types Info"); + Asm->EmitLabelDifference( + Asm->GetTempSymbol("pubtypes_end", TheCU->getID()), + Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin", + TheCU->getID())); + + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version"); + Asm->EmitInt16(dwarf::DWARF_VERSION); + + Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); + Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()), + DwarfInfoSectionSym); + + Asm->OutStreamer.AddComment("Compilation Unit Length"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()), + Asm->GetTempSymbol("info_begin", TheCU->getID()), + 4); + + const StringMap &Globals = TheCU->getGlobalTypes(); + for (StringMap::const_iterator + GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + DIE * Entity = GI->second; + + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); + Asm->EmitInt32(Entity->getOffset()); + + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0); + } + + Asm->OutStreamer.AddComment("End Mark"); + Asm->EmitInt32(0); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end", + TheCU->getID())); + } } /// emitDebugStr - Emit visible names into a debug str section. 
@@ -3335,7 +3394,7 @@ if (!Asm->MAI->doesDwarfUsesInlineInfoSection()) return; - if (!ModuleCU) + if (!FirstCU) return; Asm->OutStreamer.SwitchSection( Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=103439&r1=103438&r2=103439&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Mon May 10 17:49:55 2010 @@ -82,8 +82,8 @@ // Attributes used to construct specific Dwarf sections. // - /// ModuleCU - All DIEs are inserted in ModuleCU. - CompileUnit *ModuleCU; + CompileUnit *FirstCU; + DenseMap<const MDNode *, CompileUnit *> CUMap; /// AbbreviationsSet - Used to uniquely define abbreviations. /// @@ -506,10 +506,17 @@ /// maps as well. unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName); + /// constructCompileUnit - Create new CompileUnit for the given + /// metadata node with tag DW_TAG_compile_unit. void constructCompileUnit(const MDNode *N); + /// getCompielUnit - Get CompileUnit DIE. + CompileUnit *getCompileUnit(const MDNode *N) const; + + /// constructGlobalVariableDIE - Construct global variable DIE. void constructGlobalVariableDIE(const MDNode *N); + /// construct SubprogramDIE - Construct subprogram DIE. void constructSubprogramDIE(const MDNode *N); // FIXME: This should go away in favor of complex addresses. 
Added: llvm/trunk/test/DebugInfo/2010-05-10-MultipleCU.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/2010-05-10-MultipleCU.ll?rev=103439&view=auto ============================================================================== --- llvm/trunk/test/DebugInfo/2010-05-10-MultipleCU.ll (added) +++ llvm/trunk/test/DebugInfo/2010-05-10-MultipleCU.ll Mon May 10 17:49:55 2010 @@ -0,0 +1,30 @@ +; RUN: llc -O0 -asm-verbose %s -o %t +; RUN: grep DW_TAG_compile_unit %t | count 3 +; One for a.c, second one for b.c and third one for abbrev. + +define i32 @foo() nounwind readnone ssp { +return: + ret i32 42, !dbg !0 +} + +define i32 @bar() nounwind readnone ssp { +return: + ret i32 21, !dbg !8 +} + +!0 = metadata !{i32 3, i32 0, metadata !1, null} +!1 = metadata !{i32 524299, metadata !2, i32 2, i32 0} ; [ DW_TAG_lexical_block ] +!2 = metadata !{i32 524334, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", metadata !3, i32 2, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ] +!3 = metadata !{i32 524329, metadata !"a.c", metadata !"/tmp/", metadata !4} ; [ DW_TAG_file_type ] +!4 = metadata !{i32 524305, i32 0, i32 1, metadata !"a.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!5 = metadata !{i32 524309, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ] +!6 = metadata !{metadata !7} +!7 = metadata !{i32 524324, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 3, i32 0, metadata !9, null} +!9 = metadata !{i32 524299, metadata !10, i32 2, i32 0} ; [ DW_TAG_lexical_block ] +!10 = metadata !{i32 524334, i32 0, metadata !11, metadata !"bar", metadata !"bar", metadata !"bar", metadata !11, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 524329, metadata !"b.c", metadata !"/tmp/", metadata !12} ; [ DW_TAG_file_type ] +!12 = metadata !{i32 524305, i32 0, i32 1, metadata !"b.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!13 = metadata !{i32 524309, metadata !11, metadata !"", metadata !11, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ] +!14 = metadata !{metadata !15} +!15 = metadata !{i32 524324, metadata !11, metadata !"int", metadata !11, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] From evan.cheng at apple.com Mon May 10 18:08:19 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 10 May 2010 23:08:19 -0000 Subject: [llvm-commits] [llvm] r103441 - /llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Message-ID: <20100510230819.EDBC1312800A@llvm.org> Author: evancheng Date: Mon May 10 18:08:19 2010 New Revision: 103441 URL: http://llvm.org/viewvc/llvm-project?rev=103441&view=rev Log: Indentation. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp?rev=103441&r1=103440&r2=103441&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Mon May 10 18:08:19 2010 @@ -532,10 +532,10 @@ if (i & 1) { unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); - const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); - const TargetRegisterClass *SRC = - getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0)); - assert(SRC == RC && "Invalid subregister index in REG_SEQUENCE"); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = + getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0)); + assert(SRC == RC && "Invalid subregister index in REG_SEQUENCE"); } #endif AddOperand(MI, Op, i+1, &II, VRBaseMap); From daniel at zuster.org Mon May 10 18:15:13 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Mon, 10 May 2010 23:15:13 -0000 Subject: [llvm-commits] [llvm] r103442 - /llvm/trunk/lib/MC/MachObjectWriter.cpp Message-ID: <20100510231513.42A6A312800A@llvm.org> Author: ddunbar Date: Mon May 10 18:15:13 2010 New Revision: 103442 URL: http://llvm.org/viewvc/llvm-project?rev=103442&view=rev Log: MC/Mach-O: Factor out doesSymbolRequireExternRelocation. 
Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MachObjectWriter.cpp?rev=103442&r1=103441&r2=103442&view=diff ============================================================================== --- llvm/trunk/lib/MC/MachObjectWriter.cpp (original) +++ llvm/trunk/lib/MC/MachObjectWriter.cpp Mon May 10 18:15:13 2010 @@ -59,6 +59,20 @@ Kind == X86::reloc_riprel_4byte_movq_load; } +static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) { + // Undefined symbols are always extern. + if (SD->Symbol->isUndefined()) + return true; + + // References to weak definitions require external relocation entries; the + // definition may not always be the one in the same object file. + if (SD->getFlags() & SF_WeakDefinition) + return true; + + // Otherwise, we can use an internal relocation. + return false; +} + namespace { class MachObjectWriterImpl { @@ -754,18 +768,14 @@ const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); MCSymbolData *SD = &Asm.getSymbolData(*Symbol); - // Both references to undefined symbols and references to Weak Definitions - // get external relocation entries. This is so the static and then the - // the dynamic linker can resolve them to the actual definition that will - // be used. And in the case of Weak Definitions a reference to one will - // not always be to the definition in the same object file. - if (Symbol->isUndefined() || (SD->getFlags() & SF_WeakDefinition)) { + // Check whether we need an external or internal relocation. + if (doesSymbolRequireExternRelocation(SD)) { IsExtern = 1; Index = SD->getIndex(); - // In the case of a Weak Definition the FixedValue needs to be set to - // to not have the address of the symbol. In the case of an undefined - // symbol you can't call getSymbolAddress(). 
- if (SD->getFlags() & SF_WeakDefinition) + // For external relocations, make sure to offset the fixup value to + // compensate for the addend of the symbol address, if it was + // undefined. This occurs with weak definitions, for example. + if (!SD->Symbol->isUndefined()) FixedValue -= Layout.getSymbolAddress(SD); Value = 0; } else { From daniel at zuster.org Mon May 10 18:15:20 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Mon, 10 May 2010 23:15:20 -0000 Subject: [llvm-commits] [llvm] r103443 - in /llvm/trunk: lib/MC/MachObjectWriter.cpp test/MC/MachO/reloc.s Message-ID: <20100510231520.96A1B312800A@llvm.org> Author: ddunbar Date: Mon May 10 18:15:20 2010 New Revision: 103443 URL: http://llvm.org/viewvc/llvm-project?rev=103443&view=rev Log: MC/Mach-O: Fix another mismatch with .weak_definition, we shouldn't use a scattered relocation entry with a .weak_definition. Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp llvm/trunk/test/MC/MachO/reloc.s Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MachObjectWriter.cpp?rev=103443&r1=103442&r2=103443&view=diff ============================================================================== --- llvm/trunk/lib/MC/MachObjectWriter.cpp (original) +++ llvm/trunk/lib/MC/MachObjectWriter.cpp Mon May 10 18:15:20 2010 @@ -740,15 +740,24 @@ // If this is a difference or a defined symbol plus an offset, then we need // a scattered relocation entry. + // Differences always require scattered relocations. + if (Target.getSymB()) + return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, + Target, FixedValue); + + // Get the symbol data, if any. + MCSymbolData *SD = 0; + if (Target.getSymA()) + SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + + // If this is an internal relocation with an offset, it also needs a + // scattered relocation entry. 
uint32_t Offset = Target.getConstant(); if (IsPCRel) Offset += 1 << Log2Size; - if (Target.getSymB() || - (Target.getSymA() && !Target.getSymA()->getSymbol().isUndefined() && - Offset)) { - RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,Target,FixedValue); - return; - } + if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) + return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, + Target, FixedValue); // See <reloc.h>. uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset; @@ -765,9 +774,6 @@ Type = RIT_Vanilla; Value = 0; } else { - const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); - MCSymbolData *SD = &Asm.getSymbolData(*Symbol); - // Check whether we need an external or internal relocation. if (doesSymbolRequireExternRelocation(SD)) { IsExtern = 1; Modified: llvm/trunk/test/MC/MachO/reloc.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/MachO/reloc.s?rev=103443&r1=103442&r2=103443&view=diff ============================================================================== --- llvm/trunk/test/MC/MachO/reloc.s (original) +++ llvm/trunk/test/MC/MachO/reloc.s Mon May 10 18:15:20 2010 @@ -50,6 +50,7 @@ _f1: .data .long _f1 + .long _f1 + 4 // CHECK: ('cputype', 7) // CHECK: ('cpusubtype', 3) @@ -63,9 +64,9 @@ // CHECK: ('size', 260) // CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('vm_addr', 0) -// CHECK: ('vm_size', 67) +// CHECK: ('vm_size', 71) // CHECK: ('file_offset', 392) -// CHECK: ('file_size', 67) +// CHECK: ('file_size', 71) // CHECK: ('maxprot', 7) // CHECK: ('initprot', 7) // CHECK: ('num_sections', 3) @@ -78,7 +79,7 @@ // CHECK: ('size', 8) // CHECK: ('offset', 392) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 460) +// CHECK: ('reloc_offset', 464) // CHECK: ('num_reloc', 1) // CHECK: ('flags', 0x80000400) // CHECK: ('reserved1', 0) @@ -89,61 +90,64 @@ // CHECK: (('word-0', 0x1), // CHECK: ('word-1', 0x5000003)), // CHECK: ]) -// CHECK: 
('_section_data', '\xe96\x00\x00\x00\xeb\xf9\xc3') +// CHECK: ('_section_data', '\xe9:\x00\x00\x00\xeb\xf9\xc3') // CHECK: # Section 1 // CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 8) -// CHECK: ('size', 47) +// CHECK: ('size', 51) // CHECK: ('offset', 400) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 468) -// CHECK: ('num_reloc', 10) +// CHECK: ('reloc_offset', 472) +// CHECK: ('num_reloc', 11) // CHECK: ('flags', 0x0) // CHECK: ('reserved1', 0) // CHECK: ('reserved2', 0) // CHECK: ), // CHECK: ('_relocations', [ // CHECK: # Relocation 0 -// CHECK: (('word-0', 0x2b), +// CHECK: (('word-0', 0x2f), // CHECK: ('word-1', 0xc000007)), // CHECK: # Relocation 1 +// CHECK: (('word-0', 0x2b), +// CHECK: ('word-1', 0xc000007)), +// CHECK: # Relocation 2 // CHECK: (('word-0', 0x8000002a), // CHECK: ('word-1', 0x18)), -// CHECK: # Relocation 2 +// CHECK: # Relocation 3 // CHECK: (('word-0', 0x90000028), // CHECK: ('word-1', 0x18)), -// CHECK: # Relocation 3 +// CHECK: # Relocation 4 // CHECK: (('word-0', 0xa0000024), // CHECK: ('word-1', 0x18)), -// CHECK: # Relocation 4 +// CHECK: # Relocation 5 // CHECK: (('word-0', 0xa0000020), // CHECK: ('word-1', 0x18)), -// CHECK: # Relocation 5 +// CHECK: # Relocation 6 // CHECK: (('word-0', 0xa4000014), // CHECK: ('word-1', 0x1c)), -// CHECK: # Relocation 6 +// CHECK: # Relocation 7 // CHECK: (('word-0', 0xa1000000), // CHECK: ('word-1', 0x24)), -// CHECK: # Relocation 7 +// CHECK: # Relocation 8 // CHECK: (('word-0', 0x8), // CHECK: ('word-1', 0x4000002)), -// CHECK: # Relocation 8 +// CHECK: # Relocation 9 // CHECK: (('word-0', 0x4), // CHECK: ('word-1', 0xc000009)), -// CHECK: # Relocation 9 +// CHECK: # Relocation 10 // CHECK: (('word-0', 0x0), // CHECK: ('word-1', 0xc000009)), // CHECK: ]) -// CHECK: ('_section_data', 
'\x00\x00\x00\x00\x04\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xed\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19\x00\x00\x00"\x00\x00\x00,\x00q\x00\x00\x00\x00') +// CHECK: ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xed\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x19\x00\x00\x00"\x00\x00\x00,\x00q\x00\x00\x00\x00\x04\x00\x00\x00') // CHECK: # Section 2 // CHECK: (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') -// CHECK: ('address', 55) +// CHECK: ('address', 59) // CHECK: ('size', 12) -// CHECK: ('offset', 447) +// CHECK: ('offset', 451) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 548) +// CHECK: ('reloc_offset', 560) // CHECK: ('num_reloc', 4) // CHECK: ('flags', 0x0) // CHECK: ('reserved1', 0) @@ -161,17 +165,17 @@ // CHECK: ('word-1', 0x18)), // CHECK: # Relocation 3 // CHECK: (('word-0', 0xa1000000), -// CHECK: ('word-1', 0x37)), +// CHECK: ('word-1', 0x3b)), // CHECK: ]) -// CHECK: ('_section_data', '\x02\x00\x00\x00;\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('_section_data', '\xfe\xff\xff\xff?\x00\x00\x00\x00\x00\x00\x00') // CHECK: ]) // CHECK: ), // CHECK: # Load Command 1 // CHECK: (('command', 2) // CHECK: ('size', 24) -// CHECK: ('symoff', 580) +// CHECK: ('symoff', 592) // CHECK: ('nsyms', 10) -// CHECK: ('stroff', 700) +// CHECK: ('stroff', 712) // CHECK: ('strsize', 88) // CHECK: ('_string_data', '\x00undef\x00local_a_ext\x00.objc_class_name_A\x00_f1\x00local_a\x00local_a_elt\x00local_b\x00local_c\x00bar\x00_f0\x00\x00\x00') // CHECK: ('_symbols', [ @@ -212,7 +216,7 @@ // CHECK: ('n_type', 0xe) // CHECK: ('n_sect', 3) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 55) +// CHECK: ('n_value', 59) // CHECK: ('_string', 'bar') // CHECK: ), // CHECK: # Symbol 5 From bob.wilson at apple.com Mon May 10 18:30:59 2010 From: bob.wilson at apple.com (Bob Wilson) Date: Mon, 10 May 
2010 23:30:59 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103445 - /llvm-gcc-4.2/trunk/gcc/config/i386/i386.h Message-ID: <20100510233059.60F27312800A@llvm.org> Author: bwilson Date: Mon May 10 18:30:59 2010 New Revision: 103445 URL: http://llvm.org/viewvc/llvm-project?rev=103445&view=rev Log: Fix a regression reported by Duncan for a test from a more recent version of GCC. Register names are allowed to be decimal values that are treated as indices into the table of register names. That doesn't make much sense on x86 but llvm-gcc was crashing on the test because it had a variable in register "1". Preserve the old behavior to avoid crashing. Modified: llvm-gcc-4.2/trunk/gcc/config/i386/i386.h Modified: llvm-gcc-4.2/trunk/gcc/config/i386/i386.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/i386/i386.h?rev=103445&r1=103444&r2=103445&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/i386/i386.h (original) +++ llvm-gcc-4.2/trunk/gcc/config/i386/i386.h Mon May 10 18:30:59 2010 @@ -3927,8 +3927,10 @@ the string extracted from the magic symbol built for that register, rather than reg_names. The latter maps both AH and AL to the same thing, which means we can't distinguish them. */ -#define LLVM_GET_REG_NAME(REG_NAME, REG_NUM) \ - ((REG_NAME) + (*(REG_NAME) == '%' ? 1 : 0)) +#define LLVM_GET_REG_NAME(REG_NAME, REG_NUM) __extension__ \ + ({ const char *nm = (REG_NAME); \ + if (*nm == '%' || *nm == '#') ++nm; \ + (ISDIGIT (*nm) ? 
reg_names[REG_NUM] : nm); }) /* Propagate code model setting to backend */ /* From evan.cheng at apple.com Mon May 10 19:04:31 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 11 May 2010 00:04:31 -0000 Subject: [llvm-commits] [llvm] r103449 - /llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Message-ID: <20100511000431.6BC34312800A@llvm.org> Author: evancheng Date: Mon May 10 19:04:31 2010 New Revision: 103449 URL: http://llvm.org/viewvc/llvm-project?rev=103449&view=rev Log: Ensure REG_SEQUENCE source operands are unique. Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=103449&r1=103448&r2=103449&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original) +++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Mon May 10 19:04:31 2010 @@ -1164,6 +1164,8 @@ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); llvm_unreachable(0); } + + SmallSet Seen; for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { unsigned SrcReg = MI->getOperand(i).getReg(); if (MI->getOperand(i).getSubReg() || @@ -1171,6 +1173,23 @@ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); llvm_unreachable(0); } + + if (!Seen.insert(SrcReg)) { + // REG_SEQUENCE cannot have duplicated operands. Add a copy. 
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + unsigned NewReg = MRI->createVirtualRegister(RC); + bool Emitted = + TII->copyRegToReg(*MI->getParent(), MI, NewReg, SrcReg, RC, RC, + MI->getDebugLoc()); + (void)Emitted; + assert(Emitted && "Unable to issue a copy instruction!\n"); + MI->getOperand(i).setReg(NewReg); + MI->getOperand(i).setIsKill(); + } + } + + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); unsigned SrcIdx = MI->getOperand(i+1).getImm(); UpdateRegSequenceSrcs(SrcReg, DstReg, SrcIdx, MRI); } From evan.cheng at apple.com Mon May 10 19:20:03 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 11 May 2010 00:20:03 -0000 Subject: [llvm-commits] [llvm] r103450 - in /llvm/trunk/lib/CodeGen: ProcessImplicitDefs.cpp RegAllocFast.cpp SimpleRegisterCoalescing.cpp VirtRegRewriter.cpp Message-ID: <20100511002003.A66AA312800A@llvm.org> Author: evancheng Date: Mon May 10 19:20:03 2010 New Revision: 103450 URL: http://llvm.org/viewvc/llvm-project?rev=103450&view=rev Log: It's not safe to eliminate copies where src and dst have different sub-register indices.
Modified: llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp llvm/trunk/lib/CodeGen/RegAllocFast.cpp llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp Modified: llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp?rev=103450&r1=103449&r2=103450&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp (original) +++ llvm/trunk/lib/CodeGen/ProcessImplicitDefs.cpp Mon May 10 19:20:03 2010 @@ -46,7 +46,7 @@ const TargetInstrInfo *tii_) { unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg) + Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) return true; if (OpIdx == 2 && MI->isSubregToReg()) @@ -220,7 +220,7 @@ // Turn a copy use into an implicit_def. unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg) { + Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) { RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); bool isKill = false; Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103450&r1=103449&r2=103450&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Mon May 10 19:20:03 2010 @@ -583,7 +583,8 @@ unsigned SrcCopyPhysReg = 0U; bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg); - if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg)) + if (isCopy && SrcCopySubReg == 0 && DstCopySubReg == 0 && + TargetRegisterInfo::isVirtualRegister(SrcCopyReg)) SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg); // Loop over the implicit uses, making sure 
they don't get reallocated. Modified: llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp?rev=103450&r1=103449&r2=103450&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp (original) +++ llvm/trunk/lib/CodeGen/SimpleRegisterCoalescing.cpp Mon May 10 19:20:03 2010 @@ -460,7 +460,7 @@ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) continue; - if (DstReg == IntB.reg) { + if (DstReg == IntB.reg && DstSubIdx == 0) { // This copy will become a noop. If it's defining a new val#, // remove that val# as well. However this live range is being // extended to the end of the existing live range defined by the copy. @@ -624,7 +624,7 @@ LR->valno->addKill(LastUseIdx.getDefIndex()); unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - DstReg == li.reg) { + DstReg == li.reg && DstSubIdx == 0) { // Last use is itself an identity code. int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg, false, tri_); LastUseMI->getOperand(DeadIdx).setIsDead(); @@ -810,6 +810,8 @@ unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx; if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx) && + CopySrcSubIdx == 0 && + CopyDstSubIdx == 0 && CopySrcReg != CopyDstReg && CopySrcReg == SrcReg && CopyDstReg != UseDstReg) { // If the use is a copy and it won't be coalesced away, and its source @@ -2637,7 +2639,7 @@ MachineInstr *UseMI = Use.getParent(); unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - SrcReg == DstReg) + SrcReg == DstReg && SrcSubIdx == DstSubIdx) // Ignore identity copies. 
continue; SlotIndex Idx = li_->getInstructionIndex(UseMI); @@ -2666,7 +2668,7 @@ // Ignore identity copies. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && - SrcReg == DstReg)) + SrcReg == DstReg && SrcSubIdx == DstSubIdx)) for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { MachineOperand &Use = MI->getOperand(i); if (Use.isReg() && Use.isUse() && Use.getReg() && @@ -2797,7 +2799,7 @@ // If the move will be an identity move delete it bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); - if (isMove && SrcReg == DstReg) { + if (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx) { if (li_->hasInterval(SrcReg)) { LiveInterval &RegInt = li_->getInterval(SrcReg); // If def of this move instruction is dead, remove its live range Modified: llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp?rev=103450&r1=103449&r2=103450&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp (original) +++ llvm/trunk/lib/CodeGen/VirtRegRewriter.cpp Mon May 10 19:20:03 2010 @@ -2425,7 +2425,8 @@ // eliminate this or else the undef marker is lost and it will // confuses the scavenger. This is extremely rare. unsigned Src, Dst, SrcSR, DstSR; - if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst && + if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && + Src == Dst && SrcSR == DstSR && !MI.findRegisterUseOperand(Src)->isUndef()) { ++NumDCE; DEBUG(dbgs() << "Removing now-noop copy: " << MI); @@ -2514,7 +2515,8 @@ // instruction before considering the dest reg to be changed. 
{ unsigned Src, Dst, SrcSR, DstSR; - if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) { + if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && + Src == Dst && SrcSR == DstSR) { ++NumDCE; DEBUG(dbgs() << "Removing now-noop copy: " << MI); InvalidateKills(MI, TRI, RegKills, KillOps); From isanbard at gmail.com Mon May 10 19:30:02 2010 From: isanbard at gmail.com (Bill Wendling) Date: Tue, 11 May 2010 00:30:02 -0000 Subject: [llvm-commits] [llvm] r103451 - in /llvm/trunk: include/llvm/Target/SubtargetFeature.h lib/Target/SubtargetFeature.cpp tools/lto/LTOCodeGenerator.cpp tools/lto/LTOModule.cpp Message-ID: <20100511003002.CB5E6312800A@llvm.org> Author: void Date: Mon May 10 19:30:02 2010 New Revision: 103451 URL: http://llvm.org/viewvc/llvm-project?rev=103451&view=rev Log: The getDefaultSubtargetFeatures method of SubtargetFeature did actually return a string of features for that target. However LTO was using that string to pass into the "create target machine" stuff. That stuff needed the feature string to be in a particular form. In particular, it needed the CPU specified first and then the attributes. If there isn't a CPU specified, it required it to be blank -- e.g., ",+altivec". Yuck. Modify the getDefaultSubtargetFeatures method to be a non-static member function. For all attributes for a specific subtarget, it will add them in like normal. It will also take a CPU string so that it can satisfy this horrible syntax. 
Modified: llvm/trunk/include/llvm/Target/SubtargetFeature.h llvm/trunk/lib/Target/SubtargetFeature.cpp llvm/trunk/tools/lto/LTOCodeGenerator.cpp llvm/trunk/tools/lto/LTOModule.cpp Modified: llvm/trunk/include/llvm/Target/SubtargetFeature.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/SubtargetFeature.h?rev=103451&r1=103450&r2=103451&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/SubtargetFeature.h (original) +++ llvm/trunk/include/llvm/Target/SubtargetFeature.h Mon May 10 19:30:02 2010 @@ -108,9 +108,10 @@ // Dump feature info. void dump() const; - /// Retrieve a formatted string of the default features for - /// the specified target triple. - static std::string getDefaultSubtargetFeatures(const Triple &Triple); + /// Retrieve a formatted string of the default features for the specified + /// target triple. + void getDefaultSubtargetFeatures(const std::string &CPU, + const Triple& Triple); }; } // End namespace llvm Modified: llvm/trunk/lib/Target/SubtargetFeature.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SubtargetFeature.cpp?rev=103451&r1=103450&r2=103451&view=diff ============================================================================== --- llvm/trunk/lib/Target/SubtargetFeature.cpp (original) +++ llvm/trunk/lib/Target/SubtargetFeature.cpp Mon May 10 19:30:02 2010 @@ -359,29 +359,41 @@ print(dbgs()); } -/// getDefaultSubtargetFeatures - Return a string listing -/// the features associated with the target triple. +/// getDefaultSubtargetFeatures - Return a string listing the features +/// associated with the target triple. /// /// FIXME: This is an inelegant way of specifying the features of a /// subtarget. It would be better if we could encode this information /// into the IR. See . 
/// -std::string SubtargetFeatures::getDefaultSubtargetFeatures( - const Triple& Triple) { +void SubtargetFeatures::getDefaultSubtargetFeatures(const std::string &CPU, + const Triple& Triple) { + setCPU(CPU); + + const char *Attrs = 0; + switch (Triple.getVendor()) { case Triple::Apple: switch (Triple.getArch()) { case Triple::ppc: // powerpc-apple-* - return std::string("altivec"); + Attrs = "altivec"; + break; case Triple::ppc64: // powerpc64-apple-* - return std::string("64bit,altivec"); + Attrs = "64bit,altivec"; + break; default: break; } break; default: break; - } + } + + StringRef SR(Attrs); - return std::string(""); + while (!SR.empty()) { + std::pair Res = SR.split(','); + AddFeature(Res.first); + SR = Res.second; + } } Modified: llvm/trunk/tools/lto/LTOCodeGenerator.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/lto/LTOCodeGenerator.cpp?rev=103451&r1=103450&r2=103451&view=diff ============================================================================== --- llvm/trunk/tools/lto/LTOCodeGenerator.cpp (original) +++ llvm/trunk/tools/lto/LTOCodeGenerator.cpp Mon May 10 19:30:02 2010 @@ -300,8 +300,9 @@ } // construct LTModule, hand over ownership of module and target - const std::string FeatureStr = - SubtargetFeatures::getDefaultSubtargetFeatures(llvm::Triple(Triple)); + SubtargetFeatures Features; + Features.getDefaultSubtargetFeatures("" /* cpu */, llvm::Triple(Triple)); + std::string FeatureStr = Features.getString(); _target = march->createTargetMachine(Triple, FeatureStr); } return false; Modified: llvm/trunk/tools/lto/LTOModule.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/lto/LTOModule.cpp?rev=103451&r1=103450&r2=103451&view=diff ============================================================================== --- llvm/trunk/tools/lto/LTOModule.cpp (original) +++ llvm/trunk/tools/lto/LTOModule.cpp Mon May 10 19:30:02 2010 @@ -140,8 +140,9 @@ return NULL; // construct LTModule, hand over ownership of module and target - 
const std::string FeatureStr = - SubtargetFeatures::getDefaultSubtargetFeatures(llvm::Triple(Triple)); + SubtargetFeatures Features; + Features.getDefaultSubtargetFeatures("" /* cpu */, llvm::Triple(Triple)); + std::string FeatureStr = Features.getString(); TargetMachine* target = march->createTargetMachine(Triple, FeatureStr); return new LTOModule(m.take(), target); } From evan.cheng at apple.com Mon May 10 20:19:40 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 11 May 2010 01:19:40 -0000 Subject: [llvm-commits] [llvm] r103453 - in /llvm/trunk/lib/Target/ARM: ARMISelDAGToDAG.cpp NEONPreAllocPass.cpp Message-ID: <20100511011940.89184312800A@llvm.org> Author: evancheng Date: Mon May 10 20:19:40 2010 New Revision: 103453 URL: http://llvm.org/viewvc/llvm-project?rev=103453&view=rev Log: Model some vst3 and vst4 with reg_sequence. Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=103453&r1=103452&r2=103453&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Mon May 10 20:19:40 2010 @@ -1049,14 +1049,16 @@ return VLd; assert(NumVecs <= 4); + SDValue RegSeq; SDValue V0 = SDValue(VLd, 0); SDValue V1 = SDValue(VLd, 1); - SDValue RegSeq; + // Form a REG_SEQUENCE to force register allocation. if (NumVecs == 2) RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); else { SDValue V2 = SDValue(VLd, 2); + // If it's a vld3, form a quad D-register but discard the last part. SDValue V3 = (NumVecs == 3) ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) : SDValue(VLd, 3); @@ -1183,12 +1185,44 @@ Ops.push_back(Align); if (is64BitVector) { - unsigned Opc = DOpcodes[OpcodeIndex]; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(N->getOperand(Vec+3)); + if (llvm::ModelWithRegSequence() && NumVecs >= 2) { + assert(NumVecs <= 4); + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + + // Form a REG_SEQUENCE to force register allocation. + if (NumVecs == 2) + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + else { + SDValue V2 = N->getOperand(2+3); + // If it's a vld3, form a quad D-register and leave the last part as + // an undef. + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + // Now extract the D registers back out. + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, VT, + RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, VT, + RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_2, dl, VT, + RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_3, dl, VT, + RegSeq)); + } else { + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(N->getOperand(Vec+3)); + } Ops.push_back(Pred); Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); + unsigned Opc = DOpcodes[OpcodeIndex]; return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); } @@ -1198,7 +1232,7 @@ // storing pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; if (llvm::ModelWithRegSequence() && NumVecs == 2) { - // First extract the quad D registers. + // First extract the pair of Q registers. 
SDValue Q0 = N->getOperand(3); SDValue Q1 = N->getOperand(4); Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=103453&r1=103452&r2=103453&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Mon May 10 20:19:40 2010 @@ -392,15 +392,19 @@ VirtReg = DefMI->getOperand(1).getReg(); if (LastSrcReg && LastSrcReg != VirtReg) return false; + LastSrcReg = VirtReg; const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - if (NumRegs == 2) { - if (RC != ARM::QPRRegisterClass) - return false; - } else if (RC != ARM::QQPRRegisterClass) + if (RC != ARM::QPRRegisterClass && RC != ARM::QQPRRegisterClass) return false; unsigned SubIdx = DefMI->getOperand(2).getImm(); - if (LastSubIdx && LastSubIdx != SubIdx-1) - return false; + if (LastSubIdx) { + if (LastSubIdx != SubIdx-1) + return false; + } else { + // Must start from arm_dsubreg_0 or arm_qsubreg_0. + if (SubIdx != ARM::DSUBREG_0 && SubIdx != ARM::QSUBREG_0) + return false; + } LastSubIdx = SubIdx; } return true; From scallanan at apple.com Mon May 10 20:27:09 2010 From: scallanan at apple.com (Sean Callanan) Date: Tue, 11 May 2010 01:27:09 -0000 Subject: [llvm-commits] [llvm] r103454 - in /llvm/trunk/tools: edis/EDInst.cpp llvm-mc/Disassembler.cpp Message-ID: <20100511012709.27F4A312800A@llvm.org> Author: spyffe Date: Mon May 10 20:27:08 2010 New Revision: 103454 URL: http://llvm.org/viewvc/llvm-project?rev=103454&view=rev Log: Extended the edis "IsBranch" property to call instructions as well. Added support for checking this to the llvm-mc tester as well. 
Modified: llvm/trunk/tools/edis/EDInst.cpp llvm/trunk/tools/llvm-mc/Disassembler.cpp Modified: llvm/trunk/tools/edis/EDInst.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/edis/EDInst.cpp?rev=103454&r1=103453&r2=103454&view=diff ============================================================================== --- llvm/trunk/tools/edis/EDInst.cpp (original) +++ llvm/trunk/tools/edis/EDInst.cpp Mon May 10 20:27:08 2010 @@ -81,7 +81,9 @@ bool EDInst::isBranch() { if (ThisInstInfo) - return ThisInstInfo->instructionType == kInstructionTypeBranch; + return + ThisInstInfo->instructionType == kInstructionTypeBranch || + ThisInstInfo->instructionType == kInstructionTypeCall; else return false; } Modified: llvm/trunk/tools/llvm-mc/Disassembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc/Disassembler.cpp?rev=103454&r1=103453&r2=103454&view=diff ============================================================================== --- llvm/trunk/tools/llvm-mc/Disassembler.cpp (original) +++ llvm/trunk/tools/llvm-mc/Disassembler.cpp Mon May 10 20:27:08 2010 @@ -314,6 +314,11 @@ } outs() << " "; + + if (EDInstIsBranch(inst)) + outs() << "
"; + if (EDInstIsMove(inst)) + outs() << " "; int numOperands = EDNumOperands(inst); From isanbard at gmail.com Mon May 10 20:33:39 2010 From: isanbard at gmail.com (Bill Wendling) Date: Tue, 11 May 2010 01:33:39 -0000 Subject: [llvm-commits] [llvm] r103455 - /llvm/trunk/lib/Target/SubtargetFeature.cpp Message-ID: <20100511013339.D030F312800A@llvm.org> Author: void Date: Mon May 10 20:33:39 2010 New Revision: 103455 URL: http://llvm.org/viewvc/llvm-project?rev=103455&view=rev Log: Don't create a StringRef with a NULL value. Modified: llvm/trunk/lib/Target/SubtargetFeature.cpp Modified: llvm/trunk/lib/Target/SubtargetFeature.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SubtargetFeature.cpp?rev=103455&r1=103454&r2=103455&view=diff ============================================================================== --- llvm/trunk/lib/Target/SubtargetFeature.cpp (original) +++ llvm/trunk/lib/Target/SubtargetFeature.cpp Mon May 10 20:33:39 2010 @@ -389,6 +389,8 @@ break; } + if (!Attrs) return; + StringRef SR(Attrs); while (!SR.empty()) { From greened at obbligato.org Mon May 10 22:25:28 2010 From: greened at obbligato.org (David Greene) Date: Mon, 10 May 2010 22:25:28 -0500 Subject: [llvm-commits] [llvm] r103427 - /llvm/trunk/test/Other/2010-05-60-Printer.ll In-Reply-To: <20100510205106.4F283312800A@llvm.org> References: <20100510205106.4F283312800A@llvm.org> Message-ID: <201005102225.28115.greened@obbligato.org> On Monday 10 May 2010 15:51:06 Chris Lattner wrote: > Author: lattner > Date: Mon May 10 15:51:06 2010 > New Revision: 103427 > > URL: http://llvm.org/viewvc/llvm-project?rev=103427&view=rev > Log: > fix a pretty obvious typo. We test things before committing them, right? Yes we do. For three days, in fact. The sarcasm isn't helping anyone. This is not a typo. It's deliberate. From http://www.llvm.org/docs/TestingGuide.html: As with a Unix shell, the RUN: lines permit pipelines and I/O redirection to be used.
However, the usage is slightly different than for Bash. To check what's legal, see the documentation for the Tcl exec command and the tutorial. The major differences are: You can't do 2>&1. That will cause Tcl to write to a file named &1. Usually this is done to get stderr to go through a pipe. You can do that in tcl with |& so replace this idiom: ... 2>&1 | grep with ... |& grep You can only redirect to a file, not to another descriptor and not from a here document. tcl supports redirecting to open files with the @ syntax but you shouldn't use that here. Then from the linked http://www.tcl.tk/man/tcl8.5/TclCmd/exec.htm#M2: 2>@1 Standard error from all commands in the pipeline is redirected to the command result. This operator is only valid at the end of the command pipeline. The only thing I can imagine being wrong here is a different interpretation of what "open file" means. In fact, your solution explicitly violates the testing documentation. That's why I didn't use it. -Dave From greened at obbligato.org Mon May 10 22:26:35 2010 From: greened at obbligato.org (David Greene) Date: Mon, 10 May 2010 22:26:35 -0500 Subject: [llvm-commits] [llvm] r103427 - /llvm/trunk/test/Other/2010-05-60-Printer.ll In-Reply-To: <20100510205106.4F283312800A@llvm.org> References: <20100510205106.4F283312800A@llvm.org> Message-ID: <201005102226.35622.greened@obbligato.org> On Monday 10 May 2010 15:51:06 Chris Lattner wrote: > Author: lattner > Date: Mon May 10 15:51:06 2010 > New Revision: 103427 > > URL: http://llvm.org/viewvc/llvm-project?rev=103427&view=rev > Log: > fix a pretty obvious typo. We test things before committing them, right? 
> > Modified: > llvm/trunk/test/Other/2010-05-60-Printer.ll > > Modified: llvm/trunk/test/Other/2010-05-60-Printer.ll > URL: > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/2010-05-60-Print > er.ll?rev=103427&r1=103426&r2=103427&view=diff > ========================================================================== > ==== --- llvm/trunk/test/Other/2010-05-60-Printer.ll (original) > +++ llvm/trunk/test/Other/2010-05-60-Printer.ll Mon May 10 15:51:06 2010 > @@ -1,4 +1,4 @@ > -; RUN: llc -O2 -print-after-all < %s 2>@1 > +; RUN: llc -O2 -print-after-all < %s 2>&1 Is Tcl on the buildbots broken? What version is it? Again, this works just fine on my machine. -Dave From sabre at nondot.org Mon May 10 23:33:37 2010 From: sabre at nondot.org (Chris Lattner) Date: Mon, 10 May 2010 21:33:37 -0700 Subject: [llvm-commits] [llvm] r103427 - /llvm/trunk/test/Other/2010-05-60-Printer.ll In-Reply-To: <201005102226.35622.greened@obbligato.org> References: <20100510205106.4F283312800A@llvm.org> <201005102226.35622.greened@obbligato.org> Message-ID: <24B717FA-C973-47DE-9818-44E709A83EDC@nondot.org> On May 10, 2010, at 8:26 PM, David Greene wrote: > On Monday 10 May 2010 15:51:06 Chris Lattner wrote: >> Author: lattner >> Date: Mon May 10 15:51:06 2010 >> New Revision: 103427 >> >> Modified: llvm/trunk/test/Other/2010-05-60-Printer.ll >> URL: >> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/2010-05-60-Print >> er.ll?rev=103427&r1=103426&r2=103427&view=diff >> ========================================================================== >> ==== --- llvm/trunk/test/Other/2010-05-60-Printer.ll (original) >> +++ llvm/trunk/test/Other/2010-05-60-Printer.ll Mon May 10 15:51:06 2010 >> @@ -1,4 +1,4 @@ >> -; RUN: llc -O2 -print-after-all < %s 2>@1 >> +; RUN: llc -O2 -print-after-all < %s 2>&1 > > Is Tcl on the buildbots broken? What version is it? Again, this works just > fine on my machine. Daniel, is this a lit bug? 
-Chris From daniel_dunbar at apple.com Tue May 11 00:55:05 2010 From: daniel_dunbar at apple.com (Daniel Dunbar) Date: Mon, 10 May 2010 22:55:05 -0700 Subject: [llvm-commits] [llvm] r103427 - /llvm/trunk/test/Other/2010-05-60-Printer.ll In-Reply-To: <24B717FA-C973-47DE-9818-44E709A83EDC@nondot.org> References: <20100510205106.4F283312800A@llvm.org> <201005102226.35622.greened@obbligato.org> <24B717FA-C973-47DE-9818-44E709A83EDC@nondot.org> Message-ID: <49AE2548-9ACC-4C2E-BA68-1C0E80D5575F@apple.com> On May 10, 2010, at 9:33 PM, Chris Lattner wrote: > On May 10, 2010, at 8:26 PM, David Greene wrote: >> On Monday 10 May 2010 15:51:06 Chris Lattner wrote: >>> Author: lattner >>> Date: Mon May 10 15:51:06 2010 >>> New Revision: 103427 >>> >>> Modified: llvm/trunk/test/Other/2010-05-60-Printer.ll >>> URL: >>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/2010-05-60-Print >>> er.ll?rev=103427&r1=103426&r2=103427&view=diff >>> ========================================================================== >>> ==== --- llvm/trunk/test/Other/2010-05-60-Printer.ll (original) >>> +++ llvm/trunk/test/Other/2010-05-60-Printer.ll Mon May 10 15:51:06 2010 >>> @@ -1,4 +1,4 @@ >>> -; RUN: llc -O2 -print-after-all < %s 2>@1 >>> +; RUN: llc -O2 -print-after-all < %s 2>&1 >> >> Is Tcl on the buildbots broken? What version is it? Again, this works just >> fine on my machine. > > Daniel, is this a lit bug? This is a 'lit' unimplementism. I didn't implement a full Tcl parser, just enough for the parts we use. Since I plan to kill off the DejaGNU side at some point, I don't feel it is worth adding support for more Tcl-isms, but I can if people like. 
- Daniel > -Chris From dgregor at apple.com Tue May 11 01:17:44 2010 From: dgregor at apple.com (Douglas Gregor) Date: Tue, 11 May 2010 06:17:44 -0000 Subject: [llvm-commits] [llvm] r103457 - in /llvm/trunk: include/llvm/ADT/EquivalenceClasses.h lib/CodeGen/IntrinsicLowering.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp lib/CodeGen/SelectionDAG/TargetLowering.cpp lib/CodeGen/Spiller.cpp lib/Transforms/Scalar/LoopStrengthReduce.cpp lib/Transforms/Utils/PromoteMemoryToRegister.cpp utils/TableGen/IntrinsicEmitter.cpp utils/TableGen/IntrinsicEmitter.h Message-ID: <20100511061744.C8C88312800A@llvm.org> Author: dgregor Date: Tue May 11 01:17:44 2010 New Revision: 103457 URL: http://llvm.org/viewvc/llvm-project?rev=103457&view=rev Log: Fixes for Microsoft Visual Studio 2010, from Steven Watanabe! Modified: llvm/trunk/include/llvm/ADT/EquivalenceClasses.h llvm/trunk/lib/CodeGen/IntrinsicLowering.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp llvm/trunk/lib/CodeGen/Spiller.cpp llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp llvm/trunk/utils/TableGen/IntrinsicEmitter.h Modified: llvm/trunk/include/llvm/ADT/EquivalenceClasses.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/EquivalenceClasses.h?rev=103457&r1=103456&r2=103457&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/EquivalenceClasses.h (original) +++ llvm/trunk/include/llvm/ADT/EquivalenceClasses.h Tue May 11 01:17:44 2010 @@ -191,7 +191,7 @@ /// insert - Insert a new value into the union/find set, ignoring the request /// if the value already exists. 
iterator insert(const ElemTy &Data) { - return TheMapping.insert(Data).first; + return TheMapping.insert(ECValue(Data)).first; } /// findLeader - Given a value in the set, return a member iterator for the Modified: llvm/trunk/lib/CodeGen/IntrinsicLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/IntrinsicLowering.cpp?rev=103457&r1=103456&r2=103457&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/IntrinsicLowering.cpp (original) +++ llvm/trunk/lib/CodeGen/IntrinsicLowering.cpp Tue May 11 01:17:44 2010 @@ -83,6 +83,12 @@ return NewCI; } +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) +#define setjmp_undefined_for_visual_studio +#undef setjmp +#endif + void IntrinsicLowering::AddPrototypes(Module &M) { LLVMContext &Context = M.getContext(); for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=103457&r1=103456&r2=103457&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue May 11 01:17:44 2010 @@ -3726,6 +3726,12 @@ return true; } +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) +#define setjmp_undefined_for_visual_studio +#undef setjmp +#endif + /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If /// we want to emit this as a call to a named external function, return the name /// otherwise lower it and return null. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=103457&r1=103456&r2=103457&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Tue May 11 01:17:44 2010 @@ -2417,7 +2417,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint[0] != '{') - return std::pair<unsigned, const TargetRegisterClass*>(0, 0); + return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); // Remove the braces from around the name. @@ -2449,7 +2449,7 @@ } } - return std::pair<unsigned, const TargetRegisterClass*>(0, 0); + return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0)); } //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/CodeGen/Spiller.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/Spiller.cpp?rev=103457&r1=103456&r2=103457&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/Spiller.cpp (original) +++ llvm/trunk/lib/CodeGen/Spiller.cpp Tue May 11 01:17:44 2010 @@ -451,9 +451,9 @@ // reg. 
MachineBasicBlock *useMBB = useInst->getParent(); MachineBasicBlock::iterator useItr(useInst); - tii->copyRegToReg(*useMBB, next(useItr), li->reg, newVReg, trc, trc, + tii->copyRegToReg(*useMBB, llvm::next(useItr), li->reg, newVReg, trc, trc, DebugLoc()); - MachineInstr *copyMI = next(useItr); + MachineInstr *copyMI = llvm::next(useItr); copyMI->addRegisterKilled(newVReg, tri); SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); Modified: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=103457&r1=103456&r2=103457&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original) +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp Tue May 11 01:17:44 2010 @@ -2899,7 +2899,7 @@ // instead of at the end, so that it can be used for other expansions. if (IDom == Inst->getParent() && (!BetterPos || DT.dominates(BetterPos, Inst))) - BetterPos = next(BasicBlock::iterator(Inst)); + BetterPos = llvm::next(BasicBlock::iterator(Inst)); } if (!AllDominate) break; Modified: llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp?rev=103457&r1=103456&r2=103457&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp (original) +++ llvm/trunk/lib/Transforms/Utils/PromoteMemoryToRegister.cpp Tue May 11 01:17:44 2010 @@ -861,7 +861,7 @@ // Find the nearest store that has a lower than this load. StoresByIndexTy::iterator I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), - std::pair<unsigned, StoreInst*>(LoadIdx, 0), + std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)), StoreIndexSearchPredicate()); // If there is no store before this load, then we can't promote this load. 
Modified: llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp?rev=103457&r1=103456&r2=103457&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp Tue May 11 01:17:44 2010 @@ -30,6 +30,8 @@ if (TargetOnly && !Ints.empty()) TargetPrefix = Ints[0].TargetPrefix; + EmitPrefix(OS); + // Emit the enum information. EmitEnumInfo(Ints, OS); @@ -59,6 +61,23 @@ // Emit code to translate GCC builtins into LLVM intrinsics. EmitIntrinsicToGCCBuiltinMap(Ints, OS); + + EmitSuffix(OS); +} + +void IntrinsicEmitter::EmitPrefix(raw_ostream &OS) { + OS << "// VisualStudio defines setjmp as _setjmp\n" + "#if defined(_MSC_VER) && defined(setjmp)\n" + "#define setjmp_undefined_for_visual_studio\n" + "#undef setjmp\n" + "#endif\n\n"; +} + +void IntrinsicEmitter::EmitSuffix(raw_ostream &OS) { + OS << "#if defined(_MSC_VER) && defined(setjmp_undefined_for_visual_studio)\n" + "// let's return it to _setjmp state\n" + "#define setjmp _setjmp\n" + "#endif\n\n"; } void IntrinsicEmitter::EmitEnumInfo(const std::vector<CodeGenIntrinsic> &Ints, Modified: llvm/trunk/utils/TableGen/IntrinsicEmitter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/IntrinsicEmitter.h?rev=103457&r1=103456&r2=103457&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/IntrinsicEmitter.h (original) +++ llvm/trunk/utils/TableGen/IntrinsicEmitter.h Tue May 11 01:17:44 2010 @@ -28,6 +28,8 @@ : Records(R), TargetOnly(T) {} void run(raw_ostream &OS); + + void EmitPrefix(raw_ostream &OS); void EmitEnumInfo(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS); @@ -50,6 +52,7 @@ raw_ostream &OS); void EmitIntrinsicToGCCBuiltinMap(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS); + void EmitSuffix(raw_ostream &OS); }; } // End llvm namespace From 
evan.cheng at apple.com Tue May 11 02:26:33 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 11 May 2010 07:26:33 -0000 Subject: [llvm-commits] [llvm] r103459 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMInstrInfo.td lib/Target/ARM/ARMInstrThumb.td test/CodeGen/ARM/trap.ll test/CodeGen/Thumb/trap.ll Message-ID: <20100511072633.1E12B312800A@llvm.org> Author: evancheng Date: Tue May 11 02:26:32 2010 New Revision: 103459 URL: http://llvm.org/viewvc/llvm-project?rev=103459&view=rev Log: Select @llvm.trap to the special B with 1111 condition (i.e. trap) instruction. Added: llvm/trunk/test/CodeGen/ARM/trap.ll llvm/trunk/test/CodeGen/Thumb/trap.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp llvm/trunk/lib/Target/ARM/ARMInstrInfo.td llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=103459&r1=103458&r2=103459&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Tue May 11 02:26:32 2010 @@ -384,6 +384,8 @@ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); + setOperationAction(ISD::TRAP, MVT::Other, Legal); + // Use the default implementation. 
setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=103459&r1=103458&r2=103459&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Tue May 11 02:26:32 2010 @@ -791,8 +791,8 @@ } // A5.4 Permanently UNDEFINED instructions. -def TRAP : AI<(outs), (ins), MiscFrm, NoItinerary, "trap", "", - [/* For disassembly only; pattern left blank */]>, +let isBarrier = 1, isTerminator = 1 in +def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, "trap", [(trap)]>, Requires<[IsARM]> { let Inst{27-25} = 0b011; let Inst{24-20} = 0b11111; Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb.td?rev=103459&r1=103458&r2=103459&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Tue May 11 02:26:32 2010 @@ -417,9 +417,10 @@ } } -// A8.6.16 B: Encoding T1 -- for disassembly only +// A8.6.16 B: Encoding T1 // If Inst{11-8} == 0b1110 then UNDEFINED -def tTRAP : T1I<(outs), (ins), IIC_Br, "trap", []>, Encoding16 { +let isBarrier = 1, isTerminator = 1 in +def tTRAP : TI<(outs), (ins), IIC_Br, "trap", [(trap)]>, Encoding16 { let Inst{15-12} = 0b1101; let Inst{11-8} = 0b1110; } Added: llvm/trunk/test/CodeGen/ARM/trap.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/trap.ll?rev=103459&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/ARM/trap.ll (added) +++ llvm/trunk/test/CodeGen/ARM/trap.ll Tue May 11 02:26:32 2010 @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=arm | 
FileCheck %s +; rdar://7961298 + +define arm_apcscc void @t() nounwind { +entry: +; CHECK: t: +; CHECK: trap + call void @llvm.trap() + unreachable +} + +declare void @llvm.trap() nounwind Added: llvm/trunk/test/CodeGen/Thumb/trap.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/trap.ll?rev=103459&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/Thumb/trap.ll (added) +++ llvm/trunk/test/CodeGen/Thumb/trap.ll Tue May 11 02:26:32 2010 @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=thumb | FileCheck %s +; rdar://7961298 + +define arm_apcscc void @t() nounwind { +entry: +; CHECK: t: +; CHECK: trap + call void @llvm.trap() + unreachable +} + +declare void @llvm.trap() nounwind From baldrick at free.fr Tue May 11 02:27:11 2010 From: baldrick at free.fr (Duncan Sands) Date: Tue, 11 May 2010 07:27:11 -0000 Subject: [llvm-commits] [dragonegg] r103460 - /dragonegg/trunk/Makefile Message-ID: <20100511072711.2912D312800A@llvm.org> Author: baldrick Date: Tue May 11 02:27:11 2010 New Revision: 103460 URL: http://llvm.org/viewvc/llvm-project?rev=103460&view=rev Log: Use an absolute path for the linker version script, hopefully fixing the buildbots. 
Modified: dragonegg/trunk/Makefile Modified: dragonegg/trunk/Makefile URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/Makefile?rev=103460&r1=103459&r2=103460&view=diff ============================================================================== --- dragonegg/trunk/Makefile (original) +++ dragonegg/trunk/Makefile Tue May 11 02:27:11 2010 @@ -23,7 +23,7 @@ ifeq ($(shell uname),Darwin) LOADABLE_MODULE_OPTIONS=-bundle -undefined dynamic_lookup else -LOADABLE_MODULE_OPTIONS=-shared -Wl,--version-script=exports.map +LOADABLE_MODULE_OPTIONS=-shared -Wl,--version-script=$(SRC_DIR)/exports.map endif GCC_PLUGIN_DIR:=$(shell $(GCC) -print-file-name=plugin) From baldrick at free.fr Tue May 11 02:58:04 2010 From: baldrick at free.fr (Duncan Sands) Date: Tue, 11 May 2010 07:58:04 -0000 Subject: [llvm-commits] [dragonegg] r103462 - /dragonegg/trunk/Makefile Message-ID: <20100511075804.93E9B312800A@llvm.org> Author: baldrick Date: Tue May 11 02:58:04 2010 New Revision: 103462 URL: http://llvm.org/viewvc/llvm-project?rev=103462&view=rev Log: Tell the linker to optimize the shared library. This actually does something when using ELF. 
Modified: dragonegg/trunk/Makefile Modified: dragonegg/trunk/Makefile URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/Makefile?rev=103462&r1=103461&r2=103462&view=diff ============================================================================== --- dragonegg/trunk/Makefile (original) +++ dragonegg/trunk/Makefile Tue May 11 02:58:04 2010 @@ -23,7 +23,7 @@ ifeq ($(shell uname),Darwin) LOADABLE_MODULE_OPTIONS=-bundle -undefined dynamic_lookup else -LOADABLE_MODULE_OPTIONS=-shared -Wl,--version-script=$(SRC_DIR)/exports.map +LOADABLE_MODULE_OPTIONS=-shared -Wl,-O1 -Wl,--version-script=$(SRC_DIR)/exports.map endif GCC_PLUGIN_DIR:=$(shell $(GCC) -print-file-name=plugin) From kalle.raiskila at nokia.com Tue May 11 06:00:02 2010 From: kalle.raiskila at nokia.com (Kalle Raiskila) Date: Tue, 11 May 2010 11:00:02 -0000 Subject: [llvm-commits] [llvm] r103466 - in /llvm/trunk: lib/Target/CellSPU/SPUInstrInfo.cpp test/CodeGen/CellSPU/jumptable.ll Message-ID: <20100511110002.3B941312800A@llvm.org> Author: kraiskil Date: Tue May 11 06:00:02 2010 New Revision: 103466 URL: http://llvm.org/viewvc/llvm-project?rev=103466&view=rev Log: Make SPU backend not assert on jump tables. Added: llvm/trunk/test/CodeGen/CellSPU/jumptable.ll Modified: llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp Modified: llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp?rev=103466&r1=103465&r2=103466&view=diff ============================================================================== --- llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp (original) +++ llvm/trunk/lib/Target/CellSPU/SPUInstrInfo.cpp Tue May 11 06:00:02 2010 @@ -467,6 +467,9 @@ // If there is only one terminator instruction, process it. 
if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { if (isUncondBranch(LastInst)) { + // Check for jump tables + if (!LastInst->getOperand(0).isMBB()) + return true; TBB = LastInst->getOperand(0).getMBB(); return false; } else if (isCondBranch(LastInst)) { Added: llvm/trunk/test/CodeGen/CellSPU/jumptable.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/CellSPU/jumptable.ll?rev=103466&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/CellSPU/jumptable.ll (added) +++ llvm/trunk/test/CodeGen/CellSPU/jumptable.ll Tue May 11 06:00:02 2010 @@ -0,0 +1,21 @@ +;RUN: llc --march=cellspu %s -o - | FileCheck %s +; This is to check that emitting jumptables doesn't crash llc +define i32 @test(i32 %param) { +entry: +;CHECK: ai $4, $3, -1 +;CHECK: clgti $5, $4, 3 +;CHECK: brnz $5,.LBB0_2 + switch i32 %param, label %bb1 [ + i32 1, label %bb3 + i32 2, label %bb2 + i32 3, label %bb3 + i32 4, label %bb1 + ] + +bb1: + ret i32 1 +bb2: + ret i32 2 +bb3: + ret i32 3 +} From gohman at apple.com Tue May 11 11:21:03 2010 From: gohman at apple.com (Dan Gohman) Date: Tue, 11 May 2010 16:21:03 -0000 Subject: [llvm-commits] [llvm] r103474 - in /llvm/trunk: include/llvm/Target/TargetLowering.h lib/Target/ARM/ARMISelLowering.h lib/Target/X86/X86ISelLowering.h Message-ID: <20100511162103.9808C312800A@llvm.org> Author: djg Date: Tue May 11 11:21:03 2010 New Revision: 103474 URL: http://llvm.org/viewvc/llvm-project?rev=103474&view=rev Log: Remove the TargetLowering::getSubtarget() virtual function, which was unused. TargetMachine::getSubtarget() is used instead. 
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h llvm/trunk/lib/Target/ARM/ARMISelLowering.h llvm/trunk/lib/Target/X86/X86ISelLowering.h Modified: llvm/trunk/include/llvm/Target/TargetLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=103474&r1=103473&r2=103474&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h (original) +++ llvm/trunk/include/llvm/Target/TargetLowering.h Tue May 11 11:21:03 2010 @@ -1103,12 +1103,6 @@ } public: - - virtual const TargetSubtarget *getSubtarget() const { - assert(0 && "Not Implemented"); - return NULL; // this is here to silence compiler errors - } - //===--------------------------------------------------------------------===// // Lowering methods - These methods must be implemented by targets so that // the SelectionDAGLowering code knows how to lower these. Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=103474&r1=103473&r2=103474&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Tue May 11 11:21:03 2010 @@ -236,7 +236,7 @@ std::vector &Ops, SelectionDAG &DAG) const; - virtual const ARMSubtarget* getSubtarget() const { + const ARMSubtarget* getSubtarget() const { return Subtarget; } Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=103474&r1=103473&r2=103474&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue May 11 11:21:03 2010 @@ -563,7 +563,7 @@ return !X86ScalarSSEf64 || VT == MVT::f80; } - virtual const 
X86Subtarget* getSubtarget() const { + const X86Subtarget* getSubtarget() const { return Subtarget; } From tonic at nondot.org Tue May 11 11:46:45 2010 From: tonic at nondot.org (Tanya Lattner) Date: Tue, 11 May 2010 16:46:45 -0000 Subject: [llvm-commits] [llvm] r103477 - /llvm/trunk/docs/CommandGuide/index.html Message-ID: <20100511164645.BB2FC312800A@llvm.org> Author: tbrethou Date: Tue May 11 11:46:45 2010 New Revision: 103477 URL: http://llvm.org/viewvc/llvm-project?rev=103477&view=rev Log: Test commit. Modified: llvm/trunk/docs/CommandGuide/index.html Modified: llvm/trunk/docs/CommandGuide/index.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/CommandGuide/index.html?rev=103477&r1=103476&r2=103477&view=diff ============================================================================== --- llvm/trunk/docs/CommandGuide/index.html (original) +++ llvm/trunk/docs/CommandGuide/index.html Tue May 11 11:46:45 2010 @@ -22,7 +22,7 @@ - + From tonic at nondot.org Tue May 11 11:47:42 2010 From: tonic at nondot.org (Tanya Lattner) Date: Tue, 11 May 2010 16:47:42 -0000 Subject: [llvm-commits] [llvm] r103478 - /llvm/trunk/docs/CommandGuide/index.html Message-ID: <20100511164742.D5DE1312800A@llvm.org> Author: tbrethou Date: Tue May 11 11:47:42 2010 New Revision: 103478 URL: http://llvm.org/viewvc/llvm-project?rev=103478&view=rev Log: Test commit. 
Modified: llvm/trunk/docs/CommandGuide/index.html Modified: llvm/trunk/docs/CommandGuide/index.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/CommandGuide/index.html?rev=103478&r1=103477&r2=103478&view=diff ============================================================================== --- llvm/trunk/docs/CommandGuide/index.html (original) +++ llvm/trunk/docs/CommandGuide/index.html Tue May 11 11:47:42 2010 @@ -22,7 +22,7 @@ - + From daniel at zuster.org Tue May 11 12:22:45 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 11 May 2010 17:22:45 -0000 Subject: [llvm-commits] [llvm] r103479 - /llvm/trunk/docs/CommandGuide/lit.pod Message-ID: <20100511172245.CFB3D312800A@llvm.org> Author: ddunbar Date: Tue May 11 12:22:45 2010 New Revision: 103479 URL: http://llvm.org/viewvc/llvm-project?rev=103479&view=rev Log: Test commit. Modified: llvm/trunk/docs/CommandGuide/lit.pod Modified: llvm/trunk/docs/CommandGuide/lit.pod URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/CommandGuide/lit.pod?rev=103479&r1=103478&r2=103479&view=diff ============================================================================== --- llvm/trunk/docs/CommandGuide/lit.pod (original) +++ llvm/trunk/docs/CommandGuide/lit.pod Tue May 11 12:22:45 2010 @@ -253,8 +253,8 @@ discovered test suites at startup. Once a test suite is discovered, its config file is loaded. Config files -themselves are just Python modules which will be executed. When the config file -is executed, two important global variables are predefined: +themselves are Python modules which will be executed. 
When the config file is +executed, two important global variables are predefined: =over From daniel at zuster.org Tue May 11 12:22:51 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 11 May 2010 17:22:51 -0000 Subject: [llvm-commits] [llvm] r103480 - in /llvm/trunk: include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp test/MC/MachO/darwin-x86_64-reloc.s Message-ID: <20100511172251.1D1AA3128018@llvm.org> Author: ddunbar Date: Tue May 11 12:22:50 2010 New Revision: 103480 URL: http://llvm.org/viewvc/llvm-project?rev=103480&view=rev Log: MC/Mach-O x86_64: Switch to using fragment atom symbol. - This eliminates getAtomForAddress() (which was a linear search) and simplifies getAtom(). - This also fixes some correctness problems where local labels at the same address as non-local labels could be assigned to the wrong atom. Modified: llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103480&r1=103479&r2=103480&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Tue May 11 12:22:50 2010 @@ -680,18 +680,8 @@ void FinishLayout(MCAsmLayout &Layout); public: - /// Find the symbol which defines the atom containing given address, inside - /// the given section, or null if there is no such symbol. - // - // FIXME-PERF: Eliminate this, it is very slow. - const MCSymbolData *getAtomForAddress(const MCAsmLayout &Layout, - const MCSectionData *Section, - uint64_t Address) const; - /// Find the symbol which defines the atom containing the given symbol, or /// null if there is no such symbol. - // - // FIXME-PERF: Eliminate this, it is very slow. 
const MCSymbolData *getAtom(const MCAsmLayout &Layout, const MCSymbolData *Symbol) const; Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103480&r1=103479&r2=103480&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Tue May 11 12:22:50 2010 @@ -49,8 +49,8 @@ void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) { // We shouldn't have to do anything special to support negative slides, and it - // is a perfectly valid thing to do as long as other parts of the system are - // can guarantee convergence. + // is a perfectly valid thing to do as long as other parts of the system can + // guarantee convergence. assert(SlideAmount >= 0 && "Negative slides not yet supported"); // Update the layout by simply recomputing the layout for the entire @@ -287,36 +287,6 @@ SD->getFragment()->getParent()->getSection()); } -// FIXME-PERF: This routine is really slow. -const MCSymbolData *MCAssembler::getAtomForAddress(const MCAsmLayout &Layout, - const MCSectionData *Section, - uint64_t Address) const { - const MCSymbolData *Best = 0; - uint64_t BestAddress = 0; - - for (MCAssembler::const_symbol_iterator it = symbol_begin(), - ie = symbol_end(); it != ie; ++it) { - // Ignore non-linker visible symbols. - if (!isSymbolLinkerVisible(it)) - continue; - - // Ignore symbols not in the same section. - if (!it->getFragment() || it->getFragment()->getParent() != Section) - continue; - - // Otherwise, find the closest symbol preceding this address (ties are - // resolved in favor of the last defined symbol). - uint64_t SymbolAddress = Layout.getSymbolAddress(it); - if (SymbolAddress <= Address && (!Best || SymbolAddress >= BestAddress)) { - Best = it; - BestAddress = SymbolAddress; - } - } - - return Best; -} - -// FIXME-PERF: This routine is really slow. 
const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout, const MCSymbolData *SD) const { // Linker visible symbols define atoms. @@ -327,9 +297,8 @@ if (!SD->getFragment()) return 0; - // Otherwise, search by address. - return getAtomForAddress(Layout, SD->getFragment()->getParent(), - Layout.getSymbolAddress(SD)); + // Otherwise, return the atom for the containing fragment. + return SD->getFragment()->getAtom(); } bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, @@ -370,8 +339,7 @@ // symbol) that the fixup value is relative to. const MCSymbolData *BaseSymbol = 0; if (IsPCRel) { - BaseSymbol = getAtomForAddress( - Layout, DF->getParent(), Layout.getFragmentAddress(DF)+Fixup.Offset); + BaseSymbol = DF->getAtom(); if (!BaseSymbol) IsResolved = false; } @@ -837,6 +805,7 @@ // // FIXME: Add MCAsmLayout utility for this. DF->setParent(IF->getParent()); + DF->setAtom(IF->getAtom()); DF->setOrdinal(IF->getOrdinal()); Layout.setFragmentOffset(DF, Layout.getFragmentOffset(IF)); Layout.setFragmentEffectiveSize(DF, Layout.getFragmentEffectiveSize(IF)); Modified: llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s?rev=103480&r1=103479&r2=103480&view=diff ============================================================================== --- llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s (original) +++ llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s Tue May 11 12:22:50 2010 @@ -46,6 +46,16 @@ .quad L1 .quad _ext_foo +// Make sure local label which overlaps with non-local one is assigned to the +// right atom. 
+ .text +_f2: +L2_0: + addl $0, %eax +L2_1: +_f3: + addl L2_1 - L2_0, %eax + // CHECK: ('cputype', 16777223) // CHECK: ('cpusubtype', 3) // CHECK: ('filetype', 1) @@ -59,9 +69,9 @@ // CHECK: ('size', 312) // CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('vm_addr', 0) -// CHECK: ('vm_size', 205) +// CHECK: ('vm_size', 215) // CHECK: ('file_offset', 448) -// CHECK: ('file_size', 205) +// CHECK: ('file_size', 215) // CHECK: ('maxprot', 7) // CHECK: ('initprot', 7) // CHECK: ('num_sections', 3) @@ -71,10 +81,10 @@ // CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) -// CHECK: ('size', 181) +// CHECK: ('size', 191) // CHECK: ('offset', 448) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 656) +// CHECK: ('reloc_offset', 664) // CHECK: ('num_reloc', 27) // CHECK: ('flags', 0x80000400) // CHECK: ('reserved1', 0) @@ -164,15 +174,15 @@ // CHECK: (('word-0', 0x2), // CHECK: ('word-1', 0x2d000000)), // CHECK: ]) -// CHECK: ('_section_data', '\xc3\xe8\x00\x00\x00\x00\xe8\x04\x00\x00\x00H\x8b\x05\x00\x00\x00\x00\xff5\x00\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05\x04\x00\x00\x00\xc6\x05\xff\xff\xff\xff\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x8d\x05,\x00\x00\x00H\x8d\x05\x14\x00\x00\x00\x83\x05\x13\x00\x00\x00\x06f\x81\x05\x12\x00\x00\x00\xf4\x01\x81\x05\x10\x00\x00\x00\xf4\x01\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90,\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\xe4\xff\xff\xff\xff\xff\xff\xff\xd4\xff\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('_section_data', 
'\xc3\xe8\x00\x00\x00\x00\xe8\x04\x00\x00\x00H\x8b\x05\x00\x00\x00\x00\xff5\x00\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05\x04\x00\x00\x00\xc6\x05\xff\xff\xff\xff\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x8d\x05,\x00\x00\x00H\x8d\x05\x14\x00\x00\x00\x83\x05\x13\x00\x00\x00\x06f\x81\x05\x12\x00\x00\x00\xf4\x01\x81\x05\x10\x00\x00\x00\xf4\x01\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90,\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\xe4\xff\xff\xff\xff\xff\xff\xff\xd4\xff\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00\x00\x00\x00\x83\xc0\x00\x03\x04%\x03\x00\x00\x00') // CHECK: # Section 1 // CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') -// CHECK: ('address', 181) +// CHECK: ('address', 191) // CHECK: ('size', 8) -// CHECK: ('offset', 629) +// CHECK: ('offset', 639) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 872) +// CHECK: ('reloc_offset', 880) // CHECK: ('num_reloc', 2) // CHECK: ('flags', 0x0) // CHECK: ('reserved1', 0) @@ -185,17 +195,17 @@ // CHECK: ('word-1', 0x4d000000)), // CHECK: # Relocation 1 // CHECK: (('word-0', 0x0), -// CHECK: ('word-1', 0x4d000005)), +// CHECK: ('word-1', 0x4d000007)), // CHECK: ]) // CHECK: ('_section_data', '\x04\x00\x00\x00\x04\x00\x00\x00') // CHECK: # Section 2 // CHECK: (('section_name', '__debug_frame\x00\x00\x00') // CHECK: ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00') -// CHECK: ('address', 189) +// CHECK: ('address', 199) // CHECK: ('size', 16) -// CHECK: ('offset', 637) +// CHECK: ('offset', 647) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 888) +// CHECK: ('reloc_offset', 896) // CHECK: ('num_reloc', 2) // CHECK: ('flags', 0x2000000) // CHECK: ('reserved1', 0) @@ -205,7 +215,7 @@ // CHECK: ('_relocations', [ // CHECK: # 
Relocation 0 // CHECK: (('word-0', 0x8), -// CHECK: ('word-1', 0xe000004)), +// CHECK: ('word-1', 0xe000006)), // CHECK: # Relocation 1 // CHECK: (('word-0', 0x0), // CHECK: ('word-1', 0x6000001)), @@ -216,11 +226,11 @@ // CHECK: # Load Command 1 // CHECK: (('command', 2) // CHECK: ('size', 24) -// CHECK: ('symoff', 904) -// CHECK: ('nsyms', 6) -// CHECK: ('stroff', 1000) -// CHECK: ('strsize', 40) -// CHECK: ('_string_data', '\x00_foobar\x00_ext_foo\x00_foo\x00_baz\x00_bar\x00_prev\x00\x00') +// CHECK: ('symoff', 912) +// CHECK: ('nsyms', 8) +// CHECK: ('stroff', 1040) +// CHECK: ('strsize', 48) +// CHECK: ('_string_data', '\x00_foobar\x00_ext_foo\x00_foo\x00_baz\x00_bar\x00_prev\x00_f2\x00_f3\x00\x00') // CHECK: ('_symbols', [ // CHECK: # Symbol 0 // CHECK: (('n_strx', 18) @@ -255,6 +265,22 @@ // CHECK: ('_string', '_prev') // CHECK: ), // CHECK: # Symbol 4 +// CHECK: (('n_strx', 39) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 181) +// CHECK: ('_string', '_f2') +// CHECK: ), +// CHECK: # Symbol 5 +// CHECK: (('n_strx', 43) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 184) +// CHECK: ('_string', '_f3') +// CHECK: ), +// CHECK: # Symbol 6 // CHECK: (('n_strx', 9) // CHECK: ('n_type', 0x1) // CHECK: ('n_sect', 0) @@ -262,7 +288,7 @@ // CHECK: ('n_value', 0) // CHECK: ('_string', '_ext_foo') // CHECK: ), -// CHECK: # Symbol 5 +// CHECK: # Symbol 7 // CHECK: (('n_strx', 1) // CHECK: ('n_type', 0x1) // CHECK: ('n_sect', 0) @@ -276,10 +302,10 @@ // CHECK: (('command', 11) // CHECK: ('size', 80) // CHECK: ('ilocalsym', 0) -// CHECK: ('nlocalsym', 4) -// CHECK: ('iextdefsym', 4) +// CHECK: ('nlocalsym', 6) +// CHECK: ('iextdefsym', 6) // CHECK: ('nextdefsym', 0) -// CHECK: ('iundefsym', 4) +// CHECK: ('iundefsym', 6) // CHECK: ('nundefsym', 2) // CHECK: ('tocoff', 0) // CHECK: ('ntoc', 0) From gohman at apple.com Tue May 11 12:31:57 2010 From: gohman at apple.com 
(Dan Gohman) Date: Tue, 11 May 2010 17:31:57 -0000 Subject: [llvm-commits] [llvm] r103481 - in /llvm/trunk: include/llvm/CodeGen/ include/llvm/Target/ lib/CodeGen/SelectionDAG/ lib/Target/ARM/ lib/Target/Alpha/ lib/Target/Blackfin/ lib/Target/CellSPU/ lib/Target/MBlaze/ lib/Target/MSP430/ lib/Target/Mips/ lib/Target/PIC16/ lib/Target/PowerPC/ lib/Target/Sparc/ lib/Target/SystemZ/ lib/Target/X86/ lib/Target/XCore/ Message-ID: <20100511173157.F2CF0312800A@llvm.org> Author: djg Date: Tue May 11 12:31:57 2010 New Revision: 103481 URL: http://llvm.org/viewvc/llvm-project?rev=103481&view=rev Log: Implement a bunch more TargetSelectionDAGInfo infrastructure. Move EmitTargetCodeForMemcpy, EmitTargetCodeForMemset, and EmitTargetCodeForMemmove out of TargetLowering and into SelectionDAGInfo to exercise this. Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h llvm/trunk/include/llvm/Target/TargetLowering.h llvm/trunk/include/llvm/Target/TargetSelectionDAGInfo.h llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/trunk/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp llvm/trunk/lib/Target/ARM/ARMISelLowering.h llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.h llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp llvm/trunk/lib/Target/ARM/ARMTargetMachine.h llvm/trunk/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp llvm/trunk/lib/Target/Alpha/AlphaSelectionDAGInfo.h llvm/trunk/lib/Target/Alpha/AlphaTargetMachine.cpp llvm/trunk/lib/Target/Alpha/AlphaTargetMachine.h llvm/trunk/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp llvm/trunk/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h llvm/trunk/lib/Target/Blackfin/BlackfinTargetMachine.cpp llvm/trunk/lib/Target/Blackfin/BlackfinTargetMachine.h llvm/trunk/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp llvm/trunk/lib/Target/CellSPU/SPUSelectionDAGInfo.h llvm/trunk/lib/Target/CellSPU/SPUTargetMachine.cpp 
llvm/trunk/lib/Target/CellSPU/SPUTargetMachine.h llvm/trunk/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp llvm/trunk/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h llvm/trunk/lib/Target/MBlaze/MBlazeTargetMachine.cpp llvm/trunk/lib/Target/MBlaze/MBlazeTargetMachine.h llvm/trunk/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp llvm/trunk/lib/Target/MSP430/MSP430SelectionDAGInfo.h llvm/trunk/lib/Target/MSP430/MSP430TargetMachine.cpp llvm/trunk/lib/Target/MSP430/MSP430TargetMachine.h llvm/trunk/lib/Target/Mips/MipsSelectionDAGInfo.cpp llvm/trunk/lib/Target/Mips/MipsSelectionDAGInfo.h llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp llvm/trunk/lib/Target/Mips/MipsTargetMachine.h llvm/trunk/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp llvm/trunk/lib/Target/PIC16/PIC16SelectionDAGInfo.h llvm/trunk/lib/Target/PIC16/PIC16TargetMachine.cpp llvm/trunk/lib/Target/PIC16/PIC16TargetMachine.h llvm/trunk/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp llvm/trunk/lib/Target/PowerPC/PPCSelectionDAGInfo.h llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.h llvm/trunk/lib/Target/Sparc/SparcSelectionDAGInfo.cpp llvm/trunk/lib/Target/Sparc/SparcSelectionDAGInfo.h llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp llvm/trunk/lib/Target/Sparc/SparcTargetMachine.h llvm/trunk/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp llvm/trunk/lib/Target/SystemZ/SystemZSelectionDAGInfo.h llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.h llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.h llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.h llvm/trunk/lib/Target/X86/X86TargetMachine.cpp llvm/trunk/lib/Target/X86/X86TargetMachine.h llvm/trunk/lib/Target/XCore/XCoreSelectionDAGInfo.cpp llvm/trunk/lib/Target/XCore/XCoreSelectionDAGInfo.h llvm/trunk/lib/Target/XCore/XCoreTargetMachine.cpp 
llvm/trunk/lib/Target/XCore/XCoreTargetMachine.h Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original) +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Tue May 11 12:31:57 2010 @@ -36,6 +36,7 @@ class SDNodeOrdering; class SDDbgValue; class TargetLowering; +class TargetSelectionDAGInfo; template<> struct ilist_traits : public ilist_default_traits { private: @@ -131,6 +132,7 @@ class SelectionDAG { const TargetMachine &TM; const TargetLowering &TLI; + const TargetSelectionDAGInfo &TSI; MachineFunction *MF; FunctionLoweringInfo &FLI; LLVMContext *Context; @@ -201,6 +203,7 @@ MachineFunction &getMachineFunction() const { return *MF; } const TargetMachine &getTarget() const { return TM; } const TargetLowering &getTargetLoweringInfo() const { return TLI; } + const TargetSelectionDAGInfo &getSelectionDAGInfo() const { return TSI; } FunctionLoweringInfo &getFunctionLoweringInfo() const { return FLI; } LLVMContext *getContext() const {return Context; } Modified: llvm/trunk/include/llvm/Target/TargetLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h (original) +++ llvm/trunk/include/llvm/Target/TargetLowering.h Tue May 11 12:31:57 2010 @@ -1194,61 +1194,6 @@ return SDValue(); // this is here to silence compiler errors } - /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a - /// memcpy. 
This can be used by targets to provide code sequences for cases - /// that don't fit the target's parameters for simple loads/stores and can be - /// more efficient than using a library call. This function can return a null - /// SDValue if the target declines to use custom code and a different - /// lowering strategy should be used. - /// - /// If AlwaysInline is true, the size is constant and the target should not - /// emit any calls and is strongly encouraged to attempt to emit inline code - /// even if it is beyond the usual threshold because this intrinsic is being - /// expanded in a place where calls are not feasible (e.g. within the prologue - /// for another call). If the target chooses to decline an AlwaysInline - /// request here, legalize will resort to using simple loads and stores. - virtual SDValue - EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, bool isVolatile, - bool AlwaysInline, - const Value *DstSV, uint64_t DstOff, - const Value *SrcSV, uint64_t SrcOff) const { - return SDValue(); - } - - /// EmitTargetCodeForMemmove - Emit target-specific code that performs a - /// memmove. This can be used by targets to provide code sequences for cases - /// that don't fit the target's parameters for simple loads/stores and can be - /// more efficient than using a library call. This function can return a null - /// SDValue if the target declines to use custom code and a different - /// lowering strategy should be used. - virtual SDValue - EmitTargetCodeForMemmove(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, bool isVolatile, - const Value *DstSV, uint64_t DstOff, - const Value *SrcSV, uint64_t SrcOff) const { - return SDValue(); - } - - /// EmitTargetCodeForMemset - Emit target-specific code that performs a - /// memset. 
This can be used by targets to provide code sequences for cases - /// that don't fit the target's parameters for simple stores and can be more - /// efficient than using a library call. This function can return a null - /// SDValue if the target declines to use custom code and a different - /// lowering strategy should be used. - virtual SDValue - EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Op1, SDValue Op2, - SDValue Op3, unsigned Align, bool isVolatile, - const Value *DstSV, uint64_t DstOff) const { - return SDValue(); - } - /// LowerOperationWrapper - This callback is invoked by the type legalizer /// to legalize nodes with an illegal operand type but legal result types. /// It replaces the LowerOperation callback in the type Legalizer. Modified: llvm/trunk/include/llvm/Target/TargetSelectionDAGInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetSelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetSelectionDAGInfo.h (original) +++ llvm/trunk/include/llvm/Target/TargetSelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -16,8 +16,13 @@ #ifndef LLVM_TARGET_TARGETSELECTIONDAGINFO_H #define LLVM_TARGET_TARGETSELECTIONDAGINFO_H +#include "llvm/CodeGen/SelectionDAGNodes.h" + namespace llvm { +class TargetData; +class TargetMachine; + //===----------------------------------------------------------------------===// /// TargetSelectionDAGLowering - Targets can subclass this to parameterize the /// SelectionDAG lowering and instruction selection process. 
@@ -26,9 +31,69 @@ TargetSelectionDAGInfo(const TargetSelectionDAGInfo &); // DO NOT IMPLEMENT void operator=(const TargetSelectionDAGInfo &); // DO NOT IMPLEMENT + const TargetData *TD; + +protected: + const TargetData *getTargetData() const { return TD; } + public: - TargetSelectionDAGInfo(); + explicit TargetSelectionDAGInfo(const TargetMachine &TM); virtual ~TargetSelectionDAGInfo(); + + /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a + /// memcpy. This can be used by targets to provide code sequences for cases + /// that don't fit the target's parameters for simple loads/stores and can be + /// more efficient than using a library call. This function can return a null + /// SDValue if the target declines to use custom code and a different + /// lowering strategy should be used. + /// + /// If AlwaysInline is true, the size is constant and the target should not + /// emit any calls and is strongly encouraged to attempt to emit inline code + /// even if it is beyond the usual threshold because this intrinsic is being + /// expanded in a place where calls are not feasible (e.g. within the prologue + /// for another call). If the target chooses to decline an AlwaysInline + /// request here, legalize will resort to using simple loads and stores. + virtual SDValue + EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, bool isVolatile, + bool AlwaysInline, + const Value *DstSV, uint64_t DstOff, + const Value *SrcSV, uint64_t SrcOff) const { + return SDValue(); + } + + /// EmitTargetCodeForMemmove - Emit target-specific code that performs a + /// memmove. This can be used by targets to provide code sequences for cases + /// that don't fit the target's parameters for simple loads/stores and can be + /// more efficient than using a library call. 
This function can return a null + /// SDValue if the target declines to use custom code and a different + /// lowering strategy should be used. + virtual SDValue + EmitTargetCodeForMemmove(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, bool isVolatile, + const Value *DstSV, uint64_t DstOff, + const Value *SrcSV, uint64_t SrcOff) const { + return SDValue(); + } + + /// EmitTargetCodeForMemset - Emit target-specific code that performs a + /// memset. This can be used by targets to provide code sequences for cases + /// that don't fit the target's parameters for simple stores and can be more + /// efficient than using a library call. This function can return a null + /// SDValue if the target declines to use custom code and a different + /// lowering strategy should be used. + virtual SDValue + EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, bool isVolatile, + const Value *DstSV, uint64_t DstOff) const { + return SDValue(); + } }; } // end llvm namespace Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Tue May 11 12:31:57 2010 @@ -33,6 +33,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -790,7 +791,8 @@ // EntryNode could meaningfully have debug info if we can find it... 
SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli) - : TM(tm), TLI(*tm.getTargetLowering()), FLI(fli), + : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), + FLI(fli), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); @@ -3536,7 +3538,7 @@ // Then check to see if we should lower the memcpy with target-specific // code. If the target chooses to do this, this is the next best. SDValue Result = - TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, + TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, DstSV, DstSVOff, SrcSV, SrcSVOff); if (Result.getNode()) @@ -3601,7 +3603,7 @@ // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. SDValue Result = - TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, + TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, DstSV, DstSVOff, SrcSV, SrcSVOff); if (Result.getNode()) return Result; @@ -3652,7 +3654,7 @@ // Then check to see if we should lower the memset with target-specific // code. If the target chooses to do this, this is the next best. 
SDValue Result = - TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, + TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, DstSV, DstSVOff); if (Result.getNode()) return Result; Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,9 +12,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; -TargetSelectionDAGInfo::TargetSelectionDAGInfo() { +TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM) + : TD(TM.getTargetData()) { } TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Tue May 11 12:31:57 2010 @@ -2124,116 +2124,6 @@ return FrameAddr; } -SDValue -ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const { - // Do repeated 4-byte loads and stores. To be improved. - // This requires 4-byte alignment. 
- if ((Align & 3) != 0) - return SDValue(); - // This requires the copy size to be a constant, preferrably - // within a subtarget-specific limit. - ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); - if (!ConstantSize) - return SDValue(); - uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) - return SDValue(); - - unsigned BytesLeft = SizeVal & 3; - unsigned NumMemOps = SizeVal >> 2; - unsigned EmittedNumMemOps = 0; - EVT VT = MVT::i32; - unsigned VTSize = 4; - unsigned i = 0; - const unsigned MAX_LOADS_IN_LDM = 6; - SDValue TFOps[MAX_LOADS_IN_LDM]; - SDValue Loads[MAX_LOADS_IN_LDM]; - uint64_t SrcOff = 0, DstOff = 0; - - // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the - // same number of stores. The loads and stores will get combined into - // ldm/stm later on. - while (EmittedNumMemOps < NumMemOps) { - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, MVT::i32, Src, - DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0); - TFOps[i] = Loads[i].getValue(1); - SrcOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff, isVolatile, false, 0); - DstOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - EmittedNumMemOps += i; - } - - if (BytesLeft == 0) - return Chain; - - // Issue loads / stores for the trailing (1 - 3) bytes.
- unsigned BytesLeftSave = BytesLeft; - i = 0; - while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, MVT::i32, Src, - DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff, false, false, 0); - TFOps[i] = Loads[i].getValue(1); - ++i; - SrcOff += VTSize; - BytesLeft -= VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); - - i = 0; - BytesLeft = BytesLeftSave; - while (BytesLeft) { - if (BytesLeft >= 2) { - VT = MVT::i16; - VTSize = 2; - } else { - VT = MVT::i8; - VTSize = 1; - } - - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff, false, false, 0); - ++i; - DstOff += VTSize; - BytesLeft -= VTSize; - } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); -} - /// ExpandBIT_CONVERT - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. 
This should not be done when the non-i64 Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Tue May 11 12:31:57 2010 @@ -301,15 +301,6 @@ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, Modified: llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,11 +12,123 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "arm-selectiondag-info" -#include "ARMSelectionDAGInfo.h" +#include "ARMTargetMachine.h" using namespace llvm; -ARMSelectionDAGInfo::ARMSelectionDAGInfo() { +ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM) + : TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget<ARMSubtarget>()) { } ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { } + +SDValue +ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, +
SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferrably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) + return SDValue(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + EVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + const unsigned MAX_LOADS_IN_LDM = 6; + SDValue TFOps[MAX_LOADS_IN_LDM]; + SDValue Loads[MAX_LOADS_IN_LDM]; + uint64_t SrcOff = 0, DstOff = 0; + + // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the + // same number of stores. The loads and stores will get combined into + // ldm/stm later on.
+ while (EmittedNumMemOps < NumMemOps) { + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0); + TFOps[i] = Loads[i].getValue(1); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff, isVolatile, false, 0); + DstOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + EmittedNumMemOps += i; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes. + unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcSV, SrcSVOff + SrcOff, false, false, 0); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff, false, false, 0); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); +} Modified: llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.h URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.h (original) +++ llvm/trunk/lib/Target/ARM/ARMSelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -19,9 +19,24 @@ namespace llvm { class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. + const ARMSubtarget *Subtarget; + public: - ARMSelectionDAGInfo(); + explicit ARMSelectionDAGInfo(const TargetMachine &TM); ~ARMSelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; }; } Modified: llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp Tue May 11 12:31:57 2010 @@ -62,7 +62,8 @@ DataLayout(Subtarget.isAPCS_ABI() ? 
std::string("e-p:32:32-f64:32:32-i64:32:32-n32") : std::string("e-p:32:32-f64:64:64-i64:64:64-n32")), - TLInfo(*this) { + TLInfo(*this), + TSInfo(*this) { } ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, @@ -76,7 +77,8 @@ "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") : std::string("e-p:32:32-f64:64:64-i64:64:64-" "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")), - TLInfo(*this) { + TLInfo(*this), + TSInfo(*this) { } Modified: llvm/trunk/lib/Target/ARM/ARMTargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetMachine.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMTargetMachine.h (original) +++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.h Tue May 11 12:31:57 2010 @@ -21,6 +21,7 @@ #include "ARMJITInfo.h" #include "ARMSubtarget.h" #include "ARMISelLowering.h" +#include "ARMSelectionDAGInfo.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/OwningPtr.h" @@ -63,6 +64,7 @@ ARMInstrInfo InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; + ARMSelectionDAGInfo TSInfo; public: ARMTargetMachine(const Target &T, const std::string &TT, const std::string &FS); @@ -75,6 +77,10 @@ return &TLInfo; } + virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } }; @@ -88,6 +94,7 @@ OwningPtr<ARMBaseInstrInfo> InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; + ARMSelectionDAGInfo TSInfo; public: ThumbTargetMachine(const Target &T, const std::string &TT, const std::string &FS); @@ -101,6 +108,10 @@ return &TLInfo; } + virtual const ARMSelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } + /// returns either Thumb1InstrInfo or
Thumb2InstrInfo virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); Modified: llvm/trunk/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "alpha-selectiondag-info" -#include "AlphaSelectionDAGInfo.h" +#include "AlphaTargetMachine.h" using namespace llvm; -AlphaSelectionDAGInfo::AlphaSelectionDAGInfo() { +AlphaSelectionDAGInfo::AlphaSelectionDAGInfo(const AlphaTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() { Modified: llvm/trunk/lib/Target/Alpha/AlphaSelectionDAGInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaSelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Alpha/AlphaSelectionDAGInfo.h (original) +++ llvm/trunk/lib/Target/Alpha/AlphaSelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -18,9 +18,11 @@ namespace llvm { +class AlphaTargetMachine; + class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo { public: - AlphaSelectionDAGInfo(); + explicit AlphaSelectionDAGInfo(const AlphaTargetMachine &TM); ~AlphaSelectionDAGInfo(); }; Modified: llvm/trunk/lib/Target/Alpha/AlphaTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaTargetMachine.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Alpha/AlphaTargetMachine.cpp (original) +++ 
llvm/trunk/lib/Target/Alpha/AlphaTargetMachine.cpp Tue May 11 12:31:57 2010 @@ -32,7 +32,8 @@ FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), JITInfo(*this), Subtarget(TT, FS), - TLInfo(*this) { + TLInfo(*this), + TSInfo(*this) { setRelocationModel(Reloc::PIC_); } Modified: llvm/trunk/lib/Target/Alpha/AlphaTargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaTargetMachine.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Alpha/AlphaTargetMachine.h (original) +++ llvm/trunk/lib/Target/Alpha/AlphaTargetMachine.h Tue May 11 12:31:57 2010 @@ -20,6 +20,7 @@ #include "AlphaInstrInfo.h" #include "AlphaJITInfo.h" #include "AlphaISelLowering.h" +#include "AlphaSelectionDAGInfo.h" #include "AlphaSubtarget.h" namespace llvm { @@ -33,6 +34,7 @@ AlphaJITInfo JITInfo; AlphaSubtarget Subtarget; AlphaTargetLowering TLInfo; + AlphaSelectionDAGInfo TSInfo; public: AlphaTargetMachine(const Target &T, const std::string &TT, @@ -47,6 +49,9 @@ virtual const AlphaTargetLowering* getTargetLowering() const { return &TLInfo; } + virtual const AlphaSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } virtual const TargetData *getTargetData() const { return &DataLayout; } virtual AlphaJITInfo* getJITInfo() { return &JITInfo; Modified: llvm/trunk/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,10 +12,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "blackfin-selectiondag-info" -#include 
"BlackfinSelectionDAGInfo.h" +#include "BlackfinTargetMachine.h" using namespace llvm; -BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo() { +BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo( + const BlackfinTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() { Modified: llvm/trunk/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h (original) +++ llvm/trunk/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -18,9 +18,11 @@ namespace llvm { +class BlackfinTargetMachine; + class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo { public: - BlackfinSelectionDAGInfo(); + explicit BlackfinSelectionDAGInfo(const BlackfinTargetMachine &TM); ~BlackfinSelectionDAGInfo(); }; Modified: llvm/trunk/lib/Target/Blackfin/BlackfinTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/BlackfinTargetMachine.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Blackfin/BlackfinTargetMachine.cpp (original) +++ llvm/trunk/lib/Target/Blackfin/BlackfinTargetMachine.cpp Tue May 11 12:31:57 2010 @@ -31,6 +31,7 @@ DataLayout("e-p:32:32-i64:32-f64:32-n32"), Subtarget(TT, FS), TLInfo(*this), + TSInfo(*this), InstrInfo(Subtarget), FrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0) { } Modified: llvm/trunk/lib/Target/Blackfin/BlackfinTargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/BlackfinTargetMachine.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- 
llvm/trunk/lib/Target/Blackfin/BlackfinTargetMachine.h (original) +++ llvm/trunk/lib/Target/Blackfin/BlackfinTargetMachine.h Tue May 11 12:31:57 2010 @@ -20,6 +20,7 @@ #include "BlackfinInstrInfo.h" #include "BlackfinSubtarget.h" #include "BlackfinISelLowering.h" +#include "BlackfinSelectionDAGInfo.h" #include "BlackfinIntrinsicInfo.h" namespace llvm { @@ -28,6 +29,7 @@ const TargetData DataLayout; BlackfinSubtarget Subtarget; BlackfinTargetLowering TLInfo; + BlackfinSelectionDAGInfo TSInfo; BlackfinInstrInfo InstrInfo; TargetFrameInfo FrameInfo; BlackfinIntrinsicInfo IntrinsicInfo; @@ -46,6 +48,9 @@ virtual const BlackfinTargetLowering* getTargetLowering() const { return &TLInfo; } + virtual const BlackfinSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } virtual const TargetData *getTargetData() const { return &DataLayout; } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); Modified: llvm/trunk/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "cellspu-selectiondag-info" -#include "SPUSelectionDAGInfo.h" +#include "SPUTargetMachine.h" using namespace llvm; -SPUSelectionDAGInfo::SPUSelectionDAGInfo() { +SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } SPUSelectionDAGInfo::~SPUSelectionDAGInfo() { Modified: llvm/trunk/lib/Target/CellSPU/SPUSelectionDAGInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUSelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff 
============================================================================== --- llvm/trunk/lib/Target/CellSPU/SPUSelectionDAGInfo.h (original) +++ llvm/trunk/lib/Target/CellSPU/SPUSelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -18,9 +18,11 @@ namespace llvm { +class SPUTargetMachine; + class SPUSelectionDAGInfo : public TargetSelectionDAGInfo { public: - SPUSelectionDAGInfo(); + explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM); ~SPUSelectionDAGInfo(); }; Modified: llvm/trunk/lib/Target/CellSPU/SPUTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUTargetMachine.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/CellSPU/SPUTargetMachine.cpp (original) +++ llvm/trunk/lib/Target/CellSPU/SPUTargetMachine.cpp Tue May 11 12:31:57 2010 @@ -42,6 +42,7 @@ InstrInfo(*this), FrameInfo(*this), TLInfo(*this), + TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { // For the time being, use static relocations, since there's really no // support for PIC yet. 
Modified: llvm/trunk/lib/Target/CellSPU/SPUTargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPUTargetMachine.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/CellSPU/SPUTargetMachine.h (original) +++ llvm/trunk/lib/Target/CellSPU/SPUTargetMachine.h Tue May 11 12:31:57 2010 @@ -17,6 +17,7 @@ #include "SPUSubtarget.h" #include "SPUInstrInfo.h" #include "SPUISelLowering.h" +#include "SPUSelectionDAGInfo.h" #include "SPUFrameInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" @@ -34,6 +35,7 @@ SPUInstrInfo InstrInfo; SPUFrameInfo FrameInfo; SPUTargetLowering TLInfo; + SPUSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; public: SPUTargetMachine(const Target &T, const std::string &TT, @@ -61,6 +63,10 @@ return &TLInfo; } + virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const SPURegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } Modified: llvm/trunk/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mblaze-selectiondag-info" -#include "MBlazeSelectionDAGInfo.h" +#include "MBlazeTargetMachine.h" using namespace llvm; -MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo() { +MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() { Modified: 
llvm/trunk/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h (original) +++ llvm/trunk/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -18,9 +18,11 @@ namespace llvm { +class MBlazeTargetMachine; + class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo { public: - MBlazeSelectionDAGInfo(); + explicit MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM); ~MBlazeSelectionDAGInfo(); }; Modified: llvm/trunk/lib/Target/MBlaze/MBlazeTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MBlaze/MBlazeTargetMachine.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/MBlaze/MBlazeTargetMachine.cpp (original) +++ llvm/trunk/lib/Target/MBlaze/MBlazeTargetMachine.cpp Tue May 11 12:31:57 2010 @@ -39,7 +39,7 @@ "f64:32:32-v64:32:32-v128:32:32-n32"), InstrInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), - TLInfo(*this) { + TLInfo(*this), TSInfo(*this) { if (getRelocationModel() == Reloc::Default) { setRelocationModel(Reloc::Static); } Modified: llvm/trunk/lib/Target/MBlaze/MBlazeTargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MBlaze/MBlazeTargetMachine.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/MBlaze/MBlazeTargetMachine.h (original) +++ llvm/trunk/lib/Target/MBlaze/MBlazeTargetMachine.h Tue May 11 12:31:57 2010 @@ -17,6 +17,7 @@ #include "MBlazeSubtarget.h" #include "MBlazeInstrInfo.h" #include "MBlazeISelLowering.h" +#include "MBlazeSelectionDAGInfo.h" #include "MBlazeIntrinsicInfo.h" #include 
"llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" @@ -31,6 +32,7 @@ MBlazeInstrInfo InstrInfo; TargetFrameInfo FrameInfo; MBlazeTargetLowering TLInfo; + MBlazeSelectionDAGInfo TSInfo; MBlazeIntrinsicInfo IntrinsicInfo; public: MBlazeTargetMachine(const Target &T, const std::string &TT, @@ -54,6 +56,9 @@ virtual const MBlazeTargetLowering *getTargetLowering() const { return &TLInfo; } + virtual const MBlazeSelectionDAGInfo* getSelectionDAGInfo() const + { return &TSInfo; } + const TargetIntrinsicInfo *getIntrinsicInfo() const { return &IntrinsicInfo; } Modified: llvm/trunk/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "msp430-selectiondag-info" -#include "MSP430SelectionDAGInfo.h" +#include "MSP430TargetMachine.h" using namespace llvm; -MSP430SelectionDAGInfo::MSP430SelectionDAGInfo() { +MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() { Modified: llvm/trunk/lib/Target/MSP430/MSP430SelectionDAGInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/MSP430SelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/MSP430/MSP430SelectionDAGInfo.h (original) +++ llvm/trunk/lib/Target/MSP430/MSP430SelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -18,9 +18,11 @@ namespace llvm { +class MSP430TargetMachine; + class MSP430SelectionDAGInfo : 
public TargetSelectionDAGInfo { public: - MSP430SelectionDAGInfo(); + explicit MSP430SelectionDAGInfo(const MSP430TargetMachine &TM); ~MSP430SelectionDAGInfo(); }; Modified: llvm/trunk/lib/Target/MSP430/MSP430TargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/MSP430TargetMachine.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/MSP430/MSP430TargetMachine.cpp (original) +++ llvm/trunk/lib/Target/MSP430/MSP430TargetMachine.cpp Tue May 11 12:31:57 2010 @@ -33,7 +33,7 @@ Subtarget(TT, FS), // FIXME: Check TargetData string. DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), - InstrInfo(*this), TLInfo(*this), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { } Modified: llvm/trunk/lib/Target/MSP430/MSP430TargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/MSP430TargetMachine.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/MSP430/MSP430TargetMachine.h (original) +++ llvm/trunk/lib/Target/MSP430/MSP430TargetMachine.h Tue May 11 12:31:57 2010 @@ -17,6 +17,7 @@ #include "MSP430InstrInfo.h" #include "MSP430ISelLowering.h" +#include "MSP430SelectionDAGInfo.h" #include "MSP430RegisterInfo.h" #include "MSP430Subtarget.h" #include "llvm/Target/TargetData.h" @@ -32,6 +33,7 @@ const TargetData DataLayout; // Calculates type size & alignment MSP430InstrInfo InstrInfo; MSP430TargetLowering TLInfo; + MSP430SelectionDAGInfo TSInfo; // MSP430 does not have any call stack frame, therefore not having // any MSP430 specific FrameInfo class. 
@@ -54,6 +56,10 @@ return &TLInfo; } + virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); }; // MSP430TargetMachine. Modified: llvm/trunk/lib/Target/Mips/MipsSelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsSelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsSelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/Target/Mips/MipsSelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-selectiondag-info" -#include "MipsSelectionDAGInfo.h" +#include "MipsTargetMachine.h" using namespace llvm; -MipsSelectionDAGInfo::MipsSelectionDAGInfo() { +MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } MipsSelectionDAGInfo::~MipsSelectionDAGInfo() { Modified: llvm/trunk/lib/Target/Mips/MipsSelectionDAGInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsSelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsSelectionDAGInfo.h (original) +++ llvm/trunk/lib/Target/Mips/MipsSelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -18,9 +18,11 @@ namespace llvm { +class MipsTargetMachine; + class MipsSelectionDAGInfo : public TargetSelectionDAGInfo { public: - MipsSelectionDAGInfo(); + explicit MipsSelectionDAGInfo(const MipsTargetMachine &TM); ~MipsSelectionDAGInfo(); }; Modified: llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp (original) +++ llvm/trunk/lib/Target/Mips/MipsTargetMachine.cpp Tue May 11 12:31:57 2010 @@ -42,7 +42,7 @@ std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")), InstrInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), - TLInfo(*this) { + TLInfo(*this), TSInfo(*this) { // Abicall enables PIC by default if (getRelocationModel() == Reloc::Default) { if (Subtarget.isABI_O32()) Modified: llvm/trunk/lib/Target/Mips/MipsTargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsTargetMachine.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsTargetMachine.h (original) +++ llvm/trunk/lib/Target/Mips/MipsTargetMachine.h Tue May 11 12:31:57 2010 @@ -17,6 +17,7 @@ #include "MipsSubtarget.h" #include "MipsInstrInfo.h" #include "MipsISelLowering.h" +#include "MipsSelectionDAGInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" @@ -30,6 +31,7 @@ MipsInstrInfo InstrInfo; TargetFrameInfo FrameInfo; MipsTargetLowering TLInfo; + MipsSelectionDAGInfo TSInfo; public: MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool isLittle); @@ -51,6 +53,10 @@ return &TLInfo; } + virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); Modified: llvm/trunk/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff 
============================================================================== --- llvm/trunk/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pic16-selectiondag-info" -#include "PIC16SelectionDAGInfo.h" +#include "PIC16TargetMachine.h" using namespace llvm; -PIC16SelectionDAGInfo::PIC16SelectionDAGInfo() { +PIC16SelectionDAGInfo::PIC16SelectionDAGInfo(const PIC16TargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } PIC16SelectionDAGInfo::~PIC16SelectionDAGInfo() { Modified: llvm/trunk/lib/Target/PIC16/PIC16SelectionDAGInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16SelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/PIC16/PIC16SelectionDAGInfo.h (original) +++ llvm/trunk/lib/Target/PIC16/PIC16SelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -18,9 +18,11 @@ namespace llvm { +class PIC16TargetMachine; + class PIC16SelectionDAGInfo : public TargetSelectionDAGInfo { public: - PIC16SelectionDAGInfo(); + explicit PIC16SelectionDAGInfo(const PIC16TargetMachine &TM); ~PIC16SelectionDAGInfo(); }; Modified: llvm/trunk/lib/Target/PIC16/PIC16TargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16TargetMachine.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/PIC16/PIC16TargetMachine.cpp (original) +++ llvm/trunk/lib/Target/PIC16/PIC16TargetMachine.cpp Tue May 11 12:31:57 2010 @@ -35,7 +35,7 @@ : LLVMTargetMachine(T, TT), Subtarget(TT, FS, Trad), DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8"), - InstrInfo(*this), TLInfo(*this), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), 
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { } Modified: llvm/trunk/lib/Target/PIC16/PIC16TargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16TargetMachine.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/PIC16/PIC16TargetMachine.h (original) +++ llvm/trunk/lib/Target/PIC16/PIC16TargetMachine.h Tue May 11 12:31:57 2010 @@ -17,6 +17,7 @@ #include "PIC16InstrInfo.h" #include "PIC16ISelLowering.h" +#include "PIC16SelectionDAGInfo.h" #include "PIC16RegisterInfo.h" #include "PIC16Subtarget.h" #include "llvm/Target/TargetData.h" @@ -32,6 +33,7 @@ const TargetData DataLayout; // Calculates type size & alignment PIC16InstrInfo InstrInfo; PIC16TargetLowering TLInfo; + PIC16SelectionDAGInfo TSInfo; // PIC16 does not have any call stack frame, therefore not having // any PIC16 specific FrameInfo class. @@ -54,6 +56,10 @@ return &TLInfo; } + virtual const PIC16SelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); Modified: llvm/trunk/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "powerpc-selectiondag-info" -#include "PPCSelectionDAGInfo.h" +#include "PPCTargetMachine.h" using namespace llvm; -PPCSelectionDAGInfo::PPCSelectionDAGInfo() { +PPCSelectionDAGInfo::PPCSelectionDAGInfo(const 
PPCTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } PPCSelectionDAGInfo::~PPCSelectionDAGInfo() { Modified: llvm/trunk/lib/Target/PowerPC/PPCSelectionDAGInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCSelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCSelectionDAGInfo.h (original) +++ llvm/trunk/lib/Target/PowerPC/PPCSelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -18,9 +18,11 @@ namespace llvm { +class PPCTargetMachine; + class PPCSelectionDAGInfo : public TargetSelectionDAGInfo { public: - PPCSelectionDAGInfo(); + explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM); ~PPCSelectionDAGInfo(); }; Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp Tue May 11 12:31:57 2010 @@ -44,7 +44,8 @@ : LLVMTargetMachine(T, TT), Subtarget(TT, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), - FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this), + FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), + TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { if (getRelocationModel() == Reloc::Default) { Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.h (original) +++ llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.h Tue May 11 12:31:57 2010 @@ -19,6 +19,7 
@@ #include "PPCJITInfo.h" #include "PPCInstrInfo.h" #include "PPCISelLowering.h" +#include "PPCSelectionDAGInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" @@ -35,6 +36,7 @@ PPCFrameInfo FrameInfo; PPCJITInfo JITInfo; PPCTargetLowering TLInfo; + PPCSelectionDAGInfo TSInfo; InstrItineraryData InstrItins; public: @@ -47,6 +49,9 @@ virtual const PPCTargetLowering *getTargetLowering() const { return &TLInfo; } + virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } virtual const PPCRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } Modified: llvm/trunk/lib/Target/Sparc/SparcSelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcSelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Sparc/SparcSelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/Target/Sparc/SparcSelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sparc-selectiondag-info" -#include "SparcSelectionDAGInfo.h" +#include "SparcTargetMachine.h" using namespace llvm; -SparcSelectionDAGInfo::SparcSelectionDAGInfo() { +SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } SparcSelectionDAGInfo::~SparcSelectionDAGInfo() { Modified: llvm/trunk/lib/Target/Sparc/SparcSelectionDAGInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcSelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Sparc/SparcSelectionDAGInfo.h (original) +++ llvm/trunk/lib/Target/Sparc/SparcSelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -18,9 +18,11 @@ namespace llvm { +class SparcTargetMachine; + 
class SparcSelectionDAGInfo : public TargetSelectionDAGInfo { public: - SparcSelectionDAGInfo(); + explicit SparcSelectionDAGInfo(const SparcTargetMachine &TM); ~SparcSelectionDAGInfo(); }; Modified: llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp (original) +++ llvm/trunk/lib/Target/Sparc/SparcTargetMachine.cpp Tue May 11 12:31:57 2010 @@ -34,7 +34,7 @@ : LLVMTargetMachine(T, TT), Subtarget(TT, FS, is64bit), DataLayout(Subtarget.getDataLayout()), - TLInfo(*this), InstrInfo(Subtarget), + TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { } Modified: llvm/trunk/lib/Target/Sparc/SparcTargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Sparc/SparcTargetMachine.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/Sparc/SparcTargetMachine.h (original) +++ llvm/trunk/lib/Target/Sparc/SparcTargetMachine.h Tue May 11 12:31:57 2010 @@ -20,6 +20,7 @@ #include "SparcInstrInfo.h" #include "SparcSubtarget.h" #include "SparcISelLowering.h" +#include "SparcSelectionDAGInfo.h" namespace llvm { @@ -27,6 +28,7 @@ SparcSubtarget Subtarget; const TargetData DataLayout; // Calculates type size & alignment SparcTargetLowering TLInfo; + SparcSelectionDAGInfo TSInfo; SparcInstrInfo InstrInfo; TargetFrameInfo FrameInfo; public: @@ -42,6 +44,9 @@ virtual const SparcTargetLowering* getTargetLowering() const { return &TLInfo; } + virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } virtual const TargetData *getTargetData() const { return &DataLayout; } // Pass Pipeline Configuration Modified: 
llvm/trunk/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "systemz-selectiondag-info" -#include "SystemZSelectionDAGInfo.h" +#include "SystemZTargetMachine.h" using namespace llvm; -SystemZSelectionDAGInfo::SystemZSelectionDAGInfo() { +SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const SystemZTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { Modified: llvm/trunk/lib/Target/SystemZ/SystemZSelectionDAGInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZSelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/SystemZ/SystemZSelectionDAGInfo.h (original) +++ llvm/trunk/lib/Target/SystemZ/SystemZSelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -18,9 +18,11 @@ namespace llvm { +class SystemZTargetMachine; + class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { public: - SystemZSelectionDAGInfo(); + explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM); ~SystemZSelectionDAGInfo(); }; Modified: llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp (original) +++ 
llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.cpp Tue May 11 12:31:57 2010 @@ -29,7 +29,7 @@ Subtarget(TT, FS), DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32" "-f64:64:64-f128:128:128-a0:16:16-n32:64"), - InstrInfo(*this), TLInfo(*this), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) { if (getRelocationModel() == Reloc::Default) Modified: llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.h (original) +++ llvm/trunk/lib/Target/SystemZ/SystemZTargetMachine.h Tue May 11 12:31:57 2010 @@ -17,6 +17,7 @@ #include "SystemZInstrInfo.h" #include "SystemZISelLowering.h" +#include "SystemZSelectionDAGInfo.h" #include "SystemZRegisterInfo.h" #include "SystemZSubtarget.h" #include "llvm/Target/TargetData.h" @@ -32,6 +33,7 @@ const TargetData DataLayout; // Calculates type size & alignment SystemZInstrInfo InstrInfo; SystemZTargetLowering TLInfo; + SystemZSelectionDAGInfo TSInfo; // SystemZ does not have any call stack frame, therefore not having // any SystemZ specific FrameInfo class. @@ -53,6 +55,10 @@ return &TLInfo; } + virtual const SystemZSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); }; // SystemZTargetMachine. 
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue May 11 12:31:57 2010 @@ -6593,221 +6593,6 @@ return DAG.getMergeValues(Ops1, 2, dl); } -SDValue -X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, - const Value *DstSV, - uint64_t DstSVOff) const { - ConstantSDNode *ConstantSize = dyn_cast(Size); - - // If not DWORD aligned or size is more than the threshold, call the library. - // The libc version is likely to be faster for these cases. It can use the - // address value and run time information about the CPU. - if ((Align & 3) != 0 || - !ConstantSize || - ConstantSize->getZExtValue() > - getSubtarget()->getMaxInlineSizeThreshold()) { - SDValue InFlag(0, 0); - - // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *V = dyn_cast(Src); - - if (const char *bzeroEntry = V && - V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { - EVT IntPtr = getPointerTy(); - const Type *IntPtrTy = TD->getIntPtrType(*DAG.getContext()); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Dst; - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - Entry.Node = Size; - Args.push_back(Entry); - std::pair CallResult = - LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), - false, false, false, false, - 0, CallingConv::C, false, /*isReturnValueUsed=*/false, - DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); - return CallResult.second; - } - - // Otherwise have the target-independent code call memset. 
- return SDValue(); - } - - uint64_t SizeVal = ConstantSize->getZExtValue(); - SDValue InFlag(0, 0); - EVT AVT; - SDValue Count; - ConstantSDNode *ValC = dyn_cast(Src); - unsigned BytesLeft = 0; - bool TwoRepStos = false; - if (ValC) { - unsigned ValReg; - uint64_t Val = ValC->getZExtValue() & 255; - - // If the value is a constant, then we can potentially use larger sets. - switch (Align & 3) { - case 2: // WORD aligned - AVT = MVT::i16; - ValReg = X86::AX; - Val = (Val << 8) | Val; - break; - case 0: // DWORD aligned - AVT = MVT::i32; - ValReg = X86::EAX; - Val = (Val << 8) | Val; - Val = (Val << 16) | Val; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned - AVT = MVT::i64; - ValReg = X86::RAX; - Val = (Val << 32) | Val; - } - break; - default: // Byte aligned - AVT = MVT::i8; - ValReg = X86::AL; - Count = DAG.getIntPtrConstant(SizeVal); - break; - } - - if (AVT.bitsGT(MVT::i8)) { - unsigned UBytes = AVT.getSizeInBits() / 8; - Count = DAG.getIntPtrConstant(SizeVal / UBytes); - BytesLeft = SizeVal % UBytes; - } - - Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT), - InFlag); - InFlag = Chain.getValue(1); - } else { - AVT = MVT::i8; - Count = DAG.getIntPtrConstant(SizeVal); - Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); - InFlag = Chain.getValue(1); - } - - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : - X86::ECX, - Count, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : - X86::EDI, - Dst, InFlag); - InFlag = Chain.getValue(1); - - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); - - if (TwoRepStos) { - InFlag = Chain.getValue(1); - Count = Size; - EVT CVT = Count.getValueType(); - SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, - DAG.getConstant((AVT == MVT::i64) ? 
7 : 3, CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : - X86::ECX, - Left, InFlag); - InFlag = Chain.getValue(1); - Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); - } else if (BytesLeft) { - // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; - EVT AddrVT = Dst.getValueType(); - EVT SizeVT = Size.getValueType(); - - Chain = DAG.getMemset(Chain, dl, - DAG.getNode(ISD::ADD, dl, AddrVT, Dst, - DAG.getConstant(Offset, AddrVT)), - Src, - DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, DstSV, DstSVOff + Offset); - } - - // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. - return Chain; -} - -SDValue -X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const { - // This requires the copy size to be a constant, preferrably - // within a subtarget-specific limit. - ConstantSDNode *ConstantSize = dyn_cast(Size); - if (!ConstantSize) - return SDValue(); - uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) - return SDValue(); - - /// If not DWORD aligned, call the library. - if ((Align & 3) != 0) - return SDValue(); - - // DWORD aligned - EVT AVT = MVT::i32; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned - AVT = MVT::i64; - - unsigned UBytes = AVT.getSizeInBits() / 8; - unsigned CountVal = SizeVal / UBytes; - SDValue Count = DAG.getIntPtrConstant(CountVal); - unsigned BytesLeft = SizeVal % UBytes; - - SDValue InFlag(0, 0); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? 
X86::RCX : - X86::ECX, - Count, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : - X86::EDI, - Dst, InFlag); - InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : - X86::ESI, - Src, InFlag); - InFlag = Chain.getValue(1); - - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; - SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, - array_lengthof(Ops)); - - SmallVector Results; - Results.push_back(RepMovs); - if (BytesLeft) { - // Handle the last 1 - 7 bytes. - unsigned Offset = SizeVal - BytesLeft; - EVT DstVT = Dst.getValueType(); - EVT SrcVT = Src.getValueType(); - EVT SizeVT = Size.getValueType(); - Results.push_back(DAG.getMemcpy(Chain, dl, - DAG.getNode(ISD::ADD, dl, DstVT, Dst, - DAG.getConstant(Offset, DstVT)), - DAG.getNode(ISD::ADD, dl, SrcVT, Src, - DAG.getConstant(Offset, SrcVT)), - DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, AlwaysInline, - DstSV, DstSVOff + Offset, - SrcSV, SrcSVOff + Offset)); - } - - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Results[0], Results.size()); -} - SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue May 11 12:31:57 2010 @@ -743,23 +743,6 @@ void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, unsigned NewOp) const; - SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - 
SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, - const Value *DstSV, - uint64_t DstSVOff) const; - SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, bool AlwaysInline, - const Value *DstSV, - uint64_t DstSVOff, - const Value *SrcSV, - uint64_t SrcSVOff) const; - /// Utility function to emit string processing sse4.2 instructions /// that return in xmm0. /// This takes the instruction to expand, the associated machine basic Modified: llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,11 +12,232 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "x86-selectiondag-info" -#include "X86SelectionDAGInfo.h" +#include "X86TargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/SelectionDAG.h" using namespace llvm; -X86SelectionDAGInfo::X86SelectionDAGInfo() { +X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) : + TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget()), + TLI(*TM.getTargetLowering()) { } X86SelectionDAGInfo::~X86SelectionDAGInfo() { } + +SDValue +X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, + const Value *DstSV, + uint64_t DstSVOff) const { + ConstantSDNode *ConstantSize = dyn_cast(Size); + + // If not DWORD aligned or size is more than the threshold, call the library. + // The libc version is likely to be faster for these cases. 
It can use the + // address value and run time information about the CPU. + if ((Align & 3) != 0 || + !ConstantSize || + ConstantSize->getZExtValue() > + Subtarget->getMaxInlineSizeThreshold()) { + SDValue InFlag(0, 0); + + // Check to see if there is a specialized entry-point for memory zeroing. + ConstantSDNode *V = dyn_cast(Src); + + if (const char *bzeroEntry = V && + V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { + EVT IntPtr = TLI.getPointerTy(); + const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; + Entry.Ty = IntPtrTy; + Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + std::pair CallResult = + TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), + false, false, false, false, + 0, CallingConv::C, false, /*isReturnValueUsed=*/false, + DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, + DAG, dl); + return CallResult.second; + } + + // Otherwise have the target-independent code call memset. + return SDValue(); + } + + uint64_t SizeVal = ConstantSize->getZExtValue(); + SDValue InFlag(0, 0); + EVT AVT; + SDValue Count; + ConstantSDNode *ValC = dyn_cast(Src); + unsigned BytesLeft = 0; + bool TwoRepStos = false; + if (ValC) { + unsigned ValReg; + uint64_t Val = ValC->getZExtValue() & 255; + + // If the value is a constant, then we can potentially use larger sets. 
+ switch (Align & 3) { + case 2: // WORD aligned + AVT = MVT::i16; + ValReg = X86::AX; + Val = (Val << 8) | Val; + break; + case 0: // DWORD aligned + AVT = MVT::i32; + ValReg = X86::EAX; + Val = (Val << 8) | Val; + Val = (Val << 16) | Val; + if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned + AVT = MVT::i64; + ValReg = X86::RAX; + Val = (Val << 32) | Val; + } + break; + default: // Byte aligned + AVT = MVT::i8; + ValReg = X86::AL; + Count = DAG.getIntPtrConstant(SizeVal); + break; + } + + if (AVT.bitsGT(MVT::i8)) { + unsigned UBytes = AVT.getSizeInBits() / 8; + Count = DAG.getIntPtrConstant(SizeVal / UBytes); + BytesLeft = SizeVal % UBytes; + } + + Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT), + InFlag); + InFlag = Chain.getValue(1); + } else { + AVT = MVT::i8; + Count = DAG.getIntPtrConstant(SizeVal); + Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); + InFlag = Chain.getValue(1); + } + + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : + X86::ECX, + Count, InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : + X86::EDI, + Dst, InFlag); + InFlag = Chain.getValue(1); + + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; + Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); + + if (TwoRepStos) { + InFlag = Chain.getValue(1); + Count = Size; + EVT CVT = Count.getValueType(); + SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, + DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); + Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : + X86::ECX, + Left, InFlag); + InFlag = Chain.getValue(1); + Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; + Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops)); + } else if (BytesLeft) { + // Handle the last 1 - 7 bytes. 
+ unsigned Offset = SizeVal - BytesLeft; + EVT AddrVT = Dst.getValueType(); + EVT SizeVT = Size.getValueType(); + + Chain = DAG.getMemset(Chain, dl, + DAG.getNode(ISD::ADD, dl, AddrVT, Dst, + DAG.getConstant(Offset, AddrVT)), + Src, + DAG.getConstant(BytesLeft, SizeVT), + Align, isVolatile, DstSV, DstSVOff + Offset); + } + + // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. + return Chain; +} + +SDValue +X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { + // This requires the copy size to be a constant, preferrably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) + return SDValue(); + + /// If not DWORD aligned, call the library. + if ((Align & 3) != 0) + return SDValue(); + + // DWORD aligned + EVT AVT = MVT::i32; + if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned + AVT = MVT::i64; + + unsigned UBytes = AVT.getSizeInBits() / 8; + unsigned CountVal = SizeVal / UBytes; + SDValue Count = DAG.getIntPtrConstant(CountVal); + unsigned BytesLeft = SizeVal % UBytes; + + SDValue InFlag(0, 0); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : + X86::ECX, + Count, InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : + X86::EDI, + Dst, InFlag); + InFlag = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? 
X86::RSI : + X86::ESI, + Src, InFlag); + InFlag = Chain.getValue(1); + + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; + SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops, + array_lengthof(Ops)); + + SmallVector Results; + Results.push_back(RepMovs); + if (BytesLeft) { + // Handle the last 1 - 7 bytes. + unsigned Offset = SizeVal - BytesLeft; + EVT DstVT = Dst.getValueType(); + EVT SrcVT = Src.getValueType(); + EVT SizeVT = Size.getValueType(); + Results.push_back(DAG.getMemcpy(Chain, dl, + DAG.getNode(ISD::ADD, dl, DstVT, Dst, + DAG.getConstant(Offset, DstVT)), + DAG.getNode(ISD::ADD, dl, SrcVT, Src, + DAG.getConstant(Offset, SrcVT)), + DAG.getConstant(BytesLeft, SizeVT), + Align, isVolatile, AlwaysInline, + DstSV, DstSVOff + Offset, + SrcSV, SrcSVOff + Offset)); + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Results[0], Results.size()); +} Modified: llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.h (original) +++ llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -18,10 +18,40 @@ namespace llvm { +class X86TargetLowering; +class X86TargetMachine; +class X86Subtarget; + class X86SelectionDAGInfo : public TargetSelectionDAGInfo { + /// Subtarget - Keep a pointer to the X86Subtarget around so that we can + /// make the right decision when generating code for different targets. 
+ const X86Subtarget *Subtarget; + + const X86TargetLowering &TLI; + public: - X86SelectionDAGInfo(); + explicit X86SelectionDAGInfo(const X86TargetMachine &TM); ~X86SelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, + const Value *DstSV, + uint64_t DstSVOff) const; + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; }; } Modified: llvm/trunk/lib/Target/X86/X86TargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetMachine.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86TargetMachine.cpp (original) +++ llvm/trunk/lib/Target/X86/X86TargetMachine.cpp Tue May 11 12:31:57 2010 @@ -82,7 +82,8 @@ Subtarget.getStackAlignment(), (Subtarget.isTargetWin64() ? -40 : (Subtarget.is64Bit() ? -8 : -4))), - InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) { + InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this), + ELFWriterInfo(*this) { DefRelocModel = getRelocationModel(); // If no relocation model was picked, default as appropriate for the target. 
Modified: llvm/trunk/lib/Target/X86/X86TargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetMachine.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86TargetMachine.h (original) +++ llvm/trunk/lib/Target/X86/X86TargetMachine.h Tue May 11 12:31:57 2010 @@ -23,6 +23,7 @@ #include "X86JITInfo.h" #include "X86Subtarget.h" #include "X86ISelLowering.h" +#include "X86SelectionDAGInfo.h" namespace llvm { @@ -35,6 +36,7 @@ X86InstrInfo InstrInfo; X86JITInfo JITInfo; X86TargetLowering TLInfo; + X86SelectionDAGInfo TSInfo; X86ELFWriterInfo ELFWriterInfo; Reloc::Model DefRelocModel; // Reloc model before it's overridden. @@ -54,6 +56,9 @@ virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; } + virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const { + return &TSInfo; + } virtual const X86RegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } Modified: llvm/trunk/lib/Target/XCore/XCoreSelectionDAGInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreSelectionDAGInfo.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreSelectionDAGInfo.cpp (original) +++ llvm/trunk/lib/Target/XCore/XCoreSelectionDAGInfo.cpp Tue May 11 12:31:57 2010 @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "xcore-selectiondag-info" -#include "XCoreSelectionDAGInfo.h" +#include "XCoreTargetMachine.h" using namespace llvm; -XCoreSelectionDAGInfo::XCoreSelectionDAGInfo() { +XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM) + : TargetSelectionDAGInfo(TM) { } XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() { Modified: llvm/trunk/lib/Target/XCore/XCoreSelectionDAGInfo.h URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreSelectionDAGInfo.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreSelectionDAGInfo.h (original) +++ llvm/trunk/lib/Target/XCore/XCoreSelectionDAGInfo.h Tue May 11 12:31:57 2010 @@ -18,9 +18,11 @@ namespace llvm { +class XCoreTargetMachine; + class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo { public: - XCoreSelectionDAGInfo(); + explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM); ~XCoreSelectionDAGInfo(); }; Modified: llvm/trunk/lib/Target/XCore/XCoreTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreTargetMachine.cpp?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreTargetMachine.cpp (original) +++ llvm/trunk/lib/Target/XCore/XCoreTargetMachine.cpp Tue May 11 12:31:57 2010 @@ -28,7 +28,8 @@ "i16:16:32-i32:32:32-i64:32:32-n32"), InstrInfo(), FrameInfo(*this), - TLInfo(*this) { + TLInfo(*this), + TSInfo(*this) { } bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM, Modified: llvm/trunk/lib/Target/XCore/XCoreTargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/XCore/XCoreTargetMachine.h?rev=103481&r1=103480&r2=103481&view=diff ============================================================================== --- llvm/trunk/lib/Target/XCore/XCoreTargetMachine.h (original) +++ llvm/trunk/lib/Target/XCore/XCoreTargetMachine.h Tue May 11 12:31:57 2010 @@ -20,6 +20,7 @@ #include "XCoreSubtarget.h" #include "XCoreInstrInfo.h" #include "XCoreISelLowering.h" +#include "XCoreSelectionDAGInfo.h" namespace llvm { @@ -29,6 +30,7 @@ XCoreInstrInfo InstrInfo; XCoreFrameInfo FrameInfo; XCoreTargetLowering TLInfo; + XCoreSelectionDAGInfo TSInfo; public: XCoreTargetMachine(const Target &T, const std::string &TT, 
const std::string &FS); @@ -40,6 +42,10 @@ return &TLInfo; } + virtual const XCoreSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const TargetRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } From gohman at apple.com Tue May 11 13:03:41 2010 From: gohman at apple.com (Dan Gohman) Date: Tue, 11 May 2010 18:03:41 -0000 Subject: [llvm-commits] [llvm] r103483 - /llvm/trunk/include/llvm/Target/TargetSelectionDAGInfo.h Message-ID: <20100511180341.30FCF312800A@llvm.org> Author: djg Date: Tue May 11 13:03:41 2010 New Revision: 103483 URL: http://llvm.org/viewvc/llvm-project?rev=103483&view=rev Log: Fix a comment. Modified: llvm/trunk/include/llvm/Target/TargetSelectionDAGInfo.h Modified: llvm/trunk/include/llvm/Target/TargetSelectionDAGInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetSelectionDAGInfo.h?rev=103483&r1=103482&r2=103483&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetSelectionDAGInfo.h (original) +++ llvm/trunk/include/llvm/Target/TargetSelectionDAGInfo.h Tue May 11 13:03:41 2010 @@ -24,7 +24,7 @@ class TargetMachine; //===----------------------------------------------------------------------===// -/// TargetSelectionDAGLowering - Targets can subclass this to parameterize the +/// TargetSelectionDAGInfo - Targets can subclass this to parameterize the /// SelectionDAG lowering and instruction selection process. /// class TargetSelectionDAGInfo { From bob.wilson at apple.com Tue May 11 13:51:10 2010 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 11 May 2010 11:51:10 -0700 Subject: [llvm-commits] PATCH: use LLVM_GET_REG_NAME consistently in llvm-gcc Message-ID: <060D0BBE-FF47-439F-8C60-F89AF3748C41@apple.com> Duncan pointed out that there is one remaining reference to "reg_names" in gcc/llvm-convert.cpp, and he offered to test a patch to use LLVM_GET_REG_NAME instead. 
Here you go, Duncan. Happy testing! -------------- next part -------------- A non-text attachment was scrubbed... Name: llvm-regname.patch Type: application/octet-stream Size: 1580 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100511/58a4d2e2/attachment.obj From stoklund at 2pi.dk Tue May 11 13:54:45 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 11 May 2010 18:54:45 -0000 Subject: [llvm-commits] [llvm] r103488 - in /llvm/trunk: lib/CodeGen/RegAllocFast.cpp test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll Message-ID: <20100511185445.5FC50312800A@llvm.org> Author: stoklund Date: Tue May 11 13:54:45 2010 New Revision: 103488 URL: http://llvm.org/viewvc/llvm-project?rev=103488&view=rev Log: Mostly rewrite RegAllocFast. Sorry for the big change. The path leading up to this patch had some TableGen changes that I didn't want to commit before I knew they were useful. They weren't, and this version does not need them. The fast register allocator now does no liveness calculations. Instead it relies on kill flags provided by isel. (Currently those kill flags are also ignored due to isel bugs). The allocation algorithm is supposed to work with any subset of valid kill flags. More kill flags simply means fewer spills inserted. Registers are allocated from a working set that contains no aliases. That means most allocations can be done directly without expensive alias checks. When the working set runs out of registers we do the full alias check to find new free registers. 
Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp llvm/trunk/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll llvm/trunk/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103488&r1=103487&r2=103488&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Tue May 11 13:54:45 2010 @@ -18,7 +18,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/Target/TargetInstrInfo.h" @@ -57,65 +56,44 @@ // values are spilled. IndexedMap StackSlotForVirtReg; - // Virt2PhysRegMap - This map contains entries for each virtual register + // Virt2PhysMap - This map contains entries for each virtual register // that is currently available in a physical register. - IndexedMap Virt2PhysRegMap; + DenseMap Virt2PhysMap; - unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) { - return Virt2PhysRegMap[VirtReg]; - } + // RegState - Track the state of a physical register. + enum RegState { + // A disabled register is not available for allocation, but an alias may + // be in use. A register can only be moved out of the disabled state if + // all aliases are disabled. + regDisabled, + + // A free register is not currently in use and can be allocated + // immediately without checking aliases. + regFree, + + // A reserved register has been assigned explicitly (e.g., setting up a + // call parameter), and it remains reserved until it is used. + regReserved + + // A register state may also be a virtual register number, indicating that + // the physical register is currently allocated to a virtual register.
In + // that case, Virt2PhysMap contains the inverse mapping. + }; - // PhysRegsUsed - This array is effectively a map, containing entries for - // each physical register that currently has a value (ie, it is in - // Virt2PhysRegMap). The value mapped to is the virtual register - // corresponding to the physical register (the inverse of the - // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned - // because it is used by a future instruction, and to -2 if it is not - // allocatable. If the entry for a physical register is -1, then the - // physical register is "not in the map". - // - std::vector PhysRegsUsed; + // PhysRegState - One of the RegState enums, or a virtreg. + std::vector PhysRegState; // UsedInInstr - BitVector of physregs that are used in the current // instruction, and so cannot be allocated. BitVector UsedInInstr; - // Virt2LastUseMap - This maps each virtual register to its last use - // (MachineInstr*, operand index pair). - IndexedMap, VirtReg2IndexFunctor> - Virt2LastUseMap; - - std::pair& getVirtRegLastUse(unsigned Reg) { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - return Virt2LastUseMap[Reg]; - } - - // VirtRegModified - This bitset contains information about which virtual - // registers need to be spilled back to memory when their registers are - // scavenged. If a virtual register has simply been rematerialized, there - // is no reason to spill it to memory when we need the register back. - // - BitVector VirtRegModified; - - // UsedInMultipleBlocks - Tracks whether a particular register is used in - // more than one block. 
- BitVector UsedInMultipleBlocks; - - void markVirtRegModified(unsigned Reg, bool Val = true) { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - Reg -= TargetRegisterInfo::FirstVirtualRegister; - if (Val) - VirtRegModified.set(Reg); - else - VirtRegModified.reset(Reg); - } - - bool isVirtRegModified(unsigned Reg) const { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - assert(Reg - TargetRegisterInfo::FirstVirtualRegister < - VirtRegModified.size() && "Illegal virtual register!"); - return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister]; - } + // PhysRegDirty - A bit is set for each physreg that holds a dirty virtual + // register. Bits for physregs that are not mapped to a virtual register are + // invalid. + BitVector PhysRegDirty; + + // ReservedRegs - vector of reserved physical registers. + BitVector ReservedRegs; public: virtual const char *getPassName() const { @@ -124,104 +102,32 @@ virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired(); AU.addRequiredID(PHIEliminationID); AU.addRequiredID(TwoAddressInstructionPassID); MachineFunctionPass::getAnalysisUsage(AU); } private: - /// runOnMachineFunction - Register allocate the whole function bool runOnMachineFunction(MachineFunction &Fn); - - /// AllocateBasicBlock - Register allocate the specified basic block. void AllocateBasicBlock(MachineBasicBlock &MBB); - - - /// areRegsEqual - This method returns true if the specified registers are - /// related to each other. To do this, it checks to see if they are equal - /// or if the first register is in the alias set of the second register. 
- /// - bool areRegsEqual(unsigned R1, unsigned R2) const { - if (R1 == R2) return true; - for (const unsigned *AliasSet = TRI->getAliasSet(R2); - *AliasSet; ++AliasSet) { - if (*AliasSet == R1) return true; - } - return false; - } - - /// getStackSpaceFor - This returns the frame index of the specified virtual - /// register on the stack, allocating space if necessary. int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); - - /// removePhysReg - This method marks the specified physical register as no - /// longer being in use. - /// - void removePhysReg(unsigned PhysReg); - - /// spillVirtReg - This method spills the value specified by PhysReg into - /// the virtual register slot specified by VirtReg. It then updates the RA - /// data structures to indicate the fact that PhysReg is now available. - /// + void killVirtReg(unsigned VirtReg); void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned VirtReg, unsigned PhysReg); - - /// spillPhysReg - This method spills the specified physical register into - /// the virtual register slot associated with it. If OnlyVirtRegs is set to - /// true, then the request is ignored if the physical register does not - /// contain a virtual register. - /// + unsigned VirtReg, bool isKill); + void killPhysReg(unsigned PhysReg); void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned PhysReg, bool OnlyVirtRegs = false); - - /// assignVirtToPhysReg - This method updates local state so that we know - /// that PhysReg is the proper container for VirtReg now. The physical - /// register must not be used for anything else when this is called. - /// + unsigned PhysReg, bool isKill); void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); - - /// isPhysRegAvailable - Return true if the specified physical register is - /// free and available for use. This also includes checking to see if - /// aliased registers are all free... 
- /// - bool isPhysRegAvailable(unsigned PhysReg) const; - - /// isPhysRegSpillable - Can PhysReg be freed by spilling? - bool isPhysRegSpillable(unsigned PhysReg) const; - - /// getFreeReg - Look to see if there is a free register available in the - /// specified register class. If not, return 0. - /// - unsigned getFreeReg(const TargetRegisterClass *RC); - - /// getReg - Find a physical register to hold the specified virtual - /// register. If all compatible physical registers are used, this method - /// spills the last used virtual register to the stack, and uses that - /// register. If NoFree is true, that means the caller knows there isn't - /// a free register, do not call getFreeReg(). - unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg, bool NoFree = false); - - /// reloadVirtReg - This method transforms the specified virtual - /// register use to refer to a physical register. This method may do this - /// in one of several ways: if the register is available in a physical - /// register already, it uses that physical register. If the value is not - /// in a physical register, and if there are physical registers available, - /// it loads it into a register: PhysReg if that is an available physical - /// register, otherwise any physical register of the right class. - /// If register pressure is high, and it is possible, it tries to fold the - /// load of the virtual register into the instruction itself. It avoids - /// doing this if register pressure is low to improve the chance that - /// subsequent instructions can use the reloaded value. This method - /// returns the modified instruction. 
- /// - MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, SmallSet &RRegs, - unsigned PhysReg); - - void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I, - unsigned PhysReg); + unsigned allocVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg); + unsigned defineVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg); + unsigned reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg); + void reservePhysReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned PhysReg); + void spillAll(MachineBasicBlock &MBB, MachineInstr *MI); + void setPhysReg(MachineOperand &MO, unsigned PhysReg); }; char RAFast::ID = 0; } @@ -243,676 +149,544 @@ return FrameIdx; } - -/// removePhysReg - This method marks the specified physical register as no -/// longer being in use. -/// -void RAFast::removePhysReg(unsigned PhysReg) { - PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used -} - - -/// spillVirtReg - This method spills the value specified by PhysReg into the -/// virtual register slot specified by VirtReg. It then updates the RA data -/// structures to indicate the fact that PhysReg is now available. -/// +/// killVirtReg - Mark virtreg as no longer available. +void RAFast::killVirtReg(unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "killVirtReg needs a virtual register"); + DEBUG(dbgs() << " Killing %reg" << VirtReg << "\n"); + DenseMap::iterator i = Virt2PhysMap.find(VirtReg); + if (i == Virt2PhysMap.end()) return; + unsigned PhysReg = i->second; + assert(PhysRegState[PhysReg] == VirtReg && "Broken RegState mapping"); + PhysRegState[PhysReg] = regFree; + Virt2PhysMap.erase(i); +} + +/// spillVirtReg - This method spills the value specified by VirtReg into the +/// corresponding stack slot if needed. If isKill is set, the register is also +/// killed. 
void RAFast::spillVirtReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned VirtReg, unsigned PhysReg) { - assert(VirtReg && "Spilling a physical register is illegal!" - " Must not have appropriate kill for the register or use exists beyond" - " the intended one."); - DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg) - << " containing %reg" << VirtReg); - - if (!isVirtRegModified(VirtReg)) { - DEBUG(dbgs() << " which has not been modified, so no store necessary!"); - std::pair &LastUse = getVirtRegLastUse(VirtReg); - if (LastUse.first) - LastUse.first->getOperand(LastUse.second).setIsKill(); - } else { - // Otherwise, there is a virtual register corresponding to this physical - // register. We only need to spill it into its stack slot if it has been - // modified. + MachineBasicBlock::iterator I, + unsigned VirtReg, bool isKill) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Spilling a physical register is illegal!"); + DenseMap::iterator i = Virt2PhysMap.find(VirtReg); + assert(i != Virt2PhysMap.end() && "Spilling unmapped virtual register"); + unsigned PhysReg = i->second; + assert(PhysRegState[PhysReg] == VirtReg && "Broken RegState mapping"); + + if (PhysRegDirty.test(PhysReg)) { + PhysRegDirty.reset(PhysReg); + DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg) + << " containing %reg" << VirtReg); const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << " to stack slot #" << FrameIndex); - // If the instruction reads the register that's spilled, (e.g. this can - // happen if it is a move to a physical register), then the spill - // instruction is not a kill. 
- bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg)); + DEBUG(dbgs() << " to stack slot #" << FrameIndex << "\n"); TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC, TRI); ++NumStores; // Update statistics } - getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available - - DEBUG(dbgs() << '\n'); - removePhysReg(PhysReg); + if (isKill) { + PhysRegState[PhysReg] = regFree; + Virt2PhysMap.erase(i); + } } - -/// spillPhysReg - This method spills the specified physical register into the -/// virtual register slot associated with it. If OnlyVirtRegs is set to true, -/// then the request is ignored if the physical register does not contain a -/// virtual register. -/// -void RAFast::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned PhysReg, bool OnlyVirtRegs) { - if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used! - assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!"); - if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs) - spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg); +/// spillAll - Spill all dirty virtregs without killing them. +void RAFast::spillAll(MachineBasicBlock &MBB, MachineInstr *MI) { + SmallVector Dirty; + for (DenseMap::iterator i = Virt2PhysMap.begin(), + e = Virt2PhysMap.end(); i != e; ++i) + if (PhysRegDirty.test(i->second)) + Dirty.push_back(i->first); + for (unsigned i = 0, e = Dirty.size(); i != e; ++i) + spillVirtReg(MBB, MI, Dirty[i], false); +} + +/// killPhysReg - Kill any virtual register aliased by PhysReg. +void RAFast::killPhysReg(unsigned PhysReg) { + // Fast path for the normal case. + switch (unsigned VirtReg = PhysRegState[PhysReg]) { + case regDisabled: + break; + case regFree: + return; + case regReserved: + PhysRegState[PhysReg] = regFree; + return; + default: + killVirtReg(VirtReg); return; } - // If the selected register aliases any other registers, we must make - // sure that one of the aliases isn't alive. 
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) { - if (PhysRegsUsed[*AliasSet] == -1 || // Spill aliased register. - PhysRegsUsed[*AliasSet] == -2) // If allocatable. - continue; - - if (PhysRegsUsed[*AliasSet]) - spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); + // This is a disabled register, we have to check aliases. + for (const unsigned *AS = TRI->getAliasSet(PhysReg); + unsigned Alias = *AS; ++AS) { + switch (unsigned VirtReg = PhysRegState[Alias]) { + case regDisabled: + case regFree: + break; + case regReserved: + PhysRegState[Alias] = regFree; + break; + default: + killVirtReg(VirtReg); + break; + } } } +/// spillPhysReg - Spill any dirty virtual registers that aliases PhysReg. If +/// isKill is set, they are also killed. +void RAFast::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned PhysReg, bool isKill) { + switch (unsigned VirtReg = PhysRegState[PhysReg]) { + case regDisabled: + break; + case regFree: + return; + case regReserved: + if (isKill) + PhysRegState[PhysReg] = regFree; + return; + default: + spillVirtReg(MBB, MI, VirtReg, isKill); + return; + } + + // This is a disabled register, we have to check aliases. + for (const unsigned *AS = TRI->getAliasSet(PhysReg); + unsigned Alias = *AS; ++AS) { + switch (unsigned VirtReg = PhysRegState[Alias]) { + case regDisabled: + case regFree: + break; + case regReserved: + if (isKill) + PhysRegState[Alias] = regFree; + break; + default: + spillVirtReg(MBB, MI, VirtReg, isKill); + break; + } + } +} /// assignVirtToPhysReg - This method updates local state so that we know /// that PhysReg is the proper container for VirtReg now. The physical /// register must not be used for anything else when this is called. 
/// void RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { - assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!"); - // Update information to note the fact that this register was just used, and - // it holds VirtReg. - PhysRegsUsed[PhysReg] = VirtReg; - getVirt2PhysRegMapSlot(VirtReg) = PhysReg; - UsedInInstr.set(PhysReg); + DEBUG(dbgs() << " Assigning %reg" << VirtReg << " to " + << TRI->getName(PhysReg) << "\n"); + Virt2PhysMap.insert(std::make_pair(VirtReg, PhysReg)); + PhysRegState[PhysReg] = VirtReg; } +/// allocVirtReg - Allocate a physical register for VirtReg. +unsigned RAFast::allocVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg) { + const unsigned spillCost = 100; + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Can only allocate virtual registers"); -/// isPhysRegAvailable - Return true if the specified physical register is free -/// and available for use. This also includes checking to see if aliased -/// registers are all free... -/// -bool RAFast::isPhysRegAvailable(unsigned PhysReg) const { - if (PhysRegsUsed[PhysReg] != -1) return false; - - // If the selected register aliases any other allocated registers, it is - // not free! - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) - if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use? - return false; // Can't use this reg then. - return true; -} - -/// isPhysRegSpillable - Return true if the specified physical register can be -/// spilled for use in the current instruction. -/// -bool RAFast::isPhysRegSpillable(unsigned PhysReg) const { - // Test that PhysReg and all aliases are either free or assigned to a VirtReg - // that is not used in the instruction. 
- if (PhysRegsUsed[PhysReg] != -1 && - (PhysRegsUsed[PhysReg] <= 0 || UsedInInstr.test(PhysReg))) - return false; - - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) - if (PhysRegsUsed[*AliasSet] != -1 && - (PhysRegsUsed[*AliasSet] <= 0 || UsedInInstr.test(*AliasSet))) - return false; - return true; -} - + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF); -/// getFreeReg - Look to see if there is a free register available in the -/// specified register class. If not, return 0. -/// -unsigned RAFast::getFreeReg(const TargetRegisterClass *RC) { - // Get iterators defining the range of registers that are valid to allocate in - // this class, which also specifies the preferred allocation order. - TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); - TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); - - for (; RI != RE; ++RI) - if (isPhysRegAvailable(*RI)) { // Is reg unused? - assert(*RI != 0 && "Cannot use register!"); - return *RI; // Found an unused register! + // First try to find a completely free register. + unsigned BestCost = 0, BestReg = 0; + bool hasDisabled = false; + for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { + unsigned PhysReg = *I; + switch(PhysRegState[PhysReg]) { + case regDisabled: + hasDisabled = true; + case regReserved: + continue; + case regFree: + if (!UsedInInstr.test(PhysReg)) { + assignVirtToPhysReg(VirtReg, PhysReg); + return PhysReg; + } + continue; + default: + // Grab the first spillable register we meet. + if (!BestReg && !UsedInInstr.test(PhysReg)) { + BestReg = PhysReg; + BestCost = PhysRegDirty.test(PhysReg) ? spillCost : 1; + } + continue; } - return 0; -} - + } -/// getReg - Find a physical register to hold the specified virtual -/// register. 
If all compatible physical registers are used, this method spills -/// the last used virtual register to the stack, and uses that register. -/// -unsigned RAFast::getReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned VirtReg, bool NoFree) { - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + DEBUG(dbgs() << " Allocating %reg" << VirtReg << " from " << RC->getName() + << " candidate=" << TRI->getName(BestReg) << "\n"); - // First check to see if we have a free register of the requested type... - unsigned PhysReg = NoFree ? 0 : getFreeReg(RC); + // Try to extend the working set for RC if there were any disabled registers. + if (hasDisabled && (!BestReg || BestCost >= spillCost)) { + for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { + unsigned PhysReg = *I; + if (PhysRegState[PhysReg] != regDisabled || UsedInInstr.test(PhysReg)) + continue; - if (PhysReg != 0) { - // Assign the register. - assignVirtToPhysReg(VirtReg, PhysReg); - return PhysReg; - } - - // If we didn't find an unused register, scavenge one now! Don't be fancy, - // just grab the first possible register. - TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); - TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); - - for (; RI != RE; ++RI) - if (isPhysRegSpillable(*RI)) { - PhysReg = *RI; - break; + // Calculate the cost of bringing PhysReg into the working set. + unsigned Cost=0; + bool Impossible = false; + for (const unsigned *AS = TRI->getAliasSet(PhysReg); + unsigned Alias = *AS; ++AS) { + if (UsedInInstr.test(Alias)) { + Impossible = true; + break; + } + switch (PhysRegState[Alias]) { + case regDisabled: + break; + case regReserved: + Impossible = true; + break; + case regFree: + Cost++; + break; + default: + Cost += PhysRegDirty.test(Alias) ? 
spillCost : 1; + break; + } + } + if (Impossible) continue; + DEBUG(dbgs() << " - candidate " << TRI->getName(PhysReg) + << " cost=" << Cost << "\n"); + if (!BestReg || Cost < BestCost) { + BestReg = PhysReg; + BestCost = Cost; + if (Cost < spillCost) break; + } } + } - assert(PhysReg && "Physical register not assigned!?!?"); - spillPhysReg(MBB, I, PhysReg); - assignVirtToPhysReg(VirtReg, PhysReg); - return PhysReg; -} - - -/// reloadVirtReg - This method transforms the specified virtual -/// register use to refer to a physical register. This method may do this in -/// one of several ways: if the register is available in a physical register -/// already, it uses that physical register. If the value is not in a physical -/// register, and if there are physical registers available, it loads it into a -/// register: PhysReg if that is an available physical register, otherwise any -/// register. If register pressure is high, and it is possible, it tries to -/// fold the load of the virtual register into the instruction itself. It -/// avoids doing this if register pressure is low to improve the chance that -/// subsequent instructions can use the reloaded value. This method returns -/// the modified instruction. -/// -MachineInstr *RAFast::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, - SmallSet &ReloadedRegs, - unsigned PhysReg) { - unsigned VirtReg = MI->getOperand(OpNum).getReg(); - - // If the virtual register is already available, just update the instruction - // and return. - if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { - MI->getOperand(OpNum).setReg(PR); // Assign the input register - if (!MI->isDebugValue()) { - // Do not do these for DBG_VALUE as they can affect codegen. - UsedInInstr.set(PR); - getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + if (BestReg) { + // BestCost is 0 when all aliases are already disabled. 
+ if (BestCost) { + if (PhysRegState[BestReg] != regDisabled) + spillVirtReg(MBB, MI, PhysRegState[BestReg], true); + else { + MF->getRegInfo().setPhysRegUsed(BestReg); + // Make sure all aliases are disabled. + for (const unsigned *AS = TRI->getAliasSet(BestReg); + unsigned Alias = *AS; ++AS) { + MF->getRegInfo().setPhysRegUsed(Alias); + switch (PhysRegState[Alias]) { + case regDisabled: + continue; + case regFree: + PhysRegState[Alias] = regDisabled; + break; + default: + spillVirtReg(MBB, MI, PhysRegState[Alias], true); + PhysRegState[Alias] = regDisabled; + break; + } + } + } } - return MI; + assignVirtToPhysReg(VirtReg, BestReg); + return BestReg; } - // Otherwise, we need to fold it into the current instruction, or reload it. - // If we have registers available to hold the value, use them. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - // If we already have a PhysReg (this happens when the instruction is a - // reg-to-reg copy with a PhysReg destination) use that. - if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) || - !isPhysRegAvailable(PhysReg)) - PhysReg = getFreeReg(RC); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - - if (PhysReg) { // Register is available, allocate it! - assignVirtToPhysReg(VirtReg, PhysReg); - } else { // No registers available. - // Force some poor hapless value out of the register file to - // make room for the new register, and reload it. - PhysReg = getReg(MBB, MI, VirtReg, true); + // Nothing we can do. + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; + if (MI->isInlineAsm()) { + Msg << "\nPlease check your inline asm statement for " + << "invalid constraints:\n"; + MI->print(Msg, TM); } + report_fatal_error(Msg.str()); + return 0; +} - markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded +/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. 
+unsigned RAFast::defineVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Not a virtual register"); + unsigned PhysReg = Virt2PhysMap.lookup(VirtReg); + if (!PhysReg) + PhysReg = allocVirtReg(MBB, MI, VirtReg); + UsedInInstr.set(PhysReg); + PhysRegDirty.set(PhysReg); + return PhysReg; +} - DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " - << TRI->getName(PhysReg) << "\n"); +/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it. +unsigned RAFast::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Not a virtual register"); + unsigned PhysReg = Virt2PhysMap.lookup(VirtReg); + if (!PhysReg) { + PhysReg = allocVirtReg(MBB, MI, VirtReg); + PhysRegDirty.reset(PhysReg); + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " + << TRI->getName(PhysReg) << "\n"); + TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC, TRI); + ++NumLoads; + } + UsedInInstr.set(PhysReg); + return PhysReg; +} - // Add move instruction(s) - TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC, TRI); - ++NumLoads; // Update statistics +/// reservePhysReg - Mark PhysReg as reserved. This is very similar to +/// defineVirtReg except the physreg is reserved instead of allocated. +void RAFast::reservePhysReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned PhysReg) { + switch (unsigned VirtReg = PhysRegState[PhysReg]) { + case regDisabled: + break; + case regFree: + PhysRegState[PhysReg] = regReserved; + return; + case regReserved: + return; + default: + spillVirtReg(MBB, MI, VirtReg, true); + PhysRegState[PhysReg] = regReserved; + return; + } + // This is a disabled register, disable all aliases.
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg); + unsigned Alias = *AS; ++AS) { + switch (unsigned VirtReg = PhysRegState[Alias]) { + case regDisabled: + case regFree: + break; + case regReserved: + // is a super register already reserved? + if (TRI->isSuperRegister(PhysReg, Alias)) + return; + break; + default: + spillVirtReg(MBB, MI, VirtReg, true); + break; + } + PhysRegState[Alias] = regDisabled; + MF->getRegInfo().setPhysRegUsed(Alias); + } + PhysRegState[PhysReg] = regReserved; MF->getRegInfo().setPhysRegUsed(PhysReg); - MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register - getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); +} - if (!ReloadedRegs.insert(PhysReg)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, TM); - } - report_fatal_error(Msg.str()); - } - for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); - *SubRegs; ++SubRegs) { - if (ReloadedRegs.insert(*SubRegs)) continue; - - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, TM); - } - report_fatal_error(Msg.str()); - } - - return MI; -} - -/// isReadModWriteImplicitKill - True if this is an implicit kill for a -/// read/mod/write register, i.e. update partial register. -static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && - MO.isDef() && !MO.isDead()) - return true; - } - return false; -} - -/// isReadModWriteImplicitDef - True if this is an implicit def for a -/// read/mod/write register, i.e. 
update partial register. -static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && - !MO.isDef() && MO.isKill()) - return true; - } - return false; +// setPhysReg - Change MO to refer to PhysReg, considering subregs. +void RAFast::setPhysReg(MachineOperand &MO, unsigned PhysReg) { + if (unsigned Idx = MO.getSubReg()) { + MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, Idx) : 0); + MO.setSubReg(0); + } else + MO.setReg(PhysReg); } void RAFast::AllocateBasicBlock(MachineBasicBlock &MBB) { - // loop over each instruction - MachineBasicBlock::iterator MII = MBB.begin(); + DEBUG(dbgs() << "\nBB#" << MBB.getNumber() << ", "<< MBB.getName() << "\n"); + + PhysRegState.assign(TRI->getNumRegs(), regDisabled); + assert(Virt2PhysMap.empty() && "Mapping not cleared form last block?"); + PhysRegDirty.reset(); - DEBUG({ - const BasicBlock *LBB = MBB.getBasicBlock(); - if (LBB) - dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName(); - }); + MachineBasicBlock::iterator MII = MBB.begin(); - // Add live-in registers as active. + // Add live-in registers as live. for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(), - E = MBB.livein_end(); I != E; ++I) { - unsigned Reg = *I; - MF->getRegInfo().setPhysRegUsed(Reg); - PhysRegsUsed[Reg] = 0; // It is free and reserved now - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - MF->getRegInfo().setPhysRegUsed(*SubRegs); - } - } + E = MBB.livein_end(); I != E; ++I) + reservePhysReg(MBB, MII, *I); + + SmallVector VirtKills, PhysKills, PhysDefs; // Otherwise, sequentially allocate each instruction in the MBB. 
while (MII != MBB.end()) { MachineInstr *MI = MII++; const TargetInstrDesc &TID = MI->getDesc(); DEBUG({ - dbgs() << "\nStarting RegAlloc of: " << *MI; - dbgs() << " Regs have values: "; - for (unsigned i = 0; i != TRI->getNumRegs(); ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) - dbgs() << "[" << TRI->getName(i) - << ",%reg" << PhysRegsUsed[i] << "] "; + dbgs() << "\nStarting RegAlloc of: " << *MI << "Working set:"; + for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { + if (PhysRegState[Reg] == regDisabled) continue; + dbgs() << " " << TRI->getName(Reg); + switch(PhysRegState[Reg]) { + case regFree: + break; + case regReserved: + dbgs() << "(resv)"; + break; + default: + dbgs() << "=%reg" << PhysRegState[Reg]; + if (PhysRegDirty.test(Reg)) + dbgs() << "*"; + assert(Virt2PhysMap.lookup(PhysRegState[Reg]) == Reg && + "Bad inverse map"); + break; + } + } dbgs() << '\n'; + // Check that Virt2PhysMap is the inverse. + for (DenseMap::iterator i = Virt2PhysMap.begin(), + e = Virt2PhysMap.end(); i != e; ++i) { + assert(TargetRegisterInfo::isVirtualRegister(i->first) && + "Bad map key"); + assert(TargetRegisterInfo::isPhysicalRegister(i->second) && + "Bad map value"); + assert(PhysRegState[i->second] == i->first && "Bad inverse map"); + } }); - // Track registers used by instruction. - UsedInInstr.reset(); - - // Determine whether this is a copy instruction. The cases where the - // source or destination are phys regs are handled specially. - unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg; - unsigned SrcCopyPhysReg = 0U; - bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, - SrcCopySubReg, DstCopySubReg); - if (isCopy && SrcCopySubReg == 0 && DstCopySubReg == 0 && - TargetRegisterInfo::isVirtualRegister(SrcCopyReg)) - SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg); - - // Loop over the implicit uses, making sure they don't get reallocated. 
- if (TID.ImplicitUses) { - for (const unsigned *ImplicitUses = TID.ImplicitUses; - *ImplicitUses; ++ImplicitUses) - UsedInInstr.set(*ImplicitUses); - } - - SmallVector Kills; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isKill()) continue; - - if (!MO.isImplicit()) - Kills.push_back(MO.getReg()); - else if (!isReadModWriteImplicitKill(MI, MO.getReg())) - // These are extra physical register kills when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. - Kills.push_back(MO.getReg()); - } - - // If any physical regs are earlyclobber, spill any value they might - // have in them, then mark them unallocatable. - // If any virtual regs are earlyclobber, allocate them now (before - // freeing inputs that are killed). - if (MI->isInlineAsm()) { + // Debug values are not allowed to change codegen in any way. + if (MI->isDebugValue()) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() || - !MO.getReg()) - continue; - - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - unsigned DestVirtReg = MO.getReg(); - unsigned DestPhysReg; - - // If DestVirtReg already has a value, use it. - if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) - DestPhysReg = getReg(MBB, MI, DestVirtReg); - MF->getRegInfo().setPhysRegUsed(DestPhysReg); - markVirtRegModified(DestVirtReg); - getVirtRegLastUse(DestVirtReg) = - std::make_pair((MachineInstr*)0, 0); - DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"); - MO.setReg(DestPhysReg); // Assign the earlyclobber register - } else { - unsigned Reg = MO.getReg(); - if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. 
- // These are extra physical register defs when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. - if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; - - MF->getRegInfo().setPhysRegUsed(Reg); - spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg - PhysRegsUsed[Reg] = 0; // It is free and reserved now - - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - MF->getRegInfo().setPhysRegUsed(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - } - } + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + // This may be 0 if the register is currently spilled. Tough. + setPhysReg(MO, Virt2PhysMap.lookup(Reg)); } + // Next instruction. + continue; } - // If a DBG_VALUE says something is located in a spilled register, - // change the DBG_VALUE to be undef, which prevents the register - // from being reloaded here. Doing that would change the generated - // code, unless another use immediately follows this instruction. - if (MI->isDebugValue() && - MI->getNumOperands()==3 && MI->getOperand(0).isReg()) { - unsigned VirtReg = MI->getOperand(0).getReg(); - if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) && - !getVirt2PhysRegMapSlot(VirtReg)) - MI->getOperand(0).setReg(0U); - } - - // Get the used operands into registers. This has the potential to spill - // incoming values if we are out of registers. Note that we completely - // ignore physical register uses here. We assume that if an explicit - // physical register is referenced by the instruction, that it is guaranteed - // to be live-in, or the input is badly hosed. - // - SmallSet ReloadedRegs; - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + // Track registers used by instruction. + UsedInInstr.reset(); + PhysDefs.clear(); + + // First scan. 
+ // Mark physreg uses and early clobbers as used. + // Collect PhysKills. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - // here we are looking for only used operands (never def&use) - if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) - MI = reloadVirtReg(MBB, MI, i, ReloadedRegs, - isCopy ? DstCopyReg : 0); - } - - // If this instruction is the last user of this register, kill the - // value, freeing the register being used, so it doesn't need to be - // spilled to memory. - // - for (unsigned i = 0, e = Kills.size(); i != e; ++i) { - unsigned VirtReg = Kills[i]; - unsigned PhysReg = VirtReg; - if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { - // If the virtual register was never materialized into a register, it - // might not be in the map, but it won't hurt to zero it out anyway. - unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); - PhysReg = PhysRegSlot; - PhysRegSlot = 0; - } else if (PhysRegsUsed[PhysReg] == -2) { - // Unallocatable register dead, ignore. 
- continue; - } else { - assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) && - "Silently clearing a virtual register?"); - } + if (!MO.isReg()) continue; - if (!PhysReg) continue; + // FIXME: For now, don't trust kill flags + if (MO.isUse()) MO.setIsKill(false); - DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg) - << "[%reg" << VirtReg <<"], removing it from live set\n"); - removePhysReg(PhysReg); - for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] != -2) { - DEBUG(dbgs() << " Last use of " - << TRI->getName(*SubRegs) << "[%reg" << VirtReg - <<"], removing it from live set\n"); - removePhysReg(*SubRegs); - } + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) || + ReservedRegs.test(Reg)) continue; + if (MO.isUse()) { + PhysKills.push_back(Reg); // Any clean physreg use is a kill. + UsedInInstr.set(Reg); + } else if (MO.isEarlyClobber()) { + spillPhysReg(MBB, MI, Reg, true); + UsedInInstr.set(Reg); + PhysDefs.push_back(Reg); } } - // Track registers defined by instruction. - UsedInInstr.reset(); - - // Loop over all of the operands of the instruction, spilling registers that - // are defined, and marking explicit destinations in the PhysRegsUsed map. + // Second scan. + // Allocate virtreg uses and early clobbers. + // Collect VirtKills for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() || - MO.isEarlyClobber() || - !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - continue; - + if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. - // These are extra physical register defs when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. 
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; - - MF->getRegInfo().setPhysRegUsed(Reg); - spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg - PhysRegsUsed[Reg] = 0; // It is free and reserved now - - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - - MF->getRegInfo().setPhysRegUsed(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (MO.isUse()) { + setPhysReg(MO, reloadVirtReg(MBB, MI, Reg)); + if (MO.isKill()) + VirtKills.push_back(Reg); + } else if (MO.isEarlyClobber()) { + unsigned PhysReg = defineVirtReg(MBB, MI, Reg); + setPhysReg(MO, PhysReg); + PhysDefs.push_back(PhysReg); } } - // Loop over the implicit defs, spilling them as well. - if (TID.ImplicitDefs) { - for (const unsigned *ImplicitDefs = TID.ImplicitDefs; - *ImplicitDefs; ++ImplicitDefs) { - unsigned Reg = *ImplicitDefs; - if (PhysRegsUsed[Reg] != -2) { - spillPhysReg(MBB, MI, Reg, true); - PhysRegsUsed[Reg] = 0; // It is free and reserved now - } - MF->getRegInfo().setPhysRegUsed(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - MF->getRegInfo().setPhysRegUsed(*SubRegs); - } - } - } + // Process virtreg kills + for (unsigned i = 0, e = VirtKills.size(); i != e; ++i) + killVirtReg(VirtKills[i]); + VirtKills.clear(); + + // Process physreg kills + for (unsigned i = 0, e = PhysKills.size(); i != e; ++i) + killPhysReg(PhysKills[i]); + PhysKills.clear(); - SmallVector DeadDefs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDead()) - DeadDefs.push_back(MO.getReg()); + // Track registers defined by instruction - early clobbers at this point. 
+ UsedInInstr.reset(); + for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { + unsigned PhysReg = PhysDefs[i]; + UsedInInstr.set(PhysReg); + for (const unsigned *AS = TRI->getAliasSet(PhysReg); + unsigned Alias = *AS; ++AS) + UsedInInstr.set(Alias); } - // Okay, we have allocated all of the source operands and spilled any values - // that would be destroyed by defs of this instruction. Loop over the - // explicit defs and assign them to a register, spilling incoming values if - // we need to scavenge a register. - // + // Third scan. + // Allocate defs and collect dead defs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.getReg() || - MO.isEarlyClobber() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - - unsigned DestVirtReg = MO.getReg(); - unsigned DestPhysReg; + if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue; + unsigned Reg = MO.getReg(); - // If DestVirtReg already has a value, use it. - if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) { - // If this is a copy try to reuse the input as the output; - // that will make the copy go away. - // If this is a copy, the source reg is a phys reg, and - // that reg is available, use that phys reg for DestPhysReg. - // If this is a copy, the source reg is a virtual reg, and - // the phys reg that was assigned to that virtual reg is now - // available, use that phys reg for DestPhysReg. (If it's now - // available that means this was the last use of the source.) 
- if (isCopy && - TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) && - isPhysRegAvailable(SrcCopyReg)) { - DestPhysReg = SrcCopyReg; - assignVirtToPhysReg(DestVirtReg, DestPhysReg); - } else if (isCopy && - TargetRegisterInfo::isVirtualRegister(SrcCopyReg) && - SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) && - MF->getRegInfo().getRegClass(DestVirtReg)-> - contains(SrcCopyPhysReg)) { - DestPhysReg = SrcCopyPhysReg; - assignVirtToPhysReg(DestVirtReg, DestPhysReg); - } else - DestPhysReg = getReg(MBB, MI, DestVirtReg); - } - MF->getRegInfo().setPhysRegUsed(DestPhysReg); - markVirtRegModified(DestVirtReg); - getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"); - MO.setReg(DestPhysReg); // Assign the output register - UsedInInstr.set(DestPhysReg); - } - - // If this instruction defines any registers that are immediately dead, - // kill them now. - // - for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) { - unsigned VirtReg = DeadDefs[i]; - unsigned PhysReg = VirtReg; - if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { - unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); - PhysReg = PhysRegSlot; - assert(PhysReg != 0); - PhysRegSlot = 0; - } else if (PhysRegsUsed[PhysReg] == -2) { - // Unallocatable register dead, ignore. 
- continue; - } else if (!PhysReg) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (ReservedRegs.test(Reg)) continue; + if (MO.isImplicit()) + spillPhysReg(MBB, MI, Reg, true); + else + reservePhysReg(MBB, MI, Reg); + if (MO.isDead()) + PhysKills.push_back(Reg); continue; - - DEBUG(dbgs() << " Register " << TRI->getName(PhysReg) - << " [%reg" << VirtReg - << "] is never used, removing it from live set\n"); - removePhysReg(PhysReg); - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) { - if (PhysRegsUsed[*AliasSet] != -2) { - DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet) - << " [%reg" << *AliasSet - << "] is never used, removing it from live set\n"); - removePhysReg(*AliasSet); - } } - } - - // Finally, if this is a noop copy instruction, zap it. (Except that if - // the copy is dead, it must be kept to avoid messing up liveness info for - // the register scavenger. See pr4100.) - if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, - SrcCopySubReg, DstCopySubReg) && - SrcCopyReg == DstCopyReg && DeadDefs.empty()) - MBB.erase(MI); + if (MO.isDead()) + VirtKills.push_back(Reg); + setPhysReg(MO, defineVirtReg(MBB, MI, Reg)); + } + + // Spill all dirty virtregs before a call, in case of an exception. + if (TID.isCall()) { + DEBUG(dbgs() << " Spilling remaining registers before call.\n"); + spillAll(MBB, MI); + } + + // Process virtreg deads. + for (unsigned i = 0, e = VirtKills.size(); i != e; ++i) + killVirtReg(VirtKills[i]); + VirtKills.clear(); + + // Process physreg deads. + for (unsigned i = 0, e = PhysKills.size(); i != e; ++i) + killPhysReg(PhysKills[i]); + PhysKills.clear(); } + // Spill all physical registers holding virtual registers now. 
+ DEBUG(dbgs() << "Killing live registers at end of block.\n"); MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); + while (!Virt2PhysMap.empty()) + spillVirtReg(MBB, MI, Virt2PhysMap.begin()->first, true); - // Spill all physical registers holding virtual registers now. - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { - if (unsigned VirtReg = PhysRegsUsed[i]) - spillVirtReg(MBB, MI, VirtReg, i); - else - removePhysReg(i); - } + DEBUG(MBB.dump()); } /// runOnMachineFunction - Register allocate the whole function /// bool RAFast::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "Machine Function\n"); + DEBUG(Fn.dump()); MF = &Fn; TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); - PhysRegsUsed.assign(TRI->getNumRegs(), -1); + PhysRegDirty.resize(TRI->getNumRegs()); UsedInInstr.resize(TRI->getNumRegs()); - - // At various places we want to efficiently check to see whether a register - // is allocatable. To handle this, we mark all unallocatable registers as - // being pinned down, permanently. - { - BitVector Allocable = TRI->getAllocatableSet(Fn); - for (unsigned i = 0, e = Allocable.size(); i != e; ++i) - if (!Allocable[i]) - PhysRegsUsed[i] = -2; // Mark the reg unallocable. 
- } + ReservedRegs = TRI->getReservedRegs(*MF); // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg(); StackSlotForVirtReg.grow(LastVirtReg); - Virt2PhysRegMap.grow(LastVirtReg); - Virt2LastUseMap.grow(LastVirtReg); - VirtRegModified.resize(LastVirtReg+1 - - TargetRegisterInfo::FirstVirtualRegister); - UsedInMultipleBlocks.resize(LastVirtReg+1 - - TargetRegisterInfo::FirstVirtualRegister); // Loop over all of the basic blocks, eliminating virtual register references for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); @@ -920,11 +694,6 @@ AllocateBasicBlock(*MBB); StackSlotForVirtReg.clear(); - PhysRegsUsed.clear(); - VirtRegModified.clear(); - UsedInMultipleBlocks.clear(); - Virt2PhysRegMap.clear(); - Virt2LastUseMap.clear(); return true; } Modified: llvm/trunk/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll?rev=103488&r1=103487&r2=103488&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll (original) +++ llvm/trunk/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll Tue May 11 13:54:45 2010 @@ -1,6 +1,6 @@ -; RUN: llc < %s -O0 -regalloc=local | FileCheck %s +; RUN-XFAIL: llc < %s -O0 -regalloc=local | FileCheck %s +; RUN: llc < %s -O0 -regalloc=fast | FileCheck %s ; PR6520 -; XFAIL: * target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" target triple = "i386-apple-darwin10.0.0" Modified: llvm/trunk/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll?rev=103488&r1=103487&r2=103488&view=diff 
============================================================================== --- llvm/trunk/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll (original) +++ llvm/trunk/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll Tue May 11 13:54:45 2010 @@ -1,4 +1,5 @@ -; RUN: llc -regalloc=local %s -o /dev/null +; RUN: llc -regalloc=local %s -o %t +; RUN: llc -regalloc=fast %s -o %t ; PR7066 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" From gohman at apple.com Tue May 11 14:11:43 2010 From: gohman at apple.com (Dan Gohman) Date: Tue, 11 May 2010 19:11:43 -0000 Subject: [llvm-commits] [llvm] r103489 - in /llvm/trunk: include/llvm/Target/TargetLowering.h lib/CodeGen/SelectionDAG/LegalizeDAG.cpp lib/CodeGen/SelectionDAG/TargetLowering.cpp Message-ID: <20100511191143.ED37E312800A@llvm.org> Author: djg Date: Tue May 11 14:11:43 2010 New Revision: 103489 URL: http://llvm.org/viewvc/llvm-project?rev=103489&view=rev Log: Trim #includes and forward declarations. 
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Modified: llvm/trunk/include/llvm/Target/TargetLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=103489&r1=103488&r2=103489&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h (original) +++ llvm/trunk/include/llvm/Target/TargetLowering.h Tue May 11 14:11:43 2010 @@ -55,7 +55,6 @@ class TargetData; class TargetMachine; class TargetRegisterClass; - class TargetSubtarget; class TargetLoweringObjectFile; class Value; Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=103489&r1=103488&r2=103489&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Tue May 11 14:11:43 2010 @@ -23,7 +23,6 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetSubtarget.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=103489&r1=103488&r2=103489&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Tue May 11 14:11:43 2010 @@ -18,7 +18,6 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" 
-#include "llvm/Target/TargetSubtarget.h" #include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" #include "llvm/CodeGen/MachineFrameInfo.h" From gohman at apple.com Tue May 11 14:57:55 2010 From: gohman at apple.com (Dan Gohman) Date: Tue, 11 May 2010 19:57:55 -0000 Subject: [llvm-commits] [llvm] r103492 - in /llvm/trunk: include/llvm/Target/TargetMachine.h lib/Target/CBackend/CBackend.cpp lib/Target/CBackend/CTargetMachine.h lib/Target/CppBackend/CPPBackend.cpp lib/Target/CppBackend/CPPTargetMachine.h lib/Target/MSIL/MSILWriter.cpp tools/llc/llc.cpp Message-ID: <20100511195756.0CC8F312800A@llvm.org> Author: djg Date: Tue May 11 14:57:55 2010 New Revision: 103492 URL: http://llvm.org/viewvc/llvm-project?rev=103492&view=rev Log: Remove the "WantsWholeFile" concept, as it's no longer needed. CBE and the others use the regular addPassesToEmitFile hook now, and llc no longer needs a bunch of redundant code to handle the whole-file case. Modified: llvm/trunk/include/llvm/Target/TargetMachine.h llvm/trunk/lib/Target/CBackend/CBackend.cpp llvm/trunk/lib/Target/CBackend/CTargetMachine.h llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp llvm/trunk/lib/Target/CppBackend/CPPTargetMachine.h llvm/trunk/lib/Target/MSIL/MSILWriter.cpp llvm/trunk/tools/llc/llc.cpp Modified: llvm/trunk/include/llvm/Target/TargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetMachine.h?rev=103492&r1=103491&r2=103492&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetMachine.h (original) +++ llvm/trunk/include/llvm/Target/TargetMachine.h Tue May 11 14:57:55 2010 @@ -225,17 +225,6 @@ bool = true) { return true; } - - /// addPassesToEmitWholeFile - This method can be implemented by targets that - /// require having the entire module at once. This is not recommended, do not - /// use this. 
- virtual bool WantsWholeFile() const { return false; } - virtual bool addPassesToEmitWholeFile(PassManager &, formatted_raw_ostream &, - CodeGenFileType, - CodeGenOpt::Level, - bool = true) { - return true; - } }; /// LLVMTargetMachine - This class describes a target machine that is Modified: llvm/trunk/lib/Target/CBackend/CBackend.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CBackend/CBackend.cpp?rev=103492&r1=103491&r2=103492&view=diff ============================================================================== --- llvm/trunk/lib/Target/CBackend/CBackend.cpp (original) +++ llvm/trunk/lib/Target/CBackend/CBackend.cpp Tue May 11 14:57:55 2010 @@ -3554,11 +3554,11 @@ // External Interface declaration //===----------------------------------------------------------------------===// -bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify) { +bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &o, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(createGCLoweringPass()); Modified: llvm/trunk/lib/Target/CBackend/CTargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CBackend/CTargetMachine.h?rev=103492&r1=103491&r2=103492&view=diff ============================================================================== --- llvm/trunk/lib/Target/CBackend/CTargetMachine.h (original) +++ llvm/trunk/lib/Target/CBackend/CTargetMachine.h Tue May 11 14:57:55 2010 @@ -23,12 +23,11 @@ CTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : TargetMachine(T) {} - virtual bool WantsWholeFile() const { return true; } - virtual bool addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool 
DisableVerify); + virtual bool addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; Modified: llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp?rev=103492&r1=103491&r2=103492&view=diff ============================================================================== --- llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp (original) +++ llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp Tue May 11 14:57:55 2010 @@ -2007,11 +2007,11 @@ // External Interface declaration //===----------------------------------------------------------------------===// -bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify) { +bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &o, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(new CppWriter(o)); return false; Modified: llvm/trunk/lib/Target/CppBackend/CPPTargetMachine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CppBackend/CPPTargetMachine.h?rev=103492&r1=103491&r2=103492&view=diff ============================================================================== --- llvm/trunk/lib/Target/CppBackend/CPPTargetMachine.h (original) +++ llvm/trunk/lib/Target/CppBackend/CPPTargetMachine.h Tue May 11 14:57:55 2010 @@ -26,12 +26,11 @@ const std::string &FS) : TargetMachine(T) {} - virtual bool WantsWholeFile() const { return true; } - virtual bool addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify); + virtual bool 
addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; Modified: llvm/trunk/lib/Target/MSIL/MSILWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSIL/MSILWriter.cpp?rev=103492&r1=103491&r2=103492&view=diff ============================================================================== --- llvm/trunk/lib/Target/MSIL/MSILWriter.cpp (original) +++ llvm/trunk/lib/Target/MSIL/MSILWriter.cpp Tue May 11 14:57:55 2010 @@ -34,12 +34,11 @@ MSILTarget(const Target &T, const std::string &TT, const std::string &FS) : TargetMachine(T) {} - virtual bool WantsWholeFile() const { return true; } - virtual bool addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify); + virtual bool addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; @@ -1686,11 +1685,11 @@ // External Interface declaration //===----------------------------------------------------------------------===// -bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, - bool DisableVerify) +bool MSILTarget::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &o, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; MSILWriter* Writer = new MSILWriter(o); Modified: llvm/trunk/tools/llc/llc.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llc/llc.cpp?rev=103492&r1=103491&r2=103492&view=diff ============================================================================== --- 
llvm/trunk/tools/llc/llc.cpp (original) +++ llvm/trunk/tools/llc/llc.cpp Tue May 11 14:57:55 2010 @@ -312,75 +312,34 @@ bool DisableVerify = true; #endif - // If this target requires addPassesToEmitWholeFile, do it now. This is - // used by strange things like the C backend. - if (Target.WantsWholeFile()) { - PassManager PM; - - // Add the target data from the target machine, if it exists, or the module. - if (const TargetData *TD = Target.getTargetData()) - PM.add(new TargetData(*TD)); - else - PM.add(new TargetData(&mod)); - - if (!NoVerify) - PM.add(createVerifierPass()); - - // Ask the target to add backend passes as necessary. - if (Target.addPassesToEmitWholeFile(PM, *Out, FileType, OLvl, - DisableVerify)) { - errs() << argv[0] << ": target does not support generation of this" - << " file type!\n"; - if (Out != &fouts()) delete Out; - // And the Out file is empty and useless, so remove it now. - sys::Path(OutputFilename).eraseFromDisk(); - return 1; - } - PM.run(mod); - } else { - // Build up all of the passes that we want to do to the module. - FunctionPassManager Passes(M.get()); - - // Add the target data from the target machine, if it exists, or the module. - if (const TargetData *TD = Target.getTargetData()) - Passes.add(new TargetData(*TD)); - else - Passes.add(new TargetData(&mod)); - -#ifndef NDEBUG - if (!NoVerify) - Passes.add(createVerifierPass()); -#endif - - // Override default to generate verbose assembly. - Target.setAsmVerbosityDefault(true); + // Build up all of the passes that we want to do to the module. + PassManager PM; - if (Target.addPassesToEmitFile(Passes, *Out, FileType, OLvl, - DisableVerify)) { - errs() << argv[0] << ": target does not support generation of this" - << " file type!\n"; - if (Out != &fouts()) delete Out; - // And the Out file is empty and useless, so remove it now. - sys::Path(OutputFilename).eraseFromDisk(); - return 1; - } - - Passes.doInitialization(); - - // Run our queue of passes all at once now, efficiently. 
- // TODO: this could lazily stream functions out of the module. - for (Module::iterator I = mod.begin(), E = mod.end(); I != E; ++I) - if (!I->isDeclaration()) { - if (DisableRedZone) - I->addFnAttr(Attribute::NoRedZone); - if (NoImplicitFloats) - I->addFnAttr(Attribute::NoImplicitFloat); - Passes.run(*I); - } - - Passes.doFinalization(); + // Add the target data from the target machine, if it exists, or the module. + if (const TargetData *TD = Target.getTargetData()) + PM.add(new TargetData(*TD)); + else + PM.add(new TargetData(&mod)); + + if (!NoVerify) + PM.add(createVerifierPass()); + + // Override default to generate verbose assembly. + Target.setAsmVerbosityDefault(true); + + // Ask the target to add backend passes as necessary. + if (Target.addPassesToEmitFile(PM, *Out, FileType, OLvl, + DisableVerify)) { + errs() << argv[0] << ": target does not support generation of this" + << " file type!\n"; + if (Out != &fouts()) delete Out; + // And the Out file is empty and useless, so remove it now. + sys::Path(OutputFilename).eraseFromDisk(); + return 1; } + PM.run(mod); + // Delete the ostream if it's not a stdout stream if (Out != &fouts()) delete Out; From gohman at apple.com Tue May 11 14:58:43 2010 From: gohman at apple.com (Dan Gohman) Date: Tue, 11 May 2010 19:58:43 -0000 Subject: [llvm-commits] [llvm] r103493 - /llvm/trunk/lib/VMCore/PassManager.cpp Message-ID: <20100511195843.EC916312800A@llvm.org> Author: djg Date: Tue May 11 14:58:43 2010 New Revision: 103493 URL: http://llvm.org/viewvc/llvm-project?rev=103493&view=rev Log: Teach the regular pass manager how to materialize functions as needed. 
Modified: llvm/trunk/lib/VMCore/PassManager.cpp Modified: llvm/trunk/lib/VMCore/PassManager.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/PassManager.cpp?rev=103493&r1=103492&r2=103493&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/PassManager.cpp (original) +++ llvm/trunk/lib/VMCore/PassManager.cpp Tue May 11 14:58:43 2010 @@ -1444,8 +1444,16 @@ bool FPPassManager::runOnModule(Module &M) { bool Changed = doInitialization(M); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + Function &F = *I; + if (F.isMaterializable()) { + std::string errstr; + if (F.Materialize(&errstr)) + report_fatal_error("Error reading bitcode file: " + Twine(errstr)); + } + runOnFunction(*I); + } return doFinalization(M) || Changed; } From baldrick at free.fr Tue May 11 15:16:10 2010 From: baldrick at free.fr (Duncan Sands) Date: Tue, 11 May 2010 20:16:10 -0000 Subject: [llvm-commits] [llvm] r103495 - in /llvm/trunk: docs/ include/llvm/Support/ lib/CodeGen/ lib/CodeGen/SelectionDAG/ lib/Target/ARM/AsmPrinter/ lib/Target/MSP430/AsmPrinter/ lib/Target/PIC16/ lib/Target/PIC16/AsmPrinter/ lib/Target/X86/AsmPrinter/ lib/Transforms/InstCombine/ Message-ID: <20100511201610.37D6F3128018@llvm.org> Author: baldrick Date: Tue May 11 15:16:09 2010 New Revision: 103495 URL: http://llvm.org/viewvc/llvm-project?rev=103495&view=rev Log: I got tired of VISIBILITY_HIDDEN colliding with the gcc enum. Rename it to LLVM_LIBRARY_VISIBILITY and introduce LLVM_GLOBAL_VISIBILITY, which is the opposite, for future use by dragonegg. 
Modified: llvm/trunk/docs/GarbageCollection.html llvm/trunk/include/llvm/Support/Compiler.h llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.h llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/trunk/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h llvm/trunk/lib/Target/PIC16/PIC16ISelDAGToDAG.h llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.h llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.h llvm/trunk/lib/Transforms/InstCombine/InstCombine.h llvm/trunk/lib/Transforms/InstCombine/InstCombineWorklist.h Modified: llvm/trunk/docs/GarbageCollection.html URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/GarbageCollection.html?rev=103495&r1=103494&r2=103495&view=diff ============================================================================== --- llvm/trunk/docs/GarbageCollection.html (original) +++ llvm/trunk/docs/GarbageCollection.html Tue May 11 15:16:09 2010 @@ -617,7 +617,7 @@ using namespace llvm; namespace { - class VISIBILITY_HIDDEN MyGC : public GCStrategy { + class LLVM_LIBRARY_VISIBILITY MyGC : public GCStrategy { public: MyGC() {} }; @@ -1229,7 +1229,7 @@ using namespace llvm; namespace { - class VISIBILITY_HIDDEN MyGCPrinter : public GCMetadataPrinter { + class LLVM_LIBRARY_VISIBILITY MyGCPrinter : public GCMetadataPrinter { public: virtual void beginAssembly(std::ostream &OS, AsmPrinter &AP, const TargetAsmInfo &TAI); Modified: llvm/trunk/include/llvm/Support/Compiler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/Compiler.h?rev=103495&r1=103494&r2=103495&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/Compiler.h (original) +++ llvm/trunk/include/llvm/Support/Compiler.h Tue May 11 15:16:09 2010 @@ -15,12 +15,21 @@ #ifndef LLVM_SUPPORT_COMPILER_H #define 
LLVM_SUPPORT_COMPILER_H -// The VISIBILITY_HIDDEN macro, used for marking classes with the GCC-specific -// visibility("hidden") attribute. +/// LLVM_LIBRARY_VISIBILITY - If a class marked with this attribute is linked +/// into a shared library, then the class should be private to the library and +/// not accessible from outside it. Can also be used to mark variables and +/// functions, making them private to any shared library they are linked into. + +/// LLVM_GLOBAL_VISIBILITY - If a class marked with this attribute is linked +/// into a shared library, then the class will be accessible from outside the +/// the library. Can also be used to mark variables and functions, making them +/// accessible from outside any shared library they are linked into. #if (__GNUC__ >= 4) && !defined(__MINGW32__) && !defined(__CYGWIN__) -#define VISIBILITY_HIDDEN __attribute__ ((visibility("hidden"))) +#define LLVM_LIBRARY_VISIBILITY __attribute__ ((visibility("hidden"))) +#define LLVM_GLOBAL_VISIBILITY __attribute__ ((visibility("default"))) #else -#define VISIBILITY_HIDDEN +#define LLVM_LIBRARY_VISIBILITY +#define LLVM_GLOBAL_VISIBILITY #endif #if (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.h?rev=103495&r1=103494&r2=103495&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.h (original) +++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.h Tue May 11 15:16:09 2010 @@ -32,7 +32,7 @@ /// For example, loop induction variable increments should be /// scheduled as soon as possible after the variable's last use. 
/// - class VISIBILITY_HIDDEN LoopDependencies { + class LLVM_LIBRARY_VISIBILITY LoopDependencies { const MachineLoopInfo &MLI; const MachineDominatorTree &MDT; @@ -94,7 +94,7 @@ /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of /// MachineInstrs. - class VISIBILITY_HIDDEN ScheduleDAGInstrs : public ScheduleDAG { + class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG { const MachineLoopInfo &MLI; const MachineDominatorTree &MDT; const MachineFrameInfo *MFI; Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h?rev=103495&r1=103494&r2=103495&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h Tue May 11 15:16:09 2010 @@ -32,7 +32,7 @@ /// involves promoting small sizes to large sizes or splitting up large values /// into small values. /// -class VISIBILITY_HIDDEN DAGTypeLegalizer { +class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { const TargetLowering &TLI; SelectionDAG &DAG; public: Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=103495&r1=103494&r2=103495&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue May 11 15:16:09 2010 @@ -4944,7 +4944,7 @@ namespace llvm { /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. 
-class VISIBILITY_HIDDEN SDISelAsmOperandInfo : +class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { public: /// CallOperand - If this is the result output operand or a clobber Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h?rev=103495&r1=103494&r2=103495&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h (original) +++ llvm/trunk/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h Tue May 11 15:16:09 2010 @@ -26,7 +26,7 @@ //class ARMSubtarget; /// ARMMCInstLower - This class is used to lower an MachineInstr into an MCInst. -class VISIBILITY_HIDDEN ARMMCInstLower { +class LLVM_LIBRARY_VISIBILITY ARMMCInstLower { MCContext &Ctx; Mangler &Mang; AsmPrinter &Printer; Modified: llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h?rev=103495&r1=103494&r2=103495&view=diff ============================================================================== --- llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h (original) +++ llvm/trunk/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h Tue May 11 15:16:09 2010 @@ -26,7 +26,7 @@ /// MSP430MCInstLower - This class is used to lower an MachineInstr /// into an MCInst. 
-class VISIBILITY_HIDDEN MSP430MCInstLower { +class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower { MCContext &Ctx; Mangler &Mang; Modified: llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h?rev=103495&r1=103494&r2=103495&view=diff ============================================================================== --- llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h (original) +++ llvm/trunk/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h Tue May 11 15:16:09 2010 @@ -29,7 +29,7 @@ #include namespace llvm { - class VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter { + class LLVM_LIBRARY_VISIBILITY PIC16AsmPrinter : public AsmPrinter { public: explicit PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer); private: Modified: llvm/trunk/lib/Target/PIC16/PIC16ISelDAGToDAG.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PIC16/PIC16ISelDAGToDAG.h?rev=103495&r1=103494&r2=103495&view=diff ============================================================================== --- llvm/trunk/lib/Target/PIC16/PIC16ISelDAGToDAG.h (original) +++ llvm/trunk/lib/Target/PIC16/PIC16ISelDAGToDAG.h Tue May 11 15:16:09 2010 @@ -26,7 +26,7 @@ namespace { -class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel { +class LLVM_LIBRARY_VISIBILITY PIC16DAGToDAGISel : public SelectionDAGISel { /// TM - Keep a reference to PIC16TargetMachine. 
const PIC16TargetMachine &TM; Modified: llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.h?rev=103495&r1=103494&r2=103495&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.h (original) +++ llvm/trunk/lib/Target/X86/AsmPrinter/X86AsmPrinter.h Tue May 11 15:16:09 2010 @@ -31,7 +31,7 @@ class MCStreamer; class MCSymbol; -class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { +class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { const X86Subtarget *Subtarget; public: explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) Modified: llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.h?rev=103495&r1=103494&r2=103495&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.h (original) +++ llvm/trunk/lib/Target/X86/AsmPrinter/X86MCInstLower.h Tue May 11 15:16:09 2010 @@ -25,7 +25,7 @@ class X86Subtarget; /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. 
-class VISIBILITY_HIDDEN X86MCInstLower { +class LLVM_LIBRARY_VISIBILITY X86MCInstLower { MCContext &Ctx; Mangler *Mang; X86AsmPrinter &AsmPrinter; Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombine.h?rev=103495&r1=103494&r2=103495&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombine.h (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstCombine.h Tue May 11 15:16:09 2010 @@ -51,7 +51,7 @@ /// InstCombineIRInserter - This is an IRBuilder insertion helper that works /// just like the normal insertion helper, but also adds any new instructions /// to the instcombine worklist. -class VISIBILITY_HIDDEN InstCombineIRInserter +class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter : public IRBuilderDefaultInserter { InstCombineWorklist &Worklist; public: @@ -65,7 +65,7 @@ }; /// InstCombiner - The -instcombine pass. -class VISIBILITY_HIDDEN InstCombiner +class LLVM_LIBRARY_VISIBILITY InstCombiner : public FunctionPass, public InstVisitor { TargetData *TD; Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineWorklist.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineWorklist.h?rev=103495&r1=103494&r2=103495&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineWorklist.h (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineWorklist.h Tue May 11 15:16:09 2010 @@ -22,7 +22,7 @@ /// InstCombineWorklist - This is the worklist management logic for /// InstCombine. 
-class VISIBILITY_HIDDEN InstCombineWorklist { +class LLVM_LIBRARY_VISIBILITY InstCombineWorklist { SmallVector Worklist; DenseMap WorklistMap; From baldrick at free.fr Tue May 11 15:19:21 2010 From: baldrick at free.fr (Duncan Sands) Date: Tue, 11 May 2010 20:19:21 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103496 - in /llvm-gcc-4.2/trunk/gcc: llvm-abi.h llvm-backend.cpp llvm-types.cpp Message-ID: <20100511201921.24955312800A@llvm.org> Author: baldrick Date: Tue May 11 15:19:20 2010 New Revision: 103496 URL: http://llvm.org/viewvc/llvm-project?rev=103496&view=rev Log: No need to avoid collisions between LLVM's VISIBILITY_HIDDEN and gcc's, now that LLVM's has been renamed LLVM_LIBRARY_VISIBILITY. Modified: llvm-gcc-4.2/trunk/gcc/llvm-abi.h llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp llvm-gcc-4.2/trunk/gcc/llvm-types.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-abi.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-abi.h?rev=103496&r1=103495&r2=103496&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-abi.h (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-abi.h Tue May 11 15:19:20 2010 @@ -43,8 +43,6 @@ class BasicBlock; } -#undef VISIBILITY_HIDDEN - extern "C" { #include "config.h" #include "system.h" Modified: llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp?rev=103496&r1=103495&r2=103496&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-backend.cpp Tue May 11 15:19:20 2010 @@ -52,7 +52,6 @@ #include "llvm/System/Program.h" #include -#undef VISIBILITY_HIDDEN extern "C" { #include "config.h" #include "system.h" Modified: llvm-gcc-4.2/trunk/gcc/llvm-types.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-types.cpp?rev=103496&r1=103495&r2=103496&view=diff 
============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-types.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-types.cpp Tue May 11 15:19:20 2010 @@ -37,7 +37,6 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" #include -#undef VISIBILITY_HIDDEN extern "C" { #include "config.h" From gohman at apple.com Tue May 11 15:30:00 2010 From: gohman at apple.com (Dan Gohman) Date: Tue, 11 May 2010 20:30:00 -0000 Subject: [llvm-commits] [llvm] r103499 - /llvm/trunk/lib/VMCore/PassManager.cpp Message-ID: <20100511203000.A0D64312800A@llvm.org> Author: djg Date: Tue May 11 15:30:00 2010 New Revision: 103499 URL: http://llvm.org/viewvc/llvm-project?rev=103499&view=rev Log: Revert r103493, materializing functions in the regular PassManager. It works in simple cases, but it isn't a general solution. Modified: llvm/trunk/lib/VMCore/PassManager.cpp Modified: llvm/trunk/lib/VMCore/PassManager.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/PassManager.cpp?rev=103499&r1=103498&r2=103499&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/PassManager.cpp (original) +++ llvm/trunk/lib/VMCore/PassManager.cpp Tue May 11 15:30:00 2010 @@ -1444,16 +1444,8 @@ bool FPPassManager::runOnModule(Module &M) { bool Changed = doInitialization(M); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - Function &F = *I; - if (F.isMaterializable()) { - std::string errstr; - if (F.Materialize(&errstr)) - report_fatal_error("Error reading bitcode file: " + Twine(errstr)); - } - + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) runOnFunction(*I); - } return doFinalization(M) || Changed; } From stoklund at 2pi.dk Tue May 11 15:30:28 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 11 May 2010 20:30:28 -0000 Subject: [llvm-commits] [llvm] r103500 - in /llvm/trunk: include/llvm/CodeGen/MachineRegisterInfo.h 
lib/CodeGen/MachineRegisterInfo.cpp lib/CodeGen/RegAllocFast.cpp test/CodeGen/X86/fp-stack-O0-crash.ll test/CodeGen/X86/liveness-local-regalloc.ll Message-ID: <20100511203028.6FFF1312800A@llvm.org> Author: stoklund Date: Tue May 11 15:30:28 2010 New Revision: 103500 URL: http://llvm.org/viewvc/llvm-project?rev=103500&view=rev Log: Simplify the tracking of used physregs to a bulk bitor followed by a transitive closure after allocating all blocks. Add a few more test cases for -regalloc=fast. Modified: llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp llvm/trunk/lib/CodeGen/RegAllocFast.cpp llvm/trunk/test/CodeGen/X86/fp-stack-O0-crash.ll llvm/trunk/test/CodeGen/X86/liveness-local-regalloc.ll Modified: llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h?rev=103500&r1=103499&r2=103500&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h Tue May 11 15:30:28 2010 @@ -229,11 +229,18 @@ /// setPhysRegUsed - Mark the specified register used in this function. /// This should only be called during and after register allocation. void setPhysRegUsed(unsigned Reg) { UsedPhysRegs[Reg] = true; } - + + /// addPhysRegsUsed - Mark the specified registers used in this function. + /// This should only be called during and after register allocation. + void addPhysRegsUsed(const BitVector &Regs) { UsedPhysRegs |= Regs; } + /// setPhysRegUnused - Mark the specified register unused in this function. /// This should only be called during and after register allocation. void setPhysRegUnused(unsigned Reg) { UsedPhysRegs[Reg] = false; } - + + /// closePhysRegsUsed - Expand UsedPhysRegs to its transitive closure over + /// subregisters. 
That means that if R is used, so are all subregisters. + void closePhysRegsUsed(const TargetRegisterInfo&); //===--------------------------------------------------------------------===// // LiveIn/LiveOut Management Modified: llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp?rev=103500&r1=103499&r2=103500&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp Tue May 11 15:30:28 2010 @@ -267,6 +267,15 @@ EntryMBB->addLiveIn(I->first); } +void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) { + for (int i = UsedPhysRegs.find_first(); i >= 0; + i = UsedPhysRegs.find_next(i)) + for (const unsigned *SS = TRI.getSubRegisters(i); + unsigned SubReg = *SS; ++SS) + if (SubReg > i) + UsedPhysRegs.set(SubReg); +} + #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I) Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103500&r1=103499&r2=103500&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Tue May 11 15:30:28 2010 @@ -370,11 +370,9 @@ if (PhysRegState[BestReg] != regDisabled) spillVirtReg(MBB, MI, PhysRegState[BestReg], true); else { - MF->getRegInfo().setPhysRegUsed(BestReg); // Make sure all aliases are disabled. for (const unsigned *AS = TRI->getAliasSet(BestReg); unsigned Alias = *AS; ++AS) { - MF->getRegInfo().setPhysRegUsed(Alias); switch (PhysRegState[Alias]) { case regDisabled: continue; @@ -443,6 +441,7 @@ /// defineVirtReg except the physreg is reverved instead of allocated. 
void RAFast::reservePhysReg(MachineBasicBlock &MBB, MachineInstr *MI, unsigned PhysReg) { + UsedInInstr.set(PhysReg); switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; @@ -460,6 +459,7 @@ // This is a disabled register, disable all aliases. for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { + UsedInInstr.set(Alias); switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: case regFree: @@ -474,10 +474,8 @@ break; } PhysRegState[Alias] = regDisabled; - MF->getRegInfo().setPhysRegUsed(Alias); } PhysRegState[PhysReg] = regReserved; - MF->getRegInfo().setPhysRegUsed(PhysReg); } // setPhysReg - Change MO the refer the PhysReg, considering subregs. @@ -611,6 +609,8 @@ killPhysReg(PhysKills[i]); PhysKills.clear(); + MF->getRegInfo().addPhysRegsUsed(UsedInInstr); + // Track registers defined by instruction - early clobbers at this point. UsedInInstr.reset(); for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { @@ -658,6 +658,8 @@ for (unsigned i = 0, e = PhysKills.size(); i != e; ++i) killPhysReg(PhysKills[i]); PhysKills.clear(); + + MF->getRegInfo().addPhysRegsUsed(UsedInInstr); } // Spill all physical registers holding virtual registers now. @@ -693,6 +695,9 @@ MBB != MBBe; ++MBB) AllocateBasicBlock(*MBB); + // Make sure the set of used physregs is closed under subreg operations. 
+ MF->getRegInfo().closePhysRegsUsed(*TRI); + StackSlotForVirtReg.clear(); return true; } Modified: llvm/trunk/test/CodeGen/X86/fp-stack-O0-crash.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-stack-O0-crash.ll?rev=103500&r1=103499&r2=103500&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/fp-stack-O0-crash.ll (original) +++ llvm/trunk/test/CodeGen/X86/fp-stack-O0-crash.ll Tue May 11 15:30:28 2010 @@ -1,4 +1,5 @@ ; RUN: llc %s -O0 -fast-isel -regalloc=local -o - +; RUN: llc %s -O0 -fast-isel -regalloc=fast -o - ; PR4767 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" Modified: llvm/trunk/test/CodeGen/X86/liveness-local-regalloc.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/liveness-local-regalloc.ll?rev=103500&r1=103499&r2=103500&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/liveness-local-regalloc.ll (original) +++ llvm/trunk/test/CodeGen/X86/liveness-local-regalloc.ll Tue May 11 15:30:28 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -O3 -regalloc=local -mtriple=x86_64-apple-darwin10 +; RUN: llc < %s -O3 -regalloc=fast -mtriple=x86_64-apple-darwin10 ; %0 = type { i32, i8*, i8*, %1*, i8*, i64, i64, i32, i32, i32, i32, [1024 x i8] } From bob.wilson at apple.com Tue May 11 15:39:52 2010 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 11 May 2010 20:39:52 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103502 - /llvm-gcc-4.2/trunk/GNUmakefile Message-ID: <20100511203952.15978312800A@llvm.org> Author: bwilson Date: Tue May 11 15:39:51 2010 New Revision: 103502 URL: http://llvm.org/viewvc/llvm-project?rev=103502&view=rev Log: Fix corrupted version numbers when building llvmCore inside llvmgcc42. I added the ARM_HOSTED_BUILD option to build_llvm but forgot to update this invocation. 
Modified: llvm-gcc-4.2/trunk/GNUmakefile Modified: llvm-gcc-4.2/trunk/GNUmakefile URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/GNUmakefile?rev=103502&r1=103501&r2=103502&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/GNUmakefile (original) +++ llvm-gcc-4.2/trunk/GNUmakefile Tue May 11 15:39:51 2010 @@ -101,7 +101,7 @@ DEVELOPER_DIR=Developer \ $(SRC)/llvmCore/utils/buildit/build_llvm "$(RC_ARCHS)" "$(TARGETS)" \ $(SRC)/llvmCore /usr/local $(DSTROOT) $(SYMROOT) \ - $(ENABLE_ASSERTIONS) $(LLVM_OPTIMIZED) $(INSTALL_LIBLTO) \ + $(ENABLE_ASSERTIONS) $(LLVM_OPTIMIZED) $(INSTALL_LIBLTO) no \ $(RC_ProjectSourceVersion) $(RC_ProjectSourceSubversion) llvmgcc42: $(OBJROOT) $(SYMROOT) $(DSTROOT) From isanbard at gmail.com Tue May 11 15:46:04 2010 From: isanbard at gmail.com (Bill Wendling) Date: Tue, 11 May 2010 20:46:04 -0000 Subject: [llvm-commits] [llvm] r103507 - /llvm/trunk/lib/Target/SubtargetFeature.cpp Message-ID: <20100511204604.CDDA5312800A@llvm.org> Author: void Date: Tue May 11 15:46:04 2010 New Revision: 103507 URL: http://llvm.org/viewvc/llvm-project?rev=103507&view=rev Log: Simplify this logic of creating a default Features object. 
Modified: llvm/trunk/lib/Target/SubtargetFeature.cpp Modified: llvm/trunk/lib/Target/SubtargetFeature.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SubtargetFeature.cpp?rev=103507&r1=103506&r2=103507&view=diff ============================================================================== --- llvm/trunk/lib/Target/SubtargetFeature.cpp (original) +++ llvm/trunk/lib/Target/SubtargetFeature.cpp Tue May 11 15:46:04 2010 @@ -370,32 +370,14 @@ const Triple& Triple) { setCPU(CPU); - const char *Attrs = 0; - - switch (Triple.getVendor()) { - case Triple::Apple: - switch (Triple.getArch()) { - case Triple::ppc: // powerpc-apple-* - Attrs = "altivec"; - break; - case Triple::ppc64: // powerpc64-apple-* - Attrs = "64bit,altivec"; - break; - default: - break; + if (Triple.getVendor() == Triple::Apple) { + if (Triple.getArch() == Triple::ppc) { + // powerpc-apple-* + AddFeature("altivec"); + } else if (Triple.getArch() == Triple::ppc64) { + // powerpc64-apple-* + AddFeature("64bit"); + AddFeature("altivec"); } - break; - default: - break; - } - - if (!Attrs) return; - - StringRef SR(Attrs); - - while (!SR.empty()) { - std::pair Res = SR.split(','); - AddFeature(Res.first); - SR = Res.second; } } From echristo at apple.com Tue May 11 15:48:22 2010 From: echristo at apple.com (Eric Christopher) Date: Tue, 11 May 2010 13:48:22 -0700 Subject: [llvm-commits] [llvm-gcc-4.2] r103502 - /llvm-gcc-4.2/trunk/GNUmakefile In-Reply-To: <20100511203952.15978312800A@llvm.org> References: <20100511203952.15978312800A@llvm.org> Message-ID: <14CD010A-FF3C-4E12-B0F8-AEB957D6C837@apple.com> On May 11, 2010, at 1:39 PM, Bob Wilson wrote: > Author: bwilson > Date: Tue May 11 15:39:51 2010 > New Revision: 103502 > > URL: http://llvm.org/viewvc/llvm-project?rev=103502&view=rev > Log: > Fix corrupted version numbers when building llvmCore inside llvmgcc42. > I added the ARM_HOSTED_BUILD option to build_llvm but forgot to update > this invocation. 
> > Modified: > llvm-gcc-4.2/trunk/GNUmakefile > > Modified: llvm-gcc-4.2/trunk/GNUmakefile > URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/GNUmakefile?rev=103502&r1=103501&r2=103502&view=diff > ============================================================================== > --- llvm-gcc-4.2/trunk/GNUmakefile (original) > +++ llvm-gcc-4.2/trunk/GNUmakefile Tue May 11 15:39:51 2010 > @@ -101,7 +101,7 @@ > DEVELOPER_DIR=Developer \ > $(SRC)/llvmCore/utils/buildit/build_llvm "$(RC_ARCHS)" "$(TARGETS)" \ > $(SRC)/llvmCore /usr/local $(DSTROOT) $(SYMROOT) \ > - $(ENABLE_ASSERTIONS) $(LLVM_OPTIMIZED) $(INSTALL_LIBLTO) \ > + $(ENABLE_ASSERTIONS) $(LLVM_OPTIMIZED) $(INSTALL_LIBLTO) no \ > $(RC_ProjectSourceVersion) $(RC_ProjectSourceSubversion) Pretty please add a comment? Thanks! -eric From stoklund at 2pi.dk Tue May 11 15:51:04 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 11 May 2010 20:51:04 -0000 Subject: [llvm-commits] [llvm] r103508 - /llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp Message-ID: <20100511205104.DF9EE312800A@llvm.org> Author: stoklund Date: Tue May 11 15:51:04 2010 New Revision: 103508 URL: http://llvm.org/viewvc/llvm-project?rev=103508&view=rev Log: Silence warning Modified: llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp Modified: llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp?rev=103508&r1=103507&r2=103508&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp Tue May 11 15:51:04 2010 @@ -272,7 +272,7 @@ i = UsedPhysRegs.find_next(i)) for (const unsigned *SS = TRI.getSubRegisters(i); unsigned SubReg = *SS; ++SS) - if (SubReg > i) + if (SubReg > unsigned(i)) UsedPhysRegs.set(SubReg); } From stoklund at 2pi.dk Tue May 11 15:51:07 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: 
Tue, 11 May 2010 20:51:07 -0000 Subject: [llvm-commits] [llvm] r103509 - /llvm/trunk/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll Message-ID: <20100511205107.58E043128018@llvm.org> Author: stoklund Date: Tue May 11 15:51:07 2010 New Revision: 103509 URL: http://llvm.org/viewvc/llvm-project?rev=103509&view=rev Log: One more -regalloc=fast test Modified: llvm/trunk/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll Modified: llvm/trunk/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll?rev=103509&r1=103508&r2=103509&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll (original) +++ llvm/trunk/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll Tue May 11 15:51:07 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -O0 -regalloc=local -relocation-model=pic -disable-fp-elim | FileCheck %s +; RUN: llc < %s -O0 -regalloc=fast -relocation-model=pic -disable-fp-elim | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" target triple = "i386-apple-darwin10.0.0" From bob.wilson at apple.com Tue May 11 15:55:14 2010 From: bob.wilson at apple.com (Bob Wilson) Date: Tue, 11 May 2010 20:55:14 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103512 - /llvm-gcc-4.2/trunk/GNUmakefile Message-ID: <20100511205514.B7F5C312800A@llvm.org> Author: bwilson Date: Tue May 11 15:55:14 2010 New Revision: 103512 URL: http://llvm.org/viewvc/llvm-project?rev=103512&view=rev Log: Add a comment for Eric. 
Modified: llvm-gcc-4.2/trunk/GNUmakefile Modified: llvm-gcc-4.2/trunk/GNUmakefile URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/GNUmakefile?rev=103512&r1=103511&r2=103512&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/GNUmakefile (original) +++ llvm-gcc-4.2/trunk/GNUmakefile Tue May 11 15:55:14 2010 @@ -77,6 +77,9 @@ LLVM_OPTIMIZED := yes endif +# Cross-builds for ARM hosts are not supported here. +ARM_HOSTED_BUILD := no + ifndef RC_ProjectSourceVersion RC_ProjectSourceVersion = 9999 endif @@ -101,7 +104,8 @@ DEVELOPER_DIR=Developer \ $(SRC)/llvmCore/utils/buildit/build_llvm "$(RC_ARCHS)" "$(TARGETS)" \ $(SRC)/llvmCore /usr/local $(DSTROOT) $(SYMROOT) \ - $(ENABLE_ASSERTIONS) $(LLVM_OPTIMIZED) $(INSTALL_LIBLTO) no \ + $(ENABLE_ASSERTIONS) $(LLVM_OPTIMIZED) $(INSTALL_LIBLTO) \ + $(ARM_HOSTED_BUILD) \ $(RC_ProjectSourceVersion) $(RC_ProjectSourceSubversion) llvmgcc42: $(OBJROOT) $(SYMROOT) $(DSTROOT) From evan.cheng at apple.com Tue May 11 16:07:36 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Tue, 11 May 2010 21:07:36 -0000 Subject: [llvm-commits] [llvm] r103513 - /llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Message-ID: <20100511210736.C897A312800A@llvm.org> Author: evancheng Date: Tue May 11 16:07:36 2010 New Revision: 103513 URL: http://llvm.org/viewvc/llvm-project?rev=103513&view=rev Log: Avoid breaking vstd when reg_sequence is not used. 
Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=103513&r1=103512&r2=103513&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Tue May 11 16:07:36 2010 @@ -419,7 +419,8 @@ unsigned FirstOpnd, NumRegs, Offset, Stride; if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) continue; - if (FormsRegSequence(MI, FirstOpnd, NumRegs)) + if (llvm::ModelWithRegSequence() && + FormsRegSequence(MI, FirstOpnd, NumRegs)) continue; MachineBasicBlock::iterator NextI = llvm::next(MBBI); From criswell at uiuc.edu Tue May 11 16:50:08 2010 From: criswell at uiuc.edu (John Criswell) Date: Tue, 11 May 2010 21:50:08 -0000 Subject: [llvm-commits] [poolalloc] r103518 - in /poolalloc/trunk: ./ include/poolalloc/ lib/AssistDS/ lib/DSA/ lib/Macroscopic/ lib/PoolAllocate/ lib/rDSA/ runtime/FL2Allocator/ runtime/FreeListAllocator/ runtime/HeapFrag/ runtime/PoolAllocator/ runtime/PreRT/ test/ Message-ID: <20100511215008.6F0A6312800A@llvm.org> Author: criswell Date: Tue May 11 16:50:07 2010 New Revision: 103518 URL: http://llvm.org/viewvc/llvm-project?rev=103518&view=rev Log: Merged in the release_26 branch. This brings in fixes that I've made in the LLVM 2.6 version as well as enhancements made by Patrick. 
Added: poolalloc/trunk/lib/rDSA/ - copied from r103512, poolalloc/branches/release_26/lib/rDSA/ poolalloc/trunk/lib/rDSA/BottomUpClosure.cpp - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/BottomUpClosure.cpp poolalloc/trunk/lib/rDSA/CMakeLists.txt - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/CMakeLists.txt poolalloc/trunk/lib/rDSA/CompleteBottomUp.cpp - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/CompleteBottomUp.cpp poolalloc/trunk/lib/rDSA/DataStructure.cpp - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/DataStructure.cpp poolalloc/trunk/lib/rDSA/DataStructureAA.cpp - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/DataStructureAA.cpp poolalloc/trunk/lib/rDSA/DataStructureOpt.cpp - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/DataStructureOpt.cpp poolalloc/trunk/lib/rDSA/DataStructureStats.cpp - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/DataStructureStats.cpp poolalloc/trunk/lib/rDSA/EquivClassGraphs.cpp - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/EquivClassGraphs.cpp poolalloc/trunk/lib/rDSA/GraphChecker.cpp - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/GraphChecker.cpp poolalloc/trunk/lib/rDSA/Local.cpp - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/Local.cpp poolalloc/trunk/lib/rDSA/Makefile - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/Makefile poolalloc/trunk/lib/rDSA/Printer.cpp - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/Printer.cpp poolalloc/trunk/lib/rDSA/StdLibPass.cpp - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/StdLibPass.cpp poolalloc/trunk/lib/rDSA/Steensgaard.cpp - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/Steensgaard.cpp poolalloc/trunk/lib/rDSA/SteensgaardAA.cpp - copied unchanged from 
r103512, poolalloc/branches/release_26/lib/rDSA/SteensgaardAA.cpp poolalloc/trunk/lib/rDSA/TopDownClosure.cpp - copied unchanged from r103512, poolalloc/branches/release_26/lib/rDSA/TopDownClosure.cpp Modified: poolalloc/trunk/ (props changed) poolalloc/trunk/include/poolalloc/PoolAllocate.h poolalloc/trunk/lib/AssistDS/Makefile poolalloc/trunk/lib/DSA/EquivClassGraphs.cpp poolalloc/trunk/lib/DSA/Makefile poolalloc/trunk/lib/Macroscopic/Makefile poolalloc/trunk/lib/PoolAllocate/Heuristic.cpp poolalloc/trunk/lib/PoolAllocate/Makefile poolalloc/trunk/lib/PoolAllocate/PASimple.cpp poolalloc/trunk/lib/PoolAllocate/PoolAllocate.cpp poolalloc/trunk/lib/PoolAllocate/PoolOptimize.cpp poolalloc/trunk/lib/PoolAllocate/TransformFunctionBody.cpp poolalloc/trunk/runtime/FL2Allocator/CMakeLists.txt poolalloc/trunk/runtime/FL2Allocator/Makefile poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.cpp poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.h poolalloc/trunk/runtime/FreeListAllocator/Makefile poolalloc/trunk/runtime/HeapFrag/Makefile poolalloc/trunk/runtime/PoolAllocator/Makefile poolalloc/trunk/runtime/PreRT/Makefile poolalloc/trunk/test/Makefile poolalloc/trunk/test/TEST.poolalloc.Makefile Propchange: poolalloc/trunk/ ------------------------------------------------------------------------------ svn:mergeinfo = /poolalloc/branches/release_26:97820-103512 Modified: poolalloc/trunk/include/poolalloc/PoolAllocate.h URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/include/poolalloc/PoolAllocate.h?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/include/poolalloc/PoolAllocate.h (original) +++ poolalloc/trunk/include/poolalloc/PoolAllocate.h Tue May 11 16:50:07 2010 @@ -54,7 +54,7 @@ /// maps to the original function... 
/// struct FuncInfo { - FuncInfo(Function &f) : F(f), Clone(0) {} + FuncInfo(Function &f) : F(f), Clone(0), rev_pool_desc_map_computed(false) {} /// MarkedNodes - The set of nodes which are not locally pool allocatable in /// the current function. @@ -83,6 +83,23 @@ /// function. std::map PoolDescriptors; + //Reverse mapping for PoolDescriptors, needed by TPPA + std::map ReversePoolDescriptors; + + //This is a hack -- a function should be added which maintains these in parallel + //and all of PoolAlloc and SafeCode should be updated to use it instead of adding + //to either map directly. + bool rev_pool_desc_map_computed; + void calculate_reverse_pool_descriptors() + { + if(rev_pool_desc_map_computed) + return; + rev_pool_desc_map_computed = true; + + for(std::map::iterator i = PoolDescriptors.begin(); i!=PoolDescriptors.end(); i++) + ReversePoolDescriptors[i->second] = i->first; + } + /// This is a map from Old to New Values (the reverse of NewToOldValueMap). /// SAFECode uses this for check insertion. std::map ValueMap; @@ -147,6 +164,7 @@ return Graphs->getGlobalsGraph (); } + /* Return value is of type PoolDescPtrTy */ virtual Value * getPool (const DSNode * N, Function & F) {return 0;} virtual Value * getGlobalPool (const DSNode * Node) {return 0;} @@ -168,7 +186,7 @@ std::map CloneToOrigMap; public: - Constant *PoolInit, *PoolDestroy, *PoolAlloc, *PoolRealloc, *PoolMemAlign; + Constant *PoolInit, *PoolDestroy, *PoolAlloc, *PoolRealloc, *PoolMemAlign, *PoolThreadWrapper; Constant *PoolFree; Constant *PoolCalloc; Constant *PoolStrdup; @@ -354,10 +372,10 @@ } // - // We either do not have a pool, or the pool is not accessible from the - // specified function. Return NULL. + // Perhaps this is a global pool. If it isn't, then return a NULL + // pointer. 
// - return 0; + return getGlobalPool (N); } virtual Value * getGlobalPool (const DSNode * Node) { Modified: poolalloc/trunk/lib/AssistDS/Makefile URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/lib/AssistDS/Makefile?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/lib/AssistDS/Makefile (original) +++ poolalloc/trunk/lib/AssistDS/Makefile Tue May 11 16:50:07 2010 @@ -8,7 +8,11 @@ ##===----------------------------------------------------------------------===## LEVEL = ../.. +ifneq ($(OS),Cygwin) +ifneq ($(OS),MingW) SHARED_LIBRARY=1 +endif +endif LIBRARYNAME = AssistDS include $(LEVEL)/Makefile.common Modified: poolalloc/trunk/lib/DSA/EquivClassGraphs.cpp URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/lib/DSA/EquivClassGraphs.cpp?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/lib/DSA/EquivClassGraphs.cpp (original) +++ poolalloc/trunk/lib/DSA/EquivClassGraphs.cpp Tue May 11 16:50:07 2010 @@ -64,6 +64,8 @@ for (EquivalenceClasses::member_iterator MI = GlobalECs.member_begin(EQSI); MI != GlobalECs.member_end(); ++MI) { if (const Function* F = dyn_cast(*MI)) { + if(F->isDeclaration()) //ignore functions with no body + continue; if (!BaseGraph) { BaseGraph = getOrCreateGraph(F); BaseGraph->getFunctionArgumentsForCall(F, Args); Modified: poolalloc/trunk/lib/DSA/Makefile URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/lib/DSA/Makefile?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/lib/DSA/Makefile (original) +++ poolalloc/trunk/lib/DSA/Makefile Tue May 11 16:50:07 2010 @@ -10,8 +10,12 @@ LEVEL = ../.. 
LIBRARYNAME = LLVMDataStructure BUILD_ARCHIVE := 1 +ifneq ($(OS),Cygwin) +ifneq ($(OS),MingW) SHARED_LIBRARY := 1 #LOADABLE_MODULE := 1 +endif +endif include $(LEVEL)/Makefile.common Modified: poolalloc/trunk/lib/Macroscopic/Makefile URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/lib/Macroscopic/Makefile?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/lib/Macroscopic/Makefile (original) +++ poolalloc/trunk/lib/Macroscopic/Makefile Tue May 11 16:50:07 2010 @@ -2,8 +2,12 @@ LEVEL=../.. # Give the name of a library. This will build a dynamic version. +ifneq ($(OS),Cygwin) +ifneq ($(OS),MingW) SHARED_LIBRARY=1 LOADABLE_MODULE = 1 +endif +endif DONT_BUILD_RELINKED=1 LIBRARYNAME=macroscopic Modified: poolalloc/trunk/lib/PoolAllocate/Heuristic.cpp URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/lib/PoolAllocate/Heuristic.cpp?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/lib/PoolAllocate/Heuristic.cpp (original) +++ poolalloc/trunk/lib/PoolAllocate/Heuristic.cpp Tue May 11 16:50:07 2010 @@ -122,15 +122,31 @@ /// DSNode. /// unsigned Heuristic::getRecommendedAlignment(const DSNode *N) { - //FIXME: Type - //if (!N->getType() || N->getType()->isVoidTy()) // Is this void or collapsed? - return 0; // No known alignment, let runtime decide. +#if 0 + const Type * VoidType = Type::getVoidTy(getGlobalContext()); + + // + // If this node has a void type (which can be signified by getType() + // returning NULL) or the node is collapsed, then there is no known + // alignment. We will return 0 to let the runtime decide. 
+ // + if ((!(N->getType())) || (N->getType() == VoidType)) + return 0; - //const TargetData &TD = N->getParentGraph()->getTargetData(); + const TargetData &TD = N->getParentGraph()->getTargetData(); // If there are no doubles on an 8-byte boundary in this structure, there is // no reason to 8-byte align objects in the pool. - //return Wants8ByteAlignment(N->getType(), 0, TD) ? 8 : 4; + return Wants8ByteAlignment(N->getType(), 0, TD) ? 8 : 4; +#else + // + // I believe there was a FIXME in the previous version of this code, but it + // was too vague for me to understand what, exactly, needed to be fixed. + // + // In any event, it seems that this code should be deactivated for now. + // + return 0; +#endif } @@ -455,7 +471,7 @@ if (!NullGlobal) { Module *M = I->getParent()->getParent()->getParent(); const Type * PoolTy = PoolAllocate::PoolDescPtrTy; - Constant * Init = ConstantAggregateZero::get(PoolTy); + Constant * Init = ConstantPointerNull::get(cast(PoolTy)); NullGlobal = new GlobalVariable(*M, PoolAllocate::PoolDescPtrTy, false, GlobalValue::ExternalLinkage, Modified: poolalloc/trunk/lib/PoolAllocate/Makefile URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/lib/PoolAllocate/Makefile?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/lib/PoolAllocate/Makefile (original) +++ poolalloc/trunk/lib/PoolAllocate/Makefile Tue May 11 16:50:07 2010 @@ -8,8 +8,12 @@ # LIBRARYNAME=poolalloc BUILD_ARCHIVE := 1 +ifneq ($(OS),Cygwin) +ifneq ($(OS),MingW) SHARED_LIBRARY := 1 #LOADABLE_MODULE := 1 +endif +endif # # Include Makefile.common so we know what to do. 
Modified: poolalloc/trunk/lib/PoolAllocate/PASimple.cpp URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/lib/PoolAllocate/PASimple.cpp?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/lib/PoolAllocate/PASimple.cpp (original) +++ poolalloc/trunk/lib/PoolAllocate/PASimple.cpp Tue May 11 16:50:07 2010 @@ -142,11 +142,20 @@ TheGlobalPool = CreateGlobalPool(32, 1, M); // - // Now that all call targets are available, rewrite the function bodies of the - // clones. + // Now that all call targets are available, rewrite the function bodies of + // the clones. + // for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + // + // Skip functions that this pass added. + // std::string name = I->getName(); if (name == "__poolalloc_init") continue; + if (name == PoolInit->getNameStr()) continue; + + // + // Skip declarations. + // if (!(I->isDeclaration())) ProcessFunctionBodySimple(*I, TD); } @@ -226,6 +235,11 @@ Name, InsertPt); + // + // Update the DSGraph. + // + CombinedDSGraph->getScalarMap().replaceScalar (CI, V); + Instruction *Casted = V; if (V->getType() != CI->getType()) Casted = CastInst::CreatePointerCast (V, CI->getType(), V->getName(), InsertPt); @@ -267,6 +281,11 @@ Opts + 3, Name, InsertPt); + // + // Update the DSGraph. + // + CombinedDSGraph->getScalarMap().replaceScalar (CI, V); + Instruction *Casted = V; if (V->getType() != CI->getType()) Casted = CastInst::CreatePointerCast (V, CI->getType(), V->getName(), InsertPt); @@ -309,6 +328,11 @@ Name, InsertPt); + // + // Update the DSGraph. + // + CombinedDSGraph->getScalarMap().replaceScalar (CI, V); + Instruction *Casted = V; if (V->getType() != CI->getType()) Casted = CastInst::CreatePointerCast (V, CI->getType(), V->getName(), InsertPt); @@ -342,6 +366,12 @@ Opts + 2, Name, InsertPt); + + // + // Update the DSGraph. 
+ // + CombinedDSGraph->getScalarMap().replaceScalar (CI, V); + Instruction *Casted = V; if (V->getType() != CI->getType()) Casted = CastInst::CreatePointerCast (V, CI->getType(), V->getName(), InsertPt); @@ -373,6 +403,17 @@ PoolAllocateSimple::CreateGlobalPool (unsigned RecSize, unsigned Align, Module& M) { + // + // Give poolinit() a dummy body. A later transform will remove the dummy + // body. + // + if (SAFECodeEnabled) { + LLVMContext & Context = M.getContext(); + Function * PoolInitFunc = dyn_cast(PoolInit); + BasicBlock * entryBB = BasicBlock::Create (Context, "entry", PoolInitFunc); + ReturnInst::Create (Context, entryBB); + } + GlobalVariable *GV = new GlobalVariable(M, getPoolType(&M.getContext()), false, GlobalValue::ExternalLinkage, Modified: poolalloc/trunk/lib/PoolAllocate/PoolAllocate.cpp URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/lib/PoolAllocate/PoolAllocate.cpp?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/lib/PoolAllocate/PoolAllocate.cpp (original) +++ poolalloc/trunk/lib/PoolAllocate/PoolAllocate.cpp Tue May 11 16:50:07 2010 @@ -268,6 +268,18 @@ //Get the poolregister function PoolRegister = M->getOrInsertFunction("poolregister", VoidType, PoolDescPtrTy, VoidPtrTy, Int32Type, NULL); + + Function* pthread_create_func = M->getFunction("pthread_create"); + if(pthread_create_func) + { + Function::arg_iterator i = pthread_create_func->arg_begin(); + std::vector non_vararg_params; + non_vararg_params.push_back(i++->getType()); + non_vararg_params.push_back(i++->getType()); + non_vararg_params.push_back(i++->getType()); + non_vararg_params.push_back(Int32Type); + PoolThreadWrapper = M->getOrInsertFunction("poolalloc_pthread_create",FunctionType::get(Int32Type,non_vararg_params,true)); + } } static void getCallsOf(Constant *C, std::vector &Calls) { @@ -733,7 +745,8 @@ for (DSGraph::node_iterator I = G->node_begin(), E = G->node_end(); I != 
E;++I){ // We only need to make a pool if there is a heap object in it... DSNode *N = I; - if ((N->isHeapNode()) || (BoundsChecksEnabled && (N->isArrayNode()))) { + if ((N->isHeapNode()) || (BoundsChecksEnabled && (N->isArrayNode())) || + GlobalsGraphNodeMapping.count(N) && GlobalsGraphNodeMapping[N].getNode()->isHeapNode()) { if (GlobalsGraphNodeMapping.count(N)) { // If it is a global pool, set up the pool descriptor appropriately. DSNode *GGN = GlobalsGraphNodeMapping[N].getNode(); Modified: poolalloc/trunk/lib/PoolAllocate/PoolOptimize.cpp URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/lib/PoolAllocate/PoolOptimize.cpp?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/lib/PoolAllocate/PoolOptimize.cpp (original) +++ poolalloc/trunk/lib/PoolAllocate/PoolOptimize.cpp Tue May 11 16:50:07 2010 @@ -32,7 +32,7 @@ static char ID; bool SAFECodeEnabled; - PoolOptimize(bool SAFECode = false) : ModulePass((intptr_t)&ID) { + PoolOptimize(bool SAFECode = true) : ModulePass((intptr_t)&ID) { SAFECodeEnabled = SAFECode; } bool runOnModule(Module &M); @@ -72,7 +72,7 @@ const Type *VoidPtrTy = PointerType::getUnqual(Int8Type); const Type *PoolDescPtrTy; if (SAFECodeEnabled) - PoolDescPtrTy = PointerType::getUnqual(ArrayType::get(VoidPtrTy, 50)); + PoolDescPtrTy = PointerType::getUnqual(ArrayType::get(VoidPtrTy, 92)); else PoolDescPtrTy = PointerType::getUnqual(ArrayType::get(VoidPtrTy, 16)); Modified: poolalloc/trunk/lib/PoolAllocate/TransformFunctionBody.cpp URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/lib/PoolAllocate/TransformFunctionBody.cpp?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/lib/PoolAllocate/TransformFunctionBody.cpp (original) +++ poolalloc/trunk/lib/PoolAllocate/TransformFunctionBody.cpp Tue May 11 16:50:07 2010 @@ -266,34 +266,15 @@ #endif void 
FuncTransform::visitAllocaInst(AllocaInst &MI) { - // Don't do anything if bounds checking will not be done by SAFECode later. - if (!(PAInfo.BoundsChecksEnabled)) return; - - // Get the pool handle for the node that this contributes to... - DSNode *Node = getDSNodeHFor(&MI).getNode(); - if (Node->isArrayNode()) { - Value *PH = getPoolHandle(&MI); - if (PH == 0 || isa(PH)) return; - TargetData &TD = PAInfo.getAnalysis(); - Value *AllocSize = ConstantInt::get(Type::getInt32Ty(MI.getContext()), TD.getTypeAllocSize(MI.getAllocatedType())); - - if (MI.isArrayAllocation()) - AllocSize = BinaryOperator::Create(Instruction::Mul, AllocSize, - MI.getOperand(0), "sizetmp", &MI); - - // TransformAllocationInstr(&MI, AllocSize); - BasicBlock::iterator InsertPt(MI); - ++InsertPt; - Instruction *Casted = CastInst::CreatePointerCast(&MI, PointerType::getUnqual(Type::getInt8Ty(MI.getContext())), - MI.getName()+".casted", InsertPt); - std::vector args; - args.push_back (PH); - args.push_back (Casted); - args.push_back (AllocSize); - Instruction *V = CallInst::Create(PAInfo.PoolRegister, - args.begin(), args.end(), "", InsertPt); - AddPoolUse(*V, PH, PoolUses); - } + // + // SAFECode will register alloca instructions with the run-time, so do not + // do that here. + // + // FIXME: + // There is a chance that we may need to update PoolUses to make sure that + // the pool handle is available in this function. 
+ // + return; } @@ -563,6 +544,7 @@ void FuncTransform::visitCallSite(CallSite& CS) { const Function *CF = CS.getCalledFunction(); Instruction *TheCall = CS.getInstruction(); + bool thread_creation_point = false; const Type* Int32Type = Type::getInt32Ty(CS.getInstruction()->getContext()); @@ -604,6 +586,21 @@ } else if (CF->getName() == "valloc") { errs() << "VALLOC USED BUT NOT HANDLED!\n"; abort(); + } else if (CF->getName() == "pthread_create") { + thread_creation_point = true; + //Get DSNode representing the void* passed to the callee + DSNodeHandle passed_dsnode_handle = G->getNodeForValue(CS.getArgument(3)); + + //Get DSNode representing the DSNode of the function pointer Value of the pthread_create call + DSNode* thread_callee_node = G->getNodeForValue(CS.getArgument(2)).getNode(); + if(!thread_callee_node) + { + FuncInfo *CFI = PAInfo.getFuncInfo(*CF); + thread_callee_node = G->getNodeForValue(CFI->MapValueToOriginal(CS.getArgument(2))).getNode(); + } + + //Fill in CF with the name of one of the functions in thread_callee_node + CF = const_cast(dyn_cast(*thread_callee_node->globals_begin())); } } @@ -775,8 +772,9 @@ Args.push_back(ArgVal); } - // Add the rest of the arguments... 
- Args.insert(Args.end(), CS.arg_begin(), CS.arg_end()); + // Add the rest of the arguments unless we're a thread creation point, in which case we only need the pools + if(!thread_creation_point) + Args.insert(Args.end(), CS.arg_begin(), CS.arg_end()); // // There are circumstances where a function is casted to another type and @@ -797,7 +795,27 @@ std::string Name = TheCall->getName(); TheCall->setName(""); - if (InvokeInst *II = dyn_cast(TheCall)) { + if(thread_creation_point) { + Module *M = CS.getInstruction()->getParent()->getParent()->getParent(); + Value* pthread_replacement = M->getFunction("poolalloc_pthread_create"); + std::vector thread_args; + + //Push back original thread arguments through the callee + thread_args.push_back(CS.getArgument(0)); + thread_args.push_back(CS.getArgument(1)); + thread_args.push_back(CS.getArgument(2)); + + //Push back the integer argument saying how many uses there are + thread_args.push_back(Constant::getIntegerValue(llvm::Type::getInt32Ty(M->getContext()),APInt(32,Args.size()))); + thread_args.insert(thread_args.end(),Args.begin(),Args.end()); + thread_args.push_back(CS.getArgument(3)); + + //Make the thread creation call + NewCall = CallInst::Create(pthread_replacement, + thread_args.begin(),thread_args.end(), + Name,TheCall); + } + else if (InvokeInst *II = dyn_cast(TheCall)) { NewCall = InvokeInst::Create (NewCallee, II->getNormalDest(), II->getUnwindDest(), Args.begin(), Args.end(), Name, TheCall); Modified: poolalloc/trunk/runtime/FL2Allocator/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/runtime/FL2Allocator/CMakeLists.txt?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/runtime/FL2Allocator/CMakeLists.txt (original) +++ poolalloc/trunk/runtime/FL2Allocator/CMakeLists.txt Tue May 11 16:50:07 2010 @@ -1,2 +1,9 @@ +include_directories(/localhome/simmon12/progs/dyncall-0.5/dyncall) 
+link_directories(/localhome/simmon12/progs/dyncall-0.5/dyncall/build_out/linux_x86_gcc_release) file(GLOB sources *.cpp) -add_llvm_library( poolalloc_rt ${sources} ) \ No newline at end of file +add_llvm_library( poolalloc_rt ${sources} ) +set_property( + TARGET poolalloc_rt + PROPERTY COMPILE_DEFINITIONS USE_DYNCALL + ) +target_link_libraries( poolalloc_rt dyncall_s ) Modified: poolalloc/trunk/runtime/FL2Allocator/Makefile URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/runtime/FL2Allocator/Makefile?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/runtime/FL2Allocator/Makefile (original) +++ poolalloc/trunk/runtime/FL2Allocator/Makefile Tue May 11 16:50:07 2010 @@ -1,7 +1,16 @@ LEVEL = ../.. -SHARED_LIBRARY=1 LIBRARYNAME=poolalloc_rt +# +# Build shared libraries on all platforms except Cygwin and MingW (which do +# not support them). +# +ifneq ($(OS),Cygwin) +ifneq ($(OS),MingW) +SHARED_LIBRARY=1 +endif +endif + ifdef ENABLE_OPTIMIZED CXXFLAGS += -DNDEBUG=1 endif Modified: poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.cpp URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.cpp?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.cpp (original) +++ poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.cpp Tue May 11 16:50:07 2010 @@ -366,6 +366,7 @@ void poolinit_bp(PoolTy *Pool, unsigned ObjAlignment) { DO_IF_PNP(memset(Pool, 0, sizeof(PoolTy))); + pthread_mutex_init(&Pool->pool_lock,NULL); Pool->Slabs = 0; if (ObjAlignment < 4) ObjAlignment = __alignof(double); Pool->AllocSize = INITIAL_SLAB_SIZE; @@ -392,6 +393,8 @@ getPoolNumber(Pool), NumBytes)); DO_IF_PNP(if (Pool->NumObjects == 0) ++PoolCounter); // Track # pools. 
+ pthread_mutex_lock(&Pool->pool_lock); + if (NumBytes >= LARGE_SLAB_SIZE) goto LargeObject; @@ -415,6 +418,7 @@ // Update bump ptr. Pool->ObjFreeList = (FreedNodeHeader*)(BumpPtr+NumBytes); DO_IF_TRACE(fprintf(stderr, "%p\n", Result)); + pthread_mutex_unlock(&Pool->pool_lock); return Result; } @@ -431,6 +435,7 @@ LAH->Marker = ~0U; LAH->LinkIntoList(&Pool->LargeArrays); DO_IF_TRACE(fprintf(stderr, "%p [large]\n", LAH+1)); + pthread_mutex_unlock(&Pool->pool_lock); return LAH+1; } @@ -444,6 +449,8 @@ #endif DO_IF_POOLDESTROY_STATS(PrintPoolStats(Pool)); + pthread_mutex_destroy(&Pool->pool_lock); + // Free all allocated slabs. PoolSlab *PS = Pool->Slabs; while (PS) { @@ -476,6 +483,7 @@ unsigned DeclaredSize, unsigned ObjAlignment) { assert(Pool && "Null pool pointer passed into poolinit!\n"); memset(Pool, 0, sizeof(PoolTy)); + pthread_mutex_init(&Pool->pool_lock,NULL); Pool->AllocSize = INITIAL_SLAB_SIZE; if (ObjAlignment < 4) ObjAlignment = __alignof(double); @@ -516,6 +524,7 @@ // void pooldestroy(PoolTy *Pool) { assert(Pool && "Null pool pointer passed in to pooldestroy!\n"); + pthread_mutex_destroy(&Pool->pool_lock); #ifdef ENABLE_POOL_IDS unsigned PID; @@ -858,29 +867,74 @@ void *poolalloc(PoolTy *Pool, unsigned NumBytes) { DO_IF_FORCE_MALLOCFREE(return malloc(NumBytes)); - return poolalloc_internal(Pool, NumBytes); + pthread_mutex_lock(&Pool->pool_lock); + void* to_return = poolalloc_internal(Pool, NumBytes); + pthread_mutex_unlock(&Pool->pool_lock); + return to_return; } void *poolmemalign(PoolTy *Pool, unsigned Alignment, unsigned NumBytes) { //punt and use pool alloc. 
//I don't know if this is safe or breaks any assumptions in the runtime + pthread_mutex_lock(&Pool->pool_lock); intptr_t base = (intptr_t)poolalloc_internal(Pool, NumBytes + Alignment - 1); + pthread_mutex_unlock(&Pool->pool_lock); return (void*)((base + (Alignment - 1)) & ~((intptr_t)Alignment -1)); } void poolfree(PoolTy *Pool, void *Node) { DO_IF_FORCE_MALLOCFREE(free(Node); return); + pthread_mutex_lock(&Pool->pool_lock); poolfree_internal(Pool, Node); + pthread_mutex_unlock(&Pool->pool_lock); } void *poolrealloc(PoolTy *Pool, void *Node, unsigned NumBytes) { DO_IF_FORCE_MALLOCFREE(return realloc(Node, NumBytes)); - return poolrealloc_internal(Pool, Node, NumBytes); + pthread_mutex_lock(&Pool->pool_lock); + void* to_return = poolrealloc_internal(Pool, Node, NumBytes); + pthread_mutex_unlock(&Pool->pool_lock); + return to_return; } +#ifdef USE_DYNCALL +#include +#include +#include + +void* poolalloc_thread_start(void* arg_) +{ + void** arg = (void**)arg_; + DCCallVM* callVM = dcNewCallVM((size_t)arg[1]*sizeof(size_t)+108); + int i; + for(i=0; i<(size_t)arg[1]; i++) + dcArgPointer(callVM,arg[2+i]); + dcArgPointer(callVM,arg[2+i]); + void* to_return = dcCallPointer(callVM,arg[0]); + dcFree(callVM); + return to_return; +} +int poolalloc_pthread_create(pthread_t* thread, + const pthread_attr_t* attr, + void *(*start_routine)(void*), int num_pools, ...) +{ + void** arg_array = (void**)malloc(sizeof(void*)*(2+num_pools)); + arg_array[0] = (void*)start_routine; + arg_array[1] = (void*)num_pools; + va_list argpools; + va_start(argpools,num_pools); + int i; + for(i=0; i *Pool) { assert(Pool && "Null pool pointer passed in to pooldestroy!\n"); + pthread_mutex_destroy(&Pool->pool_lock); if (Pool->Slabs == 0) return; // no memory allocated from this pool. 
@@ -970,17 +1025,23 @@ unsigned long long poolalloc_pc(PoolTy *Pool, unsigned NumBytes) { + pthread_mutex_lock(&Pool->pool_lock); void *Result = poolalloc_internal(Pool, NumBytes); + pthread_mutex_unlock(&Pool->pool_lock); return (char*)Result-(char*)Pool->Slabs; } void poolfree_pc(PoolTy *Pool, unsigned long long Node) { + pthread_mutex_lock(&Pool->pool_lock); poolfree_internal(Pool, (char*)Pool->Slabs+Node); + pthread_mutex_unlock(&Pool->pool_lock); } unsigned long long poolrealloc_pc(PoolTy *Pool, unsigned long long Node, unsigned NumBytes) { + pthread_mutex_lock(&Pool->pool_lock); void *Result = poolrealloc_internal(Pool, (char*)Pool->Slabs+Node, NumBytes); + pthread_mutex_unlock(&Pool->pool_lock); return (char*)Result-(char*)Pool->Slabs; } @@ -998,18 +1059,26 @@ void* poolalloc_pca(PoolTy *Pool, unsigned NumBytes) { - return poolalloc_internal(Pool, NumBytes); + pthread_mutex_lock(&Pool->pool_lock); + void* to_return = poolalloc_internal(Pool, NumBytes); + pthread_mutex_unlock(&Pool->pool_lock); + return to_return; } void poolfree_pca(PoolTy *Pool, void* Node) { + pthread_mutex_lock(&Pool->pool_lock); poolfree_internal(Pool, Node); + pthread_mutex_unlock(&Pool->pool_lock); } void* poolrealloc_pca(PoolTy *Pool, void* Node, unsigned NumBytes) { - return poolrealloc_internal(Pool, Node, NumBytes); + pthread_mutex_lock(&Pool->pool_lock); + void* to_return = poolrealloc_internal(Pool, Node, NumBytes); + pthread_mutex_unlock(&Pool->pool_lock); + return to_return; } //===----------------------------------------------------------------------===// Modified: poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.h URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.h?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.h (original) +++ poolalloc/trunk/runtime/FL2Allocator/PoolAllocator.h Tue May 11 16:50:07 2010 @@ 
-23,6 +23,7 @@ #define POOLALLOCATOR_RUNTIME_H #include +#include template struct PoolSlab; @@ -174,6 +175,9 @@ // BytesAllocated - The total number of bytes ever allocated from this pool. // Together with NumObjects, allows us to calculate average object size. unsigned BytesAllocated; + + // Lock for the pool + pthread_mutex_t pool_lock; }; extern "C" { @@ -229,6 +233,13 @@ // Access tracing runtime library support. void poolaccesstraceinit(void); void poolaccesstrace(void *Ptr, void *PD); + + // Auxiliary functions for thread support +#ifdef USE_DYNCALL + int poolalloc_pthread_create(pthread_t* thread, + const pthread_attr_t* attr, + void *(*start_routine)(void*), int num_pools, ...); +#endif } #endif Modified: poolalloc/trunk/runtime/FreeListAllocator/Makefile URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/runtime/FreeListAllocator/Makefile?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/runtime/FreeListAllocator/Makefile (original) +++ poolalloc/trunk/runtime/FreeListAllocator/Makefile Tue May 11 16:50:07 2010 @@ -1,5 +1,14 @@ LEVEL = ../.. -SHARED_LIBRARY=1 LIBRARYNAME=poolalloc_fl_rt +# +# Build shared libraries on all platforms except Cygwin and MingW (which do +# not support them). +# +ifneq ($(OS),Cygwin) +ifneq ($(OS),MingW) +SHARED_LIBRARY=1 +endif +endif + include $(LEVEL)/Makefile.common Modified: poolalloc/trunk/runtime/HeapFrag/Makefile URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/runtime/HeapFrag/Makefile?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/runtime/HeapFrag/Makefile (original) +++ poolalloc/trunk/runtime/HeapFrag/Makefile Tue May 11 16:50:07 2010 @@ -1,7 +1,17 @@ LEVEL = ../.. +LIBRARYNAME=heapfrag + #BYTECODE_LIBRARY=1 + +# +# Build shared libraries on all platforms except Cygwin and MingW (which do +# not support them). 
+# +ifneq ($(OS),Cygwin) +ifneq ($(OS),MingW) #SHARED_LIBRARY=1 -LIBRARYNAME=heapfrag +endif +endif include $(LEVEL)/Makefile.common Modified: poolalloc/trunk/runtime/PoolAllocator/Makefile URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/runtime/PoolAllocator/Makefile?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/runtime/PoolAllocator/Makefile (original) +++ poolalloc/trunk/runtime/PoolAllocator/Makefile Tue May 11 16:50:07 2010 @@ -1,7 +1,16 @@ LEVEL = ../.. -SHARED_LIBRARY=1 LIBRARYNAME=poolalloc_rt_old +# +# Build shared libraries on all platforms except Cygwin and MingW (which do +# not support them). +# +ifneq ($(OS),Cygwin) +ifneq ($(OS),MingW) +SHARED_LIBRARY=1 +endif +endif + include $(LEVEL)/Makefile.common # Always build optimized and debug versions Modified: poolalloc/trunk/runtime/PreRT/Makefile URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/runtime/PreRT/Makefile?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/runtime/PreRT/Makefile (original) +++ poolalloc/trunk/runtime/PreRT/Makefile Tue May 11 16:50:07 2010 @@ -1,6 +1,6 @@ LEVEL = ../.. 
-BYTECODE_LIBRARY=1 LIBRARYNAME=pa_pre_rt +BYTECODE_LIBRARY=1 include $(LEVEL)/Makefile.common Modified: poolalloc/trunk/test/Makefile URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/test/Makefile?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/test/Makefile (original) +++ poolalloc/trunk/test/Makefile Tue May 11 16:50:07 2010 @@ -25,30 +25,35 @@ LARGE_PROBLEM_SIZE_DIRS := \ - MultiSource/Benchmarks/llubenchmark \ - MultiSource/Benchmarks/FreeBench \ - MultiSource/Benchmarks/Ptrdist \ - MultiSource/Benchmarks/MallocBench/cfrac \ - SingleSource/Benchmarks/McGill + #MultiSource/Benchmarks/llubenchmark \ + #MultiSource/Benchmarks/FreeBench \ + #MultiSource/Benchmarks/Ptrdist \ + #MultiSource/Benchmarks/MallocBench/cfrac \ + #SingleSource/Benchmarks/McGill + LARGE_PROBLEM_SIZE_DIRS := \ - $(addprefix $(LLVM_OBJ_ROOT)/projects/llvm-test/,$(LARGE_PROBLEM_SIZE_DIRS)) + $(addprefix $(LLVM_OBJ_ROOT)/projects/test-suite/,$(LARGE_PROBLEM_SIZE_DIRS)) + NORMAL_PROBLEM_SIZE_DIRS := \ - MultiSource/Benchmarks/SciMark2-C/scimark2 \ - External/Povray \ - External/FPGrowth \ - External/Namd \ - External/SPEC/CINT2000/164.gzip \ - External/SPEC/CINT2000/175.vpr \ - External/SPEC/CINT2000/176.gcc \ - External/SPEC/CINT2000/252.eon \ - External/SPEC/CINT2000/253.perlbmk \ - External/SPEC/CINT2000/254.gap \ - External/SPEC/CINT2000/300.twolf + MultiSource/Benchmarks/Olden/bh \ + #MultiSource/Benchmarks/Olden \ + #MultiSource/Benchmarks/SciMark2-C/scimark2 \ + #External/Povray \ + #External/FPGrowth \ + #External/Namd \ + #External/SPEC/CINT2000/164.gzip \ + #External/SPEC/CINT2000/175.vpr \ + #External/SPEC/CINT2000/176.gcc \ + #External/SPEC/CINT2000/252.eon \ + #External/SPEC/CINT2000/253.perlbmk \ + #External/SPEC/CINT2000/254.gap \ + #External/SPEC/CINT2000/300.twolf + NORMAL_PROBLEM_SIZE_DIRS := \ - $(addprefix $(LLVM_OBJ_ROOT)/projects/llvm-test/,$(NORMAL_PROBLEM_SIZE_DIRS)) + 
$(addprefix $(LLVM_OBJ_ROOT)/projects/test-suite/,$(NORMAL_PROBLEM_SIZE_DIRS)) # # Problems for Pointer Compression @@ -63,7 +68,7 @@ MultiSource/Benchmarks/MallocBench/cfrac PCLARGE_PROBLEM_SIZE_DIRS := \ - $(addprefix $(LLVM_OBJ_ROOT)/projects/llvm-test/,$(PCLARGE_PROBLEM_SIZE_DIRS)) + $(addprefix $(LLVM_OBJ_ROOT)/projects/test-suite/,$(PCLARGE_PROBLEM_SIZE_DIRS)) PCNORMAL_PROBLEM_SIZE_DIRS := \ MultiSource/Benchmarks/Olden/perimeter \ @@ -72,7 +77,7 @@ MultiSource/Benchmarks/Olden/voronoi PCNORMAL_PROBLEM_SIZE_DIRS := \ - $(addprefix $(LLVM_OBJ_ROOT)/projects/llvm-test/,$(PCNORMAL_PROBLEM_SIZE_DIRS)) + $(addprefix $(LLVM_OBJ_ROOT)/projects/test-suite/,$(PCNORMAL_PROBLEM_SIZE_DIRS)) ############################################################################## # Targets for gathering statistics for programs for papers @@ -200,64 +205,64 @@ # Targets for running tests and gathering statistics for arbitrary tests ############################################################################## -# test target - Descend into projects/llvm-test and run the TEST.poolalloc.Makefile +# test target - Descend into projects/test-suite and run the TEST.poolalloc.Makefile # tests... 
test:: - (cd $(LLVM_OBJ_ROOT)/projects/llvm-test/$(SUBDIR); \ + (cd $(LLVM_OBJ_ROOT)/projects/test-suite/$(SUBDIR); \ PROJECT_DIR=$(PROJ_OBJ_ROOT) $(MAKE) -j1 TEST=poolalloc \ $(LARGESIZE) $(STABLERUN) report report.html) @printf "\a"; sleep 1; printf "\a"; sleep 1; printf "\a" pacompiletime:: - (cd $(LLVM_OBJ_ROOT)/projects/llvm-test/$(SUBDIR); \ + (cd $(LLVM_OBJ_ROOT)/projects/test-suite/$(SUBDIR); \ PROJECT_DIR=$(PROJ_OBJ_ROOT) $(MAKE) -j1 TEST=pacompiletime \ report report.html) @printf "\a"; sleep 1; printf "\a"; sleep 1; printf "\a" ptrcomp:: - (cd $(LLVM_OBJ_ROOT)/projects/llvm-test/$(SUBDIR); \ + (cd $(LLVM_OBJ_ROOT)/projects/test-suite/$(SUBDIR); \ PROJECT_DIR=$(PROJ_OBJ_ROOT) $(MAKE) -j1 TEST=ptrcomp \ $(LARGESIZE) $(STABLERUN) report report.html) @printf "\a"; sleep 1; printf "\a"; sleep 1; printf "\a" ptrcomptest:: - (cd $(LLVM_OBJ_ROOT)/projects/llvm-test/$(SUBDIR); \ + (cd $(LLVM_OBJ_ROOT)/projects/test-suite/$(SUBDIR); \ PROJECT_DIR=$(PROJ_OBJ_ROOT) $(MAKE) -j1 TEST=ptrcomp \ $(LARGESIZE) report) @printf "\a"; sleep 1; printf "\a"; sleep 1; printf "\a" vtl:: - (cd $(LLVM_OBJ_ROOT)/projects/llvm-test/$(SUBDIR); \ + (cd $(LLVM_OBJ_ROOT)/projects/test-suite/$(SUBDIR); \ PROJECT_DIR=$(PROJ_OBJ_ROOT) $(MAKE) -j1 TEST=pavtl \ test report report.csv) @printf "\a"; sleep 1; printf "\a"; sleep 1; printf "\a" perf:: - (cd $(LLVM_OBJ_ROOT)/projects/llvm-test/$(SUBDIR); \ + (cd $(LLVM_OBJ_ROOT)/projects/test-suite/$(SUBDIR); \ PROJECT_DIR=$(PROJ_OBJ_ROOT) $(MAKE) -j1 TEST=perf \ test report report.csv) @printf "\a"; sleep 1; printf "\a"; sleep 1; printf "\a" optzn:: - (cd $(LLVM_OBJ_ROOT)/projects/llvm-test/$(SUBDIR); \ + (cd $(LLVM_OBJ_ROOT)/projects/test-suite/$(SUBDIR); \ PROJECT_DIR=$(PROJ_OBJ_ROOT) $(MAKE) -j1 TEST=optzn \ $(LARGESIZE) $(STABLERUN) test report report.csv) @printf "\a"; sleep 1; printf "\a"; sleep 1; printf "\a" p4perf:: - (cd $(LLVM_OBJ_ROOT)/projects/llvm-test/$(SUBDIR); \ + (cd $(LLVM_OBJ_ROOT)/projects/test-suite/$(SUBDIR); \ 
PROJECT_DIR=$(PROJ_OBJ_ROOT) $(MAKE) -j1 TEST=p4perf \ test report report.csv) @printf "\a"; sleep 1; printf "\a"; sleep 1; printf "\a" strace:: - (cd $(LLVM_OBJ_ROOT)/projects/llvm-test/$(SUBDIR); \ + (cd $(LLVM_OBJ_ROOT)/projects/test-suite/$(SUBDIR); \ PROJECT_DIR=$(PROJ_OBJ_ROOT) $(MAKE) -j1 TEST=strace \ test) @printf "\a"; sleep 1; printf "\a"; sleep 1; printf "\a" cputrack:: - (cd $(LLVM_OBJ_ROOT)/projects/llvm-test/$(SUBDIR); \ + (cd $(LLVM_OBJ_ROOT)/projects/test-suite/$(SUBDIR); \ PROJECT_DIR=$(PROJ_OBJ_ROOT) $(MAKE) -j1 TEST=cputrack \ report report.csv) @printf "\a"; sleep 1; printf "\a"; sleep 1; printf "\a" Modified: poolalloc/trunk/test/TEST.poolalloc.Makefile URL: http://llvm.org/viewvc/llvm-project/poolalloc/trunk/test/TEST.poolalloc.Makefile?rev=103518&r1=103517&r2=103518&view=diff ============================================================================== --- poolalloc/trunk/test/TEST.poolalloc.Makefile (original) +++ poolalloc/trunk/test/TEST.poolalloc.Makefile Tue May 11 16:50:07 2010 @@ -19,21 +19,22 @@ CURDIR := $(shell cd .; pwd) PROGDIR := $(shell cd $(LLVM_SRC_ROOT)/projects/test-suite; pwd)/ RELDIR := $(subst $(PROGDIR),,$(CURDIR)) -PADIR := /home/andrewl/Research/llvm/projects/poolalloc +#PADIR := /home/andrewl/Research/llvm/projects/poolalloc +PADIR := $(LLVM_OBJ_ROOT)/projects/poolalloc # Bits of runtime to improve analysis -PA_PRE_RT := $(PADIR)/Release/lib/libpa_pre_rt.bca +PA_PRE_RT := $(PADIR)/$(CONFIGURATION)/lib/libpa_pre_rt.bca # Pool allocator pass shared object -PA_SO := $(PADIR)/Debug/lib/libpoolalloc$(SHLIBEXT) -DSA_SO := $(PADIR)/Debug/lib/libLLVMDataStructure$(SHLIBEXT) -ASSIST_SO := $(PADIR)/Debug/lib/libAssistDS$(SHLIBEXT) +PA_SO := $(PADIR)/$(CONFIGURATION)/lib/libpoolalloc$(SHLIBEXT) +DSA_SO := $(PADIR)/$(CONFIGURATION)/lib/libLLVMDataStructure$(SHLIBEXT) +ASSIST_SO := $(PADIR)/$(CONFIGURATION)/lib/libAssistDS$(SHLIBEXT) # Pool allocator runtime library -#PA_RT := $(PADIR)/Debug/lib/libpoolalloc_fl_rt.bc +#PA_RT := 
$(PADIR)/$(CONFIGURATION)/lib/libpoolalloc_fl_rt.bc #PA_RT_O := $(PROJECT_DIR)/lib/$(CONFIGURATION)/poolalloc_rt.o -PA_RT_O := $(PADIR)/Debug/lib/poolalloc_rt.o -#PA_RT_O := $(PROJECT_DIR)/lib/Release/poolalloc_fl_rt.o +PA_RT_O := $(PADIR)/$(CONFIGURATION)/lib/libpoolalloc_rt.a +#PA_RT_O := $(PROJECT_DIR)/lib/$(CONFIGURATION)/poolalloc_fl_rt.o # Command to run opt with the pool allocator pass loaded OPT_PA := $(LOPT) -load $(DSA_SO) -load $(PA_SO) From gohman at apple.com Tue May 11 16:59:14 2010 From: gohman at apple.com (Dan Gohman) Date: Tue, 11 May 2010 21:59:14 -0000 Subject: [llvm-commits] [llvm] r103519 - /llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Message-ID: <20100511215914.3DB16312800A@llvm.org> Author: djg Date: Tue May 11 16:59:14 2010 New Revision: 103519 URL: http://llvm.org/viewvc/llvm-project?rev=103519&view=rev Log: Don't set kill flags on uses of CopyFromReg nodes. InstrEmitter doesn't create separate virtual registers for CopyFromReg values, so uses of them don't necessarily kill the value. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp?rev=103519&r1=103518&r2=103519&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Tue May 11 16:59:14 2010 @@ -297,15 +297,22 @@ } // If this value has only one use, that use is a kill. This is a - // conservative approximation. Tied operands are never killed, so we need - // to check that. And that means we need to determine the index of the - // operand. 
- unsigned Idx = MI->getNumOperands(); - while (Idx > 0 && - MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) - --Idx; - bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1; - bool isKill = Op.hasOneUse() && !isTied && !IsDebug; + // conservative approximation. InstrEmitter does trivial coalescing + // with CopyFromReg nodes, so don't emit kill flags for them. + // Tied operands are never killed, so we need to check that. And that + // means we need to determine the index of the operand. + bool isKill = Op.hasOneUse() && + Op.getNode()->getOpcode() != ISD::CopyFromReg && + !IsDebug; + if (isKill) { + unsigned Idx = MI->getNumOperands(); + while (Idx > 0 && + MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) + --Idx; + bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1; + if (isTied) + isKill = false; + } MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef, false/*isImp*/, isKill, From stoklund at 2pi.dk Tue May 11 18:24:45 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 11 May 2010 23:24:45 -0000 Subject: [llvm-commits] [llvm] r103521 - in /llvm/trunk: lib/CodeGen/RegAllocFast.cpp test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll Message-ID: <20100511232445.5D27C312800A@llvm.org> Author: stoklund Date: Tue May 11 18:24:45 2010 New Revision: 103521 URL: http://llvm.org/viewvc/llvm-project?rev=103521&view=rev Log: Keep track of the last place a live virtreg was used. This allows us to add accurate kill markers, something the scavenger likes. Add some more tests from ARM that needed this. 
Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp llvm/trunk/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll llvm/trunk/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll llvm/trunk/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103521&r1=103520&r2=103521&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Tue May 11 18:24:45 2010 @@ -56,9 +56,22 @@ // values are spilled. IndexedMap StackSlotForVirtReg; - // Virt2PhysMap - This map contains entries for each virtual register + // Everything we know about a live virtual register. + struct LiveReg { + MachineInstr *LastUse; // Last instr to use reg. + unsigned PhysReg; // Currently held here. + unsigned LastOpNum; // OpNum on LastUse. + + LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0) { + assert(p && "Don't create LiveRegs without a PhysReg"); + } + }; + + typedef DenseMap LiveRegMap; + + // LiveVirtRegs - This map contains entries for each virtual register // that is currently available in a physical register. - DenseMap Virt2PhysMap; + LiveRegMap LiveVirtRegs; // RegState - Track the state of a physical register. enum RegState { @@ -77,7 +90,7 @@ // A register state may also be a virtual register number, indication that // the physical register is currently allocated to a virtual register. In - // that case, Virt2PhysMap contains the inverse mapping. + // that case, LiveVirtRegs contains the inverse mapping. }; // PhysRegState - One of the RegState enums, or a virtreg. 
@@ -112,18 +125,20 @@ void AllocateBasicBlock(MachineBasicBlock &MBB); int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); void killVirtReg(unsigned VirtReg); + void killVirtReg(LiveRegMap::iterator i); void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned VirtReg, bool isKill); void killPhysReg(unsigned PhysReg); void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, unsigned PhysReg, bool isKill); - void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); - unsigned allocVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg); + LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, + unsigned PhysReg); + LiveRegMap::iterator allocVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned VirtReg); unsigned defineVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg); + unsigned OpNum, unsigned VirtReg); unsigned reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg); + unsigned OpNum, unsigned VirtReg); void reservePhysReg(MachineBasicBlock &MBB, MachineInstr *MI, unsigned PhysReg); void spillAll(MachineBasicBlock &MBB, MachineInstr *MI); @@ -150,54 +165,78 @@ } /// killVirtReg - Mark virtreg as no longer available. +void RAFast::killVirtReg(LiveRegMap::iterator i) { + assert(i != LiveVirtRegs.end() && "Killing unmapped virtual register"); + unsigned VirtReg = i->first; + const LiveReg &LR = i->second; + assert(PhysRegState[LR.PhysReg] == VirtReg && "Broken RegState mapping"); + PhysRegState[LR.PhysReg] = regFree; + if (LR.LastUse) { + MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); + if (MO.isUse()) MO.setIsKill(); + else MO.setIsDead(); + DEBUG(dbgs() << " - last seen here: " << *LR.LastUse); + } + LiveVirtRegs.erase(i); +} + +/// killVirtReg - Mark virtreg as no longer available. 
void RAFast::killVirtReg(unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); DEBUG(dbgs() << " Killing %reg" << VirtReg << "\n"); - DenseMap::iterator i = Virt2PhysMap.find(VirtReg); - if (i == Virt2PhysMap.end()) return; - unsigned PhysReg = i->second; - assert(PhysRegState[PhysReg] == VirtReg && "Broken RegState mapping"); - PhysRegState[PhysReg] = regFree; - Virt2PhysMap.erase(i); + LiveRegMap::iterator i = LiveVirtRegs.find(VirtReg); + if (i != LiveVirtRegs.end()) + killVirtReg(i); } /// spillVirtReg - This method spills the value specified by VirtReg into the /// corresponding stack slot if needed. If isKill is set, the register is also /// killed. void RAFast::spillVirtReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, + MachineBasicBlock::iterator MI, unsigned VirtReg, bool isKill) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); - DenseMap::iterator i = Virt2PhysMap.find(VirtReg); - assert(i != Virt2PhysMap.end() && "Spilling unmapped virtual register"); - unsigned PhysReg = i->second; - assert(PhysRegState[PhysReg] == VirtReg && "Broken RegState mapping"); - - if (PhysRegDirty.test(PhysReg)) { - PhysRegDirty.reset(PhysReg); - DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg) + LiveRegMap::iterator i = LiveVirtRegs.find(VirtReg); + assert(i != LiveVirtRegs.end() && "Spilling unmapped virtual register"); + const LiveReg &LR = i->second; + assert(PhysRegState[LR.PhysReg] == VirtReg && "Broken RegState mapping"); + + // If this physreg is used by the instruction, we want to kill it on the + // instruction, not on the spill. 
+ bool spillKill = isKill && LR.LastUse != MI; + + if (PhysRegDirty.test(LR.PhysReg)) { + PhysRegDirty.reset(LR.PhysReg); + DEBUG(dbgs() << " Spilling register " << TRI->getName(LR.PhysReg) << " containing %reg" << VirtReg); const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << " to stack slot #" << FrameIndex << "\n"); - TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC, TRI); + TII->storeRegToStackSlot(MBB, MI, LR.PhysReg, spillKill, + FrameIndex, RC, TRI); ++NumStores; // Update statistics - } - if (isKill) { - PhysRegState[PhysReg] = regFree; - Virt2PhysMap.erase(i); + if (spillKill) + i->second.LastUse = 0; // Don't kill register again + else if (!isKill) { + MachineInstr *Spill = llvm::prior(MI); + i->second.LastUse = Spill; + i->second.LastOpNum = Spill->findRegisterUseOperandIdx(LR.PhysReg); + } } + + if (isKill) + killVirtReg(i); } /// spillAll - Spill all dirty virtregs without killing them. void RAFast::spillAll(MachineBasicBlock &MBB, MachineInstr *MI) { SmallVector Dirty; - for (DenseMap::iterator i = Virt2PhysMap.begin(), - e = Virt2PhysMap.end(); i != e; ++i) - if (PhysRegDirty.test(i->second)) + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), + e = LiveVirtRegs.end(); i != e; ++i) + if (PhysRegDirty.test(i->second.PhysReg)) Dirty.push_back(i->first); for (unsigned i = 0, e = Dirty.size(); i != e; ++i) spillVirtReg(MBB, MI, Dirty[i], false); @@ -276,16 +315,18 @@ /// that PhysReg is the proper container for VirtReg now. The physical /// register must not be used for anything else when this is called. 
/// -void RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { +RAFast::LiveRegMap::iterator +RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { DEBUG(dbgs() << " Assigning %reg" << VirtReg << " to " << TRI->getName(PhysReg) << "\n"); - Virt2PhysMap.insert(std::make_pair(VirtReg, PhysReg)); PhysRegState[PhysReg] = VirtReg; + return LiveVirtRegs.insert(std::make_pair(VirtReg, PhysReg)).first; } /// allocVirtReg - Allocate a physical register for VirtReg. -unsigned RAFast::allocVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg) { +RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineBasicBlock &MBB, + MachineInstr *MI, + unsigned VirtReg) { const unsigned spillCost = 100; assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); @@ -305,10 +346,8 @@ case regReserved: continue; case regFree: - if (!UsedInInstr.test(PhysReg)) { - assignVirtToPhysReg(VirtReg, PhysReg); - return PhysReg; - } + if (!UsedInInstr.test(PhysReg)) + return assignVirtToPhysReg(VirtReg, PhysReg); continue; default: // Grab the first spillable register we meet. @@ -387,8 +426,7 @@ } } } - assignVirtToPhysReg(VirtReg, BestReg); - return BestReg; + return assignVirtToPhysReg(VirtReg, BestReg); } // Nothing we can do. @@ -401,40 +439,44 @@ MI->print(Msg, TM); } report_fatal_error(Msg.str()); - return 0; + return LiveVirtRegs.end(); } /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. 
unsigned RAFast::defineVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg) { + unsigned OpNum, unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); - unsigned PhysReg = Virt2PhysMap.lookup(VirtReg); - if (!PhysReg) - PhysReg = allocVirtReg(MBB, MI, VirtReg); - UsedInInstr.set(PhysReg); - PhysRegDirty.set(PhysReg); - return PhysReg; + LiveRegMap::iterator i = LiveVirtRegs.find(VirtReg); + if (i == LiveVirtRegs.end()) + i = allocVirtReg(MBB, MI, VirtReg); + i->second.LastUse = MI; + i->second.LastOpNum = OpNum; + UsedInInstr.set(i->second.PhysReg); + PhysRegDirty.set(i->second.PhysReg); + return i->second.PhysReg; } /// reloadVirtReg - Make sure VirtReg is available in a physreg and return it. unsigned RAFast::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg) { + unsigned OpNum, unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); - unsigned PhysReg = Virt2PhysMap.lookup(VirtReg); - if (!PhysReg) { - PhysReg = allocVirtReg(MBB, MI, VirtReg); - PhysRegDirty.reset(PhysReg); + LiveRegMap::iterator i = LiveVirtRegs.find(VirtReg); + if (i == LiveVirtRegs.end()) { + i = allocVirtReg(MBB, MI, VirtReg); + PhysRegDirty.reset(i->second.PhysReg); const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " - << TRI->getName(PhysReg) << "\n"); - TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC, TRI); + << TRI->getName(i->second.PhysReg) << "\n"); + TII->loadRegFromStackSlot(MBB, MI, i->second.PhysReg, FrameIndex, RC, TRI); ++NumLoads; } - UsedInInstr.set(PhysReg); - return PhysReg; + i->second.LastUse = MI; + i->second.LastOpNum = OpNum; + UsedInInstr.set(i->second.PhysReg); + return i->second.PhysReg; } /// reservePhysReg - Mark PhysReg as reserved. 
This is very similar to @@ -491,7 +533,7 @@ DEBUG(dbgs() << "\nBB#" << MBB.getNumber() << ", "<< MBB.getName() << "\n"); PhysRegState.assign(TRI->getNumRegs(), regDisabled); - assert(Virt2PhysMap.empty() && "Mapping not cleared form last block?"); + assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?"); PhysRegDirty.reset(); MachineBasicBlock::iterator MII = MBB.begin(); @@ -522,20 +564,21 @@ dbgs() << "=%reg" << PhysRegState[Reg]; if (PhysRegDirty.test(Reg)) dbgs() << "*"; - assert(Virt2PhysMap.lookup(PhysRegState[Reg]) == Reg && + assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg && "Bad inverse map"); break; } } dbgs() << '\n'; - // Check that Virt2PhysMap is the inverse. - for (DenseMap::iterator i = Virt2PhysMap.begin(), - e = Virt2PhysMap.end(); i != e; ++i) { + // Check that LiveVirtRegs is the inverse. + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), + e = LiveVirtRegs.end(); i != e; ++i) { assert(TargetRegisterInfo::isVirtualRegister(i->first) && "Bad map key"); - assert(TargetRegisterInfo::isPhysicalRegister(i->second) && + assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) && "Bad map value"); - assert(PhysRegState[i->second] == i->first && "Bad inverse map"); + assert(PhysRegState[i->second.PhysReg] == i->first && + "Bad inverse map"); } }); @@ -546,8 +589,11 @@ if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - // This may be 0 if the register is currently spilled. Tough. - setPhysReg(MO, Virt2PhysMap.lookup(Reg)); + LiveRegMap::iterator i = LiveVirtRegs.find(Reg); + if (i != LiveVirtRegs.end()) + setPhysReg(MO, i->second.PhysReg); + else + MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! } // Next instruction. 
continue; @@ -589,11 +635,11 @@ unsigned Reg = MO.getReg(); if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; if (MO.isUse()) { - setPhysReg(MO, reloadVirtReg(MBB, MI, Reg)); + setPhysReg(MO, reloadVirtReg(MBB, MI, i, Reg)); if (MO.isKill()) VirtKills.push_back(Reg); } else if (MO.isEarlyClobber()) { - unsigned PhysReg = defineVirtReg(MBB, MI, Reg); + unsigned PhysReg = defineVirtReg(MBB, MI, i, Reg); setPhysReg(MO, PhysReg); PhysDefs.push_back(PhysReg); } @@ -640,7 +686,7 @@ } if (MO.isDead()) VirtKills.push_back(Reg); - setPhysReg(MO, defineVirtReg(MBB, MI, Reg)); + setPhysReg(MO, defineVirtReg(MBB, MI, i, Reg)); } // Spill all dirty virtregs before a call, in case of an exception. @@ -665,8 +711,8 @@ // Spill all physical registers holding virtual registers now. DEBUG(dbgs() << "Killing live registers at end of block.\n"); MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); - while (!Virt2PhysMap.empty()) - spillVirtReg(MBB, MI, Virt2PhysMap.begin()->first, true); + while (!LiveVirtRegs.empty()) + spillVirtReg(MBB, MI, LiveVirtRegs.begin()->first, true); DEBUG(MBB.dump()); } Modified: llvm/trunk/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll?rev=103521&r1=103520&r2=103521&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll (original) +++ llvm/trunk/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll Tue May 11 18:24:45 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=local +; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=fast ; PR1925 %struct.encode_aux_nearestmatch = type { i32*, i32*, i32*, i32*, i32, i32 } Modified: llvm/trunk/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll?rev=103521&r1=103520&r2=103521&view=diff 
============================================================================== --- llvm/trunk/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll (original) +++ llvm/trunk/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll Tue May 11 18:24:45 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=local +; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=fast ; PR1925 %"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" } Modified: llvm/trunk/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll?rev=103521&r1=103520&r2=103521&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll (original) +++ llvm/trunk/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll Tue May 11 18:24:45 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=local +; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=fast ; PR4100 @.str = external constant [30 x i8] ; <[30 x i8]*> [#uses=1] From stoklund at 2pi.dk Tue May 11 18:24:47 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 11 May 2010 23:24:47 -0000 Subject: [llvm-commits] [llvm] r103522 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100511232447.96F2C3128018@llvm.org> Author: stoklund Date: Tue May 11 18:24:47 2010 New Revision: 103522 URL: http://llvm.org/viewvc/llvm-project?rev=103522&view=rev Log: Store the Dirty bit in the LiveReg structure instead of a bit vector. 
Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103522&r1=103521&r2=103522&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Tue May 11 18:24:47 2010 @@ -58,11 +58,13 @@ // Everything we know about a live virtual register. struct LiveReg { - MachineInstr *LastUse; // Last instr to use reg. - unsigned PhysReg; // Currently held here. - unsigned LastOpNum; // OpNum on LastUse. + MachineInstr *LastUse; // Last instr to use reg. + unsigned PhysReg; // Currently held here. + unsigned short LastOpNum; // OpNum on LastUse. + bool Dirty; // Register needs spill. - LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0) { + LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0), + Dirty(false) { assert(p && "Don't create LiveRegs without a PhysReg"); } }; @@ -100,11 +102,6 @@ // instruction, and so cannot be allocated. BitVector UsedInInstr; - // PhysRegDirty - A bit is set for each physreg that holds a dirty virtual - // register. Bits for physregs that are not mapped to a virtual register are - // invalid. - BitVector PhysRegDirty; - // ReservedRegs - vector of reserved physical registers. BitVector ReservedRegs; @@ -200,15 +197,15 @@ "Spilling a physical register is illegal!"); LiveRegMap::iterator i = LiveVirtRegs.find(VirtReg); assert(i != LiveVirtRegs.end() && "Spilling unmapped virtual register"); - const LiveReg &LR = i->second; + LiveReg &LR = i->second; assert(PhysRegState[LR.PhysReg] == VirtReg && "Broken RegState mapping"); // If this physreg is used by the instruction, we want to kill it on the // instruction, not on the spill. 
bool spillKill = isKill && LR.LastUse != MI; - if (PhysRegDirty.test(LR.PhysReg)) { - PhysRegDirty.reset(LR.PhysReg); + if (LR.Dirty) { + LR.Dirty = false; DEBUG(dbgs() << " Spilling register " << TRI->getName(LR.PhysReg) << " containing %reg" << VirtReg); const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); @@ -219,11 +216,11 @@ ++NumStores; // Update statistics if (spillKill) - i->second.LastUse = 0; // Don't kill register again + LR.LastUse = 0; // Don't kill register again else if (!isKill) { MachineInstr *Spill = llvm::prior(MI); - i->second.LastUse = Spill; - i->second.LastOpNum = Spill->findRegisterUseOperandIdx(LR.PhysReg); + LR.LastUse = Spill; + LR.LastOpNum = Spill->findRegisterUseOperandIdx(LR.PhysReg); } } @@ -236,7 +233,7 @@ SmallVector Dirty; for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); i != e; ++i) - if (PhysRegDirty.test(i->second.PhysReg)) + if (i->second.Dirty) Dirty.push_back(i->first); for (unsigned i = 0, e = Dirty.size(); i != e; ++i) spillVirtReg(MBB, MI, Dirty[i], false); @@ -351,10 +348,8 @@ continue; default: // Grab the first spillable register we meet. - if (!BestReg && !UsedInInstr.test(PhysReg)) { - BestReg = PhysReg; - BestCost = PhysRegDirty.test(PhysReg) ? spillCost : 1; - } + if (!BestReg && !UsedInInstr.test(PhysReg)) + BestReg = PhysReg, BestCost = spillCost; continue; } } @@ -388,7 +383,7 @@ Cost++; break; default: - Cost += PhysRegDirty.test(Alias) ? 
spillCost : 1; + Cost += spillCost; break; } } @@ -450,11 +445,12 @@ LiveRegMap::iterator i = LiveVirtRegs.find(VirtReg); if (i == LiveVirtRegs.end()) i = allocVirtReg(MBB, MI, VirtReg); - i->second.LastUse = MI; - i->second.LastOpNum = OpNum; - UsedInInstr.set(i->second.PhysReg); - PhysRegDirty.set(i->second.PhysReg); - return i->second.PhysReg; + LiveReg &LR = i->second; + LR.LastUse = MI; + LR.LastOpNum = OpNum; + LR.Dirty = true; + UsedInInstr.set(LR.PhysReg); + return LR.PhysReg; } /// reloadVirtReg - Make sure VirtReg is available in a physreg and return it. @@ -465,7 +461,6 @@ LiveRegMap::iterator i = LiveVirtRegs.find(VirtReg); if (i == LiveVirtRegs.end()) { i = allocVirtReg(MBB, MI, VirtReg); - PhysRegDirty.reset(i->second.PhysReg); const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " @@ -473,10 +468,11 @@ TII->loadRegFromStackSlot(MBB, MI, i->second.PhysReg, FrameIndex, RC, TRI); ++NumLoads; } - i->second.LastUse = MI; - i->second.LastOpNum = OpNum; - UsedInInstr.set(i->second.PhysReg); - return i->second.PhysReg; + LiveReg &LR = i->second; + LR.LastUse = MI; + LR.LastOpNum = OpNum; + UsedInInstr.set(LR.PhysReg); + return LR.PhysReg; } /// reservePhysReg - Mark PhysReg as reserved. 
This is very similar to @@ -534,7 +530,6 @@ PhysRegState.assign(TRI->getNumRegs(), regDisabled); assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?"); - PhysRegDirty.reset(); MachineBasicBlock::iterator MII = MBB.begin(); @@ -562,7 +557,7 @@ break; default: dbgs() << "=%reg" << PhysRegState[Reg]; - if (PhysRegDirty.test(Reg)) + if (LiveVirtRegs[PhysRegState[Reg]].Dirty) dbgs() << "*"; assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg && "Bad inverse map"); @@ -727,7 +722,6 @@ TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); - PhysRegDirty.resize(TRI->getNumRegs()); UsedInInstr.resize(TRI->getNumRegs()); ReservedRegs = TRI->getReservedRegs(*MF); From jyasskin at google.com Tue May 11 18:25:16 2010 From: jyasskin at google.com (Jeffrey Yasskin) Date: Tue, 11 May 2010 23:25:16 -0000 Subject: [llvm-commits] [llvm] r103523 - /llvm/trunk/tools/bugpoint/Miscompilation.cpp Message-ID: <20100511232516.4B1D5312800A@llvm.org> Author: jyasskin Date: Tue May 11 18:25:16 2010 New Revision: 103523 URL: http://llvm.org/viewvc/llvm-project?rev=103523&view=rev Log: Fix PR6951 by fixing Module leaks in bugpoint. Modified: llvm/trunk/tools/bugpoint/Miscompilation.cpp Modified: llvm/trunk/tools/bugpoint/Miscompilation.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/bugpoint/Miscompilation.cpp?rev=103523&r1=103522&r2=103523&view=diff ============================================================================== --- llvm/trunk/tools/bugpoint/Miscompilation.cpp (original) +++ llvm/trunk/tools/bugpoint/Miscompilation.cpp Tue May 11 18:25:16 2010 @@ -126,7 +126,8 @@ // Ok, so now we know that the prefix passes work, try running the suffix // passes on the result of the prefix passes. 
// - Module *PrefixOutput = ParseInputFile(BitcodeResult, BD.getContext()); + OwningPtr<Module> PrefixOutput(ParseInputFile(BitcodeResult, + BD.getContext())); if (PrefixOutput == 0) { errs() << BD.getToolName() << ": Error reading bitcode file '" << BitcodeResult << "'!\n"; @@ -142,7 +143,7 @@ << "' passes compile correctly after the '" << getPassesString(Prefix) << "' passes: "; - Module *OriginalInput = BD.swapProgramIn(PrefixOutput); + OwningPtr<Module> OriginalInput(BD.swapProgramIn(PrefixOutput.take())); if (BD.runPasses(Suffix, BitcodeResult, false/*delete*/, true/*quiet*/)) { errs() << " Error running this sequence of passes" << " on the input program!\n"; @@ -157,13 +158,13 @@ return InternalError; if (Diff) { outs() << " nope.\n"; - delete OriginalInput; // We pruned down the original input... return KeepSuffix; } // Otherwise, we must not be running the bad pass anymore. outs() << " yup.\n"; // No miscompilation! - delete BD.swapProgramIn(OriginalInput); // Restore orig program & free test + // Restore orig program & free test. + delete BD.swapProgramIn(OriginalInput.take()); return NoFailure; } @@ -222,15 +223,14 @@ } delete M2; // We are done with this module. - Module *OldProgram = BD.swapProgramIn(M1); + OwningPtr<Module> OldProgram(BD.swapProgramIn(M1)); // Execute the program. If it does not match the expected output, we must // return true. bool Broken = BD.diffProgram("", "", false, &Error); if (!Error.empty()) { // Delete the linked module & restore the original - BD.swapProgramIn(OldProgram); - delete M1; + delete BD.swapProgramIn(OldProgram.take()); } return Broken; } From gkistanova at gmail.com Tue May 11 18:27:23 2010 From: gkistanova at gmail.com (Galina Kistanova) Date: Tue, 11 May 2010 16:27:23 -0700 Subject: [llvm-commits] Next patch for builbot to use scripted builds Message-ID: Hello everyone, Please review the next two patches for the buildbot scripted builder. 1.
One adds a new shell script to the llvm-gcc/extras directory for building llvm-gcc: * build-x-4-mingw32 - cross builds llvm and llvm-gcc for --build=x86_64-apple-darwin10 --host=x86_64-apple-darwin10 --target=i686-pc-mingw32 2. The other one adds a new buildbot builder ScriptedBuilder to work with the new build script, and updates the configuration of the llvm-gcc-x86_64-darwin10-cross-mingw32 buildslave. Please review. Thanks Galina -------------- next part -------------- A non-text attachment was scrubbed... Name: patch02.diff Type: application/octet-stream Size: 4336 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100511/174b50b2/attachment.obj -------------- next part -------------- A non-text attachment was scrubbed... Name: patch03.diff Type: application/octet-stream Size: 9462 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100511/174b50b2/attachment-0001.obj From dag at cray.com Tue May 11 18:28:33 2010 From: dag at cray.com (David Greene) Date: Tue, 11 May 2010 18:28:33 -0500 Subject: [llvm-commits] [llvm] r103427 - /llvm/trunk/test/Other/2010-05-60-Printer.ll In-Reply-To: <49AE2548-9ACC-4C2E-BA68-1C0E80D5575F@apple.com> References: <20100510205106.4F283312800A@llvm.org> <24B717FA-C973-47DE-9818-44E709A83EDC@nondot.org> <49AE2548-9ACC-4C2E-BA68-1C0E80D5575F@apple.com> Message-ID: <201005111828.49367.dag@cray.com> On Tuesday 11 May 2010 00:55:05 Daniel Dunbar wrote: > > Daniel, is this a lit bug? > > This is a 'lit' unimplementism. I didn't implement a full Tcl parser, just > enough for the parts we use. Since I plan to kill off the DejaGNU side at > some point, I don't feel it is worth adding support for more Tcl-isms, but > I can if people like. Somebody had better update the documentation if this doesn't get implemented. And someone should make sure we're not creating files called &1. What is 'lit' exactly, anyway? A Tcl replacement?
-Dave From daniel_dunbar at apple.com Tue May 11 18:51:03 2010 From: daniel_dunbar at apple.com (Daniel Dunbar) Date: Tue, 11 May 2010 16:51:03 -0700 Subject: [llvm-commits] [llvm] r103427 - /llvm/trunk/test/Other/2010-05-60-Printer.ll In-Reply-To: <201005111828.49367.dag@cray.com> References: <20100510205106.4F283312800A@llvm.org> <24B717FA-C973-47DE-9818-44E709A83EDC@nondot.org> <49AE2548-9ACC-4C2E-BA68-1C0E80D5575F@apple.com> <201005111828.49367.dag@cray.com> Message-ID: <0D2BA5A8-278C-41D0-A530-ACD8BFDCF591@apple.com> It is a new testing harness for LLVM. See: http://blog.llvm.org/2009/12/lit-it.html - Daniel On May 11, 2010, at 4:28 PM, David Greene wrote: > On Tuesday 11 May 2010 00:55:05 Daniel Dunbar wrote: >>> Daniel, is this a lit bug? >> >> This is a 'lit' unimplementism. I didn't implement a full Tcl parser, just >> enough for the parts we use. Since I plan to kill off the DejaGNU side at >> some point, I don't feel it is worth adding support for more Tcl-isms, but >> I can if people like. > > Somebody had better update the documentation if this doesn't get implemented. > > And someone should make sure we're not creating files called &1. > > What is 'lit' exactly, anyway? A Tcl replacement? > > -Dave > From daniel at zuster.org Tue May 11 18:53:05 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 11 May 2010 23:53:05 -0000 Subject: [llvm-commits] [llvm] r103525 - /llvm/trunk/lib/MC/MachObjectWriter.cpp Message-ID: <20100511235305.2F389312800A@llvm.org> Author: ddunbar Date: Tue May 11 18:53:05 2010 New Revision: 103525 URL: http://llvm.org/viewvc/llvm-project?rev=103525&view=rev Log: MC/Mach-O: Fix a crash on invalid. 
Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MachObjectWriter.cpp?rev=103525&r1=103524&r2=103525&view=diff ============================================================================== --- llvm/trunk/lib/MC/MachObjectWriter.cpp (original) +++ llvm/trunk/lib/MC/MachObjectWriter.cpp Tue May 11 18:53:05 2010 @@ -584,7 +584,7 @@ // Add the local offset, if needed. if (Base != &SD) Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base); - } else { + } else if (Symbol->isInSection()) { // The index is the section ordinal (1-based). Index = SD.getFragment()->getParent()->getOrdinal() + 1; IsExtern = 0; @@ -592,6 +592,9 @@ if (IsPCRel) Value -= Address + (1 << Log2Size); + } else { + report_fatal_error("unsupported relocation of undefined symbol '" + + Symbol->getName() + "'"); } MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); From daniel at zuster.org Tue May 11 18:53:08 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 11 May 2010 23:53:08 -0000 Subject: [llvm-commits] [llvm] r103526 - /llvm/trunk/lib/MC/MachObjectWriter.cpp Message-ID: <20100511235308.293103128026@llvm.org> Author: ddunbar Date: Tue May 11 18:53:07 2010 New Revision: 103526 URL: http://llvm.org/viewvc/llvm-project?rev=103526&view=rev Log: MC/Mach-O: As Kevin pointed out, 'Address' is really an offset -- rename to clarify. Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MachObjectWriter.cpp?rev=103526&r1=103525&r2=103526&view=diff ============================================================================== --- llvm/trunk/lib/MC/MachObjectWriter.cpp (original) +++ llvm/trunk/lib/MC/MachObjectWriter.cpp Tue May 11 18:53:07 2010 @@ -478,7 +478,7 @@ unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); // See . 
- uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset; + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.Offset; int64_t Value = 0; unsigned Index = 0; unsigned IsExtern = 0; @@ -547,7 +547,7 @@ Type = RIT_X86_64_Unsigned; MachRelocationEntry MRE; - MRE.Word0 = Address; + MRE.Word0 = FixupOffset; MRE.Word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | @@ -591,7 +591,7 @@ Value += Layout.getSymbolAddress(&SD); if (IsPCRel) - Value -= Address + (1 << Log2Size); + Value -= FixupOffset + (1 << Log2Size); } else { report_fatal_error("unsupported relocation of undefined symbol '" + Symbol->getName() + "'"); @@ -662,7 +662,7 @@ // struct relocation_info (8 bytes) MachRelocationEntry MRE; - MRE.Word0 = Address; + MRE.Word0 = FixupOffset; MRE.Word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | @@ -676,7 +676,7 @@ const MCFragment *Fragment, const MCAsmFixup &Fixup, MCValue Target, uint64_t &FixedValue) { - uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset; + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.Offset; unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); unsigned Type = RIT_Vanilla; @@ -721,10 +721,10 @@ } MachRelocationEntry MRE; - MRE.Word0 = ((Address << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | RF_Scattered); MRE.Word1 = Value; Relocations[Fragment->getParent()].push_back(MRE); @@ -763,7 +763,7 @@ Target, FixedValue); // See . 
- uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset; + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.Offset; uint32_t Value = 0; unsigned Index = 0; unsigned IsExtern = 0; @@ -798,7 +798,7 @@ // struct relocation_info (8 bytes) MachRelocationEntry MRE; - MRE.Word0 = Address; + MRE.Word0 = FixupOffset; MRE.Word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | From daniel at zuster.org Tue May 11 18:53:11 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 11 May 2010 23:53:11 -0000 Subject: [llvm-commits] [llvm] r103527 - in /llvm/trunk: lib/MC/MachObjectWriter.cpp test/MC/MachO/darwin-x86_64-reloc.s Message-ID: <20100511235311.539B83128034@llvm.org> Author: ddunbar Date: Tue May 11 18:53:11 2010 New Revision: 103527 URL: http://llvm.org/viewvc/llvm-project?rev=103527&view=rev Log: MC/Mach-O/x86_64: Fix PCrel adjustment for x86_64, which was using the fixup offset instead of the fixup address as intended. Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MachObjectWriter.cpp?rev=103527&r1=103526&r2=103527&view=diff ============================================================================== --- llvm/trunk/lib/MC/MachObjectWriter.cpp (original) +++ llvm/trunk/lib/MC/MachObjectWriter.cpp Tue May 11 18:53:11 2010 @@ -479,6 +479,7 @@ // See . 
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.Offset; + uint32_t FixupAddress = Layout.getFragmentAddress(Fragment) + Fixup.Offset; int64_t Value = 0; unsigned Index = 0; unsigned IsExtern = 0; @@ -591,7 +592,7 @@ Value += Layout.getSymbolAddress(&SD); if (IsPCRel) - Value -= FixupOffset + (1 << Log2Size); + Value -= FixupAddress + (1 << Log2Size); } else { report_fatal_error("unsupported relocation of undefined symbol '" + Symbol->getName() + "'"); Modified: llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s?rev=103527&r1=103526&r2=103527&view=diff ============================================================================== --- llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s (original) +++ llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s Tue May 11 18:53:11 2010 @@ -1,7 +1,10 @@ -// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s +// RUN: llvm-mc -n -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s // These examples are taken from . 
+ .data + .long 0 + .text _foo: ret @@ -55,7 +58,13 @@ L2_1: _f3: addl L2_1 - L2_0, %eax - + + .data +L4: + .long 0 + .text + movl L4(%rip), %eax + // CHECK: ('cputype', 16777223) // CHECK: ('cpusubtype', 3) // CHECK: ('filetype', 1) @@ -69,23 +78,46 @@ // CHECK: ('size', 312) // CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('vm_addr', 0) -// CHECK: ('vm_size', 215) +// CHECK: ('vm_size', 229) // CHECK: ('file_offset', 448) -// CHECK: ('file_size', 215) +// CHECK: ('file_size', 229) // CHECK: ('maxprot', 7) // CHECK: ('initprot', 7) // CHECK: ('num_sections', 3) // CHECK: ('flags', 0) // CHECK: ('sections', [ // CHECK: # Section 0 -// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') -// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) -// CHECK: ('size', 191) +// CHECK: ('size', 16) // CHECK: ('offset', 448) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 664) -// CHECK: ('num_reloc', 27) +// CHECK: ('reloc_offset', 680) +// CHECK: ('num_reloc', 2) +// CHECK: ('flags', 0x0) +// CHECK: ('reserved1', 0) +// CHECK: ('reserved2', 0) +// CHECK: ('reserved3', 0) +// CHECK: ), +// CHECK: ('_relocations', [ +// CHECK: # Relocation 0 +// CHECK: (('word-0', 0x8), +// CHECK: ('word-1', 0x4d000000)), +// CHECK: # Relocation 1 +// CHECK: (('word-0', 0x4), +// CHECK: ('word-1', 0x4d000007)), +// CHECK: ]) +// CHECK: ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00') +// CHECK: # Section 1 +// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('address', 16) +// CHECK: ('size', 197) +// CHECK: ('offset', 464) +// CHECK: ('alignment', 0) +// 
CHECK: ('reloc_offset', 696) +// CHECK: ('num_reloc', 28) // CHECK: ('flags', 0x80000400) // CHECK: ('reserved1', 0) // CHECK: ('reserved2', 0) @@ -93,119 +125,99 @@ // CHECK: ), // CHECK: ('_relocations', [ // CHECK: # Relocation 0 +// CHECK: (('word-0', 0xc1), +// CHECK: ('word-1', 0x15000001)), +// CHECK: # Relocation 1 // CHECK: (('word-0', 0xa5), // CHECK: ('word-1', 0x5e000003)), -// CHECK: # Relocation 1 +// CHECK: # Relocation 2 // CHECK: (('word-0', 0xa5), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 2 +// CHECK: # Relocation 3 // CHECK: (('word-0', 0x9d), // CHECK: ('word-1', 0x5e000003)), -// CHECK: # Relocation 3 +// CHECK: # Relocation 4 // CHECK: (('word-0', 0x9d), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 4 +// CHECK: # Relocation 5 // CHECK: (('word-0', 0x95), // CHECK: ('word-1', 0xe000003)), -// CHECK: # Relocation 5 +// CHECK: # Relocation 6 // CHECK: (('word-0', 0x8d), // CHECK: ('word-1', 0xe000003)), -// CHECK: # Relocation 6 +// CHECK: # Relocation 7 // CHECK: (('word-0', 0x79), // CHECK: ('word-1', 0x8d000003)), -// CHECK: # Relocation 7 +// CHECK: # Relocation 8 // CHECK: (('word-0', 0x71), // CHECK: ('word-1', 0x7d000003)), -// CHECK: # Relocation 8 +// CHECK: # Relocation 9 // CHECK: (('word-0', 0x69), // CHECK: ('word-1', 0x6d000003)), -// CHECK: # Relocation 9 +// CHECK: # Relocation 10 // CHECK: (('word-0', 0x63), // CHECK: ('word-1', 0x1d000003)), -// CHECK: # Relocation 10 +// CHECK: # Relocation 11 // CHECK: (('word-0', 0x5c), // CHECK: ('word-1', 0x1d000003)), -// CHECK: # Relocation 11 +// CHECK: # Relocation 12 // CHECK: (('word-0', 0x55), // CHECK: ('word-1', 0x5c000002)), -// CHECK: # Relocation 12 +// CHECK: # Relocation 13 // CHECK: (('word-0', 0x55), // CHECK: ('word-1', 0xc000000)), -// CHECK: # Relocation 13 +// CHECK: # Relocation 14 // CHECK: (('word-0', 0x4d), // CHECK: ('word-1', 0x5e000002)), -// CHECK: # Relocation 14 +// CHECK: # Relocation 15 // CHECK: (('word-0', 0x4d), // CHECK: 
('word-1', 0xe000000)), -// CHECK: # Relocation 15 +// CHECK: # Relocation 16 // CHECK: (('word-0', 0x45), // CHECK: ('word-1', 0x5e000002)), -// CHECK: # Relocation 16 +// CHECK: # Relocation 17 // CHECK: (('word-0', 0x45), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 17 +// CHECK: # Relocation 18 // CHECK: (('word-0', 0x3d), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 18 +// CHECK: # Relocation 19 // CHECK: (('word-0', 0x35), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 19 +// CHECK: # Relocation 20 // CHECK: (('word-0', 0x2d), // CHECK: ('word-1', 0x8d000000)), -// CHECK: # Relocation 20 +// CHECK: # Relocation 21 // CHECK: (('word-0', 0x26), // CHECK: ('word-1', 0x6d000000)), -// CHECK: # Relocation 21 +// CHECK: # Relocation 22 // CHECK: (('word-0', 0x20), // CHECK: ('word-1', 0x1d000000)), -// CHECK: # Relocation 22 +// CHECK: # Relocation 23 // CHECK: (('word-0', 0x1a), // CHECK: ('word-1', 0x1d000000)), -// CHECK: # Relocation 23 +// CHECK: # Relocation 24 // CHECK: (('word-0', 0x14), // CHECK: ('word-1', 0x4d000000)), -// CHECK: # Relocation 24 +// CHECK: # Relocation 25 // CHECK: (('word-0', 0xe), // CHECK: ('word-1', 0x3d000000)), -// CHECK: # Relocation 25 +// CHECK: # Relocation 26 // CHECK: (('word-0', 0x7), // CHECK: ('word-1', 0x2d000000)), -// CHECK: # Relocation 26 +// CHECK: # Relocation 27 // CHECK: (('word-0', 0x2), // CHECK: ('word-1', 0x2d000000)), // CHECK: ]) -// CHECK: ('_section_data', 
'\xc3\xe8\x00\x00\x00\x00\xe8\x04\x00\x00\x00H\x8b\x05\x00\x00\x00\x00\xff5\x00\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05\x04\x00\x00\x00\xc6\x05\xff\xff\xff\xff\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x8d\x05,\x00\x00\x00H\x8d\x05\x14\x00\x00\x00\x83\x05\x13\x00\x00\x00\x06f\x81\x05\x12\x00\x00\x00\xf4\x01\x81\x05\x10\x00\x00\x00\xf4\x01\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90,\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\xe4\xff\xff\xff\xff\xff\xff\xff\xd4\xff\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00\x00\x00\x00\x83\xc0\x00\x03\x04%\x03\x00\x00\x00') -// CHECK: # Section 1 -// CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') -// CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') -// CHECK: ('address', 191) -// CHECK: ('size', 8) -// CHECK: ('offset', 639) -// CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 880) -// CHECK: ('num_reloc', 2) -// CHECK: ('flags', 0x0) -// CHECK: ('reserved1', 0) -// CHECK: ('reserved2', 0) -// CHECK: ('reserved3', 0) -// CHECK: ), -// CHECK: ('_relocations', [ -// CHECK: # Relocation 0 -// CHECK: (('word-0', 0x4), -// CHECK: ('word-1', 0x4d000000)), -// CHECK: # Relocation 1 -// CHECK: (('word-0', 0x0), -// CHECK: ('word-1', 0x4d000007)), -// CHECK: ]) -// CHECK: ('_section_data', '\x04\x00\x00\x00\x04\x00\x00\x00') +// CHECK: ('_section_data', 
'\xc3\xe8\x00\x00\x00\x00\xe8\x04\x00\x00\x00H\x8b\x05\x00\x00\x00\x00\xff5\x00\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05\x04\x00\x00\x00\xc6\x05\xff\xff\xff\xff\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x8d\x05,\x00\x00\x00H\x8d\x05\x14\x00\x00\x00\x83\x05\x13\x00\x00\x00\x06f\x81\x05\x12\x00\x00\x00\xf4\x01\x81\x05\x10\x00\x00\x00\xf4\x01\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90,\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\xe4\xff\xff\xff\xff\xff\xff\xff\xd4\xff\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00\x00\x00\x00\x83\xc0\x00\x03\x04%\x03\x00\x00\x00\x8b\x057\xff\xff\xff') // CHECK: # Section 2 // CHECK: (('section_name', '__debug_frame\x00\x00\x00') // CHECK: ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00') -// CHECK: ('address', 199) +// CHECK: ('address', 213) // CHECK: ('size', 16) -// CHECK: ('offset', 647) +// CHECK: ('offset', 661) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 896) +// CHECK: ('reloc_offset', 920) // CHECK: ('num_reloc', 2) // CHECK: ('flags', 0x2000000) // CHECK: ('reserved1', 0) @@ -218,66 +230,66 @@ // CHECK: ('word-1', 0xe000006)), // CHECK: # Relocation 1 // CHECK: (('word-0', 0x0), -// CHECK: ('word-1', 0x6000001)), +// CHECK: ('word-1', 0x6000002)), // CHECK: ]) -// CHECK: ('_section_data', '\xad\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('_section_data', '\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ]) // CHECK: ), // CHECK: # Load Command 1 // CHECK: (('command', 2) // CHECK: ('size', 24) -// CHECK: ('symoff', 912) +// CHECK: ('symoff', 936) // CHECK: ('nsyms', 8) -// CHECK: ('stroff', 1040) +// CHECK: ('stroff', 1064) // CHECK: ('strsize', 48) // CHECK: ('_string_data', '\x00_foobar\x00_ext_foo\x00_foo\x00_baz\x00_bar\x00_prev\x00_f2\x00_f3\x00\x00') // CHECK: ('_symbols', [ 
// CHECK: # Symbol 0 // CHECK: (('n_strx', 18) // CHECK: ('n_type', 0xe) -// CHECK: ('n_sect', 1) +// CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 0) +// CHECK: ('n_value', 16) // CHECK: ('_string', '_foo') // CHECK: ), // CHECK: # Symbol 1 // CHECK: (('n_strx', 23) // CHECK: ('n_type', 0xe) -// CHECK: ('n_sect', 1) +// CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 1) +// CHECK: ('n_value', 17) // CHECK: ('_string', '_baz') // CHECK: ), // CHECK: # Symbol 2 // CHECK: (('n_strx', 28) // CHECK: ('n_type', 0xe) -// CHECK: ('n_sect', 1) +// CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 61) +// CHECK: ('n_value', 77) // CHECK: ('_string', '_bar') // CHECK: ), // CHECK: # Symbol 3 // CHECK: (('n_strx', 33) // CHECK: ('n_type', 0xe) -// CHECK: ('n_sect', 1) +// CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 129) +// CHECK: ('n_value', 145) // CHECK: ('_string', '_prev') // CHECK: ), // CHECK: # Symbol 4 // CHECK: (('n_strx', 39) // CHECK: ('n_type', 0xe) -// CHECK: ('n_sect', 1) +// CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 181) +// CHECK: ('n_value', 197) // CHECK: ('_string', '_f2') // CHECK: ), // CHECK: # Symbol 5 // CHECK: (('n_strx', 43) // CHECK: ('n_type', 0xe) -// CHECK: ('n_sect', 1) +// CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 184) +// CHECK: ('n_value', 200) // CHECK: ('_string', '_f3') // CHECK: ), // CHECK: # Symbol 6 From daniel at zuster.org Tue May 11 18:53:13 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 11 May 2010 23:53:13 -0000 Subject: [llvm-commits] [llvm] r103528 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100511235314.04CB4312800A@llvm.org> Author: ddunbar Date: Tue May 11 18:53:13 2010 New Revision: 103528 URL: http://llvm.org/viewvc/llvm-project?rev=103528&view=rev Log: Make Clang happy. 
Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103528&r1=103527&r2=103528&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Tue May 11 18:53:13 2010 @@ -584,9 +584,9 @@ if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - LiveRegMap::iterator i = LiveVirtRegs.find(Reg); - if (i != LiveVirtRegs.end()) - setPhysReg(MO, i->second.PhysReg); + LiveRegMap::iterator it = LiveVirtRegs.find(Reg); + if (it != LiveVirtRegs.end()) + setPhysReg(MO, it->second.PhysReg); else MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! } From gohman at apple.com Tue May 11 18:54:08 2010 From: gohman at apple.com (Dan Gohman) Date: Tue, 11 May 2010 23:54:08 -0000 Subject: [llvm-commits] [llvm] r103529 - in /llvm/trunk: include/llvm/CodeGen/FastISel.h lib/CodeGen/SelectionDAG/FastISel.cpp lib/Target/X86/X86FastISel.cpp utils/TableGen/FastISelEmitter.cpp Message-ID: <20100511235408.131FB312800A@llvm.org> Author: djg Date: Tue May 11 18:54:07 2010 New Revision: 103529 URL: http://llvm.org/viewvc/llvm-project?rev=103529&view=rev Log: Add initial kill flag support to FastISel. 
Modified: llvm/trunk/include/llvm/CodeGen/FastISel.h llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp llvm/trunk/lib/Target/X86/X86FastISel.cpp llvm/trunk/utils/TableGen/FastISelEmitter.cpp Modified: llvm/trunk/include/llvm/CodeGen/FastISel.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/FastISel.h?rev=103529&r1=103528&r2=103529&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/FastISel.h (original) +++ llvm/trunk/include/llvm/CodeGen/FastISel.h Tue May 11 18:54:07 2010 @@ -106,7 +106,7 @@ /// getRegForGEPIndex - This is a wrapper around getRegForValue that also /// takes care of truncating or sign-extending the given getelementptr /// index value. - unsigned getRegForGEPIndex(const Value *V); + std::pair getRegForGEPIndex(const Value *V); virtual ~FastISel(); @@ -142,7 +142,8 @@ /// virtual unsigned FastEmit_r(MVT VT, MVT RetVT, - unsigned Opcode, unsigned Op0); + unsigned Opcode, + unsigned Op0, bool Op0IsKill); /// FastEmit_rr - This method is called by target-independent code /// to request that an instruction with the given type, opcode, and @@ -151,7 +152,8 @@ virtual unsigned FastEmit_rr(MVT VT, MVT RetVT, unsigned Opcode, - unsigned Op0, unsigned Op1); + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); /// FastEmit_ri - This method is called by target-independent code /// to request that an instruction with the given type, opcode, and @@ -160,7 +162,8 @@ virtual unsigned FastEmit_ri(MVT VT, MVT RetVT, unsigned Opcode, - unsigned Op0, uint64_t Imm); + unsigned Op0, bool Op0IsKill, + uint64_t Imm); /// FastEmit_rf - This method is called by target-independent code /// to request that an instruction with the given type, opcode, and @@ -169,7 +172,8 @@ virtual unsigned FastEmit_rf(MVT VT, MVT RetVT, unsigned Opcode, - unsigned Op0, const ConstantFP *FPImm); + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm); /// FastEmit_rri - This method 
is called by target-independent code /// to request that an instruction with the given type, opcode, and @@ -178,7 +182,9 @@ virtual unsigned FastEmit_rri(MVT VT, MVT RetVT, unsigned Opcode, - unsigned Op0, unsigned Op1, uint64_t Imm); + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm); /// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries /// to emit an instruction with an immediate operand using FastEmit_ri. @@ -186,8 +192,8 @@ /// FastEmit_rr instead. unsigned FastEmit_ri_(MVT VT, unsigned Opcode, - unsigned Op0, uint64_t Imm, - MVT ImmType); + unsigned Op0, bool Op0IsKill, + uint64_t Imm, MVT ImmType); /// FastEmit_rf_ - This method is a wrapper of FastEmit_rf. It first tries /// to emit an instruction with an immediate operand using FastEmit_rf. @@ -195,8 +201,8 @@ /// FastEmit_rr instead. unsigned FastEmit_rf_(MVT VT, unsigned Opcode, - unsigned Op0, const ConstantFP *FPImm, - MVT ImmType); + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm, MVT ImmType); /// FastEmit_i - This method is called by target-independent code /// to request that an instruction with the given type, opcode, and @@ -225,35 +231,40 @@ /// unsigned FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0); + unsigned Op0, bool Op0IsKill); /// FastEmitInst_rr - Emit a MachineInstr with two register operands /// and a result register in the given register class. /// unsigned FastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, unsigned Op1); + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); /// FastEmitInst_ri - Emit a MachineInstr with two register operands /// and a result register in the given register class. 
/// unsigned FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, uint64_t Imm); + unsigned Op0, bool Op0IsKill, + uint64_t Imm); /// FastEmitInst_rf - Emit a MachineInstr with two register operands /// and a result register in the given register class. /// unsigned FastEmitInst_rf(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, const ConstantFP *FPImm); + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm); /// FastEmitInst_rri - Emit a MachineInstr with two register operands, /// an immediate, and a result register in the given register class. /// unsigned FastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, unsigned Op1, uint64_t Imm); + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm); /// FastEmitInst_i - Emit a MachineInstr with a single immediate /// operand, and a result register in the given register class. @@ -264,12 +275,13 @@ /// FastEmitInst_extractsubreg - Emit a MachineInstr for an extract_subreg /// from a specified index of a superregister to a specified type. unsigned FastEmitInst_extractsubreg(MVT RetVT, - unsigned Op0, uint32_t Idx); + unsigned Op0, bool Op0IsKill, + uint32_t Idx); /// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op /// with all but the least significant bit set to zero. unsigned FastEmitZExtFromI1(MVT VT, - unsigned Op); + unsigned Op0, bool Op0IsKill); /// FastEmitBranch - Emit an unconditional branch to the given block, /// unless it is the immediate (fall-through) successor, and update @@ -317,6 +329,9 @@ /// called when the value isn't already available in a register and must /// be materialized with new instructions. unsigned materializeRegForValue(const Value *V, MVT VT); + + /// hasTrivialKill - Test whether the given value has exactly one use. 
+ bool hasTrivialKill(const Value *V) const; }; } Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp?rev=103529&r1=103528&r2=103529&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Tue May 11 18:54:07 2010 @@ -56,6 +56,12 @@ #include "FunctionLoweringInfo.h" using namespace llvm; +bool FastISel::hasTrivialKill(const Value *V) const { + // Don't consider constants or arguments to have trivial kills. + const Instruction *I = dyn_cast<Instruction>(V); + return I && I->hasOneUse(); +} + unsigned FastISel::getRegForValue(const Value *V) { EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. @@ -134,7 +140,8 @@ unsigned IntegerReg = getRegForValue(ConstantInt::get(V->getContext(), IntVal)); if (IntegerReg != 0) - Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg); + Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, + IntegerReg, /*Kill=*/false); } } } else if (const Operator *Op = dyn_cast<Operator>(V)) { @@ -190,20 +197,28 @@ return AssignedReg; } -unsigned FastISel::getRegForGEPIndex(const Value *Idx) { +std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { unsigned IdxN = getRegForValue(Idx); if (IdxN == 0) // Unhandled operand. Halt "fast" selection and bail. - return 0; + return std::pair<unsigned, bool>(0, false); + + bool IdxNIsKill = hasTrivialKill(Idx); // If the index is smaller or larger than intptr_t, truncate or extend it.
MVT PtrVT = TLI.getPointerTy(); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); - if (IdxVT.bitsLT(PtrVT)) - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN); - else if (IdxVT.bitsGT(PtrVT)) - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN); - return IdxN; + if (IdxVT.bitsLT(PtrVT)) { + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, + IdxN, IdxNIsKill); + IdxNIsKill = true; + } + else if (IdxVT.bitsGT(PtrVT)) { + IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, + IdxN, IdxNIsKill); + IdxNIsKill = true; + } + return std::pair<unsigned, bool>(IdxN, IdxNIsKill); } /// SelectBinaryOp - Select and emit code for a binary operator instruction, @@ -235,10 +250,13 @@ // Unhandled operand. Halt "fast" selection and bail. return false; + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); + // Check if the second operand is a constant and handle it appropriately. if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, CI->getZExtValue()); + ISDOpcode, Op0, Op0IsKill, + CI->getZExtValue()); if (ResultReg != 0) { // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, ResultReg); @@ -249,7 +267,7 @@ // Check if the second operand is a constant float. if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, CF); + ISDOpcode, Op0, Op0IsKill, CF); if (ResultReg != 0) { // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, ResultReg); @@ -262,9 +280,13 @@ // Unhandled operand. Halt "fast" selection and bail. return false; + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + // Now we have both operands in registers. Emit the instruction.
unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, Op1); + ISDOpcode, + Op0, Op0IsKill, + Op1, Op1IsKill); if (ResultReg == 0) // Target-specific code wasn't able to find a machine opcode for // the given ISD opcode and type. Halt "fast" selection and bail. @@ -281,6 +303,8 @@ // Unhandled operand. Halt "fast" selection and bail. return false; + bool NIsKill = hasTrivialKill(I->getOperand(0)); + const Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, @@ -293,10 +317,11 @@ uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); // FIXME: This can be optimized by combining the add with a // subsequent one. - N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. return false; + NIsKill = true; } Ty = StTy->getElementType(Field); } else { @@ -307,27 +332,31 @@ if (CI->getZExtValue() == 0) continue; uint64_t Offs = TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); - N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT); + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. return false; + NIsKill = true; continue; } // N = N + Idx * ElementSize; uint64_t ElementSize = TD.getTypeAllocSize(Ty); - unsigned IdxN = getRegForGEPIndex(Idx); + std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); + unsigned IdxN = Pair.first; + bool IdxNIsKill = Pair.second; if (IdxN == 0) // Unhandled operand. Halt "fast" selection and bail. return false; if (ElementSize != 1) { - IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT); + IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT); if (IdxN == 0) // Unhandled operand. Halt "fast" selection and bail.
return false; + IdxNIsKill = true; } - N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN); + N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. return false; @@ -447,13 +476,15 @@ assert(InsertedCopy && "Can't copy address registers!"); InsertedCopy = InsertedCopy; + bool ResultRegIsKill = hasTrivialKill(I); + // Cast the register to the type of the selector. if (SrcVT.bitsGT(MVT::i32)) ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg); + ResultReg, ResultRegIsKill); else if (SrcVT.bitsLT(MVT::i32)) ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg); + ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); if (ResultReg == 0) // Unhandled operand. Halt "fast" selection and bail. return false; @@ -501,12 +532,15 @@ // Unhandled operand. Halt "fast" selection and bail. return false; + bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); + // If the operand is i1, arrange for the high bits in the register to be zero. if (SrcVT == MVT::i1) { SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT); - InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg); + InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill); if (!InputReg) return false; + InputRegIsKill = true; } // If the result is i1, truncate to the target's type for i1 first. if (DstVT == MVT::i1) @@ -515,7 +549,7 @@ unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opcode, - InputReg); + InputReg, InputRegIsKill); if (!ResultReg) return false; @@ -547,6 +581,8 @@ if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. return false; + + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; @@ -564,7 +600,7 @@ // If the reg-reg copy failed, select a BIT_CONVERT opcode. 
if (!ResultReg) ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), - ISD::BIT_CONVERT, Op0); + ISD::BIT_CONVERT, Op0, Op0IsKill); if (!ResultReg) return false; @@ -620,10 +656,12 @@ unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); if (OpReg == 0) return false; + bool OpRegIsKill = hasTrivialKill(I); + // If the target has ISD::FNEG, use it. EVT VT = TLI.getValueType(I->getType()); unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), - ISD::FNEG, OpReg); + ISD::FNEG, OpReg, OpRegIsKill); if (ResultReg != 0) { UpdateValueMap(I, ResultReg); return true; @@ -637,18 +675,19 @@ return false; unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), - ISD::BIT_CONVERT, OpReg); + ISD::BIT_CONVERT, OpReg, OpRegIsKill); if (IntReg == 0) return false; - unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, IntReg, + unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, + IntReg, /*Kill=*/true, UINT64_C(1) << (VT.getSizeInBits()-1), IntVT.getSimpleVT()); if (IntResultReg == 0) return false; ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), - ISD::BIT_CONVERT, IntResultReg); + ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true); if (ResultReg == 0) return false; @@ -805,13 +844,15 @@ } unsigned FastISel::FastEmit_r(MVT, MVT, - unsigned, unsigned /*Op0*/) { + unsigned, + unsigned /*Op0*/, bool /*Op0IsKill*/) { return 0; } unsigned FastISel::FastEmit_rr(MVT, MVT, - unsigned, unsigned /*Op0*/, - unsigned /*Op0*/) { + unsigned, + unsigned /*Op0*/, bool /*Op0IsKill*/, + unsigned /*Op1*/, bool /*Op1IsKill*/) { return 0; } @@ -825,20 +866,23 @@ } unsigned FastISel::FastEmit_ri(MVT, MVT, - unsigned, unsigned /*Op0*/, + unsigned, + unsigned /*Op0*/, bool /*Op0IsKill*/, uint64_t /*Imm*/) { return 0; } unsigned FastISel::FastEmit_rf(MVT, MVT, - unsigned, unsigned /*Op0*/, + unsigned, + unsigned /*Op0*/, bool /*Op0IsKill*/, const ConstantFP * /*FPImm*/) { return 0; } unsigned 
FastISel::FastEmit_rri(MVT, MVT, unsigned, - unsigned /*Op0*/, unsigned /*Op1*/, + unsigned /*Op0*/, bool /*Op0IsKill*/, + unsigned /*Op1*/, bool /*Op1IsKill*/, uint64_t /*Imm*/) { return 0; } @@ -848,16 +892,18 @@ /// If that fails, it materializes the immediate into a register and try /// FastEmit_rr instead. unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, - unsigned Op0, uint64_t Imm, - MVT ImmType) { + unsigned Op0, bool Op0IsKill, + uint64_t Imm, MVT ImmType) { // First check if immediate type is legal. If not, we can't use the ri form. - unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm); + unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); if (ResultReg != 0) return ResultReg; unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); if (MaterialReg == 0) return 0; - return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg); + return FastEmit_rr(VT, VT, Opcode, + Op0, Op0IsKill, + MaterialReg, /*Kill=*/true); } /// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries @@ -865,10 +911,10 @@ /// FastEmit_rf. If that fails, it materializes the immediate into a register /// and try FastEmit_rr instead. unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, - unsigned Op0, const ConstantFP *FPImm, - MVT ImmType) { + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm, MVT ImmType) { // First check if immediate type is legal. If not, we can't use the rf form. 
- unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm); + unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm); if (ResultReg != 0) return ResultReg; @@ -898,11 +944,13 @@ if (IntegerReg == 0) return 0; MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT, - ISD::SINT_TO_FP, IntegerReg); + ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true); if (MaterialReg == 0) return 0; } - return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg); + return FastEmit_rr(VT, VT, Opcode, + Op0, Op0IsKill, + MaterialReg, /*Kill=*/true); } unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { @@ -920,14 +968,14 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0) { + unsigned Op0, bool Op0IsKill) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0); + BuildMI(MBB, DL, II, ResultReg).addReg(Op0, Op0IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II).addReg(Op0); + BuildMI(MBB, DL, II).addReg(Op0, Op0IsKill * RegState::Kill); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) @@ -939,14 +987,19 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, unsigned Op1) { + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, 
II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) @@ -957,14 +1010,19 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, uint64_t Imm) { + unsigned Op0, bool Op0IsKill, + uint64_t Imm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm); else { - BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) @@ -975,14 +1033,19 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, const ConstantFP *FPImm) { + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm); else { - BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) @@ -993,14 +1056,22 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, unsigned Op1, uint64_t Imm) { + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm) { unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm); + 
BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm); else { - BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) @@ -1028,16 +1099,21 @@ } unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, - unsigned Op0, uint32_t Idx) { + unsigned Op0, bool Op0IsKill, + uint32_t Idx) { const TargetRegisterClass* RC = MRI.getRegClass(Op0); unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx); + BuildMI(MBB, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Idx); else { - BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx); + BuildMI(MBB, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Idx); bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, II.ImplicitDefs[0], RC, RC, DL); if (!InsertedCopy) @@ -1048,8 +1124,8 @@ /// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op /// with all but the least significant bit set to zero. -unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) { - return FastEmit_ri(VT, VT, ISD::AND, Op, 1); +unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { + return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); } /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. 
Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=103529&r1=103528&r2=103529&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original) +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Tue May 11 18:54:07 2010 @@ -324,7 +324,8 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg) { - unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src); + unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, + Src, /*TODO: Kill=*/false); if (RR != 0) { ResultReg = RR; @@ -416,7 +417,7 @@ (S == 1 || S == 2 || S == 4 || S == 8)) { // Scaled-index addressing. Scale = S; - IndexReg = getRegForGEPIndex(Op); + IndexReg = getRegForGEPIndex(Op).first; if (IndexReg == 0) return false; } else @@ -802,7 +803,7 @@ unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. - ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg); + ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); if (ResultReg == 0) return false; UpdateValueMap(I, ResultReg); return true; @@ -1133,7 +1134,8 @@ // Then issue an extract_subreg. 
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8, - CopyReg, X86::SUBREG_8BIT); + CopyReg, /*Kill=*/true, + X86::SUBREG_8BIT); if (!ResultReg) return false; @@ -1436,7 +1438,7 @@ } case CCValAssign::BCvt: { unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(), - ISD::BIT_CONVERT, Arg); + ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false); assert(BC != 0 && "Failed to emit a bitcast!"); Arg = BC; ArgVT = VA.getLocVT(); Modified: llvm/trunk/utils/TableGen/FastISelEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/FastISelEmitter.cpp?rev=103529&r1=103528&r2=103529&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/FastISelEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/FastISelEmitter.cpp Tue May 11 18:54:07 2010 @@ -123,7 +123,7 @@ void PrintParameters(raw_ostream &OS) const { for (unsigned i = 0, e = Operands.size(); i != e; ++i) { if (Operands[i] == "r") { - OS << "unsigned Op" << i; + OS << "unsigned Op" << i << ", bool Op" << i << "IsKill"; } else if (Operands[i] == "i") { OS << "uint64_t imm" << i; } else if (Operands[i] == "f") { @@ -149,7 +149,7 @@ if (PrintedArg) OS << ", "; if (Operands[i] == "r") { - OS << "Op" << i; + OS << "Op" << i << ", Op" << i << "IsKill"; PrintedArg = true; } else if (Operands[i] == "i") { OS << "imm" << i; @@ -167,7 +167,7 @@ void PrintArguments(raw_ostream &OS) const { for (unsigned i = 0, e = Operands.size(); i != e; ++i) { if (Operands[i] == "r") { - OS << "Op" << i; + OS << "Op" << i << ", Op" << i << "IsKill"; } else if (Operands[i] == "i") { OS << "imm" << i; } else if (Operands[i] == "f") { @@ -447,7 +447,7 @@ OS << ");\n"; } else { OS << "extractsubreg(" << getName(RetVT); - OS << ", Op0, "; + OS << ", Op0, Op0IsKill, "; OS << (unsigned)Memo.SubRegNo; OS << ");\n"; } @@ -541,7 +541,7 @@ Operands.PrintArguments(OS, *Memo.PhysRegs); OS << ");\n"; } else { - OS << "extractsubreg(RetVT, Op0, "; + OS << 
"extractsubreg(RetVT, Op0, Op0IsKill, "; OS << (unsigned)Memo.SubRegNo; OS << ");\n"; } From stoklund at 2pi.dk Tue May 11 19:11:38 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 11 May 2010 17:11:38 -0700 Subject: [llvm-commits] [llvm] r103528 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp In-Reply-To: <20100511235314.04CB4312800A@llvm.org> References: <20100511235314.04CB4312800A@llvm.org> Message-ID: <98CB2790-BDE1-4FA1-A34E-2F059394BB4F@2pi.dk> On May 11, 2010, at 4:53 PM, Daniel Dunbar wrote: > Author: ddunbar > Date: Tue May 11 18:53:13 2010 > New Revision: 103528 > > URL: http://llvm.org/viewvc/llvm-project?rev=103528&view=rev > Log: > Make Clang happy. Thanks, Daniel. That was clearly bad style, but was it also invalid code? Both Clang and MSVC rejected it. > > Modified: > llvm/trunk/lib/CodeGen/RegAllocFast.cpp > > Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103528&r1=103527&r2=103528&view=diff > ============================================================================== > --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) > +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Tue May 11 18:53:13 2010 > @@ -584,9 +584,9 @@ > if (!MO.isReg()) continue; > unsigned Reg = MO.getReg(); > if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; > - LiveRegMap::iterator i = LiveVirtRegs.find(Reg); > - if (i != LiveVirtRegs.end()) > - setPhysReg(MO, i->second.PhysReg); > + LiveRegMap::iterator it = LiveVirtRegs.find(Reg); > + if (it != LiveVirtRegs.end()) > + setPhysReg(MO, it->second.PhysReg); > else > MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! 
> } > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From stoklund at 2pi.dk Tue May 11 19:11:19 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Wed, 12 May 2010 00:11:19 -0000 Subject: [llvm-commits] [llvm] r103530 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100512001119.94832312800A@llvm.org> Author: stoklund Date: Tue May 11 19:11:19 2010 New Revision: 103530 URL: http://llvm.org/viewvc/llvm-project?rev=103530&view=rev Log: Avoid scoping issues, fix buildbots Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103530&r1=103529&r2=103530&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Tue May 11 19:11:19 2010 @@ -182,9 +182,9 @@ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); DEBUG(dbgs() << " Killing %reg" << VirtReg << "\n"); - LiveRegMap::iterator i = LiveVirtRegs.find(VirtReg); - if (i != LiveVirtRegs.end()) - killVirtReg(i); + LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); + if (lri != LiveVirtRegs.end()) + killVirtReg(lri); } /// spillVirtReg - This method spills the value specified by VirtReg into the @@ -195,9 +195,9 @@ unsigned VirtReg, bool isKill) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); - LiveRegMap::iterator i = LiveVirtRegs.find(VirtReg); - assert(i != LiveVirtRegs.end() && "Spilling unmapped virtual register"); - LiveReg &LR = i->second; + LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); + assert(lri != LiveVirtRegs.end() && "Spilling unmapped virtual register"); + LiveReg &LR = lri->second; 
assert(PhysRegState[LR.PhysReg] == VirtReg && "Broken RegState mapping"); // If this physreg is used by the instruction, we want to kill it on the @@ -225,7 +225,7 @@ } if (isKill) - killVirtReg(i); + killVirtReg(lri); } /// spillAll - Spill all dirty virtregs without killing them. @@ -442,10 +442,10 @@ unsigned OpNum, unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); - LiveRegMap::iterator i = LiveVirtRegs.find(VirtReg); - if (i == LiveVirtRegs.end()) - i = allocVirtReg(MBB, MI, VirtReg); - LiveReg &LR = i->second; + LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); + if (lri == LiveVirtRegs.end()) + lri = allocVirtReg(MBB, MI, VirtReg); + LiveReg &LR = lri->second; LR.LastUse = MI; LR.LastOpNum = OpNum; LR.Dirty = true; @@ -458,17 +458,18 @@ unsigned OpNum, unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); - LiveRegMap::iterator i = LiveVirtRegs.find(VirtReg); - if (i == LiveVirtRegs.end()) { - i = allocVirtReg(MBB, MI, VirtReg); + LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); + if (lri == LiveVirtRegs.end()) { + lri = allocVirtReg(MBB, MI, VirtReg); const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " - << TRI->getName(i->second.PhysReg) << "\n"); - TII->loadRegFromStackSlot(MBB, MI, i->second.PhysReg, FrameIndex, RC, TRI); + << TRI->getName(lri->second.PhysReg) << "\n"); + TII->loadRegFromStackSlot(MBB, MI, lri->second.PhysReg, FrameIndex, RC, + TRI); ++NumLoads; } - LiveReg &LR = i->second; + LiveReg &LR = lri->second; LR.LastUse = MI; LR.LastOpNum = OpNum; UsedInInstr.set(LR.PhysReg); @@ -584,9 +585,9 @@ if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - LiveRegMap::iterator it = LiveVirtRegs.find(Reg); - if (it != LiveVirtRegs.end()) - 
setPhysReg(MO, it->second.PhysReg); + LiveRegMap::iterator lri = LiveVirtRegs.find(Reg); + if (lri != LiveVirtRegs.end()) + setPhysReg(MO, lri->second.PhysReg); else MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! } From stoklund at 2pi.dk Tue May 11 19:11:24 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Wed, 12 May 2010 00:11:24 -0000 Subject: [llvm-commits] [llvm] r103531 - in /llvm/trunk/test/CodeGen: Generic/ PowerPC/ Thumb/ X86/ Message-ID: <20100512001124.D6EE73128018@llvm.org> Author: stoklund Date: Tue May 11 19:11:24 2010 New Revision: 103531 URL: http://llvm.org/viewvc/llvm-project?rev=103531&view=rev Log: Enable a bunch more -regalloc=fast tests Modified: llvm/trunk/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll llvm/trunk/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll llvm/trunk/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll llvm/trunk/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll llvm/trunk/test/CodeGen/PowerPC/cr_spilling.ll llvm/trunk/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll llvm/trunk/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll llvm/trunk/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll llvm/trunk/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll llvm/trunk/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll llvm/trunk/test/CodeGen/X86/2008-09-17-inline-asm-1.ll llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll llvm/trunk/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll llvm/trunk/test/CodeGen/X86/2009-04-14-IllegalRegs.ll Modified: llvm/trunk/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll (original) +++ 
llvm/trunk/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll Tue May 11 19:11:24 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -regalloc=local +; RUN: llc < %s -regalloc=fast %struct.CHESS_POSITION = type { i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32, i8, i8, [64 x i8], i8, i8, i8, i8, i8 } @search = external global %struct.CHESS_POSITION ; <%struct.CHESS_POSITION*> [#uses=2] Modified: llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll (original) +++ llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll Tue May 11 19:11:24 2010 @@ -1,9 +1,15 @@ -; RUN: llc < %s | grep {subfc r3,r5,r4} -; RUN: llc < %s | grep {subfze r4,r6} -; RUN: llc < %s -regalloc=local | grep {subfc r6,r5,r4} -; RUN: llc < %s -regalloc=local | grep {subfze r3,r3} +; RUN: llc < %s | FileCheck %s +; RUN: llc < %s -regalloc=local | FileCheck -check-prefix=LOCAL %s +; RUN: llc < %s -regalloc=fast | FileCheck -check-prefix=FAST %s ; The first argument of subfc must not be the same as any other register. 
+; CHECK: subfc r3,r5,r4 +; CHECK: subfze r4,r6 +; LOCAL: subfc r6,r5,r4 +; LOCAL: subfze r3,r3 +; FAST: subfc r9,r8,r7 +; FAST: subfze r10,r6 + ; PR1357 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" Modified: llvm/trunk/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll (original) +++ llvm/trunk/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll Tue May 11 19:11:24 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic +; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -relocation-model=pic %struct.NSError = type opaque %struct.NSManagedObjectContext = type opaque Modified: llvm/trunk/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll (original) +++ llvm/trunk/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll Tue May 11 19:11:24 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=local -relocation-model=pic +; RUN: llc < %s -mtriple=powerpc64-apple-darwin9 -regalloc=fast -relocation-model=pic %struct.NSError = type opaque %struct.NSManagedObjectContext = type opaque Modified: llvm/trunk/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll?rev=103531&r1=103530&r2=103531&view=diff 
============================================================================== --- llvm/trunk/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll (original) +++ llvm/trunk/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll Tue May 11 19:11:24 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=powerpc-apple-darwin -regalloc=local +; RUN: llc < %s -mtriple=powerpc-apple-darwin -regalloc=fast define i32 @bork(i64 %foo, i64 %bar) { entry: Modified: llvm/trunk/test/CodeGen/PowerPC/cr_spilling.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/cr_spilling.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/PowerPC/cr_spilling.ll (original) +++ llvm/trunk/test/CodeGen/PowerPC/cr_spilling.ll Tue May 11 19:11:24 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=ppc32 -regalloc=local -O0 -relocation-model=pic -o - +; RUN: llc < %s -march=ppc32 -regalloc=fast -O0 -relocation-model=pic -o - ; PR1638 @.str242 = external constant [3 x i8] ; <[3 x i8]*> [#uses=1] Modified: llvm/trunk/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll (original) +++ llvm/trunk/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll Tue May 11 19:11:24 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -regalloc=local -relocation-model=pic | FileCheck %s +; RUN: llc < %s -regalloc=fast -relocation-model=pic | FileCheck %s target triple = "thumbv6-apple-darwin10" @@ -6,10 +7,10 @@ define arm_apcscc void @foo() nounwind { entry: -; CHECK: str r0, [sp] +; CHECK: str r0, [sp %0 = call arm_apcscc i32 (...)* @bar() nounwind ; [#uses=1] ; CHECK: blx _bar -; CHECK: ldr r1, [sp] +; CHECK: 
ldr r1, [sp store i32 %0, i32* @fred, align 4 br label %return Modified: llvm/trunk/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll (original) +++ llvm/trunk/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll Tue May 11 19:11:24 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=local +; RUN: llc < %s -march=x86 -mattr=+sse2 -regalloc=fast define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %solutions, double* %x) { entry: Modified: llvm/trunk/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll (original) +++ llvm/trunk/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll Tue May 11 19:11:24 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -regalloc=local -march=x86 -mattr=+mmx | grep esi +; RUN: llc < %s -regalloc=fast -march=x86 -mattr=+mmx | grep esi ; PR2082 ; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of ; registers. 
Modified: llvm/trunk/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll (original) +++ llvm/trunk/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll Tue May 11 19:11:24 2010 @@ -1,5 +1,6 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin ; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=local +; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim -O0 -regalloc=fast ; PR5534 %struct.CGPoint = type { double, double } Modified: llvm/trunk/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll (original) +++ llvm/trunk/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll Tue May 11 19:11:24 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=local +; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=fast @_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*] ; <[5 x i32 (...)*]*> [#uses=1] Modified: llvm/trunk/test/CodeGen/X86/2008-09-17-inline-asm-1.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-09-17-inline-asm-1.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2008-09-17-inline-asm-1.ll (original) +++ llvm/trunk/test/CodeGen/X86/2008-09-17-inline-asm-1.ll Tue May 11 19:11:24 2010 @@ -1,18 +1,19 @@ -; RUN: llc < %s -march=x86 | not grep "movl %eax, %eax" -; RUN: llc < %s -march=x86 | 
not grep "movl %edx, %edx" -; RUN: llc < %s -march=x86 | not grep "movl (%eax), %eax" -; RUN: llc < %s -march=x86 | not grep "movl (%edx), %edx" -; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl %eax, %eax" -; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl %edx, %edx" -; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl (%eax), %eax" -; RUN: llc < %s -march=x86 -regalloc=local | not grep "movl (%edx), %edx" +; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86 -regalloc=local | FileCheck %s +; RUN: llc < %s -march=x86 -regalloc=fast | FileCheck %s ; %0 must not be put in EAX or EDX. ; In the first asm, $0 and $2 must not be put in EAX. +; CHECK: InlineAsm Start +; CHECK-NOT: movl %eax, %eax +; CHECK-NOT: movl (%eax), %eax +; CHECK: InlineAsm End ; In the second asm, $0 and $2 must not be put in EDX. -; This is kind of hard to test thoroughly, but the things above should continue -; to pass, I think. -; ModuleID = '' +; CHECK: InlineAsm Start +; CHECK-NOT: movl %edx, %edx +; CHECK-NOT: movl (%edx), %edx +; CHECK: InlineAsm End + target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin8" @x = common global i32 0 ; [#uses=1] Modified: llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll (original) +++ llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll Tue May 11 19:11:24 2010 @@ -1,5 +1,7 @@ ; RUN: llc < %s -march=x86 | grep "#%ebp %esi %edi 8(%edx) %eax (%ebx)" ; RUN: llc < %s -march=x86 -regalloc=local | grep "#%edi %ebp %edx 8(%ebx) %eax (%esi)" +; RUN: llc < %s -march=x86 -regalloc=fast | grep "#%ecx %ebx %edi 8(%ebp) %eax 
(%esi)" + ; The 1st, 2nd, 3rd and 5th registers above must all be different. The registers ; referenced in the 4th and 6th operands must not be the same as the 1st or 5th ; operand. There are many combinations that work; this is what llc puts out now. Modified: llvm/trunk/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll (original) +++ llvm/trunk/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll Tue May 11 19:11:24 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin9.6 -regalloc=local -disable-fp-elim +; RUN: llc < %s -mtriple=i386-apple-darwin9.6 -regalloc=fast -disable-fp-elim ; rdar://6538384 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } Modified: llvm/trunk/test/CodeGen/X86/2009-04-14-IllegalRegs.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2009-04-14-IllegalRegs.ll?rev=103531&r1=103530&r2=103531&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2009-04-14-IllegalRegs.ll (original) +++ llvm/trunk/test/CodeGen/X86/2009-04-14-IllegalRegs.ll Tue May 11 19:11:24 2010 @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin -O0 -regalloc=local | not grep sil +; RUN: llc < %s -mtriple=i386-apple-darwin -O0 -regalloc=fast | not grep sil ; rdar://6787136 %struct.X = type { i8, [32 x i8] } From daniel at zuster.org Tue May 11 19:38:17 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 00:38:17 -0000 Subject: [llvm-commits] [llvm] r103532 - in /llvm/trunk: 
include/llvm/Target/TargetAsmBackend.h lib/MC/MCAssembler.cpp lib/Target/X86/X86AsmBackend.cpp test/MC/MachO/darwin-x86_64-reloc.s Message-ID: <20100512003817.7ADFD312800A@llvm.org> Author: ddunbar Date: Tue May 11 19:38:17 2010 New Revision: 103532 URL: http://llvm.org/viewvc/llvm-project?rev=103532&view=rev Log: MC/Mach-O/x86_64: Add a new hook for checking whether a particular section can be diced into atoms, and adjust getAtom() to take this into account. - This fixes relocations to symbols in fixed size literal sections, for example. Modified: llvm/trunk/include/llvm/Target/TargetAsmBackend.h llvm/trunk/lib/MC/MCAssembler.cpp llvm/trunk/lib/Target/X86/X86AsmBackend.cpp llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s Modified: llvm/trunk/include/llvm/Target/TargetAsmBackend.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetAsmBackend.h?rev=103532&r1=103531&r2=103532&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetAsmBackend.h (original) +++ llvm/trunk/include/llvm/Target/TargetAsmBackend.h Tue May 11 19:38:17 2010 @@ -90,6 +90,14 @@ return false; } + /// isSectionAtomizable - Check whether the given section can be split into + /// atoms. + /// + /// \see MCAssembler::isSymbolLinkerVisible(). + virtual bool isSectionAtomizable(const MCSection &Section) const { + return true; + } + /// isVirtualSection - Check whether the given section is "virtual", that is /// has no actual object file contents. 
virtual bool isVirtualSection(const MCSection &Section) const = 0; Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103532&r1=103531&r2=103532&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Tue May 11 19:38:17 2010 @@ -297,6 +297,12 @@ if (!SD->getFragment()) return 0; + // Non-linker visible symbols in sections which can't be atomized have no + // defining atom. + if (!getBackend().isSectionAtomizable( + SD->getFragment()->getParent()->getSection())) + return 0; + // Otherwise, return the atom for the containing fragment. return SD->getFragment()->getAtom(); } Modified: llvm/trunk/lib/Target/X86/X86AsmBackend.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86AsmBackend.cpp?rev=103532&r1=103531&r2=103532&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86AsmBackend.cpp (original) +++ llvm/trunk/lib/Target/X86/X86AsmBackend.cpp Tue May 11 19:38:17 2010 @@ -259,6 +259,26 @@ const MCSectionMachO &SMO = static_cast(Section); return SMO.getType() == MCSectionMachO::S_CSTRING_LITERALS; } + + virtual bool isSectionAtomizable(const MCSection &Section) const { + const MCSectionMachO &SMO = static_cast(Section); + // Fixed sized data sections are uniqued, they cannot be diced into atoms. 
+ switch (SMO.getType()) { + default: + return true; + + case MCSectionMachO::S_4BYTE_LITERALS: + case MCSectionMachO::S_8BYTE_LITERALS: + case MCSectionMachO::S_16BYTE_LITERALS: + case MCSectionMachO::S_LITERAL_POINTERS: + case MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS: + case MCSectionMachO::S_LAZY_SYMBOL_POINTERS: + case MCSectionMachO::S_MOD_INIT_FUNC_POINTERS: + case MCSectionMachO::S_MOD_TERM_FUNC_POINTERS: + case MCSectionMachO::S_INTERPOSING: + return false; + } + } }; } Modified: llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s?rev=103532&r1=103531&r2=103532&view=diff ============================================================================== --- llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s (original) +++ llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s Tue May 11 19:38:17 2010 @@ -64,37 +64,56 @@ .long 0 .text movl L4(%rip), %eax + + .section __TEXT,__literal8,8byte_literals + .quad 0 +L5: + .quad 0 +f6: + .quad 0 +L6: + .quad 0 + .text + movl L5(%rip), %eax + movl f6(%rip), %eax + movl L6(%rip), %eax + + .data + .quad L5 + .quad f6 + .quad L6 + // CHECK: ('cputype', 16777223) // CHECK: ('cpusubtype', 3) // CHECK: ('filetype', 1) // CHECK: ('num_load_commands', 1) -// CHECK: ('load_commands_size', 416) +// CHECK: ('load_commands_size', 496) // CHECK: ('flag', 0) // CHECK: ('reserved', 0) // CHECK: ('load_commands', [ // CHECK: # Load Command 0 // CHECK: (('command', 25) -// CHECK: ('size', 312) +// CHECK: ('size', 392) // CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('vm_addr', 0) -// CHECK: ('vm_size', 229) -// CHECK: ('file_offset', 448) -// CHECK: ('file_size', 229) +// CHECK: ('vm_size', 303) +// CHECK: ('file_offset', 528) +// CHECK: ('file_size', 303) // CHECK: ('maxprot', 7) // CHECK: ('initprot', 7) -// CHECK: ('num_sections', 3) +// CHECK: ('num_sections', 4) // CHECK: ('flags', 0) // CHECK: 
('sections', [ // CHECK: # Section 0 // CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 0) -// CHECK: ('size', 16) -// CHECK: ('offset', 448) +// CHECK: ('size', 40) +// CHECK: ('offset', 528) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 680) -// CHECK: ('num_reloc', 2) +// CHECK: ('reloc_offset', 832) +// CHECK: ('num_reloc', 5) // CHECK: ('flags', 0x0) // CHECK: ('reserved1', 0) // CHECK: ('reserved2', 0) @@ -102,22 +121,31 @@ // CHECK: ), // CHECK: ('_relocations', [ // CHECK: # Relocation 0 +// CHECK: (('word-0', 0x20), +// CHECK: ('word-1', 0x6000004)), +// CHECK: # Relocation 1 +// CHECK: (('word-0', 0x18), +// CHECK: ('word-1', 0xe000006)), +// CHECK: # Relocation 2 +// CHECK: (('word-0', 0x10), +// CHECK: ('word-1', 0x6000004)), +// CHECK: # Relocation 3 // CHECK: (('word-0', 0x8), // CHECK: ('word-1', 0x4d000000)), -// CHECK: # Relocation 1 +// CHECK: # Relocation 4 // CHECK: (('word-0', 0x4), -// CHECK: ('word-1', 0x4d000007)), +// CHECK: ('word-1', 0x4d000008)), // CHECK: ]) -// CHECK: ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('_section_data', "\x00\x00\x00\x00\x04\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x17\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'\x01\x00\x00\x00\x00\x00\x00") // CHECK: # Section 1 // CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') -// CHECK: ('address', 16) -// CHECK: ('size', 197) -// CHECK: ('offset', 464) +// CHECK: ('address', 40) +// CHECK: ('size', 215) +// CHECK: ('offset', 568) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 696) -// CHECK: ('num_reloc', 28) +// CHECK: ('reloc_offset', 872) +// CHECK: ('num_reloc', 31) // CHECK: ('flags', 0x80000400) // CHECK: ('reserved1', 0) // CHECK: ('reserved2', 0) @@ 
-125,99 +153,108 @@ // CHECK: ), // CHECK: ('_relocations', [ // CHECK: # Relocation 0 +// CHECK: (('word-0', 0xd3), +// CHECK: ('word-1', 0x15000004)), +// CHECK: # Relocation 1 +// CHECK: (('word-0', 0xcd), +// CHECK: ('word-1', 0x1d000006)), +// CHECK: # Relocation 2 +// CHECK: (('word-0', 0xc7), +// CHECK: ('word-1', 0x15000004)), +// CHECK: # Relocation 3 // CHECK: (('word-0', 0xc1), // CHECK: ('word-1', 0x15000001)), -// CHECK: # Relocation 1 +// CHECK: # Relocation 4 // CHECK: (('word-0', 0xa5), // CHECK: ('word-1', 0x5e000003)), -// CHECK: # Relocation 2 +// CHECK: # Relocation 5 // CHECK: (('word-0', 0xa5), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 3 +// CHECK: # Relocation 6 // CHECK: (('word-0', 0x9d), // CHECK: ('word-1', 0x5e000003)), -// CHECK: # Relocation 4 +// CHECK: # Relocation 7 // CHECK: (('word-0', 0x9d), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 5 +// CHECK: # Relocation 8 // CHECK: (('word-0', 0x95), // CHECK: ('word-1', 0xe000003)), -// CHECK: # Relocation 6 +// CHECK: # Relocation 9 // CHECK: (('word-0', 0x8d), // CHECK: ('word-1', 0xe000003)), -// CHECK: # Relocation 7 +// CHECK: # Relocation 10 // CHECK: (('word-0', 0x79), // CHECK: ('word-1', 0x8d000003)), -// CHECK: # Relocation 8 +// CHECK: # Relocation 11 // CHECK: (('word-0', 0x71), // CHECK: ('word-1', 0x7d000003)), -// CHECK: # Relocation 9 +// CHECK: # Relocation 12 // CHECK: (('word-0', 0x69), // CHECK: ('word-1', 0x6d000003)), -// CHECK: # Relocation 10 +// CHECK: # Relocation 13 // CHECK: (('word-0', 0x63), // CHECK: ('word-1', 0x1d000003)), -// CHECK: # Relocation 11 +// CHECK: # Relocation 14 // CHECK: (('word-0', 0x5c), // CHECK: ('word-1', 0x1d000003)), -// CHECK: # Relocation 12 +// CHECK: # Relocation 15 // CHECK: (('word-0', 0x55), // CHECK: ('word-1', 0x5c000002)), -// CHECK: # Relocation 13 +// CHECK: # Relocation 16 // CHECK: (('word-0', 0x55), // CHECK: ('word-1', 0xc000000)), -// CHECK: # Relocation 14 +// CHECK: # Relocation 17 // 
CHECK: (('word-0', 0x4d), // CHECK: ('word-1', 0x5e000002)), -// CHECK: # Relocation 15 +// CHECK: # Relocation 18 // CHECK: (('word-0', 0x4d), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 16 +// CHECK: # Relocation 19 // CHECK: (('word-0', 0x45), // CHECK: ('word-1', 0x5e000002)), -// CHECK: # Relocation 17 +// CHECK: # Relocation 20 // CHECK: (('word-0', 0x45), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 18 +// CHECK: # Relocation 21 // CHECK: (('word-0', 0x3d), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 19 +// CHECK: # Relocation 22 // CHECK: (('word-0', 0x35), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 20 +// CHECK: # Relocation 23 // CHECK: (('word-0', 0x2d), // CHECK: ('word-1', 0x8d000000)), -// CHECK: # Relocation 21 +// CHECK: # Relocation 24 // CHECK: (('word-0', 0x26), // CHECK: ('word-1', 0x6d000000)), -// CHECK: # Relocation 22 +// CHECK: # Relocation 25 // CHECK: (('word-0', 0x20), // CHECK: ('word-1', 0x1d000000)), -// CHECK: # Relocation 23 +// CHECK: # Relocation 26 // CHECK: (('word-0', 0x1a), // CHECK: ('word-1', 0x1d000000)), -// CHECK: # Relocation 24 +// CHECK: # Relocation 27 // CHECK: (('word-0', 0x14), // CHECK: ('word-1', 0x4d000000)), -// CHECK: # Relocation 25 +// CHECK: # Relocation 28 // CHECK: (('word-0', 0xe), // CHECK: ('word-1', 0x3d000000)), -// CHECK: # Relocation 26 +// CHECK: # Relocation 29 // CHECK: (('word-0', 0x7), // CHECK: ('word-1', 0x2d000000)), -// CHECK: # Relocation 27 +// CHECK: # Relocation 30 // CHECK: (('word-0', 0x2), // CHECK: ('word-1', 0x2d000000)), // CHECK: ]) -// CHECK: ('_section_data', 
'\xc3\xe8\x00\x00\x00\x00\xe8\x04\x00\x00\x00H\x8b\x05\x00\x00\x00\x00\xff5\x00\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05\x04\x00\x00\x00\xc6\x05\xff\xff\xff\xff\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x8d\x05,\x00\x00\x00H\x8d\x05\x14\x00\x00\x00\x83\x05\x13\x00\x00\x00\x06f\x81\x05\x12\x00\x00\x00\xf4\x01\x81\x05\x10\x00\x00\x00\xf4\x01\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90,\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\xe4\xff\xff\xff\xff\xff\xff\xff\xd4\xff\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00\x00\x00\x00\x83\xc0\x00\x03\x04%\x03\x00\x00\x00\x8b\x057\xff\xff\xff') +// CHECK: ('_section_data', '\xc3\xe8\x00\x00\x00\x00\xe8\x04\x00\x00\x00H\x8b\x05\x00\x00\x00\x00\xff5\x00\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05\x04\x00\x00\x00\xc6\x05\xff\xff\xff\xff\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x8d\x05,\x00\x00\x00H\x8d\x05\x14\x00\x00\x00\x83\x05\x13\x00\x00\x00\x06f\x81\x05\x12\x00\x00\x00\xf4\x01\x81\x05\x10\x00\x00\x00\xf4\x01\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90,\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\xe4\xff\xff\xff\xff\xff\xff\xff\xd4\xff\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00\x00\x00\x00\x83\xc0\x00\x03\x04%\x03\x00\x00\x00\x8b\x05\x1f\xff\xff\xff\x8b\x05$\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05(\x00\x00\x00') // CHECK: # Section 2 // CHECK: (('section_name', '__debug_frame\x00\x00\x00') // CHECK: ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00') -// CHECK: ('address', 213) +// CHECK: ('address', 255) // CHECK: ('size', 16) -// CHECK: ('offset', 661) +// CHECK: ('offset', 783) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 920) +// CHECK: ('reloc_offset', 1120) // CHECK: 
('num_reloc', 2) // CHECK: ('flags', 0x2000000) // CHECK: ('reserved1', 0) @@ -227,29 +264,46 @@ // CHECK: ('_relocations', [ // CHECK: # Relocation 0 // CHECK: (('word-0', 0x8), -// CHECK: ('word-1', 0xe000006)), +// CHECK: ('word-1', 0xe000007)), // CHECK: # Relocation 1 // CHECK: (('word-0', 0x0), // CHECK: ('word-1', 0x6000002)), // CHECK: ]) -// CHECK: ('_section_data', '\xbd\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('_section_data', '\xd5\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: # Section 3 +// CHECK: (('section_name', '__literal8\x00\x00\x00\x00\x00\x00') +// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('address', 271) +// CHECK: ('size', 32) +// CHECK: ('offset', 799) +// CHECK: ('alignment', 0) +// CHECK: ('reloc_offset', 0) +// CHECK: ('num_reloc', 0) +// CHECK: ('flags', 0x4) +// CHECK: ('reserved1', 0) +// CHECK: ('reserved2', 0) +// CHECK: ('reserved3', 0) +// CHECK: ), +// CHECK: ('_relocations', [ +// CHECK: ]) +// CHECK: ('_section_data', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ]) // CHECK: ), // CHECK: # Load Command 1 // CHECK: (('command', 2) // CHECK: ('size', 24) -// CHECK: ('symoff', 936) -// CHECK: ('nsyms', 8) -// CHECK: ('stroff', 1064) -// CHECK: ('strsize', 48) -// CHECK: ('_string_data', '\x00_foobar\x00_ext_foo\x00_foo\x00_baz\x00_bar\x00_prev\x00_f2\x00_f3\x00\x00') +// CHECK: ('symoff', 1136) +// CHECK: ('nsyms', 9) +// CHECK: ('stroff', 1280) +// CHECK: ('strsize', 52) +// CHECK: ('_string_data', '\x00_foobar\x00_ext_foo\x00_foo\x00_baz\x00_bar\x00_prev\x00_f2\x00_f3\x00f6\x00\x00\x00') // CHECK: ('_symbols', [ // CHECK: # Symbol 0 // CHECK: (('n_strx', 18) // CHECK: ('n_type', 0xe) // CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 16) +// CHECK: ('n_value', 40) // CHECK: ('_string', '_foo') // CHECK: ), // 
CHECK: # Symbol 1 @@ -257,7 +311,7 @@ // CHECK: ('n_type', 0xe) // CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 17) +// CHECK: ('n_value', 41) // CHECK: ('_string', '_baz') // CHECK: ), // CHECK: # Symbol 2 @@ -265,7 +319,7 @@ // CHECK: ('n_type', 0xe) // CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 77) +// CHECK: ('n_value', 101) // CHECK: ('_string', '_bar') // CHECK: ), // CHECK: # Symbol 3 @@ -273,7 +327,7 @@ // CHECK: ('n_type', 0xe) // CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 145) +// CHECK: ('n_value', 169) // CHECK: ('_string', '_prev') // CHECK: ), // CHECK: # Symbol 4 @@ -281,7 +335,7 @@ // CHECK: ('n_type', 0xe) // CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 197) +// CHECK: ('n_value', 221) // CHECK: ('_string', '_f2') // CHECK: ), // CHECK: # Symbol 5 @@ -289,10 +343,18 @@ // CHECK: ('n_type', 0xe) // CHECK: ('n_sect', 2) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 200) +// CHECK: ('n_value', 224) // CHECK: ('_string', '_f3') // CHECK: ), // CHECK: # Symbol 6 +// CHECK: (('n_strx', 47) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 4) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 287) +// CHECK: ('_string', 'f6') +// CHECK: ), +// CHECK: # Symbol 7 // CHECK: (('n_strx', 9) // CHECK: ('n_type', 0x1) // CHECK: ('n_sect', 0) @@ -300,7 +362,7 @@ // CHECK: ('n_value', 0) // CHECK: ('_string', '_ext_foo') // CHECK: ), -// CHECK: # Symbol 7 +// CHECK: # Symbol 8 // CHECK: (('n_strx', 1) // CHECK: ('n_type', 0x1) // CHECK: ('n_sect', 0) @@ -314,10 +376,10 @@ // CHECK: (('command', 11) // CHECK: ('size', 80) // CHECK: ('ilocalsym', 0) -// CHECK: ('nlocalsym', 6) -// CHECK: ('iextdefsym', 6) +// CHECK: ('nlocalsym', 7) +// CHECK: ('iextdefsym', 7) // CHECK: ('nextdefsym', 0) -// CHECK: ('iundefsym', 6) +// CHECK: ('iundefsym', 7) // CHECK: ('nundefsym', 2) // CHECK: ('tocoff', 0) // CHECK: ('ntoc', 0) From blunted2night at gmail.com Tue May 11 19:52:54 2010 
From: blunted2night at gmail.com (Nathan Jeffords) Date: Wed, 12 May 2010 00:52:54 -0000 Subject: [llvm-commits] [llvm] r103534 - /llvm/trunk/include/llvm/MC/MCAssembler.h Message-ID: <20100512005254.31B15312800A@llvm.org> Author: njeffords Date: Tue May 11 19:52:54 2010 New Revision: 103534 URL: http://llvm.org/viewvc/llvm-project?rev=103534&view=rev Log: Added a trivial function to modify the flags field of MCSymbolData class. The function takes the value and a mask, and clears the mask bits before applying the value. Modified: llvm/trunk/include/llvm/MC/MCAssembler.h Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103534&r1=103533&r2=103534&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Tue May 11 19:52:54 2010 @@ -571,6 +571,11 @@ /// setFlags - Set the (implementation defined) symbol flags. void setFlags(uint32_t Value) { Flags = Value; } + /// modifyFlags - Modify the flags via a mask + void modifyFlags(uint32_t Value, uint32_t Mask) { + Flags = (Flags & ~Mask) | Value; + } + /// getIndex - Get the (implementation defined) index. uint64_t getIndex() const { return Index; } From daniel at zuster.org Tue May 11 19:54:20 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 00:54:20 -0000 Subject: [llvm-commits] [llvm] r103535 - in /llvm/trunk: lib/Target/X86/AsmParser/X86AsmParser.cpp test/MC/AsmParser/X86/x86_64-suffix-matching.s Message-ID: <20100512005420.92A45312800A@llvm.org> Author: ddunbar Date: Tue May 11 19:54:20 2010 New Revision: 103535 URL: http://llvm.org/viewvc/llvm-project?rev=103535&view=rev Log: MC/X86: Extend suffix matching hack to match 'q' suffix. 
Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp llvm/trunk/test/MC/AsmParser/X86/x86_64-suffix-matching.s Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=103535&r1=103534&r2=103535&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Tue May 11 19:54:20 2010 @@ -673,6 +673,8 @@ bool MatchW = MatchInstructionImpl(Operands, Inst); Tmp[Base.size()] = 'l'; bool MatchL = MatchInstructionImpl(Operands, Inst); + Tmp[Base.size()] = 'q'; + bool MatchQ = MatchInstructionImpl(Operands, Inst); // Restore the old token. Op->setTokenValue(Base); @@ -680,7 +682,7 @@ // If exactly one matched, then we treat that as a successful match (and the // instruction will already have been filled in correctly, since the failing // matches won't have modified it). - if (MatchB + MatchW + MatchL == 2) + if (MatchB + MatchW + MatchL + MatchQ == 3) return false; // Otherwise, the match failed. 
Modified: llvm/trunk/test/MC/AsmParser/X86/x86_64-suffix-matching.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/X86/x86_64-suffix-matching.s?rev=103535&r1=103534&r2=103535&view=diff ============================================================================== --- llvm/trunk/test/MC/AsmParser/X86/x86_64-suffix-matching.s (original) +++ llvm/trunk/test/MC/AsmParser/X86/x86_64-suffix-matching.s Tue May 11 19:54:20 2010 @@ -4,3 +4,7 @@ add $0, %eax // CHECK: addb $255, %al add $0xFF, %al +// CHECK: orq %rax, %rdx + or %rax, %rdx +// CHECK: shlq $3, %rax + shl $3, %rax From blunted2night at gmail.com Tue May 11 20:18:44 2010 From: blunted2night at gmail.com (Nathan Jeffords) Date: Tue, 11 May 2010 18:18:44 -0700 Subject: [llvm-commits] COFF linkonce update Message-ID: I would like to commit this patch as part of my ongoing work on improving COFF support in the MC library. This patch changes where the .linkonce attribute is emitted: instead of being emitted on the symbol, it is now emitted on the section the symbol was assigned to. -Nathan -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100511/d63cd51b/attachment.html -------------- next part -------------- A non-text attachment was scrubbed... Name: linkonce.patch Type: application/octet-stream Size: 2183 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100511/d63cd51b/attachment.obj From evan.cheng at apple.com Tue May 11 20:27:49 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 12 May 2010 01:27:49 -0000 Subject: [llvm-commits] [llvm] r103538 - /llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Message-ID: <20100512012749.4DB8F312800A@llvm.org> Author: evancheng Date: Tue May 11 20:27:49 2010 New Revision: 103538 URL: http://llvm.org/viewvc/llvm-project?rev=103538&view=rev Log: Code clean up.
Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=103538&r1=103537&r2=103538&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original) +++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Tue May 11 20:27:49 2010 @@ -1135,10 +1135,11 @@ unsigned DstReg, unsigned SrcIdx, MachineRegisterInfo *MRI) { for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), - UE = MRI->reg_end(); RI != UE; ) { + RE = MRI->reg_end(); RI != RE; ) { MachineOperand &MO = RI.getOperand(); ++RI; MO.setReg(DstReg); + assert(MO.getSubReg() == 0); MO.setSubReg(SrcIdx); } } From evan.cheng at apple.com Tue May 11 20:29:36 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 12 May 2010 01:29:36 -0000 Subject: [llvm-commits] [llvm] r103539 - /llvm/trunk/lib/CodeGen/RegAllocLocal.cpp Message-ID: <20100512012936.44F67312800A@llvm.org> Author: evancheng Date: Tue May 11 20:29:36 2010 New Revision: 103539 URL: http://llvm.org/viewvc/llvm-project?rev=103539&view=rev Log: Teach local regalloc about virtual registers with sub-indices. 
Modified: llvm/trunk/lib/CodeGen/RegAllocLocal.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocLocal.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocLocal.cpp?rev=103539&r1=103538&r2=103539&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocLocal.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocLocal.cpp Tue May 11 20:29:36 2010 @@ -50,6 +50,7 @@ private: const TargetMachine *TM; MachineFunction *MF; + MachineRegisterInfo *MRI; const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; @@ -506,10 +507,15 @@ SmallSet &ReloadedRegs, unsigned PhysReg) { unsigned VirtReg = MI->getOperand(OpNum).getReg(); + unsigned SubIdx = MI->getOperand(OpNum).getSubReg(); // If the virtual register is already available, just update the instruction // and return. if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { + if (SubIdx) { + PR = TRI->getSubReg(PR, SubIdx); + MI->getOperand(OpNum).setSubReg(0); + } MI->getOperand(OpNum).setReg(PR); // Assign the input register if (!MI->isDebugValue()) { // Do not do these for DBG_VALUE as they can affect codegen. @@ -547,7 +553,12 @@ ++NumLoads; // Update statistics MF->getRegInfo().setPhysRegUsed(PhysReg); - MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register + // Assign the input register. + if (SubIdx) { + MI->getOperand(OpNum).setSubReg(0); + MI->getOperand(OpNum).setReg(TRI->getSubReg(PhysReg, SubIdx)); + } else + MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); if (!ReloadedRegs.insert(PhysReg)) { @@ -626,7 +637,6 @@ /// ComputeLocalLiveness - Computes liveness of registers within a basic /// block, setting the killed/dead flags as appropriate. 
void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // Keep track of the most recently seen previous use or def of each reg, // so that we can update them with dead/kill markers. DenseMap > LastUseDef; @@ -672,18 +682,26 @@ // - A def followed by a def is dead // - A use followed by a def is a kill if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue; - + + unsigned SubIdx = MO.getSubReg(); DenseMap >::iterator last = LastUseDef.find(MO.getReg()); if (last != LastUseDef.end()) { // Check if this is a two address instruction. If so, then // the def does not kill the use. - if (last->second.first == I && - I->isRegTiedToUseOperand(i)) + if (last->second.first == I && I->isRegTiedToUseOperand(i)) continue; MachineOperand &lastUD = last->second.first->getOperand(last->second.second); + if (SubIdx && lastUD.getSubReg() != SubIdx) + // Partial re-def, the last def is not dead. + // %reg1024:5 = + // %reg1024:6 = + // or + // %reg1024:5 = op %reg1024, 5 + continue; + if (lastUD.isDef()) lastUD.setIsDead(true); else @@ -732,8 +750,8 @@ // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that // happens. 
bool UsedByDebugValueOnly = false; - for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()), - UE = MRI.reg_end(); UI != UE; ++UI) { + for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()), + UE = MRI->reg_end(); UI != UE; ++UI) { // Two cases: // - used in another block // - used in the same block before it is defined (loop) @@ -755,8 +773,8 @@ } if (UsedByDebugValueOnly) - for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()), - UE = MRI.reg_end(); UI != UE; ++UI) + for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()), + UE = MRI->reg_end(); UI != UE; ++UI) if (UI->isDebugValue() && (UI->getParent() != &MBB || (MO.isDef() && precedes(&*UI, MI)))) @@ -878,6 +896,10 @@ std::make_pair((MachineInstr*)0, 0); DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) << " to %reg" << DestVirtReg << "\n"); + if (unsigned DestSubIdx = MO.getSubReg()) { + MO.setSubReg(0); + DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx); + } MO.setReg(DestPhysReg); // Assign the earlyclobber register } else { unsigned Reg = MO.getReg(); @@ -1073,6 +1095,11 @@ getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) << " to %reg" << DestVirtReg << "\n"); + + if (unsigned DestSubIdx = MO.getSubReg()) { + MO.setSubReg(0); + DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx); + } MO.setReg(DestPhysReg); // Assign the output register } @@ -1165,6 +1192,7 @@ bool RALocal::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "Machine Function\n"); MF = &Fn; + MRI = &Fn.getRegInfo(); TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); From daniel at zuster.org Tue May 11 20:33:38 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 11 May 2010 18:33:38 -0700 Subject: [llvm-commits] [llvm] r103528 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp In-Reply-To: <98CB2790-BDE1-4FA1-A34E-2F059394BB4F@2pi.dk> References: 
<20100511235314.04CB4312800A@llvm.org> <98CB2790-BDE1-4FA1-A34E-2F059394BB4F@2pi.dk> Message-ID: The #llvm committee said Yes! Then they scurried around and eventually quoted [basic.scope.local]p4. - Daniel On Tue, May 11, 2010 at 5:11 PM, Jakob Stoklund Olesen wrote: > > On May 11, 2010, at 4:53 PM, Daniel Dunbar wrote: > >> Author: ddunbar >> Date: Tue May 11 18:53:13 2010 >> New Revision: 103528 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=103528&view=rev >> Log: >> Make Clang happy. > > Thanks, Daniel. That was clearly bad style, but was it also invalid code? Both Clang and MSVC rejected it. > >> >> Modified: >> ? ?llvm/trunk/lib/CodeGen/RegAllocFast.cpp >> >> Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103528&r1=103527&r2=103528&view=diff >> ============================================================================== >> --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) >> +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Tue May 11 18:53:13 2010 >> @@ -584,9 +584,9 @@ >> ? ? ? ? if (!MO.isReg()) continue; >> ? ? ? ? unsigned Reg = MO.getReg(); >> ? ? ? ? if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; >> - ? ? ? ?LiveRegMap::iterator i = LiveVirtRegs.find(Reg); >> - ? ? ? ?if (i != LiveVirtRegs.end()) >> - ? ? ? ? ?setPhysReg(MO, i->second.PhysReg); >> + ? ? ? ?LiveRegMap::iterator it = LiveVirtRegs.find(Reg); >> + ? ? ? ?if (it != LiveVirtRegs.end()) >> + ? ? ? ? ?setPhysReg(MO, it->second.PhysReg); >> ? ? ? ? else >> ? ? ? ? ? MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! >> ? ? ? 
} >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > From evan.cheng at apple.com Tue May 11 20:42:50 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 12 May 2010 01:42:50 -0000 Subject: [llvm-commits] [llvm] r103540 - /llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Message-ID: <20100512014250.6F823312800A@llvm.org> Author: evancheng Date: Tue May 11 20:42:50 2010 New Revision: 103540 URL: http://llvm.org/viewvc/llvm-project?rev=103540&view=rev Log: vst instructions are modeled as this: v1024 = REG_SEQUENCE ... v1025 = EXTRACT_SUBREG v1024, 5 v1026 = EXTRACT_SUBREG v1024, 6 = VSTxx , v1025, v1026 The REG_SEQUENCE ensures the sources that feed into the VST instruction are getting the right register allocation so they form a large super-register. The extract_subreg will be coalesced away and all would just work: v1024 = REG_SEQUENCE ... = VSTxx , v1024:5, v1024:6 The problem is if the coalescer isn't run, the extract_subreg instructions would stick around and there is no assurance v1025 and v1026 will get the right registers. As a short term workaround, teach the NEON pre-allocation pass to transfer the sub-register indices over. An alternative would be to do it in the 2addr pass when reg_sequence's are eliminated. But that *seems* wrong and requires updating liveness information. Another alternative is to do this in the scheduler when the instructions are created. But that would mean somehow the scheduler knows this has to be done for correctness reasons. That's yucky as well. So for now, we are leaving this in the target specific pass. 
Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=103540&r1=103539&r2=103540&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Tue May 11 20:42:50 2010 @@ -379,6 +379,7 @@ unsigned LastSrcReg = 0; unsigned LastSubIdx = 0; + SmallVector SubIds; for (unsigned R = 0; R < NumRegs; ++R) { const MachineOperand &MO = MI->getOperand(FirstOpnd + R); assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); @@ -405,8 +406,33 @@ if (SubIdx != ARM::DSUBREG_0 && SubIdx != ARM::QSUBREG_0) return false; } + SubIds.push_back(SubIdx); LastSubIdx = SubIdx; } + + // FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is + // currently required for correctness. e.g. + // %reg1041; = REG_SEQUENCE %reg1040, 5, %reg1035, 6 + // %reg1042 = EXTRACT_SUBREG %reg1041, 6 + // %reg1043 = EXTRACT_SUBREG %reg1041, 5 + // VST1q16 %reg1025, 0, %reg1043, %reg1042, + // reg1025 and reg1043 should be replaced with reg1041:6 and reg1041:5 + // respectively. + // We need to change how we model uses of REG_SEQUENCE. + for (unsigned R = 0; R < NumRegs; ++R) { + MachineOperand &MO = MI->getOperand(FirstOpnd + R); + unsigned OldReg = MO.getReg(); + MachineInstr *DefMI = MRI->getVRegDef(OldReg); + assert(DefMI->isExtractSubreg()); + MO.setReg(LastSrcReg); + MO.setSubReg(SubIds[R]); + if (R != 0) + MO.setIsKill(false); + // Delete the EXTRACT_SUBREG if its result is now dead. 
+ if (MRI->use_empty(OldReg)) + DefMI->eraseFromParent(); + } + return true; } From clattner at apple.com Tue May 11 20:47:23 2010 From: clattner at apple.com (Chris Lattner) Date: Tue, 11 May 2010 18:47:23 -0700 Subject: [llvm-commits] COFF linkonce update In-Reply-To: References: Message-ID: On May 11, 2010, at 6:18 PM, Nathan Jeffords wrote: > I would like to commit this patch as part of my ongoing work on improving COFF support in the MC library. > > This patch changes where the .linkonce attribute is emitted from the symbol to the section the symbol was assigned to. Looks great, please fix the brace here: :-) + if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_LNK_COMDAT) + { and commit! -Chris -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100511/d63959a7/attachment.html From stoklund at 2pi.dk Tue May 11 20:47:46 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Tue, 11 May 2010 18:47:46 -0700 Subject: [llvm-commits] [llvm] r103528 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp In-Reply-To: References: <20100511235314.04CB4312800A@llvm.org> <98CB2790-BDE1-4FA1-A34E-2F059394BB4F@2pi.dk> Message-ID: On May 11, 2010, at 6:33 PM, Daniel Dunbar wrote: > The #llvm committee said Yes! Then they scurried around and eventually > quoted [basic.scope.local]p4. Thanks :-) Kudos to MSVC for getting this right after putting for loop variables in the outer scope for years. From daniel at zuster.org Tue May 11 21:06:21 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Tue, 11 May 2010 19:06:21 -0700 Subject: [llvm-commits] Next patch for builbot to use scripted builds In-Reply-To: References: Message-ID: Looks good to me! - Daniel On Tue, May 11, 2010 at 4:27 PM, Galina Kistanova wrote: > Hello everyone, > > Please review the next two patches to for buildbot scripted builder. > > 1. 
One adds a new shell script to the llvm-gcc/extras directory for > building llvm-gcc: > > * build-x-4-mingw32 - cross builds llvm and llvm-gcc for > --build=x86_64-apple-darwin10 --host=x86_64-apple-darwin10 > --target=i686-pc-mingw32 > > 2. The other one adds a new buildbot builder ScriptedBuilder to work > with the new build script, and updates configuration of the > llvm-gcc-x86_64-darwin10-cross-mingw32 buildslave. > > Please review. > > Thanks > > Galina > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > From sabre at nondot.org Tue May 11 23:13:27 2010 From: sabre at nondot.org (Chris Lattner) Date: Wed, 12 May 2010 04:13:27 -0000 Subject: [llvm-commits] [www] r103567 - /www/trunk/header.incl Message-ID: <20100512041327.76730312800A@llvm.org> Author: lattner Date: Tue May 11 23:13:27 2010 New Revision: 103567 URL: http://llvm.org/viewvc/llvm-project?rev=103567&view=rev Log: remove the mostly useless 'support' link Modified: www/trunk/header.incl Modified: www/trunk/header.incl URL: http://llvm.org/viewvc/llvm-project/www/trunk/header.incl?rev=103567&r1=103566&r2=103567&view=diff ============================================================================== --- www/trunk/header.incl (original) +++ www/trunk/header.incl Tue May 11 23:13:27 2010 @@ -25,7 +25,6 @@ Features
Documentation
Command Guide
-Support
FAQ
Publications
LLVM Projects
From blunted2night at gmail.com Tue May 11 23:26:09 2010 From: blunted2night at gmail.com (Nathan Jeffords) Date: Wed, 12 May 2010 04:26:09 -0000 Subject: [llvm-commits] [llvm] r103568 - in /llvm/trunk/lib: CodeGen/AsmPrinter/AsmPrinter.cpp MC/MCSectionCOFF.cpp Message-ID: <20100512042609.ABAD9312800A@llvm.org> Author: njeffords Date: Tue May 11 23:26:09 2010 New Revision: 103568 URL: http://llvm.org/viewvc/llvm-project?rev=103568&view=rev Log: updated support for the COFF .linkonce Now, the .linkonce directive is emitted as part of MCSectionCOFF::PrintSwitchToSection instead of AsmPrinter::EmitLinkage since it is an attribute of the section the symbol was placed into not the symbol itself. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/trunk/lib/MC/MCSectionCOFF.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=103568&r1=103567&r2=103568&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Tue May 11 23:26:09 2010 @@ -208,13 +208,7 @@ } else if (const char *LinkOnce = MAI->getLinkOnceDirective()) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - // FIXME: linkonce should be a section attribute, handled by COFF Section - // assignment. - // http://sourceware.org/binutils/docs-2.20/as/Linkonce.html#Linkonce - // .linkonce discard - // FIXME: It would be nice to use .linkonce samesize for non-common - // globals. 
- OutStreamer.EmitRawText(StringRef(LinkOnce)); + //NOTE: linkonce is handling by the section the symbol was assigned to } else { // .weak _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); Modified: llvm/trunk/lib/MC/MCSectionCOFF.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCSectionCOFF.cpp?rev=103568&r1=103567&r2=103568&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCSectionCOFF.cpp (original) +++ llvm/trunk/lib/MC/MCSectionCOFF.cpp Tue May 11 23:26:09 2010 @@ -47,4 +47,30 @@ if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE) OS << 'n'; OS << "\"\n"; + + if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_LNK_COMDAT) { + switch (Selection) { + default: + assert (0 && "unsupported COFF selection type"); + break; + case IMAGE_COMDAT_SELECT_NODUPLICATES: + OS << "\t.linkonce one_only\n"; + break; + case IMAGE_COMDAT_SELECT_ANY: + OS << "\t.linkonce discard\n"; + break; + case IMAGE_COMDAT_SELECT_SAME_SIZE: + OS << "\t.linkonce same_size\n"; + break; + case IMAGE_COMDAT_SELECT_EXACT_MATCH: + OS << "\t.linkonce same_contents\n"; + break; + // ".linkonce largest" is not documented as being an option. + // It seems odd that a link attribute designed essentially for PE/COFF + // wouldn't support all the options (at least as of binutils 2.20) + //case IMAGE_COMDAT_SELECT_LARGEST: + // OS << "\t.linkonce largest\n"; + // break; + } + } } From sabre at nondot.org Tue May 11 23:49:35 2010 From: sabre at nondot.org (Chris Lattner) Date: Wed, 12 May 2010 04:49:35 -0000 Subject: [llvm-commits] [www] r103569 - /www/trunk/index.html Message-ID: <20100512044935.4512D312800A@llvm.org> Author: lattner Date: Tue May 11 23:49:35 2010 New Revision: 103569 URL: http://llvm.org/viewvc/llvm-project?rev=103569&view=rev Log: rewrite the main LLVM web page, including links to the main subprojects. 
Modified: www/trunk/index.html Modified: www/trunk/index.html URL: http://llvm.org/viewvc/llvm-project/www/trunk/index.html?rev=103569&r1=103568&r2=103569&view=diff ============================================================================== --- www/trunk/index.html (original) +++ www/trunk/index.html Tue May 11 23:49:35 2010 @@ -9,8 +9,82 @@
-

Low Level Virtual Machine (LLVM) is:

+

The LLVM Project is a collection of modular and reusable compiler and + toolchain technologies. From its humble beginnings as a research project at + the University of Illinois, LLVM has + grown to be an umbrella project consisting of a number of different + subprojects, many of which are being used in production by a wide variety of + commercial and open source projects + as well as being widely used in academic research. Code + in the LLVM project is licensed under the "UIUC" BSD-Style license.

+ +

The primary sub-projects of LLVM are:

+ +
    +
  1. The LLVM Core libraries provide a modern source language and + target-independent optimizer, along with + code generation support for many + popular CPUs (as well as some less common ones!) These libraries are built + around a well specified code representation + known as the LLVM intermediate representation ("LLVM IR"). The LLVM Core + libraries are well documented, and it is particularly + easy to invent your own language (or port an existing compiler) to use + LLVM as an optimizer and code generator.

    +
  2. + +
  3. Clang is an "LLVM native" + C/C++/Objective-C compiler, which aims to deliver amazingly fast compiles + (e.g. about 3x faster than GCC when + compiling Objective-C code in a debug configuration), extremely useful error and warning messages + and to provide a platform for building great source level tools. The + Clang Static Analyzer is a + tool built on Clang that automatically finds bugs in your code, it is a + great example of the sort of tool that can be built with the Clang + frontend.

  4. + +
  5. llvm-gcc 4.2 and + dragonegg integrate the LLVM + optimizers and code generator with the GCC 4.2 (which is GPL2) and GCC 4.5 + (which is GPL3) parsers, respectively. This allows LLVM to compile Ada, + Fortran, and a number of other languages supported by the GCC compiler + frontends, and provides high-fidelity drop-in compatibility with their + respective versions of GCC.

  6. + +
  7. The libc++ project provides + a standards conformant and high-performance implementation of the C++ + Standard Library, with an aim to supporting C++'0x when the standard is + ratified.

  8. + +
  9. The compiler-rt project + provides highly tuned implementations of the low-level code generator + support routines like "__fixunsdfdi" and other calls generated when + a target doesn't have a short sequence of native instructions to implement + a core IR operation.

  10. + +
  11. The vmkit project is an + implementation of the Java and .NET Virtual Machines that is built on LLVM + technologies.

  12. + +
  13. The klee project implements a + "symbolic virtual machine" which uses a theorem prover to try to evaluate + all dynamic paths through a program, in an effort to find bugs and to prove + properties of functions. A major feature of klee is that it can produce a + testcase in the event that it detects a bug.

  14. + +
+ +

As much as everything else, LLVM has a broad and friendly community of people +who are interested in building great low-level tools. If you are interested in +getting involved, a good first place is to skim the LLVM Blog and to sign up for the LLVM Developer mailing +list. +

+ From sabre at nondot.org Tue May 11 23:50:52 2010 From: sabre at nondot.org (Chris Lattner) Date: Wed, 12 May 2010 04:50:52 -0000 Subject: [llvm-commits] [www] r103570 - /www/trunk/index.html Message-ID: <20100512045052.BE200312800A@llvm.org> Author: lattner Date: Tue May 11 23:50:52 2010 New Revision: 103570 URL: http://llvm.org/viewvc/llvm-project?rev=103570&view=rev Log: validate Modified: www/trunk/index.html Modified: www/trunk/index.html URL: http://llvm.org/viewvc/llvm-project/www/trunk/index.html?rev=103570&r1=103569&r2=103570&view=diff ============================================================================== --- www/trunk/index.html (original) +++ www/trunk/index.html Tue May 11 23:50:52 2010 @@ -84,74 +84,7 @@ list.

- +
From sabre at nondot.org Wed May 12 00:01:37 2010 From: sabre at nondot.org (Chris Lattner) Date: Wed, 12 May 2010 05:01:37 -0000 Subject: [llvm-commits] [www] r103571 - /www/trunk/index.html Message-ID: <20100512050137.C7027312800A@llvm.org> Author: lattner Date: Wed May 12 00:01:37 2010 New Revision: 103571 URL: http://llvm.org/viewvc/llvm-project?rev=103571&view=rev Log: add another paragraph Modified: www/trunk/index.html Modified: www/trunk/index.html URL: http://llvm.org/viewvc/llvm-project/www/trunk/index.html?rev=103571&r1=103570&r2=103571&view=diff ============================================================================== --- www/trunk/index.html (original) +++ www/trunk/index.html Wed May 12 00:01:37 2010 @@ -22,7 +22,7 @@

The primary sub-projects of LLVM are:

    -
  1. The LLVM Core libraries provide a modern source language and +

  2. The LLVM Core libraries provide a modern source- and target-independent optimizer, along with code generation support for many popular CPUs (as well as some less common ones!) These libraries are built @@ -41,16 +41,16 @@ href="http://clang.llvm.org/diagnostics.html">error and warning messages and to provide a platform for building great source level tools. The Clang Static Analyzer is a - tool built on Clang that automatically finds bugs in your code, it is a - great example of the sort of tool that can be built with the Clang - frontend.

  3. + tool built on Clang that automatically finds bugs in your code, and it is a + great example of the sort of tool that can be built using the Clang + frontend as a library to parse C/C++ code.

  4. llvm-gcc 4.2 and dragonegg integrate the LLVM optimizers and code generator with the GCC 4.2 (which is GPL2) and GCC 4.5 (which is GPL3) parsers, respectively. This allows LLVM to compile Ada, - Fortran, and a number of other languages supported by the GCC compiler - frontends, and provides high-fidelity drop-in compatibility with their + Fortran, and other languages supported by the GCC compiler frontends, and + provides high-fidelity drop-in compatibility with their respective versions of GCC.

  5. The libc++ project provides @@ -76,6 +76,15 @@

+

In addition to official subprojects of LLVM, there are a broad variety of +other projects that use components +of LLVM for various tasks. Through these external projects you can use +LLVM to compile Ruby, Python, Haskell, Java, D, PHP, Pure, Lua, and a number of +other languages. A major strength of LLVM is its versatility, flexibility, and +reusability, which is why it is being used for such a wide variety of different +tasks: everything from doing light-weight JIT compiles of embedded languages +like Lua to compiling Fortran code for massive super computers.

+

As much as everything else, LLVM has a broad and friendly community of people who are interested in building great low-level tools. If you are interested in getting involved, a good first place is to skim the Author: johannes Date: Wed May 12 00:02:13 2010 New Revision: 103572 URL: http://llvm.org/viewvc/llvm-project?rev=103572&view=rev Log: Make sure always_inline is honored on instantiated functions. cgraph was removing these in the belief the inlining had already been done. 7898991. Modified: llvm-gcc-4.2/trunk/gcc/cgraph.h llvm-gcc-4.2/trunk/gcc/cp/semantics.c Modified: llvm-gcc-4.2/trunk/gcc/cgraph.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/cgraph.h?rev=103572&r1=103571&r2=103572&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/cgraph.h (original) +++ llvm-gcc-4.2/trunk/gcc/cgraph.h Wed May 12 00:02:13 2010 @@ -342,8 +342,14 @@ C++ uses DECL_EXTERNAL to mark functions instantiated as part of template instantiation which should not be emitted. Objective C uses it for some other convoluted purpose. Functions marked this way in - other languages should not be passed down to the LLVM BE. The easiest + other languages should not be passed down to the LLVM BE (except functions + marked always_inline should be passed down in all languages). The easiest way to outwit this, although inelegant, seems to be to check the language. + IS_EXTERN_NOINLINE seems a reasonable name for this quality. + + IS_EXTERN_INLINE, on the other hand, describes functions that have the + semantics of "extern inline" in C99. As such it is not the inverse of + IS_EXTERN_NOINLINE. Sorry about that. Weak extern inlines are treated as weak. 
@@ -356,6 +362,7 @@ !lookup_attribute ("weak", DECL_ATTRIBUTES (f)) && \ strcmp (lang_hooks.name, "GNU C") == 0) #define IS_EXTERN_NOINLINE(f) (DECL_EXTERNAL(f) && \ + !lookup_attribute ("always_inline", DECL_ATTRIBUTES (f)) && \ (!DECL_EXPLICIT_INLINE_P(f) || \ lookup_attribute ("weak", DECL_ATTRIBUTES (f)) || \ strcmp (lang_hooks.name, "GNU C") != 0)) Modified: llvm-gcc-4.2/trunk/gcc/cp/semantics.c URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/cp/semantics.c?rev=103572&r1=103571&r2=103572&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/cp/semantics.c (original) +++ llvm-gcc-4.2/trunk/gcc/cp/semantics.c Wed May 12 00:02:13 2010 @@ -3238,7 +3238,10 @@ `-fexternal-templates'; we instantiate the function, even though we're not planning on emitting it, in case we get a chance to inline it. */ - if (DECL_EXTERNAL (fn)) + /* LLVM LOCAL begin always_inline functions need to be emitted */ + if (DECL_EXTERNAL (fn) && + !lookup_attribute ("always_inline", DECL_ATTRIBUTES (fn))) + /* LLVM LOCAL end */ return; /* ??? When is this needed? 
*/ From sabre at nondot.org Wed May 12 00:04:14 2010 From: sabre at nondot.org (Chris Lattner) Date: Wed, 12 May 2010 05:04:14 -0000 Subject: [llvm-commits] [www] r103573 - /www/trunk/index.html Message-ID: <20100512050414.9903B312800A@llvm.org> Author: lattner Date: Wed May 12 00:04:14 2010 New Revision: 103573 URL: http://llvm.org/viewvc/llvm-project?rev=103573&view=rev Log: wordsmith Modified: www/trunk/index.html Modified: www/trunk/index.html URL: http://llvm.org/viewvc/llvm-project/www/trunk/index.html?rev=103573&r1=103572&r2=103573&view=diff ============================================================================== --- www/trunk/index.html (original) +++ www/trunk/index.html Wed May 12 00:04:14 2010 @@ -41,9 +41,9 @@ href="http://clang.llvm.org/diagnostics.html">error and warning messages and to provide a platform for building great source level tools. The Clang Static Analyzer is a - tool built on Clang that automatically finds bugs in your code, and it is a - great example of the sort of tool that can be built using the Clang - frontend as a library to parse C/C++ code.

+ tool automatically finds bugs in your code, and is a great example of the + sort of tool that can be built using the Clang frontend as a library to + parse C/C++ code.

  • llvm-gcc 4.2 and dragonegg integrate the LLVM From dalej at apple.com Wed May 12 00:04:20 2010 From: dalej at apple.com (Dale Johannesen) Date: Wed, 12 May 2010 05:04:20 -0000 Subject: [llvm-commits] [llvm] r103574 - /llvm/trunk/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp Message-ID: <20100512050420.A35223128018@llvm.org> Author: johannes Date: Wed May 12 00:04:20 2010 New Revision: 103574 URL: http://llvm.org/viewvc/llvm-project?rev=103574&view=rev Log: Testcase for llvm 103572 (7898991). Added: llvm/trunk/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp Added: llvm/trunk/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/FrontendC%2B%2B/2010-05-11-alwaysinlineinstantiation.cpp?rev=103574&view=auto ============================================================================== --- llvm/trunk/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp (added) +++ llvm/trunk/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp Wed May 12 00:04:20 2010 @@ -0,0 +1,31 @@ +// RUN: %llvmgxx -xc++ %s -c -o - | llvm-dis | not grep ZN12basic_stringIcEC1Ev +// RUN: %llvmgxx -xc++ %s -c -o - | llvm-dis | grep ZN12basic_stringIcED1Ev | count 2 + +template +class basic_string +{ +public: + basic_string(); + ~basic_string(); +}; + +template +__attribute__ ((__visibility__("hidden"), __always_inline__)) inline +basic_string::basic_string() +{ +} + +template +inline +basic_string::~basic_string() +{ +} + +typedef basic_string string; + +extern template class basic_string; + +int main() +{ + string s; +} From sabre at nondot.org Wed May 12 00:05:43 2010 From: sabre at nondot.org (Chris Lattner) Date: Wed, 12 May 2010 05:05:43 -0000 Subject: [llvm-commits] [www] r103575 - /www/trunk/index.html Message-ID: <20100512050543.AE56B312800A@llvm.org> Author: lattner Date: Wed May 12 00:05:43 2010 New Revision: 103575 URL: http://llvm.org/viewvc/llvm-project?rev=103575&view=rev Log: grammar 
Modified: www/trunk/index.html Modified: www/trunk/index.html URL: http://llvm.org/viewvc/llvm-project/www/trunk/index.html?rev=103575&r1=103574&r2=103575&view=diff ============================================================================== --- www/trunk/index.html (original) +++ www/trunk/index.html Wed May 12 00:05:43 2010 @@ -54,8 +54,8 @@ respective versions of GCC.

  • The libc++ project provides - a standards conformant and high-performance implementation of the C++ - Standard Library, with an aim to supporting C++'0x when the standard is + a standard conformant and high-performance implementation of the C++ + Standard Library, with an aim of supporting C++'0x when the standard is ratified.

  • The compiler-rt project From rafael.espindola at gmail.com Wed May 12 00:16:34 2010 From: rafael.espindola at gmail.com (Rafael Espindola) Date: Wed, 12 May 2010 05:16:34 -0000 Subject: [llvm-commits] [llvm] r103576 - in /llvm/trunk: include/llvm/MC/MCExpr.h lib/MC/MCExpr.cpp lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp Message-ID: <20100512051635.110FE312800A@llvm.org> Author: rafael Date: Wed May 12 00:16:34 2010 New Revision: 103576 URL: http://llvm.org/viewvc/llvm-project?rev=103576&view=rev Log: Add support for movi32 of global values to the new (MC) asm printer. Modified: llvm/trunk/include/llvm/MC/MCExpr.h llvm/trunk/lib/MC/MCExpr.cpp llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp llvm/trunk/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp Modified: llvm/trunk/include/llvm/MC/MCExpr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCExpr.h?rev=103576&r1=103575&r2=103576&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCExpr.h (original) +++ llvm/trunk/include/llvm/MC/MCExpr.h Wed May 12 00:16:34 2010 @@ -134,7 +134,9 @@ VK_NTPOFF, VK_PLT, VK_TLSGD, - VK_TPOFF + VK_TPOFF, + VK_ARM_HI16, // The R_ARM_MOVT_ABS relocation (:upper16: in the asm file) + VK_ARM_LO16 // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the asm file) }; private: Modified: llvm/trunk/lib/MC/MCExpr.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCExpr.cpp?rev=103576&r1=103575&r2=103576&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCExpr.cpp (original) +++ llvm/trunk/lib/MC/MCExpr.cpp Wed May 12 00:16:34 2010 @@ -39,6 +39,10 @@ const MCSymbolRefExpr &SRE = cast(*this); const MCSymbol &Sym = SRE.getSymbol(); + if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_HI16 || + SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16) + OS << 
MCSymbolRefExpr::getVariantKindName(SRE.getKind()); + // Parenthesize names that start with $ so that they don't look like // absolute names. if (Sym.getName()[0] == '$') @@ -46,7 +50,9 @@ else OS << Sym; - if (SRE.getKind() != MCSymbolRefExpr::VK_None) + if (SRE.getKind() != MCSymbolRefExpr::VK_None && + SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 && + SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16) OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); return; @@ -169,6 +175,8 @@ case VK_PLT: return "PLT"; case VK_TLSGD: return "TLSGD"; case VK_TPOFF: return "TPOFF"; + case VK_ARM_HI16: return ":upper16:"; + case VK_ARM_LO16: return ":lower16:"; } } Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp?rev=103576&r1=103575&r2=103576&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp Wed May 12 00:16:34 2010 @@ -1375,13 +1375,32 @@ case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file. // This is a hack that lowers as a two instruction sequence. 
unsigned DstReg = MI->getOperand(0).getReg(); - unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); - + const MachineOperand &MO = MI->getOperand(1); + MCOperand V1, V2; + if (MO.isImm()) { + unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); + V1 = MCOperand::CreateImm(ImmVal & 65535); + V2 = MCOperand::CreateImm(ImmVal >> 16); + } else if (MO.isGlobal()) { + MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO); + const MCSymbolRefExpr *SymRef1 = + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_LO16, OutContext); + const MCSymbolRefExpr *SymRef2 = + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_HI16, OutContext); + V1 = MCOperand::CreateExpr(SymRef1); + V2 = MCOperand::CreateExpr(SymRef2); + } else { + MI->dump(); + llvm_unreachable("cannot handle this operand"); + } + { MCInst TmpInst; TmpInst.setOpcode(ARM::MOVi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg - TmpInst.addOperand(MCOperand::CreateImm(ImmVal & 65535)); // lower16(imm) + TmpInst.addOperand(V1); // lower16(imm) // Predicate. TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); @@ -1395,7 +1414,7 @@ TmpInst.setOpcode(ARM::MOVTi16); TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg - TmpInst.addOperand(MCOperand::CreateImm(ImmVal >> 16)); // upper16(imm) + TmpInst.addOperand(V2); // upper16(imm) // Predicate. 
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); Modified: llvm/trunk/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp?rev=103576&r1=103575&r2=103576&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp Wed May 12 00:16:34 2010 @@ -217,7 +217,8 @@ ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported")); O << '#' << Op.getImm(); } else { - assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + if (Modifier && Modifier[0] != 0 && strcmp(Modifier, "call") != 0) + llvm_unreachable("Unsupported modifier"); assert(Op.isExpr() && "unknown operand kind in printOperand"); O << *Op.getExpr(); } From sabre at nondot.org Wed May 12 00:32:24 2010 From: sabre at nondot.org (Chris Lattner) Date: Tue, 11 May 2010 22:32:24 -0700 Subject: [llvm-commits] [llvm] r103427 - /llvm/trunk/test/Other/2010-05-60-Printer.ll In-Reply-To: <201005111828.49367.dag@cray.com> References: <20100510205106.4F283312800A@llvm.org> <24B717FA-C973-47DE-9818-44E709A83EDC@nondot.org> <49AE2548-9ACC-4C2E-BA68-1C0E80D5575F@apple.com> <201005111828.49367.dag@cray.com> Message-ID: On May 11, 2010, at 4:28 PM, David Greene wrote: > On Tuesday 11 May 2010 00:55:05 Daniel Dunbar wrote: > >>> Daniel, is this a lit bug? >> >> This is a 'lit' unimplementism. I didn't implement a full Tcl parser, just >> enough for the parts we use. Since I plan to kill off the DejaGNU side at >> some point, I don't feel it is worth adding support for more Tcl-isms, but >> I can if people like. > > Somebody had better update the documentation if this doesn't get implemented. Ah, I didn't realize that there was a real lit "bug" here. I'm sorry for accusing you of not testing your patch! Thank you for pointing this out. 
If you're not using lit, I strongly recommend it: it supports parallel testing, which makes regression tests run much faster on a multicore machine. To use it, just do: cd llvm/test; make check-lit -Chris From anton at korobeynikov.info Wed May 12 02:05:20 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Wed, 12 May 2010 11:05:20 +0400 Subject: [llvm-commits] COFF linkonce update In-Reply-To: References: Message-ID: Hello, Nathan > I would like to commit this patch as part of my ongoing work on improving > COFF support in the MC library. > This patch changes where the .linkonce attribute is emitted from the symbol > to the section the symbol was assigned to. Looks good to me as well. + //case IMAGE_COMDAT_SELECT_LARGEST: + // OS << "\t.linkonce largest\n"; + // break; Please move this case a fallthrough to default, with a proper comment. -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From anton at korobeynikov.info Wed May 12 02:05:39 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Wed, 12 May 2010 11:05:39 +0400 Subject: [llvm-commits] COFF linkonce update In-Reply-To: References: Message-ID: > Please move this case a fallthrough to default, with a proper comment. s/move/make -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From baldrick at free.fr Wed May 12 02:11:33 2010 From: baldrick at free.fr (Duncan Sands) Date: Wed, 12 May 2010 07:11:33 -0000 Subject: [llvm-commits] [llvm] r103586 - /llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Message-ID: <20100512071133.D53C4312800A@llvm.org> Author: baldrick Date: Wed May 12 02:11:33 2010 New Revision: 103586 URL: http://llvm.org/viewvc/llvm-project?rev=103586&view=rev Log: Remove unused variable. Tweak a comment while there.
Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp?rev=103586&r1=103585&r2=103586&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinter.cpp Wed May 12 02:11:33 2010 @@ -205,10 +205,10 @@ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); // .weak_definition _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); - } else if (const char *LinkOnce = MAI->getLinkOnceDirective()) { + } else if (MAI->getLinkOnceDirective() != 0) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - //NOTE: linkonce is handling by the section the symbol was assigned to + //NOTE: linkonce is handled by the section the symbol was assigned to. } else { // .weak _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); From nicholas at mxc.ca Wed May 12 02:23:36 2010 From: nicholas at mxc.ca (Nick Lewycky) Date: Wed, 12 May 2010 00:23:36 -0700 Subject: [llvm-commits] inliner holds onto ValueMap Message-ID: <4BEA5778.7050603@mxc.ca> I've encountered a problem while working on MergeFunctions, a ModulePass. The Inliner caches its results in a ValueMap, and that value map is a member of the InlineCostAnalyzer which is a member of SimpleInliner, aka. the -inline pass. When my pass tries to RAUW a function it triggers an abort in the ValueMap, because RAUW'ing a value map key is illegal. 
Here's the pass manager structure: Pass Arguments: -basiccg -inline -mergefunc -preverify -domtree -verify Target Data Layout ModulePass Manager Basic CallGraph Construction Call Graph SCC Pass Manager Function Integration/Inlining Merge Functions FunctionPass Manager Preliminary module verification Dominator Tree Construction Module Verifier so I tried the obvious fix, which is to add a clear() method to InlineCostAnalyzer that clears the cache, patch attached. It fixes my testcase! That's not why I'm emailing. My change breaks test/Transforms/Inline/externally_available.ll which contains: define available_externally i32 @test_function() { ret i32 4 } define i32 @result() { %A = call i32 @test_function() %B = add i32 %A, 1 ret i32 %B } and expects that after opt -inline -constprop, @test_function will have been deleted from the IR. Is my approach for Inliner cleanup correct? Is there a guarantee that doFinalization() on a CallGraphSCCPass will be called before the next ModulePass starts? What about when the CGSCC iterates: will it wipe the cache in between? (We don't want it to.) Secondly, why does my patch modify the behaviour of this test? Why should inlining delete that function at all? Nick -------------- next part -------------- A non-text attachment was scrubbed...
Name: inliner-free-valuemap.patch Type: text/x-patch Size: 1631 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100512/8e0dd331/attachment.bin From baldrick at free.fr Wed May 12 02:31:59 2010 From: baldrick at free.fr (Duncan Sands) Date: Wed, 12 May 2010 07:31:59 -0000 Subject: [llvm-commits] [dragonegg] r103589 - in /dragonegg/trunk: llvm-abi.h llvm-backend.cpp llvm-convert.cpp llvm-debug.cpp llvm-types.cpp x86/llvm-target.cpp Message-ID: <20100512073159.AA346312800A@llvm.org> Author: baldrick Date: Wed May 12 02:31:59 2010 New Revision: 103589 URL: http://llvm.org/viewvc/llvm-project?rev=103589&view=rev Log: Now that VISIBILITY_HIDDEN is no longer defined by LLVM, remove hacks to avoid collisions with gcc's VISIBILITY_HIDDEN. Use the new LLVM_GLOBAL_VISIBILITY rather than using gcc's visibility pragma directly. Modified: dragonegg/trunk/llvm-abi.h dragonegg/trunk/llvm-backend.cpp dragonegg/trunk/llvm-convert.cpp dragonegg/trunk/llvm-debug.cpp dragonegg/trunk/llvm-types.cpp dragonegg/trunk/x86/llvm-target.cpp Modified: dragonegg/trunk/llvm-abi.h URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/llvm-abi.h?rev=103589&r1=103588&r2=103589&view=diff ============================================================================== --- dragonegg/trunk/llvm-abi.h (original) +++ dragonegg/trunk/llvm-abi.h Wed May 12 02:31:59 2010 @@ -41,8 +41,6 @@ #include // GCC headers -#undef VISIBILITY_HIDDEN - extern "C" { #include "config.h" #include "system.h" Modified: dragonegg/trunk/llvm-backend.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/llvm-backend.cpp?rev=103589&r1=103588&r2=103589&view=diff ============================================================================== --- dragonegg/trunk/llvm-backend.cpp (original) +++ dragonegg/trunk/llvm-backend.cpp Wed May 12 02:31:59 2010 @@ -55,8 +55,6 @@ #include // GCC headers -#undef VISIBILITY_HIDDEN - extern "C" { #include "config.h" #include "system.h" @@ 
-1563,10 +1561,9 @@ //===----------------------------------------------------------------------===// // This plugin's code is licensed under the GPLv2 or later. The LLVM libraries -// use the GPL compatible University of Illinois/NCSA Open Source License. -#pragma GCC visibility push(default) -int plugin_is_GPL_compatible; // This plugin is GPL compatible. -#pragma GCC visibility pop +// use the GPL compatible University of Illinois/NCSA Open Source License. The +// plugin is GPL compatible. +int plugin_is_GPL_compatible LLVM_GLOBAL_VISIBILITY; /// llvm_start_unit - Perform late initialization. This is called by GCC just @@ -2359,9 +2356,8 @@ /// plugin_init - Plugin initialization routine, called by GCC. This is the /// first code executed in the plugin (except for constructors). Configure /// the plugin and setup GCC, taking over optimization and code generation. -#pragma GCC visibility push(default) -int plugin_init(struct plugin_name_args *plugin_info, - struct plugin_gcc_version *version) { +int LLVM_GLOBAL_VISIBILITY plugin_init(struct plugin_name_args *plugin_info, + struct plugin_gcc_version *version) { const char *plugin_name = plugin_info->base_name; struct register_pass_info pass_info; @@ -2652,4 +2648,3 @@ return 0; } -#pragma GCC visibility pop Modified: dragonegg/trunk/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/llvm-convert.cpp?rev=103589&r1=103588&r2=103589&view=diff ============================================================================== --- dragonegg/trunk/llvm-convert.cpp (original) +++ dragonegg/trunk/llvm-convert.cpp Wed May 12 02:31:59 2010 @@ -51,8 +51,6 @@ #include // GCC headers -#undef VISIBILITY_HIDDEN - extern "C" { #include "config.h" #include "system.h" Modified: dragonegg/trunk/llvm-debug.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/llvm-debug.cpp?rev=103589&r1=103588&r2=103589&view=diff ============================================================================== --- 
dragonegg/trunk/llvm-debug.cpp (original) +++ dragonegg/trunk/llvm-debug.cpp Wed May 12 02:31:59 2010 @@ -40,8 +40,6 @@ #include // GCC headers -#undef VISIBILITY_HIDDEN - extern "C" { #include "config.h" #include "system.h" Modified: dragonegg/trunk/llvm-types.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/llvm-types.cpp?rev=103589&r1=103588&r2=103589&view=diff ============================================================================== --- dragonegg/trunk/llvm-types.cpp (original) +++ dragonegg/trunk/llvm-types.cpp Wed May 12 02:31:59 2010 @@ -42,8 +42,6 @@ #include // GCC headers -#undef VISIBILITY_HIDDEN - extern "C" { #include "config.h" #include "system.h" Modified: dragonegg/trunk/x86/llvm-target.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/x86/llvm-target.cpp?rev=103589&r1=103588&r2=103589&view=diff ============================================================================== --- dragonegg/trunk/x86/llvm-target.cpp (original) +++ dragonegg/trunk/x86/llvm-target.cpp Wed May 12 02:31:59 2010 @@ -34,8 +34,6 @@ #include // GCC headers -#undef VISIBILITY_HIDDEN - extern "C" { #include "config.h" #include "system.h" From blunted2night at gmail.com Wed May 12 02:36:03 2010 From: blunted2night at gmail.com (Nathan Jeffords) Date: Wed, 12 May 2010 07:36:03 -0000 Subject: [llvm-commits] [llvm] r103590 - /llvm/trunk/lib/MC/MCSectionCOFF.cpp Message-ID: <20100512073603.7862E312800A@llvm.org> Author: njeffords Date: Wed May 12 02:36:03 2010 New Revision: 103590 URL: http://llvm.org/viewvc/llvm-project?rev=103590&view=rev Log: stylistic change to MCSectionCOFF::PrintSwitchToSection COMDAT handling Made a stylistic changed to the code/comments related to the unsupported COMDAT selection type IMAGE_COMDAT_SELECT_LARGEST based on from Anton Korobeynikov. 
Modified: llvm/trunk/lib/MC/MCSectionCOFF.cpp Modified: llvm/trunk/lib/MC/MCSectionCOFF.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCSectionCOFF.cpp?rev=103590&r1=103589&r2=103590&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCSectionCOFF.cpp (original) +++ llvm/trunk/lib/MC/MCSectionCOFF.cpp Wed May 12 02:36:03 2010 @@ -50,9 +50,6 @@ if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_LNK_COMDAT) { switch (Selection) { - default: - assert (0 && "unsupported COFF selection type"); - break; case IMAGE_COMDAT_SELECT_NODUPLICATES: OS << "\t.linkonce one_only\n"; break; @@ -65,12 +62,15 @@ case IMAGE_COMDAT_SELECT_EXACT_MATCH: OS << "\t.linkonce same_contents\n"; break; - // ".linkonce largest" is not documented as being an option. - // It seems odd that a link attribute designed essentially for PE/COFF - // wouldn't support all the options (at least as of binutils 2.20) - //case IMAGE_COMDAT_SELECT_LARGEST: + //NOTE: as of binutils 2.20, there is no way to specifiy select largest + // with the .linkonce directive. For now, we treat it as an invalid + // comdat selection value. + case IMAGE_COMDAT_SELECT_LARGEST: // OS << "\t.linkonce largest\n"; // break; + default: + assert (0 && "unsupported COFF selection type"); + break; } } } From blunted2night at gmail.com Wed May 12 02:38:22 2010 From: blunted2night at gmail.com (Nathan Jeffords) Date: Wed, 12 May 2010 00:38:22 -0700 Subject: [llvm-commits] COFF linkonce update In-Reply-To: References: Message-ID: On Wed, May 12, 2010 at 12:05 AM, Anton Korobeynikov < anton at korobeynikov.info> wrote: > > Please move this case a fallthrough to default, with a proper comment. > s/move/make > > -- > With best regards, Anton Korobeynikov > Faculty of Mathematics and Mechanics, Saint Petersburg State University > done in r103590 -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100512/03a2aac4/attachment.html From baldrick at free.fr Wed May 12 07:18:01 2010 From: baldrick at free.fr (Duncan Sands) Date: Wed, 12 May 2010 12:18:01 -0000 Subject: [llvm-commits] [dragonegg] r103591 - /dragonegg/trunk/x86/llvm-target.cpp Message-ID: <20100512121801.3FEF6312800A@llvm.org> Author: baldrick Date: Wed May 12 07:18:01 2010 New Revision: 103591 URL: http://llvm.org/viewvc/llvm-project?rev=103591&view=rev Log: Add support for more vector builtins, such as ADDPS256, when it looks like the existing code can handle it. This was done by "pattern matching", as I don't know diddly about vector operations. Modified: dragonegg/trunk/x86/llvm-target.cpp Modified: dragonegg/trunk/x86/llvm-target.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/x86/llvm-target.cpp?rev=103591&r1=103590&r2=103591&view=diff ============================================================================== --- dragonegg/trunk/x86/llvm-target.cpp (original) +++ dragonegg/trunk/x86/llvm-target.cpp Wed May 12 07:18:01 2010 @@ -82,9 +82,9 @@ static const HandlerEntry Handlers[] = { // Unsupported builtins are commented out. 
{"__builtin_ia32_addpd", &&IX86_BUILTIN_ADDPD}, - //{"__builtin_ia32_addpd256", &&IX86_BUILTIN_ADDPD256}, + {"__builtin_ia32_addpd256", &&IX86_BUILTIN_ADDPD256}, {"__builtin_ia32_addps", &&IX86_BUILTIN_ADDPS}, - //{"__builtin_ia32_addps256", &&IX86_BUILTIN_ADDPS256}, + {"__builtin_ia32_addps256", &&IX86_BUILTIN_ADDPS256}, //{"__builtin_ia32_addsd", &&IX86_BUILTIN_ADDSD}, //{"__builtin_ia32_addss", &&IX86_BUILTIN_ADDSS}, //{"__builtin_ia32_addsubpd", &&IX86_BUILTIN_ADDSUBPD}, @@ -98,13 +98,13 @@ //{"__builtin_ia32_aesimc128", &&IX86_BUILTIN_AESIMC128}, //{"__builtin_ia32_aeskeygenassist128", &&IX86_BUILTIN_AESKEYGENASSIST128}, {"__builtin_ia32_andnpd", &&IX86_BUILTIN_ANDNPD}, - //{"__builtin_ia32_andnpd256", &&IX86_BUILTIN_ANDNPD256}, + {"__builtin_ia32_andnpd256", &&IX86_BUILTIN_ANDNPD256}, {"__builtin_ia32_andnps", &&IX86_BUILTIN_ANDNPS}, - //{"__builtin_ia32_andnps256", &&IX86_BUILTIN_ANDNPS256}, + {"__builtin_ia32_andnps256", &&IX86_BUILTIN_ANDNPS256}, {"__builtin_ia32_andpd", &&IX86_BUILTIN_ANDPD}, - //{"__builtin_ia32_andpd256", &&IX86_BUILTIN_ANDPD256}, + {"__builtin_ia32_andpd256", &&IX86_BUILTIN_ANDPD256}, {"__builtin_ia32_andps", &&IX86_BUILTIN_ANDPS}, - //{"__builtin_ia32_andps256", &&IX86_BUILTIN_ANDPS256}, + {"__builtin_ia32_andps256", &&IX86_BUILTIN_ANDPS256}, //{"__builtin_ia32_blendpd", &&IX86_BUILTIN_BLENDPD}, //{"__builtin_ia32_blendpd256", &&IX86_BUILTIN_BLENDPD256}, //{"__builtin_ia32_blendps", &&IX86_BUILTIN_BLENDPS}, @@ -220,9 +220,9 @@ //{"__builtin_ia32_cvttss2si64", &&IX86_BUILTIN_CVTTSS2SI64}, //{"__builtin_ia32_cvtudq2ps", &&IX86_BUILTIN_CVTUDQ2PS}, {"__builtin_ia32_divpd", &&IX86_BUILTIN_DIVPD}, - //{"__builtin_ia32_divpd256", &&IX86_BUILTIN_DIVPD256}, + {"__builtin_ia32_divpd256", &&IX86_BUILTIN_DIVPD256}, {"__builtin_ia32_divps", &&IX86_BUILTIN_DIVPS}, - //{"__builtin_ia32_divps256", &&IX86_BUILTIN_DIVPS256}, + {"__builtin_ia32_divps256", &&IX86_BUILTIN_DIVPS256}, //{"__builtin_ia32_divsd", &&IX86_BUILTIN_DIVSD}, 
//{"__builtin_ia32_divss", &&IX86_BUILTIN_DIVSS}, //{"__builtin_ia32_dppd", &&IX86_BUILTIN_DPPD}, @@ -308,16 +308,16 @@ {"__builtin_ia32_movss", &&IX86_BUILTIN_MOVSS}, //{"__builtin_ia32_mpsadbw128", &&IX86_BUILTIN_MPSADBW128}, {"__builtin_ia32_mulpd", &&IX86_BUILTIN_MULPD}, - //{"__builtin_ia32_mulpd256", &&IX86_BUILTIN_MULPD256}, + {"__builtin_ia32_mulpd256", &&IX86_BUILTIN_MULPD256}, {"__builtin_ia32_mulps", &&IX86_BUILTIN_MULPS}, - //{"__builtin_ia32_mulps256", &&IX86_BUILTIN_MULPS256}, + {"__builtin_ia32_mulps256", &&IX86_BUILTIN_MULPS256}, //{"__builtin_ia32_mulsd", &&IX86_BUILTIN_MULSD}, //{"__builtin_ia32_mulss", &&IX86_BUILTIN_MULSS}, //{"__builtin_ia32_mwait", &&IX86_BUILTIN_MWAIT}, {"__builtin_ia32_orpd", &&IX86_BUILTIN_ORPD}, - //{"__builtin_ia32_orpd256", &&IX86_BUILTIN_ORPD256}, + {"__builtin_ia32_orpd256", &&IX86_BUILTIN_ORPD256}, {"__builtin_ia32_orps", &&IX86_BUILTIN_ORPS}, - //{"__builtin_ia32_orps256", &&IX86_BUILTIN_ORPS256}, + {"__builtin_ia32_orps256", &&IX86_BUILTIN_ORPS256}, //{"__builtin_ia32_pabsb", &&IX86_BUILTIN_PABSB}, //{"__builtin_ia32_pabsb128", &&IX86_BUILTIN_PABSB128}, //{"__builtin_ia32_pabsd", &&IX86_BUILTIN_PABSD}, @@ -611,9 +611,9 @@ {"__builtin_ia32_storeups", &&IX86_BUILTIN_STOREUPS}, //{"__builtin_ia32_storeups256", &&IX86_BUILTIN_STOREUPS256}, {"__builtin_ia32_subpd", &&IX86_BUILTIN_SUBPD}, - //{"__builtin_ia32_subpd256", &&IX86_BUILTIN_SUBPD256}, + {"__builtin_ia32_subpd256", &&IX86_BUILTIN_SUBPD256}, {"__builtin_ia32_subps", &&IX86_BUILTIN_SUBPS}, - //{"__builtin_ia32_subps256", &&IX86_BUILTIN_SUBPS256}, + {"__builtin_ia32_subps256", &&IX86_BUILTIN_SUBPS256}, //{"__builtin_ia32_subsd", &&IX86_BUILTIN_SUBSD}, //{"__builtin_ia32_subss", &&IX86_BUILTIN_SUBSS}, //{"__builtin_ia32_ucomieq", &&IX86_BUILTIN_UCOMIEQSS}, @@ -722,9 +722,9 @@ //{"__builtin_ia32_vtestzps256", &&IX86_BUILTIN_VTESTZPS256}, //{"__builtin_ia32_vzeroall", &&IX86_BUILTIN_VZEROALL}, {"__builtin_ia32_xorpd", &&IX86_BUILTIN_XORPD}, - 
//{"__builtin_ia32_xorpd256", &&IX86_BUILTIN_XORPD256}, + {"__builtin_ia32_xorpd256", &&IX86_BUILTIN_XORPD256}, {"__builtin_ia32_xorps", &&IX86_BUILTIN_XORPS}, - //{"__builtin_ia32_xorps256", &&IX86_BUILTIN_XORPS256}, + {"__builtin_ia32_xorps256", &&IX86_BUILTIN_XORPS256}, }; static std::vector FunctionCodeCache; @@ -754,7 +754,9 @@ unknown: return false; IX86_BUILTIN_ADDPS: + IX86_BUILTIN_ADDPS256: IX86_BUILTIN_ADDPD: + IX86_BUILTIN_ADDPD256: Result = Builder.CreateFAdd(Ops[0], Ops[1]); return true; IX86_BUILTIN_PADDB: @@ -768,7 +770,9 @@ Result = Builder.CreateAdd(Ops[0], Ops[1]); return true; IX86_BUILTIN_SUBPS: + IX86_BUILTIN_SUBPS256: IX86_BUILTIN_SUBPD: + IX86_BUILTIN_SUBPD256: Result = Builder.CreateFSub(Ops[0], Ops[1]); return true; IX86_BUILTIN_PSUBB: @@ -782,7 +786,9 @@ Result = Builder.CreateSub(Ops[0], Ops[1]); return true; IX86_BUILTIN_MULPS: + IX86_BUILTIN_MULPS256: IX86_BUILTIN_MULPD: + IX86_BUILTIN_MULPD256: Result = Builder.CreateFMul(Ops[0], Ops[1]); return true; IX86_BUILTIN_PMULLW: @@ -790,7 +796,9 @@ Result = Builder.CreateMul(Ops[0], Ops[1]); return true; IX86_BUILTIN_DIVPS: + IX86_BUILTIN_DIVPS256: IX86_BUILTIN_DIVPD: + IX86_BUILTIN_DIVPD256: Result = Builder.CreateFDiv(Ops[0], Ops[1]); return true; IX86_BUILTIN_PAND: @@ -811,28 +819,36 @@ Result = Builder.CreateXor(Ops[0], Ops[1]); return true; IX86_BUILTIN_ANDPS: + IX86_BUILTIN_ANDPS256: IX86_BUILTIN_ANDPD: + IX86_BUILTIN_ANDPD256: Ops[0] = BitCastToIntVector(Ops[0], Builder); Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()); Result = Builder.CreateAnd(Ops[0], Ops[1]); Result = Builder.CreateBitCast(Result, ResultType); return true; IX86_BUILTIN_ORPS: + IX86_BUILTIN_ORPS256: IX86_BUILTIN_ORPD: + IX86_BUILTIN_ORPD256: Ops[0] = BitCastToIntVector(Ops[0], Builder); Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()); Result = Builder.CreateOr(Ops[0], Ops[1]); Result = Builder.CreateBitCast(Result, ResultType); return true; IX86_BUILTIN_XORPS: + IX86_BUILTIN_XORPS256: 
IX86_BUILTIN_XORPD: + IX86_BUILTIN_XORPD256: Ops[0] = BitCastToIntVector(Ops[0], Builder); Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()); Result = Builder.CreateXor(Ops[0], Ops[1]); Result = Builder.CreateBitCast(Result, ResultType); return true; IX86_BUILTIN_ANDNPS: + IX86_BUILTIN_ANDNPS256: IX86_BUILTIN_ANDNPD: + IX86_BUILTIN_ANDNPD256: Ops[0] = BitCastToIntVector(Ops[0], Builder); Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType()); Ops[0] = Builder.CreateNot(Ops[0]); From baldrick at free.fr Wed May 12 08:06:12 2010 From: baldrick at free.fr (Duncan Sands) Date: Wed, 12 May 2010 13:06:12 -0000 Subject: [llvm-commits] [dragonegg] r103592 - /dragonegg/trunk/x86/llvm-target.cpp Message-ID: <20100512130612.E63EA312800A@llvm.org> Author: baldrick Date: Wed May 12 08:06:12 2010 New Revision: 103592 URL: http://llvm.org/viewvc/llvm-project?rev=103592&view=rev Log: Port commit 99592 (echristo) from llvm-gcc: Lower IX86_BUILTIN_PMULLD128 in the x86 backend to the simple multiplication it is. 
Modified: dragonegg/trunk/x86/llvm-target.cpp Modified: dragonegg/trunk/x86/llvm-target.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/x86/llvm-target.cpp?rev=103592&r1=103591&r2=103592&view=diff ============================================================================== --- dragonegg/trunk/x86/llvm-target.cpp (original) +++ dragonegg/trunk/x86/llvm-target.cpp Wed May 12 08:06:12 2010 @@ -467,7 +467,7 @@ //{"__builtin_ia32_pmulhuw128", &&IX86_BUILTIN_PMULHUW128}, //{"__builtin_ia32_pmulhw", &&IX86_BUILTIN_PMULHW}, //{"__builtin_ia32_pmulhw128", &&IX86_BUILTIN_PMULHW128}, - //{"__builtin_ia32_pmulld128", &&IX86_BUILTIN_PMULLD128}, + {"__builtin_ia32_pmulld128", &&IX86_BUILTIN_PMULLD128}, {"__builtin_ia32_pmullw", &&IX86_BUILTIN_PMULLW}, {"__builtin_ia32_pmullw128", &&IX86_BUILTIN_PMULLW128}, //{"__builtin_ia32_pmuludq", &&IX86_BUILTIN_PMULUDQ}, @@ -793,6 +793,7 @@ return true; IX86_BUILTIN_PMULLW: IX86_BUILTIN_PMULLW128: + IX86_BUILTIN_PMULLD128: Result = Builder.CreateMul(Ops[0], Ops[1]); return true; IX86_BUILTIN_DIVPS: From baldrick at free.fr Wed May 12 08:38:44 2010 From: baldrick at free.fr (Duncan Sands) Date: Wed, 12 May 2010 13:38:44 -0000 Subject: [llvm-commits] [dragonegg] r103613 - /dragonegg/trunk/llvm-convert.cpp Message-ID: <20100512133844.7827C312800A@llvm.org> Author: baldrick Date: Wed May 12 08:38:44 2010 New Revision: 103613 URL: http://llvm.org/viewvc/llvm-project?rev=103613&view=rev Log: Port commit 99593 (echristo) from llvm-gcc: Try to lower in the backend first, it may know what to do better for the target. Modified: dragonegg/trunk/llvm-convert.cpp Modified: dragonegg/trunk/llvm-convert.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/llvm-convert.cpp?rev=103613&r1=103612&r2=103613&view=diff ============================================================================== --- dragonegg/trunk/llvm-convert.cpp (original) +++ dragonegg/trunk/llvm-convert.cpp Wed May 12 08:38:44 2010 @@ -3428,8 +3428,7 @@ // ... 
Builtin Function Expansion ... //===----------------------------------------------------------------------===// -/// EmitFrontendExpandedBuiltinCall - For MD builtins that do not have a -/// directly corresponding LLVM intrinsic, we allow the target to do some amount +/// EmitFrontendExpandedBuiltinCall - We allow the target to do some amount /// of lowering. This allows us to avoid having intrinsics for operations that /// directly correspond to LLVM constructs. /// @@ -3581,15 +3580,17 @@ #ifdef LLVM_TARGET_INTRINSIC_PREFIX TargetPrefix = LLVM_TARGET_INTRINSIC_PREFIX; #endif + // If the backend has some special code to lower, go ahead and try to + // do that first. + if (EmitFrontendExpandedBuiltinCall(stmt, fndecl, DestLoc, Result)) + return true; + // If this builtin directly corresponds to an LLVM intrinsic, get the // IntrinsicID now. const char *BuiltinName = IDENTIFIER_POINTER(DECL_NAME(fndecl)); Intrinsic::ID IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(TargetPrefix, BuiltinName); if (IntrinsicID == Intrinsic::not_intrinsic) { - if (EmitFrontendExpandedBuiltinCall(stmt, fndecl, DestLoc, Result)) - return true; - error_at(gimple_location(stmt), "unsupported target builtin %<%s%> used", BuiltinName); const Type *ResTy = ConvertType(gimple_call_return_type(stmt)); From dag at cray.com Wed May 12 10:01:23 2010 From: dag at cray.com (David Greene) Date: Wed, 12 May 2010 10:01:23 -0500 Subject: [llvm-commits] [llvm] r103427 - /llvm/trunk/test/Other/2010-05-60-Printer.ll In-Reply-To: References: <20100510205106.4F283312800A@llvm.org> <201005111828.49367.dag@cray.com> Message-ID: <201005121001.24079.dag@cray.com> On Wednesday 12 May 2010 00:32:24 Chris Lattner wrote: > If you're not using lit, I strongly recommend it, it supports parallel > testing, which makes regression tests run much faster on a multicore > machine. To use it, just do: > > cd llvm/test; make check-lit Since the buildbots do this, perhaps "make check" should do the same. 
I think it's worth changing "make check" to use lit and "make check-deja" to use the old framework (which should be removed at some point). What do you think? -Dave From daniel at zuster.org Wed May 12 10:42:59 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 15:42:59 -0000 Subject: [llvm-commits] [llvm] r103616 - in /llvm/trunk: include/llvm/MC/MCAsmLayout.h lib/MC/MCAssembler.cpp Message-ID: <20100512154259.78F2B312800A@llvm.org> Author: ddunbar Date: Wed May 12 10:42:59 2010 New Revision: 103616 URL: http://llvm.org/viewvc/llvm-project?rev=103616&view=rev Log: MC: Track section layout order explicitly, and use to simplify. Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAsmLayout.h?rev=103616&r1=103615&r2=103616&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAsmLayout.h (original) +++ llvm/trunk/include/llvm/MC/MCAsmLayout.h Wed May 12 10:42:59 2010 @@ -10,6 +10,8 @@ #ifndef LLVM_MC_MCASMLAYOUT_H #define LLVM_MC_MCASMLAYOUT_H +#include "llvm/ADT/SmallVector.h" + namespace llvm { class MCAssembler; class MCFragment; @@ -24,11 +26,18 @@ /// efficiently compute the exact addresses of any symbol in the assembly file, /// even during the relaxation process. class MCAsmLayout { +public: + typedef llvm::SmallVectorImpl::const_iterator const_iterator; + typedef llvm::SmallVectorImpl::iterator iterator; + private: MCAssembler &Assembler; + /// List of sections in layout order. + llvm::SmallVector SectionOrder; + public: - MCAsmLayout(MCAssembler &_Assembler) : Assembler(_Assembler) {} + MCAsmLayout(MCAssembler &_Assembler); /// Get the assembler object this is a layout for. MCAssembler &getAssembler() const { return Assembler; } @@ -38,6 +47,16 @@ /// the delta from the old size. 
void UpdateForSlide(MCFragment *F, int SlideAmount); + /// @name Section Access (in layout order) + /// @{ + + iterator begin() { return SectionOrder.begin(); } + const_iterator begin() const { return SectionOrder.begin(); } + + iterator end() {return SectionOrder.end();} + const_iterator end() const {return SectionOrder.end();} + + /// @} /// @name Fragment Layout Data /// @{ Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103616&r1=103615&r2=103616&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 10:42:59 2010 @@ -47,6 +47,16 @@ /* *** */ +MCAsmLayout::MCAsmLayout(MCAssembler &Asm) : Assembler(Asm) { + // Compute the section layout order. Virtual sections must go last. + for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) + if (!Asm.getBackend().isVirtualSection(it->getSection())) + SectionOrder.push_back(&*it); + for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) + if (Asm.getBackend().isVirtualSection(it->getSection())) + SectionOrder.push_back(&*it); +} + void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) { // We shouldn't have to do anything special to support negative slides, and it // is a perfectly valid thing to do as long as other parts of the system can @@ -59,24 +69,10 @@ // FIXME-PERF: This is O(N^2), but will be eliminated once we get smarter. // Layout the concrete sections and fragments. - MCAssembler &Asm = getAssembler(); uint64_t Address = 0; - for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { - // Skip virtual sections. - if (Asm.getBackend().isVirtualSection(it->getSection())) - continue; - - // Layout the section fragments and its size. - Address = Asm.LayoutSection(*it, *this, Address); - } - - // Layout the virtual sections. 
- for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { - if (!Asm.getBackend().isVirtualSection(it->getSection())) - continue; - + for (iterator it = begin(), ie = end(); it != ie; ++it) { // Layout the section fragments and its size. - Address = Asm.LayoutSection(*it, *this, Address); + Address = getAssembler().LayoutSection(**it, *this, Address); } } @@ -711,22 +707,10 @@ // Layout the concrete sections and fragments. uint64_t Address = 0; - for (iterator it = begin(), ie = end(); it != ie; ++it) { - // Skip virtual sections. - if (getBackend().isVirtualSection(it->getSection())) - continue; - - // Layout the section fragments and its size. - Address = LayoutSection(*it, Layout, Address); - } - - // Layout the virtual sections. - for (iterator it = begin(), ie = end(); it != ie; ++it) { - if (!getBackend().isVirtualSection(it->getSection())) - continue; - + for (MCAsmLayout::iterator it = Layout.begin(), + ie = Layout.end(); it != ie; ++it) { // Layout the section fragments and its size. - Address = LayoutSection(*it, Layout, Address); + Address = LayoutSection(**it, Layout, Address); } // Scan for fragments that need relaxation. From baldrick at free.fr Wed May 12 11:01:19 2010 From: baldrick at free.fr (Duncan Sands) Date: Wed, 12 May 2010 16:01:19 -0000 Subject: [llvm-commits] [dragonegg] r103617 - /dragonegg/trunk/x86/llvm-target.cpp Message-ID: <20100512160119.306C5312800A@llvm.org> Author: baldrick Date: Wed May 12 11:01:19 2010 New Revision: 103617 URL: http://llvm.org/viewvc/llvm-project?rev=103617&view=rev Log: Fix thinko: if the name is not found, then that doesn't mean that the end iterator is returned. 
Modified: dragonegg/trunk/x86/llvm-target.cpp Modified: dragonegg/trunk/x86/llvm-target.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/x86/llvm-target.cpp?rev=103617&r1=103616&r2=103617&view=diff ============================================================================== --- dragonegg/trunk/x86/llvm-target.cpp (original) +++ dragonegg/trunk/x86/llvm-target.cpp Wed May 12 11:01:19 2010 @@ -745,7 +745,8 @@ #endif HandlerEntry ToFind = {IDENTIFIER_POINTER(DECL_NAME(fndecl)), NULL}; const HandlerEntry *E = std::lower_bound(Handlers, Handlers + N, ToFind, LT); - Handler = E == Handlers + N ? &&unknown : E->Handler; + Handler = (E == Handlers + N) || strcmp(E->Name, ToFind.Name) ? + &&unknown : E->Handler; } bool flip = false; From clattner at apple.com Wed May 12 12:10:12 2010 From: clattner at apple.com (Chris Lattner) Date: Wed, 12 May 2010 10:10:12 -0700 Subject: [llvm-commits] [llvm] r103427 - /llvm/trunk/test/Other/2010-05-60-Printer.ll In-Reply-To: <201005121001.24079.dag@cray.com> References: <20100510205106.4F283312800A@llvm.org> <201005111828.49367.dag@cray.com> <201005121001.24079.dag@cray.com> Message-ID: On May 12, 2010, at 8:01 AM, David Greene wrote: > On Wednesday 12 May 2010 00:32:24 Chris Lattner wrote: > >> If you're not using lit, I strongly recommend it, it supports parallel >> testing, which makes regression tests run much faster on a multicore >> machine. To use it, just do: >> >> cd llvm/test; make check-lit > > Since the buildbots do this, perhaps "make check" should do the same. > I think it's worth changing "make check" to use lit and "make check-deja" > to use the old framework (which should be removed at some point). > > What do you think? I'd much rather just kill off dejagnu completely, but changing the default to be lit and making dejagnu available under 'check-dj' or something would be a good second bet. 
-Chris From baldrick at free.fr Wed May 12 12:10:37 2010 From: baldrick at free.fr (Duncan Sands) Date: Wed, 12 May 2010 17:10:37 -0000 Subject: [llvm-commits] [dragonegg] r103622 - /dragonegg/trunk/x86/llvm-target.cpp Message-ID: <20100512171037.2FF01312800A@llvm.org> Author: baldrick Date: Wed May 12 12:10:37 2010 New Revision: 103622 URL: http://llvm.org/viewvc/llvm-project?rev=103622&view=rev Log: TargetIntrinsicLower is only supposed to set Result if there is a value to be returned, so it is wrong to set it to a StoreInst for example. Fix a bunch of mistakes of this kind. Modified: dragonegg/trunk/x86/llvm-target.cpp Modified: dragonegg/trunk/x86/llvm-target.cpp URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/x86/llvm-target.cpp?rev=103622&r1=103621&r2=103622&view=diff ============================================================================== --- dragonegg/trunk/x86/llvm-target.cpp (original) +++ dragonegg/trunk/x86/llvm-target.cpp Wed May 12 12:10:37 2010 @@ -1036,7 +1036,6 @@ Value *BC = Builder.CreateBitCast(Ops[0], v4f32Ptr); StoreInst *SI = Builder.CreateStore(Ops[1], BC); SI->setAlignment(1); - Result = SI; return true; } IX86_BUILTIN_STOREUPD: { @@ -1045,7 +1044,6 @@ Value *BC = Builder.CreateBitCast(Ops[0], v2f64Ptr); StoreInst *SI = Builder.CreateStore(Ops[1], BC); SI->setAlignment(1); - Result = SI; return true; } IX86_BUILTIN_STOREDQU: { @@ -1054,7 +1052,6 @@ Value *BC = Builder.CreateBitCast(Ops[0], v16i8Ptr); StoreInst *SI = Builder.CreateStore(Ops[1], BC); SI->setAlignment(1); - Result = SI; return true; } IX86_BUILTIN_LOADHPS: { @@ -1100,7 +1097,7 @@ Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), 1); Ops[1] = Builder.CreateBitCast(Ops[1], v2f64); Ops[1] = Builder.CreateExtractElement(Ops[1], Idx); - Result = Builder.CreateStore(Ops[1], Ops[0]); + Builder.CreateStore(Ops[1], Ops[0]); return true; } IX86_BUILTIN_STORELPS: { @@ -1110,7 +1107,7 @@ Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), 0); Ops[1] = 
Builder.CreateBitCast(Ops[1], v2f64); Ops[1] = Builder.CreateExtractElement(Ops[1], Idx); - Result = Builder.CreateStore(Ops[1], Ops[0]); + Builder.CreateStore(Ops[1], Ops[0]); return true; } IX86_BUILTIN_MOVSHDUP: @@ -1256,7 +1253,7 @@ Value *Ptr = CreateTemporary(Type::getInt32Ty(Context)); Builder.CreateStore(Ops[0], Ptr); Ptr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); - Result = Builder.CreateCall(ldmxcsr, Ptr); + Builder.CreateCall(ldmxcsr, Ptr); return true; } IX86_BUILTIN_STMXCSR: { From daniel at zuster.org Wed May 12 12:12:44 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 17:12:44 -0000 Subject: [llvm-commits] [zorg] r103623 - /zorg/trunk/zorg/buildbot/builders/ChrootSetup.py Message-ID: <20100512171244.BCF63312800A@llvm.org> Author: ddunbar Date: Wed May 12 12:12:44 2010 New Revision: 103623 URL: http://llvm.org/viewvc/llvm-project?rev=103623&view=rev Log: buildbot/ChrootSetup: Add support for images which contain packages to install. Modified: zorg/trunk/zorg/buildbot/builders/ChrootSetup.py Modified: zorg/trunk/zorg/buildbot/builders/ChrootSetup.py URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/zorg/buildbot/builders/ChrootSetup.py?rev=103623&r1=103622&r2=103623&view=diff ============================================================================== --- zorg/trunk/zorg/buildbot/builders/ChrootSetup.py (original) +++ zorg/trunk/zorg/buildbot/builders/ChrootSetup.py Wed May 12 12:12:44 2010 @@ -33,7 +33,14 @@ workdir=build_root_name)) # For each image... - for i,image in enumerate(build_root_images): + for i,image_info in enumerate(build_root_images): + # If this image is (image, package name) then assume this is a dmg with + # a package inside, which we should install. + if isinstance(image_info, tuple): + image,package_to_install = image_info + else: + image,package_to_install = image_info,None + # Setup the build root we will build projects in. 
f.addStep(ShellCommand( name="attach.buildroot", @@ -60,17 +67,26 @@ haltOnFailure=True, workdir="mounts")) - # Restore the build root. - cmd = ["sudo", "rsync", "-arv"] - if i == 0: - cmd.append("--delete") - cmd.extend([WithProperties("%%(%s)s/" % mount_point_property), "./"]) + # Check whether we are install the package, or restoring the disk + # directly. + if package_to_install: + cmd = ["sudo", "installer", "-verboseR", "-pkg", + WithProperties("%%(%s)s/%s" % (mount_point_property, + package_to_install)), + "-target", + WithProperties("%%(builddir)s/%s" % build_root_name)] + else: + # Restore the build root. + cmd = ["sudo", "rsync", "-arv"] + if i == 0: + cmd.append("--delete") + cmd.extend(["--exclude", "/dev"]) + cmd.extend([WithProperties("%%(%s)s/" % mount_point_property), + "./"]) f.addStep(ShellCommand( name="init.buildroot.%d" % i, command=cmd, - warnOnFailure=True, - flunkOnFailure=False, - haltOnFailure=False, + haltOnFailure=True, description="init build root", workdir=build_root_name)) From daniel_dunbar at apple.com Wed May 12 12:22:30 2010 From: daniel_dunbar at apple.com (Daniel Dunbar) Date: Wed, 12 May 2010 10:22:30 -0700 Subject: [llvm-commits] [llvm] r103427 - /llvm/trunk/test/Other/2010-05-60-Printer.ll In-Reply-To: References: <20100510205106.4F283312800A@llvm.org> <201005111828.49367.dag@cray.com> <201005121001.24079.dag@cray.com> Message-ID: On May 12, 2010, at 10:10 AM, Chris Lattner wrote: > > On May 12, 2010, at 8:01 AM, David Greene wrote: > >> On Wednesday 12 May 2010 00:32:24 Chris Lattner wrote: >> >>> If you're not using lit, I strongly recommend it, it supports parallel >>> testing, which makes regression tests run much faster on a multicore >>> machine. To use it, just do: >>> >>> cd llvm/test; make check-lit >> >> Since the buildbots do this, perhaps "make check" should do the same. >> I think it's worth changing "make check" to use lit and "make check-deja" >> to use the old framework (which should be removed at some point). 
>> >> What do you think? > > I'd much rather just kill off dejagnu completely, but changing the default to be lit and making dejagnu available under 'check-dj' or something would be a good second bet. All on my list, and/or in bugzilla. Patches welcome, too. :) - Daniel > > -Chris From daniel at zuster.org Wed May 12 12:56:42 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 17:56:42 -0000 Subject: [llvm-commits] [llvm] r103625 - /llvm/trunk/utils/lit/lit/lit.py Message-ID: <20100512175642.5D397312800A@llvm.org> Author: ddunbar Date: Wed May 12 12:56:42 2010 New Revision: 103625 URL: http://llvm.org/viewvc/llvm-project?rev=103625&view=rev Log: lit: Add support for 'lit ... @foo', which reads a list of tests to run from foo. Modified: llvm/trunk/utils/lit/lit/lit.py Modified: llvm/trunk/utils/lit/lit/lit.py URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/lit/lit.py?rev=103625&r1=103624&r2=103625&view=diff ============================================================================== --- llvm/trunk/utils/lit/lit/lit.py (original) +++ llvm/trunk/utils/lit/lit/lit.py Wed May 12 12:56:42 2010 @@ -490,11 +490,27 @@ isWindows = (platform.system()=='Windows'), params = userParams) + # Expand '@...' form in inputs. + actual_inputs = [] + for input in inputs: + if os.path.exists(input) or not input.startswith('@'): + actual_inputs.append(input) + else: + f = open(input[1:]) + try: + for ln in f: + ln = ln.strip() + if ln: + actual_inputs.append(ln) + finally: + f.close() + + # Load the tests from the inputs. 
tests = [] testSuiteCache = {} localConfigCache = {} - for input in inputs: + for input in actual_inputs: prev = len(tests) tests.extend(getTests(input, litConfig, testSuiteCache, localConfigCache)[1]) From daniel at zuster.org Wed May 12 12:56:44 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 17:56:44 -0000 Subject: [llvm-commits] [llvm] r103626 - /llvm/trunk/utils/lit/lit/TestFormats.py Message-ID: <20100512175644.EFCD33128018@llvm.org> Author: ddunbar Date: Wed May 12 12:56:44 2010 New Revision: 103626 URL: http://llvm.org/viewvc/llvm-project?rev=103626&view=rev Log: lit: Fix OneCommandPerFileTest format when tests are specified directly. Modified: llvm/trunk/utils/lit/lit/TestFormats.py Modified: llvm/trunk/utils/lit/lit/TestFormats.py URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/lit/TestFormats.py?rev=103626&r1=103625&r2=103626&view=diff ============================================================================== --- llvm/trunk/utils/lit/lit/TestFormats.py (original) +++ llvm/trunk/utils/lit/lit/TestFormats.py Wed May 12 12:56:44 2010 @@ -183,8 +183,10 @@ self.createTempInput(tmp, test) tmp.flush() cmd.append(tmp.name) - else: + elif hasattr(test, 'source_path'): cmd.append(test.source_path) + else: + cmd.append(test.getSourcePath()) out, err, exitCode = TestRunner.executeCommand(cmd) From daniel at zuster.org Wed May 12 12:56:48 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 17:56:48 -0000 Subject: [llvm-commits] [llvm] r103627 - in /llvm/trunk: include/llvm/MC/MCAsmLayout.h include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp Message-ID: <20100512175648.134833128026@llvm.org> Author: ddunbar Date: Wed May 12 12:56:47 2010 New Revision: 103627 URL: http://llvm.org/viewvc/llvm-project?rev=103627&view=rev Log: MC: Simplify LayoutSection to just take the index of the section to layout. 
Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAsmLayout.h?rev=103627&r1=103626&r2=103627&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAsmLayout.h (original) +++ llvm/trunk/include/llvm/MC/MCAsmLayout.h Wed May 12 12:56:47 2010 @@ -50,11 +50,12 @@ /// @name Section Access (in layout order) /// @{ - iterator begin() { return SectionOrder.begin(); } - const_iterator begin() const { return SectionOrder.begin(); } - - iterator end() {return SectionOrder.end();} - const_iterator end() const {return SectionOrder.end();} + llvm::SmallVectorImpl<MCSectionData*> &getSectionOrder() { + return SectionOrder; + } + const llvm::SmallVectorImpl<MCSectionData*> &getSectionOrder() const { + return SectionOrder; + } /// @} /// @name Fragment Layout Data Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103627&r1=103626&r2=103627&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Wed May 12 12:56:47 2010 @@ -668,14 +668,10 @@ bool FragmentNeedsRelaxation(const MCInstFragment *IF, const MCAsmLayout &Layout) const; - /// LayoutSection - Assign the section the given \arg StartAddress, and then - /// assign offsets and sizes to the fragments in the section \arg SD, and - /// update the section size. - /// - /// \return The address at the end of the section, for use in laying out the - /// succeeding section. - uint64_t LayoutSection(MCSectionData &SD, MCAsmLayout &Layout, - uint64_t StartAddress); + /// LayoutSection - Performs layout of the section referenced by the given + /// \arg SectionOrderIndex.
The layout assumes that the previous section has + /// already been layed out correctly. + void LayoutSection(MCAsmLayout &Layout, unsigned SectionOrderIndex); /// LayoutOnce - Perform one layout iteration and return true if any offsets /// were adjusted. Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103627&r1=103626&r2=103627&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 12:56:47 2010 @@ -68,12 +68,9 @@ // // FIXME-PERF: This is O(N^2), but will be eliminated once we get smarter. - // Layout the concrete sections and fragments. - uint64_t Address = 0; - for (iterator it = begin(), ie = end(); it != ie; ++it) { - // Layout the section fragments and its size. - Address = getAssembler().LayoutSection(**it, *this, Address); - } + // Layout the sections in order. + for (unsigned i = 0, e = getSectionOrder().size(); i != e; ++i) + getAssembler().LayoutSection(*this, i); } uint64_t MCAsmLayout::getFragmentAddress(const MCFragment *F) const { @@ -365,13 +362,20 @@ return IsResolved; } -uint64_t MCAssembler::LayoutSection(MCSectionData &SD, - MCAsmLayout &Layout, - uint64_t StartAddress) { +void MCAssembler::LayoutSection(MCAsmLayout &Layout, + unsigned SectionOrderIndex) { + MCSectionData &SD = *Layout.getSectionOrder()[SectionOrderIndex]; bool IsVirtual = getBackend().isVirtualSection(SD.getSection()); ++stats::SectionLayouts; + // Get the section start address. + uint64_t StartAddress = 0; + if (SectionOrderIndex) { + MCSectionData *Prev = Layout.getSectionOrder()[SectionOrderIndex - 1]; + StartAddress = Layout.getSectionAddress(Prev) + Layout.getSectionSize(Prev); + } + // Align this section if necessary by adding padding bytes to the previous // section. 
It is safe to adjust this out-of-band, because no symbol or // fragment is allowed to point past the end of the section at any time. @@ -469,8 +473,6 @@ Layout.setSectionFileSize(&SD, 0); else Layout.setSectionFileSize(&SD, Address - StartAddress); - - return Address; } /// WriteFragmentData - Write the \arg F data to the output file. @@ -705,13 +707,9 @@ bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) { ++stats::RelaxationSteps; - // Layout the concrete sections and fragments. - uint64_t Address = 0; - for (MCAsmLayout::iterator it = Layout.begin(), - ie = Layout.end(); it != ie; ++it) { - // Layout the section fragments and its size. - Address = LayoutSection(**it, Layout, Address); - } + // Layout the sections in order. + for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) + LayoutSection(Layout, i); // Scan for fragments that need relaxation. bool WasRelaxed = false; From dag at cray.com Wed May 12 13:02:16 2010 From: dag at cray.com (David Greene) Date: Wed, 12 May 2010 13:02:16 -0500 Subject: [llvm-commits] [llvm] r103427 - /llvm/trunk/test/Other/2010-05-60-Printer.ll In-Reply-To: References: <20100510205106.4F283312800A@llvm.org> <201005121001.24079.dag@cray.com> Message-ID: <201005121302.17180.dag@cray.com> On Wednesday 12 May 2010 12:10:12 Chris Lattner wrote: > > Since the buildbots do this, perhaps "make check" should do the same. > > I think it's worth changing "make check" to use lit and "make check-deja" > > to use the old framework (which should be removed at some point). > > > > What do you think? > > I'd much rather just kill off dejagnu completely, but changing the default > to be lit and making dejagnu available under 'check-dj' or something would > be a good second bet. You'll get no complaint from me if you kill off DejaGNU entirely. 
:) -Dave From gkistanova at gmail.com Wed May 12 13:01:59 2010 From: gkistanova at gmail.com (Galina Kistanova) Date: Wed, 12 May 2010 18:01:59 -0000 Subject: [llvm-commits] [zorg] r103628 - /zorg/trunk/buildbot/osuosl/master/config/builders.py Message-ID: <20100512180159.CDC3A312800A@llvm.org> Author: gkistanova Date: Wed May 12 13:01:59 2010 New Revision: 103628 URL: http://llvm.org/viewvc/llvm-project?rev=103628&view=rev Log: Added new buildbot builder for native build (x86_64-apple-darwin10) of cross llvm-gcc (for i686-pc-mingw32); updated llvm-gcc-x86_64-darwin10-cross-mingw32 build to use build-4-mingw32 script Modified: zorg/trunk/buildbot/osuosl/master/config/builders.py Modified: zorg/trunk/buildbot/osuosl/master/config/builders.py URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/buildbot/osuosl/master/config/builders.py?rev=103628&r1=103627&r2=103628&view=diff ============================================================================== --- zorg/trunk/buildbot/osuosl/master/config/builders.py (original) +++ zorg/trunk/buildbot/osuosl/master/config/builders.py Wed May 12 13:01:59 2010 @@ -253,6 +253,39 @@ extra_configure_args=['--disable-multilib']), 'category' : 'llvm-gcc.exp' }, + {'name' : "llvm-gcc-x86_64-darwin10-self-mingw32", + 'slavenames': [ "kistanova1" ], + 'builddir' : "llvm-gcc-x86_64-darwin10-self-mingw32", + 'factory' : ScriptedBuilder.getScriptedBuildFactory( + source_code = [SVN(name='svn-llvm', + mode='update', baseURL='http://llvm.org/svn/llvm-project/llvm/', + defaultBranch='trunk', + workdir="llvm.src"), + SVN(name='svn-llvm-gcc', + mode='update', baseURL='http://llvm.org/svn/llvm-project/llvm-gcc-4.2/', + defaultBranch='trunk', + workdir="llvm-gcc.src"),], + launcher = 'llvm-gcc.src/extras/buildbot-launcher', + build_script = 'llvm-gcc.src/extras/build-4-mingw32', + extra_args = [], + build_steps = [{'name' : 'configure_llvm', + 'description' : 'Configure LLVM', + 'haltOnFailure' : True }, + {'name' : 'make_llvm', + 'description' 
: 'Make LLVM', + 'extra_args' : ['-j8'], # Extra step-specific properties + 'haltOnFailure' : True }, + {'name' : 'configure_llvmgcc', + 'description' : 'Configure LLVM-GCC', + 'haltOnFailure' : True }, + {'name' : 'make_llvmgcc', + 'description' : 'Make LLVM-GCC', + 'haltOnFailure' : True }, + {'name' : 'install_llvmgcc', + 'description' : 'Install LLVM-GCC', + 'haltOnFailure' : True },]), + 'category' : 'llvm-gcc' }, + {'name' : "llvm-gcc-x86_64-darwin10-cross-mingw32", 'slavenames': [ "kistanova1" ], 'builddir' : "llvm-gcc-x86_64-darwin10-cross-mingw32", @@ -266,7 +299,7 @@ defaultBranch='trunk', workdir="llvm-gcc.src"),], launcher = 'llvm-gcc.src/extras/buildbot-launcher', - build_script = 'llvm-gcc.src/extras/build-4-mingw32', + build_script = 'llvm-gcc.src/extras/build-x-4-mingw32', extra_args = [], build_steps = [{'name' : 'configure_llvm', 'description' : 'Configure LLVM', @@ -275,6 +308,12 @@ 'description' : 'Make LLVM', 'extra_args' : ['-j8'], # Extra step-specific properties 'haltOnFailure' : True }, + {'name' : 'test_llvm', + 'description' : 'Test LLVM', + 'haltOnFailure' : False }, + {'name' : 'cross_tools', + 'description' : 'Copy cross-tools', + 'haltOnFailure' : True }, {'name' : 'configure_llvmgcc', 'description' : 'Configure LLVM-GCC', 'haltOnFailure' : True }, From dpatel at apple.com Wed May 12 13:30:15 2010 From: dpatel at apple.com (Devang Patel) Date: Wed, 12 May 2010 18:30:15 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103633 - /llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Message-ID: <20100512183015.9815A312800A@llvm.org> Author: dpatel Date: Wed May 12 13:30:15 2010 New Revision: 103633 URL: http://llvm.org/viewvc/llvm-project?rev=103633&view=rev Log: Handle pointer to data member directly. 
Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp?rev=103633&r1=103632&r2=103633&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Wed May 12 13:30:15 2010 @@ -457,6 +457,8 @@ // Construct variable. DIScope VarScope = DIScope(cast(RegionStack.back())); DIType Ty = getOrCreateType(type); + if (!Ty && TREE_CODE(type) == OFFSET_TYPE) + Ty = createPointerType(TREE_TYPE(type)); if (DECL_ARTIFICIAL (decl)) Ty = DebugFactory.CreateArtificialType(Ty); // If type info is not available then do not emit debug info for this var. @@ -684,10 +686,8 @@ DIType FromTy = getOrCreateType(TREE_TYPE(type)); // type* and type& // FIXME: Should BLOCK_POINTER_TYP have its own DW_TAG? - unsigned Tag = (TREE_CODE(type) == POINTER_TYPE || - TREE_CODE(type) == BLOCK_POINTER_TYPE) ? - DW_TAG_pointer_type : - DW_TAG_reference_type; + unsigned Tag = TREE_CODE(type) == REFERENCE_TYPE ? + DW_TAG_reference_type: DW_TAG_pointer_type; unsigned Flags = 0; if (type_is_block_byref_struct(type)) Flags |= llvm::DIType::FlagBlockByrefStruct; From dpatel at apple.com Wed May 12 13:31:04 2010 From: dpatel at apple.com (Devang Patel) Date: Wed, 12 May 2010 18:31:04 -0000 Subject: [llvm-commits] [llvm] r103634 - /llvm/trunk/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp Message-ID: <20100512183104.ECD6B312800A@llvm.org> Author: dpatel Date: Wed May 12 13:31:04 2010 New Revision: 103634 URL: http://llvm.org/viewvc/llvm-project?rev=103634&view=rev Log: Test case for r103633. 
Added: llvm/trunk/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp Added: llvm/trunk/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/FrontendC%2B%2B/2010-05-12-PtrToMember-Dbg.cpp?rev=103634&view=auto ============================================================================== --- llvm/trunk/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp (added) +++ llvm/trunk/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp Wed May 12 13:31:04 2010 @@ -0,0 +1,17 @@ +//RUN: %llvmgxx -O0 -emit-llvm -S -g -o - %s | grep DW_TAG_auto_variable +class Foo +{ + public: + int x; + int y; + Foo (int i, int j) { x = i; y = j; } +}; + + +Foo foo(10, 11); + +int main() { + int Foo::* pmi = &Foo::y; + return foo.*pmi; +} + From stoklund at 2pi.dk Wed May 12 13:46:03 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Wed, 12 May 2010 18:46:03 -0000 Subject: [llvm-commits] [llvm] r103635 - in /llvm/trunk: lib/CodeGen/RegAllocFast.cpp test/CodeGen/X86/2010-05-12-FastAllocKills.ll Message-ID: <20100512184603.CE750312800A@llvm.org> Author: stoklund Date: Wed May 12 13:46:03 2010 New Revision: 103635 URL: http://llvm.org/viewvc/llvm-project?rev=103635&view=rev Log: Make sure to add kill flags to the last use of a virtreg when it is redefined. The X86 floating point stack pass and others depend on good kill flags. 
Added: llvm/trunk/test/CodeGen/X86/2010-05-12-FastAllocKills.ll Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103635&r1=103634&r2=103635&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Wed May 12 13:46:03 2010 @@ -121,8 +121,9 @@ bool runOnMachineFunction(MachineFunction &Fn); void AllocateBasicBlock(MachineBasicBlock &MBB); int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); - void killVirtReg(unsigned VirtReg); + void addKillFlag(LiveRegMap::iterator i); void killVirtReg(LiveRegMap::iterator i); + void killVirtReg(unsigned VirtReg); void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned VirtReg, bool isKill); void killPhysReg(unsigned PhysReg); @@ -161,20 +162,27 @@ return FrameIdx; } -/// killVirtReg - Mark virtreg as no longer available. -void RAFast::killVirtReg(LiveRegMap::iterator i) { - assert(i != LiveVirtRegs.end() && "Killing unmapped virtual register"); - unsigned VirtReg = i->first; - const LiveReg &LR = i->second; - assert(PhysRegState[LR.PhysReg] == VirtReg && "Broken RegState mapping"); - PhysRegState[LR.PhysReg] = regFree; +/// addKillFlag - Set kill flags on last use of a virtual register. 
+void RAFast::addKillFlag(LiveRegMap::iterator lri) { + assert(lri != LiveVirtRegs.end() && "Killing unmapped virtual register"); + const LiveReg &LR = lri->second; if (LR.LastUse) { MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); - if (MO.isUse()) MO.setIsKill(); - else MO.setIsDead(); - DEBUG(dbgs() << " - last seen here: " << *LR.LastUse); + if (MO.isDef()) + MO.setIsDead(); + else if (!LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) + MO.setIsKill(); + DEBUG(dbgs() << " %reg" << lri->first << " killed: " << *LR.LastUse); } - LiveVirtRegs.erase(i); +} + +/// killVirtReg - Mark virtreg as no longer available. +void RAFast::killVirtReg(LiveRegMap::iterator lri) { + addKillFlag(lri); + const LiveReg &LR = lri->second; + assert(PhysRegState[LR.PhysReg] == lri->first && "Broken RegState mapping"); + PhysRegState[LR.PhysReg] = regFree; + LiveVirtRegs.erase(lri); } /// killVirtReg - Mark virtreg as no longer available. @@ -445,6 +453,8 @@ LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); if (lri == LiveVirtRegs.end()) lri = allocVirtReg(MBB, MI, VirtReg); + else + addKillFlag(lri); // Kill before redefine. 
LiveReg &LR = lri->second; LR.LastUse = MI; LR.LastOpNum = OpNum; Added: llvm/trunk/test/CodeGen/X86/2010-05-12-FastAllocKills.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-05-12-FastAllocKills.ll?rev=103635&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/2010-05-12-FastAllocKills.ll (added) +++ llvm/trunk/test/CodeGen/X86/2010-05-12-FastAllocKills.ll Wed May 12 13:46:03 2010 @@ -0,0 +1,59 @@ +; RUN: llc -regalloc=fast -verify-machineinstrs < %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin" + +; This test causes a virtual FP register to be redefined while it is live: +;BB#5: derived from LLVM BB %bb10 +; Predecessors according to CFG: BB#4 BB#5 +; %reg1024 = MOV_Fp8080 %reg1034 +; %reg1025 = MUL_Fp80m32 %reg1024, %RIP, 1, %reg0, , %reg0; mem:LD4[ConstantPool] +; %reg1034 = MOV_Fp8080 %reg1025 +; FP_REG_KILL %FP0, %FP1, %FP2, %FP3, %FP4, %FP5, %FP6 +; JMP_4 +; Successors according to CFG: BB#5 +; +; The X86FP pass needs good kill flags, like on %FP0 representing %reg1034: +;BB#5: derived from LLVM BB %bb10 +; Predecessors according to CFG: BB#4 BB#5 +; %FP0 = LD_Fp80m , 1, %reg0, 0, %reg0; mem:LD10[FixedStack3](align=4) +; %FP1 = MOV_Fp8080 %FP0 +; %FP2 = MUL_Fp80m32 %FP1, %RIP, 1, %reg0, , %reg0; mem:LD4[ConstantPool] +; %FP0 = MOV_Fp8080 %FP2 +; ST_FpP80m , 1, %reg0, 0, %reg0, %FP0; mem:ST10[FixedStack3](align=4) +; ST_FpP80m , 1, %reg0, 0, %reg0, %FP1; mem:ST10[FixedStack4](align=4) +; ST_FpP80m , 1, %reg0, 0, %reg0, %FP2; mem:ST10[FixedStack5](align=4) +; FP_REG_KILL %FP0, %FP1, %FP2, %FP3, %FP4, %FP5, %FP6 +; JMP_4 +; Successors according to CFG: BB#5 + +define fastcc i32 @sqlite3AtoF(i8* %z, double* nocapture %pResult) nounwind ssp { +entry: + br i1 undef, label %bb2, label %bb1.i.i + +bb1.i.i: ; preds = 
%entry + unreachable + +bb2: ; preds = %entry + br i1 undef, label %isdigit339.exit11.preheader, label %bb13 + +isdigit339.exit11.preheader: ; preds = %bb2 + br i1 undef, label %bb12, label %bb10 + +bb10: ; preds = %bb10, %isdigit339.exit11.preheader + %divisor.041 = phi x86_fp80 [ %0, %bb10 ], [ 0xK3FFF8000000000000000, %isdigit339.exit11.preheader ] ; [#uses=1] + %0 = fmul x86_fp80 %divisor.041, 0xK4002A000000000000000 ; [#uses=2] + br i1 false, label %bb12, label %bb10 + +bb12: ; preds = %bb10, %isdigit339.exit11.preheader + %divisor.0.lcssa = phi x86_fp80 [ 0xK3FFF8000000000000000, %isdigit339.exit11.preheader ], [ %0, %bb10 ] ; [#uses=0] + br label %bb13 + +bb13: ; preds = %bb12, %bb2 + br i1 undef, label %bb34, label %bb36 + +bb34: ; preds = %bb13 + br label %bb36 + +bb36: ; preds = %bb34, %bb13 + ret i32 undef +} From gkistanova at gmail.com Wed May 12 14:50:28 2010 From: gkistanova at gmail.com (Galina Kistanova) Date: Wed, 12 May 2010 19:50:28 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103639 - in /llvm-gcc-4.2/trunk/extras: build-4-mingw32 build-x-4-mingw32 Message-ID: <20100512195028.4EAD2312800A@llvm.org> Author: gkistanova Date: Wed May 12 14:50:28 2010 New Revision: 103639 URL: http://llvm.org/viewvc/llvm-project?rev=103639&view=rev Log: Added build script for build on x86_64-apple-darwin10 of cross llvm-gcc for i686-pc-mingw32; added LLVM tests Added: llvm-gcc-4.2/trunk/extras/build-x-4-mingw32 (with props) Modified: llvm-gcc-4.2/trunk/extras/build-4-mingw32 Modified: llvm-gcc-4.2/trunk/extras/build-4-mingw32 URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/extras/build-4-mingw32?rev=103639&r1=103638&r2=103639&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/extras/build-4-mingw32 (original) +++ llvm-gcc-4.2/trunk/extras/build-4-mingw32 Wed May 12 14:50:28 2010 @@ -46,6 +46,7 @@ do_clean=no # Clean up the build directory. do_configure_llvm=no # Configure LLVM. 
do_make_llvm=no # Make LLVM. +do_test_llvm=no # Test LLVM. do_configure_llvmgcc=no # Configure LLVM-GCC. do_make_llvmgcc=no # Make LLVM-GCC. do_install_llvmgcc=no # Install LLVM-GCC. @@ -62,6 +63,7 @@ clean | \ configure_llvm | \ make_llvm | \ + test_llvm | \ configure_llvmgcc | \ make_llvmgcc | \ install_llvmgcc | \ @@ -81,6 +83,7 @@ do_clean=yes do_configure_llvm=yes do_make_llvm=yes + do_test_llvm=yes do_configure_llvmgcc=yes do_make_llvmgcc=yes do_install_llvmgcc=yes @@ -123,7 +126,18 @@ if [ "$do_make_llvm" == "yes" ] ; then cd ${BUILD_ROOT}/${LLVM_obj} - nice -n 20 make ENABLE_OPTIMIZED=1 \ + nice -n 20 make VERBOSE=1 \ + $@ # Extra args if any, like -j16 for example. + +fi + +#------------------------------------------------------------------------------ +# Step: Test LLVM. +#------------------------------------------------------------------------------ +if [ "$do_test_llvm" == "yes" ] ; then + + cd ${BUILD_ROOT}/${LLVM_obj} + make check-lit VERBOSE=1 \ $@ # Extra args if any, like -j16 for example. fi Added: llvm-gcc-4.2/trunk/extras/build-x-4-mingw32 URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/extras/build-x-4-mingw32?rev=103639&view=auto ============================================================================== --- llvm-gcc-4.2/trunk/extras/build-x-4-mingw32 (added) +++ llvm-gcc-4.2/trunk/extras/build-x-4-mingw32 Wed May 12 14:50:28 2010 @@ -0,0 +1,206 @@ +#!/bin/bash + +set -e # Terminate script at the first line that fails. +set -o pipefail # Return the first non-zero pipe command error. +set -x # Print commands as they are executed + +# This script performs an automated build on x86_64-apple-darwin10 of +# cross llvm-gcc for i686-pc-mingw32. It assumes the valid native +# compiler for x86_64-apple-darwin10 is in place and available as well as +# cross libraries and headers for i686-pc-mingw32. 
+ +# --build=x86_64-apple-darwin10 +# --host=x86_64-apple-darwin10 +# --target=i686-pc-mingw32 + +# The usage: +# Run this build from the build from the build root directory as +# build-x-4-mingw32 [] [] + +# Expected project tree structure: +# +# +-- ${LLVM_src} +# +-- ${LLVM_GCC_src} +# +-- ${LLVM_obj} +# +-- ${LLVM_GCC_obj} +# +-- ${INSTALL} + +LLVM_src=llvm.src # The LLVM source code root directory name. +LLVM_GCC_src=llvm-gcc.src # The LLVM-GCC source code root directory name. +LLVM_obj=llvm.obj # The LLVM build root directory name. +LLVM_GCC_obj=llvm-gcc.obj # The LLVM-GCC build root directory name. +INSTALL=install # Where the result will be installed. + +# CFLAGS and CXXFLAGS must not be set during the building of cross-tools. +unset CFLAGS +unset CXXFLAGS + +BUILD_ROOT=$PWD # Where build happens. +PRIVATE_INSTALL=${BUILD_ROOT}/${INSTALL} # Where the result will be installed. + +#------------------------------------------------------------------------------ +# Define build steps, parse and validate input parameters +#------------------------------------------------------------------------------ + +# This script supports the following steps: +do_clean=no # Clean up the build directory. +do_configure_llvm=no # Configure LLVM. +do_make_llvm=no # Make LLVM. +do_test_llvm=no # Test LLVM. +do_cross_tools=no # Copy cross-tools. +do_configure_llvmgcc=no # Configure LLVM-GCC. +do_make_llvmgcc=no # Make LLVM-GCC. +do_install_llvmgcc=no # Install LLVM-GCC. +do_all=no # Runs all steps at once when requested. + +# Set step parameter +if (( $# == 0 )) ; then + do_all=yes +fi +# else +if (( ! $# == 0 )) ; then + # First check that the parameter actually defines a step. + case $1 in + clean | \ + configure_llvm | \ + make_llvm | \ + test_llvm | \ + cross_tools | \ + configure_llvmgcc | \ + make_llvmgcc | \ + install_llvmgcc | \ + all) + eval do_$1=yes # Set the flag for the requested step . + shift # Remove it since is is ours and already precessed. 
+ ;; + + *) + # Not our parameter. Pass it as is. + esac +fi + +# Set all steps if do_all requested +if [ "$do_all" == "yes" ] ; then + # Set all steps to yes + do_clean=yes + do_configure_llvm=yes + do_make_llvm=yes + do_test_llvm=yes + do_cross_tools=yes + do_configure_llvmgcc=yes + do_make_llvmgcc=yes + do_install_llvmgcc=yes +fi + +#------------------------------------------------------------------------------ +# Step: Clean up. +#------------------------------------------------------------------------------ +if [ "$do_clean" == "yes" ] ; then + + # Remove everything from where we will be installing the result. + rm -rf ${PRIVATE_INSTALL} + mkdir -p ${PRIVATE_INSTALL} + chmod a+rx ${PRIVATE_INSTALL} + +fi + +#------------------------------------------------------------------------------ +# Step: Configure LLVM. +#------------------------------------------------------------------------------ +if [ "$do_configure_llvm" == "yes" ] ; then + + # Remove previous build files if any. + rm -rf ${BUILD_ROOT}/${LLVM_obj} + mkdir -p ${BUILD_ROOT}/${LLVM_obj} + chmod a+rx ${BUILD_ROOT}/${LLVM_obj} + cd ${BUILD_ROOT}/${LLVM_obj} + + ../${LLVM_src}/configure --prefix=${PRIVATE_INSTALL} \ + --build=x86_64-apple-darwin10 --host=x86_64-apple-darwin10 \ + --target=i686-pc-mingw32 \ + --enable-optimize \ + --without-llvmgcc --without-llvmgxx \ + $@ # Extra args if any + +fi + +#------------------------------------------------------------------------------ +# Step: Make LLVM. +#------------------------------------------------------------------------------ +if [ "$do_make_llvm" == "yes" ] ; then + + cd ${BUILD_ROOT}/${LLVM_obj} + # NOTE: Do not build with ENABLE_OPTIMIZED=1 - some test fail after it. + nice -n 20 make VERBOSE=1 \ + $@ # Extra args if any, like -j16 for example. + +fi + +#------------------------------------------------------------------------------ +# Step: Test LLVM. 
+#------------------------------------------------------------------------------ +if [ "$do_test_llvm" == "yes" ] ; then + + cd ${BUILD_ROOT}/${LLVM_obj} + make check-lit VERBOSE=1 \ + $@ # Extra args if any, like -j16 for example. + +fi + +#------------------------------------------------------------------------------ +# Step: Copy cross-tools. +#------------------------------------------------------------------------------ +if [ "$do_cross_tools" == "yes" ] ; then + + # We need a local copy of binutils, system libraries and headers, + # since we will be installing there. + cp -RL /cross-tools/ ${PRIVATE_INSTALL} + +fi + +#------------------------------------------------------------------------------ +# Step: Configure LLVM-GCC. +#------------------------------------------------------------------------------ +if [ "$do_configure_llvmgcc" == "yes" ] ; then + + # Remove previous build files if any. + rm -rf ${BUILD_ROOT}/${LLVM_GCC_obj} + mkdir -p ${BUILD_ROOT}/${LLVM_GCC_obj} + chmod a+rx ${BUILD_ROOT}/${LLVM_GCC_obj} + cd ${BUILD_ROOT}/${LLVM_GCC_obj} + + ../${LLVM_GCC_src}/configure --prefix=${PRIVATE_INSTALL} \ + --build=x86_64-apple-darwin10 --host=x86_64-apple-darwin10 \ + --with-local-prefix=/tools \ + --target=i686-pc-mingw32 \ + --program-prefix=i686-pc-mingw32- \ + --enable-llvm=${BUILD_ROOT}/${LLVM_obj} \ + --enable-languages=c,c++ \ + --disable-multilib --disable-nls --disable-shared \ + --disable-sjlj-exceptions --disable-__cxa_atexit \ + $@ # Extra args if any + +fi + +#------------------------------------------------------------------------------ +# Step: Make LLVM-GCC. +#------------------------------------------------------------------------------ +if [ "$do_make_llvmgcc" == "yes" ] ; then + + cd ${BUILD_ROOT}/${LLVM_GCC_obj} + # NOTE: Do not build in parallel! It doesn't build. + nice -n 20 make \ + $@ # Extra args if any + +fi + +#------------------------------------------------------------------------------ +# Step: Install LLVM-GCC. 
+#------------------------------------------------------------------------------ +if [ "$do_install_llvmgcc" == "yes" ] ; then + + cd ${BUILD_ROOT}/${LLVM_GCC_obj} + nice -n 20 make install \ + $@ # Extra args if any + +fi Propchange: llvm-gcc-4.2/trunk/extras/build-x-4-mingw32 ------------------------------------------------------------------------------ svn:executable = * From baldrick at free.fr Wed May 12 14:50:41 2010 From: baldrick at free.fr (Duncan Sands) Date: Wed, 12 May 2010 19:50:41 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103640 - /llvm-gcc-4.2/trunk/gcc/config/i386/llvm-i386.cpp Message-ID: <20100512195041.34A2D312800A@llvm.org> Author: baldrick Date: Wed May 12 14:50:41 2010 New Revision: 103640 URL: http://llvm.org/viewvc/llvm-project?rev=103640&view=rev Log: Port commit 103622 (baldrick) from dragonegg: TargetIntrinsicLower is only supposed to set Result if there is a value to be returned, so it is wrong to set it to a StoreInst for example. Fix a bunch of mistakes of this kind. 
Modified: llvm-gcc-4.2/trunk/gcc/config/i386/llvm-i386.cpp Modified: llvm-gcc-4.2/trunk/gcc/config/i386/llvm-i386.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/i386/llvm-i386.cpp?rev=103640&r1=103639&r2=103640&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/i386/llvm-i386.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/config/i386/llvm-i386.cpp Wed May 12 14:50:41 2010 @@ -329,7 +329,6 @@ Value *BC = Builder.CreateBitCast(Ops[0], v4f32Ptr); StoreInst *SI = Builder.CreateStore(Ops[1], BC); SI->setAlignment(1); - Result = SI; return true; } case IX86_BUILTIN_STOREUPD: { @@ -338,7 +337,6 @@ Value *BC = Builder.CreateBitCast(Ops[0], v2f64Ptr); StoreInst *SI = Builder.CreateStore(Ops[1], BC); SI->setAlignment(1); - Result = SI; return true; } case IX86_BUILTIN_STOREDQU: { @@ -347,7 +345,6 @@ Value *BC = Builder.CreateBitCast(Ops[0], v16i8Ptr); StoreInst *SI = Builder.CreateStore(Ops[1], BC); SI->setAlignment(1); - Result = SI; return true; } case IX86_BUILTIN_LOADHPS: { @@ -600,7 +597,7 @@ Value *Ptr = CreateTemporary(Type::getInt32Ty(Context)); Builder.CreateStore(Ops[0], Ptr); Ptr = Builder.CreateBitCast(Ptr, Type::getInt8PtrTy(Context)); - Result = Builder.CreateCall(ldmxcsr, Ptr); + Builder.CreateCall(ldmxcsr, Ptr); return true; } case IX86_BUILTIN_STMXCSR: { From gkistanova at gmail.com Wed May 12 15:05:32 2010 From: gkistanova at gmail.com (Galina Kistanova) Date: Wed, 12 May 2010 20:05:32 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103641 - in /llvm-gcc-4.2/trunk/extras: build-4-mingw32 buildbot-launcher Message-ID: <20100512200532.3BDED312800A@llvm.org> Author: gkistanova Date: Wed May 12 15:05:32 2010 New Revision: 103641 URL: http://llvm.org/viewvc/llvm-project?rev=103641&view=rev Log: Set missing executable flags Modified: llvm-gcc-4.2/trunk/extras/build-4-mingw32 (props changed) llvm-gcc-4.2/trunk/extras/buildbot-launcher (props changed) Propchange: 
llvm-gcc-4.2/trunk/extras/build-4-mingw32 ------------------------------------------------------------------------------ svn:executable = * Propchange: llvm-gcc-4.2/trunk/extras/buildbot-launcher ------------------------------------------------------------------------------ svn:executable = * From evan.cheng at apple.com Wed May 12 15:20:22 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 12 May 2010 20:20:22 -0000 Subject: [llvm-commits] [llvm] r103642 - /llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Message-ID: <20100512202022.DB55A312800A@llvm.org> Author: evancheng Date: Wed May 12 15:20:22 2010 New Revision: 103642 URL: http://llvm.org/viewvc/llvm-project?rev=103642&view=rev Log: Remove a dead fixme. Modified: llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Modified: llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp?rev=103642&r1=103641&r2=103642&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Wed May 12 15:20:22 2010 @@ -103,7 +103,6 @@ MI.eraseFromParent(); Modified = true; } - // FIXME: expand t2MOVi32imm } MBBI = NMBBI; } From dpatel at apple.com Wed May 12 15:33:13 2010 From: dpatel at apple.com (Devang Patel) Date: Wed, 12 May 2010 20:33:13 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103644 - in /llvm-gcc-4.2/trunk/gcc: config/arm/darwin.h config/i386/darwin.h config/rs6000/darwin.h doc/llvm-gcc.1 Message-ID: <20100512203313.1B4B6312800A@llvm.org> Author: dpatel Date: Wed May 12 15:33:12 2010 New Revision: 103644 URL: http://llvm.org/viewvc/llvm-project?rev=103644&view=rev Log: Do not suppress -g in LTO mode. Update man page. 
Modified: llvm-gcc-4.2/trunk/gcc/config/arm/darwin.h llvm-gcc-4.2/trunk/gcc/config/i386/darwin.h llvm-gcc-4.2/trunk/gcc/config/rs6000/darwin.h llvm-gcc-4.2/trunk/gcc/doc/llvm-gcc.1 Modified: llvm-gcc-4.2/trunk/gcc/config/arm/darwin.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/darwin.h?rev=103644&r1=103643&r2=103644&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/arm/darwin.h (original) +++ llvm-gcc-4.2/trunk/gcc/config/arm/darwin.h Wed May 12 15:33:12 2010 @@ -36,10 +36,6 @@ %{!fbuiltin-strcpy:-fno-builtin-strcpy} \ % Author: ddunbar Date: Wed May 12 16:35:19 2010 New Revision: 103647 URL: http://llvm.org/viewvc/llvm-project?rev=103647&view=rev Log: ADT: Add ilist_node::get{Prev,Next}Node, which return the adjacent node or null. - This provides a convenient alternative to using something llvm::prior or manual iterator access, for example:: if (T *Prev = foo->getPrevNode()) ... instead of:: iterator it(foo); if (it != begin()) { --it; ... } - Chris, please review. Added: llvm/trunk/unittests/ADT/ilistTest.cpp Modified: llvm/trunk/include/llvm/ADT/ilist_node.h Modified: llvm/trunk/include/llvm/ADT/ilist_node.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/ilist_node.h?rev=103647&r1=103646&r2=103647&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/ilist_node.h (original) +++ llvm/trunk/include/llvm/ADT/ilist_node.h Wed May 12 16:35:19 2010 @@ -49,6 +49,56 @@ void setNext(NodeTy *N) { Next = N; } protected: ilist_node() : Next(0) {} + +public: + /// @name Adjacent Node Accessors + /// @{ + + /// \brief Get the previous node, or 0 for the list head. + NodeTy *getPrevNode() { + NodeTy *Prev = this->getPrev(); + + // Check for sentinel. + if (!Prev->getNext()) + return 0; + + return Prev; + } + + /// \brief Get the previous node, or 0 for the list head. 
+ const NodeTy *getPrevNode() const { + NodeTy *Prev = this->getPrev(); + + // Check for sentinel. + if (!Prev->getNext()) + return 0; + + return Prev; + } + + /// \brief Get the next node, or 0 for the list tail. + NodeTy *getNextNode() { + NodeTy *Next = getNext(); + + // Check for sentinel. + if (!Next->getNext()) + return 0; + + return Next; + } + + /// \brief Get the next node, or 0 for the list tail. + const NodeTy *getNextNode() const { + NodeTy *Next = getNext(); + + // Check for sentinel. + if (!Next->getNext()) + return 0; + + return Next; + } + + /// @} }; } // End llvm namespace Added: llvm/trunk/unittests/ADT/ilistTest.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ADT/ilistTest.cpp?rev=103647&view=auto ============================================================================== --- llvm/trunk/unittests/ADT/ilistTest.cpp (added) +++ llvm/trunk/unittests/ADT/ilistTest.cpp Wed May 12 16:35:19 2010 @@ -0,0 +1,39 @@ +//===- llvm/unittest/ADT/APInt.cpp - APInt unit tests ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include <ostream> +#include "gtest/gtest.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" + +using namespace llvm; + +namespace { + +struct Node : ilist_node<Node> { + int Value; + + Node() {} + Node(int _Value) : Value(_Value) {} +}; + +TEST(ilistTest, Basic) { + ilist<Node> List; + List.push_back(Node(1)); + EXPECT_EQ(1, List.back().Value); + EXPECT_EQ(0, List.back().getPrevNode()); + EXPECT_EQ(0, List.back().getNextNode()); + + List.push_back(Node(2)); + EXPECT_EQ(2, List.back().Value); + EXPECT_EQ(2, List.front().getNextNode()->Value); + EXPECT_EQ(1, List.back().getPrevNode()->Value); +} + +} From daniel at zuster.org Wed May 12 16:35:22 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 21:35:22 -0000 Subject: [llvm-commits] [llvm] r103648 - /llvm/trunk/lib/MC/MCAssembler.cpp Message-ID: <20100512213522.4B6C13128018@llvm.org> Author: ddunbar Date: Wed May 12 16:35:22 2010 New Revision: 103648 URL: http://llvm.org/viewvc/llvm-project?rev=103648&view=rev Log: MC: Tweak section layout to not rely on accumulating the address value. Modified: llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103648&r1=103647&r2=103648&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 16:35:22 2010 @@ -396,10 +396,15 @@ // Set the aligned section address. Layout.setSectionAddress(&SD, StartAddress); - uint64_t Address = StartAddress; for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) { MCFragment &F = *it; + // Compute the fragment start address.
+ uint64_t Address = StartAddress; + if (MCFragment *Prev = F.getPrevNode()) + Address = (Layout.getFragmentAddress(Prev) + + Layout.getFragmentEffectiveSize(Prev)); + ++stats::FragmentLayouts; uint64_t FragmentOffset = Address - StartAddress; @@ -464,15 +469,16 @@ } Layout.setFragmentEffectiveSize(&F, EffectiveSize); - Address += EffectiveSize; } // Set the section sizes. - Layout.setSectionSize(&SD, Address - StartAddress); - if (IsVirtual) - Layout.setSectionFileSize(&SD, 0); - else - Layout.setSectionFileSize(&SD, Address - StartAddress); + uint64_t Size = 0; + if (!SD.getFragmentList().empty()) { + MCFragment *F = &SD.getFragmentList().back(); + Size = Layout.getFragmentOffset(F) + Layout.getFragmentEffectiveSize(F); + } + Layout.setSectionSize(&SD, Size); + Layout.setSectionFileSize(&SD, IsVirtual ? 0 : Size); } /// WriteFragmentData - Write the \arg F data to the output file. From daniel at zuster.org Wed May 12 16:35:25 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 21:35:25 -0000 Subject: [llvm-commits] [llvm] r103649 - in /llvm/trunk: include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp Message-ID: <20100512213525.557213128026@llvm.org> Author: ddunbar Date: Wed May 12 16:35:25 2010 New Revision: 103649 URL: http://llvm.org/viewvc/llvm-project?rev=103649&view=rev Log: MC: Factor out MCAssembler::LayoutFragment Modified: llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103649&r1=103648&r2=103649&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Wed May 12 16:35:25 2010 @@ -668,6 +668,11 @@ bool FragmentNeedsRelaxation(const MCInstFragment *IF, const MCAsmLayout &Layout) const; + /// LayoutFragment - Performs layout of the given \arg 
Fragment; assuming that + /// the previous fragment has already been layed out correctly, and the parent + /// section has been initialized. + void LayoutFragment(MCAsmLayout &Layout, MCFragment &Fragment); + /// LayoutSection - Performs layout of the section referenced by the given /// \arg SectionOrderIndex. The layout assumes that the previous section has /// already been layed out correctly. Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103649&r1=103648&r2=103649&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 16:35:25 2010 @@ -362,6 +362,82 @@ return IsResolved; } +void MCAssembler::LayoutFragment(MCAsmLayout &Layout, MCFragment &F) { + uint64_t StartAddress = Layout.getSectionAddress(F.getParent()); + + // Get the fragment start address. + uint64_t Address = StartAddress; + MCSectionData::iterator it = &F; + if (MCFragment *Prev = F.getPrevNode()) + Address = (StartAddress + Layout.getFragmentOffset(Prev) + + Layout.getFragmentEffectiveSize(Prev)); + + ++stats::FragmentLayouts; + + uint64_t FragmentOffset = Address - StartAddress; + Layout.setFragmentOffset(&F, FragmentOffset); + + // Evaluate fragment size. 
+ uint64_t EffectiveSize = 0; + switch (F.getKind()) { + case MCFragment::FT_Align: { + MCAlignFragment &AF = cast<MCAlignFragment>(F); + + EffectiveSize = OffsetToAlignment(Address, AF.getAlignment()); + if (EffectiveSize > AF.getMaxBytesToEmit()) + EffectiveSize = 0; + break; + } + + case MCFragment::FT_Data: + EffectiveSize = cast<MCDataFragment>(F).getContents().size(); + break; + + case MCFragment::FT_Fill: { + MCFillFragment &FF = cast<MCFillFragment>(F); + EffectiveSize = FF.getValueSize() * FF.getCount(); + break; + } + + case MCFragment::FT_Inst: + EffectiveSize = cast<MCInstFragment>(F).getInstSize(); + break; + + case MCFragment::FT_Org: { + MCOrgFragment &OF = cast<MCOrgFragment>(F); + + int64_t TargetLocation; + if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout)) + report_fatal_error("expected assembly-time absolute expression"); + + // FIXME: We need a way to communicate this error. + int64_t Offset = TargetLocation - FragmentOffset; + if (Offset < 0) + report_fatal_error("invalid .org offset '" + Twine(TargetLocation) + + "' (at offset '" + Twine(FragmentOffset) + "'"); + + EffectiveSize = Offset; + break; + } + + case MCFragment::FT_ZeroFill: { + MCZeroFillFragment &ZFF = cast<MCZeroFillFragment>(F); + + // Align the fragment offset; it is safe to adjust the offset freely since + // this is only in virtual sections. + // + // FIXME: We shouldn't be doing this here. + Address = RoundUpToAlignment(Address, ZFF.getAlignment()); + Layout.setFragmentOffset(&F, Address - StartAddress); + + EffectiveSize = ZFF.getSize(); + break; + } + } + + Layout.setFragmentEffectiveSize(&F, EffectiveSize); +} + void MCAssembler::LayoutSection(MCAsmLayout &Layout, unsigned SectionOrderIndex) { MCSectionData &SD = *Layout.getSectionOrder()[SectionOrderIndex]; @@ -396,80 +472,8 @@ // Set the aligned section address. Layout.setSectionAddress(&SD, StartAddress); - for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) { - MCFragment &F = *it; - - // Compute the fragment start address.
- uint64_t Address = StartAddress; - if (MCFragment *Prev = F.getPrevNode()) - Address = (Layout.getFragmentAddress(Prev) + - Layout.getFragmentEffectiveSize(Prev)); - - ++stats::FragmentLayouts; - - uint64_t FragmentOffset = Address - StartAddress; - Layout.setFragmentOffset(&F, FragmentOffset); - - // Evaluate fragment size. - uint64_t EffectiveSize = 0; - switch (F.getKind()) { - case MCFragment::FT_Align: { - MCAlignFragment &AF = cast<MCAlignFragment>(F); - - EffectiveSize = OffsetToAlignment(Address, AF.getAlignment()); - if (EffectiveSize > AF.getMaxBytesToEmit()) - EffectiveSize = 0; - break; - } - - case MCFragment::FT_Data: - EffectiveSize = cast<MCDataFragment>(F).getContents().size(); - break; - - case MCFragment::FT_Fill: { - MCFillFragment &FF = cast<MCFillFragment>(F); - EffectiveSize = FF.getValueSize() * FF.getCount(); - break; - } - - case MCFragment::FT_Inst: - EffectiveSize = cast<MCInstFragment>(F).getInstSize(); - break; - - case MCFragment::FT_Org: { - MCOrgFragment &OF = cast<MCOrgFragment>(F); - - int64_t TargetLocation; - if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout)) - report_fatal_error("expected assembly-time absolute expression"); - - // FIXME: We need a way to communicate this error. - int64_t Offset = TargetLocation - FragmentOffset; - if (Offset < 0) - report_fatal_error("invalid .org offset '" + Twine(TargetLocation) + - "' (at offset '" + Twine(FragmentOffset) + "'"); - - EffectiveSize = Offset; - break; - } - - case MCFragment::FT_ZeroFill: { - MCZeroFillFragment &ZFF = cast<MCZeroFillFragment>(F); - - // Align the fragment offset; it is safe to adjust the offset freely since - // this is only in virtual sections. - // - // FIXME: We shouldn't be doing this here.
- Address = RoundUpToAlignment(Address, ZFF.getAlignment()); - Layout.setFragmentOffset(&F, Address - StartAddress); - - EffectiveSize = ZFF.getSize(); - break; - } - } - - Layout.setFragmentEffectiveSize(&F, EffectiveSize); - } + for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) + LayoutFragment(Layout, *it); // Set the section sizes. uint64_t Size = 0; From daniel at zuster.org Wed May 12 16:47:55 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 21:47:55 -0000 Subject: [llvm-commits] [llvm] r103651 - /llvm/trunk/lib/MC/MCAssembler.cpp Message-ID: <20100512214755.CDA0F312800A@llvm.org> Author: ddunbar Date: Wed May 12 16:47:55 2010 New Revision: 103651 URL: http://llvm.org/viewvc/llvm-project?rev=103651&view=rev Log: Simplify. Modified: llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103651&r1=103650&r2=103651&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 16:47:55 2010 @@ -459,11 +459,9 @@ // Unless this section is virtual (where we are allowed to adjust the offset // freely), the padding goes in the previous section. if (!IsVirtual) { - // Find the previous non-virtual section. 
- iterator it = &SD; - assert(it != begin() && "Invalid initial section address!"); - for (--it; getBackend().isVirtualSection(it->getSection()); --it) ; - Layout.setSectionFileSize(&*it, Layout.getSectionFileSize(&*it) + Pad); + assert(SectionOrderIndex && "Invalid initial section address!"); + MCSectionData *Prev = Layout.getSectionOrder()[SectionOrderIndex - 1]; + Layout.setSectionFileSize(Prev, Layout.getSectionFileSize(Prev) + Pad); } StartAddress += Pad; From nicholas at mxc.ca Wed May 12 16:48:15 2010 From: nicholas at mxc.ca (Nick Lewycky) Date: Wed, 12 May 2010 21:48:15 -0000 Subject: [llvm-commits] [llvm] r103653 - in /llvm/trunk: include/llvm/Analysis/InlineCost.h lib/Analysis/InlineCost.cpp lib/Transforms/IPO/InlineSimple.cpp Message-ID: <20100512214815.D18B8312800A@llvm.org> Author: nicholas Date: Wed May 12 16:48:15 2010 New Revision: 103653 URL: http://llvm.org/viewvc/llvm-project?rev=103653&view=rev Log: Clear CachedFunctionInfo upon Pass::releaseMemory. Because ValueMap will abort on RAUW of functions, this is a correctness issue instead of a mere memory usage problem. No testcase until the new MergeFunctions can land. Modified: llvm/trunk/include/llvm/Analysis/InlineCost.h llvm/trunk/lib/Analysis/InlineCost.cpp llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp Modified: llvm/trunk/include/llvm/Analysis/InlineCost.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/InlineCost.h?rev=103653&r1=103652&r2=103653&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/InlineCost.h (original) +++ llvm/trunk/include/llvm/Analysis/InlineCost.h Wed May 12 16:48:15 2010 @@ -198,6 +198,9 @@ /// has been inlined. If Callee is NULL it means a dead call has been /// eliminated. 
void growCachedCostInfo(Function* Caller, Function* Callee); + + /// clear - empty the cache of inline costs + void clear(); }; /// callIsSmall - If a call is likely to lower to a single target instruction, Modified: llvm/trunk/lib/Analysis/InlineCost.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=103653&r1=103652&r2=103653&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/InlineCost.cpp (original) +++ llvm/trunk/lib/Analysis/InlineCost.cpp Wed May 12 16:48:15 2010 @@ -455,6 +455,11 @@ else CallerMetrics.NumInsts = 0; - // We are not updating the argumentweights. We have already determined that + // We are not updating the argument weights. We have already determined that // Caller is a fairly large function, so we accept the loss of precision. } + +/// clear - empty the cache of inline costs +void InlineCostAnalyzer::clear() { + CachedFunctionInfo.clear(); +} Modified: llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp?rev=103653&r1=103652&r2=103653&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/InlineSimple.cpp Wed May 12 16:48:15 2010 @@ -49,6 +49,9 @@ CA.growCachedCostInfo(Caller, Callee); } virtual bool doInitialization(CallGraph &CG); + void releaseMemory() { + CA.clear(); + } }; } From daniel at zuster.org Wed May 12 16:47:58 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 21:47:58 -0000 Subject: [llvm-commits] [llvm] r103652 - /llvm/trunk/utils/lit/lit/ShUtil.py Message-ID: <20100512214758.77AE33128018@llvm.org> Author: ddunbar Date: Wed May 12 16:47:58 2010 New Revision: 103652 URL: http://llvm.org/viewvc/llvm-project?rev=103652&view=rev Log: lit: Fix a sh lexing bug which caused annotate-token.m to fail when 
run with the internal shell parser; we weren't lexing the quotes in a command like:: clang -DFOO='hello' correctly. Modified: llvm/trunk/utils/lit/lit/ShUtil.py Modified: llvm/trunk/utils/lit/lit/ShUtil.py URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/lit/lit/ShUtil.py?rev=103652&r1=103651&r2=103652&view=diff ============================================================================== --- llvm/trunk/utils/lit/lit/ShUtil.py (original) +++ llvm/trunk/utils/lit/lit/ShUtil.py Wed May 12 16:47:58 2010 @@ -67,6 +67,9 @@ elif c == '"': self.eat() str += self.lex_arg_quoted('"') + elif c == "'": + self.eat() + str += self.lex_arg_quoted("'") elif not self.win32Escapes and c == '\\': # Outside of a string, '\\' escapes everything. self.eat() @@ -287,6 +290,10 @@ Pipeline([Command(['echo', 'hello'], [])], False)) self.assertEqual(self.parse('echo ""'), Pipeline([Command(['echo', ''], [])], False)) + self.assertEqual(self.parse("""echo -DFOO='a'"""), + Pipeline([Command(['echo', '-DFOO=a'], [])], False)) + self.assertEqual(self.parse('echo -DFOO="a"'), + Pipeline([Command(['echo', '-DFOO=a'], [])], False)) def test_redirection(self): self.assertEqual(self.parse('echo hello > c'), From sabre at nondot.org Wed May 12 17:48:24 2010 From: sabre at nondot.org (Chris Lattner) Date: Wed, 12 May 2010 22:48:24 -0000 Subject: [llvm-commits] [llvm] r103660 - in /llvm/trunk: lib/Target/X86/X86InstrInfo.td test/MC/AsmParser/X86/x86_32-new-encoder.s Message-ID: <20100512224824.D2785312800A@llvm.org> Author: lattner Date: Wed May 12 17:48:24 2010 New Revision: 103660 URL: http://llvm.org/viewvc/llvm-project?rev=103660&view=rev Log: fix the encoding of the obscure "moffset" forms of moves, i386 part first. 
rdar://7947184 Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=103660&r1=103659&r2=103660&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Wed May 12 17:48:24 2010 @@ -966,16 +966,17 @@ "mov{l}\t{$src, $dst|$dst, $src}", [(store (i32 imm:$src), addr:$dst)]>; -def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins offset8:$src), +/// moffs8, moffs16, moffs32 and moffs64 versions of moves. The immediate is a +/// 32-bit offset from the PC. +def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|%al, $src}", []>; -def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins offset16:$src), +def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src), "mov{l}\t{$src, %eax|%eax, $src}", []>; - -def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs offset8:$dst), (ins), +def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins), "mov{b}\t{%al, $dst|$dst, %al}", []>; -def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs offset16:$dst), (ins), +def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize; def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, %eax}", []>; Modified: llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s?rev=103660&r1=103659&r2=103660&view=diff ============================================================================== --- llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s (original) +++ 
llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s Wed May 12 17:48:24 2010 @@ -50,3 +50,10 @@ // CHECK: shrl %eax # encoding: [0xd1,0xe8] shrl $1, %eax + +// moffset forms of moves, rdar://7947184 + +movb 0, %al // CHECK: movb 0, %al # encoding: [0xa0,A,A,A,A] +movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0xa1,A,A,A,A] +movl 0, %eax // CHECK: movl 0, %eax # encoding: [0xa1,A,A,A,A] + From daniel at zuster.org Wed May 12 17:51:32 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 22:51:32 -0000 Subject: [llvm-commits] [llvm] r103662 - in /llvm/trunk: include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp Message-ID: <20100512225132.5F1793128018@llvm.org> Author: ddunbar Date: Wed May 12 17:51:32 2010 New Revision: 103662 URL: http://llvm.org/viewvc/llvm-project?rev=103662&view=rev Log: MC: Switch MCFillFragment to storing total fill size instead of a count. This allows using ValueSize==0 to represent a virtual fill. Modified: llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103662&r1=103661&r2=103662&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Wed May 12 17:51:32 2010 @@ -293,17 +293,21 @@ /// Value - Value to use for filling bytes. int64_t Value; - /// ValueSize - The size (in bytes) of \arg Value to use when filling. + /// ValueSize - The size (in bytes) of \arg Value to use when filling, or 0 if + /// this is a virtual fill fragment. unsigned ValueSize; - /// Count - The number of copies of \arg Value to insert. - uint64_t Count; + /// Size - The number of bytes to insert. 
+ uint64_t Size; public: - MCFillFragment(int64_t _Value, unsigned _ValueSize, uint64_t _Count, + MCFillFragment(int64_t _Value, unsigned _ValueSize, uint64_t _Size, MCSectionData *SD = 0) : MCFragment(FT_Fill, SD), - Value(_Value), ValueSize(_ValueSize), Count(_Count) {} + Value(_Value), ValueSize(_ValueSize), Size(_Size) { + assert((!ValueSize || (Size % ValueSize) == 0) && + "Fill size must be a multiple of the value size!"); + } /// @name Accessors /// @{ @@ -312,7 +316,7 @@ unsigned getValueSize() const { return ValueSize; } - uint64_t getCount() const { return Count; } + uint64_t getSize() const { return Size; } /// @} Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103662&r1=103661&r2=103662&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 17:51:32 2010 @@ -395,7 +395,7 @@ case MCFragment::FT_Fill: { MCFillFragment &FF = cast<MCFillFragment>(F); - EffectiveSize = FF.getValueSize() * FF.getCount(); + EffectiveSize = FF.getSize(); break; } @@ -534,7 +534,7 @@ case MCFragment::FT_Fill: { MCFillFragment &FF = cast<MCFillFragment>(F); - for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) { + for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) { switch (FF.getValueSize()) { default: assert(0 && "Invalid size!"); @@ -876,7 +876,7 @@ this->MCFragment::dump(); OS << "\n "; OS << " Value:" << getValue() << " ValueSize:" << getValueSize() - << " Count:" << getCount() << ">"; + << " Size:" << getSize() << ">"; } void MCInstFragment::dump() { From daniel at zuster.org Wed May 12 17:51:35 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 22:51:35 -0000 Subject: [llvm-commits] [llvm] r103663 - /llvm/trunk/lib/MC/MCAssembler.cpp Message-ID: <20100512225135.780213128026@llvm.org> Author: ddunbar Date: Wed May 12 17:51:35 2010 New Revision: 103663 URL: 
http://llvm.org/viewvc/llvm-project?rev=103663&view=rev Log: MC: Explicitly check that only virtual fragments appear in virtual sections. Modified: llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103663&r1=103662&r2=103663&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 17:51:35 2010 @@ -534,6 +534,9 @@ case MCFragment::FT_Fill: { MCFillFragment &FF = cast<MCFillFragment>(F); + + assert(FF.getValueSize() && "Invalid virtual align in concrete fragment!"); + for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) { switch (FF.getValueSize()) { default: @@ -578,6 +581,26 @@ // Ignore virtual sections. if (getBackend().isVirtualSection(SD->getSection())) { assert(SectionFileSize == 0 && "Invalid size for section!"); + + // Check that contents are only things legal inside a virtual section. 
+ for (MCSectionData::const_iterator it = SD->begin(), + ie = SD->end(); it != ie; ++it) { + switch (it->getKind()) { + default: + assert(0 && "Invalid fragment in virtual section!"); + case MCFragment::FT_Align: + assert(!cast<MCAlignFragment>(it)->getValueSize() && + "Invalid align in virtual section!"); + break; + case MCFragment::FT_Fill: + assert(!cast<MCFillFragment>(it)->getValueSize() && + "Invalid fill in virtual section!"); + break; + case MCFragment::FT_ZeroFill: + break; + } + } + return; } From daniel at zuster.org Wed May 12 17:51:38 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 22:51:38 -0000 Subject: [llvm-commits] [llvm] r103664 - in /llvm/trunk: include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp lib/MC/MCMachOStreamer.cpp Message-ID: <20100512225139.029AD3128034@llvm.org> Author: ddunbar Date: Wed May 12 17:51:38 2010 New Revision: 103664 URL: http://llvm.org/viewvc/llvm-project?rev=103664&view=rev Log: MC: Eliminate MCZeroFillFragment, it is no longer needed. Modified: llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp llvm/trunk/lib/MC/MCMachOStreamer.cpp Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103664&r1=103663&r2=103664&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Wed May 12 17:51:38 2010 @@ -69,8 +69,7 @@ FT_Data, FT_Fill, FT_Inst, - FT_Org, - FT_ZeroFill + FT_Org }; private: @@ -357,31 +356,6 @@ virtual void dump(); }; -/// MCZeroFillFragment - Represent data which has a fixed size and alignment, -/// but requires no physical space in the object file. -class MCZeroFillFragment : public MCFragment { - /// Size - The size of this fragment. 
- uint64_t Size; - -public: - MCZeroFillFragment(uint64_t _Size, MCSectionData *SD = 0) - : MCFragment(FT_ZeroFill, SD), Size(_Size) {} - - /// @name Accessors - /// @{ - - uint64_t getSize() const { return Size; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_ZeroFill; - } - static bool classof(const MCZeroFillFragment *) { return true; } - - virtual void dump(); -}; - // FIXME: Should this be a separate class, or just merged into MCSection? Since // we anticipate the fast path being through an MCAssembler, the only reason to // keep it out is for API abstraction. Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103664&r1=103663&r2=103664&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 17:51:38 2010 @@ -394,8 +394,7 @@ break; case MCFragment::FT_Fill: { - MCFillFragment &FF = cast<MCFillFragment>(F); - EffectiveSize = FF.getSize(); + EffectiveSize = cast<MCFillFragment>(F).getSize(); break; } @@ -419,11 +418,6 @@ EffectiveSize = Offset; break; } - - case MCFragment::FT_ZeroFill: { - EffectiveSize = cast<MCZeroFillFragment>(F).getSize(); - break; - } } Layout.setFragmentEffectiveSize(&F, EffectiveSize); @@ -562,11 +556,6 @@ break; } - - case MCFragment::FT_ZeroFill: { - assert(0 && "Invalid zero fill fragment in concrete section!"); - break; - } } assert(OW->getStream().tell() - Start == FragmentSize); @@ -596,8 +585,6 @@ assert(!cast<MCFillFragment>(it)->getValueSize() && "Invalid fill in virtual section!"); break; - case MCFragment::FT_ZeroFill: - break; } } @@ -922,15 +909,6 @@ OS << " Offset:" << getOffset() << " Value:" << getValue() << ">"; } -void MCZeroFillFragment::dump() { - raw_ostream &OS = llvm::errs(); - - OS << "<MCZeroFillFragment "; - this->MCFragment::dump(); - OS << "\n "; - OS << " Size:" << getSize() << ">"; -} - void MCSectionData::dump() { raw_ostream &OS = llvm::errs(); 
Modified: llvm/trunk/lib/MC/MCMachOStreamer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCMachOStreamer.cpp?rev=103664&r1=103663&r2=103664&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCMachOStreamer.cpp (original) +++ llvm/trunk/lib/MC/MCMachOStreamer.cpp Wed May 12 17:51:38 2010 @@ -326,7 +326,7 @@ new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, /*EmitNops=*/false, &SectData); - MCFragment *F = new MCZeroFillFragment(Size, &SectData); + MCFragment *F = new MCFillFragment(0, 0, Size, &SectData); SD.setFragment(F); if (Assembler.isSymbolLinkerVisible(&SD)) F->setAtom(&SD); From daniel at zuster.org Wed May 12 17:51:27 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 22:51:27 -0000 Subject: [llvm-commits] [llvm] r103661 - in /llvm/trunk: include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp lib/MC/MCMachOStreamer.cpp Message-ID: <20100512225127.F25E3312800A@llvm.org> Author: ddunbar Date: Wed May 12 17:51:27 2010 New Revision: 103661 URL: http://llvm.org/viewvc/llvm-project?rev=103661&view=rev Log: MC: Drop support for alignment in ZeroFill fragment, we can just use MCAlignFragments for this. Modified: llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp llvm/trunk/lib/MC/MCMachOStreamer.cpp Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103661&r1=103660&r2=103661&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Wed May 12 17:51:27 2010 @@ -359,21 +359,15 @@ /// Size - The size of this fragment. uint64_t Size; - /// Alignment - The alignment for this fragment. 
- unsigned Alignment; - public: - MCZeroFillFragment(uint64_t _Size, unsigned _Alignment, MCSectionData *SD = 0) - : MCFragment(FT_ZeroFill, SD), - Size(_Size), Alignment(_Alignment) {} + MCZeroFillFragment(uint64_t _Size, MCSectionData *SD = 0) + : MCFragment(FT_ZeroFill, SD), Size(_Size) {} /// @name Accessors /// @{ uint64_t getSize() const { return Size; } - unsigned getAlignment() const { return Alignment; } - /// @} static bool classof(const MCFragment *F) { Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103661&r1=103660&r2=103661&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 17:51:27 2010 @@ -421,16 +421,7 @@ } case MCFragment::FT_ZeroFill: { - MCZeroFillFragment &ZFF = cast<MCZeroFillFragment>(F); - - // Align the fragment offset; it is safe to adjust the offset freely since - // this is only in virtual sections. - // - // FIXME: We shouldn't be doing this here. - Address = RoundUpToAlignment(Address, ZFF.getAlignment()); - Layout.setFragmentOffset(&F, Address - StartAddress); - - EffectiveSize = ZFF.getSize(); + EffectiveSize = cast<MCZeroFillFragment>(F).getSize(); break; } } @@ -498,6 +489,8 @@ MCAlignFragment &AF = cast<MCAlignFragment>(F); uint64_t Count = FragmentSize / AF.getValueSize(); + assert(AF.getValueSize() && "Invalid virtual align in concrete fragment!"); + // FIXME: This error shouldn't actually occur (the front end should emit // multiple .align directives to enforce the semantics it wants), but is // severe enough that we want to report it. How to handle this? 
@@ -912,7 +905,7 @@ OS << "<MCZeroFillFragment "; this->MCFragment::dump(); OS << "\n "; - OS << " Size:" << getSize() << " Alignment:" << getAlignment() << ">"; + OS << " Size:" << getSize() << ">"; } void MCSectionData::dump() { Modified: llvm/trunk/lib/MC/MCMachOStreamer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCMachOStreamer.cpp?rev=103661&r1=103660&r2=103661&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCMachOStreamer.cpp (original) +++ llvm/trunk/lib/MC/MCMachOStreamer.cpp Wed May 12 17:51:27 2010 @@ -321,7 +321,12 @@ MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); - MCFragment *F = new MCZeroFillFragment(Size, ByteAlignment, &SectData); + // Emit an align fragment if necessary. + if (ByteAlignment != 1) + new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, /*EmitNops=*/false, + &SectData); + + MCFragment *F = new MCZeroFillFragment(Size, &SectData); SD.setFragment(F); if (Assembler.isSymbolLinkerVisible(&SD)) F->setAtom(&SD); From daniel at zuster.org Wed May 12 17:56:23 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Wed, 12 May 2010 22:56:23 -0000 Subject: [llvm-commits] [llvm] r103665 - in /llvm/trunk: include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp lib/MC/MCMachOStreamer.cpp Message-ID: <20100512225623.D244F312800A@llvm.org> Author: ddunbar Date: Wed May 12 17:56:23 2010 New Revision: 103665 URL: http://llvm.org/viewvc/llvm-project?rev=103665&view=rev Log: MC: Move MCAlignFragment::EmitNops value out of the constructor. 
Modified: llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp llvm/trunk/lib/MC/MCMachOStreamer.cpp Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103665&r1=103664&r2=103665&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Wed May 12 17:56:23 2010 @@ -253,17 +253,17 @@ /// cannot be satisfied in this width then this fragment is ignored. unsigned MaxBytesToEmit; - /// EmitNops - true when aligning code and optimal nops to be used for - /// filling. - bool EmitNops; + /// EmitNops - Flag to indicate that (optimal) NOPs should be emitted instead + /// of using the provided value. The exact interpretation of this flag is + /// target dependent. + bool EmitNops : 1; public: MCAlignFragment(unsigned _Alignment, int64_t _Value, unsigned _ValueSize, - unsigned _MaxBytesToEmit, bool _EmitNops, - MCSectionData *SD = 0) + unsigned _MaxBytesToEmit, MCSectionData *SD = 0) : MCFragment(FT_Align, SD), Alignment(_Alignment), Value(_Value),ValueSize(_ValueSize), - MaxBytesToEmit(_MaxBytesToEmit), EmitNops(_EmitNops) {} + MaxBytesToEmit(_MaxBytesToEmit), EmitNops(false) {} /// @name Accessors /// @{ @@ -276,7 +276,8 @@ unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; } - unsigned getEmitNops() const { return EmitNops; } + bool hasEmitNops() const { return EmitNops; } + void setEmitNops(bool Value) { EmitNops = Value; } /// @} Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103665&r1=103664&r2=103665&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 17:56:23 2010 @@ -498,7 +498,7 @@ // the Count bytes. 
Then if that did not fill any bytes or there are any // bytes left to fill use the the Value and ValueSize to fill the rest. // If we are aligning with nops, ask that target to emit the right data. - if (AF.getEmitNops()) { + if (AF.hasEmitNops()) { if (!Asm.getBackend().WriteNopData(Count, OW)) report_fatal_error("unable to write nop sequence of " + Twine(Count) + " bytes"); Modified: llvm/trunk/lib/MC/MCMachOStreamer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCMachOStreamer.cpp?rev=103665&r1=103664&r2=103665&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCMachOStreamer.cpp (original) +++ llvm/trunk/lib/MC/MCMachOStreamer.cpp Wed May 12 17:56:23 2010 @@ -323,8 +323,7 @@ // Emit an align fragment if necessary. if (ByteAlignment != 1) - new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, /*EmitNops=*/false, - &SectData); + new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectData); MCFragment *F = new MCFillFragment(0, 0, Size, &SectData); SD.setFragment(F); @@ -365,8 +364,7 @@ if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; MCFragment *F = new MCAlignFragment(ByteAlignment, Value, ValueSize, - MaxBytesToEmit, /*EmitNops=*/false, - CurSectionData); + MaxBytesToEmit, CurSectionData); F->setAtom(CurrentAtomMap.lookup(CurSectionData)); // Update the maximum alignment on the current section if necessary. @@ -378,8 +376,9 @@ unsigned MaxBytesToEmit) { if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - MCFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, - /*EmitNops=*/true, CurSectionData); + MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, + CurSectionData); + F->setEmitNops(true); F->setAtom(CurrentAtomMap.lookup(CurSectionData)); // Update the maximum alignment on the current section if necessary. 
From evan.cheng at apple.com Wed May 12 18:13:12 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 12 May 2010 23:13:12 -0000 Subject: [llvm-commits] [llvm] r103667 - /llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Message-ID: <20100512231313.03250312800A@llvm.org> Author: evancheng Date: Wed May 12 18:13:12 2010 New Revision: 103667 URL: http://llvm.org/viewvc/llvm-project?rev=103667&view=rev Log: Fix some potential issues in the pseudo instruction expansion phase: copy implicit operands and memoperands. Also, expand instructions even if their defs are "dead" since they may have implicit kill operands. Modified: llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Modified: llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp?rev=103667&r1=103666&r2=103667&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Wed May 12 18:13:12 2010 @@ -37,11 +37,31 @@ } private: + void TransferImpOps(MachineInstr &OldMI, + MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); bool ExpandMBB(MachineBasicBlock &MBB); }; char ARMExpandPseudo::ID = 0; } +/// TransferImpOps - Transfer implicit operands on the pseudo instruction to +/// the instructions created from the expansion. 
+void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, + MachineInstrBuilder &UseMI, + MachineInstrBuilder &DefMI) { + const TargetInstrDesc &Desc = OldMI.getDesc(); + for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); + i != e; ++i) { + const MachineOperand &MO = OldMI.getOperand(i); + assert(MO.isReg() && MO.getReg()); + if (MO.isUse()) + UseMI.addReg(MO.getReg(), getKillRegState(MO.isKill())); + else + DefMI.addReg(MO.getReg(), + getDefRegState(true) | getDeadRegState(MO.isDead())); + } +} + bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool Modified = false; @@ -58,48 +78,54 @@ unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) ? ARM::tLDRpci : ARM::t2LDRpci; unsigned DstReg = MI.getOperand(0).getReg(); - if (!MI.getOperand(0).isDead()) { - MachineInstr *NewMI = - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(NewLdOpc), DstReg) - .addOperand(MI.getOperand(1))); - NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) - .addReg(DstReg, getDefRegState(true)) - .addReg(DstReg) - .addOperand(MI.getOperand(2)); - } + bool DstIsDead = MI.getOperand(0).isDead(); + MachineInstrBuilder MIB1 = + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(NewLdOpc), DstReg) + .addOperand(MI.getOperand(1))); + (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::tPICADD)) + .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .addOperand(MI.getOperand(2)); + TransferImpOps(MI, MIB1, MIB2); MI.eraseFromParent(); Modified = true; break; } + case ARM::t2MOVi32imm: { + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); unsigned DstReg = MI.getOperand(0).getReg(); - if (!MI.getOperand(0).isDead()) { - const MachineOperand &MO = MI.getOperand(1); - MachineInstrBuilder LO16, HI16; - - LO16 = 
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16), - DstReg); - HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16)) - .addReg(DstReg, getDefRegState(true)).addReg(DstReg); - - if (MO.isImm()) { - unsigned Imm = MO.getImm(); - unsigned Lo16 = Imm & 0xffff; - unsigned Hi16 = (Imm >> 16) & 0xffff; - LO16 = LO16.addImm(Lo16); - HI16 = HI16.addImm(Hi16); - } else { - const GlobalValue *GV = MO.getGlobal(); - unsigned TF = MO.getTargetFlags(); - LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); - HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); - // FIXME: What's about memoperands? - } - AddDefaultPred(LO16); - AddDefaultPred(HI16); + bool DstIsDead = MI.getOperand(0).isDead(); + const MachineOperand &MO = MI.getOperand(1); + MachineInstrBuilder LO16, HI16; + + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16), + DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16)) + .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(DstReg); + + if (MO.isImm()) { + unsigned Imm = MO.getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + LO16 = LO16.addImm(Lo16); + HI16 = HI16.addImm(Hi16); + } else { + const GlobalValue *GV = MO.getGlobal(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); + HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); } + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16.addImm(Pred).addReg(PredReg); + HI16.addImm(Pred).addReg(PredReg); + TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); Modified = true; } From sabre at nondot.org Wed May 12 18:13:36 2010 From: sabre at nondot.org (Chris Lattner) Date: Wed, 12 May 2010 23:13:36 -0000 Subject: [llvm-commits] [llvm] r103668 - in /llvm/trunk: 
lib/Target/X86/AsmParser/X86AsmParser.cpp lib/Target/X86/X86InstrInfo.td test/MC/AsmParser/X86/x86_32-new-encoder.s test/MC/AsmParser/X86/x86_64-new-encoder.s Message-ID: <20100512231336.41834312800A@llvm.org> Author: lattner Date: Wed May 12 18:13:36 2010 New Revision: 103668 URL: http://llvm.org/viewvc/llvm-project?rev=103668&view=rev Log: moffset forms of moves are x86-32 only, make the parser lower them to the correct x86-64 instructions since we don't have a clean way to handle this in td files yet. rdar://7947184 Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp llvm/trunk/lib/Target/X86/X86InstrInfo.td llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=103668&r1=103667&r2=103668&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Wed May 12 18:13:36 2010 @@ -622,6 +622,31 @@ return false; } +/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a +/// imm operand, to having "rm" or "mr" operands with the offset in the disp +/// field. +static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo, + bool isMR) { + MCOperand Disp = Inst.getOperand(0); + + // Start over with an empty instruction. + Inst = MCInst(); + Inst.setOpcode(Opc); + + if (isMR) + Inst.addOperand(MCOperand::CreateReg(RegNo)); + + // Add the mem operand. 
+ Inst.addOperand(MCOperand::CreateReg(0)); // Segment + Inst.addOperand(MCOperand::CreateImm(1)); // Scale + Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg + Inst.addOperand(Disp); // Displacement + Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg + + if (!isMR) + Inst.addOperand(MCOperand::CreateReg(RegNo)); +} + // FIXME: Custom X86 cleanup function to implement a temporary hack to handle // matching INCL/DECL correctly for x86_64. This needs to be replaced by a // proper mechanism for supporting (ambiguous) feature dependent instructions. @@ -637,6 +662,14 @@ case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break; case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break; case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break; + + // moffset instructions are x86-32 only. + case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break; + case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break; + case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break; + case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break; + case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break; + case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break; } } Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=103668&r1=103667&r2=103668&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Wed May 12 18:13:36 2010 @@ -966,8 +966,8 @@ "mov{l}\t{$src, $dst|$dst, $src}", [(store (i32 imm:$src), addr:$dst)]>; -/// moffs8, moffs16, moffs32 and moffs64 versions of moves. The immediate is a -/// 32-bit offset from the PC. +/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a +/// 32-bit offset from the PC. 
These are only valid in x86-32 mode. def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|%al, $src}", []>; def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), @@ -980,7 +980,7 @@ "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize; def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, %eax}", []>; - + // Moves to and from segment registers def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), "mov{w}\t{$src, $dst|$dst, $src}", []>; Modified: llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s?rev=103668&r1=103667&r2=103668&view=diff ============================================================================== --- llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s (original) +++ llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s Wed May 12 18:13:36 2010 @@ -52,7 +52,6 @@ shrl $1, %eax // moffset forms of moves, rdar://7947184 - movb 0, %al // CHECK: movb 0, %al # encoding: [0xa0,A,A,A,A] movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0xa1,A,A,A,A] movl 0, %eax // CHECK: movl 0, %eax # encoding: [0xa1,A,A,A,A] Modified: llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s?rev=103668&r1=103667&r2=103668&view=diff ============================================================================== --- llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s (original) +++ llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s Wed May 12 18:13:36 2010 @@ -69,3 +69,10 @@ stosl // CHECK: stosl // CHECK: encoding: [0xab] + + +// Not moffset forms of moves, they are x86-32 only! 
rdar://7947184 +movb 0, %al // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,A,A,A,A] +movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,A,A,A,A] +movl 0, %eax // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,A,A,A,A] + From clattner at apple.com Wed May 12 18:17:50 2010 From: clattner at apple.com (Chris Lattner) Date: Wed, 12 May 2010 16:17:50 -0700 Subject: [llvm-commits] [llvm] r103647 - in /llvm/trunk: include/llvm/ADT/ilist_node.h unittests/ADT/ilistTest.cpp In-Reply-To: <20100512213519.80901312800A@llvm.org> References: <20100512213519.80901312800A@llvm.org> Message-ID: <4F17D515-9B48-41B4-B849-D6B5B4FB027F@apple.com> On May 12, 2010, at 2:35 PM, Daniel Dunbar wrote: > Author: ddunbar > Date: Wed May 12 16:35:19 2010 > New Revision: 103647 > > URL: http://llvm.org/viewvc/llvm-project?rev=103647&view=rev > Log: > ADT: Add ilist_node::get{Prev,Next}Node, which return the adjacent node or null. > - This provides a convenient alternative to using something llvm::prior or > manual iterator access, for example:: Works for me, thanks. -Chris > > if (T *Prev = foo->getPrevNode()) > ... > > instead of:: > > iterator it(foo); > if (it != begin()) { > --it; > ... > } > > - Chris, please review. > > Added: > llvm/trunk/unittests/ADT/ilistTest.cpp > Modified: > llvm/trunk/include/llvm/ADT/ilist_node.h > > Modified: llvm/trunk/include/llvm/ADT/ilist_node.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/ilist_node.h?rev=103647&r1=103646&r2=103647&view=diff > ============================================================================== > --- llvm/trunk/include/llvm/ADT/ilist_node.h (original) > +++ llvm/trunk/include/llvm/ADT/ilist_node.h Wed May 12 16:35:19 2010 > @@ -49,6 +49,56 @@ > void setNext(NodeTy *N) { Next = N; } > protected: > ilist_node() : Next(0) {} > + > +public: > + /// @name Adjacent Node Accessors > + /// @{ > + > + /// \brief Get the previous node, or 0 for the list head. 
> + NodeTy *getPrevNode() { > + NodeTy *Prev = this->getPrev(); > + > + // Check for sentinel. > + if (!Prev->getNext()) > + return 0; > + > + return Prev; > + } > + > + /// \brief Get the previous node, or 0 for the list head. > + const NodeTy *getPrevNode() const { > + NodeTy *Prev = this->getPrev(); > + > + // Check for sentinel. > + if (!Prev->getNext()) > + return 0; > + > + return Prev; > + } > + > + /// \brief Get the next node, or 0 for the list tail. > + NodeTy *getNextNode() { > + NodeTy *Next = getNext(); > + > + // Check for sentinel. > + if (!Next->getNext()) > + return 0; > + > + return Next; > + } > + > + /// \brief Get the next node, or 0 for the list tail. > + const NodeTy *getNextNode() const { > + NodeTy *Next = getNext(); > + > + // Check for sentinel. > + if (!Next->getNext()) > + return 0; > + > + return Next; > + } > + > + /// @} > }; > > } // End llvm namespace > > Added: llvm/trunk/unittests/ADT/ilistTest.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ADT/ilistTest.cpp?rev=103647&view=auto > ============================================================================== > --- llvm/trunk/unittests/ADT/ilistTest.cpp (added) > +++ llvm/trunk/unittests/ADT/ilistTest.cpp Wed May 12 16:35:19 2010 > @@ -0,0 +1,39 @@ > +//===- llvm/unittest/ADT/APInt.cpp - APInt unit tests ---------------------===// > +// > +// The LLVM Compiler Infrastructure > +// > +// This file is distributed under the University of Illinois Open Source > +// License. See LICENSE.TXT for details. 
> +// > +//===----------------------------------------------------------------------===// > + > +#include > +#include "gtest/gtest.h" > +#include "llvm/ADT/ilist.h" > +#include "llvm/ADT/ilist_node.h" > + > +using namespace llvm; > + > +namespace { > + > +struct Node : ilist_node { > + int Value; > + > + Node() {} > + Node(int _Value) : Value(_Value) {} > +}; > + > +TEST(ilistTest, Basic) { > + ilist List; > + List.push_back(Node(1)); > + EXPECT_EQ(1, List.back().Value); > + EXPECT_EQ(0, List.back().getPrevNode()); > + EXPECT_EQ(0, List.back().getNextNode()); > + > + List.push_back(Node(2)); > + EXPECT_EQ(2, List.back().Value); > + EXPECT_EQ(2, List.front().getNextNode()->Value); > + EXPECT_EQ(1, List.back().getPrevNode()->Value); > +} > + > +} > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From sabre at nondot.org Wed May 12 18:40:59 2010 From: sabre at nondot.org (Chris Lattner) Date: Wed, 12 May 2010 23:40:59 -0000 Subject: [llvm-commits] [llvm] r103677 - in /llvm/trunk: lib/Target/X86/AsmParser/X86AsmParser.cpp lib/Target/X86/X86InstrInfo.td test/MC/AsmParser/X86/x86_32-new-encoder.s test/MC/AsmParser/X86/x86_64-new-encoder.s Message-ID: <20100512234059.86D90312800A@llvm.org> Author: lattner Date: Wed May 12 18:40:59 2010 New Revision: 103677 URL: http://llvm.org/viewvc/llvm-project?rev=103677&view=rev Log: revert r103668 for now, it is apparently breaking things. 
Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp llvm/trunk/lib/Target/X86/X86InstrInfo.td llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=103677&r1=103676&r2=103677&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Wed May 12 18:40:59 2010 @@ -622,31 +622,6 @@ return false; } -/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a -/// imm operand, to having "rm" or "mr" operands with the offset in the disp -/// field. -static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo, - bool isMR) { - MCOperand Disp = Inst.getOperand(0); - - // Start over with an empty instruction. - Inst = MCInst(); - Inst.setOpcode(Opc); - - if (isMR) - Inst.addOperand(MCOperand::CreateReg(RegNo)); - - // Add the mem operand. - Inst.addOperand(MCOperand::CreateReg(0)); // Segment - Inst.addOperand(MCOperand::CreateImm(1)); // Scale - Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg - Inst.addOperand(Disp); // Displacement - Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg - - if (!isMR) - Inst.addOperand(MCOperand::CreateReg(RegNo)); -} - // FIXME: Custom X86 cleanup function to implement a temporary hack to handle // matching INCL/DECL correctly for x86_64. This needs to be replaced by a // proper mechanism for supporting (ambiguous) feature dependent instructions. @@ -662,14 +637,6 @@ case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break; case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break; case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break; - - // moffset instructions are x86-32 only. 
- case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break; - case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break; - case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break; - case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break; - case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break; - case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break; } } Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=103677&r1=103676&r2=103677&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Wed May 12 18:40:59 2010 @@ -966,8 +966,8 @@ "mov{l}\t{$src, $dst|$dst, $src}", [(store (i32 imm:$src), addr:$dst)]>; -/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a -/// 32-bit offset from the PC. These are only valid in x86-32 mode. +/// moffs8, moffs16, moffs32 and moffs64 versions of moves. The immediate is a +/// 32-bit offset from the PC. 
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|%al, $src}", []>; def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), @@ -980,7 +980,7 @@ "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize; def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, %eax}", []>; - + // Moves to and from segment registers def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), "mov{w}\t{$src, $dst|$dst, $src}", []>; Modified: llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s?rev=103677&r1=103676&r2=103677&view=diff ============================================================================== --- llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s (original) +++ llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s Wed May 12 18:40:59 2010 @@ -52,6 +52,7 @@ shrl $1, %eax // moffset forms of moves, rdar://7947184 + movb 0, %al // CHECK: movb 0, %al # encoding: [0xa0,A,A,A,A] movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0xa1,A,A,A,A] movl 0, %eax // CHECK: movl 0, %eax # encoding: [0xa1,A,A,A,A] Modified: llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s?rev=103677&r1=103676&r2=103677&view=diff ============================================================================== --- llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s (original) +++ llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s Wed May 12 18:40:59 2010 @@ -69,10 +69,3 @@ stosl // CHECK: stosl // CHECK: encoding: [0xab] - - -// Not moffset forms of moves, they are x86-32 only! 
rdar://7947184 -movb 0, %al // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,A,A,A,A] -movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,A,A,A,A] -movl 0, %eax // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,A,A,A,A] - From evan.cheng at apple.com Wed May 12 18:59:42 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Wed, 12 May 2010 23:59:42 -0000 Subject: [llvm-commits] [llvm] r103679 - /llvm/trunk/lib/CodeGen/RegAllocLocal.cpp Message-ID: <20100512235943.0493C312800A@llvm.org> Author: evancheng Date: Wed May 12 18:59:42 2010 New Revision: 103679 URL: http://llvm.org/viewvc/llvm-project?rev=103679&view=rev Log: Do not attempt copy coalescing if the source and dest sub-register indices do not match. Modified: llvm/trunk/lib/CodeGen/RegAllocLocal.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocLocal.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocLocal.cpp?rev=103679&r1=103678&r2=103679&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocLocal.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocLocal.cpp Wed May 12 18:59:42 2010 @@ -846,7 +846,8 @@ unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg; unsigned SrcCopyPhysReg = 0U; bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, - SrcCopySubReg, DstCopySubReg); + SrcCopySubReg, DstCopySubReg) && + SrcCopySubReg == DstCopySubReg; if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg)) SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg); @@ -1154,7 +1155,8 @@ // the register scavenger. See pr4100.) 
if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg) && - SrcCopyReg == DstCopyReg && DeadDefs.empty()) + SrcCopyReg == DstCopyReg && SrcCopySubReg == DstCopySubReg && + DeadDefs.empty()) MBB.erase(MI); } From evan.cheng at apple.com Wed May 12 19:00:35 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 13 May 2010 00:00:35 -0000 Subject: [llvm-commits] [llvm] r103680 - /llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Message-ID: <20100513000035.3BE97312800A@llvm.org> Author: evancheng Date: Wed May 12 19:00:35 2010 New Revision: 103680 URL: http://llvm.org/viewvc/llvm-project?rev=103680&view=rev Log: If REG_SEQUENCE source is livein, copy it first. Also, update livevariables information when a copy is introduced. Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=103680&r1=103679&r2=103680&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original) +++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Wed May 12 19:00:35 2010 @@ -1175,17 +1175,36 @@ llvm_unreachable(0); } - if (!Seen.insert(SrcReg)) { - // REG_SEQUENCE cannot have duplicated operands. Add a copy. + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent()) { + // REG_SEQUENCE cannot have duplicated operands, add a copy. + // Also add a copy if the source is live-in to the block. We don't want + // to end up with a partial-redef of a livein, e.g. + // BB0: + // reg1051:10 = + // ... + // BB1: + // ... = reg1051:10 + // BB2: + // reg1051:9 = + // LiveIntervalAnalysis won't like it.
const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); unsigned NewReg = MRI->createVirtualRegister(RC); + MachineBasicBlock::iterator InsertLoc = MI; bool Emitted = - TII->copyRegToReg(*MI->getParent(), MI, NewReg, SrcReg, RC, RC, + TII->copyRegToReg(*MI->getParent(), InsertLoc, NewReg, SrcReg, RC, RC, MI->getDebugLoc()); (void)Emitted; assert(Emitted && "Unable to issue a copy instruction!\n"); MI->getOperand(i).setReg(NewReg); - MI->getOperand(i).setIsKill(); + if (MI->getOperand(i).isKill()) { + MachineBasicBlock::iterator CopyMI = prior(InsertLoc); + MachineOperand *KillMO = CopyMI->findRegisterUseOperand(SrcReg); + KillMO->setIsKill(); + if (LV) + // Update live variables + LV->replaceKillInstruction(SrcReg, MI, &*CopyMI); + } } } From sabre at nondot.org Wed May 12 19:02:47 2010 From: sabre at nondot.org (Chris Lattner) Date: Thu, 13 May 2010 00:02:47 -0000 Subject: [llvm-commits] [llvm] r103681 - in /llvm/trunk: lib/Target/X86/AsmParser/X86AsmParser.cpp lib/Target/X86/X86InstrInfo.td test/MC/AsmParser/X86/x86_32-new-encoder.s test/MC/AsmParser/X86/x86_64-new-encoder.s Message-ID: <20100513000247.73229312800A@llvm.org> Author: lattner Date: Wed May 12 19:02:47 2010 New Revision: 103681 URL: http://llvm.org/viewvc/llvm-project?rev=103681&view=rev Log: reapply r103668 with a fix. Never make "minor syntax changes" after testing before committing. 
Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp llvm/trunk/lib/Target/X86/X86InstrInfo.td llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s Modified: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp?rev=103681&r1=103680&r2=103681&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp (original) +++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp Wed May 12 19:02:47 2010 @@ -622,6 +622,31 @@ return false; } +/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a +/// imm operand, to having "rm" or "mr" operands with the offset in the disp +/// field. +static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo, + bool isMR) { + MCOperand Disp = Inst.getOperand(0); + + // Start over with an empty instruction. + Inst = MCInst(); + Inst.setOpcode(Opc); + + if (!isMR) + Inst.addOperand(MCOperand::CreateReg(RegNo)); + + // Add the mem operand. + Inst.addOperand(MCOperand::CreateReg(0)); // Segment + Inst.addOperand(MCOperand::CreateImm(1)); // Scale + Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg + Inst.addOperand(Disp); // Displacement + Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg + + if (isMR) + Inst.addOperand(MCOperand::CreateReg(RegNo)); +} + // FIXME: Custom X86 cleanup function to implement a temporary hack to handle // matching INCL/DECL correctly for x86_64. This needs to be replaced by a // proper mechanism for supporting (ambiguous) feature dependent instructions. @@ -637,6 +662,14 @@ case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break; case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break; case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break; + + // moffset instructions are x86-32 only. 
+ case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break; + case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break; + case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break; + case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break; + case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break; + case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break; } } Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=103681&r1=103680&r2=103681&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Wed May 12 19:02:47 2010 @@ -966,8 +966,8 @@ "mov{l}\t{$src, $dst|$dst, $src}", [(store (i32 imm:$src), addr:$dst)]>; -/// moffs8, moffs16, moffs32 and moffs64 versions of moves. The immediate is a -/// 32-bit offset from the PC. +/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a +/// 32-bit offset from the PC. These are only valid in x86-32 mode. 
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|%al, $src}", []>; def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src), @@ -980,7 +980,7 @@ "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize; def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, %eax}", []>; - + // Moves to and from segment registers def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), "mov{w}\t{$src, $dst|$dst, $src}", []>; Modified: llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s?rev=103681&r1=103680&r2=103681&view=diff ============================================================================== --- llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s (original) +++ llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s Wed May 12 19:02:47 2010 @@ -52,7 +52,6 @@ shrl $1, %eax // moffset forms of moves, rdar://7947184 - movb 0, %al // CHECK: movb 0, %al # encoding: [0xa0,A,A,A,A] movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0xa1,A,A,A,A] movl 0, %eax // CHECK: movl 0, %eax # encoding: [0xa1,A,A,A,A] Modified: llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s?rev=103681&r1=103680&r2=103681&view=diff ============================================================================== --- llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s (original) +++ llvm/trunk/test/MC/AsmParser/X86/x86_64-new-encoder.s Wed May 12 19:02:47 2010 @@ -69,3 +69,10 @@ stosl // CHECK: stosl // CHECK: encoding: [0xab] + + +// Not moffset forms of moves, they are x86-32 only! 
rdar://7947184 +movb 0, %al // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,A,A,A,A] +movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,A,A,A,A] +movl 0, %eax // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,A,A,A,A] + From sabre at nondot.org Wed May 12 19:10:34 2010 From: sabre at nondot.org (Chris Lattner) Date: Thu, 13 May 2010 00:10:34 -0000 Subject: [llvm-commits] [llvm] r103682 - in /llvm/trunk: lib/MC/MCParser/AsmParser.cpp test/MC/AsmParser/directive_zerofill.s Message-ID: <20100513001034.90831312800A@llvm.org> Author: lattner Date: Wed May 12 19:10:34 2010 New Revision: 103682 URL: http://llvm.org/viewvc/llvm-project?rev=103682&view=rev Log: fix rdar://7965971 and a fixme: use ParseIdentifier in ParseDirectiveDarwinZerofill instead of hard coding the check for identifier. This allows quoted symbol names to be used. Modified: llvm/trunk/lib/MC/MCParser/AsmParser.cpp llvm/trunk/test/MC/AsmParser/directive_zerofill.s Modified: llvm/trunk/lib/MC/MCParser/AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/AsmParser.cpp?rev=103682&r1=103681&r2=103682&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCParser/AsmParser.cpp (original) +++ llvm/trunk/lib/MC/MCParser/AsmParser.cpp Wed May 12 19:10:34 2010 @@ -1344,22 +1344,18 @@ /// ::= .zerofill segname , sectname [, identifier , size_expression [ /// , align_expression ]] bool AsmParser::ParseDirectiveDarwinZerofill() { - // FIXME: Handle quoted names here. 
- - if (Lexer.isNot(AsmToken::Identifier)) + StringRef Segment; + if (ParseIdentifier(Segment)) return TokError("expected segment name after '.zerofill' directive"); - StringRef Segment = getTok().getString(); - Lex(); if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); - - if (Lexer.isNot(AsmToken::Identifier)) + + StringRef Section; + if (ParseIdentifier(Section)) return TokError("expected section name after comma in '.zerofill' " "directive"); - StringRef Section = getTok().getString(); - Lex(); // If this is the end of the line all that was wanted was to create the // the section but with no symbol. @@ -1375,13 +1371,13 @@ return TokError("unexpected token in directive"); Lex(); - if (Lexer.isNot(AsmToken::Identifier)) + SMLoc IDLoc = Lexer.getLoc(); + StringRef IDStr; + if (ParseIdentifier(IDStr)) return TokError("expected identifier in directive"); // handle the identifier as the key symbol. - SMLoc IDLoc = Lexer.getLoc(); - MCSymbol *Sym = CreateSymbol(getTok().getString()); - Lex(); + MCSymbol *Sym = CreateSymbol(IDStr); if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Modified: llvm/trunk/test/MC/AsmParser/directive_zerofill.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/directive_zerofill.s?rev=103682&r1=103681&r2=103682&view=diff ============================================================================== --- llvm/trunk/test/MC/AsmParser/directive_zerofill.s (original) +++ llvm/trunk/test/MC/AsmParser/directive_zerofill.s Wed May 12 19:10:34 2010 @@ -4,7 +4,11 @@ # CHECK: .zerofill __FOO,__bar,x,1 # CHECK: .zerofill __FOO,__bar,y,8,2 # CHECK: .zerofill __EMPTY,__NoSymbol +# CHECK: .zerofill __DATA,__bss,"what you say?",8,3 TEST0: .zerofill __FOO, __bar, x, 2-1 .zerofill __FOO, __bar, y , 8 , 1+1 .zerofill __EMPTY,__NoSymbol + + # rdar://7965971 + .zerofill __DATA, __bss, "what you say?", 8, 3 From evan.cheng at apple.com Wed May 12 19:16:46 2010 From: 
evan.cheng at apple.com (Evan Cheng) Date: Thu, 13 May 2010 00:16:46 -0000 Subject: [llvm-commits] [llvm] r103683 - in /llvm/trunk/lib/Target/ARM: ARMInstrNEON.td ARMInstrVFP.td Message-ID: <20100513001646.E12A2312800A@llvm.org> Author: evancheng Date: Wed May 12 19:16:46 2010 New Revision: 103683 URL: http://llvm.org/viewvc/llvm-project?rev=103683&view=rev Log: Mark some pattern-less instructions as neverHasSideEffects. Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=103683&r1=103682&r2=103683&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed May 12 19:16:46 2010 @@ -2796,6 +2796,7 @@ // VMOV : Vector Move (Register) +let neverHasSideEffects = 1 in { def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), @@ -2805,6 +2806,7 @@ // be expanded after register allocation is completed. 
def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src), NoItinerary, "@ vmov\t$dst, $src", []>; +} // neverHasSideEffects // VMOV : Vector Move (Immediate) Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=103683&r1=103682&r2=103683&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Wed May 12 19:16:46 2010 @@ -313,6 +313,7 @@ IIC_fpMOVIS, "vmov", "\t$dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; +let neverHasSideEffects = 1 in { def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src", @@ -326,6 +327,7 @@ [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; } +} // neverHasSideEffects // FMDHR: GPR -> SPR // FMDLR: GPR -> SPR @@ -337,6 +339,7 @@ let Inst{7-6} = 0b00; } +let neverHasSideEffects = 1 in def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", @@ -606,6 +609,7 @@ // FP Conditional moves. // +let neverHasSideEffects = 1 in { def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$dst), (ins DPR:$false, DPR:$true), IIC_fpUNA64, "vmov", ".f64\t$dst, $true", @@ -629,7 +633,7 @@ IIC_fpUNA32, "vneg", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; - +} // neverHasSideEffects //===----------------------------------------------------------------------===// // Misc. @@ -651,6 +655,7 @@ // FPSCR <-> GPR (for disassembly only) +let neverHasSideEffects = 1 in { let Uses = [FPSCR] in { def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", "\t$dst, fpscr", @@ -674,6 +679,7 @@ let Inst{4} = 1; } } +} // neverHasSideEffects // Materialize FP immediates. 
VFP3 only. let isReMaterializable = 1 in { From evan.cheng at apple.com Wed May 12 19:17:02 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 13 May 2010 00:17:02 -0000 Subject: [llvm-commits] [llvm] r103684 - /llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Message-ID: <20100513001702.E51E6312800A@llvm.org> Author: evancheng Date: Wed May 12 19:17:02 2010 New Revision: 103684 URL: http://llvm.org/viewvc/llvm-project?rev=103684&view=rev Log: Expand VMOVQQ into a pair of VMOVQ. Modified: llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Modified: llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp?rev=103684&r1=103683&r2=103684&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMExpandPseudoInsts.cpp Wed May 12 19:17:02 2010 @@ -29,6 +29,7 @@ ARMExpandPseudo() : MachineFunctionPass(&ID) {} const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -128,6 +129,31 @@ TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); Modified = true; + break; + } + + case ARM::VMOVQQ: { + unsigned DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + unsigned EvenDst = TRI->getSubReg(DstReg, ARM::QSUBREG_0); + unsigned OddDst = TRI->getSubReg(DstReg, ARM::QSUBREG_1); + unsigned SrcReg = MI.getOperand(1).getReg(); + bool SrcIsKill = MI.getOperand(1).isKill(); + unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::QSUBREG_0); + unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::QSUBREG_1); + MachineInstrBuilder Even = + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::VMOVQ)) + .addReg(EvenDst, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(EvenSrc, getKillRegState(SrcIsKill))); + MachineInstrBuilder Odd = + AddDefaultPred(BuildMI(MBB, 
MBBI, MI.getDebugLoc(), + TII->get(ARM::VMOVQ)) + .addReg(OddDst, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(OddSrc, getKillRegState(SrcIsKill))); + TransferImpOps(MI, Even, Odd); + MI.eraseFromParent(); + Modified = true; } } MBBI = NMBBI; @@ -138,6 +164,7 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); bool Modified = false; for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; From stoklund at 2pi.dk Wed May 12 19:19:39 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Thu, 13 May 2010 00:19:39 -0000 Subject: [llvm-commits] [llvm] r103685 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100513001939.5353F312800A@llvm.org> Author: stoklund Date: Wed May 12 19:19:39 2010 New Revision: 103685 URL: http://llvm.org/viewvc/llvm-project?rev=103685&view=rev Log: More asserts around physreg uses Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103685&r1=103684&r2=103685&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Wed May 12 19:19:39 2010 @@ -487,7 +487,7 @@ } /// reservePhysReg - Mark PhysReg as reserved. This is very similar to -/// defineVirtReg except the physreg is reverved instead of allocated. +/// defineVirtReg except the physreg is reserved instead of allocated. void RAFast::reservePhysReg(MachineBasicBlock &MBB, MachineInstr *MI, unsigned PhysReg) { UsedInInstr.set(PhysReg); @@ -623,6 +623,16 @@ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) || ReservedRegs.test(Reg)) continue; if (MO.isUse()) { +#ifndef NDEBUG + // We are using a physreg directly. It had better not be clobbered by a + // virtreg. 
+ assert(PhysRegState[Reg] <= regReserved && "Using clobbered physreg"); + if (PhysRegState[Reg] == regDisabled) + for (const unsigned *AS = TRI->getAliasSet(Reg); + unsigned Alias = *AS; ++AS) + assert(PhysRegState[Alias] <= regReserved && + "Physreg alias was clobbered"); +#endif PhysKills.push_back(Reg); // Any clean physreg use is a kill. UsedInInstr.set(Reg); } else if (MO.isEarlyClobber()) { From stoklund at 2pi.dk Wed May 12 19:19:43 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Thu, 13 May 2010 00:19:43 -0000 Subject: [llvm-commits] [llvm] r103686 - in /llvm/trunk: lib/CodeGen/RegAllocFast.cpp test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll test/CodeGen/X86/2008-09-18-inline-asm-2.ll Message-ID: <20100513001943.BE10C3128018@llvm.org> Author: stoklund Date: Wed May 12 19:19:43 2010 New Revision: 103686 URL: http://llvm.org/viewvc/llvm-project?rev=103686&view=rev Log: Take allocation hints from copy instructions to/from physregs. This causes way more identity copies to be generated, ripe for coalescing. 
Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103686&r1=103685&r2=103686&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Wed May 12 19:19:43 2010 @@ -49,6 +49,7 @@ private: const TargetMachine *TM; MachineFunction *MF; + MachineRegisterInfo *MRI; const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; @@ -132,11 +133,11 @@ LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); LiveRegMap::iterator allocVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg); + unsigned VirtReg, unsigned Hint); unsigned defineVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, unsigned VirtReg); + unsigned OpNum, unsigned VirtReg, unsigned Hint); unsigned reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, unsigned VirtReg); + unsigned OpNum, unsigned VirtReg, unsigned Hint); void reservePhysReg(MachineBasicBlock &MBB, MachineInstr *MI, unsigned PhysReg); void spillAll(MachineBasicBlock &MBB, MachineInstr *MI); @@ -216,7 +217,7 @@ LR.Dirty = false; DEBUG(dbgs() << " Spilling register " << TRI->getName(LR.PhysReg) << " containing %reg" << VirtReg); - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << " to stack slot #" << FrameIndex << "\n"); TII->storeRegToStackSlot(MBB, MI, LR.PhysReg, spillKill, @@ -331,15 +332,52 @@ /// allocVirtReg - Allocate a physical register for VirtReg. 
RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg) { + unsigned VirtReg, + unsigned Hint) { const unsigned spillCost = 100; assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF); TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF); + // Ignore invalid hints. + if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || + !RC->contains(Hint) || UsedInInstr.test(Hint))) + Hint = 0; + + // If there is no hint, peek at the first use of this register. + if (!Hint && !MRI->use_nodbg_empty(VirtReg)) { + MachineInstr &MI = *MRI->use_nodbg_begin(VirtReg); + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + // Copy to physreg -> use physreg as hint. + if (TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + SrcReg == VirtReg && TargetRegisterInfo::isPhysicalRegister(DstReg) && + RC->contains(DstReg) && !UsedInInstr.test(DstReg)) { + Hint = DstReg; + DEBUG(dbgs() << " %reg" << VirtReg << " gets hint from " << MI); + } + } + + // Take hint when possible. + if (Hint) { + assert(RC->contains(Hint) && !UsedInInstr.test(Hint) && + "Invalid hint should have been cleared"); + switch(PhysRegState[Hint]) { + case regDisabled: + case regReserved: + break; + default: + DEBUG(dbgs() << " %reg" << VirtReg << " really wants " + << TRI->getName(Hint) << "\n"); + spillVirtReg(MBB, MI, PhysRegState[Hint], true); + // Fall through. + case regFree: + return assignVirtToPhysReg(VirtReg, Hint); + } + } + // First try to find a completely free register. unsigned BestCost = 0, BestReg = 0; bool hasDisabled = false; @@ -447,12 +485,12 @@ /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. 
unsigned RAFast::defineVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, unsigned VirtReg) { + unsigned OpNum, unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); if (lri == LiveVirtRegs.end()) - lri = allocVirtReg(MBB, MI, VirtReg); + lri = allocVirtReg(MBB, MI, VirtReg, Hint); else addKillFlag(lri); // Kill before redefine. LiveReg &LR = lri->second; @@ -465,13 +503,13 @@ /// reloadVirtReg - Make sure VirtReg is available in a physreg and return it. unsigned RAFast::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, unsigned VirtReg) { + unsigned OpNum, unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); if (lri == LiveVirtRegs.end()) { - lri = allocVirtReg(MBB, MI, VirtReg); - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); + lri = allocVirtReg(MBB, MI, VirtReg, Hint); + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " << TRI->getName(lri->second.PhysReg) << "\n"); @@ -605,6 +643,11 @@ continue; } + // If this is a copy, we may be able to coalesce. + unsigned CopySrc, CopyDst, CopySrcSub, CopyDstSub; + if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub)) + CopySrc = CopyDst = 0; + // Track registers used by instruction. 
UsedInInstr.reset(); PhysDefs.clear(); @@ -651,11 +694,14 @@ unsigned Reg = MO.getReg(); if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; if (MO.isUse()) { - setPhysReg(MO, reloadVirtReg(MBB, MI, i, Reg)); + unsigned PhysReg = reloadVirtReg(MBB, MI, i, Reg, CopyDst); + if (CopySrc == Reg) + CopySrc = PhysReg; + setPhysReg(MO, PhysReg); if (MO.isKill()) VirtKills.push_back(Reg); } else if (MO.isEarlyClobber()) { - unsigned PhysReg = defineVirtReg(MBB, MI, i, Reg); + unsigned PhysReg = defineVirtReg(MBB, MI, i, Reg, 0); setPhysReg(MO, PhysReg); PhysDefs.push_back(PhysReg); } @@ -671,7 +717,7 @@ killPhysReg(PhysKills[i]); PhysKills.clear(); - MF->getRegInfo().addPhysRegsUsed(UsedInInstr); + MRI->addPhysRegsUsed(UsedInInstr); // Track registers defined by instruction - early clobbers at this point. UsedInInstr.reset(); @@ -702,7 +748,10 @@ } if (MO.isDead()) VirtKills.push_back(Reg); - setPhysReg(MO, defineVirtReg(MBB, MI, i, Reg)); + unsigned PhysReg = defineVirtReg(MBB, MI, i, Reg, CopySrc); + if (CopyDst == Reg) + CopyDst = PhysReg; + setPhysReg(MO, PhysReg); } // Spill all dirty virtregs before a call, in case of an exception. @@ -721,7 +770,7 @@ killPhysReg(PhysKills[i]); PhysKills.clear(); - MF->getRegInfo().addPhysRegsUsed(UsedInInstr); + MRI->addPhysRegsUsed(UsedInInstr); } // Spill all physical registers holding virtual registers now. 
@@ -739,6 +788,7 @@ DEBUG(dbgs() << "Machine Function\n"); DEBUG(Fn.dump()); MF = &Fn; + MRI = &MF->getRegInfo(); TM = &Fn.getTarget(); TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); @@ -748,7 +798,7 @@ // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers - unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg(); + unsigned LastVirtReg = MRI->getLastVirtReg(); StackSlotForVirtReg.grow(LastVirtReg); // Loop over all of the basic blocks, eliminating virtual register references @@ -757,7 +807,7 @@ AllocateBasicBlock(*MBB); // Make sure the set of used physregs is closed under subreg operations. - MF->getRegInfo().closePhysRegsUsed(*TRI); + MRI->closePhysRegsUsed(*TRI); StackSlotForVirtReg.clear(); return true; Modified: llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll?rev=103686&r1=103685&r2=103686&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll (original) +++ llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll Wed May 12 19:19:43 2010 @@ -7,8 +7,8 @@ ; CHECK: subfze r4,r6 ; LOCAL: subfc r6,r5,r4 ; LOCAL: subfze r3,r3 -; FAST: subfc r9,r8,r7 -; FAST: subfze r10,r6 +; FAST: subfc r3,r5,r4 +; FAST: subfze r4,r6 ; PR1357 Modified: llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll?rev=103686&r1=103685&r2=103686&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll (original) +++ llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll Wed May 12 19:19:43 2010 @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86 | grep "#%ebp %esi %edi 8(%edx) %eax (%ebx)" ; RUN: llc < %s -march=x86 
-regalloc=local | grep "#%edi %ebp %edx 8(%ebx) %eax (%esi)" -; RUN: llc < %s -march=x86 -regalloc=fast | grep "#%ecx %ebx %edi 8(%ebp) %eax (%esi)" +; RUN: llc < %s -march=x86 -regalloc=fast | grep "#%ecx %ebx %edx 8(%edi) %eax (%esi)" ; The 1st, 2nd, 3rd and 5th registers above must all be different. The registers ; referenced in the 4th and 6th operands must not be the same as the 1st or 5th From daniel at zuster.org Wed May 12 20:10:22 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 01:10:22 -0000 Subject: [llvm-commits] [llvm] r103689 - in /llvm/trunk: include/llvm/MC/MCAsmLayout.h include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp Message-ID: <20100513011022.DC98C312800A@llvm.org> Author: ddunbar Date: Wed May 12 20:10:22 2010 New Revision: 103689 URL: http://llvm.org/viewvc/llvm-project?rev=103689&view=rev Log: MC: Add MCSectionData::AddressSize, which is the size of the address space consumed by the section. This can differ from both the section logical size, and the section size on disk (although the current code handles this without making an explicit distinction). Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAsmLayout.h?rev=103689&r1=103688&r2=103689&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAsmLayout.h (original) +++ llvm/trunk/include/llvm/MC/MCAsmLayout.h Wed May 12 20:10:22 2010 @@ -91,10 +91,18 @@ /// \brief Set the data size of the given section. void setSectionFileSize(MCSectionData *SD, uint64_t Value); - /// \brief Get the actual data size of the given section. + /// \brief Get the address space size of the given section, as it effects + /// layout. 
This may differ from the size reported by \see getSectionSize() by + /// not including section tail padding. + uint64_t getSectionAddressSize(const MCSectionData *SD) const; + + /// \brief Set the address space size of the given section. + void setSectionAddressSize(MCSectionData *SD, uint64_t Value); + + /// \brief Get the logical data size of the given section. uint64_t getSectionSize(const MCSectionData *SD) const; - /// \brief Set the actual data size of the given section. + /// \brief Set the logical data size of the given section. void setSectionSize(MCSectionData *SD, uint64_t Value); /// @} Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103689&r1=103688&r2=103689&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Wed May 12 20:10:22 2010 @@ -394,9 +394,13 @@ /// initialized. uint64_t Address; - /// Size - The content size of this section. This is ~0 until initialized. + /// Size - The logical size of this section. This is ~0 until initialized. uint64_t Size; + /// AddressSize - The address space size used by this section. This is ~0 + /// until initialized. + uint64_t AddressSize; + /// FileSize - The size of this section in the object file. This is ~0 until /// initialized. 
uint64_t FileSize; Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103689&r1=103688&r2=103689&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 20:10:22 2010 @@ -126,6 +126,14 @@ SD->FileSize = Value; } +uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const { + assert(SD->AddressSize != ~UINT64_C(0) && "Address size not set!"); + return SD->AddressSize; +} +void MCAsmLayout::setSectionAddressSize(MCSectionData *SD, uint64_t Value) { + SD->AddressSize = Value; +} + /* *** */ MCFragment::MCFragment() : Kind(FragmentType(~0)) { @@ -150,6 +158,7 @@ Alignment(1), Address(~UINT64_C(0)), Size(~UINT64_C(0)), + AddressSize(~UINT64_C(0)), FileSize(~UINT64_C(0)), HasInstructions(false) { @@ -434,7 +443,8 @@ uint64_t StartAddress = 0; if (SectionOrderIndex) { MCSectionData *Prev = Layout.getSectionOrder()[SectionOrderIndex - 1]; - StartAddress = Layout.getSectionAddress(Prev) + Layout.getSectionSize(Prev); + StartAddress = (Layout.getSectionAddress(Prev) + + Layout.getSectionAddressSize(Prev)); } // Align this section if necessary by adding padding bytes to the previous @@ -465,6 +475,7 @@ Size = Layout.getFragmentOffset(F) + Layout.getFragmentEffectiveSize(F); } Layout.setSectionSize(&SD, Size); + Layout.setSectionAddressSize(&SD, Size); Layout.setSectionFileSize(&SD, IsVirtual ? 
0 : Size); } @@ -837,9 +848,7 @@ raw_ostream &OS = llvm::errs(); OS << ""; + << " EffectiveSize:" << EffectiveSize << ">"; } void MCAlignFragment::dump() { @@ -914,8 +923,8 @@ OS << "dump(); From daniel at zuster.org Wed May 12 20:10:26 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 01:10:26 -0000 Subject: [llvm-commits] [llvm] r103690 - in /llvm/trunk: include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp Message-ID: <20100513011026.529E73128018@llvm.org> Author: ddunbar Date: Wed May 12 20:10:26 2010 New Revision: 103690 URL: http://llvm.org/viewvc/llvm-project?rev=103690&view=rev Log: MC: Add MCAlignFragment::OnlyAlignAddress bit. This is a bit of magic that says the align fragment shouldn't contribute to the logical section size; it will be used for cleaning up the code to handle section alignment. Modified: llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103690&r1=103689&r2=103690&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Wed May 12 20:10:26 2010 @@ -258,12 +258,19 @@ /// target dependent. bool EmitNops : 1; + /// OnlyAlignAddress - Flag to indicate that this align is only used to adjust + /// the address space size of a section and that it should not be included as + /// part of the section size. This flag can only be used on the last fragment + /// in a section.
+ bool OnlyAlignAddress : 1; + public: MCAlignFragment(unsigned _Alignment, int64_t _Value, unsigned _ValueSize, unsigned _MaxBytesToEmit, MCSectionData *SD = 0) : MCFragment(FT_Align, SD), Alignment(_Alignment), Value(_Value),ValueSize(_ValueSize), - MaxBytesToEmit(_MaxBytesToEmit), EmitNops(false) {} + MaxBytesToEmit(_MaxBytesToEmit), EmitNops(false), + OnlyAlignAddress(false) {} /// @name Accessors /// @{ @@ -279,6 +286,9 @@ bool hasEmitNops() const { return EmitNops; } void setEmitNops(bool Value) { EmitNops = Value; } + bool hasOnlyAlignAddress() const { return OnlyAlignAddress; } + void setOnlyAlignAddress(bool Value) { OnlyAlignAddress = Value; } + /// @} static bool classof(const MCFragment *F) { Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103690&r1=103689&r2=103690&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 20:10:26 2010 @@ -392,6 +392,9 @@ case MCFragment::FT_Align: { MCAlignFragment &AF = cast(F); + assert((!AF.hasOnlyAlignAddress() || !AF.getNextNode()) && + "Invalid OnlyAlignAddress bit, not the last fragment!"); + EffectiveSize = OffsetToAlignment(Address, AF.getAlignment()); if (EffectiveSize > AF.getMaxBytesToEmit()) EffectiveSize = 0; @@ -474,9 +477,18 @@ MCFragment *F = &SD.getFragmentList().back(); Size = Layout.getFragmentOffset(F) + Layout.getFragmentEffectiveSize(F); } - Layout.setSectionSize(&SD, Size); Layout.setSectionAddressSize(&SD, Size); Layout.setSectionFileSize(&SD, IsVirtual ? 0 : Size); + + // Handle OnlyAlignAddress bit. 
+ if (!SD.getFragmentList().empty()) { + MCAlignFragment *AF = + dyn_cast(&SD.getFragmentList().back()); + if (AF && AF->hasOnlyAlignAddress()) + Size -= Layout.getFragmentEffectiveSize(AF); + } + + Layout.setSectionSize(&SD, Size); } /// WriteFragmentData - Write the \arg F data to the output file. @@ -856,6 +868,10 @@ OS << "MCFragment::dump(); + if (hasEmitNops()) + OS << " (emit nops)"; + if (hasOnlyAlignAddress()) + OS << " (only align section)"; OS << "\n "; OS << " Alignment:" << getAlignment() << " Value:" << getValue() << " ValueSize:" << getValueSize() From daniel at zuster.org Wed May 12 20:10:29 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 01:10:29 -0000 Subject: [llvm-commits] [llvm] r103691 - /llvm/trunk/test/MC/MachO/zerofill-5.s Message-ID: <20100513011029.0A40B3128026@llvm.org> Author: ddunbar Date: Wed May 12 20:10:28 2010 New Revision: 103691 URL: http://llvm.org/viewvc/llvm-project?rev=103691&view=rev Log: MC/Mach-O: Add another zerofill test to improve coverage. 
Added: llvm/trunk/test/MC/MachO/zerofill-5.s Added: llvm/trunk/test/MC/MachO/zerofill-5.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/MachO/zerofill-5.s?rev=103691&view=auto ============================================================================== --- llvm/trunk/test/MC/MachO/zerofill-5.s (added) +++ llvm/trunk/test/MC/MachO/zerofill-5.s Wed May 12 20:10:28 2010 @@ -0,0 +1,109 @@ +// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s + + .text + .align 3 + .long 2 + + .zerofill __DATA,__bss,_g0,8,3 + +// CHECK: ('cputype', 16777223) +// CHECK: ('cpusubtype', 3) +// CHECK: ('filetype', 1) +// CHECK: ('num_load_commands', 1) +// CHECK: ('load_commands_size', 336) +// CHECK: ('flag', 0) +// CHECK: ('reserved', 0) +// CHECK: ('load_commands', [ +// CHECK: # Load Command 0 +// CHECK: (('command', 25) +// CHECK: ('size', 232) +// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('vm_addr', 0) +// CHECK: ('vm_size', 16) +// CHECK: ('file_offset', 368) +// CHECK: ('file_size', 4) +// CHECK: ('maxprot', 7) +// CHECK: ('initprot', 7) +// CHECK: ('num_sections', 2) +// CHECK: ('flags', 0) +// CHECK: ('sections', [ +// CHECK: # Section 0 +// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('address', 0) +// CHECK: ('size', 4) +// CHECK: ('offset', 368) +// CHECK: ('alignment', 3) +// CHECK: ('reloc_offset', 0) +// CHECK: ('num_reloc', 0) +// CHECK: ('flags', 0x80000000) +// CHECK: ('reserved1', 0) +// CHECK: ('reserved2', 0) +// CHECK: ('reserved3', 0) +// CHECK: ), +// CHECK: ('_relocations', [ +// CHECK: ]) +// CHECK: ('_section_data', '\x02\x00\x00\x00') +// CHECK: # Section 1 +// CHECK: (('section_name', '__bss\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('segment_name', 
'__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('address', 8) +// CHECK: ('size', 8) +// CHECK: ('offset', 0) +// CHECK: ('alignment', 3) +// CHECK: ('reloc_offset', 0) +// CHECK: ('num_reloc', 0) +// CHECK: ('flags', 0x1) +// CHECK: ('reserved1', 0) +// CHECK: ('reserved2', 0) +// CHECK: ('reserved3', 0) +// CHECK: ), +// CHECK: ('_relocations', [ +// CHECK: ]) +// CHECK: ('_section_data', '\xcf\xfa\xed\xfe\x07\x00\x00\x01') +// CHECK: ]) +// CHECK: ), +// CHECK: # Load Command 1 +// CHECK: (('command', 2) +// CHECK: ('size', 24) +// CHECK: ('symoff', 372) +// CHECK: ('nsyms', 1) +// CHECK: ('stroff', 388) +// CHECK: ('strsize', 8) +// CHECK: ('_string_data', '\x00_g0\x00\x00\x00\x00') +// CHECK: ('_symbols', [ +// CHECK: # Symbol 0 +// CHECK: (('n_strx', 1) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 2) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 8) +// CHECK: ('_string', '_g0') +// CHECK: ), +// CHECK: ]) +// CHECK: ), +// CHECK: # Load Command 2 +// CHECK: (('command', 11) +// CHECK: ('size', 80) +// CHECK: ('ilocalsym', 0) +// CHECK: ('nlocalsym', 1) +// CHECK: ('iextdefsym', 1) +// CHECK: ('nextdefsym', 0) +// CHECK: ('iundefsym', 1) +// CHECK: ('nundefsym', 0) +// CHECK: ('tocoff', 0) +// CHECK: ('ntoc', 0) +// CHECK: ('modtaboff', 0) +// CHECK: ('nmodtab', 0) +// CHECK: ('extrefsymoff', 0) +// CHECK: ('nextrefsyms', 0) +// CHECK: ('indirectsymoff', 0) +// CHECK: ('nindirectsyms', 0) +// CHECK: ('extreloff', 0) +// CHECK: ('nextrel', 0) +// CHECK: ('locreloff', 0) +// CHECK: ('nlocrel', 0) +// CHECK: ('_indirect_symbols', [ +// CHECK: ]) +// CHECK: ), +// CHECK: ]) From evan.cheng at apple.com Wed May 12 20:12:06 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 13 May 2010 01:12:06 -0000 Subject: [llvm-commits] [llvm] r103692 - in /llvm/trunk/lib/Target/ARM: ARMBaseInstrInfo.cpp ARMInstrNEON.td Message-ID: <20100513011206.5EB94312800A@llvm.org> Author: evancheng Date: Wed May 12 20:12:06 2010 New Revision: 103692 URL: 
http://llvm.org/viewvc/llvm-project?rev=103692&view=rev Log: Bring back VLD1q and VST1q and use them for reloading / spilling Q registers. This allows folding loads and stores into VMOVQ. Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=103692&r1=103691&r2=103692&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Wed May 12 20:12:06 2010 @@ -776,19 +776,16 @@ RC == ARM::QPR_8RegisterClass) { // FIXME: Neon instructions should support predicates if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST1q64)) - .addFrameIndex(FI).addImm(128); - MIB = AddDReg(MIB, SrcReg, ARM::DSUBREG_0, getKillRegState(isKill), TRI); - MIB = AddDReg(MIB, SrcReg, ARM::DSUBREG_1, 0, TRI); - AddDefaultPred(MIB.addMemOperand(MMO)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q)) + .addFrameIndex(FI).addImm(128) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO)); } else { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) - .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) - .addMemOperand(MMO); - MIB = AddDReg(MIB, SrcReg, ARM::DSUBREG_0, getKillRegState(isKill), TRI); - AddDReg(MIB, SrcReg, ARM::DSUBREG_1, 0, TRI); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addMemOperand(MMO)); } } else { assert((RC == ARM::QQPRRegisterClass || @@ -826,7 +823,6 @@ MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachineMemOperand 
*MMO = MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), MachineMemOperand::MOLoad, 0, @@ -853,18 +849,14 @@ RC == ARM::QPR_VFP2RegisterClass || RC == ARM::QPR_8RegisterClass) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD1q64)); - MIB = AddDReg(MIB, DestReg, ARM::DSUBREG_0, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::DSUBREG_1, RegState::Define, TRI); - AddDefaultPred(MIB.addFrameIndex(FI).addImm(128).addMemOperand(MMO)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg) + .addFrameIndex(FI).addImm(128) + .addMemOperand(MMO)); } else { - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) - .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) - .addMemOperand(MMO); - MIB = AddDReg(MIB, DestReg, ARM::DSUBREG_0, RegState::Define, TRI); - AddDReg(MIB, DestReg, ARM::DSUBREG_1, RegState::Define, TRI); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addMemOperand(MMO)); } } else { assert((RC == ARM::QQPRRegisterClass || @@ -1004,8 +996,7 @@ DstSubReg) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } - } - else if (Opc == ARM::VMOVD) { + } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVDneon) { unsigned Pred = MI->getOperand(2).getImm(); unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store @@ -1031,6 +1022,56 @@ DstSubReg) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } + } else if (Opc == ARM::VMOVQ) { + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned SrcSubReg = MI->getOperand(1).getSubReg(); + bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); + if 
(MFI.getObjectAlignment(FI) >= 16 && + getRegisterInfo().canRealignStack(MF)) { + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VST1q)) + .addFrameIndex(FI).addImm(128) + .addReg(SrcReg, + getKillRegState(isKill) | getUndefRegState(isUndef), + SrcSubReg) + .addImm(Pred).addReg(PredReg); + } else { + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTMQ)) + .addReg(SrcReg, + getKillRegState(isKill) | getUndefRegState(isUndef), + SrcSubReg) + .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addImm(Pred).addReg(PredReg); + } + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned DstSubReg = MI->getOperand(0).getSubReg(); + bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); + if (MFI.getObjectAlignment(FI) >= 16 && + getRegisterInfo().canRealignStack(MF)) { + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLD1q)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef), + DstSubReg) + .addFrameIndex(FI).addImm(128).addImm(Pred).addReg(PredReg); + } else { + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDMQ)) + .addReg(DstReg, + RegState::Define | + getDeadRegState(isDead) | + getUndefRegState(isUndef), + DstSubReg) + .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) + .addImm(Pred).addReg(PredReg); + } + } } return NewMI; @@ -1059,10 +1100,9 @@ Opc == ARM::tMOVtgpr2gpr || Opc == ARM::tMOVgpr2tgpr) { return true; - } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) { + } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD || + Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) { return true; - } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) { - return false; // FIXME } return false; Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=103692&r1=103691&r2=103692&view=diff ============================================================================== --- 
llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed May 12 20:12:06 2010 @@ -123,6 +123,13 @@ : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p), IndexModeNone, IIC_fpLoadm, "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>; + +// Use vld1 to load a Q register as a D register pair. +// This alternative to VLDMQ allows an alignment to be specified. +// This is equivalent to VLD1q64 except that it has a Q register operand. +def VLD1q + : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr), + IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>; } // mayLoad = 1 let mayStore = 1 in { @@ -133,6 +140,13 @@ : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p), IndexModeNone, IIC_fpStorem, "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>; + +// Use vst1 to store a Q register as a D register pair. +// This alternative to VSTMQ allows an alignment to be specified. +// This is equivalent to VST1q64 except that it has a Q register operand. +def VST1q + : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src), + IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>; } // mayStore = 1 let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { From espindola at google.com Wed May 12 20:32:40 2010 From: espindola at google.com (Rafael Espindola) Date: Wed, 12 May 2010 18:32:40 -0700 Subject: [llvm-commits] [patch] Add an emit-llvm option to the gold plugin Message-ID: The attached patch adds the emit-llvm option to the gold plugin. It is a bit different from the behaviour of this option in other tools in that it requires a filename and gold still produces a regular ELF output. The reasons are *) There is no clean way for the plugin to get the name of the output file *) There is no clean way for the plugin to stop gold (short of calling exit(0)) Is this ok? Should I rename the option to also-emit-llvm?
Cheers, -- Rafael Ávila de Espíndola -------------- next part -------------- A non-text attachment was scrubbed... Name: emit-llvm.patch Type: application/octet-stream Size: 1748 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100512/b6093e89/attachment.obj From daniel at zuster.org Wed May 12 21:34:15 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 02:34:15 -0000 Subject: [llvm-commits] [llvm] r103693 - /llvm/trunk/lib/MC/MCAssembler.cpp Message-ID: <20100513023415.0FA17312800A@llvm.org> Author: ddunbar Date: Wed May 12 21:34:14 2010 New Revision: 103693 URL: http://llvm.org/viewvc/llvm-project?rev=103693&view=rev Log: MC: Switch to using explicit MCAlignFragments with OnlyAlignAddress bit instead of manually doing padding/editing layout in LayoutSection(). - This probably seems like six-of-one and half-dozen of another, but there is a method to my madness. Modified: llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103693&r1=103692&r2=103693&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 21:34:14 2010 @@ -442,7 +442,7 @@ ++stats::SectionLayouts; - // Get the section start address. + // Compute the section start address. uint64_t StartAddress = 0; if (SectionOrderIndex) { MCSectionData *Prev = Layout.getSectionOrder()[SectionOrderIndex - 1]; @@ -450,22 +450,10 @@ Layout.getSectionAddressSize(Prev)); } - // Align this section if necessary by adding padding bytes to the previous - // section. It is safe to adjust this out-of-band, because no symbol or - // fragment is allowed to point past the end of the section at any time.
- if (uint64_t Pad = OffsetToAlignment(StartAddress, SD.getAlignment())) { - // Unless this section is virtual (where we are allowed to adjust the offset - // freely), the padding goes in the previous section. - if (!IsVirtual) { - assert(SectionOrderIndex && "Invalid initial section address!"); - MCSectionData *Prev = Layout.getSectionOrder()[SectionOrderIndex - 1]; - Layout.setSectionFileSize(Prev, Layout.getSectionFileSize(Prev) + Pad); - } - - StartAddress += Pad; - } + // Honor the section alignment requirements. + StartAddress = RoundUpToAlignment(StartAddress, SD.getAlignment()); - // Set the aligned section address. + // Set the section address. Layout.setSectionAddress(&SD, StartAddress); for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) @@ -587,7 +575,6 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD, const MCAsmLayout &Layout, MCObjectWriter *OW) const { - uint64_t SectionSize = Layout.getSectionSize(SD); uint64_t SectionFileSize = Layout.getSectionFileSize(SD); // Ignore virtual sections. @@ -621,10 +608,6 @@ ie = SD->end(); it != ie; ++it) WriteFragmentData(*this, Layout, *it, OW); - // Add section padding. - assert(SectionFileSize >= SectionSize && "Invalid section sizes!"); - OW->WriteZeros(SectionFileSize - SectionSize); - assert(OW->getStream().tell() - Start == SectionFileSize); } @@ -645,8 +628,34 @@ it2->setOrdinal(FragmentIndex++); } - // Layout until everything fits. + // Create the layout object. MCAsmLayout Layout(*this); + + // Insert additional align fragments for concrete sections to explicitly pad + // the previous section to match their alignment requirements. This is for + // 'gas' compatibility, it shouldn't strictly be necessary. + // + // FIXME: This may be Mach-O specific. + for (unsigned i = 1, e = Layout.getSectionOrder().size(); i < e; ++i) { + MCSectionData *SD = Layout.getSectionOrder()[i]; + + // Ignore sections without alignment requirements. 
+ unsigned Align = SD->getAlignment(); + if (Align <= 1) + continue; + + // Ignore virtual sections, they don't cause file size modifications. + if (getBackend().isVirtualSection(SD->getSection())) + continue; + + // Otherwise, create a new align fragment at the end of the previous + // section. + MCAlignFragment *AF = new MCAlignFragment(Align, 0, 1, Align, + Layout.getSectionOrder()[i - 1]); + AF->setOnlyAlignAddress(true); + } + + // Layout until everything fits. while (LayoutOnce(Layout)) continue; From daniel at zuster.org Wed May 12 22:19:36 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 03:19:36 -0000 Subject: [llvm-commits] [llvm] r103694 - /llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Message-ID: <20100513031936.55F7D312800A@llvm.org> Author: ddunbar Date: Wed May 12 22:19:36 2010 New Revision: 103694 URL: http://llvm.org/viewvc/llvm-project?rev=103694&view=rev Log: Fix -Asserts warning. Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=103694&r1=103693&r2=103694&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Wed May 12 22:19:36 2010 @@ -344,6 +344,7 @@ MachineOperand &FMO = MI->getOperand(FirstOpnd); assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand"); unsigned VirtReg = FMO.getReg(); + (void)VirtReg; assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "expected a virtual register"); if (FMO.isDef()) { From daniel at zuster.org Wed May 12 22:19:50 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 03:19:50 -0000 Subject: [llvm-commits] [llvm] r103695 - in /llvm/trunk: include/llvm/MC/MCAsmLayout.h include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp Message-ID: 
<20100513031950.C2C5D312800A@llvm.org> Author: ddunbar Date: Wed May 12 22:19:50 2010 New Revision: 103695 URL: http://llvm.org/viewvc/llvm-project?rev=103695&view=rev Log: MC: Eliminate MCSectionData::{,Address,File}Size, which can now be computed by utility functions. Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAsmLayout.h?rev=103695&r1=103694&r2=103695&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAsmLayout.h (original) +++ llvm/trunk/include/llvm/MC/MCAsmLayout.h Wed May 12 22:19:50 2010 @@ -84,35 +84,26 @@ /// \brief Set the computed address of the given section. void setSectionAddress(MCSectionData *SD, uint64_t Value); - /// \brief Get the data size of the given section, as emitted to the object - /// file. This may include additional padding, or be 0 for virtual sections. - uint64_t getSectionFileSize(const MCSectionData *SD) const; + /// @} + /// @name Utility Functions + /// @{ - /// \brief Set the data size of the given section. - void setSectionFileSize(MCSectionData *SD, uint64_t Value); + /// \brief Get the address of the given fragment, as computed in the current + /// layout. + uint64_t getFragmentAddress(const MCFragment *F) const; /// \brief Get the address space size of the given section, as it effects /// layout. This may differ from the size reported by \see getSectionSize() by /// not including section tail padding. uint64_t getSectionAddressSize(const MCSectionData *SD) const; - /// \brief Set the address space size of the given section. - void setSectionAddressSize(MCSectionData *SD, uint64_t Value); + /// \brief Get the data size of the given section, as emitted to the object + /// file. This may include additional padding, or be 0 for virtual sections. 
+ uint64_t getSectionFileSize(const MCSectionData *SD) const; /// \brief Get the logical data size of the given section. uint64_t getSectionSize(const MCSectionData *SD) const; - /// \brief Set the logical data size of the given section. - void setSectionSize(MCSectionData *SD, uint64_t Value); - - /// @} - /// @name Utility Functions - /// @{ - - /// \brief Get the address of the given fragment, as computed in the current - /// layout. - uint64_t getFragmentAddress(const MCFragment *F) const; - /// \brief Get the address of the given symbol, as computed in the current /// layout. uint64_t getSymbolAddress(const MCSymbolData *SD) const; Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103695&r1=103694&r2=103695&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Wed May 12 22:19:50 2010 @@ -404,17 +404,6 @@ /// initialized. uint64_t Address; - /// Size - The logical size of this section. This is ~0 until initialized. - uint64_t Size; - - /// AddressSize - The address space size used by this section. This is ~0 - /// until initialized. - uint64_t AddressSize; - - /// FileSize - The size of this section in the object file. This is ~0 until - /// initialized. - uint64_t FileSize; - /// HasInstructions - Whether this section has had instructions emitted into /// it. 
unsigned HasInstructions : 1; Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103695&r1=103694&r2=103695&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 22:19:50 2010 @@ -110,28 +110,38 @@ SD->Address = Value; } -uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const { - assert(SD->Size != ~UINT64_C(0) && "File size not set!"); - return SD->Size; -} -void MCAsmLayout::setSectionSize(MCSectionData *SD, uint64_t Value) { - SD->Size = Value; +uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const { + // Empty sections have no size. + if (SD->getFragmentList().empty()) + return 0; + + // Otherwise, the size is the last fragment's end offset. + const MCFragment &F = SD->getFragmentList().back(); + return getFragmentOffset(&F) + getFragmentEffectiveSize(&F); } uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const { - assert(SD->FileSize != ~UINT64_C(0) && "File size not set!"); - return SD->FileSize; -} -void MCAsmLayout::setSectionFileSize(MCSectionData *SD, uint64_t Value) { - SD->FileSize = Value; -} + // Virtual sections have no file size. + if (getAssembler().getBackend().isVirtualSection(SD->getSection())) + return 0; -uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const { - assert(SD->AddressSize != ~UINT64_C(0) && "Address size not set!"); - return SD->AddressSize; + // Otherwise, the file size is the same as the address space size. + return getSectionAddressSize(SD); } -void MCAsmLayout::setSectionAddressSize(MCSectionData *SD, uint64_t Value) { - SD->AddressSize = Value; + +uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const { + // Empty sections have no size. 
+ if (SD->getFragmentList().empty()) + return 0; + + // The logical size is the address space size minus any tail padding. + uint64_t Size = getSectionAddressSize(SD); + const MCAlignFragment *AF = + dyn_cast(&(SD->getFragmentList().back())); + if (AF && AF->hasOnlyAlignAddress()) + Size -= getFragmentEffectiveSize(AF); + + return Size; } /* *** */ @@ -157,9 +167,6 @@ : Section(&_Section), Alignment(1), Address(~UINT64_C(0)), - Size(~UINT64_C(0)), - AddressSize(~UINT64_C(0)), - FileSize(~UINT64_C(0)), HasInstructions(false) { if (A) @@ -438,7 +445,6 @@ void MCAssembler::LayoutSection(MCAsmLayout &Layout, unsigned SectionOrderIndex) { MCSectionData &SD = *Layout.getSectionOrder()[SectionOrderIndex]; - bool IsVirtual = getBackend().isVirtualSection(SD.getSection()); ++stats::SectionLayouts; @@ -458,25 +464,6 @@ for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) LayoutFragment(Layout, *it); - - // Set the section sizes. - uint64_t Size = 0; - if (!SD.getFragmentList().empty()) { - MCFragment *F = &SD.getFragmentList().back(); - Size = Layout.getFragmentOffset(F) + Layout.getFragmentEffectiveSize(F); - } - Layout.setSectionAddressSize(&SD, Size); - Layout.setSectionFileSize(&SD, IsVirtual ? 0 : Size); - - // Handle OnlyAlignAddress bit. - if (!SD.getFragmentList().empty()) { - MCAlignFragment *AF = - dyn_cast(&SD.getFragmentList().back()); - if (AF && AF->hasOnlyAlignAddress()) - Size -= Layout.getFragmentEffectiveSize(AF); - } - - Layout.setSectionSize(&SD, Size); } /// WriteFragmentData - Write the \arg F data to the output file. 
@@ -948,8 +935,7 @@ OS << "dump(); From daniel at zuster.org Wed May 12 22:50:50 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 03:50:50 -0000 Subject: [llvm-commits] [llvm] r103697 - /llvm/trunk/lib/MC/MCAssembler.cpp Message-ID: <20100513035050.B3313312800A@llvm.org> Author: ddunbar Date: Wed May 12 22:50:50 2010 New Revision: 103697 URL: http://llvm.org/viewvc/llvm-project?rev=103697&view=rev Log: Fix -Asserts warning. Modified: llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103697&r1=103696&r2=103697&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Wed May 12 22:50:50 2010 @@ -562,11 +562,9 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD, const MCAsmLayout &Layout, MCObjectWriter *OW) const { - uint64_t SectionFileSize = Layout.getSectionFileSize(SD); - // Ignore virtual sections. if (getBackend().isVirtualSection(SD->getSection())) { - assert(SectionFileSize == 0 && "Invalid size for section!"); + assert(Layout.getSectionFileSize(SD) == 0 && "Invalid size for section!"); // Check that contents are only things legal inside a virtual section. 
for (MCSectionData::const_iterator it = SD->begin(), @@ -595,7 +593,7 @@ ie = SD->end(); it != ie; ++it) WriteFragmentData(*this, Layout, *it, OW); - assert(OW->getStream().tell() - Start == SectionFileSize); + assert(OW->getStream().tell() - Start == Layout.getSectionFileSize(SD)); } void MCAssembler::Finish() { From nicholas at mxc.ca Thu May 13 00:48:45 2010 From: nicholas at mxc.ca (Nick Lewycky) Date: Thu, 13 May 2010 05:48:45 -0000 Subject: [llvm-commits] [llvm] r103698 - /llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp Message-ID: <20100513054845.69CB4312800A@llvm.org> Author: nicholas Date: Thu May 13 00:48:45 2010 New Revision: 103698 URL: http://llvm.org/viewvc/llvm-project?rev=103698&view=rev Log: Replace the core comparison logic in merge functions. We can now merge vector<>::push_back() in: int foo(vector &a, vector &b) { a.push_back(10); b.push_back(11); } to two calls to the same push_back function, or fold away the two copies of push_back() in: struct T { int; }; struct S { char; }; vector t; vector s; void f(T *x) { t.push_back(x); } void g(S *x) { s.push_back(x); } but leave f() and g() separate, since they refer to two different global variables. Modified: llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp Modified: llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp?rev=103698&r1=103697&r2=103698&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp Thu May 13 00:48:45 2010 @@ -17,32 +17,55 @@ // important that the hash function be high quality. The equality comparison // iterates through each instruction in each basic block. // -// When a match is found, the functions are folded. We can only fold two -// functions when we know that the definition of one of them is not -// overridable.
+// When a match is found the functions are folded. If both functions are +// overridable, we move the functionality into a new internal function and +// leave two overridable thunks to it. // //===----------------------------------------------------------------------===// // // Future work: // -// * fold vector::push_back and vector::push_back. -// -// These two functions have different types, but in a way that doesn't matter -// to us. As long as we never see an S or T itself, using S* and S** is the -// same as using a T* and T**. -// // * virtual functions. // // Many functions have their address taken by the virtual function table for // the object they belong to. However, as long as it's only used for a lookup // and call, this is irrelevant, and we'd like to fold such implementations. // +// * use SCC to cut down on pair-wise comparisons and solve larger cycles. +// +// The current implementation loops over a pair-wise comparison of all +// functions in the program where the two functions in the pair are treated as +// assumed to be equal until proven otherwise. We could both use fewer +// comparisons and optimize more complex cases if we used strongly connected +// components of the call graph. +// +// * be smarter about bitcast. +// +// In order to fold functions, we will sometimes add either bitcast instructions +// or bitcast constant expressions. Unfortunately, this can confound further +// analysis since the two functions differ where one has a bitcast and the +// other doesn't. We should learn to peer through bitcasts without imposing bad +// performance properties. +// +// * don't emit aliases for Mach-O. +// +// Mach-O doesn't support aliases which means that we must avoid introducing +// them in the bitcode on architectures which don't support them, such as +// Mac OSX. There's a few approaches to this problem; +// a) teach codegen to lower global aliases to thunks on platforms which don't +// support them. 
+// b) always emit thunks, and create a separate thunk-to-alias pass which +// runs on ELF systems. This has the added benefit of transforming other +// thunks such as those produced by a C++ frontend into aliases when legal +// to do so. +// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" #include "llvm/InlineAsm.h" @@ -54,6 +77,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" #include #include using namespace llvm; @@ -61,17 +85,33 @@ STATISTIC(NumFunctionsMerged, "Number of functions merged"); namespace { - struct MergeFunctions : public ModulePass { + class MergeFunctions : public ModulePass { + public: static char ID; // Pass identification, replacement for typeid MergeFunctions() : ModulePass(&ID) {} bool runOnModule(Module &M); + + private: + bool isEquivalentGEP(const GetElementPtrInst *GEP1, + const GetElementPtrInst *GEP2); + + bool equals(const BasicBlock *BB1, const BasicBlock *BB2); + bool equals(const Function *F, const Function *G); + + bool compare(const Value *V1, const Value *V2); + + const Function *LHS, *RHS; + typedef DenseMap IDMap; + IDMap Map; + DenseMap Domains; + DenseMap DomainCount; + TargetData *TD; }; } char MergeFunctions::ID = 0; -static RegisterPass -X("mergefunc", "Merge Functions"); +static RegisterPass X("mergefunc", "Merge Functions"); ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); @@ -95,15 +135,6 @@ return ID.ComputeHash(); } -/// IgnoreBitcasts - given a bitcast, returns the first non-bitcast found by -/// walking the chain of cast operands. Otherwise, returns the argument. 
-static Value* IgnoreBitcasts(Value *V) { - while (BitCastInst *BC = dyn_cast(V)) - V = BC->getOperand(0); - - return V; -} - /// isEquivalentType - any two pointers are equivalent. Otherwise, standard /// type equivalence rules apply. static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { @@ -113,6 +144,14 @@ return false; switch(Ty1->getTypeID()) { + default: + llvm_unreachable("Unknown type!"); + // Fall through in Release-Asserts mode. + case Type::IntegerTyID: + case Type::OpaqueTyID: + // Ty1 == Ty2 would have returned true earlier. + return false; + case Type::VoidTyID: case Type::FloatTyID: case Type::DoubleTyID: @@ -123,15 +162,6 @@ case Type::MetadataTyID: return true; - case Type::IntegerTyID: - case Type::OpaqueTyID: - // Ty1 == Ty2 would have returned true earlier. - return false; - - default: - llvm_unreachable("Unknown type!"); - return false; - case Type::PointerTyID: { const PointerType *PTy1 = cast(Ty1); const PointerType *PTy2 = cast(Ty2); @@ -154,6 +184,21 @@ return true; } + case Type::UnionTyID: { + const UnionType *UTy1 = cast(Ty1); + const UnionType *UTy2 = cast(Ty2); + + // TODO: we could be fancy with union(A, union(A, B)) === union(A, B), etc. 
+ if (UTy1->getNumElements() != UTy2->getNumElements()) + return false; + + for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) { + if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i))) + return false; + } + return true; + } + case Type::FunctionTyID: { const FunctionType *FTy1 = cast(Ty1); const FunctionType *FTy2 = cast(Ty2); @@ -236,123 +281,136 @@ return true; } -static bool compare(const Value *V, const Value *U) { - assert(!isa(V) && !isa(U) && - "Must not compare basic blocks."); - - assert(isEquivalentType(V->getType(), U->getType()) && - "Two of the same operation have operands of different type."); +bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1, + const GetElementPtrInst *GEP2) { + if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) { + SmallVector Indices1, Indices2; + for (GetElementPtrInst::const_op_iterator I = GEP1->idx_begin(), + E = GEP1->idx_end(); I != E; ++I) { + Indices1.push_back(*I); + } + for (GetElementPtrInst::const_op_iterator I = GEP2->idx_begin(), + E = GEP2->idx_end(); I != E; ++I) { + Indices2.push_back(*I); + } + uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(), + Indices1.data(), Indices1.size()); + uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(), + Indices2.data(), Indices2.size()); + return Offset1 == Offset2; + } - // TODO: If the constant is an expression of F, we should accept that it's - // equal to the same expression in terms of G. - if (isa(V)) - return V == U; + // Equivalent types aren't enough. + if (GEP1->getPointerOperand()->getType() != + GEP2->getPointerOperand()->getType()) + return false; - // The caller has ensured that ValueMap[V] != U. Since Arguments are - // pre-loaded into the ValueMap, and Instructions are added as we go, we know - // that this can only be a mis-match. 
- if (isa(V) || isa(V)) + if (GEP1->getNumOperands() != GEP2->getNumOperands()) return false; - if (isa(V) && isa(U)) { - const InlineAsm *IAF = cast(V); - const InlineAsm *IAG = cast(U); - return IAF->getAsmString() == IAG->getAsmString() && - IAF->getConstraintString() == IAG->getConstraintString(); + for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { + if (!compare(GEP1->getOperand(i), GEP2->getOperand(i))) + return false; } - return false; + return true; } -static bool equals(const BasicBlock *BB1, const BasicBlock *BB2, - DenseMap &ValueMap, - DenseMap &SpeculationMap) { - // Speculatively add it anyways. If it's false, we'll notice a difference - // later, and this won't matter. - ValueMap[BB1] = BB2; +bool MergeFunctions::compare(const Value *V1, const Value *V2) { + if (V1 == LHS || V1 == RHS) + if (V2 == LHS || V2 == RHS) + return true; + + // TODO: constant expressions in terms of LHS and RHS + if (isa(V1)) + return V1 == V2; + + if (isa(V1) && isa(V2)) { + const InlineAsm *IA1 = cast(V1); + const InlineAsm *IA2 = cast(V2); + return IA1->getAsmString() == IA2->getAsmString() && + IA1->getConstraintString() == IA2->getConstraintString(); + } + + // We enumerate constants globally and arguments, basic blocks or + // instructions within the function they belong to. 
+ const Function *Domain1 = NULL; + if (const Argument *A = dyn_cast(V1)) { + Domain1 = A->getParent(); + } else if (const BasicBlock *BB = dyn_cast(V1)) { + Domain1 = BB->getParent(); + } else if (const Instruction *I = dyn_cast(V1)) { + Domain1 = I->getParent()->getParent(); + } + + const Function *Domain2 = NULL; + if (const Argument *A = dyn_cast(V2)) { + Domain2 = A->getParent(); + } else if (const BasicBlock *BB = dyn_cast(V2)) { + Domain2 = BB->getParent(); + } else if (const Instruction *I = dyn_cast(V2)) { + Domain2 = I->getParent()->getParent(); + } + + if (Domain1 != Domain2) + if (Domain1 != LHS && Domain1 != RHS) + if (Domain2 != LHS && Domain2 != RHS) + return false; + + IDMap &Map1 = Domains[Domain1]; + unsigned long &ID1 = Map1[V1]; + if (!ID1) + ID1 = ++DomainCount[Domain1]; + + IDMap &Map2 = Domains[Domain2]; + unsigned long &ID2 = Map2[V2]; + if (!ID2) + ID2 = ++DomainCount[Domain2]; + + return ID1 == ID2; +} +bool MergeFunctions::equals(const BasicBlock *BB1, const BasicBlock *BB2) { BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end(); BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end(); do { - if (isa(FI)) { - ++FI; - continue; - } - if (isa(GI)) { - ++GI; - continue; - } - - if (!isEquivalentOperation(FI, GI)) + if (!compare(FI, GI)) return false; - if (isa(FI)) { - const GetElementPtrInst *GEPF = cast(FI); - const GetElementPtrInst *GEPG = cast(GI); - if (GEPF->hasAllZeroIndices() && GEPG->hasAllZeroIndices()) { - // It's effectively a bitcast. 
- ++FI, ++GI; - continue; - } - - // TODO: we only really care about the elements before the index - if (FI->getOperand(0)->getType() != GI->getOperand(0)->getType()) + if (isa(FI) && isa(GI)) { + const GetElementPtrInst *GEP1 = cast(FI); + const GetElementPtrInst *GEP2 = cast(GI); + + if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand())) + return false; + + if (!isEquivalentGEP(GEP1, GEP2)) + return false; + } else { + if (!isEquivalentOperation(FI, GI)) return false; - } - if (ValueMap[FI] == GI) { - ++FI, ++GI; - continue; - } - - if (ValueMap[FI] != NULL) - return false; - - for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { - Value *OpF = IgnoreBitcasts(FI->getOperand(i)); - Value *OpG = IgnoreBitcasts(GI->getOperand(i)); + for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { + Value *OpF = FI->getOperand(i); + Value *OpG = GI->getOperand(i); - if (ValueMap[OpF] == OpG) - continue; + if (!compare(OpF, OpG)) + return false; - if (ValueMap[OpF] != NULL) - return false; - - if (OpF->getValueID() != OpG->getValueID() || - !isEquivalentType(OpF->getType(), OpG->getType())) - return false; - - if (isa(FI)) { - if (SpeculationMap[OpF] == NULL) - SpeculationMap[OpF] = OpG; - else if (SpeculationMap[OpF] != OpG) - return false; - continue; - } else if (isa(OpF)) { - assert(isa(FI) && - "BasicBlock referenced by non-Terminator non-PHI"); - // This call changes the ValueMap, hence we can't use - // Value *& = ValueMap[...] 
- if (!equals(cast(OpF), cast(OpG), ValueMap, - SpeculationMap)) - return false; - } else { - if (!compare(OpF, OpG)) + if (OpF->getValueID() != OpG->getValueID() || + !isEquivalentType(OpF->getType(), OpG->getType())) return false; } - - ValueMap[OpF] = OpG; } - ValueMap[FI] = GI; ++FI, ++GI; } while (FI != FE && GI != GE); return FI == FE && GI == GE; } -static bool equals(const Function *F, const Function *G) { +bool MergeFunctions::equals(const Function *F, const Function *G) { // We need to recheck everything, but check the things that weren't included // in the hash first. @@ -382,27 +440,46 @@ if (!isEquivalentType(F->getFunctionType(), G->getFunctionType())) return false; - DenseMap ValueMap; - DenseMap SpeculationMap; - ValueMap[F] = G; - assert(F->arg_size() == G->arg_size() && "Identical functions have a different number of args."); - for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(), - fe = F->arg_end(); fi != fe; ++fi, ++gi) - ValueMap[fi] = gi; - - if (!equals(&F->getEntryBlock(), &G->getEntryBlock(), ValueMap, - SpeculationMap)) - return false; + LHS = F; + RHS = G; - for (DenseMap::iterator - I = SpeculationMap.begin(), E = SpeculationMap.end(); I != E; ++I) { - if (ValueMap[I->first] != I->second) + // Visit the arguments so that they get enumerated in the order they're + // passed in. + for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(), + fe = F->arg_end(); fi != fe; ++fi, ++gi) { + if (!compare(fi, gi)) + llvm_unreachable("Arguments repeat"); + } + + SmallVector FBBs, GBBs; + SmallSet VisitedBBs; // in terms of F. 
+ FBBs.push_back(&F->getEntryBlock()); + GBBs.push_back(&G->getEntryBlock()); + VisitedBBs.insert(FBBs[0]); + while (!FBBs.empty()) { + const BasicBlock *FBB = FBBs.pop_back_val(); + const BasicBlock *GBB = GBBs.pop_back_val(); + if (!compare(FBB, GBB) || !equals(FBB, GBB)) { + Domains.clear(); + DomainCount.clear(); return false; + } + const TerminatorInst *FTI = FBB->getTerminator(); + const TerminatorInst *GTI = GBB->getTerminator(); + assert(FTI->getNumSuccessors() == GTI->getNumSuccessors()); + for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(FTI->getSuccessor(i))) + continue; + FBBs.push_back(FTI->getSuccessor(i)); + GBBs.push_back(GTI->getSuccessor(i)); + } } + Domains.clear(); + DomainCount.clear(); return true; } @@ -476,20 +553,32 @@ } static void ThunkGToF(Function *F, Function *G) { + if (!G->mayBeOverridden()) { + // Redirect direct callers of G to F. + Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); + for (Value::use_iterator UI = G->use_begin(), UE = G->use_end(); + UI != UE;) { + Value::use_iterator TheIter = UI; + ++UI; + CallSite CS(*TheIter); + if (CS && CS.isCallee(TheIter)) + TheIter.getUse().set(BitcastF); + } + } + Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", G->getParent()); BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG); - std::vector Args; + SmallVector Args; unsigned i = 0; const FunctionType *FFTy = F->getFunctionType(); for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); AI != AE; ++AI) { - if (FFTy->getParamType(i) == AI->getType()) + if (FFTy->getParamType(i) == AI->getType()) { Args.push_back(AI); - else { - Value *BCI = new BitCastInst(AI, FFTy->getParamType(i), "", BB); - Args.push_back(BCI); + } else { + Args.push_back(new BitCastInst(AI, FFTy->getParamType(i), "", BB)); } ++i; } @@ -510,8 +599,6 @@ NewG->takeName(G); G->replaceAllUsesWith(NewG); G->eraseFromParent(); - - // TODO: look at direct callers to 
G and make them all direct callers to F. } static void AliasGToF(Function *F, Function *G) { @@ -542,67 +629,66 @@ } switch (catF) { + case ExternalStrong: + switch (catG) { case ExternalStrong: - switch (catG) { - case ExternalStrong: - case ExternalWeak: - ThunkGToF(F, G); - break; - case Internal: - if (G->hasAddressTaken()) - ThunkGToF(F, G); - else - AliasGToF(F, G); - break; - } + case ExternalWeak: + ThunkGToF(F, G); break; + case Internal: + if (G->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + } + break; - case ExternalWeak: { - assert(catG == ExternalWeak); + case ExternalWeak: { + assert(catG == ExternalWeak); - // Make them both thunks to the same internal function. - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); - Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", - F->getParent()); - H->copyAttributesFrom(F); - H->takeName(F); - F->replaceAllUsesWith(H); + // Make them both thunks to the same internal function. 
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", + F->getParent()); + H->copyAttributesFrom(F); + H->takeName(F); + F->replaceAllUsesWith(H); - ThunkGToF(F, G); - ThunkGToF(F, H); + ThunkGToF(F, G); + ThunkGToF(F, H); - F->setLinkage(GlobalValue::InternalLinkage); - } break; + F->setLinkage(GlobalValue::InternalLinkage); + } break; - case Internal: - switch (catG) { - case ExternalStrong: - llvm_unreachable(0); - // fall-through - case ExternalWeak: - if (F->hasAddressTaken()) - ThunkGToF(F, G); - else - AliasGToF(F, G); - break; - case Internal: { - bool addrTakenF = F->hasAddressTaken(); - bool addrTakenG = G->hasAddressTaken(); - if (!addrTakenF && addrTakenG) { - std::swap(FnVec[i], FnVec[j]); - std::swap(F, G); - std::swap(addrTakenF, addrTakenG); - } + case Internal: + switch (catG) { + case ExternalStrong: + llvm_unreachable(0); + // fall-through + case ExternalWeak: + if (F->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + case Internal: { + bool addrTakenF = F->hasAddressTaken(); + bool addrTakenG = G->hasAddressTaken(); + if (!addrTakenF && addrTakenG) { + std::swap(FnVec[i], FnVec[j]); + std::swap(F, G); + std::swap(addrTakenF, addrTakenG); + } - if (addrTakenF && addrTakenG) { - ThunkGToF(F, G); - } else { - assert(!addrTakenG); - AliasGToF(F, G); - } - } break; + if (addrTakenF && addrTakenG) { + ThunkGToF(F, G); + } else { + assert(!addrTakenG); + AliasGToF(F, G); } - break; + } break; + } break; } ++NumFunctionsMerged; @@ -619,22 +705,20 @@ std::map > FnMap; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration() || F->isIntrinsic()) + if (F->isDeclaration()) continue; FnMap[hash(F)].push_back(F); } - // TODO: instead of running in a loop, we could also fold functions in - // callgraph order. 
Constructing the CFG probably isn't cheaper than just - // running in a loop, unless it happened to already be available. + TD = getAnalysisIfAvailable(); bool LocalChanged; do { LocalChanged = false; DEBUG(dbgs() << "size: " << FnMap.size() << "\n"); for (std::map >::iterator - I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { + I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { std::vector &FnVec = I->second; DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n"); From nicholas at mxc.ca Thu May 13 01:00:14 2010 From: nicholas at mxc.ca (Nick Lewycky) Date: Thu, 13 May 2010 06:00:14 -0000 Subject: [llvm-commits] [llvm] r103699 - /llvm/trunk/test/Transforms/Inline/2010-05-12-ValueMap.ll Message-ID: <20100513060014.5BB4E312800A@llvm.org> Author: nicholas Date: Thu May 13 01:00:14 2010 New Revision: 103699 URL: http://llvm.org/viewvc/llvm-project?rev=103699&view=rev Log: Add testcase for r103653. Added: llvm/trunk/test/Transforms/Inline/2010-05-12-ValueMap.ll Added: llvm/trunk/test/Transforms/Inline/2010-05-12-ValueMap.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/2010-05-12-ValueMap.ll?rev=103699&view=auto ============================================================================== --- llvm/trunk/test/Transforms/Inline/2010-05-12-ValueMap.ll (added) +++ llvm/trunk/test/Transforms/Inline/2010-05-12-ValueMap.ll Thu May 13 01:00:14 2010 @@ -0,0 +1,28 @@ +; RUN: opt -inline -mergefunc -disable-output + +; This tests for a bug where the inliner kept the functions in a ValueMap after +; it had completed and a ModulePass started to run. LLVM would crash deleting +; a function that was still a key in the ValueMap. 
+ +define internal fastcc void @list_Cdr1918() nounwind inlinehint { + unreachable +} + +define internal fastcc void @list_PairSecond1927() nounwind inlinehint { + call fastcc void @list_Cdr1918() nounwind inlinehint + unreachable +} + +define internal fastcc void @list_Cdr3164() nounwind inlinehint { + unreachable +} + +define internal fastcc void @list_Nconc3167() nounwind inlinehint { + call fastcc void @list_Cdr3164() nounwind inlinehint + unreachable +} + +define void @term_Equal() nounwind { + call fastcc void @list_Cdr3164() nounwind inlinehint + unreachable +} From nicholas at mxc.ca Thu May 13 01:43:13 2010 From: nicholas at mxc.ca (Nick Lewycky) Date: Wed, 12 May 2010 23:43:13 -0700 Subject: [llvm-commits] [patch] Add an emit-llvm option to the gold plugin In-Reply-To: References: Message-ID: <4BEB9F81.6020304@mxc.ca> Rafael Espindola wrote: > The attached patch adds the emit-llvm option to the gold plugin. It is > a bit different from the behaviour of this option in other tools in > that it requires a filename and gold still produces a regular ELF > output. The reasons are > > *) There is no clean way for the plugin to get the name of the output file > *) There is no clean way for the plugin to stop gold (short of calling exit(0)) > > Is this ok? Should I rename the option to also-emit-llvm? I think that's fine, naming the flag also-emit-llvm would be more clear. > Cheers, + if (options::bc_path) + free(const_cast(options::bc_path)); You don't need the if-statement. free(NULL) is a guaranteed no-op. Also feel free to make bc_path not const or make it an std::string or something. 
Nick From nicholas at mxc.ca Thu May 13 01:45:13 2010 From: nicholas at mxc.ca (Nick Lewycky) Date: Thu, 13 May 2010 06:45:13 -0000 Subject: [llvm-commits] [llvm] r103700 - /llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp Message-ID: <20100513064513.55376312800A@llvm.org> Author: nicholas Date: Thu May 13 01:45:13 2010 New Revision: 103700 URL: http://llvm.org/viewvc/llvm-project?rev=103700&view=rev Log: Remove heinous tabs. Modified: llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp Modified: llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp?rev=103700&r1=103699&r2=103700&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/MergeFunctions.cpp Thu May 13 01:45:13 2010 @@ -94,7 +94,7 @@ private: bool isEquivalentGEP(const GetElementPtrInst *GEP1, - const GetElementPtrInst *GEP2); + const GetElementPtrInst *GEP2); bool equals(const BasicBlock *BB1, const BasicBlock *BB2); bool equals(const Function *F, const Function *G); @@ -355,7 +355,7 @@ if (Domain1 != Domain2) if (Domain1 != LHS && Domain1 != RHS) if (Domain2 != LHS && Domain2 != RHS) - return false; + return false; IDMap &Map1 = Domains[Domain1]; unsigned long &ID1 = Map1[V1]; @@ -383,10 +383,10 @@ const GetElementPtrInst *GEP2 = cast(GI); if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand())) - return false; + return false; if (!isEquivalentGEP(GEP1, GEP2)) - return false; + return false; } else { if (!isEquivalentOperation(FI, GI)) return false; @@ -395,8 +395,8 @@ Value *OpF = FI->getOperand(i); Value *OpG = GI->getOperand(i); - if (!compare(OpF, OpG)) - return false; + if (!compare(OpF, OpG)) + return false; if (OpF->getValueID() != OpG->getValueID() || !isEquivalentType(OpF->getType(), OpG->getType())) @@ -472,7 +472,7 @@ assert(FTI->getNumSuccessors() == 
GTI->getNumSuccessors()); for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) { if (!VisitedBBs.insert(FTI->getSuccessor(i))) - continue; + continue; FBBs.push_back(FTI->getSuccessor(i)); GBBs.push_back(GTI->getSuccessor(i)); } From asl at math.spbu.ru Thu May 13 02:41:57 2010 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Thu, 13 May 2010 07:41:57 -0000 Subject: [llvm-commits] [llvm] r103702 - /llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp Message-ID: <20100513074157.65476312800A@llvm.org> Author: asl Date: Thu May 13 02:41:57 2010 New Revision: 103702 URL: http://llvm.org/viewvc/llvm-project?rev=103702&view=rev Log: Properly set thread-local flag on globals during cpp emission Modified: llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp Modified: llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp?rev=103702&r1=103701&r2=103702&view=diff ============================================================================== --- llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp (original) +++ llvm/trunk/lib/Target/CppBackend/CPPBackend.cpp Thu May 13 02:41:57 2010 @@ -1038,6 +1038,11 @@ Out << ");"; nl(Out); } + if (GV->isThreadLocal()) { + printCppName(GV); + Out << "->setThreadLocal(true);"; + nl(Out); + } if (is_inline) { out(); Out << "}"; nl(Out); } From chandlerc at gmail.com Thu May 13 02:43:48 2010 From: chandlerc at gmail.com (Chandler Carruth) Date: Thu, 13 May 2010 07:43:48 -0000 Subject: [llvm-commits] [llvm] r103704 - /llvm/trunk/utils/TableGen/ClangDiagnosticsEmitter.cpp Message-ID: <20100513074348.1D924312800A@llvm.org> Author: chandlerc Date: Thu May 13 02:43:47 2010 New Revision: 103704 URL: http://llvm.org/viewvc/llvm-project?rev=103704&view=rev Log: Update tablegen to generate shorts instead of chars for subgroup arrays. 
Modified: llvm/trunk/utils/TableGen/ClangDiagnosticsEmitter.cpp Modified: llvm/trunk/utils/TableGen/ClangDiagnosticsEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/ClangDiagnosticsEmitter.cpp?rev=103704&r1=103703&r2=103704&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/ClangDiagnosticsEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/ClangDiagnosticsEmitter.cpp Thu May 13 02:43:47 2010 @@ -239,7 +239,7 @@ const std::vector &SubGroups = I->second.SubGroups; if (!SubGroups.empty()) { - OS << "static const char DiagSubGroup" << I->second.IDNo << "[] = { "; + OS << "static const short DiagSubGroup" << I->second.IDNo << "[] = { "; for (unsigned i = 0, e = SubGroups.size(); i != e; ++i) { std::map::iterator RI = DiagsInGroup.find(SubGroups[i]); From daniel at zuster.org Thu May 13 03:43:31 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 08:43:31 -0000 Subject: [llvm-commits] [llvm] r103709 - in /llvm/trunk: include/llvm/MC/MCAsmLayout.h lib/MC/MCAssembler.cpp Message-ID: <20100513084331.B7BF2312800A@llvm.org> Author: ddunbar Date: Thu May 13 03:43:31 2010 New Revision: 103709 URL: http://llvm.org/viewvc/llvm-project?rev=103709&view=rev Log: MC: Add MCAsmLayout::FragmentReplaced() helper function. Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAsmLayout.h?rev=103709&r1=103708&r2=103709&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAsmLayout.h (original) +++ llvm/trunk/include/llvm/MC/MCAsmLayout.h Thu May 13 03:43:31 2010 @@ -47,6 +47,9 @@ /// the delta from the old size. void UpdateForSlide(MCFragment *F, int SlideAmount); + /// \brief Update the layout because a fragment has been replaced. 
+ void FragmentReplaced(MCFragment *Src, MCFragment *Dst); + /// @name Section Access (in layout order) /// @{ Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103709&r1=103708&r2=103709&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Thu May 13 03:43:31 2010 @@ -73,6 +73,11 @@ getAssembler().LayoutSection(*this, i); } +void MCAsmLayout::FragmentReplaced(MCFragment *Src, MCFragment *Dst) { + Dst->Offset = Src->Offset; + Dst->EffectiveSize = Src->EffectiveSize; +} + uint64_t MCAsmLayout::getFragmentAddress(const MCFragment *F) const { assert(F->getParent() && "Missing section()!"); return getSectionAddress(F->getParent()) + getFragmentOffset(F); @@ -818,13 +823,10 @@ SD.getFragmentList().insert(it2, DF); // Update the data fragments layout data. - // - // FIXME: Add MCAsmLayout utility for this. DF->setParent(IF->getParent()); DF->setAtom(IF->getAtom()); DF->setOrdinal(IF->getOrdinal()); - Layout.setFragmentOffset(DF, Layout.getFragmentOffset(IF)); - Layout.setFragmentEffectiveSize(DF, Layout.getFragmentEffectiveSize(IF)); + Layout.FragmentReplaced(IF, DF); // Copy in the data and the fixups. DF->getContents().append(IF->getCode().begin(), IF->getCode().end()); From daniel at zuster.org Thu May 13 03:43:34 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 08:43:34 -0000 Subject: [llvm-commits] [llvm] r103710 - /llvm/trunk/lib/MC/MCAssembler.cpp Message-ID: <20100513084334.D8F823128018@llvm.org> Author: ddunbar Date: Thu May 13 03:43:34 2010 New Revision: 103710 URL: http://llvm.org/viewvc/llvm-project?rev=103710&view=rev Log: MC: Create dummy fragments to avoid ever having empty sections, which simplifies layout. 
Modified: llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103710&r1=103709&r2=103710&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Thu May 13 03:43:34 2010 @@ -116,10 +116,6 @@ } uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const { - // Empty sections have no size. - if (SD->getFragmentList().empty()) - return 0; - // Otherwise, the size is the last fragment's end offset. const MCFragment &F = SD->getFragmentList().back(); return getFragmentOffset(&F) + getFragmentEffectiveSize(&F); @@ -135,10 +131,6 @@ } uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const { - // Empty sections have no size. - if (SD->getFragmentList().empty()) - return 0; - // The logical size is the address space size minus any tail padding. uint64_t Size = getSectionAddressSize(SD); const MCAlignFragment *AF = @@ -611,6 +603,15 @@ unsigned SectionIndex = 0; unsigned FragmentIndex = 0; for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { + // Create dummy fragments to eliminate any empty sections, this simplifies + // layout. 
+ if (it->getFragmentList().empty()) { + unsigned ValueSize = 1; + if (getBackend().isVirtualSection(it->getSection())) + ValueSize = 1; + new MCFillFragment(0, 1, 0, it); + } + it->setOrdinal(SectionIndex++); for (MCSectionData::iterator it2 = it->begin(), From daniel at zuster.org Thu May 13 03:43:37 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 08:43:37 -0000 Subject: [llvm-commits] [llvm] r103711 - /llvm/trunk/lib/MC/MCAssembler.cpp Message-ID: <20100513084337.9E42C3128026@llvm.org> Author: ddunbar Date: Thu May 13 03:43:37 2010 New Revision: 103711 URL: http://llvm.org/viewvc/llvm-project?rev=103711&view=rev Log: MC: Move ordinal calculation, to make sure fragments synthesized for layout get assigned ordinals properly. Modified: llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103711&r1=103710&r2=103711&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Thu May 13 03:43:37 2010 @@ -598,27 +598,6 @@ llvm::errs() << "assembler backend - pre-layout\n--\n"; dump(); }); - // Assign section and fragment ordinals, all subsequent backend code is - // responsible for updating these in place. - unsigned SectionIndex = 0; - unsigned FragmentIndex = 0; - for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { - // Create dummy fragments to eliminate any empty sections, this simplifies - // layout. - if (it->getFragmentList().empty()) { - unsigned ValueSize = 1; - if (getBackend().isVirtualSection(it->getSection())) - ValueSize = 1; - new MCFillFragment(0, 1, 0, it); - } - - it->setOrdinal(SectionIndex++); - - for (MCSectionData::iterator it2 = it->begin(), - ie2 = it->end(); it2 != ie2; ++it2) - it2->setOrdinal(FragmentIndex++); - } - // Create the layout object. 
MCAsmLayout Layout(*this); @@ -646,6 +625,27 @@ AF->setOnlyAlignAddress(true); } + // Assign section and fragment ordinals, all subsequent backend code is + // responsible for updating these in place. + unsigned SectionIndex = 0; + unsigned FragmentIndex = 0; + for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { + // Create dummy fragments to eliminate any empty sections, this simplifies + // layout. + if (it->getFragmentList().empty()) { + unsigned ValueSize = 1; + if (getBackend().isVirtualSection(it->getSection())) + ValueSize = 1; + new MCFillFragment(0, 1, 0, it); + } + + it->setOrdinal(SectionIndex++); + + for (MCSectionData::iterator it2 = it->begin(), + ie2 = it->end(); it2 != ie2; ++it2) + it2->setOrdinal(FragmentIndex++); + } + // Layout until everything fits. while (LayoutOnce(Layout)) continue; From rafael.espindola at gmail.com Thu May 13 08:39:31 2010 From: rafael.espindola at gmail.com (Rafael Espindola) Date: Thu, 13 May 2010 13:39:31 -0000 Subject: [llvm-commits] [llvm] r103714 - /llvm/trunk/tools/gold/gold-plugin.cpp Message-ID: <20100513133931.83E3A312800A@llvm.org> Author: rafael Date: Thu May 13 08:39:31 2010 New Revision: 103714 URL: http://llvm.org/viewvc/llvm-project?rev=103714&view=rev Log: Add an also-emit-llvm option to the gold plugin. Modified: llvm/trunk/tools/gold/gold-plugin.cpp Modified: llvm/trunk/tools/gold/gold-plugin.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/gold/gold-plugin.cpp?rev=103714&r1=103713&r2=103714&view=diff ============================================================================== --- llvm/trunk/tools/gold/gold-plugin.cpp (original) +++ llvm/trunk/tools/gold/gold-plugin.cpp Thu May 13 08:39:31 2010 @@ -59,6 +59,7 @@ namespace options { static bool generate_api_file = false; + static std::string bc_path; static const char *as_path = NULL; // Additional options to pass into the code generator. 
// Note: This array will contain all plugin options which are not claimed @@ -81,6 +82,14 @@ } else { as_path = strdup(opt + 3); } + } else if(llvm::StringRef(opt).startswith("also-emit-llvm=")) { + const char *path = opt + strlen("also-emit-llvm="); + if (bc_path != "") { + (*message)(LDPL_WARNING, "Path to the output IL file specified twice. " + "Discarding %s", opt); + } else { + bc_path = path; + } } else { // Save this option to pass to the code generator. extra.push_back(std::string(opt)); @@ -374,6 +383,11 @@ } } + if (options::bc_path != "") { + bool err = lto_codegen_write_merged_modules(cg, options::bc_path.c_str()); + if (err) + (*message)(LDPL_FATAL, "Failed to write the output file."); + } size_t bufsize = 0; const char *buffer = static_cast(lto_codegen_compile(cg, &bufsize)); From daniel at zuster.org Thu May 13 10:17:26 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 15:17:26 -0000 Subject: [llvm-commits] [llvm] r103715 - in /llvm/trunk: include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp Message-ID: <20100513151726.C87E6312800A@llvm.org> Author: ddunbar Date: Thu May 13 10:17:26 2010 New Revision: 103715 URL: http://llvm.org/viewvc/llvm-project?rev=103715&view=rev Log: MC: Add section layout order indices to MCSectionData. Modified: llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103715&r1=103714&r2=103715&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Thu May 13 10:17:26 2010 @@ -392,6 +392,9 @@ /// Ordinal - The section index in the assemblers section list. unsigned Ordinal; + /// LayoutOrder - The index of this section in the layout order. 
+ unsigned LayoutOrder; + /// Alignment - The maximum alignment seen in this section. unsigned Alignment; @@ -426,6 +429,9 @@ unsigned getOrdinal() const { return Ordinal; } void setOrdinal(unsigned Value) { Ordinal = Value; } + unsigned getLayoutOrder() const { return LayoutOrder; } + void setLayoutOrder(unsigned Value) { LayoutOrder = Value; } + /// @name Fragment Access /// @{ Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103715&r1=103714&r2=103715&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Thu May 13 10:17:26 2010 @@ -601,6 +601,10 @@ // Create the layout object. MCAsmLayout Layout(*this); + // Assign layout order indices. + for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) + Layout.getSectionOrder()[i]->setLayoutOrder(i); + // Insert additional align fragments for concrete sections to explicitly pad // the previous section to match their alignment requirements. This is for // 'gas' compatibility, it shouldn't strictly be necessary. From evzen.muller at arm.com Thu May 13 07:12:22 2010 From: evzen.muller at arm.com (Evzen Muller) Date: Thu, 13 May 2010 13:12:22 +0100 Subject: [llvm-commits] Fixes for build on hosts without round, powf and modf Message-ID: <000001caf295$8ac62210$a0526630$@muller@arm.com> Hi, Attached patch fixes build on machines without round() or powf()/modf() functions in math.h (for example Solaris9). Thanks Evzen -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100513/2c8f8507/attachment.html -------------- next part -------------- A non-text attachment was scrubbed... 
Name: math_fn_fixes.patch Type: application/octet-stream Size: 4148 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100513/2c8f8507/attachment.obj From nicholas at mxc.ca Thu May 13 12:41:06 2010 From: nicholas at mxc.ca (Nick Lewycky) Date: Thu, 13 May 2010 17:41:06 -0000 Subject: [llvm-commits] [llvm] r103720 - /llvm/trunk/test/Transforms/Inline/2010-05-12-ValueMap.ll Message-ID: <20100513174107.05914312800A@llvm.org> Author: nicholas Date: Thu May 13 12:41:06 2010 New Revision: 103720 URL: http://llvm.org/viewvc/llvm-project?rev=103720&view=rev Log: Actually run the test. Thanks Daniel Dunbar! Modified: llvm/trunk/test/Transforms/Inline/2010-05-12-ValueMap.ll Modified: llvm/trunk/test/Transforms/Inline/2010-05-12-ValueMap.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/2010-05-12-ValueMap.ll?rev=103720&r1=103719&r2=103720&view=diff ============================================================================== --- llvm/trunk/test/Transforms/Inline/2010-05-12-ValueMap.ll (original) +++ llvm/trunk/test/Transforms/Inline/2010-05-12-ValueMap.ll Thu May 13 12:41:06 2010 @@ -1,4 +1,4 @@ -; RUN: opt -inline -mergefunc -disable-output +; RUN: opt %s -inline -mergefunc -disable-output ; This tests for a bug where the inliner kept the functions in a ValueMap after ; it had completed and a ModulePass started to run. LLVM would crash deleting From stoklund at 2pi.dk Thu May 13 12:58:15 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Thu, 13 May 2010 17:58:15 -0000 Subject: [llvm-commits] [llvm] r103721 - /llvm/trunk/tools/bugpoint/ToolRunner.cpp Message-ID: <20100513175815.47C96312800A@llvm.org> Author: stoklund Date: Thu May 13 12:58:15 2010 New Revision: 103721 URL: http://llvm.org/viewvc/llvm-project?rev=103721&view=rev Log: Fix complete badness in bugpoint's IsARMArchitecture() function. 
The revision history for this function is interesting, with multiple layers of wrongness being introduced one at a time. This fixes a weird issue where bugpoint -run-llc would suddenly exit 13 half way through isolating a miscompilation. Modified: llvm/trunk/tools/bugpoint/ToolRunner.cpp Modified: llvm/trunk/tools/bugpoint/ToolRunner.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/bugpoint/ToolRunner.cpp?rev=103721&r1=103720&r2=103721&view=diff ============================================================================== --- llvm/trunk/tools/bugpoint/ToolRunner.cpp (original) +++ llvm/trunk/tools/bugpoint/ToolRunner.cpp Thu May 13 12:58:15 2010 @@ -620,10 +620,9 @@ static bool IsARMArchitecture(std::vector Args) { for (std::vector::const_iterator I = Args.begin(), E = Args.end(); I != E; ++I) { - StringRef S(*I); - if (!S.equals_lower("-arch")) { + if (StringRef(*I).equals_lower("-arch")) { ++I; - if (I != E && !S.substr(0, strlen("arm")).equals_lower("arm")) + if (I != E && StringRef(*I).substr(0, strlen("arm")).equals_lower("arm")) return true; } } From daniel at zuster.org Thu May 13 13:35:03 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 18:35:03 -0000 Subject: [llvm-commits] [llvm] r103723 - in /llvm/trunk: include/llvm/ADT/ilist_node.h unittests/ADT/ilistTest.cpp Message-ID: <20100513183503.227D3312800A@llvm.org> Author: ddunbar Date: Thu May 13 13:35:02 2010 New Revision: 103723 URL: http://llvm.org/viewvc/llvm-project?rev=103723&view=rev Log: Fix const ilist_node::get{Prev,Next}Node() to actually compile. Picky, picky. 
Modified: llvm/trunk/include/llvm/ADT/ilist_node.h llvm/trunk/unittests/ADT/ilistTest.cpp Modified: llvm/trunk/include/llvm/ADT/ilist_node.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/ilist_node.h?rev=103723&r1=103722&r2=103723&view=diff ============================================================================== --- llvm/trunk/include/llvm/ADT/ilist_node.h (original) +++ llvm/trunk/include/llvm/ADT/ilist_node.h Thu May 13 13:35:02 2010 @@ -67,7 +67,7 @@ /// \brief Get the previous node, or 0 for the list head. const NodeTy *getPrevNode() const { - NodeTy *Prev = this->getPrev(); + const NodeTy *Prev = this->getPrev(); // Check for sentinel. if (!Prev->getNext()) @@ -89,7 +89,7 @@ /// \brief Get the next node, or 0 for the list tail. const NodeTy *getNextNode() const { - NodeTy *Next = getNext(); + const NodeTy *Next = getNext(); // Check for sentinel. if (!Next->getNext()) Modified: llvm/trunk/unittests/ADT/ilistTest.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ADT/ilistTest.cpp?rev=103723&r1=103722&r2=103723&view=diff ============================================================================== --- llvm/trunk/unittests/ADT/ilistTest.cpp (original) +++ llvm/trunk/unittests/ADT/ilistTest.cpp Thu May 13 13:35:02 2010 @@ -34,6 +34,11 @@ EXPECT_EQ(2, List.back().Value); EXPECT_EQ(2, List.front().getNextNode()->Value); EXPECT_EQ(1, List.back().getPrevNode()->Value); + + const ilist &ConstList = List; + EXPECT_EQ(2, ConstList.back().Value); + EXPECT_EQ(2, ConstList.front().getNextNode()->Value); + EXPECT_EQ(1, ConstList.back().getPrevNode()->Value); } } From daniel at zuster.org Thu May 13 13:35:06 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 18:35:06 -0000 Subject: [llvm-commits] [llvm] r103724 - in /llvm/trunk: include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp Message-ID: <20100513183506.A0E6A3128018@llvm.org> Author: ddunbar Date: Thu May 13 13:35:06 2010 New Revision: 103724 URL: 
http://llvm.org/viewvc/llvm-project?rev=103724&view=rev Log: MC: Factor out MCAssembler::ComputeFragmentSize. Modified: llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103724&r1=103723&r2=103724&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Thu May 13 13:35:06 2010 @@ -650,6 +650,12 @@ bool FragmentNeedsRelaxation(const MCInstFragment *IF, const MCAsmLayout &Layout) const; + /// Compute the effective fragment size assuming it is layed out at the given + /// \arg SectionAddress and \arg FragmentOffset. + uint64_t ComputeFragmentSize(MCAsmLayout &Layout, const MCFragment &F, + uint64_t SectionAddress, + uint64_t FragmentOffset) const; + /// LayoutFragment - Performs layout of the given \arg Fragment; assuming that /// the previous fragment has already been layed out correctly, and the parent /// section has been initialized. Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103724&r1=103723&r2=103724&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Thu May 13 13:35:06 2010 @@ -375,52 +375,39 @@ return IsResolved; } -void MCAssembler::LayoutFragment(MCAsmLayout &Layout, MCFragment &F) { - uint64_t StartAddress = Layout.getSectionAddress(F.getParent()); - - // Get the fragment start address. 
- uint64_t Address = StartAddress; - MCSectionData::iterator it = &F; - if (MCFragment *Prev = F.getPrevNode()) - Address = (StartAddress + Layout.getFragmentOffset(Prev) + - Layout.getFragmentEffectiveSize(Prev)); - - ++stats::FragmentLayouts; - - uint64_t FragmentOffset = Address - StartAddress; - Layout.setFragmentOffset(&F, FragmentOffset); - - // Evaluate fragment size. - uint64_t EffectiveSize = 0; +uint64_t MCAssembler::ComputeFragmentSize(MCAsmLayout &Layout, + const MCFragment &F, + uint64_t SectionAddress, + uint64_t FragmentOffset) const { switch (F.getKind()) { + case MCFragment::FT_Data: + return cast<MCDataFragment>(F).getContents().size(); + case MCFragment::FT_Fill: + return cast<MCFillFragment>(F).getSize(); + case MCFragment::FT_Inst: + return cast<MCInstFragment>(F).getInstSize(); + case MCFragment::FT_Align: { - MCAlignFragment &AF = cast<MCAlignFragment>(F); + const MCAlignFragment &AF = cast<MCAlignFragment>(F); assert((!AF.hasOnlyAlignAddress() || !AF.getNextNode()) && "Invalid OnlyAlignAddress bit, not the last fragment!"); - EffectiveSize = OffsetToAlignment(Address, AF.getAlignment()); - if (EffectiveSize > AF.getMaxBytesToEmit()) - EffectiveSize = 0; - break; - } + uint64_t Size = OffsetToAlignment(SectionAddress + FragmentOffset, + AF.getAlignment()); - case MCFragment::FT_Data: - EffectiveSize = cast<MCDataFragment>(F).getContents().size(); - break; + // Honor MaxBytesToEmit. + if (Size > AF.getMaxBytesToEmit()) + return 0; - case MCFragment::FT_Fill: { - EffectiveSize = cast<MCFillFragment>(F).getSize(); - break; + return Size; } - case MCFragment::FT_Inst: - EffectiveSize = cast<MCInstFragment>(F).getInstSize(); - break; - case MCFragment::FT_Org: { - MCOrgFragment &OF = cast<MCOrgFragment>(F); + const MCOrgFragment &OF = cast<MCOrgFragment>(F); + // FIXME: We should compute this sooner, we don't want to recurse here, and + // we would like to be more functional. 
int64_t TargetLocation; if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout)) report_fatal_error("expected assembly-time absolute expression"); @@ -431,11 +418,32 @@ report_fatal_error("invalid .org offset '" + Twine(TargetLocation) + "' (at offset '" + Twine(FragmentOffset) + "'"); - EffectiveSize = Offset; - break; + return Offset; } } + assert(0 && "invalid fragment kind"); + return 0; +} + +void MCAssembler::LayoutFragment(MCAsmLayout &Layout, MCFragment &F) { + uint64_t StartAddress = Layout.getSectionAddress(F.getParent()); + + // Get the fragment start address. + uint64_t Address = StartAddress; + MCSectionData::iterator it = &F; + if (MCFragment *Prev = F.getPrevNode()) + Address = (StartAddress + Layout.getFragmentOffset(Prev) + + Layout.getFragmentEffectiveSize(Prev)); + + ++stats::FragmentLayouts; + + uint64_t FragmentOffset = Address - StartAddress; + Layout.setFragmentOffset(&F, FragmentOffset); + + // Evaluate fragment size. + uint64_t EffectiveSize = ComputeFragmentSize(Layout, F, StartAddress, + FragmentOffset); Layout.setFragmentEffectiveSize(&F, EffectiveSize); } From gohman at apple.com Thu May 13 14:19:32 2010 From: gohman at apple.com (Dan Gohman) Date: Thu, 13 May 2010 19:19:32 -0000 Subject: [llvm-commits] [llvm] r103725 - /llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Message-ID: <20100513191933.0098D312800A@llvm.org> Author: djg Date: Thu May 13 14:19:32 2010 New Revision: 103725 URL: http://llvm.org/viewvc/llvm-project?rev=103725&view=rev Log: An Instruction has a trivial kill only if its use is in the same basic block. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp?rev=103725&r1=103724&r2=103725&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Thu May 13 14:19:32 2010 @@ -57,9 +57,12 @@ using namespace llvm; bool FastISel::hasTrivialKill(const Value *V) const { - // Don't consider constants or arguments to have trivial kills. + // Don't consider constants or arguments to have trivial kills. Only + // instructions with a single use in the same basic block. const Instruction *I = dyn_cast<Instruction>(V); - return I && I->hasOneUse(); + return I && + I->hasOneUse() && + cast<Instruction>(I->use_begin())->getParent() == I->getParent(); } unsigned FastISel::getRegForValue(const Value *V) { From gohman at apple.com Thu May 13 14:24:00 2010 From: gohman at apple.com (Dan Gohman) Date: Thu, 13 May 2010 19:24:00 -0000 Subject: [llvm-commits] [llvm] r103726 - in /llvm/trunk: include/llvm/CodeGen/MachineRegisterInfo.h lib/CodeGen/MachineCSE.cpp lib/CodeGen/MachineRegisterInfo.cpp Message-ID: <20100513192400.8850F312800A@llvm.org> Author: djg Date: Thu May 13 14:24:00 2010 New Revision: 103726 URL: http://llvm.org/viewvc/llvm-project?rev=103726&view=rev Log: Add a utility function for conservatively clearing kill flags, and make use of it in MachineCSE. 
Modified: llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h llvm/trunk/lib/CodeGen/MachineCSE.cpp llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp Modified: llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h?rev=103726&r1=103725&r2=103726&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h Thu May 13 14:24:00 2010 @@ -162,6 +162,12 @@ /// register or null if none is found. This assumes that the code is in SSA /// form, so there should only be one definition. MachineInstr *getVRegDef(unsigned Reg) const; + + /// clearKillFlags - Iterate over all the uses of the given register and + /// clear the kill flag from the MachineOperand. This function is used by + /// optimization passes which extend register lifetimes and need only + /// preserve conservative kill flag information. + void clearKillFlags(unsigned Reg) const; #ifndef NDEBUG void dumpUses(unsigned RegNo) const; Modified: llvm/trunk/lib/CodeGen/MachineCSE.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineCSE.cpp?rev=103726&r1=103725&r2=103726&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineCSE.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineCSE.cpp Thu May 13 14:24:00 2010 @@ -112,6 +112,7 @@ DEBUG(dbgs() << "Coalescing: " << *DefMI); DEBUG(dbgs() << "*** to: " << *MI); MO.setReg(SrcReg); + MRI->clearKillFlags(SrcReg); if (NewRC != SRC) MRI->setRegClass(SrcReg, NewRC); DefMI->eraseFromParent(); @@ -365,8 +366,10 @@ // Actually perform the elimination. 
if (DoCSE) { - for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) + for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); + MRI->clearKillFlags(CSEPairs[i].second); + } MI->eraseFromParent(); ++NumCSEs; } else { Modified: llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp?rev=103726&r1=103725&r2=103726&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineRegisterInfo.cpp Thu May 13 14:24:00 2010 @@ -133,6 +133,15 @@ return ++UI == use_nodbg_end(); } +/// clearKillFlags - Iterate over all the uses of the given register and +/// clear the kill flag from the MachineOperand. This function is used by +/// optimization passes which extend register lifetimes and need only +/// preserve conservative kill flag information. +void MachineRegisterInfo::clearKillFlags(unsigned Reg) const { + for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI) + UI.getOperand().setIsKill(false); +} + bool MachineRegisterInfo::isLiveIn(unsigned Reg) const { for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) if (I->first == Reg || I->second == Reg) From ofv at wanadoo.es Thu May 13 14:34:07 2010 From: ofv at wanadoo.es (Oscar Fuentes) Date: Thu, 13 May 2010 19:34:07 -0000 Subject: [llvm-commits] [llvm] r103727 - /llvm/trunk/lib/Target/X86/CMakeLists.txt Message-ID: <20100513193407.1D9DA312800A@llvm.org> Author: ofv Date: Thu May 13 14:34:06 2010 New Revision: 103727 URL: http://llvm.org/viewvc/llvm-project?rev=103727&view=rev Log: CMake: fixes 64 bit Visual Studio IDE build. Fixes bug 4936. Patch by Dimitry Andric! 
Modified: llvm/trunk/lib/Target/X86/CMakeLists.txt Modified: llvm/trunk/lib/Target/X86/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/CMakeLists.txt?rev=103727&r1=103726&r2=103727&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/CMakeLists.txt (original) +++ llvm/trunk/lib/Target/X86/CMakeLists.txt Thu May 13 14:34:06 2010 @@ -39,7 +39,12 @@ if( CMAKE_CL_64 ) enable_language(ASM_MASM) - set(sources ${sources} X86CompilationCallback_Win64.asm) + ADD_CUSTOM_COMMAND( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj + COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm + ) + set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj) endif() add_llvm_target(X86CodeGen ${sources}) From bob.wilson at apple.com Thu May 13 14:58:24 2010 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 13 May 2010 19:58:24 -0000 Subject: [llvm-commits] [llvm] r103730 - /llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp Message-ID: <20100513195824.58828312800A@llvm.org> Author: bwilson Date: Thu May 13 14:58:24 2010 New Revision: 103730 URL: http://llvm.org/viewvc/llvm-project?rev=103730&view=rev Log: Fix pr7110: For non-Darwin targets UnspilledCS1GPRs may include high registers. Do not use those for Thumb1 functions. 
Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp?rev=103730&r1=103729&r2=103730&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp Thu May 13 14:58:24 2010 @@ -779,7 +779,9 @@ while (NumExtras && !UnspilledCS1GPRs.empty()) { unsigned Reg = UnspilledCS1GPRs.back(); UnspilledCS1GPRs.pop_back(); - if (!isReservedReg(MF, Reg)) { + if (!isReservedReg(MF, Reg) && + (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || + Reg == ARM::LR)) { Extras.push_back(Reg); NumExtras--; } From evan.cheng at apple.com Thu May 13 15:02:08 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 13 May 2010 20:02:08 -0000 Subject: [llvm-commits] [llvm] r103731 - /llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td Message-ID: <20100513200208.8AE08312800A@llvm.org> Author: evancheng Date: Thu May 13 15:02:08 2010 New Revision: 103731 URL: http://llvm.org/viewvc/llvm-project?rev=103731&view=rev Log: Add comment about the pseudo registers QQ, each of which is a pair of Q registers. Modified: llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td Modified: llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td?rev=103731&r1=103730&r2=103731&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td Thu May 13 15:02:08 2010 @@ -108,6 +108,11 @@ // Pseudo 256-bit registers to represent pairs of Q registers. These should // never be present in the emitted code. +// These are used for NEON load / store instructions, e.g. vld4, vst3. 
+// NOTE: It's possible to define more QQ registers since technical the +// starting D register number doesn't have to be multiple of 4. e.g. +// D1, D2, D3, D4 would be a legal quad. But that would make the sub-register +// stuffs very messy. def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>; def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>; def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>; From evan.cheng at apple.com Thu May 13 15:14:58 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 13 May 2010 20:14:58 -0000 Subject: [llvm-commits] [llvm] r103733 - /llvm/trunk/include/llvm/Target/TargetLowering.h Message-ID: <20100513201458.3DD1A312800A@llvm.org> Author: evancheng Date: Thu May 13 15:14:58 2010 New Revision: 103733 URL: http://llvm.org/viewvc/llvm-project?rev=103733&view=rev Log: 80 col violation. Modified: llvm/trunk/include/llvm/Target/TargetLowering.h Modified: llvm/trunk/include/llvm/Target/TargetLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=103733&r1=103732&r2=103733&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h (original) +++ llvm/trunk/include/llvm/Target/TargetLowering.h Thu May 13 15:14:58 2010 @@ -1593,7 +1593,8 @@ /// non-legal value types are not described here. /// This array is accessed using VT.getSimpleVT(), so it is subject to /// the MVT::MAX_ALLOWED_VALUETYPE * 2 bits. 
- uint64_t OpActions[MVT::MAX_ALLOWED_VALUETYPE/(sizeof(uint64_t)*4)][ISD::BUILTIN_OP_END]; + uint64_t OpActions[MVT::MAX_ALLOWED_VALUETYPE/(sizeof(uint64_t)*4)] + [ISD::BUILTIN_OP_END]; /// LoadExtActions - For each load of load extension type and each value type, /// keep a LegalizeAction that indicates how instruction selection should deal From dpatel at apple.com Thu May 13 15:22:07 2010 From: dpatel at apple.com (Devang Patel) Date: Thu, 13 May 2010 20:22:07 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103734 - /llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Message-ID: <20100513202207.CD642312800A@llvm.org> Author: dpatel Date: Thu May 13 15:22:07 2010 New Revision: 103734 URL: http://llvm.org/viewvc/llvm-project?rev=103734&view=rev Log: Update comments. Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp?rev=103734&r1=103733&r2=103734&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Thu May 13 15:22:07 2010 @@ -290,8 +290,7 @@ setCurrentLexicalBlock(desired); } -/// EmitFunctionStart - Constructs the debug code for entering a function - -/// "llvm.dbg.func.start." +/// EmitFunctionStart - Constructs the debug code for entering a function. void DebugInfo::EmitFunctionStart(tree FnDecl, Function *Fn, BasicBlock *CurBB) { setCurrentLexicalBlock(FnDecl); @@ -426,8 +425,7 @@ return getOrCreateFile(main_input_filename); } -/// EmitFunctionEnd - Constructs the debug code for exiting a declarative -/// region - "llvm.dbg.region.end." +/// EmitFunctionEnd - Pop the region stack and reset current lexical block. 
void DebugInfo::EmitFunctionEnd(BasicBlock *CurBB, bool EndFunction) { assert(!RegionStack.empty() && "Region stack mismatch, stack empty!"); RegionStack.pop_back(); @@ -442,7 +440,6 @@ } /// EmitDeclare - Constructs the debug code for allocation of a new variable. -/// region - "llvm.dbg.declare." void DebugInfo::EmitDeclare(tree decl, unsigned Tag, const char *Name, tree type, Value *AI, LLVMBuilder &Builder) { @@ -469,9 +466,8 @@ Name, getOrCreateFile(Loc.file), Loc.line, Ty); - // Insert an llvm.dbg.declare into the current block. - Instruction *Call = DebugFactory.InsertDeclare(AI, D, - Builder.GetInsertBlock()); + Instruction *Call = + DebugFactory.InsertDeclare(AI, D, Builder.GetInsertBlock()); Call->setDebugLoc(DebugLoc::get(CurLineNo, 0, VarScope)); } @@ -499,8 +495,7 @@ return false; } -/// EmitStopPoint - Emit a call to llvm.dbg.stoppoint to indicate a change of -/// source line - "llvm.dbg.stoppoint." Now enabled at -O. +/// EmitStopPoint - Set current source location. void DebugInfo::EmitStopPoint(Function *Fn, BasicBlock *CurBB, LLVMBuilder &Builder) { // Don't bother if things are the same as last time. From gohman at apple.com Thu May 13 15:34:42 2010 From: gohman at apple.com (Dan Gohman) Date: Thu, 13 May 2010 20:34:42 -0000 Subject: [llvm-commits] [llvm] r103737 - in /llvm/trunk: include/llvm/CodeGen/MachineInstr.h lib/CodeGen/MachineInstr.cpp lib/CodeGen/MachineLICM.cpp lib/CodeGen/MachineSink.cpp Message-ID: <20100513203442.62969312800A@llvm.org> Author: djg Date: Thu May 13 15:34:42 2010 New Revision: 103737 URL: http://llvm.org/viewvc/llvm-project?rev=103737&view=rev Log: Teach MachineLICM and MachineSink how to clear kill flags conservatively when they move instructions. 
Modified: llvm/trunk/include/llvm/CodeGen/MachineInstr.h llvm/trunk/lib/CodeGen/MachineInstr.cpp llvm/trunk/lib/CodeGen/MachineLICM.cpp llvm/trunk/lib/CodeGen/MachineSink.cpp Modified: llvm/trunk/include/llvm/CodeGen/MachineInstr.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineInstr.h?rev=103737&r1=103736&r2=103737&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineInstr.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineInstr.h Thu May 13 15:34:42 2010 @@ -302,6 +302,10 @@ /// reference if DefOpIdx is not null. bool isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx = 0) const; + /// clearKillInfo - Clears kill flags on all operands. + /// + void clearKillInfo(); + /// copyKillDeadInfo - Copies kill / dead operand properties from MI. /// void copyKillDeadInfo(const MachineInstr *MI); Modified: llvm/trunk/lib/CodeGen/MachineInstr.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineInstr.cpp?rev=103737&r1=103736&r2=103737&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineInstr.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineInstr.cpp Thu May 13 15:34:42 2010 @@ -938,6 +938,16 @@ return true; } +/// clearKillInfo - Clears kill flags on all operands. +/// +void MachineInstr::clearKillInfo() { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (MO.isReg() && MO.isUse()) + MO.setIsKill(false); + } +} + /// copyKillDeadInfo - Copies kill / dead operand properties from MI. 
/// void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) { Modified: llvm/trunk/lib/CodeGen/MachineLICM.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineLICM.cpp?rev=103737&r1=103736&r2=103737&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineLICM.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineLICM.cpp Thu May 13 15:34:42 2010 @@ -738,8 +738,10 @@ "Instructions with different phys regs are not identical!"); if (MO.isReg() && MO.isDef() && - !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); + RegInfo->clearKillFlags(Dup->getOperand(i).getReg()); + } } MI->eraseFromParent(); ++NumCSEed; @@ -784,6 +786,15 @@ // Otherwise, splice the instruction to the preheader. CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI); + // Clear the kill flags of any register this instruction defines, + // since they may need to be live throughout the entire loop + // rather than just live for part of it. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && !MO.isDead()) + RegInfo->clearKillFlags(MO.getReg()); + } + // Add to the CSE map. if (CI != CSEMap.end()) CI->second.push_back(MI); Modified: llvm/trunk/lib/CodeGen/MachineSink.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineSink.cpp?rev=103737&r1=103736&r2=103737&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineSink.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineSink.cpp Thu May 13 15:34:42 2010 @@ -314,5 +314,10 @@ // Move the instruction. 
SuccToSinkTo->splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); + + // Conservatively, clear any kill flags, since it's possible that + // they are no longer correct. + MI->clearKillInfo(); + return true; } From daniel at zuster.org Thu May 13 15:40:12 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Thu, 13 May 2010 20:40:12 -0000 Subject: [llvm-commits] [llvm] r103738 - in /llvm/trunk: include/llvm/MC/MCAsmLayout.h include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp Message-ID: <20100513204012.38E41312800A@llvm.org> Author: ddunbar Date: Thu May 13 15:40:12 2010 New Revision: 103738 URL: http://llvm.org/viewvc/llvm-project?rev=103738&view=rev Log: MC: Move Layout{Fragment,Section} into MCAsmLayout, and add LayoutFile(). Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAsmLayout.h?rev=103738&r1=103737&r2=103738&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAsmLayout.h (original) +++ llvm/trunk/include/llvm/MC/MCAsmLayout.h Thu May 13 15:40:12 2010 @@ -50,6 +50,18 @@ /// \brief Update the layout because a fragment has been replaced. void FragmentReplaced(MCFragment *Src, MCFragment *Dst); + /// \brief Perform a full layout. + void LayoutFile(); + + /// \brief Perform layout for a single fragment, assuming that the previous + /// fragment has already been layed out correctly, and the parent section has + /// been initialized. + void LayoutFragment(MCFragment *Fragment); + + /// \brief Performs layout for a single section, assuming that the previous + /// section has already been layed out correctly. 
+ void LayoutSection(MCSectionData *SD); + /// @name Section Access (in layout order) /// @{ Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103738&r1=103737&r2=103738&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Thu May 13 15:40:12 2010 @@ -656,16 +656,6 @@ uint64_t SectionAddress, uint64_t FragmentOffset) const; - /// LayoutFragment - Performs layout of the given \arg Fragment; assuming that - /// the previous fragment has already been layed out correctly, and the parent - /// section has been initialized. - void LayoutFragment(MCAsmLayout &Layout, MCFragment &Fragment); - - /// LayoutSection - Performs layout of the section referenced by the given - /// \arg SectionOrderIndex. The layout assumes that the previous section has - /// already been layed out correctly. - void LayoutSection(MCAsmLayout &Layout, unsigned SectionOrderIndex); - /// LayoutOnce - Perform one layout iteration and return true if any offsets /// were adjusted. bool LayoutOnce(MCAsmLayout &Layout); Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103738&r1=103737&r2=103738&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Thu May 13 15:40:12 2010 @@ -69,8 +69,7 @@ // FIXME-PERF: This is O(N^2), but will be eliminated once we get smarter. // Layout the sections in order. 
- for (unsigned i = 0, e = getSectionOrder().size(); i != e; ++i) - getAssembler().LayoutSection(*this, i); + LayoutFile(); } void MCAsmLayout::FragmentReplaced(MCFragment *Src, MCFragment *Dst) { @@ -426,49 +425,52 @@ return 0; } -void MCAssembler::LayoutFragment(MCAsmLayout &Layout, MCFragment &F) { - uint64_t StartAddress = Layout.getSectionAddress(F.getParent()); +void MCAsmLayout::LayoutFile() { + for (unsigned i = 0, e = getSectionOrder().size(); i != e; ++i) + LayoutSection(getSectionOrder()[i]); +} + +void MCAsmLayout::LayoutFragment(MCFragment *F) { + uint64_t StartAddress = getSectionAddress(F->getParent()); // Get the fragment start address. uint64_t Address = StartAddress; - MCSectionData::iterator it = &F; - if (MCFragment *Prev = F.getPrevNode()) - Address = (StartAddress + Layout.getFragmentOffset(Prev) + - Layout.getFragmentEffectiveSize(Prev)); + MCSectionData::iterator it = F; + if (MCFragment *Prev = F->getPrevNode()) + Address = (StartAddress + getFragmentOffset(Prev) + + getFragmentEffectiveSize(Prev)); ++stats::FragmentLayouts; - uint64_t FragmentOffset = Address - StartAddress; - Layout.setFragmentOffset(&F, FragmentOffset); + // Compute fragment offset and size. + uint64_t Offset = Address - StartAddress; + uint64_t EffectiveSize = + getAssembler().ComputeFragmentSize(*this, *F, StartAddress, Offset); - // Evaluate fragment size. - uint64_t EffectiveSize = ComputeFragmentSize(Layout, F, StartAddress, - FragmentOffset); - Layout.setFragmentEffectiveSize(&F, EffectiveSize); + setFragmentOffset(F, Offset); + setFragmentEffectiveSize(F, EffectiveSize); } -void MCAssembler::LayoutSection(MCAsmLayout &Layout, - unsigned SectionOrderIndex) { - MCSectionData &SD = *Layout.getSectionOrder()[SectionOrderIndex]; +void MCAsmLayout::LayoutSection(MCSectionData *SD) { + unsigned SectionOrderIndex = SD->getLayoutOrder(); ++stats::SectionLayouts; // Compute the section start address. 
uint64_t StartAddress = 0; if (SectionOrderIndex) { - MCSectionData *Prev = Layout.getSectionOrder()[SectionOrderIndex - 1]; - StartAddress = (Layout.getSectionAddress(Prev) + - Layout.getSectionAddressSize(Prev)); + MCSectionData *Prev = getSectionOrder()[SectionOrderIndex - 1]; + StartAddress = getSectionAddress(Prev) + getSectionAddressSize(Prev); } // Honor the section alignment requirements. - StartAddress = RoundUpToAlignment(StartAddress, SD.getAlignment()); + StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment()); // Set the section address. - Layout.setSectionAddress(&SD, StartAddress); + setSectionAddress(SD, StartAddress); - for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) - LayoutFragment(Layout, *it); + for (MCSectionData::iterator it = SD->begin(), ie = SD->end(); it != ie; ++it) + LayoutFragment(it); } /// WriteFragmentData - Write the \arg F data to the output file. @@ -754,8 +756,7 @@ ++stats::RelaxationSteps; // Layout the sections in order. - for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) - LayoutSection(Layout, i); + Layout.LayoutFile(); // Scan for fragments that need relaxation. 
bool WasRelaxed = false; From stoklund at 2pi.dk Thu May 13 15:43:17 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Thu, 13 May 2010 20:43:17 -0000 Subject: [llvm-commits] [llvm] r103739 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100513204317.2ED89312800A@llvm.org> Author: stoklund Date: Thu May 13 15:43:17 2010 New Revision: 103739 URL: http://llvm.org/viewvc/llvm-project?rev=103739&view=rev Log: Clean up RegAllocFast debug output Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103739&r1=103738&r2=103739&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Thu May 13 15:43:17 2010 @@ -173,7 +173,6 @@ MO.setIsDead(); else if (!LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) MO.setIsKill(); - DEBUG(dbgs() << " %reg" << lri->first << " killed: " << *LR.LastUse); } } @@ -190,7 +189,6 @@ void RAFast::killVirtReg(unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); - DEBUG(dbgs() << " Killing %reg" << VirtReg << "\n"); LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); if (lri != LiveVirtRegs.end()) killVirtReg(lri); @@ -215,7 +213,7 @@ if (LR.Dirty) { LR.Dirty = false; - DEBUG(dbgs() << " Spilling register " << TRI->getName(LR.PhysReg) + DEBUG(dbgs() << "Spilling register " << TRI->getName(LR.PhysReg) << " containing %reg" << VirtReg); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); @@ -323,7 +321,7 @@ /// RAFast::LiveRegMap::iterator RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << " Assigning %reg" << VirtReg << " to " + DEBUG(dbgs() << "Assigning %reg" << VirtReg << " to " << TRI->getName(PhysReg) << "\n"); 
PhysRegState[PhysReg] = VirtReg; return LiveVirtRegs.insert(std::make_pair(VirtReg, PhysReg)).first; @@ -356,7 +354,7 @@ SrcReg == VirtReg && TargetRegisterInfo::isPhysicalRegister(DstReg) && RC->contains(DstReg) && !UsedInInstr.test(DstReg)) { Hint = DstReg; - DEBUG(dbgs() << " %reg" << VirtReg << " gets hint from " << MI); + DEBUG(dbgs() << "%reg" << VirtReg << " gets hint from " << MI); } } @@ -369,8 +367,6 @@ case regReserved: break; default: - DEBUG(dbgs() << " %reg" << VirtReg << " really wants " - << TRI->getName(Hint) << "\n"); spillVirtReg(MBB, MI, PhysRegState[Hint], true); // Fall through. case regFree: @@ -400,7 +396,7 @@ } } - DEBUG(dbgs() << " Allocating %reg" << VirtReg << " from " << RC->getName() + DEBUG(dbgs() << "Allocating %reg" << VirtReg << " from " << RC->getName() << " candidate=" << TRI->getName(BestReg) << "\n"); // Try to extend the working set for RC if there were any disabled registers. @@ -434,7 +430,7 @@ } } if (Impossible) continue; - DEBUG(dbgs() << " - candidate " << TRI->getName(PhysReg) + DEBUG(dbgs() << "- candidate " << TRI->getName(PhysReg) << " cost=" << Cost << "\n"); if (!BestReg || Cost < BestCost) { BestReg = PhysReg; @@ -511,7 +507,7 @@ lri = allocVirtReg(MBB, MI, VirtReg, Hint); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " + DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into " << TRI->getName(lri->second.PhysReg) << "\n"); TII->loadRegFromStackSlot(MBB, MI, lri->second.PhysReg, FrameIndex, RC, TRI); @@ -575,7 +571,7 @@ } void RAFast::AllocateBasicBlock(MachineBasicBlock &MBB) { - DEBUG(dbgs() << "\nBB#" << MBB.getNumber() << ", "<< MBB.getName() << "\n"); + DEBUG(dbgs() << "\nAllocating " << MBB); PhysRegState.assign(TRI->getNumRegs(), regDisabled); assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?"); @@ -594,7 +590,7 @@ MachineInstr *MI = MII++; const TargetInstrDesc &TID = 
MI->getDesc(); DEBUG({ - dbgs() << "\nStarting RegAlloc of: " << *MI << "Working set:"; + dbgs() << "\n>> " << *MI << "Regs:"; for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { if (PhysRegState[Reg] == regDisabled) continue; dbgs() << " " << TRI->getName(Reg); @@ -602,7 +598,7 @@ case regFree: break; case regReserved: - dbgs() << "(resv)"; + dbgs() << "*"; break; default: dbgs() << "=%reg" << PhysRegState[Reg]; @@ -771,6 +767,8 @@ PhysKills.clear(); MRI->addPhysRegsUsed(UsedInInstr); + + DEBUG(dbgs() << "<< " << *MI); } // Spill all physical registers holding virtual registers now. @@ -785,8 +783,9 @@ /// runOnMachineFunction - Register allocate the whole function /// bool RAFast::runOnMachineFunction(MachineFunction &Fn) { - DEBUG(dbgs() << "Machine Function\n"); - DEBUG(Fn.dump()); + DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" + << "********** Function: " + << ((Value*)Fn.getFunction())->getName() << '\n'); MF = &Fn; MRI = &MF->getRegInfo(); TM = &Fn.getTarget(); From evan.cheng at apple.com Thu May 13 18:01:26 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 13 May 2010 23:01:26 -0000 Subject: [llvm-commits] [llvm] r103742 - /llvm/trunk/include/llvm/Target/TargetLowering.h Message-ID: <20100513230126.298CC312800A@llvm.org> Author: evancheng Date: Thu May 13 18:01:26 2010 New Revision: 103742 URL: http://llvm.org/viewvc/llvm-project?rev=103742&view=rev Log: Fix up LoadExtActions, TruncStoreActions, and IndexedModeActions representation and setters and getters so they will continue to work if the number of scalar ValueTypes exceeds 31.
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h Modified: llvm/trunk/include/llvm/Target/TargetLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=103742&r1=103741&r2=103742&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h (original) +++ llvm/trunk/include/llvm/Target/TargetLowering.h Thu May 13 18:01:26 2010 @@ -385,35 +385,31 @@ /// either it is legal, needs to be promoted to a larger size, needs to be /// expanded to some other code sequence, or the target has a custom expander /// for it. - LegalizeAction getLoadExtAction(unsigned LType, EVT VT) const { - assert(LType < array_lengthof(LoadExtActions) && - (unsigned)VT.getSimpleVT().SimpleTy < sizeof(LoadExtActions[0])*4 && + LegalizeAction getLoadExtAction(unsigned ExtType, EVT VT) const { + assert(ExtType < ISD::LAST_LOADEXT_TYPE && + (unsigned)VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)((LoadExtActions[LType] >> - (2*VT.getSimpleVT().SimpleTy)) & 3); + return (LegalizeAction)LoadExtActions[VT.getSimpleVT().SimpleTy][ExtType]; } /// isLoadExtLegal - Return true if the specified load with extension is legal /// on this target. - bool isLoadExtLegal(unsigned LType, EVT VT) const { + bool isLoadExtLegal(unsigned ExtType, EVT VT) const { return VT.isSimple() && - (getLoadExtAction(LType, VT) == Legal || - getLoadExtAction(LType, VT) == Custom); + (getLoadExtAction(ExtType, VT) == Legal || + getLoadExtAction(ExtType, VT) == Custom); } /// getTruncStoreAction - Return how this store with truncation should be /// treated: either it is legal, needs to be promoted to a larger size, needs /// to be expanded to some other code sequence, or the target has a custom /// expander for it. 
- LegalizeAction getTruncStoreAction(EVT ValVT, - EVT MemVT) const { - assert((unsigned)ValVT.getSimpleVT().SimpleTy < - array_lengthof(TruncStoreActions) && - (unsigned)MemVT.getSimpleVT().SimpleTy < - sizeof(TruncStoreActions[0])*4 && + LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { + assert((unsigned)ValVT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE && + (unsigned)MemVT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)((TruncStoreActions[ValVT.getSimpleVT().SimpleTy] >> - (2*MemVT.getSimpleVT().SimpleTy)) & 3); + return (LegalizeAction)TruncStoreActions[ValVT.getSimpleVT().SimpleTy] + [MemVT.getSimpleVT().SimpleTy]; } /// isTruncStoreLegal - Return true if the specified store with truncation is @@ -430,11 +426,11 @@ /// for it. LegalizeAction getIndexedLoadAction(unsigned IdxMode, EVT VT) const { - assert( IdxMode < array_lengthof(IndexedModeActions[0][0]) && + assert( IdxMode < ISD::LAST_INDEXED_MODE && ((unsigned)VT.getSimpleVT().SimpleTy) < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)((IndexedModeActions[ - (unsigned)VT.getSimpleVT().SimpleTy][0][IdxMode])); + unsigned Ty = (unsigned)VT.getSimpleVT().SimpleTy; + return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4); } /// isIndexedLoadLegal - Return true if the specified indexed load is legal @@ -451,11 +447,11 @@ /// for it. 
LegalizeAction getIndexedStoreAction(unsigned IdxMode, EVT VT) const { - assert(IdxMode < array_lengthof(IndexedModeActions[0][1]) && - (unsigned)VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE && + assert( IdxMode < ISD::LAST_INDEXED_MODE && + ((unsigned)VT.getSimpleVT().SimpleTy) < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)((IndexedModeActions[ - (unsigned)VT.getSimpleVT().SimpleTy][1][IdxMode])); + unsigned Ty = (unsigned)VT.getSimpleVT().SimpleTy; + return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f); } /// isIndexedStoreLegal - Return true if the specified indexed load is legal @@ -1000,23 +996,21 @@ /// setLoadExtAction - Indicate that the specified load with extension does /// not work with the specified type and indicate what to do about it. void setLoadExtAction(unsigned ExtType, MVT VT, - LegalizeAction Action) { - assert((unsigned)VT.SimpleTy*2 < 63 && - ExtType < array_lengthof(LoadExtActions) && + LegalizeAction Action) { + assert(ExtType < ISD::LAST_LOADEXT_TYPE && + (unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - LoadExtActions[ExtType] &= ~(uint64_t(3UL) << VT.SimpleTy*2); - LoadExtActions[ExtType] |= (uint64_t)Action << VT.SimpleTy*2; + LoadExtActions[VT.SimpleTy][ExtType] = (uint8_t)Action; } /// setTruncStoreAction - Indicate that the specified truncating store does /// not work with the specified type and indicate what to do about it. 
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { - assert((unsigned)ValVT.SimpleTy < array_lengthof(TruncStoreActions) && - (unsigned)MemVT.SimpleTy*2 < 63 && + assert((unsigned)ValVT.SimpleTy < MVT::LAST_VALUETYPE && + (unsigned)MemVT.SimpleTy < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - TruncStoreActions[ValVT.SimpleTy] &= ~(uint64_t(3UL) << MemVT.SimpleTy*2); - TruncStoreActions[ValVT.SimpleTy] |= (uint64_t)Action << MemVT.SimpleTy*2; + TruncStoreActions[ValVT.SimpleTy][MemVT.SimpleTy] = (uint8_t)Action; } /// setIndexedLoadAction - Indicate that the specified indexed load does or @@ -1026,9 +1020,12 @@ void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE && - IdxMode < array_lengthof(IndexedModeActions[0][0]) && + IdxMode < ISD::LAST_INDEXED_MODE && + (unsigned)Action < 0xf && "Table isn't big enough!"); - IndexedModeActions[(unsigned)VT.SimpleTy][0][IdxMode] = (uint8_t)Action; + // Load action are kept in the upper half. + IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0; + IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4; } /// setIndexedStoreAction - Indicate that the specified indexed store does or @@ -1038,9 +1035,12 @@ void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE && - IdxMode < array_lengthof(IndexedModeActions[0][1] ) && + IdxMode < ISD::LAST_INDEXED_MODE && + (unsigned)Action < 0xf && "Table isn't big enough!"); - IndexedModeActions[(unsigned)VT.SimpleTy][1][IdxMode] = (uint8_t)Action; + // Store action are kept in the lower half. 
+ IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f; + IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action); } /// setCondCodeAction - Indicate that the specified condition code is or isn't @@ -1596,22 +1596,22 @@ uint64_t OpActions[MVT::MAX_ALLOWED_VALUETYPE/(sizeof(uint64_t)*4)] [ISD::BUILTIN_OP_END]; - /// LoadExtActions - For each load of load extension type and each value type, + /// LoadExtActions - For each load extension type and each value type, /// keep a LegalizeAction that indicates how instruction selection should deal - /// with the load. - uint64_t LoadExtActions[ISD::LAST_LOADEXT_TYPE]; + /// with a load of a specific value type and extension type. + uint8_t LoadExtActions[MVT::LAST_VALUETYPE][ISD::LAST_LOADEXT_TYPE]; - /// TruncStoreActions - For each truncating store, keep a LegalizeAction that - /// indicates how instruction selection should deal with the store. - uint64_t TruncStoreActions[MVT::LAST_VALUETYPE]; + /// TruncStoreActions - For each value type pair keep a LegalizeAction that + /// indicates whether a truncating store of a specific value type and + /// truncating type is legal. + uint8_t TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; /// IndexedModeActions - For each indexed mode and each value type, /// keep a pair of LegalizeAction that indicates how instruction - /// selection should deal with the load / store. The first - /// dimension is now the value_type for the reference. The second - /// dimension is the load [0] vs. store[1]. The third dimension - /// represents the various modes for load store. - uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][2][ISD::LAST_INDEXED_MODE]; + /// selection should deal with the load / store. The first dimension is the + /// value_type for the reference. The second dimension represents the various + /// modes for load store. 
+ uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE]; /// CondCodeActions - For each condition code (ISD::CondCode) keep a /// LegalizeAction that indicates how instruction selection should From evan.cheng at apple.com Thu May 13 18:25:21 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 13 May 2010 23:25:21 -0000 Subject: [llvm-commits] [llvm] r103743 - /llvm/trunk/include/llvm/Target/TargetLowering.h Message-ID: <20100513232521.64F37312800A@llvm.org> Author: evancheng Date: Thu May 13 18:25:21 2010 New Revision: 103743 URL: http://llvm.org/viewvc/llvm-project?rev=103743&view=rev Log: Eliminate use of magic numbers to access OpActions. It also has the effect of allowing more than 31 scalar value types. MAX_ALLOWED_VALUETYPE had already been updated to 64 a while back. Modified: llvm/trunk/include/llvm/Target/TargetLowering.h Modified: llvm/trunk/include/llvm/Target/TargetLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=103743&r1=103742&r2=103743&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h (original) +++ llvm/trunk/include/llvm/Target/TargetLowering.h Thu May 13 18:25:21 2010 @@ -202,12 +202,14 @@ } unsigned I = VT.getSimpleVT().SimpleTy; assert(I<4*array_lengthof(ValueTypeActions)*sizeof(ValueTypeActions[0])); - return (LegalizeAction)((ValueTypeActions[I>>4] >> ((2*I) & 31)) & 3); + unsigned Mask = (unsigned)MVT::MAX_ALLOWED_VALUETYPE-1; + return (LegalizeAction)((ValueTypeActions[I>>4] >> ((2*I) & Mask)) & 3); } void setTypeAction(EVT VT, LegalizeAction Action) { unsigned I = VT.getSimpleVT().SimpleTy; assert(I<4*array_lengthof(ValueTypeActions)*sizeof(ValueTypeActions[0])); - ValueTypeActions[I>>4] |= Action << ((I*2) & 31); + unsigned Mask = (unsigned)MVT::MAX_ALLOWED_VALUETYPE-1; + ValueTypeActions[I>>4] |= Action << ((I*2) & Mask); } }; @@ -360,9 +362,9 @@ 
(unsigned)VT.getSimpleVT().SimpleTy < sizeof(OpActions[0][0])*8 && "Table isn't big enough!"); unsigned I = (unsigned) VT.getSimpleVT().SimpleTy; - unsigned J = I & 31; + unsigned J = I & ((unsigned)MVT::MAX_ALLOWED_VALUETYPE-1); I = I >> 5; - return (LegalizeAction)((OpActions[I][Op] >> (J*2) ) & 3); + return (LegalizeAction)((OpActions[I][Op] >> (J*2)) & 3); } /// isOperationLegalOrCustom - Return true if the specified operation is @@ -987,7 +989,7 @@ void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { unsigned I = (unsigned)VT.SimpleTy; - unsigned J = I & 31; + unsigned J = I & ((unsigned)MVT::MAX_ALLOWED_VALUETYPE - 1); I = I >> 5; OpActions[I][Op] &= ~(uint64_t(3UL) << (J*2)); OpActions[I][Op] |= (uint64_t)Action << (J*2); From dalej at apple.com Thu May 13 18:50:42 2010 From: dalej at apple.com (Dale Johannesen) Date: Thu, 13 May 2010 23:50:42 -0000 Subject: [llvm-commits] [llvm] r103744 - /llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Message-ID: <20100513235042.BD90F312800A@llvm.org> Author: johannes Date: Thu May 13 18:50:42 2010 New Revision: 103744 URL: http://llvm.org/viewvc/llvm-project?rev=103744&view=rev Log: Implement a correct ui64->f32 conversion. The old one was subject to double rounding in extreme cases. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=103744&r1=103743&r2=103744&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Thu May 13 18:50:42 2010 @@ -2026,6 +2026,7 @@ return Result; } assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + // Code below here assumes !isSigned without checking again. 
// Implementation of unsigned i64 to f64 following the algorithm in // __floatundidf in compiler_rt. This implementation has the advantage @@ -2051,6 +2052,41 @@ return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } + // Implementation of unsigned i64 to f32. This implementation has the + // advantage of performing rounding correctly. + // TODO: Generalize this for use with other types. + if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { + EVT SHVT = TLI.getShiftAmountTy(); + + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, + DAG.getConstant(UINT64_C(0x800), MVT::i64)); + SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); + SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); + SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0); + SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), + Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), + ISD::SETUGE); + SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); + + SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, + DAG.getConstant(32, SHVT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh); + SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc); + SDValue TwoP32 = + DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64); + SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt); + SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2); + SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo); + SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); + return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, + DAG.getIntPtrConstant(0)); + + } + SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue SignSet = 
DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()), From evan.cheng at apple.com Thu May 13 18:55:47 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 13 May 2010 23:55:47 -0000 Subject: [llvm-commits] [llvm] r103746 - in /llvm/trunk: include/llvm/CodeGen/ValueTypes.h include/llvm/CodeGen/ValueTypes.td lib/VMCore/ValueTypes.cpp utils/TableGen/CodeGenTarget.cpp Message-ID: <20100513235547.B166B312800A@llvm.org> Author: evancheng Date: Thu May 13 18:55:47 2010 New Revision: 103746 URL: http://llvm.org/viewvc/llvm-project?rev=103746&view=rev Log: Adding a v8i64 512-bit vector type. This will be used to model ARM NEON intrinsics which translate into a pair of vld / vst instructions that can load / store 8 consecutive 64-bit (D) registers. Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.h llvm/trunk/include/llvm/CodeGen/ValueTypes.td llvm/trunk/lib/VMCore/ValueTypes.cpp llvm/trunk/utils/TableGen/CodeGenTarget.cpp Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ValueTypes.h?rev=103746&r1=103745&r2=103746&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/ValueTypes.h (original) +++ llvm/trunk/include/llvm/CodeGen/ValueTypes.h Thu May 13 18:55:47 2010 @@ -63,19 +63,20 @@ v1i64 = 24, // 1 x i64 v2i64 = 25, // 2 x i64 v4i64 = 26, // 4 x i64 + v8i64 = 27, // 8 x i64 - v2f32 = 27, // 2 x f32 - v4f32 = 28, // 4 x f32 - v8f32 = 29, // 8 x f32 - v2f64 = 30, // 2 x f64 - v4f64 = 31, // 4 x f64 + v2f32 = 28, // 2 x f32 + v4f32 = 29, // 4 x f32 + v8f32 = 30, // 8 x f32 + v2f64 = 31, // 2 x f64 + v4f64 = 32, // 4 x f64 FIRST_VECTOR_VALUETYPE = v2i8, LAST_VECTOR_VALUETYPE = v4f64, - Flag = 32, // This glues nodes together during pre-RA sched + Flag = 33, // This glues nodes together during pre-RA sched - isVoid = 33, // This has no value + isVoid = 34, // This has no value LAST_VALUETYPE = 34, // This always 
remains at the end of the list. @@ -140,7 +141,7 @@ bool isInteger() const { return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE && SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) || - (SimpleTy >= MVT::v2i8 && SimpleTy <= MVT::v4i64)); + (SimpleTy >= MVT::v2i8 && SimpleTy <= MVT::v8i64)); } /// isVector - Return true if this is a vector value type. @@ -192,7 +193,8 @@ case v8i32: return i32; case v1i64: case v2i64: - case v4i64: return i64; + case v4i64: + case v8i64: return i64; case v2f32: case v4f32: case v8f32: return f32; @@ -211,6 +213,7 @@ case v8i8 : case v8i16: case v8i32: + case v8i64: case v8f32: return 8; case v4i8: case v4i16: @@ -269,6 +272,7 @@ case v4i64: case v8f32: case v4f64: return 256; + case v8i64: return 512; } } @@ -332,6 +336,7 @@ if (NumElements == 1) return MVT::v1i64; if (NumElements == 2) return MVT::v2i64; if (NumElements == 4) return MVT::v4i64; + if (NumElements == 8) return MVT::v8i64; break; case MVT::f32: if (NumElements == 2) return MVT::v2f32; @@ -468,10 +473,15 @@ /// is256BitVector - Return true if this is a 256-bit vector type. inline bool is256BitVector() const { - return isSimple() ? - (V==MVT::v8f32 || V==MVT::v4f64 || V==MVT::v32i8 || - V==MVT::v16i16 || V==MVT::v8i32 || V==MVT::v4i64) : - isExtended256BitVector(); + return isSimple() + ? (V==MVT::v8f32 || V==MVT::v4f64 || V==MVT::v32i8 || + V==MVT::v16i16 || V==MVT::v8i32 || V==MVT::v4i64) + : isExtended256BitVector(); + } + + /// is512BitVector - Return true if this is a 512-bit vector type. + inline bool is512BitVector() const { + return isSimple() ? (V == MVT::v8i64) : isExtended512BitVector(); } /// isOverloaded - Return true if this is an overloaded type for TableGen. 
@@ -668,6 +678,7 @@ bool isExtended64BitVector() const; bool isExtended128BitVector() const; bool isExtended256BitVector() const; + bool isExtended512BitVector() const; EVT getExtendedVectorElementType() const; unsigned getExtendedVectorNumElements() const; unsigned getExtendedSizeInBits() const; Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ValueTypes.td?rev=103746&r1=103745&r2=103746&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/ValueTypes.td (original) +++ llvm/trunk/include/llvm/CodeGen/ValueTypes.td Thu May 13 18:55:47 2010 @@ -47,15 +47,16 @@ def v1i64 : ValueType<64 , 24>; // 1 x i64 vector value def v2i64 : ValueType<128, 25>; // 2 x i64 vector value def v4i64 : ValueType<256, 26>; // 4 x f64 vector value +def v8i64 : ValueType<512, 27>; // 4 x f64 vector value -def v2f32 : ValueType<64, 27>; // 2 x f32 vector value -def v4f32 : ValueType<128, 28>; // 4 x f32 vector value -def v8f32 : ValueType<256, 29>; // 8 x f32 vector value -def v2f64 : ValueType<128, 30>; // 2 x f64 vector value -def v4f64 : ValueType<256, 31>; // 4 x f64 vector value +def v2f32 : ValueType<64, 28>; // 2 x f32 vector value +def v4f32 : ValueType<128, 29>; // 4 x f32 vector value +def v8f32 : ValueType<256, 30>; // 8 x f32 vector value +def v2f64 : ValueType<128, 31>; // 2 x f64 vector value +def v4f64 : ValueType<256, 32>; // 4 x f64 vector value -def FlagVT : ValueType<0 , 32>; // Pre-RA sched glue -def isVoid : ValueType<0 , 33>; // Produces no value +def FlagVT : ValueType<0 , 33>; // Pre-RA sched glue +def isVoid : ValueType<0 , 34>; // Produces no value def MetadataVT: ValueType<0, 250>; // Metadata Modified: llvm/trunk/lib/VMCore/ValueTypes.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ValueTypes.cpp?rev=103746&r1=103745&r2=103746&view=diff 
============================================================================== --- llvm/trunk/lib/VMCore/ValueTypes.cpp (original) +++ llvm/trunk/lib/VMCore/ValueTypes.cpp Thu May 13 18:55:47 2010 @@ -61,6 +61,10 @@ return isExtendedVector() && getSizeInBits() == 256; } +bool EVT::isExtended512BitVector() const { + return isExtendedVector() && getSizeInBits() == 512; +} + EVT EVT::getExtendedVectorElementType() const { assert(isExtended() && "Type is not extended!"); return EVT::getEVT(cast(LLVMTy)->getElementType()); @@ -121,6 +125,7 @@ case MVT::v1i64: return "v1i64"; case MVT::v2i64: return "v2i64"; case MVT::v4i64: return "v4i64"; + case MVT::v8i64: return "v8i64"; case MVT::v2f32: return "v2f32"; case MVT::v4f32: return "v4f32"; case MVT::v8f32: return "v8f32"; @@ -165,6 +170,7 @@ case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1); case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2); case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4); + case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8); case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4); case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8); Modified: llvm/trunk/utils/TableGen/CodeGenTarget.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenTarget.cpp?rev=103746&r1=103745&r2=103746&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/CodeGenTarget.cpp (original) +++ llvm/trunk/utils/TableGen/CodeGenTarget.cpp Thu May 13 18:55:47 2010 @@ -80,6 +80,7 @@ case MVT::v1i64: return "MVT::v1i64"; case MVT::v2i64: return "MVT::v2i64"; case MVT::v4i64: return "MVT::v4i64"; + case MVT::v8i64: return "MVT::v8i64"; case MVT::v2f32: return "MVT::v2f32"; case MVT::v4f32: return "MVT::v4f32"; case MVT::v8f32: return "MVT::v8f32"; From bob.wilson at 
apple.com Thu May 13 19:01:16 2010 From: bob.wilson at apple.com (Bob Wilson) Date: Thu, 13 May 2010 17:01:16 -0700 Subject: [llvm-commits] [llvm] r103746 - in /llvm/trunk: include/llvm/CodeGen/ValueTypes.h include/llvm/CodeGen/ValueTypes.td lib/VMCore/ValueTypes.cpp utils/TableGen/CodeGenTarget.cpp In-Reply-To: <20100513235547.B166B312800A@llvm.org> References: <20100513235547.B166B312800A@llvm.org> Message-ID: <91A5B9A7-FFA4-40DD-B5BF-487A4ED3E6D5@apple.com> That looks vaguely familiar.... ;-) On May 13, 2010, at 4:55 PM, Evan Cheng wrote: > Author: evancheng > Date: Thu May 13 18:55:47 2010 > New Revision: 103746 > > URL: http://llvm.org/viewvc/llvm-project?rev=103746&view=rev > Log: > Adding a v8i64 512-bit vector type. This will be used to model ARM NEON intrinsics which translate into a pair of vld / vst instructions that can load / store 8 consecutive 64-bit (D) registers. > > Modified: > llvm/trunk/include/llvm/CodeGen/ValueTypes.h > llvm/trunk/include/llvm/CodeGen/ValueTypes.td > llvm/trunk/lib/VMCore/ValueTypes.cpp > llvm/trunk/utils/TableGen/CodeGenTarget.cpp > > Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ValueTypes.h?rev=103746&r1=103745&r2=103746&view=diff > ============================================================================== > --- llvm/trunk/include/llvm/CodeGen/ValueTypes.h (original) > +++ llvm/trunk/include/llvm/CodeGen/ValueTypes.h Thu May 13 18:55:47 2010 > @@ -63,19 +63,20 @@ > v1i64 = 24, // 1 x i64 > v2i64 = 25, // 2 x i64 > v4i64 = 26, // 4 x i64 > + v8i64 = 27, // 8 x i64 > > - v2f32 = 27, // 2 x f32 > - v4f32 = 28, // 4 x f32 > - v8f32 = 29, // 8 x f32 > - v2f64 = 30, // 2 x f64 > - v4f64 = 31, // 4 x f64 > + v2f32 = 28, // 2 x f32 > + v4f32 = 29, // 4 x f32 > + v8f32 = 30, // 8 x f32 > + v2f64 = 31, // 2 x f64 > + v4f64 = 32, // 4 x f64 > > FIRST_VECTOR_VALUETYPE = v2i8, > LAST_VECTOR_VALUETYPE = v4f64, > > - Flag = 32, // This glues nodes 
together during pre-RA sched > + Flag = 33, // This glues nodes together during pre-RA sched > > - isVoid = 33, // This has no value > + isVoid = 34, // This has no value > > LAST_VALUETYPE = 34, // This always remains at the end of the list. > > @@ -140,7 +141,7 @@ > bool isInteger() const { > return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE && > SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) || > - (SimpleTy >= MVT::v2i8 && SimpleTy <= MVT::v4i64)); > + (SimpleTy >= MVT::v2i8 && SimpleTy <= MVT::v8i64)); > } > > /// isVector - Return true if this is a vector value type. > @@ -192,7 +193,8 @@ > case v8i32: return i32; > case v1i64: > case v2i64: > - case v4i64: return i64; > + case v4i64: > + case v8i64: return i64; > case v2f32: > case v4f32: > case v8f32: return f32; > @@ -211,6 +213,7 @@ > case v8i8 : > case v8i16: > case v8i32: > + case v8i64: > case v8f32: return 8; > case v4i8: > case v4i16: > @@ -269,6 +272,7 @@ > case v4i64: > case v8f32: > case v4f64: return 256; > + case v8i64: return 512; > } > } > > @@ -332,6 +336,7 @@ > if (NumElements == 1) return MVT::v1i64; > if (NumElements == 2) return MVT::v2i64; > if (NumElements == 4) return MVT::v4i64; > + if (NumElements == 8) return MVT::v8i64; > break; > case MVT::f32: > if (NumElements == 2) return MVT::v2f32; > @@ -468,10 +473,15 @@ > > /// is256BitVector - Return true if this is a 256-bit vector type. > inline bool is256BitVector() const { > - return isSimple() ? > - (V==MVT::v8f32 || V==MVT::v4f64 || V==MVT::v32i8 || > - V==MVT::v16i16 || V==MVT::v8i32 || V==MVT::v4i64) : > - isExtended256BitVector(); > + return isSimple() > + ? (V==MVT::v8f32 || V==MVT::v4f64 || V==MVT::v32i8 || > + V==MVT::v16i16 || V==MVT::v8i32 || V==MVT::v4i64) > + : isExtended256BitVector(); > + } > + > + /// is512BitVector - Return true if this is a 512-bit vector type. > + inline bool is512BitVector() const { > + return isSimple() ? 
(V == MVT::v8i64) : isExtended512BitVector(); > } > > /// isOverloaded - Return true if this is an overloaded type for TableGen. > @@ -668,6 +678,7 @@ > bool isExtended64BitVector() const; > bool isExtended128BitVector() const; > bool isExtended256BitVector() const; > + bool isExtended512BitVector() const; > EVT getExtendedVectorElementType() const; > unsigned getExtendedVectorNumElements() const; > unsigned getExtendedSizeInBits() const; > > Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.td > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ValueTypes.td?rev=103746&r1=103745&r2=103746&view=diff > ============================================================================== > --- llvm/trunk/include/llvm/CodeGen/ValueTypes.td (original) > +++ llvm/trunk/include/llvm/CodeGen/ValueTypes.td Thu May 13 18:55:47 2010 > @@ -47,15 +47,16 @@ > def v1i64 : ValueType<64 , 24>; // 1 x i64 vector value > def v2i64 : ValueType<128, 25>; // 2 x i64 vector value > def v4i64 : ValueType<256, 26>; // 4 x f64 vector value > +def v8i64 : ValueType<512, 27>; // 4 x f64 vector value > > -def v2f32 : ValueType<64, 27>; // 2 x f32 vector value > -def v4f32 : ValueType<128, 28>; // 4 x f32 vector value > -def v8f32 : ValueType<256, 29>; // 8 x f32 vector value > -def v2f64 : ValueType<128, 30>; // 2 x f64 vector value > -def v4f64 : ValueType<256, 31>; // 4 x f64 vector value > +def v2f32 : ValueType<64, 28>; // 2 x f32 vector value > +def v4f32 : ValueType<128, 29>; // 4 x f32 vector value > +def v8f32 : ValueType<256, 30>; // 8 x f32 vector value > +def v2f64 : ValueType<128, 31>; // 2 x f64 vector value > +def v4f64 : ValueType<256, 32>; // 4 x f64 vector value > > -def FlagVT : ValueType<0 , 32>; // Pre-RA sched glue > -def isVoid : ValueType<0 , 33>; // Produces no value > +def FlagVT : ValueType<0 , 33>; // Pre-RA sched glue > +def isVoid : ValueType<0 , 34>; // Produces no value > > def MetadataVT: ValueType<0, 250>; // Metadata > > > Modified: 
llvm/trunk/lib/VMCore/ValueTypes.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ValueTypes.cpp?rev=103746&r1=103745&r2=103746&view=diff > ============================================================================== > --- llvm/trunk/lib/VMCore/ValueTypes.cpp (original) > +++ llvm/trunk/lib/VMCore/ValueTypes.cpp Thu May 13 18:55:47 2010 > @@ -61,6 +61,10 @@ > return isExtendedVector() && getSizeInBits() == 256; > } > > +bool EVT::isExtended512BitVector() const { > + return isExtendedVector() && getSizeInBits() == 512; > +} > + > EVT EVT::getExtendedVectorElementType() const { > assert(isExtended() && "Type is not extended!"); > return EVT::getEVT(cast(LLVMTy)->getElementType()); > @@ -121,6 +125,7 @@ > case MVT::v1i64: return "v1i64"; > case MVT::v2i64: return "v2i64"; > case MVT::v4i64: return "v4i64"; > + case MVT::v8i64: return "v8i64"; > case MVT::v2f32: return "v2f32"; > case MVT::v4f32: return "v4f32"; > case MVT::v8f32: return "v8f32"; > @@ -165,6 +170,7 @@ > case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1); > case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2); > case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4); > + case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8); > case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); > case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4); > case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8); > > Modified: llvm/trunk/utils/TableGen/CodeGenTarget.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenTarget.cpp?rev=103746&r1=103745&r2=103746&view=diff > ============================================================================== > --- llvm/trunk/utils/TableGen/CodeGenTarget.cpp (original) > +++ llvm/trunk/utils/TableGen/CodeGenTarget.cpp Thu May 13 18:55:47 2010 > @@ -80,6 +80,7 @@ > case MVT::v1i64: return "MVT::v1i64"; > case MVT::v2i64: 
return "MVT::v2i64"; > case MVT::v4i64: return "MVT::v4i64"; > + case MVT::v8i64: return "MVT::v8i64"; > case MVT::v2f32: return "MVT::v2f32"; > case MVT::v4f32: return "MVT::v4f32"; > case MVT::v8f32: return "MVT::v8f32"; > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From stoklund at 2pi.dk Thu May 13 19:02:21 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Fri, 14 May 2010 00:02:21 -0000 Subject: [llvm-commits] [llvm] r103747 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100514000221.1B72B312800A@llvm.org> Author: stoklund Date: Thu May 13 19:02:20 2010 New Revision: 103747 URL: http://llvm.org/viewvc/llvm-project?rev=103747&view=rev Log: Fix an embarrassing runtime regression for RegAllocFast. This loop is quadratic in the capacity for a DenseMap: while(!map.empty()) map.erase(map.begin()); Instead we now do a normal begin() - end() iteration followed by map.clear(). That also has the nice sideeffect of shrinking the map capacity on demand. Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103747&r1=103746&r2=103747&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Thu May 13 19:02:20 2010 @@ -45,7 +45,8 @@ class RAFast : public MachineFunctionPass { public: static char ID; - RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {} + RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1), + atEndOfBlock(false) {} private: const TargetMachine *TM; MachineFunction *MF; @@ -106,6 +107,11 @@ // ReservedRegs - vector of reserved physical registers. 
BitVector ReservedRegs; + // atEndOfBlock - This flag is set after allocating all instructions in a + // block, before emitting final spills. When it is set, LiveRegMap is no + // longer updated properly since it will be cleared anyway. + bool atEndOfBlock; + public: virtual const char *getPassName() const { return "Fast Register Allocator"; @@ -126,6 +132,8 @@ void killVirtReg(LiveRegMap::iterator i); void killVirtReg(unsigned VirtReg); void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + LiveRegMap::iterator i, bool isKill); + void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned VirtReg, bool isKill); void killPhysReg(unsigned PhysReg); void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, @@ -182,7 +190,10 @@ const LiveReg &LR = lri->second; assert(PhysRegState[LR.PhysReg] == lri->first && "Broken RegState mapping"); PhysRegState[LR.PhysReg] = regFree; - LiveVirtRegs.erase(lri); + // Erase from LiveVirtRegs unless we're at the end of the block when + // everything will be bulk erased. + if (!atEndOfBlock) + LiveVirtRegs.erase(lri); } /// killVirtReg - Mark virtreg as no longer available. @@ -204,8 +215,15 @@ "Spilling a physical register is illegal!"); LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); assert(lri != LiveVirtRegs.end() && "Spilling unmapped virtual register"); + spillVirtReg(MBB, MI, lri, isKill); +} + +/// spillVirtReg - Do the actual work of spilling. +void RAFast::spillVirtReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + LiveRegMap::iterator lri, bool isKill) { LiveReg &LR = lri->second; - assert(PhysRegState[LR.PhysReg] == VirtReg && "Broken RegState mapping"); + assert(PhysRegState[LR.PhysReg] == lri->first && "Broken RegState mapping"); // If this physreg is used by the instruction, we want to kill it on the // instruction, not on the spill.
@@ -213,10 +231,10 @@ if (LR.Dirty) { LR.Dirty = false; - DEBUG(dbgs() << "Spilling register " << TRI->getName(LR.PhysReg) - << " containing %reg" << VirtReg); - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - int FrameIndex = getStackSpaceFor(VirtReg, RC); + DEBUG(dbgs() << "Spilling %reg" << lri->first + << " in " << TRI->getName(LR.PhysReg)); + const TargetRegisterClass *RC = MRI->getRegClass(lri->first); + int FrameIndex = getStackSpaceFor(lri->first, RC); DEBUG(dbgs() << " to stack slot #" << FrameIndex << "\n"); TII->storeRegToStackSlot(MBB, MI, LR.PhysReg, spillKill, FrameIndex, RC, TRI); @@ -573,6 +591,7 @@ void RAFast::AllocateBasicBlock(MachineBasicBlock &MBB) { DEBUG(dbgs() << "\nAllocating " << MBB); + atEndOfBlock = false; PhysRegState.assign(TRI->getNumRegs(), regDisabled); assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?"); @@ -772,10 +791,13 @@ } // Spill all physical registers holding virtual registers now. + atEndOfBlock = true; DEBUG(dbgs() << "Killing live registers at end of block.\n"); MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); - while (!LiveVirtRegs.empty()) - spillVirtReg(MBB, MI, LiveVirtRegs.begin()->first, true); + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); + i != e; ++i) + spillVirtReg(MBB, MI, i, true); + LiveVirtRegs.clear(); DEBUG(MBB.dump()); } From stoklund at 2pi.dk Thu May 13 19:02:23 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Fri, 14 May 2010 00:02:23 -0000 Subject: [llvm-commits] [llvm] r103748 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100514000223.46BCC3128018@llvm.org> Author: stoklund Date: Thu May 13 19:02:23 2010 New Revision: 103748 URL: http://llvm.org/viewvc/llvm-project?rev=103748&view=rev Log: Trust kill flags from isel and later passes. 
Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103748&r1=103747&r2=103748&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Thu May 13 19:02:23 2010 @@ -673,10 +673,6 @@ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; - - // FIXME: For now, don't trust kill flags - if (MO.isUse()) MO.setIsKill(false); - unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) || ReservedRegs.test(Reg)) continue; From evan.cheng at apple.com Thu May 13 19:14:04 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Thu, 13 May 2010 17:14:04 -0700 Subject: [llvm-commits] [llvm] r103746 - in /llvm/trunk: include/llvm/CodeGen/ValueTypes.h include/llvm/CodeGen/ValueTypes.td lib/VMCore/ValueTypes.cpp utils/TableGen/CodeGenTarget.cpp In-Reply-To: <91A5B9A7-FFA4-40DD-B5BF-487A4ED3E6D5@apple.com> References: <20100513235547.B166B312800A@llvm.org> <91A5B9A7-FFA4-40DD-B5BF-487A4ED3E6D5@apple.com> Message-ID: <825AC704-7605-47C9-8447-F16969FA63C2@apple.com> On May 13, 2010, at 5:01 PM, Bob Wilson wrote: > That looks vaguely familiar.... ;-) I only added one type! :-) Evan > > On May 13, 2010, at 4:55 PM, Evan Cheng wrote: > >> Author: evancheng >> Date: Thu May 13 18:55:47 2010 >> New Revision: 103746 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=103746&view=rev >> Log: >> Adding a v8i64 512-bit vector type. This will be used to model ARM NEON intrinsics which translate into a pair of vld / vst instructions that can load / store 8 consecutive 64-bit (D) registers. 
>> >> Modified: >> llvm/trunk/include/llvm/CodeGen/ValueTypes.h >> llvm/trunk/include/llvm/CodeGen/ValueTypes.td >> llvm/trunk/lib/VMCore/ValueTypes.cpp >> llvm/trunk/utils/TableGen/CodeGenTarget.cpp >> >> Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ValueTypes.h?rev=103746&r1=103745&r2=103746&view=diff >> ============================================================================== >> --- llvm/trunk/include/llvm/CodeGen/ValueTypes.h (original) >> +++ llvm/trunk/include/llvm/CodeGen/ValueTypes.h Thu May 13 18:55:47 2010 >> @@ -63,19 +63,20 @@ >> v1i64 = 24, // 1 x i64 >> v2i64 = 25, // 2 x i64 >> v4i64 = 26, // 4 x i64 >> + v8i64 = 27, // 8 x i64 >> >> - v2f32 = 27, // 2 x f32 >> - v4f32 = 28, // 4 x f32 >> - v8f32 = 29, // 8 x f32 >> - v2f64 = 30, // 2 x f64 >> - v4f64 = 31, // 4 x f64 >> + v2f32 = 28, // 2 x f32 >> + v4f32 = 29, // 4 x f32 >> + v8f32 = 30, // 8 x f32 >> + v2f64 = 31, // 2 x f64 >> + v4f64 = 32, // 4 x f64 >> >> FIRST_VECTOR_VALUETYPE = v2i8, >> LAST_VECTOR_VALUETYPE = v4f64, >> >> - Flag = 32, // This glues nodes together during pre-RA sched >> + Flag = 33, // This glues nodes together during pre-RA sched >> >> - isVoid = 33, // This has no value >> + isVoid = 34, // This has no value >> >> LAST_VALUETYPE = 34, // This always remains at the end of the list. >> >> @@ -140,7 +141,7 @@ >> bool isInteger() const { >> return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE && >> SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) || >> - (SimpleTy >= MVT::v2i8 && SimpleTy <= MVT::v4i64)); >> + (SimpleTy >= MVT::v2i8 && SimpleTy <= MVT::v8i64)); >> } >> >> /// isVector - Return true if this is a vector value type. 
>> @@ -192,7 +193,8 @@ >> case v8i32: return i32; >> case v1i64: >> case v2i64: >> - case v4i64: return i64; >> + case v4i64: >> + case v8i64: return i64; >> case v2f32: >> case v4f32: >> case v8f32: return f32; >> @@ -211,6 +213,7 @@ >> case v8i8 : >> case v8i16: >> case v8i32: >> + case v8i64: >> case v8f32: return 8; >> case v4i8: >> case v4i16: >> @@ -269,6 +272,7 @@ >> case v4i64: >> case v8f32: >> case v4f64: return 256; >> + case v8i64: return 512; >> } >> } >> >> @@ -332,6 +336,7 @@ >> if (NumElements == 1) return MVT::v1i64; >> if (NumElements == 2) return MVT::v2i64; >> if (NumElements == 4) return MVT::v4i64; >> + if (NumElements == 8) return MVT::v8i64; >> break; >> case MVT::f32: >> if (NumElements == 2) return MVT::v2f32; >> @@ -468,10 +473,15 @@ >> >> /// is256BitVector - Return true if this is a 256-bit vector type. >> inline bool is256BitVector() const { >> - return isSimple() ? >> - (V==MVT::v8f32 || V==MVT::v4f64 || V==MVT::v32i8 || >> - V==MVT::v16i16 || V==MVT::v8i32 || V==MVT::v4i64) : >> - isExtended256BitVector(); >> + return isSimple() >> + ? (V==MVT::v8f32 || V==MVT::v4f64 || V==MVT::v32i8 || >> + V==MVT::v16i16 || V==MVT::v8i32 || V==MVT::v4i64) >> + : isExtended256BitVector(); >> + } >> + >> + /// is512BitVector - Return true if this is a 512-bit vector type. >> + inline bool is512BitVector() const { >> + return isSimple() ? (V == MVT::v8i64) : isExtended512BitVector(); >> } >> >> /// isOverloaded - Return true if this is an overloaded type for TableGen. 
>> @@ -668,6 +678,7 @@ >> bool isExtended64BitVector() const; >> bool isExtended128BitVector() const; >> bool isExtended256BitVector() const; >> + bool isExtended512BitVector() const; >> EVT getExtendedVectorElementType() const; >> unsigned getExtendedVectorNumElements() const; >> unsigned getExtendedSizeInBits() const; >> >> Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.td >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ValueTypes.td?rev=103746&r1=103745&r2=103746&view=diff >> ============================================================================== >> --- llvm/trunk/include/llvm/CodeGen/ValueTypes.td (original) >> +++ llvm/trunk/include/llvm/CodeGen/ValueTypes.td Thu May 13 18:55:47 2010 >> @@ -47,15 +47,16 @@ >> def v1i64 : ValueType<64 , 24>; // 1 x i64 vector value >> def v2i64 : ValueType<128, 25>; // 2 x i64 vector value >> def v4i64 : ValueType<256, 26>; // 4 x f64 vector value >> +def v8i64 : ValueType<512, 27>; // 4 x f64 vector value >> >> -def v2f32 : ValueType<64, 27>; // 2 x f32 vector value >> -def v4f32 : ValueType<128, 28>; // 4 x f32 vector value >> -def v8f32 : ValueType<256, 29>; // 8 x f32 vector value >> -def v2f64 : ValueType<128, 30>; // 2 x f64 vector value >> -def v4f64 : ValueType<256, 31>; // 4 x f64 vector value >> +def v2f32 : ValueType<64, 28>; // 2 x f32 vector value >> +def v4f32 : ValueType<128, 29>; // 4 x f32 vector value >> +def v8f32 : ValueType<256, 30>; // 8 x f32 vector value >> +def v2f64 : ValueType<128, 31>; // 2 x f64 vector value >> +def v4f64 : ValueType<256, 32>; // 4 x f64 vector value >> >> -def FlagVT : ValueType<0 , 32>; // Pre-RA sched glue >> -def isVoid : ValueType<0 , 33>; // Produces no value >> +def FlagVT : ValueType<0 , 33>; // Pre-RA sched glue >> +def isVoid : ValueType<0 , 34>; // Produces no value >> >> def MetadataVT: ValueType<0, 250>; // Metadata >> >> >> Modified: llvm/trunk/lib/VMCore/ValueTypes.cpp >> URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/ValueTypes.cpp?rev=103746&r1=103745&r2=103746&view=diff >> ============================================================================== >> --- llvm/trunk/lib/VMCore/ValueTypes.cpp (original) >> +++ llvm/trunk/lib/VMCore/ValueTypes.cpp Thu May 13 18:55:47 2010 >> @@ -61,6 +61,10 @@ >> return isExtendedVector() && getSizeInBits() == 256; >> } >> >> +bool EVT::isExtended512BitVector() const { >> + return isExtendedVector() && getSizeInBits() == 512; >> +} >> + >> EVT EVT::getExtendedVectorElementType() const { >> assert(isExtended() && "Type is not extended!"); >> return EVT::getEVT(cast(LLVMTy)->getElementType()); >> @@ -121,6 +125,7 @@ >> case MVT::v1i64: return "v1i64"; >> case MVT::v2i64: return "v2i64"; >> case MVT::v4i64: return "v4i64"; >> + case MVT::v8i64: return "v8i64"; >> case MVT::v2f32: return "v2f32"; >> case MVT::v4f32: return "v4f32"; >> case MVT::v8f32: return "v8f32"; >> @@ -165,6 +170,7 @@ >> case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1); >> case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2); >> case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4); >> + case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8); >> case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); >> case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4); >> case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8); >> >> Modified: llvm/trunk/utils/TableGen/CodeGenTarget.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenTarget.cpp?rev=103746&r1=103745&r2=103746&view=diff >> ============================================================================== >> --- llvm/trunk/utils/TableGen/CodeGenTarget.cpp (original) >> +++ llvm/trunk/utils/TableGen/CodeGenTarget.cpp Thu May 13 18:55:47 2010 >> @@ -80,6 +80,7 @@ >> case MVT::v1i64: return "MVT::v1i64"; >> case MVT::v2i64: return 
"MVT::v2i64"; >> case MVT::v4i64: return "MVT::v4i64"; >> + case MVT::v8i64: return "MVT::v8i64"; >> case MVT::v2f32: return "MVT::v2f32"; >> case MVT::v4f32: return "MVT::v4f32"; >> case MVT::v8f32: return "MVT::v8f32"; >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From evan.cheng at apple.com Thu May 13 19:21:45 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Fri, 14 May 2010 00:21:45 -0000 Subject: [llvm-commits] [llvm] r103749 - /llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Message-ID: <20100514002145.5F9BD312800A@llvm.org> Author: evancheng Date: Thu May 13 19:21:45 2010 New Revision: 103749 URL: http://llvm.org/viewvc/llvm-project?rev=103749&view=rev Log: Fix comments. Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=103749&r1=103748&r2=103749&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Thu May 13 19:21:45 2010 @@ -968,7 +968,7 @@ VT, SDValue(Pair, 0), V1, SubReg1); } -/// PairDRegs - Form a quad register pair from a pair of Q registers. +/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers. /// SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); @@ -978,7 +978,7 @@ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); } -/// QuadDRegs - Form a octo register from a quad of D registers. +/// QuadDRegs - Form 4 consecutive D registers. 
/// SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { From daniel at zuster.org Thu May 13 19:37:14 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Fri, 14 May 2010 00:37:14 -0000 Subject: [llvm-commits] [llvm] r103751 - in /llvm/trunk: include/llvm/MC/MCAssembler.h lib/MC/MCAssembler.cpp Message-ID: <20100514003714.96C0E3128018@llvm.org> Author: ddunbar Date: Thu May 13 19:37:14 2010 New Revision: 103751 URL: http://llvm.org/viewvc/llvm-project?rev=103751&view=rev Log: MC: Switch MCFragment to storing the layout order index, not its index in the file. Modified: llvm/trunk/include/llvm/MC/MCAssembler.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAssembler.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAssembler.h?rev=103751&r1=103750&r2=103751&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAssembler.h (original) +++ llvm/trunk/include/llvm/MC/MCAssembler.h Thu May 13 19:37:14 2010 @@ -96,9 +96,9 @@ /// initialized. uint64_t EffectiveSize; - /// Ordinal - The global index of this fragment. This is the index across all - /// sections, not just the parent section. - unsigned Ordinal; + /// LayoutOrder - The global layout order of this fragment. This is the index + /// across all fragments in the file, not just within the section. 
+ unsigned LayoutOrder; /// @} @@ -118,8 +118,8 @@ MCSymbolData *getAtom() const { return Atom; } void setAtom(MCSymbolData *Value) { Atom = Value; } - unsigned getOrdinal() const { return Ordinal; } - void setOrdinal(unsigned Value) { Ordinal = Value; } + unsigned getLayoutOrder() const { return LayoutOrder; } + void setLayoutOrder(unsigned Value) { LayoutOrder = Value; } static bool classof(const MCFragment *O) { return true; } Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103751&r1=103750&r2=103751&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Thu May 13 19:37:14 2010 @@ -599,10 +599,6 @@ // Create the layout object. MCAsmLayout Layout(*this); - // Assign layout order indices. - for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) - Layout.getSectionOrder()[i]->setLayoutOrder(i); - // Insert additional align fragments for concrete sections to explicitly pad // the previous section to match their alignment requirements. This is for // 'gas' compatibility, it shouldn't strictly be necessary. @@ -627,10 +623,8 @@ AF->setOnlyAlignAddress(true); } - // Assign section and fragment ordinals, all subsequent backend code is - // responsible for updating these in place. + // Create dummy fragments and assign section ordinals. unsigned SectionIndex = 0; - unsigned FragmentIndex = 0; for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { // Create dummy fragments to eliminate any empty sections, this simplifies // layout. @@ -642,10 +636,17 @@ } it->setOrdinal(SectionIndex++); + } - for (MCSectionData::iterator it2 = it->begin(), - ie2 = it->end(); it2 != ie2; ++it2) - it2->setOrdinal(FragmentIndex++); + // Assign layout order indices to sections and fragments. 
+ unsigned FragmentIndex = 0; + for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) { + MCSectionData *SD = Layout.getSectionOrder()[i]; + SD->setLayoutOrder(i); + + for (MCSectionData::iterator it2 = SD->begin(), + ie2 = SD->end(); it2 != ie2; ++it2) + it2->setLayoutOrder(FragmentIndex++); } // Layout until everything fits. @@ -827,7 +828,7 @@ // Update the data fragments layout data. DF->setParent(IF->getParent()); DF->setAtom(IF->getAtom()); - DF->setOrdinal(IF->getOrdinal()); + DF->setLayoutOrder(IF->getLayoutOrder()); Layout.FragmentReplaced(IF, DF); // Copy in the data and the fixups. @@ -857,8 +858,8 @@ void MCFragment::dump() { raw_ostream &OS = llvm::errs(); - OS << ""; + OS << ""; } void MCAlignFragment::dump() { From daniel at zuster.org Thu May 13 19:37:18 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Fri, 14 May 2010 00:37:18 -0000 Subject: [llvm-commits] [llvm] r103752 - /llvm/trunk/lib/MC/MCAssembler.cpp Message-ID: <20100514003718.0E4A13128026@llvm.org> Author: ddunbar Date: Thu May 13 19:37:17 2010 New Revision: 103752 URL: http://llvm.org/viewvc/llvm-project?rev=103752&view=rev Log: MC: Implicitly assign section addresses when the previous fragment is layed out. Modified: llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103752&r1=103751&r2=103752&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Thu May 13 19:37:17 2010 @@ -414,10 +414,13 @@ } void MCAsmLayout::LayoutFile() { + // Initialize the first section. 
+ if (!getSectionOrder().empty()) + getSectionOrder().front()->Address = 0; + for (unsigned i = 0, e = getSectionOrder().size(); i != e; ++i) { MCSectionData *SD = getSectionOrder()[i]; - LayoutSection(SD); for (MCSectionData::iterator it = SD->begin(), ie = SD->end(); it != ie; ++it) LayoutFragment(it); @@ -440,6 +443,13 @@ F->Offset = Address - StartAddress; F->EffectiveSize = getAssembler().ComputeFragmentSize(*this, *F, StartAddress, F->Offset); + + // If this is the last fragment in a section, update the next section address. + if (!F->getNextNode()) { + unsigned NextIndex = F->getParent()->getLayoutOrder() + 1; + if (NextIndex != getSectionOrder().size()) + LayoutSection(getSectionOrder()[NextIndex]); + } } void MCAsmLayout::LayoutSection(MCSectionData *SD) { From daniel at zuster.org Thu May 13 19:37:21 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Fri, 14 May 2010 00:37:21 -0000 Subject: [llvm-commits] [llvm] r103753 - in /llvm/trunk: include/llvm/MC/MCAsmLayout.h lib/MC/MCAssembler.cpp Message-ID: <20100514003721.7F2A93128018@llvm.org> Author: ddunbar Date: Thu May 13 19:37:21 2010 New Revision: 103753 URL: http://llvm.org/viewvc/llvm-project?rev=103753&view=rev Log: MC: Extend MCAsmLayout to explicitly track which fragments have been layed out, and enforce several invariants to LayoutFragment to ensure we only do layout in a sensible order. Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAsmLayout.h?rev=103753&r1=103752&r2=103753&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAsmLayout.h (original) +++ llvm/trunk/include/llvm/MC/MCAsmLayout.h Thu May 13 19:37:21 2010 @@ -36,6 +36,14 @@ /// List of sections in layout order. 
llvm::SmallVector SectionOrder; + /// The last fragment which was layed out, or 0 if nothing has been layed + /// out. Fragments are always layed out in order, so all fragments with a + /// lower ordinal will be up to date. + mutable MCFragment *LastValidFragment; + + bool isSectionUpToDate(const MCSectionData *SD) const; + bool isFragmentUpToDate(const MCFragment *F) const; + public: MCAsmLayout(MCAssembler &_Assembler); Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103753&r1=103752&r2=103753&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Thu May 13 19:37:21 2010 @@ -47,7 +47,9 @@ /* *** */ -MCAsmLayout::MCAsmLayout(MCAssembler &Asm) : Assembler(Asm) { +MCAsmLayout::MCAsmLayout(MCAssembler &Asm) + : Assembler(Asm), LastValidFragment(0) + { // Compute the section layout order. Virtual sections must go last. for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) if (!Asm.getBackend().isVirtualSection(it->getSection())) @@ -57,6 +59,23 @@ SectionOrder.push_back(&*it); } +bool MCAsmLayout::isSectionUpToDate(const MCSectionData *SD) const { + // The first section is always up-to-date. + unsigned Index = SD->getLayoutOrder(); + if (!Index) + return true; + + // Otherwise, sections are always implicitly computed when the preceeding + // fragment is layed out. 
+ const MCSectionData *Prev = getSectionOrder()[Index - 1]; + return isFragmentUpToDate(&(Prev->getFragmentList().back())); +} + +bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const { + return (LastValidFragment && + F->getLayoutOrder() <= LastValidFragment->getLayoutOrder()); +} + void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) { // We shouldn't have to do anything special to support negative slides, and it // is a perfectly valid thing to do as long as other parts of the system can @@ -73,6 +92,9 @@ } void MCAsmLayout::FragmentReplaced(MCFragment *Src, MCFragment *Dst) { + if (LastValidFragment == Src) + LastValidFragment = Dst; + Dst->Offset = Src->Offset; Dst->EffectiveSize = Src->EffectiveSize; } @@ -414,7 +436,8 @@ } void MCAsmLayout::LayoutFile() { - // Initialize the first section. + // Initialize the first section and set the valid fragment layout point. + LastValidFragment = 0; if (!getSectionOrder().empty()) getSectionOrder().front()->Address = 0; @@ -428,21 +451,32 @@ } void MCAsmLayout::LayoutFragment(MCFragment *F) { - uint64_t StartAddress = getSectionAddress(F->getParent()); + MCFragment *Prev = F->getPrevNode(); - // Get the fragment start address. - uint64_t Address = StartAddress; - MCSectionData::iterator it = F; - if (MCFragment *Prev = F->getPrevNode()) - Address = (StartAddress + getFragmentOffset(Prev) + - getFragmentEffectiveSize(Prev)); + // We should never try to recompute something which is up-to-date. + assert(!isFragmentUpToDate(F) && "Attempt to recompute up-to-date fragment!"); + // We should never try to compute the fragment layout if the section isn't + // up-to-date. + assert(isSectionUpToDate(F->getParent()) && + "Attempt to compute fragment before it's section!"); + // We should never try to compute the fragment layout if it's predecessor + // isn't up-to-date. 
+ assert((!Prev || isFragmentUpToDate(Prev)) && + "Attempt to compute fragment before it's predecessor!"); ++stats::FragmentLayouts; + // Compute the fragment start address. + uint64_t StartAddress = F->getParent()->Address; + uint64_t Address = StartAddress; + if (Prev) + Address += Prev->Offset + Prev->EffectiveSize; + // Compute fragment offset and size. F->Offset = Address - StartAddress; F->EffectiveSize = getAssembler().ComputeFragmentSize(*this, *F, StartAddress, F->Offset); + LastValidFragment = F; // If this is the last fragment in a section, update the next section address. if (!F->getNextNode()) { From daniel at zuster.org Thu May 13 19:37:11 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Fri, 14 May 2010 00:37:11 -0000 Subject: [llvm-commits] [llvm] r103750 - in /llvm/trunk: include/llvm/MC/MCAsmLayout.h lib/MC/MCAssembler.cpp Message-ID: <20100514003711.5FE87312800A@llvm.org> Author: ddunbar Date: Thu May 13 19:37:11 2010 New Revision: 103750 URL: http://llvm.org/viewvc/llvm-project?rev=103750&view=rev Log: MC: Change LayoutSection() to only do the section initialization. Also, eliminate MCAsmLayout::set*, which are no longer needed. Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAsmLayout.h?rev=103750&r1=103749&r2=103750&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAsmLayout.h (original) +++ llvm/trunk/include/llvm/MC/MCAsmLayout.h Thu May 13 19:37:11 2010 @@ -58,8 +58,9 @@ /// been initialized. void LayoutFragment(MCFragment *Fragment); - /// \brief Performs layout for a single section, assuming that the previous - /// section has already been layed out correctly.
+ /// \brief Performs initial layout for a single section, assuming that the + /// previous section (including its fragments) has already been layed out + /// correctly. void LayoutSection(MCSectionData *SD); /// @name Section Access (in layout order) @@ -80,15 +81,9 @@ /// current layout. uint64_t getFragmentEffectiveSize(const MCFragment *F) const; - /// \brief Set the effective size of the given fragment. - void setFragmentEffectiveSize(MCFragment *F, uint64_t Value); - /// \brief Get the offset of the given fragment inside its containing section. uint64_t getFragmentOffset(const MCFragment *F) const; - /// \brief Set the offset of the given fragment inside its containing section. - void setFragmentOffset(MCFragment *F, uint64_t Value); - /// @} /// @name Section Layout Data /// @{ @@ -96,9 +91,6 @@ /// \brief Get the computed address of the given section. uint64_t getSectionAddress(const MCSectionData *SD) const; - /// \brief Set the computed address of the given section. - void setSectionAddress(MCSectionData *SD, uint64_t Value); - /// @} /// @name Utility Functions /// @{ Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103750&r1=103749&r2=103750&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Thu May 13 19:37:11 2010 @@ -87,19 +87,11 @@ return F->EffectiveSize; } -void MCAsmLayout::setFragmentEffectiveSize(MCFragment *F, uint64_t Value) { - F->EffectiveSize = Value; -} - uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const { assert(F->Offset != ~UINT64_C(0) && "Address not set!"); return F->Offset; } -void MCAsmLayout::setFragmentOffset(MCFragment *F, uint64_t Value) { - F->Offset = Value; -} - uint64_t MCAsmLayout::getSymbolAddress(const MCSymbolData *SD) const { assert(SD->getFragment() && "Invalid getAddress() on undefined symbol!"); return 
getFragmentAddress(SD->getFragment()) + SD->getOffset(); @@ -110,12 +102,8 @@ return SD->Address; } -void MCAsmLayout::setSectionAddress(MCSectionData *SD, uint64_t Value) { - SD->Address = Value; -} - uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const { - // Otherwise, the size is the last fragment's end offset. + // The size is the last fragment's end offset. const MCFragment &F = SD->getFragmentList().back(); return getFragmentOffset(&F) + getFragmentEffectiveSize(&F); } @@ -426,8 +414,14 @@ } void MCAsmLayout::LayoutFile() { - for (unsigned i = 0, e = getSectionOrder().size(); i != e; ++i) - LayoutSection(getSectionOrder()[i]); + for (unsigned i = 0, e = getSectionOrder().size(); i != e; ++i) { + MCSectionData *SD = getSectionOrder()[i]; + + LayoutSection(SD); + for (MCSectionData::iterator it = SD->begin(), + ie = SD->end(); it != ie; ++it) + LayoutFragment(it); + } } void MCAsmLayout::LayoutFragment(MCFragment *F) { @@ -443,12 +437,9 @@ ++stats::FragmentLayouts; // Compute fragment offset and size. - uint64_t Offset = Address - StartAddress; - uint64_t EffectiveSize = - getAssembler().ComputeFragmentSize(*this, *F, StartAddress, Offset); - - setFragmentOffset(F, Offset); - setFragmentEffectiveSize(F, EffectiveSize); + F->Offset = Address - StartAddress; + F->EffectiveSize = getAssembler().ComputeFragmentSize(*this, *F, StartAddress, + F->Offset); } void MCAsmLayout::LayoutSection(MCSectionData *SD) { @@ -467,10 +458,7 @@ StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment()); // Set the section address. - setSectionAddress(SD, StartAddress); - - for (MCSectionData::iterator it = SD->begin(), ie = SD->end(); it != ie; ++it) - LayoutFragment(it); + SD->Address = StartAddress; } /// WriteFragmentData - Write the \arg F data to the output file. 
From daniel at zuster.org Thu May 13 19:51:14 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Fri, 14 May 2010 00:51:14 -0000 Subject: [llvm-commits] [llvm] r103754 - in /llvm/trunk: include/llvm/MC/MCAsmLayout.h lib/MC/MCAssembler.cpp Message-ID: <20100514005114.75C51312800A@llvm.org> Author: ddunbar Date: Thu May 13 19:51:14 2010 New Revision: 103754 URL: http://llvm.org/viewvc/llvm-project?rev=103754&view=rev Log: MC: Switch to completely lazy layout. - The eliminates the last major algorithmic problem with MC. Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h llvm/trunk/lib/MC/MCAssembler.cpp Modified: llvm/trunk/include/llvm/MC/MCAsmLayout.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCAsmLayout.h?rev=103754&r1=103753&r2=103754&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCAsmLayout.h (original) +++ llvm/trunk/include/llvm/MC/MCAsmLayout.h Thu May 13 19:51:14 2010 @@ -41,6 +41,10 @@ /// lower ordinal will be up to date. mutable MCFragment *LastValidFragment; + /// \brief Make sure that the layout for the given fragment is valid, lazily + /// computing it if necessary. + void EnsureValid(const MCFragment *F) const; + bool isSectionUpToDate(const MCSectionData *SD) const; bool isFragmentUpToDate(const MCFragment *F) const; Modified: llvm/trunk/lib/MC/MCAssembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAssembler.cpp?rev=103754&r1=103753&r2=103754&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAssembler.cpp (original) +++ llvm/trunk/lib/MC/MCAssembler.cpp Thu May 13 19:51:14 2010 @@ -77,18 +77,38 @@ } void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) { - // We shouldn't have to do anything special to support negative slides, and it - // is a perfectly valid thing to do as long as other parts of the system can - // guarantee convergence. 
- assert(SlideAmount >= 0 && "Negative slides not yet supported"); + // If this fragment wasn't already up-to-date, we don't need to do anything. + if (!isFragmentUpToDate(F)) + return; - // Update the layout by simply recomputing the layout for the entire - // file. This is trivially correct, but very slow. - // - // FIXME-PERF: This is O(N^2), but will be eliminated once we get smarter. + // Otherwise, reset the last valid fragment to the predecessor of the + // invalidated fragment. + LastValidFragment = F->getPrevNode(); + if (!LastValidFragment) { + unsigned Index = F->getParent()->getLayoutOrder(); + if (Index != 0) { + MCSectionData *Prev = getSectionOrder()[Index - 1]; + LastValidFragment = &(Prev->getFragmentList().back()); + } + } +} - // Layout the sections in order. - LayoutFile(); +void MCAsmLayout::EnsureValid(const MCFragment *F) const { + // Advance the layout position until the fragment is up-to-date. + while (!isFragmentUpToDate(F)) { + // Advance to the next fragment. 
+ MCFragment *Cur = LastValidFragment; + if (Cur) + Cur = Cur->getNextNode(); + if (!Cur) { + unsigned NextIndex = 0; + if (LastValidFragment) + NextIndex = LastValidFragment->getParent()->getLayoutOrder() + 1; + Cur = SectionOrder[NextIndex]->begin(); + } + + const_cast(this)->LayoutFragment(Cur); + } } void MCAsmLayout::FragmentReplaced(MCFragment *Src, MCFragment *Dst) { @@ -105,11 +125,13 @@ } uint64_t MCAsmLayout::getFragmentEffectiveSize(const MCFragment *F) const { + EnsureValid(F); assert(F->EffectiveSize != ~UINT64_C(0) && "Address not set!"); return F->EffectiveSize; } uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const { + EnsureValid(F); assert(F->Offset != ~UINT64_C(0) && "Address not set!"); return F->Offset; } @@ -120,6 +142,7 @@ } uint64_t MCAsmLayout::getSectionAddress(const MCSectionData *SD) const { + EnsureValid(SD->begin()); assert(SD->Address != ~UINT64_C(0) && "Address not set!"); return SD->Address; } @@ -436,18 +459,11 @@ } void MCAsmLayout::LayoutFile() { - // Initialize the first section and set the valid fragment layout point. + // Initialize the first section and set the valid fragment layout point. All + // actual layout computations are done lazily. LastValidFragment = 0; if (!getSectionOrder().empty()) getSectionOrder().front()->Address = 0; - - for (unsigned i = 0, e = getSectionOrder().size(); i != e; ++i) { - MCSectionData *SD = getSectionOrder()[i]; - - for (MCSectionData::iterator it = SD->begin(), - ie = SD->end(); it != ie; ++it) - LayoutFragment(it); - } } void MCAsmLayout::LayoutFragment(MCFragment *F) { From nicholas at mxc.ca Thu May 13 20:02:48 2010 From: nicholas at mxc.ca (Nick Lewycky) Date: Fri, 14 May 2010 01:02:48 -0000 Subject: [llvm-commits] [llvm] r103755 - /llvm/trunk/include/llvm/MC/MCObjectWriter.h Message-ID: <20100514010248.925B7312800A@llvm.org> Author: nicholas Date: Thu May 13 20:02:48 2010 New Revision: 103755 URL: http://llvm.org/viewvc/llvm-project?rev=103755&view=rev Log: Fix typo. 
Modified: llvm/trunk/include/llvm/MC/MCObjectWriter.h Modified: llvm/trunk/include/llvm/MC/MCObjectWriter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCObjectWriter.h?rev=103755&r1=103754&r2=103755&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCObjectWriter.h (original) +++ llvm/trunk/include/llvm/MC/MCObjectWriter.h Thu May 13 20:02:48 2010 @@ -78,7 +78,7 @@ /// Write the object file. /// /// This routine is called by the assembler after layout and relaxation is - /// complete, fixups have been evaluate and applied, and relocations + /// complete, fixups have been evaluated and applied, and relocations /// generated. virtual void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout) = 0; From espindola at google.com Thu May 13 20:34:18 2010 From: espindola at google.com (Rafael Espindola) Date: Thu, 13 May 2010 18:34:18 -0700 Subject: [llvm-commits] [patch] Rename the gold plugin Message-ID: With the attached patch we now have a file named LLVMgold.so both in the build and install directories. There are two likely related bugs that I intend to try to fix once this is in: *) We rebuild the thing at install time *) We build a useless .a file Cheers, -- Rafael Ávila de Espíndola -------------- next part -------------- A non-text attachment was scrubbed... Name: gold.patch Type: application/octet-stream Size: 804 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100513/405c59cc/attachment.obj From echristo at apple.com Thu May 13 20:38:54 2010 From: echristo at apple.com (Eric Christopher) Date: Fri, 14 May 2010 01:38:54 -0000 Subject: [llvm-commits] [llvm] r103756 - /llvm/trunk/lib/MC/MCParser/AsmParser.cpp Message-ID: <20100514013854.9CA80312800A@llvm.org> Author: echristo Date: Thu May 13 20:38:54 2010 New Revision: 103756 URL: http://llvm.org/viewvc/llvm-project?rev=103756&view=rev Log: Fix a couple of typos.
Modified: llvm/trunk/lib/MC/MCParser/AsmParser.cpp Modified: llvm/trunk/lib/MC/MCParser/AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/AsmParser.cpp?rev=103756&r1=103755&r2=103756&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCParser/AsmParser.cpp (original) +++ llvm/trunk/lib/MC/MCParser/AsmParser.cpp Thu May 13 20:38:54 2010 @@ -1316,7 +1316,7 @@ return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " "be less than zero"); - // NOTE: The alignment in the directive is a power of 2 value, the assember + // NOTE: The alignment in the directive is a power of 2 value, the assembler // may internally end up wanting an alignment in bytes. // FIXME: Diagnose overflow. if (Pow2Alignment < 0) @@ -1406,7 +1406,7 @@ return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less " "than zero"); - // NOTE: The alignment in the directive is a power of 2 value, the assember + // NOTE: The alignment in the directive is a power of 2 value, the assembler // may internally end up wanting an alignment in bytes. // FIXME: Diagnose overflow. if (Pow2Alignment < 0) From echristo at apple.com Thu May 13 20:50:28 2010 From: echristo at apple.com (Eric Christopher) Date: Fri, 14 May 2010 01:50:28 -0000 Subject: [llvm-commits] [llvm] r103757 - in /llvm/trunk: include/llvm/MC/MCParser/AsmParser.h include/llvm/MC/MCStreamer.h lib/MC/MCAsmStreamer.cpp lib/MC/MCMachOStreamer.cpp lib/MC/MCNullStreamer.cpp lib/MC/MCParser/AsmParser.cpp test/MC/AsmParser/directive_tbss.s Message-ID: <20100514015028.A8EC4312800A@llvm.org> Author: echristo Date: Thu May 13 20:50:28 2010 New Revision: 103757 URL: http://llvm.org/viewvc/llvm-project?rev=103757&view=rev Log: Add AsmParser support for darwin tbss directive. Nothing uses this yet. 
Added: llvm/trunk/test/MC/AsmParser/directive_tbss.s Modified: llvm/trunk/include/llvm/MC/MCParser/AsmParser.h llvm/trunk/include/llvm/MC/MCStreamer.h llvm/trunk/lib/MC/MCAsmStreamer.cpp llvm/trunk/lib/MC/MCMachOStreamer.cpp llvm/trunk/lib/MC/MCNullStreamer.cpp llvm/trunk/lib/MC/MCParser/AsmParser.cpp Modified: llvm/trunk/include/llvm/MC/MCParser/AsmParser.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCParser/AsmParser.h?rev=103757&r1=103756&r2=103757&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCParser/AsmParser.h (original) +++ llvm/trunk/include/llvm/MC/MCParser/AsmParser.h Thu May 13 20:50:28 2010 @@ -136,6 +136,7 @@ bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm" bool ParseDirectiveDarwinZerofill(); // Darwin specific ".zerofill" + bool ParseDirectiveDarwinTBSS(); // Darwin specific ".tbss" // Darwin specific ".subsections_via_symbols" bool ParseDirectiveDarwinSubsectionsViaSymbols(); Modified: llvm/trunk/include/llvm/MC/MCStreamer.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/MC/MCStreamer.h?rev=103757&r1=103756&r2=103757&view=diff ============================================================================== --- llvm/trunk/include/llvm/MC/MCStreamer.h (original) +++ llvm/trunk/include/llvm/MC/MCStreamer.h Thu May 13 20:50:28 2010 @@ -188,6 +188,14 @@ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0,unsigned ByteAlignment = 0) = 0; + /// EmitTBSSSymbol - Emit a thread local bss (.tbss) symbol. + /// + /// @param Symbol - The thread local common symbol to emit. + /// @param Size - The size of the symbol. + /// @param ByteAlignment - The alignment of the thread local common symbol + /// if non-zero. This must be a power of 2 on some targets. 
+ virtual void EmitTBSSSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment = 0) = 0; /// @} /// @name Generating Data /// @{ Modified: llvm/trunk/lib/MC/MCAsmStreamer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAsmStreamer.cpp?rev=103757&r1=103756&r2=103757&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAsmStreamer.cpp (original) +++ llvm/trunk/lib/MC/MCAsmStreamer.cpp Thu May 13 20:50:28 2010 @@ -126,6 +126,9 @@ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0); + virtual void EmitTBSSSymbol (MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment = 0); + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace); @@ -360,6 +363,21 @@ EmitEOL(); } +// .tbss sym$tlv$init, size, align +void MCAsmStreamer::EmitTBSSSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + assert(Symbol != NULL && "Symbol shouldn't be NULL!"); + OS << ".tbss "; + + // This is a mach-o specific directive and the name requires some mangling. + OS << *Symbol << "$tlv$init, " << Size; + + // Output align if we have it. 
+ if (ByteAlignment != 0) OS << ", " << Log2_32(ByteAlignment); + + EmitEOL(); +} + static inline char toOctal(int X) { return (X&7)+'0'; } static void PrintQuotedString(StringRef Data, raw_ostream &OS) { Modified: llvm/trunk/lib/MC/MCMachOStreamer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCMachOStreamer.cpp?rev=103757&r1=103756&r2=103757&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCMachOStreamer.cpp (original) +++ llvm/trunk/lib/MC/MCMachOStreamer.cpp Thu May 13 20:50:28 2010 @@ -126,6 +126,8 @@ } virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0); + virtual void EmitTBSSSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment = 0); virtual void EmitBytes(StringRef Data, unsigned AddrSpace); virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace); virtual void EmitGPRel32Value(const MCExpr *Value) { @@ -337,6 +339,11 @@ SectData.setAlignment(ByteAlignment); } +void MCMachOStreamer::EmitTBSSSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + assert(false && "Implement me!"); +} + void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); } Modified: llvm/trunk/lib/MC/MCNullStreamer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCNullStreamer.cpp?rev=103757&r1=103756&r2=103757&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCNullStreamer.cpp (original) +++ llvm/trunk/lib/MC/MCNullStreamer.cpp Thu May 13 20:50:28 2010 @@ -55,7 +55,8 @@ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0) {} - + virtual void EmitTBSSSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} virtual void EmitBytes(StringRef Data, unsigned 
AddrSpace) {} virtual void EmitValue(const MCExpr *Value, unsigned Size, Modified: llvm/trunk/lib/MC/MCParser/AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/AsmParser.cpp?rev=103757&r1=103756&r2=103757&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCParser/AsmParser.cpp (original) +++ llvm/trunk/lib/MC/MCParser/AsmParser.cpp Thu May 13 20:50:28 2010 @@ -703,6 +703,8 @@ return ParseDirectiveDarwinSymbolDesc(); if (IDVal == ".lsym") return ParseDirectiveDarwinLsym(); + if (IDVal == ".tbss") + return ParseDirectiveDarwinTBSS(); if (IDVal == ".subsections_via_symbols") return ParseDirectiveDarwinSubsectionsViaSymbols(); @@ -1427,6 +1429,61 @@ return false; } +/// ParseDirectiveDarwinTBSS +/// ::= .tbss identifier, size, align +bool AsmParser::ParseDirectiveDarwinTBSS() { + SMLoc IDLoc = Lexer.getLoc(); + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Demangle the name output. The trailing characters are guaranteed to be + // $tlv$init so just strip that off. + StringRef DemName = Name.substr(0, Name.size() - strlen("$tlv$init")); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = CreateSymbol(DemName); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = Lexer.getLoc(); + if (ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (Lexer.is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = Lexer.getLoc(); + if (ParseAbsoluteExpression(Pow2Alignment)) + return true; + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.tbss' directive"); + + Lex(); + + if (Size < 0) + return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than" + "zero"); + + // FIXME: Diagnose overflow. 
+ if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less" + "than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + Out.EmitTBSSSymbol(Sym, Size, Pow2Alignment ? 1 << Pow2Alignment : 0); + + return false; +} + /// ParseDirectiveDarwinSubsectionsViaSymbols /// ::= .subsections_via_symbols bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() { Added: llvm/trunk/test/MC/AsmParser/directive_tbss.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/directive_tbss.s?rev=103757&view=auto ============================================================================== --- llvm/trunk/test/MC/AsmParser/directive_tbss.s (added) +++ llvm/trunk/test/MC/AsmParser/directive_tbss.s Thu May 13 20:50:28 2010 @@ -0,0 +1,7 @@ +# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s + +# CHECK: .tbss _a$tlv$init, 4 +# CHECK: .tbss _b$tlv$init, 4, 3 + +.tbss _a$tlv$init, 4 +.tbss _b$tlv$init, 4, 3 From evan.cheng at apple.com Thu May 13 21:13:42 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Fri, 14 May 2010 02:13:42 -0000 Subject: [llvm-commits] [llvm] r103760 - in /llvm/trunk: lib/Target/ARM/ARMBaseInstrInfo.cpp lib/Target/ARM/ARMBaseRegisterInfo.cpp lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMRegisterInfo.h lib/Target/ARM/ARMRegisterInfo.td utils/TableGen/EDEmitter.cpp Message-ID: <20100514021342.4C294312800A@llvm.org> Author: evancheng Date: Thu May 13 21:13:41 2010 New Revision: 103760 URL: http://llvm.org/viewvc/llvm-project?rev=103760&view=rev Log: Added a QQQQ register file to model 4-consecutive Q registers. 
Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td llvm/trunk/utils/TableGen/EDEmitter.cpp Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=103760&r1=103759&r2=103760&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Thu May 13 21:13:41 2010 @@ -680,12 +680,10 @@ SrcRC == ARM::QPR_8RegisterClass) SrcRC = ARM::QPRRegisterClass; - // Allow QQPR / QQPR_VFP2 / QQPR_8 cross-class copies. - if (DestRC == ARM::QQPR_VFP2RegisterClass || - DestRC == ARM::QQPR_8RegisterClass) + // Allow QQPR / QQPR_VFP2 cross-class copies. + if (DestRC == ARM::QQPR_VFP2RegisterClass) DestRC = ARM::QQPRRegisterClass; - if (SrcRC == ARM::QQPR_VFP2RegisterClass || - SrcRC == ARM::QQPR_8RegisterClass) + if (SrcRC == ARM::QQPR_VFP2RegisterClass) SrcRC = ARM::QQPRRegisterClass; // Disallow copies of unequal sizes. @@ -714,6 +712,8 @@ Opc = ARM::VMOVQ; else if (DestRC == ARM::QQPRRegisterClass) Opc = ARM::VMOVQQ; + else if (DestRC == ARM::QQQQPRRegisterClass) + Opc = ARM::VMOVQQQQ; else return false; @@ -787,11 +787,10 @@ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) .addMemOperand(MMO)); } - } else { - assert((RC == ARM::QQPRRegisterClass || - RC == ARM::QQPR_VFP2RegisterClass || - RC == ARM::QQPR_8RegisterClass) && "Unknown regclass!"); + } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + // FIXME: It's possible to only store part of the QQ register if the + // spilled def has a sub-register index. 
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST2q32)) .addFrameIndex(FI).addImm(128); MIB = AddDReg(MIB, SrcReg, ARM::DSUBREG_0, getKillRegState(isKill), TRI); @@ -810,6 +809,21 @@ MIB = AddDReg(MIB, SrcReg, ARM::DSUBREG_2, 0, TRI); AddDReg(MIB, SrcReg, ARM::DSUBREG_3, 0, TRI); } + } else { + assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!"); + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::DSUBREG_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::DSUBREG_1, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::DSUBREG_2, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::DSUBREG_3, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::DSUBREG_4, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::DSUBREG_5, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::DSUBREG_6, 0, TRI); + AddDReg(MIB, SrcReg, ARM::DSUBREG_7, 0, TRI); } } @@ -858,10 +872,7 @@ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) .addMemOperand(MMO)); } - } else { - assert((RC == ARM::QQPRRegisterClass || - RC == ARM::QQPR_VFP2RegisterClass || - RC == ARM::QQPR_8RegisterClass) && "Unknown regclass!"); + } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD2q32)); MIB = AddDReg(MIB, DestReg, ARM::DSUBREG_0, RegState::Define, TRI); @@ -880,6 +891,21 @@ MIB = AddDReg(MIB, DestReg, ARM::DSUBREG_2, RegState::Define, TRI); AddDReg(MIB, DestReg, ARM::DSUBREG_3, RegState::Define, TRI); } + } else { + assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!"); + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + .addMemOperand(MMO); + MIB = AddDReg(MIB, DestReg, ARM::DSUBREG_0, RegState::Define, TRI); + MIB = AddDReg(MIB, 
DestReg, ARM::DSUBREG_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::DSUBREG_2, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::DSUBREG_3, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::DSUBREG_4, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::DSUBREG_5, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::DSUBREG_6, RegState::Define, TRI); + AddDReg(MIB, DestReg, ARM::DSUBREG_7, RegState::Define, TRI); } } @@ -1105,6 +1131,8 @@ return true; } + // FIXME: VMOVQQ and VMOVQQQQ? + return false; } Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp?rev=103760&r1=103759&r2=103760&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp Thu May 13 21:13:41 2010 @@ -279,10 +279,14 @@ return &ARM::QPR_VFP2RegClass; } - assert(A->getSize() == 32 && "Expecting a QQ register class!"); - if (B == &ARM::SPR_8RegClass) - return &ARM::QQPR_8RegClass; - return &ARM::QQPR_VFP2RegClass; + if (A->getSize() == 32) { + if (B == &ARM::SPR_8RegClass) + return 0; // Do not allow coalescing! + return &ARM::QQPR_VFP2RegClass; + } + + assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); + return 0; // Do not allow coalescing! } case 5: case 6: @@ -293,26 +297,55 @@ if (B == &ARM::DPR_VFP2RegClass) return &ARM::QPR_VFP2RegClass; if (B == &ARM::DPR_8RegClass) - return &ARM::QPR_8RegClass; + return 0; // Do not allow coalescing! 
return A; } - assert(A->getSize() == 32 && "Expecting a QQ register class!"); - if (B == &ARM::DPR_VFP2RegClass) - return &ARM::QQPR_VFP2RegClass; - if (B == &ARM::DPR_8RegClass) - return &ARM::QQPR_8RegClass; + if (A->getSize() == 32) { + if (B == &ARM::DPR_VFP2RegClass) + return &ARM::QQPR_VFP2RegClass; + if (B == &ARM::DPR_8RegClass) + return 0; // Do not allow coalescing! + return A; + } + + assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); + if (B != &ARM::DPRRegClass) + return 0; // Do not allow coalescing! return A; } case 9: - case 10: { + case 10: + case 11: + case 12: { + // D sub-registers of QQQQ registers. + if (A->getSize() == 64 && B == &ARM::DPRRegClass) + return A; + return 0; // Do not allow coalescing! + } + + case 13: + case 14: { // Q sub-registers. - assert(A->getSize() == 32 && "Expecting a QQ register class!"); - if (B == &ARM::QPR_VFP2RegClass) - return &ARM::QQPR_VFP2RegClass; - if (B == &ARM::QPR_8RegClass) - return &ARM::QQPR_8RegClass; - return A; + if (A->getSize() == 32) { + if (B == &ARM::QPR_VFP2RegClass) + return &ARM::QQPR_VFP2RegClass; + if (B == &ARM::QPR_8RegClass) + return 0; // Do not allow coalescing! + return A; + } + + assert(A->getSize() == 64 && "Expecting a QQQQ register class!"); + if (B == &ARM::QPRRegClass) + return A; + return 0; // Do not allow coalescing! + } + case 15: + case 16: { + // Q sub-registers of QQQQ registers. + if (A->getSize() == 64 && B == &ARM::QPRRegClass) + return A; + return 0; // Do not allow coalescing! 
} } return 0; Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=103760&r1=103759&r2=103760&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu May 13 21:13:41 2010 @@ -267,9 +267,11 @@ addQRTypeForNEON(MVT::v2i64); // Map v4i64 to QQ registers but do not make the type legal for any - // operations. v4i64 is only used for REG_SEQUENCE to load / store quad + // operations. Similarly map v8i64 to QQQQ registers. v4i64 and v8i64 are + // only used for REG_SEQUENCE to load / store 4 to 8 consecutive // D registers. addRegisterClass(MVT::v4i64, ARM::QQPRRegisterClass); + addRegisterClass(MVT::v8i64, ARM::QQQQPRRegisterClass); // v2f64 is legal so that QR subregs can be extracted as f64 elements, but // neither Neon nor VFP support any arithmetic operations on it. Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=103760&r1=103759&r2=103760&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Thu May 13 21:13:41 2010 @@ -2816,10 +2816,13 @@ def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>; -// Pseudo vector move instruction for QQ (a pair of Q) registers. This should +// Pseudo vector move instructions for QQ and QQQQ registers. This should // be expanded after register allocation is completed. 
def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src), NoItinerary, "@ vmov\t$dst, $src", []>; + +def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), + NoItinerary, "@ vmov\t$dst, $src", []>; } // neverHasSideEffects // VMOV : Vector Move (Immediate) Modified: llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h?rev=103760&r1=103759&r2=103760&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h (original) +++ llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h Thu May 13 21:13:41 2010 @@ -28,9 +28,10 @@ /// these indices must be kept in sync with the class indices in the /// ARMRegisterInfo.td file. enum SubregIndex { - SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4, - DSUBREG_0 = 5, DSUBREG_1 = 6, DSUBREG_2 = 7, DSUBREG_3 = 8, - QSUBREG_0 = 9, QSUBREG_1 = 10 + SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4, + DSUBREG_0 = 5, DSUBREG_1 = 6, DSUBREG_2 = 7, DSUBREG_3 = 8, + DSUBREG_4 = 9, DSUBREG_5 = 10, DSUBREG_6 = 11, DSUBREG_7 = 12, + QSUBREG_0 = 13, QSUBREG_1 = 14, QSUBREG_2 = 15, QSUBREG_3 = 16 }; } Modified: llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td?rev=103760&r1=103759&r2=103760&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td Thu May 13 21:13:41 2010 @@ -87,6 +87,7 @@ def D26 : ARMFReg<26, "d26">; def D27 : ARMFReg<27, "d27">; def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">; def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">; +def DDummy : ARMFReg<31, "dINVALID">; // Advanced SIMD (NEON) defines 16 quad-word aliases def Q0 : ARMReg< 0, "q0", [D0, D1]>; @@ -105,6 +106,7 @@ def Q13 : ARMReg<13, "q13", [D26, D27]>; def 
Q14 : ARMReg<14, "q14", [D28, D29]>; def Q15 : ARMReg<15, "q15", [D30, D31]>; +def QDummy : ARMFReg<16, "qINVALID">; // Pseudo 256-bit registers to represent pairs of Q registers. These should // never be present in the emitted code. @@ -122,6 +124,12 @@ def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>; def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>; +// Pseudo 512-bit registers to represent four consecutive Q registers. +def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>; +def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>; +def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>; +def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; + // Current Program Status Register. def CPSR : ARMReg<0, "cpsr">; @@ -358,34 +366,56 @@ let SubRegClassList = [SPR_8, SPR_8]; } +// Dummy 64-bit regclass to represent impossible subreg indices. +def DPR_INVALID : RegisterClass<"ARM", + [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, + [DDummy]> { + let CopyCost = -1; +} + // Generic 128-bit vector register class. def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> { let SubRegClassList = [SPR_INVALID, SPR_INVALID, SPR_INVALID, SPR_INVALID, - DPR, DPR]; + DPR, DPR, DPR_INVALID, DPR_INVALID, + DPR_INVALID, DPR_INVALID, DPR_INVALID, DPR_INVALID]; } // Subset of QPR that have 32-bit SPR subregs. def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]> { - let SubRegClassList = [SPR, SPR, SPR, SPR, DPR_VFP2, DPR_VFP2]; + let SubRegClassList = [SPR, SPR, SPR, SPR, + DPR_VFP2, DPR_VFP2, DPR_INVALID, DPR_INVALID, + DPR_INVALID, DPR_INVALID, DPR_INVALID, DPR_INVALID]; } // Subset of QPR that have DPR_8 and SPR_8 subregs. 
def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, [Q0, Q1, Q2, Q3]> { - let SubRegClassList = [SPR_8, SPR_8, SPR_8, SPR_8, DPR_8, DPR_8]; + let SubRegClassList = [SPR_8, SPR_8, SPR_8, SPR_8, + DPR_8, DPR_8, DPR_INVALID, DPR_INVALID, + DPR_INVALID, DPR_INVALID, DPR_INVALID, DPR_INVALID]; } -// Pseudo 256-bit vector register class to model pairs of Q registers. +// Dummy 128-bit regclass to represent impossible subreg indices. +def QPR_INVALID : RegisterClass<"ARM", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, + [QDummy]> { + let CopyCost = -1; +} + +// Pseudo 256-bit vector register class to model pairs of Q registers +// (4 consecutive D registers). def QQPR : RegisterClass<"ARM", [v4i64], 256, [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> { let SubRegClassList = [SPR_INVALID, SPR_INVALID, SPR_INVALID, SPR_INVALID, - DPR, DPR, DPR, DPR, QPR, QPR]; + DPR, DPR, DPR, DPR, + DPR_INVALID, DPR_INVALID, DPR_INVALID, DPR_INVALID, + QPR, QPR, QPR_INVALID, QPR_INVALID]; } // Subset of QQPR that have 32-bit SPR subregs. @@ -394,16 +424,18 @@ [QQ0, QQ1, QQ2, QQ3]> { let SubRegClassList = [SPR, SPR, SPR, SPR, DPR_VFP2, DPR_VFP2, DPR_VFP2, DPR_VFP2, - QPR_VFP2, QPR_VFP2]; + DPR_INVALID, DPR_INVALID, DPR_INVALID, DPR_INVALID, + QPR_VFP2, QPR_VFP2, QPR_INVALID, QPR_INVALID]; } -// Subset of QQPR that have QPR_8, DPR_8, and SPR_8 subregs. -def QQPR_8 : RegisterClass<"ARM", [v4i64], - 256, - [QQ0, QQ1]> { - let SubRegClassList = [SPR_8, SPR_8, SPR_8, SPR_8, - DPR_8, DPR_8, DPR_8, DPR_8, - QPR_8, QPR_8]; +// Pseudo 512-bit vector register class to model 4 consecutive Q registers +// (8 consecutive D registers). +def QQQQPR : RegisterClass<"ARM", [v8i64], + 256, + [QQQQ0, QQQQ1, QQQQ2, QQQQ3]> { + let SubRegClassList = [SPR_INVALID, SPR_INVALID, SPR_INVALID, SPR_INVALID, + DPR, DPR, DPR, DPR, DPR, DPR, DPR, DPR, + QPR, QPR, QPR, QPR]; } // Condition code registers. 
@@ -418,12 +450,20 @@ def arm_ssubreg_1 : PatLeaf<(i32 2)>; def arm_ssubreg_2 : PatLeaf<(i32 3)>; def arm_ssubreg_3 : PatLeaf<(i32 4)>; + def arm_dsubreg_0 : PatLeaf<(i32 5)>; def arm_dsubreg_1 : PatLeaf<(i32 6)>; def arm_dsubreg_2 : PatLeaf<(i32 7)>; def arm_dsubreg_3 : PatLeaf<(i32 8)>; -def arm_qsubreg_0 : PatLeaf<(i32 9)>; -def arm_qsubreg_1 : PatLeaf<(i32 10)>; +def arm_dsubreg_4 : PatLeaf<(i32 9)>; +def arm_dsubreg_5 : PatLeaf<(i32 10)>; +def arm_dsubreg_6 : PatLeaf<(i32 11)>; +def arm_dsubreg_7 : PatLeaf<(i32 12)>; + +def arm_qsubreg_0 : PatLeaf<(i32 13)>; +def arm_qsubreg_1 : PatLeaf<(i32 14)>; +def arm_qsubreg_2 : PatLeaf<(i32 15)>; +def arm_qsubreg_3 : PatLeaf<(i32 16)>; // S sub-registers of D registers. def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, @@ -478,7 +518,37 @@ [D3, D7, D11, D15, D19, D23, D27, D31]>; // Q sub-registers of QQ registers. -def : SubRegSet<9, [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7], +def : SubRegSet<13, [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7], [Q0, Q2, Q4, Q6, Q8, Q10, Q12, Q14]>; -def : SubRegSet<10,[QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7], +def : SubRegSet<14,[QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7], [Q1, Q3, Q5, Q7, Q9, Q11, Q13, Q15]>; + + +// D sub-registers of QQQQ registers. +def : SubRegSet<5, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [D0, D8, D16, D24]>; +def : SubRegSet<6, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [D1, D9, D17, D25]>; +def : SubRegSet<7, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [D2, D10, D18, D26]>; +def : SubRegSet<8, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [D3, D11, D19, D27]>; + +def : SubRegSet<9, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [D4, D12, D20, D28]>; +def : SubRegSet<10, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [D5, D13, D21, D29]>; +def : SubRegSet<11, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [D6, D14, D22, D30]>; +def : SubRegSet<12, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [D7, D15, D23, D31]>; + +// Q sub-registers of QQQQ registers.
+def : SubRegSet<13, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [Q0, Q4, Q8, Q12]>; +def : SubRegSet<14, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [Q1, Q5, Q9, Q13]>; +def : SubRegSet<15, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [Q2, Q6, Q10, Q14]>; +def : SubRegSet<16, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [Q3, Q7, Q11, Q15]>; Modified: llvm/trunk/utils/TableGen/EDEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/EDEmitter.cpp?rev=103760&r1=103759&r2=103760&view=diff ============================================================================== --- llvm/trunk/utils/TableGen/EDEmitter.cpp (original) +++ llvm/trunk/utils/TableGen/EDEmitter.cpp Thu May 13 21:13:41 2010 @@ -579,6 +579,7 @@ REG("SPR"); REG("QPR"); REG("QQPR"); + REG("QQQQPR"); IMM("i32imm"); IMM("bf_inv_mask_imm"); From stoklund at 2pi.dk Thu May 13 23:30:51 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Fri, 14 May 2010 04:30:51 -0000 Subject: [llvm-commits] [llvm] r103764 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100514043051.5CA31312800A@llvm.org> Author: stoklund Date: Thu May 13 23:30:51 2010 New Revision: 103764 URL: http://llvm.org/viewvc/llvm-project?rev=103764&view=rev Log: Enable opportunistic coalescing Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103764&r1=103763&r2=103764&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Thu May 13 23:30:51 2010 @@ -603,6 +603,7 @@ reservePhysReg(MBB, MII, *I); SmallVector VirtKills, PhysKills, PhysDefs; + SmallVector Coalesced; // Otherwise, sequentially allocate each instruction in the MBB. 
while (MII != MBB.end()) { @@ -706,8 +707,7 @@ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; if (MO.isUse()) { unsigned PhysReg = reloadVirtReg(MBB, MI, i, Reg, CopyDst); - if (CopySrc == Reg) - CopySrc = PhysReg; + CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; setPhysReg(MO, PhysReg); if (MO.isKill()) VirtKills.push_back(Reg); @@ -757,11 +757,12 @@ PhysKills.push_back(Reg); continue; } - if (MO.isDead()) - VirtKills.push_back(Reg); unsigned PhysReg = defineVirtReg(MBB, MI, i, Reg, CopySrc); - if (CopyDst == Reg) - CopyDst = PhysReg; + if (MO.isDead()) { + VirtKills.push_back(Reg); + CopyDst = 0; // cancel coalescing; + } else + CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0; setPhysReg(MO, PhysReg); } @@ -783,7 +784,12 @@ MRI->addPhysRegsUsed(UsedInInstr); - DEBUG(dbgs() << "<< " << *MI); + if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { + DEBUG(dbgs() << "-- coalescing: " << *MI); + Coalesced.push_back(MI); + } else { + DEBUG(dbgs() << "<< " << *MI); + } } // Spill all physical registers holding virtual registers now. @@ -795,6 +801,11 @@ spillVirtReg(MBB, MI, i, true); LiveVirtRegs.clear(); + // Erase all the coalesced copies. We are delaying it until now because + // LiveVirtsRegs might refer to the instrs. + for (unsigned i = 0, e = Coalesced.size(); i != e; ++i) + MBB.erase(Coalesced[i]); + DEBUG(MBB.dump()); } From daniel at zuster.org Thu May 13 23:31:50 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Fri, 14 May 2010 04:31:50 -0000 Subject: [llvm-commits] [llvm] r103765 - in /llvm/trunk: lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp test/Other/inline-asm-newline-terminator.ll Message-ID: <20100514043150.C06E9312800A@llvm.org> Author: ddunbar Date: Thu May 13 23:31:50 2010 New Revision: 103765 URL: http://llvm.org/viewvc/llvm-project?rev=103765&view=rev Log: Inline Asm: Ensure buffer is newline terminated to match how the text is printed. 
- This is a hack, but I can't decide the best place to handle this. Chris? Added: llvm/trunk/test/Other/inline-asm-newline-terminator.ll Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp?rev=103765&r1=103764&r2=103765&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Thu May 13 23:31:50 2010 @@ -53,6 +53,17 @@ } SourceMgr SrcMgr; + + // Ensure the buffer is newline terminated. + char *TmpString = 0; + if (Str.back() != '\n') { + TmpString = new char[Str.size() + 2]; + memcpy(TmpString, Str.data(), Str.size()); + TmpString[Str.size()] = '\n'; + TmpString[Str.size() + 1] = 0; + isNullTerminated = true; + Str = TmpString; + } // If the current LLVMContext has an inline asm handler, set it in SourceMgr. 
LLVMContext &LLVMCtx = MMI->getModule()->getContext(); @@ -84,6 +95,9 @@ /*NoFinalize*/ true); if (Res && !HasDiagHandler) report_fatal_error("Error parsing inline asm\n"); + + if (TmpString) + delete[] TmpString; } Added: llvm/trunk/test/Other/inline-asm-newline-terminator.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/inline-asm-newline-terminator.ll?rev=103765&view=auto ============================================================================== --- llvm/trunk/test/Other/inline-asm-newline-terminator.ll (added) +++ llvm/trunk/test/Other/inline-asm-newline-terminator.ll Thu May 13 23:31:50 2010 @@ -0,0 +1,6 @@ +; RUN: llc -filetype=obj -o - < %s + +; ModuleID = 't.c' +target triple = "x86_64-apple-darwin10.0.0" + +module asm ".desc _f0, 0x10" From evan.cheng at apple.com Fri May 14 02:37:40 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Fri, 14 May 2010 07:37:40 -0000 Subject: [llvm-commits] [llvm] r103768 - /llvm/trunk/include/llvm/Target/TargetLowering.h Message-ID: <20100514073740.986E3312800A@llvm.org> Author: evancheng Date: Fri May 14 02:37:40 2010 New Revision: 103768 URL: http://llvm.org/viewvc/llvm-project?rev=103768&view=rev Log: Get rid of the bit twiddling to read / set OpActions and ValueTypeActions. The small saving in memory isn't worth the increase in runtime and code complexity in my opinion. 
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h Modified: llvm/trunk/include/llvm/Target/TargetLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=103768&r1=103767&r2=103768&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h (original) +++ llvm/trunk/include/llvm/Target/TargetLowering.h Fri May 14 02:37:40 2010 @@ -180,11 +180,9 @@ } class ValueTypeActionImpl { - /// ValueTypeActions - This is a bitvector that contains two bits for each - /// value type, where the two bits correspond to the LegalizeAction enum. - /// This can be queried with "getTypeAction(VT)". - /// dimension by (MVT::MAX_ALLOWED_VALUETYPE/32) * 2 - uint32_t ValueTypeActions[(MVT::MAX_ALLOWED_VALUETYPE/32)*2]; + /// ValueTypeActions - For each value type, keep a LegalizeAction enum + /// that indicates how instruction selection should deal with the type. + uint8_t ValueTypeActions[MVT::LAST_VALUETYPE]; public: ValueTypeActionImpl() { std::fill(ValueTypeActions, array_endof(ValueTypeActions), 0); @@ -201,15 +199,11 @@ return Legal; } unsigned I = VT.getSimpleVT().SimpleTy; - assert(I<4*array_lengthof(ValueTypeActions)*sizeof(ValueTypeActions[0])); - unsigned Mask = (unsigned)MVT::MAX_ALLOWED_VALUETYPE-1; - return (LegalizeAction)((ValueTypeActions[I>>4] >> ((2*I) & Mask)) & 3); + return (LegalizeAction)ValueTypeActions[I]; } void setTypeAction(EVT VT, LegalizeAction Action) { unsigned I = VT.getSimpleVT().SimpleTy; - assert(I<4*array_lengthof(ValueTypeActions)*sizeof(ValueTypeActions[0])); - unsigned Mask = (unsigned)MVT::MAX_ALLOWED_VALUETYPE-1; - ValueTypeActions[I>>4] |= Action << ((I*2) & Mask); + ValueTypeActions[I] = Action; } }; @@ -358,13 +352,9 @@ /// for it. 
LegalizeAction getOperationAction(unsigned Op, EVT VT) const { if (VT.isExtended()) return Expand; - assert(Op < array_lengthof(OpActions[0]) && - (unsigned)VT.getSimpleVT().SimpleTy < sizeof(OpActions[0][0])*8 && - "Table isn't big enough!"); + assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); unsigned I = (unsigned) VT.getSimpleVT().SimpleTy; - unsigned J = I & ((unsigned)MVT::MAX_ALLOWED_VALUETYPE-1); - I = I >> 5; - return (LegalizeAction)((OpActions[I][Op] >> (J*2)) & 3); + return (LegalizeAction)OpActions[I][Op]; } /// isOperationLegalOrCustom - Return true if the specified operation is @@ -988,11 +978,8 @@ /// with the specified type and indicate what to do about it. void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { - unsigned I = (unsigned)VT.SimpleTy; - unsigned J = I & ((unsigned)MVT::MAX_ALLOWED_VALUETYPE - 1); - I = I >> 5; - OpActions[I][Op] &= ~(uint64_t(3UL) << (J*2)); - OpActions[I][Op] |= (uint64_t)Action << (J*2); + assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); + OpActions[(unsigned)VT.SimpleTy][Op] = (uint8_t)Action; } /// setLoadExtAction - Indicate that the specified load with extension does @@ -1593,10 +1580,7 @@ /// Most operations are Legal (aka, supported natively by the target), but /// operations that are not should be described. Note that operations on /// non-legal value types are not described here. - /// This array is accessed using VT.getSimpleVT(), so it is subject to - /// the MVT::MAX_ALLOWED_VALUETYPE * 2 bits. 
- uint64_t OpActions[MVT::MAX_ALLOWED_VALUETYPE/(sizeof(uint64_t)*4)] - [ISD::BUILTIN_OP_END]; + uint8_t OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; /// LoadExtActions - For each load extension type and each value type, /// keep a LegalizeAction that indicates how instruction selection should deal From daniel at zuster.org Fri May 14 02:47:51 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Fri, 14 May 2010 07:47:51 -0000 Subject: [llvm-commits] [llvm] r103769 - in /llvm/trunk: lib/CodeGen/LLVMTargetMachine.cpp test/Other/inline-asm-newline-terminator.ll Message-ID: <20100514074751.F3D21312800A@llvm.org> Author: ddunbar Date: Fri May 14 02:47:51 2010 New Revision: 103769 URL: http://llvm.org/viewvc/llvm-project?rev=103769&view=rev Log: XFAIL the test I added with vg_leak, apparently it is the first and only llc -filetype=obj test, and -filetype=obj leaks a few objects. Added a FIXME, we need to sort out the ownership model for the various MC objects. Modified: llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp llvm/trunk/test/Other/inline-asm-newline-terminator.ll Modified: llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp?rev=103769&r1=103768&r2=103769&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp (original) +++ llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp Fri May 14 02:47:51 2010 @@ -140,6 +140,8 @@ case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. + // + // FIXME: These are currently leaked. 
MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context); TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple); if (MCE == 0 || TAB == 0) Modified: llvm/trunk/test/Other/inline-asm-newline-terminator.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Other/inline-asm-newline-terminator.ll?rev=103769&r1=103768&r2=103769&view=diff ============================================================================== --- llvm/trunk/test/Other/inline-asm-newline-terminator.ll (original) +++ llvm/trunk/test/Other/inline-asm-newline-terminator.ll Fri May 14 02:47:51 2010 @@ -1,4 +1,5 @@ ; RUN: llc -filetype=obj -o - < %s +; XFAIL: vg_leak ; ModuleID = 't.c' target triple = "x86_64-apple-darwin10.0.0" From gohman at apple.com Fri May 14 10:29:31 2010 From: gohman at apple.com (Dan Gohman) Date: Fri, 14 May 2010 15:29:31 -0000 Subject: [llvm-commits] [llvm] r103772 - in /llvm/trunk/include/llvm/Support: DOTGraphTraits.h GraphWriter.h Message-ID: <20100514152931.B50A5312800A@llvm.org> Author: djg Date: Fri May 14 10:29:31 2010 New Revision: 103772 URL: http://llvm.org/viewvc/llvm-project?rev=103772&view=rev Log: Add an isNodeHidden to the graph traits, to support definition of subgraph views. Modified: llvm/trunk/include/llvm/Support/DOTGraphTraits.h llvm/trunk/include/llvm/Support/GraphWriter.h Modified: llvm/trunk/include/llvm/Support/DOTGraphTraits.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/DOTGraphTraits.h?rev=103772&r1=103771&r2=103772&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/DOTGraphTraits.h (original) +++ llvm/trunk/include/llvm/Support/DOTGraphTraits.h Fri May 14 10:29:31 2010 @@ -59,6 +59,12 @@ return false; } + /// isNodeHidden - If this function returns true, the given node is not + /// displayed in the graph.
+ static bool isNodeHidden(const void *Node) { + return false; + } + /// getNodeLabel - Given a node and a pointer to the top level graph, return /// the label to print in the node. template Modified: llvm/trunk/include/llvm/Support/GraphWriter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/GraphWriter.h?rev=103772&r1=103771&r2=103772&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/GraphWriter.h (original) +++ llvm/trunk/include/llvm/Support/GraphWriter.h Fri May 14 10:29:31 2010 @@ -122,7 +122,20 @@ // Loop over the graph, printing it out... for (node_iterator I = GTraits::nodes_begin(G), E = GTraits::nodes_end(G); I != E; ++I) - writeNode(*I); + if (!isNodeHidden(*I)) + writeNode(*I); + } + + bool isNodeHidden(NodeType &Node) { + return isNodeHidden(&Node); + } + + bool isNodeHidden(NodeType *const *Node) { + return isNodeHidden(*Node); + } + + bool isNodeHidden(NodeType *Node) { + return DTraits.isNodeHidden(Node); } void writeNode(NodeType& Node) { @@ -189,9 +202,11 @@ child_iterator EI = GTraits::child_begin(Node); child_iterator EE = GTraits::child_end(Node); for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i) - writeEdge(Node, i, EI); + if (!DTraits.isNodeHidden(*EI)) + writeEdge(Node, i, EI); for (; EI != EE; ++EI) - writeEdge(Node, 64, EI); + if (!DTraits.isNodeHidden(*EI)) + writeEdge(Node, 64, EI); } void writeEdge(NodeType *Node, unsigned edgeidx, child_iterator EI) { From gohman at apple.com Fri May 14 10:35:10 2010 From: gohman at apple.com (Dan Gohman) Date: Fri, 14 May 2010 15:35:10 -0000 Subject: [llvm-commits] [llvm] r103773 - /llvm/trunk/include/llvm/Support/StandardPasses.h Message-ID: <20100514153510.A1AC0312800A@llvm.org> Author: djg Date: Fri May 14 10:35:10 2010 New Revision: 103773 URL: http://llvm.org/viewvc/llvm-project?rev=103773&view=rev Log: Use PassManagerBase, to give clients the option of using either FunctionPassManager or 
regular PassManager. Modified: llvm/trunk/include/llvm/Support/StandardPasses.h Modified: llvm/trunk/include/llvm/Support/StandardPasses.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/StandardPasses.h?rev=103773&r1=103772&r2=103773&view=diff ============================================================================== --- llvm/trunk/include/llvm/Support/StandardPasses.h (original) +++ llvm/trunk/include/llvm/Support/StandardPasses.h Fri May 14 10:35:10 2010 @@ -31,7 +31,7 @@ /// /// \arg OptimizationLevel - The optimization level, corresponding to -O0, /// -O1, etc. - static inline void createStandardFunctionPasses(FunctionPassManager *PM, + static inline void createStandardFunctionPasses(PassManagerBase *PM, unsigned OptimizationLevel); /// createStandardModulePasses - Add the standard list of module passes to the @@ -46,7 +46,7 @@ /// \arg HaveExceptions - Whether the module may have code using exceptions. /// \arg InliningPass - The inlining pass to use, if any, or null. This will /// always be added, even at -O0.a - static inline void createStandardModulePasses(PassManager *PM, + static inline void createStandardModulePasses(PassManagerBase *PM, unsigned OptimizationLevel, bool OptimizeSize, bool UnitAtATime, @@ -61,14 +61,14 @@ /// Internalize - Run the internalize pass. /// RunInliner - Use a function inlining pass. /// VerifyEach - Run the verifier after each pass. - static inline void createStandardLTOPasses(PassManager *PM, + static inline void createStandardLTOPasses(PassManagerBase *PM, bool Internalize, bool RunInliner, bool VerifyEach); // Implementations - static inline void createStandardFunctionPasses(FunctionPassManager *PM, + static inline void createStandardFunctionPasses(PassManagerBase *PM, unsigned OptimizationLevel) { if (OptimizationLevel > 0) { PM->add(createCFGSimplificationPass()); @@ -82,7 +82,7 @@ /// createStandardModulePasses - Add the standard module passes. 
This is /// expected to be run after the standard function passes. - static inline void createStandardModulePasses(PassManager *PM, + static inline void createStandardModulePasses(PassManagerBase *PM, unsigned OptimizationLevel, bool OptimizeSize, bool UnitAtATime, @@ -164,14 +164,14 @@ } } - static inline void addOnePass(PassManager *PM, Pass *P, bool AndVerify) { + static inline void addOnePass(PassManagerBase *PM, Pass *P, bool AndVerify) { PM->add(P); if (AndVerify) PM->add(createVerifierPass()); } - static inline void createStandardLTOPasses(PassManager *PM, + static inline void createStandardLTOPasses(PassManagerBase *PM, bool Internalize, bool RunInliner, bool VerifyEach) { From gohman at apple.com Fri May 14 10:36:54 2010 From: gohman at apple.com (Dan Gohman) Date: Fri, 14 May 2010 15:36:54 -0000 Subject: [llvm-commits] [llvm] r103774 - /llvm/trunk/tools/opt/opt.cpp Message-ID: <20100514153655.07CF1312800A@llvm.org> Author: djg Date: Fri May 14 10:36:54 2010 New Revision: 103774 URL: http://llvm.org/viewvc/llvm-project?rev=103774&view=rev Log: Use regular PassManager instead of FunctionPassManager in opt, since it isn't doing lazy streaming. This also fixes a missing doFinalization call. Modified: llvm/trunk/tools/opt/opt.cpp Modified: llvm/trunk/tools/opt/opt.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/opt/opt.cpp?rev=103774&r1=103773&r2=103774&view=diff ============================================================================== --- llvm/trunk/tools/opt/opt.cpp (original) +++ llvm/trunk/tools/opt/opt.cpp Fri May 14 10:36:54 2010 @@ -268,7 +268,7 @@ }; char BasicBlockPassPrinter::ID = 0; -inline void addPass(PassManager &PM, Pass *P) { +inline void addPass(PassManagerBase &PM, Pass *P) { // Add the pass to the pass manager... PM.add(P); @@ -281,7 +281,7 @@ /// duplicates llvm-gcc behaviour. 
/// /// OptLevel - Optimization Level -void AddOptimizationPasses(PassManager &MPM, FunctionPassManager &FPM, +void AddOptimizationPasses(PassManagerBase &MPM, PassManagerBase &FPM, unsigned OptLevel) { createStandardFunctionPasses(&FPM, OptLevel); @@ -305,7 +305,7 @@ InliningPass); } -void AddStandardCompilePasses(PassManager &PM) { +void AddStandardCompilePasses(PassManagerBase &PM) { PM.add(createVerifierPass()); // Verify that input is correct addPass(PM, createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp @@ -328,7 +328,7 @@ InliningPass); } -void AddStandardLinkPasses(PassManager &PM) { +void AddStandardLinkPasses(PassManagerBase &PM) { PM.add(createVerifierPass()); // Verify that input is correct // If the -strip-debug command line option was specified, do it. @@ -422,9 +422,9 @@ if (TD) Passes.add(TD); - OwningPtr FPasses; + OwningPtr FPasses; if (OptLevelO1 || OptLevelO2 || OptLevelO3) { - FPasses.reset(new FunctionPassManager(M.get())); + FPasses.reset(new PassManager()); if (TD) FPasses->add(new TargetData(*TD)); } @@ -521,12 +521,8 @@ if (OptLevelO3) AddOptimizationPasses(Passes, *FPasses, 3); - if (OptLevelO1 || OptLevelO2 || OptLevelO3) { - FPasses->doInitialization(); - for (Module::iterator I = M.get()->begin(), E = M.get()->end(); - I != E; ++I) - FPasses->run(*I); - } + if (OptLevelO1 || OptLevelO2 || OptLevelO3) + FPasses->run(*M.get()); // Check that the module is well formed on completion of optimization if (!NoVerify && !VerifyEach) From gohman at apple.com Fri May 14 11:34:55 2010 From: gohman at apple.com (Dan Gohman) Date: Fri, 14 May 2010 16:34:55 -0000 Subject: [llvm-commits] [llvm] r103776 - in /llvm/trunk/lib/Target/X86: X86Instr64bit.td X86InstrInfo.td Message-ID: <20100514163455.CC933312800A@llvm.org> Author: djg Date: Fri May 14 11:34:55 2010 New Revision: 103776 URL: http://llvm.org/viewvc/llvm-project?rev=103776&view=rev Log: Add mayLoad and mayStore flags to instructions which missed them. 
Modified: llvm/trunk/lib/Target/X86/X86Instr64bit.td llvm/trunk/lib/Target/X86/X86InstrInfo.td Modified: llvm/trunk/lib/Target/X86/X86Instr64bit.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr64bit.td?rev=103776&r1=103775&r2=103776&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original) +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Fri May 14 11:34:55 2010 @@ -198,6 +198,7 @@ def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; + let mayLoad = 1 in def TCRETURNmi64 : I<0, Pseudo, (outs), (ins i64mem_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; @@ -208,6 +209,7 @@ def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops), "jmp{q}\t{*}$dst # TAILCALL", []>; + let mayLoad = 1 in def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops), "jmp{q}\t{*}$dst # TAILCALL", []>; } @@ -241,6 +243,7 @@ def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; +let mayLoad = 1 in def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS; @@ -1720,11 +1723,13 @@ def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "xadd{q}\t{$src, $dst|$dst, $src}", []>, TB; def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB; @@ -1736,7 +1741,7 @@ "xchg{q}\t{$src, %rax|%rax, $src}", []>; // Optimized codegen when the non-memory output is 
not used. -let Defs = [EFLAGS] in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { // FIXME: Use normal add / sub instructions and add lock prefix dynamically. def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "lock\n\t" Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=103776&r1=103775&r2=103776&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Fri May 14 11:34:55 2010 @@ -693,6 +693,7 @@ def TCRETURNri : I<0, Pseudo, (outs), (ins GR32_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; + let mayLoad = 1 in def TCRETURNmi : I<0, Pseudo, (outs), (ins i32mem_TC:$dst, i32imm:$offset, variable_ops), "#TC_RETURN $dst $offset", []>; @@ -706,6 +707,7 @@ def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), "jmp{l}\t{*}$dst # TAILCALL", []>; + let mayLoad = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops), "jmp{l}\t{*}$dst # TAILCALL", []>; } @@ -719,10 +721,12 @@ def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; +let mayLoad = 1 in def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS; def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; +let mayLoad = 1 in def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS; @@ -3865,12 +3869,14 @@ def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in { def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "xadd{b}\t{$src, 
$dst|$dst, $src}", []>, TB; def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", []>, TB; +} def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB; @@ -3879,12 +3885,14 @@ def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +let mayLoad = 1, mayStore = 1 in { def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB; def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize; def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB; +} let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst), @@ -3892,7 +3900,7 @@ // Optimized codegen when the non-memory output is not used. // FIXME: Use normal add / sub instructions and add lock prefix dynamically. -let Defs = [EFLAGS] in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in { def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), "lock\n\t" "add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK; From gohman at apple.com Fri May 14 11:42:16 2010 From: gohman at apple.com (Dan Gohman) Date: Fri, 14 May 2010 16:42:16 -0000 Subject: [llvm-commits] [llvm] r103777 - /llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Message-ID: <20100514164216.95DF7312800A@llvm.org> Author: djg Date: Fri May 14 11:42:16 2010 New Revision: 103777 URL: http://llvm.org/viewvc/llvm-project?rev=103777&view=rev Log: Don't use isBarrier for the PowerPC sync instruction. isBarrier is for control barriers, not memory ordering barriers. 
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=103777&r1=103776&r2=103777&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original) +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Fri May 14 11:42:16 2010 @@ -862,7 +862,6 @@ [(store F8RC:$frS, xaddr:$dst)]>; } -let isBarrier = 1 in def SYNC : XForm_24_sync<31, 598, (outs), (ins), "sync", LdStSync, [(int_ppc_sync)]>; From gohman at apple.com Fri May 14 11:46:02 2010 From: gohman at apple.com (Dan Gohman) Date: Fri, 14 May 2010 16:46:02 -0000 Subject: [llvm-commits] [llvm] r103778 - in /llvm/trunk/lib/Target: PowerPC/PPCInstrInfo.td X86/X86InstrInfo.td Message-ID: <20100514164602.7A7B5312800A@llvm.org> Author: djg Date: Fri May 14 11:46:02 2010 New Revision: 103778 URL: http://llvm.org/viewvc/llvm-project?rev=103778&view=rev Log: Set isTerminator on TRAP instructions. 
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td llvm/trunk/lib/Target/X86/X86InstrInfo.td Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=103778&r1=103777&r2=103778&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original) +++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Fri May 14 11:46:02 2010 @@ -662,7 +662,7 @@ [(PPCstcx GPRC:$rS, xoaddr:$dst)]>, isDOT; -let isBarrier = 1, hasCtrlDep = 1 in +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>; //===----------------------------------------------------------------------===// Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=103778&r1=103777&r2=103778&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Fri May 14 11:46:02 2010 @@ -871,7 +871,7 @@ let Defs = [RAX, RCX, RDX] in def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; -let isBarrier = 1, hasCtrlDep = 1 in { +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in { def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; } From stoklund at 2pi.dk Fri May 14 13:03:25 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Fri, 14 May 2010 18:03:25 -0000 Subject: [llvm-commits] [llvm] r103784 - in /llvm/trunk: lib/CodeGen/RegAllocFast.cpp test/CodeGen/X86/2008-09-18-inline-asm-2.ll Message-ID: <20100514180325.7B3DA312800A@llvm.org> Author: stoklund Date: Fri May 14 13:03:25 2010 New Revision: 103784 URL: http://llvm.org/viewvc/llvm-project?rev=103784&view=rev Log: Simplify the handling of physreg defs and uses in RegAllocFast. 
This adds extra security against using clobbered physregs, and it adds kill markers to physreg uses. Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103784&r1=103783&r2=103784&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Fri May 14 13:03:25 2010 @@ -135,9 +135,10 @@ LiveRegMap::iterator i, bool isKill); void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned VirtReg, bool isKill); - void killPhysReg(unsigned PhysReg); - void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned PhysReg, bool isKill); + + void usePhysReg(MachineOperand&); + void definePhysReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned PhysReg, RegState NewState); LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); LiveRegMap::iterator allocVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, @@ -146,8 +147,6 @@ unsigned OpNum, unsigned VirtReg, unsigned Hint); unsigned reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); - void reservePhysReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned PhysReg); void spillAll(MachineBasicBlock &MBB, MachineInstr *MI); void setPhysReg(MachineOperand &MO, unsigned PhysReg); }; @@ -264,75 +263,106 @@ spillVirtReg(MBB, MI, Dirty[i], false); } -/// killPhysReg - Kill any virtual register aliased by PhysReg. -void RAFast::killPhysReg(unsigned PhysReg) { - // Fast path for the normal case. - switch (unsigned VirtReg = PhysRegState[PhysReg]) { +/// usePhysReg - Handle the direct use of a physical register. +/// Check that the register is not used by a virtreg. +/// Kill the physreg, marking it free. 
+/// This may add implicit kills to MO->getParent() and invalidate MO. +void RAFast::usePhysReg(MachineOperand &MO) { + unsigned PhysReg = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && + "Bad usePhysReg operand"); + + switch (PhysRegState[PhysReg]) { case regDisabled: break; - case regFree: - return; case regReserved: PhysRegState[PhysReg] = regFree; + // Fall through + case regFree: + UsedInInstr.set(PhysReg); + MO.setIsKill(); return; default: - killVirtReg(VirtReg); - return; + // The physreg was allocated to a virtual register. That means to value we + // wanted has been clobbered. + llvm_unreachable("Instruction uses an allocated register"); } - // This is a disabled register, we have to check aliases. + // Maybe a superregister is reserved? for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { - switch (unsigned VirtReg = PhysRegState[Alias]) { + switch (PhysRegState[Alias]) { case regDisabled: - case regFree: break; case regReserved: + assert(TRI->isSuperRegister(PhysReg, Alias) && + "Instruction is not using a subregister of a reserved register"); + // Leave the superregister in the working set. PhysRegState[Alias] = regFree; + UsedInInstr.set(Alias); + MO.getParent()->addRegisterKilled(Alias, TRI, true); + return; + case regFree: + if (TRI->isSuperRegister(PhysReg, Alias)) { + // Leave the superregister in the working set. + UsedInInstr.set(Alias); + MO.getParent()->addRegisterKilled(Alias, TRI, true); + return; + } + // Some other alias was in the working set - clear it. + PhysRegState[Alias] = regDisabled; break; default: - killVirtReg(VirtReg); - break; + llvm_unreachable("Instruction uses an alias of an allocated register"); } } + + // All aliases are disabled, bring register into working set. + PhysRegState[PhysReg] = regFree; + UsedInInstr.set(PhysReg); + MO.setIsKill(); } -/// spillPhysReg - Spill any dirty virtual registers that aliases PhysReg. If -/// isKill is set, they are also killed. 
-void RAFast::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned PhysReg, bool isKill) { +/// definePhysReg - Mark PhysReg as reserved or free after spilling any +/// virtregs. This is very similar to defineVirtReg except the physreg is +/// reserved instead of allocated. +void RAFast::definePhysReg(MachineBasicBlock &MBB, MachineInstr *MI, + unsigned PhysReg, RegState NewState) { + UsedInInstr.set(PhysReg); switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; + default: + spillVirtReg(MBB, MI, VirtReg, true); + // Fall through. case regFree: - return; case regReserved: - if (isKill) - PhysRegState[PhysReg] = regFree; - return; - default: - spillVirtReg(MBB, MI, VirtReg, isKill); + PhysRegState[PhysReg] = NewState; return; } - // This is a disabled register, we have to check aliases. + // This is a disabled register, disable all aliases. + PhysRegState[PhysReg] = NewState; for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { + UsedInInstr.set(Alias); switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: - case regFree: - break; - case regReserved: - if (isKill) - PhysRegState[Alias] = regFree; break; default: - spillVirtReg(MBB, MI, VirtReg, isKill); + spillVirtReg(MBB, MI, VirtReg, true); + // Fall through. + case regFree: + case regReserved: + PhysRegState[Alias] = regDisabled; + if (TRI->isSuperRegister(PhysReg, Alias)) + return; break; } } } + /// assignVirtToPhysReg - This method updates local state so that we know /// that PhysReg is the proper container for VirtReg now. The physical /// register must not be used for anything else when this is called. @@ -538,47 +568,6 @@ return LR.PhysReg; } -/// reservePhysReg - Mark PhysReg as reserved. This is very similar to -/// defineVirtReg except the physreg is reserved instead of allocated. 
-void RAFast::reservePhysReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned PhysReg) { - UsedInInstr.set(PhysReg); - switch (unsigned VirtReg = PhysRegState[PhysReg]) { - case regDisabled: - break; - case regFree: - PhysRegState[PhysReg] = regReserved; - return; - case regReserved: - return; - default: - spillVirtReg(MBB, MI, VirtReg, true); - PhysRegState[PhysReg] = regReserved; - return; - } - - // This is a disabled register, disable all aliases. - for (const unsigned *AS = TRI->getAliasSet(PhysReg); - unsigned Alias = *AS; ++AS) { - UsedInInstr.set(Alias); - switch (unsigned VirtReg = PhysRegState[Alias]) { - case regDisabled: - case regFree: - break; - case regReserved: - // is a super register already reserved? - if (TRI->isSuperRegister(PhysReg, Alias)) - return; - break; - default: - spillVirtReg(MBB, MI, VirtReg, true); - break; - } - PhysRegState[Alias] = regDisabled; - } - PhysRegState[PhysReg] = regReserved; -} - // setPhysReg - Change MO the refer the PhysReg, considering subregs. void RAFast::setPhysReg(MachineOperand &MO, unsigned PhysReg) { if (unsigned Idx = MO.getSubReg()) { @@ -600,9 +589,9 @@ // Add live-in registers as live. for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(), E = MBB.livein_end(); I != E; ++I) - reservePhysReg(MBB, MII, *I); + definePhysReg(MBB, MII, *I, regReserved); - SmallVector VirtKills, PhysKills, PhysDefs; + SmallVector VirtKills, PhysDefs; SmallVector Coalesced; // Otherwise, sequentially allocate each instruction in the MBB. @@ -670,7 +659,6 @@ // First scan. // Mark physreg uses and early clobbers as used. - // Collect PhysKills. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -678,25 +666,14 @@ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) || ReservedRegs.test(Reg)) continue; if (MO.isUse()) { -#ifndef NDEBUG - // We are using a physreg directly. It had better not be clobbered by a - // virtreg. 
- assert(PhysRegState[Reg] <= regReserved && "Using clobbered physreg"); - if (PhysRegState[Reg] == regDisabled) - for (const unsigned *AS = TRI->getAliasSet(Reg); - unsigned Alias = *AS; ++AS) - assert(PhysRegState[Alias] <= regReserved && - "Physreg alias was clobbered"); -#endif - PhysKills.push_back(Reg); // Any clean physreg use is a kill. - UsedInInstr.set(Reg); + usePhysReg(MO); } else if (MO.isEarlyClobber()) { - spillPhysReg(MBB, MI, Reg, true); - UsedInInstr.set(Reg); + definePhysReg(MBB, MI, Reg, MO.isDead() ? regFree : regReserved); PhysDefs.push_back(Reg); } } + // Second scan. // Allocate virtreg uses and early clobbers. // Collect VirtKills @@ -723,11 +700,6 @@ killVirtReg(VirtKills[i]); VirtKills.clear(); - // Process physreg kills - for (unsigned i = 0, e = PhysKills.size(); i != e; ++i) - killPhysReg(PhysKills[i]); - PhysKills.clear(); - MRI->addPhysRegsUsed(UsedInInstr); // Track registers defined by instruction - early clobbers at this point. @@ -749,12 +721,8 @@ if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (ReservedRegs.test(Reg)) continue; - if (MO.isImplicit()) - spillPhysReg(MBB, MI, Reg, true); - else - reservePhysReg(MBB, MI, Reg); - if (MO.isDead()) - PhysKills.push_back(Reg); + definePhysReg(MBB, MI, Reg, (MO.isImplicit() || MO.isDead()) ? + regFree : regReserved); continue; } unsigned PhysReg = defineVirtReg(MBB, MI, i, Reg, CopySrc); @@ -777,11 +745,6 @@ killVirtReg(VirtKills[i]); VirtKills.clear(); - // Process physreg deads. 
- for (unsigned i = 0, e = PhysKills.size(); i != e; ++i) - killPhysReg(PhysKills[i]); - PhysKills.clear(); - MRI->addPhysRegsUsed(UsedInInstr); if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { Modified: llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll?rev=103784&r1=103783&r2=103784&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll (original) +++ llvm/trunk/test/CodeGen/X86/2008-09-18-inline-asm-2.ll Fri May 14 13:03:25 2010 @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86 | grep "#%ebp %esi %edi 8(%edx) %eax (%ebx)" ; RUN: llc < %s -march=x86 -regalloc=local | grep "#%edi %ebp %edx 8(%ebx) %eax (%esi)" -; RUN: llc < %s -march=x86 -regalloc=fast | grep "#%ecx %ebx %edx 8(%edi) %eax (%esi)" +; RUN: llc < %s -march=x86 -regalloc=fast | grep "#%edi %ebp %edx 8(%ebx) %eax (%esi)" ; The 1st, 2nd, 3rd and 5th registers above must all be different. 
The registers ; referenced in the 4th and 6th operands must not be the same as the 1st or 5th From sabre at nondot.org Fri May 14 13:29:36 2010 From: sabre at nondot.org (Chris Lattner) Date: Fri, 14 May 2010 18:29:36 -0000 Subject: [llvm-commits] [www-pubs] r103786 - in /www-pubs/trunk: 2010-05-01-ClangBSD.html 2010-05-01-ClangBSD.pdf pubs.js Message-ID: <20100514182936.D3E5F312800A@llvm.org> Author: lattner Date: Fri May 14 13:29:36 2010 New Revision: 103786 URL: http://llvm.org/viewvc/llvm-project?rev=103786&view=rev Log: add roman's ClangBSD talk Added: www-pubs/trunk/2010-05-01-ClangBSD.html www-pubs/trunk/2010-05-01-ClangBSD.pdf (with props) Modified: www-pubs/trunk/pubs.js Added: www-pubs/trunk/2010-05-01-ClangBSD.html URL: http://llvm.org/viewvc/llvm-project/www-pubs/trunk/2010-05-01-ClangBSD.html?rev=103786&view=auto ============================================================================== --- www-pubs/trunk/2010-05-01-ClangBSD.html (added) +++ www-pubs/trunk/2010-05-01-ClangBSD.html Fri May 14 13:29:36 2010 @@ -0,0 +1,37 @@ + + + + + + ClangBSD + + + +

    + ClangBSD +
    +
    + Roman Divacky +
    + +

    Abstract:

    +
    +

    +This talk describes the status and progress of LLVM and Clang +and integration into the FreeBSD base.

    + +
    + +

    Published:

    +
    + "ClangBSD", Roman Divacky,
    + BSDcan, May 2010.
    +
    + +

    Download Presentation:

    + + + + Added: www-pubs/trunk/2010-05-01-ClangBSD.pdf URL: http://llvm.org/viewvc/llvm-project/www-pubs/trunk/2010-05-01-ClangBSD.pdf?rev=103786&view=auto ============================================================================== Binary file - no diff available. Propchange: www-pubs/trunk/2010-05-01-ClangBSD.pdf ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Modified: www-pubs/trunk/pubs.js URL: http://llvm.org/viewvc/llvm-project/www-pubs/trunk/pubs.js?rev=103786&r1=103785&r2=103786&view=diff ============================================================================== --- www-pubs/trunk/pubs.js (original) +++ www-pubs/trunk/pubs.js Fri May 14 13:29:36 2010 @@ -1,6 +1,12 @@ // The array should be sorted reverse-chronologically, and will be displayed on // the page in the order listed. var PUBS = [ + {url: "2010-05-01-ClangBSD.html", + title: "ClangBSD", + published: "BSDcan 2010", + author: "Roman Divacky", + month: 5, + year: 2010}, {url: "2010-05-ICSE-QualityOfService.html", title: "Quality of Service Profiling", published: "Proc. of the 2010 IEEE 32st International Conference on Software Engineering (ICSE'10)", From daniel at zuster.org Fri May 14 13:53:40 2010 From: daniel at zuster.org (Daniel Dunbar) Date: Fri, 14 May 2010 18:53:40 -0000 Subject: [llvm-commits] [llvm] r103789 - in /llvm/trunk: lib/MC/MachObjectWriter.cpp test/MC/MachO/darwin-x86_64-reloc.s Message-ID: <20100514185340.49A72312800A@llvm.org> Author: ddunbar Date: Fri May 14 13:53:40 2010 New Revision: 103789 URL: http://llvm.org/viewvc/llvm-project?rev=103789&view=rev Log: MC/Mach-O/x86_64: Darwin's special "signed_N" relocation types should only be used to replace a normal relocation, not a reference to a GOT entry. 
Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s Modified: llvm/trunk/lib/MC/MachObjectWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MachObjectWriter.cpp?rev=103789&r1=103788&r2=103789&view=diff ============================================================================== --- llvm/trunk/lib/MC/MachObjectWriter.cpp (original) +++ llvm/trunk/lib/MC/MachObjectWriter.cpp Fri May 14 13:53:40 2010 @@ -609,10 +609,31 @@ Type = RIT_X86_64_GOTLoad; else Type = RIT_X86_64_GOT; - } else if (Modifier != MCSymbolRefExpr::VK_None) + } else if (Modifier != MCSymbolRefExpr::VK_None) { report_fatal_error("unsupported symbol modifier in relocation"); - else + } else { Type = RIT_X86_64_Signed; + + // The Darwin x86_64 relocation format has a problem where it cannot + // encode an address (L<foo> + <constant>) which is outside the atom + // containing L<foo>. Generally, this shouldn't occur but it does + // happen when we have a RIPrel instruction with data following the + // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel + // adjustment Darwin x86_64 uses, the offset is still negative and + // the linker has no way to recognize this. + // + // To work around this, Darwin uses several special relocation types + // to indicate the offsets. However, the specification or + // implementation of these seems to also be incomplete; they should + // adjust the addend as well based on the actual encoded instruction + // (the additional bias), but instead appear to just look at the + // final offset.
+ switch (-(Target.getConstant() + (1LL << Log2Size))) { + case 1: Type = RIT_X86_64_Signed1; break; + case 2: Type = RIT_X86_64_Signed2; break; + case 4: Type = RIT_X86_64_Signed4; break; + } + } } else { if (Modifier != MCSymbolRefExpr::VK_None) report_fatal_error("unsupported symbol modifier in branch " @@ -620,27 +641,6 @@ Type = RIT_X86_64_Branch; } - - // The Darwin x86_64 relocation format has a problem where it cannot - // encode an address (L<foo> + <constant>) which is outside the atom - // containing L<foo>. Generally, this shouldn't occur but it does happen - // when we have a RIPrel instruction with data following the relocation - // entry (e.g., movb $012, L0(%rip)). Even with the PCrel adjustment - // Darwin x86_64 uses, the offset is still negative and the linker has - // no way to recognize this. - // - // To work around this, Darwin uses several special relocation types to - // indicate the offsets. However, the specification or implementation of - // these seems to also be incomplete; they should adjust the addend as - // well based on the actual encoded instruction (the additional bias), - // but instead appear to just look at the final offset.
- if (IsRIPRel) { - switch (-(Target.getConstant() + (1LL << Log2Size))) { - case 1: Type = RIT_X86_64_Signed1; break; - case 2: Type = RIT_X86_64_Signed2; break; - case 4: Type = RIT_X86_64_Signed4; break; - } - } } else { if (Modifier == MCSymbolRefExpr::VK_GOT) { Type = RIT_X86_64_GOT; Modified: llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s?rev=103789&r1=103788&r2=103789&view=diff ============================================================================== --- llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s (original) +++ llvm/trunk/test/MC/MachO/darwin-x86_64-reloc.s Fri May 14 13:53:40 2010 @@ -84,6 +84,9 @@ .quad f6 .quad L6 + .text + cmpq $0, _foo@GOTPCREL(%rip) + // CHECK: ('cputype', 16777223) // CHECK: ('cpusubtype', 3) // CHECK: ('filetype', 1) @@ -97,9 +100,9 @@ // CHECK: ('size', 392) // CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('vm_addr', 0) -// CHECK: ('vm_size', 303) +// CHECK: ('vm_size', 311) // CHECK: ('file_offset', 528) -// CHECK: ('file_size', 303) +// CHECK: ('file_size', 311) // CHECK: ('maxprot', 7) // CHECK: ('initprot', 7) // CHECK: ('num_sections', 4) @@ -112,7 +115,7 @@ // CHECK: ('size', 40) // CHECK: ('offset', 528) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 832) +// CHECK: ('reloc_offset', 840) // CHECK: ('num_reloc', 5) // CHECK: ('flags', 0x0) // CHECK: ('reserved1', 0) @@ -136,16 +139,16 @@ // CHECK: (('word-0', 0x4), // CHECK: ('word-1', 0x4d000008)), // CHECK: ]) -// CHECK: ('_section_data', "\x00\x00\x00\x00\x04\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x17\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'\x01\x00\x00\x00\x00\x00\x00") +// CHECK: ('_section_data', '\x00\x00\x00\x00\x04\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x1f\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00/\x01\x00\x00\x00\x00\x00\x00') // CHECK: # Section 1 //
CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') // CHECK: ('address', 40) -// CHECK: ('size', 215) +// CHECK: ('size', 223) // CHECK: ('offset', 568) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 872) -// CHECK: ('num_reloc', 31) +// CHECK: ('reloc_offset', 880) +// CHECK: ('num_reloc', 32) // CHECK: ('flags', 0x80000400) // CHECK: ('reserved1', 0) // CHECK: ('reserved2', 0) @@ -153,108 +156,111 @@ // CHECK: ), // CHECK: ('_relocations', [ // CHECK: # Relocation 0 +// CHECK: (('word-0', 0xda), +// CHECK: ('word-1', 0x4d000000)), +// CHECK: # Relocation 1 // CHECK: (('word-0', 0xd3), // CHECK: ('word-1', 0x15000004)), -// CHECK: # Relocation 1 +// CHECK: # Relocation 2 // CHECK: (('word-0', 0xcd), // CHECK: ('word-1', 0x1d000006)), -// CHECK: # Relocation 2 +// CHECK: # Relocation 3 // CHECK: (('word-0', 0xc7), // CHECK: ('word-1', 0x15000004)), -// CHECK: # Relocation 3 +// CHECK: # Relocation 4 // CHECK: (('word-0', 0xc1), // CHECK: ('word-1', 0x15000001)), -// CHECK: # Relocation 4 +// CHECK: # Relocation 5 // CHECK: (('word-0', 0xa5), // CHECK: ('word-1', 0x5e000003)), -// CHECK: # Relocation 5 +// CHECK: # Relocation 6 // CHECK: (('word-0', 0xa5), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 6 +// CHECK: # Relocation 7 // CHECK: (('word-0', 0x9d), // CHECK: ('word-1', 0x5e000003)), -// CHECK: # Relocation 7 +// CHECK: # Relocation 8 // CHECK: (('word-0', 0x9d), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 8 +// CHECK: # Relocation 9 // CHECK: (('word-0', 0x95), // CHECK: ('word-1', 0xe000003)), -// CHECK: # Relocation 9 +// CHECK: # Relocation 10 // CHECK: (('word-0', 0x8d), // CHECK: ('word-1', 0xe000003)), -// CHECK: # Relocation 10 +// CHECK: # Relocation 11 // CHECK: (('word-0', 0x79), // CHECK: ('word-1', 0x8d000003)), -// CHECK: # Relocation 11 +// CHECK: # Relocation 12 // CHECK: (('word-0', 0x71), // CHECK: 
('word-1', 0x7d000003)), -// CHECK: # Relocation 12 +// CHECK: # Relocation 13 // CHECK: (('word-0', 0x69), // CHECK: ('word-1', 0x6d000003)), -// CHECK: # Relocation 13 +// CHECK: # Relocation 14 // CHECK: (('word-0', 0x63), // CHECK: ('word-1', 0x1d000003)), -// CHECK: # Relocation 14 +// CHECK: # Relocation 15 // CHECK: (('word-0', 0x5c), // CHECK: ('word-1', 0x1d000003)), -// CHECK: # Relocation 15 +// CHECK: # Relocation 16 // CHECK: (('word-0', 0x55), // CHECK: ('word-1', 0x5c000002)), -// CHECK: # Relocation 16 +// CHECK: # Relocation 17 // CHECK: (('word-0', 0x55), // CHECK: ('word-1', 0xc000000)), -// CHECK: # Relocation 17 +// CHECK: # Relocation 18 // CHECK: (('word-0', 0x4d), // CHECK: ('word-1', 0x5e000002)), -// CHECK: # Relocation 18 +// CHECK: # Relocation 19 // CHECK: (('word-0', 0x4d), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 19 +// CHECK: # Relocation 20 // CHECK: (('word-0', 0x45), // CHECK: ('word-1', 0x5e000002)), -// CHECK: # Relocation 20 +// CHECK: # Relocation 21 // CHECK: (('word-0', 0x45), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 21 +// CHECK: # Relocation 22 // CHECK: (('word-0', 0x3d), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 22 +// CHECK: # Relocation 23 // CHECK: (('word-0', 0x35), // CHECK: ('word-1', 0xe000000)), -// CHECK: # Relocation 23 +// CHECK: # Relocation 24 // CHECK: (('word-0', 0x2d), // CHECK: ('word-1', 0x8d000000)), -// CHECK: # Relocation 24 +// CHECK: # Relocation 25 // CHECK: (('word-0', 0x26), // CHECK: ('word-1', 0x6d000000)), -// CHECK: # Relocation 25 +// CHECK: # Relocation 26 // CHECK: (('word-0', 0x20), // CHECK: ('word-1', 0x1d000000)), -// CHECK: # Relocation 26 +// CHECK: # Relocation 27 // CHECK: (('word-0', 0x1a), // CHECK: ('word-1', 0x1d000000)), -// CHECK: # Relocation 27 +// CHECK: # Relocation 28 // CHECK: (('word-0', 0x14), // CHECK: ('word-1', 0x4d000000)), -// CHECK: # Relocation 28 +// CHECK: # Relocation 29 // CHECK: (('word-0', 0xe), // CHECK: 
('word-1', 0x3d000000)), -// CHECK: # Relocation 29 +// CHECK: # Relocation 30 // CHECK: (('word-0', 0x7), // CHECK: ('word-1', 0x2d000000)), -// CHECK: # Relocation 30 +// CHECK: # Relocation 31 // CHECK: (('word-0', 0x2), // CHECK: ('word-1', 0x2d000000)), // CHECK: ]) -// CHECK: ('_section_data', '\xc3\xe8\x00\x00\x00\x00\xe8\x04\x00\x00\x00H\x8b\x05\x00\x00\x00\x00\xff5\x00\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05\x04\x00\x00\x00\xc6\x05\xff\xff\xff\xff\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x8d\x05,\x00\x00\x00H\x8d\x05\x14\x00\x00\x00\x83\x05\x13\x00\x00\x00\x06f\x81\x05\x12\x00\x00\x00\xf4\x01\x81\x05\x10\x00\x00\x00\xf4\x01\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90,\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\xe4\xff\xff\xff\xff\xff\xff\xff\xd4\xff\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00\x00\x00\x00\x83\xc0\x00\x03\x04%\x03\x00\x00\x00\x8b\x05\x1f\xff\xff\xff\x8b\x05$\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05(\x00\x00\x00') +// CHECK: ('_section_data', '\xc3\xe8\x00\x00\x00\x00\xe8\x04\x00\x00\x00H\x8b\x05\x00\x00\x00\x00\xff5\x00\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x05\x04\x00\x00\x00\xc6\x05\xff\xff\xff\xff\x12\xc7\x05\xfc\xff\xff\xffxV4\x12\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00H\x8d\x05,\x00\x00\x00H\x8d\x05\x14\x00\x00\x00\x83\x05\x13\x00\x00\x00\x06f\x81\x05\x12\x00\x00\x00\xf4\x01\x81\x05\x10\x00\x00\x00\xf4\x01\x00\x00\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90,\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\xe4\xff\xff\xff\xff\xff\xff\xff\xd4\xff\xff\xff\xff\xff\xff\xff,\x00\x00\x00\x00\x00\x00\x00\x83\xc0\x00\x03\x04%\x03\x00\x00\x00\x8b\x05\x1f\xff\xff\xff\x8b\x05,\x00\x00\x00\x8b\x05\x00\x00\x00\x00\x8b\x050\x00\x00\x00H\x83=\xff\xff\xff\xff\x00') 
// CHECK: # Section 2 // CHECK: (('section_name', '__debug_frame\x00\x00\x00') // CHECK: ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00') -// CHECK: ('address', 255) +// CHECK: ('address', 263) // CHECK: ('size', 16) -// CHECK: ('offset', 783) +// CHECK: ('offset', 791) // CHECK: ('alignment', 0) -// CHECK: ('reloc_offset', 1120) +// CHECK: ('reloc_offset', 1136) // CHECK: ('num_reloc', 2) // CHECK: ('flags', 0x2000000) // CHECK: ('reserved1', 0) @@ -273,9 +279,9 @@ // CHECK: # Section 3 // CHECK: (('section_name', '__literal8\x00\x00\x00\x00\x00\x00') // CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') -// CHECK: ('address', 271) +// CHECK: ('address', 279) // CHECK: ('size', 32) -// CHECK: ('offset', 799) +// CHECK: ('offset', 807) // CHECK: ('alignment', 0) // CHECK: ('reloc_offset', 0) // CHECK: ('num_reloc', 0) @@ -292,9 +298,9 @@ // CHECK: # Load Command 1 // CHECK: (('command', 2) // CHECK: ('size', 24) -// CHECK: ('symoff', 1136) +// CHECK: ('symoff', 1152) // CHECK: ('nsyms', 9) -// CHECK: ('stroff', 1280) +// CHECK: ('stroff', 1296) // CHECK: ('strsize', 52) // CHECK: ('_string_data', '\x00_foobar\x00_ext_foo\x00_foo\x00_baz\x00_bar\x00_prev\x00_f2\x00_f3\x00f6\x00\x00\x00') // CHECK: ('_symbols', [ @@ -351,7 +357,7 @@ // CHECK: ('n_type', 0xe) // CHECK: ('n_sect', 4) // CHECK: ('n_desc', 0) -// CHECK: ('n_value', 287) +// CHECK: ('n_value', 295) // CHECK: ('_string', 'f6') // CHECK: ), // CHECK: # Symbol 7 From evan.cheng at apple.com Fri May 14 13:54:59 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Fri, 14 May 2010 18:54:59 -0000 Subject: [llvm-commits] [llvm] r103790 - in /llvm/trunk/lib/Target/ARM: ARMISelDAGToDAG.cpp NEONPreAllocPass.cpp Message-ID: <20100514185459.4FFB5312800A@llvm.org> Author: evancheng Date: Fri May 14 13:54:59 2010 New Revision: 103790 URL: http://llvm.org/viewvc/llvm-project?rev=103790&view=rev Log: Model VLD*_UPD and VLD*odd_UPD pair with REG_SEQUENCE. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=103790&r1=103789&r2=103790&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Fri May 14 13:54:59 2010 @@ -183,6 +183,11 @@ /// QuadDRegs - Form a quad register pair from a quad of D registers. /// SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + + /// OctoDRegs - Form 8 consecutive D registers. + /// + SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3, + SDValue V4, SDValue V5, SDValue V6, SDValue V7); }; } @@ -991,6 +996,26 @@ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); } +/// OctoDRegs - Form 8 consecutive D registers. 
+/// +SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3, + SDValue V4, SDValue V5, + SDValue V6, SDValue V7) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::DSUBREG_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::DSUBREG_3, MVT::i32); + SDValue SubReg4 = CurDAG->getTargetConstant(ARM::DSUBREG_4, MVT::i32); + SDValue SubReg5 = CurDAG->getTargetConstant(ARM::DSUBREG_5, MVT::i32); + SDValue SubReg6 = CurDAG->getTargetConstant(ARM::DSUBREG_6, MVT::i32); + SDValue SubReg7 = CurDAG->getTargetConstant(ARM::DSUBREG_7, MVT::i32); + const SDValue Ops[] ={ V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3, + V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16); +} + /// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type /// for a 64-bit subregister of the vector. 
static EVT GetNEONSubregVT(EVT VT) { @@ -1065,18 +1090,10 @@ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); } - SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, VT, RegSeq); - ReplaceUses(SDValue(N, 0), D0); - SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, VT, RegSeq); - ReplaceUses(SDValue(N, 1), D1); - - if (NumVecs > 2) { - SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_2, dl, VT, RegSeq); - ReplaceUses(SDValue(N, 2), D2); - } - if (NumVecs > 3) { - SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_3, dl, VT, RegSeq); - ReplaceUses(SDValue(N, 3), D3); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue D = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0+Vec, + dl, VT, RegSeq); + ReplaceUses(SDValue(N, Vec), D); } ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs)); return NULL; @@ -1134,10 +1151,36 @@ SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); Chain = SDValue(VLdB, NumVecs+1); - // Combine the even and odd subregs to produce the result. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec)); - ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + if (llvm::ModelWithRegSequence()) { + SDValue V0 = SDValue(VLdA, 0); + SDValue V1 = SDValue(VLdB, 0); + SDValue V2 = SDValue(VLdA, 1); + SDValue V3 = SDValue(VLdB, 1); + SDValue V4 = SDValue(VLdA, 2); + SDValue V5 = SDValue(VLdB, 2); + SDValue V6 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), + 0) + : SDValue(VLdA, 3); + SDValue V7 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), + 0) + : SDValue(VLdB, 3); + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, + V4, V5, V6, V7), 0); + + // Extract out the 3 / 4 Q registers. 
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue Q = CurDAG->getTargetExtractSubreg(ARM::QSUBREG_0+Vec, + dl, VT, RegSeq); + ReplaceUses(SDValue(N, Vec), Q); + } + } else { + // Combine the even and odd subregs to produce the result. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec)); + ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + } } } ReplaceUses(SDValue(N, NumVecs), Chain); Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=103790&r1=103789&r2=103790&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Fri May 14 13:54:59 2010 @@ -33,7 +33,8 @@ private: bool FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs) const; + unsigned FirstOpnd, unsigned NumRegs, + unsigned Offset, unsigned Stride) const; bool PreAllocNEONRegisters(MachineBasicBlock &MBB); }; @@ -340,13 +341,16 @@ bool NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs) const { + unsigned FirstOpnd, unsigned NumRegs, + unsigned Offset, unsigned Stride) const { MachineOperand &FMO = MI->getOperand(FirstOpnd); assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand"); unsigned VirtReg = FMO.getReg(); (void)VirtReg; assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "expected a virtual register"); + + unsigned LastSubIdx = 0; if (FMO.isDef()) { MachineInstr *RegSeq = 0; for (unsigned R = 0; R < NumRegs; ++R) { @@ -363,13 +367,28 @@ return false; if (RegSeq && RegSeq != UseMI) return false; + unsigned OpIdx = 1 + (Offset + R * Stride) * 2; + if (UseMI->getOperand(OpIdx).getReg() != VirtReg) + llvm_unreachable("Malformed REG_SEQUENCE instruction!"); + unsigned SubIdx = UseMI->getOperand(OpIdx + 1).getImm(); + 
if (LastSubIdx) { + if (LastSubIdx != SubIdx-Stride) + return false; + } else { + // Must start from arm_dsubreg_0 or arm_qsubreg_0. + if (SubIdx != (ARM::DSUBREG_0+Offset) && + SubIdx != (ARM::QSUBREG_0+Offset)) + return false; + } RegSeq = UseMI; + LastSubIdx = SubIdx; } - // Make sure trailing operands of REG_SEQUENCE are undef. - unsigned NumExps = (RegSeq->getNumOperands() - 1) / 2; - for (unsigned i = NumRegs * 2 + 1; i < NumExps; i += 2) { - const MachineOperand &MO = RegSeq->getOperand(i); + // In the case of vld3, etc., make sure the trailing operand of + // REG_SEQUENCE is an undef. + if (NumRegs == 3) { + unsigned OpIdx = 1 + (Offset + 3 * Stride) * 2; + const MachineOperand &MO = RegSeq->getOperand(OpIdx); unsigned VirtReg = MO.getReg(); MachineInstr *DefMI = MRI->getVRegDef(VirtReg); if (!DefMI || !DefMI->isImplicitDef()) @@ -379,7 +398,6 @@ } unsigned LastSrcReg = 0; - unsigned LastSubIdx = 0; SmallVector SubIds; for (unsigned R = 0; R < NumRegs; ++R) { const MachineOperand &MO = MI->getOperand(FirstOpnd + R); @@ -400,11 +418,12 @@ return false; unsigned SubIdx = DefMI->getOperand(2).getImm(); if (LastSubIdx) { - if (LastSubIdx != SubIdx-1) + if (LastSubIdx != SubIdx-Stride) return false; } else { // Must start from arm_dsubreg_0 or arm_qsubreg_0. 
- if (SubIdx != ARM::DSUBREG_0 && SubIdx != ARM::QSUBREG_0) + if (SubIdx != (ARM::DSUBREG_0+Offset) && + SubIdx != (ARM::QSUBREG_0+Offset)) return false; } SubIds.push_back(SubIdx); @@ -447,7 +466,7 @@ if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) continue; if (llvm::ModelWithRegSequence() && - FormsRegSequence(MI, FirstOpnd, NumRegs)) + FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) continue; MachineBasicBlock::iterator NextI = llvm::next(MBBI); From enderby at apple.com Fri May 14 14:16:03 2010 From: enderby at apple.com (Kevin Enderby) Date: Fri, 14 May 2010 19:16:03 -0000 Subject: [llvm-commits] [llvm] r103791 - in /llvm/trunk: lib/Target/X86/X86InstrInfo.td test/MC/AsmParser/X86/x86_32-new-encoder.s Message-ID: <20100514191603.38666312800A@llvm.org> Author: enderby Date: Fri May 14 14:16:02 2010 New Revision: 103791 URL: http://llvm.org/viewvc/llvm-project?rev=103791&view=rev Log: Fix so "int3" is correctly accepted, added "into" and fixed "int" with an argument, like "int $4", to not get an Assertion error. 
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=103791&r1=103790&r2=103791&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Fri May 14 14:16:02 2010 @@ -542,8 +542,10 @@ } // Trap -def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>; -def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; +def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; +def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>; +// FIXME: need to make sure that "int $3" matches int3 +def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize; def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>; Modified: llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s?rev=103791&r1=103790&r2=103791&view=diff ============================================================================== --- llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s (original) +++ llvm/trunk/test/MC/AsmParser/X86/x86_32-new-encoder.s Fri May 14 14:16:02 2010 @@ -56,3 +56,16 @@ movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0xa1,A,A,A,A] movl 0, %eax // CHECK: movl 0, %eax # encoding: [0xa1,A,A,A,A] +// rdar://7973775 +into +// CHECK: into +// CHECK: encoding: [0xce] +int3 +// CHECK: int3 +// CHECK: encoding: [0xcc] +int $4 +// CHECK: int $4 +// CHECK: encoding: [0xcd,0x04] +int $255 +// CHECK: int $255 +// CHECK: encoding: [0xcd,0xff] From stoklund at 2pi.dk Fri May 14 15:28:33 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Fri, 14 May 2010 20:28:33 -0000 Subject: 
[llvm-commits] [llvm] r103797 - in /llvm/trunk/lib/CodeGen: MachineVerifier.cpp RegAllocFast.cpp Message-ID: <20100514202833.267FE312800A@llvm.org> Author: stoklund Date: Fri May 14 15:28:32 2010 New Revision: 103797 URL: http://llvm.org/viewvc/llvm-project?rev=103797&view=rev Log: When verifying two-address instructions, check the following: - Kill is implicit when use and def registers are identical. - Only virtual registers can differ. Add a -verify-fast-regalloc to run the verifier before the fast allocator. Modified: llvm/trunk/lib/CodeGen/MachineVerifier.cpp llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/MachineVerifier.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineVerifier.cpp?rev=103797&r1=103796&r2=103797&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/MachineVerifier.cpp (original) +++ llvm/trunk/lib/CodeGen/MachineVerifier.cpp Fri May 14 15:28:32 2010 @@ -47,7 +47,7 @@ MachineVerifier(Pass *pass, bool allowDoubleDefs) : PASS(pass), allowVirtDoubleDefs(allowDoubleDefs), - allowPhysDoubleDefs(allowDoubleDefs), + allowPhysDoubleDefs(true), OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS")) {} @@ -552,19 +552,23 @@ regsLiveInButUnused.erase(Reg); bool isKill = false; - if (MO->isKill()) { - isKill = true; - // Tied operands on two-address instuctions MUST NOT have a flag. - if (MI->isRegTiedToDefOperand(MONum)) + unsigned defIdx; + if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { + // A two-addr use counts as a kill if use and def are the same. + unsigned DefReg = MI->getOperand(defIdx).getReg(); + if (Reg == DefReg) { + isKill = true; + // ANd in that case an explicit kill flag is not allowed. + if (MO->isKill()) report("Illegal kill flag on two-address instruction operand", MO, MONum); - } else { - // TwoAddress instr modifying a reg is treated as kill+def. 
- unsigned defIdx; - if (MI->isRegTiedToDefOperand(MONum, &defIdx) && - MI->getOperand(defIdx).getReg() == Reg) - isKill = true; - } + } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + report("Two-address instruction operands must be identical", + MO, MONum); + } + } else + isKill = MO->isKill(); + if (isKill) { addRegWithSubRegs(regsKilled, Reg); Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103797&r1=103796&r2=103797&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Fri May 14 15:28:32 2010 @@ -35,6 +35,9 @@ #include using namespace llvm; +static cl::opt VerifyFastRegalloc("verify-fast-regalloc", cl::Hidden, + cl::desc("Verify machine code before fast regalloc")); + STATISTIC(NumStores, "Number of stores added"); STATISTIC(NumLoads , "Number of loads added"); @@ -778,6 +781,8 @@ DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" << "********** Function: " << ((Value*)Fn.getFunction())->getName() << '\n'); + if (VerifyFastRegalloc) + Fn.verify(); MF = &Fn; MRI = &MF->getRegInfo(); TM = &Fn.getTarget(); From dpatel at apple.com Fri May 14 16:01:35 2010 From: dpatel at apple.com (Devang Patel) Date: Fri, 14 May 2010 21:01:35 -0000 Subject: [llvm-commits] [llvm] r103798 - in /llvm/trunk: include/llvm/Analysis/DebugInfo.h lib/Analysis/DebugInfo.cpp lib/CodeGen/AsmPrinter/DwarfDebug.cpp lib/CodeGen/AsmPrinter/DwarfDebug.h Message-ID: <20100514210135.79794312800A@llvm.org> Author: dpatel Date: Fri May 14 16:01:35 2010 New Revision: 103798 URL: http://llvm.org/viewvc/llvm-project?rev=103798&view=rev Log: Add support to preserve type info for the variables that are removed by the optimizer. 
Modified: llvm/trunk/include/llvm/Analysis/DebugInfo.h llvm/trunk/lib/Analysis/DebugInfo.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Modified: llvm/trunk/include/llvm/Analysis/DebugInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/DebugInfo.h?rev=103798&r1=103797&r2=103798&view=diff ============================================================================== --- llvm/trunk/include/llvm/Analysis/DebugInfo.h (original) +++ llvm/trunk/include/llvm/Analysis/DebugInfo.h Fri May 14 16:01:35 2010 @@ -677,7 +677,7 @@ DIVariable CreateVariable(unsigned Tag, DIDescriptor Context, StringRef Name, DIFile F, unsigned LineNo, - DIType Ty); + DIType Ty, bool OptimizedBuild = false); /// CreateComplexVariable - Create a new descriptor for the specified /// variable which has a complex address expression for its address. Modified: llvm/trunk/lib/Analysis/DebugInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/DebugInfo.cpp?rev=103798&r1=103797&r2=103798&view=diff ============================================================================== --- llvm/trunk/lib/Analysis/DebugInfo.cpp (original) +++ llvm/trunk/lib/Analysis/DebugInfo.cpp Fri May 14 16:01:35 2010 @@ -1028,7 +1028,7 @@ StringRef Name, DIFile F, unsigned LineNo, - DIType Ty) { + DIType Ty, bool OptimizedBuild) { Value *Elts[] = { GetTagConstant(Tag), Context, @@ -1037,7 +1037,12 @@ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), Ty, }; - return DIVariable(MDNode::get(VMContext, &Elts[0], 6)); + MDNode *Node = MDNode::get(VMContext, &Elts[0], 6); + if (OptimizedBuild) { + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.lv"); + NMD->addOperand(Node); + } + return DIVariable(Node); } Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=103798&r1=103797&r2=103798&view=diff 
============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Fri May 14 16:01:35 2010 @@ -193,6 +193,9 @@ DbgVariable *getAbstractVariable() const { return AbstractVar; } void setDIE(DIE *D) { TheDIE = D; } DIE *getDIE() const { return TheDIE; } + bool hasLocation() { + return DbgValueMInsn || FrameIndex != ~0U; + } }; //===----------------------------------------------------------------------===// @@ -1632,16 +1635,18 @@ MachineLocation Location; unsigned FrameReg; const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - int Offset = RI->getFrameIndexReference(*Asm->MF, DV->getFrameIndex(), - FrameReg); - Location.set(FrameReg, Offset); - - if (VD.hasComplexAddress()) - addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else if (VD.isBlockByrefVariable()) - addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); - else - addAddress(VariableDie, dwarf::DW_AT_location, Location); + if (DV->hasLocation()) { + int Offset = RI->getFrameIndexReference(*Asm->MF, DV->getFrameIndex(), + FrameReg); + Location.set(FrameReg, Offset); + + if (VD.hasComplexAddress()) + addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else if (VD.isBlockByrefVariable()) + addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + else + addAddress(VariableDie, dwarf::DW_AT_location, Location); + } } } @@ -2122,14 +2127,15 @@ } /// collectVariableInfo - Populate DbgScope entries with variables' info. 
-void DwarfDebug::collectVariableInfo() { +void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { const LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); - + SmallPtrSet Processed; MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) { const MDNode *Var = VI->first; if (!Var) continue; + Processed.insert(Var); DIVariable DV(Var); const std::pair &VP = VI->second; @@ -2184,12 +2190,27 @@ if (Scope == 0) continue; + Processed.insert(DV); DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn, DL); DbgVariable *RegVar = new DbgVariable(DV, MInsn, AbsDbgVariable); DbgValueStartMap[MInsn] = RegVar; Scope->addVariable(RegVar); } } + + // Collect info for variables that were optimized out. + if (NamedMDNode *NMD = + MF->getFunction()->getParent()->getNamedMetadata("llvm.dbg.lv")) { + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIVariable DV(cast_or_null(NMD->getOperand(i))); + if (!Processed.insert(DV)) + continue; + DbgScope *Scope = DbgScopeMap.lookup(DV.getContext()); + if (Scope) + Scope->addVariable(new DbgVariable(DV, ~0U, NULL)); + } + } + } /// beginScope - Process beginning of a scope. 
@@ -2542,7 +2563,7 @@ if (!MMI->hasDebugInfo()) return; if (!extractScopeInformation()) return; - collectVariableInfo(); + collectVariableInfo(MF); FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=103798&r1=103797&r2=103798&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Fri May 14 16:01:35 2010 @@ -545,7 +545,7 @@ bool extractScopeInformation(); /// collectVariableInfo - Populate DbgScope entries with variables' info. - void collectVariableInfo(); + void collectVariableInfo(const MachineFunction *); public: //===--------------------------------------------------------------------===// From gohman at apple.com Fri May 14 16:01:44 2010 From: gohman at apple.com (Dan Gohman) Date: Fri, 14 May 2010 21:01:44 -0000 Subject: [llvm-commits] [llvm] r103799 - /llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Message-ID: <20100514210144.54D3B312800A@llvm.org> Author: djg Date: Fri May 14 16:01:44 2010 New Revision: 103799 URL: http://llvm.org/viewvc/llvm-project?rev=103799&view=rev Log: Lowering of atomic instructions can result in operands being used more than once. If ISel had put a kill flag on one of them, it's not valid to transfer the kill flag to each new instance. 
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=103799&r1=103798&r2=103799&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri May 14 16:01:44 2010 @@ -7988,9 +7988,15 @@ MachineOperand& dest1Oper = bInstr->getOperand(0); MachineOperand& dest2Oper = bInstr->getOperand(1); MachineOperand* argOpers[2 + X86AddrNumOperands]; - for (int i=0; i < 2 + X86AddrNumOperands; ++i) + for (int i=0; i < 2 + X86AddrNumOperands; ++i) { argOpers[i] = &bInstr->getOperand(i+2); + // We use some of the operands multiple times, so conservatively just + // clear any kill flags that might be present. + if (argOpers[i]->isReg() && argOpers[i]->isUse()) + argOpers[i]->setIsKill(false); + } + // x86 address has 5 operands: base, index, scale, displacement, and segment. int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] From dpatel at apple.com Fri May 14 16:03:38 2010 From: dpatel at apple.com (Devang Patel) Date: Fri, 14 May 2010 21:03:38 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103800 - /llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Message-ID: <20100514210338.8B988312800A@llvm.org> Author: dpatel Date: Fri May 14 16:03:38 2010 New Revision: 103800 URL: http://llvm.org/viewvc/llvm-project?rev=103800&view=rev Log: Preserve variable type information during an optimized build even if the variable is removed by the optimizer. r103798 is the related code gen patch. 
Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Modified: llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp?rev=103800&r1=103799&r2=103800&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/llvm-debug.cpp Fri May 14 16:03:38 2010 @@ -464,7 +464,7 @@ llvm::DIVariable D = DebugFactory.CreateVariable(Tag, VarScope, Name, getOrCreateFile(Loc.file), - Loc.line, Ty); + Loc.line, Ty, optimize); Instruction *Call = DebugFactory.InsertDeclare(AI, D, Builder.GetInsertBlock()); From dpatel at apple.com Fri May 14 16:04:45 2010 From: dpatel at apple.com (Devang Patel) Date: Fri, 14 May 2010 21:04:45 -0000 Subject: [llvm-commits] [llvm] r103801 - /llvm/trunk/test/FrontendC/2010-05-14-Optimized-VarType.c Message-ID: <20100514210445.8D757312800A@llvm.org> Author: dpatel Date: Fri May 14 16:04:45 2010 New Revision: 103801 URL: http://llvm.org/viewvc/llvm-project?rev=103801&view=rev Log: Test case for r103800. Added: llvm/trunk/test/FrontendC/2010-05-14-Optimized-VarType.c Added: llvm/trunk/test/FrontendC/2010-05-14-Optimized-VarType.c URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/FrontendC/2010-05-14-Optimized-VarType.c?rev=103801&view=auto ============================================================================== --- llvm/trunk/test/FrontendC/2010-05-14-Optimized-VarType.c (added) +++ llvm/trunk/test/FrontendC/2010-05-14-Optimized-VarType.c Fri May 14 16:04:45 2010 @@ -0,0 +1,23 @@ +// RUN: %llvmgcc %s -Os -S -g -o - | grep DW_TAG_structure_type | count 1 +// Variable 'a' is optimized but the debug info should preserve its type info. 
+#include + +struct foo { + int Attribute; +}; + +void *getfoo(void) __attribute__((noinline)); + +void *getfoo(void) +{ + int *x = malloc(sizeof(int)); + *x = 42; + return (void *)x; +} + +int main(int argc, char *argv[]) { + struct foo *a = (struct foo *)getfoo(); + + return a->Attribute; +} + From evan.cheng at apple.com Fri May 14 16:15:44 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Fri, 14 May 2010 14:15:44 -0700 Subject: [llvm-commits] [llvm] r103798 - in /llvm/trunk: include/llvm/Analysis/DebugInfo.h lib/Analysis/DebugInfo.cpp lib/CodeGen/AsmPrinter/DwarfDebug.cpp lib/CodeGen/AsmPrinter/DwarfDebug.h In-Reply-To: <20100514210135.79794312800A@llvm.org> References: <20100514210135.79794312800A@llvm.org> Message-ID: <55F0D15A-D8CE-4CF5-B49F-02FEC333745B@apple.com> Hi Devang, Would it be better if the caller of CreateVariable() created the llvm.dbg.lv metadata instead? This solution seems to be polluting the interface. Evan On May 14, 2010, at 2:01 PM, Devang Patel wrote: > > Modified: llvm/trunk/lib/Analysis/DebugInfo.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/DebugInfo.cpp?rev=103798&r1=103797&r2=103798&view=diff > ============================================================================== > --- llvm/trunk/lib/Analysis/DebugInfo.cpp (original) > +++ llvm/trunk/lib/Analysis/DebugInfo.cpp Fri May 14 16:01:35 2010 > @@ -1028,7 +1028,7 @@ > StringRef Name, > DIFile F, > unsigned LineNo, > - DIType Ty) { > + DIType Ty, bool OptimizedBuild) { > Value *Elts[] = { > GetTagConstant(Tag), > Context, > @@ -1037,7 +1037,12 @@ > ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), > Ty, > }; > - return DIVariable(MDNode::get(VMContext, &Elts[0], 6)); > + MDNode *Node = MDNode::get(VMContext, &Elts[0], 6); > + if (OptimizedBuild) { > + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.lv"); > + NMD->addOperand(Node); > + } > + return DIVariable(Node); > } > > > > Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp > URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=103798&r1=103797&r2=103798&view=diff > ============================================================================== > --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) > +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Fri May 14 16:01:35 2010 > @@ -193,6 +193,9 @@ > DbgVariable *getAbstractVariable() const { return AbstractVar; } > void setDIE(DIE *D) { TheDIE = D; } > DIE *getDIE() const { return TheDIE; } > + bool hasLocation() { > + return DbgValueMInsn || FrameIndex != ~0U; > + } > }; > > //===----------------------------------------------------------------------===// > @@ -1632,16 +1635,18 @@ > MachineLocation Location; > unsigned FrameReg; > const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); > - int Offset = RI->getFrameIndexReference(*Asm->MF, DV->getFrameIndex(), > - FrameReg); > - Location.set(FrameReg, Offset); > - > - if (VD.hasComplexAddress()) > - addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); > - else if (VD.isBlockByrefVariable()) > - addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); > - else > - addAddress(VariableDie, dwarf::DW_AT_location, Location); > + if (DV->hasLocation()) { > + int Offset = RI->getFrameIndexReference(*Asm->MF, DV->getFrameIndex(), > + FrameReg); > + Location.set(FrameReg, Offset); > + > + if (VD.hasComplexAddress()) > + addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); > + else if (VD.isBlockByrefVariable()) > + addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); > + else > + addAddress(VariableDie, dwarf::DW_AT_location, Location); > + } > } > } > > @@ -2122,14 +2127,15 @@ > } > > /// collectVariableInfo - Populate DbgScope entries with variables' info. 
> -void DwarfDebug::collectVariableInfo() { > +void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { > const LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); > - > + SmallPtrSet Processed; > MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); > for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), > VE = VMap.end(); VI != VE; ++VI) { > const MDNode *Var = VI->first; > if (!Var) continue; > + Processed.insert(Var); > DIVariable DV(Var); > const std::pair &VP = VI->second; > > @@ -2184,12 +2190,27 @@ > if (Scope == 0) > continue; > > + Processed.insert(DV); > DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn, DL); > DbgVariable *RegVar = new DbgVariable(DV, MInsn, AbsDbgVariable); > DbgValueStartMap[MInsn] = RegVar; > Scope->addVariable(RegVar); > } > } > + > + // Collect info for variables that were optimized out. > + if (NamedMDNode *NMD = > + MF->getFunction()->getParent()->getNamedMetadata("llvm.dbg.lv")) { > + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { > + DIVariable DV(cast_or_null(NMD->getOperand(i))); > + if (!Processed.insert(DV)) > + continue; > + DbgScope *Scope = DbgScopeMap.lookup(DV.getContext()); > + if (Scope) > + Scope->addVariable(new DbgVariable(DV, ~0U, NULL)); > + } > + } > + > } > > /// beginScope - Process beginning of a scope. 
> @@ -2542,7 +2563,7 @@ > if (!MMI->hasDebugInfo()) return; > if (!extractScopeInformation()) return; > > - collectVariableInfo(); > + collectVariableInfo(MF); > > FunctionBeginSym = Asm->GetTempSymbol("func_begin", > Asm->getFunctionNumber()); > > Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=103798&r1=103797&r2=103798&view=diff > ============================================================================== > --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) > +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Fri May 14 16:01:35 2010 > @@ -545,7 +545,7 @@ > bool extractScopeInformation(); > > /// collectVariableInfo - Populate DbgScope entries with variables' info. > - void collectVariableInfo(); > + void collectVariableInfo(const MachineFunction *); > > public: > //===--------------------------------------------------------------------===// > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From isanbard at gmail.com Fri May 14 16:14:32 2010 From: isanbard at gmail.com (Bill Wendling) Date: Fri, 14 May 2010 21:14:32 -0000 Subject: [llvm-commits] [llvm] r103802 - in /llvm/trunk: include/llvm/CodeGen/ lib/CodeGen/ lib/CodeGen/AsmPrinter/ lib/CodeGen/SelectionDAG/ lib/Target/ lib/Target/ARM/ lib/Target/Blackfin/ lib/Target/CellSPU/ lib/Target/MBlaze/ lib/Target/MBlaze/AsmPrinter/ lib/Target/Mips/ lib/Target/Mips/AsmPrinter/ lib/Target/PowerPC/ lib/Target/SystemZ/ lib/Target/X86/ Message-ID: <20100514211433.3D6BC312800A@llvm.org> Author: void Date: Fri May 14 16:14:32 2010 New Revision: 103802 URL: http://llvm.org/viewvc/llvm-project?rev=103802&view=rev Log: Rename "HasCalls" in MachineFrameInfo to "AdjustsStack" to better describe what the variable actually tracks. 
N.B., several back-ends are using "HasCalls" as being synonymous with something that adjusts the stack. This isn't 100% correct and should be looked into. Modified: llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp llvm/trunk/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp llvm/trunk/lib/Target/MBlaze/MBlazeRegisterInfo.cpp llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp llvm/trunk/lib/Target/TargetMachine.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Modified: llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineFrameInfo.h Fri May 14 16:14:32 2010 @@ -152,8 +152,12 @@ /// unsigned MaxAlignment; - /// HasCalls - Set to true if this function has any function calls. This is - /// only valid during and after prolog/epilog code insertion. + /// AdjustsStack - Set to true if this function adjusts the stack -- e.g., + /// when calling another function. This is only valid during and after + /// prolog/epilog code insertion. + bool AdjustsStack; + + /// HasCalls - Set to true if this function has any function calls. 
bool HasCalls; /// StackProtectorIdx - The frame index for the stack protector. @@ -189,6 +193,7 @@ StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0; HasVarSizedObjects = false; FrameAddressTaken = false; + AdjustsStack = false; HasCalls = false; StackProtectorIdx = -1; MaxCallFrameSize = 0; @@ -313,9 +318,13 @@ /// void setMaxAlignment(unsigned Align) { MaxAlignment = Align; } - /// hasCalls - Return true if the current function has no function calls. - /// This is only valid during or after prolog/epilog code emission. - /// + /// AdjustsStack - Return true if this function adjusts the stack -- e.g., + /// when calling another function. This is only valid during and after + /// prolog/epilog code insertion. + bool adjustsStack() const { return AdjustsStack; } + void setAdjustsStack(bool V) { AdjustsStack = V; } + + /// hasCalls - Return true if the current function has any function calls. bool hasCalls() const { return HasCalls; } void setHasCalls(bool V) { HasCalls = V; } Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.cpp Fri May 14 16:14:32 2010 @@ -189,7 +189,7 @@ // EH Frame, but some environments do not handle weak absolute symbols. If // UnwindTablesMandatory is set we cannot do this optimization; the unwind // info is to be available for non-EH uses. 
- if (!EHFrameInfo.hasCalls && !UnwindTablesMandatory && + if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory && (!TheFunc->isWeakForLinker() || !Asm->MAI->getWeakDefDirective() || TLOF.getSupportsWeakOmittedEHFrame())) { @@ -949,11 +949,12 @@ TLOF.isFunctionEHFrameSymbolPrivate()); // Save EH frame information - EHFrames.push_back(FunctionEHFrameInfo(FunctionEHSym, - Asm->getFunctionNumber(), - MMI->getPersonalityIndex(), - Asm->MF->getFrameInfo()->hasCalls(), - !MMI->getLandingPads().empty(), - MMI->getFrameMoves(), - Asm->MF->getFunction())); + EHFrames. + push_back(FunctionEHFrameInfo(FunctionEHSym, + Asm->getFunctionNumber(), + MMI->getPersonalityIndex(), + Asm->MF->getFrameInfo()->adjustsStack(), + !MMI->getLandingPads().empty(), + MMI->getFrameMoves(), + Asm->MF->getFunction())); } Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfException.h Fri May 14 16:14:32 2010 @@ -45,7 +45,7 @@ MCSymbol *FunctionEHSym; // L_foo.eh unsigned Number; unsigned PersonalityIndex; - bool hasCalls; + bool adjustsStack; bool hasLandingPads; std::vector Moves; const Function *function; @@ -55,7 +55,7 @@ const std::vector &M, const Function *f): FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P), - hasCalls(hC), hasLandingPads(hL), Moves(M), function (f) { } + adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { } }; std::vector EHFrames; Modified: llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- 
llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp (original) +++ llvm/trunk/lib/CodeGen/PrologEpilogInserter.cpp Fri May 14 16:14:32 2010 @@ -58,8 +58,9 @@ FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); FrameConstantRegMap.clear(); - // Calculate the MaxCallFrameSize and HasCalls variables for the function's - // frame information. Also eliminates call frame pseudo instructions. + // Calculate the MaxCallFrameSize and AdjustsStack variables for the + // function's frame information. Also eliminates call frame pseudo + // instructions. calculateCallsInformation(Fn); // Allow the target machine to make some adjustments to the function @@ -91,8 +92,8 @@ // Add prolog and epilog code to the function. This function is required // to align the stack frame as necessary for any stack variables or - // called functions. Because of this, calculateCalleeSavedRegisters - // must be called before this function in order to set the HasCalls + // called functions. Because of this, calculateCalleeSavedRegisters() + // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. if (!F->hasFnAttr(Attribute::Naked)) insertPrologEpilogCode(Fn); @@ -126,7 +127,7 @@ } #endif -/// calculateCallsInformation - Calculate the MaxCallFrameSize and HasCalls +/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack /// variables for the function's frame information and eliminate call frame /// pseudo instructions. 
void PEI::calculateCallsInformation(MachineFunction &Fn) { @@ -134,7 +135,7 @@ MachineFrameInfo *MFI = Fn.getFrameInfo(); unsigned MaxCallFrameSize = 0; - bool HasCalls = MFI->hasCalls(); + bool AdjustsStack = MFI->adjustsStack(); // Get the function call frame set-up and tear-down instruction opcode int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); @@ -154,15 +155,15 @@ " instructions should have a single immediate argument!"); unsigned Size = I->getOperand(0).getImm(); if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; - HasCalls = true; + AdjustsStack = true; FrameSDOps.push_back(I); } else if (I->isInlineAsm()) { // An InlineAsm might be a call; assume it is to get the stack frame // aligned correctly for calls. - HasCalls = true; + AdjustsStack = true; } - MFI->setHasCalls(HasCalls); + MFI->setAdjustsStack(AdjustsStack); MFI->setMaxCallFrameSize(MaxCallFrameSize); for (std::vector::iterator @@ -577,7 +578,7 @@ // If we have reserved argument space for call sites in the function // immediately on entry to the current function, count it as part of the // overall stack size. - if (MFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn)) + if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn)) Offset += MFI->getMaxCallFrameSize(); // Round up the size to a multiple of the alignment. If the function has @@ -586,13 +587,14 @@ // otherwise, for leaf functions, align to the TransientStackAlignment // value. unsigned StackAlign; - if (MFI->hasCalls() || MFI->hasVarSizedObjects() || + if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) StackAlign = TFI.getStackAlignment(); else StackAlign = TFI.getTransientStackAlignment(); - // If the frame pointer is eliminated, all frame offsets will be relative - // to SP not FP; align to MaxAlign so this works. + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. 
StackAlign = std::max(StackAlign, MaxAlign); unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); @@ -602,7 +604,6 @@ MFI->setStackSize(Offset - LocalAreaOffset); } - /// insertPrologEpilogCode - Scan the function for modified callee saved /// registers, insert spill code for these callee saved registers, then add /// prolog and epilog code to the function. @@ -621,7 +622,6 @@ } } - /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical /// register references and actual offsets. /// Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Fri May 14 16:14:32 2010 @@ -28,6 +28,7 @@ #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -605,6 +606,19 @@ delete Scheduler; } + // Determine if there are any calls in this machine function. + MachineFrameInfo *MFI = MF->getFrameInfo(); + if (!MFI->hasCalls()) { + for (MachineBasicBlock::iterator + I = BB->begin(), E = BB->end(); I != E; ++I) { + const TargetInstrDesc &TID = TM.getInstrInfo()->get(I->getOpcode()); + if (I->isInlineAsm() || (TID.isCall() && !TID.isReturn())) { + MFI->setHasCalls(true); + break; + } + } + } + // Free the SelectionDAG state, now that we're finished with it. 
CurDAG->clear(); Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp Fri May 14 16:14:32 2010 @@ -543,7 +543,7 @@ /// bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return ((DisableFramePointerElim(MF) && MFI->hasCalls())|| + return ((DisableFramePointerElim(MF) && MFI->adjustsStack())|| needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); @@ -571,7 +571,7 @@ bool ARMBaseRegisterInfo:: cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - if (DisableFramePointerElim(MF) && MFI->hasCalls()) + if (DisableFramePointerElim(MF) && MFI->adjustsStack()) return true; return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || needsStackRealignment(MF); Modified: llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/Blackfin/BlackfinRegisterInfo.cpp Fri May 14 16:14:32 2010 @@ -111,7 +111,7 @@ bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); return DisableFramePointerElim(MF) || - MFI->hasCalls() || MFI->hasVarSizedObjects(); + MFI->adjustsStack() || MFI->hasVarSizedObjects(); } bool BlackfinRegisterInfo:: @@ -394,7 +394,7 @@ } if (!hasFP(MF)) { - assert(!MFI->hasCalls() && + assert(!MFI->adjustsStack() 
&& "FP elimination on a non-leaf function is not supported"); adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize); return; @@ -435,7 +435,7 @@ assert(FrameSize%4 == 0 && "Misaligned frame size"); if (!hasFP(MF)) { - assert(!MFI->hasCalls() && + assert(!MFI->adjustsStack() && "FP elimination on a non-leaf function is not supported"); adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize); return; Modified: llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/CellSPU/SPURegisterInfo.cpp Fri May 14 16:14:32 2010 @@ -469,7 +469,7 @@ && "SPURegisterInfo::emitPrologue: FrameSize not aligned"); // the "empty" frame size is 16 - just the register scavenger spill slot - if (FrameSize > 16 || MFI->hasCalls()) { + if (FrameSize > 16 || MFI->adjustsStack()) { FrameSize = -(FrameSize + SPUFrameInfo::minStackSize()); if (hasDebugInfo) { // Mark effective beginning of when frame pointer becomes valid. 
@@ -569,7 +569,7 @@ && "SPURegisterInfo::emitEpilogue: FrameSize not aligned"); // the "empty" frame size is 16 - just the register scavenger spill slot - if (FrameSize > 16 || MFI->hasCalls()) { + if (FrameSize > 16 || MFI->adjustsStack()) { FrameSize = FrameSize + SPUFrameInfo::minStackSize(); if (isInt<10>(FrameSize + LinkSlotOffset)) { // Reload $lr, adjust $sp by required amount Modified: llvm/trunk/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp Fri May 14 16:14:32 2010 @@ -155,7 +155,7 @@ CPUBitmask |= (1 << MBlazeRegisterInfo:: getRegisterNumbering(RI.getFrameRegister(*MF))); - if (MFI->hasCalls()) + if (MFI->adjustsStack()) CPUBitmask |= (1 << MBlazeRegisterInfo:: getRegisterNumbering(RI.getRARegister())); Modified: llvm/trunk/lib/Target/MBlaze/MBlazeRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MBlaze/MBlazeRegisterInfo.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/Target/MBlaze/MBlazeRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/MBlaze/MBlazeRegisterInfo.cpp Fri May 14 16:14:32 2010 @@ -220,7 +220,7 @@ StackOffset += RegSize; } - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { MBlazeFI->setRAStackOffset(0); MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); @@ -311,8 +311,8 @@ unsigned StackSize = MFI->getStackSize(); // No need to allocate space on the stack. 
- if (StackSize == 0 && !MFI->hasCalls()) return; - if (StackSize < 28 && MFI->hasCalls()) StackSize = 28; + if (StackSize == 0 && !MFI->adjustsStack()) return; + if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28; int FPOffset = MBlazeFI->getFPStackOffset(); int RAOffset = MBlazeFI->getRAStackOffset(); @@ -323,7 +323,7 @@ // Save the return address only if the function isnt a leaf one. // swi R15, R1, stack_loc - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI)) .addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1); } @@ -366,14 +366,14 @@ // Restore the return address only if the function isnt a leaf one. // lwi R15, R1, stack_loc - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15) .addImm(RAOffset).addReg(MBlaze::R1); } // Get the number of bytes from FrameInfo int StackSize = (int) MFI->getStackSize(); - if (StackSize < 28 && MFI->hasCalls()) StackSize = 28; + if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28; // adjust stack. 
// addi R1, R1, imm Modified: llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp (original) +++ llvm/trunk/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp Fri May 14 16:14:32 2010 @@ -145,7 +145,7 @@ CPUBitmask |= (1 << MipsRegisterInfo:: getRegisterNumbering(RI.getFrameRegister(*MF))); - if (MFI->hasCalls()) + if (MFI->adjustsStack()) CPUBitmask |= (1 << MipsRegisterInfo:: getRegisterNumbering(RI.getRARegister())); Modified: llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/Mips/MipsRegisterInfo.cpp Fri May 14 16:14:32 2010 @@ -288,7 +288,7 @@ // Stack locations for FP and RA. If only one of them is used, // the space must be allocated for both, otherwise no space at all. - if (hasFP(MF) || MFI->hasCalls()) { + if (hasFP(MF) || MFI->adjustsStack()) { // FP stack location MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); @@ -302,7 +302,7 @@ MipsFI->setRAStackOffset(StackOffset); StackOffset += RegSize; - if (MFI->hasCalls()) + if (MFI->adjustsStack()) TopCPUSavedRegOff += RegSize; } @@ -407,7 +407,7 @@ unsigned StackSize = MFI->getStackSize(); // No need to allocate space on the stack. - if (StackSize == 0 && !MFI->hasCalls()) return; + if (StackSize == 0 && !MFI->adjustsStack()) return; int FPOffset = MipsFI->getFPStackOffset(); int RAOffset = MipsFI->getRAStackOffset(); @@ -425,7 +425,7 @@ // Save the return address only if the function isnt a leaf one. 
// sw $ra, stack_loc($sp) - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { BuildMI(MBB, MBBI, dl, TII.get(Mips::SW)) .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP); } @@ -477,7 +477,7 @@ // Restore the return address only if the function isnt a leaf one. // lw $ra, stack_loc($sp) - if (MFI->hasCalls()) { + if (MFI->adjustsStack()) { BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA) .addImm(RAOffset).addReg(Mips::SP); } Modified: llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/PowerPC/PPCRegisterInfo.cpp Fri May 14 16:14:32 2010 @@ -1009,7 +1009,7 @@ if (!DisableRedZone && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->hasCalls() && // No calls. + !MFI->adjustsStack() && // No calls. (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame MFI->setStackSize(0); Modified: llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp Fri May 14 16:14:32 2010 @@ -102,7 +102,7 @@ Offset += StackSize - TFI.getOffsetOfLocalArea(); // Skip the register save area if we generated the stack frame. 
- if (StackSize || MFI->hasCalls()) + if (StackSize || MFI->adjustsStack()) Offset -= TFI.getOffsetOfLocalArea(); return Offset; @@ -163,14 +163,14 @@ for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i) HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]); - if (FFI->hasCalls()) + if (FFI->adjustsStack()) /* FIXME: function is varargs */ /* FIXME: function grabs RA */ /* FIXME: function calls eh_return */ MRI.setPhysRegUsed(SystemZ::R14D); if (HighFPRsUsed || - FFI->hasCalls() || + FFI->adjustsStack() || FFI->getObjectIndexEnd() != 0 || // Contains automatic variables FFI->hasVarSizedObjects() // Function calls dynamic alloca's /* FIXME: function is varargs */) @@ -234,7 +234,7 @@ DL = MBBI->getDebugLoc(); // adjust stack pointer: R15 -= numbytes - if (StackSize || MFI->hasCalls()) { + if (StackSize || MFI->adjustsStack()) { assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) && "Invalid stack frame calculation!"); emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII); @@ -286,7 +286,7 @@ // During callee-saved restores emission stack frame was not yet finialized // (and thus - the stack size was unknown). Tune the offset having full stack // size in hands. 
- if (StackSize || MFI->hasCalls()) { + if (StackSize || MFI->adjustsStack()) { assert((MBBI->getOpcode() == SystemZ::MOV64rmm || MBBI->getOpcode() == SystemZ::MOV64rm) && "Expected to see callee-save register restore code"); Modified: llvm/trunk/lib/Target/TargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/TargetMachine.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/Target/TargetMachine.cpp (original) +++ llvm/trunk/lib/Target/TargetMachine.cpp Fri May 14 16:14:32 2010 @@ -277,7 +277,7 @@ return true; if (NoFramePointerElimNonLeaf) { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->hasCalls(); + return MFI->adjustsStack(); } return false; } Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri May 14 16:14:32 2010 @@ -5272,7 +5272,7 @@ } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. - MFI->setHasCalls(true); + MFI->setAdjustsStack(true); SDValue Flag = Chain.getValue(1); return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag); Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=103802&r1=103801&r2=103802&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Fri May 14 16:14:32 2010 @@ -898,7 +898,7 @@ if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. 
- !MFI->hasCalls() && // No calls. + !MFI->adjustsStack() && // No calls. !Subtarget->isTargetWin64()) { // Win64 has no Red Zone uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; From isanbard at gmail.com Fri May 14 16:17:30 2010 From: isanbard at gmail.com (Bill Wendling) Date: Fri, 14 May 2010 21:17:30 -0000 Subject: [llvm-commits] [llvm] r103804 - /llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Message-ID: <20100514211730.2078A312800A@llvm.org> Author: void Date: Fri May 14 16:17:29 2010 New Revision: 103804 URL: http://llvm.org/viewvc/llvm-project?rev=103804&view=rev Log: Fix comment. Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=103804&r1=103803&r2=103804&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Fri May 14 16:17:29 2010 @@ -898,7 +898,7 @@ if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->adjustsStack() && // No calls. + !MFI->adjustsStack() && // Doesn't adjust stack. !Subtarget->isTargetWin64()) { // Win64 has no Red Zone uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; From grosbach at apple.com Fri May 14 16:18:04 2010 From: grosbach at apple.com (Jim Grosbach) Date: Fri, 14 May 2010 21:18:04 -0000 Subject: [llvm-commits] [llvm] r103805 - /llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp Message-ID: <20100514211804.591ED312800A@llvm.org> Author: grosbach Date: Fri May 14 16:18:04 2010 New Revision: 103805 URL: http://llvm.org/viewvc/llvm-project?rev=103805&view=rev Log: add cmd line option to leave dbgvalues in during post-RA scheduling.
Useful while debugging what's mishandled about them in the post-RA pass. Modified: llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp Modified: llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp?rev=103805&r1=103804&r2=103805&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp (original) +++ llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp Fri May 14 16:18:04 2010 @@ -80,6 +80,12 @@ cl::desc("Debug control MBBs that are scheduled"), cl::init(0), cl::Hidden); +static cl::opt +EnablePostRADbgValue("post-RA-dbg-value", + cl::desc("Enable processing of dbg_value in post-RA"), + cl::init(false), cl::Hidden); + + AntiDepBreaker::~AntiDepBreaker() { } namespace { @@ -269,11 +275,14 @@ // scheduler has some sort of problem with DebugValue instructions that // causes an assertion in LeaksContext.h to fail occasionally. Just // remove all those instructions for now. 
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ) { - MachineInstr *MI = &*I++; - if (MI->isDebugValue()) - MI->eraseFromParent(); + if (!EnablePostRADbgValue) { + DEBUG(dbgs() << "*** Maintaining DbgValues in PostRAScheduler\n"); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ) { + MachineInstr *MI = &*I++; + if (MI->isDebugValue()) + MI->eraseFromParent(); + } } // Schedule each sequence of instructions not interrupted by a label From dpatel at apple.com Fri May 14 16:19:19 2010 From: dpatel at apple.com (Devang Patel) Date: Fri, 14 May 2010 14:19:19 -0700 Subject: [llvm-commits] [llvm] r103798 - in /llvm/trunk: include/llvm/Analysis/DebugInfo.h lib/Analysis/DebugInfo.cpp lib/CodeGen/AsmPrinter/DwarfDebug.cpp lib/CodeGen/AsmPrinter/DwarfDebug.h In-Reply-To: <55F0D15A-D8CE-4CF5-B49F-02FEC333745B@apple.com> References: <20100514210135.79794312800A@llvm.org> <55F0D15A-D8CE-4CF5-B49F-02FEC333745B@apple.com> Message-ID: On May 14, 2010, at 2:15 PM, Evan Cheng wrote: > Hi Devang, > > Would it be better if the caller of CreateVariable() create the llvm.dbg.lv metadata instead? This solution seems to be polluting the interface. Technically caller of CreateVariable(), the front end, does not know how variables' debug info is encoded in llvm IR. DebugInfo interface is responsible to encapsulate this detail.
- Devang > > Evan > > On May 14, 2010, at 2:01 PM, Devang Patel wrote: > >> >> Modified: llvm/trunk/lib/Analysis/DebugInfo.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/DebugInfo.cpp?rev=103798&r1=103797&r2=103798&view=diff >> ============================================================================== >> --- llvm/trunk/lib/Analysis/DebugInfo.cpp (original) >> +++ llvm/trunk/lib/Analysis/DebugInfo.cpp Fri May 14 16:01:35 2010 >> @@ -1028,7 +1028,7 @@ >> StringRef Name, >> DIFile F, >> unsigned LineNo, >> - DIType Ty) { >> + DIType Ty, bool OptimizedBuild) { >> Value *Elts[] = { >> GetTagConstant(Tag), >> Context, >> @@ -1037,7 +1037,12 @@ >> ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), >> Ty, >> }; >> - return DIVariable(MDNode::get(VMContext, &Elts[0], 6)); >> + MDNode *Node = MDNode::get(VMContext, &Elts[0], 6); >> + if (OptimizedBuild) { >> + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.lv"); >> + NMD->addOperand(Node); >> + } >> + return DIVariable(Node); >> } >> >> >> >> Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=103798&r1=103797&r2=103798&view=diff >> ============================================================================== >> --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) >> +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Fri May 14 16:01:35 2010 >> @@ -193,6 +193,9 @@ >> DbgVariable *getAbstractVariable() const { return AbstractVar; } >> void setDIE(DIE *D) { TheDIE = D; } >> DIE *getDIE() const { return TheDIE; } >> + bool hasLocation() { >> + return DbgValueMInsn || FrameIndex != ~0U; >> + } >> }; >> >> //===----------------------------------------------------------------------===// >> @@ -1632,16 +1635,18 @@ >> MachineLocation Location; >> unsigned FrameReg; >> const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); >> - int Offset = 
RI->getFrameIndexReference(*Asm->MF, DV->getFrameIndex(), >> - FrameReg); >> - Location.set(FrameReg, Offset); >> - >> - if (VD.hasComplexAddress()) >> - addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); >> - else if (VD.isBlockByrefVariable()) >> - addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); >> - else >> - addAddress(VariableDie, dwarf::DW_AT_location, Location); >> + if (DV->hasLocation()) { >> + int Offset = RI->getFrameIndexReference(*Asm->MF, DV->getFrameIndex(), >> + FrameReg); >> + Location.set(FrameReg, Offset); >> + >> + if (VD.hasComplexAddress()) >> + addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); >> + else if (VD.isBlockByrefVariable()) >> + addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); >> + else >> + addAddress(VariableDie, dwarf::DW_AT_location, Location); >> + } >> } >> } >> >> @@ -2122,14 +2127,15 @@ >> } >> >> /// collectVariableInfo - Populate DbgScope entries with variables' info. >> -void DwarfDebug::collectVariableInfo() { >> +void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { >> const LLVMContext &Ctx = Asm->MF->getFunction()->getContext(); >> - >> + SmallPtrSet Processed; >> MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); >> for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(), >> VE = VMap.end(); VI != VE; ++VI) { >> const MDNode *Var = VI->first; >> if (!Var) continue; >> + Processed.insert(Var); >> DIVariable DV(Var); >> const std::pair &VP = VI->second; >> >> @@ -2184,12 +2190,27 @@ >> if (Scope == 0) >> continue; >> >> + Processed.insert(DV); >> DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn, DL); >> DbgVariable *RegVar = new DbgVariable(DV, MInsn, AbsDbgVariable); >> DbgValueStartMap[MInsn] = RegVar; >> Scope->addVariable(RegVar); >> } >> } >> + >> + // Collect info for variables that were optimized out. 
>> + if (NamedMDNode *NMD = >> + MF->getFunction()->getParent()->getNamedMetadata("llvm.dbg.lv")) { >> + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { >> + DIVariable DV(cast_or_null(NMD->getOperand(i))); >> + if (!Processed.insert(DV)) >> + continue; >> + DbgScope *Scope = DbgScopeMap.lookup(DV.getContext()); >> + if (Scope) >> + Scope->addVariable(new DbgVariable(DV, ~0U, NULL)); >> + } >> + } >> + >> } >> >> /// beginScope - Process beginning of a scope. >> @@ -2542,7 +2563,7 @@ >> if (!MMI->hasDebugInfo()) return; >> if (!extractScopeInformation()) return; >> >> - collectVariableInfo(); >> + collectVariableInfo(MF); >> >> FunctionBeginSym = Asm->GetTempSymbol("func_begin", >> Asm->getFunctionNumber()); >> >> Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h?rev=103798&r1=103797&r2=103798&view=diff >> ============================================================================== >> --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h (original) >> +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.h Fri May 14 16:01:35 2010 >> @@ -545,7 +545,7 @@ >> bool extractScopeInformation(); >> >> /// collectVariableInfo - Populate DbgScope entries with variables' info. 
>> - void collectVariableInfo(); >> + void collectVariableInfo(const MachineFunction *); >> >> public: >> //===--------------------------------------------------------------------===// >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > From grosbach at apple.com Fri May 14 16:19:48 2010 From: grosbach at apple.com (Jim Grosbach) Date: Fri, 14 May 2010 21:19:48 -0000 Subject: [llvm-commits] [llvm] r103806 - /llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp Message-ID: <20100514211948.4DB31312800A@llvm.org> Author: grosbach Date: Fri May 14 16:19:48 2010 New Revision: 103806 URL: http://llvm.org/viewvc/llvm-project?rev=103806&view=rev Log: 80 column and trailing whitespace cleanup Modified: llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp Modified: llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp?rev=103806&r1=103805&r2=103806&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp (original) +++ llvm/trunk/lib/CodeGen/PostRASchedulerList.cpp Fri May 14 16:19:48 2010 @@ -120,7 +120,7 @@ /// AvailableQueue - The priority queue to use for the available SUnits. /// LatencyPriorityQueue AvailableQueue; - + /// PendingQueue - This contains all of the instructions whose operands have /// been issued, but their results are not ready yet (due to the latency of /// the operation). Once the operands becomes available, the instruction is @@ -164,7 +164,7 @@ /// Schedule - Schedule the instruction range using list scheduling. /// void Schedule(); - + /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. 
/// @@ -185,7 +185,7 @@ void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); void ListScheduleTopDown(); void StartBlockForKills(MachineBasicBlock *BB); - + // ToggleKillFlag - Toggle a register operand kill flag. Other // adjustments may be made to the instruction if necessary. Return // true if the operand has been deleted, false if not. @@ -233,9 +233,10 @@ // Check for antidep breaking override... if (EnableAntiDepBreaking.getPosition() > 0) { - AntiDepMode = (EnableAntiDepBreaking == "all") ? TargetSubtarget::ANTIDEP_ALL : - (EnableAntiDepBreaking == "critical") ? TargetSubtarget::ANTIDEP_CRITICAL : - TargetSubtarget::ANTIDEP_NONE; + AntiDepMode = (EnableAntiDepBreaking == "all") ? + TargetSubtarget::ANTIDEP_ALL : + (EnableAntiDepBreaking == "critical") + ? TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE; } DEBUG(dbgs() << "PostRAScheduler\n"); @@ -246,10 +247,10 @@ ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ? (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) : (ScheduleHazardRecognizer *)new SimpleHazardRecognizer(); - AntiDepBreaker *ADB = + AntiDepBreaker *ADB = ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) : - ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? + ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL)); SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA); @@ -319,7 +320,7 @@ return true; } - + /// StartBlock - Initialize register live-range state for scheduling in /// this block. /// @@ -340,10 +341,10 @@ BuildSchedGraph(AA); if (AntiDepBreak != NULL) { - unsigned Broken = + unsigned Broken = AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos, InsertPosIndex); - + if (Broken != 0) { // We made changes. Update the dependency graph. 
// Theoretically we could update the graph in place: @@ -356,7 +357,7 @@ EntrySU = SUnit(); ExitSU = SUnit(); BuildSchedGraph(AA); - + NumFixedAnti += Broken; } } @@ -434,7 +435,7 @@ MO.setIsKill(true); return false; } - + // If MO itself is live, clear the kill flag... if (KillIndices[MO.getReg()] != ~0u) { MO.setIsKill(false); @@ -473,7 +474,7 @@ BitVector ReservedRegs = TRI->getReservedRegs(MF); StartBlockForKills(MBB); - + // Examine block from end to start... unsigned Count = MBB->size(); for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); @@ -493,9 +494,9 @@ if (!MO.isDef()) continue; // Ignore two-addr defs. if (MI->isRegTiedToUseOperand(i)) continue; - + KillIndices[Reg] = ~0u; - + // Repeat for all subregs. for (const unsigned *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { @@ -530,17 +531,17 @@ if (kill) kill = (KillIndices[Reg] == ~0u); } - + if (MO.isKill() != kill) { DEBUG(dbgs() << "Fixing " << MO << " in "); // Warning: ToggleKillFlag may invalidate MO. ToggleKillFlag(MI, MO); DEBUG(MI->dump()); } - + killedRegs.insert(Reg); } - + // Mark any used register (that is not using undef) and subregs as // now live... for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -550,7 +551,7 @@ if ((Reg == 0) || ReservedRegs.test(Reg)) continue; KillIndices[Reg] = Count; - + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) { KillIndices[*Subreg] = Count; @@ -582,7 +583,7 @@ // available. This is the max of the start time of all predecessors plus // their latencies. SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency()); - + // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. 
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) @@ -603,9 +604,9 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); - + Sequence.push_back(SU); - assert(CurCycle >= SU->getDepth() && + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); SU->setDepthToAtLeast(CurCycle); @@ -618,7 +619,7 @@ /// schedulers. void SchedulePostRATDList::ListScheduleTopDown() { unsigned CurCycle = 0; - + // We're scheduling top-down but we're visiting the regions in // bottom-up order, so we don't know the hazards at the start of a // region. So assume no hazards (this should usually be ok as most From grosbach at apple.com Fri May 14 16:20:46 2010 From: grosbach at apple.com (Jim Grosbach) Date: Fri, 14 May 2010 21:20:46 -0000 Subject: [llvm-commits] [llvm] r103807 - /llvm/trunk/lib/CodeGen/CriticalAntiDepBreaker.cpp Message-ID: <20100514212046.EAFF0312800A@llvm.org> Author: grosbach Date: Fri May 14 16:20:46 2010 New Revision: 103807 URL: http://llvm.org/viewvc/llvm-project?rev=103807&view=rev Log: Remove trailing whitespace Modified: llvm/trunk/lib/CodeGen/CriticalAntiDepBreaker.cpp Modified: llvm/trunk/lib/CodeGen/CriticalAntiDepBreaker.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CriticalAntiDepBreaker.cpp?rev=103807&r1=103806&r2=103807&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/CriticalAntiDepBreaker.cpp (original) +++ llvm/trunk/lib/CodeGen/CriticalAntiDepBreaker.cpp Fri May 14 16:20:46 2010 @@ -26,7 +26,7 @@ using namespace llvm; CriticalAntiDepBreaker:: -CriticalAntiDepBreaker(MachineFunction& MFi) : +CriticalAntiDepBreaker(MachineFunction& MFi) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), TRI(MF.getTarget().getRegisterInfo()), @@ -172,7 +172,7 @@ unsigned Reg = MO.getReg(); if (Reg == 0) continue; const TargetRegisterClass *NewRC = 0; - + if (i 
< MI->getDesc().getNumOperands()) NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI); @@ -422,7 +422,7 @@ // breaking anti-dependence edges that aren't going to significantly // impact the overall schedule. There are a limited number of registers // and we want to save them for the important edges. - // + // // TODO: Instructions with multiple defs could have multiple // anti-dependencies. The current code here only knows how to break one // edge per instruction. Note that we'd have to be able to break all of From anton at korobeynikov.info Fri May 14 16:26:45 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Sat, 15 May 2010 01:26:45 +0400 Subject: [llvm-commits] [llvm] r103802 - in /llvm/trunk: include/llvm/CodeGen/ lib/CodeGen/ lib/CodeGen/AsmPrinter/ lib/CodeGen/SelectionDAG/ lib/Target/ lib/Target/ARM/ lib/Target/Blackfin/ lib/Target/CellSPU/ lib/Target/MBlaze/ lib/Target/MBlaze/AsmPrin Message-ID: > N.B., several back-ends are using "HasCalls" as being synonymous for something > that adjusts the stack. This isn't 100% correct and should be looked into. I'm not quite sure for other backends, but systemz backend was correct. The semantics was precisely "has calls", the stack frame is always created for non-leaf functions in order to provide register save area for callees. x86 (win64) semantics is definitely the same - please switch to hasCalls() there as well, we need to create register save area regardless whether there are local stack variables or not. 
-- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From wendling at apple.com Fri May 14 16:27:51 2010 From: wendling at apple.com (Bill Wendling) Date: Fri, 14 May 2010 14:27:51 -0700 Subject: [llvm-commits] [llvm] r103802 - in /llvm/trunk: include/llvm/CodeGen/ lib/CodeGen/ lib/CodeGen/AsmPrinter/ lib/CodeGen/SelectionDAG/ lib/Target/ lib/Target/ARM/ lib/Target/Blackfin/ lib/Target/CellSPU/ lib/Target/MBlaze/ lib/Target/MBlaze/AsmPrinter/ lib/Target/Mips/ lib/Target/Mips/AsmPrinter/ lib/Target/PowerPC/ lib/Target/SystemZ/ lib/Target/X86/ In-Reply-To: <20100514211433.3D6BC312800A@llvm.org> References: <20100514211433.3D6BC312800A@llvm.org> Message-ID: On May 14, 2010, at 2:14 PM, Bill Wendling wrote: > Author: void > Date: Fri May 14 16:14:32 2010 > New Revision: 103802 > > URL: http://llvm.org/viewvc/llvm-project?rev=103802&view=rev > Log: > Rename "HasCalls" in MachineFrameInfo to "AdjustsStack" to better describe what > the variable actually tracks. > > N.B., several back-ends are using "HasCalls" as being synonymous for something > that adjusts the stack. This isn't 100% correct and should be looked into. > > [snip...] 
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=103802&r1=103801&r2=103802&view=diff > ============================================================================== > --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original) > +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Fri May 14 16:14:32 2010 > @@ -28,6 +28,7 @@ > #include "llvm/CodeGen/FastISel.h" > #include "llvm/CodeGen/GCStrategy.h" > #include "llvm/CodeGen/GCMetadata.h" > +#include "llvm/CodeGen/MachineFrameInfo.h" > #include "llvm/CodeGen/MachineFunction.h" > #include "llvm/CodeGen/MachineInstrBuilder.h" > #include "llvm/CodeGen/MachineModuleInfo.h" > @@ -605,6 +606,19 @@ > delete Scheduler; > } > > + // Determine if there are any calls in this machine function. > + MachineFrameInfo *MFI = MF->getFrameInfo(); > + if (!MFI->hasCalls()) { > + for (MachineBasicBlock::iterator > + I = BB->begin(), E = BB->end(); I != E; ++I) { > + const TargetInstrDesc &TID = TM.getInstrInfo()->get(I->getOpcode()); > + if (I->isInlineAsm() || (TID.isCall() && !TID.isReturn())) { > + MFI->setHasCalls(true); > + break; > + } > + } > + } > + > // Free the SelectionDAG state, now that we're finished with it. > CurDAG->clear(); > > This slipped in there. I'm going to resurrect the HasCalls variable to indicate what its name suggests. Stay tuned! -bw From anton at korobeynikov.info Fri May 14 16:28:05 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Sat, 15 May 2010 01:28:05 +0400 Subject: [llvm-commits] [llvm] r103804 - /llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp In-Reply-To: <20100514211730.2078A312800A@llvm.org> References: <20100514211730.2078A312800A@llvm.org> Message-ID: > Fix comment. The comment was correct. Red zone is created for leaf functions only (= no calls), please revert. 
-- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From isanbard at gmail.com Fri May 14 16:28:25 2010 From: isanbard at gmail.com (Bill Wendling) Date: Fri, 14 May 2010 21:28:25 -0000 Subject: [llvm-commits] [llvm] r103808 - in /llvm/trunk/lib/Target: TargetMachine.cpp X86/X86RegisterInfo.cpp Message-ID: <20100514212825.1921E312800A@llvm.org> Author: void Date: Fri May 14 16:28:24 2010 New Revision: 103808 URL: http://llvm.org/viewvc/llvm-project?rev=103808&view=rev Log: Revert r103804. The comment is correct. Modified: llvm/trunk/lib/Target/TargetMachine.cpp llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Modified: llvm/trunk/lib/Target/TargetMachine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/TargetMachine.cpp?rev=103808&r1=103807&r2=103808&view=diff ============================================================================== --- llvm/trunk/lib/Target/TargetMachine.cpp (original) +++ llvm/trunk/lib/Target/TargetMachine.cpp Fri May 14 16:28:24 2010 @@ -277,7 +277,7 @@ return true; if (NoFramePointerElimNonLeaf) { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->adjustsStack(); + return MFI->hasCalls(); } return false; } Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=103808&r1=103807&r2=103808&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Fri May 14 16:28:24 2010 @@ -898,7 +898,7 @@ if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->adjustsStack() && // Doesn't adjust stack. + !MFI->adjustsStack() && // No calls. 
!Subtarget->isTargetWin64()) { // Win64 has no Red Zone uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; From bob.wilson at apple.com Fri May 14 16:31:28 2010 From: bob.wilson at apple.com (Bob Wilson) Date: Fri, 14 May 2010 21:31:28 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103811 - in /llvm-gcc-4.2/trunk/gcc: config.gcc config/arm/arm-protos.h config/arm/arm.c config/arm/arm_neon.h config/arm/arm_neon_gcc.h config/arm/arm_neon_std.h config/arm/neon-gen-std.ml config/arm/neon-gen.ml Message-ID: <20100514213129.1680B312800A@llvm.org> Author: bwilson Date: Fri May 14 16:31:28 2010 New Revision: 103811 URL: http://llvm.org/viewvc/llvm-project?rev=103811&view=rev Log: llvm-gcc's implementation of Neon types and intrinsics, which follows ARM's specifications, has been causing problems for porting code from gcc. GCC's implementation defines the Neon types as plain vector types so that you can freely intermix builtin vector operators with Neon intrinsics. The ARM standard Neon types are "containerized vectors", i.e., structs, so you cannot use them with the usual vector operators (+, -, *, etc.). This change adds an optional gcc-compatibility mode for the Neon types and intrinsics to support code written in that style of intermixing intrinsics and operators. Before going into details, I also want to mention that the plan is for Clang to define overloaded intrinsics that will accept either ARM's containerized types or plain vector types. That is not yet implemented, but once it is available, Clang users can make a choice of declaring Neon vectors with either kind of type. If they go with the standard containerized vector types, they'll get strict conformance to ARM's specifications but will not be able to use vector operators with those values. If they go with plain vector types, they'll lose the standard conformance but be able to mix-and-match intrinsics with operators. 
This patch is intended as a step in the direction of the Clang solution, but since llvm-gcc does not support function overloading in C (besides the fact that it implements the Neon intrinsics as preprocessor macros for other reasons), llvm-gcc users will have to consistently use one style or the other. If they define ARM_NEON_GCC_COMPATIBILITY before including <arm_neon.h>, then the Neon intrinsics will be defined to work on plain vector types. Otherwise, they'll continue to get the standard versions. The patch brings back some of the awful arm_mangle_types code that fakes the C++ mangling for Neon types to make them be mangled as if they were the containerized vector structs. I've addressed the previous problems with that code by defining the Neon vector types as built-in types so that the mangler can recognize them by their unique type nodes. The motivation for resurrecting that code is that we don't want binary compatibility problems between llvm-gcc with ARM_NEON_GCC_COMPATIBILITY and Clang. However, the expected use of the ARM_NEON_GCC_COMPATIBILITY mode is that variables will be declared with plain vector types. That usage should continue to work without changes in Clang (where ARM_NEON_GCC_COMPATIBILITY will not be needed). Likewise, code that does not use that mode will continue to work unmodified in Clang. Code using the standard Neon type names when ARM_NEON_GCC_COMPATIBILITY is defined may require changes when moving to Clang, but those are basically the same changes that would otherwise be required when porting from gcc to llvm-gcc. Down to the details.... There are now 2 versions of the neon-gen.ml generator for arm_neon.h, and 2 versions of the arm_neon.h output. The neon-gen-std.ml version generates arm_neon_std.h, which is the version using the standard containerized vector types. The neon-gen.ml version generates arm_neon_gcc.h, which is the gcc-compatible version.
Neither of those headers should ever be used directly, since they will not be available with clang. The arm_neon.h header is now a simple wrapper that selects between the 2 versions based on whether ARM_NEON_GCC_COMPATIBILITY is defined. Added: llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon_gcc.h llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon_std.h - copied, changed from r103724, llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen-std.ml Modified: llvm-gcc-4.2/trunk/gcc/config.gcc llvm-gcc-4.2/trunk/gcc/config/arm/arm-protos.h llvm-gcc-4.2/trunk/gcc/config/arm/arm.c llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml Modified: llvm-gcc-4.2/trunk/gcc/config.gcc URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config.gcc?rev=103811&r1=103810&r2=103811&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config.gcc (original) +++ llvm-gcc-4.2/trunk/gcc/config.gcc Fri May 14 16:31:28 2010 @@ -264,7 +264,7 @@ arm*-*-*) cpu_type=arm # APPLE LOCAL ARM v7 support, merge from Codesourcery. 
- extra_headers="mmintrin.h arm_neon.h" + extra_headers="mmintrin.h arm_neon.h arm_neon_std.h arm_neon_gcc.h" # LLVM LOCAL begin out_cxx_file=arm/llvm-arm.cpp # LLVM LOCAL end @@ -822,7 +822,7 @@ extra_options="${extra_options} arm/darwin.opt" tm_file="${tm_file} arm/darwin.h" tmake_file="${tmake_file} arm/t-slibgcc-iphoneos" - extra_headers="arm_neon.h" + extra_headers="arm_neon.h arm_neon_std.h arm_neon_gcc.h" ;; # APPLE LOCAL end ARM darwin target arm*-wince-pe*) Modified: llvm-gcc-4.2/trunk/gcc/config/arm/arm-protos.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/arm-protos.h?rev=103811&r1=103810&r2=103811&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/arm/arm-protos.h (original) +++ llvm-gcc-4.2/trunk/gcc/config/arm/arm-protos.h Fri May 14 16:31:28 2010 @@ -258,8 +258,10 @@ /* APPLE LOCAL 5946347 ms_struct support */ extern int arm_field_ms_struct_align (tree); -/* LLVM LOCAL pr5037 removed arm_mangle_type */ +/* APPLE LOCAL begin v7 support. Merge from Codesourcery */ +extern const char *arm_mangle_type (tree); +/* APPLE LOCAL end v7 support. Merge from Codesourcery */ /* APPLE LOCAL v7 support. 
Fix compact switch tables */ extern void arm_asm_output_addr_diff_vec (FILE *file, rtx LABEL, rtx BODY); Modified: llvm-gcc-4.2/trunk/gcc/config/arm/arm.c URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/arm.c?rev=103811&r1=103810&r2=103811&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/arm/arm.c (original) +++ llvm-gcc-4.2/trunk/gcc/config/arm/arm.c Fri May 14 16:31:28 2010 @@ -146,7 +146,6 @@ /* LLVM LOCAL begin */ static tree arm_type_promotes_to(tree); static bool arm_is_fp16(tree); -static const char * arm_mangle_type (tree type); /* LLVM LOCAL end */ static int arm_comp_type_attributes (tree, tree); static void arm_set_default_type_attributes (tree); @@ -16775,8 +16774,48 @@ /* LLVM LOCAL pr5037 removed make_neon_float_type */ -/* LLVM LOCAL begin multi-vector types */ #ifdef ENABLE_LLVM +/* LLVM LOCAL begin use builtin vector types for easier mangling */ +/* Create a new vector type node for a Neon vector. This is just like + make_vector_type() but it does not enter the new type in the hash table. + The whole point of having these types built-in is to make them unique so + that the mangling function can identify them. */ + +static tree +build_neonvec_type (tree innertype, int nunits) +{ + tree t; + + t = make_node (VECTOR_TYPE); + TREE_TYPE (t) = TYPE_MAIN_VARIANT (innertype); + SET_TYPE_VECTOR_SUBPARTS (t, nunits); + TYPE_MODE (t) = VOIDmode; + TYPE_READONLY (t) = TYPE_READONLY (innertype); + TYPE_VOLATILE (t) = TYPE_VOLATILE (innertype); + + layout_type (t); + + { + tree index = build_int_cst (NULL_TREE, nunits - 1); + tree array = build_array_type (innertype, build_index_type (index)); + tree rt = make_node (RECORD_TYPE); + + TYPE_FIELDS (rt) = build_decl (FIELD_DECL, get_identifier ("f"), array); + DECL_CONTEXT (TYPE_FIELDS (rt)) = rt; + layout_type (rt); + TYPE_DEBUG_REPRESENTATION_TYPE (t) = rt; + /* In dwarfout.c, type lookup uses TYPE_UID numbers. 
We want to output + the representation type, and we want to find that die when looking up + the vector type. This is most easily achieved by making the TYPE_UID + numbers equal. */ + TYPE_UID (rt) = TYPE_UID (t); + } + + return t; +} +/* LLVM LOCAL end use builtin vector types for easier mangling */ + +/* LLVM LOCAL begin multi-vector types */ /* Create a new builtin struct type containing NUMVECS fields (where NUMVECS is in the range from 1 to 4) of type VECTYPE. */ static tree @@ -16810,6 +16849,57 @@ #endif /* ENABLE_LLVM */ /* LLVM LOCAL end multi-vector types */ +/* LLVM LOCAL begin use builtin vector types for easier mangling */ +typedef struct +{ + tree neonvec_type; + const char *aapcs_name; +} arm_mangle_map_entry; + +enum neonvec_types { + neon_int8x8_type, + neon_int16x4_type, + neon_int32x2_type, + neon_int64x1_type, + neon_float32x2_type, + neon_poly8x8_type, + neon_poly16x4_type, + neon_uint8x8_type, + neon_uint16x4_type, + neon_uint32x2_type, + neon_uint64x1_type, + neon_int8x16_type, + neon_int16x8_type, + neon_int32x4_type, + neon_int64x2_type, + neon_float32x4_type, + neon_poly8x16_type, + neon_poly16x8_type, + neon_uint8x16_type, + neon_uint16x8_type, + neon_uint32x4_type, + neon_uint64x2_type, + neon_LAST_type +}; + +static arm_mangle_map_entry arm_mangle_map[neon_LAST_type]; + +/* Create a unique type node for a Neon vector type and enter it in the + arm_mangle_map along with the corresponding mangled name. 
*/ +static void +define_neonvec_type (tree elt_type, unsigned num_elts, + const char *type_name, const char *mangling, + enum neonvec_types neonvec) +{ + tree neon_type_node = build_neonvec_type(elt_type, num_elts); + (*lang_hooks.types.register_builtin_type) (neon_type_node, type_name); + + arm_mangle_map[neonvec].neonvec_type = neon_type_node; + arm_mangle_map[neonvec].aapcs_name = mangling; +} + +/* LLVM LOCAL end use builtin vector types for easier mangling */ + static void arm_init_neon_builtins (void) { @@ -17880,6 +17970,36 @@ (*lang_hooks.types.register_builtin_type) (intUDI_type_node, "__builtin_neon_udi"); + /* LLVM LOCAL begin use builtin vector types for easier mangling */ +#define DEFINE_NEONVEC_TYPE(VECT, ELTT, NUMELTS, MANGLING) \ + define_neonvec_type (ELTT, NUMELTS, "__neon_" #VECT "_t", \ + MANGLING, neon_##VECT##_type) + + DEFINE_NEONVEC_TYPE(int8x8, intQI_type_node, 8, "15__simd64_int8_t"); + DEFINE_NEONVEC_TYPE(int16x4, intHI_type_node, 4, "16__simd64_int16_t"); + DEFINE_NEONVEC_TYPE(int32x2, intSI_type_node, 2, "16__simd64_int32_t"); + DEFINE_NEONVEC_TYPE(int64x1, intDI_type_node, 1, "16__simd64_int64_t"); + DEFINE_NEONVEC_TYPE(float32x2, float_type_node, 2, "18__simd64_float32_t"); + DEFINE_NEONVEC_TYPE(poly8x8, intQI_type_node, 8, "16__simd64_poly8_t"); + DEFINE_NEONVEC_TYPE(poly16x4, intHI_type_node, 4, "17__simd64_poly16_t"); + DEFINE_NEONVEC_TYPE(uint8x8, intUQI_type_node, 8, "16__simd64_uint8_t"); + DEFINE_NEONVEC_TYPE(uint16x4, intUHI_type_node, 4, "17__simd64_uint16_t"); + DEFINE_NEONVEC_TYPE(uint32x2, intUSI_type_node, 2, "17__simd64_uint32_t"); + DEFINE_NEONVEC_TYPE(uint64x1, intUDI_type_node, 1, "17__simd64_uint64_t"); + + DEFINE_NEONVEC_TYPE(int8x16, intQI_type_node, 16, "16__simd128_int8_t"); + DEFINE_NEONVEC_TYPE(int16x8, intHI_type_node, 8, "17__simd128_int16_t"); + DEFINE_NEONVEC_TYPE(int32x4, intSI_type_node, 4, "17__simd128_int32_t"); + DEFINE_NEONVEC_TYPE(int64x2, intDI_type_node, 2, "17__simd128_int64_t"); + 
DEFINE_NEONVEC_TYPE(float32x4, float_type_node, 4, "19__simd128_float32_t"); + DEFINE_NEONVEC_TYPE(poly8x16, intQI_type_node, 16, "17__simd128_poly8_t"); + DEFINE_NEONVEC_TYPE(poly16x8, intHI_type_node, 8, "18__simd128_poly16_t"); + DEFINE_NEONVEC_TYPE(uint8x16, intUQI_type_node, 16, "17__simd128_uint8_t"); + DEFINE_NEONVEC_TYPE(uint16x8, intUHI_type_node, 8, "18__simd128_uint16_t"); + DEFINE_NEONVEC_TYPE(uint32x4, intUSI_type_node, 4, "18__simd128_uint32_t"); + DEFINE_NEONVEC_TYPE(uint64x2, intUDI_type_node, 2, "18__simd128_uint64_t"); + /* LLVM LOCAL end use builtin vector types for easier mangling */ + /* LLVM LOCAL begin multi-vector types */ (*lang_hooks.types.register_builtin_type) (V8QI2_type_node, "__neon_int8x8x2_t"); @@ -23904,18 +24024,41 @@ } /* APPLE LOCAL end v7 support. Merge from mainline */ -/* LLVM LOCAL begin */ -static const char * +/* A table and a function to perform ARM-specific name mangling for + NEON vector types in order to conform to the AAPCS (see "Procedure + Call Standard for the ARM Architecture", Appendix A). To qualify + for emission with the mangled names defined in that document, a + vector type must not only be of the correct mode but also be + composed of NEON vector element types (e.g. __builtin_neon_qi). */ +/* LLVM LOCAL moved arm_mangle_map declarations earlier in this file */ +const char * arm_mangle_type (tree type) { + /* LLVM LOCAL */ + unsigned pos; + + /* LLVM LOCAL begin half-float */ if (arm_is_fp16(type)) return "Dh"; + /* LLVM LOCAL end half-float */ + + if (TREE_CODE (type) != VECTOR_TYPE) + return NULL; + + /* LLVM LOCAL begin use builtin vector types for easier mangling */ + /* Check if this type matches any of the unique vector type nodes in the + arm_mangle_map table. 
*/ + for (pos = 0; pos < neon_LAST_type; ++pos) + { + if (type == arm_mangle_map[pos].neonvec_type) + return arm_mangle_map[pos].aapcs_name; + } + /* LLVM LOCAL end use builtin vector types for easier mangling */ /* Use the default mangling for unrecognized (possibly user-defined) vector types. */ return NULL; } -/* LLVM LOCAL end */ void arm_asm_output_addr_diff_vec (FILE *file, rtx label, rtx body) Added: llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h?rev=103811&view=auto ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h (added) +++ llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h Fri May 14 16:31:28 2010 @@ -0,0 +1,38 @@ +/* ARM NEON intrinsics include file. + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 2, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the + Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* llvm-gcc provides two different versions of the NEON types and + intrinsics. 
The default versions follow the standard definitions + specified by ARM. For backward compatibility with GCC, alternate + versions are provided where the intrinsics will accept arguments with + GCC's vector types instead of the "containerized vector" types + specified by ARM. Define the ARM_NEON_GCC_COMPATIBILITY macro to + select these alternate versions of the NEON types and intrinsics. */ + +#ifdef ARM_NEON_GCC_COMPATIBILITY +#include <arm_neon_gcc.h> +#else +#include <arm_neon_std.h> +#endif Added: llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon_gcc.h URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon_gcc.h?rev=103811&view=auto ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon_gcc.h (added) +++ llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon_gcc.h Fri May 14 16:31:28 2010 @@ -0,0 +1,7206 @@ +/* Internal definitions for GCC-compatible NEON types and intrinsics. + Do not include this file directly; please use <arm_neon.h> and define + the ARM_NEON_GCC_COMPATIBILITY macro. + + This file is generated automatically using neon-gen.ml. + Please do not edit manually. + + Copyright (C) 2006, 2007 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 2, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the + Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA.
*/ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +#ifndef _GCC_ARM_NEON_H +#define _GCC_ARM_NEON_H 1 + +#ifndef __ARM_NEON__ +#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h +#else + +#ifdef __cplusplus +extern "C" { +#define __neon_ptr_cast(ty, ptr) reinterpret_cast<ty>(ptr) +#else +#define __neon_ptr_cast(ty, ptr) (ty)(ptr) +#endif + +#include <stdint.h> + +typedef __builtin_neon_sf float32_t; +typedef __builtin_neon_poly8 poly8_t; +typedef __builtin_neon_poly16 poly16_t; + +typedef __neon_int8x8_t int8x8_t; +typedef __neon_int16x4_t int16x4_t; +typedef __neon_int32x2_t int32x2_t; +typedef __neon_int64x1_t int64x1_t; +typedef __neon_float32x2_t float32x2_t; +typedef __neon_poly8x8_t poly8x8_t; +typedef __neon_poly16x4_t poly16x4_t; +typedef __neon_uint8x8_t uint8x8_t; +typedef __neon_uint16x4_t uint16x4_t; +typedef __neon_uint32x2_t uint32x2_t; +typedef __neon_uint64x1_t uint64x1_t; +typedef __neon_int8x16_t int8x16_t; +typedef __neon_int16x8_t int16x8_t; +typedef __neon_int32x4_t int32x4_t; +typedef __neon_int64x2_t int64x2_t; +typedef __neon_float32x4_t float32x4_t; +typedef __neon_poly8x16_t poly8x16_t; +typedef __neon_poly16x8_t poly16x8_t; +typedef __neon_uint8x16_t uint8x16_t; +typedef __neon_uint16x8_t uint16x8_t; +typedef __neon_uint32x4_t uint32x4_t; +typedef __neon_uint64x2_t uint64x2_t; + +typedef struct int8x8x2_t +{ + int8x8_t val[2]; +} int8x8x2_t; + +typedef struct int8x16x2_t +{ + int8x16_t val[2]; +} int8x16x2_t; + +typedef struct int16x4x2_t +{ + int16x4_t val[2]; +} int16x4x2_t; + +typedef struct int16x8x2_t +{ + int16x8_t val[2]; +} int16x8x2_t; + +typedef struct int32x2x2_t +{ + int32x2_t val[2];
+} int32x2x2_t; + +typedef struct int32x4x2_t +{ + int32x4_t val[2]; +} int32x4x2_t; + +typedef struct int64x1x2_t +{ + int64x1_t val[2]; +} int64x1x2_t; + +typedef struct int64x2x2_t +{ + int64x2_t val[2]; +} int64x2x2_t; + +typedef struct uint8x8x2_t +{ + uint8x8_t val[2]; +} uint8x8x2_t; + +typedef struct uint8x16x2_t +{ + uint8x16_t val[2]; +} uint8x16x2_t; + +typedef struct uint16x4x2_t +{ + uint16x4_t val[2]; +} uint16x4x2_t; + +typedef struct uint16x8x2_t +{ + uint16x8_t val[2]; +} uint16x8x2_t; + +typedef struct uint32x2x2_t +{ + uint32x2_t val[2]; +} uint32x2x2_t; + +typedef struct uint32x4x2_t +{ + uint32x4_t val[2]; +} uint32x4x2_t; + +typedef struct uint64x1x2_t +{ + uint64x1_t val[2]; +} uint64x1x2_t; + +typedef struct uint64x2x2_t +{ + uint64x2_t val[2]; +} uint64x2x2_t; + +typedef struct float32x2x2_t +{ + float32x2_t val[2]; +} float32x2x2_t; + +typedef struct float32x4x2_t +{ + float32x4_t val[2]; +} float32x4x2_t; + +typedef struct poly8x8x2_t +{ + poly8x8_t val[2]; +} poly8x8x2_t; + +typedef struct poly8x16x2_t +{ + poly8x16_t val[2]; +} poly8x16x2_t; + +typedef struct poly16x4x2_t +{ + poly16x4_t val[2]; +} poly16x4x2_t; + +typedef struct poly16x8x2_t +{ + poly16x8_t val[2]; +} poly16x8x2_t; + +typedef struct int8x8x3_t +{ + int8x8_t val[3]; +} int8x8x3_t; + +typedef struct int8x16x3_t +{ + int8x16_t val[3]; +} int8x16x3_t; + +typedef struct int16x4x3_t +{ + int16x4_t val[3]; +} int16x4x3_t; + +typedef struct int16x8x3_t +{ + int16x8_t val[3]; +} int16x8x3_t; + +typedef struct int32x2x3_t +{ + int32x2_t val[3]; +} int32x2x3_t; + +typedef struct int32x4x3_t +{ + int32x4_t val[3]; +} int32x4x3_t; + +typedef struct int64x1x3_t +{ + int64x1_t val[3]; +} int64x1x3_t; + +typedef struct int64x2x3_t +{ + int64x2_t val[3]; +} int64x2x3_t; + +typedef struct uint8x8x3_t +{ + uint8x8_t val[3]; +} uint8x8x3_t; + +typedef struct uint8x16x3_t +{ + uint8x16_t val[3]; +} uint8x16x3_t; + +typedef struct uint16x4x3_t +{ + uint16x4_t val[3]; +} uint16x4x3_t; + 
+typedef struct uint16x8x3_t +{ + uint16x8_t val[3]; +} uint16x8x3_t; + +typedef struct uint32x2x3_t +{ + uint32x2_t val[3]; +} uint32x2x3_t; + +typedef struct uint32x4x3_t +{ + uint32x4_t val[3]; +} uint32x4x3_t; + +typedef struct uint64x1x3_t +{ + uint64x1_t val[3]; +} uint64x1x3_t; + +typedef struct uint64x2x3_t +{ + uint64x2_t val[3]; +} uint64x2x3_t; + +typedef struct float32x2x3_t +{ + float32x2_t val[3]; +} float32x2x3_t; + +typedef struct float32x4x3_t +{ + float32x4_t val[3]; +} float32x4x3_t; + +typedef struct poly8x8x3_t +{ + poly8x8_t val[3]; +} poly8x8x3_t; + +typedef struct poly8x16x3_t +{ + poly8x16_t val[3]; +} poly8x16x3_t; + +typedef struct poly16x4x3_t +{ + poly16x4_t val[3]; +} poly16x4x3_t; + +typedef struct poly16x8x3_t +{ + poly16x8_t val[3]; +} poly16x8x3_t; + +typedef struct int8x8x4_t +{ + int8x8_t val[4]; +} int8x8x4_t; + +typedef struct int8x16x4_t +{ + int8x16_t val[4]; +} int8x16x4_t; + +typedef struct int16x4x4_t +{ + int16x4_t val[4]; +} int16x4x4_t; + +typedef struct int16x8x4_t +{ + int16x8_t val[4]; +} int16x8x4_t; + +typedef struct int32x2x4_t +{ + int32x2_t val[4]; +} int32x2x4_t; + +typedef struct int32x4x4_t +{ + int32x4_t val[4]; +} int32x4x4_t; + +typedef struct int64x1x4_t +{ + int64x1_t val[4]; +} int64x1x4_t; + +typedef struct int64x2x4_t +{ + int64x2_t val[4]; +} int64x2x4_t; + +typedef struct uint8x8x4_t +{ + uint8x8_t val[4]; +} uint8x8x4_t; + +typedef struct uint8x16x4_t +{ + uint8x16_t val[4]; +} uint8x16x4_t; + +typedef struct uint16x4x4_t +{ + uint16x4_t val[4]; +} uint16x4x4_t; + +typedef struct uint16x8x4_t +{ + uint16x8_t val[4]; +} uint16x8x4_t; + +typedef struct uint32x2x4_t +{ + uint32x2_t val[4]; +} uint32x2x4_t; + +typedef struct uint32x4x4_t +{ + uint32x4_t val[4]; +} uint32x4x4_t; + +typedef struct uint64x1x4_t +{ + uint64x1_t val[4]; +} uint64x1x4_t; + +typedef struct uint64x2x4_t +{ + uint64x2_t val[4]; +} uint64x2x4_t; + +typedef struct float32x2x4_t +{ + float32x2_t val[4]; +} float32x2x4_t; + 
+typedef struct float32x4x4_t +{ + float32x4_t val[4]; +} float32x4x4_t; + +typedef struct poly8x8x4_t +{ + poly8x8_t val[4]; +} poly8x8x4_t; + +typedef struct poly8x16x4_t +{ + poly8x16_t val[4]; +} poly8x16x4_t; + +typedef struct poly16x4x4_t +{ + poly16x4_t val[4]; +} poly16x4x4_t; + +typedef struct poly16x8x4_t +{ + poly16x8_t val[4]; +} poly16x8x4_t; + + +#define vadd_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1) + +#define vadd_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vaddv4hi (__a, __b, 1) + +#define vadd_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1) + +#define vadd_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vaddv1di (__a, __b, 1) + +#define vadd_f32(__a, __b) \ + (float32x2_t)__builtin_neon_vaddv2sf (__a, __b, 5) + +#define vadd_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vadd_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vadd_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vadd_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vaddv1di ((int64x1_t) __a, (int64x1_t) __b, 0) + +#define vaddq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vaddv16qi (__a, __b, 1) + +#define vaddq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vaddv8hi (__a, __b, 1) + +#define vaddq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vaddv4si (__a, __b, 1) + +#define vaddq_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vaddv2di (__a, __b, 1) + +#define vaddq_f32(__a, __b) \ + (float32x4_t)__builtin_neon_vaddv4sf (__a, __b, 5) + +#define vaddq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vaddq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vaddq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vaddq_u64(__a, __b) \ + 
(uint64x2_t)__builtin_neon_vaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0) + +#define vaddl_s8(__a, __b) \ + (int16x8_t)__builtin_neon_vaddlv8qi (__a, __b, 1) + +#define vaddl_s16(__a, __b) \ + (int32x4_t)__builtin_neon_vaddlv4hi (__a, __b, 1) + +#define vaddl_s32(__a, __b) \ + (int64x2_t)__builtin_neon_vaddlv2si (__a, __b, 1) + +#define vaddl_u8(__a, __b) \ + (uint16x8_t)__builtin_neon_vaddlv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vaddl_u16(__a, __b) \ + (uint32x4_t)__builtin_neon_vaddlv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vaddl_u32(__a, __b) \ + (uint64x2_t)__builtin_neon_vaddlv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vaddw_s8(__a, __b) \ + (int16x8_t)__builtin_neon_vaddwv8qi (__a, __b, 1) + +#define vaddw_s16(__a, __b) \ + (int32x4_t)__builtin_neon_vaddwv4hi (__a, __b, 1) + +#define vaddw_s32(__a, __b) \ + (int64x2_t)__builtin_neon_vaddwv2si (__a, __b, 1) + +#define vaddw_u8(__a, __b) \ + (uint16x8_t)__builtin_neon_vaddwv8qi ((int16x8_t) __a, (int8x8_t) __b, 0) + +#define vaddw_u16(__a, __b) \ + (uint32x4_t)__builtin_neon_vaddwv4hi ((int32x4_t) __a, (int16x4_t) __b, 0) + +#define vaddw_u32(__a, __b) \ + (uint64x2_t)__builtin_neon_vaddwv2si ((int64x2_t) __a, (int32x2_t) __b, 0) + +#define vhadd_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 1) + +#define vhadd_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 1) + +#define vhadd_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 1) + +#define vhadd_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vhaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vhadd_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vhaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vhadd_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vhaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vhaddq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 1) + +#define vhaddq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 1) + +#define 
vhaddq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 1) + +#define vhaddq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vhaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vhaddq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vhaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vhaddq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vhaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vrhadd_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vhaddv8qi (__a, __b, 3) + +#define vrhadd_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vhaddv4hi (__a, __b, 3) + +#define vrhadd_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vhaddv2si (__a, __b, 3) + +#define vrhadd_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vhaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 2) + +#define vrhadd_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vhaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 2) + +#define vrhadd_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vhaddv2si ((int32x2_t) __a, (int32x2_t) __b, 2) + +#define vrhaddq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vhaddv16qi (__a, __b, 3) + +#define vrhaddq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vhaddv8hi (__a, __b, 3) + +#define vrhaddq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vhaddv4si (__a, __b, 3) + +#define vrhaddq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vhaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 2) + +#define vrhaddq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vhaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 2) + +#define vrhaddq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vhaddv4si ((int32x4_t) __a, (int32x4_t) __b, 2) + +#define vqadd_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vqaddv8qi (__a, __b, 1) + +#define vqadd_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vqaddv4hi (__a, __b, 1) + +#define vqadd_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vqaddv2si (__a, __b, 1) + +#define vqadd_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vqaddv1di (__a, __b, 1) + +#define vqadd_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vqaddv8qi 
((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vqadd_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vqaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vqadd_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vqaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vqadd_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vqaddv1di ((int64x1_t) __a, (int64x1_t) __b, 0) + +#define vqaddq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vqaddv16qi (__a, __b, 1) + +#define vqaddq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vqaddv8hi (__a, __b, 1) + +#define vqaddq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vqaddv4si (__a, __b, 1) + +#define vqaddq_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vqaddv2di (__a, __b, 1) + +#define vqaddq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vqaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vqaddq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vqaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vqaddq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vqaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vqaddq_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vqaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0) + +#define vaddhn_s16(__a, __b) \ + (int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 1) + +#define vaddhn_s32(__a, __b) \ + (int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 1) + +#define vaddhn_s64(__a, __b) \ + (int32x2_t)__builtin_neon_vaddhnv2di (__a, __b, 1) + +#define vaddhn_u16(__a, __b) \ + (uint8x8_t)__builtin_neon_vaddhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vaddhn_u32(__a, __b) \ + (uint16x4_t)__builtin_neon_vaddhnv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vaddhn_u64(__a, __b) \ + (uint32x2_t)__builtin_neon_vaddhnv2di ((int64x2_t) __a, (int64x2_t) __b, 0) + +#define vraddhn_s16(__a, __b) \ + (int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b, 3) + +#define vraddhn_s32(__a, __b) \ + (int16x4_t)__builtin_neon_vaddhnv4si (__a, __b, 3) + +#define vraddhn_s64(__a, __b) \ + (int32x2_t)__builtin_neon_vaddhnv2di 
(__a, __b, 3) + +#define vraddhn_u16(__a, __b) \ + (uint8x8_t)__builtin_neon_vaddhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 2) + +#define vraddhn_u32(__a, __b) \ + (uint16x4_t)__builtin_neon_vaddhnv4si ((int32x4_t) __a, (int32x4_t) __b, 2) + +#define vraddhn_u64(__a, __b) \ + (uint32x2_t)__builtin_neon_vaddhnv2di ((int64x2_t) __a, (int64x2_t) __b, 2) + +#define vmul_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vmulv8qi (__a, __b, 1) + +#define vmul_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vmulv4hi (__a, __b, 1) + +#define vmul_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vmulv2si (__a, __b, 1) + +#define vmul_f32(__a, __b) \ + (float32x2_t)__builtin_neon_vmulv2sf (__a, __b, 5) + +#define vmul_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vmul_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vmulv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vmul_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vmulv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vmul_p8(__a, __b) \ + (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 4) + +#define vmulq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vmulv16qi (__a, __b, 1) + +#define vmulq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vmulv8hi (__a, __b, 1) + +#define vmulq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vmulv4si (__a, __b, 1) + +#define vmulq_f32(__a, __b) \ + (float32x4_t)__builtin_neon_vmulv4sf (__a, __b, 5) + +#define vmulq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vmulq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vmulv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vmulq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vmulv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vmulq_p8(__a, __b) \ + (poly8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 4) + +#define vqdmulh_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 1) + +#define vqdmulh_s32(__a, 
__b) \ + (int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 1) + +#define vqdmulhq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 1) + +#define vqdmulhq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 1) + +#define vqrdmulh_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b, 3) + +#define vqrdmulh_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b, 3) + +#define vqrdmulhq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b, 3) + +#define vqrdmulhq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b, 3) + +#define vmull_s8(__a, __b) \ + (int16x8_t)__builtin_neon_vmullv8qi (__a, __b, 1) + +#define vmull_s16(__a, __b) \ + (int32x4_t)__builtin_neon_vmullv4hi (__a, __b, 1) + +#define vmull_s32(__a, __b) \ + (int64x2_t)__builtin_neon_vmullv2si (__a, __b, 1) + +#define vmull_u8(__a, __b) \ + (uint16x8_t)__builtin_neon_vmullv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vmull_u16(__a, __b) \ + (uint32x4_t)__builtin_neon_vmullv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vmull_u32(__a, __b) \ + (uint64x2_t)__builtin_neon_vmullv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vmull_p8(__a, __b) \ + (poly16x8_t)__builtin_neon_vmullv8qi ((int8x8_t) __a, (int8x8_t) __b, 4) + +#define vqdmull_s16(__a, __b) \ + (int32x4_t)__builtin_neon_vqdmullv4hi (__a, __b, 1) + +#define vqdmull_s32(__a, __b) \ + (int64x2_t)__builtin_neon_vqdmullv2si (__a, __b, 1) + +#define vmla_s8(__a, __b, __c) \ + (int8x8_t)__builtin_neon_vmlav8qi (__a, __b, __c, 1) + +#define vmla_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vmlav4hi (__a, __b, __c, 1) + +#define vmla_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vmlav2si (__a, __b, __c, 1) + +#define vmla_f32(__a, __b, __c) \ + (float32x2_t)__builtin_neon_vmlav2sf (__a, __b, __c, 5) + +#define vmla_u8(__a, __b, __c) \ + (uint8x8_t)__builtin_neon_vmlav8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0) + +#define vmla_u16(__a, __b, 
__c) \ + (uint16x4_t)__builtin_neon_vmlav4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0) + +#define vmla_u32(__a, __b, __c) \ + (uint32x2_t)__builtin_neon_vmlav2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0) + +#define vmlaq_s8(__a, __b, __c) \ + (int8x16_t)__builtin_neon_vmlav16qi (__a, __b, __c, 1) + +#define vmlaq_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vmlav8hi (__a, __b, __c, 1) + +#define vmlaq_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vmlav4si (__a, __b, __c, 1) + +#define vmlaq_f32(__a, __b, __c) \ + (float32x4_t)__builtin_neon_vmlav4sf (__a, __b, __c, 5) + +#define vmlaq_u8(__a, __b, __c) \ + (uint8x16_t)__builtin_neon_vmlav16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0) + +#define vmlaq_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vmlav8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0) + +#define vmlaq_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vmlav4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0) + +#define vmlal_s8(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vmlalv8qi (__a, __b, __c, 1) + +#define vmlal_s16(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vmlalv4hi (__a, __b, __c, 1) + +#define vmlal_s32(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vmlalv2si (__a, __b, __c, 1) + +#define vmlal_u8(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vmlalv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0) + +#define vmlal_u16(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vmlalv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0) + +#define vmlal_u32(__a, __b, __c) \ + (uint64x2_t)__builtin_neon_vmlalv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0) + +#define vqdmlal_s16(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vqdmlalv4hi (__a, __b, __c, 1) + +#define vqdmlal_s32(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vqdmlalv2si (__a, __b, __c, 1) + +#define vmls_s8(__a, __b, __c) \ + (int8x8_t)__builtin_neon_vmlsv8qi (__a, __b, __c, 1) + +#define vmls_s16(__a, __b, 
__c) \ + (int16x4_t)__builtin_neon_vmlsv4hi (__a, __b, __c, 1) + +#define vmls_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vmlsv2si (__a, __b, __c, 1) + +#define vmls_f32(__a, __b, __c) \ + (float32x2_t)__builtin_neon_vmlsv2sf (__a, __b, __c, 5) + +#define vmls_u8(__a, __b, __c) \ + (uint8x8_t)__builtin_neon_vmlsv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0) + +#define vmls_u16(__a, __b, __c) \ + (uint16x4_t)__builtin_neon_vmlsv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0) + +#define vmls_u32(__a, __b, __c) \ + (uint32x2_t)__builtin_neon_vmlsv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0) + +#define vmlsq_s8(__a, __b, __c) \ + (int8x16_t)__builtin_neon_vmlsv16qi (__a, __b, __c, 1) + +#define vmlsq_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vmlsv8hi (__a, __b, __c, 1) + +#define vmlsq_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vmlsv4si (__a, __b, __c, 1) + +#define vmlsq_f32(__a, __b, __c) \ + (float32x4_t)__builtin_neon_vmlsv4sf (__a, __b, __c, 5) + +#define vmlsq_u8(__a, __b, __c) \ + (uint8x16_t)__builtin_neon_vmlsv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0) + +#define vmlsq_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vmlsv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0) + +#define vmlsq_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vmlsv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0) + +#define vmlsl_s8(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vmlslv8qi (__a, __b, __c, 1) + +#define vmlsl_s16(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vmlslv4hi (__a, __b, __c, 1) + +#define vmlsl_s32(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vmlslv2si (__a, __b, __c, 1) + +#define vmlsl_u8(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vmlslv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0) + +#define vmlsl_u16(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vmlslv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0) + +#define vmlsl_u32(__a, __b, __c) \ + 
(uint64x2_t)__builtin_neon_vmlslv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0) + +#define vqdmlsl_s16(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vqdmlslv4hi (__a, __b, __c, 1) + +#define vqdmlsl_s32(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c, 1) + +#define vsub_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vsubv8qi (__a, __b, 1) + +#define vsub_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vsubv4hi (__a, __b, 1) + +#define vsub_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1) + +#define vsub_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vsubv1di (__a, __b, 1) + +#define vsub_f32(__a, __b) \ + (float32x2_t)__builtin_neon_vsubv2sf (__a, __b, 5) + +#define vsub_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vsub_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vsub_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vsub_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vsubv1di ((int64x1_t) __a, (int64x1_t) __b, 0) + +#define vsubq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vsubv16qi (__a, __b, 1) + +#define vsubq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vsubv8hi (__a, __b, 1) + +#define vsubq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vsubv4si (__a, __b, 1) + +#define vsubq_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vsubv2di (__a, __b, 1) + +#define vsubq_f32(__a, __b) \ + (float32x4_t)__builtin_neon_vsubv4sf (__a, __b, 5) + +#define vsubq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vsubq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vsubq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vsubq_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0) + 
+#define vsubl_s8(__a, __b) \ + (int16x8_t)__builtin_neon_vsublv8qi (__a, __b, 1) + +#define vsubl_s16(__a, __b) \ + (int32x4_t)__builtin_neon_vsublv4hi (__a, __b, 1) + +#define vsubl_s32(__a, __b) \ + (int64x2_t)__builtin_neon_vsublv2si (__a, __b, 1) + +#define vsubl_u8(__a, __b) \ + (uint16x8_t)__builtin_neon_vsublv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vsubl_u16(__a, __b) \ + (uint32x4_t)__builtin_neon_vsublv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vsubl_u32(__a, __b) \ + (uint64x2_t)__builtin_neon_vsublv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vsubw_s8(__a, __b) \ + (int16x8_t)__builtin_neon_vsubwv8qi (__a, __b, 1) + +#define vsubw_s16(__a, __b) \ + (int32x4_t)__builtin_neon_vsubwv4hi (__a, __b, 1) + +#define vsubw_s32(__a, __b) \ + (int64x2_t)__builtin_neon_vsubwv2si (__a, __b, 1) + +#define vsubw_u8(__a, __b) \ + (uint16x8_t)__builtin_neon_vsubwv8qi ((int16x8_t) __a, (int8x8_t) __b, 0) + +#define vsubw_u16(__a, __b) \ + (uint32x4_t)__builtin_neon_vsubwv4hi ((int32x4_t) __a, (int16x4_t) __b, 0) + +#define vsubw_u32(__a, __b) \ + (uint64x2_t)__builtin_neon_vsubwv2si ((int64x2_t) __a, (int32x2_t) __b, 0) + +#define vhsub_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vhsubv8qi (__a, __b, 1) + +#define vhsub_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vhsubv4hi (__a, __b, 1) + +#define vhsub_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vhsubv2si (__a, __b, 1) + +#define vhsub_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vhsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vhsub_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vhsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vhsub_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vhsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vhsubq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vhsubv16qi (__a, __b, 1) + +#define vhsubq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vhsubv8hi (__a, __b, 1) + +#define vhsubq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vhsubv4si (__a, __b, 1) + 
+#define vhsubq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vhsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vhsubq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vhsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vhsubq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vhsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vqsub_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vqsubv8qi (__a, __b, 1) + +#define vqsub_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vqsubv4hi (__a, __b, 1) + +#define vqsub_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vqsubv2si (__a, __b, 1) + +#define vqsub_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vqsubv1di (__a, __b, 1) + +#define vqsub_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vqsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vqsub_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vqsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vqsub_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vqsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vqsub_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vqsubv1di ((int64x1_t) __a, (int64x1_t) __b, 0) + +#define vqsubq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vqsubv16qi (__a, __b, 1) + +#define vqsubq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vqsubv8hi (__a, __b, 1) + +#define vqsubq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vqsubv4si (__a, __b, 1) + +#define vqsubq_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vqsubv2di (__a, __b, 1) + +#define vqsubq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vqsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vqsubq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vqsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vqsubq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vqsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vqsubq_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vqsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0) + +#define vsubhn_s16(__a, __b) \ + (int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 1) + +#define 
vsubhn_s32(__a, __b) \ + (int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 1) + +#define vsubhn_s64(__a, __b) \ + (int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 1) + +#define vsubhn_u16(__a, __b) \ + (uint8x8_t)__builtin_neon_vsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vsubhn_u32(__a, __b) \ + (uint16x4_t)__builtin_neon_vsubhnv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vsubhn_u64(__a, __b) \ + (uint32x2_t)__builtin_neon_vsubhnv2di ((int64x2_t) __a, (int64x2_t) __b, 0) + +#define vrsubhn_s16(__a, __b) \ + (int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b, 3) + +#define vrsubhn_s32(__a, __b) \ + (int16x4_t)__builtin_neon_vsubhnv4si (__a, __b, 3) + +#define vrsubhn_s64(__a, __b) \ + (int32x2_t)__builtin_neon_vsubhnv2di (__a, __b, 3) + +#define vrsubhn_u16(__a, __b) \ + (uint8x8_t)__builtin_neon_vsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b, 2) + +#define vrsubhn_u32(__a, __b) \ + (uint16x4_t)__builtin_neon_vsubhnv4si ((int32x4_t) __a, (int32x4_t) __b, 2) + +#define vrsubhn_u64(__a, __b) \ + (uint32x2_t)__builtin_neon_vsubhnv2di ((int64x2_t) __a, (int64x2_t) __b, 2) + +#define vceq_s8(__a, __b) \ + (uint8x8_t)__builtin_neon_vceqv8qi (__a, __b, 1) + +#define vceq_s16(__a, __b) \ + (uint16x4_t)__builtin_neon_vceqv4hi (__a, __b, 1) + +#define vceq_s32(__a, __b) \ + (uint32x2_t)__builtin_neon_vceqv2si (__a, __b, 1) + +#define vceq_f32(__a, __b) \ + (uint32x2_t)__builtin_neon_vceqv2sf (__a, __b, 5) + +#define vceq_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vceq_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vceqv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vceq_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vceqv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vceq_p8(__a, __b) \ + (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b, 4) + +#define vceqq_s8(__a, __b) \ + (uint8x16_t)__builtin_neon_vceqv16qi (__a, __b, 1) + +#define vceqq_s16(__a, __b) \ + 
(uint16x8_t)__builtin_neon_vceqv8hi (__a, __b, 1) + +#define vceqq_s32(__a, __b) \ + (uint32x4_t)__builtin_neon_vceqv4si (__a, __b, 1) + +#define vceqq_f32(__a, __b) \ + (uint32x4_t)__builtin_neon_vceqv4sf (__a, __b, 5) + +#define vceqq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vceqq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vceqv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vceqq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vceqv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vceqq_p8(__a, __b) \ + (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b, 4) + +#define vcge_s8(__a, __b) \ + (uint8x8_t)__builtin_neon_vcgev8qi (__a, __b, 1) + +#define vcge_s16(__a, __b) \ + (uint16x4_t)__builtin_neon_vcgev4hi (__a, __b, 1) + +#define vcge_s32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcgev2si (__a, __b, 1) + +#define vcge_f32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcgev2sf (__a, __b, 5) + +#define vcge_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vcge_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vcge_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vcgeq_s8(__a, __b) \ + (uint8x16_t)__builtin_neon_vcgev16qi (__a, __b, 1) + +#define vcgeq_s16(__a, __b) \ + (uint16x8_t)__builtin_neon_vcgev8hi (__a, __b, 1) + +#define vcgeq_s32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcgev4si (__a, __b, 1) + +#define vcgeq_f32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcgev4sf (__a, __b, 5) + +#define vcgeq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vcgeq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vcgeq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define 
vcle_s8(__a, __b) \ + (uint8x8_t)__builtin_neon_vcgev8qi (__b, __a, 1) + +#define vcle_s16(__a, __b) \ + (uint16x4_t)__builtin_neon_vcgev4hi (__b, __a, 1) + +#define vcle_s32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcgev2si (__b, __a, 1) + +#define vcle_f32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcgev2sf (__b, __a, 5) + +#define vcle_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __b, (int8x8_t) __a, 0) + +#define vcle_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __b, (int16x4_t) __a, 0) + +#define vcle_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __b, (int32x2_t) __a, 0) + +#define vcleq_s8(__a, __b) \ + (uint8x16_t)__builtin_neon_vcgev16qi (__b, __a, 1) + +#define vcleq_s16(__a, __b) \ + (uint16x8_t)__builtin_neon_vcgev8hi (__b, __a, 1) + +#define vcleq_s32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcgev4si (__b, __a, 1) + +#define vcleq_f32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcgev4sf (__b, __a, 5) + +#define vcleq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __b, (int8x16_t) __a, 0) + +#define vcleq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __b, (int16x8_t) __a, 0) + +#define vcleq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __b, (int32x4_t) __a, 0) + +#define vcgt_s8(__a, __b) \ + (uint8x8_t)__builtin_neon_vcgtv8qi (__a, __b, 1) + +#define vcgt_s16(__a, __b) \ + (uint16x4_t)__builtin_neon_vcgtv4hi (__a, __b, 1) + +#define vcgt_s32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcgtv2si (__a, __b, 1) + +#define vcgt_f32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcgtv2sf (__a, __b, 5) + +#define vcgt_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vcgt_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vcgt_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vcgtq_s8(__a, __b) \ + 
(uint8x16_t)__builtin_neon_vcgtv16qi (__a, __b, 1) + +#define vcgtq_s16(__a, __b) \ + (uint16x8_t)__builtin_neon_vcgtv8hi (__a, __b, 1) + +#define vcgtq_s32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcgtv4si (__a, __b, 1) + +#define vcgtq_f32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcgtv4sf (__a, __b, 5) + +#define vcgtq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vcgtq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vcgtq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vclt_s8(__a, __b) \ + (uint8x8_t)__builtin_neon_vcgtv8qi (__b, __a, 1) + +#define vclt_s16(__a, __b) \ + (uint16x4_t)__builtin_neon_vcgtv4hi (__b, __a, 1) + +#define vclt_s32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcgtv2si (__b, __a, 1) + +#define vclt_f32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcgtv2sf (__b, __a, 5) + +#define vclt_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __b, (int8x8_t) __a, 0) + +#define vclt_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __b, (int16x4_t) __a, 0) + +#define vclt_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __b, (int32x2_t) __a, 0) + +#define vcltq_s8(__a, __b) \ + (uint8x16_t)__builtin_neon_vcgtv16qi (__b, __a, 1) + +#define vcltq_s16(__a, __b) \ + (uint16x8_t)__builtin_neon_vcgtv8hi (__b, __a, 1) + +#define vcltq_s32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcgtv4si (__b, __a, 1) + +#define vcltq_f32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcgtv4sf (__b, __a, 5) + +#define vcltq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __b, (int8x16_t) __a, 0) + +#define vcltq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __b, (int16x8_t) __a, 0) + +#define vcltq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __b, (int32x4_t) __a, 0) + +#define vcage_f32(__a, __b) \ + 
(uint32x2_t)__builtin_neon_vcagev2sf (__a, __b, 5) + +#define vcageq_f32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcagev4sf (__a, __b, 5) + +#define vcale_f32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcagev2sf (__b, __a, 5) + +#define vcaleq_f32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcagev4sf (__b, __a, 5) + +#define vcagt_f32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcagtv2sf (__a, __b, 5) + +#define vcagtq_f32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcagtv4sf (__a, __b, 5) + +#define vcalt_f32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcagtv2sf (__b, __a, 5) + +#define vcaltq_f32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcagtv4sf (__b, __a, 5) + +#define vtst_s8(__a, __b) \ + (uint8x8_t)__builtin_neon_vtstv8qi (__a, __b, 1) + +#define vtst_s16(__a, __b) \ + (uint16x4_t)__builtin_neon_vtstv4hi (__a, __b, 1) + +#define vtst_s32(__a, __b) \ + (uint32x2_t)__builtin_neon_vtstv2si (__a, __b, 1) + +#define vtst_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vtst_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vtstv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vtst_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vtstv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vtst_p8(__a, __b) \ + (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b, 4) + +#define vtstq_s8(__a, __b) \ + (uint8x16_t)__builtin_neon_vtstv16qi (__a, __b, 1) + +#define vtstq_s16(__a, __b) \ + (uint16x8_t)__builtin_neon_vtstv8hi (__a, __b, 1) + +#define vtstq_s32(__a, __b) \ + (uint32x4_t)__builtin_neon_vtstv4si (__a, __b, 1) + +#define vtstq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vtstq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vtstv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vtstq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vtstv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vtstq_p8(__a, __b) \ + (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) 
__a, (int8x16_t) __b, 4) + +#define vabd_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vabdv8qi (__a, __b, 1) + +#define vabd_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vabdv4hi (__a, __b, 1) + +#define vabd_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vabdv2si (__a, __b, 1) + +#define vabd_f32(__a, __b) \ + (float32x2_t)__builtin_neon_vabdv2sf (__a, __b, 5) + +#define vabd_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vabdv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vabd_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vabdv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vabd_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vabdv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vabdq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vabdv16qi (__a, __b, 1) + +#define vabdq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vabdv8hi (__a, __b, 1) + +#define vabdq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vabdv4si (__a, __b, 1) + +#define vabdq_f32(__a, __b) \ + (float32x4_t)__builtin_neon_vabdv4sf (__a, __b, 5) + +#define vabdq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vabdv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vabdq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vabdv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vabdq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vabdv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vabdl_s8(__a, __b) \ + (int16x8_t)__builtin_neon_vabdlv8qi (__a, __b, 1) + +#define vabdl_s16(__a, __b) \ + (int32x4_t)__builtin_neon_vabdlv4hi (__a, __b, 1) + +#define vabdl_s32(__a, __b) \ + (int64x2_t)__builtin_neon_vabdlv2si (__a, __b, 1) + +#define vabdl_u8(__a, __b) \ + (uint16x8_t)__builtin_neon_vabdlv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vabdl_u16(__a, __b) \ + (uint32x4_t)__builtin_neon_vabdlv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vabdl_u32(__a, __b) \ + (uint64x2_t)__builtin_neon_vabdlv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vaba_s8(__a, __b, __c) \ + (int8x8_t)__builtin_neon_vabav8qi 
(__a, __b, __c, 1) + +#define vaba_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vabav4hi (__a, __b, __c, 1) + +#define vaba_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vabav2si (__a, __b, __c, 1) + +#define vaba_u8(__a, __b, __c) \ + (uint8x8_t)__builtin_neon_vabav8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0) + +#define vaba_u16(__a, __b, __c) \ + (uint16x4_t)__builtin_neon_vabav4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0) + +#define vaba_u32(__a, __b, __c) \ + (uint32x2_t)__builtin_neon_vabav2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0) + +#define vabaq_s8(__a, __b, __c) \ + (int8x16_t)__builtin_neon_vabav16qi (__a, __b, __c, 1) + +#define vabaq_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vabav8hi (__a, __b, __c, 1) + +#define vabaq_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vabav4si (__a, __b, __c, 1) + +#define vabaq_u8(__a, __b, __c) \ + (uint8x16_t)__builtin_neon_vabav16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c, 0) + +#define vabaq_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vabav8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c, 0) + +#define vabaq_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vabav4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c, 0) + +#define vabal_s8(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vabalv8qi (__a, __b, __c, 1) + +#define vabal_s16(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vabalv4hi (__a, __b, __c, 1) + +#define vabal_s32(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vabalv2si (__a, __b, __c, 1) + +#define vabal_u8(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vabalv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c, 0) + +#define vabal_u16(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vabalv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, 0) + +#define vabal_u32(__a, __b, __c) \ + (uint64x2_t)__builtin_neon_vabalv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, 0) + +#define vmax_s8(__a, __b) \ + 
(int8x8_t)__builtin_neon_vmaxv8qi (__a, __b, 1) + +#define vmax_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vmaxv4hi (__a, __b, 1) + +#define vmax_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vmaxv2si (__a, __b, 1) + +#define vmax_f32(__a, __b) \ + (float32x2_t)__builtin_neon_vmaxv2sf (__a, __b, 5) + +#define vmax_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vmaxv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vmax_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vmaxv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vmax_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vmaxv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vmaxq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vmaxv16qi (__a, __b, 1) + +#define vmaxq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vmaxv8hi (__a, __b, 1) + +#define vmaxq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vmaxv4si (__a, __b, 1) + +#define vmaxq_f32(__a, __b) \ + (float32x4_t)__builtin_neon_vmaxv4sf (__a, __b, 5) + +#define vmaxq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vmaxv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vmaxq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vmaxv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vmaxq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vmaxv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vmin_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vminv8qi (__a, __b, 1) + +#define vmin_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vminv4hi (__a, __b, 1) + +#define vmin_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vminv2si (__a, __b, 1) + +#define vmin_f32(__a, __b) \ + (float32x2_t)__builtin_neon_vminv2sf (__a, __b, 5) + +#define vmin_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vminv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vmin_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vminv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vmin_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vminv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vminq_s8(__a, __b) \ + 
(int8x16_t)__builtin_neon_vminv16qi (__a, __b, 1) + +#define vminq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vminv8hi (__a, __b, 1) + +#define vminq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vminv4si (__a, __b, 1) + +#define vminq_f32(__a, __b) \ + (float32x4_t)__builtin_neon_vminv4sf (__a, __b, 5) + +#define vminq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vminv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vminq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vminv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vminq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vminv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vpadd_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vpaddv8qi (__a, __b, 1) + +#define vpadd_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vpaddv4hi (__a, __b, 1) + +#define vpadd_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vpaddv2si (__a, __b, 1) + +#define vpadd_f32(__a, __b) \ + (float32x2_t)__builtin_neon_vpaddv2sf (__a, __b, 5) + +#define vpadd_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vpaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vpadd_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vpaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vpadd_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vpaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vpaddl_s8(__a) \ + (int16x4_t)__builtin_neon_vpaddlv8qi (__a, 1) + +#define vpaddl_s16(__a) \ + (int32x2_t)__builtin_neon_vpaddlv4hi (__a, 1) + +#define vpaddl_s32(__a) \ + (int64x1_t)__builtin_neon_vpaddlv2si (__a, 1) + +#define vpaddl_u8(__a) \ + (uint16x4_t)__builtin_neon_vpaddlv8qi ((int8x8_t) __a, 0) + +#define vpaddl_u16(__a) \ + (uint32x2_t)__builtin_neon_vpaddlv4hi ((int16x4_t) __a, 0) + +#define vpaddl_u32(__a) \ + (uint64x1_t)__builtin_neon_vpaddlv2si ((int32x2_t) __a, 0) + +#define vpaddlq_s8(__a) \ + (int16x8_t)__builtin_neon_vpaddlv16qi (__a, 1) + +#define vpaddlq_s16(__a) \ + (int32x4_t)__builtin_neon_vpaddlv8hi (__a, 1) + +#define vpaddlq_s32(__a) \ + 
(int64x2_t)__builtin_neon_vpaddlv4si (__a, 1) + +#define vpaddlq_u8(__a) \ + (uint16x8_t)__builtin_neon_vpaddlv16qi ((int8x16_t) __a, 0) + +#define vpaddlq_u16(__a) \ + (uint32x4_t)__builtin_neon_vpaddlv8hi ((int16x8_t) __a, 0) + +#define vpaddlq_u32(__a) \ + (uint64x2_t)__builtin_neon_vpaddlv4si ((int32x4_t) __a, 0) + +#define vpadal_s8(__a, __b) \ + (int16x4_t)__builtin_neon_vpadalv8qi (__a, __b, 1) + +#define vpadal_s16(__a, __b) \ + (int32x2_t)__builtin_neon_vpadalv4hi (__a, __b, 1) + +#define vpadal_s32(__a, __b) \ + (int64x1_t)__builtin_neon_vpadalv2si (__a, __b, 1) + +#define vpadal_u8(__a, __b) \ + (uint16x4_t)__builtin_neon_vpadalv8qi ((int16x4_t) __a, (int8x8_t) __b, 0) + +#define vpadal_u16(__a, __b) \ + (uint32x2_t)__builtin_neon_vpadalv4hi ((int32x2_t) __a, (int16x4_t) __b, 0) + +#define vpadal_u32(__a, __b) \ + (uint64x1_t)__builtin_neon_vpadalv2si ((int64x1_t) __a, (int32x2_t) __b, 0) + +#define vpadalq_s8(__a, __b) \ + (int16x8_t)__builtin_neon_vpadalv16qi (__a, __b, 1) + +#define vpadalq_s16(__a, __b) \ + (int32x4_t)__builtin_neon_vpadalv8hi (__a, __b, 1) + +#define vpadalq_s32(__a, __b) \ + (int64x2_t)__builtin_neon_vpadalv4si (__a, __b, 1) + +#define vpadalq_u8(__a, __b) \ + (uint16x8_t)__builtin_neon_vpadalv16qi ((int16x8_t) __a, (int8x16_t) __b, 0) + +#define vpadalq_u16(__a, __b) \ + (uint32x4_t)__builtin_neon_vpadalv8hi ((int32x4_t) __a, (int16x8_t) __b, 0) + +#define vpadalq_u32(__a, __b) \ + (uint64x2_t)__builtin_neon_vpadalv4si ((int64x2_t) __a, (int32x4_t) __b, 0) + +#define vpmax_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vpmaxv8qi (__a, __b, 1) + +#define vpmax_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vpmaxv4hi (__a, __b, 1) + +#define vpmax_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vpmaxv2si (__a, __b, 1) + +#define vpmax_f32(__a, __b) \ + (float32x2_t)__builtin_neon_vpmaxv2sf (__a, __b, 5) + +#define vpmax_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vpmaxv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vpmax_u16(__a, __b) \ + 
(uint16x4_t)__builtin_neon_vpmaxv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vpmax_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vpmaxv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vpmin_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vpminv8qi (__a, __b, 1) + +#define vpmin_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vpminv4hi (__a, __b, 1) + +#define vpmin_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vpminv2si (__a, __b, 1) + +#define vpmin_f32(__a, __b) \ + (float32x2_t)__builtin_neon_vpminv2sf (__a, __b, 5) + +#define vpmin_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vpminv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vpmin_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vpminv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vpmin_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vpminv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vrecps_f32(__a, __b) \ + (float32x2_t)__builtin_neon_vrecpsv2sf (__a, __b, 5) + +#define vrecpsq_f32(__a, __b) \ + (float32x4_t)__builtin_neon_vrecpsv4sf (__a, __b, 5) + +#define vrsqrts_f32(__a, __b) \ + (float32x2_t)__builtin_neon_vrsqrtsv2sf (__a, __b, 5) + +#define vrsqrtsq_f32(__a, __b) \ + (float32x4_t)__builtin_neon_vrsqrtsv4sf (__a, __b, 5) + +#define vshl_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 1) + +#define vshl_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 1) + +#define vshl_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vshlv2si (__a, __b, 1) + +#define vshl_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vshlv1di (__a, __b, 1) + +#define vshl_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vshlv8qi ((int8x8_t) __a, __b, 0) + +#define vshl_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vshlv4hi ((int16x4_t) __a, __b, 0) + +#define vshl_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vshlv2si ((int32x2_t) __a, __b, 0) + +#define vshl_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vshlv1di ((int64x1_t) __a, __b, 0) + +#define vshlq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vshlv16qi (__a, 
__b, 1) + +#define vshlq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 1) + +#define vshlq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vshlv4si (__a, __b, 1) + +#define vshlq_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vshlv2di (__a, __b, 1) + +#define vshlq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vshlv16qi ((int8x16_t) __a, __b, 0) + +#define vshlq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vshlv8hi ((int16x8_t) __a, __b, 0) + +#define vshlq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vshlv4si ((int32x4_t) __a, __b, 0) + +#define vshlq_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vshlv2di ((int64x2_t) __a, __b, 0) + +#define vrshl_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vshlv8qi (__a, __b, 3) + +#define vrshl_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vshlv4hi (__a, __b, 3) + +#define vrshl_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vshlv2si (__a, __b, 3) + +#define vrshl_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vshlv1di (__a, __b, 3) + +#define vrshl_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vshlv8qi ((int8x8_t) __a, __b, 2) + +#define vrshl_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vshlv4hi ((int16x4_t) __a, __b, 2) + +#define vrshl_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vshlv2si ((int32x2_t) __a, __b, 2) + +#define vrshl_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vshlv1di ((int64x1_t) __a, __b, 2) + +#define vrshlq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vshlv16qi (__a, __b, 3) + +#define vrshlq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vshlv8hi (__a, __b, 3) + +#define vrshlq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vshlv4si (__a, __b, 3) + +#define vrshlq_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vshlv2di (__a, __b, 3) + +#define vrshlq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vshlv16qi ((int8x16_t) __a, __b, 2) + +#define vrshlq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vshlv8hi ((int16x8_t) __a, __b, 2) + +#define vrshlq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vshlv4si ((int32x4_t) __a, __b, 
2) + +#define vrshlq_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vshlv2di ((int64x2_t) __a, __b, 2) + +#define vqshl_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 1) + +#define vqshl_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 1) + +#define vqshl_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 1) + +#define vqshl_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vqshlv1di (__a, __b, 1) + +#define vqshl_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vqshlv8qi ((int8x8_t) __a, __b, 0) + +#define vqshl_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vqshlv4hi ((int16x4_t) __a, __b, 0) + +#define vqshl_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vqshlv2si ((int32x2_t) __a, __b, 0) + +#define vqshl_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vqshlv1di ((int64x1_t) __a, __b, 0) + +#define vqshlq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 1) + +#define vqshlq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 1) + +#define vqshlq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 1) + +#define vqshlq_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 1) + +#define vqshlq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vqshlv16qi ((int8x16_t) __a, __b, 0) + +#define vqshlq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vqshlv8hi ((int16x8_t) __a, __b, 0) + +#define vqshlq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vqshlv4si ((int32x4_t) __a, __b, 0) + +#define vqshlq_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vqshlv2di ((int64x2_t) __a, __b, 0) + +#define vqrshl_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vqshlv8qi (__a, __b, 3) + +#define vqrshl_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vqshlv4hi (__a, __b, 3) + +#define vqrshl_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vqshlv2si (__a, __b, 3) + +#define vqrshl_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vqshlv1di (__a, __b, 3) + +#define vqrshl_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vqshlv8qi ((int8x8_t) __a, 
__b, 2) + +#define vqrshl_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vqshlv4hi ((int16x4_t) __a, __b, 2) + +#define vqrshl_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vqshlv2si ((int32x2_t) __a, __b, 2) + +#define vqrshl_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vqshlv1di ((int64x1_t) __a, __b, 2) + +#define vqrshlq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vqshlv16qi (__a, __b, 3) + +#define vqrshlq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vqshlv8hi (__a, __b, 3) + +#define vqrshlq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vqshlv4si (__a, __b, 3) + +#define vqrshlq_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vqshlv2di (__a, __b, 3) + +#define vqrshlq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vqshlv16qi ((int8x16_t) __a, __b, 2) + +#define vqrshlq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vqshlv8hi ((int16x8_t) __a, __b, 2) + +#define vqrshlq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vqshlv4si ((int32x4_t) __a, __b, 2) + +#define vqrshlq_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vqshlv2di ((int64x2_t) __a, __b, 2) + +#define vshr_n_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 1) + +#define vshr_n_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 1) + +#define vshr_n_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 1) + +#define vshr_n_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vshr_nv1di (__a, __b, 1) + +#define vshr_n_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vshr_nv8qi ((int8x8_t) __a, __b, 0) + +#define vshr_n_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vshr_nv4hi ((int16x4_t) __a, __b, 0) + +#define vshr_n_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vshr_nv2si ((int32x2_t) __a, __b, 0) + +#define vshr_n_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vshr_nv1di ((int64x1_t) __a, __b, 0) + +#define vshrq_n_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 1) + +#define vshrq_n_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 1) + +#define vshrq_n_s32(__a, 
__b) \ + (int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 1) + +#define vshrq_n_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 1) + +#define vshrq_n_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vshr_nv16qi ((int8x16_t) __a, __b, 0) + +#define vshrq_n_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vshr_nv8hi ((int16x8_t) __a, __b, 0) + +#define vshrq_n_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vshr_nv4si ((int32x4_t) __a, __b, 0) + +#define vshrq_n_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vshr_nv2di ((int64x2_t) __a, __b, 0) + +#define vrshr_n_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vshr_nv8qi (__a, __b, 3) + +#define vrshr_n_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vshr_nv4hi (__a, __b, 3) + +#define vrshr_n_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vshr_nv2si (__a, __b, 3) + +#define vrshr_n_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vshr_nv1di (__a, __b, 3) + +#define vrshr_n_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vshr_nv8qi ((int8x8_t) __a, __b, 2) + +#define vrshr_n_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vshr_nv4hi ((int16x4_t) __a, __b, 2) + +#define vrshr_n_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vshr_nv2si ((int32x2_t) __a, __b, 2) + +#define vrshr_n_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vshr_nv1di ((int64x1_t) __a, __b, 2) + +#define vrshrq_n_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vshr_nv16qi (__a, __b, 3) + +#define vrshrq_n_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 3) + +#define vrshrq_n_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vshr_nv4si (__a, __b, 3) + +#define vrshrq_n_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vshr_nv2di (__a, __b, 3) + +#define vrshrq_n_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vshr_nv16qi ((int8x16_t) __a, __b, 2) + +#define vrshrq_n_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vshr_nv8hi ((int16x8_t) __a, __b, 2) + +#define vrshrq_n_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vshr_nv4si ((int32x4_t) __a, __b, 2) + +#define vrshrq_n_u64(__a, __b) 
\ + (uint64x2_t)__builtin_neon_vshr_nv2di ((int64x2_t) __a, __b, 2) + +#define vshrn_n_s16(__a, __b) \ + (int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 1) + +#define vshrn_n_s32(__a, __b) \ + (int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 1) + +#define vshrn_n_s64(__a, __b) \ + (int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 1) + +#define vshrn_n_u16(__a, __b) \ + (uint8x8_t)__builtin_neon_vshrn_nv8hi ((int16x8_t) __a, __b, 0) + +#define vshrn_n_u32(__a, __b) \ + (uint16x4_t)__builtin_neon_vshrn_nv4si ((int32x4_t) __a, __b, 0) + +#define vshrn_n_u64(__a, __b) \ + (uint32x2_t)__builtin_neon_vshrn_nv2di ((int64x2_t) __a, __b, 0) + +#define vrshrn_n_s16(__a, __b) \ + (int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b, 3) + +#define vrshrn_n_s32(__a, __b) \ + (int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b, 3) + +#define vrshrn_n_s64(__a, __b) \ + (int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b, 3) + +#define vrshrn_n_u16(__a, __b) \ + (uint8x8_t)__builtin_neon_vshrn_nv8hi ((int16x8_t) __a, __b, 2) + +#define vrshrn_n_u32(__a, __b) \ + (uint16x4_t)__builtin_neon_vshrn_nv4si ((int32x4_t) __a, __b, 2) + +#define vrshrn_n_u64(__a, __b) \ + (uint32x2_t)__builtin_neon_vshrn_nv2di ((int64x2_t) __a, __b, 2) + +#define vqshrn_n_s16(__a, __b) \ + (int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 1) + +#define vqshrn_n_s32(__a, __b) \ + (int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 1) + +#define vqshrn_n_s64(__a, __b) \ + (int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 1) + +#define vqshrn_n_u16(__a, __b) \ + (uint8x8_t)__builtin_neon_vqshrn_nv8hi ((int16x8_t) __a, __b, 0) + +#define vqshrn_n_u32(__a, __b) \ + (uint16x4_t)__builtin_neon_vqshrn_nv4si ((int32x4_t) __a, __b, 0) + +#define vqshrn_n_u64(__a, __b) \ + (uint32x2_t)__builtin_neon_vqshrn_nv2di ((int64x2_t) __a, __b, 0) + +#define vqrshrn_n_s16(__a, __b) \ + (int8x8_t)__builtin_neon_vqshrn_nv8hi (__a, __b, 3) + +#define vqrshrn_n_s32(__a, __b) \ + (int16x4_t)__builtin_neon_vqshrn_nv4si (__a, __b, 3) + +#define 
vqrshrn_n_s64(__a, __b) \ + (int32x2_t)__builtin_neon_vqshrn_nv2di (__a, __b, 3) + +#define vqrshrn_n_u16(__a, __b) \ + (uint8x8_t)__builtin_neon_vqshrn_nv8hi ((int16x8_t) __a, __b, 2) + +#define vqrshrn_n_u32(__a, __b) \ + (uint16x4_t)__builtin_neon_vqshrn_nv4si ((int32x4_t) __a, __b, 2) + +#define vqrshrn_n_u64(__a, __b) \ + (uint32x2_t)__builtin_neon_vqshrn_nv2di ((int64x2_t) __a, __b, 2) + +#define vqshrun_n_s16(__a, __b) \ + (uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 1) + +#define vqshrun_n_s32(__a, __b) \ + (uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 1) + +#define vqshrun_n_s64(__a, __b) \ + (uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 1) + +#define vqrshrun_n_s16(__a, __b) \ + (uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b, 3) + +#define vqrshrun_n_s32(__a, __b) \ + (uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b, 3) + +#define vqrshrun_n_s64(__a, __b) \ + (uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b, 3) + +#define vshl_n_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vshl_nv8qi (__a, __b, 1) + +#define vshl_n_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vshl_nv4hi (__a, __b, 1) + +#define vshl_n_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vshl_nv2si (__a, __b, 1) + +#define vshl_n_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vshl_nv1di (__a, __b, 1) + +#define vshl_n_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vshl_nv8qi ((int8x8_t) __a, __b, 0) + +#define vshl_n_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vshl_nv4hi ((int16x4_t) __a, __b, 0) + +#define vshl_n_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vshl_nv2si ((int32x2_t) __a, __b, 0) + +#define vshl_n_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vshl_nv1di ((int64x1_t) __a, __b, 0) + +#define vshlq_n_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vshl_nv16qi (__a, __b, 1) + +#define vshlq_n_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b, 1) + +#define vshlq_n_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vshl_nv4si (__a, __b, 1) + +#define vshlq_n_s64(__a, __b) 
\ + (int64x2_t)__builtin_neon_vshl_nv2di (__a, __b, 1) + +#define vshlq_n_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vshl_nv16qi ((int8x16_t) __a, __b, 0) + +#define vshlq_n_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vshl_nv8hi ((int16x8_t) __a, __b, 0) + +#define vshlq_n_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vshl_nv4si ((int32x4_t) __a, __b, 0) + +#define vshlq_n_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vshl_nv2di ((int64x2_t) __a, __b, 0) + +#define vqshl_n_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vqshl_nv8qi (__a, __b, 1) + +#define vqshl_n_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vqshl_nv4hi (__a, __b, 1) + +#define vqshl_n_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vqshl_nv2si (__a, __b, 1) + +#define vqshl_n_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vqshl_nv1di (__a, __b, 1) + +#define vqshl_n_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vqshl_nv8qi ((int8x8_t) __a, __b, 0) + +#define vqshl_n_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vqshl_nv4hi ((int16x4_t) __a, __b, 0) + +#define vqshl_n_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vqshl_nv2si ((int32x2_t) __a, __b, 0) + +#define vqshl_n_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vqshl_nv1di ((int64x1_t) __a, __b, 0) + +#define vqshlq_n_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vqshl_nv16qi (__a, __b, 1) + +#define vqshlq_n_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vqshl_nv8hi (__a, __b, 1) + +#define vqshlq_n_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vqshl_nv4si (__a, __b, 1) + +#define vqshlq_n_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vqshl_nv2di (__a, __b, 1) + +#define vqshlq_n_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vqshl_nv16qi ((int8x16_t) __a, __b, 0) + +#define vqshlq_n_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vqshl_nv8hi ((int16x8_t) __a, __b, 0) + +#define vqshlq_n_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vqshl_nv4si ((int32x4_t) __a, __b, 0) + +#define vqshlq_n_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vqshl_nv2di ((int64x2_t) __a, __b, 0) + 
+#define vqshlu_n_s8(__a, __b) \ + (uint8x8_t)__builtin_neon_vqshlu_nv8qi (__a, __b, 1) + +#define vqshlu_n_s16(__a, __b) \ + (uint16x4_t)__builtin_neon_vqshlu_nv4hi (__a, __b, 1) + +#define vqshlu_n_s32(__a, __b) \ + (uint32x2_t)__builtin_neon_vqshlu_nv2si (__a, __b, 1) + +#define vqshlu_n_s64(__a, __b) \ + (uint64x1_t)__builtin_neon_vqshlu_nv1di (__a, __b, 1) + +#define vqshluq_n_s8(__a, __b) \ + (uint8x16_t)__builtin_neon_vqshlu_nv16qi (__a, __b, 1) + +#define vqshluq_n_s16(__a, __b) \ + (uint16x8_t)__builtin_neon_vqshlu_nv8hi (__a, __b, 1) + +#define vqshluq_n_s32(__a, __b) \ + (uint32x4_t)__builtin_neon_vqshlu_nv4si (__a, __b, 1) + +#define vqshluq_n_s64(__a, __b) \ + (uint64x2_t)__builtin_neon_vqshlu_nv2di (__a, __b, 1) + +#define vshll_n_s8(__a, __b) \ + (int16x8_t)__builtin_neon_vshll_nv8qi (__a, __b, 1) + +#define vshll_n_s16(__a, __b) \ + (int32x4_t)__builtin_neon_vshll_nv4hi (__a, __b, 1) + +#define vshll_n_s32(__a, __b) \ + (int64x2_t)__builtin_neon_vshll_nv2si (__a, __b, 1) + +#define vshll_n_u8(__a, __b) \ + (uint16x8_t)__builtin_neon_vshll_nv8qi ((int8x8_t) __a, __b, 0) + +#define vshll_n_u16(__a, __b) \ + (uint32x4_t)__builtin_neon_vshll_nv4hi ((int16x4_t) __a, __b, 0) + +#define vshll_n_u32(__a, __b) \ + (uint64x2_t)__builtin_neon_vshll_nv2si ((int32x2_t) __a, __b, 0) + +#define vsra_n_s8(__a, __b, __c) \ + (int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 1) + +#define vsra_n_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 1) + +#define vsra_n_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 1) + +#define vsra_n_s64(__a, __b, __c) \ + (int64x1_t)__builtin_neon_vsra_nv1di (__a, __b, __c, 1) + +#define vsra_n_u8(__a, __b, __c) \ + (uint8x8_t)__builtin_neon_vsra_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c, 0) + +#define vsra_n_u16(__a, __b, __c) \ + (uint16x4_t)__builtin_neon_vsra_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0) + +#define vsra_n_u32(__a, __b, __c) \ + 
(uint32x2_t)__builtin_neon_vsra_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0) + +#define vsra_n_u64(__a, __b, __c) \ + (uint64x1_t)__builtin_neon_vsra_nv1di ((int64x1_t) __a, (int64x1_t) __b, __c, 0) + +#define vsraq_n_s8(__a, __b, __c) \ + (int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 1) + +#define vsraq_n_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 1) + +#define vsraq_n_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 1) + +#define vsraq_n_s64(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 1) + +#define vsraq_n_u8(__a, __b, __c) \ + (uint8x16_t)__builtin_neon_vsra_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c, 0) + +#define vsraq_n_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vsra_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c, 0) + +#define vsraq_n_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vsra_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c, 0) + +#define vsraq_n_u64(__a, __b, __c) \ + (uint64x2_t)__builtin_neon_vsra_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c, 0) + +#define vrsra_n_s8(__a, __b, __c) \ + (int8x8_t)__builtin_neon_vsra_nv8qi (__a, __b, __c, 3) + +#define vrsra_n_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vsra_nv4hi (__a, __b, __c, 3) + +#define vrsra_n_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vsra_nv2si (__a, __b, __c, 3) + +#define vrsra_n_s64(__a, __b, __c) \ + (int64x1_t)__builtin_neon_vsra_nv1di (__a, __b, __c, 3) + +#define vrsra_n_u8(__a, __b, __c) \ + (uint8x8_t)__builtin_neon_vsra_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c, 2) + +#define vrsra_n_u16(__a, __b, __c) \ + (uint16x4_t)__builtin_neon_vsra_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 2) + +#define vrsra_n_u32(__a, __b, __c) \ + (uint32x2_t)__builtin_neon_vsra_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c, 2) + +#define vrsra_n_u64(__a, __b, __c) \ + (uint64x1_t)__builtin_neon_vsra_nv1di ((int64x1_t) __a, (int64x1_t) __b, __c, 2) + +#define vrsraq_n_s8(__a, __b, 
__c) \ + (int8x16_t)__builtin_neon_vsra_nv16qi (__a, __b, __c, 3) + +#define vrsraq_n_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vsra_nv8hi (__a, __b, __c, 3) + +#define vrsraq_n_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vsra_nv4si (__a, __b, __c, 3) + +#define vrsraq_n_s64(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vsra_nv2di (__a, __b, __c, 3) + +#define vrsraq_n_u8(__a, __b, __c) \ + (uint8x16_t)__builtin_neon_vsra_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c, 2) + +#define vrsraq_n_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vsra_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c, 2) + +#define vrsraq_n_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vsra_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c, 2) + +#define vrsraq_n_u64(__a, __b, __c) \ + (uint64x2_t)__builtin_neon_vsra_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c, 2) + +#define vsri_n_s8(__a, __b, __c) \ + (int8x8_t)__builtin_neon_vsri_nv8qi (__a, __b, __c) + +#define vsri_n_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vsri_nv4hi (__a, __b, __c) + +#define vsri_n_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vsri_nv2si (__a, __b, __c) + +#define vsri_n_s64(__a, __b, __c) \ + (int64x1_t)__builtin_neon_vsri_nv1di (__a, __b, __c) + +#define vsri_n_u8(__a, __b, __c) \ + (uint8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c) + +#define vsri_n_u16(__a, __b, __c) \ + (uint16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c) + +#define vsri_n_u32(__a, __b, __c) \ + (uint32x2_t)__builtin_neon_vsri_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c) + +#define vsri_n_u64(__a, __b, __c) \ + (uint64x1_t)__builtin_neon_vsri_nv1di ((int64x1_t) __a, (int64x1_t) __b, __c) + +#define vsri_n_p8(__a, __b, __c) \ + (poly8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c) + +#define vsri_n_p16(__a, __b, __c) \ + (poly16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c) + +#define vsriq_n_s8(__a, __b, __c) \ + 
(int8x16_t)__builtin_neon_vsri_nv16qi (__a, __b, __c) + +#define vsriq_n_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vsri_nv8hi (__a, __b, __c) + +#define vsriq_n_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vsri_nv4si (__a, __b, __c) + +#define vsriq_n_s64(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vsri_nv2di (__a, __b, __c) + +#define vsriq_n_u8(__a, __b, __c) \ + (uint8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c) + +#define vsriq_n_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c) + +#define vsriq_n_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vsri_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c) + +#define vsriq_n_u64(__a, __b, __c) \ + (uint64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c) + +#define vsriq_n_p8(__a, __b, __c) \ + (poly8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c) + +#define vsriq_n_p16(__a, __b, __c) \ + (poly16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c) + +#define vsli_n_s8(__a, __b, __c) \ + (int8x8_t)__builtin_neon_vsli_nv8qi (__a, __b, __c) + +#define vsli_n_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vsli_nv4hi (__a, __b, __c) + +#define vsli_n_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vsli_nv2si (__a, __b, __c) + +#define vsli_n_s64(__a, __b, __c) \ + (int64x1_t)__builtin_neon_vsli_nv1di (__a, __b, __c) + +#define vsli_n_u8(__a, __b, __c) \ + (uint8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c) + +#define vsli_n_u16(__a, __b, __c) \ + (uint16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c) + +#define vsli_n_u32(__a, __b, __c) \ + (uint32x2_t)__builtin_neon_vsli_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c) + +#define vsli_n_u64(__a, __b, __c) \ + (uint64x1_t)__builtin_neon_vsli_nv1di ((int64x1_t) __a, (int64x1_t) __b, __c) + +#define vsli_n_p8(__a, __b, __c) \ + (poly8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, 
(int8x8_t) __b, __c) + +#define vsli_n_p16(__a, __b, __c) \ + (poly16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c) + +#define vsliq_n_s8(__a, __b, __c) \ + (int8x16_t)__builtin_neon_vsli_nv16qi (__a, __b, __c) + +#define vsliq_n_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vsli_nv8hi (__a, __b, __c) + +#define vsliq_n_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vsli_nv4si (__a, __b, __c) + +#define vsliq_n_s64(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vsli_nv2di (__a, __b, __c) + +#define vsliq_n_u8(__a, __b, __c) \ + (uint8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c) + +#define vsliq_n_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c) + +#define vsliq_n_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vsli_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c) + +#define vsliq_n_u64(__a, __b, __c) \ + (uint64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c) + +#define vsliq_n_p8(__a, __b, __c) \ + (poly8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c) + +#define vsliq_n_p16(__a, __b, __c) \ + (poly16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c) + +#define vabs_s8(__a) \ + (int8x8_t)__builtin_neon_vabsv8qi (__a, 1) + +#define vabs_s16(__a) \ + (int16x4_t)__builtin_neon_vabsv4hi (__a, 1) + +#define vabs_s32(__a) \ + (int32x2_t)__builtin_neon_vabsv2si (__a, 1) + +#define vabs_f32(__a) \ + (float32x2_t)__builtin_neon_vabsv2sf (__a, 5) + +#define vabsq_s8(__a) \ + (int8x16_t)__builtin_neon_vabsv16qi (__a, 1) + +#define vabsq_s16(__a) \ + (int16x8_t)__builtin_neon_vabsv8hi (__a, 1) + +#define vabsq_s32(__a) \ + (int32x4_t)__builtin_neon_vabsv4si (__a, 1) + +#define vabsq_f32(__a) \ + (float32x4_t)__builtin_neon_vabsv4sf (__a, 5) + +#define vqabs_s8(__a) \ + (int8x8_t)__builtin_neon_vqabsv8qi (__a, 1) + +#define vqabs_s16(__a) \ + (int16x4_t)__builtin_neon_vqabsv4hi (__a, 1) + +#define 
vqabs_s32(__a) \ + (int32x2_t)__builtin_neon_vqabsv2si (__a, 1) + +#define vqabsq_s8(__a) \ + (int8x16_t)__builtin_neon_vqabsv16qi (__a, 1) + +#define vqabsq_s16(__a) \ + (int16x8_t)__builtin_neon_vqabsv8hi (__a, 1) + +#define vqabsq_s32(__a) \ + (int32x4_t)__builtin_neon_vqabsv4si (__a, 1) + +#define vneg_s8(__a) \ + (int8x8_t)__builtin_neon_vnegv8qi (__a, 1) + +#define vneg_s16(__a) \ + (int16x4_t)__builtin_neon_vnegv4hi (__a, 1) + +#define vneg_s32(__a) \ + (int32x2_t)__builtin_neon_vnegv2si (__a, 1) + +#define vneg_f32(__a) \ + (float32x2_t)__builtin_neon_vnegv2sf (__a, 5) + +#define vnegq_s8(__a) \ + (int8x16_t)__builtin_neon_vnegv16qi (__a, 1) + +#define vnegq_s16(__a) \ + (int16x8_t)__builtin_neon_vnegv8hi (__a, 1) + +#define vnegq_s32(__a) \ + (int32x4_t)__builtin_neon_vnegv4si (__a, 1) + +#define vnegq_f32(__a) \ + (float32x4_t)__builtin_neon_vnegv4sf (__a, 5) + +#define vqneg_s8(__a) \ + (int8x8_t)__builtin_neon_vqnegv8qi (__a, 1) + +#define vqneg_s16(__a) \ + (int16x4_t)__builtin_neon_vqnegv4hi (__a, 1) + +#define vqneg_s32(__a) \ + (int32x2_t)__builtin_neon_vqnegv2si (__a, 1) + +#define vqnegq_s8(__a) \ + (int8x16_t)__builtin_neon_vqnegv16qi (__a, 1) + +#define vqnegq_s16(__a) \ + (int16x8_t)__builtin_neon_vqnegv8hi (__a, 1) + +#define vqnegq_s32(__a) \ + (int32x4_t)__builtin_neon_vqnegv4si (__a, 1) + +#define vmvn_s8(__a) \ + (int8x8_t)__builtin_neon_vmvnv8qi (__a, 1) + +#define vmvn_s16(__a) \ + (int16x4_t)__builtin_neon_vmvnv4hi (__a, 1) + +#define vmvn_s32(__a) \ + (int32x2_t)__builtin_neon_vmvnv2si (__a, 1) + +#define vmvn_u8(__a) \ + (uint8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a, 0) + +#define vmvn_u16(__a) \ + (uint16x4_t)__builtin_neon_vmvnv4hi ((int16x4_t) __a, 0) + +#define vmvn_u32(__a) \ + (uint32x2_t)__builtin_neon_vmvnv2si ((int32x2_t) __a, 0) + +#define vmvn_p8(__a) \ + (poly8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a, 4) + +#define vmvnq_s8(__a) \ + (int8x16_t)__builtin_neon_vmvnv16qi (__a, 1) + +#define vmvnq_s16(__a) \ + 
(int16x8_t)__builtin_neon_vmvnv8hi (__a, 1) + +#define vmvnq_s32(__a) \ + (int32x4_t)__builtin_neon_vmvnv4si (__a, 1) + +#define vmvnq_u8(__a) \ + (uint8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a, 0) + +#define vmvnq_u16(__a) \ + (uint16x8_t)__builtin_neon_vmvnv8hi ((int16x8_t) __a, 0) + +#define vmvnq_u32(__a) \ + (uint32x4_t)__builtin_neon_vmvnv4si ((int32x4_t) __a, 0) + +#define vmvnq_p8(__a) \ + (poly8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a, 4) + +#define vcls_s8(__a) \ + (int8x8_t)__builtin_neon_vclsv8qi (__a, 1) + +#define vcls_s16(__a) \ + (int16x4_t)__builtin_neon_vclsv4hi (__a, 1) + +#define vcls_s32(__a) \ + (int32x2_t)__builtin_neon_vclsv2si (__a, 1) + +#define vclsq_s8(__a) \ + (int8x16_t)__builtin_neon_vclsv16qi (__a, 1) + +#define vclsq_s16(__a) \ + (int16x8_t)__builtin_neon_vclsv8hi (__a, 1) + +#define vclsq_s32(__a) \ + (int32x4_t)__builtin_neon_vclsv4si (__a, 1) + +#define vclz_s8(__a) \ + (int8x8_t)__builtin_neon_vclzv8qi (__a, 1) + +#define vclz_s16(__a) \ + (int16x4_t)__builtin_neon_vclzv4hi (__a, 1) + +#define vclz_s32(__a) \ + (int32x2_t)__builtin_neon_vclzv2si (__a, 1) + +#define vclz_u8(__a) \ + (uint8x8_t)__builtin_neon_vclzv8qi ((int8x8_t) __a, 0) + +#define vclz_u16(__a) \ + (uint16x4_t)__builtin_neon_vclzv4hi ((int16x4_t) __a, 0) + +#define vclz_u32(__a) \ + (uint32x2_t)__builtin_neon_vclzv2si ((int32x2_t) __a, 0) + +#define vclzq_s8(__a) \ + (int8x16_t)__builtin_neon_vclzv16qi (__a, 1) + +#define vclzq_s16(__a) \ + (int16x8_t)__builtin_neon_vclzv8hi (__a, 1) + +#define vclzq_s32(__a) \ + (int32x4_t)__builtin_neon_vclzv4si (__a, 1) + +#define vclzq_u8(__a) \ + (uint8x16_t)__builtin_neon_vclzv16qi ((int8x16_t) __a, 0) + +#define vclzq_u16(__a) \ + (uint16x8_t)__builtin_neon_vclzv8hi ((int16x8_t) __a, 0) + +#define vclzq_u32(__a) \ + (uint32x4_t)__builtin_neon_vclzv4si ((int32x4_t) __a, 0) + +#define vcnt_s8(__a) \ + (int8x8_t)__builtin_neon_vcntv8qi (__a, 1) + +#define vcnt_u8(__a) \ + (uint8x8_t)__builtin_neon_vcntv8qi 
((int8x8_t) __a, 0) + +#define vcnt_p8(__a) \ + (poly8x8_t)__builtin_neon_vcntv8qi ((int8x8_t) __a, 4) + +#define vcntq_s8(__a) \ + (int8x16_t)__builtin_neon_vcntv16qi (__a, 1) + +#define vcntq_u8(__a) \ + (uint8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a, 0) + +#define vcntq_p8(__a) \ + (poly8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a, 4) + +#define vrecpe_f32(__a) \ + (float32x2_t)__builtin_neon_vrecpev2sf (__a, 5) + +#define vrecpe_u32(__a) \ + (uint32x2_t)__builtin_neon_vrecpev2si ((int32x2_t) __a, 0) + +#define vrecpeq_f32(__a) \ + (float32x4_t)__builtin_neon_vrecpev4sf (__a, 5) + +#define vrecpeq_u32(__a) \ + (uint32x4_t)__builtin_neon_vrecpev4si ((int32x4_t) __a, 0) + +#define vrsqrte_f32(__a) \ + (float32x2_t)__builtin_neon_vrsqrtev2sf (__a, 5) + +#define vrsqrte_u32(__a) \ + (uint32x2_t)__builtin_neon_vrsqrtev2si ((int32x2_t) __a, 0) + +#define vrsqrteq_f32(__a) \ + (float32x4_t)__builtin_neon_vrsqrtev4sf (__a, 5) + +#define vrsqrteq_u32(__a) \ + (uint32x4_t)__builtin_neon_vrsqrtev4si ((int32x4_t) __a, 0) + +#define vget_lane_s8(__a, __b) \ + (int8_t)__builtin_neon_vget_lanev8qi (__a, __b, 1) + +#define vget_lane_s16(__a, __b) \ + (int16_t)__builtin_neon_vget_lanev4hi (__a, __b, 1) + +#define vget_lane_s32(__a, __b) \ + (int32_t)__builtin_neon_vget_lanev2si (__a, __b, 1) + +#define vget_lane_f32(__a, __b) \ + (float32_t)__builtin_neon_vget_lanev2sf (__a, __b, 5) + +#define vget_lane_u8(__a, __b) \ + (uint8_t)__builtin_neon_vget_lanev8qi ((int8x8_t) __a, __b, 0) + +#define vget_lane_u16(__a, __b) \ + (uint16_t)__builtin_neon_vget_lanev4hi ((int16x4_t) __a, __b, 0) + +#define vget_lane_u32(__a, __b) \ + (uint32_t)__builtin_neon_vget_lanev2si ((int32x2_t) __a, __b, 0) + +#define vget_lane_p8(__a, __b) \ + (poly8_t)__builtin_neon_vget_lanev8qi ((int8x8_t) __a, __b, 4) + +#define vget_lane_p16(__a, __b) \ + (poly16_t)__builtin_neon_vget_lanev4hi ((int16x4_t) __a, __b, 4) + +#define vget_lane_s64(__a, __b) \ + (int64_t)__builtin_neon_vget_lanev1di (__a, 
__b, 1) + +#define vget_lane_u64(__a, __b) \ + (uint64_t)__builtin_neon_vget_lanev1di ((int64x1_t) __a, __b, 0) + +#define vgetq_lane_s8(__a, __b) \ + (int8_t)__builtin_neon_vget_lanev16qi (__a, __b, 1) + +#define vgetq_lane_s16(__a, __b) \ + (int16_t)__builtin_neon_vget_lanev8hi (__a, __b, 1) + +#define vgetq_lane_s32(__a, __b) \ + (int32_t)__builtin_neon_vget_lanev4si (__a, __b, 1) + +#define vgetq_lane_f32(__a, __b) \ + (float32_t)__builtin_neon_vget_lanev4sf (__a, __b, 5) + +#define vgetq_lane_u8(__a, __b) \ + (uint8_t)__builtin_neon_vget_lanev16qi ((int8x16_t) __a, __b, 0) + +#define vgetq_lane_u16(__a, __b) \ + (uint16_t)__builtin_neon_vget_lanev8hi ((int16x8_t) __a, __b, 0) + +#define vgetq_lane_u32(__a, __b) \ + (uint32_t)__builtin_neon_vget_lanev4si ((int32x4_t) __a, __b, 0) + +#define vgetq_lane_p8(__a, __b) \ + (poly8_t)__builtin_neon_vget_lanev16qi ((int8x16_t) __a, __b, 4) + +#define vgetq_lane_p16(__a, __b) \ + (poly16_t)__builtin_neon_vget_lanev8hi ((int16x8_t) __a, __b, 4) + +#define vgetq_lane_s64(__a, __b) \ + (int64_t)__builtin_neon_vget_lanev2di (__a, __b, 1) + +#define vgetq_lane_u64(__a, __b) \ + (uint64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b, 0) + +#define vset_lane_s8(__a, __b, __c) \ + (int8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, __b, __c) + +#define vset_lane_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, __b, __c) + +#define vset_lane_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, __b, __c) + +#define vset_lane_f32(__a, __b, __c) \ + (float32x2_t)__builtin_neon_vset_lanev2sf (__a, __b, __c) + +#define vset_lane_u8(__a, __b, __c) \ + (uint8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c) + +#define vset_lane_u16(__a, __b, __c) \ + (uint16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, (int16x4_t) __b, __c) + +#define vset_lane_u32(__a, __b, __c) \ + 
(uint32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, (int32x2_t) __b, __c) + +#define vset_lane_p8(__a, __b, __c) \ + (poly8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c) + +#define vset_lane_p16(__a, __b, __c) \ + (poly16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, (int16x4_t) __b, __c) + +#define vset_lane_s64(__a, __b, __c) \ + (int64x1_t)__builtin_neon_vset_lanev1di ((__builtin_neon_di) __a, __b, __c) + +#define vset_lane_u64(__a, __b, __c) \ + (uint64x1_t)__builtin_neon_vset_lanev1di ((__builtin_neon_di) __a, (int64x1_t) __b, __c) + +#define vsetq_lane_s8(__a, __b, __c) \ + (int8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, __b, __c) + +#define vsetq_lane_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, __b, __c) + +#define vsetq_lane_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, __b, __c) + +#define vsetq_lane_f32(__a, __b, __c) \ + (float32x4_t)__builtin_neon_vset_lanev4sf (__a, __b, __c) + +#define vsetq_lane_u8(__a, __b, __c) \ + (uint8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c) + +#define vsetq_lane_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c) + +#define vsetq_lane_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, (int32x4_t) __b, __c) + +#define vsetq_lane_p8(__a, __b, __c) \ + (poly8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c) + +#define vsetq_lane_p16(__a, __b, __c) \ + (poly16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c) + +#define vsetq_lane_s64(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, __b, __c) + +#define vsetq_lane_u64(__a, __b, __c) \ + (uint64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c) + 
+#define vcreate_s8(__a) \ + (int8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a) + +#define vcreate_s16(__a) \ + (int16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a) + +#define vcreate_s32(__a) \ + (int32x2_t)__builtin_neon_vcreatev2si ((__builtin_neon_di) __a) + +#define vcreate_s64(__a) \ + (int64x1_t)__builtin_neon_vcreatev1di ((__builtin_neon_di) __a) + +#define vcreate_f32(__a) \ + (float32x2_t)__builtin_neon_vcreatev2sf ((__builtin_neon_di) __a) + +#define vcreate_u8(__a) \ + (uint8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a) + +#define vcreate_u16(__a) \ + (uint16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a) + +#define vcreate_u32(__a) \ + (uint32x2_t)__builtin_neon_vcreatev2si ((__builtin_neon_di) __a) + +#define vcreate_u64(__a) \ + (uint64x1_t)__builtin_neon_vcreatev1di ((__builtin_neon_di) __a) + +#define vcreate_p8(__a) \ + (poly8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a) + +#define vcreate_p16(__a) \ + (poly16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a) + +#define vdup_n_s8(__a) \ + (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a) + +#define vdup_n_s16(__a) \ + (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a) + +#define vdup_n_s32(__a) \ + (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a) + +#define vdup_n_f32(__a) \ + (float32x2_t)__builtin_neon_vdup_nv2sf (__a) + +#define vdup_n_u8(__a) \ + (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a) + +#define vdup_n_u16(__a) \ + (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a) + +#define vdup_n_u32(__a) \ + (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a) + +#define vdup_n_p8(__a) \ + (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a) + +#define vdup_n_p16(__a) \ + (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a) + +#define vdup_n_s64(__a) \ + (int64x1_t)__builtin_neon_vdup_nv1di ((__builtin_neon_di) __a) + +#define 
vdup_n_u64(__a) \ + (uint64x1_t)__builtin_neon_vdup_nv1di ((__builtin_neon_di) __a) + +#define vdupq_n_s8(__a) \ + (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a) + +#define vdupq_n_s16(__a) \ + (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a) + +#define vdupq_n_s32(__a) \ + (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a) + +#define vdupq_n_f32(__a) \ + (float32x4_t)__builtin_neon_vdup_nv4sf (__a) + +#define vdupq_n_u8(__a) \ + (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a) + +#define vdupq_n_u16(__a) \ + (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a) + +#define vdupq_n_u32(__a) \ + (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a) + +#define vdupq_n_p8(__a) \ + (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a) + +#define vdupq_n_p16(__a) \ + (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a) + +#define vdupq_n_s64(__a) \ + (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a) + +#define vdupq_n_u64(__a) \ + (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a) + +#define vmov_n_s8(__a) \ + (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a) + +#define vmov_n_s16(__a) \ + (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a) + +#define vmov_n_s32(__a) \ + (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a) + +#define vmov_n_f32(__a) \ + (float32x2_t)__builtin_neon_vdup_nv2sf (__a) + +#define vmov_n_u8(__a) \ + (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a) + +#define vmov_n_u16(__a) \ + (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a) + +#define vmov_n_u32(__a) \ + (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a) + +#define vmov_n_p8(__a) \ + (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a) + +#define vmov_n_p16(__a) \ + (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a) + +#define vmov_n_s64(__a) \ + 
(int64x1_t)__builtin_neon_vdup_nv1di ((__builtin_neon_di) __a) + +#define vmov_n_u64(__a) \ + (uint64x1_t)__builtin_neon_vdup_nv1di ((__builtin_neon_di) __a) + +#define vmovq_n_s8(__a) \ + (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a) + +#define vmovq_n_s16(__a) \ + (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a) + +#define vmovq_n_s32(__a) \ + (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a) + +#define vmovq_n_f32(__a) \ + (float32x4_t)__builtin_neon_vdup_nv4sf (__a) + +#define vmovq_n_u8(__a) \ + (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a) + +#define vmovq_n_u16(__a) \ + (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a) + +#define vmovq_n_u32(__a) \ + (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a) + +#define vmovq_n_p8(__a) \ + (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a) + +#define vmovq_n_p16(__a) \ + (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a) + +#define vmovq_n_s64(__a) \ + (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a) + +#define vmovq_n_u64(__a) \ + (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a) + +#define vdup_lane_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vdup_lanev8qi (__a, __b) + +#define vdup_lane_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vdup_lanev4hi (__a, __b) + +#define vdup_lane_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vdup_lanev2si (__a, __b) + +#define vdup_lane_f32(__a, __b) \ + (float32x2_t)__builtin_neon_vdup_lanev2sf (__a, __b) + +#define vdup_lane_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b) + +#define vdup_lane_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b) + +#define vdup_lane_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vdup_lanev2si ((int32x2_t) __a, __b) + +#define vdup_lane_p8(__a, __b) \ + (poly8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b) + +#define vdup_lane_p16(__a, __b) \ + 
(poly16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b) + +#define vdup_lane_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vdup_lanev1di (__a, __b) + +#define vdup_lane_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vdup_lanev1di ((int64x1_t) __a, __b) + +#define vdupq_lane_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vdup_lanev16qi (__a, __b) + +#define vdupq_lane_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vdup_lanev8hi (__a, __b) + +#define vdupq_lane_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vdup_lanev4si (__a, __b) + +#define vdupq_lane_f32(__a, __b) \ + (float32x4_t)__builtin_neon_vdup_lanev4sf (__a, __b) + +#define vdupq_lane_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b) + +#define vdupq_lane_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b) + +#define vdupq_lane_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vdup_lanev4si ((int32x2_t) __a, __b) + +#define vdupq_lane_p8(__a, __b) \ + (poly8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b) + +#define vdupq_lane_p16(__a, __b) \ + (poly16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b) + +#define vdupq_lane_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vdup_lanev2di (__a, __b) + +#define vdupq_lane_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vdup_lanev2di ((int64x1_t) __a, __b) + +#define vcombine_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vcombinev8qi (__a, __b) + +#define vcombine_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vcombinev4hi (__a, __b) + +#define vcombine_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vcombinev2si (__a, __b) + +#define vcombine_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vcombinev1di (__a, __b) + +#define vcombine_f32(__a, __b) \ + (float32x4_t)__builtin_neon_vcombinev2sf (__a, __b) + +#define vcombine_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b) + +#define vcombine_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b) 
+ +#define vcombine_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcombinev2si ((int32x2_t) __a, (int32x2_t) __b) + +#define vcombine_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vcombinev1di ((int64x1_t) __a, (int64x1_t) __b) + +#define vcombine_p8(__a, __b) \ + (poly8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b) + +#define vcombine_p16(__a, __b) \ + (poly16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b) + +#define vget_high_s8(__a) \ + (int8x8_t)__builtin_neon_vget_highv16qi (__a) + +#define vget_high_s16(__a) \ + (int16x4_t)__builtin_neon_vget_highv8hi (__a) + +#define vget_high_s32(__a) \ + (int32x2_t)__builtin_neon_vget_highv4si (__a) + +#define vget_high_s64(__a) \ + (int64x1_t)__builtin_neon_vget_highv2di (__a) + +#define vget_high_f32(__a) \ + (float32x2_t)__builtin_neon_vget_highv4sf (__a) + +#define vget_high_u8(__a) \ + (uint8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a) + +#define vget_high_u16(__a) \ + (uint16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a) + +#define vget_high_u32(__a) \ + (uint32x2_t)__builtin_neon_vget_highv4si ((int32x4_t) __a) + +#define vget_high_u64(__a) \ + (uint64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a) + +#define vget_high_p8(__a) \ + (poly8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a) + +#define vget_high_p16(__a) \ + (poly16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a) + +#define vget_low_s8(__a) \ + (int8x8_t)__builtin_neon_vget_lowv16qi (__a) + +#define vget_low_s16(__a) \ + (int16x4_t)__builtin_neon_vget_lowv8hi (__a) + +#define vget_low_s32(__a) \ + (int32x2_t)__builtin_neon_vget_lowv4si (__a) + +#define vget_low_s64(__a) \ + (int64x1_t)__builtin_neon_vget_lowv2di (__a) + +#define vget_low_f32(__a) \ + (float32x2_t)__builtin_neon_vget_lowv4sf (__a) + +#define vget_low_u8(__a) \ + (uint8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a) + +#define vget_low_u16(__a) \ + (uint16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a) + +#define 
vget_low_u32(__a) \ + (uint32x2_t)__builtin_neon_vget_lowv4si ((int32x4_t) __a) + +#define vget_low_u64(__a) \ + (uint64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a) + +#define vget_low_p8(__a) \ + (poly8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a) + +#define vget_low_p16(__a) \ + (poly16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a) + +#define vcvt_s32_f32(__a) \ + (int32x2_t)__builtin_neon_vcvtv2sf (__a, 1) + +#define vcvt_f32_s32(__a) \ + (float32x2_t)__builtin_neon_vcvtv2si (__a, 1) + +#define vcvt_f32_u32(__a) \ + (float32x2_t)__builtin_neon_vcvtv2si ((int32x2_t) __a, 0) + +#define vcvt_u32_f32(__a) \ + (uint32x2_t)__builtin_neon_vcvtv2sf (__a, 0) + +#define vcvtq_s32_f32(__a) \ + (int32x4_t)__builtin_neon_vcvtv4sf (__a, 1) + +#define vcvtq_f32_s32(__a) \ + (float32x4_t)__builtin_neon_vcvtv4si (__a, 1) + +#define vcvtq_f32_u32(__a) \ + (float32x4_t)__builtin_neon_vcvtv4si ((int32x4_t) __a, 0) + +#define vcvtq_u32_f32(__a) \ + (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0) + +#define vcvt_n_s32_f32(__a, __b) \ + (int32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 1) + +#define vcvt_n_f32_s32(__a, __b) \ + (float32x2_t)__builtin_neon_vcvt_nv2si (__a, __b, 1) + +#define vcvt_n_f32_u32(__a, __b) \ + (float32x2_t)__builtin_neon_vcvt_nv2si ((int32x2_t) __a, __b, 0) + +#define vcvt_n_u32_f32(__a, __b) \ + (uint32x2_t)__builtin_neon_vcvt_nv2sf (__a, __b, 0) + +#define vcvtq_n_s32_f32(__a, __b) \ + (int32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 1) + +#define vcvtq_n_f32_s32(__a, __b) \ + (float32x4_t)__builtin_neon_vcvt_nv4si (__a, __b, 1) + +#define vcvtq_n_f32_u32(__a, __b) \ + (float32x4_t)__builtin_neon_vcvt_nv4si ((int32x4_t) __a, __b, 0) + +#define vcvtq_n_u32_f32(__a, __b) \ + (uint32x4_t)__builtin_neon_vcvt_nv4sf (__a, __b, 0) + +#define vmovn_s16(__a) \ + (int8x8_t)__builtin_neon_vmovnv8hi (__a, 1) + +#define vmovn_s32(__a) \ + (int16x4_t)__builtin_neon_vmovnv4si (__a, 1) + +#define vmovn_s64(__a) \ + (int32x2_t)__builtin_neon_vmovnv2di (__a, 1) + 
+#define vmovn_u16(__a) \ + (uint8x8_t)__builtin_neon_vmovnv8hi ((int16x8_t) __a, 0) + +#define vmovn_u32(__a) \ + (uint16x4_t)__builtin_neon_vmovnv4si ((int32x4_t) __a, 0) + +#define vmovn_u64(__a) \ + (uint32x2_t)__builtin_neon_vmovnv2di ((int64x2_t) __a, 0) + +#define vqmovn_s16(__a) \ + (int8x8_t)__builtin_neon_vqmovnv8hi (__a, 1) + +#define vqmovn_s32(__a) \ + (int16x4_t)__builtin_neon_vqmovnv4si (__a, 1) + +#define vqmovn_s64(__a) \ + (int32x2_t)__builtin_neon_vqmovnv2di (__a, 1) + +#define vqmovn_u16(__a) \ + (uint8x8_t)__builtin_neon_vqmovnv8hi ((int16x8_t) __a, 0) + +#define vqmovn_u32(__a) \ + (uint16x4_t)__builtin_neon_vqmovnv4si ((int32x4_t) __a, 0) + +#define vqmovn_u64(__a) \ + (uint32x2_t)__builtin_neon_vqmovnv2di ((int64x2_t) __a, 0) + +#define vqmovun_s16(__a) \ + (uint8x8_t)__builtin_neon_vqmovunv8hi (__a, 1) + +#define vqmovun_s32(__a) \ + (uint16x4_t)__builtin_neon_vqmovunv4si (__a, 1) + +#define vqmovun_s64(__a) \ + (uint32x2_t)__builtin_neon_vqmovunv2di (__a, 1) + +#define vmovl_s8(__a) \ + (int16x8_t)__builtin_neon_vmovlv8qi (__a, 1) + +#define vmovl_s16(__a) \ + (int32x4_t)__builtin_neon_vmovlv4hi (__a, 1) + +#define vmovl_s32(__a) \ + (int64x2_t)__builtin_neon_vmovlv2si (__a, 1) + +#define vmovl_u8(__a) \ + (uint16x8_t)__builtin_neon_vmovlv8qi ((int8x8_t) __a, 0) + +#define vmovl_u16(__a) \ + (uint32x4_t)__builtin_neon_vmovlv4hi ((int16x4_t) __a, 0) + +#define vmovl_u32(__a) \ + (uint64x2_t)__builtin_neon_vmovlv2si ((int32x2_t) __a, 0) + +#define vtbl1_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vtbl1v8qi (__a, __b) + +#define vtbl1_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vtbl1v8qi ((int8x8_t) __a, (int8x8_t) __b) + +#define vtbl1_p8(__a, __b) \ + (poly8x8_t)__builtin_neon_vtbl1v8qi ((int8x8_t) __a, (int8x8_t) __b) + +#define vtbl2_s8(__a, __b) __extension__ \ + ({ \ + union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __au = { __a }; \ + (int8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, __b); \ + }) + +#define vtbl2_u8(__a, __b) __extension__ \ 
+ ({ \ + union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __au = { __a }; \ + (uint8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b); \ + }) + +#define vtbl2_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __au = { __a }; \ + (poly8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b); \ + }) + +#define vtbl3_s8(__a, __b) __extension__ \ + ({ \ + union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __au = { __a }; \ + (int8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, __b); \ + }) + +#define vtbl3_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __au = { __a }; \ + (uint8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b); \ + }) + +#define vtbl3_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __au = { __a }; \ + (poly8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b); \ + }) + +#define vtbl4_s8(__a, __b) __extension__ \ + ({ \ + union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __au = { __a }; \ + (int8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, __b); \ + }) + +#define vtbl4_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __au = { __a }; \ + (uint8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b); \ + }) + +#define vtbl4_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __au = { __a }; \ + (poly8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b); \ + }) + +#define vtbx1_s8(__a, __b, __c) \ + (int8x8_t)__builtin_neon_vtbx1v8qi (__a, __b, __c) + +#define vtbx1_u8(__a, __b, __c) \ + (uint8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c) + +#define vtbx1_p8(__a, __b, __c) \ + (poly8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c) + +#define vtbx2_s8(__a, __b, __c) __extension__ \ + ({ \ + union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + (int8x8_t)__builtin_neon_vtbx2v8qi 
(__a, __bu.__o, __c); \ + }) + +#define vtbx2_u8(__a, __b, __c) __extension__ \ + ({ \ + union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + (uint8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); \ + }) + +#define vtbx2_p8(__a, __b, __c) __extension__ \ + ({ \ + union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + (poly8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); \ + }) + +#define vtbx3_s8(__a, __b, __c) __extension__ \ + ({ \ + union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + (int8x8_t)__builtin_neon_vtbx3v8qi (__a, __bu.__o, __c); \ + }) + +#define vtbx3_u8(__a, __b, __c) __extension__ \ + ({ \ + union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + (uint8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); \ + }) + +#define vtbx3_p8(__a, __b, __c) __extension__ \ + ({ \ + union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + (poly8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); \ + }) + +#define vtbx4_s8(__a, __b, __c) __extension__ \ + ({ \ + union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + (int8x8_t)__builtin_neon_vtbx4v8qi (__a, __bu.__o, __c); \ + }) + +#define vtbx4_u8(__a, __b, __c) __extension__ \ + ({ \ + union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + (uint8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); \ + }) + +#define vtbx4_p8(__a, __b, __c) __extension__ \ + ({ \ + union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + (poly8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c); \ + }) + +#define vmul_lane_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vmul_lanev4hi (__a, __b, __c, 1) + +#define vmul_lane_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vmul_lanev2si (__a, __b, __c, 1) + +#define vmul_lane_f32(__a, __b, __c) \ + 
(float32x2_t)__builtin_neon_vmul_lanev2sf (__a, __b, __c, 5) + +#define vmul_lane_u16(__a, __b, __c) \ + (uint16x4_t)__builtin_neon_vmul_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0) + +#define vmul_lane_u32(__a, __b, __c) \ + (uint32x2_t)__builtin_neon_vmul_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0) + +#define vmulq_lane_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vmul_lanev8hi (__a, __b, __c, 1) + +#define vmulq_lane_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vmul_lanev4si (__a, __b, __c, 1) + +#define vmulq_lane_f32(__a, __b, __c) \ + (float32x4_t)__builtin_neon_vmul_lanev4sf (__a, __b, __c, 5) + +#define vmulq_lane_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vmul_lanev8hi ((int16x8_t) __a, (int16x4_t) __b, __c, 0) + +#define vmulq_lane_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vmul_lanev4si ((int32x4_t) __a, (int32x2_t) __b, __c, 0) + +#define vmla_lane_s16(__a, __b, __c, __d) \ + (int16x4_t)__builtin_neon_vmla_lanev4hi (__a, __b, __c, __d, 1) + +#define vmla_lane_s32(__a, __b, __c, __d) \ + (int32x2_t)__builtin_neon_vmla_lanev2si (__a, __b, __c, __d, 1) + +#define vmla_lane_f32(__a, __b, __c, __d) \ + (float32x2_t)__builtin_neon_vmla_lanev2sf (__a, __b, __c, __d, 5) + +#define vmla_lane_u16(__a, __b, __c, __d) \ + (uint16x4_t)__builtin_neon_vmla_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0) + +#define vmla_lane_u32(__a, __b, __c, __d) \ + (uint32x2_t)__builtin_neon_vmla_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0) + +#define vmlaq_lane_s16(__a, __b, __c, __d) \ + (int16x8_t)__builtin_neon_vmla_lanev8hi (__a, __b, __c, __d, 1) + +#define vmlaq_lane_s32(__a, __b, __c, __d) \ + (int32x4_t)__builtin_neon_vmla_lanev4si (__a, __b, __c, __d, 1) + +#define vmlaq_lane_f32(__a, __b, __c, __d) \ + (float32x4_t)__builtin_neon_vmla_lanev4sf (__a, __b, __c, __d, 5) + +#define vmlaq_lane_u16(__a, __b, __c, __d) \ + (uint16x8_t)__builtin_neon_vmla_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, 
(int16x4_t) __c, __d, 0) + +#define vmlaq_lane_u32(__a, __b, __c, __d) \ + (uint32x4_t)__builtin_neon_vmla_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d, 0) + +#define vmlal_lane_s16(__a, __b, __c, __d) \ + (int32x4_t)__builtin_neon_vmlal_lanev4hi (__a, __b, __c, __d, 1) + +#define vmlal_lane_s32(__a, __b, __c, __d) \ + (int64x2_t)__builtin_neon_vmlal_lanev2si (__a, __b, __c, __d, 1) + +#define vmlal_lane_u16(__a, __b, __c, __d) \ + (uint32x4_t)__builtin_neon_vmlal_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0) + +#define vmlal_lane_u32(__a, __b, __c, __d) \ + (uint64x2_t)__builtin_neon_vmlal_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0) + +#define vqdmlal_lane_s16(__a, __b, __c, __d) \ + (int32x4_t)__builtin_neon_vqdmlal_lanev4hi (__a, __b, __c, __d, 1) + +#define vqdmlal_lane_s32(__a, __b, __c, __d) \ + (int64x2_t)__builtin_neon_vqdmlal_lanev2si (__a, __b, __c, __d, 1) + +#define vmls_lane_s16(__a, __b, __c, __d) \ + (int16x4_t)__builtin_neon_vmls_lanev4hi (__a, __b, __c, __d, 1) + +#define vmls_lane_s32(__a, __b, __c, __d) \ + (int32x2_t)__builtin_neon_vmls_lanev2si (__a, __b, __c, __d, 1) + +#define vmls_lane_f32(__a, __b, __c, __d) \ + (float32x2_t)__builtin_neon_vmls_lanev2sf (__a, __b, __c, __d, 5) + +#define vmls_lane_u16(__a, __b, __c, __d) \ + (uint16x4_t)__builtin_neon_vmls_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0) + +#define vmls_lane_u32(__a, __b, __c, __d) \ + (uint32x2_t)__builtin_neon_vmls_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0) + +#define vmlsq_lane_s16(__a, __b, __c, __d) \ + (int16x8_t)__builtin_neon_vmls_lanev8hi (__a, __b, __c, __d, 1) + +#define vmlsq_lane_s32(__a, __b, __c, __d) \ + (int32x4_t)__builtin_neon_vmls_lanev4si (__a, __b, __c, __d, 1) + +#define vmlsq_lane_f32(__a, __b, __c, __d) \ + (float32x4_t)__builtin_neon_vmls_lanev4sf (__a, __b, __c, __d, 5) + +#define vmlsq_lane_u16(__a, __b, __c, __d) \ + 
(uint16x8_t)__builtin_neon_vmls_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x4_t) __c, __d, 0) + +#define vmlsq_lane_u32(__a, __b, __c, __d) \ + (uint32x4_t)__builtin_neon_vmls_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d, 0) + +#define vmlsl_lane_s16(__a, __b, __c, __d) \ + (int32x4_t)__builtin_neon_vmlsl_lanev4hi (__a, __b, __c, __d, 1) + +#define vmlsl_lane_s32(__a, __b, __c, __d) \ + (int64x2_t)__builtin_neon_vmlsl_lanev2si (__a, __b, __c, __d, 1) + +#define vmlsl_lane_u16(__a, __b, __c, __d) \ + (uint32x4_t)__builtin_neon_vmlsl_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d, 0) + +#define vmlsl_lane_u32(__a, __b, __c, __d) \ + (uint64x2_t)__builtin_neon_vmlsl_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d, 0) + +#define vqdmlsl_lane_s16(__a, __b, __c, __d) \ + (int32x4_t)__builtin_neon_vqdmlsl_lanev4hi (__a, __b, __c, __d, 1) + +#define vqdmlsl_lane_s32(__a, __b, __c, __d) \ + (int64x2_t)__builtin_neon_vqdmlsl_lanev2si (__a, __b, __c, __d, 1) + +#define vmull_lane_s16(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vmull_lanev4hi (__a, __b, __c, 1) + +#define vmull_lane_s32(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vmull_lanev2si (__a, __b, __c, 1) + +#define vmull_lane_u16(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vmull_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c, 0) + +#define vmull_lane_u32(__a, __b, __c) \ + (uint64x2_t)__builtin_neon_vmull_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c, 0) + +#define vqdmull_lane_s16(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vqdmull_lanev4hi (__a, __b, __c, 1) + +#define vqdmull_lane_s32(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vqdmull_lanev2si (__a, __b, __c, 1) + +#define vqdmulhq_lane_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 1) + +#define vqdmulhq_lane_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 1) + +#define vqdmulh_lane_s16(__a, __b, __c) \ + 
(int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 1) + +#define vqdmulh_lane_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 1) + +#define vqrdmulhq_lane_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c, 3) + +#define vqrdmulhq_lane_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c, 3) + +#define vqrdmulh_lane_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c, 3) + +#define vqrdmulh_lane_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c, 3) + +#define vmul_n_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vmul_nv4hi (__a, (__builtin_neon_hi) __b, 1) + +#define vmul_n_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vmul_nv2si (__a, (__builtin_neon_si) __b, 1) + +#define vmul_n_f32(__a, __b) \ + (float32x2_t)__builtin_neon_vmul_nv2sf (__a, __b, 5) + +#define vmul_n_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vmul_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b, 0) + +#define vmul_n_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vmul_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b, 0) + +#define vmulq_n_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vmul_nv8hi (__a, (__builtin_neon_hi) __b, 1) + +#define vmulq_n_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vmul_nv4si (__a, (__builtin_neon_si) __b, 1) + +#define vmulq_n_f32(__a, __b) \ + (float32x4_t)__builtin_neon_vmul_nv4sf (__a, __b, 5) + +#define vmulq_n_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vmul_nv8hi ((int16x8_t) __a, (__builtin_neon_hi) __b, 0) + +#define vmulq_n_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vmul_nv4si ((int32x4_t) __a, (__builtin_neon_si) __b, 0) + +#define vmull_n_s16(__a, __b) \ + (int32x4_t)__builtin_neon_vmull_nv4hi (__a, (__builtin_neon_hi) __b, 1) + +#define vmull_n_s32(__a, __b) \ + (int64x2_t)__builtin_neon_vmull_nv2si (__a, (__builtin_neon_si) __b, 1) + +#define vmull_n_u16(__a, __b) \ + (uint32x4_t)__builtin_neon_vmull_nv4hi 
((int16x4_t) __a, (__builtin_neon_hi) __b, 0) + +#define vmull_n_u32(__a, __b) \ + (uint64x2_t)__builtin_neon_vmull_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b, 0) + +#define vqdmull_n_s16(__a, __b) \ + (int32x4_t)__builtin_neon_vqdmull_nv4hi (__a, (__builtin_neon_hi) __b, 1) + +#define vqdmull_n_s32(__a, __b) \ + (int64x2_t)__builtin_neon_vqdmull_nv2si (__a, (__builtin_neon_si) __b, 1) + +#define vqdmulhq_n_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, (__builtin_neon_hi) __b, 1) + +#define vqdmulhq_n_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, (__builtin_neon_si) __b, 1) + +#define vqdmulh_n_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, (__builtin_neon_hi) __b, 1) + +#define vqdmulh_n_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, (__builtin_neon_si) __b, 1) + +#define vqrdmulhq_n_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, (__builtin_neon_hi) __b, 3) + +#define vqrdmulhq_n_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, (__builtin_neon_si) __b, 3) + +#define vqrdmulh_n_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, (__builtin_neon_hi) __b, 3) + +#define vqrdmulh_n_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, (__builtin_neon_si) __b, 3) + +#define vmla_n_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vmla_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1) + +#define vmla_n_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vmla_nv2si (__a, __b, (__builtin_neon_si) __c, 1) + +#define vmla_n_f32(__a, __b, __c) \ + (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, __c, 5) + +#define vmla_n_u16(__a, __b, __c) \ + (uint16x4_t)__builtin_neon_vmla_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0) + +#define vmla_n_u32(__a, __b, __c) \ + (uint32x2_t)__builtin_neon_vmla_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0) + +#define vmlaq_n_s16(__a, __b, __c) \ + 
(int16x8_t)__builtin_neon_vmla_nv8hi (__a, __b, (__builtin_neon_hi) __c, 1) + +#define vmlaq_n_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vmla_nv4si (__a, __b, (__builtin_neon_si) __c, 1) + +#define vmlaq_n_f32(__a, __b, __c) \ + (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, __c, 5) + +#define vmlaq_n_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vmla_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c, 0) + +#define vmlaq_n_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vmla_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c, 0) + +#define vmlal_n_s16(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vmlal_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1) + +#define vmlal_n_s32(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vmlal_nv2si (__a, __b, (__builtin_neon_si) __c, 1) + +#define vmlal_n_u16(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vmlal_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0) + +#define vmlal_n_u32(__a, __b, __c) \ + (uint64x2_t)__builtin_neon_vmlal_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0) + +#define vqdmlal_n_s16(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vqdmlal_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1) + +#define vqdmlal_n_s32(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vqdmlal_nv2si (__a, __b, (__builtin_neon_si) __c, 1) + +#define vmls_n_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vmls_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1) + +#define vmls_n_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vmls_nv2si (__a, __b, (__builtin_neon_si) __c, 1) + +#define vmls_n_f32(__a, __b, __c) \ + (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, __c, 5) + +#define vmls_n_u16(__a, __b, __c) \ + (uint16x4_t)__builtin_neon_vmls_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0) + +#define vmls_n_u32(__a, __b, __c) \ + (uint32x2_t)__builtin_neon_vmls_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0) + +#define vmlsq_n_s16(__a, 
__b, __c) \ + (int16x8_t)__builtin_neon_vmls_nv8hi (__a, __b, (__builtin_neon_hi) __c, 1) + +#define vmlsq_n_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vmls_nv4si (__a, __b, (__builtin_neon_si) __c, 1) + +#define vmlsq_n_f32(__a, __b, __c) \ + (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, __c, 5) + +#define vmlsq_n_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vmls_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c, 0) + +#define vmlsq_n_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vmls_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c, 0) + +#define vmlsl_n_s16(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vmlsl_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1) + +#define vmlsl_n_s32(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vmlsl_nv2si (__a, __b, (__builtin_neon_si) __c, 1) + +#define vmlsl_n_u16(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vmlsl_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c, 0) + +#define vmlsl_n_u32(__a, __b, __c) \ + (uint64x2_t)__builtin_neon_vmlsl_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c, 0) + +#define vqdmlsl_n_s16(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vqdmlsl_nv4hi (__a, __b, (__builtin_neon_hi) __c, 1) + +#define vqdmlsl_n_s32(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vqdmlsl_nv2si (__a, __b, (__builtin_neon_si) __c, 1) + +#define vext_s8(__a, __b, __c) \ + (int8x8_t)__builtin_neon_vextv8qi (__a, __b, __c) + +#define vext_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vextv4hi (__a, __b, __c) + +#define vext_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vextv2si (__a, __b, __c) + +#define vext_s64(__a, __b, __c) \ + (int64x1_t)__builtin_neon_vextv1di (__a, __b, __c) + +#define vext_f32(__a, __b, __c) \ + (float32x2_t)__builtin_neon_vextv2sf (__a, __b, __c) + +#define vext_u8(__a, __b, __c) \ + (uint8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c) + +#define vext_u16(__a, __b, __c) \ + 
(uint16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c) + +#define vext_u32(__a, __b, __c) \ + (uint32x2_t)__builtin_neon_vextv2si ((int32x2_t) __a, (int32x2_t) __b, __c) + +#define vext_u64(__a, __b, __c) \ + (uint64x1_t)__builtin_neon_vextv1di ((int64x1_t) __a, (int64x1_t) __b, __c) + +#define vext_p8(__a, __b, __c) \ + (poly8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c) + +#define vext_p16(__a, __b, __c) \ + (poly16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c) + +#define vextq_s8(__a, __b, __c) \ + (int8x16_t)__builtin_neon_vextv16qi (__a, __b, __c) + +#define vextq_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vextv8hi (__a, __b, __c) + +#define vextq_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vextv4si (__a, __b, __c) + +#define vextq_s64(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vextv2di (__a, __b, __c) + +#define vextq_f32(__a, __b, __c) \ + (float32x4_t)__builtin_neon_vextv4sf (__a, __b, __c) + +#define vextq_u8(__a, __b, __c) \ + (uint8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c) + +#define vextq_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c) + +#define vextq_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vextv4si ((int32x4_t) __a, (int32x4_t) __b, __c) + +#define vextq_u64(__a, __b, __c) \ + (uint64x2_t)__builtin_neon_vextv2di ((int64x2_t) __a, (int64x2_t) __b, __c) + +#define vextq_p8(__a, __b, __c) \ + (poly8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c) + +#define vextq_p16(__a, __b, __c) \ + (poly16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c) + +#define vrev64_s8(__a) \ + (int8x8_t)__builtin_neon_vrev64v8qi (__a, 1) + +#define vrev64_s16(__a) \ + (int16x4_t)__builtin_neon_vrev64v4hi (__a, 1) + +#define vrev64_s32(__a) \ + (int32x2_t)__builtin_neon_vrev64v2si (__a, 1) + +#define vrev64_f32(__a) \ + (float32x2_t)__builtin_neon_vrev64v2sf (__a, 5) + 
+#define vrev64_u8(__a) \ + (uint8x8_t)__builtin_neon_vrev64v8qi ((int8x8_t) __a, 0) + +#define vrev64_u16(__a) \ + (uint16x4_t)__builtin_neon_vrev64v4hi ((int16x4_t) __a, 0) + +#define vrev64_u32(__a) \ + (uint32x2_t)__builtin_neon_vrev64v2si ((int32x2_t) __a, 0) + +#define vrev64_p8(__a) \ + (poly8x8_t)__builtin_neon_vrev64v8qi ((int8x8_t) __a, 4) + +#define vrev64_p16(__a) \ + (poly16x4_t)__builtin_neon_vrev64v4hi ((int16x4_t) __a, 4) + +#define vrev64q_s8(__a) \ + (int8x16_t)__builtin_neon_vrev64v16qi (__a, 1) + +#define vrev64q_s16(__a) \ + (int16x8_t)__builtin_neon_vrev64v8hi (__a, 1) + +#define vrev64q_s32(__a) \ + (int32x4_t)__builtin_neon_vrev64v4si (__a, 1) + +#define vrev64q_f32(__a) \ + (float32x4_t)__builtin_neon_vrev64v4sf (__a, 5) + +#define vrev64q_u8(__a) \ + (uint8x16_t)__builtin_neon_vrev64v16qi ((int8x16_t) __a, 0) + +#define vrev64q_u16(__a) \ + (uint16x8_t)__builtin_neon_vrev64v8hi ((int16x8_t) __a, 0) + +#define vrev64q_u32(__a) \ + (uint32x4_t)__builtin_neon_vrev64v4si ((int32x4_t) __a, 0) + +#define vrev64q_p8(__a) \ + (poly8x16_t)__builtin_neon_vrev64v16qi ((int8x16_t) __a, 4) + +#define vrev64q_p16(__a) \ + (poly16x8_t)__builtin_neon_vrev64v8hi ((int16x8_t) __a, 4) + +#define vrev32_s8(__a) \ + (int8x8_t)__builtin_neon_vrev32v8qi (__a, 1) + +#define vrev32_s16(__a) \ + (int16x4_t)__builtin_neon_vrev32v4hi (__a, 1) + +#define vrev32_u8(__a) \ + (uint8x8_t)__builtin_neon_vrev32v8qi ((int8x8_t) __a, 0) + +#define vrev32_u16(__a) \ + (uint16x4_t)__builtin_neon_vrev32v4hi ((int16x4_t) __a, 0) + +#define vrev32_p8(__a) \ + (poly8x8_t)__builtin_neon_vrev32v8qi ((int8x8_t) __a, 4) + +#define vrev32_p16(__a) \ + (poly16x4_t)__builtin_neon_vrev32v4hi ((int16x4_t) __a, 4) + +#define vrev32q_s8(__a) \ + (int8x16_t)__builtin_neon_vrev32v16qi (__a, 1) + +#define vrev32q_s16(__a) \ + (int16x8_t)__builtin_neon_vrev32v8hi (__a, 1) + +#define vrev32q_u8(__a) \ + (uint8x16_t)__builtin_neon_vrev32v16qi ((int8x16_t) __a, 0) + +#define vrev32q_u16(__a) \ + 
(uint16x8_t)__builtin_neon_vrev32v8hi ((int16x8_t) __a, 0) + +#define vrev32q_p8(__a) \ + (poly8x16_t)__builtin_neon_vrev32v16qi ((int8x16_t) __a, 4) + +#define vrev32q_p16(__a) \ + (poly16x8_t)__builtin_neon_vrev32v8hi ((int16x8_t) __a, 4) + +#define vrev16_s8(__a) \ + (int8x8_t)__builtin_neon_vrev16v8qi (__a, 1) + +#define vrev16_u8(__a) \ + (uint8x8_t)__builtin_neon_vrev16v8qi ((int8x8_t) __a, 0) + +#define vrev16_p8(__a) \ + (poly8x8_t)__builtin_neon_vrev16v8qi ((int8x8_t) __a, 4) + +#define vrev16q_s8(__a) \ + (int8x16_t)__builtin_neon_vrev16v16qi (__a, 1) + +#define vrev16q_u8(__a) \ + (uint8x16_t)__builtin_neon_vrev16v16qi ((int8x16_t) __a, 0) + +#define vrev16q_p8(__a) \ + (poly8x16_t)__builtin_neon_vrev16v16qi ((int8x16_t) __a, 4) + +#define vbsl_s8(__a, __b, __c) \ + (int8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, __b, __c) + +#define vbsl_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, __b, __c) + +#define vbsl_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, __b, __c) + +#define vbsl_s64(__a, __b, __c) \ + (int64x1_t)__builtin_neon_vbslv1di ((int64x1_t) __a, __b, __c) + +#define vbsl_f32(__a, __b, __c) \ + (float32x2_t)__builtin_neon_vbslv2sf ((int32x2_t) __a, __b, __c) + +#define vbsl_u8(__a, __b, __c) \ + (uint8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c) + +#define vbsl_u16(__a, __b, __c) \ + (uint16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c) + +#define vbsl_u32(__a, __b, __c) \ + (uint32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c) + +#define vbsl_u64(__a, __b, __c) \ + (uint64x1_t)__builtin_neon_vbslv1di ((int64x1_t) __a, (int64x1_t) __b, (int64x1_t) __c) + +#define vbsl_p8(__a, __b, __c) \ + (poly8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c) + +#define vbsl_p16(__a, __b, __c) \ + (poly16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, 
(int16x4_t) __c) + +#define vbslq_s8(__a, __b, __c) \ + (int8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, __b, __c) + +#define vbslq_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, __b, __c) + +#define vbslq_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, __b, __c) + +#define vbslq_s64(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, __b, __c) + +#define vbslq_f32(__a, __b, __c) \ + (float32x4_t)__builtin_neon_vbslv4sf ((int32x4_t) __a, __b, __c) + +#define vbslq_u8(__a, __b, __c) \ + (uint8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c) + +#define vbslq_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c) + +#define vbslq_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c) + +#define vbslq_u64(__a, __b, __c) \ + (uint64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c) + +#define vbslq_p8(__a, __b, __c) \ + (poly8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c) + +#define vbslq_p16(__a, __b, __c) \ + (poly16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c) + +#define vtrn_s8(__a, __b) __extension__ \ + ({ \ + union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv8qi (__a, __b); \ + __rv.__i; \ + }) + +#define vtrn_s16(__a, __b) __extension__ \ + ({ \ + union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv4hi (__a, __b); \ + __rv.__i; \ + }) + +#define vtrn_s32(__a, __b) __extension__ \ + ({ \ + union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv2si (__a, __b); \ + __rv.__i; \ + }) + +#define vtrn_f32(__a, __b) __extension__ \ + ({ \ + union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \ + __rv.__o = 
__builtin_neon_vtrnv2sf (__a, __b); \ + __rv.__i; \ + }) + +#define vtrn_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv8qi ((int8x8_t) __a, (int8x8_t) __b); \ + __rv.__i; \ + }) + +#define vtrn_u16(__a, __b) __extension__ \ + ({ \ + union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv4hi ((int16x4_t) __a, (int16x4_t) __b); \ + __rv.__i; \ + }) + +#define vtrn_u32(__a, __b) __extension__ \ + ({ \ + union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv2si ((int32x2_t) __a, (int32x2_t) __b); \ + __rv.__i; \ + }) + +#define vtrn_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv8qi ((int8x8_t) __a, (int8x8_t) __b); \ + __rv.__i; \ + }) + +#define vtrn_p16(__a, __b) __extension__ \ + ({ \ + union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv4hi ((int16x4_t) __a, (int16x4_t) __b); \ + __rv.__i; \ + }) + +#define vtrnq_s8(__a, __b) __extension__ \ + ({ \ + union { int8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv16qi (__a, __b); \ + __rv.__i; \ + }) + +#define vtrnq_s16(__a, __b) __extension__ \ + ({ \ + union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv8hi (__a, __b); \ + __rv.__i; \ + }) + +#define vtrnq_s32(__a, __b) __extension__ \ + ({ \ + union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv4si (__a, __b); \ + __rv.__i; \ + }) + +#define vtrnq_f32(__a, __b) __extension__ \ + ({ \ + union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv4sf (__a, __b); \ + __rv.__i; \ + }) + +#define vtrnq_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv16qi 
((int8x16_t) __a, (int8x16_t) __b); \ + __rv.__i; \ + }) + +#define vtrnq_u16(__a, __b) __extension__ \ + ({ \ + union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv8hi ((int16x8_t) __a, (int16x8_t) __b); \ + __rv.__i; \ + }) + +#define vtrnq_u32(__a, __b) __extension__ \ + ({ \ + union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv4si ((int32x4_t) __a, (int32x4_t) __b); \ + __rv.__i; \ + }) + +#define vtrnq_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv16qi ((int8x16_t) __a, (int8x16_t) __b); \ + __rv.__i; \ + }) + +#define vtrnq_p16(__a, __b) __extension__ \ + ({ \ + union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vtrnv8hi ((int16x8_t) __a, (int16x8_t) __b); \ + __rv.__i; \ + }) + +#define vzip_s8(__a, __b) __extension__ \ + ({ \ + union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv8qi (__a, __b); \ + __rv.__i; \ + }) + +#define vzip_s16(__a, __b) __extension__ \ + ({ \ + union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv4hi (__a, __b); \ + __rv.__i; \ + }) + +#define vzip_s32(__a, __b) __extension__ \ + ({ \ + union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv2si (__a, __b); \ + __rv.__i; \ + }) + +#define vzip_f32(__a, __b) __extension__ \ + ({ \ + union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv2sf (__a, __b); \ + __rv.__i; \ + }) + +#define vzip_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv8qi ((int8x8_t) __a, (int8x8_t) __b); \ + __rv.__i; \ + }) + +#define vzip_u16(__a, __b) __extension__ \ + ({ \ + union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv4hi 
((int16x4_t) __a, (int16x4_t) __b); \ + __rv.__i; \ + }) + +#define vzip_u32(__a, __b) __extension__ \ + ({ \ + union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv2si ((int32x2_t) __a, (int32x2_t) __b); \ + __rv.__i; \ + }) + +#define vzip_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv8qi ((int8x8_t) __a, (int8x8_t) __b); \ + __rv.__i; \ + }) + +#define vzip_p16(__a, __b) __extension__ \ + ({ \ + union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv4hi ((int16x4_t) __a, (int16x4_t) __b); \ + __rv.__i; \ + }) + +#define vzipq_s8(__a, __b) __extension__ \ + ({ \ + union { int8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv16qi (__a, __b); \ + __rv.__i; \ + }) + +#define vzipq_s16(__a, __b) __extension__ \ + ({ \ + union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv8hi (__a, __b); \ + __rv.__i; \ + }) + +#define vzipq_s32(__a, __b) __extension__ \ + ({ \ + union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv4si (__a, __b); \ + __rv.__i; \ + }) + +#define vzipq_f32(__a, __b) __extension__ \ + ({ \ + union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv4sf (__a, __b); \ + __rv.__i; \ + }) + +#define vzipq_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv16qi ((int8x16_t) __a, (int8x16_t) __b); \ + __rv.__i; \ + }) + +#define vzipq_u16(__a, __b) __extension__ \ + ({ \ + union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv8hi ((int16x8_t) __a, (int16x8_t) __b); \ + __rv.__i; \ + }) + +#define vzipq_u32(__a, __b) __extension__ \ + ({ \ + union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv4si 
((int32x4_t) __a, (int32x4_t) __b); \ + __rv.__i; \ + }) + +#define vzipq_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv16qi ((int8x16_t) __a, (int8x16_t) __b); \ + __rv.__i; \ + }) + +#define vzipq_p16(__a, __b) __extension__ \ + ({ \ + union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vzipv8hi ((int16x8_t) __a, (int16x8_t) __b); \ + __rv.__i; \ + }) + +#define vuzp_s8(__a, __b) __extension__ \ + ({ \ + union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv8qi (__a, __b); \ + __rv.__i; \ + }) + +#define vuzp_s16(__a, __b) __extension__ \ + ({ \ + union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv4hi (__a, __b); \ + __rv.__i; \ + }) + +#define vuzp_s32(__a, __b) __extension__ \ + ({ \ + union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv2si (__a, __b); \ + __rv.__i; \ + }) + +#define vuzp_f32(__a, __b) __extension__ \ + ({ \ + union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv2sf (__a, __b); \ + __rv.__i; \ + }) + +#define vuzp_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv8qi ((int8x8_t) __a, (int8x8_t) __b); \ + __rv.__i; \ + }) + +#define vuzp_u16(__a, __b) __extension__ \ + ({ \ + union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv4hi ((int16x4_t) __a, (int16x4_t) __b); \ + __rv.__i; \ + }) + +#define vuzp_u32(__a, __b) __extension__ \ + ({ \ + union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv2si ((int32x2_t) __a, (int32x2_t) __b); \ + __rv.__i; \ + }) + +#define vuzp_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv8qi ((int8x8_t) 
__a, (int8x8_t) __b); \ + __rv.__i; \ + }) + +#define vuzp_p16(__a, __b) __extension__ \ + ({ \ + union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv4hi ((int16x4_t) __a, (int16x4_t) __b); \ + __rv.__i; \ + }) + +#define vuzpq_s8(__a, __b) __extension__ \ + ({ \ + union { int8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv16qi (__a, __b); \ + __rv.__i; \ + }) + +#define vuzpq_s16(__a, __b) __extension__ \ + ({ \ + union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv8hi (__a, __b); \ + __rv.__i; \ + }) + +#define vuzpq_s32(__a, __b) __extension__ \ + ({ \ + union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv4si (__a, __b); \ + __rv.__i; \ + }) + +#define vuzpq_f32(__a, __b) __extension__ \ + ({ \ + union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv4sf (__a, __b); \ + __rv.__i; \ + }) + +#define vuzpq_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv16qi ((int8x16_t) __a, (int8x16_t) __b); \ + __rv.__i; \ + }) + +#define vuzpq_u16(__a, __b) __extension__ \ + ({ \ + union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv8hi ((int16x8_t) __a, (int16x8_t) __b); \ + __rv.__i; \ + }) + +#define vuzpq_u32(__a, __b) __extension__ \ + ({ \ + union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv4si ((int32x4_t) __a, (int32x4_t) __b); \ + __rv.__i; \ + }) + +#define vuzpq_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv16qi ((int8x16_t) __a, (int8x16_t) __b); \ + __rv.__i; \ + }) + +#define vuzpq_p16(__a, __b) __extension__ \ + ({ \ + union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vuzpv8hi 
((int16x8_t) __a, (int16x8_t) __b); \ + __rv.__i; \ + }) + +#define vld1_s8(__a) \ + (int8x8_t)__builtin_neon_vld1v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)) + +#define vld1_s16(__a) \ + (int16x4_t)__builtin_neon_vld1v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)) + +#define vld1_s32(__a) \ + (int32x2_t)__builtin_neon_vld1v2si (__neon_ptr_cast(const __builtin_neon_si *, __a)) + +#define vld1_s64(__a) \ + (int64x1_t)__builtin_neon_vld1v1di (__neon_ptr_cast(const __builtin_neon_di *, __a)) + +#define vld1_f32(__a) \ + (float32x2_t)__builtin_neon_vld1v2sf (__a) + +#define vld1_u8(__a) \ + (uint8x8_t)__builtin_neon_vld1v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)) + +#define vld1_u16(__a) \ + (uint16x4_t)__builtin_neon_vld1v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)) + +#define vld1_u32(__a) \ + (uint32x2_t)__builtin_neon_vld1v2si (__neon_ptr_cast(const __builtin_neon_si *, __a)) + +#define vld1_u64(__a) \ + (uint64x1_t)__builtin_neon_vld1v1di (__neon_ptr_cast(const __builtin_neon_di *, __a)) + +#define vld1_p8(__a) \ + (poly8x8_t)__builtin_neon_vld1v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)) + +#define vld1_p16(__a) \ + (poly16x4_t)__builtin_neon_vld1v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)) + +#define vld1q_s8(__a) \ + (int8x16_t)__builtin_neon_vld1v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)) + +#define vld1q_s16(__a) \ + (int16x8_t)__builtin_neon_vld1v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)) + +#define vld1q_s32(__a) \ + (int32x4_t)__builtin_neon_vld1v4si (__neon_ptr_cast(const __builtin_neon_si *, __a)) + +#define vld1q_s64(__a) \ + (int64x2_t)__builtin_neon_vld1v2di (__neon_ptr_cast(const __builtin_neon_di *, __a)) + +#define vld1q_f32(__a) \ + (float32x4_t)__builtin_neon_vld1v4sf (__a) + +#define vld1q_u8(__a) \ + (uint8x16_t)__builtin_neon_vld1v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)) + +#define vld1q_u16(__a) \ + (uint16x8_t)__builtin_neon_vld1v8hi 
(__neon_ptr_cast(const __builtin_neon_hi *, __a)) + +#define vld1q_u32(__a) \ + (uint32x4_t)__builtin_neon_vld1v4si (__neon_ptr_cast(const __builtin_neon_si *, __a)) + +#define vld1q_u64(__a) \ + (uint64x2_t)__builtin_neon_vld1v2di (__neon_ptr_cast(const __builtin_neon_di *, __a)) + +#define vld1q_p8(__a) \ + (poly8x16_t)__builtin_neon_vld1v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)) + +#define vld1q_p16(__a) \ + (poly16x8_t)__builtin_neon_vld1v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)) + +#define vld1_lane_s8(__a, __b, __c) \ + (int8x8_t)__builtin_neon_vld1_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), __b, __c) + +#define vld1_lane_s16(__a, __b, __c) \ + (int16x4_t)__builtin_neon_vld1_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __b, __c) + +#define vld1_lane_s32(__a, __b, __c) \ + (int32x2_t)__builtin_neon_vld1_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __a), __b, __c) + +#define vld1_lane_f32(__a, __b, __c) \ + (float32x2_t)__builtin_neon_vld1_lanev2sf (__a, __b, __c) + +#define vld1_lane_u8(__a, __b, __c) \ + (uint8x8_t)__builtin_neon_vld1_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), (int8x8_t) __b, __c) + +#define vld1_lane_u16(__a, __b, __c) \ + (uint16x4_t)__builtin_neon_vld1_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), (int16x4_t) __b, __c) + +#define vld1_lane_u32(__a, __b, __c) \ + (uint32x2_t)__builtin_neon_vld1_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __a), (int32x2_t) __b, __c) + +#define vld1_lane_p8(__a, __b, __c) \ + (poly8x8_t)__builtin_neon_vld1_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), (int8x8_t) __b, __c) + +#define vld1_lane_p16(__a, __b, __c) \ + (poly16x4_t)__builtin_neon_vld1_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), (int16x4_t) __b, __c) + +#define vld1_lane_s64(__a, __b, __c) \ + (int64x1_t)__builtin_neon_vld1_lanev1di (__neon_ptr_cast(const __builtin_neon_di *, __a), __b, __c) + +#define vld1_lane_u64(__a, 
__b, __c) \ + (uint64x1_t)__builtin_neon_vld1_lanev1di (__neon_ptr_cast(const __builtin_neon_di *, __a), (int64x1_t) __b, __c) + +#define vld1q_lane_s8(__a, __b, __c) \ + (int8x16_t)__builtin_neon_vld1_lanev16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), __b, __c) + +#define vld1q_lane_s16(__a, __b, __c) \ + (int16x8_t)__builtin_neon_vld1_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __b, __c) + +#define vld1q_lane_s32(__a, __b, __c) \ + (int32x4_t)__builtin_neon_vld1_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __a), __b, __c) + +#define vld1q_lane_f32(__a, __b, __c) \ + (float32x4_t)__builtin_neon_vld1_lanev4sf (__a, __b, __c) + +#define vld1q_lane_u8(__a, __b, __c) \ + (uint8x16_t)__builtin_neon_vld1_lanev16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), (int8x16_t) __b, __c) + +#define vld1q_lane_u16(__a, __b, __c) \ + (uint16x8_t)__builtin_neon_vld1_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), (int16x8_t) __b, __c) + +#define vld1q_lane_u32(__a, __b, __c) \ + (uint32x4_t)__builtin_neon_vld1_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __a), (int32x4_t) __b, __c) + +#define vld1q_lane_p8(__a, __b, __c) \ + (poly8x16_t)__builtin_neon_vld1_lanev16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), (int8x16_t) __b, __c) + +#define vld1q_lane_p16(__a, __b, __c) \ + (poly16x8_t)__builtin_neon_vld1_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), (int16x8_t) __b, __c) + +#define vld1q_lane_s64(__a, __b, __c) \ + (int64x2_t)__builtin_neon_vld1_lanev2di (__neon_ptr_cast(const __builtin_neon_di *, __a), __b, __c) + +#define vld1q_lane_u64(__a, __b, __c) \ + (uint64x2_t)__builtin_neon_vld1_lanev2di (__neon_ptr_cast(const __builtin_neon_di *, __a), (int64x2_t) __b, __c) + +#define vld1_dup_s8(__a) \ + (int8x8_t)__builtin_neon_vld1_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)) + +#define vld1_dup_s16(__a) \ + (int16x4_t)__builtin_neon_vld1_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)) 
+ +#define vld1_dup_s32(__a) \ + (int32x2_t)__builtin_neon_vld1_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __a)) + +#define vld1_dup_f32(__a) \ + (float32x2_t)__builtin_neon_vld1_dupv2sf (__a) + +#define vld1_dup_u8(__a) \ + (uint8x8_t)__builtin_neon_vld1_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)) + +#define vld1_dup_u16(__a) \ + (uint16x4_t)__builtin_neon_vld1_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)) + +#define vld1_dup_u32(__a) \ + (uint32x2_t)__builtin_neon_vld1_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __a)) + +#define vld1_dup_p8(__a) \ + (poly8x8_t)__builtin_neon_vld1_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)) + +#define vld1_dup_p16(__a) \ + (poly16x4_t)__builtin_neon_vld1_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)) + +#define vld1_dup_s64(__a) \ + (int64x1_t)__builtin_neon_vld1_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __a)) + +#define vld1_dup_u64(__a) \ + (uint64x1_t)__builtin_neon_vld1_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __a)) + +#define vld1q_dup_s8(__a) \ + (int8x16_t)__builtin_neon_vld1_dupv16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)) + +#define vld1q_dup_s16(__a) \ + (int16x8_t)__builtin_neon_vld1_dupv8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)) + +#define vld1q_dup_s32(__a) \ + (int32x4_t)__builtin_neon_vld1_dupv4si (__neon_ptr_cast(const __builtin_neon_si *, __a)) + +#define vld1q_dup_f32(__a) \ + (float32x4_t)__builtin_neon_vld1_dupv4sf (__a) + +#define vld1q_dup_u8(__a) \ + (uint8x16_t)__builtin_neon_vld1_dupv16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)) + +#define vld1q_dup_u16(__a) \ + (uint16x8_t)__builtin_neon_vld1_dupv8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)) + +#define vld1q_dup_u32(__a) \ + (uint32x4_t)__builtin_neon_vld1_dupv4si (__neon_ptr_cast(const __builtin_neon_si *, __a)) + +#define vld1q_dup_p8(__a) \ + (poly8x16_t)__builtin_neon_vld1_dupv16qi (__neon_ptr_cast(const __builtin_neon_qi *, 
__a)) + +#define vld1q_dup_p16(__a) \ + (poly16x8_t)__builtin_neon_vld1_dupv8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)) + +#define vld1q_dup_s64(__a) \ + (int64x2_t)__builtin_neon_vld1_dupv2di (__neon_ptr_cast(const __builtin_neon_di *, __a)) + +#define vld1q_dup_u64(__a) \ + (uint64x2_t)__builtin_neon_vld1_dupv2di (__neon_ptr_cast(const __builtin_neon_di *, __a)) + +#define vst1_s8(__a, __b) \ + __builtin_neon_vst1v8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __b) + +#define vst1_s16(__a, __b) \ + __builtin_neon_vst1v4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __b) + +#define vst1_s32(__a, __b) \ + __builtin_neon_vst1v2si (__neon_ptr_cast(__builtin_neon_si *, __a), __b) + +#define vst1_s64(__a, __b) \ + __builtin_neon_vst1v1di (__neon_ptr_cast(__builtin_neon_di *, __a), __b) + +#define vst1_f32(__a, __b) \ + __builtin_neon_vst1v2sf (__a, __b) + +#define vst1_u8(__a, __b) \ + __builtin_neon_vst1v8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), (int8x8_t) __b) + +#define vst1_u16(__a, __b) \ + __builtin_neon_vst1v4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), (int16x4_t) __b) + +#define vst1_u32(__a, __b) \ + __builtin_neon_vst1v2si (__neon_ptr_cast(__builtin_neon_si *, __a), (int32x2_t) __b) + +#define vst1_u64(__a, __b) \ + __builtin_neon_vst1v1di (__neon_ptr_cast(__builtin_neon_di *, __a), (int64x1_t) __b) + +#define vst1_p8(__a, __b) \ + __builtin_neon_vst1v8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), (int8x8_t) __b) + +#define vst1_p16(__a, __b) \ + __builtin_neon_vst1v4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), (int16x4_t) __b) + +#define vst1q_s8(__a, __b) \ + __builtin_neon_vst1v16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __b) + +#define vst1q_s16(__a, __b) \ + __builtin_neon_vst1v8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __b) + +#define vst1q_s32(__a, __b) \ + __builtin_neon_vst1v4si (__neon_ptr_cast(__builtin_neon_si *, __a), __b) + +#define vst1q_s64(__a, __b) \ + __builtin_neon_vst1v2di 
(__neon_ptr_cast(__builtin_neon_di *, __a), __b) + +#define vst1q_f32(__a, __b) \ + __builtin_neon_vst1v4sf (__a, __b) + +#define vst1q_u8(__a, __b) \ + __builtin_neon_vst1v16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), (int8x16_t) __b) + +#define vst1q_u16(__a, __b) \ + __builtin_neon_vst1v8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), (int16x8_t) __b) + +#define vst1q_u32(__a, __b) \ + __builtin_neon_vst1v4si (__neon_ptr_cast(__builtin_neon_si *, __a), (int32x4_t) __b) + +#define vst1q_u64(__a, __b) \ + __builtin_neon_vst1v2di (__neon_ptr_cast(__builtin_neon_di *, __a), (int64x2_t) __b) + +#define vst1q_p8(__a, __b) \ + __builtin_neon_vst1v16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), (int8x16_t) __b) + +#define vst1q_p16(__a, __b) \ + __builtin_neon_vst1v8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), (int16x8_t) __b) + +#define vst1_lane_s8(__a, __b, __c) \ + __builtin_neon_vst1_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __b, __c) + +#define vst1_lane_s16(__a, __b, __c) \ + __builtin_neon_vst1_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __b, __c) + +#define vst1_lane_s32(__a, __b, __c) \ + __builtin_neon_vst1_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __a), __b, __c) + +#define vst1_lane_f32(__a, __b, __c) \ + __builtin_neon_vst1_lanev2sf (__a, __b, __c) + +#define vst1_lane_u8(__a, __b, __c) \ + __builtin_neon_vst1_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), (int8x8_t) __b, __c) + +#define vst1_lane_u16(__a, __b, __c) \ + __builtin_neon_vst1_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), (int16x4_t) __b, __c) + +#define vst1_lane_u32(__a, __b, __c) \ + __builtin_neon_vst1_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __a), (int32x2_t) __b, __c) + +#define vst1_lane_p8(__a, __b, __c) \ + __builtin_neon_vst1_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), (int8x8_t) __b, __c) + +#define vst1_lane_p16(__a, __b, __c) \ + __builtin_neon_vst1_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), (int16x4_t) __b, __c) 
+ +#define vst1_lane_s64(__a, __b, __c) \ + __builtin_neon_vst1_lanev1di (__neon_ptr_cast(__builtin_neon_di *, __a), __b, __c) + +#define vst1_lane_u64(__a, __b, __c) \ + __builtin_neon_vst1_lanev1di (__neon_ptr_cast(__builtin_neon_di *, __a), (int64x1_t) __b, __c) + +#define vst1q_lane_s8(__a, __b, __c) \ + __builtin_neon_vst1_lanev16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __b, __c) + +#define vst1q_lane_s16(__a, __b, __c) \ + __builtin_neon_vst1_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __b, __c) + +#define vst1q_lane_s32(__a, __b, __c) \ + __builtin_neon_vst1_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __a), __b, __c) + +#define vst1q_lane_f32(__a, __b, __c) \ + __builtin_neon_vst1_lanev4sf (__a, __b, __c) + +#define vst1q_lane_u8(__a, __b, __c) \ + __builtin_neon_vst1_lanev16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), (int8x16_t) __b, __c) + +#define vst1q_lane_u16(__a, __b, __c) \ + __builtin_neon_vst1_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), (int16x8_t) __b, __c) + +#define vst1q_lane_u32(__a, __b, __c) \ + __builtin_neon_vst1_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __a), (int32x4_t) __b, __c) + +#define vst1q_lane_p8(__a, __b, __c) \ + __builtin_neon_vst1_lanev16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), (int8x16_t) __b, __c) + +#define vst1q_lane_p16(__a, __b, __c) \ + __builtin_neon_vst1_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), (int16x8_t) __b, __c) + +#define vst1q_lane_s64(__a, __b, __c) \ + __builtin_neon_vst1_lanev2di (__neon_ptr_cast(__builtin_neon_di *, __a), __b, __c) + +#define vst1q_lane_u64(__a, __b, __c) \ + __builtin_neon_vst1_lanev2di (__neon_ptr_cast(__builtin_neon_di *, __a), (int64x2_t) __b, __c) + +#define vld2_s8(__a) __extension__ \ + ({ \ + union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld2_s16(__a) __extension__ \ + ({ \ + union { int16x4x2_t __i; 
__neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld2_s32(__a) __extension__ \ + ({ \ + union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v2si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld2_f32(__a) __extension__ \ + ({ \ + union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v2sf (__a); \ + __rv.__i; \ + }) + +#define vld2_u8(__a) __extension__ \ + ({ \ + union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld2_u16(__a) __extension__ \ + ({ \ + union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld2_u32(__a) __extension__ \ + ({ \ + union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v2si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld2_p8(__a) __extension__ \ + ({ \ + union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld2_p16(__a) __extension__ \ + ({ \ + union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld2_s64(__a) __extension__ \ + ({ \ + union { int64x1x2_t __i; __neon_int64x1x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v1di (__neon_ptr_cast(const __builtin_neon_di *, __a)); \ + __rv.__i; \ + }) + +#define vld2_u64(__a) __extension__ \ + ({ \ + union { uint64x1x2_t __i; __neon_int64x1x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v1di 
(__neon_ptr_cast(const __builtin_neon_di *, __a)); \ + __rv.__i; \ + }) + +#define vld2q_s8(__a) __extension__ \ + ({ \ + union { int8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld2q_s16(__a) __extension__ \ + ({ \ + union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld2q_s32(__a) __extension__ \ + ({ \ + union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v4si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld2q_f32(__a) __extension__ \ + ({ \ + union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v4sf (__a); \ + __rv.__i; \ + }) + +#define vld2q_u8(__a) __extension__ \ + ({ \ + union { uint8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld2q_u16(__a) __extension__ \ + ({ \ + union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld2q_u32(__a) __extension__ \ + ({ \ + union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v4si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld2q_p8(__a) __extension__ \ + ({ \ + union { poly8x16x2_t __i; __neon_int8x16x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld2q_p16(__a) __extension__ \ + ({ \ + union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + 
__rv.__i; \ + }) + +#define vld2_lane_s8(__a, __b, __c) __extension__ \ + ({ \ + union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2_lane_s16(__a, __b, __c) __extension__ \ + ({ \ + union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ + union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2_lane_s32(__a, __b, __c) __extension__ \ + ({ \ + union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \ + union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2_lane_f32(__a, __b, __c) __extension__ \ + ({ \ + union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __bu = { __b }; \ + union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev2sf (__a, __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2_lane_u8(__a, __b, __c) __extension__ \ + ({ \ + union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2_lane_u16(__a, __b, __c) __extension__ \ + ({ \ + union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ + union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2_lane_u32(__a, __b, __c) __extension__ \ + ({ \ + union { 
uint32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \ + union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2_lane_p8(__a, __b, __c) __extension__ \ + ({ \ + union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2_lane_p16(__a, __b, __c) __extension__ \ + ({ \ + union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ + union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2q_lane_s16(__a, __b, __c) __extension__ \ + ({ \ + union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ + union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2q_lane_s32(__a, __b, __c) __extension__ \ + ({ \ + union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \ + union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2q_lane_f32(__a, __b, __c) __extension__ \ + ({ \ + union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __bu = { __b }; \ + union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev4sf (__a, __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2q_lane_u16(__a, __b, __c) __extension__ \ + ({ \ + union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ + union { uint16x8x2_t 
__i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2q_lane_u32(__a, __b, __c) __extension__ \ + ({ \ + union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \ + union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2q_lane_p16(__a, __b, __c) __extension__ \ + ({ \ + union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ + union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld2_dup_s8(__a) __extension__ \ + ({ \ + union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld2_dup_s16(__a) __extension__ \ + ({ \ + union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld2_dup_s32(__a) __extension__ \ + ({ \ + union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld2_dup_f32(__a) __extension__ \ + ({ \ + union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_dupv2sf (__a); \ + __rv.__i; \ + }) + +#define vld2_dup_u8(__a) __extension__ \ + ({ \ + union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld2_dup_u16(__a) __extension__ \ + ({ \ + union { uint16x4x2_t __i; 
__neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld2_dup_u32(__a) __extension__ \ + ({ \ + union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld2_dup_p8(__a) __extension__ \ + ({ \ + union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld2_dup_p16(__a) __extension__ \ + ({ \ + union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld2_dup_s64(__a) __extension__ \ + ({ \ + union { int64x1x2_t __i; __neon_int64x1x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __a)); \ + __rv.__i; \ + }) + +#define vld2_dup_u64(__a) __extension__ \ + ({ \ + union { uint64x1x2_t __i; __neon_int64x1x2_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld2_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __a)); \ + __rv.__i; \ + }) + +#define vst2_s8(__a, __b) __extension__ \ + ({ \ + union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst2_s16(__a, __b) __extension__ \ + ({ \ + union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst2_s32(__a, __b) __extension__ \ + ({ \ + union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v2si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o); \ + }) + +#define vst2_f32(__a, __b) __extension__ \ + ({ \ + union { float32x2x2_t __i; 
__neon_float32x2x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v2sf (__a, __bu.__o); \ + }) + +#define vst2_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst2_u16(__a, __b) __extension__ \ + ({ \ + union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst2_u32(__a, __b) __extension__ \ + ({ \ + union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v2si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o); \ + }) + +#define vst2_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst2_p16(__a, __b) __extension__ \ + ({ \ + union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst2_s64(__a, __b) __extension__ \ + ({ \ + union { int64x1x2_t __i; __neon_int64x1x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v1di (__neon_ptr_cast(__builtin_neon_di *, __a), __bu.__o); \ + }) + +#define vst2_u64(__a, __b) __extension__ \ + ({ \ + union { uint64x1x2_t __i; __neon_int64x1x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v1di (__neon_ptr_cast(__builtin_neon_di *, __a), __bu.__o); \ + }) + +#define vst2q_s8(__a, __b) __extension__ \ + ({ \ + union { int8x16x2_t __i; __neon_int8x16x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst2q_s16(__a, __b) __extension__ \ + ({ \ + union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); 
\ + }) + +#define vst2q_s32(__a, __b) __extension__ \ + ({ \ + union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v4si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o); \ + }) + +#define vst2q_f32(__a, __b) __extension__ \ + ({ \ + union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v4sf (__a, __bu.__o); \ + }) + +#define vst2q_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x16x2_t __i; __neon_int8x16x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst2q_u16(__a, __b) __extension__ \ + ({ \ + union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst2q_u32(__a, __b) __extension__ \ + ({ \ + union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v4si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o); \ + }) + +#define vst2q_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x16x2_t __i; __neon_int8x16x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst2q_p16(__a, __b) __extension__ \ + ({ \ + union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2v8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst2_lane_s8(__a, __b, __c) __extension__ \ + ({ \ + union { int8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o, __c); \ + }) + +#define vst2_lane_s16(__a, __b, __c) __extension__ \ + ({ \ + union { int16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst2_lane_s32(__a, __b, __c) __extension__ \ + ({ \ + 
union { int32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o, __c); \ + }) + +#define vst2_lane_f32(__a, __b, __c) __extension__ \ + ({ \ + union { float32x2x2_t __i; __neon_float32x2x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev2sf (__a, __bu.__o, __c); \ + }) + +#define vst2_lane_u8(__a, __b, __c) __extension__ \ + ({ \ + union { uint8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o, __c); \ + }) + +#define vst2_lane_u16(__a, __b, __c) __extension__ \ + ({ \ + union { uint16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst2_lane_u32(__a, __b, __c) __extension__ \ + ({ \ + union { uint32x2x2_t __i; __neon_int32x2x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o, __c); \ + }) + +#define vst2_lane_p8(__a, __b, __c) __extension__ \ + ({ \ + union { poly8x8x2_t __i; __neon_int8x8x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o, __c); \ + }) + +#define vst2_lane_p16(__a, __b, __c) __extension__ \ + ({ \ + union { poly16x4x2_t __i; __neon_int16x4x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst2q_lane_s16(__a, __b, __c) __extension__ \ + ({ \ + union { int16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst2q_lane_s32(__a, __b, __c) __extension__ \ + ({ \ + union { int32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o, __c); \ + }) + 
+#define vst2q_lane_f32(__a, __b, __c) __extension__ \ + ({ \ + union { float32x4x2_t __i; __neon_float32x4x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev4sf (__a, __bu.__o, __c); \ + }) + +#define vst2q_lane_u16(__a, __b, __c) __extension__ \ + ({ \ + union { uint16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst2q_lane_u32(__a, __b, __c) __extension__ \ + ({ \ + union { uint32x4x2_t __i; __neon_int32x4x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o, __c); \ + }) + +#define vst2q_lane_p16(__a, __b, __c) __extension__ \ + ({ \ + union { poly16x8x2_t __i; __neon_int16x8x2_t __o; } __bu = { __b }; \ + __builtin_neon_vst2_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vld3_s8(__a) __extension__ \ + ({ \ + union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld3_s16(__a) __extension__ \ + ({ \ + union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld3_s32(__a) __extension__ \ + ({ \ + union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v2si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld3_f32(__a) __extension__ \ + ({ \ + union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v2sf (__a); \ + __rv.__i; \ + }) + +#define vld3_u8(__a) __extension__ \ + ({ \ + union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld3_u16(__a) __extension__ \ + ({ \ + union 
{ uint16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld3_u32(__a) __extension__ \ + ({ \ + union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v2si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld3_p8(__a) __extension__ \ + ({ \ + union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld3_p16(__a) __extension__ \ + ({ \ + union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld3_s64(__a) __extension__ \ + ({ \ + union { int64x1x3_t __i; __neon_int64x1x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v1di (__neon_ptr_cast(const __builtin_neon_di *, __a)); \ + __rv.__i; \ + }) + +#define vld3_u64(__a) __extension__ \ + ({ \ + union { uint64x1x3_t __i; __neon_int64x1x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v1di (__neon_ptr_cast(const __builtin_neon_di *, __a)); \ + __rv.__i; \ + }) + +#define vld3q_s8(__a) __extension__ \ + ({ \ + union { int8x16x3_t __i; __neon_int8x16x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld3q_s16(__a) __extension__ \ + ({ \ + union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld3q_s32(__a) __extension__ \ + ({ \ + union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v4si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld3q_f32(__a) __extension__ \ + ({ \ + union { float32x4x3_t __i; 
__neon_float32x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v4sf (__a); \ + __rv.__i; \ + }) + +#define vld3q_u8(__a) __extension__ \ + ({ \ + union { uint8x16x3_t __i; __neon_int8x16x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld3q_u16(__a) __extension__ \ + ({ \ + union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld3q_u32(__a) __extension__ \ + ({ \ + union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v4si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld3q_p8(__a) __extension__ \ + ({ \ + union { poly8x16x3_t __i; __neon_int8x16x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld3q_p16(__a) __extension__ \ + ({ \ + union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld3_lane_s8(__a, __b, __c) __extension__ \ + ({ \ + union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3_lane_s16(__a, __b, __c) __extension__ \ + ({ \ + union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ + union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3_lane_s32(__a, __b, __c) __extension__ \ + ({ \ + union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \ + union { int32x2x3_t 
__i; __neon_int32x2x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3_lane_f32(__a, __b, __c) __extension__ \ + ({ \ + union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __bu = { __b }; \ + union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev2sf (__a, __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3_lane_u8(__a, __b, __c) __extension__ \ + ({ \ + union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3_lane_u16(__a, __b, __c) __extension__ \ + ({ \ + union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ + union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3_lane_u32(__a, __b, __c) __extension__ \ + ({ \ + union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \ + union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3_lane_p8(__a, __b, __c) __extension__ \ + ({ \ + union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3_lane_p16(__a, __b, __c) __extension__ \ + ({ \ + union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ + union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev4hi 
(__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3q_lane_s16(__a, __b, __c) __extension__ \ + ({ \ + union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ + union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3q_lane_s32(__a, __b, __c) __extension__ \ + ({ \ + union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \ + union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3q_lane_f32(__a, __b, __c) __extension__ \ + ({ \ + union { float32x4x3_t __i; __neon_float32x4x3_t __o; } __bu = { __b }; \ + union { float32x4x3_t __i; __neon_float32x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev4sf (__a, __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3q_lane_u16(__a, __b, __c) __extension__ \ + ({ \ + union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ + union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3q_lane_u32(__a, __b, __c) __extension__ \ + ({ \ + union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \ + union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld3q_lane_p16(__a, __b, __c) __extension__ \ + ({ \ + union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ + union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; 
\ + }) + +#define vld3_dup_s8(__a) __extension__ \ + ({ \ + union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld3_dup_s16(__a) __extension__ \ + ({ \ + union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld3_dup_s32(__a) __extension__ \ + ({ \ + union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld3_dup_f32(__a) __extension__ \ + ({ \ + union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_dupv2sf (__a); \ + __rv.__i; \ + }) + +#define vld3_dup_u8(__a) __extension__ \ + ({ \ + union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld3_dup_u16(__a) __extension__ \ + ({ \ + union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld3_dup_u32(__a) __extension__ \ + ({ \ + union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld3_dup_p8(__a) __extension__ \ + ({ \ + union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld3_dup_p16(__a) __extension__ \ + ({ \ + union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + 
+#define vld3_dup_s64(__a) __extension__ \ + ({ \ + union { int64x1x3_t __i; __neon_int64x1x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __a)); \ + __rv.__i; \ + }) + +#define vld3_dup_u64(__a) __extension__ \ + ({ \ + union { uint64x1x3_t __i; __neon_int64x1x3_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld3_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __a)); \ + __rv.__i; \ + }) + +#define vst3_s8(__a, __b) __extension__ \ + ({ \ + union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst3_s16(__a, __b) __extension__ \ + ({ \ + union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst3_s32(__a, __b) __extension__ \ + ({ \ + union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v2si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o); \ + }) + +#define vst3_f32(__a, __b) __extension__ \ + ({ \ + union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v2sf (__a, __bu.__o); \ + }) + +#define vst3_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst3_u16(__a, __b) __extension__ \ + ({ \ + union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst3_u32(__a, __b) __extension__ \ + ({ \ + union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v2si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o); \ + }) + +#define vst3_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x8x3_t __i; __neon_int8x8x3_t __o; 
} __bu = { __b }; \ + __builtin_neon_vst3v8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst3_p16(__a, __b) __extension__ \ + ({ \ + union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst3_s64(__a, __b) __extension__ \ + ({ \ + union { int64x1x3_t __i; __neon_int64x1x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v1di (__neon_ptr_cast(__builtin_neon_di *, __a), __bu.__o); \ + }) + +#define vst3_u64(__a, __b) __extension__ \ + ({ \ + union { uint64x1x3_t __i; __neon_int64x1x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v1di (__neon_ptr_cast(__builtin_neon_di *, __a), __bu.__o); \ + }) + +#define vst3q_s8(__a, __b) __extension__ \ + ({ \ + union { int8x16x3_t __i; __neon_int8x16x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst3q_s16(__a, __b) __extension__ \ + ({ \ + union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst3q_s32(__a, __b) __extension__ \ + ({ \ + union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v4si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o); \ + }) + +#define vst3q_f32(__a, __b) __extension__ \ + ({ \ + union { float32x4x3_t __i; __neon_float32x4x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v4sf (__a, __bu.__o); \ + }) + +#define vst3q_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x16x3_t __i; __neon_int8x16x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst3q_u16(__a, __b) __extension__ \ + ({ \ + union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + 
+#define vst3q_u32(__a, __b) __extension__ \ + ({ \ + union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v4si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o); \ + }) + +#define vst3q_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x16x3_t __i; __neon_int8x16x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst3q_p16(__a, __b) __extension__ \ + ({ \ + union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3v8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst3_lane_s8(__a, __b, __c) __extension__ \ + ({ \ + union { int8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o, __c); \ + }) + +#define vst3_lane_s16(__a, __b, __c) __extension__ \ + ({ \ + union { int16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst3_lane_s32(__a, __b, __c) __extension__ \ + ({ \ + union { int32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o, __c); \ + }) + +#define vst3_lane_f32(__a, __b, __c) __extension__ \ + ({ \ + union { float32x2x3_t __i; __neon_float32x2x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev2sf (__a, __bu.__o, __c); \ + }) + +#define vst3_lane_u8(__a, __b, __c) __extension__ \ + ({ \ + union { uint8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o, __c); \ + }) + +#define vst3_lane_u16(__a, __b, __c) __extension__ \ + ({ \ + union { uint16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + 
+#define vst3_lane_u32(__a, __b, __c) __extension__ \ + ({ \ + union { uint32x2x3_t __i; __neon_int32x2x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o, __c); \ + }) + +#define vst3_lane_p8(__a, __b, __c) __extension__ \ + ({ \ + union { poly8x8x3_t __i; __neon_int8x8x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o, __c); \ + }) + +#define vst3_lane_p16(__a, __b, __c) __extension__ \ + ({ \ + union { poly16x4x3_t __i; __neon_int16x4x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst3q_lane_s16(__a, __b, __c) __extension__ \ + ({ \ + union { int16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst3q_lane_s32(__a, __b, __c) __extension__ \ + ({ \ + union { int32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o, __c); \ + }) + +#define vst3q_lane_f32(__a, __b, __c) __extension__ \ + ({ \ + union { float32x4x3_t __i; __neon_float32x4x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev4sf (__a, __bu.__o, __c); \ + }) + +#define vst3q_lane_u16(__a, __b, __c) __extension__ \ + ({ \ + union { uint16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst3q_lane_u32(__a, __b, __c) __extension__ \ + ({ \ + union { uint32x4x3_t __i; __neon_int32x4x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o, __c); \ + }) + +#define vst3q_lane_p16(__a, __b, __c) __extension__ \ + ({ \ + union { poly16x8x3_t __i; __neon_int16x8x3_t __o; } __bu = { __b }; \ + __builtin_neon_vst3_lanev8hi 
(__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vld4_s8(__a) __extension__ \ + ({ \ + union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld4_s16(__a) __extension__ \ + ({ \ + union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld4_s32(__a) __extension__ \ + ({ \ + union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v2si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld4_f32(__a) __extension__ \ + ({ \ + union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v2sf (__a); \ + __rv.__i; \ + }) + +#define vld4_u8(__a) __extension__ \ + ({ \ + union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld4_u16(__a) __extension__ \ + ({ \ + union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld4_u32(__a) __extension__ \ + ({ \ + union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v2si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld4_p8(__a) __extension__ \ + ({ \ + union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld4_p16(__a) __extension__ \ + ({ \ + union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define 
vld4_s64(__a) __extension__ \ + ({ \ + union { int64x1x4_t __i; __neon_int64x1x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v1di (__neon_ptr_cast(const __builtin_neon_di *, __a)); \ + __rv.__i; \ + }) + +#define vld4_u64(__a) __extension__ \ + ({ \ + union { uint64x1x4_t __i; __neon_int64x1x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v1di (__neon_ptr_cast(const __builtin_neon_di *, __a)); \ + __rv.__i; \ + }) + +#define vld4q_s8(__a) __extension__ \ + ({ \ + union { int8x16x4_t __i; __neon_int8x16x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld4q_s16(__a) __extension__ \ + ({ \ + union { int16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld4q_s32(__a) __extension__ \ + ({ \ + union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v4si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld4q_f32(__a) __extension__ \ + ({ \ + union { float32x4x4_t __i; __neon_float32x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v4sf (__a); \ + __rv.__i; \ + }) + +#define vld4q_u8(__a) __extension__ \ + ({ \ + union { uint8x16x4_t __i; __neon_int8x16x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld4q_u16(__a) __extension__ \ + ({ \ + union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld4q_u32(__a) __extension__ \ + ({ \ + union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v4si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld4q_p8(__a) __extension__ \ + ({ \ + union { poly8x16x4_t __i; 
__neon_int8x16x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v16qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld4q_p16(__a) __extension__ \ + ({ \ + union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4v8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld4_lane_s8(__a, __b, __c) __extension__ \ + ({ \ + union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4_lane_s16(__a, __b, __c) __extension__ \ + ({ \ + union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ + union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4_lane_s32(__a, __b, __c) __extension__ \ + ({ \ + union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \ + union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4_lane_f32(__a, __b, __c) __extension__ \ + ({ \ + union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __bu = { __b }; \ + union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev2sf (__a, __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4_lane_u8(__a, __b, __c) __extension__ \ + ({ \ + union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4_lane_u16(__a, __b, __c) __extension__ \ + 
({ \ + union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ + union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4_lane_u32(__a, __b, __c) __extension__ \ + ({ \ + union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \ + union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev2si (__neon_ptr_cast(const __builtin_neon_si *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4_lane_p8(__a, __b, __c) __extension__ \ + ({ \ + union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4_lane_p16(__a, __b, __c) __extension__ \ + ({ \ + union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ + union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4q_lane_s16(__a, __b, __c) __extension__ \ + ({ \ + union { int16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ + union { int16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4q_lane_s32(__a, __b, __c) __extension__ \ + ({ \ + union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \ + union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4q_lane_f32(__a, __b, __c) __extension__ \ + ({ \ + union { float32x4x4_t __i; 
__neon_float32x4x4_t __o; } __bu = { __b }; \ + union { float32x4x4_t __i; __neon_float32x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev4sf (__a, __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4q_lane_u16(__a, __b, __c) __extension__ \ + ({ \ + union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ + union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4q_lane_u32(__a, __b, __c) __extension__ \ + ({ \ + union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \ + union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev4si (__neon_ptr_cast(const __builtin_neon_si *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4q_lane_p16(__a, __b, __c) __extension__ \ + ({ \ + union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ + union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_lanev8hi (__neon_ptr_cast(const __builtin_neon_hi *, __a), __bu.__o, __c); \ + __rv.__i; \ + }) + +#define vld4_dup_s8(__a) __extension__ \ + ({ \ + union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld4_dup_s16(__a) __extension__ \ + ({ \ + union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld4_dup_s32(__a) __extension__ \ + ({ \ + union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld4_dup_f32(__a) __extension__ \ + ({ \ + union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __rv; \ + __rv.__o = 
__builtin_neon_vld4_dupv2sf (__a); \ + __rv.__i; \ + }) + +#define vld4_dup_u8(__a) __extension__ \ + ({ \ + union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld4_dup_u16(__a) __extension__ \ + ({ \ + union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld4_dup_u32(__a) __extension__ \ + ({ \ + union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_dupv2si (__neon_ptr_cast(const __builtin_neon_si *, __a)); \ + __rv.__i; \ + }) + +#define vld4_dup_p8(__a) __extension__ \ + ({ \ + union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_dupv8qi (__neon_ptr_cast(const __builtin_neon_qi *, __a)); \ + __rv.__i; \ + }) + +#define vld4_dup_p16(__a) __extension__ \ + ({ \ + union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_dupv4hi (__neon_ptr_cast(const __builtin_neon_hi *, __a)); \ + __rv.__i; \ + }) + +#define vld4_dup_s64(__a) __extension__ \ + ({ \ + union { int64x1x4_t __i; __neon_int64x1x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __a)); \ + __rv.__i; \ + }) + +#define vld4_dup_u64(__a) __extension__ \ + ({ \ + union { uint64x1x4_t __i; __neon_int64x1x4_t __o; } __rv; \ + __rv.__o = __builtin_neon_vld4_dupv1di (__neon_ptr_cast(const __builtin_neon_di *, __a)); \ + __rv.__i; \ + }) + +#define vst4_s8(__a, __b) __extension__ \ + ({ \ + union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst4_s16(__a, __b) __extension__ \ + ({ \ + union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v4hi 
(__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst4_s32(__a, __b) __extension__ \ + ({ \ + union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v2si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o); \ + }) + +#define vst4_f32(__a, __b) __extension__ \ + ({ \ + union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v2sf (__a, __bu.__o); \ + }) + +#define vst4_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst4_u16(__a, __b) __extension__ \ + ({ \ + union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst4_u32(__a, __b) __extension__ \ + ({ \ + union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v2si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o); \ + }) + +#define vst4_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst4_p16(__a, __b) __extension__ \ + ({ \ + union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst4_s64(__a, __b) __extension__ \ + ({ \ + union { int64x1x4_t __i; __neon_int64x1x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v1di (__neon_ptr_cast(__builtin_neon_di *, __a), __bu.__o); \ + }) + +#define vst4_u64(__a, __b) __extension__ \ + ({ \ + union { uint64x1x4_t __i; __neon_int64x1x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v1di (__neon_ptr_cast(__builtin_neon_di *, __a), __bu.__o); \ + }) + +#define vst4q_s8(__a, __b) __extension__ \ + ({ \ + union { 
int8x16x4_t __i; __neon_int8x16x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst4q_s16(__a, __b) __extension__ \ + ({ \ + union { int16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst4q_s32(__a, __b) __extension__ \ + ({ \ + union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v4si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o); \ + }) + +#define vst4q_f32(__a, __b) __extension__ \ + ({ \ + union { float32x4x4_t __i; __neon_float32x4x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v4sf (__a, __bu.__o); \ + }) + +#define vst4q_u8(__a, __b) __extension__ \ + ({ \ + union { uint8x16x4_t __i; __neon_int8x16x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst4q_u16(__a, __b) __extension__ \ + ({ \ + union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst4q_u32(__a, __b) __extension__ \ + ({ \ + union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v4si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o); \ + }) + +#define vst4q_p8(__a, __b) __extension__ \ + ({ \ + union { poly8x16x4_t __i; __neon_int8x16x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v16qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o); \ + }) + +#define vst4q_p16(__a, __b) __extension__ \ + ({ \ + union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4v8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o); \ + }) + +#define vst4_lane_s8(__a, __b, __c) __extension__ \ + ({ \ + union { int8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev8qi 
(__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o, __c); \ + }) + +#define vst4_lane_s16(__a, __b, __c) __extension__ \ + ({ \ + union { int16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst4_lane_s32(__a, __b, __c) __extension__ \ + ({ \ + union { int32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o, __c); \ + }) + +#define vst4_lane_f32(__a, __b, __c) __extension__ \ + ({ \ + union { float32x2x4_t __i; __neon_float32x2x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev2sf (__a, __bu.__o, __c); \ + }) + +#define vst4_lane_u8(__a, __b, __c) __extension__ \ + ({ \ + union { uint8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o, __c); \ + }) + +#define vst4_lane_u16(__a, __b, __c) __extension__ \ + ({ \ + union { uint16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst4_lane_u32(__a, __b, __c) __extension__ \ + ({ \ + union { uint32x2x4_t __i; __neon_int32x2x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev2si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o, __c); \ + }) + +#define vst4_lane_p8(__a, __b, __c) __extension__ \ + ({ \ + union { poly8x8x4_t __i; __neon_int8x8x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev8qi (__neon_ptr_cast(__builtin_neon_qi *, __a), __bu.__o, __c); \ + }) + +#define vst4_lane_p16(__a, __b, __c) __extension__ \ + ({ \ + union { poly16x4x4_t __i; __neon_int16x4x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev4hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst4q_lane_s16(__a, __b, __c) __extension__ \ + ({ \ + union { int16x8x4_t __i; __neon_int16x8x4_t 
__o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst4q_lane_s32(__a, __b, __c) __extension__ \ + ({ \ + union { int32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o, __c); \ + }) + +#define vst4q_lane_f32(__a, __b, __c) __extension__ \ + ({ \ + union { float32x4x4_t __i; __neon_float32x4x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev4sf (__a, __bu.__o, __c); \ + }) + +#define vst4q_lane_u16(__a, __b, __c) __extension__ \ + ({ \ + union { uint16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vst4q_lane_u32(__a, __b, __c) __extension__ \ + ({ \ + union { uint32x4x4_t __i; __neon_int32x4x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev4si (__neon_ptr_cast(__builtin_neon_si *, __a), __bu.__o, __c); \ + }) + +#define vst4q_lane_p16(__a, __b, __c) __extension__ \ + ({ \ + union { poly16x8x4_t __i; __neon_int16x8x4_t __o; } __bu = { __b }; \ + __builtin_neon_vst4_lanev8hi (__neon_ptr_cast(__builtin_neon_hi *, __a), __bu.__o, __c); \ + }) + +#define vand_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vandv8qi (__a, __b, 1) + +#define vand_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vandv4hi (__a, __b, 1) + +#define vand_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vandv2si (__a, __b, 1) + +#define vand_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vandv1di (__a, __b, 1) + +#define vand_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vandv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vand_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vandv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vand_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vandv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vand_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vandv1di ((int64x1_t) __a, 
(int64x1_t) __b, 0) + +#define vandq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vandv16qi (__a, __b, 1) + +#define vandq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vandv8hi (__a, __b, 1) + +#define vandq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vandv4si (__a, __b, 1) + +#define vandq_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vandv2di (__a, __b, 1) + +#define vandq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vandv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vandq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vandv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vandq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vandv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vandq_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vandv2di ((int64x2_t) __a, (int64x2_t) __b, 0) + +#define vorr_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vorrv8qi (__a, __b, 1) + +#define vorr_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vorrv4hi (__a, __b, 1) + +#define vorr_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vorrv2si (__a, __b, 1) + +#define vorr_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vorrv1di (__a, __b, 1) + +#define vorr_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vorrv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vorr_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vorrv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vorr_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vorrv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vorr_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vorrv1di ((int64x1_t) __a, (int64x1_t) __b, 0) + +#define vorrq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vorrv16qi (__a, __b, 1) + +#define vorrq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vorrv8hi (__a, __b, 1) + +#define vorrq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vorrv4si (__a, __b, 1) + +#define vorrq_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vorrv2di (__a, __b, 1) + +#define vorrq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vorrv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + 
+#define vorrq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vorrv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vorrq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vorrv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vorrq_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vorrv2di ((int64x2_t) __a, (int64x2_t) __b, 0) + +#define veor_s8(__a, __b) \ + (int8x8_t)__builtin_neon_veorv8qi (__a, __b, 1) + +#define veor_s16(__a, __b) \ + (int16x4_t)__builtin_neon_veorv4hi (__a, __b, 1) + +#define veor_s32(__a, __b) \ + (int32x2_t)__builtin_neon_veorv2si (__a, __b, 1) + +#define veor_s64(__a, __b) \ + (int64x1_t)__builtin_neon_veorv1di (__a, __b, 1) + +#define veor_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_veorv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define veor_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_veorv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define veor_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_veorv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define veor_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_veorv1di ((int64x1_t) __a, (int64x1_t) __b, 0) + +#define veorq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_veorv16qi (__a, __b, 1) + +#define veorq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_veorv8hi (__a, __b, 1) + +#define veorq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_veorv4si (__a, __b, 1) + +#define veorq_s64(__a, __b) \ + (int64x2_t)__builtin_neon_veorv2di (__a, __b, 1) + +#define veorq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_veorv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define veorq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_veorv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define veorq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_veorv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define veorq_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_veorv2di ((int64x2_t) __a, (int64x2_t) __b, 0) + +#define vbic_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vbicv8qi (__a, __b, 1) + +#define vbic_s16(__a, __b) \ + 
(int16x4_t)__builtin_neon_vbicv4hi (__a, __b, 1) + +#define vbic_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vbicv2si (__a, __b, 1) + +#define vbic_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vbicv1di (__a, __b, 1) + +#define vbic_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vbicv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vbic_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vbicv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vbic_u32(__a, __b) \ + (uint32x2_t)__builtin_neon_vbicv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vbic_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vbicv1di ((int64x1_t) __a, (int64x1_t) __b, 0) + +#define vbicq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vbicv16qi (__a, __b, 1) + +#define vbicq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vbicv8hi (__a, __b, 1) + +#define vbicq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vbicv4si (__a, __b, 1) + +#define vbicq_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vbicv2di (__a, __b, 1) + +#define vbicq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vbicv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vbicq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vbicv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vbicq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vbicv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vbicq_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vbicv2di ((int64x2_t) __a, (int64x2_t) __b, 0) + +#define vorn_s8(__a, __b) \ + (int8x8_t)__builtin_neon_vornv8qi (__a, __b, 1) + +#define vorn_s16(__a, __b) \ + (int16x4_t)__builtin_neon_vornv4hi (__a, __b, 1) + +#define vorn_s32(__a, __b) \ + (int32x2_t)__builtin_neon_vornv2si (__a, __b, 1) + +#define vorn_s64(__a, __b) \ + (int64x1_t)__builtin_neon_vornv1di (__a, __b, 1) + +#define vorn_u8(__a, __b) \ + (uint8x8_t)__builtin_neon_vornv8qi ((int8x8_t) __a, (int8x8_t) __b, 0) + +#define vorn_u16(__a, __b) \ + (uint16x4_t)__builtin_neon_vornv4hi ((int16x4_t) __a, (int16x4_t) __b, 0) + +#define vorn_u32(__a, __b) \ + 
(uint32x2_t)__builtin_neon_vornv2si ((int32x2_t) __a, (int32x2_t) __b, 0) + +#define vorn_u64(__a, __b) \ + (uint64x1_t)__builtin_neon_vornv1di ((int64x1_t) __a, (int64x1_t) __b, 0) + +#define vornq_s8(__a, __b) \ + (int8x16_t)__builtin_neon_vornv16qi (__a, __b, 1) + +#define vornq_s16(__a, __b) \ + (int16x8_t)__builtin_neon_vornv8hi (__a, __b, 1) + +#define vornq_s32(__a, __b) \ + (int32x4_t)__builtin_neon_vornv4si (__a, __b, 1) + +#define vornq_s64(__a, __b) \ + (int64x2_t)__builtin_neon_vornv2di (__a, __b, 1) + +#define vornq_u8(__a, __b) \ + (uint8x16_t)__builtin_neon_vornv16qi ((int8x16_t) __a, (int8x16_t) __b, 0) + +#define vornq_u16(__a, __b) \ + (uint16x8_t)__builtin_neon_vornv8hi ((int16x8_t) __a, (int16x8_t) __b, 0) + +#define vornq_u32(__a, __b) \ + (uint32x4_t)__builtin_neon_vornv4si ((int32x4_t) __a, (int32x4_t) __b, 0) + +#define vornq_u64(__a, __b) \ + (uint64x2_t)__builtin_neon_vornv2di ((int64x2_t) __a, (int64x2_t) __b, 0) + + +#define vreinterpret_p8_s8(__a) \ + (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a) + +#define vreinterpret_p8_s16(__a) \ + (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a) + +#define vreinterpret_p8_s32(__a) \ + (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a) + +#define vreinterpret_p8_s64(__a) \ + (poly8x8_t)__builtin_neon_vreinterpretv8qiv1di (__a) + +#define vreinterpret_p8_f32(__a) \ + (poly8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a) + +#define vreinterpret_p8_u8(__a) \ + (poly8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a) + +#define vreinterpret_p8_u16(__a) \ + (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a) + +#define vreinterpret_p8_u32(__a) \ + (poly8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a) + +#define vreinterpret_p8_u64(__a) \ + (poly8x8_t)__builtin_neon_vreinterpretv8qiv1di ((int64x1_t) __a) + +#define vreinterpret_p8_p16(__a) \ + (poly8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a) + +#define vreinterpretq_p8_s8(__a) \ + 
(poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a) + +#define vreinterpretq_p8_s16(__a) \ + (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a) + +#define vreinterpretq_p8_s32(__a) \ + (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a) + +#define vreinterpretq_p8_s64(__a) \ + (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a) + +#define vreinterpretq_p8_f32(__a) \ + (poly8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a) + +#define vreinterpretq_p8_u8(__a) \ + (poly8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a) + +#define vreinterpretq_p8_u16(__a) \ + (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a) + +#define vreinterpretq_p8_u32(__a) \ + (poly8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a) + +#define vreinterpretq_p8_u64(__a) \ + (poly8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a) + +#define vreinterpretq_p8_p16(__a) \ + (poly8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a) + +#define vreinterpret_p16_s8(__a) \ + (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a) + +#define vreinterpret_p16_s16(__a) \ + (poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a) + +#define vreinterpret_p16_s32(__a) \ + (poly16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a) + +#define vreinterpret_p16_s64(__a) \ + (poly16x4_t)__builtin_neon_vreinterpretv4hiv1di (__a) + +#define vreinterpret_p16_f32(__a) \ + (poly16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a) + +#define vreinterpret_p16_u8(__a) \ + (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a) + +#define vreinterpret_p16_u16(__a) \ + (poly16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a) + +#define vreinterpret_p16_u32(__a) \ + (poly16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a) + +#define vreinterpret_p16_u64(__a) \ + (poly16x4_t)__builtin_neon_vreinterpretv4hiv1di ((int64x1_t) __a) + +#define vreinterpret_p16_p8(__a) \ + (poly16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a) + +#define 
vreinterpretq_p16_s8(__a) \ + (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a) + +#define vreinterpretq_p16_s16(__a) \ + (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a) + +#define vreinterpretq_p16_s32(__a) \ + (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a) + +#define vreinterpretq_p16_s64(__a) \ + (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a) + +#define vreinterpretq_p16_f32(__a) \ + (poly16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a) + +#define vreinterpretq_p16_u8(__a) \ + (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a) + +#define vreinterpretq_p16_u16(__a) \ + (poly16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a) + +#define vreinterpretq_p16_u32(__a) \ + (poly16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a) + +#define vreinterpretq_p16_u64(__a) \ + (poly16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a) + +#define vreinterpretq_p16_p8(__a) \ + (poly16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a) + +#define vreinterpret_f32_s8(__a) \ + (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi (__a) + +#define vreinterpret_f32_s16(__a) \ + (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi (__a) + +#define vreinterpret_f32_s32(__a) \ + (float32x2_t)__builtin_neon_vreinterpretv2sfv2si (__a) + +#define vreinterpret_f32_s64(__a) \ + (float32x2_t)__builtin_neon_vreinterpretv2sfv1di (__a) + +#define vreinterpret_f32_u8(__a) \ + (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi ((int8x8_t) __a) + +#define vreinterpret_f32_u16(__a) \ + (float32x2_t)__builtin_neon_vreinterpretv2sfv4hi ((int16x4_t) __a) + +#define vreinterpret_f32_u32(__a) \ + (float32x2_t)__builtin_neon_vreinterpretv2sfv2si ((int32x2_t) __a) + +#define vreinterpret_f32_u64(__a) \ + (float32x2_t)__builtin_neon_vreinterpretv2sfv1di ((int64x1_t) __a) + +#define vreinterpret_f32_p8(__a) \ + (float32x2_t)__builtin_neon_vreinterpretv2sfv8qi ((int8x8_t) __a) + +#define vreinterpret_f32_p16(__a) \ + 
(float32x2_t)__builtin_neon_vreinterpretv2sfv4hi ((int16x4_t) __a) + +#define vreinterpretq_f32_s8(__a) \ + (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi (__a) + +#define vreinterpretq_f32_s16(__a) \ + (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi (__a) + +#define vreinterpretq_f32_s32(__a) \ + (float32x4_t)__builtin_neon_vreinterpretv4sfv4si (__a) + +#define vreinterpretq_f32_s64(__a) \ + (float32x4_t)__builtin_neon_vreinterpretv4sfv2di (__a) + +#define vreinterpretq_f32_u8(__a) \ + (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a) + +#define vreinterpretq_f32_u16(__a) \ + (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a) + +#define vreinterpretq_f32_u32(__a) \ + (float32x4_t)__builtin_neon_vreinterpretv4sfv4si ((int32x4_t) __a) + +#define vreinterpretq_f32_u64(__a) \ + (float32x4_t)__builtin_neon_vreinterpretv4sfv2di ((int64x2_t) __a) + +#define vreinterpretq_f32_p8(__a) \ + (float32x4_t)__builtin_neon_vreinterpretv4sfv16qi ((int8x16_t) __a) + +#define vreinterpretq_f32_p16(__a) \ + (float32x4_t)__builtin_neon_vreinterpretv4sfv8hi ((int16x8_t) __a) + +#define vreinterpret_s64_s8(__a) \ + (int64x1_t)__builtin_neon_vreinterpretv1div8qi (__a) + +#define vreinterpret_s64_s16(__a) \ + (int64x1_t)__builtin_neon_vreinterpretv1div4hi (__a) + +#define vreinterpret_s64_s32(__a) \ + (int64x1_t)__builtin_neon_vreinterpretv1div2si (__a) + +#define vreinterpret_s64_f32(__a) \ + (int64x1_t)__builtin_neon_vreinterpretv1div2sf (__a) + +#define vreinterpret_s64_u8(__a) \ + (int64x1_t)__builtin_neon_vreinterpretv1div8qi ((int8x8_t) __a) + +#define vreinterpret_s64_u16(__a) \ + (int64x1_t)__builtin_neon_vreinterpretv1div4hi ((int16x4_t) __a) + +#define vreinterpret_s64_u32(__a) \ + (int64x1_t)__builtin_neon_vreinterpretv1div2si ((int32x2_t) __a) + +#define vreinterpret_s64_u64(__a) \ + (int64x1_t)__builtin_neon_vreinterpretv1div1di ((int64x1_t) __a) + +#define vreinterpret_s64_p8(__a) \ + (int64x1_t)__builtin_neon_vreinterpretv1div8qi 
((int8x8_t) __a) + +#define vreinterpret_s64_p16(__a) \ + (int64x1_t)__builtin_neon_vreinterpretv1div4hi ((int16x4_t) __a) + +#define vreinterpretq_s64_s8(__a) \ + (int64x2_t)__builtin_neon_vreinterpretv2div16qi (__a) + +#define vreinterpretq_s64_s16(__a) \ + (int64x2_t)__builtin_neon_vreinterpretv2div8hi (__a) + +#define vreinterpretq_s64_s32(__a) \ + (int64x2_t)__builtin_neon_vreinterpretv2div4si (__a) + +#define vreinterpretq_s64_f32(__a) \ + (int64x2_t)__builtin_neon_vreinterpretv2div4sf (__a) + +#define vreinterpretq_s64_u8(__a) \ + (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a) + +#define vreinterpretq_s64_u16(__a) \ + (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a) + +#define vreinterpretq_s64_u32(__a) \ + (int64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a) + +#define vreinterpretq_s64_u64(__a) \ + (int64x2_t)__builtin_neon_vreinterpretv2div2di ((int64x2_t) __a) + +#define vreinterpretq_s64_p8(__a) \ + (int64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a) + +#define vreinterpretq_s64_p16(__a) \ + (int64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a) + +#define vreinterpret_u64_s8(__a) \ + (uint64x1_t)__builtin_neon_vreinterpretv1div8qi (__a) + +#define vreinterpret_u64_s16(__a) \ + (uint64x1_t)__builtin_neon_vreinterpretv1div4hi (__a) + +#define vreinterpret_u64_s32(__a) \ + (uint64x1_t)__builtin_neon_vreinterpretv1div2si (__a) + +#define vreinterpret_u64_s64(__a) \ + (uint64x1_t)__builtin_neon_vreinterpretv1div1di (__a) + +#define vreinterpret_u64_f32(__a) \ + (uint64x1_t)__builtin_neon_vreinterpretv1div2sf (__a) + +#define vreinterpret_u64_u8(__a) \ + (uint64x1_t)__builtin_neon_vreinterpretv1div8qi ((int8x8_t) __a) + +#define vreinterpret_u64_u16(__a) \ + (uint64x1_t)__builtin_neon_vreinterpretv1div4hi ((int16x4_t) __a) + +#define vreinterpret_u64_u32(__a) \ + (uint64x1_t)__builtin_neon_vreinterpretv1div2si ((int32x2_t) __a) + +#define vreinterpret_u64_p8(__a) \ + 
(uint64x1_t)__builtin_neon_vreinterpretv1div8qi ((int8x8_t) __a) + +#define vreinterpret_u64_p16(__a) \ + (uint64x1_t)__builtin_neon_vreinterpretv1div4hi ((int16x4_t) __a) + +#define vreinterpretq_u64_s8(__a) \ + (uint64x2_t)__builtin_neon_vreinterpretv2div16qi (__a) + +#define vreinterpretq_u64_s16(__a) \ + (uint64x2_t)__builtin_neon_vreinterpretv2div8hi (__a) + +#define vreinterpretq_u64_s32(__a) \ + (uint64x2_t)__builtin_neon_vreinterpretv2div4si (__a) + +#define vreinterpretq_u64_s64(__a) \ + (uint64x2_t)__builtin_neon_vreinterpretv2div2di (__a) + +#define vreinterpretq_u64_f32(__a) \ + (uint64x2_t)__builtin_neon_vreinterpretv2div4sf (__a) + +#define vreinterpretq_u64_u8(__a) \ + (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a) + +#define vreinterpretq_u64_u16(__a) \ + (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a) + +#define vreinterpretq_u64_u32(__a) \ + (uint64x2_t)__builtin_neon_vreinterpretv2div4si ((int32x4_t) __a) + +#define vreinterpretq_u64_p8(__a) \ + (uint64x2_t)__builtin_neon_vreinterpretv2div16qi ((int8x16_t) __a) + +#define vreinterpretq_u64_p16(__a) \ + (uint64x2_t)__builtin_neon_vreinterpretv2div8hi ((int16x8_t) __a) + +#define vreinterpret_s8_s16(__a) \ + (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a) + +#define vreinterpret_s8_s32(__a) \ + (int8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a) + +#define vreinterpret_s8_s64(__a) \ + (int8x8_t)__builtin_neon_vreinterpretv8qiv1di (__a) + +#define vreinterpret_s8_f32(__a) \ + (int8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a) + +#define vreinterpret_s8_u8(__a) \ + (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a) + +#define vreinterpret_s8_u16(__a) \ + (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a) + +#define vreinterpret_s8_u32(__a) \ + (int8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a) + +#define vreinterpret_s8_u64(__a) \ + (int8x8_t)__builtin_neon_vreinterpretv8qiv1di ((int64x1_t) __a) + +#define 
vreinterpret_s8_p8(__a) \ + (int8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a) + +#define vreinterpret_s8_p16(__a) \ + (int8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a) + +#define vreinterpretq_s8_s16(__a) \ + (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a) + +#define vreinterpretq_s8_s32(__a) \ + (int8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a) + +#define vreinterpretq_s8_s64(__a) \ + (int8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a) + +#define vreinterpretq_s8_f32(__a) \ + (int8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a) + +#define vreinterpretq_s8_u8(__a) \ + (int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a) + +#define vreinterpretq_s8_u16(__a) \ + (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a) + +#define vreinterpretq_s8_u32(__a) \ + (int8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a) + +#define vreinterpretq_s8_u64(__a) \ + (int8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a) + +#define vreinterpretq_s8_p8(__a) \ + (int8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a) + +#define vreinterpretq_s8_p16(__a) \ + (int8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a) + +#define vreinterpret_s16_s8(__a) \ + (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a) + +#define vreinterpret_s16_s32(__a) \ + (int16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a) + +#define vreinterpret_s16_s64(__a) \ + (int16x4_t)__builtin_neon_vreinterpretv4hiv1di (__a) + +#define vreinterpret_s16_f32(__a) \ + (int16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a) + +#define vreinterpret_s16_u8(__a) \ + (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a) + +#define vreinterpret_s16_u16(__a) \ + (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a) + +#define vreinterpret_s16_u32(__a) \ + (int16x4_t)__builtin_neon_vreinterpretv4hiv2si ((int32x2_t) __a) + +#define vreinterpret_s16_u64(__a) \ + (int16x4_t)__builtin_neon_vreinterpretv4hiv1di 
((int64x1_t) __a) + +#define vreinterpret_s16_p8(__a) \ + (int16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a) + +#define vreinterpret_s16_p16(__a) \ + (int16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a) + +#define vreinterpretq_s16_s8(__a) \ + (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a) + +#define vreinterpretq_s16_s32(__a) \ + (int16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a) + +#define vreinterpretq_s16_s64(__a) \ + (int16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a) + +#define vreinterpretq_s16_f32(__a) \ + (int16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a) + +#define vreinterpretq_s16_u8(__a) \ + (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a) + +#define vreinterpretq_s16_u16(__a) \ + (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a) + +#define vreinterpretq_s16_u32(__a) \ + (int16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a) + +#define vreinterpretq_s16_u64(__a) \ + (int16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a) + +#define vreinterpretq_s16_p8(__a) \ + (int16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a) + +#define vreinterpretq_s16_p16(__a) \ + (int16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a) + +#define vreinterpret_s32_s8(__a) \ + (int32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a) + +#define vreinterpret_s32_s16(__a) \ + (int32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a) + +#define vreinterpret_s32_s64(__a) \ + (int32x2_t)__builtin_neon_vreinterpretv2siv1di (__a) + +#define vreinterpret_s32_f32(__a) \ + (int32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a) + +#define vreinterpret_s32_u8(__a) \ + (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a) + +#define vreinterpret_s32_u16(__a) \ + (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a) + +#define vreinterpret_s32_u32(__a) \ + (int32x2_t)__builtin_neon_vreinterpretv2siv2si ((int32x2_t) __a) + +#define vreinterpret_s32_u64(__a) \ + 
(int32x2_t)__builtin_neon_vreinterpretv2siv1di ((int64x1_t) __a) + +#define vreinterpret_s32_p8(__a) \ + (int32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a) + +#define vreinterpret_s32_p16(__a) \ + (int32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a) + +#define vreinterpretq_s32_s8(__a) \ + (int32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a) + +#define vreinterpretq_s32_s16(__a) \ + (int32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a) + +#define vreinterpretq_s32_s64(__a) \ + (int32x4_t)__builtin_neon_vreinterpretv4siv2di (__a) + +#define vreinterpretq_s32_f32(__a) \ + (int32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a) + +#define vreinterpretq_s32_u8(__a) \ + (int32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a) + +#define vreinterpretq_s32_u16(__a) \ + (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a) + +#define vreinterpretq_s32_u32(__a) \ + (int32x4_t)__builtin_neon_vreinterpretv4siv4si ((int32x4_t) __a) + +#define vreinterpretq_s32_u64(__a) \ + (int32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a) + +#define vreinterpretq_s32_p8(__a) \ + (int32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a) + +#define vreinterpretq_s32_p16(__a) \ + (int32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a) + +#define vreinterpret_u8_s8(__a) \ + (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi (__a) + +#define vreinterpret_u8_s16(__a) \ + (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi (__a) + +#define vreinterpret_u8_s32(__a) \ + (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si (__a) + +#define vreinterpret_u8_s64(__a) \ + (uint8x8_t)__builtin_neon_vreinterpretv8qiv1di (__a) + +#define vreinterpret_u8_f32(__a) \ + (uint8x8_t)__builtin_neon_vreinterpretv8qiv2sf (__a) + +#define vreinterpret_u8_u16(__a) \ + (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a) + +#define vreinterpret_u8_u32(__a) \ + (uint8x8_t)__builtin_neon_vreinterpretv8qiv2si ((int32x2_t) __a) + +#define 
vreinterpret_u8_u64(__a) \ + (uint8x8_t)__builtin_neon_vreinterpretv8qiv1di ((int64x1_t) __a) + +#define vreinterpret_u8_p8(__a) \ + (uint8x8_t)__builtin_neon_vreinterpretv8qiv8qi ((int8x8_t) __a) + +#define vreinterpret_u8_p16(__a) \ + (uint8x8_t)__builtin_neon_vreinterpretv8qiv4hi ((int16x4_t) __a) + +#define vreinterpretq_u8_s8(__a) \ + (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi (__a) + +#define vreinterpretq_u8_s16(__a) \ + (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi (__a) + +#define vreinterpretq_u8_s32(__a) \ + (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si (__a) + +#define vreinterpretq_u8_s64(__a) \ + (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di (__a) + +#define vreinterpretq_u8_f32(__a) \ + (uint8x16_t)__builtin_neon_vreinterpretv16qiv4sf (__a) + +#define vreinterpretq_u8_u16(__a) \ + (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a) + +#define vreinterpretq_u8_u32(__a) \ + (uint8x16_t)__builtin_neon_vreinterpretv16qiv4si ((int32x4_t) __a) + +#define vreinterpretq_u8_u64(__a) \ + (uint8x16_t)__builtin_neon_vreinterpretv16qiv2di ((int64x2_t) __a) + +#define vreinterpretq_u8_p8(__a) \ + (uint8x16_t)__builtin_neon_vreinterpretv16qiv16qi ((int8x16_t) __a) + +#define vreinterpretq_u8_p16(__a) \ + (uint8x16_t)__builtin_neon_vreinterpretv16qiv8hi ((int16x8_t) __a) + +#define vreinterpret_u16_s8(__a) \ + (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi (__a) + +#define vreinterpret_u16_s16(__a) \ + (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi (__a) + +#define vreinterpret_u16_s32(__a) \ + (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si (__a) + +#define vreinterpret_u16_s64(__a) \ + (uint16x4_t)__builtin_neon_vreinterpretv4hiv1di (__a) + +#define vreinterpret_u16_f32(__a) \ + (uint16x4_t)__builtin_neon_vreinterpretv4hiv2sf (__a) + +#define vreinterpret_u16_u8(__a) \ + (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a) + +#define vreinterpret_u16_u32(__a) \ + (uint16x4_t)__builtin_neon_vreinterpretv4hiv2si 
((int32x2_t) __a) + +#define vreinterpret_u16_u64(__a) \ + (uint16x4_t)__builtin_neon_vreinterpretv4hiv1di ((int64x1_t) __a) + +#define vreinterpret_u16_p8(__a) \ + (uint16x4_t)__builtin_neon_vreinterpretv4hiv8qi ((int8x8_t) __a) + +#define vreinterpret_u16_p16(__a) \ + (uint16x4_t)__builtin_neon_vreinterpretv4hiv4hi ((int16x4_t) __a) + +#define vreinterpretq_u16_s8(__a) \ + (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi (__a) + +#define vreinterpretq_u16_s16(__a) \ + (uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi (__a) + +#define vreinterpretq_u16_s32(__a) \ + (uint16x8_t)__builtin_neon_vreinterpretv8hiv4si (__a) + +#define vreinterpretq_u16_s64(__a) \ + (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di (__a) + +#define vreinterpretq_u16_f32(__a) \ + (uint16x8_t)__builtin_neon_vreinterpretv8hiv4sf (__a) + +#define vreinterpretq_u16_u8(__a) \ + (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a) + +#define vreinterpretq_u16_u32(__a) \ + (uint16x8_t)__builtin_neon_vreinterpretv8hiv4si ((int32x4_t) __a) + +#define vreinterpretq_u16_u64(__a) \ + (uint16x8_t)__builtin_neon_vreinterpretv8hiv2di ((int64x2_t) __a) + +#define vreinterpretq_u16_p8(__a) \ + (uint16x8_t)__builtin_neon_vreinterpretv8hiv16qi ((int8x16_t) __a) + +#define vreinterpretq_u16_p16(__a) \ + (uint16x8_t)__builtin_neon_vreinterpretv8hiv8hi ((int16x8_t) __a) + +#define vreinterpret_u32_s8(__a) \ + (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi (__a) + +#define vreinterpret_u32_s16(__a) \ + (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi (__a) + +#define vreinterpret_u32_s32(__a) \ + (uint32x2_t)__builtin_neon_vreinterpretv2siv2si (__a) + +#define vreinterpret_u32_s64(__a) \ + (uint32x2_t)__builtin_neon_vreinterpretv2siv1di (__a) + +#define vreinterpret_u32_f32(__a) \ + (uint32x2_t)__builtin_neon_vreinterpretv2siv2sf (__a) + +#define vreinterpret_u32_u8(__a) \ + (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a) + +#define vreinterpret_u32_u16(__a) \ + 
(uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a) + +#define vreinterpret_u32_u64(__a) \ + (uint32x2_t)__builtin_neon_vreinterpretv2siv1di ((int64x1_t) __a) + +#define vreinterpret_u32_p8(__a) \ + (uint32x2_t)__builtin_neon_vreinterpretv2siv8qi ((int8x8_t) __a) + +#define vreinterpret_u32_p16(__a) \ + (uint32x2_t)__builtin_neon_vreinterpretv2siv4hi ((int16x4_t) __a) + +#define vreinterpretq_u32_s8(__a) \ + (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi (__a) + +#define vreinterpretq_u32_s16(__a) \ + (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi (__a) + +#define vreinterpretq_u32_s32(__a) \ + (uint32x4_t)__builtin_neon_vreinterpretv4siv4si (__a) + +#define vreinterpretq_u32_s64(__a) \ + (uint32x4_t)__builtin_neon_vreinterpretv4siv2di (__a) + +#define vreinterpretq_u32_f32(__a) \ + (uint32x4_t)__builtin_neon_vreinterpretv4siv4sf (__a) + +#define vreinterpretq_u32_u8(__a) \ + (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a) + +#define vreinterpretq_u32_u16(__a) \ + (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a) + +#define vreinterpretq_u32_u64(__a) \ + (uint32x4_t)__builtin_neon_vreinterpretv4siv2di ((int64x2_t) __a) + +#define vreinterpretq_u32_p8(__a) \ + (uint32x4_t)__builtin_neon_vreinterpretv4siv16qi ((int8x16_t) __a) + +#define vreinterpretq_u32_p16(__a) \ + (uint32x4_t)__builtin_neon_vreinterpretv4siv8hi ((int16x8_t) __a) + +#ifdef __cplusplus +} +#endif +#endif +#endif Copied: llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon_std.h (from r103724, llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h) URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon_std.h?p2=llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon_std.h&p1=llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h&r1=103724&r2=103811&rev=103811&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon.h (original) +++ llvm-gcc-4.2/trunk/gcc/config/arm/arm_neon_std.h Fri 
May 14 16:31:28 2010 @@ -1,7 +1,8 @@ -/* LLVM LOCAL file Changed to use preprocessor macros. */ -/* APPLE LOCAL file v7 support. Merge from Codesourcery */ -/* ARM NEON intrinsics include file. This file is generated automatically - using neon-gen.ml. Please do not edit manually. +/* Internal definitions for standard versions of NEON types and intrinsics. + Do not include this file directly; please use <arm_neon.h>. + + This file is generated automatically using neon-gen-std.ml. + Please do not edit manually. Copyright (C) 2006, 2007 Free Software Foundation, Inc. Contributed by CodeSourcery. @@ -46,29 +47,6 @@ #include <stdint.h> -typedef __builtin_neon_qi __neon_int8x8_t __attribute__ ((__vector_size__ (8))); -typedef __builtin_neon_hi __neon_int16x4_t __attribute__ ((__vector_size__ (8))); -typedef __builtin_neon_si __neon_int32x2_t __attribute__ ((__vector_size__ (8))); -typedef __builtin_neon_di __neon_int64x1_t __attribute__ ((__vector_size__ (8))); -typedef __builtin_neon_sf __neon_float32x2_t __attribute__ ((__vector_size__ (8))); -typedef __builtin_neon_poly8 __neon_poly8x8_t __attribute__ ((__vector_size__ (8))); -typedef __builtin_neon_poly16 __neon_poly16x4_t __attribute__ ((__vector_size__ (8))); -typedef __builtin_neon_uqi __neon_uint8x8_t __attribute__ ((__vector_size__ (8))); -typedef __builtin_neon_uhi __neon_uint16x4_t __attribute__ ((__vector_size__ (8))); -typedef __builtin_neon_usi __neon_uint32x2_t __attribute__ ((__vector_size__ (8))); -typedef __builtin_neon_udi __neon_uint64x1_t __attribute__ ((__vector_size__ (8))); -typedef __builtin_neon_qi __neon_int8x16_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_hi __neon_int16x8_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_si __neon_int32x4_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_di __neon_int64x2_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_sf __neon_float32x4_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_poly8 
__neon_poly8x16_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_poly16 __neon_poly16x8_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_uqi __neon_uint8x16_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_uhi __neon_uint16x8_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_usi __neon_uint32x4_t __attribute__ ((__vector_size__ (16))); -typedef __builtin_neon_udi __neon_uint64x2_t __attribute__ ((__vector_size__ (16))); - typedef __builtin_neon_sf float32_t; typedef __builtin_neon_poly8 poly8_t; typedef __builtin_neon_poly16 poly16_t; Added: llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen-std.ml URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen-std.ml?rev=103811&view=auto ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen-std.ml (added) +++ llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen-std.ml Fri May 14 16:31:28 2010 @@ -0,0 +1,507 @@ +(* APPLE LOCAL file v7 support. Merge from Codesourcery *) +(* Auto-generate ARM Neon intrinsics header file. + Copyright (C) 2006, 2007 Free Software Foundation, Inc. + Contributed by CodeSourcery. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the Free + Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + This is an O'Caml program. 
The O'Caml compiler is available from: + + http://caml.inria.fr/ + + Or from your favourite OS's friendly packaging system. Tested with version + 3.09.2, though other versions will probably work too. + + Compile with: + ocamlc -c neon.ml + ocamlc -o neon-gen-std neon.cmo neon-gen-std.ml + + Run with: + ./neon-gen-std > arm_neon_std.h +*) + +open Neon + +(* The format codes used in the following functions are documented at: + http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\ + #6_printflikefunctionsforprettyprinting + (one line, remove the backslash.) +*) + +(* Following functions can be used to approximate GNU indentation style. *) +let start_function () = + Format.printf "@[<v 0>"; + ref 0 + +let end_function nesting = + match !nesting with + 0 -> Format.printf "@;@;@]" + | _ -> failwith ("Bad nesting (ending function at level " + ^ (string_of_int !nesting) ^ ")") + +let open_braceblock nesting = + begin match !nesting with + 0 -> Format.printf "@,@<0>{@[<v 2>@," + | _ -> Format.printf "@,@[<v 2> @<0>{@[<v 2>@," + end; + incr nesting + +let close_braceblock nesting = + decr nesting; + match !nesting with + 0 -> Format.printf "@]@,@<0>}" + | _ -> Format.printf "@]@,@<0>}@]" + +(* LLVM LOCAL begin Print macros instead of inline functions. + This is needed so that immediate arguments (e.g., lane numbers, shift + amounts, etc.) can be checked for validity. GCC can check them after + inlining, but LLVM does inlining separately. + + Some macros translate to simple intrinsic calls and should not end with + semicolons, but for others, which use GCC's statement-expressions to + include unions that convert argument and/or return types, the semicolons + need to be emitted after every statement. This is implemented by deferring + the emission of trailing semicolons so they are only added in the context + of statement-expressions. 
*) +let print_function arity fnname body = + let ffmt = start_function () in + Format.printf "@[#define "; + begin match arity with + Arity0 ret -> + Format.printf "%s()" fnname + | Arity1 (ret, arg0) -> + Format.printf "%s(__a)" fnname + | Arity2 (ret, arg0, arg1) -> + Format.printf "%s(__a, __b)" fnname + | Arity3 (ret, arg0, arg1, arg2) -> + Format.printf "%s(__a, __b, __c)" fnname + | Arity4 (ret, arg0, arg1, arg2, arg3) -> + Format.printf "%s(__a, __b, __c, __d)" fnname + end; + let rec print_lines = function + [] -> () + | [line] -> Format.printf "%s; \\" line + | line::lines -> Format.printf "%s; \\@," line; print_lines lines in + let print_macro_body = function + [] -> Format.printf " \\@,"; + | [line] -> Format.printf " \\@,"; + Format.printf "%s" line + | line::lines -> Format.printf " __extension__ \\@,"; + Format.printf "@[({ \\@,%s; \\@," line; + print_lines lines; + Format.printf "@]@, })" in + print_macro_body body; + Format.printf "@]"; + end_function ffmt +(* LLVM LOCAL end Print macros instead of inline functions. *) + +let return_by_ptr features = List.mem ReturnPtr features + +let rec signed_ctype = function + T_uint8x8 | T_poly8x8 -> T_int8x8 + | T_uint8x16 | T_poly8x16 -> T_int8x16 + | T_uint16x4 | T_poly16x4 -> T_int16x4 + | T_uint16x8 | T_poly16x8 -> T_int16x8 + | T_uint32x2 -> T_int32x2 + | T_uint32x4 -> T_int32x4 + | T_uint64x1 -> T_int64x1 + | T_uint64x2 -> T_int64x2 + (* Cast to types defined by mode in arm.c, not random types pulled in from + the header in use. This fixes incompatible pointer errors when + compiling with C++. *) + | T_uint8 | T_int8 -> T_intQI + | T_uint16 | T_int16 -> T_intHI + | T_uint32 | T_int32 -> T_intSI + | T_uint64 | T_int64 -> T_intDI + | T_poly8 -> T_intQI + | T_poly16 -> T_intHI + | T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt) + | T_ptrto elt -> T_ptrto (signed_ctype elt) + | T_const elt -> T_const (signed_ctype elt) + | x -> x + +(* LLVM LOCAL begin union_string. 
+ Array types are handled as structs in llvm-gcc, not as wide integers, and + single vector types have wrapper structs. Unions are used here to convert + back and forth between these different representations. The union_string + function has been updated accordingly, and it is moved below signed_ctype + so it can use that function. *) +let union_string num elts base = + let itype = match num with + 1 -> elts + | _ -> T_arrayof (num, elts) in + let iname = string_of_vectype (signed_ctype itype) + and sname = string_of_vectype itype in + Printf.sprintf "union { %s __i; __neon_%s __o; } %s" sname iname base +(* LLVM LOCAL end union_string. *) + +(* LLVM LOCAL begin add_cast_with_prefix. *) +let add_cast_with_prefix ctype cval stype_prefix = + let stype = signed_ctype ctype in + if ctype <> stype then + match stype with + T_ptrto elt -> + Printf.sprintf "__neon_ptr_cast(%s%s, %s)" stype_prefix (string_of_vectype stype) cval + | _ -> + Printf.sprintf "(%s%s) %s" stype_prefix (string_of_vectype stype) cval + else + cval + +let add_cast ctype cval = add_cast_with_prefix ctype cval "" +(* LLVM LOCAL end add_cast_with_prefix. *) + +let cast_for_return to_ty = "(" ^ (string_of_vectype to_ty) ^ ")" + +(* Return a tuple of a list of declarations to go at the start of the function, + and a list of statements needed to return THING. *) +(* LLVM LOCAL begin Omit "return" keywords and trailing semicolons. *) +let return arity return_by_ptr thing = + match arity with + Arity0 (ret) | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) + | Arity4 (ret, _, _, _, _) -> + match ret with + T_arrayof (num, vec) -> + if return_by_ptr then + let sname = string_of_vectype ret in + [Printf.sprintf "%s __rv" sname], + [thing; "__rv"] + else + let uname = union_string num vec "__rv" in + [uname], ["__rv.__o = " ^ thing; "__rv.__i"] + (* LLVM LOCAL begin Convert vector result to wrapper struct. 
*) + | T_int8x8 | T_int8x16 + | T_int16x4 | T_int16x8 + | T_int32x2 | T_int32x4 + | T_int64x1 | T_int64x2 + | T_uint8x8 | T_uint8x16 + | T_uint16x4 | T_uint16x8 + | T_uint32x2 | T_uint32x4 + | T_uint64x1 | T_uint64x2 + | T_float32x2 | T_float32x4 + | T_poly8x8 | T_poly8x16 + | T_poly16x4 | T_poly16x8 -> + let uname = union_string 1 ret "__rv" in + [uname], ["__rv.__o = " ^ thing; "__rv.__i"] + (* LLVM LOCAL end Convert vector result to wrapper struct. *) + | T_void -> [], [thing] + | _ -> + [], [(cast_for_return ret) ^ thing] +(* LLVM LOCAL end Omit "return" keywords and trailing semicolons. *) + +let rec element_type ctype = + match ctype with + T_arrayof (_, v) -> element_type v + | _ -> ctype + +let params return_by_ptr ps = + let pdecls = ref [] in + let ptype t p = + match t with + T_arrayof (num, elts) -> + let uname = union_string num elts (p ^ "u") in + (* LLVM LOCAL Omit trailing semicolon. *) + let decl = Printf.sprintf "%s = { %s }" uname p in + pdecls := decl :: !pdecls; + p ^ "u.__o" + (* LLVM LOCAL begin Extract vector operand from wrapper struct. *) + | T_int8x8 | T_int8x16 + | T_int16x4 | T_int16x8 + | T_int32x2 | T_int32x4 + | T_int64x1 | T_int64x2 + | T_uint8x8 | T_uint8x16 + | T_uint16x4 | T_uint16x8 + | T_uint32x2 | T_uint32x4 + | T_uint64x1 | T_uint64x2 + | T_float32x2 | T_float32x4 + | T_poly8x8 | T_poly8x16 + | T_poly16x4 | T_poly16x8 -> + let decl = Printf.sprintf "%s %s = %s" + (string_of_vectype t) (p ^ "x") p in + pdecls := decl :: !pdecls; + add_cast_with_prefix t (p ^ "x.val") "__neon_" + | T_immediate (lo, hi) -> p + | _ -> + let decl = Printf.sprintf "%s %s = %s" + (string_of_vectype t) (p ^ "x") p in + pdecls := decl :: !pdecls; + add_cast t (p ^ "x") in + (* LLVM LOCAL end Extract vector operand from wrapper struct. 
*) + let plist = match ps with + Arity0 _ -> [] + | Arity1 (_, t1) -> [ptype t1 "__a"] + | Arity2 (_, t1, t2) -> [ptype t1 "__a"; ptype t2 "__b"] + | Arity3 (_, t1, t2, t3) -> [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"] + | Arity4 (_, t1, t2, t3, t4) -> + [ptype t1 "__a"; ptype t2 "__b"; ptype t3 "__c"; ptype t4 "__d"] in + match ps with + Arity0 ret | Arity1 (ret, _) | Arity2 (ret, _, _) | Arity3 (ret, _, _, _) + | Arity4 (ret, _, _, _, _) -> + if return_by_ptr then + !pdecls, add_cast (T_ptrto (element_type ret)) "&__rv.val[0]" :: plist + else + !pdecls, plist + +let modify_params features plist = + let is_flipped = + List.exists (function Flipped _ -> true | _ -> false) features in + if is_flipped then + match plist with + [ a; b ] -> [ b; a ] + | _ -> + failwith ("Don't know how to flip args " ^ (String.concat ", " plist)) + else + plist + +(* !!! Decide whether to add an extra information word based on the shape + form. *) +let extra_word shape features paramlist bits = + let use_word = + match shape with + All _ | Long | Long_noreg _ | Wide | Wide_noreg _ | Narrow + | By_scalar _ | Wide_scalar | Wide_lane | Binary_imm _ | Long_imm + | Narrow_imm -> true + | _ -> List.mem InfoWord features + in + if use_word then + paramlist @ [string_of_int bits] + else + paramlist + +(* Bit 0 represents signed (1) vs unsigned (0), or float (1) vs poly (0). + Bit 1 represents rounding (1) vs none (0) + Bit 2 represents floats & polynomials (1), or ordinary integers (0). *) +let infoword_value elttype features = + let bits02 = + match elt_class elttype with + Signed | ConvClass (Signed, _) | ConvClass (_, Signed) -> 0b001 + | Poly -> 0b100 + | Float -> 0b101 + | _ -> 0b000 + and rounding_bit = if List.mem Rounding features then 0b010 else 0b000 in + bits02 lor rounding_bit + +(* "Cast" type operations will throw an exception in mode_of_elt (actually in + elt_width, called from there). Deal with that here, and generate a suffix + with multiple modes (). 
*) +let rec mode_suffix elttype shape = + try + let mode = mode_of_elt elttype shape in + string_of_mode mode + with MixedMode (dst, src) -> + let dstmode = mode_of_elt dst shape + and srcmode = mode_of_elt src shape in + string_of_mode dstmode ^ string_of_mode srcmode + +let print_variant opcode features shape name (ctype, asmtype, elttype) = + let bits = infoword_value elttype features in + let modesuf = mode_suffix elttype shape in + let return_by_ptr = return_by_ptr features in + let pdecls, paramlist = params return_by_ptr ctype in + let paramlist' = modify_params features paramlist in + let paramlist'' = extra_word shape features paramlist' bits in + let parstr = String.concat ", " paramlist'' in + let builtin = Printf.sprintf "__builtin_neon_%s%s (%s)" + (builtin_name features name) modesuf parstr in + let rdecls, stmts = return ctype return_by_ptr builtin in + let body = pdecls @ rdecls @ stmts + and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in + print_function ctype fnname body + +(* When this function processes the element types in the ops table, it rewrites + them in a list of tuples (a,b,c): + a : C type as an "arity", e.g. Arity1 (T_poly8x8, T_poly8x8) + b : Asm type : a single, processed element type, e.g. P16. This is the + type which should be attached to the asm opcode. + c : Variant type : the unprocessed type for this variant (e.g. in add + instructions which don't care about the sign, b might be i16 and c + might be s16.) +*) + +let print_op (opcode, features, shape, name, munge, types) = + let sorted_types = List.sort compare types in + let munged_types = List.map + (fun elt -> let c, asm = munge shape elt in c, asm, elt) sorted_types in + List.iter + (fun variant -> print_variant opcode features shape name variant) + munged_types + +let print_ops ops = + List.iter print_op ops + +(* Output type definitions. Table entries are: + cbase : "C" name for the type. + abase : "ARM" base name for the type (i.e. int in int8x8_t). 
+ esize : element size. + enum : element count. + We can't really distinguish between polynomial types and integer types in + the C type system, I don't think, which may allow the user to make mistakes + without warnings from the compiler. + FIXME: It's probably better to use stdint.h names here. +*) + +let deftypes () = + let typeinfo = [ + (* Doubleword vector types. *) + "__builtin_neon_qi", "int", 8, 8; + "__builtin_neon_hi", "int", 16, 4; + "__builtin_neon_si", "int", 32, 2; + "__builtin_neon_di", "int", 64, 1; + "__builtin_neon_sf", "float", 32, 2; + "__builtin_neon_poly8", "poly", 8, 8; + "__builtin_neon_poly16", "poly", 16, 4; + "__builtin_neon_uqi", "uint", 8, 8; + "__builtin_neon_uhi", "uint", 16, 4; + "__builtin_neon_usi", "uint", 32, 2; + "__builtin_neon_udi", "uint", 64, 1; + + (* Quadword vector types. *) + "__builtin_neon_qi", "int", 8, 16; + "__builtin_neon_hi", "int", 16, 8; + "__builtin_neon_si", "int", 32, 4; + "__builtin_neon_di", "int", 64, 2; + "__builtin_neon_sf", "float", 32, 4; + "__builtin_neon_poly8", "poly", 8, 16; + "__builtin_neon_poly16", "poly", 16, 8; + "__builtin_neon_uqi", "uint", 8, 16; + "__builtin_neon_uhi", "uint", 16, 8; + "__builtin_neon_usi", "uint", 32, 4; + "__builtin_neon_udi", "uint", 64, 2 + ] in + (* LLVM LOCAL remove typedefs for builtin Neon vector types *) + (* Extra types not in . *) + Format.printf "typedef __builtin_neon_sf float32_t;\n"; + Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; + Format.printf "typedef __builtin_neon_poly16 poly16_t;\n" +(* LLVM LOCAL begin Define containerized vector types. 
*) + ; + List.iter + (fun (cbase, abase, esize, enum) -> + let typename = + Printf.sprintf "%s%dx%d_t" abase esize enum in + let structname = + Printf.sprintf "__simd%d_%s%d_t" (esize * enum) abase esize in + let sfmt = start_function () in + Format.printf "typedef struct %s" structname; + open_braceblock sfmt; + Format.printf "__neon_%s val;" typename; + close_braceblock sfmt; + Format.printf " %s;" typename; + end_function sfmt) + typeinfo +(* LLVM LOCAL end Define containerized vector types. *) + +(* Output structs containing arrays, for load & store instructions etc. *) + +let arrtypes () = + let typeinfo = [ + "int", 8; "int", 16; + "int", 32; "int", 64; + "uint", 8; "uint", 16; + "uint", 32; "uint", 64; + "float", 32; "poly", 8; + "poly", 16 + ] in + let writestruct elname elsize regsize arrsize = + let elnum = regsize / elsize in + let structname = + Printf.sprintf "%s%dx%dx%d_t" elname elsize elnum arrsize in + let sfmt = start_function () in + Format.printf "typedef struct %s" structname; + open_braceblock sfmt; + Format.printf "%s%dx%d_t val[%d];" elname elsize elnum arrsize; + close_braceblock sfmt; + Format.printf " %s;" structname; + end_function sfmt; + in + for n = 2 to 4 do + List.iter + (fun (elname, elsize) -> + writestruct elname elsize 64 n; + writestruct elname elsize 128 n) + typeinfo + done + +let print_lines = List.iter (fun s -> Format.printf "%s@\n" s) + +(* Do it. 
*) + +let _ = + print_lines [ +"/* Internal definitions for standard versions of NEON types and intrinsics."; +" Do not include this file directly; please use ."; +""; +" This file is generated automatically using neon-gen-std.ml."; +" Please do not edit manually."; +""; +" Copyright (C) 2006, 2007 Free Software Foundation, Inc."; +" Contributed by CodeSourcery."; +""; +" This file is part of GCC."; +""; +" GCC is free software; you can redistribute it and/or modify it"; +" under the terms of the GNU General Public License as published"; +" by the Free Software Foundation; either version 2, or (at your"; +" option) any later version."; +""; +" GCC is distributed in the hope that it will be useful, but WITHOUT"; +" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY"; +" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public"; +" License for more details."; +""; +" You should have received a copy of the GNU General Public License"; +" along with GCC; see the file COPYING. If not, write to the"; +" Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,"; +" MA 02110-1301, USA. */"; +""; +"/* As a special exception, if you include this header file into source"; +" files compiled by GCC, this header file does not by itself cause"; +" the resulting executable to be covered by the GNU General Public"; +" License. This exception does not however invalidate any other"; +" reasons why the executable file might be covered by the GNU General"; +" Public License. */"; +""; +"#ifndef _GCC_ARM_NEON_H"; +"#define _GCC_ARM_NEON_H 1"; +""; +"#ifndef __ARM_NEON__"; +"#error You must enable NEON instructions (e.g. 
-mfloat-abi=softfp -mfpu=neon) to use arm_neon.h"; +"#else"; +""; +"#ifdef __cplusplus"; +"extern \"C\" {"; +(* LLVM LOCAL begin Use reinterpret_cast for pointers in C++ *) +"#define __neon_ptr_cast(ty, ptr) reinterpret_cast(ptr)"; +"#else"; +"#define __neon_ptr_cast(ty, ptr) (ty)(ptr)"; +(* LLVM LOCAL end Use reinterpret_cast for pointers in C++ *) +"#endif"; +""; +"#include "; +""]; + deftypes (); + arrtypes (); + Format.print_newline (); + print_ops ops; + Format.print_newline (); + print_ops reinterp; + print_lines [ +"#ifdef __cplusplus"; +"}"; +"#endif"; +"#endif"; +"#endif"] Modified: llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml?rev=103811&r1=103810&r2=103811&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml (original) +++ llvm-gcc-4.2/trunk/gcc/config/arm/neon-gen.ml Fri May 14 16:31:28 2010 @@ -32,7 +32,7 @@ ocamlc -o neon-gen neon.cmo neon-gen.ml Run with: - ./neon-gen > arm_neon.h + ./neon-gen > arm_neon_gcc.h *) open Neon @@ -136,35 +136,29 @@ | x -> x (* LLVM LOCAL begin union_string. - Array types are handled as structs in llvm-gcc, not as wide integers, and - single vector types have wrapper structs. Unions are used here to convert + Array types are handled as structs in llvm-gcc, not as wide integers. + Unions are used here to convert back and forth between these different representations. The union_string function has been updated accordingly, and it is moved below signed_ctype so it can use that function. *) let union_string num elts base = - let itype = match num with - 1 -> elts - | _ -> T_arrayof (num, elts) in + let itype = T_arrayof (num, elts) in let iname = string_of_vectype (signed_ctype itype) and sname = string_of_vectype itype in Printf.sprintf "union { %s __i; __neon_%s __o; } %s" sname iname base (* LLVM LOCAL end union_string. 
*) -(* LLVM LOCAL begin add_cast_with_prefix. *) -let add_cast_with_prefix ctype cval stype_prefix = +let add_cast ctype cval = let stype = signed_ctype ctype in if ctype <> stype then match stype with T_ptrto elt -> - Printf.sprintf "__neon_ptr_cast(%s%s, %s)" stype_prefix (string_of_vectype stype) cval + Printf.sprintf "__neon_ptr_cast(%s, %s)" (string_of_vectype stype) cval | _ -> - Printf.sprintf "(%s%s) %s" stype_prefix (string_of_vectype stype) cval + Printf.sprintf "(%s) %s" (string_of_vectype stype) cval else cval -let add_cast ctype cval = add_cast_with_prefix ctype cval "" -(* LLVM LOCAL end add_cast_with_prefix. *) - let cast_for_return to_ty = "(" ^ (string_of_vectype to_ty) ^ ")" (* Return a tuple of a list of declarations to go at the start of the function, @@ -183,21 +177,6 @@ else let uname = union_string num vec "__rv" in [uname], ["__rv.__o = " ^ thing; "__rv.__i"] - (* LLVM LOCAL begin Convert vector result to wrapper struct. *) - | T_int8x8 | T_int8x16 - | T_int16x4 | T_int16x8 - | T_int32x2 | T_int32x4 - | T_int64x1 | T_int64x2 - | T_uint8x8 | T_uint8x16 - | T_uint16x4 | T_uint16x8 - | T_uint32x2 | T_uint32x4 - | T_uint64x1 | T_uint64x2 - | T_float32x2 | T_float32x4 - | T_poly8x8 | T_poly8x16 - | T_poly16x4 | T_poly16x8 -> - let uname = union_string 1 ret "__rv" in - [uname], ["__rv.__o = " ^ thing; "__rv.__i"] - (* LLVM LOCAL end Convert vector result to wrapper struct. *) | T_void -> [], [thing] | _ -> [], [(cast_for_return ret) ^ thing] @@ -218,29 +197,7 @@ let decl = Printf.sprintf "%s = { %s }" uname p in pdecls := decl :: !pdecls; p ^ "u.__o" - (* LLVM LOCAL begin Extract vector operand from wrapper struct. 
*) - | T_int8x8 | T_int8x16 - | T_int16x4 | T_int16x8 - | T_int32x2 | T_int32x4 - | T_int64x1 | T_int64x2 - | T_uint8x8 | T_uint8x16 - | T_uint16x4 | T_uint16x8 - | T_uint32x2 | T_uint32x4 - | T_uint64x1 | T_uint64x2 - | T_float32x2 | T_float32x4 - | T_poly8x8 | T_poly8x16 - | T_poly16x4 | T_poly16x8 -> - let decl = Printf.sprintf "%s %s = %s" - (string_of_vectype t) (p ^ "x") p in - pdecls := decl :: !pdecls; - add_cast_with_prefix t (p ^ "x.val") "__neon_" - | T_immediate (lo, hi) -> p - | _ -> - let decl = Printf.sprintf "%s %s = %s" - (string_of_vectype t) (p ^ "x") p in - pdecls := decl :: !pdecls; - add_cast t (p ^ "x") in - (* LLVM LOCAL end Extract vector operand from wrapper struct. *) + | _ -> add_cast t p in let plist = match ps with Arity0 _ -> [] | Arity1 (_, t1) -> [ptype t1 "__a"] @@ -382,36 +339,19 @@ "__builtin_neon_usi", "uint", 32, 4; "__builtin_neon_udi", "uint", 64, 2 ] in - List.iter - (fun (cbase, abase, esize, enum) -> - let attr = - match enum with - (* LLVM LOCAL no special case for enum == 1 so int64x1_t is a vector *) - _ -> Printf.sprintf "\t__attribute__ ((__vector_size__ (%d)))" - (esize * enum / 8) in - (* LLVM LOCAL Add "__neon_" prefix. *) - Format.printf "typedef %s __neon_%s%dx%d_t%s;@\n" cbase abase esize enum attr) - typeinfo; - Format.print_newline (); + (* LLVM LOCAL remove typedefs for builtin Neon vector types *) (* Extra types not in . *) Format.printf "typedef __builtin_neon_sf float32_t;\n"; Format.printf "typedef __builtin_neon_poly8 poly8_t;\n"; Format.printf "typedef __builtin_neon_poly16 poly16_t;\n" (* LLVM LOCAL begin Define containerized vector types. 
*) ; + Format.print_newline (); List.iter (fun (cbase, abase, esize, enum) -> let typename = Printf.sprintf "%s%dx%d_t" abase esize enum in - let structname = - Printf.sprintf "__simd%d_%s%d_t" (esize * enum) abase esize in - let sfmt = start_function () in - Format.printf "typedef struct %s" structname; - open_braceblock sfmt; - Format.printf "__neon_%s val;" typename; - close_braceblock sfmt; - Format.printf " %s;" typename; - end_function sfmt) + Format.printf "typedef __neon_%s %s;\n" typename typename) typeinfo (* LLVM LOCAL end Define containerized vector types. *) @@ -452,10 +392,12 @@ let _ = print_lines [ -"/* LLVM LOCAL file Changed to use preprocessor macros. */"; -"/* APPLE LOCAL file v7 support. Merge from Codesourcery */"; -"/* ARM NEON intrinsics include file. This file is generated automatically"; -" using neon-gen.ml. Please do not edit manually."; +"/* Internal definitions for GCC-compatible NEON types and intrinsics."; +" Do not include this file directly; please use and define"; +" the ARM_NEON_GCC_COMPATIBILITY macro."; +""; +" This file is generated automatically using neon-gen.ml."; +" Please do not edit manually."; ""; " Copyright (C) 2006, 2007 Free Software Foundation, Inc."; " Contributed by CodeSourcery."; From isanbard at gmail.com Fri May 14 16:33:50 2010 From: isanbard at gmail.com (Bill Wendling) Date: Fri, 14 May 2010 14:33:50 -0700 Subject: [llvm-commits] [llvm] r103802 - in /llvm/trunk: include/llvm/CodeGen/ lib/CodeGen/ lib/CodeGen/AsmPrinter/ lib/CodeGen/SelectionDAG/ lib/Target/ lib/Target/ARM/ lib/Target/Blackfin/ lib/Target/CellSPU/ lib/Target/MBlaze/ lib/Target/MBlaze/AsmPrin In-Reply-To: References: Message-ID: <4C5CAFD1-B2F6-45F5-A0E1-66FB6DF26073@gmail.com> On May 14, 2010, at 2:26 PM, Anton Korobeynikov wrote: >> N.B., several back-ends are using "HasCalls" as being synonymous for something >> that adjusts the stack. This isn't 100% correct and should be looked into. 
> I'm not quite sure for other backends, but systemz backend was correct. > The semantics was precisely "has calls", the stack frame is always > created for non-leaf functions in order to provide register save area > for callees. > > x86 (win64) semantics is definitely the same - please switch to > hasCalls() there as well, we need to create register save area > regardless whether there are local stack variables or not. > Hi Anton, Interesting. SystemZ is an example of where, when I change it to use a "HasCalls" that's set correctly (i.e., it's set if there's any call, not just an adjustment of the stack frame), then test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll would fail. Also, if I do a self-hosted build on X86 with HasCalls set if there are any calls, then the resulting binary doesn't work. So there are some weird (and in my view incorrect) assumptions going on in the back ends which isn't correct. This is why I changed the name of the flag instead of making it true if there are any calls regardless of stack adjustments. -bw -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100514/6e05d9a0/attachment.html From isanbard at gmail.com Fri May 14 16:38:45 2010 From: isanbard at gmail.com (Bill Wendling) Date: Fri, 14 May 2010 21:38:45 -0000 Subject: [llvm-commits] [llvm] r103813 - /llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Message-ID: <20100514213845.10975312800A@llvm.org> Author: void Date: Fri May 14 16:38:44 2010 New Revision: 103813 URL: http://llvm.org/viewvc/llvm-project?rev=103813&view=rev Log: This should happen if there are no calls, not if it just doesn't adjust the stack. 
Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=103813&r1=103812&r2=103813&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Fri May 14 16:38:44 2010 @@ -898,7 +898,7 @@ if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->adjustsStack() && // No calls. + !MFI->hasCalls() && // No calls. !Subtarget->isTargetWin64()) { // Win64 has no Red Zone uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; From anton at korobeynikov.info Fri May 14 16:43:39 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Sat, 15 May 2010 01:43:39 +0400 Subject: [llvm-commits] [llvm] r103802 - in /llvm/trunk: include/llvm/CodeGen/ lib/CodeGen/ lib/CodeGen/AsmPrinter/ lib/CodeGen/SelectionDAG/ lib/Target/ lib/Target/ARM/ lib/Target/Blackfin/ lib/Target/CellSPU/ lib/Target/MBlaze/ lib/Target/MBlaze/AsmPrin In-Reply-To: <4C5CAFD1-B2F6-45F5-A0E1-66FB6DF26073@gmail.com> References: <4C5CAFD1-B2F6-45F5-A0E1-66FB6DF26073@gmail.com> Message-ID: > Interesting. SystemZ is an example of where, when I change it to use a > "HasCalls" that's set correctly (i.e., it's set if there's any call, not > just an adjustment of the stack frame), > then test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll would > fail. It seems that test is wrong and new code is correct. On z/System the situation is the following: function uses the incoming stack frame to save any callee-saved registers. It's a responsibility of the caller to create such stack frame. The test in question contains calls, and it seems that these calls might overwrite the register save area (with the old code).
So, please fix the test :) -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From isanbard at gmail.com Fri May 14 16:47:05 2010 From: isanbard at gmail.com (Bill Wendling) Date: Fri, 14 May 2010 14:47:05 -0700 Subject: [llvm-commits] [llvm] r103802 - in /llvm/trunk: include/llvm/CodeGen/ lib/CodeGen/ lib/CodeGen/AsmPrinter/ lib/CodeGen/SelectionDAG/ lib/Target/ lib/Target/ARM/ lib/Target/Blackfin/ lib/Target/CellSPU/ lib/Target/MBlaze/ lib/Target/MBlaze/AsmPrin In-Reply-To: References: <4C5CAFD1-B2F6-45F5-A0E1-66FB6DF26073@gmail.com> Message-ID: <4E3D062A-5C3F-48EA-A4B7-A14EAAB7374F@gmail.com> On May 14, 2010, at 2:43 PM, Anton Korobeynikov wrote: >> Interesting. SystemZ is an example of where, when I change it to use a >> "HasCalls" that's set correctly (i.e., it's set if there's any call, not >> just an adjustment of the stack frame), >> then test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll would >> fail. > It seems that test is wrong and new code is correct. On z/System the > situation is the following: function uses the incoming stack frame to > save any callee-saved registers. It's a responsibility of the caller > to create such stack frame. The test in question contains calls, and > it seems that these calls might overwrite the register save area (with > the old code). So, please fix the test :) > Okay. I'll trust you to make sure that my fix is okay and that there aren't any regressions in z/System.
:-) -bw From stoklund at 2pi.dk Fri May 14 16:55:44 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Fri, 14 May 2010 21:55:44 -0000 Subject: [llvm-commits] [llvm] r103820 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100514215544.B0B56312800A@llvm.org> Author: stoklund Date: Fri May 14 16:55:44 2010 New Revision: 103820 URL: http://llvm.org/viewvc/llvm-project?rev=103820&view=rev Log: Allow virtreg redefines when verifying for RegAllocFast Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103820&r1=103819&r2=103820&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Fri May 14 16:55:44 2010 @@ -782,7 +782,7 @@ << "********** Function: " << ((Value*)Fn.getFunction())->getName() << '\n'); if (VerifyFastRegalloc) - Fn.verify(); + Fn.verify(this, true); MF = &Fn; MRI = &MF->getRegInfo(); TM = &Fn.getTarget(); From stoklund at 2pi.dk Fri May 14 16:55:50 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Fri, 14 May 2010 21:55:50 -0000 Subject: [llvm-commits] [llvm] r103821 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100514215550.344E03128018@llvm.org> Author: stoklund Date: Fri May 14 16:55:50 2010 New Revision: 103821 URL: http://llvm.org/viewvc/llvm-project?rev=103821&view=rev Log: Count coalesced copies Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103821&r1=103820&r2=103821&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Fri May 14 16:55:50 2010 @@ -40,6 +40,7 @@ 
STATISTIC(NumStores, "Number of stores added"); STATISTIC(NumLoads , "Number of loads added"); +STATISTIC(NumCopies, "Number of copies coalesced"); static RegisterRegAlloc fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); @@ -771,6 +772,7 @@ // LiveVirtsRegs might refer to the instrs. for (unsigned i = 0, e = Coalesced.size(); i != e; ++i) MBB.erase(Coalesced[i]); + NumCopies += Coalesced.size(); DEBUG(MBB.dump()); } From dpatel at apple.com Fri May 14 16:55:50 2010 From: dpatel at apple.com (Devang Patel) Date: Fri, 14 May 2010 21:55:50 -0000 Subject: [llvm-commits] [llvm] r103822 - /llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Message-ID: <20100514215550.EC5EA3128026@llvm.org> Author: dpatel Date: Fri May 14 16:55:50 2010 New Revision: 103822 URL: http://llvm.org/viewvc/llvm-project?rev=103822&view=rev Log: Do not forget to mark processed arguments. Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=103822&r1=103821&r2=103822&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original) +++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Fri May 14 16:55:50 2010 @@ -2175,6 +2175,7 @@ DbgVariable *ArgVar = new DbgVariable(DV, MInsn, NULL); CurrentFnDbgScope->addVariable(ArgVar); DbgValueStartMap[MInsn] = ArgVar; + Processed.insert(DV); continue; } From stoklund at 2pi.dk Fri May 14 16:55:52 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Fri, 14 May 2010 21:55:52 -0000 Subject: [llvm-commits] [llvm] r103823 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100514215552.5991D312800A@llvm.org> Author: stoklund Date: Fri May 14 16:55:52 2010 New Revision: 103823 URL: http://llvm.org/viewvc/llvm-project?rev=103823&view=rev Log: Avoid scanning the long tail of physreg
operands on calls Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103823&r1=103822&r2=103823&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Fri May 14 16:55:52 2010 @@ -663,12 +663,18 @@ // First scan. // Mark physreg uses and early clobbers as used. + // Find the end of the virtreg operands + unsigned VirtOpEnd = 0; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) || - ReservedRegs.test(Reg)) continue; + if (!Reg) continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + VirtOpEnd = i+1; + continue; + } + if (ReservedRegs.test(Reg)) continue; if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { @@ -677,11 +683,10 @@ } } - // Second scan. // Allocate virtreg uses and early clobbers. // Collect VirtKills - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + for (unsigned i = 0; i != VirtOpEnd; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); From isanbard at gmail.com Fri May 14 16:58:35 2010 From: isanbard at gmail.com (Bill Wendling) Date: Fri, 14 May 2010 21:58:35 -0000 Subject: [llvm-commits] [llvm] r103824 - /llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Message-ID: <20100514215835.A3BF2312800A@llvm.org> Author: void Date: Fri May 14 16:58:35 2010 New Revision: 103824 URL: http://llvm.org/viewvc/llvm-project?rev=103824&view=rev Log: Several tail call tests apparently rely upon this being "adjusts stack" instead of "has calls". That's probably wrong, but it needs further investigation. Revert to the original behavior until this is settled. 
Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=103824&r1=103823&r2=103824&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Fri May 14 16:58:35 2010 @@ -898,7 +898,7 @@ if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) && !needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->hasCalls() && // No calls. + !MFI->adjustsStack() && // No calls. !Subtarget->isTargetWin64()) { // Win64 has no Red Zone uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; From gohman at apple.com Fri May 14 17:00:27 2010 From: gohman at apple.com (Dan Gohman) Date: Fri, 14 May 2010 22:00:27 -0000 Subject: [llvm-commits] [llvm] r103826 - /llvm/trunk/lib/Target/Alpha/AlphaInstrInfo.td Message-ID: <20100514220027.EA1953128018@llvm.org> Author: djg Date: Fri May 14 17:00:27 2010 New Revision: 103826 URL: http://llvm.org/viewvc/llvm-project?rev=103826&view=rev Log: BR is a barrier. 
Modified: llvm/trunk/lib/Target/Alpha/AlphaInstrInfo.td Modified: llvm/trunk/lib/Target/Alpha/AlphaInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Alpha/AlphaInstrInfo.td?rev=103826&r1=103825&r2=103826&view=diff ============================================================================== --- llvm/trunk/lib/Target/Alpha/AlphaInstrInfo.td (original) +++ llvm/trunk/lib/Target/Alpha/AlphaInstrInfo.td Fri May 14 17:00:27 2010 @@ -836,7 +836,7 @@ !strconcat(asmstr, " $R,$dst"), s_fbr>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { -let Ra = 31 in +let Ra = 31, isBarrier = 1 in def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>; def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst), From gohman at apple.com Fri May 14 17:01:14 2010 From: gohman at apple.com (Dan Gohman) Date: Fri, 14 May 2010 22:01:14 -0000 Subject: [llvm-commits] [llvm] r103827 - in /llvm/trunk/lib/CodeGen/SelectionDAG: InstrEmitter.cpp InstrEmitter.h Message-ID: <20100514220114.E7646312800A@llvm.org> Author: djg Date: Fri May 14 17:01:14 2010 New Revision: 103827 URL: http://llvm.org/viewvc/llvm-project?rev=103827&view=rev Log: Don't set kill flags for instructions which the scheduler has cloned. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.h Modified: llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp?rev=103827&r1=103826&r2=103827&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.cpp Fri May 14 17:01:14 2010 @@ -265,7 +265,7 @@ unsigned IIOpNum, const TargetInstrDesc *II, DenseMap &VRBaseMap, - bool IsDebug) { + bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Flag && "Chain and flag operands should occur at end of operand list!"); @@ -299,11 +299,14 @@ // If this value has only one use, that use is a kill. This is a // conservative approximation. InstrEmitter does trivial coalescing // with CopyFromReg nodes, so don't emit kill flags for them. + // Avoid kill flags on Schedule cloned nodes, since there will be + // multiple uses. // Tied operands are never killed, so we need to check that. And that // means we need to determine the index of the operand. 
bool isKill = Op.hasOneUse() && Op.getNode()->getOpcode() != ISD::CopyFromReg && - !IsDebug; + !IsDebug && + !(IsClone || IsCloned); if (isKill) { unsigned Idx = MI->getNumOperands(); while (Idx > 0 && @@ -329,9 +332,10 @@ unsigned IIOpNum, const TargetInstrDesc *II, DenseMap &VRBaseMap, - bool IsDebug) { + bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { - AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug); + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, + IsDebug, IsClone, IsCloned); } else if (ConstantSDNode *C = dyn_cast(Op)) { MI->addOperand(MachineOperand::CreateImm(C->getSExtValue())); } else if (ConstantFPSDNode *F = dyn_cast(Op)) { @@ -380,7 +384,8 @@ assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Flag && "Chain and flag operands should occur at end of operand list!"); - AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug); + AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, + IsDebug, IsClone, IsCloned); } } @@ -402,7 +407,8 @@ /// EmitSubregNode - Generate machine code for subreg nodes. 
/// void InstrEmitter::EmitSubregNode(SDNode *Node, - DenseMap &VRBaseMap){ + DenseMap &VRBaseMap, + bool IsClone, bool IsCloned) { unsigned VRBase = 0; unsigned Opc = Node->getMachineOpcode(); @@ -446,7 +452,8 @@ // Add def, source, and subreg index MI->addOperand(MachineOperand::CreateReg(VRBase, true)); - AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap); + AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); MI->addOperand(MachineOperand::CreateImm(SubIdx)); MBB->insert(InsertPos, MI); } else if (Opc == TargetOpcode::INSERT_SUBREG || @@ -480,9 +487,11 @@ const ConstantSDNode *SD = cast(N0); MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); } else - AddOperand(MI, N0, 0, 0, VRBaseMap); + AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); // Add the subregster being inserted - AddOperand(MI, N1, 0, 0, VRBaseMap); + AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); MI->addOperand(MachineOperand::CreateImm(SubIdx)); MBB->insert(InsertPos, MI); } else @@ -524,7 +533,8 @@ /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. 
/// void InstrEmitter::EmitRegSequence(SDNode *Node, - DenseMap &VRBaseMap) { + DenseMap &VRBaseMap, + bool IsClone, bool IsCloned) { const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0)); unsigned NewVReg = MRI->createVirtualRegister(RC); MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), @@ -545,7 +555,8 @@ assert(SRC == RC && "Invalid subregister index in REG_SEQUENCE"); } #endif - AddOperand(MI, Op, i+1, &II, VRBaseMap); + AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); } MBB->insert(InsertPos, MI); @@ -586,7 +597,7 @@ MIB.addReg(0U); // undef else AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, - true /*IsDebug*/); + /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); if (const ConstantInt *CI = dyn_cast(V)) { @@ -625,7 +636,7 @@ if (Opc == TargetOpcode::EXTRACT_SUBREG || Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { - EmitSubregNode(Node, VRBaseMap); + EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned); return; } @@ -637,7 +648,7 @@ // Handle REG_SEQUENCE specially. if (Opc == TargetOpcode::REG_SEQUENCE) { - EmitRegSequence(Node, VRBaseMap); + EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned); return; } @@ -676,7 +687,7 @@ unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; for (unsigned i = NumSkip; i != NodeOperands; ++i) AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, - VRBaseMap); + VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Transfer all of the memory reference descriptions of this instruction. MI->setMemRefs(cast(Node)->memoperands_begin(), @@ -823,7 +834,8 @@ // The addressing mode has been selected, just add all of the // operands to the machine instruction. 
for (; NumVals; --NumVals, ++i) - AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap); + AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap, + /*IsDebug=*/false, IsClone, IsCloned); break; } } Modified: llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.h?rev=103827&r1=103826&r2=103827&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.h (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/InstrEmitter.h Fri May 14 17:01:14 2010 @@ -65,7 +65,7 @@ unsigned IIOpNum, const TargetInstrDesc *II, DenseMap &VRBaseMap, - bool IsDebug = false); + bool IsDebug, bool IsClone, bool IsCloned); /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the @@ -75,11 +75,12 @@ unsigned IIOpNum, const TargetInstrDesc *II, DenseMap &VRBaseMap, - bool IsDebug = false); + bool IsDebug, bool IsClone, bool IsCloned); /// EmitSubregNode - Generate machine code for subreg nodes. /// - void EmitSubregNode(SDNode *Node, DenseMap &VRBaseMap); + void EmitSubregNode(SDNode *Node, DenseMap &VRBaseMap, + bool IsClone, bool IsCloned); /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. /// COPY_TO_REGCLASS is just a normal copy, except that the destination @@ -90,7 +91,8 @@ /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. 
/// - void EmitRegSequence(SDNode *Node, DenseMap &VRBaseMap); + void EmitRegSequence(SDNode *Node, DenseMap &VRBaseMap, + bool IsClone, bool IsCloned); public: /// CountResults - The results of target nodes have register or immediate /// operands first, then an optional chain, and optional flag operands From stoklund at 2pi.dk Fri May 14 17:02:56 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Fri, 14 May 2010 22:02:56 -0000 Subject: [llvm-commits] [llvm] r103828 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100514220256.F210E312800A@llvm.org> Author: stoklund Date: Fri May 14 17:02:56 2010 New Revision: 103828 URL: http://llvm.org/viewvc/llvm-project?rev=103828&view=rev Log: Track allocatable instead of reserved regs, and never take an unallocatable hint. Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103828&r1=103827&r2=103828&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Fri May 14 17:02:56 2010 @@ -108,8 +108,8 @@ // instruction, and so cannot be allocated. BitVector UsedInInstr; - // ReservedRegs - vector of reserved physical registers. - BitVector ReservedRegs; + // Allocatable - vector of allocatable physical registers. + BitVector Allocatable; // atEndOfBlock - This flag is set after allocating all instructions in a // block, before emitting final spills. When it is set, LiveRegMap is no @@ -394,7 +394,8 @@ // Ignore invalid hints. if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || UsedInInstr.test(Hint))) + !RC->contains(Hint) || UsedInInstr.test(Hint)) || + !Allocatable.test(Hint)) Hint = 0; // If there is no hint, peek at the first use of this register. @@ -404,7 +405,8 @@ // Copy to physreg -> use physreg as hint. 
if (TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && SrcReg == VirtReg && TargetRegisterInfo::isPhysicalRegister(DstReg) && - RC->contains(DstReg) && !UsedInInstr.test(DstReg)) { + RC->contains(DstReg) && !UsedInInstr.test(DstReg) && + Allocatable.test(DstReg)) { Hint = DstReg; DEBUG(dbgs() << "%reg" << VirtReg << " gets hint from " << MI); } @@ -413,7 +415,7 @@ // Take hint when possible. if (Hint) { assert(RC->contains(Hint) && !UsedInInstr.test(Hint) && - "Invalid hint should have been cleared"); + Allocatable.test(Hint) && "Invalid hint should have been cleared"); switch(PhysRegState[Hint]) { case regDisabled: case regReserved: @@ -674,7 +676,7 @@ VirtOpEnd = i+1; continue; } - if (ReservedRegs.test(Reg)) continue; + if (!Allocatable.test(Reg)) continue; if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { @@ -729,7 +731,7 @@ unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (ReservedRegs.test(Reg)) continue; + if (!Allocatable.test(Reg)) continue; definePhysReg(MBB, MI, Reg, (MO.isImplicit() || MO.isDead()) ? regFree : regReserved); continue; @@ -797,7 +799,7 @@ TII = TM->getInstrInfo(); UsedInInstr.resize(TRI->getNumRegs()); - ReservedRegs = TRI->getReservedRegs(*MF); + Allocatable = TRI->getAllocatableSet(*MF); // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers From anton at korobeynikov.info Fri May 14 17:16:55 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Sat, 15 May 2010 02:16:55 +0400 Subject: [llvm-commits] [llvm] r103760 - in /llvm/trunk: lib/Target/ARM/ARMBaseInstrInfo.cpp lib/Target/ARM/ARMBaseRegisterInfo.cpp lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMInstrNEON.td lib/Target/ARM/ARMRegisterInfo.h lib/Target/ARM/ARMRegist Message-ID: Hi, Evan > ? ? addRegisterClass(MVT::v4i64, ARM::QQPRRegisterClass); > + ? ?addRegisterClass(MVT::v8i64, ARM::QQQQPRRegisterClass); These are just not enough. 
The operations on v4i64 & v8i64 are not legal, and thus you should tell the type legalizer about this.
- if (StackSize || MFI->adjustsStack()) + if (StackSize || MFI->hasCalls()) Offset -= TFI.getOffsetOfLocalArea(); return Offset; @@ -163,14 +163,14 @@ for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i) HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]); - if (FFI->adjustsStack()) + if (FFI->hasCalls()) /* FIXME: function is varargs */ /* FIXME: function grabs RA */ /* FIXME: function calls eh_return */ MRI.setPhysRegUsed(SystemZ::R14D); if (HighFPRsUsed || - FFI->adjustsStack() || + FFI->hasCalls() || FFI->getObjectIndexEnd() != 0 || // Contains automatic variables FFI->hasVarSizedObjects() // Function calls dynamic alloca's /* FIXME: function is varargs */) @@ -234,7 +234,7 @@ DL = MBBI->getDebugLoc(); // adjust stack pointer: R15 -= numbytes - if (StackSize || MFI->adjustsStack()) { + if (StackSize || MFI->hasCalls()) { assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) && "Invalid stack frame calculation!"); emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII); @@ -286,7 +286,7 @@ // During callee-saved restores emission stack frame was not yet finialized // (and thus - the stack size was unknown). Tune the offset having full stack // size in hands. 
- if (StackSize || MFI->adjustsStack()) { + if (StackSize || MFI->hasCalls()) { assert((MBBI->getOpcode() == SystemZ::MOV64rmm || MBBI->getOpcode() == SystemZ::MOV64rm) && "Expected to see callee-save register restore code"); Modified: llvm/trunk/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll?rev=103829&r1=103828&r2=103829&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll (original) +++ llvm/trunk/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll Fri May 14 17:17:42 2010 @@ -1,4 +1,4 @@ -; RUN: llc < %s | grep 168 +; RUN: llc < %s | FileCheck %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" target triple = "s390x-linux" @@ -8,6 +8,8 @@ declare double @mp_mul_d2i_test(i32 signext, i32 signext, double* nocapture) nounwind define void @mp_mul_radix_test_bb3(i32 %radix, i32 %nfft, double* %tmpfft, i32* %ip, double* %w, double* %arrayidx44.reload, double* %call.out) nounwind { +; CHECK: lg %r11, 328(%r15) + newFuncRoot: br label %bb3 From anton at korobeynikov.info Fri May 14 17:22:53 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Sat, 15 May 2010 02:22:53 +0400 Subject: [llvm-commits] [llvm] r103829 - in /llvm/trunk: lib/Target/SystemZ/SystemZRegisterInfo.cpp test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll In-Reply-To: <20100514221742.B1440312800A@llvm.org> References: <20100514221742.B1440312800A@llvm.org> Message-ID: > SystemZ really does mean "has calls" and not just "adjusts stack." Go ahead and > replace the check with the appropriate predicate. Modify the testcase to reflect > the correct code. (It should be saving callee-saved registers on the stack > allocated by the calling fuction.) Thanks! 
-- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From stoklund at 2pi.dk Fri May 14 17:40:40 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Fri, 14 May 2010 22:40:40 -0000 Subject: [llvm-commits] [llvm] r103830 - /llvm/trunk/lib/CodeGen/RegAllocLocal.cpp Message-ID: <20100514224040.7AC32312800A@llvm.org> Author: stoklund Date: Fri May 14 17:40:40 2010 New Revision: 103830 URL: http://llvm.org/viewvc/llvm-project?rev=103830&view=rev Log: RegAllocLocal can count copies too Modified: llvm/trunk/lib/CodeGen/RegAllocLocal.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocLocal.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocLocal.cpp?rev=103830&r1=103829&r2=103830&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocLocal.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocLocal.cpp Fri May 14 17:40:40 2010 @@ -37,6 +37,7 @@ STATISTIC(NumStores, "Number of stores added"); STATISTIC(NumLoads , "Number of loads added"); +STATISTIC(NumCopies, "Number of copies coalesced"); static RegisterRegAlloc localRegAlloc("local", "local register allocator", @@ -1156,8 +1157,10 @@ if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg) && SrcCopyReg == DstCopyReg && SrcCopySubReg == DstCopySubReg && - DeadDefs.empty()) + DeadDefs.empty()) { + ++NumCopies; MBB.erase(MI); + } } MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); From stoklund at 2pi.dk Fri May 14 17:40:43 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Fri, 14 May 2010 22:40:43 -0000 Subject: [llvm-commits] [llvm] r103831 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100514224043.77D933128018@llvm.org> Author: stoklund Date: Fri May 14 17:40:43 2010 New Revision: 103831 URL: http://llvm.org/viewvc/llvm-project?rev=103831&view=rev Log: Don't bother spilling before a return Modified: 
llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103831&r1=103830&r2=103831&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Fri May 14 17:40:43 2010 @@ -768,11 +768,20 @@ // Spill all physical registers holding virtual registers now. atEndOfBlock = true; - DEBUG(dbgs() << "Killing live registers at end of block.\n"); MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); - for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); - i != e; ++i) - spillVirtReg(MBB, MI, i, true); + if (MI != MBB.end() && MI->getDesc().isReturn()) { + // This is a return block, kill all virtual registers. + DEBUG(dbgs() << "Killing live registers at end of return block.\n"); + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); + i != e; ++i) + killVirtReg(i); + } else { + // This is a normal block, spill any dirty virtregs. + DEBUG(dbgs() << "Spilling live registers at end of block.\n"); + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); + i != e; ++i) + spillVirtReg(MBB, MI, i, true); + } LiveVirtRegs.clear(); // Erase all the coalesced copies. We are delaying it until now because From gohman at apple.com Fri May 14 17:53:18 2010 From: gohman at apple.com (Dan Gohman) Date: Fri, 14 May 2010 22:53:18 -0000 Subject: [llvm-commits] [llvm] r103832 - /llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Message-ID: <20100514225318.ED0AB312800A@llvm.org> Author: djg Date: Fri May 14 17:53:18 2010 New Revision: 103832 URL: http://llvm.org/viewvc/llvm-project?rev=103832&view=rev Log: Fast ISel trivially coalesces away no-op casts, so check for this when setting kill flags. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp?rev=103832&r1=103831&r2=103832&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Fri May 14 17:53:18 2010 @@ -57,11 +57,23 @@ using namespace llvm; bool FastISel::hasTrivialKill(const Value *V) const { - // Don't consider constants or arguments to have trivial kills. Only - // instructions with a single use in the same basic block. + // Don't consider constants or arguments to have trivial kills. const Instruction *I = dyn_cast(V); - return I && - I->hasOneUse() && + if (!I) + return false; + + // No-op casts are trivially coalesced by fast-isel. + if (const CastInst *Cast = dyn_cast(I)) + if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) && + !hasTrivialKill(Cast->getOperand(0))) + return false; + + // Only instructions with a single use in the same basic block are considered + // to have trivial kills. + return I->hasOneUse() && + !(I->getOpcode() == Instruction::BitCast || + I->getOpcode() == Instruction::PtrToInt || + I->getOpcode() == Instruction::IntToPtr) && cast(I->use_begin())->getParent() == I->getParent(); } From evan.cheng at apple.com Fri May 14 17:54:52 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Fri, 14 May 2010 22:54:52 -0000 Subject: [llvm-commits] [llvm] r103833 - /llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Message-ID: <20100514225452.E8DFE312800A@llvm.org> Author: evancheng Date: Fri May 14 17:54:52 2010 New Revision: 103833 URL: http://llvm.org/viewvc/llvm-project?rev=103833&view=rev Log: Model VST*_UPD and VST*oddUPD pair with REG_SEQUENCE. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=103833&r1=103832&r2=103833&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Fri May 14 17:54:52 2010 @@ -1312,33 +1312,76 @@ // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. + if (llvm::ModelWithRegSequence()) { + assert(NumVecs <= 4); - Ops.push_back(Reg0); // post-access address offset + // Form the QQQQ REG_SEQUENCE. + SDValue V[8]; + for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { + V[i] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(Vec+3)); + V[i+1] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(Vec+3)); + } + if (NumVecs == 3) + V[6] = V[7] = + SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0); + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], + V[4], V[5], V[6], V[7]), 0); - // Store the even subregs. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(Vec+3))); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - unsigned Opc = QOpcodes0[OpcodeIndex]; - SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStA, 1); - - // Store the odd subregs. 
- Ops[0] = SDValue(VStA, 0); // MemAddr - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(Vec+3)); - Ops[NumVecs+5] = Chain; - Opc = QOpcodes1[OpcodeIndex]; - SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStB, 1); - ReplaceUses(SDValue(N, 0), Chain); - return NULL; + // Store the even D registers. + Ops.push_back(Reg0); // post-access address offset + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0+Vec*2, dl, + RegVT, RegSeq)); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + unsigned Opc = QOpcodes0[OpcodeIndex]; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStA, 1); + + // Store the odd D registers. + Ops[0] = SDValue(VStA, 0); // MemAddr + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1+Vec*2, dl, + RegVT, RegSeq); + Ops[NumVecs+5] = Chain; + Opc = QOpcodes1[OpcodeIndex]; + SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; + } else { + Ops.push_back(Reg0); // post-access address offset + + // Store the even subregs. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(Vec+3))); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + unsigned Opc = QOpcodes0[OpcodeIndex]; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStA, 1); + + // Store the odd subregs. 
+ Ops[0] = SDValue(VStA, 0); // MemAddr + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(Vec+3)); + Ops[NumVecs+5] = Chain; + Opc = QOpcodes1[OpcodeIndex]; + SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; + } } SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, From gohman at apple.com Fri May 14 18:01:03 2010 From: gohman at apple.com (Dan Gohman) Date: Fri, 14 May 2010 23:01:03 -0000 Subject: [llvm-commits] [llvm] r103834 - /llvm/trunk/include/llvm/Constants.h Message-ID: <20100514230103.25405312800A@llvm.org> Author: djg Date: Fri May 14 18:01:02 2010 New Revision: 103834 URL: http://llvm.org/viewvc/llvm-project?rev=103834&view=rev Log: Add some comments about undef. Modified: llvm/trunk/include/llvm/Constants.h Modified: llvm/trunk/include/llvm/Constants.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Constants.h?rev=103834&r1=103833&r2=103834&view=diff ============================================================================== --- llvm/trunk/include/llvm/Constants.h (original) +++ llvm/trunk/include/llvm/Constants.h Fri May 14 18:01:02 2010 @@ -923,7 +923,11 @@ /// UndefValue - 'undef' values are things that do not have specified contents. /// These are used for a variety of purposes, including global variable /// initializers and operands to instructions. 'undef' values can occur with -/// any type. +/// any first-class type. +/// +/// Undef values aren't exactly constants; if they have multiple uses, they +/// can appear to have different bit patterns at each use. See +/// LangRef.html#undefvalues for details. 
/// class UndefValue : public Constant { friend struct ConstantCreator; @@ -954,6 +958,7 @@ return V->getValueID() == UndefValueVal; } }; + } // End llvm namespace #endif From evan.cheng at apple.com Fri May 14 18:21:14 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Fri, 14 May 2010 23:21:14 -0000 Subject: [llvm-commits] [llvm] r103835 - in /llvm/trunk: include/llvm/Target/TargetRegisterInfo.h lib/CodeGen/TwoAddressInstructionPass.cpp lib/Target/ARM/ARMBaseRegisterInfo.cpp lib/Target/ARM/ARMBaseRegisterInfo.h lib/Target/ARM/ARMRegisterInfo.h lib/Target/ARM/ARMRegisterInfo.td lib/Target/ARM/NEONPreAllocPass.cpp Message-ID: <20100514232114.4C761312800A@llvm.org> Author: evancheng Date: Fri May 14 18:21:14 2010 New Revision: 103835 URL: http://llvm.org/viewvc/llvm-project?rev=103835&view=rev Log: Teach two-address pass to do some coalescing while eliminating REG_SEQUENCE instructions. e.g. %reg1026 = VLDMQ %reg1025, 260, pred:14, pred:%reg0 %reg1027 = EXTRACT_SUBREG %reg1026, 6 %reg1028 = EXTRACT_SUBREG %reg1026, 5 ... %reg1029 = REG_SEQUENCE %reg1028, 5, %reg1027, 6, %reg1028, 7, %reg1027, 8, %reg1028, 9, %reg1027, 10, %reg1030, 11, %reg1032, 12 After REG_SEQUENCE is eliminated, we are left with: %reg1026 = VLDMQ %reg1025, 260, pred:14, pred:%reg0 %reg1029:6 = EXTRACT_SUBREG %reg1026, 6 %reg1029:5 = EXTRACT_SUBREG %reg1026, 5 The regular coalescer will not be able to coalesce reg1026 and reg1029 because it doesn't know how to combine sub-register indices 5 and 6. Now 2-address pass will consult the target whether sub-registers 5 and 6 of reg1026 can be combined into a larger sub-register (or combined to be reg1026 itself as is the case here). If it is possible, it will be able to replace references of reg1026 with reg1029 + the larger sub-register index.
Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Modified: llvm/trunk/include/llvm/Target/TargetRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRegisterInfo.h?rev=103835&r1=103834&r2=103835&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetRegisterInfo.h (original) +++ llvm/trunk/include/llvm/Target/TargetRegisterInfo.h Fri May 14 18:21:14 2010 @@ -28,6 +28,7 @@ class MachineFunction; class MachineMove; class RegScavenger; +template class SmallVectorImpl; /// TargetRegisterDesc - This record contains all of the information known about /// a particular register. The AliasSet field (if not null) contains a pointer @@ -479,6 +480,17 @@ return 0; } + /// canCombinedSubRegIndex - Given a register class and a list of sub-register + /// indices, return true if it's possible to combine the sub-register indices + /// into one that corresponds to a larger sub-register. Return the new sub- + /// register index by reference. Note the new index by be zero if the given + /// sub-registers combined to form the whole register. + virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC, + SmallVectorImpl &SubIndices, + unsigned &NewSubIdx) const { + return 0; + } + /// getMatchingSuperRegClass - Return a subclass of the specified register /// class A so that each register in it has a sub-register of the /// specified sub-register index which is in the specified register class B. 
Modified: llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp?rev=103835&r1=103834&r2=103835&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp (original) +++ llvm/trunk/lib/CodeGen/TwoAddressInstructionPass.cpp Fri May 14 18:21:14 2010 @@ -1166,6 +1166,7 @@ llvm_unreachable(0); } + SmallVector RealSrcs; SmallSet Seen; for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { unsigned SrcReg = MI->getOperand(i).getReg(); @@ -1176,6 +1177,16 @@ } MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI->isImplicitDef()) { + DefMI->eraseFromParent(); + continue; + } + + // Remember EXTRACT_SUBREG sources. These might be candidate for + // coalescing. + if (DefMI->isExtractSubreg()) + RealSrcs.push_back(DefMI->getOperand(1).getReg()); + if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent()) { // REG_SEQUENCE cannot have duplicated operands, add a copy. // Also add an copy if the source if live-in the block. We don't want @@ -1216,6 +1227,44 @@ DEBUG(dbgs() << "Eliminated: " << *MI); MI->eraseFromParent(); + + // Try coalescing some EXTRACT_SUBREG instructions. + Seen.clear(); + for (unsigned i = 0, e = RealSrcs.size(); i != e; ++i) { + unsigned SrcReg = RealSrcs[i]; + if (!Seen.insert(SrcReg)) + continue; + + // If there are no other uses than extract_subreg which feed into + // the reg_sequence, then we might be able to coalesce them. 
+ bool CanCoalesce = true; + SmallVector SubIndices; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (!UseMI->isExtractSubreg() || + UseMI->getOperand(0).getReg() != DstReg) { + CanCoalesce = false; + break; + } + SubIndices.push_back(UseMI->getOperand(2).getImm()); + } + + if (!CanCoalesce) + continue; + + // %reg1026 = VLDMQ %reg1025, 260, pred:14, pred:%reg0 + // %reg1029:6 = EXTRACT_SUBREG %reg1026, 6 + // %reg1029:5 = EXTRACT_SUBREG %reg1026, 5 + // Since D subregs 5, 6 can combine to a Q register, we can coalesce + // reg1026 to reg1029. + std::sort(SubIndices.begin(), SubIndices.end()); + unsigned NewSubIdx = 0; + if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices, + NewSubIdx)) + UpdateRegSequenceSrcs(SrcReg, DstReg, NewSubIdx, MRI); + } } RegSequences.clear(); Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp?rev=103835&r1=103834&r2=103835&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.cpp Fri May 14 18:21:14 2010 @@ -351,6 +351,123 @@ return 0; } +bool +ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC, + SmallVectorImpl &SubIndices, + unsigned &NewSubIdx) const { + + unsigned Size = RC->getSize() * 8; + if (Size < 6) + return 0; + + NewSubIdx = 0; // Whole register. + unsigned NumRegs = SubIndices.size(); + if (NumRegs == 8) { + // 8 D registers -> 1 QQQQ register. 
+ return (Size == 512 && + SubIndices[0] == ARM::DSUBREG_0 && + SubIndices[1] == ARM::DSUBREG_1 && + SubIndices[2] == ARM::DSUBREG_2 && + SubIndices[3] == ARM::DSUBREG_3 && + SubIndices[4] == ARM::DSUBREG_4 && + SubIndices[5] == ARM::DSUBREG_5 && + SubIndices[6] == ARM::DSUBREG_6 && + SubIndices[7] == ARM::DSUBREG_7); + } else if (NumRegs == 4) { + if (SubIndices[0] == ARM::QSUBREG_0) { + // 4 Q registers -> 1 QQQQ register. + return (Size == 512 && + SubIndices[1] == ARM::QSUBREG_1 && + SubIndices[2] == ARM::QSUBREG_2 && + SubIndices[3] == ARM::QSUBREG_3); + } else if (SubIndices[0] == ARM::DSUBREG_0) { + // 4 D registers -> 1 QQ register. + if (Size >= 256 && + SubIndices[1] == ARM::DSUBREG_1 && + SubIndices[2] == ARM::DSUBREG_2 && + SubIndices[3] == ARM::DSUBREG_3) { + if (Size == 512) + NewSubIdx = ARM::QQSUBREG_0; + return true; + } + } else if (SubIndices[0] == ARM::DSUBREG_4) { + // 4 D registers -> 1 QQ register (2nd). + if (Size == 512 && + SubIndices[1] == ARM::DSUBREG_5 && + SubIndices[2] == ARM::DSUBREG_6 && + SubIndices[3] == ARM::DSUBREG_7) { + NewSubIdx = ARM::QQSUBREG_1; + return true; + } + } else if (SubIndices[0] == ARM::SSUBREG_0) { + // 4 S registers -> 1 Q register. + if (Size >= 128 && + SubIndices[1] == ARM::SSUBREG_1 && + SubIndices[2] == ARM::SSUBREG_2 && + SubIndices[3] == ARM::SSUBREG_3) { + if (Size >= 256) + NewSubIdx = ARM::QSUBREG_0; + return true; + } + } + } else if (NumRegs == 2) { + if (SubIndices[0] == ARM::QSUBREG_0) { + // 2 Q registers -> 1 QQ register. + if (Size >= 256 && SubIndices[1] == ARM::QSUBREG_1) { + if (Size == 512) + NewSubIdx = ARM::QQSUBREG_0; + return true; + } + } else if (SubIndices[0] == ARM::QSUBREG_2) { + // 2 Q registers -> 1 QQ register (2nd). + if (Size == 512 && SubIndices[1] == ARM::QSUBREG_3) { + NewSubIdx = ARM::QQSUBREG_1; + return true; + } + } else if (SubIndices[0] == ARM::DSUBREG_0) { + // 2 D registers -> 1 Q register. 
+ if (Size >= 128 && SubIndices[1] == ARM::DSUBREG_1) { + if (Size >= 256) + NewSubIdx = ARM::QSUBREG_0; + return true; + } + } else if (SubIndices[0] == ARM::DSUBREG_2) { + // 2 D registers -> 1 Q register (2nd). + if (Size >= 256 && SubIndices[1] == ARM::DSUBREG_3) { + NewSubIdx = ARM::QSUBREG_1; + return true; + } + } else if (SubIndices[0] == ARM::DSUBREG_4) { + // 2 D registers -> 1 Q register (3rd). + if (Size == 512 && SubIndices[1] == ARM::DSUBREG_5) { + NewSubIdx = ARM::QSUBREG_2; + return true; + } + } else if (SubIndices[0] == ARM::DSUBREG_6) { + // 2 D registers -> 1 Q register (3rd). + if (Size == 512 && SubIndices[1] == ARM::DSUBREG_7) { + NewSubIdx = ARM::QSUBREG_3; + return true; + } + } else if (SubIndices[0] == ARM::SSUBREG_0) { + // 2 S registers -> 1 D register. + if (SubIndices[1] == ARM::SSUBREG_1) { + if (Size >= 128) + NewSubIdx = ARM::DSUBREG_0; + return true; + } + } else if (SubIndices[0] == ARM::SSUBREG_2) { + // 2 S registers -> 1 D register (2nd). + if (Size >= 128 && SubIndices[1] == ARM::SSUBREG_3) { + NewSubIdx = ARM::DSUBREG_1; + return true; + } + } + } + return false; +} + + const TargetRegisterClass * ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const { return ARM::GPRRegisterClass; Modified: llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h?rev=103835&r1=103834&r2=103835&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h (original) +++ llvm/trunk/lib/Target/ARM/ARMBaseRegisterInfo.h Fri May 14 18:21:14 2010 @@ -81,6 +81,15 @@ getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const; + /// canCombinedSubRegIndex - Given a register class and a list of sub-register + /// indices, return true if it's possible to combine the sub-register indices + /// into one that corresponds to a larger sub-register. 
Return the new sub- + /// register index by reference. Note the new index by be zero if the given + /// sub-registers combined to form the whole register. + virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC, + SmallVectorImpl &SubIndices, + unsigned &NewSubIdx) const; + const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; std::pair Modified: llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h?rev=103835&r1=103834&r2=103835&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h (original) +++ llvm/trunk/lib/Target/ARM/ARMRegisterInfo.h Fri May 14 18:21:14 2010 @@ -31,7 +31,8 @@ SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4, DSUBREG_0 = 5, DSUBREG_1 = 6, DSUBREG_2 = 7, DSUBREG_3 = 8, DSUBREG_4 = 9, DSUBREG_5 = 10, DSUBREG_6 = 11, DSUBREG_7 = 12, - QSUBREG_0 = 13, QSUBREG_1 = 14, QSUBREG_2 = 15, QSUBREG_3 = 16 + QSUBREG_0 = 13, QSUBREG_1 = 14, QSUBREG_2 = 15, QSUBREG_3 = 16, + QQSUBREG_0= 17, QQSUBREG_1= 18 }; } Modified: llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td?rev=103835&r1=103834&r2=103835&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td Fri May 14 18:21:14 2010 @@ -465,6 +465,10 @@ def arm_qsubreg_2 : PatLeaf<(i32 15)>; def arm_qsubreg_3 : PatLeaf<(i32 16)>; +def arm_qqsubreg_0 : PatLeaf<(i32 17)>; +def arm_qqsubreg_1 : PatLeaf<(i32 18)>; + + // S sub-registers of D registers. def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15], @@ -552,3 +556,10 @@ [Q2, Q6, Q10, Q14]>; def : SubRegSet<16, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], [Q3, Q7, Q11, Q15]>; + +// QQ sub-registers of QQQQQQQQ registers. 
+def : SubRegSet<17, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [QQ0, QQ2, QQ4, QQ6]>; +def : SubRegSet<18, [QQQQ0, QQQQ1, QQQQ2, QQQQ3], + [QQ1, QQ3, QQ5, QQ7]>; + Modified: llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp?rev=103835&r1=103834&r2=103835&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONPreAllocPass.cpp Fri May 14 18:21:14 2010 @@ -414,7 +414,9 @@ return false; LastSrcReg = VirtReg; const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - if (RC != ARM::QPRRegisterClass && RC != ARM::QQPRRegisterClass) + if (RC != ARM::QPRRegisterClass && + RC != ARM::QQPRRegisterClass && + RC != ARM::QQQQPRRegisterClass) return false; unsigned SubIdx = DefMI->getOperand(2).getImm(); if (LastSubIdx) { @@ -432,7 +434,7 @@ // FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is // currently required for correctness. e.g. - // %reg1041; = REG_SEQUENCE %reg1040, 5, %reg1035, 6 + // %reg1041; = REG_SEQUENCE %reg1040, 5, %reg1035, 6 // %reg1042 = EXTRACT_SUBREG %reg1041, 6 // %reg1043 = EXTRACT_SUBREG %reg1041, 5 // VST1q16 %reg1025, 0, %reg1043, %reg1042, From gkistanova at gmail.com Fri May 14 18:47:04 2010 From: gkistanova at gmail.com (Galina Kistanova) Date: Fri, 14 May 2010 16:47:04 -0700 Subject: [llvm-commits] New ScriptedBuilder patch Message-ID: Hello everyone, Please review the patch for buildbot ScriptedBuilder. It adds ability to specify type of command for build steps (for example to use ClangTestCommand). By default steps are WarningCountingShellCommand. I am going to send follow up patch for update builders to use this. Please review. Thanks Galina -------------- next part -------------- A non-text attachment was scrubbed... 
Name: patch04.diff Type: text/x-patch Size: 2521 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100514/184faa41/attachment.bin From gkistanova at gmail.com Fri May 14 19:06:13 2010 From: gkistanova at gmail.com (Galina Kistanova) Date: Fri, 14 May 2010 17:06:13 -0700 Subject: [llvm-commits] New ScriptedBuilder patch In-Reply-To: References: Message-ID: Hello, Please review patch to update one of existing builders and increase number of max_builds to 2 for the slave. Thanks Galina On Fri, May 14, 2010 at 4:47 PM, Galina Kistanova wrote: > Hello everyone, > > Please review the patch for buildbot ScriptedBuilder. > It adds ability to specify type of command for build steps (for > example to use ClangTestCommand). By default steps are > WarningCountingShellCommand. > > I am going to send follow up patch for update builders to use this. > > Please review. > > Thanks > > Galina > -------------- next part -------------- A non-text attachment was scrubbed... Name: patch05.diff Type: text/x-patch Size: 1450 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100514/98569088/attachment.bin From evan.cheng at apple.com Fri May 14 20:35:44 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Sat, 15 May 2010 01:35:44 -0000 Subject: [llvm-commits] [llvm] r103850 - /llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Message-ID: <20100515013544.CB7EB312800A@llvm.org> Author: evancheng Date: Fri May 14 20:35:44 2010 New Revision: 103850 URL: http://llvm.org/viewvc/llvm-project?rev=103850&view=rev Log: A partial re-def instruction may be a copy. 
Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=103850&r1=103849&r2=103850&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Fri May 14 20:35:44 2010 @@ -454,7 +454,14 @@ // Value#0 is now defined by the 2-addr instruction. OldValNo->def = RedefIndex; - OldValNo->setCopy(0); + if (!PartReDef) + OldValNo->setCopy(0); + else { + // A re-def may be a copy. e.g. %reg1030:6 = VMOVD %reg1026, ... + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) + OldValNo->setCopy(&*mi); + } // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); From evan.cheng at apple.com Fri May 14 20:36:30 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Sat, 15 May 2010 01:36:30 -0000 Subject: [llvm-commits] [llvm] r103851 - /llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Message-ID: <20100515013630.24620312800A@llvm.org> Author: evancheng Date: Fri May 14 20:36:29 2010 New Revision: 103851 URL: http://llvm.org/viewvc/llvm-project?rev=103851&view=rev Log: Model 64-bit lane vld with REG_SEQUENCE. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=103851&r1=103850&r2=103851&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Fri May 14 20:36:29 2010 @@ -1073,7 +1073,6 @@ if (!llvm::ModelWithRegSequence() || NumVecs < 2) return VLd; - assert(NumVecs <= 4); SDValue RegSeq; SDValue V0 = SDValue(VLd, 0); SDValue V1 = SDValue(VLd, 1); @@ -1229,7 +1228,6 @@ if (is64BitVector) { if (llvm::ModelWithRegSequence() && NumVecs >= 2) { - assert(NumVecs <= 4); SDValue RegSeq; SDValue V0 = N->getOperand(0+3); SDValue V1 = N->getOperand(1+3); @@ -1313,8 +1311,6 @@ // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. if (llvm::ModelWithRegSequence()) { - assert(NumVecs <= 4); - // Form the QQQQ REG_SEQUENCE. SDValue V[8]; for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { @@ -1460,8 +1456,34 @@ std::vector ResTys(NumVecs, RegVT); ResTys.push_back(MVT::Other); - SDNode *VLdLn = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6); + SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6); + + if (llvm::ModelWithRegSequence() && is64BitVector) { + SDValue RegSeq; + SDValue V0 = SDValue(VLdLn, 0); + SDValue V1 = SDValue(VLdLn, 1); + + // Form a REG_SEQUENCE to force register allocation. + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + } else { + SDValue V2 = SDValue(VLdLn, 2); + // If it's a vld3, form a quad D-register but discard the last part. + SDValue V3 = (NumVecs == 3) + ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : SDValue(VLdLn, 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue D = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0+Vec, dl, VT, + RegSeq); + ReplaceUses(SDValue(N, Vec), D); + } + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs)); + return NULL; + } + // For a 64-bit vector load to D registers, nothing more needs to be done. if (is64BitVector) return VLdLn; From kledzik at apple.com Fri May 14 20:42:04 2010 From: kledzik at apple.com (Nick Kledzik) Date: Sat, 15 May 2010 01:42:04 -0000 Subject: [llvm-commits] [compiler-rt] r103852 - /compiler-rt/trunk/make/platform/darwin_bni.mk Message-ID: <20100515014205.08387312800A@llvm.org> Author: kledzik Date: Fri May 14 20:42:04 2010 New Revision: 103852 URL: http://llvm.org/viewvc/llvm-project?rev=103852&view=rev Log: Libcompiler_rt isn't platform aware Modified: compiler-rt/trunk/make/platform/darwin_bni.mk Modified: compiler-rt/trunk/make/platform/darwin_bni.mk URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/make/platform/darwin_bni.mk?rev=103852&r1=103851&r2=103852&view=diff ============================================================================== --- compiler-rt/trunk/make/platform/darwin_bni.mk (original) +++ compiler-rt/trunk/make/platform/darwin_bni.mk Fri May 14 20:42:04 2010 @@ -8,6 +8,12 @@ # and the resulting lib will just have generic versions for anything unknown. 
UniversalArchs := $(RC_ARCHS) +ifeq (,$(SDKROOT)) +else + CC.Release := /Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/cc + CC.Static := /Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/cc +endif + CFLAGS := -Wall -Os -fomit-frame-pointer -g CFLAGS.Static := $(CFLAGS) -static From kledzik at apple.com Fri May 14 20:43:04 2010 From: kledzik at apple.com (Nick Kledzik) Date: Sat, 15 May 2010 01:43:04 -0000 Subject: [llvm-commits] [compiler-rt] r103853 - /compiler-rt/tags/Apple/Libcompiler_rt-7.1/ Message-ID: <20100515014304.7A447312800A@llvm.org> Author: kledzik Date: Fri May 14 20:43:04 2010 New Revision: 103853 URL: http://llvm.org/viewvc/llvm-project?rev=103853&view=rev Log: Libcompiler_rt-7.1 Added: compiler-rt/tags/Apple/Libcompiler_rt-7.1/ - copied from r103852, compiler-rt/trunk/ From evan.cheng at apple.com Fri May 14 21:18:07 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Sat, 15 May 2010 02:18:07 -0000 Subject: [llvm-commits] [llvm] r103854 - in /llvm/trunk: include/llvm/Target/TargetLowering.h lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMISelLowering.h test/CodeGen/ARM/2010-05-14-IllegalType.ll Message-ID: <20100515021807.70D8C312800A@llvm.org> Author: evancheng Date: Fri May 14 21:18:07 2010 New Revision: 103854 URL: http://llvm.org/viewvc/llvm-project?rev=103854&view=rev Log: Allow TargetLowering::getRegClassFor() to be called on illegal types. Also allow target to override it in order to map register classes to illegal but synthesizable types. e.g. v4i64, v8i64 for ARM / NEON. 
Added: llvm/trunk/test/CodeGen/ARM/2010-05-14-IllegalType.ll Modified: llvm/trunk/include/llvm/Target/TargetLowering.h llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp llvm/trunk/lib/Target/ARM/ARMISelLowering.h Modified: llvm/trunk/include/llvm/Target/TargetLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=103854&r1=103853&r2=103854&view=diff ============================================================================== --- llvm/trunk/include/llvm/Target/TargetLowering.h (original) +++ llvm/trunk/include/llvm/Target/TargetLowering.h Fri May 14 21:18:07 2010 @@ -155,8 +155,8 @@ } /// getRegClassFor - Return the register class that should be used for the - /// specified value type. This may only be called on legal types. - TargetRegisterClass *getRegClassFor(EVT VT) const { + /// specified value type. + virtual TargetRegisterClass *getRegClassFor(EVT VT) const { assert(VT.isSimple() && "getRegClassFor called on illegal type!"); TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy]; assert(RC && "This value type is not natively supported!"); Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=103854&r1=103853&r2=103854&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri May 14 21:18:07 2010 @@ -266,13 +266,6 @@ addQRTypeForNEON(MVT::v4i32); addQRTypeForNEON(MVT::v2i64); - // Map v4i64 to QQ registers but do not make the type legal for any - // operations. Similarly map v8i64 to QQQQ registers. v4i64 and v8i64 are - // only used for REG_SEQUENCE to load / store 4 to 8 consecutive - // D registers. 
- addRegisterClass(MVT::v4i64, ARM::QQPRRegisterClass); - addRegisterClass(MVT::v8i64, ARM::QQQQPRRegisterClass); - // v2f64 is legal so that QR subregs can be extracted as f64 elements, but // neither Neon nor VFP support any arithmetic operations on it. setOperationAction(ISD::FADD, MVT::v2f64, Expand); @@ -586,6 +579,19 @@ } } +/// getRegClassFor - Return the register class that should be used for the +/// specified value type. +TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { + // Map v4i64 to QQ registers but do not make the type legal. Similarly map + // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to + // load / store 4 to 8 consecutive D registers. + if (VT == MVT::v4i64) + return ARM::QQPRRegisterClass; + else if (VT == MVT::v8i64) + return ARM::QQQQPRRegisterClass; + return TargetLowering::getRegClassFor(VT); +} + /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { return getTargetMachine().getSubtarget().isThumb() ? 0 : 1; Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=103854&r1=103853&r2=103854&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Fri May 14 21:18:07 2010 @@ -240,6 +240,10 @@ return Subtarget; } + /// getRegClassFor - Return the register class that should be used for the + /// specified value type. + virtual TargetRegisterClass *getRegClassFor(EVT VT) const; + /// getFunctionAlignment - Return the Log2 alignment of this function. 
virtual unsigned getFunctionAlignment(const Function *F) const; Added: llvm/trunk/test/CodeGen/ARM/2010-05-14-IllegalType.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2010-05-14-IllegalType.ll?rev=103854&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/ARM/2010-05-14-IllegalType.ll (added) +++ llvm/trunk/test/CodeGen/ARM/2010-05-14-IllegalType.ll Fri May 14 21:18:07 2010 @@ -0,0 +1,10 @@ +; RUN: llc -march=thumb -mcpu=cortex-a8 -mtriple=thumbv7-eabi -float-abi=hard < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +define <4 x i64> @f_4_i64(<4 x i64> %a, <4 x i64> %b) nounwind { +; CHECK: vadd.i64 + %y = add <4 x i64> %a, %b + ret <4 x i64> %y +} From evan.cheng at apple.com Fri May 14 21:20:22 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Sat, 15 May 2010 02:20:22 -0000 Subject: [llvm-commits] [llvm] r103855 - /llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Message-ID: <20100515022022.29F35312800A@llvm.org> Author: evancheng Date: Fri May 14 21:20:21 2010 New Revision: 103855 URL: http://llvm.org/viewvc/llvm-project?rev=103855&view=rev Log: v4i64 and v8i64 are only synthesizable when NEON is available. Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=103855&r1=103854&r2=103855&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri May 14 21:20:21 2010 @@ -585,10 +585,12 @@ // Map v4i64 to QQ registers but do not make the type legal. Similarly map // v8i64 to QQQQ registers. 
v4i64 and v8i64 are only used for REG_SEQUENCE to // load / store 4 to 8 consecutive D registers. - if (VT == MVT::v4i64) - return ARM::QQPRRegisterClass; - else if (VT == MVT::v8i64) - return ARM::QQQQPRRegisterClass; + if (Subtarget->hasNEON()) { + if (VT == MVT::v4i64) + return ARM::QQPRRegisterClass; + else if (VT == MVT::v8i64) + return ARM::QQQQPRRegisterClass; + } return TargetLowering::getRegClassFor(VT); } From stoklund at 2pi.dk Fri May 14 21:52:58 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Sat, 15 May 2010 02:52:58 -0000 Subject: [llvm-commits] [llvm] r103856 - /llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h Message-ID: <20100515025258.306AF312800A@llvm.org> Author: stoklund Date: Fri May 14 21:52:58 2010 New Revision: 103856 URL: http://llvm.org/viewvc/llvm-project?rev=103856&view=rev Log: Add reg_nodbg_iterator Modified: llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h Modified: llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h?rev=103856&r1=103855&r2=103856&view=diff ============================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h (original) +++ llvm/trunk/include/llvm/CodeGen/MachineRegisterInfo.h Fri May 14 21:52:58 2010 @@ -93,6 +93,20 @@ /// specified register (it may be live-in). bool reg_empty(unsigned RegNo) const { return reg_begin(RegNo) == reg_end(); } + /// reg_nodbg_iterator/reg_nodbg_begin/reg_nodbg_end - Walk all defs and uses + /// of the specified register, skipping those marked as Debug. 
+ typedef defusechain_iterator reg_nodbg_iterator; + reg_nodbg_iterator reg_nodbg_begin(unsigned RegNo) const { + return reg_nodbg_iterator(getRegUseDefListHead(RegNo)); + } + static reg_nodbg_iterator reg_nodbg_end() { return reg_nodbg_iterator(0); } + + /// reg_nodbg_empty - Return true if the only instructions using or defining + /// Reg are Debug instructions. + bool reg_nodbg_empty(unsigned RegNo) const { + return reg_nodbg_begin(RegNo) == reg_nodbg_end(); + } + /// def_iterator/def_begin/def_end - Walk all defs of the specified register. typedef defusechain_iterator def_iterator; def_iterator def_begin(unsigned RegNo) const { From nicholas at mxc.ca Fri May 14 22:41:59 2010 From: nicholas at mxc.ca (Nick Lewycky) Date: Sat, 15 May 2010 03:41:59 -0000 Subject: [llvm-commits] [llvm] r103857 - /llvm/trunk/lib/Transforms/IPO/DeadArgumentElimination.cpp Message-ID: <20100515034159.1C9E9312800A@llvm.org> Author: nicholas Date: Fri May 14 22:41:58 2010 New Revision: 103857 URL: http://llvm.org/viewvc/llvm-project?rev=103857&view=rev Log: Clean up, no functional change. Modified: llvm/trunk/lib/Transforms/IPO/DeadArgumentElimination.cpp Modified: llvm/trunk/lib/Transforms/IPO/DeadArgumentElimination.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/DeadArgumentElimination.cpp?rev=103857&r1=103856&r2=103857&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/DeadArgumentElimination.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/DeadArgumentElimination.cpp Fri May 14 22:41:58 2010 @@ -535,14 +535,14 @@ /// values (according to Uses) live as well. void DAE::MarkLive(const Function &F) { DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n"); - // Mark the function as live. - LiveFunctions.insert(&F); - // Mark all arguments as live. 
- for (unsigned i = 0, e = F.arg_size(); i != e; ++i) - PropagateLiveness(CreateArg(&F, i)); - // Mark all return values as live. - for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i) - PropagateLiveness(CreateRet(&F, i)); + // Mark the function as live. + LiveFunctions.insert(&F); + // Mark all arguments as live. + for (unsigned i = 0, e = F.arg_size(); i != e; ++i) + PropagateLiveness(CreateArg(&F, i)); + // Mark all return values as live. + for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i) + PropagateLiveness(CreateRet(&F, i)); } /// MarkLive - Mark the given return value or argument as live. Additionally, @@ -859,7 +859,7 @@ if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { Value *RetVal; - if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) { + if (NFTy->getReturnType()->isVoidTy()) { RetVal = 0; } else { assert (RetTy->isStructTy()); From nicholas at mxc.ca Fri May 14 23:26:25 2010 From: nicholas at mxc.ca (Nick Lewycky) Date: Sat, 15 May 2010 04:26:25 -0000 Subject: [llvm-commits] [llvm] r103858 - /llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp Message-ID: <20100515042625.3229F312800A@llvm.org> Author: nicholas Date: Fri May 14 23:26:25 2010 New Revision: 103858 URL: http://llvm.org/viewvc/llvm-project?rev=103858&view=rev Log: Teach the always inliner to release its inline cost estimates, like the basic inliner did in r103653. Why does the always inliner even bother with cost estimates anyways? 
Modified: llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp Modified: llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp?rev=103858&r1=103857&r2=103858&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp (original) +++ llvm/trunk/lib/Transforms/IPO/InlineAlways.cpp Fri May 14 23:26:25 2010 @@ -54,6 +54,9 @@ return removeDeadFunctions(CG, &NeverInline); } virtual bool doInitialization(CallGraph &CG); + void releaseMemory() { + CA.clear(); + } }; } From stoklund at 2pi.dk Sat May 15 01:09:08 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Sat, 15 May 2010 06:09:08 -0000 Subject: [llvm-commits] [llvm] r103866 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100515060908.5D169312800A@llvm.org> Author: stoklund Date: Sat May 15 01:09:08 2010 New Revision: 103866 URL: http://llvm.org/viewvc/llvm-project?rev=103866&view=rev Log: Calculate liveness on the fly for local registers. When working top-down in a basic block, substituting physregs for virtregs, the use-def chains are kept up to date. That means we can recognize a virtreg kill by the use-def chain becoming empty. This makes the fast allocator independent of incoming kill flags. 
Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103866&r1=103865&r2=103866&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Sat May 15 01:09:08 2010 @@ -132,6 +132,8 @@ bool runOnMachineFunction(MachineFunction &Fn); void AllocateBasicBlock(MachineBasicBlock &MBB); int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); + bool isLastUseOfLocalReg(MachineOperand&); + void addKillFlag(LiveRegMap::iterator i); void killVirtReg(LiveRegMap::iterator i); void killVirtReg(unsigned VirtReg); @@ -174,6 +176,26 @@ return FrameIdx; } +/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to +/// its virtual register, and it is guaranteed to be a block-local register. +/// +bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { + // Check for non-debug uses or defs following MO. + // This is the most likely way to fail - fast path it. + MachineOperand *i = &MO; + while ((i = i->getNextOperandForReg())) + if (!i->isDebug()) + return false; + + // If the register has ever been spilled or reloaded, we conservatively assume + // it is a global register used in multiple blocks. + if (StackSlotForVirtReg[MO.getReg()] != -1) + return false; + + // Check that the use/def chain has exactly one operand - MO. + return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO; +} + /// addKillFlag - Set kill flags on last use of a virtual register. 
void RAFast::addKillFlag(LiveRegMap::iterator lri) { assert(lri != LiveVirtRegs.end() && "Killing unmapped virtual register"); @@ -566,6 +588,15 @@ TII->loadRegFromStackSlot(MBB, MI, lri->second.PhysReg, FrameIndex, RC, TRI); ++NumLoads; + } else if (lri->second.Dirty) { + MachineOperand &MO = MI->getOperand(OpNum); + if (isLastUseOfLocalReg(MO)) { + DEBUG(dbgs() << "Killing last use: " << MO << "\n"); + MO.setIsKill(); + } else if (MO.isKill()) { + DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n"); + MO.setIsKill(false); + } } LiveReg &LR = lri->second; LR.LastUse = MI; From evan.cheng at apple.com Sat May 15 02:53:37 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Sat, 15 May 2010 07:53:37 -0000 Subject: [llvm-commits] [llvm] r103868 - /llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Message-ID: <20100515075337.3997B312800A@llvm.org> Author: evancheng Date: Sat May 15 02:53:37 2010 New Revision: 103868 URL: http://llvm.org/viewvc/llvm-project?rev=103868&view=rev Log: Model 128-bit vld lane with REG_SEQUENCE. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=103868&r1=103867&r2=103868&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Sat May 15 02:53:37 2010 @@ -1320,8 +1320,9 @@ N->getOperand(Vec+3)); } if (NumVecs == 3) - V[6] = V[7] = - SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0); + V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], V[4], V[5], V[6], V[7]), 0); @@ -1458,28 +1459,52 @@ ResTys.push_back(MVT::Other); SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6); - if (llvm::ModelWithRegSequence() && is64BitVector) { - SDValue RegSeq; - SDValue V0 = SDValue(VLdLn, 0); - SDValue V1 = SDValue(VLdLn, 1); - + if (llvm::ModelWithRegSequence()) { // Form a REG_SEQUENCE to force register allocation. - if (NumVecs == 2) { - RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); - } else { - SDValue V2 = SDValue(VLdLn, 2); - // If it's a vld3, form a quad D-register but discard the last part. - SDValue V3 = (NumVecs == 3) + SDValue RegSeq; + if (is64BitVector) { + SDValue V0 = SDValue(VLdLn, 0); + SDValue V1 = SDValue(VLdLn, 1); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + } else { + SDValue V2 = SDValue(VLdLn, 2); + // If it's a vld3, form a quad D-register but discard the last part. + SDValue V3 = (NumVecs == 3) ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) : SDValue(VLdLn, 3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + } else { + // For 128-bit vectors, take the 64-bit results of the load and insert them + // as subregs into the result. + SDValue V[8]; + for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { + if (SubregIdx == ARM::DSUBREG_0) { + V[i] = SDValue(VLdLn, Vec); + V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + } else { + V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + V[i+1] = SDValue(VLdLn, Vec); + } + } + if (NumVecs == 3) + V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + + if (NumVecs == 2) + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0); + else + RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], + V[4], V[5], V[6], V[7]), 0); } - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue D = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0+Vec, dl, VT, - RegSeq); - ReplaceUses(SDValue(N, Vec), D); - } + unsigned SubIdx = is64BitVector ? ARM::DSUBREG_0 : ARM::QSUBREG_0; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq)); ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs)); return NULL; } From chandlerc at gmail.com Sat May 15 05:23:23 2010 From: chandlerc at gmail.com (Chandler Carruth) Date: Sat, 15 May 2010 10:23:23 -0000 Subject: [llvm-commits] [llvm] r103869 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100515102323.70860312800A@llvm.org> Author: chandlerc Date: Sat May 15 05:23:23 2010 New Revision: 103869 URL: http://llvm.org/viewvc/llvm-project?rev=103869&view=rev Log: Fix an GCC warning that seems to have actually caught a bug (!!!) in a condition's grouping. 
Every other use of Allocatable.test(Hint) groups it the same way as it is indented, so move the parentheses to agree with that grouping. Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103869&r1=103868&r2=103869&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Sat May 15 05:23:23 2010 @@ -416,8 +416,8 @@ // Ignore invalid hints. if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || UsedInInstr.test(Hint)) || - !Allocatable.test(Hint)) + !RC->contains(Hint) || UsedInInstr.test(Hint) || + !Allocatable.test(Hint))) Hint = 0; // If there is no hint, peek at the first use of this register. From chandlerc at google.com Sat May 15 05:28:11 2010 From: chandlerc at google.com (Chandler Carruth) Date: Sat, 15 May 2010 03:28:11 -0700 Subject: [llvm-commits] [llvm] r103828 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp In-Reply-To: <20100514220256.F210E312800A@llvm.org> References: <20100514220256.F210E312800A@llvm.org> Message-ID: On Fri, May 14, 2010 at 3:02 PM, Jakob Stoklund Olesen wrote: > > > ============================================================================== > --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) > +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Fri May 14 17:02:56 2010 > @@ -394,7 +394,8 @@ > > // Ignore invalid hints. > if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || > - !RC->contains(Hint) || UsedInInstr.test(Hint))) > + !RC->contains(Hint) || UsedInInstr.test(Hint)) || > + !Allocatable.test(Hint)) > I think the !Allocatable.test(Hint) is intended to be in the group of ||s? Every other use groups in that way, the indentation agrees, and a GCC warning pointed this out, so I went ahead and switched it in r103869. 
Please review, and I'll fix if this was intentional (and you don't beat me to it!). -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100515/bd0895c4/attachment.html From stoklund at 2pi.dk Sat May 15 10:04:14 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Sat, 15 May 2010 08:04:14 -0700 Subject: [llvm-commits] [llvm] r103869 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp In-Reply-To: <20100515102323.70860312800A@llvm.org> References: <20100515102323.70860312800A@llvm.org> Message-ID: <4AA7476F-4BF2-4EE6-B56C-ACCB13297EC6@2pi.dk> On May 15, 2010, at 3:23 AM, Chandler Carruth wrote: > Author: chandlerc > Date: Sat May 15 05:23:23 2010 > New Revision: 103869 > > URL: http://llvm.org/viewvc/llvm-project?rev=103869&view=rev > Log: > Fix an GCC warning that seems to have actually caught a bug (!!!) in > a condition's grouping. Every other use of Allocatable.test(Hint) groups it the > same way as it is indented, so move the parentheses to agree with that > grouping. Nice catch, thanks! /jakob From clattner at apple.com Sat May 15 12:08:38 2010 From: clattner at apple.com (Chris Lattner) Date: Sat, 15 May 2010 10:08:38 -0700 Subject: [llvm-commits] [llvm] r103757 - in /llvm/trunk: include/llvm/MC/MCParser/AsmParser.h include/llvm/MC/MCStreamer.h lib/MC/MCAsmStreamer.cpp lib/MC/MCMachOStreamer.cpp lib/MC/MCNullStreamer.cpp lib/MC/MCParser/AsmParser.cpp test/MC/AsmParser/directive_tbss.s In-Reply-To: <20100514015028.A8EC4312800A@llvm.org> References: <20100514015028.A8EC4312800A@llvm.org> Message-ID: On May 13, 2010, at 6:50 PM, Eric Christopher wrote: > URL: http://llvm.org/viewvc/llvm-project?rev=103757&view=rev > Log: > Add AsmParser support for darwin tbss directive. 
Hi Eric, thanks for working on this: > +void MCAsmStreamer::EmitTBSSSymbol(MCSymbol *Symbol, uint64_t Size, > + unsigned ByteAlignment) { > + assert(Symbol != NULL && "Symbol shouldn't be NULL!"); > + OS << ".tbss "; > + > + // This is a mach-o specific directive and the name requires some mangling. > + OS << *Symbol << "$tlv$init, " << Size; The $tlv$init suffix should be added by the caller, and be part of the MCSymbol. > + > + // Demangle the name output. The trailing characters are guaranteed to be > + // $tlv$init so just strip that off. > + StringRef DemName = Name.substr(0, Name.size() - strlen("$tlv$init")); That allows elimination of this logic from the parser, which isn't safe if the suffix doesn't exist. > + // FIXME: Diagnose overflow. > + if (Pow2Alignment < 0) > + return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less" > + "than zero"); How about "negative" instead of "less than zero" :) > > +++ llvm/trunk/test/MC/AsmParser/directive_tbss.s Thu May 13 20:50:28 2010 > @@ -0,0 +1,7 @@ > +# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s This is a darwin-specific directive, you should force a darwin triple. 
-Chris > + > +# CHECK: .tbss _a$tlv$init, 4 > +# CHECK: .tbss _b$tlv$init, 4, 3 > + > +.tbss _a$tlv$init, 4 > +.tbss _b$tlv$init, 4, 3 > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From sabre at nondot.org Sat May 15 12:10:24 2010 From: sabre at nondot.org (Chris Lattner) Date: Sat, 15 May 2010 17:10:24 -0000 Subject: [llvm-commits] [llvm] r103876 - in /llvm/trunk/lib: CodeGen/LiveIntervalAnalysis.cpp CodeGen/RegAllocLinearScan.cpp CodeGen/RegAllocPBQP.cpp ExecutionEngine/ExecutionEngine.cpp Message-ID: <20100515171024.B3328312800A@llvm.org> Author: lattner Date: Sat May 15 12:10:24 2010 New Revision: 103876 URL: http://llvm.org/viewvc/llvm-project?rev=103876&view=rev Log: improve portability to systems that don't have powf/modf (e.g. solaris 9) patch by Evzen Muller! Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=103876&r1=103875&r2=103876&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Sat May 15 12:10:24 2010 @@ -1634,7 +1634,7 @@ // overflow a float. This expression behaves like 10^d for small d, but is // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of // headroom before overflow. 
- float lc = powf(1 + (100.0f / (loopDepth+10)), (float)loopDepth); + float lc = std::pow(1 + (100.0f / (loopDepth+10)), (float)loopDepth); return (isDef + isUse) * lc; } Modified: llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp?rev=103876&r1=103875&r2=103876&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp Sat May 15 12:10:24 2010 @@ -809,7 +809,7 @@ MachineInstr *MI = &*I; if (cur->liveAt(li_->getInstructionIndex(MI))) { unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent()); - Conflicts += powf(10.0f, (float)loopDepth); + Conflicts += std::pow(10.0f, (float)loopDepth); } } return Conflicts; Modified: llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp?rev=103876&r1=103875&r2=103876&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocPBQP.cpp Sat May 15 12:10:24 2010 @@ -489,7 +489,7 @@ // did, but none of their definitions would prevent us from coalescing. // We're good to go with the coalesce. 
- float cBenefit = powf(10.0f, loopInfo->getLoopDepth(mbb)) / 5.0; + float cBenefit = std::pow(10.0f, (float)loopInfo->getLoopDepth(mbb)) / 5.0; coalescesFound[RegPair(srcReg, dstReg)] = cBenefit; coalescesFound[RegPair(dstReg, srcReg)] = cBenefit; Modified: llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp?rev=103876&r1=103875&r2=103876&view=diff ============================================================================== --- llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp (original) +++ llvm/trunk/lib/ExecutionEngine/ExecutionEngine.cpp Sat May 15 12:10:24 2010 @@ -715,7 +715,7 @@ case Instruction::FDiv: GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break; case Instruction::FRem: - GV.FloatVal = ::fmodf(LHS.FloatVal,RHS.FloatVal); break; + GV.FloatVal = std::fmod(LHS.FloatVal,RHS.FloatVal); break; } break; case Type::DoubleTyID: @@ -730,7 +730,7 @@ case Instruction::FDiv: GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break; case Instruction::FRem: - GV.DoubleVal = ::fmod(LHS.DoubleVal,RHS.DoubleVal); break; + GV.DoubleVal = std::fmod(LHS.DoubleVal,RHS.DoubleVal); break; } break; case Type::X86_FP80TyID: From sabre at nondot.org Sat May 15 12:11:55 2010 From: sabre at nondot.org (Chris Lattner) Date: Sat, 15 May 2010 17:11:55 -0000 Subject: [llvm-commits] [llvm] r103877 - in /llvm/trunk: include/llvm/Config/config.h.cmake lib/Support/APInt.cpp Message-ID: <20100515171155.EB47A312800A@llvm.org> Author: lattner Date: Sat May 15 12:11:55 2010 New Revision: 103877 URL: http://llvm.org/viewvc/llvm-project?rev=103877&view=rev Log: improve portability to systems that don't have round, patch by Evzen Muller! 
Modified: llvm/trunk/include/llvm/Config/config.h.cmake llvm/trunk/lib/Support/APInt.cpp Modified: llvm/trunk/include/llvm/Config/config.h.cmake URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Config/config.h.cmake?rev=103877&r1=103876&r2=103877&view=diff ============================================================================== --- llvm/trunk/include/llvm/Config/config.h.cmake (original) +++ llvm/trunk/include/llvm/Config/config.h.cmake Sat May 15 12:11:55 2010 @@ -312,6 +312,9 @@ /* Define to 1 if you have the `roundf' function. */ #undef HAVE_ROUNDF +/* Define to 1 if you have the `round' function. */ +#cmakedefine HAVE_ROUND ${HAVE_ROUND} + /* Define to 1 if you have the `sbrk' function. */ #cmakedefine HAVE_SBRK ${HAVE_SBRK} Modified: llvm/trunk/lib/Support/APInt.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/APInt.cpp?rev=103877&r1=103876&r2=103877&view=diff ============================================================================== --- llvm/trunk/lib/Support/APInt.cpp (original) +++ llvm/trunk/lib/Support/APInt.cpp Sat May 15 12:11:55 2010 @@ -1382,13 +1382,12 @@ // libc sqrt function which will probably use a hardware sqrt computation. // This should be faster than the algorithm below. if (magnitude < 52) { -#if defined( _MSC_VER ) || defined(_MINIX) - // Amazingly, VC++ and Minix don't have round(). 
+#if HAVE_ROUND return APInt(BitWidth, - uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5); + uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); #else return APInt(BitWidth, - uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); + uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5); #endif } From clattner at apple.com Sat May 15 12:13:33 2010 From: clattner at apple.com (Chris Lattner) Date: Sat, 15 May 2010 10:13:33 -0700 Subject: [llvm-commits] Fixes for build on hosts without round, powf and modf In-Reply-To: <000001caf295$8ac62210$a0526630$%muller@arm.com> References: <000001caf295$8ac62210$a0526630$%muller@arm.com> Message-ID: <1D03BEBB-C5D6-4DBC-99DD-05D7C927AFC4@apple.com> On May 13, 2010, at 5:12 AM, Evzen Muller wrote: > Hi, > > Attached patch fixes build on machines without round() or powf()/modf() functions in math.h (for example Solaris9). > I applied the powf/modf portion in r103876 and the round part in r103877, thanks! -Chris -------------- next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100515/62230525/attachment.html From asl at math.spbu.ru Sat May 15 12:19:20 2010 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Sat, 15 May 2010 17:19:20 -0000 Subject: [llvm-commits] [llvm] r103878 - in /llvm/trunk/lib/Target/ARM: ARMInstrInfo.td ARMInstrThumb.td Message-ID: <20100515171920.D01D4312800A@llvm.org> Author: asl Date: Sat May 15 12:19:20 2010 New Revision: 103878 URL: http://llvm.org/viewvc/llvm-project?rev=103878&view=rev Log: "trap" pseudo-op turned out to be apple-local. Temporary emit it as raw bytes until it will be added to binutils as well. 
Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=103878&r1=103877&r2=103878&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Sat May 15 12:19:20 2010 @@ -791,8 +791,11 @@ } // A5.4 Permanently UNDEFINED instructions. +// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to +// binutils let isBarrier = 1, isTerminator = 1 in -def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, "trap", [(trap)]>, +def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, + ".long 0xe7ffdefe @ trap", [(trap)]>, Requires<[IsARM]> { let Inst{27-25} = 0b011; let Inst{24-20} = 0b11111; Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb.td?rev=103878&r1=103877&r2=103878&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Sat May 15 12:19:20 2010 @@ -419,8 +419,11 @@ // A8.6.16 B: Encoding T1 // If Inst{11-8} == 0b1110 then UNDEFINED +// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to +// binutils let isBarrier = 1, isTerminator = 1 in -def tTRAP : TI<(outs), (ins), IIC_Br, "trap", [(trap)]>, Encoding16 { +def tTRAP : TI<(outs), (ins), IIC_Br, + ".word 0xdefe @ trap", [(trap)]>, Encoding16 { let Inst{15-12} = 0b1101; let Inst{11-8} = 0b1110; } From clattner at apple.com Sat May 15 12:42:41 2010 From: clattner at apple.com (Chris Lattner) Date: Sat, 15 May 2010 10:42:41 -0700 Subject: [llvm-commits] [llvm] r103878 - in /llvm/trunk/lib/Target/ARM: ARMInstrInfo.td ARMInstrThumb.td In-Reply-To: 
<20100515171920.D01D4312800A@llvm.org> References: <20100515171920.D01D4312800A@llvm.org> Message-ID: <758A8ED0-9721-4303-9772-A4893507F0D6@apple.com> On May 15, 2010, at 10:19 AM, Anton Korobeynikov wrote: > Author: asl > Date: Sat May 15 12:19:20 2010 > New Revision: 103878 > > URL: http://llvm.org/viewvc/llvm-project?rev=103878&view=rev > Log: > "trap" pseudo-op turned out to be apple-local. > Temporary emit it as raw bytes until it will be added to binutils as well. Ok. This should use the escape for the comment character in the asm string though. I think it is ${:comment} ? -Chris > > Modified: > llvm/trunk/lib/Target/ARM/ARMInstrInfo.td > llvm/trunk/lib/Target/ARM/ARMInstrThumb.td > > Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=103878&r1=103877&r2=103878&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) > +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Sat May 15 12:19:20 2010 > @@ -791,8 +791,11 @@ > } > > // A5.4 Permanently UNDEFINED instructions. 
> +// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to > +// binutils > let isBarrier = 1, isTerminator = 1 in > -def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, "trap", [(trap)]>, > +def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, > + ".long 0xe7ffdefe @ trap", [(trap)]>, > Requires<[IsARM]> { > let Inst{27-25} = 0b011; > let Inst{24-20} = 0b11111; > > Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb.td?rev=103878&r1=103877&r2=103878&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/ARM/ARMInstrThumb.td (original) > +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Sat May 15 12:19:20 2010 > @@ -419,8 +419,11 @@ > > // A8.6.16 B: Encoding T1 > // If Inst{11-8} == 0b1110 then UNDEFINED > +// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to > +// binutils > let isBarrier = 1, isTerminator = 1 in > -def tTRAP : TI<(outs), (ins), IIC_Br, "trap", [(trap)]>, Encoding16 { > +def tTRAP : TI<(outs), (ins), IIC_Br, > + ".word 0xdefe @ trap", [(trap)]>, Encoding16 { > let Inst{15-12} = 0b1101; > let Inst{11-8} = 0b1110; > } > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From anton at korobeynikov.info Sat May 15 13:03:33 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Sat, 15 May 2010 22:03:33 +0400 Subject: [llvm-commits] [llvm] r103878 - in /llvm/trunk/lib/Target/ARM: ARMInstrInfo.td ARMInstrThumb.td In-Reply-To: <758A8ED0-9721-4303-9772-A4893507F0D6@apple.com> References: <20100515171920.D01D4312800A@llvm.org> <758A8ED0-9721-4303-9772-A4893507F0D6@apple.com> Message-ID: <1273946613.22060.23.camel@aslstation> > Ok. This should use the escape for the comment character in the asm string though. I think it is ${:comment} ? 
Well, there are a lot of "@"'s in the assembler strings, are they intentional? -- With best regards, Anton Korobeynikov. Faculty of Mathematics & Mechanics, Saint Petersburg State University. From asl at math.spbu.ru Sat May 15 13:16:59 2010 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Sat, 15 May 2010 18:16:59 -0000 Subject: [llvm-commits] [llvm] r103881 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/mul_const.ll Message-ID: <20100515181659.EF8EA312800A@llvm.org> Author: asl Date: Sat May 15 13:16:59 2010 New Revision: 103881 URL: http://llvm.org/viewvc/llvm-project?rev=103881&view=rev Log: Some cheap DAG combine goodness for multiplication with a particular constant. This can be extended later on to handle more "complex" constants. Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp llvm/trunk/test/CodeGen/ARM/mul_const.ll Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=103881&r1=103880&r2=103881&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Sat May 15 13:16:59 2010 @@ -463,6 +463,7 @@ // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); + setTargetDAGCombine(ISD::MUL); setStackPointerRegisterToSaveRestore(ARM::SP); setSchedulingPreference(SchedulingForRegPressure); @@ -3584,6 +3585,75 @@ return SDValue(); } +static SDValue PerformMULCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + + if (Subtarget->isThumb1Only()) + return SDValue(); + + if (DAG.getMachineFunction(). 
+ getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + return SDValue(); + + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32) + return SDValue(); + + ConstantSDNode *C = dyn_cast(N->getOperand(1)); + if (!C) + return SDValue(); + + uint64_t MulAmt = C->getZExtValue(); + unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); + ShiftAmt = ShiftAmt & (32 - 1); + SDValue V = N->getOperand(0); + DebugLoc DL = N->getDebugLoc(); + SDValue NewAdd; + + // FIXME: Handle arbitrary powers of 2. + switch (MulAmt >> ShiftAmt) { + case 3: // 2 + 1 + NewAdd = DAG.getNode(ISD::ADD, DL, VT, + V, DAG.getNode(ISD::SHL, DL, VT, + V, DAG.getConstant(1, MVT::i32))); + break; + case 5: // 4 + 1 + NewAdd = DAG.getNode(ISD::ADD, DL, VT, + V, DAG.getNode(ISD::SHL, DL, VT, + V, DAG.getConstant(2, MVT::i32))); + break; + case 7: // 8 - 1 + NewAdd = DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, + V, DAG.getConstant(3, MVT::i32)), + V); + break; + case 9: // 8 + 1 + NewAdd = DAG.getNode(ISD::ADD, DL, VT, + V, DAG.getNode(ISD::SHL, DL, VT, + V, DAG.getConstant(3, MVT::i32))); + break; + default: return SDValue(); + } + + if (ShiftAmt != 0) { + SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAdd, + DAG.getConstant(ShiftAmt, MVT::i32)); + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, NewShift, false); + return SDValue(); + } + + // Do not add new nodes to DAG combiner worklist. + DCI.CombineTo(N, NewAdd, false); + return SDValue(); +} + /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. 
static SDValue PerformVMOVRRDCombine(SDNode *N, @@ -3970,6 +4040,7 @@ default: break; case ISD::ADD: return PerformADDCombine(N, DCI); case ISD::SUB: return PerformSUBCombine(N, DCI); + case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: Modified: llvm/trunk/test/CodeGen/ARM/mul_const.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/mul_const.ll?rev=103881&r1=103880&r2=103881&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/ARM/mul_const.ll (original) +++ llvm/trunk/test/CodeGen/ARM/mul_const.ll Sat May 15 13:16:59 2010 @@ -1,17 +1,43 @@ ; RUN: llc < %s -march=arm | FileCheck %s -define i32 @t1(i32 %v) nounwind readnone { +define i32 @t9(i32 %v) nounwind readnone { entry: -; CHECK: t1: +; CHECK: t9: ; CHECK: add r0, r0, r0, lsl #3 %0 = mul i32 %v, 9 ret i32 %0 } -define i32 @t2(i32 %v) nounwind readnone { +define i32 @t7(i32 %v) nounwind readnone { entry: -; CHECK: t2: +; CHECK: t7: ; CHECK: rsb r0, r0, r0, lsl #3 %0 = mul i32 %v, 7 ret i32 %0 } + +define i32 @t5(i32 %v) nounwind readnone { +entry: +; CHECK: t5: +; CHECK: add r0, r0, r0, lsl #2 + %0 = mul i32 %v, 5 + ret i32 %0 +} + +define i32 @t3(i32 %v) nounwind readnone { +entry: +; CHECK: t3: +; CHECK: add r0, r0, r0, lsl #1 + %0 = mul i32 %v, 3 + ret i32 %0 +} + +define i32 @t12288(i32 %v) nounwind readnone { +entry: +; CHECK: t12288: +; CHECK: add r0, r0, r0, lsl #1 +; CHECK: mov r0, r0, lsl #12 + %0 = mul i32 %v, 12288 + ret i32 %0 +} + From dalej at apple.com Sat May 15 13:38:02 2010 From: dalej at apple.com (Dale Johannesen) Date: Sat, 15 May 2010 18:38:02 -0000 Subject: [llvm-commits] [llvm] r103882 - /llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Message-ID: <20100515183802.86B41312800A@llvm.org> Author: johannes Date: Sat May 15 13:38:02 2010 New 
Revision: 103882 URL: http://llvm.org/viewvc/llvm-project?rev=103882&view=rev Log: Improve assertion messages. Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=103882&r1=103881&r2=103882&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Sat May 15 13:38:02 2010 @@ -2627,7 +2627,8 @@ } break; case ISD::AND: - assert(VT.isInteger() && N1.getValueType() == N2.getValueType() && + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's // worth handling here. @@ -2640,7 +2641,8 @@ case ISD::XOR: case ISD::ADD: case ISD::SUB: - assert(VT.isInteger() && N1.getValueType() == N2.getValueType() && + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so // it's worth handling here. 
@@ -2655,7 +2657,9 @@ case ISD::SDIV: case ISD::SREM: assert(VT.isInteger() && "This operator does not apply to FP types!"); - // fall through + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + break; case ISD::FADD: case ISD::FSUB: case ISD::FMUL: @@ -2678,6 +2682,7 @@ return N1; } } + assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); break; From dalej at apple.com Sat May 15 13:51:12 2010 From: dalej at apple.com (Dale Johannesen) Date: Sat, 15 May 2010 18:51:12 -0000 Subject: [llvm-commits] [llvm] r103883 - in /llvm/trunk/lib: CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp Target/X86/X86ISelLowering.cpp Message-ID: <20100515185112.B0155312800A@llvm.org> Author: johannes Date: Sat May 15 13:51:12 2010 New Revision: 103883 URL: http://llvm.org/viewvc/llvm-project?rev=103883&view=rev Log: Fix uint64->{float, double} conversion to do rounding correctly in 32-bit. The implementation in LegalizeIntegerTypes to handle this as sint64->float + appropriate power of 2 is subject to double rounding, considered incorrect by numerics people. Use this implementation only when it is safe. This leads to using library calls in some cases that produced inline code before, but it's correct now. (EVTToAPFloatSemantics belongs somewhere else, any suggestions?) Add a correctly rounding (though not particularly fast) conversion that uses X87 80-bit computations for x86-32. 7885399, 5901940. This shows up in gcc.c-torture/execute/ieee/rbug.c in the gcc testsuite on some platforms. 
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp?rev=103883&r1=103882&r2=103883&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (original) +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp Sat May 15 13:51:12 2010 @@ -2314,13 +2314,29 @@ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL); } +static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP format"); + case MVT::f32: return &APFloat::IEEEsingle; + case MVT::f64: return &APFloat::IEEEdouble; + case MVT::f80: return &APFloat::x87DoubleExtended; + case MVT::f128: return &APFloat::IEEEquad; + case MVT::ppcf128: return &APFloat::PPCDoubleDouble; + } +} + SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); EVT SrcVT = Op.getValueType(); EVT DstVT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); - if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ + // The following optimization is valid only if every value in SrcVT (when + // treated as signed) is representable in DstVT. Check that the mantissa + // size of DstVT is >= than the number of bits in SrcVT -1. + const fltSemantics *sem = EVTToAPFloatSemantics(DstVT); + if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 && + TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ // Do a signed conversion then adjust the result. 
SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); SignedConv = TLI.LowerOperation(SignedConv, DAG); Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=103883&r1=103882&r2=103883&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat May 15 13:51:12 2010 @@ -145,13 +145,12 @@ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand); } else if (!UseSoftFloat) { - if (X86ScalarSSEf64) { - // We have an impenetrably clever algorithm for ui64->double only. - setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); - } + // We have an algorithm for SSE2->double, and we turn this into a + // 64-bit FILD followed by conditional FADD for other targets. + setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); // We have an algorithm for SSE2, and we turn this into a 64-bit // FILD for other targets. - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); + setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); } // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have @@ -5462,7 +5461,7 @@ } SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, - SDValue StackSlot, + SDValue StackSlot, SelectionDAG &DAG) const { // Build the FILD DebugLoc dl = Op.getDebugLoc(); @@ -5636,35 +5635,72 @@ SDValue N0 = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); - // Now not UINT_TO_FP is legal (it's marked custom), dag combiner won't + // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform // the optimization here. 
if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0); EVT SrcVT = N0.getValueType(); - if (SrcVT == MVT::i64) { - // We only handle SSE2 f64 target here; caller can expand the rest. - if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64) - return SDValue(); - + EVT DstVT = Op.getValueType(); + if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) return LowerUINT_TO_FP_i64(Op, DAG); - } else if (SrcVT == MVT::i32 && X86ScalarSSEf64) { + else if (SrcVT == MVT::i32 && X86ScalarSSEf64) return LowerUINT_TO_FP_i32(Op, DAG); - } - - assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!"); // Make a 64-bit buffer, and use it to build an FILD. SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64); - SDValue WordOff = DAG.getConstant(4, getPointerTy()); - SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, - getPointerTy(), StackSlot, WordOff); - SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), + if (SrcVT == MVT::i32) { + SDValue WordOff = DAG.getConstant(4, getPointerTy()); + SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, + getPointerTy(), StackSlot, WordOff); + SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), + StackSlot, NULL, 0, false, false, 0); + SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), + OffsetSlot, NULL, 0, false, false, 0); + SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); + return Fild; + } + + assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, NULL, 0, false, false, 0); - SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), - OffsetSlot, NULL, 0, false, false, 0); - return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); + // For i64 source, we need to add the appropriate power of 2 if the input + // was negative. 
This is the same as the optimization in + // DAGTypeLegalizer::ExpandIntOp_UNIT_TO_FP, and for it to be safe here, + // we must be careful to do the computation in x87 extended precision, not + // in SSE. (The generic code can't know it's OK to do this, or how to.) + SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); + SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; + SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3); + + APInt FF(32, 0x5F800000ULL); + + // Check whether the sign bit is set. + SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), + Op.getOperand(0), DAG.getConstant(0, MVT::i64), + ISD::SETLT); + + // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. + SDValue FudgePtr = DAG.getConstantPool( + ConstantInt::get(*DAG.getContext(), FF.zext(64)), + getPointerTy()); + + // Get a pointer to FF if the sign bit was set, or to 0 otherwise. + SDValue Zero = DAG.getIntPtrConstant(0); + SDValue Four = DAG.getIntPtrConstant(4); + SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, + Zero, Four); + FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset); + + // Load the value out, extending it from f32 to f80. + // FIXME: Avoid the extend by constructing the right constant pool? + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), + FudgePtr, PseudoSourceValue::getConstantPool(), + 0, MVT::f32, false, false, 4); + // Extend everything to 80 bits to force it to be done on x87. 
+ SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge); + return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0)); } std::pair X86TargetLowering:: From echristo at apple.com Sat May 15 17:36:03 2010 From: echristo at apple.com (Eric Christopher) Date: Sat, 15 May 2010 15:36:03 -0700 Subject: [llvm-commits] [llvm] r103757 - in /llvm/trunk: include/llvm/MC/MCParser/AsmParser.h include/llvm/MC/MCStreamer.h lib/MC/MCAsmStreamer.cpp lib/MC/MCMachOStreamer.cpp lib/MC/MCNullStreamer.cpp lib/MC/MCParser/AsmParser.cpp test/MC/AsmParser/directive_tbss.s In-Reply-To: References: <20100514015028.A8EC4312800A@llvm.org> Message-ID: <6969D864-B6A2-4B5D-B751-C13C50200A0B@apple.com> > > Hi Eric, thanks for working on this: > Fun! >> +void MCAsmStreamer::EmitTBSSSymbol(MCSymbol *Symbol, uint64_t Size, >> + unsigned ByteAlignment) { >> + assert(Symbol != NULL && "Symbol shouldn't be NULL!"); >> + OS << ".tbss "; >> + >> + // This is a mach-o specific directive and the name requires some mangling. >> + OS << *Symbol << "$tlv$init, " << Size; > > The $tlv$init suffix should be added by the caller, and be part of the MCSymbol. > >> + >> + // Demangle the name output. The trailing characters are guaranteed to be >> + // $tlv$init so just strip that off. >> + StringRef DemName = Name.substr(0, Name.size() - strlen("$tlv$init")); > > That allows elimination of this logic from the parser, which isn't safe if the suffix doesn't exist. > Yeah, I was pretty unhappy about it and figured I'd be changing it. The idea is that something like: __thread a = 0; will turn into: .tbss _a+somemangling .tlv _a: .quad xxxx .quad _a+somemangling .quad 0 for any thread local symbol - but there's definitely no reason to have a specific mangling. Something else I'd considered was: void EmitTBSSSymbol(StringRef Name, uint64_t Size, unsigned ByteAlignment) ... I just didn't have enough there that i'd be happy about it. 
Another was: void EmitTBSSSymbol(MCSymbol Sym, ...., StringRef Mangle) but then I'm stuck trying to figure out which symbol I'm mangling. >> + // FIXME: Diagnose overflow. >> + if (Pow2Alignment < 0) >> + return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less" >> + "than zero"); > > How about "negative" instead of "less than zero" :) Sure! I'll change the wording in a couple of other places too :) > >> >> +++ llvm/trunk/test/MC/AsmParser/directive_tbss.s Thu May 13 20:50:28 2010 >> @@ -0,0 +1,7 @@ >> +# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s > > This is a darwin-specific directive, you should force a darwin triple. OK! -eric From evan.cheng at apple.com Sat May 15 18:37:36 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Sat, 15 May 2010 16:37:36 -0700 Subject: [llvm-commits] [llvm] r103881 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/mul_const.ll In-Reply-To: <20100515181659.EF8EA312800A@llvm.org> References: <20100515181659.EF8EA312800A@llvm.org> Message-ID: Hi Anton, Please move it to general dag combiner for all power of two cases. Special casing for 3, 5, 7, 9 is fairly ugly. Jakob, I think you have a dag combine patch for this? It wasn't profitable for x86, but perhaps it's good for other targets. Evan On May 15, 2010, at 11:16 AM, Anton Korobeynikov wrote: > Author: asl > Date: Sat May 15 13:16:59 2010 > New Revision: 103881 > > URL: http://llvm.org/viewvc/llvm-project?rev=103881&view=rev > Log: > Some cheap DAG combine goodness for multiplication with a particular constant. > This can be extended later on to handle more "complex" constants. 
> > Modified: > llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp > llvm/trunk/test/CodeGen/ARM/mul_const.ll > > Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=103881&r1=103880&r2=103881&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) > +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Sat May 15 13:16:59 2010 > @@ -463,6 +463,7 @@ > // ARMISD::VMOVRRD - No need to call setTargetDAGCombine > setTargetDAGCombine(ISD::ADD); > setTargetDAGCombine(ISD::SUB); > + setTargetDAGCombine(ISD::MUL); > > setStackPointerRegisterToSaveRestore(ARM::SP); > setSchedulingPreference(SchedulingForRegPressure); > @@ -3584,6 +3585,75 @@ > return SDValue(); > } > > +static SDValue PerformMULCombine(SDNode *N, > + TargetLowering::DAGCombinerInfo &DCI, > + const ARMSubtarget *Subtarget) { > + SelectionDAG &DAG = DCI.DAG; > + > + if (Subtarget->isThumb1Only()) > + return SDValue(); > + > + if (DAG.getMachineFunction(). > + getFunction()->hasFnAttr(Attribute::OptimizeForSize)) > + return SDValue(); > + > + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) > + return SDValue(); > + > + EVT VT = N->getValueType(0); > + if (VT != MVT::i32) > + return SDValue(); > + > + ConstantSDNode *C = dyn_cast(N->getOperand(1)); > + if (!C) > + return SDValue(); > + > + uint64_t MulAmt = C->getZExtValue(); > + unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); > + ShiftAmt = ShiftAmt & (32 - 1); > + SDValue V = N->getOperand(0); > + DebugLoc DL = N->getDebugLoc(); > + SDValue NewAdd; > + > + // FIXME: Handle arbitrary powers of 2. 
> + switch (MulAmt >> ShiftAmt) { > + case 3: // 2 + 1 > + NewAdd = DAG.getNode(ISD::ADD, DL, VT, > + V, DAG.getNode(ISD::SHL, DL, VT, > + V, DAG.getConstant(1, MVT::i32))); > + break; > + case 5: // 4 + 1 > + NewAdd = DAG.getNode(ISD::ADD, DL, VT, > + V, DAG.getNode(ISD::SHL, DL, VT, > + V, DAG.getConstant(2, MVT::i32))); > + break; > + case 7: // 8 - 1 > + NewAdd = DAG.getNode(ISD::SUB, DL, VT, > + DAG.getNode(ISD::SHL, DL, VT, > + V, DAG.getConstant(3, MVT::i32)), > + V); > + break; > + case 9: // 8 + 1 > + NewAdd = DAG.getNode(ISD::ADD, DL, VT, > + V, DAG.getNode(ISD::SHL, DL, VT, > + V, DAG.getConstant(3, MVT::i32))); > + break; > + default: return SDValue(); > + } > + > + if (ShiftAmt != 0) { > + SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAdd, > + DAG.getConstant(ShiftAmt, MVT::i32)); > + // Do not add new nodes to DAG combiner worklist. > + DCI.CombineTo(N, NewShift, false); > + return SDValue(); > + } > + > + // Do not add new nodes to DAG combiner worklist. > + DCI.CombineTo(N, NewAdd, false); > + return SDValue(); > +} > + > /// PerformVMOVRRDCombine - Target-specific dag combine xforms for > /// ARMISD::VMOVRRD. 
> static SDValue PerformVMOVRRDCombine(SDNode *N, > @@ -3970,6 +4040,7 @@ > default: break; > case ISD::ADD: return PerformADDCombine(N, DCI); > case ISD::SUB: return PerformSUBCombine(N, DCI); > + case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); > case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); > case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); > case ISD::SHL: > > Modified: llvm/trunk/test/CodeGen/ARM/mul_const.ll > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/mul_const.ll?rev=103881&r1=103880&r2=103881&view=diff > ============================================================================== > --- llvm/trunk/test/CodeGen/ARM/mul_const.ll (original) > +++ llvm/trunk/test/CodeGen/ARM/mul_const.ll Sat May 15 13:16:59 2010 > @@ -1,17 +1,43 @@ > ; RUN: llc < %s -march=arm | FileCheck %s > > -define i32 @t1(i32 %v) nounwind readnone { > +define i32 @t9(i32 %v) nounwind readnone { > entry: > -; CHECK: t1: > +; CHECK: t9: > ; CHECK: add r0, r0, r0, lsl #3 > %0 = mul i32 %v, 9 > ret i32 %0 > } > > -define i32 @t2(i32 %v) nounwind readnone { > +define i32 @t7(i32 %v) nounwind readnone { > entry: > -; CHECK: t2: > +; CHECK: t7: > ; CHECK: rsb r0, r0, r0, lsl #3 > %0 = mul i32 %v, 7 > ret i32 %0 > } > + > +define i32 @t5(i32 %v) nounwind readnone { > +entry: > +; CHECK: t5: > +; CHECK: add r0, r0, r0, lsl #2 > + %0 = mul i32 %v, 5 > + ret i32 %0 > +} > + > +define i32 @t3(i32 %v) nounwind readnone { > +entry: > +; CHECK: t3: > +; CHECK: add r0, r0, r0, lsl #1 > + %0 = mul i32 %v, 3 > + ret i32 %0 > +} > + > +define i32 @t12288(i32 %v) nounwind readnone { > +entry: > +; CHECK: t12288: > +; CHECK: add r0, r0, r0, lsl #1 > +; CHECK: mov r0, r0, lsl #12 > + %0 = mul i32 %v, 12288 > + ret i32 %0 > +} > + > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From stoklund at 2pi.dk Sat May 15 
19:36:47 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Sat, 15 May 2010 17:36:47 -0700 Subject: [llvm-commits] [llvm] r103881 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/mul_const.ll In-Reply-To: References: <20100515181659.EF8EA312800A@llvm.org> Message-ID: <5EDFB279-1503-4F2D-A5A2-131D75B1DACF@2pi.dk> On May 15, 2010, at 4:37 PM, Evan Cheng wrote: > Hi Anton, > > Please move it to general dag combiner for all power of two cases. Special casing for 3, 5, 7, 9 is fairly ugly. Jakob, I think you have a dag combine patch for this? It wasn't profitable for x86, but perhaps it's good for other targets. Nope, I have an X86 patch. Many factors involve LEA, and it is not easy to imagine a target independent algorithm. AMD has suggested instruction sequences for factors 2-31 in §8.2: http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/25112.PDF A lot of them are shl+add and shl+sub which could be target independent, I guess. All of the power-of-two's are straight shl's, except for 2 which is better done with an add. /jakob From cdavis at mymail.mines.edu Sat May 15 20:36:55 2010 From: cdavis at mymail.mines.edu (Charles Davis) Date: Sat, 15 May 2010 19:36:55 -0600 Subject: [llvm-commits] [PATCH] Support for the thiscall calling convention In-Reply-To: References: <4BEEF138.3000407@providere-consulting.com> Message-ID: <4BEF4C37.3030408@mymail.mines.edu> On 5/15/10 1:29 PM, Anton Korobeynikov wrote: > Hello > >> The attached patch adds support for Microsoft's __thiscall >> keyword. > Patch itself looks ok, but before it can be committed, thiscall CC > should be implemented in LLVM. > Otherwise the code will be silently miscompiled (and no error will be > produced as now). Ask and ye shall receive. Attached is a patch to add this support to LLVM--complete with full Target, AsmWriter, and AsmParser support. Chip -------------- next part -------------- An embedded and charset-unspecified text was scrubbed...
Name: thiscall-support.patch Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100515/d73a8d40/attachment.pl From espindola at google.com Sat May 15 21:50:10 2010 From: espindola at google.com (Rafael Espindola) Date: Sat, 15 May 2010 22:50:10 -0400 Subject: [llvm-commits] [patch] Use $ORIGIN in the rpath of libraries on non-darwin hosts Message-ID: Right now we use $ORIGIN on the rpath when linking tools, but not when linking libraries. This affects the gold plugin that links with libLTO.so. With this patch we use $ORIGIN for both. This makes it possible to move plugin and libLTO.so to any directory and it will work. Right now one has to use LD_LIBRARY_PATH. Cheers, -- Rafael Ávila de Espíndola -------------- next part -------------- diff --git a/Makefile.rules b/Makefile.rules index d77fe27..2fc5eeb 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -589,10 +589,14 @@ ifeq ($(TARGET_OS),Darwin) endif ifdef SHARED_LIBRARY +ifneq ($(HOST_OS),Darwin) + LD.Flags += $(RPATH) -Wl,'$$ORIGIN' +else ifneq ($(DARWIN_MAJVERS),4) LD.Flags += $(RPATH) -Wl,$(LibDir) endif endif +endif ifdef TOOL_VERBOSE C.Flags += -v From rafael.espindola at gmail.com Sat May 15 22:05:14 2010 From: rafael.espindola at gmail.com (Rafael Espindola) Date: Sun, 16 May 2010 03:05:14 -0000 Subject: [llvm-commits] [llvm] r103896 - /llvm/trunk/Makefile.rules Message-ID: <20100516030515.05888312800A@llvm.org> Author: rafael Date: Sat May 15 22:05:14 2010 New Revision: 103896 URL: http://llvm.org/viewvc/llvm-project?rev=103896&view=rev Log: Use $ORIGIN in the rpath of libraries.
Modified: llvm/trunk/Makefile.rules Modified: llvm/trunk/Makefile.rules URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/Makefile.rules?rev=103896&r1=103895&r2=103896&view=diff ============================================================================== --- llvm/trunk/Makefile.rules (original) +++ llvm/trunk/Makefile.rules Sat May 15 22:05:14 2010 @@ -589,10 +589,14 @@ endif ifdef SHARED_LIBRARY +ifneq ($(HOST_OS),Darwin) + LD.Flags += $(RPATH) -Wl,'$$ORIGIN' +else ifneq ($(DARWIN_MAJVERS),4) LD.Flags += $(RPATH) -Wl,$(LibDir) endif endif +endif ifdef TOOL_VERBOSE C.Flags += -v From rafael.espindola at gmail.com Sat May 15 22:13:23 2010 From: rafael.espindola at gmail.com (Rafael Espindola) Date: Sun, 16 May 2010 03:13:23 -0000 Subject: [llvm-commits] [llvm] r103897 - in /llvm/trunk: Makefile.rules tools/gold/Makefile Message-ID: <20100516031323.8E49B312800A@llvm.org> Author: rafael Date: Sat May 15 22:13:23 2010 New Revision: 103897 URL: http://llvm.org/viewvc/llvm-project?rev=103897&view=rev Log: Avoid renaming loadable modules at install time. Now the gold plugin is named LLVMgold.so in both the build and install directories.
Modified: llvm/trunk/Makefile.rules llvm/trunk/tools/gold/Makefile Modified: llvm/trunk/Makefile.rules URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/Makefile.rules?rev=103897&r1=103896&r2=103897&view=diff ============================================================================== --- llvm/trunk/Makefile.rules (original) +++ llvm/trunk/Makefile.rules Sat May 15 22:13:23 2010 @@ -1128,7 +1128,12 @@ uninstall-local:: $(Echo) Uninstall circumvented with NO_INSTALL else + +ifdef LOADABLE_MODULE +DestSharedLib = $(DESTDIR)$(PROJ_libdir)/$(LIBRARYNAME)$(SHLIBEXT) +else DestSharedLib = $(DESTDIR)$(PROJ_libdir)/lib$(LIBRARYNAME)$(SHLIBEXT) +endif install-local:: $(DestSharedLib) Modified: llvm/trunk/tools/gold/Makefile URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/gold/Makefile?rev=103897&r1=103896&r2=103897&view=diff ============================================================================== --- llvm/trunk/tools/gold/Makefile (original) +++ llvm/trunk/tools/gold/Makefile Sat May 15 22:13:23 2010 @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../.. -LIBRARYNAME = libLLVMgold +LIBRARYNAME = LLVMgold EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/gold.exports From clattner at apple.com Sat May 15 22:24:26 2010 From: clattner at apple.com (Chris Lattner) Date: Sat, 15 May 2010 20:24:26 -0700 Subject: [llvm-commits] [llvm] r103878 - in /llvm/trunk/lib/Target/ARM: ARMInstrInfo.td ARMInstrThumb.td In-Reply-To: <1273946613.22060.23.camel@aslstation> References: <20100515171920.D01D4312800A@llvm.org> <758A8ED0-9721-4303-9772-A4893507F0D6@apple.com> <1273946613.22060.23.camel@aslstation> Message-ID: On May 15, 2010, at 11:03 AM, Anton Korobeynikov wrote: > >> Ok. This should use the escape for the comment character in the asm string though. I think it is ${:comment} ? > Well, there are a lot of "@"'s in the assembler strings, are they > intentional? 
I don't know, sounds like a widespread bug :) From clattner at apple.com Sat May 15 22:27:12 2010 From: clattner at apple.com (Chris Lattner) Date: Sat, 15 May 2010 20:27:12 -0700 Subject: [llvm-commits] [llvm] r103757 - in /llvm/trunk: include/llvm/MC/MCParser/AsmParser.h include/llvm/MC/MCStreamer.h lib/MC/MCAsmStreamer.cpp lib/MC/MCMachOStreamer.cpp lib/MC/MCNullStreamer.cpp lib/MC/MCParser/AsmParser.cpp test/MC/AsmParser/directive_tbss.s In-Reply-To: <6969D864-B6A2-4B5D-B751-C13C50200A0B@apple.com> References: <20100514015028.A8EC4312800A@llvm.org> <6969D864-B6A2-4B5D-B751-C13C50200A0B@apple.com> Message-ID: On May 15, 2010, at 3:36 PM, Eric Christopher wrote: > Yeah, I was pretty unhappy about it and figured I'd be changing it. The idea is that something like: > > __thread a = 0; > > will turn into: > > .tbss _a+somemangling Higher level things should handle the mangling. MCStreamer should correspond to what the assembler parses. > Another was: > > void EmitTBSSSymbol(MCSymbol Sym, ...., StringRef Mangle) > > but then I'm stuck trying to figure out which symbol I'm mangling. It should take just a mcsymbol, size, and alignment. The "mangling" should be handled at a higher level, just like it is for $stub's etc. -Chris From evan.cheng at apple.com Sat May 15 22:27:48 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Sun, 16 May 2010 03:27:48 -0000 Subject: [llvm-commits] [llvm] r103898 - /llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Message-ID: <20100516032748.57EE7312800A@llvm.org> Author: evancheng Date: Sat May 15 22:27:48 2010 New Revision: 103898 URL: http://llvm.org/viewvc/llvm-project?rev=103898&view=rev Log: Model vst lane instructions with REG_SEQUENCE. 
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=103898&r1=103897&r2=103898&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Sat May 15 22:27:48 2010 @@ -184,6 +184,10 @@ /// SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + /// QuadQRegs - Form 4 consecutive Q registers. + /// + SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + /// OctoDRegs - Form 8 consecutive D registers. /// SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3, @@ -996,6 +1000,19 @@ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); } +/// QuadQRegs - Form 4 consecutive Q registers. +/// +SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::QSUBREG_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::QSUBREG_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::QSUBREG_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::QSUBREG_3, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); +} + /// OctoDRegs - Form 8 consecutive D registers. /// SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1, @@ -1401,11 +1418,13 @@ // Quad registers are handled by load/store of subregs. Find the subreg info. unsigned NumElts = 0; int SubregIdx = 0; + bool Even = false; EVT RegVT = VT; if (!is64BitVector) { RegVT = GetNEONSubregVT(VT); NumElts = RegVT.getVectorNumElements(); SubregIdx = (Lane < NumElts) ? 
ARM::DSUBREG_0 : ARM::DSUBREG_1; + Even = Lane < NumElts; } unsigned OpcodeIndex; @@ -1432,8 +1451,35 @@ unsigned Opc = 0; if (is64BitVector) { Opc = DOpcodes[OpcodeIndex]; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(N->getOperand(Vec+3)); + if (llvm::ModelWithRegSequence()) { + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + } else { + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + // Now extract the D registers back out. + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, VT, + RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, VT, + RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_2, dl, VT, + RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_3, dl, VT, + RegSeq)); + } else { + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(N->getOperand(Vec+3)); + } } else { // Check if this is loading the even or odd subreg of a Q register. if (Lane < NumElts) { @@ -1442,10 +1488,32 @@ Lane -= NumElts; Opc = QOpcodes1[OpcodeIndex]; } - // Extract the subregs of the input vector. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, - N->getOperand(Vec+3))); + + if (llvm::ModelWithRegSequence()) { + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); + } else { + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); + } + + // Extract the subregs of the input vector. + unsigned SubIdx = Even ? ARM::DSUBREG_0 : ARM::DSUBREG_1; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT, + RegSeq)); + } else { + // Extract the subregs of the input vector. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, + N->getOperand(Vec+3))); + } } Ops.push_back(getI32Imm(Lane)); Ops.push_back(Pred); @@ -1480,7 +1548,7 @@ // as subregs into the result. SDValue V[8]; for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { - if (SubregIdx == ARM::DSUBREG_0) { + if (Even) { V[i] = SDValue(VLdLn, Vec); V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, RegVT), 0); From echristo at apple.com Sat May 15 22:29:22 2010 From: echristo at apple.com (Eric Christopher) Date: Sat, 15 May 2010 20:29:22 -0700 Subject: [llvm-commits] [llvm] r103757 - in /llvm/trunk: include/llvm/MC/MCParser/AsmParser.h include/llvm/MC/MCStreamer.h lib/MC/MCAsmStreamer.cpp lib/MC/MCMachOStreamer.cpp lib/MC/MCNullStreamer.cpp lib/MC/MCParser/AsmParser.cpp test/MC/AsmParser/directive_tbss.s In-Reply-To: References: <20100514015028.A8EC4312800A@llvm.org> <6969D864-B6A2-4B5D-B751-C13C50200A0B@apple.com> Message-ID: <16E4C511-3674-4F4A-B475-91243D2942BC@apple.com> On May 15, 2010, at 8:27 PM, Chris Lattner wrote: > On May 15, 2010, at 3:36 PM, Eric Christopher wrote: >> Yeah, I was pretty unhappy about it and figured I'd be changing it. The idea is that something like: >> >> __thread a = 0; >> >> will turn into: >> >> .tbss _a+somemangling > > Higher level things should handle the mangling. MCStreamer should correspond to what the assembler parses. 
> >> Another was: >> >> void EmitTBSSSymbol(MCSymbol Sym, ...., StringRef Mangle) >> >> but then I'm stuck trying to figure out which symbol I'm mangling. > > It should take just a mcsymbol, size, and alignment. The "mangling" should be handled at a higher level, just like it is for $stub's etc. Cool. No argument, just hadn't figured out how it needed to happen yet. Thanks for the reminder on $stub, I'll look there. -eric From watanabesj at gmail.com Sat May 15 22:22:23 2010 From: watanabesj at gmail.com (Steven Watanabe) Date: Sat, 15 May 2010 20:22:23 -0700 Subject: [llvm-commits] [PATCH] Support for the thiscall calling convention In-Reply-To: <4BEF4C37.3030408@mymail.mines.edu> References: <4BEEF138.3000407@providere-consulting.com> <4BEF4C37.3030408@mymail.mines.edu> Message-ID: <4BEF64EF.8000007@providere-consulting.com> AMDG Charles Davis wrote: > On 5/15/10 1:29 PM, Anton Korobeynikov wrote: > >>> The attached patch adds support for Microsoft's __thiscall >>> keyword. >>> >> Patch itself looks ok, but before it can be committed, thiscall CC >> should be implemented in LLVM. >> Otherwise the code will be silently miscompiled (and no error will be >> produced as now). >> > Ask and ye shall receive. > > Attached is a patch to add this support to LLVM--complete with full > Target, AsmWriter, and AsmParser support. > Here's a slightly more complete patch. I added support for CBackend and MSIL, and some extra error handling to match fastcall. In Christ, Steven Watanabe -------------- next part -------------- A non-text attachment was scrubbed... 
Name: thiscall-support.llvm.patch Type: text/x-patch Size: 10753 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100515/5e919172/attachment.bin From evan.cheng at apple.com Sat May 15 22:51:11 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Sat, 15 May 2010 20:51:11 -0700 Subject: [llvm-commits] [llvm] r103881 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/mul_const.ll In-Reply-To: <5EDFB279-1503-4F2D-A5A2-131D75B1DACF@2pi.dk> References: <20100515181659.EF8EA312800A@llvm.org> <5EDFB279-1503-4F2D-A5A2-131D75B1DACF@2pi.dk> Message-ID: On May 15, 2010, at 5:36 PM, Jakob Stoklund Olesen wrote: > > On May 15, 2010, at 4:37 PM, Evan Cheng wrote: > >> Hi Anton, >> >> Please move it to general dag combiner for all power of two cases. Special casing for 3, 5, 7, 9 is fairly ugly. Jakob, I think you have a dag combine patch for this? It wasn't profitable for x86, but perhaps it's good for other targets. > > Nope, I have an X86 patch. Many factors involve LEA, and it is not easy to imagine a target independent algorithm. > > AMD has suggested instruction sequences for factors 2-31 in §8.2: > http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/25112.PDF > > A lot of them are shl+add and shl+sub which could be target independent, I guess. All of the power-of-two's are straight shl's, except for 2 which is better done with an add. I am talking about power-of-two's ones. There is no good reason for these to be target dependent? 
Evan > > /jakob > > > From stoklund at 2pi.dk Sat May 15 23:14:26 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Sat, 15 May 2010 21:14:26 -0700 Subject: [llvm-commits] [llvm] r103881 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/mul_const.ll In-Reply-To: References: <20100515181659.EF8EA312800A@llvm.org> <5EDFB279-1503-4F2D-A5A2-131D75B1DACF@2pi.dk> Message-ID: On May 15, 2010, at 8:51 PM, Evan Cheng wrote: > > On May 15, 2010, at 5:36 PM, Jakob Stoklund Olesen wrote: >> AMD has suggested instruction sequences for factors 2-31 in §8.2: >> http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/25112.PDF >> >> A lot of them are shl+add and shl+sub which could be target independent, I guess. All of the power-of-two's are straight shl's, except for 2 which is better done with an add. > > I am talking about power-of-two's ones. There is no good reason for these to be target dependent? Translating (mul x, 2^N) -> (shl x, n) in the dag combiner sounds reasonable, and we are already doing that in DAGCombiner::visitMUL. Then there is (mul x, 2^N+1) -> (add (shl x, n), x) and (mul x, 2^N-1) -> (sub (shl x, n), x). For these, X86 prefers to use LEA for factors 3, 5, and 9, so we are probably better off leaving that target dependent. 
/Jakob From evan.cheng at apple.com Sun May 16 03:30:20 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Sun, 16 May 2010 01:30:20 -0700 Subject: [llvm-commits] [llvm] r103881 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/mul_const.ll In-Reply-To: References: <20100515181659.EF8EA312800A@llvm.org> <5EDFB279-1503-4F2D-A5A2-131D75B1DACF@2pi.dk> Message-ID: <5FC974E3-0CB9-4D23-BFAD-6983D5997BA0@apple.com> On May 15, 2010, at 9:14 PM, Jakob Stoklund Olesen wrote: > > On May 15, 2010, at 8:51 PM, Evan Cheng wrote: > >> >> On May 15, 2010, at 5:36 PM, Jakob Stoklund Olesen wrote: >>> AMD has suggested instruction sequences for factors 2-31 in §8.2: >>> http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/25112.PDF >>> >>> A lot of them are shl+add and shl+sub which could be target independent, I guess. All of the power-of-two's are straight shl's, except for 2 which is better done with an add. >> >> I am talking about power-of-two's ones. There is no good reason for these to be target dependent? > > Translating (mul x, 2^N) -> (shl x, n) in the dag combiner sounds reasonable, and we are already doing that in DAGCombiner::visitMUL. > > Then there is (mul x, 2^N+1) -> (add (shl x, n), x) and (mul x, 2^N-1) -> (sub (shl x, n), x). For these, X86 prefers to use LEA for factors 3, 5, and 9, so we are probably better off leaving that target dependent. I would have preferred if dag combine does the transformation and x86 isel matches to the LEA. If it doesn't just work, then it's not a huge deal. Anton, for ARM to only special case 3, 5, 7, and 9, that really bugs me. Please make it deal with pow-of-2 cases or I'll continue to nag about this. 
:-) Evan > > /Jakob > From anton at korobeynikov.info Sun May 16 03:53:50 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Sun, 16 May 2010 12:53:50 +0400 Subject: [llvm-commits] [llvm] r103881 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/mul_const.ll In-Reply-To: <5FC974E3-0CB9-4D23-BFAD-6983D5997BA0@apple.com> References: <20100515181659.EF8EA312800A@llvm.org> <5EDFB279-1503-4F2D-A5A2-131D75B1DACF@2pi.dk> <5FC974E3-0CB9-4D23-BFAD-6983D5997BA0@apple.com> Message-ID: <1274000030.22060.26.camel@aslstation> Hi, Evan > Anton, for ARM to only special case 3, 5, 7, and 9, that really bugs me. > Please make it deal with pow-of-2 cases or I'll continue to nag about this. :-) I already have this patch in my queue. I'm planning to flush it today :) We can surely move the stuff into generic DAG combiner afterwards. PS: In fact, it's not 3, 5, 7 and 9, but also these ones shifted by arbitrary amount :) -- With best regards, Anton Korobeynikov. Faculty of Mathematics & Mechanics, Saint Petersburg State University. From asl at math.spbu.ru Sun May 16 03:52:46 2010 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Sun, 16 May 2010 12:52:46 +0400 Subject: [llvm-commits] [llvm] r103881 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/mul_const.ll In-Reply-To: <5FC974E3-0CB9-4D23-BFAD-6983D5997BA0@apple.com> References: <20100515181659.EF8EA312800A@llvm.org> <5EDFB279-1503-4F2D-A5A2-131D75B1DACF@2pi.dk> <5FC974E3-0CB9-4D23-BFAD-6983D5997BA0@apple.com> Message-ID: <1273999966.22060.25.camel@aslstation> Hi, Evan > Anton, for ARM to only special case 3, 5, 7, and 9, that really bugs me. > Please make it deal with pow-of-2 cases or I'll continue to nag about this. :-) I already have this patch in my queue. I'm planning to flush it today :) We can surely move the stuff into generic DAG combiner afterwards. 
PS: In fact, it's not 3, 5, 7 and 9, but also these ones shifted by arbitrary amount :) -- With best regards, Anton Korobeynikov. Faculty of Mathematics & Mechanics, Saint Petersburg State University. From asl at math.spbu.ru Sun May 16 03:54:20 2010 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Sun, 16 May 2010 08:54:20 -0000 Subject: [llvm-commits] [llvm] r103901 - /llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Message-ID: <20100516085420.A6A25312800A@llvm.org> Author: asl Date: Sun May 16 03:54:20 2010 New Revision: 103901 URL: http://llvm.org/viewvc/llvm-project?rev=103901&view=rev Log: Generalize the ARM DAG combiner of mul with constants to all power-of-two cases. Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=103901&r1=103900&r2=103901&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original) +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Sun May 16 03:54:20 2010 @@ -3613,44 +3613,31 @@ ShiftAmt = ShiftAmt & (32 - 1); SDValue V = N->getOperand(0); DebugLoc DL = N->getDebugLoc(); - SDValue NewAdd; - // FIXME: Handle arbitrary powers of 2. 
- switch (MulAmt >> ShiftAmt) { - case 3: // 2 + 1 - NewAdd = DAG.getNode(ISD::ADD, DL, VT, - V, DAG.getNode(ISD::SHL, DL, VT, - V, DAG.getConstant(1, MVT::i32))); - break; - case 5: // 4 + 1 - NewAdd = DAG.getNode(ISD::ADD, DL, VT, - V, DAG.getNode(ISD::SHL, DL, VT, - V, DAG.getConstant(2, MVT::i32))); - break; - case 7: // 8 - 1 - NewAdd = DAG.getNode(ISD::SUB, DL, VT, - DAG.getNode(ISD::SHL, DL, VT, - V, DAG.getConstant(3, MVT::i32)), - V); - break; - case 9: // 8 + 1 - NewAdd = DAG.getNode(ISD::ADD, DL, VT, - V, DAG.getNode(ISD::SHL, DL, VT, - V, DAG.getConstant(3, MVT::i32))); - break; - default: return SDValue(); - } - - if (ShiftAmt != 0) { - SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAdd, - DAG.getConstant(ShiftAmt, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. - DCI.CombineTo(N, NewShift, false); + SDValue Res; + MulAmt >>= ShiftAmt; + if (isPowerOf2_32(MulAmt - 1)) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + Res = DAG.getNode(ISD::ADD, DL, VT, + V, DAG.getNode(ISD::SHL, DL, VT, + V, DAG.getConstant(Log2_32(MulAmt-1), + MVT::i32))); + } else if (isPowerOf2_32(MulAmt + 1)) { + // (mul x, 2^N - 1) => (sub (shl x, N), x) + Res = DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, + V, DAG.getConstant(Log2_32(MulAmt+1), + MVT::i32)), + V); + } else return SDValue(); - } + + if (ShiftAmt != 0) + Res = DAG.getNode(ISD::SHL, DL, VT, Res, + DAG.getConstant(ShiftAmt, MVT::i32)); // Do not add new nodes to DAG combiner worklist. 
- DCI.CombineTo(N, NewAdd, false); + DCI.CombineTo(N, Res, false); return SDValue(); } From anton at korobeynikov.info Sun May 16 04:03:38 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Sun, 16 May 2010 13:03:38 +0400 Subject: [llvm-commits] [PATCH] Support for the thiscall calling convention In-Reply-To: <4BEF4C37.3030408@mymail.mines.edu> References: <4BEEF138.3000407@providere-consulting.com> <4BEF4C37.3030408@mymail.mines.edu> Message-ID: <1274000618.22060.31.camel@aslstation> Hello, Charles > Attached is a patch to add this support to LLVM--complete with full > Target, AsmWriter, and AsmParser support. Given > + /// X86_ThisCall - Similar to X86_StdCall. Shouldn't is delegate to stdcall in: > + // Otherwise, same as everything else. > + CCDelegateTo > +]>; ? -- With best regards, Anton Korobeynikov. Faculty of Mathematics & Mechanics, Saint Petersburg State University. From asl at math.spbu.ru Sun May 16 04:08:45 2010 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Sun, 16 May 2010 09:08:45 -0000 Subject: [llvm-commits] [llvm] r103902 - in /llvm/trunk: include/llvm/CallingConv.h lib/AsmParser/LLLexer.cpp lib/AsmParser/LLParser.cpp lib/AsmParser/LLToken.h lib/Target/CBackend/CBackend.cpp lib/Target/MSIL/MSILWriter.cpp lib/Target/X86/X86CallingConv.td lib/Target/X86/X86FastISel.cpp lib/Target/X86/X86ISelLowering.cpp lib/VMCore/AsmWriter.cpp lib/VMCore/Verifier.cpp test/CodeGen/X86/fast-cc-callee-pops.ll test/CodeGen/X86/fast-cc-pass-in-regs.ll Message-ID: <20100516090845.C155B312800A@llvm.org> Author: asl Date: Sun May 16 04:08:45 2010 New Revision: 103902 URL: http://llvm.org/viewvc/llvm-project?rev=103902&view=rev Log: Add support for thiscall calling convention. Patch by Charles Davis and Steven Watanabe! 
Modified: llvm/trunk/include/llvm/CallingConv.h llvm/trunk/lib/AsmParser/LLLexer.cpp llvm/trunk/lib/AsmParser/LLParser.cpp llvm/trunk/lib/AsmParser/LLToken.h llvm/trunk/lib/Target/CBackend/CBackend.cpp llvm/trunk/lib/Target/MSIL/MSILWriter.cpp llvm/trunk/lib/Target/X86/X86CallingConv.td llvm/trunk/lib/Target/X86/X86FastISel.cpp llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/lib/VMCore/AsmWriter.cpp llvm/trunk/lib/VMCore/Verifier.cpp llvm/trunk/test/CodeGen/X86/fast-cc-callee-pops.ll llvm/trunk/test/CodeGen/X86/fast-cc-pass-in-regs.ll Modified: llvm/trunk/include/llvm/CallingConv.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CallingConv.h?rev=103902&r1=103901&r2=103902&view=diff ============================================================================== --- llvm/trunk/include/llvm/CallingConv.h (original) +++ llvm/trunk/include/llvm/CallingConv.h Sun May 16 04:08:45 2010 @@ -74,7 +74,12 @@ ARM_AAPCS_VFP = 68, /// MSP430_INTR - Calling convention used for MSP430 interrupt routines. - MSP430_INTR = 69 + MSP430_INTR = 69, + + /// X86_ThisCall - Similar to X86_StdCall. Passes first argument in ECX, + /// others via stack. Callee is responsible for stack cleaning. MSVC uses + /// this by default for methods in its ABI. 
+ X86_ThisCall = 70 }; } // End CallingConv namespace Modified: llvm/trunk/lib/AsmParser/LLLexer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLLexer.cpp?rev=103902&r1=103901&r2=103902&view=diff ============================================================================== --- llvm/trunk/lib/AsmParser/LLLexer.cpp (original) +++ llvm/trunk/lib/AsmParser/LLLexer.cpp Sun May 16 04:08:45 2010 @@ -537,6 +537,7 @@ KEYWORD(coldcc); KEYWORD(x86_stdcallcc); KEYWORD(x86_fastcallcc); + KEYWORD(x86_thiscallcc); KEYWORD(arm_apcscc); KEYWORD(arm_aapcscc); KEYWORD(arm_aapcs_vfpcc); Modified: llvm/trunk/lib/AsmParser/LLParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLParser.cpp?rev=103902&r1=103901&r2=103902&view=diff ============================================================================== --- llvm/trunk/lib/AsmParser/LLParser.cpp (original) +++ llvm/trunk/lib/AsmParser/LLParser.cpp Sun May 16 04:08:45 2010 @@ -1074,6 +1074,7 @@ /// ::= 'coldcc' /// ::= 'x86_stdcallcc' /// ::= 'x86_fastcallcc' +/// ::= 'x86_thiscallcc' /// ::= 'arm_apcscc' /// ::= 'arm_aapcscc' /// ::= 'arm_aapcs_vfpcc' @@ -1088,6 +1089,7 @@ case lltok::kw_coldcc: CC = CallingConv::Cold; break; case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break; case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break; + case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break; case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break; case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break; case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break; Modified: llvm/trunk/lib/AsmParser/LLToken.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/AsmParser/LLToken.h?rev=103902&r1=103901&r2=103902&view=diff ============================================================================== --- llvm/trunk/lib/AsmParser/LLToken.h (original) +++ llvm/trunk/lib/AsmParser/LLToken.h Sun May 16 04:08:45 2010 @@ -68,7 
+68,7 @@ kw_c, kw_cc, kw_ccc, kw_fastcc, kw_coldcc, - kw_x86_stdcallcc, kw_x86_fastcallcc, + kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc, kw_msp430_intrcc, Modified: llvm/trunk/lib/Target/CBackend/CBackend.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/CBackend/CBackend.cpp?rev=103902&r1=103901&r2=103902&view=diff ============================================================================== --- llvm/trunk/lib/Target/CBackend/CBackend.cpp (original) +++ llvm/trunk/lib/Target/CBackend/CBackend.cpp Sun May 16 04:08:45 2010 @@ -2165,6 +2165,9 @@ case CallingConv::X86_FastCall: Out << "__attribute__((fastcall)) "; break; + case CallingConv::X86_ThisCall: + Out << "__attribute__((thiscall)) "; + break; default: break; } Modified: llvm/trunk/lib/Target/MSIL/MSILWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/MSIL/MSILWriter.cpp?rev=103902&r1=103901&r2=103902&view=diff ============================================================================== --- llvm/trunk/lib/Target/MSIL/MSILWriter.cpp (original) +++ llvm/trunk/lib/Target/MSIL/MSILWriter.cpp Sun May 16 04:08:45 2010 @@ -278,6 +278,8 @@ return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) "; case CallingConv::X86_StdCall: return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) "; + case CallingConv::X86_ThisCall: + return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvThiscall) "; default: errs() << "CallingConvID = " << CallingConvID << '\n'; llvm_unreachable("Unsupported calling convention"); Modified: llvm/trunk/lib/Target/X86/X86CallingConv.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.td?rev=103902&r1=103901&r2=103902&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86CallingConv.td (original) +++ llvm/trunk/lib/Target/X86/X86CallingConv.td Sun May 
16 04:08:45 2010 @@ -307,6 +307,20 @@ CCDelegateTo ]>; +def CC_X86_32_ThisCall : CallingConv<[ + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType>, + + // The 'nest' parameter, if any, is passed in EAX. + CCIfNest>, + + // The first integer argument is passed in ECX + CCIfType<[i32], CCAssignToReg<[ECX]>>, + + // Otherwise, same as everything else. + CCDelegateTo +]>; + def CC_X86_32_FastCC : CallingConv<[ // Handles byval parameters. Note that we can't rely on the delegation // to CC_X86_32_Common for this because that happens after code that Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=103902&r1=103901&r2=103902&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original) +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Sun May 16 04:08:45 2010 @@ -180,6 +180,8 @@ if (CC == CallingConv::X86_FastCall) return CC_X86_32_FastCall; + else if (CC == CallingConv::X86_ThisCall) + return CC_X86_32_ThisCall; else if (CC == CallingConv::Fast) return CC_X86_32_FastCC; else if (CC == CallingConv::GHC) Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=103902&r1=103901&r2=103902&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun May 16 04:08:45 2010 @@ -1383,6 +1383,8 @@ return !Subtarget->is64Bit(); case CallingConv::X86_FastCall: return !Subtarget->is64Bit(); + case CallingConv::X86_ThisCall: + return !Subtarget->is64Bit(); case CallingConv::Fast: return GuaranteedTailCallOpt; case CallingConv::GHC: @@ -1404,6 +1406,8 @@ if (CC == CallingConv::X86_FastCall) return CC_X86_32_FastCall; + else if (CC == CallingConv::X86_ThisCall) + 
return CC_X86_32_ThisCall; else if (CC == CallingConv::Fast) return CC_X86_32_FastCC; else if (CC == CallingConv::GHC) @@ -1595,7 +1599,8 @@ // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { - if (Is64Bit || CallConv != CallingConv::X86_FastCall) { + if (Is64Bit || (CallConv != CallingConv::X86_FastCall && + CallConv != CallingConv::X86_ThisCall)) { FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true, false)); } @@ -1715,7 +1720,8 @@ if (!Is64Bit) { // RegSaveFrameIndex is X86-64 only. FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); - if (CallConv == CallingConv::X86_FastCall) + if (CallConv == CallingConv::X86_FastCall || + CallConv == CallingConv::X86_ThisCall) // fastcc functions can't have varargs. FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); } @@ -7119,6 +7125,7 @@ break; } case CallingConv::X86_FastCall: + case CallingConv::X86_ThisCall: case CallingConv::Fast: // Pass 'nest' parameter in EAX. 
// Must be kept in sync with X86CallingConv.td Modified: llvm/trunk/lib/VMCore/AsmWriter.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/AsmWriter.cpp?rev=103902&r1=103901&r2=103902&view=diff ============================================================================== --- llvm/trunk/lib/VMCore/AsmWriter.cpp (original) +++ llvm/trunk/lib/VMCore/AsmWriter.cpp Sun May 16 04:08:45 2010 @@ -1573,6 +1573,7 @@ case CallingConv::Cold: Out << "coldcc "; break; case CallingConv::X86_StdCall: Out << "x86_stdcallcc "; break; case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break; + case CallingConv::X86_ThisCall: Out << "x86_thiscallcc "; break; case CallingConv::ARM_APCS: Out << "arm_apcscc "; break; case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break; case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break; @@ -1845,6 +1846,7 @@ case CallingConv::Cold: Out << " coldcc"; break; case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break; + case CallingConv::X86_ThisCall: Out << " x86_thiscallcc"; break; case CallingConv::ARM_APCS: Out << " arm_apcscc "; break; case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break; case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break; @@ -1897,6 +1899,7 @@ case CallingConv::Cold: Out << " coldcc"; break; case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break; + case CallingConv::X86_ThisCall: Out << " x86_thiscallcc"; break; case CallingConv::ARM_APCS: Out << " arm_apcscc "; break; case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break; case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break; Modified: llvm/trunk/lib/VMCore/Verifier.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/VMCore/Verifier.cpp?rev=103902&r1=103901&r2=103902&view=diff ============================================================================== 
--- llvm/trunk/lib/VMCore/Verifier.cpp (original) +++ llvm/trunk/lib/VMCore/Verifier.cpp Sun May 16 04:08:45 2010 @@ -688,6 +688,7 @@ case CallingConv::Fast: case CallingConv::Cold: case CallingConv::X86_FastCall: + case CallingConv::X86_ThisCall: Assert1(!F.isVarArg(), "Varargs functions must have C calling conventions!", &F); break; Modified: llvm/trunk/test/CodeGen/X86/fast-cc-callee-pops.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-cc-callee-pops.ll?rev=103902&r1=103901&r2=103902&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/fast-cc-callee-pops.ll (original) +++ llvm/trunk/test/CodeGen/X86/fast-cc-callee-pops.ll Sun May 16 04:08:45 2010 @@ -1,7 +1,13 @@ -; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=yonah | grep {ret 20} +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=yonah | FileCheck %s ; Check that a fastcc function pops its stack variables before returning. define x86_fastcallcc void @func(i64 %X, i64 %Y, float %G, double %Z) nounwind { ret void +; CHECK: ret{{.*}}20 +} + +define x86_thiscallcc void @func2(i32 %X, i64 %Y, float %G, double %Z) nounwind { + ret void +; CHECK: ret{{.*}}20 } Modified: llvm/trunk/test/CodeGen/X86/fast-cc-pass-in-regs.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-cc-pass-in-regs.ll?rev=103902&r1=103901&r2=103902&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/fast-cc-pass-in-regs.ll (original) +++ llvm/trunk/test/CodeGen/X86/fast-cc-pass-in-regs.ll Sun May 16 04:08:45 2010 @@ -1,15 +1,29 @@ -; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \ -; RUN: grep {mov EDX, 1} +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s ; check that fastcc is passing stuff in regs. 
declare x86_fastcallcc i64 @callee(i64) define i64 @caller() { %X = call x86_fastcallcc i64 @callee( i64 4294967299 ) ; [#uses=1] +; CHECK: mov{{.*}}EDX, 1 ret i64 %X } define x86_fastcallcc i64 @caller2(i64 %X) { ret i64 %X +; CHECK: mov{{.*}}EAX, ECX +} + +declare x86_thiscallcc i64 @callee2(i32) + +define i64 @caller3() { + %X = call x86_thiscallcc i64 @callee2( i32 3 ) +; CHECK: mov{{.*}}ECX, 3 + ret i64 %X +} + +define x86_thiscallcc i32 @caller4(i32 %X) { + ret i32 %X +; CHECK: mov{{.*}}EAX, ECX } From anton at korobeynikov.info Sun May 16 04:10:01 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Sun, 16 May 2010 13:10:01 +0400 Subject: [llvm-commits] [PATCH] Support for the thiscall calling convention In-Reply-To: <4BEF64EF.8000007@providere-consulting.com> References: <4BEEF138.3000407@providere-consulting.com> <4BEF4C37.3030408@mymail.mines.edu> <4BEF64EF.8000007@providere-consulting.com> Message-ID: <1274001001.22060.32.camel@aslstation> > Here's a slightly more complete patch. I added support > for CBackend and MSIL, and some extra error handling > to match fastcall. Applied, thanks! -- With best regards, Anton Korobeynikov. Faculty of Mathematics & Mechanics, Saint Petersburg State University. From asl at math.spbu.ru Sun May 16 04:15:36 2010 From: asl at math.spbu.ru (Anton Korobeynikov) Date: Sun, 16 May 2010 09:15:36 -0000 Subject: [llvm-commits] [llvm] r103903 - in /llvm/trunk/lib/Target/ARM: ARMInstrInfo.td ARMInstrNEON.td ARMInstrThumb.td ARMInstrThumb2.td Message-ID: <20100516091536.89950312800A@llvm.org> Author: asl Date: Sun May 16 04:15:36 2010 New Revision: 103903 URL: http://llvm.org/viewvc/llvm-project?rev=103903&view=rev Log: Chris said that the comment char should be escaped. 
Fix all the occurences of "@" in *.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td llvm/trunk/lib/Target/ARM/ARMInstrNEON.td llvm/trunk/lib/Target/ARM/ARMInstrThumb.td llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=103903&r1=103902&r2=103903&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Sun May 16 04:15:36 2010 @@ -656,12 +656,12 @@ let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def ADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, - "@ ADJCALLSTACKUP $amt1", + "${:comment} ADJCALLSTACKUP $amt1", [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, - "@ ADJCALLSTACKDOWN $amt", + "${:comment} ADJCALLSTACKDOWN $amt", [(ARMcallseq_start timm:$amt)]>; } @@ -795,7 +795,7 @@ // binutils let isBarrier = 1, isTerminator = 1 in def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, - ".long 0xe7ffdefe @ trap", [(trap)]>, + ".long 0xe7ffdefe ${:comment} trap", [(trap)]>, Requires<[IsARM]> { let Inst{27-25} = 0b011; let Inst{24-20} = 0b11111; @@ -2533,12 +2533,12 @@ def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" + "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" "add\t$val, pc, #8\n\t" "str\t$val, [$src, #+4]\n\t" "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" - "mov\tr0, #1 @ eh_setjmp end", "", + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, HasVFP2]>; } @@ -2548,12 +2548,12 @@ def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, 
IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" + "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" "add\t$val, pc, #8\n\t" "str\t$val, [$src, #+4]\n\t" "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" - "mov\tr0, #1 @ eh_setjmp end", "", + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, NoVFP]>; } Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=103903&r1=103902&r2=103903&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Sun May 16 04:15:36 2010 @@ -2819,10 +2819,10 @@ // Pseudo vector move instructions for QQ and QQQQ registers. This should // be expanded after register allocation is completed. def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src), - NoItinerary, "@ vmov\t$dst, $src", []>; + NoItinerary, "${:comment} vmov\t$dst, $src", []>; def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), - NoItinerary, "@ vmov\t$dst, $src", []>; + NoItinerary, "${:comment} vmov\t$dst, $src", []>; } // neverHasSideEffects // VMOV : Vector Move (Immediate) Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb.td?rev=103903&r1=103902&r2=103903&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb.td Sun May 16 04:15:36 2010 @@ -127,12 +127,12 @@ let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def tADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary, - "@ tADJCALLSTACKUP $amt1", + "${:comment} tADJCALLSTACKUP $amt1", [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb1Only]>; def 
tADJCALLSTACKDOWN : PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, - "@ tADJCALLSTACKDOWN $amt", + "${:comment} tADJCALLSTACKDOWN $amt", [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>; } @@ -254,14 +254,14 @@ // Pseudo instruction that will expand into a tSUBspi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), - NoItinerary, "@ sub\t$dst, $rhs", []>; + NoItinerary, "${:comment} sub\t$dst, $rhs", []>; def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - NoItinerary, "@ add\t$dst, $rhs", []>; + NoItinerary, "${:comment} add\t$dst, $rhs", []>; let Defs = [CPSR] in def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - NoItinerary, "@ and\t$dst, $rhs", []>; + NoItinerary, "${:comment} and\t$dst, $rhs", []>; } // usesCustomInserter //===----------------------------------------------------------------------===// @@ -374,7 +374,7 @@ // Far jump let Defs = [LR] in def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br, - "bl\t$target\t@ far jump",[]>; + "bl\t$target\t${:comment} far jump",[]>; def tBR_JTr : T1JTI<(outs), (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), @@ -423,7 +423,7 @@ // binutils let isBarrier = 1, isTerminator = 1 in def tTRAP : TI<(outs), (ins), IIC_Br, - ".word 0xdefe @ trap", [(trap)]>, Encoding16 { + ".word 0xdefe ${:comment} trap", [(trap)]>, Encoding16 { let Inst{15-12} = 0b1101; let Inst{11-8} = 0b1110; } @@ -870,7 +870,7 @@ let usesCustomInserter = 1 in // Expanded after instruction selection. 
def tMOVCCr_pseudo : PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc), - NoItinerary, "@ tMOVCCr $cc", + NoItinerary, "${:comment} tMOVCCr $cc", [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>; @@ -922,13 +922,13 @@ [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in { def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" "\tmov\t$val, pc\n" "\tadds\t$val, #9\n" "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" "1:", "", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>; } @@ -1015,7 +1015,7 @@ // scheduling. let isReMaterializable = 1 in def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, "@ ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + NoItinerary, "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb1Only]>; Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=103903&r1=103902&r2=103903&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Sun May 16 04:15:36 2010 @@ -888,11 +888,11 @@ // Pseudo instruction that will expand into a t2SUBrSPi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. 
def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - NoItinerary, "@ sub.w\t$dst, $sp, $imm", []>; + NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>; def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - NoItinerary, "@ subw\t$dst, $sp, $imm", []>; + NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>; def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), - NoItinerary, "@ sub\t$dst, $sp, $rhs", []>; + NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>; } // usesCustomInserter @@ -2394,13 +2394,13 @@ D31 ] in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" "\tmov\t$val, pc\n" "\tadds\t$val, #9\n" "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, HasVFP2]>; @@ -2410,13 +2410,13 @@ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" "\tmov\t$val, pc\n" "\tadds\t$val, #9\n" "\tstr\t$val, [$src, #4]\n" "\tmovs\tr0, #0\n" "\tb\t1f\n" - "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, NoVFP]>; @@ -2688,7 +2688,7 @@ // scheduling. 
let canFoldAsLoad = 1, isReMaterializable = 1 in def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + NoItinerary, "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; From anton at korobeynikov.info Sun May 16 04:17:16 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Sun, 16 May 2010 13:17:16 +0400 Subject: [llvm-commits] [llvm] r103878 - in /llvm/trunk/lib/Target/ARM: ARMInstrInfo.td ARMInstrThumb.td In-Reply-To: References: <20100515171920.D01D4312800A@llvm.org> <758A8ED0-9721-4303-9772-A4893507F0D6@apple.com> <1273946613.22060.23.camel@aslstation> Message-ID: > I don't know, sounds like a widespread bug :) Ok, I fixed all of them :) -- With best regards, Anton Korobeynikov Faculty of Mathematics and Mechanics, Saint Petersburg State University From anton at korobeynikov.info Sun May 16 05:49:43 2010 From: anton at korobeynikov.info (Anton Korobeynikov) Date: Sun, 16 May 2010 14:49:43 +0400 Subject: [llvm-commits] Global Merge Pass for ARM Message-ID: <1274006983.22060.61.camel@aslstation> Hello, Everyone Please find the patch which can be viewed as some early approximation of "section anchors" feature seen in gcc. 
It tries to solve the following problem: consider code that touches several global variables at once, e.g.: <=cut=> static int foo[N], bar[N], baz[N]; for (i = 0; i < N; ++i) { foo[i] = bar[i] * baz[i]; } <=cut=> On ARM the addresses of 3 arrays should be kept in the registers, thus this code has quite large register pressure (loop body): ldr r1, [r5], #4 ldr r2, [r6], #4 mul r1, r2, r1 str r1, [r0], #4 Pass converts the code to something like: <=cut=> static struct { int foo[N]; int bar[N]; int baz[N]; } merged; for (i = 0; i < N; ++i) { merged.foo[i] = merged.bar[i] * merged.baz[i]; } <=cut=> and in ARM code this becomes: ldr r0, [r5, #40] ldr r1, [r5, #80] mul r0, r1, r0 str r0, [r5], #4 note that we saved 2 registers here. This way only the address of the merged structure needs to be kept in the registers. For the field accesses, ldr/str with offsets are used. Pass correctly distinguishes constant and non-constant globals. Maximum size of the struct depends on the instruction set used (it's 4095 for ARM/Thumb2 and 127 for Thumb1). Maybe PPC can benefit from this pass as well, but I'm not yet sure. Ok to commit? -- With best regards, Anton Korobeynikov. Faculty of Mathematics & Mechanics, Saint Petersburg State University. -------------- next part -------------- A non-text attachment was scrubbed... 
Name: ARMGlobalMerge.cpp Type: text/x-c++src Size: 5006 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100516/b8ecd073/attachment.bin From stoklund at 2pi.dk Sun May 16 12:32:16 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Sun, 16 May 2010 10:32:16 -0700 Subject: [llvm-commits] [llvm] r103881 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/mul_const.ll In-Reply-To: <5FC974E3-0CB9-4D23-BFAD-6983D5997BA0@apple.com> References: <20100515181659.EF8EA312800A@llvm.org> <5EDFB279-1503-4F2D-A5A2-131D75B1DACF@2pi.dk> <5FC974E3-0CB9-4D23-BFAD-6983D5997BA0@apple.com> Message-ID: <353F4F51-35A7-4EC7-B554-85A5BC208A10@2pi.dk> On May 16, 2010, at 1:30 AM, Evan Cheng wrote: > > On May 15, 2010, at 9:14 PM, Jakob Stoklund Olesen wrote: >> Translating (mul x, 2^N) -> (shl x, n) in the dag combiner sounds reasonable, and we are already doing that in DAGCombiner::visitMUL. >> >> Then there is (mul x, 2^N+1) -> (add (shl x, n), x) and (mul x, 2^N-1) -> (sub (shl x, n), x). For these, X86 prefers to use LEA for factors 3, 5, and 9, so we are probably better off leaving that target dependent. > > I would have preferred if dag combine does the transformation and x86 isel matches to the LEA. If it doesn't just work, then it's not a huge deal. The transformation is in the function PerformMulCombine() in X86ISelLowering.cpp. It inserts an X86ISD::MUL_IMM node, presumably to prevent further DAG combinations on it. That X86ISD::MUL_IMM is then matched to LEA or folded into an address operand. It tries to put the MUL_IMM last such that (mul x, 48) becomes (MUL_IMM 3, (shl x, 4)). This makes it easier to match to an addressing mode. This could be done with a target independent transform: (mul x, 48) -> (add y, (shl y, 1)), y = (shl x, 4). or (mul x, 48) -> (shl (add x, (shl x, 1)), 4). 
But it is not clear that this transform would be a benefit to all targets, and the first one is better than the second for X86 because it partially matches an addressing mode. There are many ways of expanding constant multiplication, and which is best is target dependent. From dalej at apple.com Sun May 16 13:22:38 2010 From: dalej at apple.com (Dale Johannesen) Date: Sun, 16 May 2010 18:22:38 -0000 Subject: [llvm-commits] [llvm] r103911 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/2010-05-16-nosseconversion.ll Message-ID: <20100516182238.B052A312800A@llvm.org> Author: johannes Date: Sun May 16 13:22:38 2010 New Revision: 103911 URL: http://llvm.org/viewvc/llvm-project?rev=103911&view=rev Log: Make x86-64 64-bit bitconvert work when SSE is not available. (This worked as of about 6 months ago and I didn't track down exactly what broke it; I think this fix is appropriate.) Added: llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=103911&r1=103910&r2=103911&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun May 16 13:22:38 2010 @@ -217,6 +217,10 @@ if (!X86ScalarSSEf64) { setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); + if (Subtarget->is64Bit()) { + setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand); + setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand); + } } // Scalar integer divide and remainder are lowered to use operations that Added: llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll?rev=103911&view=auto ============================================================================== --- llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll (added) +++ llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll Sun May 16 13:22:38 2010 @@ -0,0 +1,11 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -mattr=-sse < %s + + at x = common global i64 0 ; [#uses=1] + +define i32 @foo() nounwind readonly ssp { +entry: + %0 = load i64* @x, align 8 ; [#uses=1] + %1 = uitofp i64 %0 to double ; [#uses=1] + %2 = fptosi double %1 to i32 ; [#uses=1] + ret i32 %2 +} From matti.niemenmaa+llvm at iki.fi Sun May 16 11:59:49 2010 From: matti.niemenmaa+llvm at iki.fi (Matti Niemenmaa) Date: Sun, 16 May 2010 19:59:49 +0300 Subject: [llvm-commits] [PATCH] InstCombine: remove malloc+free if malloc's only uses are comparisons to null Message-ID: <4BF02485.2050508@iki.fi> The attached patch makes InstCombine remove malloc+free pairs if the only uses of the malloc are comparisons of the returned pointer to null. Previously, they would only be removed if there are no uses at all (other than the free, of course). Since real-world code often actually checks whether malloc returned null, this makes the optimization significantly more useful, in my opinion. A test is provided, and an old test which checks that this optimization does /not/ happen gets an additional use for its malloc. There's one slight weirdness in it whose cleanness I'm not so sure about: when getting rid of the malloc+free, we want to replace the remaining uses of the malloc with a constant (trivially folded) non-null pointer. The only way of getting such a thing I could think of was to bitcast the enclosing function's address, since that's a GlobalValue and hence can be handled by ConstantFoldCompareInstruction. It definitely works, but it seems a bit hacky. 
-------------- next part -------------- A non-text attachment was scrubbed... Name: instcombine-mallocs-only-null-compared-against.patch Type: text/x-patch Size: 3474 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100516/afd8770a/attachment.bin From matt at console-pimps.org Sun May 16 12:55:04 2010 From: matt at console-pimps.org (Matt Fleming) Date: Sun, 16 May 2010 18:55:04 +0100 Subject: [llvm-commits] [PATCH] MC abstraction and stub patches Message-ID: <871vdbspdz.fsf@linux-g6p1.site> An embedded and charset-unspecified text was scrubbed... Name: 0001-type-asm-directive.patch Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100516/13860443/attachment.pl -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... Name: 0002-split-elfx86-asmbackend.patch Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100516/13860443/attachment-0001.pl -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... Name: 0003-target-streamer.patch Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100516/13860443/attachment-0002.pl -------------- next part -------------- An embedded and charset-unspecified text was scrubbed... Name: 0004-sectioni-abstraction.patch Url: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20100516/13860443/attachment-0003.pl From clattner at apple.com Sun May 16 14:30:30 2010 From: clattner at apple.com (Chris Lattner) Date: Sun, 16 May 2010 12:30:30 -0700 Subject: [llvm-commits] [llvm] r103878 - in /llvm/trunk/lib/Target/ARM: ARMInstrInfo.td ARMInstrThumb.td In-Reply-To: References: <20100515171920.D01D4312800A@llvm.org> <758A8ED0-9721-4303-9772-A4893507F0D6@apple.com> <1273946613.22060.23.camel@aslstation> Message-ID: <6DAE9456-9C98-430B-AFB6-65DE065FE8DC@apple.com> thanks!! 
On May 16, 2010, at 2:17 AM, Anton Korobeynikov wrote: >> I don't know, sounds like a widespread bug :) > Ok, I fixed all of them :) > > -- > With best regards, Anton Korobeynikov > Faculty of Mathematics and Mechanics, Saint Petersburg State University From clattner at apple.com Sun May 16 14:31:27 2010 From: clattner at apple.com (Chris Lattner) Date: Sun, 16 May 2010 12:31:27 -0700 Subject: [llvm-commits] [llvm] r103881 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/mul_const.ll In-Reply-To: <353F4F51-35A7-4EC7-B554-85A5BC208A10@2pi.dk> References: <20100515181659.EF8EA312800A@llvm.org> <5EDFB279-1503-4F2D-A5A2-131D75B1DACF@2pi.dk> <5FC974E3-0CB9-4D23-BFAD-6983D5997BA0@apple.com> <353F4F51-35A7-4EC7-B554-85A5BC208A10@2pi.dk> Message-ID: <314D40FB-807C-4E50-ABE7-6E630875CFEC@apple.com> On May 16, 2010, at 10:32 AM, Jakob Stoklund Olesen wrote: > > But it is not clear that this transform would be a benefit to all targets, and the first one is better than the second for X86 because it partially matches an addressing mode. The best way to handle this IMO is to have target-independent code do this, and have TargetLower* expose hooks that the code can query for its cost model. 
-Chris From dalej at apple.com Sun May 16 15:06:53 2010 From: dalej at apple.com (Dale Johannesen) Date: Sun, 16 May 2010 13:06:53 -0700 Subject: [llvm-commits] [llvm] r103881 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/mul_const.ll In-Reply-To: <314D40FB-807C-4E50-ABE7-6E630875CFEC@apple.com> References: <20100515181659.EF8EA312800A@llvm.org> <5EDFB279-1503-4F2D-A5A2-131D75B1DACF@2pi.dk> <5FC974E3-0CB9-4D23-BFAD-6983D5997BA0@apple.com> <353F4F51-35A7-4EC7-B554-85A5BC208A10@2pi.dk> <314D40FB-807C-4E50-ABE7-6E630875CFEC@apple.com> Message-ID: On May 16, 2010, at 12:31 PM, Chris Lattner wrote: > On May 16, 2010, at 10:32 AM, Jakob Stoklund Olesen wrote: >> >> But it is not clear that this transform would be a benefit to all >> targets, and the first one is better than the second for X86 >> because it partially matches an addressing mode. > > The best way to handle this IMO is to have target-independent code > do this, and have TargetLower* expose hooks that the code can query > for its cost model. > > -Chris I agree (and, fwiw, that's what gcc does). You can do a reasonable job knowing the relative cycle counts for multiply-by-constant (which depends on the value of the constant in some hardware), add, sub, and shift. From dalej at apple.com Sun May 16 15:19:04 2010 From: dalej at apple.com (Dale Johannesen) Date: Sun, 16 May 2010 20:19:04 -0000 Subject: [llvm-commits] [llvm] r103914 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/2010-05-16-nosseconversion.ll Message-ID: <20100516201904.A3FEC312800A@llvm.org> Author: johannes Date: Sun May 16 15:19:04 2010 New Revision: 103914 URL: http://llvm.org/viewvc/llvm-project?rev=103914&view=rev Log: Revert 103911; it broke a test that expects bitconvert <1xi64> -> i64 to work in MMX registers on hosts where -no-sse is the default (not mine). The right thing is to accept this and make i64->f64 conversions go through memory, but I don't have time right now. 
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=103914&r1=103913&r2=103914&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original) +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun May 16 15:19:04 2010 @@ -217,10 +217,6 @@ if (!X86ScalarSSEf64) { setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand); setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand); - if (Subtarget->is64Bit()) { - setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand); - setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand); - } } // Scalar integer divide and remainder are lowered to use operations that Modified: llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll?rev=103914&r1=103913&r2=103914&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll (original) +++ llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll Sun May 16 15:19:04 2010 @@ -1,11 +0,0 @@ -; RUN: llc -mtriple=x86_64-apple-darwin -mattr=-sse < %s - - at x = common global i64 0 ; [#uses=1] - -define i32 @foo() nounwind readonly ssp { -entry: - %0 = load i64* @x, align 8 ; [#uses=1] - %1 = uitofp i64 %0 to double ; [#uses=1] - %2 = fptosi double %1 to i32 ; [#uses=1] - ret i32 %2 -} From dalej at apple.com Sun May 16 15:19:40 2010 From: dalej at apple.com (Dale Johannesen) Date: Sun, 16 May 2010 20:19:40 -0000 Subject: [llvm-commits] [llvm] r103915 - /llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll Message-ID: <20100516201940.9E10C312800A@llvm.org> Author: johannes Date: Sun May 16 15:19:40 2010 New 
Revision: 103915 URL: http://llvm.org/viewvc/llvm-project?rev=103915&view=rev Log: Removing as part of previous reversion. Removed: llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll Removed: llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-05-16-nosseconversion.ll?rev=103914&view=auto ============================================================================== (empty) From stoklund at 2pi.dk Sun May 16 15:36:44 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Sun, 16 May 2010 13:36:44 -0700 Subject: [llvm-commits] [llvm] r103881 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp test/CodeGen/ARM/mul_const.ll In-Reply-To: References: <20100515181659.EF8EA312800A@llvm.org> <5EDFB279-1503-4F2D-A5A2-131D75B1DACF@2pi.dk> <5FC974E3-0CB9-4D23-BFAD-6983D5997BA0@apple.com> <353F4F51-35A7-4EC7-B554-85A5BC208A10@2pi.dk> <314D40FB-807C-4E50-ABE7-6E630875CFEC@apple.com> Message-ID: <3340E9C8-C150-443C-ADDF-6FC683D5981F@2pi.dk> On May 16, 2010, at 1:06 PM, Dale Johannesen wrote: > > On May 16, 2010, at 12:31 PM, Chris Lattner wrote: > >> On May 16, 2010, at 10:32 AM, Jakob Stoklund Olesen wrote: >>> >>> But it is not clear that this transform would be a benefit to all targets, and the first one is better than the second for X86 because it partially matches an addressing mode. >> >> The best way to handle this IMO is to have target-independent code do this, and have TargetLower* expose hooks that the code can query for its cost model. >> >> -Chris > > I agree (and, fwiw, that's what gcc does). You can do a reasonable job knowing the relative cycle counts for multiply-by-constant (which depends on the value of the constant in some hardware), add, sub, and shift. It sounds doable. An extra quirk is that X86 can execute up to 3 add/shl/lea instructions in parallel, dependencies allowing, but it only has one multiplier for imul. 
So if you can build a DAG with some parallel instructions, that would be extra good. An approach would be to have a target hook to provide the cost of operations, and a hook to suggest a way of breaking up a constant. A given constant multiplication (mul x, N) can be broken up recursively depending on N: Keep it: (mul x, N) Factors: (mul (mul x, N/K), K) Terms: (add (mul x, N-K), (mul x, N+K)) I am not sure how to balance parallel instructions vs register pressure. Another problem is that such an expanded constant multiplication would look really yummy to the DAG combiner. /jakob From evan.cheng at apple.com Sun May 16 20:11:46 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 17 May 2010 01:11:46 -0000 Subject: [llvm-commits] [llvm] r103917 - /llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp Message-ID: <20100517011146.A3F04312800A@llvm.org> Author: evancheng Date: Sun May 16 20:11:46 2010 New Revision: 103917 URL: http://llvm.org/viewvc/llvm-project?rev=103917&view=rev Log: No reason not to run the NEON domain crossing fix up pass in thumb2 mode. 
Modified: llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp Modified: llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp?rev=103917&r1=103916&r2=103917&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp (original) +++ llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp Sun May 16 20:11:46 2010 @@ -118,7 +118,7 @@ ARMFunctionInfo *AFI = Fn.getInfo(); const TargetMachine &TM = Fn.getTarget(); - if (AFI->isThumbFunction()) + if (AFI->isThumb1OnlyFunction()) return false; TRI = TM.getRegisterInfo(); From evan.cheng at apple.com Sun May 16 20:15:59 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Sun, 16 May 2010 18:15:59 -0700 Subject: [llvm-commits] [llvm] r103917 - /llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp In-Reply-To: <20100517011146.A3F04312800A@llvm.org> References: <20100517011146.A3F04312800A@llvm.org> Message-ID: <4E37E15C-26D7-4095-A793-510C744BF1EF@apple.com> Hi Anton, Let me know if this is not correct. I don't see why this pass is only run in ARM mode. Evan On May 16, 2010, at 6:11 PM, Evan Cheng wrote: > Author: evancheng > Date: Sun May 16 20:11:46 2010 > New Revision: 103917 > > URL: http://llvm.org/viewvc/llvm-project?rev=103917&view=rev > Log: > No reason not to run the NEON domain crossing fix up pass in thumb2 mode. 
> > Modified: > llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp > > Modified: llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp?rev=103917&r1=103916&r2=103917&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp (original) > +++ llvm/trunk/lib/Target/ARM/NEONMoveFix.cpp Sun May 16 20:11:46 2010 > @@ -118,7 +118,7 @@ > ARMFunctionInfo *AFI = Fn.getInfo(); > const TargetMachine &TM = Fn.getTarget(); > > - if (AFI->isThumbFunction()) > + if (AFI->isThumb1OnlyFunction()) > return false; > > TRI = TM.getRegisterInfo(); > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From echristo at apple.com Sun May 16 20:29:18 2010 From: echristo at apple.com (Eric Christopher) Date: Mon, 17 May 2010 01:29:18 -0000 Subject: [llvm-commits] [llvm-gcc-4.2] r103918 - /llvm-gcc-4.2/trunk/gcc/config/i386/llvm-i386.cpp Message-ID: <20100517012918.68ED8312800A@llvm.org> Author: echristo Date: Sun May 16 20:29:18 2010 New Revision: 103918 URL: http://llvm.org/viewvc/llvm-project?rev=103918&view=rev Log: Fix divide by 8 thinko in palignr128 expansion. It's there in the mmx version. 
Modified: llvm-gcc-4.2/trunk/gcc/config/i386/llvm-i386.cpp Modified: llvm-gcc-4.2/trunk/gcc/config/i386/llvm-i386.cpp URL: http://llvm.org/viewvc/llvm-project/llvm-gcc-4.2/trunk/gcc/config/i386/llvm-i386.cpp?rev=103918&r1=103917&r2=103918&view=diff ============================================================================== --- llvm-gcc-4.2/trunk/gcc/config/i386/llvm-i386.cpp (original) +++ llvm-gcc-4.2/trunk/gcc/config/i386/llvm-i386.cpp Sun May 16 20:29:18 2010 @@ -664,7 +664,9 @@ } case IX86_BUILTIN_PALIGNR128: { if (ConstantInt *Elt = dyn_cast(Ops[2])) { - unsigned shiftVal = cast(Ops[2])->getZExtValue(); + + // In the header we multiply by 8, correct that back now. + unsigned shiftVal = (cast(Ops[2])->getZExtValue())/8; // If palignr is shifting the pair of input vectors less than 17 bytes, // emit a shuffle instruction. From evan.cheng at apple.com Sun May 16 20:47:47 2010 From: evan.cheng at apple.com (Evan Cheng) Date: Mon, 17 May 2010 01:47:47 -0000 Subject: [llvm-commits] [llvm] r103922 - /llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Message-ID: <20100517014747.511D1312800A@llvm.org> Author: evancheng Date: Sun May 16 20:47:47 2010 New Revision: 103922 URL: http://llvm.org/viewvc/llvm-project?rev=103922&view=rev Log: Yes, if the redef is a copy, update the old val# with the copy. But make sure to clear the copy field if the redef is not a copy. Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=103922&r1=103921&r2=103922&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original) +++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Sun May 16 20:47:47 2010 @@ -454,14 +454,13 @@ // Value#0 is now defined by the 2-addr instruction. 
OldValNo->def = RedefIndex; - if (!PartReDef) - OldValNo->setCopy(0); - else { - // A re-def may be a copy. e.g. %reg1030:6 = VMOVD %reg1026, ... - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) - OldValNo->setCopy(&*mi); - } + OldValNo->setCopy(0); + + // A re-def may be a copy. e.g. %reg1030:6 = VMOVD %reg1026, ... + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + if (PartReDef && + tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) + OldValNo->setCopy(&*mi); // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); From echristo at apple.com Sun May 16 20:51:01 2010 From: echristo at apple.com (Eric Christopher) Date: Sun, 16 May 2010 18:51:01 -0700 Subject: [llvm-commits] [patch] Use $ORIGIN it the rpath of libraries on non-darwin hosts In-Reply-To: References: Message-ID: On May 15, 2010, at 7:50 PM, Rafael Espindola wrote: > This going to work on windows? Or am I missing some context in the diff? -eric From echristo at apple.com Sun May 16 20:57:37 2010 From: echristo at apple.com (Eric Christopher) Date: Sun, 16 May 2010 18:57:37 -0700 Subject: [llvm-commits] Global Merge Pass for ARM In-Reply-To: <1274006983.22060.61.camel@aslstation> References: <1274006983.22060.61.camel@aslstation> Message-ID: <00CB9243-E49C-4A8D-B63E-226FC9ADAFD6@apple.com> > > Please find the patch which can be viewed as some early approximation of > "section anchors" feature seen in gcc. 
> > It tries to solve the following problem: consider code that touches > several global variables at once, e.g.: > > <=cut=> > static int foo[N], bar[N], baz[N]; > > for (i = 0; i < N; ++i) { > foo[i] = bar[i] * baz[i]; > } > <=cut=> > > On ARM the addresses of 3 arrays should be kept in the registers, thus > this code has quite large register pressure (loop body): > > ldr r1, [r5], #4 > ldr r2, [r6], #4 > mul r1, r2, r1 > str r1, [r0], #4 > > Pass converts the code to something like: > > <=cut=> > static struct { > int foo[N]; > int bar[N]; > int baz[N]; > } merged; > > for (i = 0; i < N; ++i) { > merged.foo[i] = merged.bar[i] * merged.baz[i]; > } > <=cut=> > > and in ARM code this becomes: > ldr r0, [r5, #40] > ldr r1, [r5, #80] > mul r0, r1, r0 > str r0, [r5], #4 > > note that we saved 2 registers here. > This is pretty cool and simple so far. What does the benchmarking look like? > This way only the address of the merged structure needs to be kept in > the registers. For the field accesses, ldr/str with offsets are used. > Pass correctly distinguishes constant and non-constant globals. Maximum > size of the struct depends on the instruction set used (it's 4095 for > ARM/Thumb2 and 127 for Thumb1). > > Maybe PPC can benefit from this pass as well, but I'm not yet sure. > > Ok to commit? At the very least do you mind including the above writeup in a big block comment on the pass? I make no claims about the rest of it, but the pass is pretty sparse on documentation and much easier to read with the example there. Thanks. 
-eric From stoklund at 2pi.dk Sun May 16 21:07:22 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 17 May 2010 02:07:22 -0000 Subject: [llvm-commits] [llvm] r103925 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100517020722.2F1CE312800A@llvm.org> Author: stoklund Date: Sun May 16 21:07:22 2010 New Revision: 103925 URL: http://llvm.org/viewvc/llvm-project?rev=103925&view=rev Log: Make MBB a class member instead of passing it around everywhere. Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103925&r1=103924&r2=103925&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Sun May 16 21:07:22 2010 @@ -58,6 +58,9 @@ const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; + // Basic block currently being allocated. + MachineBasicBlock *MBB; + // StackSlotForVirtReg - Maps virtual regs to the frame index where these // values are spilled. 
IndexedMap StackSlotForVirtReg; @@ -130,30 +133,29 @@ private: bool runOnMachineFunction(MachineFunction &Fn); - void AllocateBasicBlock(MachineBasicBlock &MBB); + void AllocateBasicBlock(); int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); bool isLastUseOfLocalReg(MachineOperand&); void addKillFlag(LiveRegMap::iterator i); void killVirtReg(LiveRegMap::iterator i); void killVirtReg(unsigned VirtReg); - void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - LiveRegMap::iterator i, bool isKill); - void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned VirtReg, bool isKill); + void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator i, + bool isKill); + void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg, + bool isKill); void usePhysReg(MachineOperand&); - void definePhysReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned PhysReg, RegState NewState); + void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState); LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); - LiveRegMap::iterator allocVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg, unsigned Hint); - unsigned defineVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, unsigned VirtReg, unsigned Hint); - unsigned reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, + LiveRegMap::iterator allocVirtReg(MachineInstr *MI, unsigned VirtReg, + unsigned Hint); + unsigned defineVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, + unsigned Hint); + unsigned reloadVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); - void spillAll(MachineBasicBlock &MBB, MachineInstr *MI); + void spillAll(MachineInstr *MI); void setPhysReg(MachineOperand &MO, unsigned PhysReg); }; char RAFast::ID = 0; @@ -233,19 +235,17 @@ /// spillVirtReg - This method spills the value specified by VirtReg into the /// corresponding stack 
slot if needed. If isKill is set, the register is also /// killed. -void RAFast::spillVirtReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, +void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg, bool isKill) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); assert(lri != LiveVirtRegs.end() && "Spilling unmapped virtual register"); - spillVirtReg(MBB, MI, lri, isKill); + spillVirtReg(MI, lri, isKill); } /// spillVirtReg - Do the actual work of spilling. -void RAFast::spillVirtReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, +void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator lri, bool isKill) { LiveReg &LR = lri->second; assert(PhysRegState[LR.PhysReg] == lri->first && "Broken RegState mapping"); @@ -259,10 +259,9 @@ DEBUG(dbgs() << "Spilling %reg" << lri->first << " in " << TRI->getName(LR.PhysReg)); const TargetRegisterClass *RC = MRI->getRegClass(lri->first); - int FrameIndex = getStackSpaceFor(lri->first, RC); - DEBUG(dbgs() << " to stack slot #" << FrameIndex << "\n"); - TII->storeRegToStackSlot(MBB, MI, LR.PhysReg, spillKill, - FrameIndex, RC, TRI); + int FI = getStackSpaceFor(lri->first, RC); + DEBUG(dbgs() << " to stack slot #" << FI << "\n"); + TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, spillKill, FI, RC, TRI); ++NumStores; // Update statistics if (spillKill) @@ -279,14 +278,14 @@ } /// spillAll - Spill all dirty virtregs without killing them. 
-void RAFast::spillAll(MachineBasicBlock &MBB, MachineInstr *MI) { +void RAFast::spillAll(MachineInstr *MI) { SmallVector Dirty; for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); i != e; ++i) if (i->second.Dirty) Dirty.push_back(i->first); for (unsigned i = 0, e = Dirty.size(); i != e; ++i) - spillVirtReg(MBB, MI, Dirty[i], false); + spillVirtReg(MI, Dirty[i], false); } /// usePhysReg - Handle the direct use of a physical register. @@ -352,14 +351,14 @@ /// definePhysReg - Mark PhysReg as reserved or free after spilling any /// virtregs. This is very similar to defineVirtReg except the physreg is /// reserved instead of allocated. -void RAFast::definePhysReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned PhysReg, RegState NewState) { +void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, + RegState NewState) { UsedInInstr.set(PhysReg); switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; default: - spillVirtReg(MBB, MI, VirtReg, true); + spillVirtReg(MI, VirtReg, true); // Fall through. case regFree: case regReserved: @@ -376,7 +375,7 @@ case regDisabled: break; default: - spillVirtReg(MBB, MI, VirtReg, true); + spillVirtReg(MI, VirtReg, true); // Fall through. case regFree: case regReserved: @@ -402,8 +401,7 @@ } /// allocVirtReg - Allocate a physical register for VirtReg. -RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineBasicBlock &MBB, - MachineInstr *MI, +RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned Hint) { const unsigned spillCost = 100; @@ -443,7 +441,7 @@ case regReserved: break; default: - spillVirtReg(MBB, MI, PhysRegState[Hint], true); + spillVirtReg(MI, PhysRegState[Hint], true); // Fall through. case regFree: return assignVirtToPhysReg(VirtReg, Hint); @@ -520,7 +518,7 @@ // BestCost is 0 when all aliases are already disabled. 
if (BestCost) { if (PhysRegState[BestReg] != regDisabled) - spillVirtReg(MBB, MI, PhysRegState[BestReg], true); + spillVirtReg(MI, PhysRegState[BestReg], true); else { // Make sure all aliases are disabled. for (const unsigned *AS = TRI->getAliasSet(BestReg); @@ -532,7 +530,7 @@ PhysRegState[Alias] = regDisabled; break; default: - spillVirtReg(MBB, MI, PhysRegState[Alias], true); + spillVirtReg(MI, PhysRegState[Alias], true); PhysRegState[Alias] = regDisabled; break; } @@ -556,13 +554,13 @@ } /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. -unsigned RAFast::defineVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, unsigned VirtReg, unsigned Hint) { +unsigned RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); if (lri == LiveVirtRegs.end()) - lri = allocVirtReg(MBB, MI, VirtReg, Hint); + lri = allocVirtReg(MI, VirtReg, Hint); else addKillFlag(lri); // Kill before redefine. LiveReg &LR = lri->second; @@ -574,18 +572,18 @@ } /// reloadVirtReg - Make sure VirtReg is available in a physreg and return it. 
-unsigned RAFast::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, unsigned VirtReg, unsigned Hint) { +unsigned RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); if (lri == LiveVirtRegs.end()) { - lri = allocVirtReg(MBB, MI, VirtReg, Hint); + lri = allocVirtReg(MI, VirtReg, Hint); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into " << TRI->getName(lri->second.PhysReg) << "\n"); - TII->loadRegFromStackSlot(MBB, MI, lri->second.PhysReg, FrameIndex, RC, + TII->loadRegFromStackSlot(*MBB, MI, lri->second.PhysReg, FrameIndex, RC, TRI); ++NumLoads; } else if (lri->second.Dirty) { @@ -614,25 +612,25 @@ MO.setReg(PhysReg); } -void RAFast::AllocateBasicBlock(MachineBasicBlock &MBB) { - DEBUG(dbgs() << "\nAllocating " << MBB); +void RAFast::AllocateBasicBlock() { + DEBUG(dbgs() << "\nAllocating " << *MBB); atEndOfBlock = false; PhysRegState.assign(TRI->getNumRegs(), regDisabled); assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?"); - MachineBasicBlock::iterator MII = MBB.begin(); + MachineBasicBlock::iterator MII = MBB->begin(); // Add live-in registers as live. - for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(), - E = MBB.livein_end(); I != E; ++I) - definePhysReg(MBB, MII, *I, regReserved); + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) + definePhysReg(MII, *I, regReserved); SmallVector VirtKills, PhysDefs; SmallVector Coalesced; // Otherwise, sequentially allocate each instruction in the MBB. 
- while (MII != MBB.end()) { + while (MII != MBB->end()) { MachineInstr *MI = MII++; const TargetInstrDesc &TID = MI->getDesc(); DEBUG({ @@ -711,7 +709,7 @@ if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { - definePhysReg(MBB, MI, Reg, MO.isDead() ? regFree : regReserved); + definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); PhysDefs.push_back(Reg); } } @@ -725,13 +723,13 @@ unsigned Reg = MO.getReg(); if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; if (MO.isUse()) { - unsigned PhysReg = reloadVirtReg(MBB, MI, i, Reg, CopyDst); + unsigned PhysReg = reloadVirtReg(MI, i, Reg, CopyDst); CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; setPhysReg(MO, PhysReg); if (MO.isKill()) VirtKills.push_back(Reg); } else if (MO.isEarlyClobber()) { - unsigned PhysReg = defineVirtReg(MBB, MI, i, Reg, 0); + unsigned PhysReg = defineVirtReg(MI, i, Reg, 0); setPhysReg(MO, PhysReg); PhysDefs.push_back(PhysReg); } @@ -763,11 +761,11 @@ if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (!Allocatable.test(Reg)) continue; - definePhysReg(MBB, MI, Reg, (MO.isImplicit() || MO.isDead()) ? - regFree : regReserved); + definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? + regFree : regReserved); continue; } - unsigned PhysReg = defineVirtReg(MBB, MI, i, Reg, CopySrc); + unsigned PhysReg = defineVirtReg(MI, i, Reg, CopySrc); if (MO.isDead()) { VirtKills.push_back(Reg); CopyDst = 0; // cancel coalescing; @@ -779,7 +777,7 @@ // Spill all dirty virtregs before a call, in case of an exception. if (TID.isCall()) { DEBUG(dbgs() << " Spilling remaining registers before call.\n"); - spillAll(MBB, MI); + spillAll(MI); } // Process virtreg deads. @@ -799,8 +797,8 @@ // Spill all physical registers holding virtual registers now. 
atEndOfBlock = true; - MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); - if (MI != MBB.end() && MI->getDesc().isReturn()) { + MachineBasicBlock::iterator MI = MBB->getFirstTerminator(); + if (MI != MBB->end() && MI->getDesc().isReturn()) { // This is a return block, kill all virtual registers. DEBUG(dbgs() << "Killing live registers at end of return block.\n"); for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); @@ -811,17 +809,17 @@ DEBUG(dbgs() << "Spilling live registers at end of block.\n"); for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); i != e; ++i) - spillVirtReg(MBB, MI, i, true); + spillVirtReg(MI, i, true); } LiveVirtRegs.clear(); // Erase all the coalesced copies. We are delaying it until now because // LiveVirtsRegs might refer to the instrs. for (unsigned i = 0, e = Coalesced.size(); i != e; ++i) - MBB.erase(Coalesced[i]); + MBB->erase(Coalesced[i]); NumCopies += Coalesced.size(); - DEBUG(MBB.dump()); + DEBUG(MBB->dump()); } /// runOnMachineFunction - Register allocate the whole function @@ -847,9 +845,11 @@ StackSlotForVirtReg.grow(LastVirtReg); // Loop over all of the basic blocks, eliminating virtual register references - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) - AllocateBasicBlock(*MBB); + for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end(); + MBBi != MBBe; ++MBBi) { + MBB = &*MBBi; + AllocateBasicBlock(); + } // Make sure the set of used physregs is closed under subreg operations. 
MRI->closePhysRegsUsed(*TRI); From stoklund at 2pi.dk Sun May 16 21:07:29 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 17 May 2010 02:07:29 -0000 Subject: [llvm-commits] [llvm] r103926 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100517020730.02F14312800A@llvm.org> Author: stoklund Date: Sun May 16 21:07:29 2010 New Revision: 103926 URL: http://llvm.org/viewvc/llvm-project?rev=103926&view=rev Log: Reduce hashtable probes by using DenseMap::insert() for lookup. Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103926&r1=103925&r2=103926&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Sun May 16 21:07:29 2010 @@ -73,12 +73,11 @@ bool Dirty; // Register needs spill. LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0), - Dirty(false) { - assert(p && "Don't create LiveRegs without a PhysReg"); - } + Dirty(false) {} }; typedef DenseMap LiveRegMap; + typedef LiveRegMap::value_type LiveRegEntry; // LiveVirtRegs - This map contains entries for each virtual register // that is currently available in a physical register. 
@@ -137,7 +136,7 @@ int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); bool isLastUseOfLocalReg(MachineOperand&); - void addKillFlag(LiveRegMap::iterator i); + void addKillFlag(const LiveReg&); void killVirtReg(LiveRegMap::iterator i); void killVirtReg(unsigned VirtReg); void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator i, @@ -147,14 +146,12 @@ void usePhysReg(MachineOperand&); void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState); - LiveRegMap::iterator assignVirtToPhysReg(unsigned VirtReg, - unsigned PhysReg); - LiveRegMap::iterator allocVirtReg(MachineInstr *MI, unsigned VirtReg, - unsigned Hint); - unsigned defineVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, - unsigned Hint); - unsigned reloadVirtReg(MachineInstr *MI, - unsigned OpNum, unsigned VirtReg, unsigned Hint); + void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg); + void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint); + unsigned defineVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint); + unsigned reloadVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint); void spillAll(MachineInstr *MI); void setPhysReg(MachineOperand &MO, unsigned PhysReg); }; @@ -199,21 +196,18 @@ } /// addKillFlag - Set kill flags on last use of a virtual register. 
-void RAFast::addKillFlag(LiveRegMap::iterator lri) { - assert(lri != LiveVirtRegs.end() && "Killing unmapped virtual register"); - const LiveReg &LR = lri->second; - if (LR.LastUse) { - MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); - if (MO.isDef()) - MO.setIsDead(); - else if (!LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) - MO.setIsKill(); - } +void RAFast::addKillFlag(const LiveReg &LR) { + if (!LR.LastUse) return; + MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); + if (MO.isDef()) + MO.setIsDead(); + else if (!LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) + MO.setIsKill(); } /// killVirtReg - Mark virtreg as no longer available. void RAFast::killVirtReg(LiveRegMap::iterator lri) { - addKillFlag(lri); + addKillFlag(lri->second); const LiveReg &LR = lri->second; assert(PhysRegState[LR.PhysReg] == lri->first && "Broken RegState mapping"); PhysRegState[LR.PhysReg] = regFree; @@ -392,19 +386,19 @@ /// that PhysReg is the proper container for VirtReg now. The physical /// register must not be used for anything else when this is called. /// -RAFast::LiveRegMap::iterator -RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { - DEBUG(dbgs() << "Assigning %reg" << VirtReg << " to " +void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) { + DEBUG(dbgs() << "Assigning %reg" << LRE.first << " to " << TRI->getName(PhysReg) << "\n"); - PhysRegState[PhysReg] = VirtReg; - return LiveVirtRegs.insert(std::make_pair(VirtReg, PhysReg)).first; + PhysRegState[PhysReg] = LRE.first; + assert(!LRE.second.PhysReg && "Already assigned a physreg"); + LRE.second.PhysReg = PhysReg; } /// allocVirtReg - Allocate a physical register for VirtReg. 
-RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, - unsigned VirtReg, - unsigned Hint) { +void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { const unsigned spillCost = 100; + const unsigned VirtReg = LRE.first; + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); @@ -444,7 +438,7 @@ spillVirtReg(MI, PhysRegState[Hint], true); // Fall through. case regFree: - return assignVirtToPhysReg(VirtReg, Hint); + return assignVirtToPhysReg(LRE, Hint); } } @@ -460,7 +454,7 @@ continue; case regFree: if (!UsedInInstr.test(PhysReg)) - return assignVirtToPhysReg(VirtReg, PhysReg); + return assignVirtToPhysReg(LRE, PhysReg); continue; default: // Grab the first spillable register we meet. @@ -537,7 +531,7 @@ } } } - return assignVirtToPhysReg(VirtReg, BestReg); + return assignVirtToPhysReg(LRE, BestReg); } // Nothing we can do. @@ -550,7 +544,6 @@ MI->print(Msg, TM); } report_fatal_error(Msg.str()); - return LiveVirtRegs.end(); } /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. @@ -558,12 +551,15 @@ unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); - LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); - if (lri == LiveVirtRegs.end()) - lri = allocVirtReg(MI, VirtReg, Hint); - else - addKillFlag(lri); // Kill before redefine. + LiveRegMap::iterator lri; + bool New; + tie(lri, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); LiveReg &LR = lri->second; + if (New) + allocVirtReg(MI, *lri, Hint); + else + addKillFlag(LR); // Kill before redefine. 
+ assert(LR.PhysReg && "Register not assigned"); LR.LastUse = MI; LR.LastOpNum = OpNum; LR.Dirty = true; @@ -576,17 +572,19 @@ unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); - LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); - if (lri == LiveVirtRegs.end()) { - lri = allocVirtReg(MI, VirtReg, Hint); + LiveRegMap::iterator lri; + bool New; + tie(lri, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); + LiveReg &LR = lri->second; + if (New) { + allocVirtReg(MI, *lri, Hint); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into " - << TRI->getName(lri->second.PhysReg) << "\n"); - TII->loadRegFromStackSlot(*MBB, MI, lri->second.PhysReg, FrameIndex, RC, - TRI); + << TRI->getName(LR.PhysReg) << "\n"); + TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI); ++NumLoads; - } else if (lri->second.Dirty) { + } else if (LR.Dirty) { MachineOperand &MO = MI->getOperand(OpNum); if (isLastUseOfLocalReg(MO)) { DEBUG(dbgs() << "Killing last use: " << MO << "\n"); @@ -596,7 +594,7 @@ MO.setIsKill(false); } } - LiveReg &LR = lri->second; + assert(LR.PhysReg && "Register not assigned"); LR.LastUse = MI; LR.LastOpNum = OpNum; UsedInInstr.set(LR.PhysReg); From stoklund at 2pi.dk Sun May 16 21:07:32 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 17 May 2010 02:07:32 -0000 Subject: [llvm-commits] [llvm] r103927 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100517020732.DEC3D3128018@llvm.org> Author: stoklund Date: Sun May 16 21:07:32 2010 New Revision: 103927 URL: http://llvm.org/viewvc/llvm-project?rev=103927&view=rev Log: Spill and kill all virtual registers across a call. Debug code doesn't use callee saved registers anyway, and the code is simpler this way. Now spillVirtReg always kills, and the isKill parameter is not needed. 
Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103927&r1=103926&r2=103927&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Sun May 16 21:07:32 2010 @@ -50,7 +50,7 @@ public: static char ID; RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1), - atEndOfBlock(false) {} + isBulkSpilling(false) {} private: const TargetMachine *TM; MachineFunction *MF; @@ -113,10 +113,10 @@ // Allocatable - vector of allocatable physical registers. BitVector Allocatable; - // atEndOfBlock - This flag is set after allocating all instructions in a - // block, before emitting final spills. When it is set, LiveRegMap is no - // longer updated properly sonce it will be cleared anyway. - bool atEndOfBlock; + // isBulkSpilling - This flag is set when LiveRegMap will be cleared + // completely after spilling all live registers. LiveRegMap entries should + // not be erased. 
+ bool isBulkSpilling; public: virtual const char *getPassName() const { @@ -139,10 +139,8 @@ void addKillFlag(const LiveReg&); void killVirtReg(LiveRegMap::iterator i); void killVirtReg(unsigned VirtReg); - void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator i, - bool isKill); - void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg, - bool isKill); + void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator i); + void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg); void usePhysReg(MachineOperand&); void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState); @@ -211,9 +209,8 @@ const LiveReg &LR = lri->second; assert(PhysRegState[LR.PhysReg] == lri->first && "Broken RegState mapping"); PhysRegState[LR.PhysReg] = regFree; - // Erase from LiveVirtRegs unless we're at the end of the block when - // everything will be bulk erased. - if (!atEndOfBlock) + // Erase from LiveVirtRegs unless we're spilling in bulk. + if (!isBulkSpilling) LiveVirtRegs.erase(lri); } @@ -229,26 +226,24 @@ /// spillVirtReg - This method spills the value specified by VirtReg into the /// corresponding stack slot if needed. If isKill is set, the register is also /// killed. -void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, - unsigned VirtReg, bool isKill) { +void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); assert(lri != LiveVirtRegs.end() && "Spilling unmapped virtual register"); - spillVirtReg(MI, lri, isKill); + spillVirtReg(MI, lri); } /// spillVirtReg - Do the actual work of spilling. 
void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, - LiveRegMap::iterator lri, bool isKill) { + LiveRegMap::iterator lri) { LiveReg &LR = lri->second; assert(PhysRegState[LR.PhysReg] == lri->first && "Broken RegState mapping"); - // If this physreg is used by the instruction, we want to kill it on the - // instruction, not on the spill. - bool spillKill = isKill && LR.LastUse != MI; - if (LR.Dirty) { + // If this physreg is used by the instruction, we want to kill it on the + // instruction, not on the spill. + bool spillKill = LR.LastUse != MI; LR.Dirty = false; DEBUG(dbgs() << "Spilling %reg" << lri->first << " in " << TRI->getName(LR.PhysReg)); @@ -260,26 +255,18 @@ if (spillKill) LR.LastUse = 0; // Don't kill register again - else if (!isKill) { - MachineInstr *Spill = llvm::prior(MI); - LR.LastUse = Spill; - LR.LastOpNum = Spill->findRegisterUseOperandIdx(LR.PhysReg); - } } - - if (isKill) - killVirtReg(lri); + killVirtReg(lri); } /// spillAll - Spill all dirty virtregs without killing them. void RAFast::spillAll(MachineInstr *MI) { - SmallVector Dirty; + isBulkSpilling = true; for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); i != e; ++i) - if (i->second.Dirty) - Dirty.push_back(i->first); - for (unsigned i = 0, e = Dirty.size(); i != e; ++i) - spillVirtReg(MI, Dirty[i], false); + spillVirtReg(MI, i); + LiveVirtRegs.clear(); + isBulkSpilling = false; } /// usePhysReg - Handle the direct use of a physical register. @@ -352,7 +339,7 @@ case regDisabled: break; default: - spillVirtReg(MI, VirtReg, true); + spillVirtReg(MI, VirtReg); // Fall through. case regFree: case regReserved: @@ -369,7 +356,7 @@ case regDisabled: break; default: - spillVirtReg(MI, VirtReg, true); + spillVirtReg(MI, VirtReg); // Fall through. case regFree: case regReserved: @@ -435,7 +422,7 @@ case regReserved: break; default: - spillVirtReg(MI, PhysRegState[Hint], true); + spillVirtReg(MI, PhysRegState[Hint]); // Fall through. 
case regFree: return assignVirtToPhysReg(LRE, Hint); @@ -512,7 +499,7 @@ // BestCost is 0 when all aliases are already disabled. if (BestCost) { if (PhysRegState[BestReg] != regDisabled) - spillVirtReg(MI, PhysRegState[BestReg], true); + spillVirtReg(MI, PhysRegState[BestReg]); else { // Make sure all aliases are disabled. for (const unsigned *AS = TRI->getAliasSet(BestReg); @@ -524,7 +511,7 @@ PhysRegState[Alias] = regDisabled; break; default: - spillVirtReg(MI, PhysRegState[Alias], true); + spillVirtReg(MI, PhysRegState[Alias]); PhysRegState[Alias] = regDisabled; break; } @@ -613,7 +600,6 @@ void RAFast::AllocateBasicBlock() { DEBUG(dbgs() << "\nAllocating " << *MBB); - atEndOfBlock = false; PhysRegState.assign(TRI->getNumRegs(), regDisabled); assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?"); @@ -794,25 +780,11 @@ } // Spill all physical registers holding virtual registers now. - atEndOfBlock = true; - MachineBasicBlock::iterator MI = MBB->getFirstTerminator(); - if (MI != MBB->end() && MI->getDesc().isReturn()) { - // This is a return block, kill all virtual registers. - DEBUG(dbgs() << "Killing live registers at end of return block.\n"); - for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); - i != e; ++i) - killVirtReg(i); - } else { - // This is a normal block, spill any dirty virtregs. - DEBUG(dbgs() << "Spilling live registers at end of block.\n"); - for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); - i != e; ++i) - spillVirtReg(MI, i, true); - } - LiveVirtRegs.clear(); + DEBUG(dbgs() << "Spilling live registers at end of block.\n"); + spillAll(MBB->getFirstTerminator()); // Erase all the coalesced copies. We are delaying it until now because - // LiveVirtsRegs might refer to the instrs. + // LiveVirtRegs might refer to the instrs. 
for (unsigned i = 0, e = Coalesced.size(); i != e; ++i) MBB->erase(Coalesced[i]); NumCopies += Coalesced.size(); From echristo at apple.com Sun May 16 21:13:02 2010 From: echristo at apple.com (Eric Christopher) Date: Mon, 17 May 2010 02:13:02 -0000 Subject: [llvm-commits] [llvm] r103928 - in /llvm/trunk: lib/MC/MCAsmStreamer.cpp lib/MC/MCParser/AsmParser.cpp test/MC/AsmParser/directive_tbss.s Message-ID: <20100517021302.C3B69312800A@llvm.org> Author: echristo Date: Sun May 16 21:13:02 2010 New Revision: 103928 URL: http://llvm.org/viewvc/llvm-project?rev=103928&view=rev Log: Assume that we'll handle mangling the symbols earlier and just put the symbol to the file as we have it. Simplifies our tbss handling. Modified: llvm/trunk/lib/MC/MCAsmStreamer.cpp llvm/trunk/lib/MC/MCParser/AsmParser.cpp llvm/trunk/test/MC/AsmParser/directive_tbss.s Modified: llvm/trunk/lib/MC/MCAsmStreamer.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCAsmStreamer.cpp?rev=103928&r1=103927&r2=103928&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCAsmStreamer.cpp (original) +++ llvm/trunk/lib/MC/MCAsmStreamer.cpp Sun May 16 21:13:02 2010 @@ -363,14 +363,13 @@ EmitEOL(); } -// .tbss sym$tlv$init, size, align +// .tbss sym, size, align +// This depends that the symbol has already been mangled from the original, +// e.g. _a. void MCAsmStreamer::EmitTBSSSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { assert(Symbol != NULL && "Symbol shouldn't be NULL!"); - OS << ".tbss "; - - // This is a mach-o specific directive and the name requires some mangling. - OS << *Symbol << "$tlv$init, " << Size; + OS << ".tbss " << *Symbol << ", " << Size; // Output align if we have it.
if (ByteAlignment != 0) OS << ", " << Log2_32(ByteAlignment); Modified: llvm/trunk/lib/MC/MCParser/AsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/MC/MCParser/AsmParser.cpp?rev=103928&r1=103927&r2=103928&view=diff ============================================================================== --- llvm/trunk/lib/MC/MCParser/AsmParser.cpp (original) +++ llvm/trunk/lib/MC/MCParser/AsmParser.cpp Sun May 16 21:13:02 2010 @@ -1436,13 +1436,9 @@ StringRef Name; if (ParseIdentifier(Name)) return TokError("expected identifier in directive"); - - // Demangle the name output. The trailing characters are guaranteed to be - // $tlv$init so just strip that off. - StringRef DemName = Name.substr(0, Name.size() - strlen("$tlv$init")); - + // Handle the identifier as the key symbol. - MCSymbol *Sym = CreateSymbol(DemName); + MCSymbol *Sym = CreateSymbol(Name); if (Lexer.isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Modified: llvm/trunk/test/MC/AsmParser/directive_tbss.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AsmParser/directive_tbss.s?rev=103928&r1=103927&r2=103928&view=diff ============================================================================== --- llvm/trunk/test/MC/AsmParser/directive_tbss.s (original) +++ llvm/trunk/test/MC/AsmParser/directive_tbss.s Sun May 16 21:13:02 2010 @@ -1,4 +1,4 @@ -# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s +# RUN: llvm-mc -triple i386-unknown-darwin %s | FileCheck %s # CHECK: .tbss _a$tlv$init, 4 # CHECK: .tbss _b$tlv$init, 4, 3 From stoklund at 2pi.dk Sun May 16 21:49:16 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 17 May 2010 02:49:16 -0000 Subject: [llvm-commits] [llvm] r103929 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100517024916.124CD312800A@llvm.org> Author: stoklund Date: Sun May 16 21:49:15 2010 New Revision: 103929 URL: http://llvm.org/viewvc/llvm-project?rev=103929&view=rev Log: Boldly attempt consistent 
capitalization. Functional changes unintended. Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103929&r1=103928&r2=103929&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Sun May 16 21:49:15 2010 @@ -137,9 +137,9 @@ bool isLastUseOfLocalReg(MachineOperand&); void addKillFlag(const LiveReg&); - void killVirtReg(LiveRegMap::iterator i); + void killVirtReg(LiveRegMap::iterator); void killVirtReg(unsigned VirtReg); - void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator i); + void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator); void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg); void usePhysReg(MachineOperand&); @@ -179,9 +179,9 @@ bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { // Check for non-debug uses or defs following MO. // This is the most likely way to fail - fast path it. - MachineOperand *i = &MO; - while ((i = i->getNextOperandForReg())) - if (!i->isDebug()) + MachineOperand *Next = &MO; + while ((Next = Next->getNextOperandForReg())) + if (!Next->isDebug()) return false; // If the register has ever been spilled or reloaded, we conservatively assume @@ -204,23 +204,23 @@ } /// killVirtReg - Mark virtreg as no longer available. -void RAFast::killVirtReg(LiveRegMap::iterator lri) { - addKillFlag(lri->second); - const LiveReg &LR = lri->second; - assert(PhysRegState[LR.PhysReg] == lri->first && "Broken RegState mapping"); +void RAFast::killVirtReg(LiveRegMap::iterator LRI) { + addKillFlag(LRI->second); + const LiveReg &LR = LRI->second; + assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); PhysRegState[LR.PhysReg] = regFree; // Erase from LiveVirtRegs unless we're spilling in bulk. 
if (!isBulkSpilling) - LiveVirtRegs.erase(lri); + LiveVirtRegs.erase(LRI); } /// killVirtReg - Mark virtreg as no longer available. void RAFast::killVirtReg(unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); - LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); - if (lri != LiveVirtRegs.end()) - killVirtReg(lri); + LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + if (LRI != LiveVirtRegs.end()) + killVirtReg(LRI); } /// spillVirtReg - This method spills the value specified by VirtReg into the @@ -229,34 +229,34 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); - LiveRegMap::iterator lri = LiveVirtRegs.find(VirtReg); - assert(lri != LiveVirtRegs.end() && "Spilling unmapped virtual register"); - spillVirtReg(MI, lri); + LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register"); + spillVirtReg(MI, LRI); } /// spillVirtReg - Do the actual work of spilling. void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, - LiveRegMap::iterator lri) { - LiveReg &LR = lri->second; - assert(PhysRegState[LR.PhysReg] == lri->first && "Broken RegState mapping"); + LiveRegMap::iterator LRI) { + LiveReg &LR = LRI->second; + assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); if (LR.Dirty) { // If this physreg is used by the instruction, we want to kill it on the // instruction, not on the spill. 
- bool spillKill = LR.LastUse != MI; + bool SpillKill = LR.LastUse != MI; LR.Dirty = false; - DEBUG(dbgs() << "Spilling %reg" << lri->first + DEBUG(dbgs() << "Spilling %reg" << LRI->first << " in " << TRI->getName(LR.PhysReg)); - const TargetRegisterClass *RC = MRI->getRegClass(lri->first); - int FI = getStackSpaceFor(lri->first, RC); + const TargetRegisterClass *RC = MRI->getRegClass(LRI->first); + int FI = getStackSpaceFor(LRI->first, RC); DEBUG(dbgs() << " to stack slot #" << FI << "\n"); - TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, spillKill, FI, RC, TRI); + TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI); ++NumStores; // Update statistics - if (spillKill) + if (SpillKill) LR.LastUse = 0; // Don't kill register again } - killVirtReg(lri); + killVirtReg(LRI); } /// spillAll - Spill all dirty virtregs without killing them. @@ -383,7 +383,7 @@ /// allocVirtReg - Allocate a physical register for VirtReg. void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { - const unsigned spillCost = 100; + const unsigned SpillCost = 100; const unsigned VirtReg = LRE.first; assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && @@ -446,7 +446,7 @@ default: // Grab the first spillable register we meet. if (!BestReg && !UsedInInstr.test(PhysReg)) - BestReg = PhysReg, BestCost = spillCost; + BestReg = PhysReg, BestCost = SpillCost; continue; } } @@ -455,7 +455,7 @@ << " candidate=" << TRI->getName(BestReg) << "\n"); // Try to extend the working set for RC if there were any disabled registers. 
- if (hasDisabled && (!BestReg || BestCost >= spillCost)) { + if (hasDisabled && (!BestReg || BestCost >= SpillCost)) { for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { unsigned PhysReg = *I; if (PhysRegState[PhysReg] != regDisabled || UsedInInstr.test(PhysReg)) @@ -480,7 +480,7 @@ Cost++; break; default: - Cost += spillCost; + Cost += SpillCost; break; } } @@ -490,7 +490,7 @@ if (!BestReg || Cost < BestCost) { BestReg = PhysReg; BestCost = Cost; - if (Cost < spillCost) break; + if (Cost < SpillCost) break; } } } @@ -538,12 +538,12 @@ unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); - LiveRegMap::iterator lri; + LiveRegMap::iterator LRI; bool New; - tie(lri, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); - LiveReg &LR = lri->second; + tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); + LiveReg &LR = LRI->second; if (New) - allocVirtReg(MI, *lri, Hint); + allocVirtReg(MI, *LRI, Hint); else addKillFlag(LR); // Kill before redefine. 
assert(LR.PhysReg && "Register not assigned"); @@ -559,12 +559,12 @@ unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); - LiveRegMap::iterator lri; + LiveRegMap::iterator LRI; bool New; - tie(lri, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); - LiveReg &LR = lri->second; + tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); + LiveReg &LR = LRI->second; if (New) { - allocVirtReg(MI, *lri, Hint); + allocVirtReg(MI, *LRI, Hint); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); int FrameIndex = getStackSpaceFor(VirtReg, RC); DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into " @@ -657,9 +657,9 @@ if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - LiveRegMap::iterator lri = LiveVirtRegs.find(Reg); - if (lri != LiveVirtRegs.end()) - setPhysReg(MO, lri->second.PhysReg); + LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); + if (LRI != LiveVirtRegs.end()) + setPhysReg(MO, LRI->second.PhysReg); else MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! } From stoklund at 2pi.dk Sun May 16 21:49:18 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 17 May 2010 02:49:18 -0000 Subject: [llvm-commits] [llvm] r103930 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100517024918.D025F3128018@llvm.org> Author: stoklund Date: Sun May 16 21:49:18 2010 New Revision: 103930 URL: http://llvm.org/viewvc/llvm-project?rev=103930&view=rev Log: Now that we don't keep live registers across calls, there is no reason to go through the very long list of call-clobbered registers. We just assume all registers are clobbered. 
Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103930&r1=103929&r2=103930&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Sun May 16 21:49:18 2010 @@ -736,9 +736,20 @@ UsedInInstr.set(Alias); } + unsigned DefOpEnd = MI->getNumOperands(); + if (TID.isCall()) { + // Spill all virtregs before a call. This serves two purposes: 1. If an + // exception is thrown, the landing pad is going to expect to find registers + // in their spill slots, and 2. we don't have to wade through all the + // operands on the call instruction. + DefOpEnd = VirtOpEnd; + DEBUG(dbgs() << " Spilling remaining registers before call.\n"); + spillAll(MI); + } + // Third scan. // Allocate defs and collect dead defs. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + for (unsigned i = 0; i != DefOpEnd; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue; unsigned Reg = MO.getReg(); @@ -758,12 +769,6 @@ setPhysReg(MO, PhysReg); } - // Spill all dirty virtregs before a call, in case of an exception. - if (TID.isCall()) { - DEBUG(dbgs() << " Spilling remaining registers before call.\n"); - spillAll(MI); - } - // Process virtreg deads. 
for (unsigned i = 0, e = VirtKills.size(); i != e; ++i) killVirtReg(VirtKills[i]); From stoklund at 2pi.dk Sun May 16 21:49:21 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 17 May 2010 02:49:21 -0000 Subject: [llvm-commits] [llvm] r103931 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100517024921.BD1DB3128026@llvm.org> Author: stoklund Date: Sun May 16 21:49:21 2010 New Revision: 103931 URL: http://llvm.org/viewvc/llvm-project?rev=103931&view=rev Log: Sprinkle superregister and operands when dealing with subregister indices. Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103931&r1=103930&r2=103931&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Sun May 16 21:49:21 2010 @@ -151,7 +151,7 @@ unsigned reloadVirtReg(MachineInstr *MI, unsigned OpNum, unsigned VirtReg, unsigned Hint); void spillAll(MachineInstr *MI); - void setPhysReg(MachineOperand &MO, unsigned PhysReg); + bool setPhysReg(MachineOperand &MO, unsigned PhysReg); }; char RAFast::ID = 0; } @@ -589,12 +589,32 @@ } // setPhysReg - Change MO the refer the PhysReg, considering subregs. -void RAFast::setPhysReg(MachineOperand &MO, unsigned PhysReg) { - if (unsigned Idx = MO.getSubReg()) { - MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, Idx) : 0); - MO.setSubReg(0); - } else +// This may invalidate MO if it is necessary to add implicit kills for a +// superregister. +// Return tru if MO kills its register. +bool RAFast::setPhysReg(MachineOperand &MO, unsigned PhysReg) { + if (!MO.getSubReg()) { MO.setReg(PhysReg); + return MO.isKill() || MO.isDead(); + } + + // Handle subregister index. + MO.setReg(PhysReg ? 
TRI->getSubReg(PhysReg, MO.getSubReg()) : 0); + MO.setSubReg(0); + if (MO.isUse()) { + if (MO.isKill()) { + MO.getParent()->addRegisterKilled(PhysReg, TRI, true); + return true; + } + return false; + } + // A subregister def implicitly defines the whole physreg. + if (MO.isDead()) { + MO.getParent()->addRegisterDead(PhysReg, TRI, true); + return true; + } + MO.getParent()->addRegisterDefined(PhysReg, TRI); + return false; } void RAFast::AllocateBasicBlock() { @@ -709,8 +729,7 @@ if (MO.isUse()) { unsigned PhysReg = reloadVirtReg(MI, i, Reg, CopyDst); CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; - setPhysReg(MO, PhysReg); - if (MO.isKill()) + if (setPhysReg(MO, PhysReg)) VirtKills.push_back(Reg); } else if (MO.isEarlyClobber()) { unsigned PhysReg = defineVirtReg(MI, i, Reg, 0); @@ -761,12 +780,11 @@ continue; } unsigned PhysReg = defineVirtReg(MI, i, Reg, CopySrc); - if (MO.isDead()) { + if (setPhysReg(MO, PhysReg)) { VirtKills.push_back(Reg); CopyDst = 0; // cancel coalescing; } else CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0; - setPhysReg(MO, PhysReg); } // Process virtreg deads. From espindola at google.com Sun May 16 22:04:56 2010 From: espindola at google.com (Rafael Espindola) Date: Sun, 16 May 2010 23:04:56 -0400 Subject: [llvm-commits] [patch] Use $ORIGIN it the rpath of libraries on non-darwin hosts In-Reply-To: References: Message-ID: > This going to work on windows? Or am I missing some context in the diff? I have no idea what rpath looks like on windows, but we already use $ORIGIN for programs: ifneq ($(HOST_OS),Darwin) ifneq ($(DARWIN_MAJVERS),4) ifdef TOOLNAME LD.Flags += $(RPATH) -Wl,'$$ORIGIN/../lib' ifdef EXAMPLE_TOOL LD.Flags += $(RPATH) -Wl,$(ExmplDir) $(RDYNAMIC) else LD.Flags += $(RPATH) -Wl,$(ToolDir) $(RDYNAMIC) endif endif endif endif The patch added it for libraries. 
> -eric > Cheers, -- Rafael Ávila de Espíndola From stoklund at 2pi.dk Sun May 16 22:26:07 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 17 May 2010 03:26:07 -0000 Subject: [llvm-commits] [llvm] r103933 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100517032607.1F8B0312800A@llvm.org> Author: stoklund Date: Sun May 16 22:26:06 2010 New Revision: 103933 URL: http://llvm.org/viewvc/llvm-project?rev=103933&view=rev Log: Execute virtreg kills immediately instead of after processing all uses. This is safe to do because the physreg has been marked UsedInInstr and the kill flag will be set on the last operand using the virtreg if there are more than one. Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103933&r1=103932&r2=103933&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Sun May 16 22:26:06 2010 @@ -563,6 +563,7 @@ bool New; tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); LiveReg &LR = LRI->second; + MachineOperand &MO = MI->getOperand(OpNum); if (New) { allocVirtReg(MI, *LRI, Hint); const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); @@ -572,7 +573,6 @@ TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI); ++NumLoads; } else if (LR.Dirty) { - MachineOperand &MO = MI->getOperand(OpNum); if (isLastUseOfLocalReg(MO)) { DEBUG(dbgs() << "Killing last use: " << MO << "\n"); MO.setIsKill(); @@ -580,6 +580,13 @@ DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n"); MO.setIsKill(false); } + } else if (MO.isKill()) { + // We must remove kill flags from uses of reloaded registers because the + // register would be killed immediately, and there might be a second use: + // %foo = OR %x, %x + // This would cause a second reload 
of %x into a different register. + DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); + MO.setIsKill(false); } assert(LR.PhysReg && "Register not assigned"); LR.LastUse = MI; @@ -630,7 +637,7 @@ E = MBB->livein_end(); I != E; ++I) definePhysReg(MII, *I, regReserved); - SmallVector VirtKills, PhysDefs; + SmallVector PhysECs; SmallVector Coalesced; // Otherwise, sequentially allocate each instruction in the MBB. @@ -694,7 +701,7 @@ // Track registers used by instruction. UsedInInstr.reset(); - PhysDefs.clear(); + PhysECs.clear(); // First scan. // Mark physreg uses and early clobbers as used. @@ -714,7 +721,7 @@ usePhysReg(MO); } else if (MO.isEarlyClobber()) { definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); - PhysDefs.push_back(Reg); + PhysECs.push_back(Reg); } } @@ -730,25 +737,20 @@ unsigned PhysReg = reloadVirtReg(MI, i, Reg, CopyDst); CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; if (setPhysReg(MO, PhysReg)) - VirtKills.push_back(Reg); + killVirtReg(Reg); } else if (MO.isEarlyClobber()) { unsigned PhysReg = defineVirtReg(MI, i, Reg, 0); setPhysReg(MO, PhysReg); - PhysDefs.push_back(PhysReg); + PhysECs.push_back(PhysReg); } } - // Process virtreg kills - for (unsigned i = 0, e = VirtKills.size(); i != e; ++i) - killVirtReg(VirtKills[i]); - VirtKills.clear(); - MRI->addPhysRegsUsed(UsedInInstr); // Track registers defined by instruction - early clobbers at this point. UsedInInstr.reset(); - for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { - unsigned PhysReg = PhysDefs[i]; + for (unsigned i = 0, e = PhysECs.size(); i != e; ++i) { + unsigned PhysReg = PhysECs[i]; UsedInInstr.set(PhysReg); for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) @@ -781,17 +783,12 @@ } unsigned PhysReg = defineVirtReg(MI, i, Reg, CopySrc); if (setPhysReg(MO, PhysReg)) { - VirtKills.push_back(Reg); + killVirtReg(Reg); CopyDst = 0; // cancel coalescing; } else CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? 
PhysReg : 0; } - // Process virtreg deads. - for (unsigned i = 0, e = VirtKills.size(); i != e; ++i) - killVirtReg(VirtKills[i]); - VirtKills.clear(); - MRI->addPhysRegsUsed(UsedInInstr); if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { From stoklund at 2pi.dk Sun May 16 22:26:09 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 17 May 2010 03:26:09 -0000 Subject: [llvm-commits] [llvm] r103934 - /llvm/trunk/lib/CodeGen/RegAllocFast.cpp Message-ID: <20100517032609.ED2DF3128018@llvm.org> Author: stoklund Date: Sun May 16 22:26:09 2010 New Revision: 103934 URL: http://llvm.org/viewvc/llvm-project?rev=103934&view=rev Log: Eliminate a hash table probe when killing virtual registers. Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103934&r1=103933&r2=103934&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Sun May 16 22:26:09 2010 @@ -146,10 +146,10 @@ void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState); void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg); void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint); - unsigned defineVirtReg(MachineInstr *MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint); - unsigned reloadVirtReg(MachineInstr *MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint); + LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint); + LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint); void spillAll(MachineInstr *MI); bool setPhysReg(MachineOperand &MO, unsigned PhysReg); }; @@ -534,8 +534,9 @@ } /// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. 
-unsigned RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint) { +RAFast::LiveRegMap::iterator +RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; @@ -551,12 +552,13 @@ LR.LastOpNum = OpNum; LR.Dirty = true; UsedInInstr.set(LR.PhysReg); - return LR.PhysReg; + return LRI; } /// reloadVirtReg - Make sure VirtReg is available in a physreg and return it. -unsigned RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint) { +RAFast::LiveRegMap::iterator +RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint) { assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; @@ -592,7 +594,7 @@ LR.LastUse = MI; LR.LastOpNum = OpNum; UsedInInstr.set(LR.PhysReg); - return LR.PhysReg; + return LRI; } // setPhysReg - Change MO the refer the PhysReg, considering subregs. @@ -734,12 +736,14 @@ unsigned Reg = MO.getReg(); if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; if (MO.isUse()) { - unsigned PhysReg = reloadVirtReg(MI, i, Reg, CopyDst); + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst); + unsigned PhysReg = LRI->second.PhysReg; CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? 
PhysReg : 0; if (setPhysReg(MO, PhysReg)) - killVirtReg(Reg); + killVirtReg(LRI); } else if (MO.isEarlyClobber()) { - unsigned PhysReg = defineVirtReg(MI, i, Reg, 0); + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->second.PhysReg; setPhysReg(MO, PhysReg); PhysECs.push_back(PhysReg); } @@ -781,9 +785,10 @@ regFree : regReserved); continue; } - unsigned PhysReg = defineVirtReg(MI, i, Reg, CopySrc); + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc); + unsigned PhysReg = LRI->second.PhysReg; if (setPhysReg(MO, PhysReg)) { - killVirtReg(Reg); + killVirtReg(LRI); CopyDst = 0; // cancel coalescing; } else CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0; From stoklund at 2pi.dk Sun May 16 23:50:57 2010 From: stoklund at 2pi.dk (Jakob Stoklund Olesen) Date: Mon, 17 May 2010 04:50:57 -0000 Subject: [llvm-commits] [llvm] r103935 - in /llvm/trunk: lib/CodeGen/RegAllocFast.cpp test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll Message-ID: <20100517045057.5E903312800A@llvm.org> Author: stoklund Date: Sun May 16 23:50:57 2010 New Revision: 103935 URL: http://llvm.org/viewvc/llvm-project?rev=103935&view=rev Log: Only use clairvoyance when defining a register, and then only if it has one use. This makes allocation independent on the ordering of use-def chains. Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll Modified: llvm/trunk/lib/CodeGen/RegAllocFast.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocFast.cpp?rev=103935&r1=103934&r2=103935&view=diff ============================================================================== --- llvm/trunk/lib/CodeGen/RegAllocFast.cpp (original) +++ llvm/trunk/lib/CodeGen/RegAllocFast.cpp Sun May 16 23:50:57 2010 @@ -399,20 +399,6 @@ !Allocatable.test(Hint))) Hint = 0; - // If there is no hint, peek at the first use of this register. 
- if (!Hint && !MRI->use_nodbg_empty(VirtReg)) { - MachineInstr &MI = *MRI->use_nodbg_begin(VirtReg); - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - // Copy to physreg -> use physreg as hint. - if (TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - SrcReg == VirtReg && TargetRegisterInfo::isPhysicalRegister(DstReg) && - RC->contains(DstReg) && !UsedInInstr.test(DstReg) && - Allocatable.test(DstReg)) { - Hint = DstReg; - DEBUG(dbgs() << "%reg" << VirtReg << " gets hint from " << MI); - } - } - // Take hint when possible. if (Hint) { assert(RC->contains(Hint) && !UsedInInstr.test(Hint) && @@ -543,9 +529,18 @@ bool New; tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); LiveReg &LR = LRI->second; - if (New) + if (New) { + // If there is no hint, peek at the only use of this register. + if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && + MRI->hasOneNonDBGUse(VirtReg)) { + unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; + // It's a copy, use the destination register as a hint. + if (TII->isMoveInstr(*MRI->use_nodbg_begin(VirtReg), + SrcReg, DstReg, SrcSubReg, DstSubReg)) + Hint = DstReg; + } allocVirtReg(MI, *LRI, Hint); - else + } else addKillFlag(LR); // Kill before redefine. 
assert(LR.PhysReg && "Register not assigned"); LR.LastUse = MI; Modified: llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll?rev=103935&r1=103934&r2=103935&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll (original) +++ llvm/trunk/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll Sun May 16 23:50:57 2010 @@ -1,15 +1,11 @@ ; RUN: llc < %s | FileCheck %s -; RUN: llc < %s -regalloc=local | FileCheck -check-prefix=LOCAL %s -; RUN: llc < %s -regalloc=fast | FileCheck -check-prefix=FAST %s +; RUN: llc < %s -regalloc=local | FileCheck %s +; RUN: llc < %s -regalloc=fast | FileCheck %s ; The first argument of subfc must not be the same as any other register. -; CHECK: subfc r3,r5,r4 -; CHECK: subfze r4,r6 -; LOCAL: subfc r6,r5,r4 -; LOCAL: subfze r3,r3 -; FAST: subfc r3,r5,r4 -; FAST: subfze r4,r6 - +; CHECK: subfc [[REG:r.]], +; CHECK-NOT: [[REG]] +; CHECK: InlineAsm End ; PR1357 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"