From nicholas at mxc.ca Mon Jul 9 03:06:17 2012 From: nicholas at mxc.ca (Nick Lewycky) Date: Mon, 09 Jul 2012 01:06:17 -0700 Subject: [llvm-commits] [llvm] r159876 - in /llvm/trunk: lib/Transforms/InstCombine/InstructionCombining.cpp test/Transforms/InstCombine/badmalloc.ll test/Transforms/InstCombine/invoke.ll test/Transforms/InstCombine/malloc-free-delete.ll test/Transforms/InstCombine/objsize-64.ll test/Transforms/InstCombine/objsize.ll In-Reply-To: <4FF7E2A7.9030704@free.fr> References: <20120706230925.C3A562A6C069@llvm.org> <4FF7E2A7.9030704@free.fr> Message-ID: <4FFA90F9.1010701@mxc.ca> Duncan Sands wrote: > Hi Nuno, > >> --- llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp (original) >> +++ llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp Fri Jul 6 18:09:25 2012 >> @@ -1137,12 +1137,29 @@ >> } >> } >> if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { >> - if (II->getIntrinsicID() == Intrinsic::lifetime_start || >> - II->getIntrinsicID() == Intrinsic::lifetime_end) { >> + switch (II->getIntrinsicID()) { >> + default: return false; >> + case Intrinsic::memmove: >> + case Intrinsic::memcpy: >> + case Intrinsic::memset: { >> + MemIntrinsic *MI = cast<MemIntrinsic>(II); >> + if (MI->isVolatile() || MI->getRawDest() != V) > > why exclude volatile stores? If all that is being done to the allocated memory > is doing a bunch of volatile stores to it, I don't see why you can't discard > them and the allocation too. Please don't do that. While that would not miscompile C or C++, LLVM has a stronger sense of the term volatile: "The optimizers must not change the number of volatile operations or change their order of execution relative to other volatile operations. The optimizers may change the order of volatile operations relative to non-volatile operations." -- http://llvm.org/docs/LangRef.html#volatile In reality, people non-standardly use volatile pointers in C/C++ to tell the compiler to not optimize something, and LLVM politely supports this. Nick PS. 
I might be guilty of using a volatile pointer to malloc'd memory that has no other users, just to keep the malloc call around. Might be. From samsonov at google.com Mon Jul 9 08:21:39 2012 From: samsonov at google.com (Alexey Samsonov) Date: Mon, 09 Jul 2012 13:21:39 -0000 Subject: [llvm-commits] [compiler-rt] r159928 - in /compiler-rt/trunk/lib: sanitizer_common/sanitizer_flags.cc sanitizer_common/sanitizer_flags.h tsan/Makefile.old tsan/rtl/tsan_flags.cc tsan/unit_tests/tsan_flags_test.cc Message-ID: <20120709132139.D426F2A6C069@llvm.org> Author: samsonov Date: Mon Jul 9 08:21:39 2012 New Revision: 159928 URL: http://llvm.org/viewvc/llvm-project?rev=159928&view=rev Log: [Sanitizer] move flag parsing routines (and unit tests) from tsan runtime to common runtime. Added: compiler-rt/trunk/lib/sanitizer_common/sanitizer_flags.cc compiler-rt/trunk/lib/sanitizer_common/sanitizer_flags.h Modified: compiler-rt/trunk/lib/tsan/Makefile.old compiler-rt/trunk/lib/tsan/rtl/tsan_flags.cc compiler-rt/trunk/lib/tsan/unit_tests/tsan_flags_test.cc Added: compiler-rt/trunk/lib/sanitizer_common/sanitizer_flags.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_flags.cc?rev=159928&view=auto ============================================================================== --- compiler-rt/trunk/lib/sanitizer_common/sanitizer_flags.cc (added) +++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_flags.cc Mon Jul 9 08:21:39 2012 @@ -0,0 +1,82 @@ +//===-- sanitizer_flags.cc ------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of ThreadSanitizer/AddressSanitizer runtime. 
+// +//===----------------------------------------------------------------------===// + +#include "sanitizer_flags.h" + +#include "sanitizer_common.h" +#include "sanitizer_libc.h" + +namespace __sanitizer { + +static char *GetFlagValue(const char *env, const char *name) { + if (env == 0) + return 0; + const char *pos = internal_strstr(env, name); + const char *end; + if (pos == 0) + return 0; + pos += internal_strlen(name); + if (pos[0] != '=') { + end = pos; + } else { + pos += 1; + if (pos[0] == '"') { + pos += 1; + end = internal_strchr(pos, '"'); + } else if (pos[0] == '\'') { + pos += 1; + end = internal_strchr(pos, '\''); + } else { + end = internal_strchr(pos, ' '); + } + if (end == 0) + end = pos + internal_strlen(pos); + } + int len = end - pos; + char *f = (char*)InternalAlloc(len + 1); + internal_memcpy(f, pos, len); + f[len] = '\0'; + return f; +} + +void ParseFlag(const char *env, bool *flag, const char *name) { + char *val = GetFlagValue(env, name); + if (val == 0) + return; + if (0 == internal_strcmp(val, "0") || + 0 == internal_strcmp(val, "no") || + 0 == internal_strcmp(val, "false")) + *flag = false; + if (0 == internal_strcmp(val, "1") || + 0 == internal_strcmp(val, "yes") || + 0 == internal_strcmp(val, "true")) + *flag = true; + InternalFree(val); +} + +void ParseFlag(const char *env, int *flag, const char *name) { + char *val = GetFlagValue(env, name); + if (val == 0) + return; + *flag = internal_atoll(val); + InternalFree(val); +} + +void ParseFlag(const char *env, const char **flag, const char *name) { + const char *val = GetFlagValue(env, name); + if (val == 0) + return; + *flag = val; +} + +} // namespace __sanitizer Added: compiler-rt/trunk/lib/sanitizer_common/sanitizer_flags.h URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_flags.h?rev=159928&view=auto ============================================================================== --- compiler-rt/trunk/lib/sanitizer_common/sanitizer_flags.h 
(added) +++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_flags.h Mon Jul 9 08:21:39 2012 @@ -0,0 +1,27 @@ +//===-- sanitizer_flags.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of ThreadSanitizer/AddressSanitizer runtime. +// +//===----------------------------------------------------------------------===// + +#ifndef SANITIZER_FLAGS_H +#define SANITIZER_FLAGS_H + +#include "sanitizer_common/sanitizer_internal_defs.h" + +namespace __sanitizer { + +void ParseFlag(const char *env, bool *flag, const char *name); +void ParseFlag(const char *env, int *flag, const char *name); +void ParseFlag(const char *env, const char **flag, const char *name); + +} // namespace __sanitizer + +#endif // SANITIZER_FLAGS_H Modified: compiler-rt/trunk/lib/tsan/Makefile.old URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/Makefile.old?rev=159928&r1=159927&r2=159928&view=diff ============================================================================== --- compiler-rt/trunk/lib/tsan/Makefile.old (original) +++ compiler-rt/trunk/lib/tsan/Makefile.old Mon Jul 9 08:21:39 2012 @@ -43,7 +43,7 @@ libtsan: $(MAKE) -C rtl -f Makefile.old DEBUG=$(DEBUG) -%.o: %.cc $(UNIT_TEST_HDR) +%.o: %.cc $(UNIT_TEST_HDR) libtsan $(CXX) $(CXXFLAGS) $(CFLAGS) $(INCLUDES) -o $@ -c $< tsan_test: $(UNIT_TEST_OBJ) $(RTL_TEST_OBJ) \ Modified: compiler-rt/trunk/lib/tsan/rtl/tsan_flags.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/tsan_flags.cc?rev=159928&r1=159927&r2=159928&view=diff ============================================================================== --- compiler-rt/trunk/lib/tsan/rtl/tsan_flags.cc (original) +++ compiler-rt/trunk/lib/tsan/rtl/tsan_flags.cc Mon Jul 9 08:21:39 
2012 @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_flags.h" #include "sanitizer_common/sanitizer_libc.h" #include "tsan_flags.h" #include "tsan_rtl.h" @@ -18,10 +19,6 @@ namespace __tsan { -static void Flag(const char *env, bool *flag, const char *name); -static void Flag(const char *env, int *flag, const char *name); -static void Flag(const char *env, const char **flag, const char *name); - Flags *flags() { return &CTX()->flags; } @@ -57,92 +54,22 @@ OverrideFlags(f); // Override from command line. - Flag(env, &f->enable_annotations, "enable_annotations"); - Flag(env, &f->suppress_equal_stacks, "suppress_equal_stacks"); - Flag(env, &f->suppress_equal_addresses, "suppress_equal_addresses"); - Flag(env, &f->report_thread_leaks, "report_thread_leaks"); - Flag(env, &f->report_signal_unsafe, "report_signal_unsafe"); - Flag(env, &f->force_seq_cst_atomics, "force_seq_cst_atomics"); - Flag(env, &f->strip_path_prefix, "strip_path_prefix"); - Flag(env, &f->suppressions, "suppressions"); - Flag(env, &f->exitcode, "exitcode"); - Flag(env, &f->log_fileno, "log_fileno"); - Flag(env, &f->atexit_sleep_ms, "atexit_sleep_ms"); - Flag(env, &f->verbosity, "verbosity"); - Flag(env, &f->profile_memory, "profile_memory"); - Flag(env, &f->flush_memory_ms, "flush_memory_ms"); - Flag(env, &f->stop_on_start, "stop_on_start"); - Flag(env, &f->use_internal_symbolizer, "use_internal_symbolizer"); -} - -static const char *GetFlagValue(const char *env, const char *name, - const char **end) { - if (env == 0) - return *end = 0; - const char *pos = internal_strstr(env, name); - if (pos == 0) - return *end = 0; - pos += internal_strlen(name); - if (pos[0] != '=') - return *end = pos; - pos += 1; - if (pos[0] == '"') { - pos += 1; - *end = internal_strchr(pos, '"'); - } else if (pos[0] == '\'') { - pos += 1; - *end = internal_strchr(pos, '\''); - } else { - *end = internal_strchr(pos, ' '); - } - if (*end == 
0) - *end = pos + internal_strlen(pos); - return pos; -} - -static void Flag(const char *env, bool *flag, const char *name) { - const char *end = 0; - const char *val = GetFlagValue(env, name, &end); - if (val == 0) - return; - int len = end - val; - if (len == 1 && val[0] == '0') - *flag = false; - else if (len == 1 && val[0] == '1') - *flag = true; -} - -static void Flag(const char *env, int *flag, const char *name) { - const char *end = 0; - const char *val = GetFlagValue(env, name, &end); - if (val == 0) - return; - bool minus = false; - if (val != end && val[0] == '-') { - minus = true; - val += 1; - } - int v = 0; - for (; val != end; val++) { - if (val[0] < '0' || val[0] > '9') - break; - v = v * 10 + val[0] - '0'; - } - if (minus) - v = -v; - *flag = v; -} - -static void Flag(const char *env, const char **flag, const char *name) { - const char *end = 0; - const char *val = GetFlagValue(env, name, &end); - if (val == 0) - return; - int len = end - val; - char *f = (char*)internal_alloc(MBlockFlag, len + 1); - internal_memcpy(f, val, len); - f[len] = 0; - *flag = f; + ParseFlag(env, &f->enable_annotations, "enable_annotations"); + ParseFlag(env, &f->suppress_equal_stacks, "suppress_equal_stacks"); + ParseFlag(env, &f->suppress_equal_addresses, "suppress_equal_addresses"); + ParseFlag(env, &f->report_thread_leaks, "report_thread_leaks"); + ParseFlag(env, &f->report_signal_unsafe, "report_signal_unsafe"); + ParseFlag(env, &f->force_seq_cst_atomics, "force_seq_cst_atomics"); + ParseFlag(env, &f->strip_path_prefix, "strip_path_prefix"); + ParseFlag(env, &f->suppressions, "suppressions"); + ParseFlag(env, &f->exitcode, "exitcode"); + ParseFlag(env, &f->log_fileno, "log_fileno"); + ParseFlag(env, &f->atexit_sleep_ms, "atexit_sleep_ms"); + ParseFlag(env, &f->verbosity, "verbosity"); + ParseFlag(env, &f->profile_memory, "profile_memory"); + ParseFlag(env, &f->flush_memory_ms, "flush_memory_ms"); + ParseFlag(env, &f->stop_on_start, "stop_on_start"); + ParseFlag(env, 
&f->use_internal_symbolizer, "use_internal_symbolizer"); } } // namespace __tsan Modified: compiler-rt/trunk/lib/tsan/unit_tests/tsan_flags_test.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/unit_tests/tsan_flags_test.cc?rev=159928&r1=159927&r2=159928&view=diff ============================================================================== --- compiler-rt/trunk/lib/tsan/unit_tests/tsan_flags_test.cc (original) +++ compiler-rt/trunk/lib/tsan/unit_tests/tsan_flags_test.cc Mon Jul 9 08:21:39 2012 @@ -24,78 +24,15 @@ InitializeFlags(&f, ""); } -TEST(Flags, ParseBool) { +TEST(Flags, DefaultValues) { ScopedInRtl in_rtl; Flags f = {}; f.enable_annotations = false; - InitializeFlags(&f, "enable_annotations"); - EXPECT_EQ(f.enable_annotations, true); - - f.enable_annotations = false; - InitializeFlags(&f, "--enable_annotations"); - EXPECT_EQ(f.enable_annotations, true); - - f.enable_annotations = false; - InitializeFlags(&f, "--enable_annotations=1"); - EXPECT_EQ(f.enable_annotations, true); - - // This flag is false by default. 
- f.force_seq_cst_atomics = false; - InitializeFlags(&f, "--force_seq_cst_atomics=1"); - EXPECT_EQ(f.force_seq_cst_atomics, true); - - f.enable_annotations = true; - InitializeFlags(&f, "asdas enable_annotations=0 asdasd"); - EXPECT_EQ(f.enable_annotations, false); - - f.enable_annotations = true; - InitializeFlags(&f, " --enable_annotations=0 "); - EXPECT_EQ(f.enable_annotations, false); -} - -TEST(Flags, ParseInt) { - ScopedInRtl in_rtl; - Flags f = {}; - f.exitcode = -11; - InitializeFlags(&f, "exitcode"); - EXPECT_EQ(f.exitcode, 0); - - f.exitcode = -11; - InitializeFlags(&f, "--exitcode="); - EXPECT_EQ(f.exitcode, 0); - - f.exitcode = -11; - InitializeFlags(&f, "--exitcode=42"); - EXPECT_EQ(f.exitcode, 42); - - f.exitcode = -11; - InitializeFlags(&f, "--exitcode=-42"); - EXPECT_EQ(f.exitcode, -42); -} - -TEST(Flags, ParseStr) { - ScopedInRtl in_rtl; - Flags f = {}; - - InitializeFlags(&f, 0); - EXPECT_EQ(0, strcmp(f.strip_path_prefix, "")); - - InitializeFlags(&f, "strip_path_prefix"); - EXPECT_EQ(0, strcmp(f.strip_path_prefix, "")); - - InitializeFlags(&f, "--strip_path_prefix="); - EXPECT_EQ(0, strcmp(f.strip_path_prefix, "")); - - InitializeFlags(&f, "--strip_path_prefix=abc"); - EXPECT_EQ(0, strcmp(f.strip_path_prefix, "abc")); - - InitializeFlags(&f, "--strip_path_prefix='abc zxc'"); - EXPECT_EQ(0, strcmp(f.strip_path_prefix, "abc zxc")); - - InitializeFlags(&f, "--strip_path_prefix=\"abc zxc\""); - EXPECT_EQ(0, strcmp(f.strip_path_prefix, "abc zxc")); + InitializeFlags(&f, ""); + EXPECT_EQ(66, f.exitcode); + EXPECT_EQ(true, f.enable_annotations); } } // namespace __tsan From samsonov at google.com Mon Jul 9 09:36:05 2012 From: samsonov at google.com (Alexey Samsonov) Date: Mon, 09 Jul 2012 14:36:05 -0000 Subject: [llvm-commits] [compiler-rt] r159933 - in /compiler-rt/trunk/lib/asan: asan_allocator.cc asan_globals.cc asan_interceptors.cc asan_internal.h asan_linux.cc asan_mac.cc asan_malloc_mac.cc asan_poisoning.cc asan_posix.cc asan_rtl.cc asan_stack.cc 
asan_stack.h asan_thread.cc asan_thread_registry.cc Message-ID: <20120709143605.831CE2A6C069@llvm.org> Author: samsonov Date: Mon Jul 9 09:36:04 2012 New Revision: 159933 URL: http://llvm.org/viewvc/llvm-project?rev=159933&view=rev Log: [ASan] Use common flags parsing machinery. Modified: compiler-rt/trunk/lib/asan/asan_allocator.cc compiler-rt/trunk/lib/asan/asan_globals.cc compiler-rt/trunk/lib/asan/asan_interceptors.cc compiler-rt/trunk/lib/asan/asan_internal.h compiler-rt/trunk/lib/asan/asan_linux.cc compiler-rt/trunk/lib/asan/asan_mac.cc compiler-rt/trunk/lib/asan/asan_malloc_mac.cc compiler-rt/trunk/lib/asan/asan_poisoning.cc compiler-rt/trunk/lib/asan/asan_posix.cc compiler-rt/trunk/lib/asan/asan_rtl.cc compiler-rt/trunk/lib/asan/asan_stack.cc compiler-rt/trunk/lib/asan/asan_stack.h compiler-rt/trunk/lib/asan/asan_thread.cc compiler-rt/trunk/lib/asan/asan_thread_registry.cc Modified: compiler-rt/trunk/lib/asan/asan_allocator.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_allocator.cc?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_allocator.cc (original) +++ compiler-rt/trunk/lib/asan/asan_allocator.cc Mon Jul 9 09:36:04 2012 @@ -42,7 +42,7 @@ namespace __asan { -#define REDZONE FLAG_redzone +#define REDZONE (flags()->redzone) static const uptr kMinAllocSize = REDZONE * 2; static const u64 kMaxAvailableRam = 128ULL << 30; // 128G static const uptr kMaxThreadLocalQuarantine = 1 << 20; // 1M @@ -134,7 +134,7 @@ CHECK(IsAligned(size, kPageSize)); u8 *res = (u8*)MmapOrDie(size, __FUNCTION__); PoisonShadow((uptr)res, size, kAsanHeapLeftRedzoneMagic); - if (FLAG_debug) { + if (flags()->debug) { Printf("ASAN_MMAP: [%p, %p)\n", res, res + size); } return res; @@ -186,7 +186,7 @@ return (u32*)((uptr)this + sizeof(ChunkBase)); } u32 *compressed_free_stack() { - return (u32*)((uptr)this + Max(REDZONE, (uptr)sizeof(ChunkBase))); + 
return (u32*)((uptr)this + Max((uptr)REDZONE, (uptr)sizeof(ChunkBase))); } // The left redzone after the ChunkBase is given to the alloc stack trace. @@ -339,12 +339,12 @@ void SwallowThreadLocalMallocStorage(AsanThreadLocalMallocStorage *x, bool eat_free_lists) { - CHECK(FLAG_quarantine_size > 0); + CHECK(flags()->quarantine_size > 0); ScopedLock lock(&mu_); AsanChunkFifoList *q = &x->quarantine_; if (q->size() > 0) { quarantine_.PushList(q); - while (quarantine_.size() > FLAG_quarantine_size) { + while (quarantine_.size() > flags()->quarantine_size) { QuarantinePop(); } } @@ -644,7 +644,7 @@ CHECK(size_to_allocate >= needed_size); CHECK(IsAligned(size_to_allocate, REDZONE)); - if (FLAG_v >= 3) { + if (flags()->verbosity >= 3) { Printf("Allocate align: %zu size: %zu class: %u real: %zu\n", alignment, size, size_class, size_to_allocate); } @@ -704,7 +704,7 @@ PoisonHeapPartialRightRedzone(addr + rounded_size - REDZONE, size & (REDZONE - 1)); } - if (size <= FLAG_max_malloc_fill_size) { + if (size <= flags()->max_malloc_fill_size) { REAL(memset)((void*)addr, 0, rounded_size); } return (u8*)addr; @@ -714,7 +714,7 @@ if (!ptr) return; CHECK(stack); - if (FLAG_debug) { + if (flags()->debug) { CHECK(malloc_info.FindPageGroup((uptr)ptr)); } @@ -882,7 +882,7 @@ CHECK(stack); if (ptr == 0) return 0; uptr usable_size = malloc_info.AllocationSize((uptr)ptr); - if (FLAG_check_malloc_usable_size && (usable_size == 0)) { + if (flags()->check_malloc_usable_size && (usable_size == 0)) { AsanReport("ERROR: AddressSanitizer attempting to call " "malloc_usable_size() for pointer which is " "not owned: %p\n", ptr); @@ -1055,7 +1055,7 @@ using namespace __asan; // NOLINT uptr __asan_stack_malloc(uptr size, uptr real_stack) { - if (!FLAG_use_fake_stack) return real_stack; + if (!flags()->use_fake_stack) return real_stack; AsanThread *t = asanThreadRegistry().GetCurrent(); if (!t) { // TSD is gone, use the real stack. 
@@ -1067,7 +1067,7 @@ } void __asan_stack_free(uptr ptr, uptr size, uptr real_stack) { - if (!FLAG_use_fake_stack) return; + if (!flags()->use_fake_stack) return; if (ptr != real_stack) { FakeStack::OnFree(ptr, size, real_stack); } Modified: compiler-rt/trunk/lib/asan/asan_globals.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_globals.cc?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_globals.cc (original) +++ compiler-rt/trunk/lib/asan/asan_globals.cc Mon Jul 9 09:36:04 2012 @@ -88,12 +88,12 @@ bool DescribeAddrIfGlobal(uptr addr) { - if (!FLAG_report_globals) return false; + if (!flags()->report_globals) return false; ScopedLock lock(&mu_for_globals); bool res = false; for (ListOfGlobals *l = list_of_globals; l; l = l->next) { const Global &g = *l->g; - if (FLAG_report_globals >= 2) + if (flags()->report_globals >= 2) AsanPrintf("Search Global: beg=%p size=%zu name=%s\n", (void*)g.beg, g.size, (char*)g.name); res |= DescribeAddrIfMyRedZone(g, addr); @@ -106,7 +106,7 @@ // so we store the globals in a map. static void RegisterGlobal(const Global *g) { CHECK(asan_inited); - CHECK(FLAG_report_globals); + CHECK(flags()->report_globals); CHECK(AddrIsInMem(g->beg)); CHECK(AddrIsAlignedByGranularity(g->beg)); CHECK(AddrIsAlignedByGranularity(g->size_with_redzone)); @@ -116,14 +116,14 @@ l->g = g; l->next = list_of_globals; list_of_globals = l; - if (FLAG_report_globals >= 2) + if (flags()->report_globals >= 2) Report("Added Global: beg=%p size=%zu name=%s\n", (void*)g->beg, g->size, g->name); } static void UnregisterGlobal(const Global *g) { CHECK(asan_inited); - CHECK(FLAG_report_globals); + CHECK(flags()->report_globals); CHECK(AddrIsInMem(g->beg)); CHECK(AddrIsAlignedByGranularity(g->beg)); CHECK(AddrIsAlignedByGranularity(g->size_with_redzone)); @@ -141,7 +141,7 @@ // Register one global with a default redzone. 
void __asan_register_global(uptr addr, uptr size, const char *name) { - if (!FLAG_report_globals) return; + if (!flags()->report_globals) return; ScopedLock lock(&mu_for_globals); Global *g = (Global *)allocator_for_globals.Allocate(sizeof(Global)); g->beg = addr; @@ -153,7 +153,7 @@ // Register an array of globals. void __asan_register_globals(__asan_global *globals, uptr n) { - if (!FLAG_report_globals) return; + if (!flags()->report_globals) return; ScopedLock lock(&mu_for_globals); for (uptr i = 0; i < n; i++) { RegisterGlobal(&globals[i]); @@ -163,7 +163,7 @@ // Unregister an array of globals. // We must do it when a shared objects gets dlclosed. void __asan_unregister_globals(__asan_global *globals, uptr n) { - if (!FLAG_report_globals) return; + if (!flags()->report_globals) return; ScopedLock lock(&mu_for_globals); for (uptr i = 0; i < n; i++) { UnregisterGlobal(&globals[i]); Modified: compiler-rt/trunk/lib/asan/asan_interceptors.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_interceptors.cc?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_interceptors.cc (original) +++ compiler-rt/trunk/lib/asan/asan_interceptors.cc Mon Jul 9 09:36:04 2012 @@ -321,7 +321,7 @@ return REAL(memcpy)(to, from, size); } ENSURE_ASAN_INITED(); - if (FLAG_replace_intrin) { + if (flags()->replace_intrin) { if (to != from) { // We do not treat memcpy with to==from as a bug. // See http://llvm.org/bugs/show_bug.cgi?id=11763. 
@@ -338,7 +338,7 @@ return REAL(memmove)(to, from, size); } ENSURE_ASAN_INITED(); - if (FLAG_replace_intrin) { + if (flags()->replace_intrin) { ASAN_WRITE_RANGE(from, size); ASAN_READ_RANGE(to, size); } @@ -351,7 +351,7 @@ return REAL(memset)(block, c, size); } ENSURE_ASAN_INITED(); - if (FLAG_replace_intrin) { + if (flags()->replace_intrin) { ASAN_WRITE_RANGE(block, size); } return REAL(memset)(block, c, size); @@ -360,7 +360,7 @@ INTERCEPTOR(char*, strchr, const char *str, int c) { ENSURE_ASAN_INITED(); char *result = REAL(strchr)(str, c); - if (FLAG_replace_str) { + if (flags()->replace_str) { uptr bytes_read = (result ? result - str : REAL(strlen)(str)) + 1; ASAN_READ_RANGE(str, bytes_read); } @@ -390,7 +390,7 @@ INTERCEPTOR(char*, strcat, char *to, const char *from) { // NOLINT ENSURE_ASAN_INITED(); - if (FLAG_replace_str) { + if (flags()->replace_str) { uptr from_length = REAL(strlen)(from); ASAN_READ_RANGE(from, from_length + 1); if (from_length > 0) { @@ -405,7 +405,7 @@ INTERCEPTOR(char*, strncat, char *to, const char *from, uptr size) { ENSURE_ASAN_INITED(); - if (FLAG_replace_str && size > 0) { + if (flags()->replace_str && size > 0) { uptr from_length = MaybeRealStrnlen(from, size); ASAN_READ_RANGE(from, Min(size, from_length + 1)); uptr to_length = REAL(strlen)(to); @@ -442,7 +442,7 @@ return REAL(strcpy)(to, from); // NOLINT } ENSURE_ASAN_INITED(); - if (FLAG_replace_str) { + if (flags()->replace_str) { uptr from_size = REAL(strlen)(from) + 1; CHECK_RANGES_OVERLAP("strcpy", to, from_size, from, from_size); ASAN_READ_RANGE(from, from_size); @@ -453,7 +453,7 @@ INTERCEPTOR(char*, strdup, const char *s) { ENSURE_ASAN_INITED(); - if (FLAG_replace_str) { + if (flags()->replace_str) { uptr length = REAL(strlen)(s); ASAN_READ_RANGE(s, length + 1); } @@ -468,7 +468,7 @@ } ENSURE_ASAN_INITED(); uptr length = REAL(strlen)(s); - if (FLAG_replace_str) { + if (flags()->replace_str) { ASAN_READ_RANGE(s, length + 1); } return length; @@ -508,7 +508,7 @@ 
INTERCEPTOR(char*, strncpy, char *to, const char *from, uptr size) { ENSURE_ASAN_INITED(); - if (FLAG_replace_str) { + if (flags()->replace_str) { uptr from_size = Min(size, MaybeRealStrnlen(from, size) + 1); CHECK_RANGES_OVERLAP("strncpy", to, from_size, from, from_size); ASAN_READ_RANGE(from, from_size); @@ -521,7 +521,7 @@ INTERCEPTOR(uptr, strnlen, const char *s, uptr maxlen) { ENSURE_ASAN_INITED(); uptr length = REAL(strnlen)(s, maxlen); - if (FLAG_replace_str) { + if (flags()->replace_str) { ASAN_READ_RANGE(s, Min(length + 1, maxlen)); } return length; @@ -548,7 +548,7 @@ INTERCEPTOR(long, strtol, const char *nptr, // NOLINT char **endptr, int base) { ENSURE_ASAN_INITED(); - if (!FLAG_replace_str) { + if (!flags()->replace_str) { return REAL(strtol)(nptr, endptr, base); } char *real_endptr; @@ -565,7 +565,7 @@ INTERCEPTOR(int, atoi, const char *nptr) { ENSURE_ASAN_INITED(); - if (!FLAG_replace_str) { + if (!flags()->replace_str) { return REAL(atoi)(nptr); } char *real_endptr; @@ -581,7 +581,7 @@ INTERCEPTOR(long, atol, const char *nptr) { // NOLINT ENSURE_ASAN_INITED(); - if (!FLAG_replace_str) { + if (!flags()->replace_str) { return REAL(atol)(nptr); } char *real_endptr; @@ -595,7 +595,7 @@ INTERCEPTOR(long long, strtoll, const char *nptr, // NOLINT char **endptr, int base) { ENSURE_ASAN_INITED(); - if (!FLAG_replace_str) { + if (!flags()->replace_str) { return REAL(strtoll)(nptr, endptr, base); } char *real_endptr; @@ -615,7 +615,7 @@ INTERCEPTOR(long long, atoll, const char *nptr) { // NOLINT ENSURE_ASAN_INITED(); - if (!FLAG_replace_str) { + if (!flags()->replace_str) { return REAL(atoll)(nptr); } char *real_endptr; @@ -627,7 +627,7 @@ #endif // ASAN_INTERCEPT_ATOLL_AND_STRTOLL #define ASAN_INTERCEPT_FUNC(name) do { \ - if (!INTERCEPT_FUNCTION(name) && FLAG_v > 0) \ + if (!INTERCEPT_FUNCTION(name) && flags()->verbosity > 0) \ Report("AddressSanitizer: failed to intercept '" #name "'\n"); \ } while (0) @@ -731,7 +731,7 @@ InitializeMacInterceptors(); 
#endif - if (FLAG_v > 0) { + if (flags()->verbosity > 0) { Report("AddressSanitizer: libc interceptors initialized\n"); } } Modified: compiler-rt/trunk/lib/asan/asan_internal.h URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_internal.h?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_internal.h (original) +++ compiler-rt/trunk/lib/asan/asan_internal.h Mon Jul 9 09:36:04 2012 @@ -136,30 +136,34 @@ # define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE true #endif // __APPLE__ -extern uptr FLAG_quarantine_size; -extern s64 FLAG_demangle; -extern bool FLAG_symbolize; -extern s64 FLAG_v; -extern uptr FLAG_redzone; -extern s64 FLAG_debug; -extern bool FLAG_poison_shadow; -extern s64 FLAG_report_globals; -extern uptr FLAG_malloc_context_size; -extern bool FLAG_replace_str; -extern bool FLAG_replace_intrin; -extern bool FLAG_replace_cfallocator; -extern bool FLAG_mac_ignore_invalid_free; -extern bool FLAG_fast_unwind; -extern bool FLAG_use_fake_stack; -extern uptr FLAG_max_malloc_fill_size; -extern s64 FLAG_exitcode; -extern bool FLAG_allow_user_poisoning; -extern s64 FLAG_sleep_before_dying; -extern bool FLAG_handle_segv; -extern bool FLAG_use_sigaltstack; -extern bool FLAG_check_malloc_usable_size; -extern bool FLAG_unmap_shadow_on_exit; -extern bool FLAG_abort_on_error; +struct Flags { + int quarantine_size; + bool symbolize; + int verbosity; + int redzone; + int debug; + bool poison_shadow; + int report_globals; + int malloc_context_size; + bool replace_str; + bool replace_intrin; + bool replace_cfallocator; + bool mac_ignore_invalid_free; + bool use_fake_stack; + int max_malloc_fill_size; + int exitcode; + bool allow_user_poisoning; + int sleep_before_dying; + bool handle_segv; + bool use_sigaltstack; + bool check_malloc_usable_size; + bool unmap_shadow_on_exit; + bool abort_on_error; + bool atexit; + bool disable_core; +}; +Flags *flags(); 
+void InitializeFlags(Flags *f, const char *env); extern int asan_inited; // Used to avoid infinite recursion in __asan_init(). Modified: compiler-rt/trunk/lib/asan/asan_linux.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_linux.cc?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_linux.cc (original) +++ compiler-rt/trunk/lib/asan/asan_linux.cc Mon Jul 9 09:36:04 2012 @@ -69,7 +69,7 @@ } bool AsanInterceptsSignal(int signum) { - return signum == SIGSEGV && FLAG_handle_segv; + return signum == SIGSEGV && flags()->handle_segv; } AsanLock::AsanLock(LinkerInitialized) { Modified: compiler-rt/trunk/lib/asan/asan_mac.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_mac.cc?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_mac.cc (original) +++ compiler-rt/trunk/lib/asan/asan_mac.cc Mon Jul 9 09:36:04 2012 @@ -89,7 +89,7 @@ } bool AsanInterceptsSignal(int signum) { - return (signum == SIGSEGV || signum == SIGBUS) && FLAG_handle_segv; + return (signum == SIGSEGV || signum == SIGBUS) && flags()->handle_segv; } AsanLock::AsanLock(LinkerInitialized) { @@ -149,7 +149,7 @@ if (island_allocator_pos != (void*)kIslandBeg) { return KERN_NO_SPACE; } - if (FLAG_v) { + if (flags()->verbosity) { Report("Mapped pages %p--%p for branch islands.\n", (void*)kIslandBeg, (void*)kIslandEnd); } @@ -158,7 +158,7 @@ }; *ptr = island_allocator_pos; island_allocator_pos = (char*)island_allocator_pos + kPageSize; - if (FLAG_v) { + if (flags()->verbosity) { Report("Branch island allocated at %p\n", *ptr); } return err_none; @@ -241,7 +241,7 @@ void asan_dispatch_call_block_and_release(void *block) { GET_STACK_TRACE_HERE(kStackTraceMax); asan_block_context_t *context = (asan_block_context_t*)block; - if (FLAG_v >= 2) { + if 
(flags()->verbosity >= 2) { Report("asan_dispatch_call_block_and_release(): " "context: %p, pthread_self: %p\n", block, pthread_self()); @@ -280,7 +280,7 @@ dispatch_function_t func) { GET_STACK_TRACE_HERE(kStackTraceMax); asan_block_context_t *asan_ctxt = alloc_asan_context(ctxt, func, &stack); - if (FLAG_v >= 2) { + if (flags()->verbosity >= 2) { Report("dispatch_async_f(): context: %p, pthread_self: %p\n", asan_ctxt, pthread_self()); PRINT_CURRENT_STACK(); @@ -293,7 +293,7 @@ dispatch_function_t func) { GET_STACK_TRACE_HERE(kStackTraceMax); asan_block_context_t *asan_ctxt = alloc_asan_context(ctxt, func, &stack); - if (FLAG_v >= 2) { + if (flags()->verbosity >= 2) { Report("dispatch_sync_f(): context: %p, pthread_self: %p\n", asan_ctxt, pthread_self()); PRINT_CURRENT_STACK(); @@ -307,7 +307,7 @@ dispatch_function_t func) { GET_STACK_TRACE_HERE(kStackTraceMax); asan_block_context_t *asan_ctxt = alloc_asan_context(ctxt, func, &stack); - if (FLAG_v >= 2) { + if (flags()->verbosity >= 2) { Report("dispatch_after_f: %p\n", asan_ctxt); PRINT_CURRENT_STACK(); } @@ -319,7 +319,7 @@ dispatch_function_t func) { GET_STACK_TRACE_HERE(kStackTraceMax); asan_block_context_t *asan_ctxt = alloc_asan_context(ctxt, func, &stack); - if (FLAG_v >= 2) { + if (flags()->verbosity >= 2) { Report("dispatch_barrier_async_f(): context: %p, pthread_self: %p\n", asan_ctxt, pthread_self()); PRINT_CURRENT_STACK(); @@ -333,7 +333,7 @@ dispatch_function_t func) { GET_STACK_TRACE_HERE(kStackTraceMax); asan_block_context_t *asan_ctxt = alloc_asan_context(ctxt, func, &stack); - if (FLAG_v >= 2) { + if (flags()->verbosity >= 2) { Report("dispatch_group_async_f(): context: %p, pthread_self: %p\n", asan_ctxt, pthread_self()); PRINT_CURRENT_STACK(); @@ -350,7 +350,7 @@ // libdispatch API. 
extern "C" void *wrap_workitem_func(void *arg) { - if (FLAG_v >= 2) { + if (flags()->verbosity >= 2) { Report("wrap_workitem_func: %p, pthread_self: %p\n", arg, pthread_self()); } asan_block_context_t *ctxt = (asan_block_context_t*)arg; @@ -370,7 +370,7 @@ asan_ctxt->block = workitem_arg; asan_ctxt->func = (dispatch_function_t)workitem_func; asan_ctxt->parent_tid = asanThreadRegistry().GetCurrentTidOrInvalid(); - if (FLAG_v >= 2) { + if (flags()->verbosity >= 2) { Report("pthread_workqueue_additem_np: %p\n", asan_ctxt); PRINT_CURRENT_STACK(); } @@ -415,7 +415,7 @@ // We don't need to intercept pthread_workqueue_additem_np() to support the // libdispatch API, but it helps us to debug the unsupported functions. Let's // intercept it only during verbose runs. - if (FLAG_v >= 2) { + if (flags()->verbosity >= 2) { CHECK(INTERCEPT_FUNCTION(pthread_workqueue_additem_np)); } // Normally CFStringCreateCopy should not copy constant CF strings. @@ -429,7 +429,7 @@ // Some of the library functions call free() directly, so we have to // intercept it. CHECK(INTERCEPT_FUNCTION(free)); - if (FLAG_replace_cfallocator) { + if (flags()->replace_cfallocator) { CHECK(INTERCEPT_FUNCTION(__CFInitialize)); } } Modified: compiler-rt/trunk/lib/asan/asan_malloc_mac.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_malloc_mac.cc?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_malloc_mac.cc (original) +++ compiler-rt/trunk/lib/asan/asan_malloc_mac.cc Mon Jul 9 09:36:04 2012 @@ -65,7 +65,7 @@ malloc_zone_free(zone, ptr); #endif } else { - if (FLAG_replace_cfallocator) { + if (flags()->replace_cfallocator) { // Make sure we're not hitting the previous page. This may be incorrect // if ASan's malloc returns an address ending with 0xFF8, which will be // then padded to a page boundary with a CFAllocatorRef. 
@@ -95,7 +95,7 @@ // See http://code.google.com/p/address-sanitizer/issues/detail?id=87 // and http://opensource.apple.com/source/CF/CF-550.43/CFRuntime.c INTERCEPTOR(void, __CFInitialize) { - CHECK(FLAG_replace_cfallocator); + CHECK(flags()->replace_cfallocator); CHECK(asan_inited); REAL(__CFInitialize)(); if (!cf_asan) ReplaceCFAllocator(); @@ -169,7 +169,7 @@ void ALWAYS_INLINE free_common(void *context, void *ptr) { if (!ptr) return; - if (!FLAG_mac_ignore_invalid_free || asan_mz_size(ptr)) { + if (!flags()->mac_ignore_invalid_free || asan_mz_size(ptr)) { GET_STACK_TRACE_HERE_FOR_FREE(ptr); asan_free(ptr, &stack); } else { @@ -403,7 +403,7 @@ // Make sure the default allocator was replaced. CHECK(malloc_default_zone() == &asan_zone); - if (FLAG_replace_cfallocator) { + if (flags()->replace_cfallocator) { // If __CFInitialize() hasn't been called yet, cf_asan will be created and // installed as the default allocator after __CFInitialize() finishes (see // the interceptor for __CFInitialize() above). Otherwise install cf_asan Modified: compiler-rt/trunk/lib/asan/asan_poisoning.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_poisoning.cc?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_poisoning.cc (original) +++ compiler-rt/trunk/lib/asan/asan_poisoning.cc Mon Jul 9 09:36:04 2012 @@ -75,10 +75,10 @@ // * if user asks to unpoison region [left, right), the program unpoisons // at most [AlignDown(left), right). 
void __asan_poison_memory_region(void const volatile *addr, uptr size) { - if (!FLAG_allow_user_poisoning || size == 0) return; + if (!flags()->allow_user_poisoning || size == 0) return; uptr beg_addr = (uptr)addr; uptr end_addr = beg_addr + size; - if (FLAG_v >= 1) { + if (flags()->verbosity >= 1) { Printf("Trying to poison memory region [%p, %p)\n", (void*)beg_addr, (void*)end_addr); } @@ -117,10 +117,10 @@ } void __asan_unpoison_memory_region(void const volatile *addr, uptr size) { - if (!FLAG_allow_user_poisoning || size == 0) return; + if (!flags()->allow_user_poisoning || size == 0) return; uptr beg_addr = (uptr)addr; uptr end_addr = beg_addr + size; - if (FLAG_v >= 1) { + if (flags()->verbosity >= 1) { Printf("Trying to unpoison memory region [%p, %p)\n", (void*)beg_addr, (void*)end_addr); } Modified: compiler-rt/trunk/lib/asan/asan_posix.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_posix.cc?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_posix.cc (original) +++ compiler-rt/trunk/lib/asan/asan_posix.cc Mon Jul 9 09:36:04 2012 @@ -40,9 +40,9 @@ REAL(memset)(&sigact, 0, sizeof(sigact)); sigact.sa_sigaction = handler; sigact.sa_flags = SA_SIGINFO; - if (FLAG_use_sigaltstack) sigact.sa_flags |= SA_ONSTACK; + if (flags()->use_sigaltstack) sigact.sa_flags |= SA_ONSTACK; CHECK(0 == REAL(sigaction)(signum, &sigact, 0)); - if (FLAG_v >= 1) { + if (flags()->verbosity >= 1) { Report("Installed the sigaction for signal %d\n", signum); } } @@ -76,7 +76,7 @@ altstack.ss_flags = 0; altstack.ss_size = kAltStackSize; CHECK(0 == sigaltstack(&altstack, 0)); - if (FLAG_v > 0) { + if (flags()->verbosity > 0) { Report("Alternative stack for T%d set: [%p,%p)\n", asanThreadRegistry().GetCurrentTidOrInvalid(), altstack.ss_sp, (char*)altstack.ss_sp + altstack.ss_size); @@ -96,7 +96,7 @@ // Set the alternate signal stack for the main thread. 
// This will cause SetAlternateSignalStack to be called twice, but the stack // will be actually set only once. - if (FLAG_use_sigaltstack) SetAlternateSignalStack(); + if (flags()->use_sigaltstack) SetAlternateSignalStack(); MaybeInstallSigaction(SIGSEGV, ASAN_OnSIGSEGV); MaybeInstallSigaction(SIGBUS, ASAN_OnSIGSEGV); } Modified: compiler-rt/trunk/lib/asan/asan_rtl.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_rtl.cc?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_rtl.cc (original) +++ compiler-rt/trunk/lib/asan/asan_rtl.cc Mon Jul 9 09:36:04 2012 @@ -22,6 +22,7 @@ #include "asan_thread.h" #include "asan_thread_registry.h" #include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_flags.h" #include "sanitizer_common/sanitizer_libc.h" namespace __sanitizer { @@ -33,17 +34,17 @@ // Don't die twice - run a busy loop. while (1) { } } - if (FLAG_sleep_before_dying) { - Report("Sleeping for %zd second(s)\n", FLAG_sleep_before_dying); - SleepForSeconds(FLAG_sleep_before_dying); + if (flags()->sleep_before_dying) { + Report("Sleeping for %zd second(s)\n", flags()->sleep_before_dying); + SleepForSeconds(flags()->sleep_before_dying); } - if (FLAG_unmap_shadow_on_exit) + if (flags()->unmap_shadow_on_exit) UnmapOrDie((void*)kLowShadowBeg, kHighShadowEnd - kLowShadowBeg); if (death_callback) death_callback(); - if (FLAG_abort_on_error) + if (flags()->abort_on_error) Abort(); - Exit(FLAG_exitcode); + Exit(flags()->exitcode); } void CheckFailed(const char *file, int line, const char *cond, u64 v1, u64 v2) { @@ -60,31 +61,97 @@ // -------------------------- Flags ------------------------- {{{1 static const uptr kMallocContextSize = 30; -uptr FLAG_malloc_context_size = kMallocContextSize; -uptr FLAG_max_malloc_fill_size = 0; -s64 FLAG_v = 0; -uptr FLAG_redzone = (ASAN_LOW_MEMORY) ? 
64 : 128; // power of two, >= 32 -uptr FLAG_quarantine_size = (ASAN_LOW_MEMORY) ? 1UL << 24 : 1UL << 28; -static s64 FLAG_atexit = 0; -bool FLAG_poison_shadow = 1; -s64 FLAG_report_globals = 1; -bool FLAG_handle_segv = ASAN_NEEDS_SEGV; -bool FLAG_use_sigaltstack = 0; -bool FLAG_symbolize = 0; -s64 FLAG_demangle = 1; -s64 FLAG_debug = 0; -bool FLAG_replace_cfallocator = 1; // Used on Mac only. -bool FLAG_mac_ignore_invalid_free = 0; // Used on Mac only. -bool FLAG_replace_str = 1; -bool FLAG_replace_intrin = 1; -bool FLAG_use_fake_stack = 1; -s64 FLAG_exitcode = ASAN_DEFAULT_FAILURE_EXITCODE; -bool FLAG_allow_user_poisoning = 1; -s64 FLAG_sleep_before_dying = 0; -bool FLAG_abort_on_error = 0; -bool FLAG_unmap_shadow_on_exit = 0; -bool FLAG_disable_core = __WORDSIZE == 64; -bool FLAG_check_malloc_usable_size = 1; +static Flags asan_flags; + +Flags *flags() { + return &asan_flags; +} + +// Can be overriden in frontend. +void WEAK OverrideFlags(Flags *f) { + (void)f; +} + +static void ParseFlagsFromString(Flags *f, const char *str) { + ParseFlag(str, &f->quarantine_size, "quarantine_size"); + ParseFlag(str, &f->symbolize, "symbolize"); + ParseFlag(str, &f->verbosity, "verbosity"); + ParseFlag(str, &f->redzone, "redzone"); + CHECK(f->redzone >= 16); + CHECK(IsPowerOfTwo(f->redzone)); + + ParseFlag(str, &f->debug, "debug"); + ParseFlag(str, &f->poison_shadow, "poison_shadow"); + ParseFlag(str, &f->report_globals, "report_globals"); + ParseFlag(str, &f->malloc_context_size, "malloc_context_size"); + CHECK(f->malloc_context_size <= kMallocContextSize); + + ParseFlag(str, &f->replace_str, "replace_str"); + ParseFlag(str, &f->replace_intrin, "replace_intrin"); + ParseFlag(str, &f->replace_cfallocator, "replace_cfallocator"); + ParseFlag(str, &f->mac_ignore_invalid_free, "mac_ignore_invalid_free"); + ParseFlag(str, &f->use_fake_stack, "use_fake_stack"); + ParseFlag(str, &f->max_malloc_fill_size, "max_malloc_fill_size"); + ParseFlag(str, &f->exitcode, "exitcode"); + 
ParseFlag(str, &f->allow_user_poisoning, "allow_user_poisoning"); + ParseFlag(str, &f->sleep_before_dying, "sleep_before_dying"); + ParseFlag(str, &f->handle_segv, "handle_segv"); + ParseFlag(str, &f->use_sigaltstack, "use_sigaltstack"); + // Allow the users to work around the bug in Nvidia drivers prior to 295.*. + ParseFlag(str, &f->check_malloc_usable_size, "check_malloc_usable_size"); + ParseFlag(str, &f->unmap_shadow_on_exit, "unmap_shadow_on_exit"); + ParseFlag(str, &f->abort_on_error, "abort_on_error"); + ParseFlag(str, &f->atexit, "atexit"); + // By default, disable core dumper on 64-bit -- + // it makes little sense to dump 16T+ core. + ParseFlag(str, &f->disable_core, "disable_core"); +} + +void InitializeFlags(Flags *f, const char *env) { + internal_memset(f, 0, sizeof(*f)); + + f->quarantine_size = (ASAN_LOW_MEMORY) ? 1UL << 24 : 1UL << 28; + f->symbolize = false; + f->verbosity = 0; + f->redzone = (ASAN_LOW_MEMORY) ? 64 : 128; // power of two, >= 32. + f->debug = 0; + f->poison_shadow = true; + f->report_globals = 1; + f->malloc_context_size = kMallocContextSize; + f->replace_str = true; + f->replace_intrin = true; + f->replace_cfallocator = true; // Used on Mac only. + f->mac_ignore_invalid_free = false; // Used on Mac only. + f->use_fake_stack = true; + f->max_malloc_fill_size = 0; + f->exitcode = ASAN_DEFAULT_FAILURE_EXITCODE; + f->allow_user_poisoning = true; + f->sleep_before_dying = 0; + f->handle_segv = ASAN_NEEDS_SEGV; + f->use_sigaltstack = false; + f->check_malloc_usable_size = true; + f->unmap_shadow_on_exit = false; + f->abort_on_error = false; + f->atexit = false; + f->disable_core = (__WORDSIZE == 64); + + // Let a frontend override. + OverrideFlags(f); + + // Override from user-specified string. 
+#if !defined(_WIN32) + if (__asan_default_options) { + ParseFlagsFromString(f, __asan_default_options); + if (flags()->verbosity) { + Report("Using the defaults from __asan_default_options: %s\n", + __asan_default_options); + } + } +#endif + + // Override from command line. + ParseFlagsFromString(f, env); +} // -------------------------- Globals --------------------- {{{1 int asan_inited; @@ -264,44 +331,6 @@ } // -------------------------- Init ------------------- {{{1 -static void IntFlagValue(const char *flags, const char *flag, - s64 *out_val) { - if (!flags) return; - const char *str = internal_strstr(flags, flag); - if (!str) return; - *out_val = internal_atoll(str + internal_strlen(flag)); -} - -static void BoolFlagValue(const char *flags, const char *flag, - bool *out_val) { - if (!flags) return; - const char *str = internal_strstr(flags, flag); - if (!str) return; - const char *suffix = str + internal_strlen(flag); - if (!internal_atoll(str + internal_strlen(flag))) { - if (suffix[0] == '0') { - *out_val = false; - return; - } - } else { - *out_val = true; - return; - } - switch (suffix[0]) { - case 'y': - case 't': { - *out_val = true; - break; - } - case 'n': - case 'f': { - *out_val = false; - break; - } - default: return; - } -} - static void asan_atexit() { AsanPrintf("AddressSanitizer exit stats:\n"); __asan_print_accumulated_stats(); @@ -313,8 +342,8 @@ using namespace __asan; // NOLINT int __asan_set_error_exit_code(int exit_code) { - int old = FLAG_exitcode; - FLAG_exitcode = exit_code; + int old = flags()->exitcode; + flags()->exitcode = exit_code; return old; } @@ -403,7 +432,7 @@ access_size ? (is_write ? 
"WRITE" : "READ") : "ACCESS", access_size, (void*)addr, curr_tid); - if (FLAG_debug) { + if (flags()->debug) { PrintBytes("PC: ", (uptr*)pc); } @@ -437,48 +466,6 @@ Die(); } -static void ParseAsanOptions(const char *options) { - IntFlagValue(options, "malloc_context_size=", - (s64*)&FLAG_malloc_context_size); - CHECK(FLAG_malloc_context_size <= kMallocContextSize); - - IntFlagValue(options, "max_malloc_fill_size=", - (s64*)&FLAG_max_malloc_fill_size); - - IntFlagValue(options, "verbosity=", &FLAG_v); - - IntFlagValue(options, "redzone=", (s64*)&FLAG_redzone); - CHECK(FLAG_redzone >= 16); - CHECK(IsPowerOfTwo(FLAG_redzone)); - IntFlagValue(options, "quarantine_size=", (s64*)&FLAG_quarantine_size); - - IntFlagValue(options, "atexit=", &FLAG_atexit); - BoolFlagValue(options, "poison_shadow=", &FLAG_poison_shadow); - IntFlagValue(options, "report_globals=", &FLAG_report_globals); - BoolFlagValue(options, "handle_segv=", &FLAG_handle_segv); - BoolFlagValue(options, "use_sigaltstack=", &FLAG_use_sigaltstack); - BoolFlagValue(options, "symbolize=", &FLAG_symbolize); - IntFlagValue(options, "demangle=", &FLAG_demangle); - IntFlagValue(options, "debug=", &FLAG_debug); - BoolFlagValue(options, "replace_cfallocator=", &FLAG_replace_cfallocator); - BoolFlagValue(options, "mac_ignore_invalid_free=", - &FLAG_mac_ignore_invalid_free); - BoolFlagValue(options, "replace_str=", &FLAG_replace_str); - BoolFlagValue(options, "replace_intrin=", &FLAG_replace_intrin); - BoolFlagValue(options, "use_fake_stack=", &FLAG_use_fake_stack); - IntFlagValue(options, "exitcode=", &FLAG_exitcode); - BoolFlagValue(options, "allow_user_poisoning=", &FLAG_allow_user_poisoning); - IntFlagValue(options, "sleep_before_dying=", &FLAG_sleep_before_dying); - BoolFlagValue(options, "abort_on_error=", &FLAG_abort_on_error); - BoolFlagValue(options, "unmap_shadow_on_exit=", &FLAG_unmap_shadow_on_exit); - // By default, disable core dumper on 64-bit -- - // it makes little sense to dump 16T+ core. 
- BoolFlagValue(options, "disable_core=", &FLAG_disable_core); - - // Allow the users to work around the bug in Nvidia drivers prior to 295.*. - BoolFlagValue(options, "check_malloc_usable_size=", - &FLAG_check_malloc_usable_size); -} void __asan_init() { if (asan_inited) return; @@ -487,24 +474,15 @@ // Make sure we are not statically linked. AsanDoesNotSupportStaticLinkage(); -#if !defined(_WIN32) - if (__asan_default_options) { - ParseAsanOptions(__asan_default_options); - if (FLAG_v) { - Report("Using the defaults from __asan_default_options: %s\n", - __asan_default_options); - } - } -#endif - // flags + // Initialize flags. const char *options = GetEnv("ASAN_OPTIONS"); - ParseAsanOptions(options); + InitializeFlags(flags(), options); - if (FLAG_v && options) { + if (flags()->verbosity && options) { Report("Parsed ASAN_OPTIONS: %s\n", options); } - if (FLAG_atexit) { + if (flags()->atexit) { Atexit(asan_atexit); } @@ -514,7 +492,7 @@ ReplaceSystemMalloc(); ReplaceOperatorsNewAndDelete(); - if (FLAG_v) { + if (flags()->verbosity) { Printf("|| `[%p, %p]` || HighMem ||\n", (void*)kHighMemBeg, (void*)kHighMemEnd); Printf("|| `[%p, %p]` || HighShadow ||\n", @@ -530,8 +508,8 @@ (void*)MEM_TO_SHADOW(kLowShadowEnd), (void*)MEM_TO_SHADOW(kHighShadowBeg), (void*)MEM_TO_SHADOW(kHighShadowEnd)); - Printf("red_zone=%zu\n", (uptr)FLAG_redzone); - Printf("malloc_context_size=%zu\n", (uptr)FLAG_malloc_context_size); + Printf("red_zone=%zu\n", (uptr)flags()->redzone); + Printf("malloc_context_size=%zu\n", (uptr)flags()->malloc_context_size); Printf("SHADOW_SCALE: %zx\n", (uptr)SHADOW_SCALE); Printf("SHADOW_GRANULARITY: %zx\n", (uptr)SHADOW_GRANULARITY); @@ -539,7 +517,7 @@ CHECK(SHADOW_SCALE >= 3 && SHADOW_SCALE <= 7); } - if (FLAG_disable_core) { + if (flags()->disable_core) { DisableCoreDumper(); } @@ -574,7 +552,7 @@ asanThreadRegistry().GetMain()->ThreadStart(); force_interface_symbols(); // no-op. 
- if (FLAG_v) { + if (flags()->verbosity) { Report("AddressSanitizer Init done\n"); } } Modified: compiler-rt/trunk/lib/asan/asan_stack.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_stack.cc?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_stack.cc (original) +++ compiler-rt/trunk/lib/asan/asan_stack.cc Mon Jul 9 09:36:04 2012 @@ -45,7 +45,7 @@ uptr pc = addr[i]; AddressInfo addr_frames[64]; uptr addr_frames_num = 0; - if (FLAG_symbolize) { + if (flags()->symbolize) { bool last_frame = (i == size - 1) || !addr[i + 1]; addr_frames_num = SymbolizeCode(pc - !last_frame, addr_frames, ASAN_ARRAY_SIZE(addr_frames)); Modified: compiler-rt/trunk/lib/asan/asan_stack.h URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_stack.h?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_stack.h (original) +++ compiler-rt/trunk/lib/asan/asan_stack.h Mon Jul 9 09:36:04 2012 @@ -90,10 +90,10 @@ AsanStackTrace::GetCurrentPc(), GET_CURRENT_FRAME()) #define GET_STACK_TRACE_HERE_FOR_MALLOC \ - GET_STACK_TRACE_HERE(FLAG_malloc_context_size) + GET_STACK_TRACE_HERE(flags()->malloc_context_size) #define GET_STACK_TRACE_HERE_FOR_FREE(ptr) \ - GET_STACK_TRACE_HERE(FLAG_malloc_context_size) + GET_STACK_TRACE_HERE(flags()->malloc_context_size) #define PRINT_CURRENT_STACK() \ { \ Modified: compiler-rt/trunk/lib/asan/asan_thread.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_thread.cc?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_thread.cc (original) +++ compiler-rt/trunk/lib/asan/asan_thread.cc Mon Jul 9 09:36:04 2012 @@ -42,7 +42,7 @@ void AsanThreadSummary::TSDDtor(void *tsd) { AsanThreadSummary 
*summary = (AsanThreadSummary*)tsd; - if (FLAG_v >= 1) { + if (flags()->verbosity >= 1) { Report("T%d TSDDtor\n", summary->tid()); } if (summary->thread()) { @@ -51,7 +51,7 @@ } void AsanThread::Destroy() { - if (FLAG_v >= 1) { + if (flags()->verbosity >= 1) { Report("T%d exited\n", tid()); } @@ -71,7 +71,7 @@ CHECK(AddrIsInMem(stack_bottom_)); CHECK(AddrIsInMem(stack_top_)); ClearShadowForThreadStack(); - if (FLAG_v >= 1) { + if (flags()->verbosity >= 1) { int local = 0; Report("T%d: stack [%p,%p) size 0x%zx; local=%p\n", tid(), (void*)stack_bottom_, (void*)stack_top_, @@ -82,7 +82,7 @@ thread_return_t AsanThread::ThreadStart() { Init(); - if (FLAG_use_sigaltstack) SetAlternateSignalStack(); + if (flags()->use_sigaltstack) SetAlternateSignalStack(); if (!start_routine_) { // start_routine_ == 0 if we're on the main thread or on one of the @@ -94,7 +94,7 @@ thread_return_t res = start_routine_(arg_); malloc_storage().CommitBack(); - if (FLAG_use_sigaltstack) UnsetAlternateSignalStack(); + if (flags()->use_sigaltstack) UnsetAlternateSignalStack(); this->Destroy(); Modified: compiler-rt/trunk/lib/asan/asan_thread_registry.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_thread_registry.cc?rev=159933&r1=159932&r2=159933&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_thread_registry.cc (original) +++ compiler-rt/trunk/lib/asan/asan_thread_registry.cc Mon Jul 9 09:36:04 2012 @@ -88,7 +88,7 @@ void AsanThreadRegistry::SetCurrent(AsanThread *t) { CHECK(t->summary()); - if (FLAG_v >= 2) { + if (flags()->verbosity >= 2) { Report("SetCurrent: %p for thread %p\n", t->summary(), (void*)GetThreadSelf()); } From samsonov at google.com Mon Jul 9 10:11:28 2012 From: samsonov at google.com (Alexey Samsonov) Date: Mon, 09 Jul 2012 15:11:28 -0000 Subject: [llvm-commits] [compiler-rt] r159934 - in /compiler-rt/trunk/lib/asan: asan_internal.h asan_rtl.cc Message-ID: 
<20120709151128.6336F2A6C069@llvm.org> Author: samsonov Date: Mon Jul 9 10:11:28 2012 New Revision: 159934 URL: http://llvm.org/viewvc/llvm-project?rev=159934&view=rev Log: [ASan] cleanup: remove dead flag Modified: compiler-rt/trunk/lib/asan/asan_internal.h compiler-rt/trunk/lib/asan/asan_rtl.cc Modified: compiler-rt/trunk/lib/asan/asan_internal.h URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_internal.h?rev=159934&r1=159933&r2=159934&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_internal.h (original) +++ compiler-rt/trunk/lib/asan/asan_internal.h Mon Jul 9 10:11:28 2012 @@ -142,7 +142,6 @@ int verbosity; int redzone; int debug; - bool poison_shadow; int report_globals; int malloc_context_size; bool replace_str; Modified: compiler-rt/trunk/lib/asan/asan_rtl.cc URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/asan/asan_rtl.cc?rev=159934&r1=159933&r2=159934&view=diff ============================================================================== --- compiler-rt/trunk/lib/asan/asan_rtl.cc (original) +++ compiler-rt/trunk/lib/asan/asan_rtl.cc Mon Jul 9 10:11:28 2012 @@ -81,7 +81,6 @@ CHECK(IsPowerOfTwo(f->redzone)); ParseFlag(str, &f->debug, "debug"); - ParseFlag(str, &f->poison_shadow, "poison_shadow"); ParseFlag(str, &f->report_globals, "report_globals"); ParseFlag(str, &f->malloc_context_size, "malloc_context_size"); CHECK(f->malloc_context_size <= kMallocContextSize); @@ -115,7 +114,6 @@ f->verbosity = 0; f->redzone = (ASAN_LOW_MEMORY) ? 64 : 128; // power of two, >= 32. 
f->debug = 0; - f->poison_shadow = true; f->report_globals = 1; f->malloc_context_size = kMallocContextSize; f->replace_str = true; From baldrick at free.fr Mon Jul 9 11:00:16 2012 From: baldrick at free.fr (Duncan Sands) Date: Mon, 09 Jul 2012 18:00:16 +0200 Subject: [llvm-commits] [llvm] r159876 - in /llvm/trunk: lib/Transforms/InstCombine/InstructionCombining.cpp test/Transforms/InstCombine/badmalloc.ll test/Transforms/InstCombine/invoke.ll test/Transforms/InstCombine/malloc-free-delete.ll test/Transforms/InstCom In-Reply-To: <65B0029D4BCA46AABF0F161BEA01987D@PC07655> References: <20120706230925.C3A562A6C069@llvm.org> <4FF7E2A7.9030704@free.fr> <65B0029D4BCA46AABF0F161BEA01987D@PC07655> Message-ID: <4FFB0010.9020303@free.fr> Hi Nuno, On 08/07/12 22:01, Nuno Lopes wrote: >>> --- llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp (original) >>> +++ llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp Fri Jul 6 >>> 18:09:25 2012 >>> @@ -1137,12 +1137,29 @@ >>> } >>> } >>> if (IntrinsicInst *II = dyn_cast(U)) { >>> - if (II->getIntrinsicID() == Intrinsic::lifetime_start || >>> - II->getIntrinsicID() == Intrinsic::lifetime_end) { >>> + switch (II->getIntrinsicID()) { >>> + default: return false; >>> + case Intrinsic::memmove: >>> + case Intrinsic::memcpy: >>> + case Intrinsic::memset: { >>> + MemIntrinsic *MI = cast(II); >>> + if (MI->isVolatile() || MI->getRawDest() != V) >> >> why exclude volatile stores? If all that is being done to the allocated memory >> is doing a bunch of volatile stores to it, I don't see why you can't discard >> them and the allocation too. > > I thought about that, but.. I guess it's better not to mess around with > volatiles. And I don't even think it's legal (C standard wise) to remove it. we already remove volatile stores to alloca's, so why not to heap allocations too? Ciao, Duncan. 
From criswell at illinois.edu Mon Jul 9 11:12:05 2012 From: criswell at illinois.edu (John Criswell) Date: Mon, 9 Jul 2012 11:12:05 -0500 Subject: [llvm-commits] [llvm] r159876 - in /llvm/trunk: lib/Transforms/InstCombine/InstructionCombining.cpp test/Transforms/InstCombine/badmalloc.ll test/Transforms/InstCombine/invoke.ll test/Transforms/InstCombine/malloc-free-delete.ll test/Transforms/InstCom In-Reply-To: <4FFB0010.9020303@free.fr> References: <20120706230925.C3A562A6C069@llvm.org> <4FF7E2A7.9030704@free.fr> <65B0029D4BCA46AABF0F161BEA01987D@PC07655> <4FFB0010.9020303@free.fr> Message-ID: <4FFB02D5.9020109@illinois.edu> On 7/9/12 11:00 AM, Duncan Sands wrote: > Hi Nuno, > > On 08/07/12 22:01, Nuno Lopes wrote: >>>> --- llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp (original) >>>> +++ llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp Fri Jul 6 >>>> 18:09:25 2012 >>>> @@ -1137,12 +1137,29 @@ >>>> } >>>> } >>>> if (IntrinsicInst *II = dyn_cast(U)) { >>>> - if (II->getIntrinsicID() == Intrinsic::lifetime_start || >>>> - II->getIntrinsicID() == Intrinsic::lifetime_end) { >>>> + switch (II->getIntrinsicID()) { >>>> + default: return false; >>>> + case Intrinsic::memmove: >>>> + case Intrinsic::memcpy: >>>> + case Intrinsic::memset: { >>>> + MemIntrinsic *MI = cast(II); >>>> + if (MI->isVolatile() || MI->getRawDest() != V) >>> why exclude volatile stores? If all that is being done to the allocated memory >>> is doing a bunch of volatile stores to it, I don't see why you can't discard >>> them and the allocation too. >> I thought about that, but.. I guess it's better not to mess around with >> volatiles. And I don't even think it's legal (C standard wise) to remove it. > we already remove volatile stores to alloca's, so why not to heap allocations > too? IMHO, it is incorrect to optimize away volatile stores of any kind. 
The whole point of volatile is to tell the compiler that something weird is going on that it doesn't understand and that it should leave the memory access alone. I sometimes use (at the C language level) a volatile variable to ensure that a load or store doesn't get optimized away (e.g., when writing tests for SAFECode, I want to write simple tests, but I need the loads and stores to stay in place so that SAFECode instruments them). That said, I understand that people disagree with me on that issue, but if you do, you should fix the Language Reference Manual to indicate which volatiles can be removed and which ones cannot. -- John T. > > Ciao, Duncan. > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From richard.barton at arm.com Mon Jul 9 11:12:24 2012 From: richard.barton at arm.com (Richard Barton) Date: Mon, 09 Jul 2012 16:12:24 -0000 Subject: [llvm-commits] [llvm] r159935 - in /llvm/trunk: lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/thumb2-narrow-dp.ll Message-ID: <20120709161224.7C5CF2A6C069@llvm.org> Author: rbarton Date: Mon Jul 9 11:12:24 2012 New Revision: 159935 URL: http://llvm.org/viewvc/llvm-project?rev=159935&view=rev Log: Teach the assembler to use the narrow thumb encodings of various three-register dp instructions where permissible.
Added: llvm/trunk/test/MC/ARM/thumb2-narrow-dp.ll Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=159935&r1=159934&r2=159935&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Jul 9 11:12:24 2012 @@ -7287,6 +7287,86 @@ ITState.FirstCond = true; break; } + case ARM::t2LSLrr: + case ARM::t2LSRrr: + case ARM::t2ASRrr: + case ARM::t2SBCrr: + case ARM::t2RORrr: + case ARM::t2BICrr: + { + // Assemblers should use the narrow encodings of these instructions when permissable. + if ((isARMLowRegister(Inst.getOperand(1).getReg()) && + isARMLowRegister(Inst.getOperand(2).getReg())) && + Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && + (!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR || + inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) && + (!static_cast(Operands[3])->isToken() || + !static_cast(Operands[3])->getToken().equals_lower(".w"))) { + unsigned NewOpc; + switch (Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode"); + case ARM::t2LSLrr: NewOpc = ARM::tLSLrr; break; + case ARM::t2LSRrr: NewOpc = ARM::tLSRrr; break; + case ARM::t2ASRrr: NewOpc = ARM::tASRrr; break; + case ARM::t2SBCrr: NewOpc = ARM::tSBC; break; + case ARM::t2RORrr: NewOpc = ARM::tROR; break; + case ARM::t2BICrr: NewOpc = ARM::tBIC; break; + } + MCInst TmpInst; + TmpInst.setOpcode(NewOpc); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(5)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + return false; + } + case ARM::t2ANDrr: + case 
ARM::t2EORrr: + case ARM::t2ADCrr: + case ARM::t2ORRrr: + { + // Assemblers should use the narrow encodings of these instructions when permissable. + // These instructions are special in that they are commutable, so shorter encodings + // are available more often. + if ((isARMLowRegister(Inst.getOperand(1).getReg()) && + isARMLowRegister(Inst.getOperand(2).getReg())) && + (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() || + Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) && + (!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR || + inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) && + (!static_cast(Operands[3])->isToken() || + !static_cast(Operands[3])->getToken().equals_lower(".w"))) { + unsigned NewOpc; + switch (Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode"); + case ARM::t2ADCrr: NewOpc = ARM::tADC; break; + case ARM::t2ANDrr: NewOpc = ARM::tAND; break; + case ARM::t2EORrr: NewOpc = ARM::tEOR; break; + case ARM::t2ORRrr: NewOpc = ARM::tORR; break; + } + MCInst TmpInst; + TmpInst.setOpcode(NewOpc); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(5)); + if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) { + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + } else { + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(1)); + } + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + return false; + } } return false; } Added: llvm/trunk/test/MC/ARM/thumb2-narrow-dp.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/thumb2-narrow-dp.ll?rev=159935&view=auto ============================================================================== --- llvm/trunk/test/MC/ARM/thumb2-narrow-dp.ll (added) +++ llvm/trunk/test/MC/ARM/thumb2-narrow-dp.ll Mon Jul 9 11:12:24 2012 @@ -0,0 +1,807 @@ +// RUN: llvm-mc -triple thumbv7 -show-encoding < 
%s | FileCheck %s + +// Test each of the Thumb1 data-processing instructions +// The assembly syntax for these instructions allows an optional Rd register +// OP{S}{}{} {,} , +// Assemblers should chose the narrow thumb encoding when possible, i.e. +// - Rd == Rn +// - Rd, Rn and Rm are < r8 +// In addition, some operations are commutative, allowing the transormation +// when: +// - Rd == Rn || Rd == Rm +// - Rd, Rn and Rm are < r8 + +// AND (commutative) + ANDS r0, r2, r1 // Must be wide - 3 distinct registers + ANDS r2, r2, r1 // Should choose narrow + ANDS r2, r1, r2 // Should choose narrow - commutative + ANDS.W r0, r0, r1 // Explicitly wide + ANDS.W r3, r1, r3 + AND r0, r1, r0 // Must use wide encoding as not flag-setting + ANDS r7, r7, r1 // Should use narrow + ANDS r7, r1, r7 // Commutative + ANDS r8, r1, r8 // high registers so must use wide encoding + ANDS r8, r8, r1 + ANDS r0, r8, r0 + ANDS r1, r1, r8 + ANDS r2, r2, r1, lsl #1 // Must use wide - shifted register + ANDS r0, r1, r0, lsr #1 +// CHECK: ands.w r0, r2, r1 @ encoding: [0x12,0xea,0x01,0x00] +// CHECK: ands r2, r1 @ encoding: [0x0a,0x40] +// CHECK: ands r2, r1 @ encoding: [0x0a,0x40] +// CHECK: ands.w r0, r0, r1 @ encoding: [0x10,0xea,0x01,0x00] +// CHECK: ands.w r3, r1, r3 @ encoding: [0x11,0xea,0x03,0x03] +// CHECK: and.w r0, r1, r0 @ encoding: [0x01,0xea,0x00,0x00] +// CHECK: ands r7, r1 @ encoding: [0x0f,0x40] +// CHECK: ands r7, r1 @ encoding: [0x0f,0x40] +// CHECK: ands.w r8, r1, r8 @ encoding: [0x11,0xea,0x08,0x08] +// CHECK: ands.w r8, r8, r1 @ encoding: [0x18,0xea,0x01,0x08] +// CHECK: ands.w r0, r8, r0 @ encoding: [0x18,0xea,0x00,0x00] +// CHECK: ands.w r1, r1, r8 @ encoding: [0x11,0xea,0x08,0x01] +// CHECK: ands.w r2, r2, r1, lsl #1 @ encoding: [0x12,0xea,0x41,0x02] +// CHECK: ands.w r0, r1, r0, lsr #1 @ encoding: [0x11,0xea,0x50,0x00] + + IT EQ + ANDEQ r0, r2, r1 // Must be wide - 3 distinct registers + IT EQ + ANDEQ r3, r3, r1 // Should choose narrow + IT EQ + ANDEQ r3, r1, r3 // 
Should choose narrow - commutative + IT EQ + ANDEQ.W r0, r0, r1 // Explicitly wide + IT EQ + ANDEQ.W r2, r1, r2 + IT EQ + ANDSEQ r0, r1, r0 // Must use wide encoding as flag-setting + IT EQ + ANDEQ r7, r7, r1 // Should use narrow + IT EQ + ANDEQ r7, r1, r7 // Commutative + IT EQ + ANDEQ r8, r1, r8 // high registers so must use wide encoding + IT EQ + ANDEQ r8, r8, r1 + IT EQ + ANDEQ r4, r8, r4 + IT EQ + ANDEQ r4, r4, r8 + IT EQ + ANDEQ r0, r0, r1, lsl #1 // Must use wide - shifted register + IT EQ + ANDEQ r5, r1, r5, lsr #1 +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andeq.w r0, r2, r1 @ encoding: [0x02,0xea,0x01,0x00] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andeq r3, r1 @ encoding: [0x0b,0x40] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andeq r3, r1 @ encoding: [0x0b,0x40] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andeq.w r0, r0, r1 @ encoding: [0x00,0xea,0x01,0x00] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andeq.w r2, r1, r2 @ encoding: [0x01,0xea,0x02,0x02] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andseq.w r0, r1, r0 @ encoding: [0x11,0xea,0x00,0x00] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andeq r7, r1 @ encoding: [0x0f,0x40] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andeq r7, r1 @ encoding: [0x0f,0x40] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andeq.w r8, r1, r8 @ encoding: [0x01,0xea,0x08,0x08] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andeq.w r8, r8, r1 @ encoding: [0x08,0xea,0x01,0x08] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andeq.w r4, r8, r4 @ encoding: [0x08,0xea,0x04,0x04] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andeq.w r4, r4, r8 @ encoding: [0x04,0xea,0x08,0x04] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andeq.w r0, r0, r1, lsl #1 @ encoding: [0x00,0xea,0x41,0x00] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: andeq.w r5, r1, r5, lsr #1 @ encoding: [0x01,0xea,0x55,0x05] + +// EOR (commutative) + EORS r0, r2, r1 // 
Must be wide - 3 distinct registers + EORS r5, r5, r1 // Should choose narrow + EORS r5, r1, r5 // Should choose narrow - commutative + EORS.W r0, r0, r1 // Explicitly wide + EORS.W r2, r1, r2 + EOR r1, r1, r1 // Must use wide encoding as not flag-setting + EORS r7, r7, r1 // Should use narrow + EORS r7, r1, r7 // Commutative + EORS r8, r1, r8 // high registers so must use wide encoding + EORS r8, r8, r1 + EORS r6, r8, r6 + EORS r0, r0, r8 + EORS r2, r2, r1, lsl #1 // Must use wide - shifted register + EORS r0, r1, r0, lsr #1 +// CHECK: eors.w r0, r2, r1 @ encoding: [0x92,0xea,0x01,0x00] +// CHECK: eors r5, r1 @ encoding: [0x4d,0x40] +// CHECK: eors r5, r1 @ encoding: [0x4d,0x40] +// CHECK: eors.w r0, r0, r1 @ encoding: [0x90,0xea,0x01,0x00] +// CHECK: eors.w r2, r1, r2 @ encoding: [0x91,0xea,0x02,0x02] +// CHECK: eor.w r1, r1, r1 @ encoding: [0x81,0xea,0x01,0x01] +// CHECK: eors r7, r1 @ encoding: [0x4f,0x40] +// CHECK: eors r7, r1 @ encoding: [0x4f,0x40] +// CHECK: eors.w r8, r1, r8 @ encoding: [0x91,0xea,0x08,0x08] +// CHECK: eors.w r8, r8, r1 @ encoding: [0x98,0xea,0x01,0x08] +// CHECK: eors.w r6, r8, r6 @ encoding: [0x98,0xea,0x06,0x06] +// CHECK: eors.w r0, r0, r8 @ encoding: [0x90,0xea,0x08,0x00] +// CHECK: eors.w r2, r2, r1, lsl #1 @ encoding: [0x92,0xea,0x41,0x02] +// CHECK: eors.w r0, r1, r0, lsr #1 @ encoding: [0x91,0xea,0x50,0x00] + + IT EQ + EOREQ r3, r2, r1 // Must be wide - 3 distinct registers + IT EQ + EOREQ r0, r0, r1 // Should choose narrow + IT EQ + EOREQ r2, r1, r2 // Should choose narrow - commutative + IT EQ + EOREQ.W r3, r3, r1 // Explicitly wide + IT EQ + EOREQ.W r0, r1, r0 + IT EQ + EORSEQ r1, r1, r1 // Must use wide encoding as flag-setting + IT EQ + EOREQ r7, r7, r1 // Should use narrow + IT EQ + EOREQ r7, r1, r7 // Commutative + IT EQ + EOREQ r8, r1, r8 // high registers so must use wide encoding + IT EQ + EOREQ r8, r8, r1 + IT EQ + EOREQ r0, r8, r0 + IT EQ + EOREQ r3, r3, r8 + IT EQ + EOREQ r4, r4, r1, lsl #1 // Must use wide - shifted 
register + IT EQ + EOREQ r0, r1, r0, lsr #1 +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eoreq.w r3, r2, r1 @ encoding: [0x82,0xea,0x01,0x03] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eoreq r0, r1 @ encoding: [0x48,0x40] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eoreq r2, r1 @ encoding: [0x4a,0x40] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eoreq.w r3, r3, r1 @ encoding: [0x83,0xea,0x01,0x03] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eoreq.w r0, r1, r0 @ encoding: [0x81,0xea,0x00,0x00] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eorseq.w r1, r1, r1 @ encoding: [0x91,0xea,0x01,0x01] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eoreq r7, r1 @ encoding: [0x4f,0x40] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eoreq r7, r1 @ encoding: [0x4f,0x40] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eoreq.w r8, r1, r8 @ encoding: [0x81,0xea,0x08,0x08] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eoreq.w r8, r8, r1 @ encoding: [0x88,0xea,0x01,0x08] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eoreq.w r0, r8, r0 @ encoding: [0x88,0xea,0x00,0x00] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eoreq.w r3, r3, r8 @ encoding: [0x83,0xea,0x08,0x03] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eoreq.w r4, r4, r1, lsl #1 @ encoding: [0x84,0xea,0x41,0x04] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: eoreq.w r0, r1, r0, lsr #1 @ encoding: [0x81,0xea,0x50,0x00] + +// LSL + LSLS r0, r2, r1 // Must be wide - 3 distinct registers + LSLS r2, r2, r1 // Should choose narrow + LSLS r2, r1, r2 // Should choose wide - not commutative + LSLS.W r0, r0, r1 // Explicitly wide + LSLS.W r4, r1, r4 + LSL r4, r1, r4 // Must use wide encoding as not flag-setting + LSLS r7, r7, r1 // Should use narrow + LSLS r8, r1, r8 // high registers so must use wide encoding + LSLS r8, r8, r1 + LSLS r3, r8, r3 + LSLS r5, r5, r8 +// CHECK: lsls.w r0, r2, r1 @ encoding: [0x12,0xfa,0x01,0xf0] +// CHECK: lsls r2, r1 @ 
encoding: [0x8a,0x40] +// CHECK: lsls.w r2, r1, r2 @ encoding: [0x11,0xfa,0x02,0xf2] +// CHECK: lsls.w r0, r0, r1 @ encoding: [0x10,0xfa,0x01,0xf0] +// CHECK: lsls.w r4, r1, r4 @ encoding: [0x11,0xfa,0x04,0xf4] +// CHECK: lsl.w r4, r1, r4 @ encoding: [0x01,0xfa,0x04,0xf4] +// CHECK: lsls r7, r1 @ encoding: [0x8f,0x40] +// CHECK: lsls.w r8, r1, r8 @ encoding: [0x11,0xfa,0x08,0xf8] +// CHECK: lsls.w r8, r8, r1 @ encoding: [0x18,0xfa,0x01,0xf8] +// CHECK: lsls.w r3, r8, r3 @ encoding: [0x18,0xfa,0x03,0xf3] +// CHECK: lsls.w r5, r5, r8 @ encoding: [0x15,0xfa,0x08,0xf5] + + IT EQ + LSLEQ r0, r2, r1 // Must be wide - 3 distinct registers + IT EQ + LSLEQ r2, r2, r1 // Should choose narrow + IT EQ + LSLEQ r2, r1, r2 // Should choose wide - not commutative + IT EQ + LSLEQ.W r0, r0, r1 // Explicitly wide + IT EQ + LSLEQ.W r3, r1, r3 + IT EQ + LSLSEQ r4, r1, r4 // Must use wide encoding as flag-setting + IT EQ + LSLEQ r7, r7, r1 // Should use narrow + IT EQ + LSLEQ r8, r1, r8 // high registers so must use wide encoding + IT EQ + LSLEQ r8, r8, r1 + IT EQ + LSLEQ r0, r8, r0 + IT EQ + LSLEQ r3, r3, r8 +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsleq.w r0, r2, r1 @ encoding: [0x02,0xfa,0x01,0xf0] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsleq r2, r1 @ encoding: [0x8a,0x40] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsleq.w r2, r1, r2 @ encoding: [0x01,0xfa,0x02,0xf2] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsleq.w r0, r0, r1 @ encoding: [0x00,0xfa,0x01,0xf0] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsleq.w r3, r1, r3 @ encoding: [0x01,0xfa,0x03,0xf3] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lslseq.w r4, r1, r4 @ encoding: [0x11,0xfa,0x04,0xf4] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsleq r7, r1 @ encoding: [0x8f,0x40] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsleq.w r8, r1, r8 @ encoding: [0x01,0xfa,0x08,0xf8] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsleq.w r8, r8, r1 @ encoding: 
[0x08,0xfa,0x01,0xf8] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsleq.w r0, r8, r0 @ encoding: [0x08,0xfa,0x00,0xf0] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsleq.w r3, r3, r8 @ encoding: [0x03,0xfa,0x08,0xf3] + +// LSR + LSRS r6, r2, r1 // Must be wide - 3 distinct registers + LSRS r2, r2, r1 // Should choose narrow + LSRS r2, r1, r2 // Should choose wide - not commutative + LSRS.W r2, r2, r1 // Explicitly wide + LSRS.W r3, r1, r3 + LSR r4, r1, r4 // Must use wide encoding as not flag-setting + LSRS r7, r7, r1 // Should use narrow + LSRS r8, r1, r8 // high registers so must use wide encoding + LSRS r8, r8, r1 + LSRS r2, r8, r2 + LSRS r5, r5, r8 +// CHECK: lsrs.w r6, r2, r1 @ encoding: [0x32,0xfa,0x01,0xf6] +// CHECK: lsrs r2, r1 @ encoding: [0xca,0x40] +// CHECK: lsrs.w r2, r1, r2 @ encoding: [0x31,0xfa,0x02,0xf2] +// CHECK: lsrs.w r2, r2, r1 @ encoding: [0x32,0xfa,0x01,0xf2] +// CHECK: lsrs.w r3, r1, r3 @ encoding: [0x31,0xfa,0x03,0xf3] +// CHECK: lsr.w r4, r1, r4 @ encoding: [0x21,0xfa,0x04,0xf4] +// CHECK: lsrs r7, r1 @ encoding: [0xcf,0x40] +// CHECK: lsrs.w r8, r1, r8 @ encoding: [0x31,0xfa,0x08,0xf8] +// CHECK: lsrs.w r8, r8, r1 @ encoding: [0x38,0xfa,0x01,0xf8] +// CHECK: lsrs.w r2, r8, r2 @ encoding: [0x38,0xfa,0x02,0xf2] +// CHECK: lsrs.w r5, r5, r8 @ encoding: [0x35,0xfa,0x08,0xf5] + + IT EQ + LSREQ r6, r2, r1 // Must be wide - 3 distinct registers + IT EQ + LSREQ r7, r7, r1 // Should choose narrow + IT EQ + LSREQ r7, r1, r7 // Should choose wide - not commutative + IT EQ + LSREQ.W r7, r7, r1 // Explicitly wide + IT EQ + LSREQ.W r2, r1, r2 + IT EQ + LSRSEQ r0, r1, r0 // Must use wide encoding as flag-setting + IT EQ + LSREQ r7, r7, r1 // Should use narrow + IT EQ + LSREQ r8, r1, r8 // high registers so must use wide encoding + IT EQ + LSREQ r8, r8, r1 + IT EQ + LSREQ r1, r8, r1 + IT EQ + LSREQ r4, r4, r8 +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsreq.w r6, r2, r1 @ encoding: [0x22,0xfa,0x01,0xf6] +// CHECK: it eq @ 
encoding: [0x08,0xbf] +// CHECK: lsreq r7, r1 @ encoding: [0xcf,0x40] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsreq.w r7, r1, r7 @ encoding: [0x21,0xfa,0x07,0xf7] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsreq.w r7, r7, r1 @ encoding: [0x27,0xfa,0x01,0xf7] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsreq.w r2, r1, r2 @ encoding: [0x21,0xfa,0x02,0xf2] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsrseq.w r0, r1, r0 @ encoding: [0x31,0xfa,0x00,0xf0] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsreq r7, r1 @ encoding: [0xcf,0x40] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsreq.w r8, r1, r8 @ encoding: [0x21,0xfa,0x08,0xf8] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsreq.w r8, r8, r1 @ encoding: [0x28,0xfa,0x01,0xf8] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsreq.w r1, r8, r1 @ encoding: [0x28,0xfa,0x01,0xf1] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: lsreq.w r4, r4, r8 @ encoding: [0x24,0xfa,0x08,0xf4] + +// ASR + ASRS r7, r6, r5 // Must be wide - 3 distinct registers + ASRS r0, r0, r1 // Should choose narrow + ASRS r0, r1, r0 // Should choose wide - not commutative + ASRS.W r3, r3, r1 // Explicitly wide + ASRS.W r1, r1, r1 + ASR r0, r1, r0 // Must use wide encoding as not flag-setting + ASRS r7, r7, r1 // Should use narrow + ASRS r8, r1, r8 // high registers so must use wide encoding + ASRS r8, r8, r1 + ASRS r5, r8, r5 + ASRS r5, r5, r8 +// CHECK: asrs.w r7, r6, r5 @ encoding: [0x56,0xfa,0x05,0xf7] +// CHECK: asrs r0, r1 @ encoding: [0x08,0x41] +// CHECK: asrs.w r0, r1, r0 @ encoding: [0x51,0xfa,0x00,0xf0] +// CHECK: asrs.w r3, r3, r1 @ encoding: [0x53,0xfa,0x01,0xf3] +// CHECK: asrs.w r1, r1, r1 @ encoding: [0x51,0xfa,0x01,0xf1] +// CHECK: asr.w r0, r1, r0 @ encoding: [0x41,0xfa,0x00,0xf0] +// CHECK: asrs r7, r1 @ encoding: [0x0f,0x41] +// CHECK: asrs.w r8, r1, r8 @ encoding: [0x51,0xfa,0x08,0xf8] +// CHECK: asrs.w r8, r8, r1 @ encoding: [0x58,0xfa,0x01,0xf8] +// CHECK: asrs.w r5, 
r8, r5 @ encoding: [0x58,0xfa,0x05,0xf5] +// CHECK: asrs.w r5, r5, r8 @ encoding: [0x55,0xfa,0x08,0xf5] + + IT EQ + ASREQ r0, r2, r1 // Must be wide - 3 distinct registers + IT EQ + ASREQ r2, r2, r1 // Should choose narrow + IT EQ + ASREQ r1, r2, r1 // Should choose wide - not commutative + IT EQ + ASREQ.W r4, r4, r1 // Explicitly wide + IT EQ + ASREQ.W r6, r1, r6 + IT EQ + ASRSEQ r3, r1, r3 // Must use wide encoding as flag-setting + IT EQ + ASREQ r7, r7, r1 // Should use narrow + IT EQ + ASREQ r8, r1, r8 // high registers so must use wide encoding + IT EQ + ASREQ r8, r8, r1 + IT EQ + ASREQ r1, r8, r1 + IT EQ + ASREQ r3, r3, r8 +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: asreq.w r0, r2, r1 @ encoding: [0x42,0xfa,0x01,0xf0] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: asreq r2, r1 @ encoding: [0x0a,0x41] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: asreq.w r1, r2, r1 @ encoding: [0x42,0xfa,0x01,0xf1] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: asreq.w r4, r4, r1 @ encoding: [0x44,0xfa,0x01,0xf4] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: asreq.w r6, r1, r6 @ encoding: [0x41,0xfa,0x06,0xf6] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: asrseq.w r3, r1, r3 @ encoding: [0x51,0xfa,0x03,0xf3] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: asreq r7, r1 @ encoding: [0x0f,0x41] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: asreq.w r8, r1, r8 @ encoding: [0x41,0xfa,0x08,0xf8] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: asreq.w r8, r8, r1 @ encoding: [0x48,0xfa,0x01,0xf8] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: asreq.w r1, r8, r1 @ encoding: [0x48,0xfa,0x01,0xf1] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: asreq.w r3, r3, r8 @ encoding: [0x43,0xfa,0x08,0xf3] + +// ADC (commutative) + ADCS r5, r2, r1 // Must be wide - 3 distinct registers + ADCS r5, r5, r1 // Should choose narrow + ADCS r3, r1, r3 // Should choose narrow - commutative + ADCS.W r2, r2, r1 // Explicitly wide + ADCS.W r3, r1, r3 
+ ADC r0, r1, r0 // Must use wide encoding as not flag-setting + ADCS r7, r7, r1 // Should use narrow + ADCS r7, r1, r7 // Commutative + ADCS r8, r1, r8 // high registers so must use wide encoding + ADCS r8, r8, r1 + ADCS r5, r8, r5 + ADCS r2, r2, r8 + ADCS r3, r3, r1, lsl #1 // Must use wide - shifted register + ADCS r4, r1, r4, lsr #1 +// CHECK: adcs.w r5, r2, r1 @ encoding: [0x52,0xeb,0x01,0x05] +// CHECK: adcs r5, r1 @ encoding: [0x4d,0x41] +// CHECK: adcs r3, r1 @ encoding: [0x4b,0x41] +// CHECK: adcs.w r2, r2, r1 @ encoding: [0x52,0xeb,0x01,0x02] +// CHECK: adcs.w r3, r1, r3 @ encoding: [0x51,0xeb,0x03,0x03] +// CHECK: adc.w r0, r1, r0 @ encoding: [0x41,0xeb,0x00,0x00] +// CHECK: adcs r7, r1 @ encoding: [0x4f,0x41] +// CHECK: adcs r7, r1 @ encoding: [0x4f,0x41] +// CHECK: adcs.w r8, r1, r8 @ encoding: [0x51,0xeb,0x08,0x08] +// CHECK: adcs.w r8, r8, r1 @ encoding: [0x58,0xeb,0x01,0x08] +// CHECK: adcs.w r5, r8, r5 @ encoding: [0x58,0xeb,0x05,0x05] +// CHECK: adcs.w r2, r2, r8 @ encoding: [0x52,0xeb,0x08,0x02] +// CHECK: adcs.w r3, r3, r1, lsl #1 @ encoding: [0x53,0xeb,0x41,0x03] +// CHECK: adcs.w r4, r1, r4, lsr #1 @ encoding: [0x51,0xeb,0x54,0x04] + + IT EQ + ADCEQ r1, r2, r3 // Must be wide - 3 distinct registers + IT EQ + ADCEQ r1, r1, r1 // Should choose narrow + IT EQ + ADCEQ r3, r1, r3 // Should choose narrow - commutative + IT EQ + ADCEQ.W r3, r3, r1 // Explicitly wide + IT EQ + ADCEQ.W r0, r1, r0 + IT EQ + ADCSEQ r3, r1, r3 // Must use wide encoding as flag-setting + IT EQ + ADCEQ r7, r7, r1 // Should use narrow + IT EQ + ADCEQ r7, r1, r7 // Commutative + IT EQ + ADCEQ r8, r1, r8 // high registers so must use wide encoding + IT EQ + ADCEQ r8, r8, r1 + IT EQ + ADCEQ r3, r8, r3 + IT EQ + ADCEQ r1, r1, r8 + IT EQ + ADCEQ r2, r2, r1, lsl #1 // Must use wide - shifted register + IT EQ + ADCEQ r1, r1, r1, lsr #1 +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: adceq.w r1, r2, r3 @ encoding: [0x42,0xeb,0x03,0x01] +// CHECK: it eq @ encoding: [0x08,0xbf] 
+// CHECK: adceq r1, r1 @ encoding: [0x49,0x41] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: adceq r3, r1 @ encoding: [0x4b,0x41] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: adceq.w r3, r3, r1 @ encoding: [0x43,0xeb,0x01,0x03] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: adceq.w r0, r1, r0 @ encoding: [0x41,0xeb,0x00,0x00] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: adcseq.w r3, r1, r3 @ encoding: [0x51,0xeb,0x03,0x03] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: adceq r7, r1 @ encoding: [0x4f,0x41] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: adceq r7, r1 @ encoding: [0x4f,0x41] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: adceq.w r8, r1, r8 @ encoding: [0x41,0xeb,0x08,0x08] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: adceq.w r8, r8, r1 @ encoding: [0x48,0xeb,0x01,0x08] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: adceq.w r3, r8, r3 @ encoding: [0x48,0xeb,0x03,0x03] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: adceq.w r1, r1, r8 @ encoding: [0x41,0xeb,0x08,0x01] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: adceq.w r2, r2, r1, lsl #1 @ encoding: [0x42,0xeb,0x41,0x02] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: adceq.w r1, r1, r1, lsr #1 @ encoding: [0x41,0xeb,0x51,0x01] + +// SBC + SBCS r3, r2, r1 // Must be wide - 3 distinct registers + SBCS r4, r4, r1 // Should choose narrow + SBCS r1, r4, r1 // Should choose wide - not commutative + SBCS.W r4, r4, r1 // Explicitly wide + SBCS.W r2, r1, r2 + SBC r0, r1, r0 // Must use wide encoding as not flag-setting + SBCS r7, r7, r1 // Should use narrow + SBCS r8, r1, r8 // high registers so must use wide encoding + SBCS r8, r8, r1 + SBCS r4, r8, r4 + SBCS r3, r3, r8 + SBCS r2, r2, r1, lsl #1 // Must use wide - shifted register + SBCS r5, r1, r5, lsr #1 +// CHECK: sbcs.w r3, r2, r1 @ encoding: [0x72,0xeb,0x01,0x03] +// CHECK: sbcs r4, r1 @ encoding: [0x8c,0x41] +// CHECK: sbcs.w r1, r4, r1 @ encoding: [0x74,0xeb,0x01,0x01] +// CHECK: sbcs.w 
r4, r4, r1 @ encoding: [0x74,0xeb,0x01,0x04] +// CHECK: sbcs.w r2, r1, r2 @ encoding: [0x71,0xeb,0x02,0x02] +// CHECK: sbc.w r0, r1, r0 @ encoding: [0x61,0xeb,0x00,0x00] +// CHECK: sbcs r7, r1 @ encoding: [0x8f,0x41] +// CHECK: sbcs.w r8, r1, r8 @ encoding: [0x71,0xeb,0x08,0x08] +// CHECK: sbcs.w r8, r8, r1 @ encoding: [0x78,0xeb,0x01,0x08] +// CHECK: sbcs.w r4, r8, r4 @ encoding: [0x78,0xeb,0x04,0x04] +// CHECK: sbcs.w r3, r3, r8 @ encoding: [0x73,0xeb,0x08,0x03] +// CHECK: sbcs.w r2, r2, r1, lsl #1 @ encoding: [0x72,0xeb,0x41,0x02] +// CHECK: sbcs.w r5, r1, r5, lsr #1 @ encoding: [0x71,0xeb,0x55,0x05] + + IT EQ + SBCEQ r5, r2, r1 // Must be wide - 3 distinct registers + IT EQ + SBCEQ r5, r5, r1 // Should choose narrow + IT EQ + SBCEQ r1, r5, r1 // Should choose wide - not commutative + IT EQ + SBCEQ.W r5, r5, r1 // Explicitly wide + IT EQ + SBCEQ.W r0, r1, r0 + IT EQ + SBCSEQ r2, r1, r2 // Must use wide encoding as flag-setting + IT EQ + SBCEQ r7, r7, r1 // Should use narrow + IT EQ + SBCEQ r8, r1, r8 // high registers so must use wide encoding + IT EQ + SBCEQ r8, r8, r1 + IT EQ + SBCEQ r7, r8, r7 + IT EQ + SBCEQ r7, r7, r8 + IT EQ + SBCEQ r2, r2, r1, lsl #1 // Must use wide - shifted register + IT EQ + SBCEQ r5, r1, r5, lsr #1 +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: sbceq.w r5, r2, r1 @ encoding: [0x62,0xeb,0x01,0x05] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: sbceq r5, r1 @ encoding: [0x8d,0x41] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: sbceq.w r1, r5, r1 @ encoding: [0x65,0xeb,0x01,0x01] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: sbceq.w r5, r5, r1 @ encoding: [0x65,0xeb,0x01,0x05] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: sbceq.w r0, r1, r0 @ encoding: [0x61,0xeb,0x00,0x00] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: sbcseq.w r2, r1, r2 @ encoding: [0x71,0xeb,0x02,0x02] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: sbceq r7, r1 @ encoding: [0x8f,0x41] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: sbceq.w 
r8, r1, r8 @ encoding: [0x61,0xeb,0x08,0x08] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: sbceq.w r8, r8, r1 @ encoding: [0x68,0xeb,0x01,0x08] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: sbceq.w r7, r8, r7 @ encoding: [0x68,0xeb,0x07,0x07] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: sbceq.w r7, r7, r8 @ encoding: [0x67,0xeb,0x08,0x07] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: sbceq.w r2, r2, r1, lsl #1 @ encoding: [0x62,0xeb,0x41,0x02] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: sbceq.w r5, r1, r5, lsr #1 @ encoding: [0x61,0xeb,0x55,0x05] + +// ROR + RORS r3, r2, r1 // Must be wide - 3 distinct registers + RORS r0, r0, r1 // Should choose narrow + RORS r1, r0, r1 // Should choose wide - not commutative + RORS.W r2, r2, r1 // Explicitly wide + RORS.W r2, r1, r2 + ROR r5, r1, r5 // Must use wide encoding as not flag-setting + RORS r7, r7, r1 // Should use narrow + RORS r8, r1, r8 // high registers so must use wide encoding + RORS r8, r8, r1 + RORS r6, r8, r6 + RORS r6, r6, r8 +// CHECK: rors.w r3, r2, r1 @ encoding: [0x72,0xfa,0x01,0xf3] +// CHECK: rors r0, r1 @ encoding: [0xc8,0x41] +// CHECK: rors.w r1, r0, r1 @ encoding: [0x70,0xfa,0x01,0xf1] +// CHECK: rors.w r2, r2, r1 @ encoding: [0x72,0xfa,0x01,0xf2] +// CHECK: rors.w r2, r1, r2 @ encoding: [0x71,0xfa,0x02,0xf2] +// CHECK: ror.w r5, r1, r5 @ encoding: [0x61,0xfa,0x05,0xf5] +// CHECK: rors r7, r1 @ encoding: [0xcf,0x41] +// CHECK: rors.w r8, r1, r8 @ encoding: [0x71,0xfa,0x08,0xf8] +// CHECK: rors.w r8, r8, r1 @ encoding: [0x78,0xfa,0x01,0xf8] +// CHECK: rors.w r6, r8, r6 @ encoding: [0x78,0xfa,0x06,0xf6] +// CHECK: rors.w r6, r6, r8 @ encoding: [0x76,0xfa,0x08,0xf6] + + IT EQ + ROREQ r4, r2, r1 // Must be wide - 3 distinct registers + IT EQ + ROREQ r4, r4, r1 // Should choose narrow + IT EQ + ROREQ r1, r4, r1 // Should choose wide - not commutative + IT EQ + ROREQ.W r4, r4, r1 // Explicitly wide + IT EQ + ROREQ.W r0, r1, r0 + IT EQ + RORSEQ r0, r1, r0 // Must use wide 
encoding as flag-setting + IT EQ + ROREQ r7, r7, r1 // Should use narrow + IT EQ + ROREQ r8, r1, r8 // high registers so must use wide encoding + IT EQ + ROREQ r8, r8, r1 + IT EQ + ROREQ r3, r8, r3 + IT EQ + ROREQ r1, r1, r8 +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: roreq.w r4, r2, r1 @ encoding: [0x62,0xfa,0x01,0xf4] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: roreq r4, r1 @ encoding: [0xcc,0x41] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: roreq.w r1, r4, r1 @ encoding: [0x64,0xfa,0x01,0xf1] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: roreq.w r4, r4, r1 @ encoding: [0x64,0xfa,0x01,0xf4] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: roreq.w r0, r1, r0 @ encoding: [0x61,0xfa,0x00,0xf0] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: rorseq.w r0, r1, r0 @ encoding: [0x71,0xfa,0x00,0xf0] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: roreq r7, r1 @ encoding: [0xcf,0x41] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: roreq.w r8, r1, r8 @ encoding: [0x61,0xfa,0x08,0xf8] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: roreq.w r8, r8, r1 @ encoding: [0x68,0xfa,0x01,0xf8] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: roreq.w r3, r8, r3 @ encoding: [0x68,0xfa,0x03,0xf3] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: roreq.w r1, r1, r8 @ encoding: [0x61,0xfa,0x08,0xf1] + +// TST - only two register version available +// RSB - only two register version available +// CMP - only two register version available +// CMN - only two register version available + +// ORR (commutative) + ORRS r7, r2, r1 // Must be wide - 3 distinct registers + ORRS r2, r2, r1 // Should choose narrow + ORRS r3, r1, r3 // Should choose narrow - commutative + ORRS.W r4, r4, r1 // Explicitly wide + ORRS.W r5, r1, r5 + ORR r2, r1, r2 // Must use wide encoding as not flag-setting + ORRS r7, r7, r1 // Should use narrow + ORRS r7, r1, r7 // Commutative + ORRS r8, r1, r8 // high registers so must use wide encoding + ORRS r8, r8, r1 + ORRS r1, 
r8, r1 + ORRS r0, r0, r8 + ORRS r1, r1, r1, lsl #1 // Must use wide - shifted register + ORRS r0, r1, r0, lsr #1 +// CHECK: orrs.w r7, r2, r1 @ encoding: [0x52,0xea,0x01,0x07] +// CHECK: orrs r2, r1 @ encoding: [0x0a,0x43] +// CHECK: orrs r3, r1 @ encoding: [0x0b,0x43] +// CHECK: orrs.w r4, r4, r1 @ encoding: [0x54,0xea,0x01,0x04] +// CHECK: orrs.w r5, r1, r5 @ encoding: [0x51,0xea,0x05,0x05] +// CHECK: orr.w r2, r1, r2 @ encoding: [0x41,0xea,0x02,0x02] +// CHECK: orrs r7, r1 @ encoding: [0x0f,0x43] +// CHECK: orrs r7, r1 @ encoding: [0x0f,0x43] +// CHECK: orrs.w r8, r1, r8 @ encoding: [0x51,0xea,0x08,0x08] +// CHECK: orrs.w r8, r8, r1 @ encoding: [0x58,0xea,0x01,0x08] +// CHECK: orrs.w r1, r8, r1 @ encoding: [0x58,0xea,0x01,0x01] +// CHECK: orrs.w r0, r0, r8 @ encoding: [0x50,0xea,0x08,0x00] +// CHECK: orrs.w r1, r1, r1, lsl #1 @ encoding: [0x51,0xea,0x41,0x01] +// CHECK: orrs.w r0, r1, r0, lsr #1 @ encoding: [0x51,0xea,0x50,0x00] + + IT EQ + ORREQ r0, r2, r1 // Must be wide - 3 distinct registers + IT EQ + ORREQ r5, r5, r1 // Should choose narrow + IT EQ + ORREQ r5, r1, r5 // Should choose narrow - commutative + IT EQ + ORREQ.W r2, r2, r1 // Explicitly wide + IT EQ + ORREQ.W r3, r1, r3 + IT EQ + ORRSEQ r4, r1, r4 // Must use wide encoding as flag-setting + IT EQ + ORREQ r7, r7, r1 // Should use narrow + IT EQ + ORREQ r7, r1, r7 // Commutative + IT EQ + ORREQ r8, r1, r8 // high registers so must use wide encoding + IT EQ + ORREQ r8, r8, r1 + IT EQ + ORREQ r0, r8, r0 + IT EQ + ORREQ r0, r0, r8 + IT EQ + ORREQ r2, r2, r1, lsl #1 // Must use wide - shifted register + IT EQ + ORREQ r2, r1, r2, lsr #1 +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orreq.w r0, r2, r1 @ encoding: [0x42,0xea,0x01,0x00] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orreq r5, r1 @ encoding: [0x0d,0x43] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orreq r5, r1 @ encoding: [0x0d,0x43] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orreq.w r2, r2, r1 @ encoding: 
[0x42,0xea,0x01,0x02] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orreq.w r3, r1, r3 @ encoding: [0x41,0xea,0x03,0x03] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orrseq.w r4, r1, r4 @ encoding: [0x51,0xea,0x04,0x04] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orreq r7, r1 @ encoding: [0x0f,0x43] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orreq r7, r1 @ encoding: [0x0f,0x43] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orreq.w r8, r1, r8 @ encoding: [0x41,0xea,0x08,0x08] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orreq.w r8, r8, r1 @ encoding: [0x48,0xea,0x01,0x08] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orreq.w r0, r8, r0 @ encoding: [0x48,0xea,0x00,0x00] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orreq.w r0, r0, r8 @ encoding: [0x40,0xea,0x08,0x00] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orreq.w r2, r2, r1, lsl #1 @ encoding: [0x42,0xea,0x41,0x02] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: orreq.w r2, r1, r2, lsr #1 @ encoding: [0x41,0xea,0x52,0x02] + +// MUL - not affected by this change + +// BIC + BICS r3, r2, r1 // Must be wide - 3 distinct registers + BICS r2, r2, r1 // Should choose narrow + BICS r1, r2, r1 // Should choose wide - not commutative + BICS.W r2, r2, r1 // Explicitly wide + BICS.W r0, r1, r0 + BIC r0, r1, r0 // Must use wide encoding as not flag-setting + BICS r7, r7, r1 // Should use narrow + BICS r8, r1, r8 // high registers so must use wide encoding + BICS r8, r8, r1 + BICS r7, r8, r7 + BICS r5, r5, r8 + BICS r3, r3, r1, lsl #1 // Must use wide - shifted register + BICS r4, r1, r4, lsr #1 +// CHECK: bics.w r3, r2, r1 @ encoding: [0x32,0xea,0x01,0x03] +// CHECK: bics r2, r1 @ encoding: [0x8a,0x43] +// CHECK: bics.w r1, r2, r1 @ encoding: [0x32,0xea,0x01,0x01] +// CHECK: bics.w r2, r2, r1 @ encoding: [0x32,0xea,0x01,0x02] +// CHECK: bics.w r0, r1, r0 @ encoding: [0x31,0xea,0x00,0x00] +// CHECK: bic.w r0, r1, r0 @ encoding: [0x21,0xea,0x00,0x00] +// 
CHECK: bics r7, r1 @ encoding: [0x8f,0x43] +// CHECK: bics.w r8, r1, r8 @ encoding: [0x31,0xea,0x08,0x08] +// CHECK: bics.w r8, r8, r1 @ encoding: [0x38,0xea,0x01,0x08] +// CHECK: bics.w r7, r8, r7 @ encoding: [0x38,0xea,0x07,0x07] +// CHECK: bics.w r5, r5, r8 @ encoding: [0x35,0xea,0x08,0x05] +// CHECK: bics.w r3, r3, r1, lsl #1 @ encoding: [0x33,0xea,0x41,0x03] +// CHECK: bics.w r4, r1, r4, lsr #1 @ encoding: [0x31,0xea,0x54,0x04] + + IT EQ + BICEQ r0, r2, r1 // Must be wide - 3 distinct registers + IT EQ + BICEQ r5, r5, r1 // Should choose narrow + IT EQ + BICEQ r1, r5, r1 // Should choose wide - not commutative + IT EQ + BICEQ.W r4, r4, r1 // Explicitly wide + IT EQ + BICEQ.W r2, r1, r2 + IT EQ + BICSEQ r5, r1, r5 // Must use wide encoding as flag-setting + IT EQ + BICEQ r7, r7, r1 // Should use narrow + IT EQ + BICEQ r8, r1, r8 // high registers so must use wide encoding + IT EQ + BICEQ r8, r8, r1 + IT EQ + BICEQ r0, r8, r0 + IT EQ + BICEQ r2, r2, r8 + IT EQ + BICEQ r4, r4, r1, lsl #1 // Must use wide - shifted register + IT EQ + BICEQ r5, r1, r5, lsr #1 +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: biceq.w r0, r2, r1 @ encoding: [0x22,0xea,0x01,0x00] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: biceq r5, r1 @ encoding: [0x8d,0x43] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: biceq.w r1, r5, r1 @ encoding: [0x25,0xea,0x01,0x01] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: biceq.w r4, r4, r1 @ encoding: [0x24,0xea,0x01,0x04] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: biceq.w r2, r1, r2 @ encoding: [0x21,0xea,0x02,0x02] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: bicseq.w r5, r1, r5 @ encoding: [0x31,0xea,0x05,0x05] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: biceq r7, r1 @ encoding: [0x8f,0x43] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: biceq.w r8, r1, r8 @ encoding: [0x21,0xea,0x08,0x08] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: biceq.w r8, r8, r1 @ encoding: [0x28,0xea,0x01,0x08] +// 
CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: biceq.w r0, r8, r0 @ encoding: [0x28,0xea,0x00,0x00] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: biceq.w r2, r2, r8 @ encoding: [0x22,0xea,0x08,0x02] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: biceq.w r4, r4, r1, lsl #1 @ encoding: [0x24,0xea,0x41,0x04] +// CHECK: it eq @ encoding: [0x08,0xbf] +// CHECK: biceq.w r5, r1, r5, lsr #1 @ encoding: [0x21,0xea,0x55,0x05] + +// CMN - only two register version available From richard.barton at arm.com Mon Jul 9 11:14:50 2012 From: richard.barton at arm.com (Richard Barton) Date: Mon, 9 Jul 2012 17:14:50 +0100 Subject: [llvm-commits] [PATCH] Fix assembly of narrow Thumb data processing instructions In-Reply-To: <3D6047B8-E0D7-441C-A113-6A69DF5C4219@apple.com> References: <000001cd5a14$f43d3470$dcb79d50$%barton@arm.com> <3D6047B8-E0D7-441C-A113-6A69DF5C4219@apple.com> Message-ID: <000c01cd5ded$f7a265e0$e6e731a0$@barton@arm.com> Hi Jim Thanks for the review. > This looks great, thank you. You scared me a bit in the description here w/ a > reference to validateInstruction(), but the actual patch correctly uses > processInstruction(), so all good. :) Oh dear, at least I have better patches than descriptions ;} > Very minor nit: Add periods to the ends of sentences in comments. For one > example, > + // Assemblers should use the narrow encodings of these instructions when > permissible > + // These instructions are special in that they are commutable, so shorter > encodings > + // are available more often Done. > Second, it's best not to use R0 in assembler test cases, as that's the default > value for the encoder, so we won't be able to tell the difference between a > correct encoding of the operand and the operand not being encoded explicitly > at all. There are a lot of tests already in the test suite that are horrible > examples in this regard, unfortunately. Oh dear, I have moaned about this before in the past as well, so I am a hypocrite. 
I have changed most of them over to use random non-r0 values. (We still want _some_ r0 values :-) I have made the suggested adjustments and committed as r159935. Thanks Rich > -----Original Message----- > From: Jim Grosbach [mailto:grosbach at apple.com] > Sent: 06 July 2012 22:47 > To: Richard Barton > Cc: llvm-commits at cs.uiuc.edu > Subject: Re: [PATCH] Fix assembly of narrow Thumb data processing instructions > > Hi Richard, > > This looks great, thank you. You scared me a bit in the description here w/ a > reference to validateInstruction(), but the actual patch correctly uses > processInstruction(), so all good. :) > > Very minor nit: Add periods to the ends of sentences in comments. For one > example, > + // Assemblers should use the narrow encodings of these instructions when > permissable > + // These instructions are special in that they are commutable, so shorter > encodings > + // are available more often > > Second, it's best not to use R0 in assembler test cases, as that's the default > value for the encoder, so we won't be able to tell the difference between a > correct encoding of the operand and the operand not being encoded explicitly > at all. There are a lot of tests already in the test suite that are horrible > examples in this regard, unfortunately. > > Good to commit w/ those fixes. > > -Jim > > On Jul 4, 2012, at 11:43 AM, Richard Barton wrote: > > > Hello Reviewers > > > > The attached patch implements correct assembly of Thumb dp-operations with > both > > three and two register syntaxes. > > > > i.e. AND, EOR, LSL, LSR, ASR, ADC, SBC, ROR, and ORR. > > > > The assembly syntax for these instructions is of the form: > > > > AND{S}{}{} {,} , {, } > > > > The optional Rd argument allows the use of the short encoding when Rd == Rn > (and > > other conditions hold.) The assembler should always prefer the short > encoding > > over the wide in these cases, unless the wide encoding is specifically > requested > > with '.W'. 
> > > > In addition, ADD, EOR, ADC and ORR are commutative, so the narrow encoding > is > > also available when Rd == Rm (as well as the other conditions holding.) > > > > The additional conditions are: > > - Rd, Rn and Rm are all in the range R0 - R7. > > - The instruction is flag-setting and not in an IT block, or not-flag > setting > > and in an IT block. > > - No shift is applied to the instruction. > > > > The attached patch uses the validateInstruction callback to chose the narrow > > encodings in the correct circumstances, and adds a new regression test. > > > > Please review. > > > > Regards, > > Richard Barton > > ARM Ltd, Cambridge > > > > > > From richard.barton at arm.com Mon Jul 9 11:14:28 2012 From: richard.barton at arm.com (Richard Barton) Date: Mon, 09 Jul 2012 16:14:28 -0000 Subject: [llvm-commits] [llvm] r159936 - /llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Message-ID: <20120709161429.1FF3A2A6C069@llvm.org> Author: rbarton Date: Mon Jul 9 11:14:28 2012 New Revision: 159936 URL: http://llvm.org/viewvc/llvm-project?rev=159936&view=rev Log: Spelling! Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=159936&r1=159935&r2=159936&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Jul 9 11:14:28 2012 @@ -7294,7 +7294,7 @@ case ARM::t2RORrr: case ARM::t2BICrr: { - // Assemblers should use the narrow encodings of these instructions when permissable. + // Assemblers should use the narrow encodings of these instructions when permissible. 
if ((isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg())) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && @@ -7330,7 +7330,7 @@ case ARM::t2ADCrr: case ARM::t2ORRrr: { - // Assemblers should use the narrow encodings of these instructions when permissable. + // Assemblers should use the narrow encodings of these instructions when permissible. // These instructions are special in that they are commutable, so shorter encodings // are available more often. if ((isARMLowRegister(Inst.getOperand(1).getReg()) && From clattner at apple.com Mon Jul 9 11:20:16 2012 From: clattner at apple.com (Chris Lattner) Date: Mon, 09 Jul 2012 09:20:16 -0700 Subject: [llvm-commits] [llvm] r159876 - in /llvm/trunk: lib/Transforms/InstCombine/InstructionCombining.cpp test/Transforms/InstCombine/badmalloc.ll test/Transforms/InstCombine/invoke.ll test/Transforms/InstCombine/malloc-free-delete.ll test/Transforms/InstCom In-Reply-To: <4FFB0010.9020303@free.fr> References: <20120706230925.C3A562A6C069@llvm.org> <4FF7E2A7.9030704@free.fr> <65B0029D4BCA46AABF0F161BEA01987D@PC07655> <4FFB0010.9020303@free.fr> Message-ID: On Jul 9, 2012, at 9:00 AM, Duncan Sands wrote: > Hi Nuno, > > On 08/07/12 22:01, Nuno Lopes wrote: >>>> --- llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp (original) >>>> +++ llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp Fri Jul 6 >>>> 18:09:25 2012 >>>> @@ -1137,12 +1137,29 @@ >>>> } >>>> } >>>> if (IntrinsicInst *II = dyn_cast(U)) { >>>> - if (II->getIntrinsicID() == Intrinsic::lifetime_start || >>>> - II->getIntrinsicID() == Intrinsic::lifetime_end) { >>>> + switch (II->getIntrinsicID()) { >>>> + default: return false; >>>> + case Intrinsic::memmove: >>>> + case Intrinsic::memcpy: >>>> + case Intrinsic::memset: { >>>> + MemIntrinsic *MI = cast(II); >>>> + if (MI->isVolatile() || MI->getRawDest() != V) >>> >>> why exclude volatile stores? 
If all that is being done to the allocated memory >>> is doing a bunch of volatile stores to it, I don't see why you can't discard >>> them and the allocation too. >> >> I thought about that, but.. I guess it's better not to mess around with >> volatiles. And I don't even think it's legal (C standard wise) to remove it. > > we already remove volatile stores to alloca's, so why not to heap allocations > too? Where do we do that? I thought we stopped that a long time ago, due to setjmp/longjmp issues. -Chris From baldrick at free.fr Mon Jul 9 11:28:25 2012 From: baldrick at free.fr (Duncan Sands) Date: Mon, 09 Jul 2012 18:28:25 +0200 Subject: [llvm-commits] [llvm] r159876 - in /llvm/trunk: lib/Transforms/InstCombine/InstructionCombining.cpp test/Transforms/InstCombine/badmalloc.ll test/Transforms/InstCombine/invoke.ll test/Transforms/InstCombine/malloc-free-delete.ll test/Transforms/InstCom In-Reply-To: References: <20120706230925.C3A562A6C069@llvm.org> <4FF7E2A7.9030704@free.fr> <65B0029D4BCA46AABF0F161BEA01987D@PC07655> <4FFB0010.9020303@free.fr> Message-ID: <4FFB06A9.2090400@free.fr> Hi Chris, >> we already remove volatile stores to alloca's, so why not to heap allocations >> too? > > Where do we do that? I thought we stopped that a long time ago, due to setjmp/longjmp issues. you are right, the optimizers don't remove volatile stores to alloca's any more. Sorry for the noise. Ciao, Duncan. 
From richard.barton at arm.com Mon Jul 9 11:31:16 2012 From: richard.barton at arm.com (Richard Barton) Date: Mon, 09 Jul 2012 16:31:16 -0000 Subject: [llvm-commits] [llvm] r159937 - in /llvm/trunk: lib/Target/ARM/AsmParser/ARMAsmParser.cpp test/MC/ARM/basic-arm-instructions.s Message-ID: <20120709163116.5F6A32A6C069@llvm.org> Author: rbarton Date: Mon Jul 9 11:31:14 2012 New Revision: 159937 URL: http://llvm.org/viewvc/llvm-project?rev=159937&view=rev Log: Prevent ARM assembler from losing a right shift by #32 applied to a register Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp llvm/trunk/test/MC/ARM/basic-arm-instructions.s Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=159937&r1=159936&r2=159937&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Jul 9 11:31:14 2012 @@ -7247,7 +7247,9 @@ case ARM::ADDrsi: newOpc = ARM::ADDrr; break; } // If the shift is by zero, use the non-shifted instruction definition. 
- if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0) { + // The exception is for right shifts, where 0 == 32 + if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0 && + !(SOpc == ARM_AM::lsr || SOpc == ARM_AM::asr)) { MCInst TmpInst; TmpInst.setOpcode(newOpc); TmpInst.addOperand(Inst.getOperand(0)); Modified: llvm/trunk/test/MC/ARM/basic-arm-instructions.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/basic-arm-instructions.s?rev=159937&r1=159936&r2=159937&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/basic-arm-instructions.s (original) +++ llvm/trunk/test/MC/ARM/basic-arm-instructions.s Mon Jul 9 11:31:14 2012 @@ -206,6 +206,11 @@ @ CHECK: sub r0, r0, #4 @ encoding: [0x04,0x00,0x40,0xe2] @ CHECK: sub r4, r5, #21 @ encoding: [0x15,0x40,0x45,0xe2] + @ Test right shift by 32, which is encoded as 0 + add r3, r1, r2, lsr #32 + add r3, r1, r2, asr #32 +@ CHECK: add r3, r1, r2, lsr #32 @ encoding: [0x22,0x30,0x81,0xe0] +@ CHECK: add r3, r1, r2, asr #32 @ encoding: [0x42,0x30,0x81,0xe0] @------------------------------------------------------------------------------ @ AND @@ -265,6 +270,12 @@ @ CHECK: and r6, r6, r7, ror r2 @ encoding: [0x77,0x62,0x06,0xe0] @ CHECK: and r10, r10, r1, rrx @ encoding: [0x61,0xa0,0x0a,0xe0] + @ Test right shift by 32, which is encoded as 0 + and r3, r1, r2, lsr #32 + and r3, r1, r2, asr #32 +@ CHECK: and r3, r1, r2, lsr #32 @ encoding: [0x22,0x30,0x01,0xe0] +@ CHECK: and r3, r1, r2, asr #32 @ encoding: [0x42,0x30,0x01,0xe0] + @------------------------------------------------------------------------------ @ ASR @------------------------------------------------------------------------------ @@ -368,6 +379,12 @@ @ CHECK: bic r6, r6, r7, ror r2 @ encoding: [0x77,0x62,0xc6,0xe1] @ CHECK: bic r10, r10, r1, rrx @ encoding: [0x61,0xa0,0xca,0xe1] + @ Test right shift by 32, which is encoded as 0 + bic r3, r1, r2, lsr #32 + bic r3, r1, r2, asr #32 
+@ CHECK: bic r3, r1, r2, lsr #32 @ encoding: [0x22,0x30,0xc1,0xe1] +@ CHECK: bic r3, r1, r2, asr #32 @ encoding: [0x42,0x30,0xc1,0xe1] + @------------------------------------------------------------------------------ @ BKPT @------------------------------------------------------------------------------ @@ -664,6 +681,11 @@ @ CHECK: eor r6, r6, r7, ror r9 @ encoding: [0x77,0x69,0x26,0xe0] @ CHECK: eor r4, r4, r5, rrx @ encoding: [0x65,0x40,0x24,0xe0] + @ Test right shift by 32, which is encoded as 0 + eor r3, r1, r2, lsr #32 + eor r3, r1, r2, asr #32 +@ CHECK: eor r3, r1, r2, lsr #32 @ encoding: [0x22,0x30,0x21,0xe0] +@ CHECK: eor r3, r1, r2, asr #32 @ encoding: [0x42,0x30,0x21,0xe0] @------------------------------------------------------------------------------ @ ISB @@ -1211,6 +1233,12 @@ @ CHECK: orrslt r6, r6, r7, ror r9 @ encoding: [0x77,0x69,0x96,0xb1] @ CHECK: orrsgt r4, r4, r5, rrx @ encoding: [0x65,0x40,0x94,0xc1] + @ Test right shift by 32, which is encoded as 0 + orr r3, r1, r2, lsr #32 + orr r3, r1, r2, asr #32 +@ CHECK: orr r3, r1, r2, lsr #32 @ encoding: [0x22,0x30,0x81,0xe1] +@ CHECK: orr r3, r1, r2, asr #32 @ encoding: [0x42,0x30,0x81,0xe1] + @------------------------------------------------------------------------------ @ PKH @------------------------------------------------------------------------------ @@ -2216,6 +2244,11 @@ @ CHECK: sub r6, r6, r7, asr r9 @ encoding: [0x57,0x69,0x46,0xe0] @ CHECK: sub r6, r6, r7, ror r9 @ encoding: [0x77,0x69,0x46,0xe0] + @ Test right shift by 32, which is encoded as 0 + sub r3, r1, r2, lsr #32 + sub r3, r1, r2, asr #32 +@ CHECK: sub r3, r1, r2, lsr #32 @ encoding: [0x22,0x30,0x41,0xe0] +@ CHECK: sub r3, r1, r2, asr #32 @ encoding: [0x42,0x30,0x41,0xe0] @------------------------------------------------------------------------------ @ SVC From richard.barton at arm.com Mon Jul 9 11:41:34 2012 From: richard.barton at arm.com (Richard Barton) Date: Mon, 09 Jul 2012 16:41:34 -0000 Subject: [llvm-commits] [llvm] 
r159938 - in /llvm/trunk: lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/Disassembler/ARMDisassembler.cpp test/MC/ARM/simple-fp-encoding.s Message-ID: <20120709164134.517C52A6C069@llvm.org> Author: rbarton Date: Mon Jul 9 11:41:33 2012 New Revision: 159938 URL: http://llvm.org/viewvc/llvm-project?rev=159938&view=rev Log: Fix instruction description of VMOV (between two ARM core registers and two single-precision resiters) Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp llvm/trunk/test/MC/ARM/simple-fp-encoding.s Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=159938&r1=159937&r2=159938&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original) +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Mon Jul 9 11:41:33 2012 @@ -567,8 +567,8 @@ bits<4> Rt2; // Encode instruction operands. - let Inst{3-0} = src1{3-0}; - let Inst{5} = src1{4}; + let Inst{3-0} = src1{4-1}; + let Inst{5} = src1{0}; let Inst{15-12} = Rt; let Inst{19-16} = Rt2; @@ -617,8 +617,8 @@ bits<4> src2; // Encode instruction operands. 
- let Inst{3-0} = dst1{3-0}; - let Inst{5} = dst1{4}; + let Inst{3-0} = dst1{4-1}; + let Inst{5} = dst1{0}; let Inst{15-12} = src1; let Inst{19-16} = src2; Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp?rev=159938&r1=159937&r2=159938&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp (original) +++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp Mon Jul 9 11:41:33 2012 @@ -4198,9 +4198,9 @@ DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction32(Insn, 12, 4); unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rm = fieldFromInstruction32(Insn, 5, 1); unsigned pred = fieldFromInstruction32(Insn, 28, 4); - Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; + Rm |= fieldFromInstruction32(Insn, 0, 4) << 4; if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) S = MCDisassembler::SoftFail; @@ -4224,9 +4224,9 @@ DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction32(Insn, 12, 4); unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rm = fieldFromInstruction32(Insn, 5, 1); unsigned pred = fieldFromInstruction32(Insn, 28, 4); - Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; + Rm |= fieldFromInstruction32(Insn, 0, 4) << 4; if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) S = MCDisassembler::SoftFail; Modified: llvm/trunk/test/MC/ARM/simple-fp-encoding.s URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/simple-fp-encoding.s?rev=159938&r1=159937&r2=159938&view=diff ============================================================================== --- llvm/trunk/test/MC/ARM/simple-fp-encoding.s (original) +++ llvm/trunk/test/MC/ARM/simple-fp-encoding.s Mon Jul 9 11:41:33 2012 @@ -196,6 
+196,27 @@ @ CHECK: vmov r0, r1, d16 @ encoding: [0x30,0x0b,0x51,0xec] vmov r0, r1, d16 +@ Between two single precision registers and two core registers + vmov s3, s4, r1, r2 + vmov s2, s3, r1, r2 + vmov r1, r2, s3, s4 + vmov r1, r2, s2, s3 +@ CHECK: vmov s3, s4, r1, r2 @ encoding: [0x31,0x1a,0x42,0xec] +@ CHECK: vmov s2, s3, r1, r2 @ encoding: [0x11,0x1a,0x42,0xec] +@ CHECK: vmov r1, r2, s3, s4 @ encoding: [0x31,0x1a,0x52,0xec] +@ CHECK: vmov r1, r2, s2, s3 @ encoding: [0x11,0x1a,0x52,0xec] + +@ Between one double precision register and two core registers + vmov d15, r1, r2 + vmov d16, r1, r2 + vmov r1, r2, d15 + vmov r1, r2, d16 +@ CHECK: vmov d15, r1, r2 @ encoding: [0x1f,0x1b,0x42,0xec] +@ CHECK: vmov d16, r1, r2 @ encoding: [0x30,0x1b,0x42,0xec] +@ CHECK: vmov r1, r2, d15 @ encoding: [0x1f,0x1b,0x52,0xec] +@ CHECK: vmov r1, r2, d16 @ encoding: [0x30,0x1b,0x52,0xec] + + @ CHECK: vldr d17, [r0] @ encoding: [0x00,0x1b,0xd0,0xed] @ CHECK: vldr s0, [lr] @ encoding: [0x00,0x0a,0x9e,0xed] @ CHECK: vldr d0, [lr] @ encoding: [0x00,0x0b,0x9e,0xed] From stpworld at narod.ru Mon Jul 9 11:54:17 2012 From: stpworld at narod.ru (Stepan Dyatkovskiy) Date: Mon, 09 Jul 2012 20:54:17 +0400 Subject: [llvm-commits] Please review the patch for IntegersSubsetMapping In-Reply-To: <4FF9865B.20904@narod.ru> References: <4FF3DF6F.9080709@narod.ru> <1B54898A-47F2-46F6-A7ED-A7935E5B70A0@gmail.com> <4FF9865B.20904@narod.ru> Message-ID: <4FFB0CB9.9020701@narod.ru> Stepan Dyatkovskiy wrote: > SimplifyCFG pass will optimize two these folded switches to the single one: > > switch (c) { > case '0'..'1': // BITS_SPECIFIC_SUBSET > tryProcessBit(c); > break; > case '2'..'9': // DECS_SPECIFIC_SUBSET > tryProcessDecs(c); > break; > case 'A'..'F': // HEXS_SPECIFIC_SUBSET > tryProcessHexs(c); > break; > default: > processSomethingElse(); > } Forgot about '\\' character. 
Fixed simplified switch is below: switch (c) { case '0'..'1': // BITS_SPECIFIC_SUBSET tryProcessBit(c); break; case '2'..'9': // DECS_SPECIFIC_SUBSET tryProcessDecs(c); break; case 'A'..'F': // HEXS_SPECIFIC_SUBSET tryProcessHexs(c); break; case '\\': processEscape(); break; default: processSomethingElse(); } -Stepan. From mren at apple.com Mon Jul 9 12:52:52 2012 From: mren at apple.com (Manman Ren) Date: Mon, 09 Jul 2012 10:52:52 -0700 Subject: [llvm-commits] [llvm] r159838 - in /llvm/trunk: lib/Target/X86/X86InstrArithmetic.td lib/Target/X86/X86InstrInfo.cpp lib/Target/X86/X86InstrInfo.h test/CodeGen/X86/jump_sign.ll In-Reply-To: <208901AC-ACB9-4308-A8ED-607B13230374@apple.com> References: <20120706173621.477F82A6C069@llvm.org> <208901AC-ACB9-4308-A8ED-607B13230374@apple.com> Message-ID: <96AFDA67-5828-44AD-B26E-A0FA27CC7A14@apple.com> I am working on this. Just wondering whether we should implement the decoding part (analysis) in the .td file. For example, add a 5-bit condition code for X86 instructions: // To avoid decoding the condition code from the opcode. bits<5> CondCode = 31; let CondCode = 13 in defm CMOVO : CMOV<0x40, "cmovo" , X86_COND_O>; We can add isCMov as well to differentiate CMOV from SET. We will still implement the "synthesis" part in X86InstrInfo.cpp. Let me know, Thanks, Manman On Jul 6, 2012, at 2:24 PM, Manman Ren wrote: > > I thought about that as well when looking at your change r159695. > I was a little worried about the performance, since I need to go through the switch twice, once to analyze the condition code, the other time to synthesize the opcode. > But the interface will be general enough so we can all use it. > Will work on it.
> > Thanks, > Manman > > On Jul 6, 2012, at 2:14 PM, Jakob Stoklund Olesen wrote: > >> >> On Jul 6, 2012, at 10:36 AM, Manman Ren wrote: >> >>> Author: mren >>> Date: Fri Jul 6 12:36:20 2012 >>> New Revision: 159838 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=159838&view=rev >>> Log: >>> X86: peephole optimization to remove cmp instruction >>> >>> For each Cmp, we check whether there is an earlier Sub which make Cmp >>> redundant. We handle the case where SUB operates on the same source operands as >>> Cmp, including the case where the two source operands are swapped. >>> >> >> Manman, I think these opcode tables are growing too big. We can do better by using analysis and synthesis functions. >> >> Analysis: Given an opcode, compute (stem, cond-code, regbytes), where stem is (SET, JUMP, CMOVrr, CMOVrm), cond-code is one of the X86::CondCode enumerators, and regbytes is 2, 4, or 8 (only for CMOV). >> >> Synthesis: Compute an opcode from (stem, cond-code, regwidth). Return 0 if no such opcode exists. >> >> This would simplify your code a lot because you can go: Analyze - swap condcode - synthesize. The giant switch in commuteInstruction() would also go away. >> >> We already have some of this functionality in place, see GetCondFromBranchOpc, GetCondBranchFromCond, and getCMovFromCond which I just added. >> >> /jakob >> > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From mcrosier at apple.com Mon Jul 9 12:56:46 2012 From: mcrosier at apple.com (Chad Rosier) Date: Mon, 09 Jul 2012 10:56:46 -0700 Subject: [llvm-commits] [PATCH] Support for dynamic stack realignment + VLAs for x86 In-Reply-To: <9ADA0350-3D7B-4EFC-B8F4-9D6F7F56DC34@apple.com> References: <9ADA0350-3D7B-4EFC-B8F4-9D6F7F56DC34@apple.com> Message-ID: <65FAF44A-315A-46D0-B7E4-D83283F4B1C1@apple.com> Ping. -------------- next part -------------- A non-text attachment was scrubbed... 
Name: dynamic_stack_realign_VLAs_x86.patch Type: application/octet-stream Size: 16231 bytes Desc: not available Url : http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120709/2ef0a03a/attachment.obj -------------- next part -------------- On Jul 6, 2012, at 9:31 AM, Chad Rosier wrote: > All, > Attached is a revised patch for adding support for dynamic stack realignment in the presence of VLAs on x86. It addresses Chandler's concern (i.e., incorrect restoring of the stack pointer from the base pointer) as well as another bug that was discovered while testing (i.e., see comments on obscure tail call with byval case). To test the patch I hard coded MachineFrameInfo to always return true for hasVarSizedObjects() and enabled the force-stack-align flag by default. All benchmarks in the test suite, including the external tests, pass. Please take a look.. > > Chad > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits From mcrosier at apple.com Mon Jul 9 13:01:47 2012 From: mcrosier at apple.com (Chad Rosier) Date: Mon, 09 Jul 2012 11:01:47 -0700 Subject: [llvm-commits] [llvm] r159938 - in /llvm/trunk: lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/Disassembler/ARMDisassembler.cpp test/MC/ARM/simple-fp-encoding.s In-Reply-To: <20120709164134.517C52A6C069@llvm.org> References: <20120709164134.517C52A6C069@llvm.org> Message-ID: <67A813F8-DAF5-4267-A140-41A23DF1F2C3@apple.com> Richard, This appears to be causing failures on our internal builders with the following warnings: ******************** TEST 'LLVM :: MC/Disassembler/ARM/neon.txt' FAILED ********************Script: -- 0xa4 0x0d 0xa3 0xf4 ^ llvm/test/MC/Disassembler/ARM/neon.txt:1898:10: error: expected string not found in input # CHECK: vmovvs r2, lr, s29, s30 ^ :897:2: note: scanning from here stmdb r12!, {r1, r3, r5, r9, r10, r11, r12, lr} ^ ^ :897:11: note: possible intended match here stmdb 
r12!, {r1, r3, r5, r9, r10, r11, r12, lr} ^ ^ -- ******************** Chad On Jul 9, 2012, at 9:41 AM, Richard Barton wrote: > Author: rbarton > Date: Mon Jul 9 11:41:33 2012 > New Revision: 159938 > > URL: http://llvm.org/viewvc/llvm-project?rev=159938&view=rev > Log: > Fix instruction description of VMOV (between two ARM core registers and two single-precision resiters) > > Modified: > llvm/trunk/lib/Target/ARM/ARMInstrVFP.td > llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp > llvm/trunk/test/MC/ARM/simple-fp-encoding.s > > Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=159938&r1=159937&r2=159938&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original) > +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Mon Jul 9 11:41:33 2012 > @@ -567,8 +567,8 @@ > bits<4> Rt2; > > // Encode instruction operands. > - let Inst{3-0} = src1{3-0}; > - let Inst{5} = src1{4}; > + let Inst{3-0} = src1{4-1}; > + let Inst{5} = src1{0}; > let Inst{15-12} = Rt; > let Inst{19-16} = Rt2; > > @@ -617,8 +617,8 @@ > bits<4> src2; > > // Encode instruction operands. 
> - let Inst{3-0} = dst1{3-0}; > - let Inst{5} = dst1{4}; > + let Inst{3-0} = dst1{4-1}; > + let Inst{5} = dst1{0}; > let Inst{15-12} = src1; > let Inst{19-16} = src2; > > > Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp?rev=159938&r1=159937&r2=159938&view=diff > ============================================================================== > --- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp (original) > +++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp Mon Jul 9 11:41:33 2012 > @@ -4198,9 +4198,9 @@ > DecodeStatus S = MCDisassembler::Success; > unsigned Rt = fieldFromInstruction32(Insn, 12, 4); > unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); > - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); > + unsigned Rm = fieldFromInstruction32(Insn, 5, 1); > unsigned pred = fieldFromInstruction32(Insn, 28, 4); > - Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; > + Rm |= fieldFromInstruction32(Insn, 0, 4) << 4; > > if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) > S = MCDisassembler::SoftFail; > @@ -4224,9 +4224,9 @@ > DecodeStatus S = MCDisassembler::Success; > unsigned Rt = fieldFromInstruction32(Insn, 12, 4); > unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); > - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); > + unsigned Rm = fieldFromInstruction32(Insn, 5, 1); > unsigned pred = fieldFromInstruction32(Insn, 28, 4); > - Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; > + Rm |= fieldFromInstruction32(Insn, 0, 4) << 4; > > if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) > S = MCDisassembler::SoftFail; > > Modified: llvm/trunk/test/MC/ARM/simple-fp-encoding.s > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/simple-fp-encoding.s?rev=159938&r1=159937&r2=159938&view=diff > ============================================================================== > --- llvm/trunk/test/MC/ARM/simple-fp-encoding.s (original) > 
+++ llvm/trunk/test/MC/ARM/simple-fp-encoding.s Mon Jul 9 11:41:33 2012 > @@ -196,6 +196,27 @@ > @ CHECK: vmov r0, r1, d16 @ encoding: [0x30,0x0b,0x51,0xec] > vmov r0, r1, d16 > > +@ Between two single precision registers and two core registers > + vmov s3, s4, r1, r2 > + vmov s2, s3, r1, r2 > + vmov r1, r2, s3, s4 > + vmov r1, r2, s2, s3 > +@ CHECK: vmov s3, s4, r1, r2 @ encoding: [0x31,0x1a,0x42,0xec] > +@ CHECK: vmov s2, s3, r1, r2 @ encoding: [0x11,0x1a,0x42,0xec] > +@ CHECK: vmov r1, r2, s3, s4 @ encoding: [0x31,0x1a,0x52,0xec] > +@ CHECK: vmov r1, r2, s2, s3 @ encoding: [0x11,0x1a,0x52,0xec] > + > +@ Between one double precision register and two core registers > + vmov d15, r1, r2 > + vmov d16, r1, r2 > + vmov r1, r2, d15 > + vmov r1, r2, d16 > +@ CHECK: vmov d15, r1, r2 @ encoding: [0x1f,0x1b,0x42,0xec] > +@ CHECK: vmov d16, r1, r2 @ encoding: [0x30,0x1b,0x42,0xec] > +@ CHECK: vmov r1, r2, d15 @ encoding: [0x1f,0x1b,0x52,0xec] > +@ CHECK: vmov r1, r2, d16 @ encoding: [0x30,0x1b,0x52,0xec] > + > + > @ CHECK: vldr d17, [r0] @ encoding: [0x00,0x1b,0xd0,0xed] > @ CHECK: vldr s0, [lr] @ encoding: [0x00,0x0a,0x9e,0xed] > @ CHECK: vldr d0, [lr] @ encoding: [0x00,0x0b,0x9e,0xed] > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits -------------- next part -------------- An HTML attachment was scrubbed... 
URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120709/4e673532/attachment.html From gkistanova at gmail.com Mon Jul 9 13:05:41 2012 From: gkistanova at gmail.com (Galina Kistanova) Date: Mon, 09 Jul 2012 18:05:41 -0000 Subject: [llvm-commits] [zorg] r159944 - in /zorg/trunk/buildbot/osuosl/master/config: builders.py slaves.py Message-ID: <20120709180541.D9BEF2A6C069@llvm.org> Author: gkistanova Date: Mon Jul 9 13:05:41 2012 New Revision: 159944 URL: http://llvm.org/viewvc/llvm-project?rev=159944&view=rev Log: Patch by Mikael Lyngvig! Remove builder clang-native-mingw64-win7 as requested by owner. Modified: zorg/trunk/buildbot/osuosl/master/config/builders.py zorg/trunk/buildbot/osuosl/master/config/slaves.py Modified: zorg/trunk/buildbot/osuosl/master/config/builders.py URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/buildbot/osuosl/master/config/builders.py?rev=159944&r1=159943&r2=159944&view=diff ============================================================================== --- zorg/trunk/buildbot/osuosl/master/config/builders.py (original) +++ zorg/trunk/buildbot/osuosl/master/config/builders.py Mon Jul 9 13:05:41 2012 @@ -193,11 +193,6 @@ stage1_config='Release+Asserts', stage2_config='Release+Asserts')}, - {'name' : "clang-native-mingw64-win7", - 'slavenames': ["milyng1"], - 'builddir' : "clang-native-mingw64-win7", - 'factory' : ClangBuilder.getClangMinGWBuildFactory(jobs=6)}, - # Clang cross builders. 
{'name' : "clang-x86_64-darwin11-cross-mingw32", 'slavenames' :["as-bldslv11"], Modified: zorg/trunk/buildbot/osuosl/master/config/slaves.py URL: http://llvm.org/viewvc/llvm-project/zorg/trunk/buildbot/osuosl/master/config/slaves.py?rev=159944&r1=159943&r2=159944&view=diff ============================================================================== --- zorg/trunk/buildbot/osuosl/master/config/slaves.py (original) +++ zorg/trunk/buildbot/osuosl/master/config/slaves.py Mon Jul 9 13:05:41 2012 @@ -146,9 +146,6 @@ # 2005 PowerPC Mac Mini, Mac OS X 10.5 create_slave("arxan_bellini", properties={'jobs': 2}, max_builds=1), - # Intel(R) Core(TM) 2 Duo E8400 3.0 GHz, 8 GB RAM, Windows 7 64, Mingw64 - create_slave("milyng1", properties={ 'jobs': 2}, max_builds=1), - # Defunct. # Pentium Dual CPU T3400 @ 2.1GHz #create_slave("dumitrescu1", properties={'jobs' : 2}, max_builds=1), From richard.barton at arm.com Mon Jul 9 13:20:02 2012 From: richard.barton at arm.com (Richard Barton) Date: Mon, 09 Jul 2012 18:20:02 -0000 Subject: [llvm-commits] [llvm] r159945 - /llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp Message-ID: <20120709182002.7D2832A6C069@llvm.org> Author: rbarton Date: Mon Jul 9 13:20:02 2012 New Revision: 159945 URL: http://llvm.org/viewvc/llvm-project?rev=159945&view=rev Log: Oops - correct broken disassembly for VMOV Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp?rev=159945&r1=159944&r2=159945&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp (original) +++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp Mon Jul 9 13:20:02 2012 @@ -4226,7 +4226,7 @@ unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); unsigned Rm = fieldFromInstruction32(Insn, 5, 1); 
unsigned pred = fieldFromInstruction32(Insn, 28, 4); - Rm |= fieldFromInstruction32(Insn, 0, 4) << 4; + Rm |= fieldFromInstruction32(Insn, 0, 4) << 1; if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) S = MCDisassembler::SoftFail; From richard.barton at arm.com Mon Jul 9 13:22:01 2012 From: richard.barton at arm.com (Richard Barton) Date: Mon, 9 Jul 2012 19:22:01 +0100 Subject: [llvm-commits] [llvm] r159938 - in /llvm/trunk: lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/Disassembler/ARMDisassembler.cpp test/MC/ARM/simple-fp-encoding.s In-Reply-To: <67A813F8-DAF5-4267-A140-41A23DF1F2C3@apple.com> References: <20120709164134.517C52A6C069@llvm.org> <67A813F8-DAF5-4267-A140-41A23DF1F2C3@apple.com> Message-ID: <000d01cd5dff$bcb98190$362c84b0$@barton@arm.com> Hi Chad My change completely broke disassembling the VMOV Rt, Rt2, Sm, Sm+1 instruction. I must not have run the full test suite over it - must do better! Have committed a fix: r159945. Sorry for the breakage. Rich > -----Original Message----- > From: Chad Rosier [mailto:mcrosier at apple.com] > Sent: 09 July 2012 19:02 > To: Richard Barton > Cc: llvm-commits at cs.uiuc.edu > Subject: Re: [llvm-commits] [llvm] r159938 - in /llvm/trunk: > lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/Disassembler/ARMDisassembler.cpp > test/MC/ARM/simple-fp-encoding.s > > Richard, > This appears to be causing failures on our internal builders with the > following warnings: > > ******************** TEST 'LLVM :: MC/Disassembler/ARM/neon.txt' FAILED > ********************Script: > -- > 0xa4 0x0d 0xa3 0xf4 > ^ > llvm/test/MC/Disassembler/ARM/neon.txt:1898:10: error: expected string not > found in input > # CHECK: vmovvs r2, lr, s29, s30 > ^ > :897:2: note: scanning from here > stmdb r12!, {r1, r3, r5, r9, r10, r11, r12, lr} ^ > ^ > :897:11: note: possible intended match here > stmdb r12!, {r1, r3, r5, r9, r10, r11, r12, lr} ^ > ^ > -- > > ******************** > > Chad > > On Jul 9, 2012, at 9:41 AM, Richard Barton wrote: > > > Author: rbarton > 
Date: Mon Jul 9 11:41:33 2012 > New Revision: 159938 > > URL: http://llvm.org/viewvc/llvm-project?rev=159938&view=rev > Log: > Fix instruction description of VMOV (between two ARM core registers and > two single-precision registers) > > Modified: > llvm/trunk/lib/Target/ARM/ARMInstrVFP.td > llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp > llvm/trunk/test/MC/ARM/simple-fp-encoding.s > > Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td > URL: http://llvm.org/viewvc/llvm- > project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=159938&r1=159937&r2=15993 > 8&view=diff > ======================================================================== > ====== > --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original) > +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Mon Jul 9 11:41:33 2012 > @@ -567,8 +567,8 @@ > bits<4> Rt2; > > // Encode instruction operands. 
> - let Inst{3-0} = dst1{3-0}; > - let Inst{5} = dst1{4}; > + let Inst{3-0} = dst1{4-1}; > + let Inst{5} = dst1{0}; > let Inst{15-12} = src1; > let Inst{19-16} = src2; > > > Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp > URL: http://llvm.org/viewvc/llvm- > project/llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp?rev=159938& > r1=159937&r2=159938&view=diff > ======================================================================== > ====== > --- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp > (original) > +++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp Mon Jul > 9 11:41:33 2012 > @@ -4198,9 +4198,9 @@ > DecodeStatus S = MCDisassembler::Success; > unsigned Rt = fieldFromInstruction32(Insn, 12, 4); > unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); > - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); > + unsigned Rm = fieldFromInstruction32(Insn, 5, 1); > unsigned pred = fieldFromInstruction32(Insn, 28, 4); > - Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; > + Rm |= fieldFromInstruction32(Insn, 0, 4) << 4; > > if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) > S = MCDisassembler::SoftFail; > @@ -4224,9 +4224,9 @@ > DecodeStatus S = MCDisassembler::Success; > unsigned Rt = fieldFromInstruction32(Insn, 12, 4); > unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); > - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); > + unsigned Rm = fieldFromInstruction32(Insn, 5, 1); > unsigned pred = fieldFromInstruction32(Insn, 28, 4); > - Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; > + Rm |= fieldFromInstruction32(Insn, 0, 4) << 4; > > if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) > S = MCDisassembler::SoftFail; > > Modified: llvm/trunk/test/MC/ARM/simple-fp-encoding.s > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/simple- > fp-encoding.s?rev=159938&r1=159937&r2=159938&view=diff > ======================================================================== > ====== > --- 
llvm/trunk/test/MC/ARM/simple-fp-encoding.s (original) > +++ llvm/trunk/test/MC/ARM/simple-fp-encoding.s Mon Jul 9 11:41:33 2012 > @@ -196,6 +196,27 @@ > @ CHECK: vmov r0, r1, d16 @ encoding: [0x30,0x0b,0x51,0xec] > vmov r0, r1, d16 > > +@ Between two single precision registers and two core registers > + vmov s3, s4, r1, r2 > + vmov s2, s3, r1, r2 > + vmov r1, r2, s3, s4 > + vmov r1, r2, s2, s3 > +@ CHECK: vmov s3, s4, r1, r2 @ encoding: [0x31,0x1a,0x42,0xec] > +@ CHECK: vmov s2, s3, r1, r2 @ encoding: [0x11,0x1a,0x42,0xec] > +@ CHECK: vmov r1, r2, s3, s4 @ encoding: [0x31,0x1a,0x52,0xec] > +@ CHECK: vmov r1, r2, s2, s3 @ encoding: [0x11,0x1a,0x52,0xec] > + > +@ Between one double precision register and two core registers > + vmov d15, r1, r2 > + vmov d16, r1, r2 > + vmov r1, r2, d15 > + vmov r1, r2, d16 > +@ CHECK: vmov d15, r1, r2 @ encoding: [0x1f,0x1b,0x42,0xec] > +@ CHECK: vmov d16, r1, r2 @ encoding: [0x30,0x1b,0x42,0xec] > +@ CHECK: vmov r1, r2, d15 @ encoding: [0x1f,0x1b,0x52,0xec] > +@ CHECK: vmov r1, r2, d16 @ encoding: [0x30,0x1b,0x52,0xec] > + > + > @ CHECK: vldr d17, [r0] @ encoding: [0x00,0x1b,0xd0,0xed] > @ CHECK: vldr s0, [lr] @ encoding: [0x00,0x0a,0x9e,0xed] > @ CHECK: vldr d0, [lr] @ encoding: [0x00,0x0b,0x9e,0xed] > > > _______________________________________________ > llvm-commits mailing list > llvm-commits at cs.uiuc.edu > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits > > From mcrosier at apple.com Mon Jul 9 13:23:15 2012 From: mcrosier at apple.com (Chad Rosier) Date: Mon, 09 Jul 2012 11:23:15 -0700 Subject: [llvm-commits] [llvm] r159938 - in /llvm/trunk: lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/Disassembler/ARMDisassembler.cpp test/MC/ARM/simple-fp-encoding.s In-Reply-To: <000d01cd5dff$bcb98190$362c84b0$%barton@arm.com> References: <20120709164134.517C52A6C069@llvm.org> <67A813F8-DAF5-4267-A140-41A23DF1F2C3@apple.com> <000d01cd5dff$bcb98190$362c84b0$%barton@arm.com> Message-ID: <37FA420D-B74C-4A7F-8FA7-BFD20F7E7858@apple.com> 
On Jul 9, 2012, at 11:22 AM, Richard Barton wrote: > Hi Chad > > My change completely broke disassembling the VMOV Rt, Rt2, Sm, Sm+1 instruction. > I must not have run the full test suite over it - must do better! > > Have committed a fix: r159945. Sorry for the breakage. Thanks for the quick fix! Chad > Rich > >> -----Original Message----- >> From: Chad Rosier [mailto:mcrosier at apple.com] >> Sent: 09 July 2012 19:02 >> To: Richard Barton >> Cc: llvm-commits at cs.uiuc.edu >> Subject: Re: [llvm-commits] [llvm] r159938 - in /llvm/trunk: >> lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/Disassembler/ARMDisassembler.cpp >> test/MC/ARM/simple-fp-encoding.s >> >> Richard, >> This appears to be causing failures on our internal builders with the >> following warnings: >> >> ******************** TEST 'LLVM :: MC/Disassembler/ARM/neon.txt' FAILED >> ********************Script: >> -- >> 0xa4 0x0d 0xa3 0xf4 >> ^ >> llvm/test/MC/Disassembler/ARM/neon.txt:1898:10: error: expected string not >> found in input >> # CHECK: vmovvs r2, lr, s29, s30 >> ^ >> :897:2: note: scanning from here >> stmdb r12!, {r1, r3, r5, r9, r10, r11, r12, lr} ^ >> ^ >> :897:11: note: possible intended match here >> stmdb r12!, {r1, r3, r5, r9, r10, r11, r12, lr} ^ >> ^ >> -- >> >> ******************** >> >> Chad >> >> On Jul 9, 2012, at 9:41 AM, Richard Barton wrote: >> >> >> Author: rbarton >> Date: Mon Jul 9 11:41:33 2012 >> New Revision: 159938 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=159938&view=rev >> Log: >> Fix instruction description of VMOV (between two ARM core registers and >> two single-precision resiters) >> >> Modified: >> llvm/trunk/lib/Target/ARM/ARMInstrVFP.td >> llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp >> llvm/trunk/test/MC/ARM/simple-fp-encoding.s >> >> Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td >> URL: http://llvm.org/viewvc/llvm- >> project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=159938&r1=159937&r2=15993 >> 8&view=diff >> 
======================================================================== >> ====== >> --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original) >> +++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Mon Jul 9 11:41:33 2012 >> @@ -567,8 +567,8 @@ >> bits<4> Rt2; >> >> // Encode instruction operands. >> - let Inst{3-0} = src1{3-0}; >> - let Inst{5} = src1{4}; >> + let Inst{3-0} = src1{4-1}; >> + let Inst{5} = src1{0}; >> let Inst{15-12} = Rt; >> let Inst{19-16} = Rt2; >> >> @@ -617,8 +617,8 @@ >> bits<4> src2; >> >> // Encode instruction operands. >> - let Inst{3-0} = dst1{3-0}; >> - let Inst{5} = dst1{4}; >> + let Inst{3-0} = dst1{4-1}; >> + let Inst{5} = dst1{0}; >> let Inst{15-12} = src1; >> let Inst{19-16} = src2; >> >> >> Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp >> URL: http://llvm.org/viewvc/llvm- >> project/llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp?rev=159938& >> r1=159937&r2=159938&view=diff >> ======================================================================== >> ====== >> --- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp >> (original) >> +++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp Mon Jul >> 9 11:41:33 2012 >> @@ -4198,9 +4198,9 @@ >> DecodeStatus S = MCDisassembler::Success; >> unsigned Rt = fieldFromInstruction32(Insn, 12, 4); >> unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); >> - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); >> + unsigned Rm = fieldFromInstruction32(Insn, 5, 1); >> unsigned pred = fieldFromInstruction32(Insn, 28, 4); >> - Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; >> + Rm |= fieldFromInstruction32(Insn, 0, 4) << 4; >> >> if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) >> S = MCDisassembler::SoftFail; >> @@ -4224,9 +4224,9 @@ >> DecodeStatus S = MCDisassembler::Success; >> unsigned Rt = fieldFromInstruction32(Insn, 12, 4); >> unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); >> - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); >> + unsigned Rm = 
fieldFromInstruction32(Insn, 5, 1); >> unsigned pred = fieldFromInstruction32(Insn, 28, 4); >> - Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; >> + Rm |= fieldFromInstruction32(Insn, 0, 4) << 4; >> >> if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) >> S = MCDisassembler::SoftFail; >> >> Modified: llvm/trunk/test/MC/ARM/simple-fp-encoding.s >> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/simple- >> fp-encoding.s?rev=159938&r1=159937&r2=159938&view=diff >> ======================================================================== >> ====== >> --- llvm/trunk/test/MC/ARM/simple-fp-encoding.s (original) >> +++ llvm/trunk/test/MC/ARM/simple-fp-encoding.s Mon Jul 9 11:41:33 2012 >> @@ -196,6 +196,27 @@ >> @ CHECK: vmov r0, r1, d16 @ encoding: [0x30,0x0b,0x51,0xec] >> vmov r0, r1, d16 >> >> +@ Between two single precision registers and two core registers >> + vmov s3, s4, r1, r2 >> + vmov s2, s3, r1, r2 >> + vmov r1, r2, s3, s4 >> + vmov r1, r2, s2, s3 >> +@ CHECK: vmov s3, s4, r1, r2 @ encoding: [0x31,0x1a,0x42,0xec] >> +@ CHECK: vmov s2, s3, r1, r2 @ encoding: [0x11,0x1a,0x42,0xec] >> +@ CHECK: vmov r1, r2, s3, s4 @ encoding: [0x31,0x1a,0x52,0xec] >> +@ CHECK: vmov r1, r2, s2, s3 @ encoding: [0x11,0x1a,0x52,0xec] >> + >> +@ Between one double precision register and two core registers >> + vmov d15, r1, r2 >> + vmov d16, r1, r2 >> + vmov r1, r2, d15 >> + vmov r1, r2, d16 >> +@ CHECK: vmov d15, r1, r2 @ encoding: [0x1f,0x1b,0x42,0xec] >> +@ CHECK: vmov d16, r1, r2 @ encoding: [0x30,0x1b,0x42,0xec] >> +@ CHECK: vmov r1, r2, d15 @ encoding: [0x1f,0x1b,0x52,0xec] >> +@ CHECK: vmov r1, r2, d16 @ encoding: [0x30,0x1b,0x52,0xec] >> + >> + >> @ CHECK: vldr d17, [r0] @ encoding: [0x00,0x1b,0xd0,0xed] >> @ CHECK: vldr s0, [lr] @ encoding: [0x00,0x0a,0x9e,0xed] >> @ CHECK: vldr d0, [lr] @ encoding: [0x00,0x0b,0x9e,0xed] >> >> >> _______________________________________________ >> llvm-commits mailing list >> llvm-commits at cs.uiuc.edu >> 
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >> >> > > > From richard.barton at arm.com Mon Jul 9 13:30:56 2012 From: richard.barton at arm.com (Richard Barton) Date: Mon, 09 Jul 2012 18:30:56 -0000 Subject: [llvm-commits] [llvm] r159948 - /llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Message-ID: <20120709183056.49C312A6C069@llvm.org> Author: rbarton Date: Mon Jul 9 13:30:56 2012 New Revision: 159948 URL: http://llvm.org/viewvc/llvm-project?rev=159948&view=rev Log: Some formatting to keep Clang happy Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=159948&r1=159947&r2=159948&view=diff ============================================================================== --- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original) +++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Mon Jul 9 13:30:56 2012 @@ -7300,8 +7300,8 @@ if ((isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg())) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && - (!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR || - inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) && + ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || + (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && (!static_cast(Operands[3])->isToken() || !static_cast(Operands[3])->getToken().equals_lower(".w"))) { unsigned NewOpc; @@ -7339,8 +7339,8 @@ isARMLowRegister(Inst.getOperand(2).getReg())) && (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() || Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) && - (!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR || - inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) && + ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || + (inITBlock() && 
Inst.getOperand(5).getReg() != ARM::CPSR)) && (!static_cast(Operands[3])->isToken() || !static_cast(Operands[3])->getToken().equals_lower(".w"))) { unsigned NewOpc; From nunoplopes at sapo.pt Mon Jul 9 13:38:21 2012 From: nunoplopes at sapo.pt (Nuno Lopes) Date: Mon, 09 Jul 2012 18:38:21 -0000 Subject: [llvm-commits] [llvm] r159952 - in /llvm/trunk: lib/Transforms/InstCombine/InstCombine.h lib/Transforms/InstCombine/InstCombineCalls.cpp lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp lib/Transforms/InstCombine/InstructionCombining.cpp test/Transforms/InstCombine/objsize.ll Message-ID: <20120709183821.483982A6C06A@llvm.org> Author: nlopes Date: Mon Jul 9 13:38:20 2012 New Revision: 159952 URL: http://llvm.org/viewvc/llvm-project?rev=159952&view=rev Log: instcombine: merge the functions that remove dead allocas and dead mallocs/callocs/... This patch removes ~70 lines in InstCombineLoadStoreAlloca.cpp and makes both functions a bit more aggressive than before :) In theory, we can be more aggressive when removing an alloca than a malloc, because an alloca pointer should never escape, but we are not taking advantage of this anyway Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombine.h llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp llvm/trunk/test/Transforms/InstCombine/objsize.ll Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombine.h URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombine.h?rev=159952&r1=159951&r2=159952&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombine.h (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstCombine.h Mon Jul 9 13:38:20 2012 @@ -187,7 +187,7 @@ Instruction *visitPHINode(PHINode &PN); Instruction *visitGetElementPtrInst(GetElementPtrInst 
&GEP); Instruction *visitAllocaInst(AllocaInst &AI); - Instruction *visitMalloc(Instruction &FI); + Instruction *visitAllocSite(Instruction &FI); Instruction *visitFree(CallInst &FI); Instruction *visitLoadInst(LoadInst &LI); Instruction *visitStoreInst(StoreInst &SI); Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=159952&r1=159951&r2=159952&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Mon Jul 9 13:38:20 2012 @@ -880,7 +880,7 @@ // Instruction *InstCombiner::visitCallSite(CallSite CS) { if (isAllocLikeFn(CS.getInstruction())) - return visitMalloc(*CS.getInstruction()); + return visitAllocSite(*CS.getInstruction()); bool Changed = false; Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp?rev=159952&r1=159951&r2=159952&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp Mon Jul 9 13:38:20 2012 @@ -22,72 +22,6 @@ STATISTIC(NumDeadStore, "Number of dead stores eliminated"); -// Try to kill dead allocas by walking through its uses until we see some use -// that could escape. This is a conservative analysis which tries to handle -// GEPs, bitcasts, stores, and no-op intrinsics. These tend to be the things -// left after inlining and SROA finish chewing on an alloca. 
-static Instruction *removeDeadAlloca(InstCombiner &IC, AllocaInst &AI) { - SmallVector Worklist, DeadStores; - Worklist.push_back(&AI); - do { - Instruction *PI = Worklist.pop_back_val(); - for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); - UI != UE; ++UI) { - Instruction *I = cast(*UI); - switch (I->getOpcode()) { - default: - // Give up the moment we see something we can't handle. - return 0; - - case Instruction::GetElementPtr: - case Instruction::BitCast: - Worklist.push_back(I); - continue; - - case Instruction::Call: - // We can handle a limited subset of calls to no-op intrinsics. - if (IntrinsicInst *II = dyn_cast(I)) { - switch (II->getIntrinsicID()) { - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - continue; - default: - return 0; - } - } - // Reject everything else. - return 0; - - case Instruction::Store: { - // Stores into the alloca are only live if the alloca is live. - StoreInst *SI = cast(I); - // We can eliminate atomic stores, but not volatile. - if (SI->isVolatile()) - return 0; - // The store is only trivially safe if the poniter is the destination - // as opposed to the value. We're conservative here and don't check for - // the case where we store the address of a dead alloca into a dead - // alloca. - if (SI->getPointerOperand() != PI) - return 0; - DeadStores.push_back(I); - continue; - } - } - } - } while (!Worklist.empty()); - - // The alloca is dead. Kill off all the stores to it, and then replace it - // with undef. - while (!DeadStores.empty()) - IC.EraseInstFromFunction(*DeadStores.pop_back_val()); - return IC.ReplaceInstUsesWith(AI, UndefValue::get(AI.getType())); -} - Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. 
@@ -179,10 +113,9 @@ } } - // Try to aggressively remove allocas which are only used for GEPs, lifetime - // markers, and stores. This happens when SROA iteratively promotes stores - // out of the alloca, and we need to cleanup after it. - return removeDeadAlloca(*this, AI); + // At last, use the generic allocation site handler to aggressively remove + // unused allocas. + return visitAllocSite(AI); } Modified: llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp?rev=159952&r1=159951&r2=159952&view=diff ============================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp (original) +++ llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp Mon Jul 9 13:38:20 2012 @@ -1106,71 +1106,89 @@ -static bool IsOnlyNullComparedAndFreed(Value *V, SmallVectorImpl &Users, - int Depth = 0) { - if (Depth == 8) - return false; +static bool +isAllocSiteRemovable(Instruction *AI, SmallVectorImpl &Users) { + SmallVector Worklist; + Worklist.push_back(AI); - for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - User *U = *UI; - if (isFreeCall(U)) { - Users.push_back(U); - continue; - } - if (ICmpInst *ICI = dyn_cast(U)) { - if (ICI->isEquality() && isa(ICI->getOperand(1))) { - Users.push_back(ICI); - continue; - } - } - if (BitCastInst *BCI = dyn_cast(U)) { - if (IsOnlyNullComparedAndFreed(BCI, Users, Depth+1)) { - Users.push_back(BCI); + do { + Instruction *PI = Worklist.pop_back_val(); + for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); UI != UE; + ++UI) { + Instruction *I = cast(*UI); + switch (I->getOpcode()) { + default: + // Give up the moment we see something we can't handle. 
+ return false; + + case Instruction::BitCast: + case Instruction::GetElementPtr: + Users.push_back(I); + Worklist.push_back(I); continue; - } - } - if (GetElementPtrInst *GEPI = dyn_cast(U)) { - if (IsOnlyNullComparedAndFreed(GEPI, Users, Depth+1)) { - Users.push_back(GEPI); + + case Instruction::ICmp: { + ICmpInst *ICI = cast(I); + // We can fold eq/ne comparisons with null to false/true, respectively. + if (!ICI->isEquality() || !isa(ICI->getOperand(1))) + return false; + Users.push_back(I); continue; } - } - if (IntrinsicInst *II = dyn_cast(U)) { - switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::memmove: - case Intrinsic::memcpy: - case Intrinsic::memset: { - MemIntrinsic *MI = cast(II); - if (MI->isVolatile() || MI->getRawDest() != V) + + case Instruction::Call: + // Ignore no-op and store intrinsics. + if (IntrinsicInst *II = dyn_cast(I)) { + switch (II->getIntrinsicID()) { + default: + return false; + + case Intrinsic::memmove: + case Intrinsic::memcpy: + case Intrinsic::memset: { + MemIntrinsic *MI = cast(II); + if (MI->isVolatile() || MI->getRawDest() != PI) + return false; + } + // fall through + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + Users.push_back(I); + continue; + } + } + + if (isFreeCall(I)) { + Users.push_back(I); + continue; + } + return false; + + case Instruction::Store: { + StoreInst *SI = cast(I); + if (SI->isVolatile() || SI->getPointerOperand() != PI) return false; - } - // fall through - case Intrinsic::objectsize: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - Users.push_back(II); + Users.push_back(I); continue; } + } + llvm_unreachable("missing a return?"); } - if (StoreInst *SI = dyn_cast(U)) { - if (SI->isVolatile() || SI->getPointerOperand() != V) - return false; - Users.push_back(SI); - continue; - } - 
return false; - } + } while (!Worklist.empty()); return true; } -Instruction *InstCombiner::visitMalloc(Instruction &MI) { +Instruction *InstCombiner::visitAllocSite(Instruction &MI) { // If we have a malloc call which is only used in any amount of comparisons // to null and free calls, delete the calls and replace the comparisons with // true or false as appropriate. SmallVector Users; - if (IsOnlyNullComparedAndFreed(&MI, Users)) { + if (isAllocSiteRemovable(&MI, Users)) { for (unsigned i = 0, e = Users.size(); i != e; ++i) { Instruction *I = cast_or_null(&*Users[i]); if (!I) continue; Modified: llvm/trunk/test/Transforms/InstCombine/objsize.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/objsize.ll?rev=159952&r1=159951&r2=159952&view=diff ============================================================================== --- llvm/trunk/test/Transforms/InstCombine/objsize.ll (original) +++ llvm/trunk/test/Transforms/InstCombine/objsize.ll Mon Jul 9 13:38:20 2012 @@ -106,7 +106,7 @@ %struct.data = type { [100 x i32], [100 x i32], [1024 x i8] } -define i32 @test4() nounwind ssp { +define i32 @test4(i8** %esc) nounwind ssp { ; CHECK: @test4 entry: %0 = alloca %struct.data, align 8 @@ -115,6 +115,7 @@ ; CHECK-NOT: @llvm.objectsize ; CHECK: @llvm.memset.p0i8.i32(i8* %1, i8 0, i32 1824, i32 8, i1 false) %3 = call i8* @__memset_chk(i8* %1, i32 0, i32 1824, i32 %2) nounwind + store i8* %1, i8** %esc ret i32 0 } From ahatanaka at mips.com Mon Jul 9 13:46:47 2012 From: ahatanaka at mips.com (Akira Hatanaka) Date: Mon, 09 Jul 2012 18:46:47 -0000 Subject: [llvm-commits] [llvm] r159953 - in /llvm/trunk: lib/Target/Mips/Disassembler/MipsDisassembler.cpp test/MC/Disassembler/Mips/mips32.txt test/MC/Disassembler/Mips/mips32_le.txt test/MC/Disassembler/Mips/mips32r2.txt test/MC/Disassembler/Mips/mips32r2_le.txt Message-ID: <20120709184647.E86B62A6C06A@llvm.org> Author: ahatanak Date: Mon Jul 9 13:46:47 2012 New Revision: 159953 URL: 
http://llvm.org/viewvc/llvm-project?rev=159953&view=rev Log: Reapply r158846. Access mips register classes via MCRegisterInfo's functions instead of via the TargetRegisterClasses defined in MipsGenRegisterInfo.inc. Modified: llvm/trunk/lib/Target/Mips/Disassembler/MipsDisassembler.cpp llvm/trunk/test/MC/Disassembler/Mips/mips32.txt llvm/trunk/test/MC/Disassembler/Mips/mips32_le.txt llvm/trunk/test/MC/Disassembler/Mips/mips32r2.txt llvm/trunk/test/MC/Disassembler/Mips/mips32r2_le.txt Modified: llvm/trunk/lib/Target/Mips/Disassembler/MipsDisassembler.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Mips/Disassembler/MipsDisassembler.cpp?rev=159953&r1=159952&r2=159953&view=diff ============================================================================== --- llvm/trunk/lib/Target/Mips/Disassembler/MipsDisassembler.cpp (original) +++ llvm/trunk/lib/Target/Mips/Disassembler/MipsDisassembler.cpp Mon Jul 9 13:46:47 2012 @@ -13,16 +13,15 @@ #include "Mips.h" #include "MipsSubtarget.h" +#include "MipsRegisterInfo.h" #include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/MathExtras.h" - #include "MipsGenEDInfo.inc" using namespace llvm; @@ -31,123 +30,71 @@ namespace { -/// MipsDisassembler - a disasembler class for Mips32. -class MipsDisassembler : public MCDisassembler { +/// MipsDisassemblerBase - a disasembler class for Mips. +class MipsDisassemblerBase : public MCDisassembler { public: /// Constructor - Initializes the disassembler. 
/// - MipsDisassembler(const MCSubtargetInfo &STI, bool bigEndian) : - MCDisassembler(STI), isBigEndian(bigEndian) { - } + MipsDisassemblerBase(const MCSubtargetInfo &STI, const MCRegisterInfo *Info, + bool bigEndian) : + MCDisassembler(STI), RegInfo(Info), isBigEndian(bigEndian) {} - ~MipsDisassembler() { - } - - /// getInstruction - See MCDisassembler. - DecodeStatus getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject &region, - uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const; + virtual ~MipsDisassemblerBase() {} /// getEDInfo - See MCDisassembler. const EDInstInfo *getEDInfo() const; + const MCRegisterInfo *getRegInfo() const { return RegInfo; } + private: + const MCRegisterInfo *RegInfo; +protected: bool isBigEndian; }; - -/// Mips64Disassembler - a disasembler class for Mips64. -class Mips64Disassembler : public MCDisassembler { +/// MipsDisassembler - a disasembler class for Mips32. +class MipsDisassembler : public MipsDisassemblerBase { public: /// Constructor - Initializes the disassembler. /// - Mips64Disassembler(const MCSubtargetInfo &STI, bool bigEndian) : - MCDisassembler(STI), isBigEndian(bigEndian) { - } - - ~Mips64Disassembler() { - } + MipsDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info, + bool bigEndian) : + MipsDisassemblerBase(STI, Info, bigEndian) {} /// getInstruction - See MCDisassembler. - DecodeStatus getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject &region, - uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const; + virtual DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject &region, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; +}; - /// getEDInfo - See MCDisassembler. - const EDInstInfo *getEDInfo() const; -private: - bool isBigEndian; +/// Mips64Disassembler - a disasembler class for Mips64.
+class Mips64Disassembler : public MipsDisassemblerBase { +public: + /// Constructor - Initializes the disassembler. + /// + Mips64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info, + bool bigEndian) : + MipsDisassemblerBase(STI, Info, bigEndian) {} + + /// getInstruction - See MCDisassembler. + virtual DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject &region, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; }; } // end anonymous namespace -const EDInstInfo *MipsDisassembler::getEDInfo() const { - return instInfoMips; -} - -const EDInstInfo *Mips64Disassembler::getEDInfo() const { +const EDInstInfo *MipsDisassemblerBase::getEDInfo() const { return instInfoMips; } -// Decoder tables for Mips register -static const uint16_t CPURegsTable[] = { - Mips::ZERO, Mips::AT, Mips::V0, Mips::V1, - Mips::A0, Mips::A1, Mips::A2, Mips::A3, - Mips::T0, Mips::T1, Mips::T2, Mips::T3, - Mips::T4, Mips::T5, Mips::T6, Mips::T7, - Mips::S0, Mips::S1, Mips::S2, Mips::S3, - Mips::S4, Mips::S5, Mips::S6, Mips::S7, - Mips::T8, Mips::T9, Mips::K0, Mips::K1, - Mips::GP, Mips::SP, Mips::FP, Mips::RA -}; - -static const uint16_t FGR32RegsTable[] = { - Mips::F0, Mips::F1, Mips::F2, Mips::F3, - Mips::F4, Mips::F5, Mips::F6, Mips::F7, - Mips::F8, Mips::F9, Mips::F10, Mips::F11, - Mips::F12, Mips::F13, Mips::F14, Mips::F15, - Mips::F16, Mips::F17, Mips::F18, Mips::F18, - Mips::F20, Mips::F21, Mips::F22, Mips::F23, - Mips::F24, Mips::F25, Mips::F26, Mips::F27, - Mips::F28, Mips::F29, Mips::F30, Mips::F31 -}; - -static const uint16_t CPU64RegsTable[] = { - Mips::ZERO_64, Mips::AT_64, Mips::V0_64, Mips::V1_64, - Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64, - Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64, - Mips::T4_64, Mips::T5_64, Mips::T6_64, Mips::T7_64, - Mips::S0_64, Mips::S1_64, Mips::S2_64, Mips::S3_64, - Mips::S4_64, Mips::S5_64, Mips::S6_64, Mips::S7_64, - Mips::T8_64, Mips::T9_64, Mips::K0_64, Mips::K1_64,
- Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64 -}; - -static const uint16_t FGR64RegsTable[] = { - Mips::D0_64, Mips::D1_64, Mips::D2_64, Mips::D3_64, - Mips::D4_64, Mips::D5_64, Mips::D6_64, Mips::D7_64, - Mips::D8_64, Mips::D9_64, Mips::D10_64, Mips::D11_64, - Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64, - Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64, - Mips::D20_64, Mips::D21_64, Mips::D22_64, Mips::D23_64, - Mips::D24_64, Mips::D25_64, Mips::D26_64, Mips::D27_64, - Mips::D28_64, Mips::D29_64, Mips::D30_64, Mips::D31_64 -}; - -static const uint16_t AFGR64RegsTable[] = { - Mips::D0, Mips::D1, Mips::D2, Mips::D3, - Mips::D4, Mips::D5, Mips::D6, Mips::D7, - Mips::D8, Mips::D9, Mips::D10, Mips::D11, - Mips::D12, Mips::D13, Mips::D14, Mips::D15 -}; - // Forward declare these because the autogenerated code will reference them. // Definitions are further down. static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst, @@ -243,25 +190,25 @@ static MCDisassembler *createMipsDisassembler( const Target &T, const MCSubtargetInfo &STI) { - return new MipsDisassembler(STI,true); + return new MipsDisassembler(STI, T.createMCRegInfo(""), true); } static MCDisassembler *createMipselDisassembler( const Target &T, const MCSubtargetInfo &STI) { - return new MipsDisassembler(STI,false); + return new MipsDisassembler(STI, T.createMCRegInfo(""), false); } static MCDisassembler *createMips64Disassembler( const Target &T, const MCSubtargetInfo &STI) { - return new Mips64Disassembler(STI,true); + return new Mips64Disassembler(STI, T.createMCRegInfo(""), true); } static MCDisassembler *createMips64elDisassembler( const Target &T, const MCSubtargetInfo &STI) { - return new Mips64Disassembler(STI, false); + return new Mips64Disassembler(STI, T.createMCRegInfo(""), false); } extern "C" void LLVMInitializeMipsDisassembler() { @@ -366,6 +313,11 @@ return MCDisassembler::Fail; } +static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { + const 
MipsDisassemblerBase *Dis = static_cast<const MipsDisassemblerBase*>(D); + return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo); +} + static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -374,7 +326,8 @@ if (RegNo > 31) return MCDisassembler::Fail; - Inst.addOperand(MCOperand::CreateReg(CPU64RegsTable[RegNo])); + unsigned Reg = getReg(Decoder, Mips::CPU64RegsRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); return MCDisassembler::Success; } @@ -384,8 +337,8 @@ const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateReg(CPURegsTable[RegNo])); + unsigned Reg = getReg(Decoder, Mips::CPURegsRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); return MCDisassembler::Success; } @@ -396,7 +349,8 @@ if (RegNo > 31) return MCDisassembler::Fail; - Inst.addOperand(MCOperand::CreateReg(FGR64RegsTable[RegNo])); + unsigned Reg = getReg(Decoder, Mips::FGR64RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); return MCDisassembler::Success; } @@ -407,7 +361,8 @@ if (RegNo > 31) return MCDisassembler::Fail; - Inst.addOperand(MCOperand::CreateReg(FGR32RegsTable[RegNo])); + unsigned Reg = getReg(Decoder, Mips::FGR32RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); return MCDisassembler::Success; } @@ -424,15 +379,18 @@ uint64_t Address, const void *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - int Reg = (int)fieldFromInstruction32(Insn, 16, 5); - int Base = (int)fieldFromInstruction32(Insn, 21, 5); + unsigned Reg = fieldFromInstruction32(Insn, 16, 5); + unsigned Base = fieldFromInstruction32(Insn, 21, 5); + + Reg = getReg(Decoder, Mips::CPURegsRegClassID, Reg); + Base = getReg(Decoder, Mips::CPURegsRegClassID, Base); if(Inst.getOpcode() == Mips::SC){ - Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Reg])); + Inst.addOperand(MCOperand::CreateReg(Reg)); } - Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Reg])); -
Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Base])); + Inst.addOperand(MCOperand::CreateReg(Reg)); + Inst.addOperand(MCOperand::CreateReg(Base)); Inst.addOperand(MCOperand::CreateImm(Offset)); return MCDisassembler::Success; @@ -443,11 +401,14 @@ uint64_t Address, const void *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - int Reg = (int)fieldFromInstruction32(Insn, 16, 5); - int Base = (int)fieldFromInstruction32(Insn, 21, 5); + unsigned Reg = fieldFromInstruction32(Insn, 16, 5); + unsigned Base = fieldFromInstruction32(Insn, 21, 5); - Inst.addOperand(MCOperand::CreateReg(FGR64RegsTable[Reg])); - Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Base])); + Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg); + Base = getReg(Decoder, Mips::CPURegsRegClassID, Base); + + Inst.addOperand(MCOperand::CreateReg(Reg)); + Inst.addOperand(MCOperand::CreateReg(Base)); Inst.addOperand(MCOperand::CreateImm(Offset)); return MCDisassembler::Success; @@ -478,10 +439,12 @@ unsigned RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > 31) + if (RegNo > 30 || RegNo %2) return MCDisassembler::Fail; - Inst.addOperand(MCOperand::CreateReg(AFGR64RegsTable[RegNo])); + ; + unsigned Reg = getReg(Decoder, Mips::AFGR64RegClassID, RegNo /2); + Inst.addOperand(MCOperand::CreateReg(Reg)); return MCDisassembler::Success; } @@ -492,7 +455,7 @@ //Currently only hardware register 29 is supported if (RegNo != 29) return MCDisassembler::Fail; - Inst.addOperand(MCOperand::CreateReg(Mips::HWR29)); + Inst.addOperand(MCOperand::CreateReg(Mips::HWR29_64)); return MCDisassembler::Success; } Modified: llvm/trunk/test/MC/Disassembler/Mips/mips32.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/Mips/mips32.txt?rev=159953&r1=159952&r2=159953&view=diff ============================================================================== --- llvm/trunk/test/MC/Disassembler/Mips/mips32.txt (original) +++ llvm/trunk/test/MC/Disassembler/Mips/mips32.txt Mon Jul 9 
13:46:47 2012 @@ -1,7 +1,7 @@ # RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux # CHECK: abs.d $f12,$f14 -0x46 0x20 0x39 0x85 +0x46 0x20 0x73 0x05 # CHECK: abs.s $f6,$f7 0x46 0x00 0x39 0x85 @@ -9,8 +9,8 @@ # CHECK: add t1,a2,a3 0x00 0xc7 0x48 0x20 -# CHECK: add.d $f18,$f12,$f14 -0x46 0x27 0x32 0x40 +# CHECK: add.d $f8,$f12,$f14 +0x46 0x2e 0x62 0x00 # CHECK: add.s $f9,$f6,$f7 0x46 0x07 0x32 0x40 @@ -61,103 +61,103 @@ 0x15 0x26 0x01 0x4c # CHECK: c.eq.d $f12,$f14 -0x46 0x27 0x30 0x32 +0x46 0x2e 0x60 0x32 # CHECK: c.eq.s $f6,$f7 0x46 0x07 0x30 0x32 # CHECK: c.f.d $f12,$f14 -0x46 0x27 0x30 0x30 +0x46 0x2e 0x60 0x30 # CHECK: c.f.s $f6,$f7 0x46 0x07 0x30 0x30 # CHECK: c.le.d $f12,$f14 -0x46 0x27 0x30 0x3e +0x46 0x2e 0x60 0x3e # CHECK: c.le.s $f6,$f7 0x46 0x07 0x30 0x3e # CHECK: c.lt.d $f12,$f14 -0x46 0x27 0x30 0x3c +0x46 0x2e 0x60 0x3c # CHECK: c.lt.s $f6,$f7 0x46 0x07 0x30 0x3c # CHECK: c.nge.d $f12,$f14 -0x46 0x27 0x30 0x3d +0x46 0x2e 0x60 0x3d # CHECK: c.nge.s $f6,$f7 0x46 0x07 0x30 0x3d # CHECK: c.ngl.d $f12,$f14 -0x46 0x27 0x30 0x3b +0x46 0x2e 0x60 0x3b # CHECK: c.ngl.s $f6,$f7 0x46 0x07 0x30 0x3b # CHECK: c.ngle.d $f12,$f14 -0x46 0x27 0x30 0x39 +0x46 0x2e 0x60 0x39 # CHECK: c.ngle.s $f6,$f7 0x46 0x07 0x30 0x39 # CHECK: c.ngt.d $f12,$f14 -0x46 0x27 0x30 0x3f +0x46 0x2e 0x60 0x3f # CHECK: c.ngt.s $f6,$f7 0x46 0x07 0x30 0x3f # CHECK: c.ole.d $f12,$f14 -0x46 0x27 0x30 0x36 +0x46 0x2e 0x60 0x36 # CHECK: c.ole.s $f6,$f7 0x46 0x07 0x30 0x36 # CHECK: c.olt.d $f12,$f14 -0x46 0x27 0x30 0x34 +0x46 0x2e 0x60 0x34 # CHECK: c.olt.s $f6,$f7 0x46 0x07 0x30 0x34 # CHECK: c.seq.d $f12,$f14 -0x46 0x27 0x30 0x3a +0x46 0x2e 0x60 0x3a # CHECK: c.seq.s $f6,$f7 0x46 0x07 0x30 0x3a # CHECK: c.sf.d $f12,$f14 -0x46 0x27 0x30 0x38 +0x46 0x2e 0x60 0x38 # CHECK: c.sf.s $f6,$f7 0x46 0x07 0x30 0x38 # CHECK: c.ueq.d $f12,$f14 -0x46 0x27 0x30 0x33 +0x46 0x2e 0x60 0x33 # CHECK: c.ueq.s $f28,$f18 0x46 0x12 0xe0 0x33 # CHECK: c.ule.d $f12,$f14 -0x46 0x27 0x30 0x37 +0x46 0x2e 0x60 0x37 # 
CHECK: c.ule.s $f6,$f7 0x46 0x07 0x30 0x37 # CHECK: c.ult.d $f12,$f14 -0x46 0x27 0x30 0x35 +0x46 0x2e 0x60 0x35 # CHECK: c.ult.s $f6,$f7 0x46 0x07 0x30 0x35 # CHECK: c.un.d $f12,$f14 -0x46 0x27 0x30 0x31 +0x46 0x2e 0x60 0x31 # CHECK: c.un.s $f6,$f7 0x46 0x07 0x30 0x31 # CHECK: ceil.w.d $f12,$f14 -0x46 0x20 0x39 0x8e +0x46 0x20 0x73 0x0e # CHECK: ceil.w.s $f6,$f7 0x46 0x00 0x39 0x8e @@ -175,31 +175,25 @@ 0x44 0xc6 0x38 0x00 # CHECK: cvt.d.s $f6,$f7 -0x46 0x00 0x38 0xa1 +0x46 0x00 0x39 0xa1 # CHECK: cvt.d.w $f12,$f14 -0x46 0x80 0x38 0xa1 - -# CHECK: cvt.l.d $f12,$f14 -0x46 0x20 0x39 0xa5 - -# CHECK: cvt.l.s $f6,$f7 -0x46 0x00 0x39 0xa5 +0x46 0x80 0x73 0x21 # CHECK: cvt.s.d $f12,$f14 -0x46 0x20 0x39 0xa0 +0x46 0x20 0x73 0x20 # CHECK: cvt.s.w $f6,$f7 0x46 0x80 0x39 0xa0 # CHECK: cvt.w.d $f12,$f14 -0x46 0x20 0x39 0xa4 +0x46 0x20 0x73 0x24 # CHECK: cvt.w.s $f6,$f7 0x46 0x00 0x39 0xa4 # CHECK: floor.w.d $f12,$f14 -0x46 0x20 0x39 0x8f +0x46 0x20 0x73 0x0f # CHECK: floor.w.s $f6,$f7 0x46 0x00 0x39 0x8f @@ -246,6 +240,12 @@ # CHECK: lwc1 $f9,9158(a3) 0xc4 0xe9 0x23 0xc6 +# CHECK: lwl $v0, 3($a0) +0x88 0x82 0x00 0x03 + +# CHECK: lwr $v1,16($a1) +0x98 0xa3 0x00 0x10 + # CHECK: madd a2,a3 0x70 0xc7 0x00 0x00 @@ -261,8 +261,8 @@ # CHECK: mflo a1 0x00 0x00 0x28 0x12 -# CHECK: mov.d $f6,$f7 -0x46 0x20 0x39 0x86 +# CHECK: mov.d $f6,$f8 +0x46 0x20 0x41 0x86 # CHECK: mov.s $f6,$f7 0x46 0x00 0x39 0x86 @@ -285,8 +285,8 @@ # CHECK: mtlo a3 0x00 0xe0 0x00 0x13 -# CHECK: mul.d $f9,$f12,$f14 -0x46 0x27 0x32 0x42 +# CHECK: mul.d $f8,$f12,$f14 +0x46 0x2e 0x62 0x02 # CHECK: mul.s $f9,$f6,$f7 0x46 0x07 0x32 0x42 @@ -301,7 +301,7 @@ 0x00 0x65 0x00 0x19 # CHECK: neg.d $f12,$f14 -0x46 0x20 0x39 0x87 +0x46 0x20 0x73 0x07 # CHECK: neg.s $f6,$f7 0x46 0x00 0x39 0x87 @@ -327,8 +327,8 @@ # CHECK: rdhwr a2,$29 0x7c 0x06 0xe8 0x3b -# CHECK: round.w.d $f12,$f14 -0x46 0x20 0x39 0x8c +# CHECK: round.w.d $f6,$f14 +0x46 0x20 0x73 0x0c # CHECK: round.w.s $f6,$f7 0x46 0x00 0x39 0x8c @@ -367,7 +367,7 @@ 0x00 
0x65 0x18 0x2b # CHECK: sqrt.d $f12,$f14 -0x46 0x20 0x39 0x84 +0x46 0x20 0x73 0x04 # CHECK: sqrt.s $f6,$f7 0x46 0x00 0x39 0x84 @@ -387,8 +387,8 @@ # CHECK: srlv v0,v1,a1 0x00 0xa3 0x10 0x06 -# CHECK: sub.d $f9,$f12,$f14 -0x46 0x27 0x32 0x41 +# CHECK: sub.d $f8,$f12,$f14 +0x46 0x2e 0x62 0x01 # CHECK: sub.s $f9,$f6,$f7 0x46 0x07 0x32 0x41 @@ -405,11 +405,17 @@ # CHECK: swc1 $f9,9158(a3) 0xe4 0xe9 0x23 0xc6 +# CHECK: swl $a0, 16($a1) +0xa8 0xa4 0x00 0x10 + +# CHECK: swr $a2, 16($a3) +0xb8 0xe6 0x00 0x10 + # CHECK: sync 0x7 0x00 0x00 0x01 0xcf # CHECK: trunc.w.d $f12,$f14 -0x46 0x20 0x39 0x8d +0x46 0x20 0x73 0x0d # CHECK: trunc.w.s $f6,$f7 0x46 0x00 0x39 0x8d Modified: llvm/trunk/test/MC/Disassembler/Mips/mips32_le.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/Mips/mips32_le.txt?rev=159953&r1=159952&r2=159953&view=diff ============================================================================== --- llvm/trunk/test/MC/Disassembler/Mips/mips32_le.txt (original) +++ llvm/trunk/test/MC/Disassembler/Mips/mips32_le.txt Mon Jul 9 13:46:47 2012 @@ -1,7 +1,7 @@ # RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux # CHECK: abs.d $f12,$f14 -0x85 0x39 0x20 0x46 +0x05 0x73 0x20 0x46 # CHECK: abs.s $f6,$f7 0x85 0x39 0x00 0x46 @@ -9,8 +9,8 @@ # CHECK: add t1,a2,a3 0x20 0x48 0xc7 0x00 -# CHECK: add.d $f18,$f12,$f14 -0x40 0x32 0x27 0x46 +# CHECK: add.d $8,$f12,$f14 +0x00 0x62 0x2e 0x46 # CHECK: add.s $f9,$f6,$f7 0x40 0x32 0x07 0x46 @@ -61,106 +61,106 @@ 0x4c 0x01 0x26 0x15 # CHECK: c.eq.d $f12,$f14 -0x32 0x30 0x27 0x46 +0x32 0x60 0x2e 0x46 # CHECK: c.eq.s $f6,$f7 0x32 0x30 0x07 0x46 # CHECK: c.f.d $f12,$f14 -0x30 0x30 0x27 0x46 +0x30 0x60 0x2e 0x46 # CHECK: c.f.s $f6,$f7 0x30 0x30 0x07 0x46 # CHECK: c.le.d $f12,$f14 -0x3e 0x30 0x27 0x46 +0x3e 0x60 0x2e 0x46 # CHECK: c.le.s $f6,$f7 0x3e 0x30 0x07 0x46 # CHECK: c.lt.d $f12,$f14 -0x3c 0x30 0x27 0x46 +0x3c 0x60 0x2e 0x46 # CHECK: c.lt.s $f6,$f7 0x3c 0x30 0x07 0x46 # CHECK: c.nge.d $f12,$f14 -0x3d 
0x30 0x27 0x46 +0x3d 0x60 0x2e 0x46 # CHECK: c.nge.s $f6,$f7 0x3d 0x30 0x07 0x46 # CHECK: c.ngl.d $f12,$f14 -0x3b 0x30 0x27 0x46 +0x3b 0x60 0x2e 0x46 # CHECK: c.ngl.s $f6,$f7 0x3b 0x30 0x07 0x46 # CHECK: c.ngle.d $f12,$f14 -0x39 0x30 0x27 0x46 +0x39 0x60 0x2e 0x46 # CHECK: c.ngle.s $f6,$f7 0x39 0x30 0x07 0x46 # CHECK: c.ngt.d $f12,$f14 -0x3f 0x30 0x27 0x46 +0x3f 0x60 0x2e 0x46 # CHECK: c.ngt.s $f6,$f7 0x3f 0x30 0x07 0x46 # CHECK: c.ole.d $f12,$f14 -0x36 0x30 0x27 0x46 +0x36 0x60 0x2e 0x46 # CHECK: c.ole.s $f6,$f7 0x36 0x30 0x07 0x46 # CHECK: c.olt.d $f12,$f14 -0x34 0x30 0x27 0x46 +0x34 0x60 0x2e 0x46 # CHECK: c.olt.s $f6,$f7 0x34 0x30 0x07 0x46 # CHECK: c.seq.d $f12,$f14 -0x3a 0x30 0x27 0x46 +0x3a 0x60 0x2e 0x46 # CHECK: c.seq.s $f6,$f7 0x3a 0x30 0x07 0x46 # CHECK: c.sf.d $f12,$f14 -0x38 0x30 0x27 0x46 +0x38 0x60 0x2e 0x46 # CHECK: c.sf.s $f6,$f7 0x38 0x30 0x07 0x46 # CHECK: c.ueq.d $f12,$f14 -0x33 0x30 0x27 0x46 +0x33 0x60 0x2e 0x46 # CHECK: c.ueq.s $f28,$f18 0x33 0xe0 0x12 0x46 # CHECK: c.ule.d $f12,$f14 -0x37 0x30 0x27 0x46 +0x37 0x60 0x2e 0x46 # CHECK: c.ule.s $f6,$f7 0x37 0x30 0x07 0x46 # CHECK: c.ult.d $f12,$f14 -0x35 0x30 0x27 0x46 +0x35 0x60 0x2e 0x46 # CHECK: c.ult.s $f6,$f7 0x35 0x30 0x07 0x46 # CHECK: c.un.d $f12,$f14 -0x31 0x30 0x27 0x46 +0x31 0x60 0x2e 0x46 # CHECK: c.un.s $f6,$f7 0x31 0x30 0x07 0x46 # CHECK: ceil.w.d $f12,$f14 -0x8e 0x38 0x20 0x46 +0x0e 0x73 0x20 0x46 # CHECK: ceil.w.s $f6,$f7 -0x8e 0x38 0x00 0x46 +0x0e 0x73 0x20 0x46 # CHECK: cfc1 a2,$7 0x00 0x38 0x46 0x44 @@ -178,28 +178,22 @@ 0xa1 0x39 0x00 0x46 # CHECK: cvt.d.w $f12,$f14 -0xa1 0x39 0x80 0x46 - -# CHECK: cvt.l.d $f12,$f14 -0xa5 0x39 0x20 0x46 - -# CHECK: cvt.l.s $f6,$f7 -0xa5 0x39 0x00 0x46 +0x21 0x73 0x80 0x46 # CHECK: cvt.s.d $f12,$f14 -0xa0 0x39 0x20 0x46 +0x20 0x73 0x20 0x46 # CHECK: cvt.s.w $f6,$f7 0xa0 0x39 0x80 0x46 # CHECK: cvt.w.d $f12,$f14 -0xa4 0x39 0x20 0x46 +0x24 0x73 0x20 0x46 # CHECK: cvt.w.s $f6,$f7 0xa4 0x39 0x00 0x46 # CHECK: floor.w.d $f12,$f14 -0x8f 0x39 0x20 
0x46 +0x0f 0x73 0x20 0x46 # CHECK: floor.w.s $f6,$f7 0x8f 0x39 0x00 0x46 @@ -210,7 +204,7 @@ # CHECK: jal 00000530 0x4c 0x01 0x00 0x0c -# CHECK: jalr a2,a3 +# CHECK: jalr a3 0x09 0xf8 0xe0 0x00 # CHECK: jr a3 @@ -249,6 +243,12 @@ # CHECK: lwc1 $f9,9158(a3) 0xc6 0x23 0xe9 0xc4 +# CHECK: lwl $v0, 3($a0) +0x03 0x00 0x82 0x88 + +# CHECK: lwr $v1,16($a1) +0x10 0x00 0xa3 0x98 + # CHECK: madd a2,a3 0x00 0x00 0xc7 0x70 @@ -264,8 +264,8 @@ # CHECK: mflo a1 0x12 0x28 0x00 0x00 -# CHECK: mov.d $f12,$f14 -0x86 0x39 0x20 0x46 +# CHECK: mov.d $f6,$f8 +0x86 0x41 0x20 0x46 # CHECK: mov.s $f6,$f7 0x86 0x39 0x00 0x46 @@ -288,11 +288,11 @@ # CHECK: mtlo a3 0x13 0x00 0xe0 0x00 -# CHECK: mul.d $f9,$f12,$f14 -0x42 0x32 0x27 0x46 +# CHECK: mul.d $f8,$f12,$f14 +0x02 0x62 0x2e 0x46 # CHECK: mul.s $f9,$f6,$f7 -0x42 0x32 0x07 0x46 +0x02 0x62 0x07 0x46 # CHECK: mul t1,a2,a3 0x02 0x48 0xc7 0x70 @@ -304,7 +304,7 @@ 0x19 0x00 0x65 0x00 # CHECK: neg.d $f12,$f14 -0x87 0x39 0x20 0x46 +0x07 0x73 0x20 0x46 # CHECK: neg.s $f6,$f7 0x87 0x39 0x00 0x46 @@ -331,7 +331,7 @@ 0x3b 0xe8 0x06 0x7c # CHECK: round.w.d $f12,$f14 -0x8c 0x39 0x20 0x46 +0x0c 0x73 0x20 0x46 # CHECK: round.w.s $f6,$f7 0x8c 0x39 0x00 0x46 @@ -370,7 +370,7 @@ 0x2b 0x18 0x65 0x00 # CHECK: sqrt.d $f12,$f14 -0x84 0x39 0x20 0x46 +0x04 0x73 0x20 0x46 # CHECK: sqrt.s $f6,$f7 0x84 0x39 0x00 0x46 @@ -390,8 +390,8 @@ # CHECK: srlv v0,v1,a1 0x06 0x10 0xa3 0x00 -# CHECK: sub.d $f9,$f12,$f14 -0x41 0x32 0x27 0x46 +# CHECK: sub.d $f8,$f12,$f14 +0x01 0x62 0x2e 0x46 # CHECK: sub.s $f9,$f6,$f7 0x41 0x32 0x07 0x46 @@ -408,11 +408,17 @@ # CHECK: swc1 $f9,9158(a3) 0xc6 0x23 0xe9 0xe4 +# CHECK: swl $a0, 16($a1) +0x10 0x00 0xa4 0xa8 + +# CHECK: swr $a2, 16($a3) +0x10 0x00 0xe6 0xb8 + # CHECK: sync 0x7 0xcf 0x01 0x00 0x00 # CHECK: trunc.w.d $f12,$f14 -0x8d 0x39 0x20 0x46 +0x0d 0x73 0x20 0x46 # CHECK: trunc.w.s $f6,$f7 0x8d 0x39 0x00 0x46 Modified: llvm/trunk/test/MC/Disassembler/Mips/mips32r2.txt URL: 
http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/Mips/mips32r2.txt?rev=159953&r1=159952&r2=159953&view=diff ============================================================================== --- llvm/trunk/test/MC/Disassembler/Mips/mips32r2.txt (original) +++ llvm/trunk/test/MC/Disassembler/Mips/mips32r2.txt Mon Jul 9 13:46:47 2012 @@ -1,7 +1,7 @@ # RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 # CHECK: abs.d $f12,$f14 -0x46 0x20 0x39 0x85 +0x46 0x20 0x73 0x05 # CHECK: abs.s $f6,$f7 0x46 0x00 0x39 0x85 @@ -9,8 +9,8 @@ # CHECK: add t1,a2,a3 0x00 0xc7 0x48 0x20 -# CHECK: add.d $f18,$f12,$f14 -0x46 0x27 0x32 0x40 +# CHECK: add.d $f8,$f12,$f14 +0x46 0x2e 0x62 0x00 # CHECK: add.s $f9,$f6,$f7 0x46 0x07 0x32 0x40 @@ -61,103 +61,103 @@ 0x15 0x26 0x01 0x4c # CHECK: c.eq.d $f12,$f14 -0x46 0x27 0x30 0x32 +0x46 0x2e 0x60 0x32 # CHECK: c.eq.s $f6,$f7 0x46 0x07 0x30 0x32 # CHECK: c.f.d $f12,$f14 -0x46 0x27 0x30 0x30 +0x46 0x2e 0x60 0x30 # CHECK: c.f.s $f6,$f7 0x46 0x07 0x30 0x30 # CHECK: c.le.d $f12,$f14 -0x46 0x27 0x30 0x3e +0x46 0x2e 0x60 0x3e # CHECK: c.le.s $f6,$f7 0x46 0x07 0x30 0x3e # CHECK: c.lt.d $f12,$f14 -0x46 0x27 0x30 0x3c +0x46 0x2e 0x60 0x3c # CHECK: c.lt.s $f6,$f7 0x46 0x07 0x30 0x3c # CHECK: c.nge.d $f12,$f14 -0x46 0x27 0x30 0x3d +0x46 0x2e 0x60 0x3d # CHECK: c.nge.s $f6,$f7 0x46 0x07 0x30 0x3d # CHECK: c.ngl.d $f12,$f14 -0x46 0x27 0x30 0x3b +0x46 0x2e 0x60 0x3b # CHECK: c.ngl.s $f6,$f7 0x46 0x07 0x30 0x3b # CHECK: c.ngle.d $f12,$f14 -0x46 0x27 0x30 0x39 +0x46 0x2e 0x60 0x39 # CHECK: c.ngle.s $f6,$f7 0x46 0x07 0x30 0x39 # CHECK: c.ngt.d $f12,$f14 -0x46 0x27 0x30 0x3f +0x46 0x2e 0x60 0x3f # CHECK: c.ngt.s $f6,$f7 0x46 0x07 0x30 0x3f # CHECK: c.ole.d $f12,$f14 -0x46 0x27 0x30 0x36 +0x46 0x2e 0x60 0x36 # CHECK: c.ole.s $f6,$f7 0x46 0x07 0x30 0x36 # CHECK: c.olt.d $f12,$f14 -0x46 0x27 0x30 0x34 +0x46 0x2e 0x60 0x34 # CHECK: c.olt.s $f6,$f7 0x46 0x07 0x30 0x34 # CHECK: c.seq.d $f12,$f14 -0x46 0x27 0x30 0x3a +0x46 0x2e 0x60 0x3a 
# CHECK: c.seq.s $f6,$f7 0x46 0x07 0x30 0x3a # CHECK: c.sf.d $f12,$f14 -0x46 0x27 0x30 0x38 +0x46 0x2e 0x60 0x38 # CHECK: c.sf.s $f6,$f7 0x46 0x07 0x30 0x38 # CHECK: c.ueq.d $f12,$f14 -0x46 0x27 0x30 0x33 +0x46 0x2e 0x60 0x33 # CHECK: c.ueq.s $f28,$f18 0x46 0x12 0xe0 0x33 # CHECK: c.ule.d $f12,$f14 -0x46 0x27 0x30 0x37 +0x46 0x2e 0x60 0x37 # CHECK: c.ule.s $f6,$f7 0x46 0x07 0x30 0x37 # CHECK: c.ult.d $f12,$f14 -0x46 0x27 0x30 0x35 +0x46 0x2e 0x60 0x35 # CHECK: c.ult.s $f6,$f7 0x46 0x07 0x30 0x35 # CHECK: c.un.d $f12,$f14 -0x46 0x27 0x30 0x31 +0x46 0x2e 0x60 0x31 # CHECK: c.un.s $f6,$f7 0x46 0x07 0x30 0x31 # CHECK: ceil.w.d $f12,$f14 -0x46 0x20 0x39 0x8e +0x46 0x20 0x73 0x0e # CHECK: ceil.w.s $f6,$f7 0x46 0x00 0x39 0x8e @@ -175,31 +175,31 @@ 0x44 0xc6 0x38 0x00 # CHECK: cvt.d.s $f6,$f7 -0x46 0x00 0x38 0xa1 +0x46 0x00 0x39 0xa1 # CHECK: cvt.d.w $f12,$f14 -0x46 0x80 0x38 0xa1 +0x46 0x80 0x73 0x21 # CHECK: cvt.l.d $f12,$f14 -0x46 0x20 0x39 0xa5 +0x46 0x20 0x73 0x05 # CHECK: cvt.l.s $f6,$f7 0x46 0x00 0x39 0xa5 # CHECK: cvt.s.d $f12,$f14 -0x46 0x20 0x39 0xa0 +0x46 0x20 0x73 0x20 # CHECK: cvt.s.w $f6,$f7 0x46 0x80 0x39 0xa0 # CHECK: cvt.w.d $f12,$f14 -0x46 0x20 0x39 0xa4 +0x46 0x20 0x73 0x24 # CHECK: cvt.w.s $f6,$f7 0x46 0x00 0x39 0xa4 # CHECK: floor.w.d $f12,$f14 -0x46 0x20 0x39 0x8f +0x46 0x20 0x73 0x0f # CHECK: floor.w.s $f6,$f7 0x46 0x00 0x39 0x8f @@ -264,8 +264,8 @@ # CHECK: mflo a1 0x00 0x00 0x28 0x12 -# CHECK: mov.d $f6,$f7 -0x46 0x20 0x39 0x86 +# CHECK: mov.d $f6,$f8 +0x46 0x20 0x41 0x86 # CHECK: mov.s $f6,$f7 0x46 0x00 0x39 0x86 @@ -288,8 +288,8 @@ # CHECK: mtlo a3 0x00 0xe0 0x00 0x13 -# CHECK: mul.d $f9,$f12,$f14 -0x46 0x27 0x32 0x42 +# CHECK: mul.d $f8,$f12,$f14 +0x46 0x2e 0x62 0x02 # CHECK: mul.s $f9,$f6,$f7 0x46 0x07 0x32 0x42 @@ -304,7 +304,7 @@ 0x00 0x65 0x00 0x19 # CHECK: neg.d $f12,$f14 -0x46 0x20 0x39 0x87 +0x46 0x20 0x73 0x07 # CHECK: neg.s $f6,$f7 0x46 0x00 0x39 0x87 @@ -336,8 +336,8 @@ # CHECK: rorv t1,a2,a3 0x00 0xe6 0x48 0x46 -# CHECK: round.w.d 
$f12,$f14 -0x46 0x20 0x39 0x8c +# CHECK: round.w.d $f6,$f14 +0x46 0x20 0x73 0x0c # CHECK: round.w.s $f6,$f7 0x46 0x00 0x39 0x8c @@ -382,7 +382,7 @@ 0x00 0x65 0x18 0x2b # CHECK: sqrt.d $f12,$f14 -0x46 0x20 0x39 0x84 +0x46 0x20 0x73 0x04 # CHECK: sqrt.s $f6,$f7 0x46 0x00 0x39 0x84 @@ -402,8 +402,8 @@ # CHECK: srlv v0,v1,a1 0x00 0xa3 0x10 0x06 -# CHECK: sub.d $f9,$f12,$f14 -0x46 0x27 0x32 0x41 +# CHECK: sub.d $f8,$f12,$f14 +0x46 0x2e 0x62 0x01 # CHECK: sub.s $f9,$f6,$f7 0x46 0x07 0x32 0x41 @@ -424,7 +424,7 @@ 0x00 0x00 0x01 0xcf # CHECK: trunc.w.d $f12,$f14 -0x46 0x20 0x39 0x8d +0x46 0x20 0x73 0x0d # CHECK: trunc.w.s $f6,$f7 0x46 0x00 0x39 0x8d Modified: llvm/trunk/test/MC/Disassembler/Mips/mips32r2_le.txt URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/Mips/mips32r2_le.txt?rev=159953&r1=159952&r2=159953&view=diff ============================================================================== --- llvm/trunk/test/MC/Disassembler/Mips/mips32r2_le.txt (original) +++ llvm/trunk/test/MC/Disassembler/Mips/mips32r2_le.txt Mon Jul 9 13:46:47 2012 @@ -1,7 +1,7 @@ # RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mcpu=mips32r2 # CHECK: abs.d $f12,$f14 -0x85 0x39 0x20 0x46 +0x05 0x73 0x20 0x46 # CHECK: abs.s $f6,$f7 0x85 0x39 0x00 0x46 @@ -9,8 +9,8 @@ # CHECK: add t1,a2,a3 0x20 0x48 0xc7 0x00 -# CHECK: add.d $f18,$f12,$f14 -0x40 0x32 0x27 0x46 +# CHECK: add.d $8,$f12,$f14 +0x00 0x62 0x2e 0x46 # CHECK: add.s $f9,$f6,$f7 0x40 0x32 0x07 0x46 @@ -61,106 +61,106 @@ 0x4c 0x01 0x26 0x15 # CHECK: c.eq.d $f12,$f14 -0x32 0x30 0x27 0x46 +0x32 0x60 0x2e 0x46 # CHECK: c.eq.s $f6,$f7 0x32 0x30 0x07 0x46 # CHECK: c.f.d $f12,$f14 -0x30 0x30 0x27 0x46 +0x30 0x60 0x2e 0x46 # CHECK: c.f.s $f6,$f7 0x30 0x30 0x07 0x46 # CHECK: c.le.d $f12,$f14 -0x3e 0x30 0x27 0x46 +0x3e 0x60 0x2e 0x46 # CHECK: c.le.s $f6,$f7 0x3e 0x30 0x07 0x46 # CHECK: c.lt.d $f12,$f14 -0x3c 0x30 0x27 0x46 +0x3c 0x60 0x2e 0x46 # CHECK: c.lt.s $f6,$f7 0x3c 0x30 0x07 0x46 # CHECK: c.nge.d $f12,$f14 
-0x3d 0x30 0x27 0x46 +0x3d 0x60 0x2e 0x46 # CHECK: c.nge.s $f6,$f7 0x3d 0x30 0x07 0x46 # CHECK: c.ngl.d $f12,$f14 -0x3b 0x30 0x27 0x46 +0x3b 0x60 0x2e 0x46 # CHECK: c.ngl.s $f6,$f7 0x3b 0x30 0x07 0x46 # CHECK: c.ngle.d $f12,$f14 -0x39 0x30 0x27 0x46 +0x39 0x60 0x2e 0x46 # CHECK: c.ngle.s $f6,$f7 0x39 0x30 0x07 0x46 # CHECK: c.ngt.d $f12,$f14 -0x3f 0x30 0x27 0x46 +0x3f 0x60 0x2e 0x46 # CHECK: c.ngt.s $f6,$f7 0x3f 0x30 0x07 0x46 # CHECK: c.ole.d $f12,$f14 -0x36 0x30 0x27 0x46 +0x36 0x60 0x2e 0x46 # CHECK: c.ole.s $f6,$f7 0x36 0x30 0x07 0x46 # CHECK: c.olt.d $f12,$f14 -0x34 0x30 0x27 0x46 +0x34 0x60 0x2e 0x46 # CHECK: c.olt.s $f6,$f7 0x34 0x30 0x07 0x46 # CHECK: c.seq.d $f12,$f14 -0x3a 0x30 0x27 0x46 +0x3a 0x60 0x2e 0x46 # CHECK: c.seq.s $f6,$f7 0x3a 0x30 0x07 0x46 # CHECK: c.sf.d $f12,$f14 -0x38 0x30 0x27 0x46 +0x38 0x60 0x2e 0x46 # CHECK: c.sf.s $f6,$f7 0x38 0x30 0x07 0x46 # CHECK: c.ueq.d $f12,$f14 -0x33 0x30 0x27 0x46 +0x33 0x60 0x2e 0x46 # CHECK: c.ueq.s $f28,$f18 0x33 0xe0 0x12 0x46 # CHECK: c.ule.d $f12,$f14 -0x37 0x30 0x27 0x46 +0x37 0x60 0x2e 0x46 # CHECK: c.ule.s $f6,$f7 0x37 0x30 0x07 0x46 # CHECK: c.ult.d $f12,$f14 -0x35 0x30 0x27 0x46 +0x35 0x60 0x2e 0x46 # CHECK: c.ult.s $f6,$f7 0x35 0x30 0x07 0x46 # CHECK: c.un.d $f12,$f14 -0x31 0x30 0x27 0x46 +0x31 0x60 0x2e 0x46 # CHECK: c.un.s $f6,$f7 0x31 0x30 0x07 0x46 # CHECK: ceil.w.d $f12,$f14 -0x8e 0x38 0x20 0x46 +0x0e 0x73 0x20 0x46 # CHECK: ceil.w.s $f6,$f7 -0x8e 0x38 0x00 0x46 +0x0e 0x73 0x20 0x46 # CHECK: cfc1 a2,$7 0x00 0x38 0x46 0x44 @@ -178,28 +178,28 @@ 0xa1 0x39 0x00 0x46 # CHECK: cvt.d.w $f12,$f14 -0xa1 0x39 0x80 0x46 +0x21 0x73 0x80 0x46 # CHECK: cvt.l.d $f12,$f14 -0xa5 0x39 0x20 0x46 +0x05 0x73 0x20 0x46 # CHECK: cvt.l.s $f6,$f7 0xa5 0x39 0x00 0x46 # CHECK: cvt.s.d $f12,$f14 -0xa0 0x39 0x20 0x46 +0x20 0x73 0x20 0x46 # CHECK: cvt.s.w $f6,$f7 0xa0 0x39 0x80 0x46 # CHECK: cvt.w.d $f12,$f14 -0xa4 0x39 0x20 0x46 +0x24 0x73 0x20 0x46 # CHECK: cvt.w.s $f6,$f7 0xa4 0x39 0x00 0x46 # CHECK: floor.w.d 
$f12,$f14 -0x8f 0x39 0x20 0x46 +0x0f 0x73 0x20 0x46 # CHECK: floor.w.s $f6,$f7 0x8f 0x39 0x00 0x46 @@ -213,7 +213,7 @@ # CHECK: jal 00000530 0x4c 0x01 0x00 0x0c -# CHECK: jalr a2,a3 +# CHECK: jalr a3 0x09 0xf8 0xe0 0x00 # CHECK: jr a3 @@ -267,8 +267,8 @@ # CHECK: mflo a1 0x12 0x28 0x00 0x00 -# CHECK: mov.d $f12,$f14 -0x86 0x39 0x20 0x46 +# CHECK: mov.d $f6,$f8 +0x86 0x41 0x20 0x46 # CHECK: mov.s $f6,$f7 0x86 0x39 0x00 0x46 @@ -291,11 +291,11 @@ # CHECK: mtlo a3 0x13 0x00 0xe0 0x00 -# CHECK: mul.d $f9,$f12,$f14 -0x42 0x32 0x27 0x46 +# CHECK: mul.d $f8,$f12,$f14 +0x02 0x62 0x2e 0x46 # CHECK: mul.s $f9,$f6,$f7 -0x42 0x32 0x07 0x46 +0x02 0x62 0x07 0x46 # CHECK: mul t1,a2,a3 0x02 0x48 0xc7 0x70 @@ -307,7 +307,7 @@ 0x19 0x00 0x65 0x00 # CHECK: neg.d $f12,$f14 -0x87 0x39 0x20 0x46 +0x07 0x73 0x20 0x46 # CHECK: neg.s $f6,$f7 0x87 0x39 0x00 0x46 @@ -340,7 +340,7 @@ 0x46 0x48 0xe6 0x00 # CHECK: round.w.d $f12,$f14 -0x8c 0x39 0x20 0x46 +0x0c 0x73 0x20 0x46 # CHECK: round.w.s $f6,$f7 0x8c 0x39 0x00 0x46 @@ -385,7 +385,7 @@ 0x2b 0x18 0x65 0x00 # CHECK: sqrt.d $f12,$f14 -0x84 0x39 0x20 0x46 +0x04 0x73 0x20 0x46 # CHECK: sqrt.s $f6,$f7 0x84 0x39 0x00 0x46 @@ -405,8 +405,8 @@ # CHECK: srlv v0,v1,a1 0x06 0x10 0xa3 0x00 -# CHECK: sub.d $f9,$f12,$f14 -0x41 0x32 0x27 0x46 +# CHECK: sub.d $f8,$f12,$f14 +0x01 0x62 0x2e 0x46 # CHECK: sub.s $f9,$f6,$f7 0x41 0x32 0x07 0x46 @@ -427,7 +427,7 @@ 0xcf 0x01 0x00 0x00 # CHECK: trunc.w.d $f12,$f14 -0x8d 0x39 0x20 0x46 +0x0d 0x73 0x20 0x46 # CHECK: trunc.w.s $f6,$f7 0x8d 0x39 0x00 0x46 From mren at apple.com Mon Jul 9 13:57:12 2012 From: mren at apple.com (Manman Ren) Date: Mon, 09 Jul 2012 18:57:12 -0000 Subject: [llvm-commits] [llvm] r159955 - in /llvm/trunk: lib/Target/X86/X86InstrInfo.cpp test/CodeGen/X86/jump_sign.ll Message-ID: <20120709185712.CA4E82A6C06A@llvm.org> Author: mren Date: Mon Jul 9 13:57:12 2012 New Revision: 159955 URL: http://llvm.org/viewvc/llvm-project?rev=159955&view=rev Log: X86: implement functions to analyze & synthesize 
CMOV|SET|Jcc getCondFromSETOpc, getCondFromCMovOpc, getSETFromCond, getCMovFromCond No functional change intended. If we want to update the condition code of CMOV|SET|Jcc, we first analyze the opcode to get the condition code, then update the condition code, finally synthesize the new opcode from the new condition code. Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp llvm/trunk/test/CodeGen/X86/jump_sign.ll Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=159955&r1=159954&r2=159955&view=diff ============================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original) +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Mon Jul 9 13:57:12 2012 @@ -2227,7 +2227,7 @@ } } -static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { +static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) { switch (BrOpc) { default: return X86::COND_INVALID; case X86::JE_4: return X86::COND_E; @@ -2249,6 +2249,84 @@ } } +/// getCondFromSETOpc - return condition code of a SET opcode.
+static X86::CondCode getCondFromSETOpc(unsigned Opc) { + switch (Opc) { + default: return X86::COND_INVALID; + case X86::SETAr: case X86::SETAm: return X86::COND_A; + case X86::SETAEr: case X86::SETAEm: return X86::COND_AE; + case X86::SETBr: case X86::SETBm: return X86::COND_B; + case X86::SETBEr: case X86::SETBEm: return X86::COND_BE; + case X86::SETEr: case X86::SETEm: return X86::COND_E; + case X86::SETGr: case X86::SETGm: return X86::COND_G; + case X86::SETGEr: case X86::SETGEm: return X86::COND_GE; + case X86::SETLr: case X86::SETLm: return X86::COND_L; + case X86::SETLEr: case X86::SETLEm: return X86::COND_LE; + case X86::SETNEr: case X86::SETNEm: return X86::COND_NE; + case X86::SETNOr: case X86::SETNOm: return X86::COND_NO; + case X86::SETNPr: case X86::SETNPm: return X86::COND_NP; + case X86::SETNSr: case X86::SETNSm: return X86::COND_NS; + case X86::SETOr: case X86::SETOm: return X86::COND_O; + case X86::SETPr: case X86::SETPm: return X86::COND_P; + case X86::SETSr: case X86::SETSm: return X86::COND_S; + } +} + +/// getCondFromCmovOpc - return condition code of a CMov opcode. 
+static X86::CondCode getCondFromCMovOpc(unsigned Opc) { + switch (Opc) { + default: return X86::COND_INVALID; + case X86::CMOVA16rm: case X86::CMOVA16rr: case X86::CMOVA32rm: + case X86::CMOVA32rr: case X86::CMOVA64rm: case X86::CMOVA64rr: + return X86::COND_A; + case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm: + case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr: + return X86::COND_AE; + case X86::CMOVB16rm: case X86::CMOVB16rr: case X86::CMOVB32rm: + case X86::CMOVB32rr: case X86::CMOVB64rm: case X86::CMOVB64rr: + return X86::COND_B; + case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm: + case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr: + return X86::COND_BE; + case X86::CMOVE16rm: case X86::CMOVE16rr: case X86::CMOVE32rm: + case X86::CMOVE32rr: case X86::CMOVE64rm: case X86::CMOVE64rr: + return X86::COND_E; + case X86::CMOVG16rm: case X86::CMOVG16rr: case X86::CMOVG32rm: + case X86::CMOVG32rr: case X86::CMOVG64rm: case X86::CMOVG64rr: + return X86::COND_G; + case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm: + case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr: + return X86::COND_GE; + case X86::CMOVL16rm: case X86::CMOVL16rr: case X86::CMOVL32rm: + case X86::CMOVL32rr: case X86::CMOVL64rm: case X86::CMOVL64rr: + return X86::COND_L; + case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm: + case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr: + return X86::COND_LE; + case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm: + case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr: + return X86::COND_NE; + case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm: + case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr: + return X86::COND_NO; + case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm: + case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr: + return X86::COND_NP; + case 
X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm: + case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr: + return X86::COND_NS; + case X86::CMOVO16rm: case X86::CMOVO16rr: case X86::CMOVO32rm: + case X86::CMOVO32rr: case X86::CMOVO64rm: case X86::CMOVO64rr: + return X86::COND_O; + case X86::CMOVP16rm: case X86::CMOVP16rr: case X86::CMOVP32rm: + case X86::CMOVP32rr: case X86::CMOVP64rm: case X86::CMOVP64rr: + return X86::COND_P; + case X86::CMOVS16rm: case X86::CMOVS16rr: case X86::CMOVS32rm: + case X86::CMOVS32rr: case X86::CMOVS64rm: case X86::CMOVS64rr: + return X86::COND_S; + } +} + unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { switch (CC) { default: llvm_unreachable("Illegal condition code!"); @@ -2295,10 +2373,57 @@ } } -/// getCMovFromCond - Return a cmov(rr) opcode for the given condition and -/// register size in bytes. -static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes) { - static const unsigned Opc[16][3] = { +/// getSwappedCondition - assume the flags are set by MI(a,b), return +/// the condition code if we modify the instructions such that flags are +/// set by MI(b,a). +X86::CondCode getSwappedCondition(X86::CondCode CC) { + switch (CC) { + default: return X86::COND_INVALID; + case X86::COND_E: return X86::COND_E; + case X86::COND_NE: return X86::COND_NE; + case X86::COND_L: return X86::COND_G; + case X86::COND_LE: return X86::COND_GE; + case X86::COND_G: return X86::COND_L; + case X86::COND_GE: return X86::COND_LE; + case X86::COND_B: return X86::COND_A; + case X86::COND_BE: return X86::COND_AE; + case X86::COND_A: return X86::COND_B; + case X86::COND_AE: return X86::COND_BE; + } +} + +/// getSETFromCond - Return a set opcode for the given condition and +/// whether it has memory operand. 
+static unsigned getSETFromCond(X86::CondCode CC, + bool HasMemoryOperand) { + static const unsigned Opc[16][2] = { + { X86::SETAr, X86::SETAm }, + { X86::SETAEr, X86::SETAEm }, + { X86::SETBr, X86::SETBm }, + { X86::SETBEr, X86::SETBEm }, + { X86::SETEr, X86::SETEm }, + { X86::SETGr, X86::SETGm }, + { X86::SETGEr, X86::SETGEm }, + { X86::SETLr, X86::SETLm }, + { X86::SETLEr, X86::SETLEm }, + { X86::SETNEr, X86::SETNEm }, + { X86::SETNOr, X86::SETNOm }, + { X86::SETNPr, X86::SETNPm }, + { X86::SETNSr, X86::SETNSm }, + { X86::SETOr, X86::SETOm }, + { X86::SETPr, X86::SETPm }, + { X86::SETSr, X86::SETSm } + }; + + assert(CC < 16 && "Can only handle standard cond codes"); + return Opc[CC][HasMemoryOperand ? 1 : 0]; +} + +/// getCMovFromCond - Return a cmov opcode for the given condition, +/// register size in bytes, and operand type. +static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes, + bool HasMemoryOperand) { + static const unsigned Opc[32][3] = { { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr }, @@ -2314,15 +2439,32 @@ { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr }, { X86::CMOVO16rr, X86::CMOVO32rr, X86::CMOVO64rr }, { X86::CMOVP16rr, X86::CMOVP32rr, X86::CMOVP64rr }, - { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr } + { X86::CMOVS16rr, X86::CMOVS32rr, X86::CMOVS64rr }, + { X86::CMOVA16rm, X86::CMOVA32rm, X86::CMOVA64rm }, + { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm }, + { X86::CMOVB16rm, X86::CMOVB32rm, X86::CMOVB64rm }, + { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm }, + { X86::CMOVE16rm, X86::CMOVE32rm, X86::CMOVE64rm }, + { X86::CMOVG16rm, X86::CMOVG32rm, X86::CMOVG64rm }, + { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm }, + { X86::CMOVL16rm, X86::CMOVL32rm, X86::CMOVL64rm }, + { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm }, + { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm }, + { 
X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm }, + { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm }, + { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm }, + { X86::CMOVO16rm, X86::CMOVO32rm, X86::CMOVO64rm }, + { X86::CMOVP16rm, X86::CMOVP32rm, X86::CMOVP64rm }, + { X86::CMOVS16rm, X86::CMOVS32rm, X86::CMOVS64rm } }; assert(CC < 16 && "Can only handle standard cond codes"); + unsigned Idx = HasMemoryOperand ? 16+CC : CC; switch(RegBytes) { default: llvm_unreachable("Illegal register size!"); - case 2: return Opc[CC][0]; - case 4: return Opc[CC][1]; - case 8: return Opc[CC][2]; + case 2: return Opc[Idx][0]; + case 4: return Opc[Idx][1]; + case 8: return Opc[Idx][2]; } } @@ -2392,7 +2534,7 @@ } // Handle conditional branches. - X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode()); + X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode()); if (BranchCode == X86::COND_INVALID) return true; // Can't handle indirect branch. @@ -2490,7 +2632,7 @@ if (I->isDebugValue()) continue; if (I->getOpcode() != X86::JMP_4 && - GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) + getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) break; // Remove the branch. I->eraseFromParent(); @@ -2595,7 +2737,8 @@ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); assert(Cond.size() == 1 && "Invalid Cond array"); unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(), - MRI.getRegClass(DstReg)->getSize()); + MRI.getRegClass(DstReg)->getSize(), + false/*HasMemoryOperand*/); BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg); } @@ -2895,127 +3038,6 @@ return false; } -/// getSwappedConditionForSET - assume the flags are set by MI(a,b), return -/// the opcode if we modify the instructions such that flags are -/// set by MI(b,a). 
-static unsigned getSwappedConditionForSET(unsigned SETOpc) { - switch (SETOpc) { - default: return 0; - case X86::SETEr: return X86::SETEr; - case X86::SETEm: return X86::SETEm; - case X86::SETNEr: return X86::SETNEr; - case X86::SETNEm: return X86::SETNEm; - case X86::SETLr: return X86::SETGr; - case X86::SETLm: return X86::SETGm; - case X86::SETLEr: return X86::SETGEr; - case X86::SETLEm: return X86::SETGEm; - case X86::SETGr: return X86::SETLr; - case X86::SETGm: return X86::SETLm; - case X86::SETGEr: return X86::SETLEr; - case X86::SETGEm: return X86::SETLEm; - case X86::SETBr: return X86::SETAr; - case X86::SETBm: return X86::SETAm; - case X86::SETBEr: return X86::SETAEr; - case X86::SETBEm: return X86::SETAEm; - case X86::SETAr: return X86::SETBr; - case X86::SETAm: return X86::SETBm; - case X86::SETAEr: return X86::SETBEr; - case X86::SETAEm: return X86::SETBEm; - } -} - -/// getSwappedConditionForBranch - assume the flags are set by MI(a,b), return -/// the opcode if we modify the instructions such that flags are -/// set by MI(b,a). -static unsigned getSwappedConditionForBranch(unsigned BranchOpc) { - switch (BranchOpc) { - default: return 0; - case X86::JE_4: return X86::JE_4; - case X86::JNE_4: return X86::JNE_4; - case X86::JL_4: return X86::JG_4; - case X86::JLE_4: return X86::JGE_4; - case X86::JG_4: return X86::JL_4; - case X86::JGE_4: return X86::JLE_4; - case X86::JB_4: return X86::JA_4; - case X86::JBE_4: return X86::JAE_4; - case X86::JA_4: return X86::JB_4; - case X86::JAE_4: return X86::JBE_4; - } -} - -/// getSwappedConditionForCMov - assume the flags are set by MI(a,b), return -/// the opcode if we modify the instructions such that flags are -/// set by MI(b,a). 
-static unsigned getSwappedConditionForCMov(unsigned CMovOpc) { - switch (CMovOpc) { - default: return 0; - case X86::CMOVE16rm: return X86::CMOVE16rm; - case X86::CMOVE16rr: return X86::CMOVE16rr; - case X86::CMOVE32rm: return X86::CMOVE32rm; - case X86::CMOVE32rr: return X86::CMOVE32rr; - case X86::CMOVE64rm: return X86::CMOVE64rm; - case X86::CMOVE64rr: return X86::CMOVE64rr; - case X86::CMOVNE16rm: return X86::CMOVNE16rm; - case X86::CMOVNE16rr: return X86::CMOVNE16rr; - case X86::CMOVNE32rm: return X86::CMOVNE32rm; - case X86::CMOVNE32rr: return X86::CMOVNE32rr; - case X86::CMOVNE64rm: return X86::CMOVNE64rm; - case X86::CMOVNE64rr: return X86::CMOVNE64rr; - - case X86::CMOVL16rm: return X86::CMOVG16rm; - case X86::CMOVL16rr: return X86::CMOVG16rr; - case X86::CMOVL32rm: return X86::CMOVG32rm; - case X86::CMOVL32rr: return X86::CMOVG32rr; - case X86::CMOVL64rm: return X86::CMOVG64rm; - case X86::CMOVL64rr: return X86::CMOVG64rr; - case X86::CMOVLE16rm: return X86::CMOVGE16rm; - case X86::CMOVLE16rr: return X86::CMOVGE16rr; - case X86::CMOVLE32rm: return X86::CMOVGE32rm; - case X86::CMOVLE32rr: return X86::CMOVGE32rr; - case X86::CMOVLE64rm: return X86::CMOVGE64rm; - case X86::CMOVLE64rr: return X86::CMOVGE64rr; - - case X86::CMOVG16rm: return X86::CMOVL16rm; - case X86::CMOVG16rr: return X86::CMOVL16rr; - case X86::CMOVG32rm: return X86::CMOVL32rm; - case X86::CMOVG32rr: return X86::CMOVL32rr; - case X86::CMOVG64rm: return X86::CMOVL64rm; - case X86::CMOVG64rr: return X86::CMOVL64rr; - case X86::CMOVGE16rm: return X86::CMOVLE16rm; - case X86::CMOVGE16rr: return X86::CMOVLE16rr; - case X86::CMOVGE32rm: return X86::CMOVLE32rm; - case X86::CMOVGE32rr: return X86::CMOVLE32rr; - case X86::CMOVGE64rm: return X86::CMOVLE64rm; - case X86::CMOVGE64rr: return X86::CMOVLE64rr; - - case X86::CMOVB16rm: return X86::CMOVA16rm; - case X86::CMOVB16rr: return X86::CMOVA16rr; - case X86::CMOVB32rm: return X86::CMOVA32rm; - case X86::CMOVB32rr: return X86::CMOVA32rr; - case 
X86::CMOVB64rm: return X86::CMOVA64rm; - case X86::CMOVB64rr: return X86::CMOVA64rr; - case X86::CMOVBE16rm: return X86::CMOVAE16rm; - case X86::CMOVBE16rr: return X86::CMOVAE16rr; - case X86::CMOVBE32rm: return X86::CMOVAE32rm; - case X86::CMOVBE32rr: return X86::CMOVAE32rr; - case X86::CMOVBE64rm: return X86::CMOVAE64rm; - case X86::CMOVBE64rr: return X86::CMOVAE64rr; - - case X86::CMOVA16rm: return X86::CMOVB16rm; - case X86::CMOVA16rr: return X86::CMOVB16rr; - case X86::CMOVA32rm: return X86::CMOVB32rm; - case X86::CMOVA32rr: return X86::CMOVB32rr; - case X86::CMOVA64rm: return X86::CMOVB64rm; - case X86::CMOVA64rr: return X86::CMOVB64rr; - case X86::CMOVAE16rm: return X86::CMOVBE16rm; - case X86::CMOVAE16rr: return X86::CMOVBE16rr; - case X86::CMOVAE32rm: return X86::CMOVBE32rm; - case X86::CMOVAE32rr: return X86::CMOVBE32rr; - case X86::CMOVAE64rm: return X86::CMOVBE64rm; - case X86::CMOVAE64rr: return X86::CMOVBE64rr; - } -} - /// isRedundantFlagInstr - check whether the first instruction, whose only /// purpose is to update flags, can be made redundant. /// CMPrr can be made redundant by SUBrr if the operands are the same. @@ -3077,7 +3099,7 @@ // redundant and that instruction will be saved in Sub. MachineInstr *Sub = NULL; const TargetRegisterInfo *TRI = &getRegisterInfo(); - + // We iterate backward, starting from the instruction before CmpInstr and // stop when reaching the definition of a source register or done with the BB. // RI points to the instruction before CmpInstr. @@ -3131,10 +3153,35 @@ if (IsSwapped) { // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. 
- unsigned NewOpc = getSwappedConditionForSET(Instr.getOpcode()); - if (!NewOpc) NewOpc = getSwappedConditionForBranch(Instr.getOpcode()); - if (!NewOpc) NewOpc = getSwappedConditionForCMov(Instr.getOpcode()); - if (!NewOpc) return false; + // We decode the condition code from opcode, swap the condition code, + // and synthesize the new opcode. + bool OpcIsSET = false; + X86::CondCode OldCC; + if (Instr.isBranch()) + OldCC = getCondFromBranchOpc(Instr.getOpcode()); + else { + OldCC = getCondFromSETOpc(Instr.getOpcode()); + if (OldCC != X86::COND_INVALID) + OpcIsSET = true; + else + OldCC = getCondFromCMovOpc(Instr.getOpcode()); + } + if (OldCC == X86::COND_INVALID) return false; + X86::CondCode NewCC = getSwappedCondition(OldCC); + if (NewCC == X86::COND_INVALID) return false; + + // Synthesize the new opcode. + bool HasMemoryOperand = Instr.hasOneMemOperand(); + unsigned NewOpc; + if (Instr.isBranch()) + NewOpc = GetCondBranchFromCond(NewCC); + else if(OpcIsSET) + NewOpc = getSETFromCond(NewCC, HasMemoryOperand); + else { + unsigned DstReg = Instr.getOperand(0).getReg(); + NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(), + HasMemoryOperand); + } // Push the MachineInstr to OpsToUpdate. 
// If it is safe to remove CmpInstr, the condition code of these Modified: llvm/trunk/test/CodeGen/X86/jump_sign.ll URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/jump_sign.ll?rev=159955&r1=159954&r2=159955&view=diff ============================================================================== --- llvm/trunk/test/CodeGen/X86/jump_sign.ll (original) +++ llvm/trunk/test/CodeGen/X86/jump_sign.ll Mon Jul 9 13:57:12 2012 @@ -120,6 +120,23 @@ if.else: ret i32 %sub } +define i32 @l3(i32 %a, i32 %b) nounwind { +entry: +; CHECK: l3: +; CHECK: sub +; CHECK-NOT: cmp +; CHECK: jge + %cmp = icmp sgt i32 %b, %a + %sub = sub nsw i32 %a, %b + br i1 %cmp, label %if.then, label %if.else + +if.then: + ret i32 %sub + +if.else: + %add = add nsw i32 %sub, 1 + ret i32 %add +} ; rdar://11540023 define i32 @n(i32 %x, i32 %y) nounwind { entry: From mcrosier at apple.com Mon Jul 9 14:34:25 2012 From: mcrosier at apple.com (Chad Rosier) Date: Mon, 09 Jul 2012 12:34:25 -0700 Subject: [llvm-commits] [llvm] r159938 - in /llvm/trunk: lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/Disassembler/ARMDisassembler.cpp test/MC/ARM/simple-fp-encoding.s In-Reply-To: <37FA420D-B74C-4A7F-8FA7-BFD20F7E7858@apple.com> References: <20120709164134.517C52A6C069@llvm.org> <67A813F8-DAF5-4267-A140-41A23DF1F2C3@apple.com> <000d01cd5dff$bcb98190$362c84b0$%barton@arm.com> <37FA420D-B74C-4A7F-8FA7-BFD20F7E7858@apple.com> Message-ID: Richard, Looks like this isn't quite fixed; the register names are not matching. -- llvm/test/MC/Disassembler/ARM/neon.txt:1898:10: error: expected string not found in input # CHECK: vmovvs r2, lr, s29, s30 ^ :897:2: note: scanning from here vmovvs r2, lr, s27, s28 ^ -- Chad On Jul 9, 2012, at 11:23 AM, Chad Rosier wrote: > > On Jul 9, 2012, at 11:22 AM, Richard Barton wrote: > >> Hi Chad >> >> My change completely broke disassembling the VMOV Rt, Rt2, Sm, Sm+1 instruction. >> I must not have run the full test suite over it - must do better! 
>> >> Have committed a fix: r159945. Sorry for the breakage. > > Thanks for the quick fix! > > Chad > >> Rich >> >>> -----Original Message----- >>> From: Chad Rosier [mailto:mcrosier at apple.com] >>> Sent: 09 July 2012 19:02 >>> To: Richard Barton >>> Cc: llvm-commits at cs.uiuc.edu >>> Subject: Re: [llvm-commits] [llvm] r159938 - in /llvm/trunk: >>> lib/Target/ARM/ARMInstrVFP.td lib/Target/ARM/Disassembler/ARMDisassembler.cpp >>> test/MC/ARM/simple-fp-encoding.s >>> >>> Richard, >>> This appears to be causing failures on our internal builders with the >>> following warnings: >>> >>> ******************** TEST 'LLVM :: MC/Disassembler/ARM/neon.txt' FAILED >>> ********************Script: >>> -- >>> 0xa4 0x0d 0xa3 0xf4 >>> ^ >>> llvm/test/MC/Disassembler/ARM/neon.txt:1898:10: error: expected string not >>> found in input >>> # CHECK: vmovvs r2, lr, s29, s30 >>> ^ >>> :897:2: note: scanning from here >>> stmdb r12!, {r1, r3, r5, r9, r10, r11, r12, lr} ^ >>> ^ >>> :897:11: note: possible intended match here >>> stmdb r12!, {r1, r3, r5, r9, r10, r11, r12, lr} ^ >>> ^ >>> -- >>> >>> ******************** >>> >>> Chad >>> >>> On Jul 9, 2012, at 9:41 AM, Richard Barton wrote: >>> >>> >>> Author: rbarton >>> Date: Mon Jul 9 11:41:33 2012 >>> New Revision: 159938 >>> >>> URL: http://llvm.org/viewvc/llvm-project?rev=159938&view=rev >>> Log: >>> Fix instruction description of VMOV (between two ARM core registers and >>> two single-precision resiters) >>> >>> Modified: >>> llvm/trunk/lib/Target/ARM/ARMInstrVFP.td >>> llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp >>> llvm/trunk/test/MC/ARM/simple-fp-encoding.s >>> >>> Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td >>> URL: http://llvm.org/viewvc/llvm- >>> project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=159938&r1=159937&r2=15993 >>> 8&view=diff >>> ======================================================================== >>> ====== >>> --- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original) >>> +++ 
llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Mon Jul 9 11:41:33 2012 >>> @@ -567,8 +567,8 @@ >>> bits<4> Rt2; >>> >>> // Encode instruction operands. >>> - let Inst{3-0} = src1{3-0}; >>> - let Inst{5} = src1{4}; >>> + let Inst{3-0} = src1{4-1}; >>> + let Inst{5} = src1{0}; >>> let Inst{15-12} = Rt; >>> let Inst{19-16} = Rt2; >>> >>> @@ -617,8 +617,8 @@ >>> bits<4> src2; >>> >>> // Encode instruction operands. >>> - let Inst{3-0} = dst1{3-0}; >>> - let Inst{5} = dst1{4}; >>> + let Inst{3-0} = dst1{4-1}; >>> + let Inst{5} = dst1{0}; >>> let Inst{15-12} = src1; >>> let Inst{19-16} = src2; >>> >>> >>> Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp >>> URL: http://llvm.org/viewvc/llvm- >>> project/llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp?rev=159938& >>> r1=159937&r2=159938&view=diff >>> ======================================================================== >>> ====== >>> --- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp >>> (original) >>> +++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp Mon Jul >>> 9 11:41:33 2012 >>> @@ -4198,9 +4198,9 @@ >>> DecodeStatus S = MCDisassembler::Success; >>> unsigned Rt = fieldFromInstruction32(Insn, 12, 4); >>> unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); >>> - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); >>> + unsigned Rm = fieldFromInstruction32(Insn, 5, 1); >>> unsigned pred = fieldFromInstruction32(Insn, 28, 4); >>> - Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; >>> + Rm |= fieldFromInstruction32(Insn, 0, 4) << 4; >>> >>> if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) >>> S = MCDisassembler::SoftFail; >>> @@ -4224,9 +4224,9 @@ >>> DecodeStatus S = MCDisassembler::Success; >>> unsigned Rt = fieldFromInstruction32(Insn, 12, 4); >>> unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); >>> - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); >>> + unsigned Rm = fieldFromInstruction32(Insn, 5, 1); >>> unsigned pred = fieldFromInstruction32(Insn, 28, 4); >>> - Rm |= 
fieldFromInstruction32(Insn, 5, 1) << 4; >>> + Rm |= fieldFromInstruction32(Insn, 0, 4) << 4; >>> >>> if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) >>> S = MCDisassembler::SoftFail; >>> >>> Modified: llvm/trunk/test/MC/ARM/simple-fp-encoding.s >>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/simple- >>> fp-encoding.s?rev=159938&r1=159937&r2=159938&view=diff >>> ======================================================================== >>> ====== >>> --- llvm/trunk/test/MC/ARM/simple-fp-encoding.s (original) >>> +++ llvm/trunk/test/MC/ARM/simple-fp-encoding.s Mon Jul 9 11:41:33 2012 >>> @@ -196,6 +196,27 @@ >>> @ CHECK: vmov r0, r1, d16 @ encoding: [0x30,0x0b,0x51,0xec] >>> vmov r0, r1, d16 >>> >>> +@ Between two single precision registers and two core registers >>> + vmov s3, s4, r1, r2 >>> + vmov s2, s3, r1, r2 >>> + vmov r1, r2, s3, s4 >>> + vmov r1, r2, s2, s3 >>> +@ CHECK: vmov s3, s4, r1, r2 @ encoding: [0x31,0x1a,0x42,0xec] >>> +@ CHECK: vmov s2, s3, r1, r2 @ encoding: [0x11,0x1a,0x42,0xec] >>> +@ CHECK: vmov r1, r2, s3, s4 @ encoding: [0x31,0x1a,0x52,0xec] >>> +@ CHECK: vmov r1, r2, s2, s3 @ encoding: [0x11,0x1a,0x52,0xec] >>> + >>> +@ Between one double precision register and two core registers >>> + vmov d15, r1, r2 >>> + vmov d16, r1, r2 >>> + vmov r1, r2, d15 >>> + vmov r1, r2, d16 >>> +@ CHECK: vmov d15, r1, r2 @ encoding: [0x1f,0x1b,0x42,0xec] >>> +@ CHECK: vmov d16, r1, r2 @ encoding: [0x30,0x1b,0x42,0xec] >>> +@ CHECK: vmov r1, r2, d15 @ encoding: [0x1f,0x1b,0x52,0xec] >>> +@ CHECK: vmov r1, r2, d16 @ encoding: [0x30,0x1b,0x52,0xec] >>> + >>> + >>> @ CHECK: vldr d17, [r0] @ encoding: [0x00,0x1b,0xd0,0xed] >>> @ CHECK: vldr s0, [lr] @ encoding: [0x00,0x0a,0x9e,0xed] >>> @ CHECK: vldr d0, [lr] @ encoding: [0x00,0x0b,0x9e,0xed] >>> >>> >>> _______________________________________________ >>> llvm-commits mailing list >>> llvm-commits at cs.uiuc.edu >>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits >>> >>> >> >> >> > -------------- 
next part -------------- An HTML attachment was scrubbed... URL: http://lists.cs.uiuc.edu/pipermail/llvm-commits/attachments/20120709/01812fc3/attachment.html From tstellar at gmail.com Mon Jul 9 15:24:40 2012 From: tstellar at gmail.com (Tom Stellard) Date: Mon, 9 Jul 2012 20:24:40 +0000 Subject: [llvm-commits] RFC: R600/SI backend v5 Message-ID: <1341865484-25597-1-git-send-email-thomas.stellard@amd.com> Hi, Here is an updated version of the R600/SI backend that I am submitting for inclusion in the main LLVM tree. The R600/SI backend is a backend for AMD GPUs (HD2XXX-HD7XXX). It is currently being used by the Mesa[1] project for compiling 3D and compute shaders for our Open Source GPU drivers. At this point, compiling OpenCL C compute shaders for our Open Source GPU drivers requires a patched version of LLVM (for target intrinsics) and Clang (for TargetInfo). Upstreaming this backend is very important for us, because it will remove this dependency on patched versions of LLVM and Clang and give our users access to a fully Open Source compute stack that works out of the box (i.e. with distro supplied packages) This is the fifth version of the backend that I have posted to the list, and it is significantly better than the first version I posted over three months ago. I haven't received many comments on it, but the comments I have received, I feel like I have addressed (Mainly that the backend was too big and had too much legacy code). If there are no further comments on this backend would anyone object to me committing it next Monday July 16th? 
Thanks, Tom Stellard [1] http://www.mesa3d.org/ From tstellar at gmail.com Mon Jul 9 15:24:42 2012 From: tstellar at gmail.com (Tom Stellard) Date: Mon, 9 Jul 2012 20:24:42 +0000 Subject: [llvm-commits] [PATCH 2/4] include/llvm: Add R600 Intrinsics v5 In-Reply-To: <1341865484-25597-1-git-send-email-thomas.stellard@amd.com> References: <1341865484-25597-1-git-send-email-thomas.stellard@amd.com> Message-ID: <1341865484-25597-3-git-send-email-thomas.stellard@amd.com> --- include/llvm/Intrinsics.td | 1 + include/llvm/IntrinsicsR600.td | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 0 deletions(-) create mode 100644 include/llvm/IntrinsicsR600.td diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index 95fd843..30aabb1 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -458,3 +458,4 @@ include "llvm/IntrinsicsXCore.td" include "llvm/IntrinsicsHexagon.td" include "llvm/IntrinsicsNVVM.td" include "llvm/IntrinsicsMips.td" +include "llvm/IntrinsicsR600.td" diff --git a/include/llvm/IntrinsicsR600.td b/include/llvm/IntrinsicsR600.td new file mode 100644 index 0000000..ecb5668 --- /dev/null +++ b/include/llvm/IntrinsicsR600.td @@ -0,0 +1,36 @@ +//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the R600-specific intrinsics. 
+// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "r600" in { + +class R600ReadPreloadRegisterIntrinsic + : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin; + +multiclass R600ReadPreloadRegisterIntrinsic_xyz { + def _x : R600ReadPreloadRegisterIntrinsic; + def _y : R600ReadPreloadRegisterIntrinsic; + def _z : R600ReadPreloadRegisterIntrinsic; +} + +defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_global_size">; +defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_local_size">; +defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_ngroups">; +defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_tgid">; +defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz < + "__builtin_r600_read_tidig">; +} // End TargetPrefix = "r600" -- 1.7.7.6 From tstellar at gmail.com Mon Jul 9 15:24:43 2012 From: tstellar at gmail.com (Tom Stellard) Date: Mon, 9 Jul 2012 20:24:43 +0000 Subject: [llvm-commits] [PATCH 3/4] Build script changes for R600/SI Codegen v5 In-Reply-To: <1341865484-25597-1-git-send-email-thomas.stellard@amd.com> References: <1341865484-25597-1-git-send-email-thomas.stellard@amd.com> Message-ID: <1341865484-25597-4-git-send-email-thomas.stellard@amd.com> --- CMakeLists.txt | 1 + autoconf/configure.ac | 5 +++-- configure | 7 ++++--- lib/Target/LLVMBuild.txt | 2 +- projects/sample/autoconf/configure.ac | 5 +++-- projects/sample/configure | 8 +++++--- 6 files changed, 17 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bb64db9..f03cfbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,6 +74,7 @@ set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples) set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" ) set(LLVM_ALL_TARGETS + AMDGPU ARM CellSPU CppBackend diff 
--git a/autoconf/configure.ac b/autoconf/configure.ac index 258e6b5..53a42fd 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -652,13 +652,13 @@ TARGETS_TO_BUILD="" AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets], [Build specific host targets: all or target1,target2,... Valid targets are: host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon, - xcore, msp430, nvptx, and cpp (default=all)]),, + xcore, msp430, nvptx, r600, and cpp (default=all)]),, enableval=all) if test "$enableval" = host-only ; then enableval=host fi case "$enableval" in - all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;; + all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon AMDGPU" ;; *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do case "$a_target" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; @@ -675,6 +675,7 @@ case "$enableval" in hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;; nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;; + r600) TARGETS_TO_BUILD="AMDGPU $TARGETS_TO_BUILD" ;; host) case "$llvm_cv_target_arch" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; diff --git a/configure b/configure index 72145bc..82dceb4 100755 --- a/configure +++ b/configure @@ -1417,7 +1417,7 @@ Optional Features: --enable-targets Build specific host targets: all or target1,target2,... 
Valid targets are: host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon, - xcore, msp430, nvptx, and cpp (default=all) + xcore, msp430, nvptx, r600, and cpp (default=all) --enable-bindings Build specific language bindings: all,auto,none,{binding-name} (default=auto) --enable-libffi Check for the presence of libffi (default is NO) @@ -5329,7 +5329,7 @@ if test "$enableval" = host-only ; then enableval=host fi case "$enableval" in - all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;; + all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon AMDGPU" ;; *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do case "$a_target" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; @@ -5346,6 +5346,7 @@ case "$enableval" in hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;; mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;; nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;; + r600) TARGETS_TO_BUILD="AMDGPU $TARGETS_TO_BUILD" ;; host) case "$llvm_cv_target_arch" in x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; @@ -10210,7 +10211,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext < conftest.$ac_ext < References: <1341865484-25597-1-git-send-email-thomas.stellard@amd.com> Message-ID: <1341865484-25597-5-git-send-email-thomas.stellard@amd.com> --- test/CodeGen/R600/fadd.ll | 15 +++++++++++++++ test/CodeGen/R600/fadd.ll.check | Bin 0 -> 48 bytes test/CodeGen/R600/fmul.ll | 15 +++++++++++++++ test/CodeGen/R600/fmul.ll.check | Bin 0 -> 48 bytes test/CodeGen/R600/fsub.ll | 15 +++++++++++++++ test/CodeGen/R600/fsub.ll.check | Bin 0 -> 96 bytes test/CodeGen/R600/lit.local.cfg | 13 +++++++++++++ test/CodeGen/R600/llvm.AMDGPU.cos.ll | 15 +++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.cos.ll.check | Bin 0 -> 144 bytes test/CodeGen/R600/llvm.AMDGPU.floor.ll 
| 15 +++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.floor.ll.check | Bin 0 -> 48 bytes test/CodeGen/R600/llvm.AMDGPU.mul.ll | 16 ++++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.mul.ll.check | Bin 0 -> 48 bytes test/CodeGen/R600/llvm.AMDGPU.pow.ll | 16 ++++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.pow.ll.check | Bin 0 -> 144 bytes test/CodeGen/R600/llvm.AMDGPU.rcp.ll | 15 +++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.rcp.ll.check | Bin 0 -> 48 bytes test/CodeGen/R600/llvm.AMDGPU.sin.ll | 15 +++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.sin.ll.check | Bin 0 -> 144 bytes test/CodeGen/R600/llvm.AMDGPU.trunc.ll | 15 +++++++++++++++ test/CodeGen/R600/llvm.AMDGPU.trunc.ll.check | Bin 0 -> 48 bytes test/CodeGen/R600/llvm.AMDIL.fabs..ll | 15 +++++++++++++++ test/CodeGen/R600/llvm.AMDIL.fabs..ll.check | Bin 0 -> 48 bytes test/CodeGen/R600/llvm.AMDIL.max..ll | 16 ++++++++++++++++ test/CodeGen/R600/llvm.AMDIL.max..ll.check | Bin 0 -> 48 bytes test/CodeGen/R600/llvm.AMDIL.min..ll | 16 ++++++++++++++++ test/CodeGen/R600/llvm.AMDIL.min..ll.check | Bin 0 -> 48 bytes 27 files changed, 212 insertions(+), 0 deletions(-) create mode 100644 test/CodeGen/R600/fadd.ll create mode 100644 test/CodeGen/R600/fadd.ll.check create mode 100644 test/CodeGen/R600/fmul.ll create mode 100644 test/CodeGen/R600/fmul.ll.check create mode 100644 test/CodeGen/R600/fsub.ll create mode 100644 test/CodeGen/R600/fsub.ll.check create mode 100644 test/CodeGen/R600/lit.local.cfg create mode 100644 test/CodeGen/R600/llvm.AMDGPU.cos.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.cos.ll.check create mode 100644 test/CodeGen/R600/llvm.AMDGPU.floor.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.floor.ll.check create mode 100644 test/CodeGen/R600/llvm.AMDGPU.mul.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.mul.ll.check create mode 100644 test/CodeGen/R600/llvm.AMDGPU.pow.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.pow.ll.check create mode 100644 test/CodeGen/R600/llvm.AMDGPU.rcp.ll create 
mode 100644 test/CodeGen/R600/llvm.AMDGPU.rcp.ll.check create mode 100644 test/CodeGen/R600/llvm.AMDGPU.sin.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.sin.ll.check create mode 100644 test/CodeGen/R600/llvm.AMDGPU.trunc.ll create mode 100644 test/CodeGen/R600/llvm.AMDGPU.trunc.ll.check create mode 100644 test/CodeGen/R600/llvm.AMDIL.fabs..ll create mode 100644 test/CodeGen/R600/llvm.AMDIL.fabs..ll.check create mode 100644 test/CodeGen/R600/llvm.AMDIL.max..ll create mode 100644 test/CodeGen/R600/llvm.AMDIL.max..ll.check create mode 100644 test/CodeGen/R600/llvm.AMDIL.min..ll create mode 100644 test/CodeGen/R600/llvm.AMDIL.min..ll.check diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll new file mode 100644 index 0000000..874fcc6 --- /dev/null +++ b/test/CodeGen/R600/fadd.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | diff %s.check - + + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fadd float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fadd.ll.check b/test/CodeGen/R600/fadd.ll.check new file mode 100644 index 0000000000000000000000000000000000000000..886082f22622bf687d2a9c53214873c3691f7620 GIT binary patch literal 48 TcmZQzKn9G+90VIA43z=^0Av6J literal 0 HcmV?d00001 diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll new file mode 100644 index 0000000..28bc4d8 --- /dev/null +++ b/test/CodeGen/R600/fmul.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | diff %s.check - + + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fmul float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare 
void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fmul.ll.check b/test/CodeGen/R600/fmul.ll.check new file mode 100644 index 0000000000000000000000000000000000000000..9ba36ccb7416ea5886d596684ff4bf8bc3759d07 GIT binary patch literal 48 VcmZQzKn9G+90Z$@fr$Ys1pokc00sa6 literal 0 HcmV?d00001 diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll new file mode 100644 index 0000000..8e43128 --- /dev/null +++ b/test/CodeGen/R600/fsub.ll @@ -0,0 +1,15 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | diff %s.check - + + +define void @test() { + %r0 = call float @llvm.R600.load.input(i32 0) + %r1 = call float @llvm.R600.load.input(i32 1) + %r2 = fsub float %r0, %r1 + call void @llvm.AMDGPU.store.output(float %r2, i32 0) + ret void +} + +declare float @llvm.R600.load.input(i32) readnone + +declare void @llvm.AMDGPU.store.output(float, i32) + diff --git a/test/CodeGen/R600/fsub.ll.check b/test/CodeGen/R600/fsub.ll.check new file mode 100644 index 0000000000000000000000000000000000000000..79993541ce6ba84501876e488bfd428501f1383f GIT binary patch literal 96 fcmZQzU|?iqzzY}|7#Spi7z at BE2s9C>6e References: <1341865484-25597-1-git-send-email-thomas.stellard@amd.com> Message-ID: <1341865484-25597-2-git-send-email-thomas.stellard@amd.com> --- lib/Target/AMDGPU/AMDGPU.h | 35 + lib/Target/AMDGPU/AMDGPU.td | 21 + lib/Target/AMDGPU/AMDGPUConvertToISA.cpp | 63 + lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 427 ++ lib/Target/AMDGPU/AMDGPUISelLowering.h | 78 + lib/Target/AMDGPU/AMDGPUInstrInfo.cpp | 46 + lib/Target/AMDGPU/AMDGPUInstrInfo.h | 48 + lib/Target/AMDGPU/AMDGPUInstrInfo.td | 69 + lib/Target/AMDGPU/AMDGPUInstructions.td | 123 + lib/Target/AMDGPU/AMDGPUIntrinsics.td | 64 + lib/Target/AMDGPU/AMDGPURegisterInfo.cpp | 24 + lib/Target/AMDGPU/AMDGPURegisterInfo.h | 42 + lib/Target/AMDGPU/AMDGPURegisterInfo.td | 22 + lib/Target/AMDGPU/AMDGPUSubtarget.h | 36 + lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 162 + lib/Target/AMDGPU/AMDGPUTargetMachine.h | 76 
+ lib/Target/AMDGPU/AMDGPUUtil.cpp | 139 + lib/Target/AMDGPU/AMDGPUUtil.h | 46 + lib/Target/AMDGPU/AMDIL.h | 251 + lib/Target/AMDGPU/AMDIL7XXDevice.cpp | 128 + lib/Target/AMDGPU/AMDIL7XXDevice.h | 71 + lib/Target/AMDGPU/AMDILAlgorithms.tpp | 93 + lib/Target/AMDGPU/AMDILBase.td | 113 + lib/Target/AMDGPU/AMDILCFGStructurizer.cpp | 3236 ++++++++++++ lib/Target/AMDGPU/AMDILCallingConv.td | 42 + lib/Target/AMDGPU/AMDILCodeEmitter.h | 48 + lib/Target/AMDGPU/AMDILDevice.cpp | 137 + lib/Target/AMDGPU/AMDILDevice.h | 116 + lib/Target/AMDGPU/AMDILDeviceInfo.cpp | 93 + lib/Target/AMDGPU/AMDILDeviceInfo.h | 89 + lib/Target/AMDGPU/AMDILDevices.h | 19 + lib/Target/AMDGPU/AMDILEnumeratedTypes.td | 522 ++ lib/Target/AMDGPU/AMDILEvergreenDevice.cpp | 183 + lib/Target/AMDGPU/AMDILEvergreenDevice.h | 87 + lib/Target/AMDGPU/AMDILFormats.td | 175 + lib/Target/AMDGPU/AMDILFrameLowering.cpp | 53 + lib/Target/AMDGPU/AMDILFrameLowering.h | 46 + lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp | 393 ++ lib/Target/AMDGPU/AMDILISelLowering.cpp | 1850 +++++++ lib/Target/AMDGPU/AMDILISelLowering.h | 203 + lib/Target/AMDGPU/AMDILInstrInfo.cpp | 509 ++ lib/Target/AMDGPU/AMDILInstrInfo.h | 161 + lib/Target/AMDGPU/AMDILInstrInfo.td | 108 + lib/Target/AMDGPU/AMDILInstructions.td | 143 + lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp | 171 + lib/Target/AMDGPU/AMDILIntrinsicInfo.h | 49 + lib/Target/AMDGPU/AMDILIntrinsics.td | 705 +++ lib/Target/AMDGPU/AMDILMultiClass.td | 95 + lib/Target/AMDGPU/AMDILNIDevice.cpp | 71 + lib/Target/AMDGPU/AMDILNIDevice.h | 59 + lib/Target/AMDGPU/AMDILNodes.td | 47 + lib/Target/AMDGPU/AMDILOperands.td | 32 + lib/Target/AMDGPU/AMDILPatterns.td | 504 ++ lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp | 1264 +++++ lib/Target/AMDGPU/AMDILProfiles.td | 174 + lib/Target/AMDGPU/AMDILRegisterInfo.cpp | 162 + lib/Target/AMDGPU/AMDILRegisterInfo.h | 95 + lib/Target/AMDGPU/AMDILRegisterInfo.td | 110 + lib/Target/AMDGPU/AMDILSIDevice.cpp | 49 + lib/Target/AMDGPU/AMDILSIDevice.h | 45 + 
lib/Target/AMDGPU/AMDILSubtarget.cpp | 178 + lib/Target/AMDGPU/AMDILSubtarget.h | 77 + lib/Target/AMDGPU/AMDILTokenDesc.td | 120 + lib/Target/AMDGPU/AMDILUtilityFunctions.h | 75 + lib/Target/AMDGPU/AMDILVersion.td | 58 + lib/Target/AMDGPU/CMakeLists.txt | 50 + lib/Target/AMDGPU/GENERATED_FILES | 13 + lib/Target/AMDGPU/LLVMBuild.txt | 32 + lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 104 + lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h | 30 + .../AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 61 + .../AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h | 35 + lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt | 7 + lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt | 23 + lib/Target/AMDGPU/MCTargetDesc/Makefile | 16 + lib/Target/AMDGPU/Makefile | 22 + lib/Target/AMDGPU/Processors.td | 27 + lib/Target/AMDGPU/R600CodeEmitter.cpp | 613 +++ lib/Target/AMDGPU/R600GenRegisterInfo.pl | 190 + lib/Target/AMDGPU/R600HwRegInfo.include | 1056 ++++ lib/Target/AMDGPU/R600ISelLowering.cpp | 286 ++ lib/Target/AMDGPU/R600ISelLowering.h | 48 + lib/Target/AMDGPU/R600InstrInfo.cpp | 106 + lib/Target/AMDGPU/R600InstrInfo.h | 76 + lib/Target/AMDGPU/R600Instructions.td | 1300 +++++ lib/Target/AMDGPU/R600Intrinsics.td | 16 + lib/Target/AMDGPU/R600KernelParameters.cpp | 546 ++ lib/Target/AMDGPU/R600MachineFunctionInfo.cpp | 16 + lib/Target/AMDGPU/R600MachineFunctionInfo.h | 33 + lib/Target/AMDGPU/R600RegisterInfo.cpp | 88 + lib/Target/AMDGPU/R600RegisterInfo.h | 54 + lib/Target/AMDGPU/R600RegisterInfo.td | 5271 ++++++++++++++++++++ lib/Target/AMDGPU/R600Schedule.td | 36 + lib/Target/AMDGPU/SIAssignInterpRegs.cpp | 117 + lib/Target/AMDGPU/SICodeEmitter.cpp | 321 ++ lib/Target/AMDGPU/SIGenRegisterInfo.pl | 224 + lib/Target/AMDGPU/SIISelLowering.cpp | 195 + lib/Target/AMDGPU/SIISelLowering.h | 48 + lib/Target/AMDGPU/SIInstrFormats.td | 128 + lib/Target/AMDGPU/SIInstrInfo.cpp | 104 + lib/Target/AMDGPU/SIInstrInfo.h | 90 + lib/Target/AMDGPU/SIInstrInfo.td | 477 ++ lib/Target/AMDGPU/SIInstructions.td | 964 
++++ lib/Target/AMDGPU/SIIntrinsics.td | 35 + lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 18 + lib/Target/AMDGPU/SIMachineFunctionInfo.h | 37 + lib/Target/AMDGPU/SIRegisterInfo.cpp | 51 + lib/Target/AMDGPU/SIRegisterInfo.h | 47 + lib/Target/AMDGPU/SIRegisterInfo.td | 886 ++++ lib/Target/AMDGPU/SISchedule.td | 15 + lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp | 26 + lib/Target/AMDGPU/TargetInfo/CMakeLists.txt | 7 + lib/Target/AMDGPU/TargetInfo/LLVMBuild.txt | 23 + lib/Target/AMDGPU/TargetInfo/Makefile | 15 + 114 files changed, 28513 insertions(+), 0 deletions(-) create mode 100644 lib/Target/AMDGPU/AMDGPU.h create mode 100644 lib/Target/AMDGPU/AMDGPU.td create mode 100644 lib/Target/AMDGPU/AMDGPUConvertToISA.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUISelLowering.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUISelLowering.h create mode 100644 lib/Target/AMDGPU/AMDGPUInstrInfo.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUInstrInfo.h create mode 100644 lib/Target/AMDGPU/AMDGPUInstrInfo.td create mode 100644 lib/Target/AMDGPU/AMDGPUInstructions.td create mode 100644 lib/Target/AMDGPU/AMDGPUIntrinsics.td create mode 100644 lib/Target/AMDGPU/AMDGPURegisterInfo.cpp create mode 100644 lib/Target/AMDGPU/AMDGPURegisterInfo.h create mode 100644 lib/Target/AMDGPU/AMDGPURegisterInfo.td create mode 100644 lib/Target/AMDGPU/AMDGPUSubtarget.h create mode 100644 lib/Target/AMDGPU/AMDGPUTargetMachine.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUTargetMachine.h create mode 100644 lib/Target/AMDGPU/AMDGPUUtil.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUUtil.h create mode 100644 lib/Target/AMDGPU/AMDIL.h create mode 100644 lib/Target/AMDGPU/AMDIL7XXDevice.cpp create mode 100644 lib/Target/AMDGPU/AMDIL7XXDevice.h create mode 100644 lib/Target/AMDGPU/AMDILAlgorithms.tpp create mode 100644 lib/Target/AMDGPU/AMDILBase.td create mode 100644 lib/Target/AMDGPU/AMDILCFGStructurizer.cpp create mode 100644 lib/Target/AMDGPU/AMDILCallingConv.td create mode 100644 
lib/Target/AMDGPU/AMDILCodeEmitter.h create mode 100644 lib/Target/AMDGPU/AMDILDevice.cpp create mode 100644 lib/Target/AMDGPU/AMDILDevice.h create mode 100644 lib/Target/AMDGPU/AMDILDeviceInfo.cpp create mode 100644 lib/Target/AMDGPU/AMDILDeviceInfo.h create mode 100644 lib/Target/AMDGPU/AMDILDevices.h create mode 100644 lib/Target/AMDGPU/AMDILEnumeratedTypes.td create mode 100644 lib/Target/AMDGPU/AMDILEvergreenDevice.cpp create mode 100644 lib/Target/AMDGPU/AMDILEvergreenDevice.h create mode 100644 lib/Target/AMDGPU/AMDILFormats.td create mode 100644 lib/Target/AMDGPU/AMDILFrameLowering.cpp create mode 100644 lib/Target/AMDGPU/AMDILFrameLowering.h create mode 100644 lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp create mode 100644 lib/Target/AMDGPU/AMDILISelLowering.cpp create mode 100644 lib/Target/AMDGPU/AMDILISelLowering.h create mode 100644 lib/Target/AMDGPU/AMDILInstrInfo.cpp create mode 100644 lib/Target/AMDGPU/AMDILInstrInfo.h create mode 100644 lib/Target/AMDGPU/AMDILInstrInfo.td create mode 100644 lib/Target/AMDGPU/AMDILInstructions.td create mode 100644 lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp create mode 100644 lib/Target/AMDGPU/AMDILIntrinsicInfo.h create mode 100644 lib/Target/AMDGPU/AMDILIntrinsics.td create mode 100644 lib/Target/AMDGPU/AMDILMultiClass.td create mode 100644 lib/Target/AMDGPU/AMDILNIDevice.cpp create mode 100644 lib/Target/AMDGPU/AMDILNIDevice.h create mode 100644 lib/Target/AMDGPU/AMDILNodes.td create mode 100644 lib/Target/AMDGPU/AMDILOperands.td create mode 100644 lib/Target/AMDGPU/AMDILPatterns.td create mode 100644 lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp create mode 100644 lib/Target/AMDGPU/AMDILProfiles.td create mode 100644 lib/Target/AMDGPU/AMDILRegisterInfo.cpp create mode 100644 lib/Target/AMDGPU/AMDILRegisterInfo.h create mode 100644 lib/Target/AMDGPU/AMDILRegisterInfo.td create mode 100644 lib/Target/AMDGPU/AMDILSIDevice.cpp create mode 100644 lib/Target/AMDGPU/AMDILSIDevice.h create mode 100644 
lib/Target/AMDGPU/AMDILSubtarget.cpp create mode 100644 lib/Target/AMDGPU/AMDILSubtarget.h create mode 100644 lib/Target/AMDGPU/AMDILTokenDesc.td create mode 100644 lib/Target/AMDGPU/AMDILUtilityFunctions.h create mode 100644 lib/Target/AMDGPU/AMDILVersion.td create mode 100644 lib/Target/AMDGPU/CMakeLists.txt create mode 100644 lib/Target/AMDGPU/GENERATED_FILES create mode 100644 lib/Target/AMDGPU/LLVMBuild.txt create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h create mode 100644 lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt create mode 100644 lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/AMDGPU/MCTargetDesc/Makefile create mode 100644 lib/Target/AMDGPU/Makefile create mode 100644 lib/Target/AMDGPU/Processors.td create mode 100644 lib/Target/AMDGPU/R600CodeEmitter.cpp create mode 100644 lib/Target/AMDGPU/R600GenRegisterInfo.pl create mode 100644 lib/Target/AMDGPU/R600HwRegInfo.include create mode 100644 lib/Target/AMDGPU/R600ISelLowering.cpp create mode 100644 lib/Target/AMDGPU/R600ISelLowering.h create mode 100644 lib/Target/AMDGPU/R600InstrInfo.cpp create mode 100644 lib/Target/AMDGPU/R600InstrInfo.h create mode 100644 lib/Target/AMDGPU/R600Instructions.td create mode 100644 lib/Target/AMDGPU/R600Intrinsics.td create mode 100644 lib/Target/AMDGPU/R600KernelParameters.cpp create mode 100644 lib/Target/AMDGPU/R600MachineFunctionInfo.cpp create mode 100644 lib/Target/AMDGPU/R600MachineFunctionInfo.h create mode 100644 lib/Target/AMDGPU/R600RegisterInfo.cpp create mode 100644 lib/Target/AMDGPU/R600RegisterInfo.h create mode 100644 lib/Target/AMDGPU/R600RegisterInfo.td create mode 100644 lib/Target/AMDGPU/R600Schedule.td create mode 100644 lib/Target/AMDGPU/SIAssignInterpRegs.cpp create mode 100644 
lib/Target/AMDGPU/SICodeEmitter.cpp create mode 100644 lib/Target/AMDGPU/SIGenRegisterInfo.pl create mode 100644 lib/Target/AMDGPU/SIISelLowering.cpp create mode 100644 lib/Target/AMDGPU/SIISelLowering.h create mode 100644 lib/Target/AMDGPU/SIInstrFormats.td create mode 100644 lib/Target/AMDGPU/SIInstrInfo.cpp create mode 100644 lib/Target/AMDGPU/SIInstrInfo.h create mode 100644 lib/Target/AMDGPU/SIInstrInfo.td create mode 100644 lib/Target/AMDGPU/SIInstructions.td create mode 100644 lib/Target/AMDGPU/SIIntrinsics.td create mode 100644 lib/Target/AMDGPU/SIMachineFunctionInfo.cpp create mode 100644 lib/Target/AMDGPU/SIMachineFunctionInfo.h create mode 100644 lib/Target/AMDGPU/SIRegisterInfo.cpp create mode 100644 lib/Target/AMDGPU/SIRegisterInfo.h create mode 100644 lib/Target/AMDGPU/SIRegisterInfo.td create mode 100644 lib/Target/AMDGPU/SISchedule.td create mode 100644 lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp create mode 100644 lib/Target/AMDGPU/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/AMDGPU/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/AMDGPU/TargetInfo/Makefile diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h new file mode 100644 index 0000000..191f495 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -0,0 +1,35 @@ +//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPU_H +#define AMDGPU_H + +#include "AMDGPUTargetMachine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class FunctionPass; +class AMDGPUTargetMachine; + +// R600 Passes +FunctionPass* createR600KernelParametersPass(const TargetData* TD); +FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS); + +// SI Passes +FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); +FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); + +// Passes common to R600 and SI +FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); + +} // End namespace llvm + +#endif // AMDGPU_H diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td new file mode 100644 index 0000000..1bb5fb9 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -0,0 +1,21 @@ +//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// + +// Include AMDIL TD files +include "AMDILBase.td" +include "AMDILVersion.td" + +// Include AMDGPU TD files +include "R600Schedule.td" +include "SISchedule.td" +include "Processors.td" +include "AMDGPUInstrInfo.td" +include "AMDGPUIntrinsics.td" +include "AMDGPURegisterInfo.td" +include "AMDGPUInstructions.td" diff --git a/lib/Target/AMDGPU/AMDGPUConvertToISA.cpp b/lib/Target/AMDGPU/AMDGPUConvertToISA.cpp new file mode 100644 index 0000000..5e8fe9a --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUConvertToISA.cpp @@ -0,0 +1,63 @@ +//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass lowers AMDIL machine instructions to the appropriate hardware +// instructions. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUInstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +#include +using namespace llvm; + +namespace { + +class AMDGPUConvertToISAPass : public MachineFunctionPass { + +private: + static char ID; + TargetMachine &TM; + +public: + AMDGPUConvertToISAPass(TargetMachine &tm) : + MachineFunctionPass(ID), TM(tm) { } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const {return "AMDGPU Convert to ISA";} + +}; + +} // End anonymous namespace + +char AMDGPUConvertToISAPass::ID = 0; + +FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) { + return new AMDGPUConvertToISAPass(tm); +} + +bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) +{ + const AMDGPUInstrInfo * TII = + static_cast(TM.getInstrInfo()); + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + MachineInstr &MI = *I; + TII->convertToISA(MI, MF, MBB.findDebugLoc(I)); + } + } + return false; +} diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp new file mode 100644 index 0000000..ef5715f --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -0,0 +1,427 @@ +//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the parent TargetLowering class for hardware code gen targets. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUISelLowering.h" +#include "AMDILIntrinsicInfo.h" +#include "AMDGPUUtil.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : + AMDILTargetLowering(TM) +{ + // We need to custom lower some of the intrinsics + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + + // Library functions. These default to Expand, but we have instructions + // for them. + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FEXP2, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + + setOperationAction(ISD::LOAD, MVT::f32, Custom); + setOperationAction(ISD::LOAD, MVT::v4f32, Custom); + + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + setOperationAction(ISD::UREM, MVT::i32, Expand); +} + +SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) + const +{ + switch (Op.getOpcode()) { + default: return AMDILTargetLowering::LowerOperation(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::LOAD: return BitcastLOAD(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); + } +} + +SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const +{ + unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue(); + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + + switch (IntrinsicID) { + default: return Op; + case AMDGPUIntrinsic::AMDIL_abs: + return LowerIntrinsicIABS(Op, DAG); + case AMDGPUIntrinsic::AMDIL_exp: + return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDIL_fabs: + return 
DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDGPU_lrp: + return LowerIntrinsicLRP(Op, DAG); + case AMDGPUIntrinsic::AMDIL_fraction: + return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDIL_mad: + return DAG.getNode(AMDILISD::MAD, DL, VT, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + case AMDGPUIntrinsic::AMDIL_max: + return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_imax: + return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_umax: + return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + case AMDGPUIntrinsic::AMDIL_min: + return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_imin: + return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_umin: + return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), + Op.getOperand(2)); + case AMDGPUIntrinsic::AMDIL_round_nearest: + return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDIL_round_posinf: + return DAG.getNode(ISD::FCEIL, DL, VT, Op.getOperand(1)); + } +} + +///IABS(a) = SMAX(sub(0, a), a) +SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, + SelectionDAG &DAG) const +{ + + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), + Op.getOperand(1)); + + return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); +} + +/// Linear Interpolation +/// LRP(a, b, c) = muladd(a, b, (1 - a) * c) +SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, + SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, + DAG.getConstantFP(1.0f, MVT::f32), + 
Op.getOperand(1)); + SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, + Op.getOperand(3)); + return DAG.getNode(AMDILISD::MAD, DL, VT, Op.getOperand(1), + Op.getOperand(2), + OneSubAC); +} + +/// BitcastLoad - Convert floating point loads to integer loads of the same +/// type width and the bitcast the result back to a floating point type. +SDValue AMDGPUTargetLowering::BitcastLOAD(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + EVT IntVT; + + if (VT == MVT::f32) { + IntVT = MVT::i32; + } else if (VT == MVT::v4f32) { + IntVT = MVT::v4i32; + } else { + return Op; + } + LoadSDNode * LD = dyn_cast(Op); + assert(LD); + + SDValue NewLoad = DAG.getLoad (LD->getAddressingMode(), + LD->getExtensionType(), IntVT, DL, + LD->getChain(), LD->getBasePtr(), + LD->getOffset(), IntVT, + LD->getMemOperand()); + + SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, NewLoad); + DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Bitcast); + + return Op; +} + +SDValue AMDGPUTargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue True = Op.getOperand(2); + SDValue False = Op.getOperand(3); + SDValue CC = Op.getOperand(4); + ISD::CondCode CCOpcode = cast(CC)->get(); + SDValue Temp; + + // LHS and RHS are guaranteed to be the same value type + EVT CompareVT = LHS.getValueType(); + + // We need all the operands of SELECT_CC to have the same value type, so if + // necessary we need to convert LHS and RHS to be the same type True and + // False. True and False are guaranteed to have the same type as this + // SELECT_CC node. 
+ + if (CompareVT != VT) { + ISD::NodeType ConversionOp = ISD::DELETED_NODE; + if (VT == MVT::f32 && CompareVT == MVT::i32) { + if (isUnsignedIntSetCC(CCOpcode)) { + ConversionOp = ISD::UINT_TO_FP; + } else { + ConversionOp = ISD::SINT_TO_FP; + } + } else if (VT == MVT::i32 && CompareVT == MVT::f32) { + ConversionOp = ISD::FP_TO_SINT; + } else { + // I don't think there will be any other type pairings. + assert(!"Unhandled operand type parings in SELECT_CC"); + } + // XXX Check the value of LHS and RHS and avoid creating sequences like + // (FTOI (ITOF)) + LHS = DAG.getNode(ConversionOp, DL, VT, LHS); + RHS = DAG.getNode(ConversionOp, DL, VT, RHS); + } + + // If True is a hardware TRUE value and False is a hardware FALSE value or + // vice-versa we can handle this with a native instruction (SET* instructions). + if ((isHWTrueValue(True) && isHWFalseValue(False))) { + return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); + } + + // XXX If True is a hardware TRUE value and False is a hardware FALSE value, + // we can handle this with a native instruction, but we need to swap true + // and false and change the conditional. + if (isHWTrueValue(False) && isHWFalseValue(True)) { + } + + // XXX Check if we can lower this to a SELECT or if it is supported by a native + // operation. (The code below does this but we don't have the Instruction + // selection patterns to do this yet. +#if 0 + if (isZero(LHS) || isZero(RHS)) { + SDValue Cond = (isZero(LHS) ? 
RHS : LHS); + bool SwapTF = false; + switch (CCOpcode) { + case ISD::SETOEQ: + case ISD::SETUEQ: + case ISD::SETEQ: + SwapTF = true; + // Fall through + case ISD::SETONE: + case ISD::SETUNE: + case ISD::SETNE: + // We can lower to select + if (SwapTF) { + Temp = True; + True = False; + False = Temp; + } + // CNDE + return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); + default: + // Supported by a native operation (CNDGE, CNDGT) + return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); + } + } +#endif + + // If we make it this for it means we have no native instructions to handle + // this SELECT_CC, so we must lower it. + SDValue HWTrue, HWFalse; + + if (VT == MVT::f32) { + HWTrue = DAG.getConstantFP(1.0f, VT); + HWFalse = DAG.getConstantFP(0.0f, VT); + } else if (VT == MVT::i32) { + HWTrue = DAG.getConstant(-1, VT); + HWFalse = DAG.getConstant(0, VT); + } + else { + assert(!"Unhandled value type in LowerSELECT_CC"); + } + + // Lower this unsupported SELECT_CC into a combination of two supported + // SELECT_CC operations. + SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC); + + return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); +} + + +SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, + SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + + SDValue Num = Op.getOperand(0); + SDValue Den = Op.getOperand(1); + + SmallVector Results; + + // RCP = URECIP(Den) = 2^32 / Den + e + // e is rounding error. + SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); + + // RCP_LO = umulo(RCP, Den) */ + SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); + + // RCP_HI = mulhu (RCP, Den) */ + SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); + + // NEG_RCP_LO = -RCP_LO + SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), + RCP_LO); + + // ABS_RCP_LO = (RCP_HI == 0 ? 
NEG_RCP_LO : RCP_LO) + SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), + NEG_RCP_LO, RCP_LO, + ISD::SETEQ); + // Calculate the rounding error from the URECIP instruction + // E = mulhu(ABS_RCP_LO, RCP) + SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); + + // RCP_A_E = RCP + E + SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); + + // RCP_S_E = RCP - E + SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); + + // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) + SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), + RCP_A_E, RCP_S_E, + ISD::SETEQ); + // Quotient = mulhu(Tmp0, Num) + SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); + + // Num_S_Remainder = Quotient * Den + SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); + + // Remainder = Num - Num_S_Remainder + SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); + + // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) + SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, + DAG.getConstant(-1, VT), + DAG.getConstant(0, VT), + ISD::SETGE); + // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0) + SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder, + DAG.getConstant(0, VT), + DAG.getConstant(-1, VT), + DAG.getConstant(0, VT), + ISD::SETGE); + // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero + SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, + Remainder_GE_Zero); + + // Calculate Division result: + + // Quotient_A_One = Quotient + 1 + SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, + DAG.getConstant(1, VT)); + + // Quotient_S_One = Quotient - 1 + SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, + DAG.getConstant(1, VT)); + + // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One) + SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), + Quotient, Quotient_A_One, ISD::SETEQ); + + // Div = (Remainder_GE_Zero == 0 ? 
Quotient_S_One : Div) + Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), + Quotient_S_One, Div, ISD::SETEQ); + + // Calculate Rem result: + + // Remainder_S_Den = Remainder - Den + SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); + + // Remainder_A_Den = Remainder + Den + SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); + + // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) + SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), + Remainder, Remainder_S_Den, ISD::SETEQ); + + // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) + Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), + Remainder_A_Den, Rem, ISD::SETEQ); + + DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div); + DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem); + + return Op; +} + +//===----------------------------------------------------------------------===// +// Helper functions +//===----------------------------------------------------------------------===// + +bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const +{ + if (ConstantFPSDNode * CFP = dyn_cast(Op)) { + return CFP->isExactlyValue(1.0); + } + if (ConstantSDNode *C = dyn_cast(Op)) { + return C->isAllOnesValue(); + } + return false; +} + +bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const +{ + if (ConstantFPSDNode * CFP = dyn_cast(Op)) { + return CFP->getValueAPF().isZero(); + } + if (ConstantSDNode *C = dyn_cast(Op)) { + return C->isNullValue(); + } + return false; +} + +void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI, + MachineFunction * MF, MachineRegisterInfo & MRI, + const TargetInstrInfo * TII, unsigned reg) const +{ + AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, MI->getOperand(0).getReg()); +} + +#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; + +const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const +{ + switch (Opcode) { + default: return 
AMDILTargetLowering::getTargetNodeName(Opcode); + + NODE_NAME_CASE(FRACT) + NODE_NAME_CASE(FMAX) + NODE_NAME_CASE(SMAX) + NODE_NAME_CASE(UMAX) + NODE_NAME_CASE(FMIN) + NODE_NAME_CASE(SMIN) + NODE_NAME_CASE(UMIN) + NODE_NAME_CASE(URECIP) + } +} diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h new file mode 100644 index 0000000..4d1a312 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -0,0 +1,78 @@ +//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the interface definition of the TargetLowering class +// that is common to all AMD GPUs. +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUISELLOWERING_H +#define AMDGPUISELLOWERING_H + +#include "AMDILISelLowering.h" + +namespace llvm { + +class AMDGPUTargetLowering : public AMDILTargetLowering +{ +private: + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue BitcastLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; + +protected: + + /// addLiveIn - This function adds reg to the live in list of the entry block + /// and emits a copy from reg to MI.getOperand(0). + /// + // Some registers are loaded with values before the program + /// begins to execute. The loading of these values is modeled with pseudo + /// instructions which are lowered using this function.
+ void addLiveIn(MachineInstr * MI, MachineFunction * MF, + MachineRegisterInfo & MRI, const TargetInstrInfo * TII, + unsigned reg) const; + + bool isHWTrueValue(SDValue Op) const; + bool isHWFalseValue(SDValue Op) const; + +public: + AMDGPUTargetLowering(TargetMachine &TM); + + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const; + virtual const char* getTargetNodeName(unsigned Opcode) const; + +}; + +namespace AMDGPUISD +{ + +enum +{ + AMDGPU_FIRST = AMDILISD::LAST_ISD_NUMBER, + BITALIGN, + FRACT, + FMAX, + SMAX, + UMAX, + FMIN, + SMIN, + UMIN, + URECIP, + LAST_AMDGPU_ISD_NUMBER +}; + + +} // End namespace AMDGPUISD + +} // End namespace llvm + +#endif // AMDGPUISELLOWERING_H diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp new file mode 100644 index 0000000..1996351 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -0,0 +1,46 @@ +//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the TargetInstrInfo class that is +// common to all AMD GPUs. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUInstrInfo.h" +#include "AMDGPURegisterInfo.h" +#include "AMDGPUTargetMachine.h" +#include "AMDIL.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +AMDGPUInstrInfo::AMDGPUInstrInfo(AMDGPUTargetMachine &tm) + : AMDILInstrInfo(tm), TM(tm) { } + +void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF, + DebugLoc DL) const +{ + MachineRegisterInfo &MRI = MF.getRegInfo(); + const AMDGPURegisterInfo & RI = getRegisterInfo(); + + for (unsigned i = 0; i < MI.getNumOperands(); i++) { + MachineOperand &MO = MI.getOperand(i); + // Convert dst regclass to one that is supported by the ISA + if (MO.isReg() && MO.isDef()) { + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg()); + const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass); + + assert(newRegClass); + + MRI.setRegClass(MO.getReg(), newRegClass); + } + } + } +} diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h new file mode 100644 index 0000000..cd9c3cd --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -0,0 +1,48 @@ +//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definition of a TargetInstrInfo class that is common +// to all AMD GPUs. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUINSTRUCTIONINFO_H_ +#define AMDGPUINSTRUCTIONINFO_H_ + +#include "AMDGPURegisterInfo.h" +#include "AMDILInstrInfo.h" + +#include + +namespace llvm { + +class AMDGPUTargetMachine; +class MachineFunction; +class MachineInstr; +class MachineInstrBuilder; + +class AMDGPUInstrInfo : public AMDILInstrInfo { +private: + AMDGPUTargetMachine & TM; + +public: + explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm); + + virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0; + + /// convertToISA - Convert the AMDIL MachineInstr to a supported ISA + /// MachineInstr + virtual void convertToISA(MachineInstr & MI, MachineFunction &MF, + DebugLoc DL) const; + +}; + +} // End llvm namespace + +#endif // AMDGPUINSTRUCTIONINFO_H_ diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td new file mode 100644 index 0000000..4452719 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -0,0 +1,69 @@ +//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains DAG node definitions for the AMDGPU target. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// AMDGPU DAG Profiles +//===----------------------------------------------------------------------===// + +def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> +]>; + +//===----------------------------------------------------------------------===// +// AMDGPU DAG Nodes +// + +// out = (((a << 32) | b) >> c) +// +// Can be used to optimize rotl: +// rotl(a, b) = bitalign(a, a, 32 - b) +def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>; + +// out = a - floor(a) +def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>; + +// out = max(a, b) a and b are floats +def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = max(a, b) a and b are signed ints +def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = max(a, b) a and b are unsigned ints +def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = min(a, b) a and b are floats +def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = min(a, b) a and b are signed ints +def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = min(a, b) a and b are unsigned ints +def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// urecip - This operation is a helper for integer division, it returns the +// result of 1 / a as a fractional unsigned integer. 
+// out = (2^32 / a) + e +// e is rounding error +def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>; diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td new file mode 100644 index 0000000..81b58c1 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -0,0 +1,123 @@ +//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains instruction defs that are common to all hw codegen +// targets. +// +//===----------------------------------------------------------------------===// + +class AMDGPUInst pattern> : Instruction { + field bits<16> AMDILOp = 0; + field bits<3> Gen = 0; + + let Namespace = "AMDGPU"; + let OutOperandList = outs; + let InOperandList = ins; + let AsmString = asm; + let Pattern = pattern; + let Itinerary = NullALU; + let TSFlags{42-40} = Gen; + let TSFlags{63-48} = AMDILOp; +} + +class AMDGPUShaderInst pattern> + : AMDGPUInst { + + field bits<32> Inst = 0xffffffff; + +} + +class Constants { +int TWO_PI = 0x40c90fdb; +int PI = 0x40490fdb; +int TWO_PI_INV = 0x3e22f983; +} +def CONST : Constants; + +def FP_ZERO : PatLeaf < + (fpimm), + [{return N->getValueAPF().isZero();}] +>; + +def FP_ONE : PatLeaf < + (fpimm), + [{return N->isExactlyValue(1.0);}] +>; + +let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in { + +class CLAMP : AMDGPUShaderInst < + (outs rc:$dst), + (ins rc:$src0), + "CLAMP $dst, $src0", + [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] +>; + +class FABS : AMDGPUShaderInst < + (outs rc:$dst), + (ins rc:$src0), + "FABS $dst, $src0", + [(set rc:$dst, (fabs rc:$src0))] +>; + +class FNEG : AMDGPUShaderInst < + (outs rc:$dst), + (ins rc:$src0), + "FNEG $dst, 
$src0", + [(set rc:$dst, (fneg rc:$src0))] +>; + +} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1 + +/* Generic helper patterns for intrinsics */ +/* -------------------------------------- */ + +class POW_Common : Pat < + (int_AMDGPU_pow rc:$src0, rc:$src1), + (exp_ieee (mul rc:$src1, (log_ieee rc:$src0))) +>; + +/* Other helper patterns */ +/* --------------------- */ + +/* Extract element pattern */ +class Extract_Element : Pat< + (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)), + (EXTRACT_SUBREG vec_class:$src, sub_reg) +>; + +/* Insert element pattern */ +class Insert_Element : Pat < + + (vec_type (vector_insert (vec_type vec_class:$vec), + (elem_type elem_class:$elem), sub_idx)), + (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg) +>; + +// Vector Build pattern +class Vector_Build : Pat < + (IL_vbuild elemClass:$src), + (INSERT_SUBREG (vecType (IMPLICIT_DEF)), elemClass:$src, sel_x) +>; + +// bitconvert pattern +class BitConvert : Pat < + (dt (bitconvert (st rc:$src0))), + (dt rc:$src0) +>; + +include "R600Instructions.td" + +include "SIInstrInfo.td" + diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/lib/Target/AMDGPU/AMDGPUIntrinsics.td new file mode 100644 index 0000000..78f072c --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td @@ -0,0 +1,64 @@ +//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines intrinsics that are used by all hw codegen targets. 
+// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "AMDGPU", isTarget = 1 in { + + def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_cos : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; + def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>; + def int_AMDGPU_kilp : Intrinsic<[], [], []>; + def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def 
int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +} + +let TargetPrefix = "TGSI", isTarget = 1 in { + + def int_TGSI_lit_z : 
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>; +} + +include "SIIntrinsics.td" diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp new file mode 100644 index 0000000..ad48335 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -0,0 +1,24 @@ +//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Parent TargetRegisterInfo class common to all hw codegen targets. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPURegisterInfo.h" +#include "AMDGPUTargetMachine.h" + +using namespace llvm; + +AMDGPURegisterInfo::AMDGPURegisterInfo(AMDGPUTargetMachine &tm, + const TargetInstrInfo &tii) +: AMDILRegisterInfo(tm, tii), + TM(tm), + TII(tii) + { } diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h new file mode 100644 index 0000000..5863807 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h @@ -0,0 +1,42 @@ +//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the TargetRegisterInfo interface that is implemented +// by all hw codegen targets. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUREGISTERINFO_H_ +#define AMDGPUREGISTERINFO_H_ + +#include "AMDILRegisterInfo.h" + +namespace llvm { + +class AMDGPUTargetMachine; +class TargetInstrInfo; + +struct AMDGPURegisterInfo : public AMDILRegisterInfo +{ + AMDGPUTargetMachine &TM; + const TargetInstrInfo &TII; + + AMDGPURegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii); + + virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0; + + /// getISARegClass - rc is an AMDIL reg class. This function returns the + /// ISA reg class that is equivalent to the given AMDIL reg class. + virtual const TargetRegisterClass * + getISARegClass(const TargetRegisterClass * rc) const = 0; +}; + +} // End namespace llvm + +#endif // AMDGPUREGISTERINFO_H_ diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.td b/lib/Target/AMDGPU/AMDGPURegisterInfo.td new file mode 100644 index 0000000..8181e02 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.td @@ -0,0 +1,22 @@ +//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Tablegen register definitions common to all hw codegen targets. 
+// +//===----------------------------------------------------------------------===// + +let Namespace = "AMDGPU" in { + def sel_x : SubRegIndex; + def sel_y : SubRegIndex; + def sel_z : SubRegIndex; + def sel_w : SubRegIndex; +} + +include "R600RegisterInfo.td" +include "SIRegisterInfo.td" diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h new file mode 100644 index 0000000..96ace88 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -0,0 +1,36 @@ +//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file declares the AMDGPU specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef _AMDGPUSUBTARGET_H_ +#define _AMDGPUSUBTARGET_H_ +#include "AMDILSubtarget.h" + +namespace llvm { + +class AMDGPUSubtarget : public AMDILSubtarget +{ + InstrItineraryData InstrItins; + +public: + AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : + AMDILSubtarget(TT, CPU, FS) + { + InstrItins = getInstrItineraryForCPU(CPU); + } + + const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } +}; + +} // End namespace llvm + +#endif // AMDGPUSUBTARGET_H_ diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp new file mode 100644 index 0000000..a581aca --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -0,0 +1,162 @@ +//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// The AMDGPU target machine contains all of the hardware specific information +// needed to emit code for R600 and SI GPUs. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUTargetMachine.h" +#include "AMDGPU.h" +#include "R600ISelLowering.h" +#include "R600InstrInfo.h" +#include "SIISelLowering.h" +#include "SIInstrInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/PassManager.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_os_ostream.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +extern "C" void LLVMInitializeAMDGPUTarget() { + // Register the target + RegisterTargetMachine X(TheAMDGPUTarget); +} + +AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OptLevel +) +: + LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel), + Subtarget(TT, CPU, FS), + DataLayout(Subtarget.getDataLayout()), + FrameLowering(TargetFrameLowering::StackGrowsUp, + Subtarget.device()->getStackAlignment(), 0), + IntrinsicInfo(this), + InstrItins(&Subtarget.getInstrItineraryData()), + mDump(false) + +{ + // TLInfo uses InstrInfo so it must be initialized after. 
+ if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { + InstrInfo = new R600InstrInfo(*this); + TLInfo = new R600TargetLowering(*this); + } else { + InstrInfo = new SIInstrInfo(*this); + TLInfo = new SITargetLowering(*this); + } +} + +AMDGPUTargetMachine::~AMDGPUTargetMachine() +{ +} + +bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + bool DisableVerify, + AnalysisID StartAfter, + AnalysisID StopAfter) { + // XXX: Hack here addPassesToEmitFile will fail, but this is Ok since we are + // only using it to access addPassesToGenerateCode() + bool fail = LLVMTargetMachine::addPassesToEmitFile(PM, Out, FileType, + DisableVerify); + assert(fail); + + const AMDILSubtarget &STM = getSubtarget(); + std::string gpu = STM.getDeviceName(); + if (gpu == "SI") { + PM.add(createSICodeEmitterPass(Out)); + } else if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { + PM.add(createR600CodeEmitterPass(Out)); + } else { + abort(); + return true; + } + PM.add(createGCInfoDeleter()); + + return false; +} + +namespace { +class AMDGPUPassConfig : public TargetPassConfig { +public: + AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + AMDGPUTargetMachine &getAMDGPUTargetMachine() const { + return getTM(); + } + + virtual bool addPreISel(); + virtual bool addInstSelector(); + virtual bool addPreRegAlloc(); + virtual bool addPostRegAlloc(); + virtual bool addPreSched2(); + virtual bool addPreEmitPass(); +}; +} // End of anonymous namespace + +TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) { + return new AMDGPUPassConfig(this, PM); +} + +bool +AMDGPUPassConfig::addPreISel() +{ + const AMDILSubtarget &ST = TM->getSubtarget(); + if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { + addPass(createR600KernelParametersPass( + getAMDGPUTargetMachine().getTargetData())); + } + return false; +} + +bool 
AMDGPUPassConfig::addInstSelector() { + addPass(createAMDILPeepholeOpt(*TM)); + addPass(createAMDILISelDag(getAMDGPUTargetMachine())); + return false; +} + +bool AMDGPUPassConfig::addPreRegAlloc() { + const AMDILSubtarget &ST = TM->getSubtarget(); + + if (ST.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { + addPass(createSIAssignInterpRegsPass(*TM)); + } + addPass(createAMDGPUConvertToISAPass(*TM)); + return false; +} + +bool AMDGPUPassConfig::addPostRegAlloc() { + return false; +} + +bool AMDGPUPassConfig::addPreSched2() { + return false; +} + +bool AMDGPUPassConfig::addPreEmitPass() { + addPass(createAMDILCFGPreparationPass(*TM)); + addPass(createAMDILCFGStructurizerPass(*TM)); + + return false; +} + diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h new file mode 100644 index 0000000..c704864 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -0,0 +1,76 @@ +//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The AMDGPU TargetMachine interface definition for hw codegen targets. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPU_TARGET_MACHINE_H +#define AMDGPU_TARGET_MACHINE_H + +#include "AMDGPUInstrInfo.h" +#include "AMDGPUSubtarget.h" +#include "AMDILFrameLowering.h" +#include "AMDILIntrinsicInfo.h" +#include "R600ISelLowering.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Target/TargetData.h" + +namespace llvm { + +MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT); + +class AMDGPUTargetMachine : public LLVMTargetMachine { + + AMDGPUSubtarget Subtarget; + const TargetData DataLayout; + AMDILFrameLowering FrameLowering; + AMDILIntrinsicInfo IntrinsicInfo; + const AMDGPUInstrInfo * InstrInfo; + AMDGPUTargetLowering * TLInfo; + const InstrItineraryData* InstrItins; + bool mDump; + +public: + AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS, + StringRef CPU, + TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + ~AMDGPUTargetMachine(); + virtual const AMDILFrameLowering* getFrameLowering() const { + return &FrameLowering; + } + virtual const AMDILIntrinsicInfo* getIntrinsicInfo() const { + return &IntrinsicInfo; + } + virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;} + virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; } + virtual const AMDGPURegisterInfo *getRegisterInfo() const { + return &InstrInfo->getRegisterInfo(); + } + virtual AMDGPUTargetLowering * getTargetLowering() const { + return TLInfo; + } + virtual const InstrItineraryData* getInstrItineraryData() const { + return InstrItins; + } + virtual const TargetData* getTargetData() const { return &DataLayout; } + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + virtual bool addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + bool DisableVerify, + AnalysisID StartAfter = 0, + AnalysisID StopAfter = 0); +}; + +} // End namespace llvm + +#endif // 
AMDGPU_TARGET_MACHINE_H diff --git a/lib/Target/AMDGPU/AMDGPUUtil.cpp b/lib/Target/AMDGPU/AMDGPUUtil.cpp new file mode 100644 index 0000000..63b359f --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUUtil.cpp @@ -0,0 +1,139 @@ +//===-- AMDGPUUtil.cpp - AMDGPU Utility functions -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Common utility functions used by hw codegen targets +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUUtil.h" +#include "AMDGPURegisterInfo.h" +#include "AMDIL.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +// Some instructions act as place holders to emulate operations that the GPU +// hardware does automatically. This function can be used to check if +// an opcode falls into this category. 
+bool AMDGPU::isPlaceHolderOpcode(unsigned opcode) +{ + switch (opcode) { + default: return false; + case AMDGPU::RETURN: + case AMDGPU::LOAD_INPUT: + case AMDGPU::LAST: + case AMDGPU::MASK_WRITE: + case AMDGPU::RESERVE_REG: + return true; + } +} + +bool AMDGPU::isTransOp(unsigned opcode) +{ + switch(opcode) { + default: return false; + + case AMDGPU::COS_r600: + case AMDGPU::COS_eg: + case AMDGPU::MULLIT: + case AMDGPU::MUL_LIT_r600: + case AMDGPU::MUL_LIT_eg: + case AMDGPU::EXP_IEEE_r600: + case AMDGPU::EXP_IEEE_eg: + case AMDGPU::LOG_CLAMPED_r600: + case AMDGPU::LOG_IEEE_r600: + case AMDGPU::LOG_CLAMPED_eg: + case AMDGPU::LOG_IEEE_eg: + return true; + } +} + +bool AMDGPU::isTexOp(unsigned opcode) +{ + switch(opcode) { + default: return false; + case AMDGPU::TEX_LD: + case AMDGPU::TEX_GET_TEXTURE_RESINFO: + case AMDGPU::TEX_SAMPLE: + case AMDGPU::TEX_SAMPLE_C: + case AMDGPU::TEX_SAMPLE_L: + case AMDGPU::TEX_SAMPLE_C_L: + case AMDGPU::TEX_SAMPLE_LB: + case AMDGPU::TEX_SAMPLE_C_LB: + case AMDGPU::TEX_SAMPLE_G: + case AMDGPU::TEX_SAMPLE_C_G: + case AMDGPU::TEX_GET_GRADIENTS_H: + case AMDGPU::TEX_GET_GRADIENTS_V: + case AMDGPU::TEX_SET_GRADIENTS_H: + case AMDGPU::TEX_SET_GRADIENTS_V: + return true; + } +} + +bool AMDGPU::isReductionOp(unsigned opcode) +{ + switch(opcode) { + default: return false; + case AMDGPU::DOT4_r600: + case AMDGPU::DOT4_eg: + return true; + } +} + +bool AMDGPU::isCubeOp(unsigned opcode) +{ + switch(opcode) { + default: return false; + case AMDGPU::CUBE_r600: + case AMDGPU::CUBE_eg: + return true; + } +} + + +bool AMDGPU::isFCOp(unsigned opcode) +{ + switch(opcode) { + default: return false; + case AMDGPU::BREAK_LOGICALZ_f32: + case AMDGPU::BREAK_LOGICALNZ_i32: + case AMDGPU::BREAK_LOGICALZ_i32: + case AMDGPU::BREAK_LOGICALNZ_f32: + case AMDGPU::CONTINUE_LOGICALNZ_f32: + case AMDGPU::IF_LOGICALNZ_i32: + case AMDGPU::IF_LOGICALZ_f32: + case AMDGPU::ELSE: + case AMDGPU::ENDIF: + case AMDGPU::ENDLOOP: + case AMDGPU::IF_LOGICALNZ_f32: + case 
AMDGPU::WHILELOOP: + return true; + } +} + +void AMDGPU::utilAddLiveIn(MachineFunction * MF, + MachineRegisterInfo & MRI, + const TargetInstrInfo * TII, + unsigned physReg, unsigned virtReg) +{ + if (!MRI.isLiveIn(physReg)) { + MRI.addLiveIn(physReg, virtReg); + MF->front().addLiveIn(physReg); + BuildMI(MF->front(), MF->front().begin(), DebugLoc(), + TII->get(TargetOpcode::COPY), virtReg) + .addReg(physReg); + } else { + MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg)); + } +} diff --git a/lib/Target/AMDGPU/AMDGPUUtil.h b/lib/Target/AMDGPU/AMDGPUUtil.h new file mode 100644 index 0000000..e8b02b1 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUUtil.h @@ -0,0 +1,46 @@ +//===-- AMDGPUUtil.h - AMDGPU Utility function declarations -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Declarations for utility functions common to all hw codegen targets. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPU_UTIL_H +#define AMDGPU_UTIL_H + +namespace llvm { + +class MachineFunction; +class MachineRegisterInfo; +class TargetInstrInfo; + +namespace AMDGPU { + +bool isPlaceHolderOpcode(unsigned opcode); + +bool isTransOp(unsigned opcode); +bool isTexOp(unsigned opcode); +bool isReductionOp(unsigned opcode); +bool isCubeOp(unsigned opcode); +bool isFCOp(unsigned opcode); + +// XXX: Move these to AMDGPUInstrInfo.h +#define MO_FLAG_CLAMP (1 << 0) +#define MO_FLAG_NEG (1 << 1) +#define MO_FLAG_ABS (1 << 2) +#define MO_FLAG_MASK (1 << 3) + +void utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI, + const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg); + +} // End namespace AMDGPU + +} // End namespace llvm + +#endif // AMDGPU_UTIL_H diff --git a/lib/Target/AMDGPU/AMDIL.h b/lib/Target/AMDGPU/AMDIL.h new file mode 100644 index 0000000..4029f27 --- /dev/null +++ b/lib/Target/AMDGPU/AMDIL.h @@ -0,0 +1,251 @@ +//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// AMDIL back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AMDIL_H_ +#define AMDIL_H_ + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetMachine.h" + +#define AMDIL_MAJOR_VERSION 2 +#define AMDIL_MINOR_VERSION 0 +#define AMDIL_REVISION_NUMBER 74 +#define ARENA_SEGMENT_RESERVED_UAVS 12 +#define DEFAULT_ARENA_UAV_ID 8 +#define DEFAULT_RAW_UAV_ID 7 +#define GLOBAL_RETURN_RAW_UAV_ID 11 +#define HW_MAX_NUM_CB 8 +#define MAX_NUM_UNIQUE_UAVS 8 +#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8 +#define OPENCL_MAX_READ_IMAGES 128 +#define OPENCL_MAX_WRITE_IMAGES 8 +#define OPENCL_MAX_SAMPLERS 16 + +// The next two values can never be zero, as zero is the ID that is +// used to assert against. +#define DEFAULT_LDS_ID 1 +#define DEFAULT_GDS_ID 1 +#define DEFAULT_SCRATCH_ID 1 +#define DEFAULT_VEC_SLOTS 8 + +// SC->CAL version matchings. +#define CAL_VERSION_SC_150 1700 +#define CAL_VERSION_SC_149 1700 +#define CAL_VERSION_SC_148 1525 +#define CAL_VERSION_SC_147 1525 +#define CAL_VERSION_SC_146 1525 +#define CAL_VERSION_SC_145 1451 +#define CAL_VERSION_SC_144 1451 +#define CAL_VERSION_SC_143 1441 +#define CAL_VERSION_SC_142 1441 +#define CAL_VERSION_SC_141 1420 +#define CAL_VERSION_SC_140 1400 +#define CAL_VERSION_SC_139 1387 +#define CAL_VERSION_SC_138 1387 +#define CAL_APPEND_BUFFER_SUPPORT 1340 +#define CAL_VERSION_SC_137 1331 +#define CAL_VERSION_SC_136 982 +#define CAL_VERSION_SC_135 950 +#define CAL_VERSION_GLOBAL_RETURN_BUFFER 990 + +#define OCL_DEVICE_RV710 0x0001 +#define OCL_DEVICE_RV730 0x0002 +#define OCL_DEVICE_RV770 0x0004 +#define OCL_DEVICE_CEDAR 0x0008 +#define OCL_DEVICE_REDWOOD 0x0010 +#define OCL_DEVICE_JUNIPER 0x0020 +#define OCL_DEVICE_CYPRESS 0x0040 +#define OCL_DEVICE_CAICOS 0x0080 +#define OCL_DEVICE_TURKS 0x0100 +#define OCL_DEVICE_BARTS 0x0200 +#define OCL_DEVICE_CAYMAN 0x0400 +#define OCL_DEVICE_ALL 0x3FFF + +/// The number of function ID's that are reserved for +/// internal compiler usage. 
+const unsigned int RESERVED_FUNCS = 1024; + +#define AMDIL_OPT_LEVEL_DECL +#define AMDIL_OPT_LEVEL_VAR +#define AMDIL_OPT_LEVEL_VAR_NO_COMMA + +namespace llvm { +class AMDILInstrPrinter; +class FunctionPass; +class MCAsmInfo; +class raw_ostream; +class Target; +class TargetMachine; + +/// Instruction selection passes. +FunctionPass* + createAMDILISelDag(TargetMachine &TM AMDIL_OPT_LEVEL_DECL); +FunctionPass* + createAMDILPeepholeOpt(TargetMachine &TM AMDIL_OPT_LEVEL_DECL); + +/// Pre emit passes. +FunctionPass* + createAMDILCFGPreparationPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL); +FunctionPass* + createAMDILCFGStructurizerPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL); + +extern Target TheAMDILTarget; +extern Target TheAMDGPUTarget; +} // end namespace llvm; + +#define GET_REGINFO_ENUM +#include "AMDGPUGenRegisterInfo.inc" +#define GET_INSTRINFO_ENUM +#include "AMDGPUGenInstrInfo.inc" + +/// Include device information enumerations +#include "AMDILDeviceInfo.h" + +namespace llvm { +/// OpenCL uses address spaces to differentiate between +/// various memory regions on the hardware. On the CPU +/// all of the address spaces point to the same memory, +/// however on the GPU, each address space points to +/// a separate piece of memory that is unique from other +/// memory locations. +namespace AMDILAS { +enum AddressSpaces { + PRIVATE_ADDRESS = 0, // Address space for private memory. + GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0). + CONSTANT_ADDRESS = 2, // Address space for constant memory. + LOCAL_ADDRESS = 3, // Address space for local memory. + REGION_ADDRESS = 4, // Address space for region memory. + ADDRESS_NONE = 5, // Address space for unknown memory.
+ PARAM_D_ADDRESS = 6, // Address space for direct addressable parameter memory (CONST0) + PARAM_I_ADDRESS = 7, // Address space for indirect addressable parameter memory (VTX1) + USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI + LAST_ADDRESS = 9 +}; + +// This union/struct combination is an easy way to read out the +// exact bits that are needed. +typedef union ResourceRec { + struct { +#ifdef __BIG_ENDIAN__ + unsigned short isImage : 1; // Reserved for future use/llvm. + unsigned short ResourceID : 10; // Flag to specify the resource ID for + // the op. + unsigned short HardwareInst : 1; // Flag to specify that this instruction + // is a hardware instruction. + unsigned short ConflictPtr : 1; // Flag to specify that the pointer has a + // conflict. + unsigned short ByteStore : 1; // Flag to specify if the op is a byte + // store op. + unsigned short PointerPath : 1; // Flag to specify if the op is on the + // pointer path. + unsigned short CacheableRead : 1; // Flag to specify if the read is + // cacheable. +#else + unsigned short CacheableRead : 1; // Flag to specify if the read is + // cacheable. + unsigned short PointerPath : 1; // Flag to specify if the op is on the + // pointer path. + unsigned short ByteStore : 1; // Flag to specify if the op is byte + // store op. + unsigned short ConflictPtr : 1; // Flag to specify that the pointer has + // a conflict. + unsigned short HardwareInst : 1; // Flag to specify that this instruction + // is a hardware instruction. + unsigned short ResourceID : 10; // Flag to specify the resource ID for + // the op. + unsigned short isImage : 1; // Reserved for future use. +#endif + } bits; + unsigned short u16all; +} InstrResEnc; + +} // namespace AMDILAS + +// Enums corresponding to AMDIL condition codes for IL. These +// values must be kept in sync with the ones in the .td file. +namespace AMDILCC { +enum CondCodes { + // AMDIL specific condition codes.
These correspond to the IL_CC_* + // in AMDILInstrInfo.td and must be kept in the same order. + IL_CC_D_EQ = 0, // DEQ instruction. + IL_CC_D_GE = 1, // DGE instruction. + IL_CC_D_LT = 2, // DLT instruction. + IL_CC_D_NE = 3, // DNE instruction. + IL_CC_F_EQ = 4, // EQ instruction. + IL_CC_F_GE = 5, // GE instruction. + IL_CC_F_LT = 6, // LT instruction. + IL_CC_F_NE = 7, // NE instruction. + IL_CC_I_EQ = 8, // IEQ instruction. + IL_CC_I_GE = 9, // IGE instruction. + IL_CC_I_LT = 10, // ILT instruction. + IL_CC_I_NE = 11, // INE instruction. + IL_CC_U_GE = 12, // UGE instruction. + IL_CC_U_LT = 13, // ULT instruction. + // Pseudo IL Comparison instructions here. + IL_CC_F_GT = 14, // GT instruction. + IL_CC_U_GT = 15, + IL_CC_I_GT = 16, + IL_CC_D_GT = 17, + IL_CC_F_LE = 18, // LE instruction + IL_CC_U_LE = 19, + IL_CC_I_LE = 20, + IL_CC_D_LE = 21, + IL_CC_F_UNE = 22, + IL_CC_F_UEQ = 23, + IL_CC_F_ULT = 24, + IL_CC_F_UGT = 25, + IL_CC_F_ULE = 26, + IL_CC_F_UGE = 27, + IL_CC_F_ONE = 28, + IL_CC_F_OEQ = 29, + IL_CC_F_OLT = 30, + IL_CC_F_OGT = 31, + IL_CC_F_OLE = 32, + IL_CC_F_OGE = 33, + IL_CC_D_UNE = 34, + IL_CC_D_UEQ = 35, + IL_CC_D_ULT = 36, + IL_CC_D_UGT = 37, + IL_CC_D_ULE = 38, + IL_CC_D_UGE = 39, + IL_CC_D_ONE = 40, + IL_CC_D_OEQ = 41, + IL_CC_D_OLT = 42, + IL_CC_D_OGT = 43, + IL_CC_D_OLE = 44, + IL_CC_D_OGE = 45, + IL_CC_U_EQ = 46, + IL_CC_U_NE = 47, + IL_CC_F_O = 48, + IL_CC_D_O = 49, + IL_CC_F_UO = 50, + IL_CC_D_UO = 51, + IL_CC_L_LE = 52, + IL_CC_L_GE = 53, + IL_CC_L_EQ = 54, + IL_CC_L_NE = 55, + IL_CC_L_LT = 56, + IL_CC_L_GT = 57, + IL_CC_UL_LE = 58, + IL_CC_UL_GE = 59, + IL_CC_UL_EQ = 60, + IL_CC_UL_NE = 61, + IL_CC_UL_LT = 62, + IL_CC_UL_GT = 63, + COND_ERROR = 64 +}; + +} // end namespace AMDILCC +} // end namespace llvm +#endif // AMDIL_H_ diff --git a/lib/Target/AMDGPU/AMDIL7XXDevice.cpp b/lib/Target/AMDGPU/AMDIL7XXDevice.cpp new file mode 100644 index 0000000..3f2f821 --- /dev/null +++ b/lib/Target/AMDGPU/AMDIL7XXDevice.cpp @@ -0,0 +1,128 @@ +//===--
AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +#include "AMDIL7XXDevice.h" +#include "AMDILDevice.h" + +using namespace llvm; + +AMDIL7XXDevice::AMDIL7XXDevice(AMDILSubtarget *ST) : AMDILDevice(ST) +{ + setCaps(); + std::string name = mSTM->getDeviceName(); + if (name == "rv710") { + mDeviceFlag = OCL_DEVICE_RV710; + } else if (name == "rv730") { + mDeviceFlag = OCL_DEVICE_RV730; + } else { + mDeviceFlag = OCL_DEVICE_RV770; + } +} + +AMDIL7XXDevice::~AMDIL7XXDevice() +{ +} + +void AMDIL7XXDevice::setCaps() +{ + mSWBits.set(AMDILDeviceInfo::LocalMem); +} + +size_t AMDIL7XXDevice::getMaxLDSSize() const +{ + if (usesHardware(AMDILDeviceInfo::LocalMem)) { + return MAX_LDS_SIZE_700; + } + return 0; +} + +size_t AMDIL7XXDevice::getWavefrontSize() const +{ + return AMDILDevice::HalfWavefrontSize; +} + +uint32_t AMDIL7XXDevice::getGeneration() const +{ + return AMDILDeviceInfo::HD4XXX; +} + +uint32_t AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const +{ + switch (DeviceID) { + default: + assert(0 && "ID type passed in is unknown!"); + break; + case GLOBAL_ID: + case CONSTANT_ID: + case RAW_UAV_ID: + case ARENA_UAV_ID: + break; + case LDS_ID: + if (usesHardware(AMDILDeviceInfo::LocalMem)) { + return DEFAULT_LDS_ID; + } + break; + case SCRATCH_ID: + if (usesHardware(AMDILDeviceInfo::PrivateMem)) { + return DEFAULT_SCRATCH_ID; + } + break; + case GDS_ID: + assert(0 && "GDS UAV ID is not supported on this chip"); + if (usesHardware(AMDILDeviceInfo::RegionMem)) { + return DEFAULT_GDS_ID; + } + break; + }; + + return 0; +} + +uint32_t AMDIL7XXDevice::getMaxNumUAVs() const +{ + return 1; +} + +AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST): AMDIL7XXDevice(ST) +{ + setCaps(); +} + 
+AMDIL770Device::~AMDIL770Device() +{ +} + +void AMDIL770Device::setCaps() +{ + if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) { + mSWBits.set(AMDILDeviceInfo::FMA); + mHWBits.set(AMDILDeviceInfo::DoubleOps); + } + mSWBits.set(AMDILDeviceInfo::BarrierDetect); + mHWBits.reset(AMDILDeviceInfo::LongOps); + mSWBits.set(AMDILDeviceInfo::LongOps); + mSWBits.set(AMDILDeviceInfo::LocalMem); +} + +size_t AMDIL770Device::getWavefrontSize() const +{ + return AMDILDevice::WavefrontSize; +} + +AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST) +{ +} + +AMDIL710Device::~AMDIL710Device() +{ +} + +size_t AMDIL710Device::getWavefrontSize() const +{ + return AMDILDevice::QuarterWavefrontSize; +} diff --git a/lib/Target/AMDGPU/AMDIL7XXDevice.h b/lib/Target/AMDGPU/AMDIL7XXDevice.h new file mode 100644 index 0000000..4d8d47a --- /dev/null +++ b/lib/Target/AMDGPU/AMDIL7XXDevice.h @@ -0,0 +1,71 @@ +//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Interface for the subtarget data classes. +// +//===----------------------------------------------------------------------===// +// This file will define the interface that each generation needs to +// implement in order to correctly answer queries on the capabilities of the +// specific hardware. 
+//===----------------------------------------------------------------------===// +#ifndef _AMDIL7XXDEVICEIMPL_H_ +#define _AMDIL7XXDEVICEIMPL_H_ +#include "AMDILDevice.h" +#include "AMDILSubtarget.h" + +namespace llvm { +class AMDILSubtarget; + +//===----------------------------------------------------------------------===// +// 7XX generation of devices and their respective sub classes +//===----------------------------------------------------------------------===// + +// The AMDIL7XXDevice class represents the generic 7XX device. All 7XX +// devices are derived from this class. The AMDIL7XX device will only +// support the minimal features that are required to be considered OpenCL 1.0 +// compliant and nothing more. +class AMDIL7XXDevice : public AMDILDevice { +public: + AMDIL7XXDevice(AMDILSubtarget *ST); + virtual ~AMDIL7XXDevice(); + virtual size_t getMaxLDSSize() const; + virtual size_t getWavefrontSize() const; + virtual uint32_t getGeneration() const; + virtual uint32_t getResourceID(uint32_t DeviceID) const; + virtual uint32_t getMaxNumUAVs() const; + +protected: + virtual void setCaps(); +}; // AMDIL7XXDevice + +// The AMDIL770Device class represents the RV770 chip and its +// derivative cards. The difference between this device and the base +// class is this device adds support for double precision +// and has a larger wavefront size. +class AMDIL770Device : public AMDIL7XXDevice { +public: + AMDIL770Device(AMDILSubtarget *ST); + virtual ~AMDIL770Device(); + virtual size_t getWavefrontSize() const; +private: + virtual void setCaps(); +}; // AMDIL770Device + +// The AMDIL710Device class derives from the 7XX base class, but this +// class is a smaller derivative, so we need to overload some of the +// functions in order to correctly specify this information.
+class AMDIL710Device : public AMDIL7XXDevice { +public: + AMDIL710Device(AMDILSubtarget *ST); + virtual ~AMDIL710Device(); + virtual size_t getWavefrontSize() const; +}; // AMDIL710Device + +} // namespace llvm +#endif // _AMDIL7XXDEVICEIMPL_H_ diff --git a/lib/Target/AMDGPU/AMDILAlgorithms.tpp b/lib/Target/AMDGPU/AMDILAlgorithms.tpp new file mode 100644 index 0000000..058475f --- /dev/null +++ b/lib/Target/AMDGPU/AMDILAlgorithms.tpp @@ -0,0 +1,93 @@ +//===------ AMDILAlgorithms.tpp - AMDIL Template Algorithms Header --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides template algorithms that extend the STL algorithms, but +// are useful for the AMDIL backend +// +//===----------------------------------------------------------------------===// + +// A template function that loops through the iterators and passes the second +// argument along with each iterator to the function. If the function returns +// true, then the current iterator is invalidated and it moves back, before +// moving forward to the next iterator, otherwise it moves forward without +// issue. This is based on the for_each STL function, but allows a reference to +// the second argument +template +Function binaryForEach(InputIterator First, InputIterator Last, Function F, + Arg &Second) +{ + for ( ; First!=Last; ++First ) { + F(*First, Second); + } + return F; +} + +template +Function safeBinaryForEach(InputIterator First, InputIterator Last, Function F, + Arg &Second) +{ + for ( ; First!=Last; ++First ) { + if (F(*First, Second)) { + --First; + } + } + return F; +} + +// A template function that has two levels of looping before calling the +// function with the passed in argument.
See binaryForEach for further +// explanation +template +Function binaryNestedForEach(InputIterator First, InputIterator Last, + Function F, Arg &Second) +{ + for ( ; First != Last; ++First) { + binaryForEach(First->begin(), First->end(), F, Second); + } + return F; +} +template +Function safeBinaryNestedForEach(InputIterator First, InputIterator Last, + Function F, Arg &Second) +{ + for ( ; First != Last; ++First) { + safeBinaryForEach(First->begin(), First->end(), F, Second); + } + return F; +} + +// Unlike the STL, a pointer to the iterator itself is passed in with the 'safe' +// versions of these functions This allows the function to handle situations +// such as invalidated iterators +template +Function safeForEach(InputIterator First, InputIterator Last, Function F) +{ + for ( ; First!=Last; ++First ) F(&First) + ; // Do nothing. + return F; +} + +// A template function that has two levels of looping before calling the +// function with a pointer to the current iterator. See binaryForEach for +// further explanation +template +Function safeNestedForEach(InputIterator First, InputIterator Last, + SecondIterator S, Function F) +{ + for ( ; First != Last; ++First) { + SecondIterator sf, sl; + for (sf = First->begin(), sl = First->end(); + sf != sl; ) { + if (!F(&sf)) { + ++sf; + } + } + } + return F; +} diff --git a/lib/Target/AMDGPU/AMDILBase.td b/lib/Target/AMDGPU/AMDILBase.td new file mode 100644 index 0000000..8a2d34a --- /dev/null +++ b/lib/Target/AMDGPU/AMDILBase.td @@ -0,0 +1,113 @@ +//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +// Dummy Instruction itineraries for pseudo instructions +def ALU_NULL : FuncUnit; +def NullALU : InstrItinClass; + +//===----------------------------------------------------------------------===// +// AMDIL Subtarget features. +//===----------------------------------------------------------------------===// +def FeatureFP64 : SubtargetFeature<"fp64", + "CapsOverride[AMDILDeviceInfo::DoubleOps]", + "true", + "Enable 64bit double precision operations">; +def FeatureByteAddress : SubtargetFeature<"byte_addressable_store", + "CapsOverride[AMDILDeviceInfo::ByteStores]", + "true", + "Enable byte addressable stores">; +def FeatureBarrierDetect : SubtargetFeature<"barrier_detect", + "CapsOverride[AMDILDeviceInfo::BarrierDetect]", + "true", + "Enable duplicate barrier detection(HD5XXX or later).">; +def FeatureImages : SubtargetFeature<"images", + "CapsOverride[AMDILDeviceInfo::Images]", + "true", + "Enable image functions">; +def FeatureMultiUAV : SubtargetFeature<"multi_uav", + "CapsOverride[AMDILDeviceInfo::MultiUAV]", + "true", + "Generate multiple UAV code(HD5XXX family or later)">; +def FeatureMacroDB : SubtargetFeature<"macrodb", + "CapsOverride[AMDILDeviceInfo::MacroDB]", + "true", + "Use internal macrodb, instead of macrodb in driver">; +def FeatureNoAlias : SubtargetFeature<"noalias", + "CapsOverride[AMDILDeviceInfo::NoAlias]", + "true", + "assert that all kernel argument pointers are not aliased">; +def FeatureNoInline : SubtargetFeature<"no-inline", + "CapsOverride[AMDILDeviceInfo::NoInline]", + "true", + "specify whether to not inline functions">; + +def Feature64BitPtr : SubtargetFeature<"64BitPtr", + "mIs64bit", + "false", + "Specify if 64bit addressing should be used.">; + +def 
Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr", + "mIs32on64bit", + "false", + "Specify if 64bit sized pointers with 32bit addressing should be used.">; +def FeatureDebug : SubtargetFeature<"debug", + "CapsOverride[AMDILDeviceInfo::Debug]", + "true", + "Debug mode is enabled, so disable hardware accelerated address spaces.">; +def FeatureDumpCode : SubtargetFeature <"DumpCode", + "mDumpCode", + "true", + "Dump MachineInstrs in the CodeEmitter">; + + +//===----------------------------------------------------------------------===// +// Register File, Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// + + +include "AMDILRegisterInfo.td" +include "AMDILCallingConv.td" +include "AMDILInstrInfo.td" + +def AMDILInstrInfo : InstrInfo {} + +//===----------------------------------------------------------------------===// +// AMDIL processors supported. +//===----------------------------------------------------------------------===// +//include "Processors.td" + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// +def AMDILAsmWriter : AsmWriter { + string AsmWriterClassName = "AsmPrinter"; + int Variant = 0; +} + +def AMDILAsmParser : AsmParser { + string AsmParserClassName = "AsmParser"; + int Variant = 0; + + string CommentDelimiter = ";"; + + string RegisterPrefix = "r"; + +} + + +def AMDIL : Target { + // Pull in Instruction Info: + let InstructionSet = AMDILInstrInfo; + let AssemblyWriters = [AMDILAsmWriter]; + let AssemblyParsers = [AMDILAsmParser]; +} diff --git a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp new file mode 100644 index 0000000..1f1a6da --- /dev/null +++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -0,0 +1,3236 @@ +//===-- AMDILCFGStructurizer.cpp - CFG Structurizer 
-----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// + +#define DEBUGME 0 +#define DEBUG_TYPE "structcfg" + +#include "AMDIL.h" +#include "AMDILInstrInfo.h" +#include "AMDILRegisterInfo.h" +#include "AMDILUtilityFunctions.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DominatorInternals.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define FirstNonDebugInstr(A) A->begin() +using namespace llvm; + +// TODO: move-begin. + +//===----------------------------------------------------------------------===// +// +// Statistics for CFGStructurizer. 
+// +//===----------------------------------------------------------------------===// + +STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " + "matched"); +STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " + "matched"); +STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break " + "pattern matched"); +STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue " + "pattern matched"); +STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern " + "matched"); +STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); +STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); + +//===----------------------------------------------------------------------===// +// +// Miscellaneous utility for CFGStructurizer. +// +//===----------------------------------------------------------------------===// +namespace llvmCFGStruct +{ +#define SHOWNEWINSTR(i) \ + if (DEBUGME) errs() << "New instr: " << *i << "\n" + +#define SHOWNEWBLK(b, msg) \ +if (DEBUGME) { \ + errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ + errs() << "\n"; \ +} + +#define SHOWBLK_DETAIL(b, msg) \ +if (DEBUGME) { \ + if (b) { \ + errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ + b->print(errs()); \ + errs() << "\n"; \ + } \ +} + +#define INVALIDSCCNUM -1 +#define INVALIDREGNUM 0 + +template +void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) { + for (typename LoopinfoT::iterator iter = LoopInfo.begin(), + iterEnd = LoopInfo.end(); + iter != iterEnd; ++iter) { + (*iter)->print(OS, 0); + } +} + +template +void ReverseVector(SmallVector &Src) { + size_t sz = Src.size(); + for (size_t i = 0; i < sz/2; ++i) { + NodeT *t = Src[i]; + Src[i] = Src[sz - i - 1]; + Src[sz - i - 1] = t; + } +} + +} //end namespace llvmCFGStruct + + +//===----------------------------------------------------------------------===// +// +// MachinePostDominatorTree +// 
+//===----------------------------------------------------------------------===// + +namespace llvm { + +/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used +/// to compute a post-dominator tree. +/// +struct MachinePostDominatorTree : public MachineFunctionPass { + static char ID; // Pass identification, replacement for typeid + DominatorTreeBase *DT; + MachinePostDominatorTree() : MachineFunctionPass(ID) + { + DT = new DominatorTreeBase(true); //true indicates + // postdominator + } + + ~MachinePostDominatorTree(); + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + inline const std::vector &getRoots() const { + return DT->getRoots(); + } + + inline MachineDomTreeNode *getRootNode() const { + return DT->getRootNode(); + } + + inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { + return DT->getNode(BB); + } + + inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { + return DT->getNode(BB); + } + + inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const { + return DT->dominates(A, B); + } + + inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const { + return DT->dominates(A, B); + } + + inline bool + properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const { + return DT->properlyDominates(A, B); + } + + inline bool + properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const { + return DT->properlyDominates(A, B); + } + + inline MachineBasicBlock * + findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) { + return DT->findNearestCommonDominator(A, B); + } + + virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const { + DT->print(OS); + } +}; +} //end of namespace llvm + +char MachinePostDominatorTree::ID = 0; +static RegisterPass
+machinePostDominatorTreePass("machinepostdomtree", + "MachinePostDominator Tree Construction", + true, true); + +//const PassInfo *const llvm::MachinePostDominatorsID +//= &machinePostDominatorTreePass; + +bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { + DT->recalculate(F); + //DEBUG(DT->dump()); + return false; +} + +MachinePostDominatorTree::~MachinePostDominatorTree() { + delete DT; +} + +//===----------------------------------------------------------------------===// +// +// supporting data structure for CFGStructurizer +// +//===----------------------------------------------------------------------===// + +namespace llvmCFGStruct +{ +template +struct CFGStructTraits { +}; + +template +class BlockInformation { +public: + bool isRetired; + int sccNum; + //SmallVector succInstr; + //Instructions defining the corresponding successor. + BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {} +}; + +template +class LandInformation { +public: + BlockT *landBlk; + std::set breakInitRegs; //Registers that need to "reg = 0", before + //WHILELOOP(thisloop) init before entering + //thisloop. + std::set contInitRegs; //Registers that need to "reg = 0", after + //WHILELOOP(thisloop) init after entering + //thisloop. + std::set endbranchInitRegs; //Init before entering this loop, at loop + //land block, branch cond on this reg. + std::set breakOnRegs; //registers that need to "if (reg) break + //endif" after ENDLOOP(thisloop) break + //outerLoopOf(thisLoop). + std::set contOnRegs; //registers that need to "if (reg) continue + //endif" after ENDLOOP(thisloop) continue on + //outerLoopOf(thisLoop). 
+ LandInformation() : landBlk(NULL) {} +}; + +} //end of namespace llvmCFGStruct + +//===----------------------------------------------------------------------===// +// +// CFGStructurizer +// +//===----------------------------------------------------------------------===// + +namespace llvmCFGStruct +{ +// bixia TODO: port it to BasicBlock, not just MachineBasicBlock. +template +class CFGStructurizer +{ +public: + typedef enum { + Not_SinglePath = 0, + SinglePath_InPath = 1, + SinglePath_NotInPath = 2 + } PathToKind; + +public: + typedef typename PassT::InstructionType InstrT; + typedef typename PassT::FunctionType FuncT; + typedef typename PassT::DominatortreeType DomTreeT; + typedef typename PassT::PostDominatortreeType PostDomTreeT; + typedef typename PassT::DomTreeNodeType DomTreeNodeT; + typedef typename PassT::LoopinfoType LoopInfoT; + + typedef GraphTraits FuncGTraits; + //typedef FuncGTraits::nodes_iterator BlockIterator; + typedef typename FuncT::iterator BlockIterator; + + typedef typename FuncGTraits::NodeType BlockT; + typedef GraphTraits BlockGTraits; + typedef GraphTraits > InvBlockGTraits; + //typedef BlockGTraits::succ_iterator InstructionIterator; + typedef typename BlockT::iterator InstrIterator; + + typedef CFGStructTraits CFGTraits; + typedef BlockInformation BlockInfo; + typedef std::map BlockInfoMap; + + typedef int RegiT; + typedef typename PassT::LoopType LoopT; + typedef LandInformation LoopLandInfo; + typedef std::map LoopLandInfoMap; + //landing info for loop break + typedef SmallVector BlockTSmallerVector; + +public: + CFGStructurizer(); + ~CFGStructurizer(); + + /// Perform the CFG structurization + bool run(FuncT &Func, PassT &Pass, const AMDILRegisterInfo *tri); + + /// Perform the CFG preparation + bool prepare(FuncT &Func, PassT &Pass, const AMDILRegisterInfo *tri); + +private: + void orderBlocks(); + void printOrderedBlocks(llvm::raw_ostream &OS); + int patternMatch(BlockT *CurBlock); + int patternMatchGroup(BlockT *CurBlock); + + 
int serialPatternMatch(BlockT *CurBlock); + int ifPatternMatch(BlockT *CurBlock); + int switchPatternMatch(BlockT *CurBlock); + int loopendPatternMatch(BlockT *CurBlock); + int loopPatternMatch(BlockT *CurBlock); + + int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); + int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); + //int loopWithoutBreak(BlockT *); + + void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop, + BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock); + void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop, + BlockT *ContBlock, LoopT *contLoop); + bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block); + int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, + BlockT *FalseBlock); + int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock, + BlockT *FalseBlock); + int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, + BlockT *FalseBlock, BlockT **LandBlockPtr); + void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, + BlockT *FalseBlock, BlockT *LandBlock, + bool Detail = false); + PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock, + bool AllowSideEntry = true); + BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock, + bool AllowSideEntry = true); + int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock); + void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock); + + void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock, + BlockT *TrueBlock, BlockT *FalseBlock, + BlockT *LandBlock); + void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand); + void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock, + BlockT *ExitLandBlock, RegiT SetReg); + void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock, + RegiT SetReg); + BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep, + std::set &ExitBlockSet, + BlockT *ExitLandBlk); + BlockT *addLoopEndbranchBlock(LoopT *LoopRep, + 
BlockTSmallerVector &ExitingBlocks, + BlockTSmallerVector &ExitBlocks); + BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep); + void removeUnconditionalBranch(BlockT *SrcBlock); + void removeRedundantConditionalBranch(BlockT *SrcBlock); + void addDummyExitBlock(SmallVector &RetBlocks); + + void removeSuccessor(BlockT *SrcBlock); + BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock); + BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock); + + void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock, + InstrIterator InsertPos); + + void recordSccnum(BlockT *SrcBlock, int SCCNum); + int getSCCNum(BlockT *srcBlk); + + void retireBlock(BlockT *DstBlock, BlockT *SrcBlock); + bool isRetiredBlock(BlockT *SrcBlock); + bool isActiveLoophead(BlockT *CurBlock); + bool needMigrateBlock(BlockT *Block); + + BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock, + BlockTSmallerVector &exitBlocks, + std::set &ExitBlockSet); + void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL); + BlockT *getLoopLandBlock(LoopT *LoopRep); + LoopLandInfo *getLoopLandInfo(LoopT *LoopRep); + + void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum); + void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum); + void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum); + void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum); + void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum); + + bool hasBackEdge(BlockT *curBlock); + unsigned getLoopDepth (LoopT *LoopRep); + int countActiveBlock( + typename SmallVector::const_iterator IterStart, + typename SmallVector::const_iterator IterEnd); + BlockT *findNearestCommonPostDom(std::set&); + BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2); + +private: + DomTreeT *domTree; + PostDomTreeT *postDomTree; + LoopInfoT *loopInfo; + PassT *passRep; + FuncT *funcRep; + + BlockInfoMap blockInfoMap; + LoopLandInfoMap loopLandInfoMap; + SmallVector orderedBlks; + const AMDILRegisterInfo *TRI; + +}; //template class 
CFGStructurizer + +template CFGStructurizer::CFGStructurizer() + : domTree(NULL), postDomTree(NULL), loopInfo(NULL) { +} + +template CFGStructurizer::~CFGStructurizer() { + for (typename BlockInfoMap::iterator I = blockInfoMap.begin(), + E = blockInfoMap.end(); I != E; ++I) { + delete I->second; + } +} + +template +bool CFGStructurizer::prepare(FuncT &func, PassT &pass, + const AMDILRegisterInfo * tri) { + passRep = &pass; + funcRep = &func; + TRI = tri; + + bool changed = false; + //func.RenumberBlocks(); + + //to do, if not reducible flow graph, make it so ??? + + if (DEBUGME) { + errs() << "AMDILCFGStructurizer::prepare\n"; + //func.viewCFG(); + //func.viewCFGOnly(); + //func.dump(); + } + + //FIXME: gcc complains on this. + //domTree = &pass.getAnalysis(); + //domTree = CFGTraits::getDominatorTree(pass); + //if (DEBUGME) { + // domTree->print(errs()); + //} + + //FIXME: gcc complains on this. + //domTree = &pass.getAnalysis(); + //postDomTree = CFGTraits::getPostDominatorTree(pass); + //if (DEBUGME) { + // postDomTree->print(errs()); + //} + + //FIXME: gcc complains on this. + //loopInfo = &pass.getAnalysis(); + loopInfo = CFGTraits::getLoopInfo(pass); + if (DEBUGME) { + errs() << "LoopInfo:\n"; + PrintLoopinfo(*loopInfo, errs()); + } + + orderBlocks(); + if (DEBUGME) { + errs() << "Ordered blocks:\n"; + printOrderedBlocks(errs()); + } + + SmallVector retBlks; + + for (typename LoopInfoT::iterator iter = loopInfo->begin(), + iterEnd = loopInfo->end(); + iter != iterEnd; ++iter) { + LoopT* loopRep = (*iter); + BlockTSmallerVector exitingBlks; + loopRep->getExitingBlocks(exitingBlks); + + if (exitingBlks.size() == 0) { + BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep); + if (dummyExitBlk != NULL) + retBlks.push_back(dummyExitBlk); + } + } + + // Remove unconditional branch instr. + // Add dummy exit block iff there are multiple returns. 
+ + for (typename SmallVector::const_iterator + iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end(); + iterBlk != iterEndBlk; + ++iterBlk) { + BlockT *curBlk = *iterBlk; + removeUnconditionalBranch(curBlk); + removeRedundantConditionalBranch(curBlk); + if (CFGTraits::isReturnBlock(curBlk)) { + retBlks.push_back(curBlk); + } + assert(curBlk->succ_size() <= 2); + //assert(curBlk->size() > 0); + //removeEmptyBlock(curBlk) ?? + } //for + + if (retBlks.size() >= 2) { + addDummyExitBlock(retBlks); + changed = true; + } + + return changed; +} //CFGStructurizer::prepare + +template +bool CFGStructurizer::run(FuncT &func, PassT &pass, + const AMDILRegisterInfo * tri) { + passRep = &pass; + funcRep = &func; + TRI = tri; + + //func.RenumberBlocks(); + + //Assume reducible CFG... + if (DEBUGME) { + errs() << "AMDILCFGStructurizer::run\n"; + //errs() << func.getFunction()->getNameStr() << "\n"; + func.viewCFG(); + //func.viewCFGOnly(); + //func.dump(); + } + +#if 1 + //FIXME: gcc complains on this. + //domTree = &pass.getAnalysis(); + domTree = CFGTraits::getDominatorTree(pass); + if (DEBUGME) { + domTree->print(errs(), (const llvm::Module*)0); + } +#endif + + //FIXME: gcc complains on this. + //domTree = &pass.getAnalysis(); + postDomTree = CFGTraits::getPostDominatorTree(pass); + if (DEBUGME) { + postDomTree->print(errs()); + } + + //FIXME: gcc complains on this. + //loopInfo = &pass.getAnalysis(); + loopInfo = CFGTraits::getLoopInfo(pass); + if (DEBUGME) { + errs() << "LoopInfo:\n"; + PrintLoopinfo(*loopInfo, errs()); + } + + orderBlocks(); +//#define STRESSTEST +#ifdef STRESSTEST + //Use the worse block ordering to test the algorithm. 
+ ReverseVector(orderedBlks); +#endif + + if (DEBUGME) { + errs() << "Ordered blocks:\n"; + printOrderedBlocks(errs()); + } + int numIter = 0; + bool finish = false; + BlockT *curBlk; + bool makeProgress = false; + int numRemainedBlk = countActiveBlock(orderedBlks.begin(), + orderedBlks.end()); + + do { + ++numIter; + if (DEBUGME) { + errs() << "numIter = " << numIter + << ", numRemaintedBlk = " << numRemainedBlk << "\n"; + } + + typename SmallVector::const_iterator + iterBlk = orderedBlks.begin(); + typename SmallVector::const_iterator + iterBlkEnd = orderedBlks.end(); + + typename SmallVector::const_iterator + sccBeginIter = iterBlk; + BlockT *sccBeginBlk = NULL; + int sccNumBlk = 0; // The number of active blocks, init to a + // maximum possible number. + int sccNumIter; // Number of iteration in this SCC. + + while (iterBlk != iterBlkEnd) { + curBlk = *iterBlk; + + if (sccBeginBlk == NULL) { + sccBeginIter = iterBlk; + sccBeginBlk = curBlk; + sccNumIter = 0; + sccNumBlk = numRemainedBlk; // Init to maximum possible number. + if (DEBUGME) { + errs() << "start processing SCC" << getSCCNum(sccBeginBlk); + errs() << "\n"; + } + } + + if (!isRetiredBlock(curBlk)) { + patternMatch(curBlk); + } + + ++iterBlk; + + bool contNextScc = true; + if (iterBlk == iterBlkEnd + || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) { + // Just finish one scc. 
+ ++sccNumIter; + int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk); + if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) { + if (DEBUGME) { + errs() << "Can't reduce SCC " << getSCCNum(curBlk) + << ", sccNumIter = " << sccNumIter; + errs() << "doesn't make any progress\n"; + } + contNextScc = true; + } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) { + sccNumBlk = sccRemainedNumBlk; + iterBlk = sccBeginIter; + contNextScc = false; + if (DEBUGME) { + errs() << "repeat processing SCC" << getSCCNum(curBlk) + << "sccNumIter = " << sccNumIter << "\n"; + func.viewCFG(); + //func.viewCFGOnly(); + } + } else { + // Finish the current scc. + contNextScc = true; + } + } else { + // Continue on next component in the current scc. + contNextScc = false; + } + + if (contNextScc) { + sccBeginBlk = NULL; + } + } //while, "one iteration" over the function. + + BlockT *entryBlk = FuncGTraits::nodes_begin(&func); + if (entryBlk->succ_size() == 0) { + finish = true; + if (DEBUGME) { + errs() << "Reduce to one block\n"; + } + } else { + int newnumRemainedBlk + = countActiveBlock(orderedBlks.begin(), orderedBlks.end()); + // consider cloned blocks ?? + if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) { + makeProgress = true; + numRemainedBlk = newnumRemainedBlk; + } else { + makeProgress = false; + if (DEBUGME) { + errs() << "No progress\n"; + } + } + } + } while (!finish && makeProgress); + + // Misc wrap up to maintain the consistency of the Function representation. + CFGTraits::wrapup(FuncGTraits::nodes_begin(&func)); + + // Detach retired Block, release memory. 
+ for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(), + iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) { + if ((*iterMap).second && (*iterMap).second->isRetired) { + assert(((*iterMap).first)->getNumber() != -1); + if (DEBUGME) { + errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n"; + } + (*iterMap).first->eraseFromParent(); //Remove from the parent Function. + } + delete (*iterMap).second; + } + blockInfoMap.clear(); + + // clear loopLandInfoMap + for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(), + iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) { + delete (*iterMap).second; + } + loopLandInfoMap.clear(); + + if (DEBUGME) { + func.viewCFG(); + //func.dump(); + } + + if (!finish) { + assert(!"IRREDUCIBL_CF"); + } + + return true; +} //CFGStructurizer::run + +/// Print the ordered Blocks. +/// +template +void CFGStructurizer::printOrderedBlocks(llvm::raw_ostream &os) { + size_t i = 0; + for (typename SmallVector::const_iterator + iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); + iterBlk != iterBlkEnd; + ++iterBlk, ++i) { + os << "BB" << (*iterBlk)->getNumber(); + os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; + if (i != 0 && i % 10 == 0) { + os << "\n"; + } else { + os << " "; + } + } +} //printOrderedBlocks + +/// Compute the reversed DFS post order of Blocks +/// +template void CFGStructurizer::orderBlocks() { + int sccNum = 0; + BlockT *bb; + for (scc_iterator sccIter = scc_begin(funcRep), + sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) { + std::vector &sccNext = *sccIter; + for (typename std::vector::const_iterator + blockIter = sccNext.begin(), blockEnd = sccNext.end(); + blockIter != blockEnd; ++blockIter) { + bb = *blockIter; + orderedBlks.push_back(bb); + recordSccnum(bb, sccNum); + } + } + + //walk through all the block in func to check for unreachable + for (BlockIterator blockIter1 = 
FuncGTraits::nodes_begin(funcRep), + blockEnd1 = FuncGTraits::nodes_end(funcRep); + blockIter1 != blockEnd1; ++blockIter1) { + BlockT *bb = &(*blockIter1); + sccNum = getSCCNum(bb); + if (sccNum == INVALIDSCCNUM) { + errs() << "unreachable block BB" << bb->getNumber() << "\n"; + } + } //end of for +} //orderBlocks + +template int CFGStructurizer::patternMatch(BlockT *curBlk) { + int numMatch = 0; + int curMatch; + + if (DEBUGME) { + errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n"; + } + + while ((curMatch = patternMatchGroup(curBlk)) > 0) { + numMatch += curMatch; + } + + if (DEBUGME) { + errs() << "End patternMatch BB" << curBlk->getNumber() + << ", numMatch = " << numMatch << "\n"; + } + + return numMatch; +} //patternMatch + +template +int CFGStructurizer::patternMatchGroup(BlockT *curBlk) { + int numMatch = 0; + numMatch += serialPatternMatch(curBlk); + numMatch += ifPatternMatch(curBlk); + //numMatch += switchPatternMatch(curBlk); + numMatch += loopendPatternMatch(curBlk); + numMatch += loopPatternMatch(curBlk); + return numMatch; +}//patternMatchGroup + +template +int CFGStructurizer::serialPatternMatch(BlockT *curBlk) { + if (curBlk->succ_size() != 1) { + return 0; + } + + BlockT *childBlk = *curBlk->succ_begin(); + if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) { + return 0; + } + + mergeSerialBlock(curBlk, childBlk); + ++numSerialPatternMatch; + return 1; +} //serialPatternMatch + +template +int CFGStructurizer::ifPatternMatch(BlockT *curBlk) { + //two edges + if (curBlk->succ_size() != 2) { + return 0; + } + + if (hasBackEdge(curBlk)) { + return 0; + } + + InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk); + if (branchInstr == NULL) { + return 0; + } + + assert(CFGTraits::isCondBranch(branchInstr)); + + BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr); + BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr); + BlockT *landBlk; + int cloned = 0; + + // TODO: Simplify + if 
(trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1 + && *trueBlk->succ_begin() == *falseBlk->succ_begin()) { + landBlk = *trueBlk->succ_begin(); + } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) { + landBlk = NULL; + } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) { + landBlk = falseBlk; + falseBlk = NULL; + } else if (falseBlk->succ_size() == 1 + && *falseBlk->succ_begin() == trueBlk) { + landBlk = trueBlk; + trueBlk = NULL; + } else if (falseBlk->succ_size() == 1 + && isSameloopDetachedContbreak(trueBlk, falseBlk)) { + landBlk = *falseBlk->succ_begin(); + } else if (trueBlk->succ_size() == 1 + && isSameloopDetachedContbreak(falseBlk, trueBlk)) { + landBlk = *trueBlk->succ_begin(); + } else { + return handleJumpintoIf(curBlk, trueBlk, falseBlk); + } + + // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the + // new BB created for landBlk==NULL may introduce new challenge to the + // reduction process. + if (landBlk != NULL && + ((trueBlk && trueBlk->pred_size() > 1) + || (falseBlk && falseBlk->pred_size() > 1))) { + cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk); + } + + if (trueBlk && trueBlk->pred_size() > 1) { + trueBlk = cloneBlockForPredecessor(trueBlk, curBlk); + ++cloned; + } + + if (falseBlk && falseBlk->pred_size() > 1) { + falseBlk = cloneBlockForPredecessor(falseBlk, curBlk); + ++cloned; + } + + mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk); + + ++numIfPatternMatch; + + numClonedBlock += cloned; + + return 1 + cloned; +} //ifPatternMatch + +template +int CFGStructurizer::switchPatternMatch(BlockT *curBlk) { + return 0; +} //switchPatternMatch + +template +int CFGStructurizer::loopendPatternMatch(BlockT *curBlk) { + LoopT *loopRep = loopInfo->getLoopFor(curBlk); + typename std::vector nestedLoops; + while (loopRep) { + nestedLoops.push_back(loopRep); + loopRep = loopRep->getParentLoop(); + } + + if (nestedLoops.size() == 0) { + 
return 0; + } + + // Process nested loop outside->inside, so "continue" to a outside loop won't + // be mistaken as "break" of the current loop. + int num = 0; + for (typename std::vector::reverse_iterator + iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend(); + iter != iterEnd; ++iter) { + loopRep = *iter; + + if (getLoopLandBlock(loopRep) != NULL) { + continue; + } + + BlockT *loopHeader = loopRep->getHeader(); + + int numBreak = loopbreakPatternMatch(loopRep, loopHeader); + + if (numBreak == -1) { + break; + } + + int numCont = loopcontPatternMatch(loopRep, loopHeader); + num += numBreak + numCont; + } + + return num; +} //loopendPatternMatch + +template +int CFGStructurizer::loopPatternMatch(BlockT *curBlk) { + if (curBlk->succ_size() != 0) { + return 0; + } + + int numLoop = 0; + LoopT *loopRep = loopInfo->getLoopFor(curBlk); + while (loopRep && loopRep->getHeader() == curBlk) { + LoopLandInfo *loopLand = getLoopLandInfo(loopRep); + if (loopLand) { + BlockT *landBlk = loopLand->landBlk; + assert(landBlk); + if (!isRetiredBlock(landBlk)) { + mergeLooplandBlock(curBlk, loopLand); + ++numLoop; + } + } + loopRep = loopRep->getParentLoop(); + } + + numLoopPatternMatch += numLoop; + + return numLoop; +} //loopPatternMatch + +template +int CFGStructurizer::loopbreakPatternMatch(LoopT *loopRep, + BlockT *loopHeader) { + BlockTSmallerVector exitingBlks; + loopRep->getExitingBlocks(exitingBlks); + + if (DEBUGME) { + errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n"; + } + + if (exitingBlks.size() == 0) { + setLoopLandBlock(loopRep); + return 0; + } + + // Compute the corresponding exitBlks and exit block set. 
+ BlockTSmallerVector exitBlks; + std::set exitBlkSet; + for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(), + iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) { + BlockT *exitingBlk = *iter; + BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); + exitBlks.push_back(exitBlk); + exitBlkSet.insert(exitBlk); //non-duplicate insert + } + + assert(exitBlkSet.size() > 0); + assert(exitBlks.size() == exitingBlks.size()); + + if (DEBUGME) { + errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n"; + } + + // Find exitLandBlk. + BlockT *exitLandBlk = NULL; + int numCloned = 0; + int numSerial = 0; + + if (exitBlkSet.size() == 1) + { + exitLandBlk = *exitBlkSet.begin(); + } else { + exitLandBlk = findNearestCommonPostDom(exitBlkSet); + + if (exitLandBlk == NULL) { + return -1; + } + + bool allInPath = true; + bool allNotInPath = true; + for (typename std::set::const_iterator + iter = exitBlkSet.begin(), + iterEnd = exitBlkSet.end(); + iter != iterEnd; ++iter) { + BlockT *exitBlk = *iter; + + PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true); + if (DEBUGME) { + errs() << "BB" << exitBlk->getNumber() + << " to BB" << exitLandBlk->getNumber() << " PathToKind=" + << pathKind << "\n"; + } + + allInPath = allInPath && (pathKind == SinglePath_InPath); + allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath); + + if (!allInPath && !allNotInPath) { + if (DEBUGME) { + errs() << "singlePath check fail\n"; + } + return -1; + } + } // check all exit blocks + + if (allNotInPath) { +#if 1 + + // TODO: Simplify, maybe separate function? 
+ //funcRep->viewCFG(); + LoopT *parentLoopRep = loopRep->getParentLoop(); + BlockT *parentLoopHeader = NULL; + if (parentLoopRep) + parentLoopHeader = parentLoopRep->getHeader(); + + if (exitLandBlk == parentLoopHeader && + (exitLandBlk = relocateLoopcontBlock(parentLoopRep, + loopRep, + exitBlkSet, + exitLandBlk)) != NULL) { + if (DEBUGME) { + errs() << "relocateLoopcontBlock success\n"; + } + } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep, + exitingBlks, + exitBlks)) != NULL) { + if (DEBUGME) { + errs() << "insertEndbranchBlock success\n"; + } + } else { + if (DEBUGME) { + errs() << "loop exit fail\n"; + } + return -1; + } +#else + return -1; +#endif + } + + // Handle side entry to exit path. + exitBlks.clear(); + exitBlkSet.clear(); + for (typename BlockTSmallerVector::iterator iterExiting = + exitingBlks.begin(), + iterExitingEnd = exitingBlks.end(); + iterExiting != iterExitingEnd; ++iterExiting) { + BlockT *exitingBlk = *iterExiting; + BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); + BlockT *newExitBlk = exitBlk; + + if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) { + newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk); + ++numCloned; + } + + numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk); + + exitBlks.push_back(newExitBlk); + exitBlkSet.insert(newExitBlk); + } + + for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), + iterExitEnd = exitBlks.end(); + iterExit != iterExitEnd; ++iterExit) { + BlockT *exitBlk = *iterExit; + numSerial += serialPatternMatch(exitBlk); + } + + for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), + iterExitEnd = exitBlks.end(); + iterExit != iterExitEnd; ++iterExit) { + BlockT *exitBlk = *iterExit; + if (exitBlk->pred_size() > 1) { + if (exitBlk != exitLandBlk) { + return -1; + } + } else { + if (exitBlk != exitLandBlk && + (exitBlk->succ_size() != 1 || + *exitBlk->succ_begin() != exitLandBlk)) { + return -1; + } + } + } + } // else 
+ + // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk); + exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet); + + // Fold break into the breaking block. Leverage across level breaks. + assert(exitingBlks.size() == exitBlks.size()); + for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(), + iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end(); + iterExit != iterExitEnd; ++iterExit, ++iterExiting) { + BlockT *exitBlk = *iterExit; + BlockT *exitingBlk = *iterExiting; + assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk); + LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk); + handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk); + } + + int numBreak = static_cast(exitingBlks.size()); + numLoopbreakPatternMatch += numBreak; + numClonedBlock += numCloned; + return numBreak + numSerial + numCloned; +} //loopbreakPatternMatch + +template +int CFGStructurizer::loopcontPatternMatch(LoopT *loopRep, + BlockT *loopHeader) { + int numCont = 0; + SmallVector contBlk; + for (typename InvBlockGTraits::ChildIteratorType iter = + InvBlockGTraits::child_begin(loopHeader), + iterEnd = InvBlockGTraits::child_end(loopHeader); + iter != iterEnd; ++iter) { + BlockT *curBlk = *iter; + if (loopRep->contains(curBlk)) { + handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk), + loopHeader, loopRep); + contBlk.push_back(curBlk); + ++numCont; + } + } + + for (typename SmallVector::iterator + iter = contBlk.begin(), iterEnd = contBlk.end(); + iter != iterEnd; ++iter) { + (*iter)->removeSuccessor(loopHeader); + } + + numLoopcontPatternMatch += numCont; + + return numCont; +} //loopcontPatternMatch + + +template +bool CFGStructurizer::isSameloopDetachedContbreak(BlockT *src1Blk, + BlockT *src2Blk) { + // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the + // same loop with LoopLandInfo without explicitly keeping track of + // loopContBlks and loopBreakBlks, this is a 
method to get the information. + // + if (src1Blk->succ_size() == 0) { + LoopT *loopRep = loopInfo->getLoopFor(src1Blk); + if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + if (theEntry != NULL) { + if (DEBUGME) { + errs() << "isLoopContBreakBlock yes src1 = BB" + << src1Blk->getNumber() + << " src2 = BB" << src2Blk->getNumber() << "\n"; + } + return true; + } + } + } + return false; +} //isSameloopDetachedContbreak + +template +int CFGStructurizer::handleJumpintoIf(BlockT *headBlk, + BlockT *trueBlk, + BlockT *falseBlk) { + int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk); + if (num == 0) { + if (DEBUGME) { + errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; + } + num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk); + } + return num; +} + +template +int CFGStructurizer::handleJumpintoIfImp(BlockT *headBlk, + BlockT *trueBlk, + BlockT *falseBlk) { + int num = 0; + BlockT *downBlk; + + //trueBlk could be the common post dominator + downBlk = trueBlk; + + if (DEBUGME) { + errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber() + << " true = BB" << trueBlk->getNumber() + << ", numSucc=" << trueBlk->succ_size() + << " false = BB" << falseBlk->getNumber() << "\n"; + } + + while (downBlk) { + if (DEBUGME) { + errs() << "check down = BB" << downBlk->getNumber(); + } + + if (//postDomTree->dominates(downBlk, falseBlk) && + singlePathTo(falseBlk, downBlk) == SinglePath_InPath) { + if (DEBUGME) { + errs() << " working\n"; + } + + num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk); + num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk); + + numClonedBlock += num; + num += serialPatternMatch(*headBlk->succ_begin()); + num += serialPatternMatch(*(++headBlk->succ_begin())); + num += ifPatternMatch(headBlk); + assert(num > 0); // + + break; + } + if (DEBUGME) { + errs() << " not working\n"; + } + downBlk = (downBlk->succ_size() == 1) ? 
(*downBlk->succ_begin()) : NULL; + } // walk down the postDomTree + + return num; +} //handleJumpintoIf + +template +void CFGStructurizer::showImproveSimpleJumpintoIf(BlockT *headBlk, + BlockT *trueBlk, + BlockT *falseBlk, + BlockT *landBlk, + bool detail) { + errs() << "head = BB" << headBlk->getNumber() + << " size = " << headBlk->size(); + if (detail) { + errs() << "\n"; + headBlk->print(errs()); + errs() << "\n"; + } + + if (trueBlk) { + errs() << ", true = BB" << trueBlk->getNumber() << " size = " + << trueBlk->size() << " numPred = " << trueBlk->pred_size(); + if (detail) { + errs() << "\n"; + trueBlk->print(errs()); + errs() << "\n"; + } + } + if (falseBlk) { + errs() << ", false = BB" << falseBlk->getNumber() << " size = " + << falseBlk->size() << " numPred = " << falseBlk->pred_size(); + if (detail) { + errs() << "\n"; + falseBlk->print(errs()); + errs() << "\n"; + } + } + if (landBlk) { + errs() << ", land = BB" << landBlk->getNumber() << " size = " + << landBlk->size() << " numPred = " << landBlk->pred_size(); + if (detail) { + errs() << "\n"; + landBlk->print(errs()); + errs() << "\n"; + } + } + + errs() << "\n"; +} //showImproveSimpleJumpintoIf + +template +int CFGStructurizer::improveSimpleJumpintoIf(BlockT *headBlk, + BlockT *trueBlk, + BlockT *falseBlk, + BlockT **plandBlk) { + bool migrateTrue = false; + bool migrateFalse = false; + + BlockT *landBlk = *plandBlk; + + assert((trueBlk == NULL || trueBlk->succ_size() <= 1) + && (falseBlk == NULL || falseBlk->succ_size() <= 1)); + + if (trueBlk == falseBlk) { + return 0; + } + +#if 0 + if (DEBUGME) { + errs() << "improveSimpleJumpintoIf: "; + showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); + } +#endif + + // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0; + // May consider the # landBlk->pred_size() as it represents the number of + // assignment initReg = .. needed to insert. 
+ migrateTrue = needMigrateBlock(trueBlk); + migrateFalse = needMigrateBlock(falseBlk); + + if (!migrateTrue && !migrateFalse) { + return 0; + } + + // If we need to migrate either trueBlk and falseBlk, migrate the rest that + // have more than one predecessors. without doing this, its predecessor + // rather than headBlk will have undefined value in initReg. + if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) { + migrateTrue = true; + } + if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) { + migrateFalse = true; + } + + if (DEBUGME) { + errs() << "before improveSimpleJumpintoIf: "; + showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); + //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1); + } + + // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk + // + // new: headBlk => if () {initReg = 1; org trueBlk branch} else + // {initReg = 0; org falseBlk branch } + // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} + // => org landBlk + // if landBlk->pred_size() > 2, put the about if-else inside + // if (initReg !=2) {...} + // + // add initReg = initVal to headBlk + + const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); + unsigned initReg = + funcRep->getRegInfo().createVirtualRegister(I32RC); + if (!migrateTrue || !migrateFalse) { + int initVal = migrateTrue ? 
0 : 1; + CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal); + } + + int numNewBlk = 0; + + if (landBlk == NULL) { + landBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(landBlk); //insert to function + + if (trueBlk) { + trueBlk->addSuccessor(landBlk); + } else { + headBlk->addSuccessor(landBlk); + } + + if (falseBlk) { + falseBlk->addSuccessor(landBlk); + } else { + headBlk->addSuccessor(landBlk); + } + + numNewBlk ++; + } + + bool landBlkHasOtherPred = (landBlk->pred_size() > 2); + + //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" + typename BlockT::iterator insertPos = + CFGTraits::getInstrPos + (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep)); + + if (landBlkHasOtherPred) { + unsigned immReg = + funcRep->getRegInfo().createVirtualRegister(I32RC); + CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2); + unsigned cmpResReg = + funcRep->getRegInfo().createVirtualRegister(I32RC); + + CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg, + initReg, immReg); + CFGTraits::insertCondBranchBefore(landBlk, insertPos, + AMDGPU::IF_LOGICALZ_i32, passRep, + cmpResReg, DebugLoc()); + } + + CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32, + passRep, initReg, DebugLoc()); + + if (migrateTrue) { + migrateInstruction(trueBlk, landBlk, insertPos); + // need to uncondionally insert the assignment to ensure a path from its + // predecessor rather than headBlk has valid value in initReg if + // (initVal != 1). 
+ CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1); + } + CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep); + + if (migrateFalse) { + migrateInstruction(falseBlk, landBlk, insertPos); + // need to uncondionally insert the assignment to ensure a path from its + // predecessor rather than headBlk has valid value in initReg if + // (initVal != 0) + CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0); + } + //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep); + + if (landBlkHasOtherPred) { + // add endif + CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep); + + // put initReg = 2 to other predecessors of landBlk + for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), + predIterEnd = landBlk->pred_end(); predIter != predIterEnd; + ++predIter) { + BlockT *curBlk = *predIter; + if (curBlk != trueBlk && curBlk != falseBlk) { + CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2); + } + } //for + } + if (DEBUGME) { + errs() << "result from improveSimpleJumpintoIf: "; + showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); + //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1); + } + + // update landBlk + *plandBlk = landBlk; + + return numNewBlk; +} //improveSimpleJumpintoIf + +template +void CFGStructurizer::handleLoopbreak(BlockT *exitingBlk, + LoopT *exitingLoop, + BlockT *exitBlk, + LoopT *exitLoop, + BlockT *landBlk) { + if (DEBUGME) { + errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop) + << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n"; + } + const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); + + RegiT initReg = INVALIDREGNUM; + if (exitingLoop != exitLoop) { + initReg = static_cast + (funcRep->getRegInfo().createVirtualRegister(I32RC)); + assert(initReg != INVALIDREGNUM); + addLoopBreakInitReg(exitLoop, initReg); + while (exitingLoop != exitLoop && exitingLoop) { + 
addLoopBreakOnReg(exitingLoop, initReg); + exitingLoop = exitingLoop->getParentLoop(); + } + assert(exitingLoop == exitLoop); + } + + mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg); + +} //handleLoopbreak + +template +void CFGStructurizer::handleLoopcontBlock(BlockT *contingBlk, + LoopT *contingLoop, + BlockT *contBlk, + LoopT *contLoop) { + if (DEBUGME) { + errs() << "loopcontPattern cont = BB" << contingBlk->getNumber() + << " header = BB" << contBlk->getNumber() << "\n"; + + errs() << "Trying to continue loop-depth = " + << getLoopDepth(contLoop) + << " from loop-depth = " << getLoopDepth(contingLoop) << "\n"; + } + + RegiT initReg = INVALIDREGNUM; + const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); + if (contingLoop != contLoop) { + initReg = static_cast + (funcRep->getRegInfo().createVirtualRegister(I32RC)); + assert(initReg != INVALIDREGNUM); + addLoopContInitReg(contLoop, initReg); + while (contingLoop && contingLoop->getParentLoop() != contLoop) { + addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg + contingLoop = contingLoop->getParentLoop(); + } + assert(contingLoop && contingLoop->getParentLoop() == contLoop); + addLoopContOnReg(contingLoop, initReg); + } + + settleLoopcontBlock(contingBlk, contBlk, initReg); + //contingBlk->removeSuccessor(loopHeader); +} //handleLoopcontBlock + +template +void CFGStructurizer::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) { + if (DEBUGME) { + errs() << "serialPattern BB" << dstBlk->getNumber() + << " <= BB" << srcBlk->getNumber() << "\n"; + } + //removeUnconditionalBranch(dstBlk); + dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end()); + + dstBlk->removeSuccessor(srcBlk); + CFGTraits::cloneSuccessorList(dstBlk, srcBlk); + + removeSuccessor(srcBlk); + retireBlock(dstBlk, srcBlk); +} //mergeSerialBlock + +template +void CFGStructurizer::mergeIfthenelseBlock(InstrT *branchInstr, + BlockT *curBlk, + BlockT *trueBlk, + BlockT *falseBlk, + 
BlockT *landBlk) { + if (DEBUGME) { + errs() << "ifPattern BB" << curBlk->getNumber(); + errs() << "{ "; + if (trueBlk) { + errs() << "BB" << trueBlk->getNumber(); + } + errs() << " } else "; + errs() << "{ "; + if (falseBlk) { + errs() << "BB" << falseBlk->getNumber(); + } + errs() << " }\n "; + errs() << "landBlock: "; + if (landBlk == NULL) { + errs() << "NULL"; + } else { + errs() << "BB" << landBlk->getNumber(); + } + errs() << "\n"; + } + + int oldOpcode = branchInstr->getOpcode(); + DebugLoc branchDL = branchInstr->getDebugLoc(); + +// transform to +// if cond +// trueBlk +// else +// falseBlk +// endif +// landBlk + + typename BlockT::iterator branchInstrPos = + CFGTraits::getInstrPos(curBlk, branchInstr); + CFGTraits::insertCondBranchBefore(branchInstrPos, + CFGTraits::getBranchNzeroOpcode(oldOpcode), + passRep, + branchDL); + + if (trueBlk) { + curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end()); + curBlk->removeSuccessor(trueBlk); + if (landBlk && trueBlk->succ_size()!=0) { + trueBlk->removeSuccessor(landBlk); + } + retireBlock(curBlk, trueBlk); + } + CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep); + + if (falseBlk) { + curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk), + falseBlk->end()); + curBlk->removeSuccessor(falseBlk); + if (landBlk && falseBlk->succ_size() != 0) { + falseBlk->removeSuccessor(landBlk); + } + retireBlock(curBlk, falseBlk); + } + CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); + + //curBlk->remove(branchInstrPos); + branchInstr->eraseFromParent(); + + if (landBlk && trueBlk && falseBlk) { + curBlk->addSuccessor(landBlk); + } + +} //mergeIfthenelseBlock + +template +void CFGStructurizer::mergeLooplandBlock(BlockT *dstBlk, + LoopLandInfo *loopLand) { + BlockT *landBlk = loopLand->landBlk; + + if (DEBUGME) { + errs() << "loopPattern header = BB" << dstBlk->getNumber() + << " land = BB" << landBlk->getNumber() << "\n"; + } + + // Loop 
contInitRegs are init at the beginning of the loop. + for (typename std::set::const_iterator iter = + loopLand->contInitRegs.begin(), + iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) { + CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); + } + + /* we last inserterd the DebugLoc in the + * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk. + * search for the DebugLoc in the that statement. + * if not found, we have to insert the empty/default DebugLoc */ + InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk); + DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc(); + + CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak); + // Loop breakInitRegs are init before entering the loop. + for (typename std::set::const_iterator iter = + loopLand->breakInitRegs.begin(), + iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) + { + CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); + } + // Loop endbranchInitRegs are init before entering the loop. + for (typename std::set::const_iterator iter = + loopLand->endbranchInitRegs.begin(), + iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) { + CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); + } + + /* we last inserterd the DebugLoc in the continue statement in the current dstBlk + * search for the DebugLoc in the continue statement. + * if not found, we have to insert the empty/default DebugLoc */ + InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk); + DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc(); + + CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue); + // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this + // loop. 
+ for (typename std::set::const_iterator iter = + loopLand->breakOnRegs.begin(), + iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) { + CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep, + *iter); + } + + // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this + // loop. + for (std::set::const_iterator iter = loopLand->contOnRegs.begin(), + iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) { + CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32, + passRep, *iter); + } + + dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end()); + + for (typename BlockT::succ_iterator iter = landBlk->succ_begin(), + iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) { + dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of. + } + + removeSuccessor(landBlk); + retireBlock(dstBlk, landBlk); +} //mergeLooplandBlock + +template +void CFGStructurizer::mergeLoopbreakBlock(BlockT *exitingBlk, + BlockT *exitBlk, + BlockT *exitLandBlk, + RegiT setReg) { + if (DEBUGME) { + errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber() + << " exit = BB" << exitBlk->getNumber() + << " land = BB" << exitLandBlk->getNumber() << "\n"; + } + + InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk); + assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); + + DebugLoc DL = branchInstr->getDebugLoc(); + + BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); + int oldOpcode = branchInstr->getOpcode(); + + // transform exitingBlk to + // if ( ) { + // exitBlk (if exitBlk != exitLandBlk) + // setReg = 1 + // break + // }endif + // successor = {orgSuccessor(exitingBlk) - exitBlk} + + typename BlockT::iterator branchInstrPos = + CFGTraits::getInstrPos(exitingBlk, branchInstr); + + if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) { + //break_logical + int newOpcode = + (trueBranch == exitBlk) ? 
CFGTraits::getBreakNzeroOpcode(oldOpcode) + : CFGTraits::getBreakZeroOpcode(oldOpcode); + CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); + } else { + int newOpcode = + (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode) + : CFGTraits::getBranchZeroOpcode(oldOpcode); + CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); + if (exitBlk != exitLandBlk) { + //splice is insert-before ... + exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(), + exitBlk->end()); + } + if (setReg != INVALIDREGNUM) { + CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); + } + CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep); + CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); + } //if_logical + + //now branchInst can be erase safely + //exitingBlk->eraseFromParent(branchInstr); + branchInstr->eraseFromParent(); + + //now take care of successors, retire blocks + exitingBlk->removeSuccessor(exitBlk); + if (exitBlk != exitLandBlk) { + //splice is insert-before ... 
+ exitBlk->removeSuccessor(exitLandBlk); + retireBlock(exitingBlk, exitBlk); + } + +} //mergeLoopbreakBlock + +template +void CFGStructurizer::settleLoopcontBlock(BlockT *contingBlk, + BlockT *contBlk, + RegiT setReg) { + if (DEBUGME) { + errs() << "settleLoopcontBlock conting = BB" + << contingBlk->getNumber() + << ", cont = BB" << contBlk->getNumber() << "\n"; + } + + InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk); + if (branchInstr) { + assert(CFGTraits::isCondBranch(branchInstr)); + typename BlockT::iterator branchInstrPos = + CFGTraits::getInstrPos(contingBlk, branchInstr); + BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); + int oldOpcode = branchInstr->getOpcode(); + DebugLoc DL = branchInstr->getDebugLoc(); + + // transform contingBlk to + // if () { + // move instr after branchInstr + // continue + // or + // setReg = 1 + // break + // }endif + // successor = {orgSuccessor(contingBlk) - loopHeader} + + bool useContinueLogical = + (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr); + + if (useContinueLogical == false) + { + int branchOpcode = + trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode) + : CFGTraits::getBranchZeroOpcode(oldOpcode); + + CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); + + if (setReg != INVALIDREGNUM) { + CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL); + } else { + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL); + } + + CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL); + } else { + int branchOpcode = + trueBranch == contBlk ? 
CFGTraits::getContinueNzeroOpcode(oldOpcode) + : CFGTraits::getContinueZeroOpcode(oldOpcode); + + CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); + } + + //contingBlk->eraseFromParent(branchInstr); + branchInstr->eraseFromParent(); + } else { + /* if we've arrived here then we've already erased the branch instruction + * travel back up the basic block to see the last reference of our debug location + * we've just inserted that reference here so it should be representative */ + if (setReg != INVALIDREGNUM) { + CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1); + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); + } else { + // insertEnd to ensure phi-moves, if exist, go before the continue-instr. + CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); + } + } //else + +} //settleLoopcontBlock + +// BBs in exitBlkSet are determined as in break-path for loopRep, +// before we can put code for BBs as inside loop-body for loopRep +// check whether those BBs are determined as cont-BB for parentLoopRep +// earlier. 
+// If so, generate a new BB newBlk +// (1) set newBlk as the common successor of the end blocks of the exit paths +// that start at the BBs in exitBlkSet +// (2) erase the continue-instr from each of those end blocks (the matching +// break-instr is not inserted here; presumably the caller adds it -- TODO +// confirm) +// (3) generate continue-instr in newBlk +// +// Returns newBlk, or NULL (before any block is created) when some BB in +// exitBlkSet has no single-path end block or that end block carries no +// continue-instr. parentLoopRep and loopRep are not read by this function. +// +template +typename CFGStructurizer::BlockT * +CFGStructurizer::relocateLoopcontBlock(LoopT *parentLoopRep, + LoopT *loopRep, + std::set &exitBlkSet, + BlockT *exitLandBlk) { + std::set endBlkSet; + +// BlockT *parentLoopHead = parentLoopRep->getHeader(); + + + // Find the end block of the path leaving every exit block; give up as soon + // as one path has no usable end or its end block has no continue-instr. + for (typename std::set::const_iterator iter = exitBlkSet.begin(), + iterEnd = exitBlkSet.end(); + iter != iterEnd; ++iter) { + BlockT *exitBlk = *iter; + BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk); + + if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL) + return NULL; + + endBlkSet.insert(endBlk); + } + + // Every path qualified: append a fresh block that holds a single CONTINUE. + BlockT *newBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(newBlk); //insert to function + CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep); + SHOWNEWBLK(newBlk, "New continue block: "); + + // Drop each end block's own continue-instr and make newBlk its successor. + for (typename std::set::const_iterator iter = endBlkSet.begin(), + iterEnd = endBlkSet.end(); + iter != iterEnd; ++iter) { + BlockT *endBlk = *iter; + InstrT *contInstr = CFGTraits::getContinueInstr(endBlk); + if (contInstr) { + contInstr->eraseFromParent(); + } + endBlk->addSuccessor(newBlk); + if (DEBUGME) { + errs() << "Add new continue Block to BB" + << endBlk->getNumber() << " successors\n"; + } + } + + return newBlk; +} //relocateLoopcontBlock + + +// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as +// LoopLandBlock. This BB branches on the loop endBranchInit register to the +// paths corresponding to the loop exiting branches. 
+ +template +typename CFGStructurizer::BlockT * +CFGStructurizer::addLoopEndbranchBlock(LoopT *loopRep, + BlockTSmallerVector &exitingBlks, + BlockTSmallerVector &exitBlks) { + const AMDILInstrInfo *tii = + static_cast(passRep->getTargetInstrInfo()); + const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); + + RegiT endBranchReg = static_cast + (funcRep->getRegInfo().createVirtualRegister(I32RC)); + assert(endBranchReg >= 0); + + // reg = 0 before entering the loop + addLoopEndbranchInitReg(loopRep, endBranchReg); + + uint32_t numBlks = static_cast(exitingBlks.size()); + assert(numBlks >=2 && numBlks == exitBlks.size()); + + BlockT *preExitingBlk = exitingBlks[0]; + BlockT *preExitBlk = exitBlks[0]; + BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(preBranchBlk); //insert to function + SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: "); + + BlockT *newLandBlk = preBranchBlk; + + CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk, + newLandBlk); + preExitingBlk->removeSuccessor(preExitBlk); + preExitingBlk->addSuccessor(newLandBlk); + + //it is redundant to add reg = 0 to exitingBlks[0] + + // For 1..n th exiting path (the last iteration handles two pathes) create the + // branch to the previous path and the current path. + for (uint32_t i = 1; i < numBlks; ++i) { + BlockT *curExitingBlk = exitingBlks[i]; + BlockT *curExitBlk = exitBlks[i]; + BlockT *curBranchBlk; + + if (i == numBlks - 1) { + curBranchBlk = curExitBlk; + } else { + curBranchBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(curBranchBlk); //insert to function + SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: "); + } + + // Add reg = i to exitingBlks[i]. + CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep, + endBranchReg, i); + + // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge + // (exitingBlks[i], newLandBlk). 
+ CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk, + newLandBlk); + curExitingBlk->removeSuccessor(curExitBlk); + curExitingBlk->addSuccessor(newLandBlk); + + // add to preBranchBlk the branch instruction: + // if (endBranchReg == preVal) + // preExitBlk + // else + // curBranchBlk + // + // preValReg = i - 1 + + DebugLoc DL; + RegiT preValReg = static_cast + (funcRep->getRegInfo().createVirtualRegister(I32RC)); + + preBranchBlk->insert(preBranchBlk->begin(), + tii->getMovImmInstr(preBranchBlk->getParent(), preValReg, + i - 1)); + + // condResReg = (endBranchReg == preValReg) + RegiT condResReg = static_cast + (funcRep->getRegInfo().createVirtualRegister(I32RC)); + BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg) + .addReg(endBranchReg).addReg(preValReg); + + BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32)) + .addMBB(preExitBlk).addReg(condResReg); + + preBranchBlk->addSuccessor(preExitBlk); + preBranchBlk->addSuccessor(curBranchBlk); + + // Update preExitingBlk, preExitBlk, preBranchBlk. + preExitingBlk = curExitingBlk; + preExitBlk = curExitBlk; + preBranchBlk = curBranchBlk; + + } //end for 1 .. 
n blocks + + return newLandBlk; +} //addLoopEndbranchBlock + +template +typename CFGStructurizer::PathToKind +CFGStructurizer::singlePathTo(BlockT *srcBlk, BlockT *dstBlk, + bool allowSideEntry) { + assert(dstBlk); + + if (srcBlk == dstBlk) { + return SinglePath_InPath; + } + + while (srcBlk && srcBlk->succ_size() == 1) { + srcBlk = *srcBlk->succ_begin(); + if (srcBlk == dstBlk) { + return SinglePath_InPath; + } + + if (!allowSideEntry && srcBlk->pred_size() > 1) { + return Not_SinglePath; + } + } + + if (srcBlk && srcBlk->succ_size()==0) { + return SinglePath_NotInPath; + } + + return Not_SinglePath; +} //singlePathTo + +// If there is a single path from srcBlk to dstBlk, return the last block before +// dstBlk If there is a single path from srcBlk->end without dstBlk, return the +// last block in the path Otherwise, return NULL +template +typename CFGStructurizer::BlockT * +CFGStructurizer::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk, + bool allowSideEntry) { + assert(dstBlk); + + if (srcBlk == dstBlk) { + return srcBlk; + } + + if (srcBlk->succ_size() == 0) { + return srcBlk; + } + + while (srcBlk && srcBlk->succ_size() == 1) { + BlockT *preBlk = srcBlk; + + srcBlk = *srcBlk->succ_begin(); + if (srcBlk == NULL) { + return preBlk; + } + + if (!allowSideEntry && srcBlk->pred_size() > 1) { + return NULL; + } + } + + if (srcBlk && srcBlk->succ_size()==0) { + return srcBlk; + } + + return NULL; + +} //singlePathEnd + +template +int CFGStructurizer::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk, + BlockT *dstBlk) { + int cloned = 0; + assert(preBlk->isSuccessor(srcBlk)); + while (srcBlk && srcBlk != dstBlk) { + assert(srcBlk->succ_size() == 1); + if (srcBlk->pred_size() > 1) { + srcBlk = cloneBlockForPredecessor(srcBlk, preBlk); + ++cloned; + } + + preBlk = srcBlk; + srcBlk = *srcBlk->succ_begin(); + } + + return cloned; +} //cloneOnSideEntryTo + +template +typename CFGStructurizer::BlockT * +CFGStructurizer::cloneBlockForPredecessor(BlockT *curBlk, + BlockT 
*predBlk) { + assert(predBlk->isSuccessor(curBlk) && + "succBlk is not a prececessor of curBlk"); + + BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions + CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk); + //srcBlk, oldBlk, newBlk + + predBlk->removeSuccessor(curBlk); + predBlk->addSuccessor(cloneBlk); + + // add all successor to cloneBlk + CFGTraits::cloneSuccessorList(cloneBlk, curBlk); + + numClonedInstr += curBlk->size(); + + if (DEBUGME) { + errs() << "Cloned block: " << "BB" + << curBlk->getNumber() << "size " << curBlk->size() << "\n"; + } + + SHOWNEWBLK(cloneBlk, "result of Cloned block: "); + + return cloneBlk; +} //cloneBlockForPredecessor + +template +typename CFGStructurizer::BlockT * +CFGStructurizer::exitingBlock2ExitBlock(LoopT *loopRep, + BlockT *exitingBlk) { + BlockT *exitBlk = NULL; + + for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(), + iterSuccEnd = exitingBlk->succ_end(); + iterSucc != iterSuccEnd; ++iterSucc) { + BlockT *curBlk = *iterSucc; + if (!loopRep->contains(curBlk)) { + assert(exitBlk == NULL); + exitBlk = curBlk; + } + } + + assert(exitBlk != NULL); + + return exitBlk; +} //exitingBlock2ExitBlock + +template +void CFGStructurizer::migrateInstruction(BlockT *srcBlk, + BlockT *dstBlk, + InstrIterator insertPos) { + InstrIterator spliceEnd; + //look for the input branchinstr, not the AMDIL branchinstr + InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); + if (branchInstr == NULL) { + if (DEBUGME) { + errs() << "migrateInstruction don't see branch instr\n" ; + } + spliceEnd = srcBlk->end(); + } else { + if (DEBUGME) { + errs() << "migrateInstruction see branch instr\n" ; + branchInstr->dump(); + } + spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr); + } + if (DEBUGME) { + errs() << "migrateInstruction before splice dstSize = " << dstBlk->size() + << "srcSize = " << srcBlk->size() << "\n"; + } + + //splice insert before insertPos + dstBlk->splice(insertPos, 
srcBlk, srcBlk->begin(), spliceEnd); + + if (DEBUGME) { + errs() << "migrateInstruction after splice dstSize = " << dstBlk->size() + << "srcSize = " << srcBlk->size() << "\n"; + } +} //migrateInstruction + +// normalizeInfiniteLoopExit change +// B1: +// uncond_br LoopHeader +// +// to +// B1: +// cond_br 1 LoopHeader dummyExit +// and return the newly added dummy exit block +// +template +typename CFGStructurizer::BlockT * +CFGStructurizer::normalizeInfiniteLoopExit(LoopT* LoopRep) { + BlockT *loopHeader; + BlockT *loopLatch; + loopHeader = LoopRep->getHeader(); + loopLatch = LoopRep->getLoopLatch(); + BlockT *dummyExitBlk = NULL; + const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); + if (loopHeader!=NULL && loopLatch!=NULL) { + InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch); + if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) { + dummyExitBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(dummyExitBlk); //insert to function + SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); + + if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n"; + + typename BlockT::iterator insertPos = + CFGTraits::getInstrPos(loopLatch, branchInstr); + unsigned immReg = + funcRep->getRegInfo().createVirtualRegister(I32RC); + CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1); + InstrT *newInstr = + CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep); + MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false); + + SHOWNEWINSTR(newInstr); + + branchInstr->eraseFromParent(); + loopLatch->addSuccessor(dummyExitBlk); + } + } + + return dummyExitBlk; +} //normalizeInfiniteLoopExit + +template +void CFGStructurizer::removeUnconditionalBranch(BlockT *srcBlk) { + InstrT *branchInstr; + + // I saw two unconditional branch in one basic block in example + // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 
+ while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk)) + && CFGTraits::isUncondBranch(branchInstr)) { + if (DEBUGME) { + errs() << "Removing unconditional branch instruction" ; + branchInstr->dump(); + } + branchInstr->eraseFromParent(); + } +} //removeUnconditionalBranch + +template +void CFGStructurizer::removeRedundantConditionalBranch(BlockT *srcBlk) { + if (srcBlk->succ_size() == 2) { + BlockT *blk1 = *srcBlk->succ_begin(); + BlockT *blk2 = *(++srcBlk->succ_begin()); + + if (blk1 == blk2) { + InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); + assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); + if (DEBUGME) { + errs() << "Removing unneeded conditional branch instruction" ; + branchInstr->dump(); + } + branchInstr->eraseFromParent(); + SHOWNEWBLK(blk1, "Removing redundant successor"); + srcBlk->removeSuccessor(blk1); + } + } +} //removeRedundantConditionalBranch + +template +void CFGStructurizer::addDummyExitBlock(SmallVector &retBlks) { + BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(dummyExitBlk); //insert to function + CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep); + + for (typename SmallVector::iterator iter = + retBlks.begin(), + iterEnd = retBlks.end(); iter != iterEnd; ++iter) { + BlockT *curBlk = *iter; + InstrT *curInstr = CFGTraits::getReturnInstr(curBlk); + if (curInstr) { + curInstr->eraseFromParent(); + } +#if 0 + if (curBlk->size()==0 && curBlk->pred_size() == 1) { + if (DEBUGME) { + errs() << "Replace empty block BB" << curBlk->getNumber() + << " with dummyExitBlock\n"; + } + BlockT *predb = *curBlk->pred_begin(); + predb->removeSuccessor(curBlk); + curBlk = predb; + } //handle empty curBlk +#endif + curBlk->addSuccessor(dummyExitBlk); + if (DEBUGME) { + errs() << "Add dummyExitBlock to BB" << curBlk->getNumber() + << " successors\n"; + } + } //for + + SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: "); +} //addDummyExitBlock + +template +void 
CFGStructurizer::removeSuccessor(BlockT *srcBlk) { + while (srcBlk->succ_size()) { + srcBlk->removeSuccessor(*srcBlk->succ_begin()); + } +} + +template +void CFGStructurizer::recordSccnum(BlockT *srcBlk, int sccNum) { + BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; + + if (srcBlkInfo == NULL) { + srcBlkInfo = new BlockInfo(); + } + + srcBlkInfo->sccNum = sccNum; +} + +template +int CFGStructurizer::getSCCNum(BlockT *srcBlk) { + BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; + return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM; +} + +template +void CFGStructurizer::retireBlock(BlockT *dstBlk, BlockT *srcBlk) { + if (DEBUGME) { + errs() << "Retiring BB" << srcBlk->getNumber() << "\n"; + } + + BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; + + if (srcBlkInfo == NULL) { + srcBlkInfo = new BlockInfo(); + } + + srcBlkInfo->isRetired = true; + //int i = srcBlk->succ_size(); + //int j = srcBlk->pred_size(); + assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0 + && "can't retire block yet"); +} + +template +bool CFGStructurizer::isRetiredBlock(BlockT *srcBlk) { + BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; + return (srcBlkInfo && srcBlkInfo->isRetired); +} + +template +bool CFGStructurizer::isActiveLoophead(BlockT *curBlk) { + LoopT *loopRep = loopInfo->getLoopFor(curBlk); + while (loopRep && loopRep->getHeader() == curBlk) { + LoopLandInfo *loopLand = getLoopLandInfo(loopRep); + + if(loopLand == NULL) + return true; + + BlockT *landBlk = loopLand->landBlk; + assert(landBlk); + if (!isRetiredBlock(landBlk)) { + return true; + } + + loopRep = loopRep->getParentLoop(); + } + + return false; +} //isActiveLoophead + +template +bool CFGStructurizer::needMigrateBlock(BlockT *blk) { + const unsigned blockSizeThreshold = 30; + const unsigned cloneInstrThreshold = 100; + + bool multiplePreds = blk && (blk->pred_size() > 1); + + if(!multiplePreds) + return false; + + unsigned blkSize = blk->size(); + return ((blkSize > blockSizeThreshold) + && (blkSize * 
(blk->pred_size() - 1) > cloneInstrThreshold)); +} //needMigrateBlock + +template +typename CFGStructurizer::BlockT * +CFGStructurizer::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk, + BlockTSmallerVector &exitBlks, + std::set &exitBlkSet) { + SmallVector inpathBlks; //in exit path blocks + + for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), + predIterEnd = landBlk->pred_end(); + predIter != predIterEnd; ++predIter) { + BlockT *curBlk = *predIter; + if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) { + inpathBlks.push_back(curBlk); + } + } //for + + //if landBlk has predecessors that are not in the given loop, + //create a new block + BlockT *newLandBlk = landBlk; + if (inpathBlks.size() != landBlk->pred_size()) { + newLandBlk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(newLandBlk); //insert to function + newLandBlk->addSuccessor(landBlk); + for (typename SmallVector::iterator iter = + inpathBlks.begin(), + iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) { + BlockT *curBlk = *iter; + CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk); + //srcBlk, oldBlk, newBlk + curBlk->removeSuccessor(landBlk); + curBlk->addSuccessor(newLandBlk); + } + for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) { + if (exitBlks[i] == landBlk) { + exitBlks[i] = newLandBlk; + } + } + SHOWNEWBLK(newLandBlk, "NewLandingBlock: "); + } + + setLoopLandBlock(loopRep, newLandBlk); + + return newLandBlk; +} // recordLoopbreakLand + +template +void CFGStructurizer::setLoopLandBlock(LoopT *loopRep, BlockT *blk) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + assert(theEntry->landBlk == NULL); + + if (blk == NULL) { + blk = funcRep->CreateMachineBasicBlock(); + funcRep->push_back(blk); //insert to function + SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: "); + } + + theEntry->landBlk = blk; + + if (DEBUGME) { + errs() << "setLoopLandBlock 
loop-header = BB" + << loopRep->getHeader()->getNumber() + << " landing-block = BB" << blk->getNumber() << "\n"; + } +} // setLoopLandBlock + +template +void CFGStructurizer::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + + theEntry->breakOnRegs.insert(regNum); + + if (DEBUGME) { + errs() << "addLoopBreakOnReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopBreakOnReg + +template +void CFGStructurizer::addLoopContOnReg(LoopT *loopRep, RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + theEntry->contOnRegs.insert(regNum); + + if (DEBUGME) { + errs() << "addLoopContOnReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopContOnReg + +template +void CFGStructurizer::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + theEntry->breakInitRegs.insert(regNum); + + if (DEBUGME) { + errs() << "addLoopBreakInitReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopBreakInitReg + +template +void CFGStructurizer::addLoopContInitReg(LoopT *loopRep, RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry = new LoopLandInfo(); + } + theEntry->contInitRegs.insert(regNum); + + if (DEBUGME) { + errs() << "addLoopContInitReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopContInitReg + +template +void CFGStructurizer::addLoopEndbranchInitReg(LoopT *loopRep, + RegiT regNum) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + if (theEntry == NULL) { + theEntry 
= new LoopLandInfo(); + } + theEntry->endbranchInitRegs.insert(regNum); + + if (DEBUGME) + { + errs() << "addLoopEndbranchInitReg loop-header = BB" + << loopRep->getHeader()->getNumber() + << " regNum = " << regNum << "\n"; + } +} // addLoopEndbranchInitReg + +template +typename CFGStructurizer::LoopLandInfo * +CFGStructurizer::getLoopLandInfo(LoopT *loopRep) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + return theEntry; +} // getLoopLandInfo + +template +typename CFGStructurizer::BlockT * +CFGStructurizer::getLoopLandBlock(LoopT *loopRep) { + LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; + + return theEntry ? theEntry->landBlk : NULL; +} // getLoopLandBlock + + +template +bool CFGStructurizer::hasBackEdge(BlockT *curBlk) { + LoopT *loopRep = loopInfo->getLoopFor(curBlk); + if (loopRep == NULL) + return false; + + BlockT *loopHeader = loopRep->getHeader(); + + return curBlk->isSuccessor(loopHeader); + +} //hasBackEdge + +template +unsigned CFGStructurizer::getLoopDepth(LoopT *loopRep) { + return loopRep ? loopRep->getLoopDepth() : 0; +} //getLoopDepth + +template +int CFGStructurizer::countActiveBlock +(typename SmallVector::const_iterator iterStart, + typename SmallVector::const_iterator iterEnd) { + int count = 0; + while (iterStart != iterEnd) { + if (!isRetiredBlock(*iterStart)) { + ++count; + } + ++iterStart; + } + + return count; +} //countActiveBlock + +// This is work around solution for findNearestCommonDominator not avaiable to +// post dom a proper fix should go to Dominators.h. + +template +typename CFGStructurizer::BlockT* +CFGStructurizer::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) { + + if (postDomTree->dominates(blk1, blk2)) { + return blk1; + } + if (postDomTree->dominates(blk2, blk1)) { + return blk2; + } + + DomTreeNodeT *node1 = postDomTree->getNode(blk1); + DomTreeNodeT *node2 = postDomTree->getNode(blk2); + + // Handle newly cloned node. 
+ if (node1 == NULL && blk1->succ_size() == 1) { + return findNearestCommonPostDom(*blk1->succ_begin(), blk2); + } + if (node2 == NULL && blk2->succ_size() == 1) { + return findNearestCommonPostDom(blk1, *blk2->succ_begin()); + } + + if (node1 == NULL || node2 == NULL) { + return NULL; + } + + node1 = node1->getIDom(); + while (node1) { + if (postDomTree->dominates(node1, node2)) { + return node1->getBlock(); + } + node1 = node1->getIDom(); + } + + return NULL; +} + +template +typename CFGStructurizer::BlockT * +CFGStructurizer::findNearestCommonPostDom +(typename std::set &blks) { + BlockT *commonDom; + typename std::set::const_iterator iter = blks.begin(); + typename std::set::const_iterator iterEnd = blks.end(); + for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) { + BlockT *curBlk = *iter; + if (curBlk != commonDom) { + commonDom = findNearestCommonPostDom(curBlk, commonDom); + } + } + + if (DEBUGME) { + errs() << "Common post dominator for exit blocks is "; + if (commonDom) { + errs() << "BB" << commonDom->getNumber() << "\n"; + } else { + errs() << "NULL\n"; + } + } + + return commonDom; +} //findNearestCommonPostDom + +} //end namespace llvm + +//todo: move-end + + +//===----------------------------------------------------------------------===// +// +// CFGStructurizer for AMDIL +// +//===----------------------------------------------------------------------===// + + +using namespace llvmCFGStruct; + +namespace llvm +{ +class AMDILCFGStructurizer : public MachineFunctionPass +{ +public: + typedef MachineInstr InstructionType; + typedef MachineFunction FunctionType; + typedef MachineBasicBlock BlockType; + typedef MachineLoopInfo LoopinfoType; + typedef MachineDominatorTree DominatortreeType; + typedef MachinePostDominatorTree PostDominatortreeType; + typedef MachineDomTreeNode DomTreeNodeType; + typedef MachineLoop LoopType; + +protected: + TargetMachine &TM; + const TargetInstrInfo *TII; + const AMDILRegisterInfo *TRI; + +public: + 
AMDILCFGStructurizer(char &pid, TargetMachine &tm AMDIL_OPT_LEVEL_DECL); + const TargetInstrInfo *getTargetInstrInfo() const; + //bool runOnMachineFunction(MachineFunction &F); + +private: + +}; //end of class AMDILCFGStructurizer + +//char AMDILCFGStructurizer::ID = 0; +} //end of namespace llvm +AMDILCFGStructurizer::AMDILCFGStructurizer(char &pid, TargetMachine &tm + AMDIL_OPT_LEVEL_DECL) +: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()), + TRI(static_cast(tm.getRegisterInfo()) + ) { +} + +const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const { + return TII; +} +//===----------------------------------------------------------------------===// +// +// CFGPrepare +// +//===----------------------------------------------------------------------===// + + +using namespace llvmCFGStruct; + +namespace llvm +{ +class AMDILCFGPrepare : public AMDILCFGStructurizer +{ +public: + static char ID; + +public: + AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); + + virtual const char *getPassName() const; + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnMachineFunction(MachineFunction &F); + +private: + +}; //end of class AMDILCFGPrepare + +char AMDILCFGPrepare::ID = 0; +} //end of namespace llvm + +AMDILCFGPrepare::AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) + : AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR) +{ +} +const char *AMDILCFGPrepare::getPassName() const { + return "AMD IL Control Flow Graph Preparation Pass"; +} + +void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); +} + +//===----------------------------------------------------------------------===// +// +// CFGPerform +// +//===----------------------------------------------------------------------===// + + +using namespace llvmCFGStruct; + +namespace llvm +{ +class AMDILCFGPerform : public AMDILCFGStructurizer +{ +public: + static 
char ID; + +public: + AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); + virtual const char *getPassName() const; + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + bool runOnMachineFunction(MachineFunction &F); + +private: + +}; //end of class AMDILCFGPerform + +char AMDILCFGPerform::ID = 0; +} //end of namespace llvm + + AMDILCFGPerform::AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) +: AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR) +{ +} + +const char *AMDILCFGPerform::getPassName() const { + return "AMD IL Control Flow Graph structurizer Pass"; +} + +void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); +} + +//===----------------------------------------------------------------------===// +// +// CFGStructTraits +// +//===----------------------------------------------------------------------===// + +namespace llvmCFGStruct +{ +// this class is tailor to the AMDIL backend +template<> +struct CFGStructTraits +{ + typedef int RegiT; + + static int getBreakNzeroOpcode(int oldOpcode) { + switch(oldOpcode) { + ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALNZ); + default: + assert(0 && "internal error"); + }; + return -1; + } + + static int getBreakZeroOpcode(int oldOpcode) { + switch(oldOpcode) { + ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALZ); + default: + assert(0 && "internal error"); + }; + return -1; + } + + static int getBranchNzeroOpcode(int oldOpcode) { + switch(oldOpcode) { + ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ); + default: + assert(0 && "internal error"); + }; + return -1; + } + + static int getBranchZeroOpcode(int oldOpcode) { + switch(oldOpcode) { + ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ); + default: + assert(0 && "internal error"); + }; + return -1; + } + + static int getContinueNzeroOpcode(int 
oldOpcode) + { + switch(oldOpcode) { + ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALNZ); + default: + assert(0 && "internal error"); + }; + return -1; + } + + static int getContinueZeroOpcode(int oldOpcode) { + switch(oldOpcode) { + ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALZ); + default: + assert(0 && "internal error"); + }; + return -1; + } + +// the explicitly represented branch target is the true branch target +#define getExplicitBranch getTrueBranch +#define setExplicitBranch setTrueBranch + + static MachineBasicBlock *getTrueBranch(MachineInstr *instr) { + return instr->getOperand(0).getMBB(); + } + + static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) { + instr->getOperand(0).setMBB(blk); + } + + static MachineBasicBlock * + getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) { + assert(blk->succ_size() == 2); + MachineBasicBlock *trueBranch = getTrueBranch(instr); + MachineBasicBlock::succ_iterator iter = blk->succ_begin(); + MachineBasicBlock::succ_iterator iterNext = iter; + ++iterNext; + + return (*iter == trueBranch) ? 
*iterNext : *iter; + } + + static bool isCondBranch(MachineInstr *instr) { + switch (instr->getOpcode()) { + ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND); + break; + default: + return false; + } + return true; + } + + static bool isUncondBranch(MachineInstr *instr) { + switch (instr->getOpcode()) { + case AMDGPU::BRANCH: + break; + default: + return false; + } + return true; + } + + static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) { + //get DebugLoc from the last MachineBasicBlock instruction with debug info + DebugLoc DL; + for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) { + MachineInstr *instr = &(*iter); + if (instr->getDebugLoc().isUnknown() == false) { + DL = instr->getDebugLoc(); + } + } + return DL; + } + + static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) { + MachineBasicBlock::reverse_iterator iter = blk->rbegin(); + MachineInstr *instr = &*iter; + if (instr && (isCondBranch(instr) || isUncondBranch(instr))) { + return instr; + } + return NULL; + } + + // The correct naming for this is getPossibleLoopendBlockBranchInstr. + // + // BB with backward-edge could have move instructions after the branch + // instruction. Such move instruction "belong to" the loop backward-edge. 
+ // + static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) { + const AMDILInstrInfo * TII = static_cast( + blk->getParent()->getTarget().getInstrInfo()); + + for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(), + iterEnd = blk->rend(); iter != iterEnd; ++iter) { + // FIXME: Simplify + MachineInstr *instr = &*iter; + if (instr) { + if (isCondBranch(instr) || isUncondBranch(instr)) { + return instr; + } else if (!TII->isMov(instr->getOpcode())) { + break; + } + } + } + return NULL; + } + + static MachineInstr *getReturnInstr(MachineBasicBlock *blk) { + MachineBasicBlock::reverse_iterator iter = blk->rbegin(); + if (iter != blk->rend()) { + MachineInstr *instr = &(*iter); + if (instr->getOpcode() == AMDGPU::RETURN) { + return instr; + } + } + return NULL; + } + + static MachineInstr *getContinueInstr(MachineBasicBlock *blk) { + MachineBasicBlock::reverse_iterator iter = blk->rbegin(); + if (iter != blk->rend()) { + MachineInstr *instr = &(*iter); + if (instr->getOpcode() == AMDGPU::CONTINUE) { + return instr; + } + } + return NULL; + } + + static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) { + for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) { + MachineInstr *instr = &(*iter); + if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) { + return instr; + } + } + return NULL; + } + + static bool isReturnBlock(MachineBasicBlock *blk) { + MachineInstr *instr = getReturnInstr(blk); + bool isReturn = (blk->succ_size() == 0); + if (instr) { + assert(isReturn); + } else if (isReturn) { + if (DEBUGME) { + errs() << "BB" << blk->getNumber() + <<" is return block without RETURN instr\n"; + } + } + + return isReturn; + } + + static MachineBasicBlock::iterator + getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) { + assert(instr->getParent() == blk && "instruction doesn't belong to block"); + MachineBasicBlock::iterator iter = blk->begin(); + 
MachineBasicBlock::iterator iterEnd = blk->end(); + while (&(*iter) != instr && iter != iterEnd) { + ++iter; + } + + assert(iter != iterEnd); + return iter; + }//getInstrPos + + static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, + AMDILCFGStructurizer *passRep) { + return insertInstrBefore(blk,newOpcode,passRep,DebugLoc()); + } //insertInstrBefore + + static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, + AMDILCFGStructurizer *passRep, DebugLoc DL) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); + + MachineBasicBlock::iterator res; + if (blk->begin() != blk->end()) { + blk->insert(blk->begin(), newInstr); + } else { + blk->push_back(newInstr); + } + + SHOWNEWINSTR(newInstr); + + return newInstr; + } //insertInstrBefore + + static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, + AMDILCFGStructurizer *passRep) { + insertInstrEnd(blk,newOpcode,passRep,DebugLoc()); + } //insertInstrEnd + + static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, + AMDILCFGStructurizer *passRep, DebugLoc DL) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineInstr *newInstr = blk->getParent() + ->CreateMachineInstr(tii->get(newOpcode), DL); + + blk->push_back(newInstr); + //assume the instruction doesn't take any reg operand ... + + SHOWNEWINSTR(newInstr); + } //insertInstrEnd + + static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos, + int newOpcode, + AMDILCFGStructurizer *passRep) { + MachineInstr *oldInstr = &(*instrPos); + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineBasicBlock *blk = oldInstr->getParent(); + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(newOpcode), + DebugLoc()); + + blk->insert(instrPos, newInstr); + //assume the instruction doesn't take any reg operand ... 
+ + SHOWNEWINSTR(newInstr); + return newInstr; + } //insertInstrBefore + + static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos, + int newOpcode, + AMDILCFGStructurizer *passRep, + DebugLoc DL) { + MachineInstr *oldInstr = &(*instrPos); + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineBasicBlock *blk = oldInstr->getParent(); + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(newOpcode), + DL); + + blk->insert(instrPos, newInstr); + MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(), + false); + + SHOWNEWINSTR(newInstr); + //erase later oldInstr->eraseFromParent(); + } //insertCondBranchBefore + + static void insertCondBranchBefore(MachineBasicBlock *blk, + MachineBasicBlock::iterator insertPos, + int newOpcode, + AMDILCFGStructurizer *passRep, + RegiT regNum, + DebugLoc DL) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); + + //insert before + blk->insert(insertPos, newInstr); + MachineInstrBuilder(newInstr).addReg(regNum, false); + + SHOWNEWINSTR(newInstr); + } //insertCondBranchBefore + + static void insertCondBranchEnd(MachineBasicBlock *blk, + int newOpcode, + AMDILCFGStructurizer *passRep, + RegiT regNum) { + const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc()); + + blk->push_back(newInstr); + MachineInstrBuilder(newInstr).addReg(regNum, false); + + SHOWNEWINSTR(newInstr); + } //insertCondBranchEnd + + + static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos, + AMDILCFGStructurizer *passRep, + RegiT regNum, int regVal) { + MachineInstr *oldInstr = &(*instrPos); + const AMDILInstrInfo *tii = + static_cast(passRep->getTargetInstrInfo()); + MachineBasicBlock *blk = oldInstr->getParent(); + MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), 
regNum, + regVal); + blk->insert(instrPos, newInstr); + + SHOWNEWINSTR(newInstr); + } //insertAssignInstrBefore + + static void insertAssignInstrBefore(MachineBasicBlock *blk, + AMDILCFGStructurizer *passRep, + RegiT regNum, int regVal) { + const AMDILInstrInfo *tii = + static_cast(passRep->getTargetInstrInfo()); + + MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, + regVal); + if (blk->begin() != blk->end()) { + blk->insert(blk->begin(), newInstr); + } else { + blk->push_back(newInstr); + } + + SHOWNEWINSTR(newInstr); + + } //insertInstrBefore + + static void insertCompareInstrBefore(MachineBasicBlock *blk, + MachineBasicBlock::iterator instrPos, + AMDILCFGStructurizer *passRep, + RegiT dstReg, RegiT src1Reg, + RegiT src2Reg) { + const AMDILInstrInfo *tii = + static_cast(passRep->getTargetInstrInfo()); + MachineInstr *newInstr = + blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc()); + + MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target + MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value + MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value + + blk->insert(instrPos, newInstr); + SHOWNEWINSTR(newInstr); + + } //insertCompareInstrBefore + + static void cloneSuccessorList(MachineBasicBlock *dstBlk, + MachineBasicBlock *srcBlk) { + for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(), + iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) { + dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of + } + } //cloneSuccessorList + + static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) { + MachineFunction *func = srcBlk->getParent(); + MachineBasicBlock *newBlk = func->CreateMachineBasicBlock(); + func->push_back(newBlk); //insert to function + //newBlk->setNumber(srcBlk->getNumber()); + for (MachineBasicBlock::iterator iter = srcBlk->begin(), + iterEnd = srcBlk->end(); + iter != iterEnd; ++iter) { + MachineInstr *instr = 
func->CloneMachineInstr(iter); + newBlk->push_back(instr); + } + return newBlk; + } + + //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because + //the AMDIL instruction is not recognized as terminator fix this and retire + //this routine + static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk, + MachineBasicBlock *oldBlk, + MachineBasicBlock *newBlk) { + MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk); + if (branchInstr && isCondBranch(branchInstr) && + getExplicitBranch(branchInstr) == oldBlk) { + setExplicitBranch(branchInstr, newBlk); + } + } + + static void wrapup(MachineBasicBlock *entryBlk) { + assert((!entryBlk->getParent()->getJumpTableInfo() + || entryBlk->getParent()->getJumpTableInfo()->isEmpty()) + && "found a jump table"); + + //collect continue right before endloop + SmallVector contInstr; + MachineBasicBlock::iterator pre = entryBlk->begin(); + MachineBasicBlock::iterator iterEnd = entryBlk->end(); + MachineBasicBlock::iterator iter = pre; + while (iter != iterEnd) { + if (pre->getOpcode() == AMDGPU::CONTINUE + && iter->getOpcode() == AMDGPU::ENDLOOP) { + contInstr.push_back(pre); + } + pre = iter; + ++iter; + } //end while + + //delete continue right before endloop + for (unsigned i = 0; i < contInstr.size(); ++i) { + contInstr[i]->eraseFromParent(); + } + + // TODO to fix up jump table so later phase won't be confused. if + // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but + // there isn't such an interface yet. 
alternatively, replace all the other + // blocks in the jump table with the entryBlk //} + + } //wrapup + + static MachineDominatorTree *getDominatorTree(AMDILCFGStructurizer &pass) { + return &pass.getAnalysis(); + } + + static MachinePostDominatorTree* + getPostDominatorTree(AMDILCFGStructurizer &pass) { + return &pass.getAnalysis(); + } + + static MachineLoopInfo *getLoopInfo(AMDILCFGStructurizer &pass) { + return &pass.getAnalysis(); + } +}; // template class CFGStructTraits +} //end of namespace llvm + +// createAMDILCFGPreparationPass- Returns a pass +FunctionPass *llvm::createAMDILCFGPreparationPass(TargetMachine &tm + AMDIL_OPT_LEVEL_DECL) { + return new AMDILCFGPrepare(tm AMDIL_OPT_LEVEL_VAR); +} + +bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func) { + return llvmCFGStruct::CFGStructurizer().prepare(func, + *this, + TRI); +} + +// createAMDILCFGStructurizerPass- Returns a pass +FunctionPass *llvm::createAMDILCFGStructurizerPass(TargetMachine &tm + AMDIL_OPT_LEVEL_DECL) { + return new AMDILCFGPerform(tm AMDIL_OPT_LEVEL_VAR); +} + +bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func) { + return llvmCFGStruct::CFGStructurizer().run(func, + *this, + TRI); +} + +//end of file newline goes below + diff --git a/lib/Target/AMDGPU/AMDILCallingConv.td b/lib/Target/AMDGPU/AMDILCallingConv.td new file mode 100644 index 0000000..371d02a --- /dev/null +++ b/lib/Target/AMDGPU/AMDILCallingConv.td @@ -0,0 +1,42 @@ +//===- AMDILCallingConv.td - Calling Conventions AMDIL -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This describes the calling conventions for the AMDIL architectures. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Return Value Calling Conventions +//===----------------------------------------------------------------------===// + +// AMDIL 32-bit C return-value convention. +def RetCC_AMDIL32 : CallingConv<[ + // Since IL has no return values, all values can be emulated on the stack + // The stack can then be mapped to a number of sequential virtual registers + // in IL + + // Integer and FP scalar values get put on the stack at 16-byte alignment + // but with a size of 4 bytes + CCIfType<[i32, f32], CCAssignToReg< + [ + R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20 +]> >, CCAssignToStack<16, 16>]>; + +// AMDIL 32-bit C Calling convention. +def CC_AMDIL32 : CallingConv<[ + // Since IL has parameter values, all values can be emulated on the stack + // The stack can then be mapped to a number of sequential virtual registers + // in IL + // Integer and FP scalar values get put on the stack at 16-byte alignment + // but with a size of 4 bytes + // Integer and FP scalar values get put on the stack at 16-byte alignment + // but with a size of 4 bytes + CCIfType<[i32, f32], CCAssignToReg< +[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20 +]> >, CCAssignToStack<16, 16>]>; diff --git a/lib/Target/AMDGPU/AMDILCodeEmitter.h b/lib/Target/AMDGPU/AMDILCodeEmitter.h new file mode 100644 index 0000000..0c7ae59 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILCodeEmitter.h @@ -0,0 +1,48 @@ +//===-- AMDILCodeEmitter.h - AMDIL Code Emitter interface -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// CodeEmitter interface for R600 and SI codegen. +// +//===----------------------------------------------------------------------===// + +#ifndef AMDILCODEEMITTER_H +#define AMDILCODEEMITTER_H + +namespace llvm { + + class AMDILCodeEmitter { + public: + uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; + virtual uint64_t getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const { return 0; } + virtual unsigned GPR4AlignEncode(const MachineInstr &MI, + unsigned OpNo) const { + return 0; + } + virtual unsigned GPR2AlignEncode(const MachineInstr &MI, + unsigned OpNo) const { + return 0; + } + virtual uint64_t VOPPostEncode(const MachineInstr &MI, + uint64_t Value) const { + return Value; + } + virtual uint64_t i32LiteralEncode(const MachineInstr &MI, + unsigned OpNo) const { + return 0; + } + virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo) + const { + return 0; + } + }; + +} // End namespace llvm + +#endif // AMDILCODEEMITTER_H diff --git a/lib/Target/AMDGPU/AMDILDevice.cpp b/lib/Target/AMDGPU/AMDILDevice.cpp new file mode 100644 index 0000000..4294a8b --- /dev/null +++ b/lib/Target/AMDGPU/AMDILDevice.cpp @@ -0,0 +1,137 @@ +//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +#include "AMDILDevice.h" +#include "AMDILSubtarget.h" + +using namespace llvm; +// Default implementation for all of the classes. 
+AMDILDevice::AMDILDevice(AMDILSubtarget *ST) : mSTM(ST) +{ + mHWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities); + mSWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities); + setCaps(); + mDeviceFlag = OCL_DEVICE_ALL; +} + +AMDILDevice::~AMDILDevice() +{ + mHWBits.clear(); + mSWBits.clear(); +} + +size_t AMDILDevice::getMaxGDSSize() const +{ + return 0; +} + +uint32_t +AMDILDevice::getDeviceFlag() const +{ + return mDeviceFlag; +} + +size_t AMDILDevice::getMaxNumCBs() const +{ + if (usesHardware(AMDILDeviceInfo::ConstantMem)) { + return HW_MAX_NUM_CB; + } + + return 0; +} + +size_t AMDILDevice::getMaxCBSize() const +{ + if (usesHardware(AMDILDeviceInfo::ConstantMem)) { + return MAX_CB_SIZE; + } + + return 0; +} + +size_t AMDILDevice::getMaxScratchSize() const +{ + return 65536; +} + +uint32_t AMDILDevice::getStackAlignment() const +{ + return 16; +} + +void AMDILDevice::setCaps() +{ + mSWBits.set(AMDILDeviceInfo::HalfOps); + mSWBits.set(AMDILDeviceInfo::ByteOps); + mSWBits.set(AMDILDeviceInfo::ShortOps); + mSWBits.set(AMDILDeviceInfo::HW64BitDivMod); + if (mSTM->isOverride(AMDILDeviceInfo::NoInline)) { + mSWBits.set(AMDILDeviceInfo::NoInline); + } + if (mSTM->isOverride(AMDILDeviceInfo::MacroDB)) { + mSWBits.set(AMDILDeviceInfo::MacroDB); + } + if (mSTM->isOverride(AMDILDeviceInfo::Debug)) { + mSWBits.set(AMDILDeviceInfo::ConstantMem); + } else { + mHWBits.set(AMDILDeviceInfo::ConstantMem); + } + if (mSTM->isOverride(AMDILDeviceInfo::Debug)) { + mSWBits.set(AMDILDeviceInfo::PrivateMem); + } else { + mHWBits.set(AMDILDeviceInfo::PrivateMem); + } + if (mSTM->isOverride(AMDILDeviceInfo::BarrierDetect)) { + mSWBits.set(AMDILDeviceInfo::BarrierDetect); + } + mSWBits.set(AMDILDeviceInfo::ByteLDSOps); + mSWBits.set(AMDILDeviceInfo::LongOps); +} + +AMDILDeviceInfo::ExecutionMode +AMDILDevice::getExecutionMode(AMDILDeviceInfo::Caps Caps) const +{ + if (mHWBits[Caps]) { + assert(!mSWBits[Caps] && "Cannot set both SW and HW caps"); + return AMDILDeviceInfo::Hardware; + } + 
+ if (mSWBits[Caps]) { + assert(!mHWBits[Caps] && "Cannot set both SW and HW caps"); + return AMDILDeviceInfo::Software; + } + + return AMDILDeviceInfo::Unsupported; + +} + +bool AMDILDevice::isSupported(AMDILDeviceInfo::Caps Mode) const +{ + return getExecutionMode(Mode) != AMDILDeviceInfo::Unsupported; +} + +bool AMDILDevice::usesHardware(AMDILDeviceInfo::Caps Mode) const +{ + return getExecutionMode(Mode) == AMDILDeviceInfo::Hardware; +} + +bool AMDILDevice::usesSoftware(AMDILDeviceInfo::Caps Mode) const +{ + return getExecutionMode(Mode) == AMDILDeviceInfo::Software; +} + +std::string +AMDILDevice::getDataLayout() const +{ + return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" + "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" + "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" + "-v512:512:512-v1024:1024:1024-v2048:2048:2048" + "-n8:16:32:64"); +} diff --git a/lib/Target/AMDGPU/AMDILDevice.h b/lib/Target/AMDGPU/AMDILDevice.h new file mode 100644 index 0000000..706dd82 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILDevice.h @@ -0,0 +1,116 @@ +//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Interface for the subtarget data classes. +// +//===----------------------------------------------------------------------===// +// This file will define the interface that each generation needs to +// implement in order to correctly answer queries on the capabilities of the +// specific hardware. 
+//===----------------------------------------------------------------------===// +#ifndef _AMDILDEVICEIMPL_H_ +#define _AMDILDEVICEIMPL_H_ +#include "AMDIL.h" +#include "llvm/ADT/BitVector.h" + +namespace llvm { + class AMDILSubtarget; + class MCStreamer; +//===----------------------------------------------------------------------===// +// Interface for data that is specific to a single device +//===----------------------------------------------------------------------===// +class AMDILDevice { +public: + AMDILDevice(AMDILSubtarget *ST); + virtual ~AMDILDevice(); + + // Enum values for the various memory types. + enum { + RAW_UAV_ID = 0, + ARENA_UAV_ID = 1, + LDS_ID = 2, + GDS_ID = 3, + SCRATCH_ID = 4, + CONSTANT_ID = 5, + GLOBAL_ID = 6, + MAX_IDS = 7 + } IO_TYPE_IDS; + + // Returns the max LDS size that the hardware supports. Size is in + // bytes. + virtual size_t getMaxLDSSize() const = 0; + + // Returns the max GDS size that the hardware supports if the GDS is + // supported by the hardware. Size is in bytes. + virtual size_t getMaxGDSSize() const; + + // Returns the max number of hardware constant address spaces that + // are supported by this device. + virtual size_t getMaxNumCBs() const; + + // Returns the max number of bytes a single hardware constant buffer + // can support. Size is in bytes. + virtual size_t getMaxCBSize() const; + + // Returns the max number of bytes allowed by the hardware scratch + // buffer. Size is in bytes. + virtual size_t getMaxScratchSize() const; + + // Get the flag that corresponds to the device. + virtual uint32_t getDeviceFlag() const; + + // Returns the number of work-items that exist in a single hardware + // wavefront. + virtual size_t getWavefrontSize() const = 0; + + // Get the generational name of this specific device. + virtual uint32_t getGeneration() const = 0; + + // Get the stack alignment of this specific device. + virtual uint32_t getStackAlignment() const; + + // Get the resource ID for this specific device. 
+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0; + + // Get the max number of UAV's for this device. + virtual uint32_t getMaxNumUAVs() const = 0; + + + // API utilizing more detailed capabilities of each family of + // cards. If a capability is supported, then either usesHardware or + // usesSoftware returns true. If usesHardware returns true, then + // usesSoftware must return false for the same capability. Hardware + // execution means that the feature is done natively by the hardware + // and is not emulated by the software. Software execution means + // that the feature could be done in the hardware, but there is + // software that emulates it with possibly using the hardware for + // support since the hardware does not fully comply with OpenCL + // specs. + bool isSupported(AMDILDeviceInfo::Caps Mode) const; + bool usesHardware(AMDILDeviceInfo::Caps Mode) const; + bool usesSoftware(AMDILDeviceInfo::Caps Mode) const; + virtual std::string getDataLayout() const; + static const unsigned int MAX_LDS_SIZE_700 = 16384; + static const unsigned int MAX_LDS_SIZE_800 = 32768; + static const unsigned int WavefrontSize = 64; + static const unsigned int HalfWavefrontSize = 32; + static const unsigned int QuarterWavefrontSize = 16; +protected: + virtual void setCaps(); + llvm::BitVector mHWBits; + llvm::BitVector mSWBits; + AMDILSubtarget *mSTM; + uint32_t mDeviceFlag; +private: + AMDILDeviceInfo::ExecutionMode + getExecutionMode(AMDILDeviceInfo::Caps Caps) const; +}; // AMDILDevice + +} // namespace llvm +#endif // _AMDILDEVICEIMPL_H_ diff --git a/lib/Target/AMDGPU/AMDILDeviceInfo.cpp b/lib/Target/AMDGPU/AMDILDeviceInfo.cpp new file mode 100644 index 0000000..cbf5b51 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILDeviceInfo.cpp @@ -0,0 +1,93 @@ +//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Function that creates DeviceInfo from a device name and other information. +// +//==-----------------------------------------------------------------------===// +#include "AMDILDevices.h" +#include "AMDILSubtarget.h" + +using namespace llvm; +namespace llvm { +namespace AMDILDeviceInfo { + AMDILDevice* +getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit) +{ + if (deviceName.c_str()[2] == '7') { + switch (deviceName.c_str()[3]) { + case '1': + return new AMDIL710Device(ptr); + case '7': + return new AMDIL770Device(ptr); + default: + return new AMDIL7XXDevice(ptr); + }; + } else if (deviceName == "cypress") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDILCypressDevice(ptr); + } else if (deviceName == "juniper") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDILEvergreenDevice(ptr); + } else if (deviceName == "redwood") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDILRedwoodDevice(ptr); + } else if (deviceName == "cedar") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDILCedarDevice(ptr); + } else if (deviceName == "barts" + || deviceName == "turks") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); 
+#endif + return new AMDILNIDevice(ptr); + } else if (deviceName == "cayman") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDILCaymanDevice(ptr); + } else if (deviceName == "caicos") { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDILNIDevice(ptr); + } else if (deviceName == "SI") { + return new AMDILSIDevice(ptr); + } else { +#if DEBUG + assert(!is64bit && "This device does not support 64bit pointers!"); + assert(!is64on32bit && "This device does not support 64bit" + " on 32bit pointers!"); +#endif + return new AMDIL7XXDevice(ptr); + } +} +} // End namespace AMDILDeviceInfo +} // End namespace llvm diff --git a/lib/Target/AMDGPU/AMDILDeviceInfo.h b/lib/Target/AMDGPU/AMDILDeviceInfo.h new file mode 100644 index 0000000..06ac432 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILDeviceInfo.h @@ -0,0 +1,89 @@ +//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +#ifndef _AMDILDEVICEINFO_H_ +#define _AMDILDEVICEINFO_H_ + + +#include + +namespace llvm +{ + class AMDILDevice; + class AMDILSubtarget; + namespace AMDILDeviceInfo + { + // Each Capabilities can be executed using a hardware instruction, + // emulated with a sequence of software instructions, or not + // supported at all. 
+ enum ExecutionMode { + Unsupported = 0, // Unsupported feature on the card(Default value) + Software, // This is the execution mode that is set if the + // feature is emulated in software + Hardware // This execution mode is set if the feature exists + // natively in hardware + }; + + // Any changes to this need to have a corresponding update to the + // twiki page GPUMetadataABI + enum Caps { + HalfOps = 0x1, // Half float is supported or not. + DoubleOps = 0x2, // Double is supported or not. + ByteOps = 0x3, // Byte(char) is supported or not. + ShortOps = 0x4, // Short is supported or not. + LongOps = 0x5, // Long is supported or not. + Images = 0x6, // Images are supported or not. + ByteStores = 0x7, // ByteStores available(!HD4XXX). + ConstantMem = 0x8, // Constant/CB memory. + LocalMem = 0x9, // Local/LDS memory. + PrivateMem = 0xA, // Scratch/Private/Stack memory. + RegionMem = 0xB, // OCL GDS Memory Extension. + FMA = 0xC, // Use HW FMA or SW FMA. + ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023. + MultiUAV = 0xE, // Use for UAV per Pointer 0-7. + Reserved0 = 0xF, // ReservedFlag + NoAlias = 0x10, // Cached loads. + Signed24BitOps = 0x11, // Peephole Optimization. + // Debug mode implies that no hardware features or optimizations + // are performed and that all memory accesses go through a single + // uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX). + Debug = 0x12, // Debug mode is enabled. + CachedMem = 0x13, // Cached mem is available or not. + BarrierDetect = 0x14, // Detect duplicate barriers. + Reserved1 = 0x15, // Reserved flag + ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available. + ArenaVectors = 0x17, // Flag to specify if vector loads from arena work. + TmrReg = 0x18, // Flag to specify if Tmr register is supported. + NoInline = 0x19, // Flag to specify that no inlining should occur. + MacroDB = 0x1A, // Flag to specify that backend handles macrodb. + HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
+ ArenaUAV = 0x1C, // Flag to specify that arena uav is supported. + PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's. + // If more capabilities are required, then + // this number needs to be increased. + // All capabilities must come before this + // number. + MaxNumberCapabilities = 0x20 + }; + // These have to be in order with the older generations + // having the lower number enumerations. + enum Generation { + HD4XXX = 0, // 7XX based devices. + HD5XXX, // Evergreen based devices. + HD6XXX, // NI/Evergreen+ based devices. + HD7XXX, + HDTEST, // Experimental feature testing device. + HDNUMGEN + }; + + + AMDILDevice* + getDeviceFromName(const std::string &name, AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false); + } // namespace AMDILDeviceInfo +} // namespace llvm +#endif // _AMDILDEVICEINFO_H_ diff --git a/lib/Target/AMDGPU/AMDILDevices.h b/lib/Target/AMDGPU/AMDILDevices.h new file mode 100644 index 0000000..cfcc330 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILDevices.h @@ -0,0 +1,19 @@ +//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +#ifndef __AMDIL_DEVICES_H_ +#define __AMDIL_DEVICES_H_ +// Include all of the device specific header files +// This file is for Internal use only!
+#include "AMDIL7XXDevice.h"
+#include "AMDILDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILSIDevice.h"
+
+#endif // __AMDIL_DEVICES_H_
diff --git a/lib/Target/AMDGPU/AMDILEnumeratedTypes.td b/lib/Target/AMDGPU/AMDILEnumeratedTypes.td
new file mode 100644
index 0000000..f10936b
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILEnumeratedTypes.td
@@ -0,0 +1,522 @@
+//===-- AMDILEnumeratedTypes.td - IL Type definitions --*- tablegen -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+// ILEnumeratedTypes.td - The IL Enumerated Types
+//===--------------------------------------------------------------------===//
+
+// Section 5.1 IL Shader
+// Wraps an 8-bit shader-type value.
+class ILShader<bits<8> val> {
+    bits<8> Value = val;
+}
+// Table 5-1
+def IL_SHADER_PIXEL : ILShader<0>;
+def IL_SHADER_COMPUTE : ILShader<1>;
+
+// Section 5.2 IL RegType
+// Wraps a 6-bit register-type value.
+class ILRegType<bits<6> val> {
+    bits<6> Value = val;
+}
+// Table 5-2
+def IL_REGTYPE_TEMP : ILRegType<0>;
+def IL_REGTYPE_WINCOORD : ILRegType<1>;
+def IL_REGTYPE_CONST_BUF : ILRegType<2>;
+def IL_REGTYPE_LITERAL : ILRegType<3>;
+def IL_REGTYPE_ITEMP : ILRegType<4>;
+def IL_REGTYPE_GLOBAL : ILRegType<5>;
+
+// Section 5.3 IL Component Select
+// Pairs a 3-bit component-select value with its textual form.
+class ILComponentSelect<bits<3> val, string text> {
+    bits<3> Value = val;
+    string Text = text;
+}
+// Table 5-3
+def IL_COMPSEL_X : ILComponentSelect<0, "x">;
+def IL_COMPSEL_Y : ILComponentSelect<1, "y">;
+def IL_COMPSEL_Z : ILComponentSelect<2, "z">;
+def IL_COMPSEL_W : ILComponentSelect<3, "w">;
+def IL_COMPSEL_0 : ILComponentSelect<4, "0">;
+def IL_COMPSEL_1 : ILComponentSelect<5, "1">;
+
+// Section 5.4 IL Mod Dst Comp
+// Pairs a 2-bit destination-component modifier with its textual form.
+class ILModDstComp<bits<2> val, string text> {
+    bits<2> Value = val;
+    string Text = text;
+}
+// Table 5-4
+def IL_MODCOMP_NOWRITE : ILModDstComp<0, "_">;
+def IL_MODCOMP_WRITE_X :
ILModDstComp<1, "x">;
+def IL_MODCOMP_WRITE_y : ILModDstComp<1, "y">;
+def IL_MODCOMP_WRITE_z : ILModDstComp<1, "z">;
+def IL_MODCOMP_WRITE_w : ILModDstComp<1, "w">;
+def IL_MODCOMP_0 : ILModDstComp<2, "0">;
+def IL_MODCOMP_1 : ILModDstComp<3, "1">;
+
+// Section 5.5 IL Import Usage
+// Pairs a 1-bit import-usage value with its textual form.
+class ILImportUsage<bits<1> val, string usage> {
+    bits<1> Value = val;
+    string Text = usage;
+}
+// Table 5-5
+def IL_IMPORTUSAGE_WINCOORD : ILImportUsage<0, "_usage(wincoord)">;
+
+// Section 5.6 IL Shift Scale
+// Pairs a 4-bit shift-scale value with its instruction suffix.
+class ILShiftScale<bits<4> val, string scale> {
+    bits<4> Value = val;
+    string Text = scale;
+}
+
+// Table 5-6
+def IL_SHIFT_NONE : ILShiftScale<0, "">;
+def IL_SHIFT_X2 : ILShiftScale<1, "_x2">;
+def IL_SHIFT_X4 : ILShiftScale<2, "_x4">;
+def IL_SHIFT_X8 : ILShiftScale<3, "_x8">;
+def IL_SHIFT_D2 : ILShiftScale<4, "_d2">;
+def IL_SHIFT_D4 : ILShiftScale<5, "_d4">;
+def IL_SHIFT_D8 : ILShiftScale<6, "_d8">;
+
+// Section 5.7 IL Divide Component
+// Pairs a 3-bit divide-component value with its instruction suffix.
+class ILDivComp<bits<3> val, string divcomp> {
+    bits<3> Value = val;
+    string Text = divcomp;
+}
+
+// Table 5-7
+def IL_DIVCOMP_NONE : ILDivComp<0, "_divcomp(none)">;
+def IL_DIVCOMP_Y : ILDivComp<1, "_divcomp(y)">;
+def IL_DIVCOMP_Z : ILDivComp<2, "_divcomp(z)">;
+def IL_DIVCOMP_W : ILDivComp<3, "_divcomp(w)">;
+//def IL_DIVCOMP_UNKNOWN : ILDivComp<4, "_divcomp(unknown)">;
+
+// Section 5.8 IL Relational Op
+// Pairs a 3-bit relational-op value with its instruction suffix.
+class ILRelOp<bits<3> val, string op> {
+    bits<3> Value = val;
+    string Text = op;
+}
+
+// Table 5-8
+def IL_RELOP_EQ : ILRelOp<0, "_relop(eq)">;
+def IL_RELOP_NE : ILRelOp<1, "_relop(ne)">;
+def IL_RELOP_GT : ILRelOp<2, "_relop(gt)">;
+def IL_RELOP_GE : ILRelOp<3, "_relop(ge)">;
+def IL_RELOP_LT : ILRelOp<4, "_relop(lt)">;
+def IL_RELOP_LE : ILRelOp<5, "_relop(le)">;
+
+// Section 5.9 IL Zero Op
+// Pairs a 3-bit zero-op value with its instruction suffix.
+class ILZeroOp<bits<3> val, string behavior> {
+    bits<3> Value = val;
+    string Text = behavior;
+}
+
+// Table 5-9
+def IL_ZEROOP_FLTMAX : ILZeroOp<0, "_zeroop(fltmax)">;
+def IL_ZEROOP_0 : ILZeroOp<1, "_zeroop(zero)">;
+def IL_ZEROOP_INFINITY : ILZeroOp<2, "_zeroop(infinity)">;
+def IL_ZEROOP_INF_ELSE_MAX : ILZeroOp<3, "_zeroop(inf_else_max)">;
+
+// Section 5.10 IL Cmp Value
+// Pairs a 3-bit compare-value selector with its literal text.
+class ILCmpValue<bits<3> val, string num> {
+    bits<3> Value = val;
+    string Text = num;
+}
+
+// Table 5-10
+def IL_CMPVAL_0_0 : ILCmpValue<0, "0.0">;
+def IL_CMPVAL_0_5 : ILCmpValue<1, "0.5">;
+def IL_CMPVAL_1_0 : ILCmpValue<2, "1.0">;
+def IL_CMPVAL_NEG_0_5 : ILCmpValue<3, "-0.5">;
+def IL_CMPVAL_NEG_1_0 : ILCmpValue<4, "-1.0">;
+
+// Section 5.11 IL Addressing
+// Wraps a 3-bit addressing-mode value.
+class ILAddressing<bits<3> val> {
+    bits<3> Value = val;
+}
+
+// Table 5-11
+def IL_ADDR_ABSOLUTE : ILAddressing<0>;
+def IL_ADDR_RELATIVE : ILAddressing<1>;
+def IL_ADDR_REG_RELATIVE : ILAddressing<2>;
+
+// Section 5.11 IL Element Format
+// Wraps a 5-bit element-format value.
+class ILElementFormat<bits<5> val> {
+    bits<5> Value = val;
+}
+
+// Table 5-11
+def IL_ELEMENTFORMAT_UNKNOWN : ILElementFormat<0>;
+def IL_ELEMENTFORMAT_SNORM : ILElementFormat<1>;
+def IL_ELEMENTFORMAT_UNORM : ILElementFormat<2>;
+def IL_ELEMENTFORMAT_SINT : ILElementFormat<3>;
+def IL_ELEMENTFORMAT_UINT : ILElementFormat<4>;
+def IL_ELEMENTFORMAT_FLOAT : ILElementFormat<5>;
+def IL_ELEMENTFORMAT_SRGB : ILElementFormat<6>;
+def IL_ELEMENTFORMAT_MIXED : ILElementFormat<7>;
+def IL_ELEMENTFORMAT_Last : ILElementFormat<8>;
+
+// Section 5.12 IL Op Code
+// Pairs a 16-bit opcode value (default -1) with the instruction text.
+class ILOpCode<bits<16> val = -1, string cmd> {
+    bits<16> Value = val;
+    string Text = cmd;
+}
+
+// Table 5-12
+def IL_DCL_CONST_BUFFER : ILOpCode<0, "dcl_cb">;
+def IL_DCL_INDEXED_TEMP_ARRAY : ILOpCode<1, "dcl_index_temp_array">;
+def IL_DCL_INPUT : ILOpCode<2, "dcl_input">;
+def IL_DCL_LITERAL : ILOpCode<3, "dcl_literal">;
+def IL_DCL_OUTPUT : ILOpCode<4, "dcl_output">;
+def IL_DCL_RESOURCE : ILOpCode<5, "dcl_resource">;
+def IL_OP_ABS : ILOpCode<6, "abs">;
+def IL_OP_ADD : ILOpCode<7, "add">;
+def IL_OP_AND : ILOpCode<8, "iand">;
+def IL_OP_BREAK : ILOpCode<9, "break">;
+def IL_OP_BREAK_LOGICALNZ : ILOpCode<10, "break_logicalnz">;
+def IL_OP_BREAK_LOGICALZ : ILOpCode<11, "break_logicalz">;
+def IL_OP_BREAKC : ILOpCode<12,
"breakc">; +def IL_OP_CALL : ILOpCode<13, "call">; +def IL_OP_CALL_LOGICALNZ : ILOpCode<14, "call_logicalnz">; +def IL_OP_CALL_LOGICALZ : ILOpCode<15, "call_logicalz">; +def IL_OP_CASE : ILOpCode<16, "case">; +def IL_OP_CLG : ILOpCode<17, "clg">; +def IL_OP_CMOV : ILOpCode<18, "cmov">; +def IL_OP_CMOV_LOGICAL : ILOpCode<19, "cmov_logical">; +def IL_OP_CMP : ILOpCode<20, "cmp">; +def IL_OP_CONTINUE : ILOpCode<21, "continue">; +def IL_OP_CONTINUE_LOGICALNZ : ILOpCode<22, "continue_logicalnz">; +def IL_OP_CONTINUE_LOGICALZ : ILOpCode<23, "continue_logicalz">; +def IL_OP_CONTINUEC : ILOpCode<24, "continuec">; +def IL_OP_COS : ILOpCode<25, "cos">; +def IL_OP_COS_VEC : ILOpCode<26, "cos_vec">; +def IL_OP_D_2_F : ILOpCode<27, "d2f">; +def IL_OP_D_ADD : ILOpCode<28, "dadd">; +def IL_OP_D_EQ : ILOpCode<29, "deq">; +def IL_OP_D_FRC : ILOpCode<30, "dfrac">; +def IL_OP_D_FREXP : ILOpCode<31, "dfrexp">; +def IL_OP_D_GE : ILOpCode<32, "dge">; +def IL_OP_D_LDEXP : ILOpCode<33, "dldexp">; +def IL_OP_D_LT : ILOpCode<34, "dlt">; +def IL_OP_D_MAD : ILOpCode<35, "dmad">; +def IL_OP_D_MUL : ILOpCode<36, "dmul">; +def IL_OP_D_NE : ILOpCode<37, "dne">; +def IL_OP_DEFAULT : ILOpCode<38, "default">; +def IL_OP_DISCARD_LOGICALNZ : ILOpCode<39, "discard_logicalnz">; +def IL_OP_DISCARD_LOGICALZ : ILOpCode<40, "discard_logicalz">; +def IL_OP_DIV : ILOpCode<41, "div_zeroop(infinity)">; +def IL_OP_DP2 : ILOpCode<42, "dp2">; +def IL_OP_DP3 : ILOpCode<43, "dp3">; +def IL_OP_DP4 : ILOpCode<44, "dp4">; +def IL_OP_ELSE : ILOpCode<45, "else">; +def IL_OP_END : ILOpCode<46, "end">; +def IL_OP_ENDFUNC : ILOpCode<47, "endfunc">; +def IL_OP_ENDIF : ILOpCode<48, "endif">; +def IL_OP_ENDLOOP : ILOpCode<49, "endloop">; +def IL_OP_ENDMAIN : ILOpCode<50, "endmain">; +def IL_OP_ENDSWITCH : ILOpCode<51, "endswitch">; +def IL_OP_EQ : ILOpCode<52, "eq">; +def IL_OP_EXP : ILOpCode<53, "exp">; +def IL_OP_EXP_VEC : ILOpCode<54, "exp_vec">; +def IL_OP_F_2_D : ILOpCode<55, "f2d">; +def IL_OP_FLR : ILOpCode<56, "flr">; 
+def IL_OP_FRC : ILOpCode<57, "frc">; +def IL_OP_FTOI : ILOpCode<58, "ftoi">; +def IL_OP_FTOU : ILOpCode<59, "ftou">; +def IL_OP_FUNC : ILOpCode<60, "func">; +def IL_OP_GE : ILOpCode<61, "ge">; +def IL_OP_I_ADD : ILOpCode<62, "iadd">; +def IL_OP_I_EQ : ILOpCode<63, "ieq">; +def IL_OP_I_GE : ILOpCode<64, "ige">; +def IL_OP_I_LT : ILOpCode<65, "ilt">; +def IL_OP_I_MAD : ILOpCode<66, "imad">; +def IL_OP_I_MAX : ILOpCode<67, "imax">; +def IL_OP_I_MIN : ILOpCode<68, "imin">; +def IL_OP_I_MUL : ILOpCode<69, "imul">; +def IL_OP_I_MUL_HIGH : ILOpCode<70, "imul_high">; +def IL_OP_I_NE : ILOpCode<71, "ine">; +def IL_OP_I_NEGATE : ILOpCode<72, "inegate">; +def IL_OP_I_NOT : ILOpCode<73, "inot">; +def IL_OP_I_OR : ILOpCode<74, "ior">; +def IL_OP_I_SHL : ILOpCode<75, "ishl">; +def IL_OP_I_SHR : ILOpCode<76, "ishr">; +def IL_OP_I_XOR : ILOpCode<77, "ixor">; +def IL_OP_IF_LOGICALNZ : ILOpCode<78, "if_logicalnz">; +def IL_OP_IF_LOGICALZ : ILOpCode<79, "if_logicalz">; +def IL_OP_IFC : ILOpCode<80, "ifc">; +def IL_OP_ITOF : ILOpCode<81, "itof">; +def IL_OP_LN : ILOpCode<82, "ln">; +def IL_OP_LOG : ILOpCode<83, "log">; +def IL_OP_LOG_VEC : ILOpCode<84, "log_vec">; +def IL_OP_LOOP : ILOpCode<85, "loop">; +def IL_OP_LT : ILOpCode<86, "lt">; +def IL_OP_MAD : ILOpCode<87, "mad_ieee">; +def IL_OP_MAX : ILOpCode<88, "max_ieee">; +def IL_OP_MIN : ILOpCode<89, "min_ieee">; +def IL_OP_MOD : ILOpCode<90, "mod_ieee">; +def IL_OP_MOV : ILOpCode<91, "mov">; +def IL_OP_MUL_IEEE : ILOpCode<92, "mul_ieee">; +def IL_OP_NE : ILOpCode<93, "ne">; +def IL_OP_NRM : ILOpCode<94, "nrm_nrm4_zeroop(zero)">; +def IL_OP_POW : ILOpCode<95, "pow">; +def IL_OP_RCP : ILOpCode<96, "rcp">; +def IL_OP_RET : ILOpCode<97, "ret">; +def IL_OP_RET_DYN : ILOpCode<98, "ret_dyn">; +def IL_OP_RET_LOGICALNZ : ILOpCode<99, "ret_logicalnz">; +def IL_OP_RET_LOGICALZ : ILOpCode<100, "ret_logicalz">; +def IL_OP_RND : ILOpCode<101, "rnd">; +def IL_OP_ROUND_NEAR : ILOpCode<102, "round_nearest">; +def IL_OP_ROUND_NEG_INF : 
ILOpCode<103, "round_neginf">; +def IL_OP_ROUND_POS_INF : ILOpCode<104, "round_plusinf">; +def IL_OP_ROUND_ZERO : ILOpCode<105, "round_z">; +def IL_OP_RSQ : ILOpCode<106, "rsq">; +def IL_OP_RSQ_VEC : ILOpCode<107, "rsq_vec">; +def IL_OP_SAMPLE : ILOpCode<108, "sample">; +def IL_OP_SAMPLE_L : ILOpCode<109, "sample_l">; +def IL_OP_SET : ILOpCode<110, "set">; +def IL_OP_SGN : ILOpCode<111, "sgn">; +def IL_OP_SIN : ILOpCode<112, "sin">; +def IL_OP_SIN_VEC : ILOpCode<113, "sin_vec">; +def IL_OP_SUB : ILOpCode<114, "sub">; +def IL_OP_SWITCH : ILOpCode<115, "switch">; +def IL_OP_TRC : ILOpCode<116, "trc">; +def IL_OP_U_DIV : ILOpCode<117, "udiv">; +def IL_OP_U_GE : ILOpCode<118, "uge">; +def IL_OP_U_LT : ILOpCode<119, "ult">; +def IL_OP_U_MAD : ILOpCode<120, "umad">; +def IL_OP_U_MAX : ILOpCode<121, "umax">; +def IL_OP_U_MIN : ILOpCode<122, "umin">; +def IL_OP_U_MOD : ILOpCode<123, "umod">; +def IL_OP_U_MUL : ILOpCode<124, "umul">; +def IL_OP_U_MUL_HIGH : ILOpCode<125, "umul_high">; +def IL_OP_U_SHR : ILOpCode<126, "ushr">; +def IL_OP_UTOF : ILOpCode<127, "utof">; +def IL_OP_WHILE : ILOpCode<128, "whileloop">; +// SC IL instructions that are not in CAL IL +def IL_OP_ACOS : ILOpCode<129, "acos">; +def IL_OP_ASIN : ILOpCode<130, "asin">; +def IL_OP_EXN : ILOpCode<131, "exn">; +def IL_OP_UBIT_REVERSE : ILOpCode<132, "ubit_reverse">; +def IL_OP_UBIT_EXTRACT : ILOpCode<133, "ubit_extract">; +def IL_OP_IBIT_EXTRACT : ILOpCode<134, "ibit_extract">; +def IL_OP_SQRT : ILOpCode<135, "sqrt">; +def IL_OP_SQRT_VEC : ILOpCode<136, "sqrt_vec">; +def IL_OP_ATAN : ILOpCode<137, "atan">; +def IL_OP_TAN : ILOpCode<137, "tan">; +def IL_OP_D_DIV : ILOpCode<138, "ddiv">; +def IL_OP_F_NEG : ILOpCode<139, "mov">; +def IL_OP_GT : ILOpCode<140, "gt">; +def IL_OP_LE : ILOpCode<141, "lt">; +def IL_OP_DIST : ILOpCode<142, "dist">; +def IL_OP_LEN : ILOpCode<143, "len">; +def IL_OP_MACRO : ILOpCode<144, "mcall">; +def IL_OP_INTR : ILOpCode<145, "call">; +def IL_OP_I_FFB_HI : ILOpCode<146, "ffb_hi">; 
+def IL_OP_I_FFB_LO : ILOpCode<147, "ffb_lo">; +def IL_OP_BARRIER : ILOpCode<148, "fence_threads_memory_lds">; +def IL_OP_BARRIER_LOCAL : ILOpCode<149, "fence_threads_lds">; +def IL_OP_BARRIER_GLOBAL : ILOpCode<150, "fence_threads_memory">; +def IL_OP_FENCE : ILOpCode<151, "fence_lds_memory">; +def IL_OP_FENCE_READ_ONLY : ILOpCode<152, "fence_lds_mem_read_only">; +def IL_OP_FENCE_WRITE_ONLY : ILOpCode<153, "fence_lds_mem_write_only">; +def IL_PSEUDO_INST : ILOpCode<154, ";Pseudo Op">; +def IL_OP_UNPACK_0 : ILOpCode<155, "unpack0">; +def IL_OP_UNPACK_1 : ILOpCode<156, "unpack1">; +def IL_OP_UNPACK_2 : ILOpCode<157, "unpack2">; +def IL_OP_UNPACK_3 : ILOpCode<158, "unpack3">; +def IL_OP_PI_REDUCE : ILOpCode<159, "pireduce">; +def IL_OP_IBIT_COUNT : ILOpCode<160, "icbits">; +def IL_OP_I_FFB_SGN : ILOpCode<161, "ffb_shi">; +def IL_OP_F2U4 : ILOpCode<162, "f_2_u4">; +def IL_OP_BIT_ALIGN : ILOpCode<163, "bitalign">; +def IL_OP_BYTE_ALIGN : ILOpCode<164, "bytealign">; +def IL_OP_U4_LERP : ILOpCode<165, "u4lerp">; +def IL_OP_SAD : ILOpCode<166, "sad">; +def IL_OP_SAD_HI : ILOpCode<167, "sadhi">; +def IL_OP_SAD4 : ILOpCode<168, "sad4">; +def IL_OP_UBIT_INSERT : ILOpCode<169, "ubit_insert">; +def IL_OP_I_CARRY : ILOpCode<170, "icarry">; +def IL_OP_I_BORROW : ILOpCode<171, "iborrow">; +def IL_OP_U_MAD24 : ILOpCode<172, "umad24">; +def IL_OP_U_MUL24 : ILOpCode<173, "umul24">; +def IL_OP_I_MAD24 : ILOpCode<174, "imad24">; +def IL_OP_I_MUL24 : ILOpCode<175, "imul24">; +def IL_OP_CLAMP : ILOpCode<176, "clamp">; +def IL_OP_LERP : ILOpCode<177, "lrp">; +def IL_OP_FMA : ILOpCode<178, "fma">; +def IL_OP_D_MIN : ILOpCode<179, "dmin">; +def IL_OP_D_MAX : ILOpCode<180, "dmax">; +def IL_OP_D_SQRT : ILOpCode<181, "dsqrt">; +def IL_OP_DP2_ADD : ILOpCode<182, "dp2add">; +def IL_OP_F16_TO_F32 : ILOpCode<183, "f162f">; +def IL_OP_F32_TO_F16 : ILOpCode<184, "f2f16">; +def IL_REG_LOCAL_ID_FLAT : ILOpCode<185, "vTidInGrpFlat">; +def IL_REG_LOCAL_ID : ILOpCode<186, "vTidInGrp">; +def 
IL_REG_GLOBAL_ID_FLAT : ILOpCode<187, "vAbsTidFlag">; +def IL_REG_GLOBAL_ID : ILOpCode<188, "vAbsTid">; +def IL_REG_GROUP_ID_FLAT : ILOpCode<189, "vThreadGrpIDFlat">; +def IL_REG_GROUP_ID : ILOpCode<190, "vThreadGrpID">; +def IL_OP_D_RCP : ILOpCode<191, "drcp_zeroop(infinity)">; +def IL_OP_D_RSQ : ILOpCode<192, "drsq_zeroop(infinity)">; +def IL_OP_D_MOV : ILOpCode<193, "dmov">; +def IL_OP_D_MOVC : ILOpCode<194, "dmovc">; +def IL_OP_NOP : ILOpCode<195, "nop">; +def IL_OP_UAV_ADD : ILOpCode<196, "uav_add">; +def IL_OP_UAV_AND : ILOpCode<197, "uav_and">; +def IL_OP_UAV_MAX : ILOpCode<198, "uav_max">; +def IL_OP_UAV_MIN : ILOpCode<199, "uav_min">; +def IL_OP_UAV_OR : ILOpCode<200, "uav_or">; +def IL_OP_UAV_RSUB : ILOpCode<201, "uav_rsub">; +def IL_OP_UAV_SUB : ILOpCode<202, "uav_sub">; +def IL_OP_UAV_UMAX : ILOpCode<203, "uav_umax">; +def IL_OP_UAV_UMIN : ILOpCode<204, "uav_umin">; +def IL_OP_UAV_XOR : ILOpCode<205, "uav_xor">; +def IL_OP_UAV_INC : ILOpCode<206, "uav_uinc">; +def IL_OP_UAV_DEC : ILOpCode<207, "uav_udec">; +def IL_OP_UAV_CMP : ILOpCode<208, "uav_cmp">; +def IL_OP_UAV_READ_ADD : ILOpCode<209, "uav_read_add">; +def IL_OP_UAV_READ_AND : ILOpCode<210, "uav_read_and">; +def IL_OP_UAV_READ_MAX : ILOpCode<211, "uav_read_max">; +def IL_OP_UAV_READ_MIN : ILOpCode<212, "uav_read_min">; +def IL_OP_UAV_READ_OR : ILOpCode<213, "uav_read_or">; +def IL_OP_UAV_READ_RSUB : ILOpCode<214, "uav_read_rsub">; +def IL_OP_UAV_READ_SUB : ILOpCode<215, "uav_read_sub">; +def IL_OP_UAV_READ_UMAX : ILOpCode<216, "uav_read_umax">; +def IL_OP_UAV_READ_UMIN : ILOpCode<217, "uav_read_umin">; +def IL_OP_UAV_READ_XOR : ILOpCode<218, "uav_read_xor">; +def IL_OP_UAV_READ_INC : ILOpCode<219, "uav_read_uinc">; +def IL_OP_UAV_READ_DEC : ILOpCode<220, "uav_read_udec">; +def IL_OP_UAV_READ_XCHG : ILOpCode<221, "uav_read_xchg">; +def IL_OP_UAV_READ_CMPXCHG : ILOpCode<222, "uav_read_cmp_xchg">; +def IL_OP_LDS_ADD : ILOpCode<223, "lds_add">; +def IL_OP_LDS_AND : ILOpCode<224, "lds_and">; +def 
IL_OP_LDS_MAX : ILOpCode<225, "lds_max">; +def IL_OP_LDS_MIN : ILOpCode<226, "lds_min">; +def IL_OP_LDS_OR : ILOpCode<227, "lds_or">; +def IL_OP_LDS_RSUB : ILOpCode<228, "lds_rsub">; +def IL_OP_LDS_SUB : ILOpCode<229, "lds_sub">; +def IL_OP_LDS_UMAX : ILOpCode<230, "lds_umax">; +def IL_OP_LDS_UMIN : ILOpCode<231, "lds_umin">; +def IL_OP_LDS_XOR : ILOpCode<232, "lds_xor">; +def IL_OP_LDS_INC : ILOpCode<233, "lds_inc">; +def IL_OP_LDS_DEC : ILOpCode<234, "lds_dec">; +def IL_OP_LDS_CMP : ILOpCode<235, "lds_cmp">; +def IL_OP_LDS_READ_ADD : ILOpCode<236, "lds_read_add">; +def IL_OP_LDS_READ_AND : ILOpCode<237, "lds_read_and">; +def IL_OP_LDS_READ_MAX : ILOpCode<238, "lds_read_max">; +def IL_OP_LDS_READ_MIN : ILOpCode<239, "lds_read_min">; +def IL_OP_LDS_READ_OR : ILOpCode<240, "lds_read_or">; +def IL_OP_LDS_READ_RSUB : ILOpCode<241, "lds_read_rsub">; +def IL_OP_LDS_READ_SUB : ILOpCode<242, "lds_read_sub">; +def IL_OP_LDS_READ_UMAX : ILOpCode<243, "lds_read_umax">; +def IL_OP_LDS_READ_UMIN : ILOpCode<244, "lds_read_umin">; +def IL_OP_LDS_READ_XOR : ILOpCode<245, "lds_read_xor">; +def IL_OP_LDS_READ_INC : ILOpCode<246, "lds_read_inc">; +def IL_OP_LDS_READ_DEC : ILOpCode<247, "lds_read_dec">; +def IL_OP_LDS_READ_XCHG : ILOpCode<248, "lds_read_xchg">; +def IL_OP_LDS_READ_CMPXCHG : ILOpCode<249, "lds_read_cmp_xchg">; +def IL_OP_GDS_ADD : ILOpCode<250, "gds_add">; +def IL_OP_GDS_AND : ILOpCode<251, "gds_and">; +def IL_OP_GDS_MAX : ILOpCode<252, "gds_max">; +def IL_OP_GDS_MIN : ILOpCode<253, "gds_min">; +def IL_OP_GDS_OR : ILOpCode<254, "gds_or">; +def IL_OP_GDS_RSUB : ILOpCode<255, "gds_rsub">; +def IL_OP_GDS_SUB : ILOpCode<256, "gds_sub">; +def IL_OP_GDS_UMAX : ILOpCode<257, "gds_umax">; +def IL_OP_GDS_UMIN : ILOpCode<258, "gds_umin">; +def IL_OP_GDS_MSKOR : ILOpCode<259, "gds_mskor">; +def IL_OP_GDS_XOR : ILOpCode<260, "gds_xor">; +def IL_OP_GDS_INC : ILOpCode<261, "gds_inc">; +def IL_OP_GDS_DEC : ILOpCode<262, "gds_dec">; +def IL_OP_GDS_CMP : ILOpCode<263, "gds_cmp">; +def 
IL_OP_GDS_READ_ADD : ILOpCode<264, "gds_read_add">; +def IL_OP_GDS_READ_AND : ILOpCode<265, "gds_read_and">; +def IL_OP_GDS_READ_MAX : ILOpCode<266, "gds_read_max">; +def IL_OP_GDS_READ_MIN : ILOpCode<267, "gds_read_min">; +def IL_OP_GDS_READ_OR : ILOpCode<268, "gds_read_or">; +def IL_OP_GDS_READ_RSUB : ILOpCode<269, "gds_read_rsub">; +def IL_OP_GDS_READ_SUB : ILOpCode<270, "gds_read_sub">; +def IL_OP_GDS_READ_UMAX : ILOpCode<271, "gds_read_umax">; +def IL_OP_GDS_READ_UMIN : ILOpCode<272, "gds_read_umin">; +def IL_OP_GDS_READ_MSKOR : ILOpCode<273, "gds_read_mskor">; +def IL_OP_GDS_READ_XOR : ILOpCode<274, "gds_read_xor">; +def IL_OP_GDS_READ_INC : ILOpCode<275, "gds_read_inc">; +def IL_OP_GDS_READ_DEC : ILOpCode<276, "gds_read_dec">; +def IL_OP_GDS_READ_XCHG : ILOpCode<277, "gds_read_xchg">; +def IL_OP_GDS_READ_CMPXCHG : ILOpCode<278, "gds_read_cmp_xchg">; +def IL_OP_APPEND_BUF_ALLOC : ILOpCode<279, "append_buf_alloc">; +def IL_OP_APPEND_BUF_CONSUME : ILOpCode<280, "append_buf_consume">; +def IL_OP_I64_ADD : ILOpCode<281, "i64add">; +def IL_OP_I64_MAX : ILOpCode<282, "i64max">; +def IL_OP_U64_MAX : ILOpCode<283, "u64max">; +def IL_OP_I64_MIN : ILOpCode<284, "i64min">; +def IL_OP_U64_MIN : ILOpCode<285, "u64min">; +def IL_OP_I64_NEGATE : ILOpCode<286, "i64negate">; +def IL_OP_I64_SHL : ILOpCode<287, "i64shl">; +def IL_OP_I64_SHR : ILOpCode<288, "i64shr">; +def IL_OP_U64_SHR : ILOpCode<289, "u64shr">; +def IL_OP_I64_EQ : ILOpCode<290, "i64eq">; +def IL_OP_I64_GE : ILOpCode<291, "i64ge">; +def IL_OP_U64_GE : ILOpCode<292, "u64ge">; +def IL_OP_I64_LT : ILOpCode<293, "i64lt">; +def IL_OP_U64_LT : ILOpCode<294, "u64lt">; +def IL_OP_I64_NE : ILOpCode<295, "i64ne">; +def IL_OP_U_MULHI24 : ILOpCode<296, "umul24_high">; +def IL_OP_I_MULHI24 : ILOpCode<297, "imul24_high">; +def IL_OP_GDS_LOAD : ILOpCode<298, "gds_load">; +def IL_OP_GDS_STORE : ILOpCode<299, "gds_store">; +def IL_OP_LDS_LOAD : ILOpCode<300, "lds_load">; +def IL_OP_LDS_LOAD_VEC : ILOpCode<301, "lds_load_vec">; 
+def IL_OP_LDS_LOAD_BYTE : ILOpCode<302, "lds_load_byte">; +def IL_OP_LDS_LOAD_UBYTE : ILOpCode<303, "lds_load_ubyte">; +def IL_OP_LDS_LOAD_SHORT : ILOpCode<304, "lds_load_short">; +def IL_OP_LDS_LOAD_USHORT : ILOpCode<305, "lds_load_ushort">; +def IL_OP_LDS_STORE : ILOpCode<306, "lds_store">; +def IL_OP_LDS_STORE_VEC : ILOpCode<307, "lds_store_vec">; +def IL_OP_LDS_STORE_BYTE : ILOpCode<308, "lds_store_byte">; +def IL_OP_LDS_STORE_SHORT : ILOpCode<309, "lds_store_short">; +def IL_OP_RAW_UAV_LOAD : ILOpCode<310, "uav_raw_load">; +def IL_OP_RAW_UAV_STORE : ILOpCode<311, "uav_raw_store">; +def IL_OP_ARENA_UAV_LOAD : ILOpCode<312, "uav_arena_load">; +def IL_OP_ARENA_UAV_STORE : ILOpCode<313, "uav_arena_store">; +def IL_OP_LDS_MSKOR : ILOpCode<314, "lds_mskor">; +def IL_OP_LDS_READ_MSKOR : ILOpCode<315, "lds_read_mskor">; +def IL_OP_UAV_BYTE_LOAD : ILOpCode<316, "uav_byte_load">; +def IL_OP_UAV_UBYTE_LOAD : ILOpCode<317, "uav_ubyte_load">; +def IL_OP_UAV_SHORT_LOAD : ILOpCode<318, "uav_short_load">; +def IL_OP_UAV_USHORT_LOAD : ILOpCode<319, "uav_ushort_load">; +def IL_OP_UAV_BYTE_STORE : ILOpCode<320, "uav_byte_store">; +def IL_OP_UAV_SHORT_STORE : ILOpCode<320, "uav_short_store">; +def IL_OP_UAV_STORE : ILOpCode<321, "uav_store">; +def IL_OP_UAV_LOAD : ILOpCode<322, "uav_load">; +def IL_OP_MUL : ILOpCode<323, "mul">; +def IL_OP_DIV_INF : ILOpCode<324, "div_zeroop(infinity)">; +def IL_OP_DIV_FLTMAX : ILOpCode<325, "div_zeroop(fltmax)">; +def IL_OP_DIV_ZERO : ILOpCode<326, "div_zeroop(zero)">; +def IL_OP_DIV_INFELSEMAX : ILOpCode<327, "div_zeroop(inf_else_max)">; +def IL_OP_FTOI_FLR : ILOpCode<328, "ftoi_flr">; +def IL_OP_FTOI_RPI : ILOpCode<329, "ftoi_rpi">; +def IL_OP_F32_TO_F16_NEAR : ILOpCode<330, "f2f16_near">; +def IL_OP_F32_TO_F16_NEG_INF : ILOpCode<331, "f2f16_neg_inf">; +def IL_OP_F32_TO_F16_PLUS_INF : ILOpCode<332, "f2f16_plus_inf">; +def IL_OP_I64_MUL : ILOpCode<333, "i64mul">; +def IL_OP_U64_MUL : ILOpCode<334, "u64mul">; +def IL_OP_CU_ID : ILOpCode<355, 
"cu_id">; +def IL_OP_WAVE_ID : ILOpCode<356, "wave_id">; +def IL_OP_I64_SUB : ILOpCode<357, "i64sub">; +def IL_OP_I64_DIV : ILOpCode<358, "i64div">; +def IL_OP_U64_DIV : ILOpCode<359, "u64div">; +def IL_OP_I64_MOD : ILOpCode<360, "i64mod">; +def IL_OP_U64_MOD : ILOpCode<361, "u64mod">; +def IL_DCL_GWS_THREAD_COUNT : ILOpCode<362, "dcl_gws_thread_count">; +def IL_DCL_SEMAPHORE : ILOpCode<363, "dcl_semaphore">; +def IL_OP_SEMAPHORE_INIT : ILOpCode<364, "init_semaphore">; +def IL_OP_SEMAPHORE_WAIT : ILOpCode<365, "semaphore_wait">; +def IL_OP_SEMAPHORE_SIGNAL : ILOpCode<366, "semaphore_signal">; +def IL_OP_BARRIER_REGION : ILOpCode<377, "fence_threads_gds">; +def IL_OP_BFI : ILOpCode<394, "bfi">; +def IL_OP_BFM : ILOpCode<395, "bfm">; +def IL_DBG_STRING : ILOpCode<396, "dbg_string">; +def IL_DBG_LINE : ILOpCode<397, "dbg_line">; +def IL_DBG_TEMPLOC : ILOpCode<398, "dbg_temploc">; diff --git a/lib/Target/AMDGPU/AMDILEvergreenDevice.cpp b/lib/Target/AMDGPU/AMDILEvergreenDevice.cpp new file mode 100644 index 0000000..6652c74 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILEvergreenDevice.cpp @@ -0,0 +1,183 @@ +//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +#include "AMDILEvergreenDevice.h" + +using namespace llvm; + +AMDILEvergreenDevice::AMDILEvergreenDevice(AMDILSubtarget *ST) +: AMDILDevice(ST) { + setCaps(); + std::string name = ST->getDeviceName(); + if (name == "cedar") { + mDeviceFlag = OCL_DEVICE_CEDAR; + } else if (name == "redwood") { + mDeviceFlag = OCL_DEVICE_REDWOOD; + } else if (name == "cypress") { + mDeviceFlag = OCL_DEVICE_CYPRESS; + } else { + mDeviceFlag = OCL_DEVICE_JUNIPER; + } +} + +AMDILEvergreenDevice::~AMDILEvergreenDevice() { +} + +size_t AMDILEvergreenDevice::getMaxLDSSize() const { + if (usesHardware(AMDILDeviceInfo::LocalMem)) { + return MAX_LDS_SIZE_800; + } else { + return 0; + } +} +size_t AMDILEvergreenDevice::getMaxGDSSize() const { + if (usesHardware(AMDILDeviceInfo::RegionMem)) { + return MAX_LDS_SIZE_800; + } else { + return 0; + } +} +uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const { + return 12; +} + +uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const { + switch(id) { + default: + assert(0 && "ID type passed in is unknown!"); + break; + case CONSTANT_ID: + case RAW_UAV_ID: + if (mSTM->calVersion() >= CAL_VERSION_GLOBAL_RETURN_BUFFER) { + return GLOBAL_RETURN_RAW_UAV_ID; + } else { + return DEFAULT_RAW_UAV_ID; + } + case GLOBAL_ID: + case ARENA_UAV_ID: + return DEFAULT_ARENA_UAV_ID; + case LDS_ID: + if (usesHardware(AMDILDeviceInfo::LocalMem)) { + return DEFAULT_LDS_ID; + } else { + return DEFAULT_ARENA_UAV_ID; + } + case GDS_ID: + if (usesHardware(AMDILDeviceInfo::RegionMem)) { + return DEFAULT_GDS_ID; + } else { + return DEFAULT_ARENA_UAV_ID; + } + case SCRATCH_ID: + if (usesHardware(AMDILDeviceInfo::PrivateMem)) { + return DEFAULT_SCRATCH_ID; + } else { + return DEFAULT_ARENA_UAV_ID; + } + }; + return 0; +} + +size_t AMDILEvergreenDevice::getWavefrontSize() const { + return AMDILDevice::WavefrontSize; +} + +uint32_t AMDILEvergreenDevice::getGeneration() const { + return 
AMDILDeviceInfo::HD5XXX; +} + +void AMDILEvergreenDevice::setCaps() { + mSWBits.set(AMDILDeviceInfo::ArenaSegment); + mHWBits.set(AMDILDeviceInfo::ArenaUAV); + if (mSTM->calVersion() >= CAL_VERSION_SC_140) { + mHWBits.set(AMDILDeviceInfo::HW64BitDivMod); + mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod); + } + mSWBits.set(AMDILDeviceInfo::Signed24BitOps); + if (mSTM->isOverride(AMDILDeviceInfo::ByteStores)) { + mHWBits.set(AMDILDeviceInfo::ByteStores); + } + if (mSTM->isOverride(AMDILDeviceInfo::Debug)) { + mSWBits.set(AMDILDeviceInfo::LocalMem); + mSWBits.set(AMDILDeviceInfo::RegionMem); + } else { + mHWBits.set(AMDILDeviceInfo::LocalMem); + mHWBits.set(AMDILDeviceInfo::RegionMem); + } + mHWBits.set(AMDILDeviceInfo::Images); + if (mSTM->isOverride(AMDILDeviceInfo::NoAlias)) { + mHWBits.set(AMDILDeviceInfo::NoAlias); + } + if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER) { + mHWBits.set(AMDILDeviceInfo::CachedMem); + } + if (mSTM->isOverride(AMDILDeviceInfo::MultiUAV)) { + mHWBits.set(AMDILDeviceInfo::MultiUAV); + } + if (mSTM->calVersion() > CAL_VERSION_SC_136) { + mHWBits.set(AMDILDeviceInfo::ByteLDSOps); + mSWBits.reset(AMDILDeviceInfo::ByteLDSOps); + mHWBits.set(AMDILDeviceInfo::ArenaVectors); + } else { + mSWBits.set(AMDILDeviceInfo::ArenaVectors); + } + if (mSTM->calVersion() > CAL_VERSION_SC_137) { + mHWBits.set(AMDILDeviceInfo::LongOps); + mSWBits.reset(AMDILDeviceInfo::LongOps); + } + mHWBits.set(AMDILDeviceInfo::TmrReg); +} + +AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST) + : AMDILEvergreenDevice(ST) { + setCaps(); +} + +AMDILCypressDevice::~AMDILCypressDevice() { +} + +void AMDILCypressDevice::setCaps() { + if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) { + mHWBits.set(AMDILDeviceInfo::DoubleOps); + mHWBits.set(AMDILDeviceInfo::FMA); + } +} + + +AMDILCedarDevice::AMDILCedarDevice(AMDILSubtarget *ST) + : AMDILEvergreenDevice(ST) { + setCaps(); +} + +AMDILCedarDevice::~AMDILCedarDevice() { +} + +void AMDILCedarDevice::setCaps() { + 
mSWBits.set(AMDILDeviceInfo::FMA); +} + +size_t AMDILCedarDevice::getWavefrontSize() const { + return AMDILDevice::QuarterWavefrontSize; +} + +AMDILRedwoodDevice::AMDILRedwoodDevice(AMDILSubtarget *ST) + : AMDILEvergreenDevice(ST) { + setCaps(); +} + +AMDILRedwoodDevice::~AMDILRedwoodDevice() +{ +} + +void AMDILRedwoodDevice::setCaps() { + mSWBits.set(AMDILDeviceInfo::FMA); +} + +size_t AMDILRedwoodDevice::getWavefrontSize() const { + return AMDILDevice::HalfWavefrontSize; +} diff --git a/lib/Target/AMDGPU/AMDILEvergreenDevice.h b/lib/Target/AMDGPU/AMDILEvergreenDevice.h new file mode 100644 index 0000000..2639ab8 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILEvergreenDevice.h @@ -0,0 +1,87 @@ +//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Interface for the subtarget data classes. +// +//===----------------------------------------------------------------------===// +// This file will define the interface that each generation needs to +// implement in order to correctly answer queries on the capabilities of the +// specific hardware. +//===----------------------------------------------------------------------===// +#ifndef _AMDILEVERGREENDEVICE_H_ +#define _AMDILEVERGREENDEVICE_H_ +#include "AMDILDevice.h" +#include "AMDILSubtarget.h" + +namespace llvm { + class AMDILSubtarget; +//===----------------------------------------------------------------------===// +// Evergreen generation of devices and their respective sub classes +//===----------------------------------------------------------------------===// + + +// The AMDILEvergreenDevice is the base device class for all of the Evergreen +// series of cards. 
+// This class contains information required to differentiate
+// the Evergreen device from the generic AMDILDevice. This device represents
+// the capabilities of the 'Juniper' cards, also known as the HD57XX.
+class AMDILEvergreenDevice : public AMDILDevice {
+public:
+  AMDILEvergreenDevice(AMDILSubtarget *ST);
+  virtual ~AMDILEvergreenDevice();
+  virtual size_t getMaxLDSSize() const;
+  virtual size_t getMaxGDSSize() const;
+  virtual size_t getWavefrontSize() const;
+  virtual uint32_t getGeneration() const;
+  virtual uint32_t getMaxNumUAVs() const;
+  virtual uint32_t getResourceID(uint32_t) const;
+protected:
+  // Fills in the hardware (mHWBits) and software (mSWBits) capability sets.
+  virtual void setCaps();
+}; // AMDILEvergreenDevice
+
+// The AMDILCypressDevice is similar to the AMDILEvergreenDevice, except it has
+// support for double precision operations. This device is used to represent
+// both the Cypress and Hemlock cards, which are commercially known as HD58XX
+// and HD59XX cards.
+class AMDILCypressDevice : public AMDILEvergreenDevice {
+public:
+  AMDILCypressDevice(AMDILSubtarget *ST);
+  virtual ~AMDILCypressDevice();
+private:
+  // Enables DoubleOps and FMA in hardware when DoubleOps is overridden.
+  virtual void setCaps();
+}; // AMDILCypressDevice
+
+
+// The AMDILCedarDevice is the class that represents all of the 'Cedar' based
+// devices. This class differs from the base AMDILEvergreenDevice in that the
+// device is a ~quarter of the 'Juniper'. These are commercially known as the
+// HD54XX and HD53XX series of cards.
+class AMDILCedarDevice : public AMDILEvergreenDevice {
+public:
+  AMDILCedarDevice(AMDILSubtarget *ST);
+  virtual ~AMDILCedarDevice();
+  // Returns AMDILDevice::QuarterWavefrontSize.
+  virtual size_t getWavefrontSize() const;
+private:
+  // Marks FMA as a software-emulated capability.
+  virtual void setCaps();
+}; // AMDILCedarDevice
+
+// The AMDILRedwoodDevice is the class that represents all of the 'Redwood'
+// based devices. This class differs from the base class, in that these devices
+// are considered about half of a 'Juniper' device. These are commercially
+// known as the HD55XX and HD56XX series of cards.
+class AMDILRedwoodDevice : public AMDILEvergreenDevice {
+public:
+  AMDILRedwoodDevice(AMDILSubtarget *ST);
+  virtual ~AMDILRedwoodDevice();
+  virtual size_t getWavefrontSize() const;
+private:
+  virtual void setCaps();
+}; // AMDILRedwoodDevice
+
+} // namespace llvm
+#endif // _AMDILEVERGREENDEVICE_H_
diff --git a/lib/Target/AMDGPU/AMDILFormats.td b/lib/Target/AMDGPU/AMDILFormats.td
new file mode 100644
index 0000000..5a71ded
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILFormats.td
@@ -0,0 +1,175 @@
+//==- AMDILFormats.td - AMDIL Instruction Formats ----*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+//===--------------------------------------------------------------------===//
+include "AMDILTokenDesc.td"
+
+//===--------------------------------------------------------------------===//
+// The parent IL instruction class that inherits the Instruction class. This
+// class sets the corresponding namespace, the out and input dag lists, the
+// pattern to match and the string to print out for the assembly printer.
+// NOTE(review): the template parameter list was missing from the archived
+// copy and is reconstructed from the names used in the body - confirm the
+// parameter order against the upstream commit.
+//===--------------------------------------------------------------------===//
+class ILFormat<ILOpCode op, dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+
+  let Namespace = "AMDGPU";
+  dag OutOperandList = outs;
+  dag InOperandList = ins;
+  ILOpCode operation = op;
+  let Pattern = pattern;
+  let AsmString = !strconcat(asmstr, "\n");
+  let isPseudo = 1;
+  let Itinerary = NullALU;
+  bit hasIEEEFlag = 0;
+  bit hasZeroOpFlag = 0;
+}
+
+//===--------------------------------------------------------------------===//
+// Class that has one input parameter and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0" and
+// handles the unary math operators.
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative for input and output register 0.
+// NOTE(review): the template parameter lists and parent-class arguments of the
+// classes below were missing from the archived copy and are reconstructed -
+// confirm against the upstream commit.
+//===--------------------------------------------------------------------===//
+class OneInOneOut<ILOpCode op, dag outs, dag ins,
+      string asmstr, list<dag> pattern>
+      : ILFormat<op, outs, ins, asmstr, pattern>
+{
+  ILDst dst_reg;
+  ILDstMod dst_mod;
+  ILRelAddr dst_rel;
+  ILSrc dst_reg_rel;
+  ILSrcMod dst_reg_rel_mod;
+  ILSrc src0_reg;
+  ILSrcMod src0_mod;
+  ILRelAddr src0_rel;
+  ILSrc src0_reg_rel;
+  ILSrcMod src0_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// This class is similar to the UnaryOp class, however, there is no
+// result value to assign.
+//===--------------------------------------------------------------------===//
+class UnaryOpNoRet<ILOpCode op, dag outs, dag ins,
+      string asmstr, list<dag> pattern>
+      : ILFormat<op, outs, ins, asmstr, pattern>
+{
+  ILSrc src0_reg;
+  ILSrcMod src0_mod;
+  ILRelAddr src0_rel;
+  ILSrc src0_reg_rel;
+  ILSrcMod src0_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Set of classes that have two input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1" and
+// handles the binary math operators and comparison operations.
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative for input register 1.
+//===--------------------------------------------------------------------===//
+class TwoInOneOut<ILOpCode op, dag outs, dag ins,
+      string asmstr, list<dag> pattern>
+      : OneInOneOut<op, outs, ins, asmstr, pattern>
+{
+  ILSrc src1_reg;
+  ILSrcMod src1_mod;
+  ILRelAddr src1_rel;
+  ILSrc src1_reg_rel;
+  ILSrcMod src1_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Similar to the UnaryOpNoRet class, but takes as arguments two input
+// operands. Used mainly for barrier instructions on PC platform.
+// NOTE(review): the template parameter lists and parent-class arguments of the
+// classes below were missing from the archived copy and are reconstructed -
+// confirm against the upstream commit.
+//===--------------------------------------------------------------------===//
+class BinaryOpNoRet<ILOpCode op, dag outs, dag ins,
+      string asmstr, list<dag> pattern>
+      : UnaryOpNoRet<op, outs, ins, asmstr, pattern>
+{
+  ILSrc src1_reg;
+  ILSrcMod src1_mod;
+  ILRelAddr src1_rel;
+  ILSrc src1_reg_rel;
+  ILSrcMod src1_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Set of classes that have three input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2" and
+// handles the mad and conditional mov instruction.
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative.
+// This class is the parent class of TernaryOp
+//===--------------------------------------------------------------------===//
+class ThreeInOneOut<ILOpCode op, dag outs, dag ins,
+      string asmstr, list<dag> pattern>
+      : TwoInOneOut<op, outs, ins, asmstr, pattern> {
+  ILSrc src2_reg;
+  ILSrcMod src2_mod;
+  ILRelAddr src2_rel;
+  ILSrc src2_reg_rel;
+  ILSrcMod src2_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Intrinsic classes
+// Generic versions of the above classes but for Target specific intrinsics
+// instead of SDNode patterns.
+//===--------------------------------------------------------------------===//
+// Every class below derives from Intrinsic with a fixed result list, operand
+// list and property list; the name encodes the operand count (Void, Unary,
+// Binary, Ternary, Quaternary), whether a value is returned (NoRet) and the
+// value kind (Int, Float, Atomic).
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+  class VoidIntLong :
+    Intrinsic<[llvm_i64_ty], [], []>;
+  class VoidIntInt :
+    Intrinsic<[llvm_i32_ty], [], []>;
+  class VoidIntBool :
+    Intrinsic<[llvm_i32_ty], [], []>;
+  class UnaryIntInt :
+    Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+  class UnaryIntFloat :
+    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+  class ConvertIntFTOI :
+    Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
+  class ConvertIntITOF :
+    Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
+  class UnaryIntNoRetInt :
+    Intrinsic<[], [llvm_anyint_ty], []>;
+  class UnaryIntNoRetFloat :
+    Intrinsic<[], [llvm_anyfloat_ty], []>;
+  class BinaryIntInt :
+    Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+  class BinaryIntFloat :
+    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+  class BinaryIntNoRetInt :
+    Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
+  class BinaryIntNoRetFloat :
+    Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
+  class TernaryIntInt :
+    Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+      LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+  class TernaryIntFloat :
+    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
+      LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+  class QuaternaryIntInt :
+    Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+      LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+  class UnaryAtomicInt :
+    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+  class BinaryAtomicInt :
+    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+  // NOTE(review): the only atomic class here without a property list (no
+  // IntrReadWriteArgMem) - confirm whether that is intentional.
+  class TernaryAtomicInt :
+    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+  class UnaryAtomicIntNoRet :
+    Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+  class BinaryAtomicIntNoRet :
+    Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+  class TernaryAtomicIntNoRet :
+    Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+}
diff --git a/lib/Target/AMDGPU/AMDILFrameLowering.cpp b/lib/Target/AMDGPU/AMDILFrameLowering.cpp
new file mode 100644
index 0000000..87eca87
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILFrameLowering.cpp
@@ -0,0 +1,53 @@
+//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface to describe the layout of a stack frame on an AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILFrameLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+using namespace llvm;
+// Forwards all four parameters unchanged to the TargetFrameLowering base.
+AMDILFrameLowering::AMDILFrameLowering(StackDirection D, unsigned StackAl,
+    int LAO, unsigned TransAl)
+  : TargetFrameLowering(D, StackAl, LAO, TransAl)
+{
+}
+
+AMDILFrameLowering::~AMDILFrameLowering()
+{
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index.
+int AMDILFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->getObjectOffset(FI); +} + +const TargetFrameLowering::SpillSlot * +AMDILFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const +{ + NumEntries = 0; + return 0; +} +void +AMDILFrameLowering::emitPrologue(MachineFunction &MF) const +{ +} +void +AMDILFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const +{ +} +bool +AMDILFrameLowering::hasFP(const MachineFunction &MF) const +{ + return false; +} diff --git a/lib/Target/AMDGPU/AMDILFrameLowering.h b/lib/Target/AMDGPU/AMDILFrameLowering.h new file mode 100644 index 0000000..b1d919e --- /dev/null +++ b/lib/Target/AMDGPU/AMDILFrameLowering.h @@ -0,0 +1,46 @@ +//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Interface to describe a layout of a stack frame on a AMDIL target machine +// +//===----------------------------------------------------------------------===// +#ifndef _AMDILFRAME_LOWERING_H_ +#define _AMDILFRAME_LOWERING_H_ + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetFrameLowering.h" + +/// Information about the stack frame layout on the AMDIL targets. It holds +/// the direction of the stack growth, the known stack alignment on entry to +/// each function, and the offset to the locals area. +/// See TargetFrameInfo for more comments. 
+
+namespace llvm {
+  // Frame lowering for the AMDIL targets; a thin TargetFrameLowering subclass.
+  class AMDILFrameLowering : public TargetFrameLowering {
+  public:
+    // All arguments are forwarded unchanged to TargetFrameLowering.
+    AMDILFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
+        TransAl = 1);
+    virtual ~AMDILFrameLowering();
+    // Returns the object offset recorded in MF's frame info for index FI.
+    virtual int getFrameIndexOffset(const MachineFunction &MF,
+        int FI) const;
+    // Sets NumEntries to 0 and returns a null table.
+    virtual const SpillSlot *
+      getCalleeSavedSpillSlots(unsigned &NumEntries) const;
+    // The prologue and epilogue hooks are implemented with empty bodies.
+    virtual void emitPrologue(MachineFunction &MF) const;
+    virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+    // Always returns false.
+    virtual bool hasFP(const MachineFunction &MF) const;
+  }; // class AMDILFrameLowering
+} // namespace llvm
+#endif // _AMDILFRAME_LOWERING_H_
diff --git a/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
new file mode 100644
index 0000000..df0ac75
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDILISelDAGToDAG.cpp
@@ -0,0 +1,393 @@
+//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AMDIL target.
+// +//===----------------------------------------------------------------------===// +#include "AMDGPUISelLowering.h" // For AMDGPUISD +#include "AMDILDevices.h" +#include "AMDILUtilityFunctions.h" +#include "llvm/ADT/ValueMap.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Support/Compiler.h" +#include +#include + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// AMDILDAGToDAGISel - AMDIL specific code to select AMDIL machine instructions +// //for SelectionDAG operations. +// +namespace { +class AMDILDAGToDAGISel : public SelectionDAGISel { + // Subtarget - Keep a pointer to the AMDIL Subtarget around so that we can + // make the right decision when generating code for different targets. 
+ const AMDILSubtarget &Subtarget; +public: + AMDILDAGToDAGISel(TargetMachine &TM AMDIL_OPT_LEVEL_DECL); + virtual ~AMDILDAGToDAGISel(); + + SDNode *Select(SDNode *N); + virtual const char *getPassName() const; + +private: + inline SDValue getSmallIPtrImm(unsigned Imm); + + // Complex pattern selectors + bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); + bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2); + bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2); + + static bool checkType(const Value *ptr, unsigned int addrspace); + static const Value *getBasePointerValue(const Value *V); + + static bool isGlobalStore(const StoreSDNode *N); + static bool isPrivateStore(const StoreSDNode *N); + static bool isLocalStore(const StoreSDNode *N); + static bool isRegionStore(const StoreSDNode *N); + + static bool isCPLoad(const LoadSDNode *N); + static bool isConstantLoad(const LoadSDNode *N, int cbID); + static bool isGlobalLoad(const LoadSDNode *N); + static bool isPrivateLoad(const LoadSDNode *N); + static bool isLocalLoad(const LoadSDNode *N); + static bool isRegionLoad(const LoadSDNode *N); + + bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset); + bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset); + bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); + + // Include the pieces autogenerated from the target description. +#include "AMDGPUGenDAGISel.inc" +}; +} // end anonymous namespace + +// createAMDILISelDag - This pass converts a legalized DAG into a AMDIL-specific +// DAG, ready for instruction scheduling. 
+// +FunctionPass *llvm::createAMDILISelDag(TargetMachine &TM + AMDIL_OPT_LEVEL_DECL) { + return new AMDILDAGToDAGISel(TM AMDIL_OPT_LEVEL_VAR); +} + +AMDILDAGToDAGISel::AMDILDAGToDAGISel(TargetMachine &TM + AMDIL_OPT_LEVEL_DECL) + : SelectionDAGISel(TM AMDIL_OPT_LEVEL_VAR), Subtarget(TM.getSubtarget()) +{ +} + +AMDILDAGToDAGISel::~AMDILDAGToDAGISel() { +} + +SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); +} + +bool AMDILDAGToDAGISel::SelectADDRParam( + SDValue Addr, SDValue& R1, SDValue& R2) { + + if (Addr.getOpcode() == ISD::FrameIndex) { + if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { + R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + R2 = CurDAG->getTargetConstant(0, MVT::i32); + } else { + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, MVT::i32); + } + } else if (Addr.getOpcode() == ISD::ADD) { + R1 = Addr.getOperand(0); + R2 = Addr.getOperand(1); + } else { + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, MVT::i32); + } + return true; +} + +bool AMDILDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) { + return false; + } + return SelectADDRParam(Addr, R1, R2); +} + + +bool AMDILDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) { + return false; + } + + if (Addr.getOpcode() == ISD::FrameIndex) { + if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { + R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64); + R2 = CurDAG->getTargetConstant(0, MVT::i64); + } else { + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, MVT::i64); + } + } else if (Addr.getOpcode() == ISD::ADD) { + R1 = Addr.getOperand(0); + R2 = Addr.getOperand(1); + } else { + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, MVT::i64); + } + return true; +} + +SDNode 
*AMDILDAGToDAGISel::Select(SDNode *N) { + unsigned int Opc = N->getOpcode(); + if (N->isMachineOpcode()) { + return NULL; // Already selected. + } + switch (Opc) { + default: break; + case ISD::FrameIndex: + { + if (FrameIndexSDNode *FIN = dyn_cast(N)) { + unsigned int FI = FIN->getIndex(); + EVT OpVT = N->getValueType(0); + unsigned int NewOpc = AMDGPU::COPY; + SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32); + return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI); + } + } + break; + } + return SelectCode(N); +} + +bool AMDILDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) { + if (!ptr) { + return false; + } + Type *ptrType = ptr->getType(); + return dyn_cast(ptrType)->getAddressSpace() == addrspace; +} + +const Value * AMDILDAGToDAGISel::getBasePointerValue(const Value *V) +{ + if (!V) { + return NULL; + } + const Value *ret = NULL; + ValueMap ValueBitMap; + std::queue > ValueQueue; + ValueQueue.push(V); + while (!ValueQueue.empty()) { + V = ValueQueue.front(); + if (ValueBitMap.find(V) == ValueBitMap.end()) { + ValueBitMap[V] = true; + if (dyn_cast(V) && dyn_cast(V->getType())) { + ret = V; + break; + } else if (dyn_cast(V)) { + ret = V; + break; + } else if (dyn_cast(V)) { + const ConstantExpr *CE = dyn_cast(V); + if (CE) { + ValueQueue.push(CE->getOperand(0)); + } + } else if (const AllocaInst *AI = dyn_cast(V)) { + ret = AI; + break; + } else if (const Instruction *I = dyn_cast(V)) { + uint32_t numOps = I->getNumOperands(); + for (uint32_t x = 0; x < numOps; ++x) { + ValueQueue.push(I->getOperand(x)); + } + } else { + // assert(0 && "Found a Value that we didn't know how to handle!"); + } + } + ValueQueue.pop(); + } + return ret; +} + +bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) { + return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS); +} + +bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) { + return (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS) + && !checkType(N->getSrcValue(), 
AMDILAS::GLOBAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS)); +} + +bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) { + return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS); +} + +bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) { + return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS); +} + +bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) { + if (checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) { + return true; + } + MachineMemOperand *MMO = N->getMemOperand(); + const Value *V = MMO->getValue(); + const Value *BV = getBasePointerValue(V); + if (MMO + && MMO->getValue() + && ((V && dyn_cast(V)) + || (BV && dyn_cast( + getBasePointerValue(MMO->getValue()))))) { + return checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS); + } else { + return false; + } +} + +bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) { + return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS); +} + +bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) { + return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS); +} + +bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) { + return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS); +} + +bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) { + MachineMemOperand *MMO = N->getMemOperand(); + if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) { + if (MMO) { + const Value *V = MMO->getValue(); + const PseudoSourceValue *PSV = dyn_cast(V); + if (PSV && PSV == PseudoSourceValue::getConstantPool()) { + return true; + } + } + } + return false; +} + +bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) { + if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) { + // Check to make sure we are not a constant pool load or a constant load + // that is marked as a private load + if (isCPLoad(N) || isConstantLoad(N, -1)) { + return false; + } + } + if (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS) + && 
!checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS)) + { + return true; + } + return false; +} + +const char *AMDILDAGToDAGISel::getPassName() const { + return "AMDIL DAG->DAG Pattern Instruction Selection"; +} + +#ifdef DEBUGTMP +#undef INT64_C +#endif +#undef DEBUGTMP + +///==== AMDGPU Functions ====/// + +bool AMDILDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base, + SDValue& Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress) { + return false; + } + + + if (Addr.getOpcode() == ISD::ADD) { + bool Match = false; + + // Find the base ptr and the offset + for (unsigned i = 0; i < Addr.getNumOperands(); i++) { + SDValue Arg = Addr.getOperand(i); + ConstantSDNode * OffsetNode = dyn_cast(Arg); + // This arg isn't a constant so it must be the base PTR. + if (!OffsetNode) { + Base = Addr.getOperand(i); + continue; + } + // Check if the constant argument fits in 8-bits. The offset is in bytes + // so we need to convert it to dwords. 
+ if (isInt<8>(OffsetNode->getZExtValue() >> 2)) { + Match = true; + Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2, + MVT::i32); + } + } + return Match; + } + + // Default case, no offset + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +bool AMDILDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, + SDValue &Offset) +{ + ConstantSDNode * IMMOffset; + + if (Addr.getOpcode() == ISD::ADD + && (IMMOffset = dyn_cast(Addr.getOperand(1))) + && isInt<16>(IMMOffset->getZExtValue())) { + + Base = Addr.getOperand(0); + Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32); + return true; + // If the pointer address is constant, we can move it to the offset field. + } else if ((IMMOffset = dyn_cast(Addr)) + && isInt<16>(IMMOffset->getZExtValue())) { + Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), + CurDAG->getEntryNode().getDebugLoc(), + AMDGPU::ZERO, MVT::i32); + Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32); + return true; + } + + // Default case, no offset + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +bool AMDILDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base, + SDValue& Offset) { + if (Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress || + Addr.getOpcode() != ISD::ADD) { + return false; + } + + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + + return false; +} diff --git a/lib/Target/AMDGPU/AMDILISelLowering.cpp b/lib/Target/AMDGPU/AMDILISelLowering.cpp new file mode 100644 index 0000000..af99122 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILISelLowering.cpp @@ -0,0 +1,1850 @@ +//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +// +// This file implements the interfaces that AMDIL uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#include "AMDILISelLowering.h" +#include "AMDILDevices.h" +#include "AMDILIntrinsicInfo.h" +#include "AMDILRegisterInfo.h" +#include "AMDILSubtarget.h" +#include "AMDILUtilityFunctions.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; +#define ISDBITCAST ISD::BITCAST +#define MVTGLUE MVT::Glue +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// +#include "AMDGPUGenCallingConv.inc" + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation Help Functions Begin +//===----------------------------------------------------------------------===// + static SDValue +getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType) +{ + DebugLoc DL = Src.getDebugLoc(); + EVT svt = Src.getValueType().getScalarType(); + EVT dvt = Dst.getValueType().getScalarType(); + if (svt.isFloatingPoint() && dvt.isFloatingPoint()) { + if (dvt.bitsGT(svt)) { + Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src); + } else if (svt.bitsLT(svt)) { + Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src, + DAG.getConstant(1, 
MVT::i32)); + } + } else if (svt.isInteger() && dvt.isInteger()) { + if (!svt.bitsEq(dvt)) { + Src = DAG.getSExtOrTrunc(Src, DL, dvt); + } + } else if (svt.isInteger()) { + unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP; + if (!svt.bitsEq(dvt)) { + if (dvt.getSimpleVT().SimpleTy == MVT::f32) { + Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32); + } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) { + Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64); + } else { + assert(0 && "We only support 32 and 64bit fp types"); + } + } + Src = DAG.getNode(opcode, DL, dvt, Src); + } else if (dvt.isInteger()) { + unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT; + if (svt.getSimpleVT().SimpleTy == MVT::f32) { + Src = DAG.getNode(opcode, DL, MVT::i32, Src); + } else if (svt.getSimpleVT().SimpleTy == MVT::f64) { + Src = DAG.getNode(opcode, DL, MVT::i64, Src); + } else { + assert(0 && "We only support 32 and 64bit fp types"); + } + Src = DAG.getSExtOrTrunc(Src, DL, dvt); + } + return Src; +} +// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC +// condition. 
+ static AMDILCC::CondCodes +CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type) +{ + switch (CC) { + default: + { + errs()<<"Condition Code: "<< (unsigned int)CC<<"\n"; + assert(0 && "Unknown condition code!"); + } + case ISD::SETO: + switch(type) { + case MVT::f32: + return AMDILCC::IL_CC_F_O; + case MVT::f64: + return AMDILCC::IL_CC_D_O; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETUO: + switch(type) { + case MVT::f32: + return AMDILCC::IL_CC_F_UO; + case MVT::f64: + return AMDILCC::IL_CC_D_UO; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETGT: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_I_GT; + case MVT::f32: + return AMDILCC::IL_CC_F_GT; + case MVT::f64: + return AMDILCC::IL_CC_D_GT; + case MVT::i64: + return AMDILCC::IL_CC_L_GT; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETGE: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_I_GE; + case MVT::f32: + return AMDILCC::IL_CC_F_GE; + case MVT::f64: + return AMDILCC::IL_CC_D_GE; + case MVT::i64: + return AMDILCC::IL_CC_L_GE; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETLT: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_I_LT; + case MVT::f32: + return AMDILCC::IL_CC_F_LT; + case MVT::f64: + return AMDILCC::IL_CC_D_LT; + case MVT::i64: + return AMDILCC::IL_CC_L_LT; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETLE: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_I_LE; + case MVT::f32: 
+ return AMDILCC::IL_CC_F_LE; + case MVT::f64: + return AMDILCC::IL_CC_D_LE; + case MVT::i64: + return AMDILCC::IL_CC_L_LE; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETNE: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_I_NE; + case MVT::f32: + return AMDILCC::IL_CC_F_NE; + case MVT::f64: + return AMDILCC::IL_CC_D_NE; + case MVT::i64: + return AMDILCC::IL_CC_L_NE; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETEQ: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_I_EQ; + case MVT::f32: + return AMDILCC::IL_CC_F_EQ; + case MVT::f64: + return AMDILCC::IL_CC_D_EQ; + case MVT::i64: + return AMDILCC::IL_CC_L_EQ; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETUGT: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_U_GT; + case MVT::f32: + return AMDILCC::IL_CC_F_UGT; + case MVT::f64: + return AMDILCC::IL_CC_D_UGT; + case MVT::i64: + return AMDILCC::IL_CC_UL_GT; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETUGE: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_U_GE; + case MVT::f32: + return AMDILCC::IL_CC_F_UGE; + case MVT::f64: + return AMDILCC::IL_CC_D_UGE; + case MVT::i64: + return AMDILCC::IL_CC_UL_GE; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETULT: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_U_LT; + case MVT::f32: + return AMDILCC::IL_CC_F_ULT; + case MVT::f64: + return AMDILCC::IL_CC_D_ULT; + 
case MVT::i64: + return AMDILCC::IL_CC_UL_LT; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETULE: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_U_LE; + case MVT::f32: + return AMDILCC::IL_CC_F_ULE; + case MVT::f64: + return AMDILCC::IL_CC_D_ULE; + case MVT::i64: + return AMDILCC::IL_CC_UL_LE; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETUNE: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_U_NE; + case MVT::f32: + return AMDILCC::IL_CC_F_UNE; + case MVT::f64: + return AMDILCC::IL_CC_D_UNE; + case MVT::i64: + return AMDILCC::IL_CC_UL_NE; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETUEQ: + switch (type) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + return AMDILCC::IL_CC_U_EQ; + case MVT::f32: + return AMDILCC::IL_CC_F_UEQ; + case MVT::f64: + return AMDILCC::IL_CC_D_UEQ; + case MVT::i64: + return AMDILCC::IL_CC_UL_EQ; + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETOGT: + switch (type) { + case MVT::f32: + return AMDILCC::IL_CC_F_OGT; + case MVT::f64: + return AMDILCC::IL_CC_D_OGT; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETOGE: + switch (type) { + case MVT::f32: + return AMDILCC::IL_CC_F_OGE; + case MVT::f64: + return AMDILCC::IL_CC_D_OGE; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETOLT: + switch (type) { 
+ case MVT::f32: + return AMDILCC::IL_CC_F_OLT; + case MVT::f64: + return AMDILCC::IL_CC_D_OLT; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETOLE: + switch (type) { + case MVT::f32: + return AMDILCC::IL_CC_F_OLE; + case MVT::f64: + return AMDILCC::IL_CC_D_OLE; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETONE: + switch (type) { + case MVT::f32: + return AMDILCC::IL_CC_F_ONE; + case MVT::f64: + return AMDILCC::IL_CC_D_ONE; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + case ISD::SETOEQ: + switch (type) { + case MVT::f32: + return AMDILCC::IL_CC_F_OEQ; + case MVT::f64: + return AMDILCC::IL_CC_D_OEQ; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + default: + assert(0 && "Opcode combination not generated correctly!"); + return AMDILCC::COND_ERROR; + }; + }; +} + +SDValue +AMDILTargetLowering::LowerMemArgument( + SDValue Chain, + CallingConv::ID CallConv, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, + MachineFrameInfo *MFI, + unsigned i) const +{ + // Create the nodes corresponding to a load from this parameter slot. + ISD::ArgFlagsTy Flags = Ins[i].Flags; + + bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && + getTargetMachine().Options.GuaranteedTailCallOpt; + bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); + + // FIXME: For now, all byval parameter objects are marked mutable. This can + // be changed with more analysis. + // In case of tail call optimization mark all arguments mutable. 
Since they + // could be overwritten by lowering of arguments in case of a tail call. + int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, + VA.getLocMemOffset(), isImmutable); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + + if (Flags.isByVal()) + return FIN; + return DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, false, 0); +} +//===----------------------------------------------------------------------===// +// TargetLowering Implementation Help Functions End +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// TargetLowering Class Implementation Begins +//===----------------------------------------------------------------------===// + AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM) +: TargetLowering(TM, new TargetLoweringObjectFileELF()) +{ + int types[] = + { + (int)MVT::i8, + (int)MVT::i16, + (int)MVT::i32, + (int)MVT::f32, + (int)MVT::f64, + (int)MVT::i64, + (int)MVT::v2i8, + (int)MVT::v4i8, + (int)MVT::v2i16, + (int)MVT::v4i16, + (int)MVT::v4f32, + (int)MVT::v4i32, + (int)MVT::v2f32, + (int)MVT::v2i32, + (int)MVT::v2f64, + (int)MVT::v2i64 + }; + + int IntTypes[] = + { + (int)MVT::i8, + (int)MVT::i16, + (int)MVT::i32, + (int)MVT::i64 + }; + + int FloatTypes[] = + { + (int)MVT::f32, + (int)MVT::f64 + }; + + int VectorTypes[] = + { + (int)MVT::v2i8, + (int)MVT::v4i8, + (int)MVT::v2i16, + (int)MVT::v4i16, + (int)MVT::v4f32, + (int)MVT::v4i32, + (int)MVT::v2f32, + (int)MVT::v2i32, + (int)MVT::v2f64, + (int)MVT::v2i64 + }; + size_t numTypes = sizeof(types) / sizeof(*types); + size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes); + size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes); + size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes); + + const AMDILSubtarget &STM = getTargetMachine().getSubtarget(); + // These are the current 
register classes that are + // supported + + for (unsigned int x = 0; x < numTypes; ++x) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; + + //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types + // We cannot sextinreg, expand to shifts + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); + setOperationAction(ISD::SUBE, VT, Expand); + setOperationAction(ISD::SUBC, VT, Expand); + setOperationAction(ISD::ADDE, VT, Expand); + setOperationAction(ISD::ADDC, VT, Expand); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::BRCOND, VT, Custom); + setOperationAction(ISD::BR_CC, VT, Custom); + setOperationAction(ISD::BR_JT, VT, Expand); + setOperationAction(ISD::BRIND, VT, Expand); + // TODO: Implement custom UREM/SREM routines + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::GlobalAddress, VT, Custom); + setOperationAction(ISD::JumpTable, VT, Custom); + setOperationAction(ISD::ConstantPool, VT, Custom); + setOperationAction(ISD::SELECT, VT, Custom); + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + if (VT != MVT::i64 && VT != MVT::v2i64) { + setOperationAction(ISD::SDIV, VT, Custom); + } + } + for (unsigned int x = 0; x < numFloatTypes; ++x) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; + + // IL does not have these operations for floating point types + setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); + setOperationAction(ISD::SETOLT, VT, Expand); + setOperationAction(ISD::SETOGE, VT, Expand); + setOperationAction(ISD::SETOGT, VT, Expand); + setOperationAction(ISD::SETOLE, VT, Expand); + setOperationAction(ISD::SETULT, VT, Expand); + setOperationAction(ISD::SETUGE, VT, Expand); + setOperationAction(ISD::SETUGT, VT, Expand); + setOperationAction(ISD::SETULE, VT, Expand); + } + + for (unsigned int x = 0; x < numIntTypes; ++x) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x]; + + // GPU also does not have 
divrem function for signed or unsigned + setOperationAction(ISD::SDIVREM, VT, Expand); + + // GPU does not have [S|U]MUL_LOHI functions as a single instruction + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + + // GPU doesn't have a rotl, rotr, or byteswap instruction + setOperationAction(ISD::ROTR, VT, Expand); + setOperationAction(ISD::BSWAP, VT, Expand); + + // GPU doesn't have any counting operators + setOperationAction(ISD::CTPOP, VT, Expand); + setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTLZ, VT, Expand); + } + + for ( unsigned int ii = 0; ii < numVectorTypes; ++ii ) + { + MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii]; + + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + // setOperationAction(ISD::VSETCC, VT, Expand); + setOperationAction(ISD::SETCC, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::SELECT, VT, Expand); + + } + if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) { + setOperationAction(ISD::MULHU, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::v2i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::v2i64, Expand); + setOperationAction(ISD::ADD, MVT::v2i64, Expand); + setOperationAction(ISD::SREM, MVT::v2i64, Expand); + setOperationAction(ISD::Constant , MVT::i64 , Legal); + setOperationAction(ISD::SDIV, MVT::v2i64, Expand); + setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); + setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); + setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand); + setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand); + } + if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) { + // we support loading/storing v2f64 but not operations on the type + 
setOperationAction(ISD::FADD, MVT::v2f64, Expand); + setOperationAction(ISD::FSUB, MVT::v2f64, Expand); + setOperationAction(ISD::FMUL, MVT::v2f64, Expand); + setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); + setOperationAction(ISD::ConstantFP , MVT::f64 , Legal); + // We want to expand vector conversions into their scalar + // counterparts. + setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); + setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); + setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); + setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand); + setOperationAction(ISD::FABS, MVT::f64, Expand); + setOperationAction(ISD::FABS, MVT::v2f64, Expand); + } + // TODO: Fix the UDIV24 algorithm so it works for these + // types correctly. This needs vector comparisons + // for this to work correctly. + setOperationAction(ISD::UDIV, MVT::v2i8, Expand); + setOperationAction(ISD::UDIV, MVT::v4i8, Expand); + setOperationAction(ISD::UDIV, MVT::v2i16, Expand); + setOperationAction(ISD::UDIV, MVT::v4i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); + setOperationAction(ISD::SUBC, MVT::Other, Expand); + setOperationAction(ISD::ADDE, MVT::Other, Expand); + setOperationAction(ISD::ADDC, MVT::Other, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Custom); + setOperationAction(ISD::BR_CC, MVT::Other, Custom); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction(ISD::SETCC, MVT::Other, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); + + setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom); + // Use the default implementation. 
+ setOperationAction(ISD::VAARG , MVT::Other, Expand); + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); + setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); + setOperationAction(ISD::Constant , MVT::i32 , Legal); + setOperationAction(ISD::TRAP , MVT::Other , Legal); + + setStackPointerRegisterToSaveRestore(AMDGPU::SP); + setSchedulingPreference(Sched::RegPressure); + setPow2DivIsCheap(false); + setPrefLoopAlignment(16); + setSelectIsExpensive(true); + setJumpIsExpensive(true); + + maxStoresPerMemcpy = 4096; + maxStoresPerMemmove = 4096; + maxStoresPerMemset = 4096; + +#undef numTypes +#undef numIntTypes +#undef numVectorTypes +#undef numFloatTypes +} + +const char * +AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const +{ + switch (Opcode) { + default: return 0; + case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG"; + case AMDILISD::MAD: return "AMDILISD::MAD"; + case AMDILISD::CALL: return "AMDILISD::CALL"; + case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC"; + case AMDILISD::UMUL: return "AMDILISD::UMUL"; + case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF"; + case AMDILISD::VBUILD: return "AMDILISD::VBUILD"; + case AMDILISD::CMP: return "AMDILISD::CMP"; + case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT"; + case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE"; + case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT"; + case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE"; + case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ"; + case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE"; + case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG"; + case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND"; + + }; +} +bool +AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + 
const CallInst &I, unsigned Intrinsic) const +{ + return false; +} + +// The backend supports 32 and 64 bit floating point immediates +bool +AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const +{ + if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 + || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { + return true; + } else { + return false; + } +} + +bool +AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const +{ + if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 + || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { + return false; + } else { + return true; + } +} + + +// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to +// be zero. Op is expected to be a target specific node. Used by DAG +// combiner. + +void +AMDILTargetLowering::computeMaskedBitsForTargetNode( + const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const +{ + APInt KnownZero2; + APInt KnownOne2; + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything + switch (Op.getOpcode()) { + default: break; + case AMDILISD::SELECT_CC: + DAG.ComputeMaskedBits( + Op.getOperand(1), + KnownZero, + KnownOne, + Depth + 1 + ); + DAG.ComputeMaskedBits( + Op.getOperand(0), + KnownZero2, + KnownOne2 + ); + assert((KnownZero & KnownOne) == 0 + && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 + && "Bits known to be one AND zero?"); + // Only known if known in both the LHS and RHS + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + }; +} + +// This is the function that determines which calling convention should +// be used. 
Currently there is only one calling convention +CCAssignFn* +AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const +{ + //uint64_t CC = cast(Op.getOperand(1))->getZExtValue(); + return CC_AMDIL32; +} + +// LowerCallResult - Lower the result values of an ISD::CALL into the +// appropriate copies out of appropriate physical registers. This assumes that +// Chain/InFlag are the input chain/flag to use, and that TheCall is the call +// being lowered. The returns a SDNode with the same number of values as the +// ISD::CALL. +SDValue +AMDILTargetLowering::LowerCallResult( + SDValue Chain, + SDValue InFlag, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl &InVals) const +{ + // Assign locations to each value returned by this call + SmallVector RVLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + EVT CopyVT = RVLocs[i].getValVT(); + if (RVLocs[i].isRegLoc()) { + Chain = DAG.getCopyFromReg( + Chain, + dl, + RVLocs[i].getLocReg(), + CopyVT, + InFlag + ).getValue(1); + SDValue Val = Chain.getValue(0); + InFlag = Chain.getValue(2); + InVals.push_back(Val); + } + } + + return Chain; + +} + +//===----------------------------------------------------------------------===// +// Other Lowering Hooks +//===----------------------------------------------------------------------===// + +// Recursively assign SDNodeOrdering to any unordered nodes +// This is necessary to maintain source ordering of instructions +// under -O0 to avoid odd-looking "skipping around" issues. 
+ static const SDValue +Ordered( SelectionDAG &DAG, unsigned order, const SDValue New ) +{ + if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) { + DAG.AssignOrdering( New.getNode(), order ); + for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i) + Ordered( DAG, order, New.getOperand(i) ); + } + return New; +} + +#define LOWER(A) \ + case ISD:: A: \ +return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) ) + +SDValue +AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const +{ + switch (Op.getOpcode()) { + default: + Op.getNode()->dump(); + assert(0 && "Custom lowering code for this" + "instruction is not implemented yet!"); + break; + LOWER(GlobalAddress); + LOWER(JumpTable); + LOWER(ConstantPool); + LOWER(ExternalSymbol); + LOWER(SDIV); + LOWER(SREM); + LOWER(BUILD_VECTOR); + LOWER(SELECT); + LOWER(SETCC); + LOWER(SIGN_EXTEND_INREG); + LOWER(DYNAMIC_STACKALLOC); + LOWER(BRCOND); + LOWER(BR_CC); + } + return Op; +} + +#undef LOWER + +SDValue +AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const +{ + SDValue DST = Op; + const GlobalAddressSDNode *GADN = cast(Op); + const GlobalValue *G = GADN->getGlobal(); + DebugLoc DL = Op.getDebugLoc(); + const GlobalVariable *GV = dyn_cast(G); + if (!GV) { + DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); + } else { + if (GV->hasInitializer()) { + const Constant *C = dyn_cast(GV->getInitializer()); + if (const ConstantInt *CI = dyn_cast(C)) { + DST = DAG.getConstant(CI->getValue(), Op.getValueType()); + } else if (const ConstantFP *CF = dyn_cast(C)) { + DST = DAG.getConstantFP(CF->getValueAPF(), + Op.getValueType()); + } else if (dyn_cast(C)) { + EVT VT = Op.getValueType(); + if (VT.isInteger()) { + DST = DAG.getConstant(0, VT); + } else { + DST = DAG.getConstantFP(0, VT); + } + } else { + assert(!"lowering this type of Global Address " + "not implemented yet!"); + C->dump(); + DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); + } + } else { + DST = 
DAG.getTargetGlobalAddress(GV, DL, MVT::i32); + } + } + return DST; +} + +SDValue +AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const +{ + JumpTableSDNode *JT = cast(Op); + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); + return Result; +} +SDValue +AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const +{ + ConstantPoolSDNode *CP = cast(Op); + EVT PtrVT = Op.getValueType(); + SDValue Result; + if (CP->isMachineConstantPoolEntry()) { + Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, + CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); + } else { + Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, + CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); + } + return Result; +} + +SDValue +AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const +{ + const char *Sym = cast(Op)->getSymbol(); + SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32); + return Result; +} + +/// LowerFORMAL_ARGUMENTS - transform physical registers into +/// virtual registers and generate load operations for +/// arguments places on the stack. 
+/// TODO: isVarArg, hasStructRet, isMemReg + SDValue +AMDILTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl &InVals) +const +{ + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + //const Function *Fn = MF.getFunction(); + //MachineRegisterInfo &RegInfo = MF.getRegInfo(); + + SmallVector ArgLocs; + CallingConv::ID CC = MF.getFunction()->getCallingConv(); + //bool hasStructRet = MF.getFunction()->hasStructRetAttr(); + + CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + + // When more calling conventions are added, they need to be chosen here + CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32); + SDValue StackPtr; + + //unsigned int FirstStackArgLoc = 0; + + for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isRegLoc()) { + EVT RegVT = VA.getLocVT(); + const TargetRegisterClass *RC = getRegClassFor( + RegVT.getSimpleVT().SimpleTy); + + unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC); + SDValue ArgValue = DAG.getCopyFromReg( + Chain, + dl, + Reg, + RegVT); + // If this is an 8 or 16-bit value, it is really passed + // promoted to 32 bits. Insert an assert[sz]ext to capture + // this, then truncate to the right size. 
+ + if (VA.getLocInfo() == CCValAssign::SExt) { + ArgValue = DAG.getNode( + ISD::AssertSext, + dl, + RegVT, + ArgValue, + DAG.getValueType(VA.getValVT())); + } else if (VA.getLocInfo() == CCValAssign::ZExt) { + ArgValue = DAG.getNode( + ISD::AssertZext, + dl, + RegVT, + ArgValue, + DAG.getValueType(VA.getValVT())); + } + if (VA.getLocInfo() != CCValAssign::Full) { + ArgValue = DAG.getNode( + ISD::TRUNCATE, + dl, + VA.getValVT(), + ArgValue); + } + // Add the value to the list of arguments + // to be passed in registers + InVals.push_back(ArgValue); + if (isVarArg) { + assert(0 && "Variable arguments are not yet supported"); + // See MipsISelLowering.cpp for ideas on how to implement + } + } else if(VA.isMemLoc()) { + InVals.push_back(LowerMemArgument(Chain, CallConv, Ins, + dl, DAG, VA, MFI, i)); + } else { + assert(0 && "found a Value Assign that is " + "neither a register or a memory location"); + } + } + /*if (hasStructRet) { + assert(0 && "Has struct return is not yet implemented"); + // See MipsISelLowering.cpp for ideas on how to implement + }*/ + + if (isVarArg) { + assert(0 && "Variable arguments are not yet supported"); + // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement + } + // This needs to be changed to non-zero if the return function needs + // to pop bytes + return Chain; +} +/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified +/// by "Src" to address "Dst" with size and alignment information specified by +/// the specific parameter attribute. The copy will be passed as a byval +/// function parameter. 
+static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG) { + assert(0 && "MemCopy does not exist yet"); + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + + return DAG.getMemcpy(Chain, + Src.getDebugLoc(), + Dst, Src, SizeNode, Flags.getByValAlign(), + /*IsVol=*/false, /*AlwaysInline=*/true, + MachinePointerInfo(), MachinePointerInfo()); +} + +SDValue +AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain, + SDValue StackPtr, SDValue Arg, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, + ISD::ArgFlagsTy Flags) const +{ + unsigned int LocMemOffset = VA.getLocMemOffset(); + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + PtrOff = DAG.getNode(ISD::ADD, + dl, + getPointerTy(), StackPtr, PtrOff); + if (Flags.isByVal()) { + PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG); + } else { + PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo::getStack(LocMemOffset), + false, false, 0); + } + return PtrOff; +} +/// LowerCAL - functions arguments are copied from virtual +/// regs to (physical regs)/(stack frame), CALLSEQ_START and +/// CALLSEQ_END are emitted. +/// TODO: isVarArg, isTailCall, hasStructRet +SDValue +AMDILTargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const + +#if 0 + SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, bool doesNotRet, + bool& isTailCall, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, +#endif +{ + CLI.IsTailCall = false; + MachineFunction& MF = CLI.DAG.getMachineFunction(); + // FIXME: DO we need to handle fast calling conventions and tail call + // optimizations?? X86/PPC ISelLowering + /*bool hasStructRet = (TheCall->getNumArgs()) + ? 
TheCall->getArgFlags(0).device()->isSRet() + : false;*/ + + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Analyze operands of the call, assigning locations to each operand + SmallVector ArgLocs; + CCState CCInfo(CLI.CallConv, CLI.IsVarArg, CLI.DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *CLI.DAG.getContext()); + // Analyize the calling operands, but need to change + // if we have more than one calling convetion + CCInfo.AnalyzeCallOperands(CLI.Outs, CCAssignFnForNode(CLI.CallConv)); + + unsigned int NumBytes = CCInfo.getNextStackOffset(); + if (CLI.IsTailCall) { + assert(CLI.IsTailCall && "Tail Call not handled yet!"); + // See X86/PPC ISelLowering + } + + CLI.Chain = CLI.DAG.getCALLSEQ_START(CLI.Chain, + CLI.DAG.getIntPtrConstant(NumBytes, true)); + + SmallVector, 8> RegsToPass; + SmallVector MemOpChains; + SDValue StackPtr; + //unsigned int FirstStacArgLoc = 0; + //int LastArgStackLoc = 0; + + // Walk the register/memloc assignments, insert copies/loads + for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers + // Arguments start after the 5 first operands of ISD::CALL + SDValue Arg = CLI.OutVals[i]; + //Promote the value if needed + switch(VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = CLI.DAG.getNode(ISD::SIGN_EXTEND, + CLI.DL, + VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = CLI.DAG.getNode(ISD::ZERO_EXTEND, + CLI.DL, + VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = CLI.DAG.getNode(ISD::ANY_EXTEND, + CLI.DL, + VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else if (VA.isMemLoc()) { + // Create the frame index object for this incoming parameter + int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, + VA.getLocMemOffset(), true); + 
SDValue PtrOff = CLI.DAG.getFrameIndex(FI,getPointerTy()); + + // emit ISD::STORE whichs stores the + // parameter value to a stack Location + MemOpChains.push_back(CLI.DAG.getStore(CLI.Chain, CLI.DL, Arg, PtrOff, + MachinePointerInfo::getFixedStack(FI), + false, false, 0)); + } else { + assert(0 && "Not a Reg/Mem Loc, major error!"); + } + } + if (!MemOpChains.empty()) { + CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, + CLI.DL, + MVT::Other, + &MemOpChains[0], + MemOpChains.size()); + } + SDValue InFlag; + if (!CLI.IsTailCall) { + for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) { + CLI.Chain = CLI.DAG.getCopyToReg(CLI.Chain, + CLI.DL, + RegsToPass[i].first, + RegsToPass[i].second, + InFlag); + InFlag = CLI.Chain.getValue(1); + } + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, + // every direct call is) turn it into a TargetGlobalAddress/ + // TargetExternalSymbol + // node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast(CLI.Callee)) { + CLI.Callee = CLI.DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, getPointerTy()); + } + else if (ExternalSymbolSDNode *S = dyn_cast(CLI.Callee)) { + CLI.Callee = CLI.DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); + } + else if (CLI.IsTailCall) { + assert(0 && "Tail calls are not handled yet"); + // see X86 ISelLowering for ideas on implementation: 1708 + } + + SDVTList NodeTys = CLI.DAG.getVTList(MVT::Other, MVTGLUE); + SmallVector Ops; + + if (CLI.IsTailCall) { + assert(0 && "Tail calls are not handled yet"); + // see X86 ISelLowering for ideas on implementation: 1721 + } + // If this is a direct call, pass the chain and the callee + if (CLI.Callee.getNode()) { + Ops.push_back(CLI.Chain); + Ops.push_back(CLI.Callee); + } + + if (CLI.IsTailCall) { + assert(0 && "Tail calls are not handled yet"); + // see X86 ISelLowering for ideas on implementation: 1739 + } + + // Add argument registers to the end of the list so that they are known + // live 
into the call + for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) { + Ops.push_back(CLI.DAG.getRegister( + RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + } + if (InFlag.getNode()) { + Ops.push_back(InFlag); + } + + // Emit Tail Call + if (CLI.IsTailCall) { + assert(0 && "Tail calls are not handled yet"); + // see X86 ISelLowering for ideas on implementation: 1762 + } + + CLI.Chain = CLI.DAG.getNode(AMDILISD::CALL, + CLI.DL, + NodeTys, &Ops[0], Ops.size()); + InFlag = CLI.Chain.getValue(1); + + // Create the CALLSEQ_END node + CLI.Chain = CLI.DAG.getCALLSEQ_END( + CLI.Chain, + CLI.DAG.getIntPtrConstant(NumBytes, true), + CLI.DAG.getIntPtrConstant(0, true), + InFlag); + InFlag = CLI.Chain.getValue(1); + // Handle result values, copying them out of physregs into vregs that + // we return + return LowerCallResult(CLI.Chain, InFlag, CLI.CallConv, CLI.IsVarArg, CLI.Ins, CLI.DL, CLI.DAG, + InVals); +} + +SDValue +AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const +{ + EVT OVT = Op.getValueType(); + SDValue DST; + if (OVT.getScalarType() == MVT::i64) { + DST = LowerSDIV64(Op, DAG); + } else if (OVT.getScalarType() == MVT::i32) { + DST = LowerSDIV32(Op, DAG); + } else if (OVT.getScalarType() == MVT::i16 + || OVT.getScalarType() == MVT::i8) { + DST = LowerSDIV24(Op, DAG); + } else { + DST = SDValue(Op.getNode(), 0); + } + return DST; +} + +SDValue +AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const +{ + EVT OVT = Op.getValueType(); + SDValue DST; + if (OVT.getScalarType() == MVT::i64) { + DST = LowerSREM64(Op, DAG); + } else if (OVT.getScalarType() == MVT::i32) { + DST = LowerSREM32(Op, DAG); + } else if (OVT.getScalarType() == MVT::i16) { + DST = LowerSREM16(Op, DAG); + } else if (OVT.getScalarType() == MVT::i8) { + DST = LowerSREM8(Op, DAG); + } else { + DST = SDValue(Op.getNode(), 0); + } + return DST; +} + +SDValue +AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const +{ + EVT VT = 
Op.getValueType(); + SDValue Nodes1; + SDValue second; + SDValue third; + SDValue fourth; + DebugLoc DL = Op.getDebugLoc(); + Nodes1 = DAG.getNode(AMDILISD::VBUILD, + DL, + VT, Op.getOperand(0)); +#if 0 + bool allEqual = true; + for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) { + if (Op.getOperand(0) != Op.getOperand(x)) { + allEqual = false; + break; + } + } + if (allEqual) { + return Nodes1; + } +#endif + switch(Op.getNumOperands()) { + default: + case 1: + break; + case 4: + fourth = Op.getOperand(3); + if (fourth.getOpcode() != ISD::UNDEF) { + Nodes1 = DAG.getNode( + ISD::INSERT_VECTOR_ELT, + DL, + Op.getValueType(), + Nodes1, + fourth, + DAG.getConstant(7, MVT::i32)); + } + case 3: + third = Op.getOperand(2); + if (third.getOpcode() != ISD::UNDEF) { + Nodes1 = DAG.getNode( + ISD::INSERT_VECTOR_ELT, + DL, + Op.getValueType(), + Nodes1, + third, + DAG.getConstant(6, MVT::i32)); + } + case 2: + second = Op.getOperand(1); + if (second.getOpcode() != ISD::UNDEF) { + Nodes1 = DAG.getNode( + ISD::INSERT_VECTOR_ELT, + DL, + Op.getValueType(), + Nodes1, + second, + DAG.getConstant(5, MVT::i32)); + } + break; + }; + return Nodes1; +} + +SDValue +AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const +{ + SDValue Cond = Op.getOperand(0); + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + DebugLoc DL = Op.getDebugLoc(); + Cond = getConversionNode(DAG, Cond, Op, true); + Cond = DAG.getNode(AMDILISD::CMOVLOG, + DL, + Op.getValueType(), Cond, LHS, RHS); + return Cond; +} +SDValue +AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const +{ + SDValue Cond; + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue CC = Op.getOperand(2); + DebugLoc DL = Op.getDebugLoc(); + ISD::CondCode SetCCOpcode = cast(CC)->get(); + unsigned int AMDILCC = CondCCodeToCC( + SetCCOpcode, + LHS.getValueType().getSimpleVT().SimpleTy); + assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!"); + Cond = DAG.getNode( + 
ISD::SELECT_CC, + Op.getDebugLoc(), + LHS.getValueType(), + LHS, RHS, + DAG.getConstant(-1, MVT::i32), + DAG.getConstant(0, MVT::i32), + CC); + Cond = getConversionNode(DAG, Cond, Op, true); + Cond = DAG.getNode( + ISD::AND, + DL, + Cond.getValueType(), + DAG.getConstant(1, Cond.getValueType()), + Cond); + return Cond; +} + +SDValue +AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const +{ + SDValue Data = Op.getOperand(0); + VTSDNode *BaseType = cast(Op.getOperand(1)); + DebugLoc DL = Op.getDebugLoc(); + EVT DVT = Data.getValueType(); + EVT BVT = BaseType->getVT(); + unsigned baseBits = BVT.getScalarType().getSizeInBits(); + unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; + unsigned shiftBits = srcBits - baseBits; + if (srcBits < 32) { + // If the op is less than 32 bits, then it needs to extend to 32bits + // so it can properly keep the upper bits valid. + EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1); + Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data); + shiftBits = 32 - baseBits; + DVT = IVT; + } + SDValue Shift = DAG.getConstant(shiftBits, DVT); + // Shift left by 'Shift' bits. + Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift); + // Signed shift Right by 'Shift' bits. + Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift); + if (srcBits < 32) { + // Once the sign extension is done, the op needs to be converted to + // its original type. + Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType()); + } + return Data; +} +EVT +AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const +{ + int iSize = (size * numEle); + int vEle = (iSize >> ((size == 64) ? 
6 : 5)); + if (!vEle) { + vEle = 1; + } + if (size == 64) { + if (vEle == 1) { + return EVT(MVT::i64); + } else { + return EVT(MVT::getVectorVT(MVT::i64, vEle)); + } + } else { + if (vEle == 1) { + return EVT(MVT::i32); + } else { + return EVT(MVT::getVectorVT(MVT::i32, vEle)); + } + } +} + +SDValue +AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const +{ + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + unsigned int SPReg = AMDGPU::SP; + DebugLoc DL = Op.getDebugLoc(); + SDValue SP = DAG.getCopyFromReg(Chain, + DL, + SPReg, MVT::i32); + SDValue NewSP = DAG.getNode(ISD::ADD, + DL, + MVT::i32, SP, Size); + Chain = DAG.getCopyToReg(SP.getValue(1), + DL, + SPReg, NewSP); + SDValue Ops[2] = {NewSP, Chain}; + Chain = DAG.getMergeValues(Ops, 2 ,DL); + return Chain; +} +SDValue +AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const +{ + SDValue Chain = Op.getOperand(0); + SDValue Cond = Op.getOperand(1); + SDValue Jump = Op.getOperand(2); + SDValue Result; + Result = DAG.getNode( + AMDILISD::BRANCH_COND, + Op.getDebugLoc(), + Op.getValueType(), + Chain, Jump, Cond); + return Result; +} + +SDValue +AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const +{ + SDValue Chain = Op.getOperand(0); + SDValue CC = Op.getOperand(1); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue JumpT = Op.getOperand(4); + SDValue CmpValue; + SDValue Result; + CmpValue = DAG.getNode( + ISD::SELECT_CC, + Op.getDebugLoc(), + LHS.getValueType(), + LHS, RHS, + DAG.getConstant(-1, MVT::i32), + DAG.getConstant(0, MVT::i32), + CC); + Result = DAG.getNode( + AMDILISD::BRANCH_COND, + CmpValue.getDebugLoc(), + MVT::Other, Chain, + JumpT, CmpValue); + return Result; +} + +// LowerRET - Lower an ISD::RET node. 
+SDValue +AMDILTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + DebugLoc dl, SelectionDAG &DAG) +const +{ + //MachineFunction& MF = DAG.getMachineFunction(); + // CCValAssign - represent the assignment of the return value + // to a location + SmallVector RVLocs; + + // CCState - Info about the registers and stack slot + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + + // Analyze return values of ISD::RET + CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32); + // If this is the first return lowered for this function, add + // the regs to the liveout set for the function + MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { + if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) { + MRI.addLiveOut(RVLocs[i].getLocReg()); + } + } + // FIXME: implement this when tail call is implemented + // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL); + // both x86 and ppc implement this in ISelLowering + + // Regular return here + SDValue Flag; + SmallVector RetOps; + RetOps.push_back(Chain); + RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32)); + for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { + CCValAssign &VA = RVLocs[i]; + SDValue ValToCopy = OutVals[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + // ISD::Ret => ret chain, (regnum1, val1), ... 
+ // So i * 2 + 1 index only the regnums + Chain = DAG.getCopyToReg(Chain, + dl, + VA.getLocReg(), + ValToCopy, + Flag); + // guarantee that all emitted copies are stuck together + // avoiding something bad + Flag = Chain.getValue(1); + } + /*if (MF.getFunction()->hasStructRetAttr()) { + assert(0 && "Struct returns are not yet implemented!"); + // Both MIPS and X86 have this + }*/ + RetOps[0] = Chain; + if (Flag.getNode()) + RetOps.push_back(Flag); + + Flag = DAG.getNode(AMDILISD::RET_FLAG, + dl, + MVT::Other, &RetOps[0], RetOps.size()); + return Flag; +} + +unsigned int +AMDILTargetLowering::getFunctionAlignment(const Function *) const +{ + return 0; +} + +SDValue +AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + MVT INTTY; + MVT FLTTY; + if (!OVT.isVector()) { + INTTY = MVT::i32; + FLTTY = MVT::f32; + } else if (OVT.getVectorNumElements() == 2) { + INTTY = MVT::v2i32; + FLTTY = MVT::v2f32; + } else if (OVT.getVectorNumElements() == 4) { + INTTY = MVT::v4i32; + FLTTY = MVT::v4f32; + } + unsigned bitsize = OVT.getScalarType().getSizeInBits(); + // char|short jq = ia ^ ib; + SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); + + // jq = jq >> (bitsize - 2) + jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); + + // jq = jq | 0x1 + jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); + + // jq = (int)jq + jq = DAG.getSExtOrTrunc(jq, DL, INTTY); + + // int ia = (int)LHS; + SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); + + // int ib, (int)RHS; + SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); + + // float fa = (float)ia; + SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); + + // float fb = (float)ib; + SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); + + // float fq = native_divide(fa, fb); + SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, 
fb); + + // fq = trunc(fq); + fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); + + // float fqneg = -fq; + SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); + + // float fr = mad(fqneg, fb, fa); + SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa); + + // int iq = (int)fq; + SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); + + // fr = fabs(fr); + fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); + + // fb = fabs(fb); + fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); + + // int cv = fr >= fb; + SDValue cv; + if (INTTY == MVT::i32) { + cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); + } else { + cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); + } + // jq = (cv ? jq : 0); + jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq, + DAG.getConstant(0, OVT)); + // dst = iq + jq; + iq = DAG.getSExtOrTrunc(iq, DL, OVT); + iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); + return iq; +} + +SDValue +AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + // The LowerSDIV32 function generates equivalent to the following IL. 
+ // mov r0, LHS + // mov r1, RHS + // ilt r10, r0, 0 + // ilt r11, r1, 0 + // iadd r0, r0, r10 + // iadd r1, r1, r11 + // ixor r0, r0, r10 + // ixor r1, r1, r11 + // udiv r0, r0, r1 + // ixor r10, r10, r11 + // iadd r0, r0, r10 + // ixor DST, r0, r10 + + // mov r0, LHS + SDValue r0 = LHS; + + // mov r1, RHS + SDValue r1 = RHS; + + // ilt r10, r0, 0 + SDValue r10 = DAG.getSelectCC(DL, + r0, DAG.getConstant(0, OVT), + DAG.getConstant(-1, MVT::i32), + DAG.getConstant(0, MVT::i32), + ISD::SETLT); + + // ilt r11, r1, 0 + SDValue r11 = DAG.getSelectCC(DL, + r1, DAG.getConstant(0, OVT), + DAG.getConstant(-1, MVT::i32), + DAG.getConstant(0, MVT::i32), + ISD::SETLT); + + // iadd r0, r0, r10 + r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); + + // iadd r1, r1, r11 + r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); + + // ixor r0, r0, r10 + r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); + + // ixor r1, r1, r11 + r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); + + // udiv r0, r0, r1 + r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); + + // ixor r10, r10, r11 + r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); + + // iadd r0, r0, r10 + r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); + + // ixor DST, r0, r10 + SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); + return DST; +} + +SDValue +AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const +{ + return SDValue(Op.getNode(), 0); +} + +SDValue +AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + MVT INTTY = MVT::i32; + if (OVT == MVT::v2i8) { + INTTY = MVT::v2i32; + } else if (OVT == MVT::v4i8) { + INTTY = MVT::v4i32; + } + SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); + SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); + LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); + LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); + return LHS; +} + +SDValue +AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) 
const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + MVT INTTY = MVT::i32; + if (OVT == MVT::v2i16) { + INTTY = MVT::v2i32; + } else if (OVT == MVT::v4i16) { + INTTY = MVT::v4i32; + } + SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); + SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); + LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); + LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); + return LHS; +} + +SDValue +AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT OVT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + // The LowerSREM32 function generates equivalent to the following IL. + // mov r0, LHS + // mov r1, RHS + // ilt r10, r0, 0 + // ilt r11, r1, 0 + // iadd r0, r0, r10 + // iadd r1, r1, r11 + // ixor r0, r0, r10 + // ixor r1, r1, r11 + // udiv r20, r0, r1 + // umul r20, r20, r1 + // sub r0, r0, r20 + // iadd r0, r0, r10 + // ixor DST, r0, r10 + + // mov r0, LHS + SDValue r0 = LHS; + + // mov r1, RHS + SDValue r1 = RHS; + + // ilt r10, r0, 0 + SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT, + DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), + r0, DAG.getConstant(0, OVT)); + + // ilt r11, r1, 0 + SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT, + DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), + r1, DAG.getConstant(0, OVT)); + + // iadd r0, r0, r10 + r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); + + // iadd r1, r1, r11 + r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); + + // ixor r0, r0, r10 + r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); + + // ixor r1, r1, r11 + r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); + + // udiv r20, r0, r1 (quotient; the remainder is rebuilt below as r0 - r20 * r1) + SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); + + // umul r20, r20, r1 + r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1); + + // sub r0, r0, r20 + r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); + + // iadd r0, r0, r10 + r0 =
DAG.getNode(ISD::ADD, DL, OVT, r0, r10); + + // ixor DST, r0, r10 + SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); + return DST; +} + +SDValue +AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const +{ + return SDValue(Op.getNode(), 0); +} diff --git a/lib/Target/AMDGPU/AMDILISelLowering.h b/lib/Target/AMDGPU/AMDILISelLowering.h new file mode 100644 index 0000000..817aaf5 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILISelLowering.h @@ -0,0 +1,203 @@ +//===-- AMDILISelLowering.h - AMDIL DAG Lowering Interface ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file defines the interfaces that AMDIL uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef AMDIL_ISELLOWERING_H_ +#define AMDIL_ISELLOWERING_H_ +#include "AMDIL.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm +{ + namespace AMDILISD + { + enum + { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + CMOVLOG, // 32bit FP Conditional move logical instruction + MAD, // 32bit Fused Multiply Add instruction + VBUILD, // scalar to vector mov instruction + CALL, // Function call based on a single integer + SELECT_CC, // Select the correct conditional instruction + UMUL, // 32bit unsigned multiplication + DIV_INF, // Divide with infinity returned on zero divisor + CMP, + IL_CC_I_GT, + IL_CC_I_LT, + IL_CC_I_GE, + IL_CC_I_LE, + IL_CC_I_EQ, + IL_CC_I_NE, + RET_FLAG, + BRANCH_COND, + LAST_ISD_NUMBER + }; + } // AMDILISD + + class MachineBasicBlock; + class MachineInstr; + class DebugLoc; + class TargetInstrInfo; + + class AMDILTargetLowering : public 
TargetLowering + { + public: + AMDILTargetLowering(TargetMachine &TM); + + virtual SDValue + LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + /// computeMaskedBitsForTargetNode - Determine which of + /// the bits specified + /// in Mask are known to be either zero or one and return them in + /// the + /// KnownZero/KnownOne bitsets. + virtual void + computeMaskedBitsForTargetNode( + const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0 + ) const; + + virtual bool + getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, unsigned Intrinsic) const; + virtual const char* + getTargetNodeName( + unsigned Opcode + ) const; + // We want to mark f32/f64 floating point values as + // legal + bool + isFPImmLegal(const APFloat &Imm, EVT VT) const; + // We don't want to shrink f64/f32 constants because + // they both take up the same amount of space and + // we don't want to use a f2d instruction. + bool ShouldShrinkFPConstant(EVT VT) const; + + /// getFunctionAlignment - Return the Log2 alignment of this + /// function. 
+ virtual unsigned int + getFunctionAlignment(const Function *F) const; + + private: + CCAssignFn* + CCAssignFnForNode(unsigned int CC) const; + + SDValue LowerCallResult(SDValue Chain, + SDValue InFlag, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl &InVals) const; + + SDValue LowerMemArgument(SDValue Chain, + CallingConv::ID CallConv, + const SmallVectorImpl &ArgInfo, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, MachineFrameInfo *MFI, + unsigned i) const; + + SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, + SDValue Arg, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, + ISD::ArgFlagsTy Flags) const; + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const; + + virtual SDValue + LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const; + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + DebugLoc dl, SelectionDAG &DAG) const; + + SDValue + LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerSREM(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerSREM8(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerSREM16(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerSREM32(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerSREM64(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerSDIV(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerSDIV24(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerSDIV32(SDValue Op, SelectionDAG &DAG) const; + SDValue + 
LowerSDIV64(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; + + EVT + genIntType(uint32_t size = 32, uint32_t numEle = 1) const; + + SDValue + LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + + SDValue + LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue + LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + + }; // AMDILTargetLowering +} // end namespace llvm + +#endif // AMDIL_ISELLOWERING_H_ diff --git a/lib/Target/AMDGPU/AMDILInstrInfo.cpp b/lib/Target/AMDGPU/AMDILInstrInfo.cpp new file mode 100644 index 0000000..723d5a1 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILInstrInfo.cpp @@ -0,0 +1,509 @@ +//===- AMDILInstrInfo.cpp - AMDIL Instruction Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file contains the AMDIL implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDILInstrInfo.h" +#include "AMDIL.h" +#include "AMDILISelLowering.h" +#include "AMDILUtilityFunctions.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Instructions.h" + +#define GET_INSTRINFO_CTOR +#include "AMDGPUGenInstrInfo.inc" + +using namespace llvm; + +AMDILInstrInfo::AMDILInstrInfo(TargetMachine &tm) + : AMDILGenInstrInfo(), + RI(tm, *this), + TM(tm) { +} + +const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const { + return RI; +} + +bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SubIdx) const { +// TODO: Implement this function + return false; +} + +unsigned AMDILInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { +// TODO: Implement this function + return 0; +} + +unsigned AMDILInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { +// TODO: Implement this function + return 0; +} + +bool AMDILInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { +// TODO: Implement this function + return false; +} +unsigned AMDILInstrInfo::isStoreFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { +// TODO: Implement this function + return 0; +} +unsigned AMDILInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { +// TODO: Implement this function + return 0; +} +bool AMDILInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { +// TODO: Implement this function + return false; +} + +MachineInstr * +AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + 
LiveVariables *LV) const { +// TODO: Implement this function + return NULL; +} +bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter, + MachineBasicBlock &MBB) const { + while (iter != MBB.end()) { + switch (iter->getOpcode()) { + default: + break; + ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND); + case AMDGPU::BRANCH: + return true; + }; + ++iter; + } + return false; +} + +bool AMDILInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + bool retVal = true; + return retVal; + MachineBasicBlock::iterator iter = MBB.begin(); + if (!getNextBranchInstr(iter, MBB)) { + retVal = false; + } else { + MachineInstr *firstBranch = iter; + if (!getNextBranchInstr(++iter, MBB)) { + if (firstBranch->getOpcode() == AMDGPU::BRANCH) { + TBB = firstBranch->getOperand(0).getMBB(); + firstBranch->eraseFromParent(); + retVal = false; + } else { + TBB = firstBranch->getOperand(0).getMBB(); + FBB = *(++MBB.succ_begin()); + if (FBB == TBB) { + FBB = *(MBB.succ_begin()); + } + Cond.push_back(firstBranch->getOperand(1)); + retVal = false; + } + } else { + MachineInstr *secondBranch = iter; + if (!getNextBranchInstr(++iter, MBB)) { + if (secondBranch->getOpcode() == AMDGPU::BRANCH) { + TBB = firstBranch->getOperand(0).getMBB(); + Cond.push_back(firstBranch->getOperand(1)); + FBB = secondBranch->getOperand(0).getMBB(); + secondBranch->eraseFromParent(); + retVal = false; + } else { + assert(0 && "Should not have two consecutive conditional branches"); + } + } else { + MBB.getParent()->viewCFG(); + assert(0 && "Should not have three branch instructions in" + " a single basic block"); + retVal = false; + } + } + } + return retVal; +} + +unsigned int AMDILInstrInfo::getBranchInstr(const MachineOperand &op) const { + const MachineInstr *MI = op.getParent(); + + switch (MI->getDesc().OpInfo->RegClass) { + default: // FIXME: fallthrough?? 
+ case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32; + case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32; + }; +} + +unsigned int +AMDILInstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, + DebugLoc DL) const +{ + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + for (unsigned int x = 0; x < Cond.size(); ++x) { + Cond[x].getParent()->dump(); + } + if (FBB == 0) { + if (Cond.empty()) { + BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(TBB); + } else { + BuildMI(&MBB, DL, get(getBranchInstr(Cond[0]))) + .addMBB(TBB).addReg(Cond[0].getReg()); + } + return 1; + } else { + BuildMI(&MBB, DL, get(getBranchInstr(Cond[0]))) + .addMBB(TBB).addReg(Cond[0].getReg()); + BuildMI(&MBB, DL, get(AMDGPU::BRANCH)).addMBB(FBB); + } + assert(0 && "Inserting two branches not supported"); + return 0; +} + +unsigned int AMDILInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) { + return 0; + } + --I; + switch (I->getOpcode()) { + default: + return 0; + ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND); + case AMDGPU::BRANCH: + I->eraseFromParent(); + break; + } + I = MBB.end(); + + if (I == MBB.begin()) { + return 1; + } + --I; + switch (I->getOpcode()) { + // FIXME: only one case?? 
+ default: + return 1; + ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND); + I->eraseFromParent(); + break; + } + return 2; +} + +MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) { + MachineBasicBlock::iterator tmp = MBB->end(); + if (!MBB->size()) { + return MBB->end(); + } + while (--tmp) { + if (tmp->getOpcode() == AMDGPU::ENDLOOP + || tmp->getOpcode() == AMDGPU::ENDIF + || tmp->getOpcode() == AMDGPU::ELSE) { + if (tmp == MBB->begin()) { + return tmp; + } else { + continue; + } + } else { + return ++tmp; + } + } + return MBB->end(); +} + +void +AMDILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, + int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + unsigned int Opc = 0; + // MachineInstr *curMI = MI; + MachineFunction &MF = *(MBB.getParent()); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + DebugLoc DL; + switch (RC->getID()) { + case AMDGPU::GPRF32RegClassID: + Opc = AMDGPU::PRIVATESTORE_f32; + break; + case AMDGPU::GPRI32RegClassID: + Opc = AMDGPU::PRIVATESTORE_i32; + break; + } + if (MI != MBB.end()) DL = MI->getDebugLoc(); + MachineMemOperand *MMO = + new MachineMemOperand( + MachinePointerInfo::getFixedStack(FrameIndex), + MachineMemOperand::MOStore, // the spill writes the frame slot, so flag the access as a store + MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); + if (MI != MBB.end()) { + DL = MI->getDebugLoc(); + } + BuildMI(MBB, MI, DL, get(Opc)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FrameIndex) + .addMemOperand(MMO) + .addImm(0); +} + +void +AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + unsigned int Opc = 0; + MachineFunction &MF = *(MBB.getParent()); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + DebugLoc DL; + switch (RC->getID()) { + case AMDGPU::GPRF32RegClassID: + Opc =
AMDGPU::PRIVATELOAD_f32; + break; + case AMDGPU::GPRI32RegClassID: + Opc = AMDGPU::PRIVATELOAD_i32; + break; + } + + MachineMemOperand *MMO = + new MachineMemOperand( + MachinePointerInfo::getFixedStack(FrameIndex), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); + if (MI != MBB.end()) { + DL = MI->getDebugLoc(); + } + BuildMI(MBB, MI, DL, get(Opc)) + .addReg(DestReg, RegState::Define) + .addFrameIndex(FrameIndex) + .addMemOperand(MMO) + .addImm(0); +} +MachineInstr * +AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl &Ops, + int FrameIndex) const { +// TODO: Implement this function + return 0; +} +MachineInstr* +AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl &Ops, + MachineInstr *LoadMI) const { + // TODO: Implement this function + return 0; +} +bool +AMDILInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl &Ops) const +{ + // TODO: Implement this function + return false; +} +bool +AMDILInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, + unsigned Reg, bool UnfoldLoad, + bool UnfoldStore, + SmallVectorImpl &NewMIs) const { + // TODO: Implement this function + return false; +} + +bool +AMDILInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, + SmallVectorImpl &NewNodes) const { + // TODO: Implement this function + return false; +} + +unsigned +AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, + bool UnfoldLoad, bool UnfoldStore, + unsigned *LoadRegIndex) const { + // TODO: Implement this function + return 0; +} + +bool AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const { + assert(Offset2 > Offset1 + && "Second offset should be larger than first offset!"); + // If we have less than 16 loads in a row, and the offsets are within 16, + // then schedule together. 
+ // TODO: Make the loads schedule near if it fits in a cacheline + return (NumLoads < 16 && (Offset2 - Offset1) < 16); +} + +bool +AMDILInstrInfo::ReverseBranchCondition(SmallVectorImpl &Cond) + const { + // TODO: Implement this function + return true; +} +void AMDILInstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + // TODO: Implement this function +} + +bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const { + // TODO: Implement this function + return false; +} +bool +AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl &Pred1, + const SmallVectorImpl &Pred2) + const { + // TODO: Implement this function + return false; +} + +bool AMDILInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector &Pred) const { + // TODO: Implement this function + return false; +} + +bool AMDILInstrInfo::isPredicable(MachineInstr *MI) const { + // TODO: Implement this function + return MI->getDesc().isPredicable(); +} + +bool +AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { + // TODO: Implement this function + return true; +} + +bool AMDILInstrInfo::isLoadInst(MachineInstr *MI) const { + if (strstr(getName(MI->getOpcode()), "LOADCONST")) { + return false; + } + return strstr(getName(MI->getOpcode()), "LOAD"); +} + +bool AMDILInstrInfo::isSWSExtLoadInst(MachineInstr *MI) const +{ + return false; +} + +bool AMDILInstrInfo::isExtLoadInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "EXTLOAD"); +} + +bool AMDILInstrInfo::isSExtLoadInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "SEXTLOAD"); +} + +bool AMDILInstrInfo::isAExtLoadInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "AEXTLOAD"); +} + +bool AMDILInstrInfo::isZExtLoadInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "ZEXTLOAD"); +} + +bool AMDILInstrInfo::isStoreInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "STORE"); +} + +bool 
AMDILInstrInfo::isTruncStoreInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "TRUNCSTORE"); +} + +bool AMDILInstrInfo::isAtomicInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "ATOM"); +} + +bool AMDILInstrInfo::isVolatileInst(MachineInstr *MI) const { + if (!MI->memoperands_empty()) { + for (MachineInstr::mmo_iterator mob = MI->memoperands_begin(), + moe = MI->memoperands_end(); mob != moe; ++mob) { + // If there is a volatile mem operand, this is a volatile instruction. + if ((*mob)->isVolatile()) { + return true; + } + } + } + return false; +} +bool AMDILInstrInfo::isGlobalInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "GLOBAL"); +} +bool AMDILInstrInfo::isPrivateInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "PRIVATE"); +} +bool AMDILInstrInfo::isConstantInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "CONSTANT") + || strstr(getName(MI->getOpcode()), "CPOOL"); +} +bool AMDILInstrInfo::isRegionInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "REGION"); +} +bool AMDILInstrInfo::isLocalInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "LOCAL"); +} +bool AMDILInstrInfo::isImageInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "IMAGE"); +} +bool AMDILInstrInfo::isAppendInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "APPEND"); +} +bool AMDILInstrInfo::isRegionAtomic(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "ATOM_R"); +} +bool AMDILInstrInfo::isLocalAtomic(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "ATOM_L"); +} +bool AMDILInstrInfo::isGlobalAtomic(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "ATOM_G") + || isArenaAtomic(MI); +} +bool AMDILInstrInfo::isArenaAtomic(llvm::MachineInstr *MI) const +{ + return 
strstr(getName(MI->getOpcode()), "ATOM_A"); +} diff --git a/lib/Target/AMDGPU/AMDILInstrInfo.h b/lib/Target/AMDGPU/AMDILInstrInfo.h new file mode 100644 index 0000000..bff729b --- /dev/null +++ b/lib/Target/AMDGPU/AMDILInstrInfo.h @@ -0,0 +1,161 @@ +//===- AMDILInstrInfo.h - AMDIL Instruction Information ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file contains the AMDIL implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef AMDILINSTRUCTIONINFO_H_ +#define AMDILINSTRUCTIONINFO_H_ + +#include "AMDILRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "AMDGPUGenInstrInfo.inc" + +namespace llvm { + // AMDIL - This namespace holds all of the target specific flags that + // instruction info tracks. + // + //class AMDILTargetMachine; +class AMDILInstrInfo : public AMDILGenInstrInfo { +private: + const AMDILRegisterInfo RI; + TargetMachine &TM; + bool getNextBranchInstr(MachineBasicBlock::iterator &iter, + MachineBasicBlock &MBB) const; + unsigned int getBranchInstr(const MachineOperand &op) const; +public: + explicit AMDILInstrInfo(TargetMachine &tm); + + // getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + // such, whenever a client has an instance of instruction info, it should + // always be able to get register info as well (through this method). 
+ const AMDILRegisterInfo &getRegisterInfo() const; + + bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, + unsigned &DstReg, unsigned &SubIdx) const; + + unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + bool hasLoadFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; + unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + bool hasStoreFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const; + + MachineInstr * + convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const; + + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const; + + unsigned RemoveBranch(MachineBasicBlock &MBB) const; + + unsigned + InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, + DebugLoc DL) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const = 0; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + +protected: + MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl &Ops, + int FrameIndex) const; + MachineInstr 
*foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr *MI, + const SmallVectorImpl &Ops, + MachineInstr *LoadMI) const; +public: + bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl &Ops) const; + bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, + unsigned Reg, bool UnfoldLoad, bool UnfoldStore, + SmallVectorImpl &NewMIs) const; + bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, + SmallVectorImpl &NewNodes) const; + unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, + bool UnfoldLoad, bool UnfoldStore, + unsigned *LoadRegIndex = 0) const; + bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const; + + bool ReverseBranchCondition(SmallVectorImpl &Cond) const; + void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + bool isPredicated(const MachineInstr *MI) const; + bool SubsumesPredicate(const SmallVectorImpl &Pred1, + const SmallVectorImpl &Pred2) const; + bool DefinesPredicate(MachineInstr *MI, + std::vector &Pred) const; + bool isPredicable(MachineInstr *MI) const; + bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; + + // Helper functions that check the opcode for status information + bool isLoadInst(llvm::MachineInstr *MI) const; + bool isExtLoadInst(llvm::MachineInstr *MI) const; + bool isSWSExtLoadInst(llvm::MachineInstr *MI) const; + bool isSExtLoadInst(llvm::MachineInstr *MI) const; + bool isZExtLoadInst(llvm::MachineInstr *MI) const; + bool isAExtLoadInst(llvm::MachineInstr *MI) const; + bool isStoreInst(llvm::MachineInstr *MI) const; + bool isTruncStoreInst(llvm::MachineInstr *MI) const; + bool isAtomicInst(llvm::MachineInstr *MI) const; + bool isVolatileInst(llvm::MachineInstr *MI) const; + bool isGlobalInst(llvm::MachineInstr *MI) const; + bool isPrivateInst(llvm::MachineInstr *MI) const; + bool isConstantInst(llvm::MachineInstr *MI) const; + bool isRegionInst(llvm::MachineInstr *MI) const; + bool 
isLocalInst(llvm::MachineInstr *MI) const; + bool isImageInst(llvm::MachineInstr *MI) const; + bool isAppendInst(llvm::MachineInstr *MI) const; + bool isRegionAtomic(llvm::MachineInstr *MI) const; + bool isLocalAtomic(llvm::MachineInstr *MI) const; + bool isGlobalAtomic(llvm::MachineInstr *MI) const; + bool isArenaAtomic(llvm::MachineInstr *MI) const; + + virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg, + int64_t Imm) const = 0; + + virtual unsigned getIEQOpcode() const = 0; + + virtual bool isMov(unsigned Opcode) const = 0; +}; + +} + +#endif // AMDILINSTRUCTIONINFO_H_ diff --git a/lib/Target/AMDGPU/AMDILInstrInfo.td b/lib/Target/AMDGPU/AMDILInstrInfo.td new file mode 100644 index 0000000..969618e --- /dev/null +++ b/lib/Target/AMDGPU/AMDILInstrInfo.td @@ -0,0 +1,108 @@ +//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file describes the AMDIL instructions in TableGen format. +// +//===----------------------------------------------------------------------===// +// AMDIL Instruction Predicate Definitions +// Predicate that is set to true if the hardware supports double precision +// divide +def HasHWDDiv : Predicate<"Subtarget.device()" + "->getGeneration() > AMDILDeviceInfo::HD4XXX && " + "Subtarget.device()->usesHardware(AMDILDeviceInfo::DoubleOps)">; + +// Predicate that is set to true if the hardware supports double, but not double +// precision divide in hardware +def HasSWDDiv : Predicate<"Subtarget.device()" + "->getGeneration() == AMDILDeviceInfo::HD4XXX &&" + "Subtarget.device()->usesHardware(AMDILDeviceInfo::DoubleOps)">; + +// Predicate that is set to true if the hardware supports 24bit signed +// math ops. 
Otherwise a software expansion to 32bit math ops is used instead. +def HasHWSign24Bit : Predicate<"Subtarget.device()" + "->getGeneration() > AMDILDeviceInfo::HD5XXX">; + +// Predicate that is set to true if 64bit operations are supported or not +def HasHW64Bit : Predicate<"Subtarget.device()" + "->usesHardware(AMDILDeviceInfo::LongOps)">; +def HasSW64Bit : Predicate<"Subtarget.device()" + "->usesSoftware(AMDILDeviceInfo::LongOps)">; + +// Predicate that is set to true if the timer register is supported +def HasTmrRegister : Predicate<"Subtarget.device()" + "->isSupported(AMDILDeviceInfo::TmrReg)">; +// Predicate that is true if we are at least evergreen series +def HasDeviceIDInst : Predicate<"Subtarget.device()" + "->getGeneration() >= AMDILDeviceInfo::HD5XXX">; + +// Predicate that is true if we have region address space. +def hasRegionAS : Predicate<"Subtarget.device()" + "->usesHardware(AMDILDeviceInfo::RegionMem)">; + +// Predicate that is true if we don't have region address space. +def noRegionAS : Predicate<"!Subtarget.device()" + "->isSupported(AMDILDeviceInfo::RegionMem)">; + + +// Predicate that is set to true if 64bit Mul is supported in the IL or not +def HasHW64Mul : Predicate<"Subtarget.calVersion()" + ">= CAL_VERSION_SC_139" + "&& Subtarget.device()" + "->getGeneration() >=" + "AMDILDeviceInfo::HD5XXX">; +def HasSW64Mul : Predicate<"Subtarget.calVersion()" + "< CAL_VERSION_SC_139">; +// Predicate that is set to true if 64bit Div/Mod is supported in the IL or not +def HasHW64DivMod : Predicate<"Subtarget.device()" + "->usesHardware(AMDILDeviceInfo::HW64BitDivMod)">; +def HasSW64DivMod : Predicate<"Subtarget.device()" + "->usesSoftware(AMDILDeviceInfo::HW64BitDivMod)">; + +// Predicate that is set to true if 64bit pointers are used. 
+def Has64BitPtr : Predicate<"Subtarget.is64bit()">; +def Has32BitPtr : Predicate<"!Subtarget.is64bit()">; +//===--------------------------------------------------------------------===// +// Custom Operands +//===--------------------------------------------------------------------===// +include "AMDILOperands.td" + +//===--------------------------------------------------------------------===// +// Custom Selection DAG Type Profiles +//===--------------------------------------------------------------------===// +include "AMDILProfiles.td" + +//===--------------------------------------------------------------------===// +// Custom Selection DAG Nodes +//===--------------------------------------------------------------------===// +include "AMDILNodes.td" + +//===--------------------------------------------------------------------===// +// Custom Pattern DAG Nodes +//===--------------------------------------------------------------------===// +include "AMDILPatterns.td" + +//===----------------------------------------------------------------------===// +// Instruction format classes +//===----------------------------------------------------------------------===// +include "AMDILFormats.td" + +//===--------------------------------------------------------------------===// +// Multiclass Instruction formats +//===--------------------------------------------------------------------===// +include "AMDILMultiClass.td" + +//===--------------------------------------------------------------------===// +// Intrinsics support +//===--------------------------------------------------------------------===// +include "AMDILIntrinsics.td" + +//===--------------------------------------------------------------------===// +// Instructions support +//===--------------------------------------------------------------------===// +include "AMDILInstructions.td" diff --git a/lib/Target/AMDGPU/AMDILInstructions.td b/lib/Target/AMDGPU/AMDILInstructions.td new file mode 100644 index 
0000000..ff0e2c1 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILInstructions.td @@ -0,0 +1,143 @@ +//===-- AMDILInstructions.td - AMDIL Instruction definitions --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// + +let Predicates = [Has32BitPtr] in { + let isCodeGenOnly=1 in { + //===----------------------------------------------------------------------===// + // Store Memory Operations + //===----------------------------------------------------------------------===// + defm GLOBALTRUNCSTORE : GTRUNCSTORE<"!global trunc store">; + defm LOCALTRUNCSTORE : LTRUNCSTORE<"!local trunc store">; + defm LOCALSTORE : STORE<"!local store" , local_store>; + defm PRIVATETRUNCSTORE : PTRUNCSTORE<"!private trunc store">; + defm PRIVATESTORE : STORE<"!private store" , private_store>; + defm REGIONTRUNCSTORE : RTRUNCSTORE<"!region trunc store">; + defm REGIONSTORE : STORE<"!region hw store" , region_store>; + + + //===---------------------------------------------------------------------===// + // Load Memory Operations + //===---------------------------------------------------------------------===// + defm GLOBALZEXTLOAD : LOAD<"!global zext load" , global_zext_load>; + defm GLOBALSEXTLOAD : LOAD<"!global sext load" , global_sext_load>; + defm GLOBALAEXTLOAD : LOAD<"!global aext load" , global_aext_load>; + defm PRIVATELOAD : LOAD<"!private load" , private_load>; + defm PRIVATEZEXTLOAD : LOAD<"!private zext load" , private_zext_load>; + defm PRIVATESEXTLOAD : LOAD<"!private sext load" , private_sext_load>; + defm PRIVATEAEXTLOAD : LOAD<"!private aext load" , private_aext_load>; + defm CPOOLLOAD : LOAD<"!constant pool load" , cp_load>; + defm CPOOLZEXTLOAD : LOAD<"!constant pool zext load", cp_zext_load>; + defm CPOOLSEXTLOAD : LOAD<"!constant pool sext load", cp_sext_load>; 
+ defm CPOOLAEXTLOAD : LOAD<"!constant aext pool load", cp_aext_load>; + defm CONSTANTLOAD : LOAD<"!constant load" , constant_load>; + defm CONSTANTZEXTLOAD : LOAD<"!constant zext load" , constant_zext_load>; + defm CONSTANTSEXTLOAD : LOAD<"!constant sext load" , constant_sext_load>; + defm CONSTANTAEXTLOAD : LOAD<"!constant aext load" , constant_aext_load>; + defm LOCALLOAD : LOAD<"!local load" , local_load>; + defm LOCALZEXTLOAD : LOAD<"!local zext load" , local_zext_load>; + defm LOCALSEXTLOAD : LOAD<"!local sext load" , local_sext_load>; + defm LOCALAEXTLOAD : LOAD<"!local aext load" , local_aext_load>; + defm REGIONLOAD : LOAD<"!region load" , region_load>; + defm REGIONZEXTLOAD : LOAD<"!region zext load" , region_zext_load>; + defm REGIONSEXTLOAD : LOAD<"!region sext load" , region_sext_load>; + defm REGIONAEXTLOAD : LOAD<"!region aext load" , region_aext_load>; + } +} + +//===---------------------------------------------------------------------===// +// Custom Inserter for Branches and returns, this eventually will be a +// separate pass +//===---------------------------------------------------------------------===// +let isTerminator = 1 in { + def BRANCH : ILFormat; + defm BRANCH_COND : BranchConditional; +} +//===---------------------------------------------------------------------===// +// return instructions +//===---------------------------------------------------------------------===// +let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in { + def RETURN : ILFormat; +} + +//===---------------------------------------------------------------------===// +// Handle a function call +//===---------------------------------------------------------------------===// +let isCall = 1, + Defs = [ + R1, R2, R3, R4, R5, R6, R7, R8, R9, R10 + ] + , + Uses = [ + R11, R12, R13, R14, R15, R16, R17, R18, R19, R20 + ] + in { + def CALL : UnaryOpNoRet; + } + + +//===---------------------------------------------------------------------===// +// Flow and 
Program control Instructions +//===---------------------------------------------------------------------===// +let isTerminator=1 in { + def SWITCH : ILFormat; + def CASE : ILFormat; + def BREAK : ILFormat; + def CONTINUE : ILFormat; + def DEFAULT : ILFormat; + def ELSE : ILFormat; + def ENDSWITCH : ILFormat; + def ENDMAIN : ILFormat; + def END : ILFormat; + def ENDFUNC : ILFormat; + def ENDIF : ILFormat; + def WHILELOOP : ILFormat; + def ENDLOOP : ILFormat; + def FUNC : ILFormat; + def RETDYN : ILFormat; + // This opcode has custom swizzle pattern encoded in Swizzle Encoder + defm IF_LOGICALNZ : BranchInstr; + // This opcode has custom swizzle pattern encoded in Swizzle Encoder + defm IF_LOGICALZ : BranchInstr; + // This opcode has custom swizzle pattern encoded in Swizzle Encoder + defm BREAK_LOGICALNZ : BranchInstr; + // This opcode has custom swizzle pattern encoded in Swizzle Encoder + defm BREAK_LOGICALZ : BranchInstr; + // This opcode has custom swizzle pattern encoded in Swizzle Encoder + defm CONTINUE_LOGICALNZ : BranchInstr; + // This opcode has custom swizzle pattern encoded in Swizzle Encoder + defm CONTINUE_LOGICALZ : BranchInstr; + defm IFC : BranchInstr2; + defm BREAKC : BranchInstr2; + defm CONTINUEC : BranchInstr2; +} +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in { + def TRAP : ILFormat; +} + diff --git a/lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp b/lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp new file mode 100644 index 0000000..678e32e --- /dev/null +++ b/lib/Target/AMDGPU/AMDILIntrinsicInfo.cpp @@ -0,0 +1,171 @@ +//===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file contains the AMDIL Implementation of the IntrinsicInfo class. 
+// +//===-----------------------------------------------------------------------===// + +#include "AMDILIntrinsicInfo.h" +#include "AMDIL.h" +#include "AMDILSubtarget.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" + +using namespace llvm; + +#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +#include "AMDGPUGenIntrinsics.inc" +#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN + +AMDILIntrinsicInfo::AMDILIntrinsicInfo(TargetMachine *tm) + : TargetIntrinsicInfo(), mTM(tm) +{ +} + +std::string +AMDILIntrinsicInfo::getName(unsigned int IntrID, Type **Tys, + unsigned int numTys) const +{ + static const char* const names[] = { +#define GET_INTRINSIC_NAME_TABLE +#include "AMDGPUGenIntrinsics.inc" +#undef GET_INTRINSIC_NAME_TABLE + }; + + //assert(!isOverloaded(IntrID) + //&& "AMDIL Intrinsics are not overloaded"); + if (IntrID < Intrinsic::num_intrinsics) { + return 0; + } + assert(IntrID < AMDGPUIntrinsic::num_AMDIL_intrinsics + && "Invalid intrinsic ID"); + + std::string Result(names[IntrID - Intrinsic::num_intrinsics]); + return Result; +} + + static bool +checkTruncation(const char *Name, unsigned int& Len) +{ + const char *ptr = Name + (Len - 1); + while(ptr != Name && *ptr != '_') { + --ptr; + } + // We don't want to truncate on atomic instructions + // but we do want to enter the check Truncation + // section so that we can translate the atomic + // instructions if we need to. + if (!strncmp(Name, "__atom", 6)) { + return true; + } + if (strstr(ptr, "i32") + || strstr(ptr, "u32") + || strstr(ptr, "i64") + || strstr(ptr, "u64") + || strstr(ptr, "f32") + || strstr(ptr, "f64") + || strstr(ptr, "i16") + || strstr(ptr, "u16") + || strstr(ptr, "i8") + || strstr(ptr, "u8")) { + Len = (unsigned int)(ptr - Name); + return true; + } + return false; +} + +// We don't want to support both the OpenCL 1.0 atomics +// and the 1.1 atomics with different names, so we translate +// the 1.0 atomics to the 1.1 naming here if needed. 
+static char* +atomTranslateIfNeeded(const char *Name, unsigned int Len) +{ + char *buffer = NULL; + if (strncmp(Name, "__atom_", 7)) { + // If we are not starting with __atom_, then + // go ahead and continue on with the allocation. + buffer = new char[Len + 1]; + memcpy(buffer, Name, Len); + } else { + buffer = new char[Len + 3]; + memcpy(buffer, "__atomic_", 9); + memcpy(buffer + 9, Name + 7, Len - 7); + Len += 2; + } + buffer[Len] = '\0'; + return buffer; +} + +unsigned int +AMDILIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const +{ +#define GET_FUNCTION_RECOGNIZER +#include "AMDGPUGenIntrinsics.inc" +#undef GET_FUNCTION_RECOGNIZER + AMDGPUIntrinsic::ID IntrinsicID + = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic; + if (checkTruncation(Name, Len)) { + char *buffer = atomTranslateIfNeeded(Name, Len); + IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", buffer); + delete [] buffer; + } else { + IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name); + } + if (!isValidIntrinsic(IntrinsicID)) { + return 0; + } + if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) { + return IntrinsicID; + } + return 0; +} + +bool +AMDILIntrinsicInfo::isOverloaded(unsigned id) const +{ + // Overload Table +#define GET_INTRINSIC_OVERLOAD_TABLE +#include "AMDGPUGenIntrinsics.inc" +#undef GET_INTRINSIC_OVERLOAD_TABLE +} + +/// This defines the "getAttributes(ID id)" method. +#define GET_INTRINSIC_ATTRIBUTES +#include "AMDGPUGenIntrinsics.inc" +#undef GET_INTRINSIC_ATTRIBUTES + +Function* +AMDILIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, + Type **Tys, + unsigned numTys) const +{ + assert(!"Not implemented"); +} + +/// Because the code generator has to support different SC versions, +/// this function is added to check that the intrinsic being used +/// is actually valid. In the case where it isn't valid, the +/// function call is not translated into an intrinsic and the +/// fall back software emulated path should pick up the result. 
+bool +AMDILIntrinsicInfo::isValidIntrinsic(unsigned int IntrID) const +{ + const AMDILSubtarget &STM = mTM->getSubtarget(); + switch (IntrID) { + default: + return true; + case AMDGPUIntrinsic::AMDIL_convert_f32_i32_rpi: + case AMDGPUIntrinsic::AMDIL_convert_f32_i32_flr: + case AMDGPUIntrinsic::AMDIL_convert_f32_f16_near: + case AMDGPUIntrinsic::AMDIL_convert_f32_f16_neg_inf: + case AMDGPUIntrinsic::AMDIL_convert_f32_f16_plus_inf: + return STM.calVersion() >= CAL_VERSION_SC_139; + }; +} diff --git a/lib/Target/AMDGPU/AMDILIntrinsicInfo.h b/lib/Target/AMDGPU/AMDILIntrinsicInfo.h new file mode 100644 index 0000000..072c265 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILIntrinsicInfo.h @@ -0,0 +1,49 @@ +//===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Interface for the AMDIL Implementation of the Intrinsic Info class. 
+// +//===-----------------------------------------------------------------------===// +#ifndef _AMDIL_INTRINSICS_H_ +#define _AMDIL_INTRINSICS_H_ + +#include "llvm/Intrinsics.h" +#include "llvm/Target/TargetIntrinsicInfo.h" + +namespace llvm { + class TargetMachine; + namespace AMDGPUIntrinsic { + enum ID { + last_non_AMDIL_intrinsic = Intrinsic::num_intrinsics - 1, +#define GET_INTRINSIC_ENUM_VALUES +#include "AMDGPUGenIntrinsics.inc" +#undef GET_INTRINSIC_ENUM_VALUES + , num_AMDIL_intrinsics + }; + + } + + + class AMDILIntrinsicInfo : public TargetIntrinsicInfo { + TargetMachine *mTM; + public: + AMDILIntrinsicInfo(TargetMachine *tm); + std::string getName(unsigned int IntrId, Type **Tys = 0, + unsigned int numTys = 0) const; + unsigned int lookupName(const char *Name, unsigned int Len) const; + bool isOverloaded(unsigned int IID) const; + Function *getDeclaration(Module *M, unsigned int ID, + Type **Tys = 0, + unsigned int numTys = 0) const; + bool isValidIntrinsic(unsigned int) const; + }; // AMDILIntrinsicInfo +} + +#endif // _AMDIL_INTRINSICS_H_ + diff --git a/lib/Target/AMDGPU/AMDILIntrinsics.td b/lib/Target/AMDGPU/AMDILIntrinsics.td new file mode 100644 index 0000000..ef361f4 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILIntrinsics.td @@ -0,0 +1,705 @@ +//===- AMDILIntrinsics.td - Defines AMDIL Intrinsics -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +// +// This file defines all of the amdil-specific intrinsics +// +//===---------------------------------------------------------------===// + +let TargetPrefix = "AMDIL", isTarget = 1 in { +//------------- Synchronization Functions - OpenCL 6.11.9 --------------------// + def int_AMDIL_fence : GCCBuiltin<"mem_fence">, + UnaryIntNoRetInt; + def int_AMDIL_fence_global : GCCBuiltin<"mem_fence_global">, + UnaryIntNoRetInt; + def int_AMDIL_fence_local : GCCBuiltin<"mem_fence_local">, + UnaryIntNoRetInt; + def int_AMDIL_fence_region : GCCBuiltin<"mem_fence_region">, + UnaryIntNoRetInt; + def int_AMDIL_fence_read_only : GCCBuiltin<"read_mem_fence">, + UnaryIntNoRetInt; + def int_AMDIL_fence_read_only_global : GCCBuiltin<"read_mem_fence_global">, + UnaryIntNoRetInt; + def int_AMDIL_fence_read_only_local : GCCBuiltin<"read_mem_fence_local">, + UnaryIntNoRetInt; + def int_AMDIL_fence_read_only_region : GCCBuiltin<"read_mem_fence_region">, + UnaryIntNoRetInt; + def int_AMDIL_fence_write_only : GCCBuiltin<"write_mem_fence">, + UnaryIntNoRetInt; + def int_AMDIL_fence_write_only_global : GCCBuiltin<"write_mem_fence_global">, + UnaryIntNoRetInt; + def int_AMDIL_fence_write_only_local : GCCBuiltin<"write_mem_fence_local">, + UnaryIntNoRetInt; + def int_AMDIL_fence_write_only_region : GCCBuiltin<"write_mem_fence_region">, + UnaryIntNoRetInt; + + def int_AMDIL_early_exit : GCCBuiltin<"__amdil_early_exit">, + UnaryIntNoRetInt; + + def int_AMDIL_cmov_logical : GCCBuiltin<"__amdil_cmov_logical">, + TernaryIntInt; + def int_AMDIL_fabs : GCCBuiltin<"__amdil_fabs">, UnaryIntFloat; + def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt; + + def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">, + TernaryIntInt; + def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">, + TernaryIntInt; + def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">, + UnaryIntInt; + 
def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">, + UnaryIntInt; + def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">, + UnaryIntInt; + def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">, + UnaryIntInt; + def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">, + UnaryIntInt; + def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">, + TernaryIntInt; + def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">, + TernaryIntInt; + def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">, + QuaternaryIntInt; + def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">, + TernaryIntInt; + def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">, + BinaryIntInt; + def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">, + TernaryIntInt; + def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">, + TernaryIntInt; + def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">, + TernaryIntFloat; + def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">, + BinaryIntInt; + def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">, + BinaryIntInt; + def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">, + BinaryIntInt; + def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">, + BinaryIntInt; + def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">, + BinaryIntInt; + def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">, + BinaryIntInt; + def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">, + TernaryIntInt; + def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">, + TernaryIntInt; + def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">, + BinaryIntInt; + def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">, + BinaryIntInt; + def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">, + BinaryIntInt; + def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">, + BinaryIntInt; + def int_AMDIL_min : GCCBuiltin<"__amdil_min">, + BinaryIntFloat; + def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">, + BinaryIntInt; + def int_AMDIL_max_u32 : 
GCCBuiltin<"__amdil_umax">, + BinaryIntInt; + def int_AMDIL_max : GCCBuiltin<"__amdil_max">, + BinaryIntFloat; + def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">, + TernaryIntInt; + def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">, + TernaryIntInt; + def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">, + TernaryIntInt; + def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">, + UnaryIntFloat; + def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">, + TernaryIntFloat; + def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">, + UnaryIntFloat; + def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">, + UnaryIntFloat; + def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">, + UnaryIntFloat; + def int_AMDIL_round_posinf : GCCBuiltin<"__amdil_round_posinf">, + UnaryIntFloat; + def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">, + UnaryIntFloat; + def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">, + UnaryIntFloat; + def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">, + UnaryIntFloat; + def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">, + UnaryIntFloat; + def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">, + UnaryIntFloat; + def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">, + UnaryIntFloat; + def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">, + UnaryIntFloat; + def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">, + UnaryIntFloat; + def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">, + UnaryIntFloat; + def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat; + def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat; + def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt; + def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">, + UnaryIntFloat; + def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">, + UnaryIntFloat; + def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">, + UnaryIntFloat; + def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">, + UnaryIntFloat; + def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">, + UnaryIntFloat; + def 
int_AMDIL_log : GCCBuiltin<"__amdil_log">, + UnaryIntFloat; + def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">, + UnaryIntFloat; + def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">, + UnaryIntFloat; + def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">, + UnaryIntFloat; + def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">, + TernaryIntFloat; + def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">, + UnaryIntFloat; + def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">, + UnaryIntFloat; + def int_AMDIL_length : GCCBuiltin<"__amdil_length">, + UnaryIntFloat; + def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">, + TernaryIntFloat; + def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">, + Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_i32_ty], []>; + + def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">, + Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>; + def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">, + Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>; + def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], []>; + def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">, + ConvertIntITOF; + def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">, + ConvertIntFTOI; + def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">, + ConvertIntFTOI; + def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">, + ConvertIntFTOI; + def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">, + ConvertIntFTOI; + def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">, + ConvertIntFTOI; + def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">, + ConvertIntFTOI; + def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>; + def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">, + ConvertIntITOF; + def 
int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">, + ConvertIntITOF; + def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">, + ConvertIntITOF; + def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">, + ConvertIntITOF; + def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">, + Intrinsic<[llvm_float_ty], [llvm_v2f32_ty, + llvm_v2f32_ty, llvm_float_ty], []>; + def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">, + Intrinsic<[llvm_float_ty], [llvm_v2f32_ty, + llvm_v2f32_ty], []>; + def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">, + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], []>; + def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">, + Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], []>; +//===---------------------- Image functions begin ------------------------===// + def int_AMDIL_image1d_write : GCCBuiltin<"__amdil_image1d_write">, + Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image1d_read_norm : GCCBuiltin<"__amdil_image1d_read_norm">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image1d_read_unnorm : GCCBuiltin<"__amdil_image1d_read_unnorm">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image1d_info0 : GCCBuiltin<"__amdil_image1d_info0">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>; + + def int_AMDIL_image1d_info1 : GCCBuiltin<"__amdil_image1d_info1">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>; + + def int_AMDIL_image1d_array_write : GCCBuiltin<"__amdil_image1d_array_write">, + Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image1d_array_read_norm : GCCBuiltin<"__amdil_image1d_array_read_norm">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image1d_array_read_unnorm : 
GCCBuiltin<"__amdil_image1d_array_read_unnorm">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image1d_array_info0 : GCCBuiltin<"__amdil_image1d_array_info0">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>; + + def int_AMDIL_image1d_array_info1 : GCCBuiltin<"__amdil_image1d_array_info1">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>; + + def int_AMDIL_image2d_write : GCCBuiltin<"__amdil_image2d_write">, + Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image2d_read_norm : GCCBuiltin<"__amdil_image2d_read_norm">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image2d_read_unnorm : GCCBuiltin<"__amdil_image2d_read_unnorm">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image2d_info0 : GCCBuiltin<"__amdil_image2d_info0">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>; + + def int_AMDIL_image2d_info1 : GCCBuiltin<"__amdil_image2d_info1">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>; + + def int_AMDIL_image2d_array_write : GCCBuiltin<"__amdil_image2d_array_write">, + Intrinsic<[], [llvm_ptr_ty, llvm_v2i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image2d_array_read_norm : GCCBuiltin<"__amdil_image2d_array_read_norm">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image2d_array_read_unnorm : GCCBuiltin<"__amdil_image2d_array_read_unnorm">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image2d_array_info0 : GCCBuiltin<"__amdil_image2d_array_info0">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>; + + def int_AMDIL_image2d_array_info1 : GCCBuiltin<"__amdil_image2d_array_info1">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>; + + def 
int_AMDIL_image3d_write : GCCBuiltin<"__amdil_image3d_write">, + Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image3d_read_norm : GCCBuiltin<"__amdil_image3d_read_norm">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image3d_read_unnorm : GCCBuiltin<"__amdil_image3d_read_unnorm">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_image3d_info0 : GCCBuiltin<"__amdil_image3d_info0">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>; + + def int_AMDIL_image3d_info1 : GCCBuiltin<"__amdil_image3d_info1">, + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], []>; + +//===---------------------- Image functions end --------------------------===// + + def int_AMDIL_append_alloc_i32 : GCCBuiltin<"__amdil_append_alloc">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>; + def int_AMDIL_append_consume_i32 : GCCBuiltin<"__amdil_append_consume">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>; + def int_AMDIL_append_alloc_i32_noret : GCCBuiltin<"__amdil_append_alloc_noret">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>; + def int_AMDIL_append_consume_i32_noret : GCCBuiltin<"__amdil_append_consume_noret">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadWriteArgMem]>; + + def int_AMDIL_get_global_id : GCCBuiltin<"__amdil_get_global_id_int">, + Intrinsic<[llvm_v4i32_ty], [], []>; + def int_AMDIL_get_local_id : GCCBuiltin<"__amdil_get_local_id_int">, + Intrinsic<[llvm_v4i32_ty], [], []>; + def int_AMDIL_get_group_id : GCCBuiltin<"__amdil_get_group_id_int">, + Intrinsic<[llvm_v4i32_ty], [], []>; + def int_AMDIL_get_num_groups : GCCBuiltin<"__amdil_get_num_groups_int">, + Intrinsic<[llvm_v4i32_ty], [], []>; + def int_AMDIL_get_local_size : GCCBuiltin<"__amdil_get_local_size_int">, + Intrinsic<[llvm_v4i32_ty], [], []>; + def int_AMDIL_get_global_size : 
GCCBuiltin<"__amdil_get_global_size_int">, + Intrinsic<[llvm_v4i32_ty], [], []>; + def int_AMDIL_get_global_offset : GCCBuiltin<"__amdil_get_global_offset_int">, + Intrinsic<[llvm_v4i32_ty], [], []>; + def int_AMDIL_get_work_dim : GCCBuiltin<"get_work_dim">, + Intrinsic<[llvm_i32_ty], [], []>; + def int_AMDIL_get_printf_offset : GCCBuiltin<"__amdil_get_printf_offset">, + Intrinsic<[llvm_i32_ty], []>; + def int_AMDIL_get_printf_size : GCCBuiltin<"__amdil_get_printf_size">, + Intrinsic<[llvm_i32_ty], []>; + +/// Intrinsics for atomic instructions with no return value +/// Signed 32 bit integer atomics for global address space +def int_AMDIL_atomic_add_gi32_noret : GCCBuiltin<"__atomic_add_gi32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_sub_gi32_noret : GCCBuiltin<"__atomic_sub_gi32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_rsub_gi32_noret : GCCBuiltin<"__atomic_rsub_gi32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_xchg_gi32_noret : GCCBuiltin<"__atomic_xchg_gi32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_inc_gi32_noret : GCCBuiltin<"__atomic_inc_gi32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_dec_gi32_noret : GCCBuiltin<"__atomic_dec_gi32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_cmpxchg_gi32_noret : GCCBuiltin<"__atomic_cmpxchg_gi32_noret">, + TernaryAtomicIntNoRet; +def int_AMDIL_atomic_min_gi32_noret : GCCBuiltin<"__atomic_min_gi32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_max_gi32_noret : GCCBuiltin<"__atomic_max_gi32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_and_gi32_noret : GCCBuiltin<"__atomic_and_gi32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_or_gi32_noret : GCCBuiltin<"__atomic_or_gi32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_xor_gi32_noret : GCCBuiltin<"__atomic_xor_gi32_noret">, + BinaryAtomicIntNoRet; + + + +/// Unsigned 32 bit integer atomics for global address space +def int_AMDIL_atomic_add_gu32_noret : 
GCCBuiltin<"__atomic_add_gu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_sub_gu32_noret : GCCBuiltin<"__atomic_sub_gu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_rsub_gu32_noret : GCCBuiltin<"__atomic_rsub_gu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_xchg_gu32_noret : GCCBuiltin<"__atomic_xchg_gu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_inc_gu32_noret : GCCBuiltin<"__atomic_inc_gu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_dec_gu32_noret : GCCBuiltin<"__atomic_dec_gu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_cmpxchg_gu32_noret : GCCBuiltin<"__atomic_cmpxchg_gu32_noret">, + TernaryAtomicIntNoRet; +def int_AMDIL_atomic_min_gu32_noret : GCCBuiltin<"__atomic_min_gu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_max_gu32_noret : GCCBuiltin<"__atomic_max_gu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_and_gu32_noret : GCCBuiltin<"__atomic_and_gu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_or_gu32_noret : GCCBuiltin<"__atomic_or_gu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_xor_gu32_noret : GCCBuiltin<"__atomic_xor_gu32_noret">, + BinaryAtomicIntNoRet; + + +/// Intrinsics for atomic instructions with a return value +/// Signed 32 bit integer atomics for global address space +def int_AMDIL_atomic_add_gi32 : GCCBuiltin<"__atomic_add_gi32">, + BinaryAtomicInt; +def int_AMDIL_atomic_sub_gi32 : GCCBuiltin<"__atomic_sub_gi32">, + BinaryAtomicInt; +def int_AMDIL_atomic_rsub_gi32 : GCCBuiltin<"__atomic_rsub_gi32">, + BinaryAtomicInt; +def int_AMDIL_atomic_xchg_gi32 : GCCBuiltin<"__atomic_xchg_gi32">, + BinaryAtomicInt; +def int_AMDIL_atomic_inc_gi32 : GCCBuiltin<"__atomic_inc_gi32">, + BinaryAtomicInt; +def int_AMDIL_atomic_dec_gi32 : GCCBuiltin<"__atomic_dec_gi32">, + BinaryAtomicInt; +def int_AMDIL_atomic_cmpxchg_gi32 : GCCBuiltin<"__atomic_cmpxchg_gi32">, + TernaryAtomicInt; +def int_AMDIL_atomic_min_gi32 : GCCBuiltin<"__atomic_min_gi32">, + 
BinaryAtomicInt; +def int_AMDIL_atomic_max_gi32 : GCCBuiltin<"__atomic_max_gi32">, + BinaryAtomicInt; +def int_AMDIL_atomic_and_gi32 : GCCBuiltin<"__atomic_and_gi32">, + BinaryAtomicInt; +def int_AMDIL_atomic_or_gi32 : GCCBuiltin<"__atomic_or_gi32">, + BinaryAtomicInt; +def int_AMDIL_atomic_xor_gi32 : GCCBuiltin<"__atomic_xor_gi32">, + BinaryAtomicInt; + +/// 32 bit float atomics required by OpenCL +def int_AMDIL_atomic_xchg_gf32 : GCCBuiltin<"__atomic_xchg_gf32">, + BinaryAtomicInt; +def int_AMDIL_atomic_xchg_gf32_noret : GCCBuiltin<"__atomic_xchg_gf32_noret">, + BinaryAtomicIntNoRet; + +/// Unsigned 32 bit integer atomics for global address space +def int_AMDIL_atomic_add_gu32 : GCCBuiltin<"__atomic_add_gu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_sub_gu32 : GCCBuiltin<"__atomic_sub_gu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_rsub_gu32 : GCCBuiltin<"__atomic_rsub_gu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_xchg_gu32 : GCCBuiltin<"__atomic_xchg_gu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_inc_gu32 : GCCBuiltin<"__atomic_inc_gu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_dec_gu32 : GCCBuiltin<"__atomic_dec_gu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_cmpxchg_gu32 : GCCBuiltin<"__atomic_cmpxchg_gu32">, + TernaryAtomicInt; +def int_AMDIL_atomic_min_gu32 : GCCBuiltin<"__atomic_min_gu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_max_gu32 : GCCBuiltin<"__atomic_max_gu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_and_gu32 : GCCBuiltin<"__atomic_and_gu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_or_gu32 : GCCBuiltin<"__atomic_or_gu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_xor_gu32 : GCCBuiltin<"__atomic_xor_gu32">, + BinaryAtomicInt; + + +/// Intrinsics for atomic instructions with no return value +/// Signed 32 bit integer atomics for local address space +def int_AMDIL_atomic_add_li32_noret : GCCBuiltin<"__atomic_add_li32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_sub_li32_noret : GCCBuiltin<"__atomic_sub_li32_noret">, + 
BinaryAtomicIntNoRet; +def int_AMDIL_atomic_rsub_li32_noret : GCCBuiltin<"__atomic_rsub_li32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_xchg_li32_noret : GCCBuiltin<"__atomic_xchg_li32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_inc_li32_noret : GCCBuiltin<"__atomic_inc_li32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_dec_li32_noret : GCCBuiltin<"__atomic_dec_li32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_cmpxchg_li32_noret : GCCBuiltin<"__atomic_cmpxchg_li32_noret">, + TernaryAtomicIntNoRet; +def int_AMDIL_atomic_min_li32_noret : GCCBuiltin<"__atomic_min_li32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_max_li32_noret : GCCBuiltin<"__atomic_max_li32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_and_li32_noret : GCCBuiltin<"__atomic_and_li32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_or_li32_noret : GCCBuiltin<"__atomic_or_li32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_mskor_li32_noret : GCCBuiltin<"__atomic_mskor_li32_noret">, + TernaryAtomicIntNoRet; +def int_AMDIL_atomic_xor_li32_noret : GCCBuiltin<"__atomic_xor_li32_noret">, + BinaryAtomicIntNoRet; + +/// Signed 32 bit integer atomics for region address space +def int_AMDIL_atomic_add_ri32_noret : GCCBuiltin<"__atomic_add_ri32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_sub_ri32_noret : GCCBuiltin<"__atomic_sub_ri32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_rsub_ri32_noret : GCCBuiltin<"__atomic_rsub_ri32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_xchg_ri32_noret : GCCBuiltin<"__atomic_xchg_ri32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_inc_ri32_noret : GCCBuiltin<"__atomic_inc_ri32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_dec_ri32_noret : GCCBuiltin<"__atomic_dec_ri32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_cmpxchg_ri32_noret : GCCBuiltin<"__atomic_cmpxchg_ri32_noret">, + TernaryAtomicIntNoRet; +def int_AMDIL_atomic_min_ri32_noret : 
GCCBuiltin<"__atomic_min_ri32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_max_ri32_noret : GCCBuiltin<"__atomic_max_ri32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_and_ri32_noret : GCCBuiltin<"__atomic_and_ri32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_or_ri32_noret : GCCBuiltin<"__atomic_or_ri32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_mskor_ri32_noret : GCCBuiltin<"__atomic_mskor_ri32_noret">, + TernaryAtomicIntNoRet; +def int_AMDIL_atomic_xor_ri32_noret : GCCBuiltin<"__atomic_xor_ri32_noret">, + BinaryAtomicIntNoRet; + + + +/// Unsigned 32 bit integer atomics for local address space +def int_AMDIL_atomic_add_lu32_noret : GCCBuiltin<"__atomic_add_lu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_sub_lu32_noret : GCCBuiltin<"__atomic_sub_lu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_rsub_lu32_noret : GCCBuiltin<"__atomic_rsub_lu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_xchg_lu32_noret : GCCBuiltin<"__atomic_xchg_lu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_inc_lu32_noret : GCCBuiltin<"__atomic_inc_lu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_dec_lu32_noret : GCCBuiltin<"__atomic_dec_lu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_cmpxchg_lu32_noret : GCCBuiltin<"__atomic_cmpxchg_lu32_noret">, + TernaryAtomicIntNoRet; +def int_AMDIL_atomic_min_lu32_noret : GCCBuiltin<"__atomic_min_lu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_max_lu32_noret : GCCBuiltin<"__atomic_max_lu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_and_lu32_noret : GCCBuiltin<"__atomic_and_lu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_or_lu32_noret : GCCBuiltin<"__atomic_or_lu32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_mskor_lu32_noret : GCCBuiltin<"__atomic_mskor_lu32_noret">, + TernaryAtomicIntNoRet; +def int_AMDIL_atomic_xor_lu32_noret : GCCBuiltin<"__atomic_xor_lu32_noret">, + BinaryAtomicIntNoRet; + +/// 
Unsigned 32 bit integer atomics for region address space +def int_AMDIL_atomic_add_ru32_noret : GCCBuiltin<"__atomic_add_ru32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_sub_ru32_noret : GCCBuiltin<"__atomic_sub_ru32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_rsub_ru32_noret : GCCBuiltin<"__atomic_rsub_ru32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_xchg_ru32_noret : GCCBuiltin<"__atomic_xchg_ru32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_inc_ru32_noret : GCCBuiltin<"__atomic_inc_ru32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_dec_ru32_noret : GCCBuiltin<"__atomic_dec_ru32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_cmpxchg_ru32_noret : GCCBuiltin<"__atomic_cmpxchg_ru32_noret">, + TernaryAtomicIntNoRet; +def int_AMDIL_atomic_min_ru32_noret : GCCBuiltin<"__atomic_min_ru32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_max_ru32_noret : GCCBuiltin<"__atomic_max_ru32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_and_ru32_noret : GCCBuiltin<"__atomic_and_ru32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_or_ru32_noret : GCCBuiltin<"__atomic_or_ru32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_mskor_ru32_noret : GCCBuiltin<"__atomic_mskor_ru32_noret">, + TernaryAtomicIntNoRet; +def int_AMDIL_atomic_xor_ru32_noret : GCCBuiltin<"__atomic_xor_ru32_noret">, + BinaryAtomicIntNoRet; + +def int_AMDIL_get_cycle_count : GCCBuiltin<"__amdil_get_cycle_count">, + VoidIntLong; + +def int_AMDIL_compute_unit_id : GCCBuiltin<"__amdil_compute_unit_id">, + VoidIntInt; + +def int_AMDIL_wavefront_id : GCCBuiltin<"__amdil_wavefront_id">, + VoidIntInt; + + +/// Intrinsics for atomic instructions with a return value +/// Signed 32 bit integer atomics for local address space +def int_AMDIL_atomic_add_li32 : GCCBuiltin<"__atomic_add_li32">, + BinaryAtomicInt; +def int_AMDIL_atomic_sub_li32 : GCCBuiltin<"__atomic_sub_li32">, + BinaryAtomicInt; +def int_AMDIL_atomic_rsub_li32 : 
GCCBuiltin<"__atomic_rsub_li32">, + BinaryAtomicInt; +def int_AMDIL_atomic_xchg_li32 : GCCBuiltin<"__atomic_xchg_li32">, + BinaryAtomicInt; +def int_AMDIL_atomic_inc_li32 : GCCBuiltin<"__atomic_inc_li32">, + BinaryAtomicInt; +def int_AMDIL_atomic_dec_li32 : GCCBuiltin<"__atomic_dec_li32">, + BinaryAtomicInt; +def int_AMDIL_atomic_cmpxchg_li32 : GCCBuiltin<"__atomic_cmpxchg_li32">, + TernaryAtomicInt; +def int_AMDIL_atomic_min_li32 : GCCBuiltin<"__atomic_min_li32">, + BinaryAtomicInt; +def int_AMDIL_atomic_max_li32 : GCCBuiltin<"__atomic_max_li32">, + BinaryAtomicInt; +def int_AMDIL_atomic_and_li32 : GCCBuiltin<"__atomic_and_li32">, + BinaryAtomicInt; +def int_AMDIL_atomic_or_li32 : GCCBuiltin<"__atomic_or_li32">, + BinaryAtomicInt; +def int_AMDIL_atomic_mskor_li32 : GCCBuiltin<"__atomic_mskor_li32">, + TernaryAtomicInt; +def int_AMDIL_atomic_xor_li32 : GCCBuiltin<"__atomic_xor_li32">, + BinaryAtomicInt; + +/// Signed 32 bit integer atomics for region address space +def int_AMDIL_atomic_add_ri32 : GCCBuiltin<"__atomic_add_ri32">, + BinaryAtomicInt; +def int_AMDIL_atomic_sub_ri32 : GCCBuiltin<"__atomic_sub_ri32">, + BinaryAtomicInt; +def int_AMDIL_atomic_rsub_ri32 : GCCBuiltin<"__atomic_rsub_ri32">, + BinaryAtomicInt; +def int_AMDIL_atomic_xchg_ri32 : GCCBuiltin<"__atomic_xchg_ri32">, + BinaryAtomicInt; +def int_AMDIL_atomic_inc_ri32 : GCCBuiltin<"__atomic_inc_ri32">, + BinaryAtomicInt; +def int_AMDIL_atomic_dec_ri32 : GCCBuiltin<"__atomic_dec_ri32">, + BinaryAtomicInt; +def int_AMDIL_atomic_cmpxchg_ri32 : GCCBuiltin<"__atomic_cmpxchg_ri32">, + TernaryAtomicInt; +def int_AMDIL_atomic_min_ri32 : GCCBuiltin<"__atomic_min_ri32">, + BinaryAtomicInt; +def int_AMDIL_atomic_max_ri32 : GCCBuiltin<"__atomic_max_ri32">, + BinaryAtomicInt; +def int_AMDIL_atomic_and_ri32 : GCCBuiltin<"__atomic_and_ri32">, + BinaryAtomicInt; +def int_AMDIL_atomic_or_ri32 : GCCBuiltin<"__atomic_or_ri32">, + BinaryAtomicInt; +def int_AMDIL_atomic_mskor_ri32 : GCCBuiltin<"__atomic_mskor_ri32">, + 
TernaryAtomicInt; +def int_AMDIL_atomic_xor_ri32 : GCCBuiltin<"__atomic_xor_ri32">, + BinaryAtomicInt; + +/// 32 bit float atomics required by OpenCL +def int_AMDIL_atomic_xchg_lf32 : GCCBuiltin<"__atomic_xchg_lf32">, + BinaryAtomicInt; +def int_AMDIL_atomic_xchg_lf32_noret : GCCBuiltin<"__atomic_xchg_lf32_noret">, + BinaryAtomicIntNoRet; +def int_AMDIL_atomic_xchg_rf32 : GCCBuiltin<"__atomic_xchg_rf32">, + BinaryAtomicInt; +def int_AMDIL_atomic_xchg_rf32_noret : GCCBuiltin<"__atomic_xchg_rf32_noret">, + BinaryAtomicIntNoRet; + +/// Unsigned 32 bit integer atomics for local address space +def int_AMDIL_atomic_add_lu32 : GCCBuiltin<"__atomic_add_lu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_sub_lu32 : GCCBuiltin<"__atomic_sub_lu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_rsub_lu32 : GCCBuiltin<"__atomic_rsub_lu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_xchg_lu32 : GCCBuiltin<"__atomic_xchg_lu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_inc_lu32 : GCCBuiltin<"__atomic_inc_lu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_dec_lu32 : GCCBuiltin<"__atomic_dec_lu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_cmpxchg_lu32 : GCCBuiltin<"__atomic_cmpxchg_lu32">, + TernaryAtomicInt; +def int_AMDIL_atomic_min_lu32 : GCCBuiltin<"__atomic_min_lu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_max_lu32 : GCCBuiltin<"__atomic_max_lu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_and_lu32 : GCCBuiltin<"__atomic_and_lu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_or_lu32 : GCCBuiltin<"__atomic_or_lu32">, + BinaryAtomicInt; +def int_AMDIL_atomic_mskor_lu32 : GCCBuiltin<"__atomic_mskor_lu32">, + TernaryAtomicInt; +def int_AMDIL_atomic_xor_lu32 : GCCBuiltin<"__atomic_xor_lu32">, + BinaryAtomicInt; + +/// Unsigned 32 bit integer atomics for region address space +def int_AMDIL_atomic_add_ru32 : GCCBuiltin<"__atomic_add_ru32">, + BinaryAtomicInt; +def int_AMDIL_atomic_sub_ru32 : GCCBuiltin<"__atomic_sub_ru32">, + BinaryAtomicInt; +def int_AMDIL_atomic_rsub_ru32 : 
GCCBuiltin<"__atomic_rsub_ru32">, + BinaryAtomicInt; +def int_AMDIL_atomic_xchg_ru32 : GCCBuiltin<"__atomic_xchg_ru32">, + BinaryAtomicInt; +def int_AMDIL_atomic_inc_ru32 : GCCBuiltin<"__atomic_inc_ru32">, + BinaryAtomicInt; +def int_AMDIL_atomic_dec_ru32 : GCCBuiltin<"__atomic_dec_ru32">, + BinaryAtomicInt; +def int_AMDIL_atomic_cmpxchg_ru32 : GCCBuiltin<"__atomic_cmpxchg_ru32">, + TernaryAtomicInt; +def int_AMDIL_atomic_min_ru32 : GCCBuiltin<"__atomic_min_ru32">, + BinaryAtomicInt; +def int_AMDIL_atomic_max_ru32 : GCCBuiltin<"__atomic_max_ru32">, + BinaryAtomicInt; +def int_AMDIL_atomic_and_ru32 : GCCBuiltin<"__atomic_and_ru32">, + BinaryAtomicInt; +def int_AMDIL_atomic_or_ru32 : GCCBuiltin<"__atomic_or_ru32">, + BinaryAtomicInt; +def int_AMDIL_atomic_mskor_ru32 : GCCBuiltin<"__atomic_mskor_ru32">, + TernaryAtomicInt; +def int_AMDIL_atomic_xor_ru32 : GCCBuiltin<"__atomic_xor_ru32">, + BinaryAtomicInt; + +/// Semaphore signal/wait/init +def int_AMDIL_semaphore_init : GCCBuiltin<"__amdil_semaphore_init">, + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>; +def int_AMDIL_semaphore_wait : GCCBuiltin<"__amdil_semaphore_wait">, + Intrinsic<[], [llvm_ptr_ty]>; +def int_AMDIL_semaphore_signal : GCCBuiltin<"__amdil_semaphore_signal">, + Intrinsic<[], [llvm_ptr_ty]>; +def int_AMDIL_semaphore_size : GCCBuiltin<"__amdil_max_semaphore_size">, + Intrinsic<[llvm_i32_ty], []>; +} diff --git a/lib/Target/AMDGPU/AMDILMultiClass.td b/lib/Target/AMDGPU/AMDILMultiClass.td new file mode 100644 index 0000000..12e92f5 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILMultiClass.td @@ -0,0 +1,95 @@ +//===-- AMDILMultiClass.td - AMDIL Multiclass defs ---*- tablegen -*-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +// Multiclass that handles branch instructions +multiclass BranchConditional { + def _i32 : ILFormat; + def _f32 : ILFormat; +} + +// Multiclass that handles memory store operations +multiclass GTRUNCSTORE { + def _i32i8 : OneInOneOut; + def _i32i16 : OneInOneOut; +} + +// Multiclass that handles memory store operations +multiclass LTRUNCSTORE { + def _i32i8 : OneInOneOut; + def _i32i16 : OneInOneOut; +} + +// Multiclass that handles memory store operations +multiclass PTRUNCSTORE { + def _i32i8 : OneInOneOut; + def _i32i16 : OneInOneOut; +} + +// Multiclass that handles memory store operations +multiclass RTRUNCSTORE { + def _i32i8 : OneInOneOut; + def _i32i16 : OneInOneOut; +} + + +// Multiclass that handles memory store operations +multiclass STORE { + def _i32 : OneInOneOut; + def _f32 : OneInOneOut; +} + +// Multiclass that handles load operations +multiclass LOAD { + def _i32 : OneInOneOut; + def _f32 : OneInOneOut; +} + +// Only scalar types should generate flow control +multiclass BranchInstr { + def _i32 : UnaryOpNoRet; + def _f32 : UnaryOpNoRet; +} +// Only scalar types should generate flow control +multiclass BranchInstr2 { + def _i32 : BinaryOpNoRet; + def _f32 : BinaryOpNoRet; +} diff --git a/lib/Target/AMDGPU/AMDILNIDevice.cpp b/lib/Target/AMDGPU/AMDILNIDevice.cpp new file mode 100644 index 0000000..d4112cd --- /dev/null +++ b/lib/Target/AMDGPU/AMDILNIDevice.cpp @@ -0,0 +1,71 @@ +//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +#include "AMDILNIDevice.h" +#include "AMDILEvergreenDevice.h" +#include "AMDILSubtarget.h" + +using namespace llvm; + +AMDILNIDevice::AMDILNIDevice(AMDILSubtarget *ST) + : AMDILEvergreenDevice(ST) +{ + std::string name = ST->getDeviceName(); + if (name == "caicos") { + mDeviceFlag = OCL_DEVICE_CAICOS; + } else if (name == "turks") { + mDeviceFlag = OCL_DEVICE_TURKS; + } else if (name == "cayman") { + mDeviceFlag = OCL_DEVICE_CAYMAN; + } else { + mDeviceFlag = OCL_DEVICE_BARTS; + } +} +AMDILNIDevice::~AMDILNIDevice() +{ +} + +size_t +AMDILNIDevice::getMaxLDSSize() const +{ + if (usesHardware(AMDILDeviceInfo::LocalMem)) { + return MAX_LDS_SIZE_900; + } else { + return 0; + } +} + +uint32_t +AMDILNIDevice::getGeneration() const +{ + return AMDILDeviceInfo::HD6XXX; +} + + +AMDILCaymanDevice::AMDILCaymanDevice(AMDILSubtarget *ST) + : AMDILNIDevice(ST) +{ + setCaps(); +} + +AMDILCaymanDevice::~AMDILCaymanDevice() +{ +} + +void +AMDILCaymanDevice::setCaps() +{ + if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) { + mHWBits.set(AMDILDeviceInfo::DoubleOps); + mHWBits.set(AMDILDeviceInfo::FMA); + } + mHWBits.set(AMDILDeviceInfo::Signed24BitOps); + mSWBits.reset(AMDILDeviceInfo::Signed24BitOps); + mSWBits.set(AMDILDeviceInfo::ArenaSegment); +} + diff --git a/lib/Target/AMDGPU/AMDILNIDevice.h b/lib/Target/AMDGPU/AMDILNIDevice.h new file mode 100644 index 0000000..556670a --- /dev/null +++ b/lib/Target/AMDGPU/AMDILNIDevice.h @@ -0,0 +1,59 @@ +//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Interface for the subtarget data classes. 
+// +//===---------------------------------------------------------------------===// +// This file will define the interface that each generation needs to +// implement in order to correctly answer queries on the capabilities of the +// specific hardware. +//===---------------------------------------------------------------------===// +#ifndef _AMDILNIDEVICE_H_ +#define _AMDILNIDEVICE_H_ +#include "AMDILEvergreenDevice.h" +#include "AMDILSubtarget.h" + +namespace llvm { + class AMDILSubtarget; +//===---------------------------------------------------------------------===// +// NI generation of devices and their respective sub classes +//===---------------------------------------------------------------------===// + +// The AMDILNIDevice is the base class for all Northern Islands series of +// cards. It is very similar to the AMDILEvergreenDevice, with the major +// exception being differences in wavefront size and hardware capabilities. The +// NI devices are all 64 wide wavefronts and also add support for signed 24 bit +// integer operations + + class AMDILNIDevice : public AMDILEvergreenDevice { + public: + AMDILNIDevice(AMDILSubtarget*); + virtual ~AMDILNIDevice(); + virtual size_t getMaxLDSSize() const; + virtual uint32_t getGeneration() const; + protected: + }; // AMDILNIDevice + +// Just as the AMDILCypressDevice is the double capable version of the +// AMDILEvergreenDevice, the AMDILCaymanDevice is the double capable version of +// the AMDILNIDevice. The other major difference that is not as useful from +// standpoint is that the Cayman Device has 4 wide ALU's, whereas the rest of the +// NI family is a 5 wide. 
+ + class AMDILCaymanDevice: public AMDILNIDevice { + public: + AMDILCaymanDevice(AMDILSubtarget*); + virtual ~AMDILCaymanDevice(); + private: + virtual void setCaps(); + }; // AMDILCaymanDevice + + static const unsigned int MAX_LDS_SIZE_900 = AMDILDevice::MAX_LDS_SIZE_800; +} // namespace llvm +#endif // _AMDILNIDEVICE_H_ diff --git a/lib/Target/AMDGPU/AMDILNodes.td b/lib/Target/AMDGPU/AMDILNodes.td new file mode 100644 index 0000000..699fdad --- /dev/null +++ b/lib/Target/AMDGPU/AMDILNodes.td @@ -0,0 +1,47 @@ +//===- AMDILNodes.td - AMD IL nodes ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Flow Control DAG Nodes +//===----------------------------------------------------------------------===// +def IL_brcond : SDNode<"AMDILISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>; + +//===----------------------------------------------------------------------===// +// Comparison DAG Nodes +//===----------------------------------------------------------------------===// +def IL_cmp : SDNode<"AMDILISD::CMP", SDTIL_Cmp>; + +//===----------------------------------------------------------------------===// +// Call/Return DAG Nodes +//===----------------------------------------------------------------------===// +def IL_call : SDNode<"AMDILISD::CALL", SDTIL_Call, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def IL_retflag : SDNode<"AMDILISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; + +//===--------------------------------------------------------------------===// +// Instructions +//===--------------------------------------------------------------------===// +// Floating point math functions +def IL_cmov_logical : SDNode<"AMDILISD::CMOVLOG", 
SDTIL_GenTernaryOp>; +def IL_div_inf : SDNode<"AMDILISD::DIV_INF", SDTIL_GenBinaryOp>; +def IL_mad : SDNode<"AMDILISD::MAD", SDTIL_GenTernaryOp>; + +//===----------------------------------------------------------------------===// +// Integer functions +//===----------------------------------------------------------------------===// +def IL_umul : SDNode<"AMDILISD::UMUL" , SDTIntBinOp, + [SDNPCommutative, SDNPAssociative]>; + +//===----------------------------------------------------------------------===// +// Vector functions +//===----------------------------------------------------------------------===// +def IL_vbuild : SDNode<"AMDILISD::VBUILD", SDTIL_GenVecBuild, + []>; diff --git a/lib/Target/AMDGPU/AMDILOperands.td b/lib/Target/AMDGPU/AMDILOperands.td new file mode 100644 index 0000000..1014f95 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILOperands.td @@ -0,0 +1,32 @@ +//===- AMDILOperands.td - AMD IL Operands ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Custom memory operand +//===----------------------------------------------------------------------===// + +def MEMI32 : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops GPRI32, GPRI32); +} + +// Call target types +def calltarget : Operand; +def brtarget : Operand; + +// def v2i8imm : Operand; +// def v4i8imm : Operand; +// def v2i16imm : Operand; +// def v4i16imm : Operand; +// def v2i32imm : Operand; +// def v4i32imm : Operand; +// def v2i64imm : Operand; +// def v2f32imm : Operand; +// def v4f32imm : Operand; +// def v2f64imm : Operand; + diff --git a/lib/Target/AMDGPU/AMDILPatterns.td b/lib/Target/AMDGPU/AMDILPatterns.td new file mode 100644 index 0000000..aa59bcb --- /dev/null +++ b/lib/Target/AMDGPU/AMDILPatterns.td @@ -0,0 +1,504 @@ +//===- AMDILPatterns.td - AMDIL Target Patterns------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Store pattern fragments +//===----------------------------------------------------------------------===// +def truncstorei64 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i64; +}]>; +def truncstorev2i8 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v2i8; +}]>; +def truncstorev2i16 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v2i16; +}]>; +def truncstorev2i32 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v2i32; +}]>; +def truncstorev2i64 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v2i64; +}]>; +def truncstorev2f32 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v2f32; +}]>; +def truncstorev2f64 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v2f64; +}]>; +def truncstorev4i8 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4i8; +}]>; +def truncstorev4i16 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4i16; +}]>; +def truncstorev4i32 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4i32; +}]>; +def truncstorev4f32 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4f32; +}]>; + +def global_store : PatFrag<(ops node:$val, 
node:$ptr), + (store node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def private_store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def local_store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def region_store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def global_i8trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei8 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_i16trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei16 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_i32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei32 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_i64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei64 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_f32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstoref32 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_f64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstoref64 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i8 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i16 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i32 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i64 node:$val, node:$ptr), [{ + return 
isGlobalStore(dyn_cast(N)); +}]>; +def global_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2f32 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2f64 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4i8 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4i16 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4i32 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def global_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4f32 node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; +def private_trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_i8trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei8 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_i16trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei16 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_i32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei32 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_i64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei64 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_f32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstoref32 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_f64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstoref64 node:$val, node:$ptr), [{ + return 
isPrivateStore(dyn_cast(N)); +}]>; +def private_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i8 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i16 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i32 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i64 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2f32 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2f64 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4i8 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4i16 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4i32 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; +def private_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4f32 node:$val, node:$ptr), [{ + return isPrivateStore(dyn_cast(N)); +}]>; + +def local_trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_i8trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei8 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_i16trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei16 node:$val, node:$ptr), [{ 
+ return isLocalStore(dyn_cast(N)); +}]>; +def local_i32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei32 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_i64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei64 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_f32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstoref32 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_f64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstoref64 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i8 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i16 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i32 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i64 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2f32 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2f64 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4i8 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4i16 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; +def local_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4i32 node:$val, node:$ptr), [{ + return 
isLocalStore(dyn_cast(N)); +}]>; +def local_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4f32 node:$val, node:$ptr), [{ + return isLocalStore(dyn_cast(N)); +}]>; + +def region_trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_i8trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei8 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_i16trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei16 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_i32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei32 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_i64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorei64 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_f32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstoref32 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_f64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstoref64 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_v2i8trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i8 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_v2i16trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i16 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_v2i32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i32 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_v2i64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2i64 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_v2f32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2f32 node:$val, node:$ptr), [{ + return 
isRegionStore(dyn_cast(N)); +}]>; +def region_v2f64trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev2f64 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_v4i8trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4i8 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_v4i16trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4i16 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_v4i32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4i32 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; +def region_v4f32trunc_store : PatFrag<(ops node:$val, node:$ptr), + (truncstorev4f32 node:$val, node:$ptr), [{ + return isRegionStore(dyn_cast(N)); +}]>; + +//===----------------------------------------------------------------------===// +// Load pattern fragments +//===----------------------------------------------------------------------===// +// Global address space loads +def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isGlobalLoad(dyn_cast(N)); +}]>; +def global_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ + return isGlobalLoad(dyn_cast(N)); +}]>; +def global_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return isGlobalLoad(dyn_cast(N)); +}]>; +def global_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return isGlobalLoad(dyn_cast(N)); +}]>; +// Private address space loads +def private_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isPrivateLoad(dyn_cast(N)); +}]>; +def private_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ + return isPrivateLoad(dyn_cast(N)); +}]>; +def private_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return isPrivateLoad(dyn_cast(N)); +}]>; +def private_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return isPrivateLoad(dyn_cast(N)); +}]>; +// Local address 
space loads +def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; +def local_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; +def local_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; +def local_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return isLocalLoad(dyn_cast(N)); +}]>; +// Region address space loads +def region_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isRegionLoad(dyn_cast(N)); +}]>; +def region_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ + return isRegionLoad(dyn_cast(N)); +}]>; +def region_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return isRegionLoad(dyn_cast(N)); +}]>; +def region_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return isRegionLoad(dyn_cast(N)); +}]>; +// Constant address space loads +def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isConstantLoad(dyn_cast(N), -1); +}]>; +def constant_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ + return isConstantLoad(dyn_cast(N), -1); +}]>; +def constant_aext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return isConstantLoad(dyn_cast(N), -1); +}]>; +def constant_zext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return isConstantLoad(dyn_cast(N), -1); +}]>; +// Constant pool loads +def cp_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isCPLoad(dyn_cast(N)); +}]>; +def cp_sext_load : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ + return isCPLoad(dyn_cast(N)); +}]>; +def cp_zext_load : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return isCPLoad(dyn_cast(N)); +}]>; +def cp_aext_load : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return isCPLoad(dyn_cast(N)); +}]>; + +//===----------------------------------------------------------------------===// +// 
Complex addressing mode patterns +//===----------------------------------------------------------------------===// +def ADDR : ComplexPattern; +def ADDRF : ComplexPattern; +def ADDR64 : ComplexPattern; +def ADDR64F : ComplexPattern; + + +//===----------------------------------------------------------------------===// +// Conditional Instruction Pattern Leafs +//===----------------------------------------------------------------------===// +class IL_CC_Op : PatLeaf<(i32 N)>; +def IL_CC_D_EQ : IL_CC_Op<0>; +def IL_CC_D_GE : IL_CC_Op<1>; +def IL_CC_D_LT : IL_CC_Op<2>; +def IL_CC_D_NE : IL_CC_Op<3>; +def IL_CC_F_EQ : IL_CC_Op<4>; +def IL_CC_F_GE : IL_CC_Op<5>; +def IL_CC_F_LT : IL_CC_Op<6>; +def IL_CC_F_NE : IL_CC_Op<7>; +def IL_CC_I_EQ : IL_CC_Op<8>; +def IL_CC_I_GE : IL_CC_Op<9>; +def IL_CC_I_LT : IL_CC_Op<10>; +def IL_CC_I_NE : IL_CC_Op<11>; +def IL_CC_U_GE : IL_CC_Op<12>; +def IL_CC_U_LT : IL_CC_Op<13>; +// Pseudo IL comparison instructions that aren't natively supported +def IL_CC_F_GT : IL_CC_Op<14>; +def IL_CC_U_GT : IL_CC_Op<15>; +def IL_CC_I_GT : IL_CC_Op<16>; +def IL_CC_D_GT : IL_CC_Op<17>; +def IL_CC_F_LE : IL_CC_Op<18>; +def IL_CC_U_LE : IL_CC_Op<19>; +def IL_CC_I_LE : IL_CC_Op<20>; +def IL_CC_D_LE : IL_CC_Op<21>; +def IL_CC_F_UNE : IL_CC_Op<22>; +def IL_CC_F_UEQ : IL_CC_Op<23>; +def IL_CC_F_ULT : IL_CC_Op<24>; +def IL_CC_F_UGT : IL_CC_Op<25>; +def IL_CC_F_ULE : IL_CC_Op<26>; +def IL_CC_F_UGE : IL_CC_Op<27>; +def IL_CC_F_ONE : IL_CC_Op<28>; +def IL_CC_F_OEQ : IL_CC_Op<29>; +def IL_CC_F_OLT : IL_CC_Op<30>; +def IL_CC_F_OGT : IL_CC_Op<31>; +def IL_CC_F_OLE : IL_CC_Op<32>; +def IL_CC_F_OGE : IL_CC_Op<33>; +def IL_CC_D_UNE : IL_CC_Op<34>; +def IL_CC_D_UEQ : IL_CC_Op<35>; +def IL_CC_D_ULT : IL_CC_Op<36>; +def IL_CC_D_UGT : IL_CC_Op<37>; +def IL_CC_D_ULE : IL_CC_Op<38>; +def IL_CC_D_UGE : IL_CC_Op<39>; +def IL_CC_D_ONE : IL_CC_Op<40>; +def IL_CC_D_OEQ : IL_CC_Op<41>; +def IL_CC_D_OLT : IL_CC_Op<42>; +def IL_CC_D_OGT : IL_CC_Op<43>; +def IL_CC_D_OLE : 
IL_CC_Op<44>; +def IL_CC_D_OGE : IL_CC_Op<45>; +def IL_CC_U_EQ : IL_CC_Op<46>; +def IL_CC_U_NE : IL_CC_Op<47>; +def IL_CC_F_O : IL_CC_Op<48>; +def IL_CC_D_O : IL_CC_Op<49>; +def IL_CC_F_UO : IL_CC_Op<50>; +def IL_CC_D_UO : IL_CC_Op<51>; +def IL_CC_L_LE : IL_CC_Op<52>; +def IL_CC_L_GE : IL_CC_Op<53>; +def IL_CC_L_EQ : IL_CC_Op<54>; +def IL_CC_L_NE : IL_CC_Op<55>; +def IL_CC_L_LT : IL_CC_Op<56>; +def IL_CC_L_GT : IL_CC_Op<57>; +def IL_CC_UL_LE : IL_CC_Op<58>; +def IL_CC_UL_GE : IL_CC_Op<59>; +def IL_CC_UL_EQ : IL_CC_Op<60>; +def IL_CC_UL_NE : IL_CC_Op<61>; +def IL_CC_UL_LT : IL_CC_Op<62>; +def IL_CC_UL_GT : IL_CC_Op<63>; diff --git a/lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp b/lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp new file mode 100644 index 0000000..16211a9 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILPeepholeOptimizer.cpp @@ -0,0 +1,1264 @@ +//===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// + +#define DEBUG_TYPE "PeepholeOpt" +#ifdef DEBUG +#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) +#else +#define DEBUGME 0 +#endif + +#include "AMDILAlgorithms.tpp" +#include "AMDILDevices.h" +#include "AMDILInstrInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Constants.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" + +#include + +#if 0 +STATISTIC(PointerAssignments, "Number of dynamic pointer " + "assigments discovered"); +STATISTIC(PointerSubtract, "Number of pointer subtractions discovered"); +#endif + +using namespace llvm; +// The Peephole optimization pass is used to do simple last minute optimizations +// that are required for correct code or to remove redundant functions +namespace { + +class OpaqueType; + +class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass { +public: + TargetMachine &TM; + static char ID; + AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL); + ~AMDILPeepholeOpt(); + const char *getPassName() const; + bool runOnFunction(Function &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + void getAnalysisUsage(AnalysisUsage &AU) const; +protected: +private: + // Function to initiate all of the instruction level optimizations. + bool instLevelOptimizations(BasicBlock::iterator *inst); + // Quick check to see if we need to dump all of the pointers into the + // arena. If this is correct, then we set all pointers to exist in arena. This + // is a workaround for aliasing of pointers in a struct/union. 
+ bool dumpAllIntoArena(Function &F); + // Because I don't want to invalidate any pointers while in the + // safeNestedForEachFunction. I push atomic conversions to a vector and handle + // it later. This function does the conversions if required. + void doAtomicConversionIfNeeded(Function &F); + // Because __amdil_is_constant cannot be properly evaluated if + // optimizations are disabled, the call's are placed in a vector + // and evaluated after the __amdil_image* functions are evaluated + // which should allow the __amdil_is_constant function to be + // evaluated correctly. + void doIsConstCallConversionIfNeeded(); + bool mChanged; + bool mDebug; + bool mConvertAtomics; + CodeGenOpt::Level optLevel; + // Run a series of tests to see if we can optimize a CALL instruction. + bool optimizeCallInst(BasicBlock::iterator *bbb); + // A peephole optimization to optimize bit extract sequences. + bool optimizeBitExtract(Instruction *inst); + // A peephole optimization to optimize bit insert sequences. + bool optimizeBitInsert(Instruction *inst); + bool setupBitInsert(Instruction *base, + Instruction *&src, + Constant *&mask, + Constant *&shift); + // Expand the bit field insert instruction on versions of OpenCL that + // don't support it. + bool expandBFI(CallInst *CI); + // Expand the bit field mask instruction on version of OpenCL that + // don't support it. + bool expandBFM(CallInst *CI); + // On 7XX and 8XX operations, we do not have 24 bit signed operations. So in + // this case we need to expand them. These functions check for 24bit functions + // and then expand. + bool isSigned24BitOps(CallInst *CI); + void expandSigned24BitOps(CallInst *CI); + // One optimization that can occur is that if the required workgroup size is + // specified then the result of get_local_size is known at compile time and + // can be returned accordingly. 
+ bool isRWGLocalOpt(CallInst *CI); + // On northern island cards, the division is slightly less accurate than on + // previous generations, so we need to utilize a more accurate division. So we + // can translate the accurate divide to a normal divide on all other cards. + bool convertAccurateDivide(CallInst *CI); + void expandAccurateDivide(CallInst *CI); + // If the alignment is set incorrectly, it can produce really inefficient + // code. This checks for this scenario and fixes it if possible. + bool correctMisalignedMemOp(Instruction *inst); + + // If we are in no opt mode, then we need to make sure that + // local samplers are properly propagated as constant propagation + // doesn't occur and we need to know the value of kernel defined + // samplers at compile time. + bool propagateSamplerInst(CallInst *CI); + + // Helper functions + + // Group of functions that recursively calculate the size of a structure based + // on it's sub-types. + size_t getTypeSize(Type * const T, bool dereferencePtr = false); + size_t getTypeSize(StructType * const ST, bool dereferencePtr = false); + size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false); + size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false); + size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false); + size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false); + size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false); + size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false); + + LLVMContext *mCTX; + Function *mF; + const AMDILSubtarget *mSTM; + SmallVector< std::pair, 16> atomicFuncs; + SmallVector isConstVec; +}; // class AMDILPeepholeOpt + char AMDILPeepholeOpt::ID = 0; +} // anonymous namespace + +namespace llvm { + FunctionPass * + createAMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) + { + return new AMDILPeepholeOpt(tm AMDIL_OPT_LEVEL_VAR); + } +} // llvm namespace + 
+AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) + : FunctionPass(ID), TM(tm) +{ + mDebug = DEBUGME; + optLevel = TM.getOptLevel(); + +} + +AMDILPeepholeOpt::~AMDILPeepholeOpt() +{ +} + +const char * +AMDILPeepholeOpt::getPassName() const +{ + return "AMDIL PeepHole Optimization Pass"; +} + +bool +containsPointerType(Type *Ty) +{ + if (!Ty) { + return false; + } + switch(Ty->getTypeID()) { + default: + return false; + case Type::StructTyID: { + const StructType *ST = dyn_cast(Ty); + for (StructType::element_iterator stb = ST->element_begin(), + ste = ST->element_end(); stb != ste; ++stb) { + if (!containsPointerType(*stb)) { + continue; + } + return true; + } + break; + } + case Type::VectorTyID: + case Type::ArrayTyID: + return containsPointerType(dyn_cast(Ty)->getElementType()); + case Type::PointerTyID: + return true; + }; + return false; +} + +bool +AMDILPeepholeOpt::dumpAllIntoArena(Function &F) +{ + bool dumpAll = false; + for (Function::const_arg_iterator cab = F.arg_begin(), + cae = F.arg_end(); cab != cae; ++cab) { + const Argument *arg = cab; + const PointerType *PT = dyn_cast(arg->getType()); + if (!PT) { + continue; + } + Type *DereferencedType = PT->getElementType(); + if (!dyn_cast(DereferencedType) + ) { + continue; + } + if (!containsPointerType(DereferencedType)) { + continue; + } + // FIXME: Because a pointer inside of a struct/union may be aliased to + // another pointer we need to take the conservative approach and place all + // pointers into the arena until more advanced detection is implemented. + dumpAll = true; + } + return dumpAll; +} +void +AMDILPeepholeOpt::doIsConstCallConversionIfNeeded() +{ + if (isConstVec.empty()) { + return; + } + for (unsigned x = 0, y = isConstVec.size(); x < y; ++x) { + CallInst *CI = isConstVec[x]; + Constant *CV = dyn_cast(CI->getOperand(0)); + Type *aType = Type::getInt32Ty(*mCTX); + Value *Val = (CV != NULL) ? 
ConstantInt::get(aType, 1) + : ConstantInt::get(aType, 0); + CI->replaceAllUsesWith(Val); + CI->eraseFromParent(); + } + isConstVec.clear(); +} +void +AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F) +{ + // Don't do anything if we don't have any atomic operations. + if (atomicFuncs.empty()) { + return; + } + // Change the function name for the atomic if it is required + uint32_t size = atomicFuncs.size(); + for (uint32_t x = 0; x < size; ++x) { + atomicFuncs[x].first->setOperand( + atomicFuncs[x].first->getNumOperands()-1, + atomicFuncs[x].second); + + } + mChanged = true; + if (mConvertAtomics) { + return; + } +} + +bool +AMDILPeepholeOpt::runOnFunction(Function &MF) +{ + mChanged = false; + mF = &MF; + mSTM = &TM.getSubtarget(); + if (mDebug) { + MF.dump(); + } + mCTX = &MF.getType()->getContext(); + mConvertAtomics = true; + safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(), + std::bind1st(std::mem_fun(&AMDILPeepholeOpt::instLevelOptimizations), + this)); + + doAtomicConversionIfNeeded(MF); + doIsConstCallConversionIfNeeded(); + + if (mDebug) { + MF.dump(); + } + return mChanged; +} + +bool +AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb) +{ + Instruction *inst = (*bbb); + CallInst *CI = dyn_cast(inst); + if (!CI) { + return false; + } + if (isSigned24BitOps(CI)) { + expandSigned24BitOps(CI); + ++(*bbb); + CI->eraseFromParent(); + return true; + } + if (propagateSamplerInst(CI)) { + return false; + } + if (expandBFI(CI) || expandBFM(CI)) { + ++(*bbb); + CI->eraseFromParent(); + return true; + } + if (convertAccurateDivide(CI)) { + expandAccurateDivide(CI); + ++(*bbb); + CI->eraseFromParent(); + return true; + } + + StringRef calleeName = CI->getOperand(CI->getNumOperands()-1)->getName(); + if (calleeName.startswith("__amdil_is_constant")) { + // If we do not have optimizations, then this + // cannot be properly evaluated, so we add the + // call instruction to a vector and process + // them at the end of processing after the + 
// samplers have been correctly handled. + if (optLevel == CodeGenOpt::None) { + isConstVec.push_back(CI); + return false; + } else { + Constant *CV = dyn_cast(CI->getOperand(0)); + Type *aType = Type::getInt32Ty(*mCTX); + Value *Val = (CV != NULL) ? ConstantInt::get(aType, 1) + : ConstantInt::get(aType, 0); + CI->replaceAllUsesWith(Val); + ++(*bbb); + CI->eraseFromParent(); + return true; + } + } + + if (calleeName.equals("__amdil_is_asic_id_i32")) { + ConstantInt *CV = dyn_cast(CI->getOperand(0)); + Type *aType = Type::getInt32Ty(*mCTX); + Value *Val = CV; + if (Val) { + Val = ConstantInt::get(aType, + mSTM->device()->getDeviceFlag() & CV->getZExtValue()); + } else { + Val = ConstantInt::get(aType, 0); + } + CI->replaceAllUsesWith(Val); + ++(*bbb); + CI->eraseFromParent(); + return true; + } + Function *F = dyn_cast(CI->getOperand(CI->getNumOperands()-1)); + if (!F) { + return false; + } + if (F->getName().startswith("__atom") && !CI->getNumUses() + && F->getName().find("_xchg") == StringRef::npos) { + std::string buffer(F->getName().str() + "_noret"); + F = dyn_cast( + F->getParent()->getOrInsertFunction(buffer, F->getFunctionType())); + atomicFuncs.push_back(std::make_pair (CI, F)); + } + + if (!mSTM->device()->isSupported(AMDILDeviceInfo::ArenaSegment) + && !mSTM->device()->isSupported(AMDILDeviceInfo::MultiUAV)) { + return false; + } + if (!mConvertAtomics) { + return false; + } + StringRef name = F->getName(); + if (name.startswith("__atom") && name.find("_g") != StringRef::npos) { + mConvertAtomics = false; + } + return false; +} + +bool +AMDILPeepholeOpt::setupBitInsert(Instruction *base, + Instruction *&src, + Constant *&mask, + Constant *&shift) +{ + if (!base) { + if (mDebug) { + dbgs() << "Null pointer passed into function.\n"; + } + return false; + } + bool andOp = false; + if (base->getOpcode() == Instruction::Shl) { + shift = dyn_cast(base->getOperand(1)); + } else if (base->getOpcode() == Instruction::And) { + mask = dyn_cast(base->getOperand(1)); 
+ andOp = true; + } else { + if (mDebug) { + dbgs() << "Failed setup with no Shl or And instruction on base opcode!\n"; + } + // If the base is neither a Shl or a And, we don't fit any of the patterns above. + return false; + } + src = dyn_cast(base->getOperand(0)); + if (!src) { + if (mDebug) { + dbgs() << "Failed setup since the base operand is not an instruction!\n"; + } + return false; + } + // If we find an 'and' operation, then we don't need to + // find the next operation as we already know the + // bits that are valid at this point. + if (andOp) { + return true; + } + if (src->getOpcode() == Instruction::Shl && !shift) { + shift = dyn_cast(src->getOperand(1)); + src = dyn_cast(src->getOperand(0)); + } else if (src->getOpcode() == Instruction::And && !mask) { + mask = dyn_cast(src->getOperand(1)); + } + if (!mask && !shift) { + if (mDebug) { + dbgs() << "Failed setup since both mask and shift are NULL!\n"; + } + // Did not find a constant mask or a shift. + return false; + } + return true; +} +bool +AMDILPeepholeOpt::optimizeBitInsert(Instruction *inst) +{ + if (!inst) { + return false; + } + if (!inst->isBinaryOp()) { + return false; + } + if (inst->getOpcode() != Instruction::Or) { + return false; + } + if (optLevel == CodeGenOpt::None) { + return false; + } + // We want to do an optimization on a sequence of ops that in the end equals a + // single ISA instruction. + // The base pattern for this optimization is - ((A & B) << C) | ((D & E) << F) + // Some simplified versions of this pattern are as follows: + // (A & B) | (D & E) when B & E == 0 && C == 0 && F == 0 + // ((A & B) << C) | (D & E) when B ^ E == 0 && (1 << C) >= E + // (A & B) | ((D & E) << F) when B ^ E == 0 && (1 << F) >= B + // (A & B) | (D << F) when (1 << F) >= B + // (A << C) | (D & E) when (1 << C) >= E + if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { + // The HD4XXX hardware doesn't support the ubit_insert instruction. 
+ return false; + } + Type *aType = inst->getType(); + bool isVector = aType->isVectorTy(); + int numEle = 1; + // This optimization only works on 32bit integers. + if (aType->getScalarType() + != Type::getInt32Ty(inst->getContext())) { + return false; + } + if (isVector) { + const VectorType *VT = dyn_cast(aType); + numEle = VT->getNumElements(); + // We currently cannot support more than 4 elements in a intrinsic and we + // cannot support Vec3 types. + if (numEle > 4 || numEle == 3) { + return false; + } + } + // TODO: Handle vectors. + if (isVector) { + if (mDebug) { + dbgs() << "!!! Vectors are not supported yet!\n"; + } + return false; + } + Instruction *LHSSrc = NULL, *RHSSrc = NULL; + Constant *LHSMask = NULL, *RHSMask = NULL; + Constant *LHSShift = NULL, *RHSShift = NULL; + Instruction *LHS = dyn_cast(inst->getOperand(0)); + Instruction *RHS = dyn_cast(inst->getOperand(1)); + if (!setupBitInsert(LHS, LHSSrc, LHSMask, LHSShift)) { + if (mDebug) { + dbgs() << "Found an OR Operation that failed setup!\n"; + inst->dump(); + if (LHS) { LHS->dump(); } + if (LHSSrc) { LHSSrc->dump(); } + if (LHSMask) { LHSMask->dump(); } + if (LHSShift) { LHSShift->dump(); } + } + // There was an issue with the setup for BitInsert. + return false; + } + if (!setupBitInsert(RHS, RHSSrc, RHSMask, RHSShift)) { + if (mDebug) { + dbgs() << "Found an OR Operation that failed setup!\n"; + inst->dump(); + if (RHS) { RHS->dump(); } + if (RHSSrc) { RHSSrc->dump(); } + if (RHSMask) { RHSMask->dump(); } + if (RHSShift) { RHSShift->dump(); } + } + // There was an issue with the setup for BitInsert. 
+ return false; + } + if (mDebug) { + dbgs() << "Found an OR operation that can possible be optimized to ubit insert!\n"; + dbgs() << "Op: "; inst->dump(); + dbgs() << "LHS: "; if (LHS) { LHS->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "LHS Src: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "LHS Mask: "; if (LHSMask) { LHSMask->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "LHS Shift: "; if (LHSShift) { LHSShift->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "RHS: "; if (RHS) { RHS->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "RHS Src: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "RHS Mask: "; if (RHSMask) { RHSMask->dump(); } else { dbgs() << "(None)\n"; } + dbgs() << "RHS Shift: "; if (RHSShift) { RHSShift->dump(); } else { dbgs() << "(None)\n"; } + } + Constant *offset = NULL; + Constant *width = NULL; + int32_t lhsMaskVal = 0, rhsMaskVal = 0; + int32_t lhsShiftVal = 0, rhsShiftVal = 0; + int32_t lhsMaskWidth = 0, rhsMaskWidth = 0; + int32_t lhsMaskOffset = 0, rhsMaskOffset = 0; + lhsMaskVal = (int32_t)(LHSMask + ? dyn_cast(LHSMask)->getZExtValue() : 0); + rhsMaskVal = (int32_t)(RHSMask + ? dyn_cast(RHSMask)->getZExtValue() : 0); + lhsShiftVal = (int32_t)(LHSShift + ? dyn_cast(LHSShift)->getZExtValue() : 0); + rhsShiftVal = (int32_t)(RHSShift + ? dyn_cast(RHSShift)->getZExtValue() : 0); + lhsMaskWidth = lhsMaskVal ? CountPopulation_32(lhsMaskVal) : 32 - lhsShiftVal; + rhsMaskWidth = rhsMaskVal ? CountPopulation_32(rhsMaskVal) : 32 - rhsShiftVal; + lhsMaskOffset = lhsMaskVal ? CountTrailingZeros_32(lhsMaskVal) : lhsShiftVal; + rhsMaskOffset = rhsMaskVal ? CountTrailingZeros_32(rhsMaskVal) : rhsShiftVal; + // TODO: Handle the case of A & B | D & ~B(i.e. inverted masks). + if (mDebug) { + dbgs() << "Found pattern: \'((A" << (LHSMask ? " & B)" : ")"); + dbgs() << (LHSShift ? " << C)" : ")") << " | ((D" ; + dbgs() << (RHSMask ? " & E)" : ")"); + dbgs() << (RHSShift ? 
" << F)\'\n" : ")\'\n"); + dbgs() << "A = LHSSrc\t\tD = RHSSrc \n"; + dbgs() << "B = " << lhsMaskVal << "\t\tE = " << rhsMaskVal << "\n"; + dbgs() << "C = " << lhsShiftVal << "\t\tF = " << rhsShiftVal << "\n"; + dbgs() << "width(B) = " << lhsMaskWidth; + dbgs() << "\twidth(E) = " << rhsMaskWidth << "\n"; + dbgs() << "offset(B) = " << lhsMaskOffset; + dbgs() << "\toffset(E) = " << rhsMaskOffset << "\n"; + dbgs() << "Constraints: \n"; + dbgs() << "\t(1) B ^ E == 0\n"; + dbgs() << "\t(2-LHS) B is a mask\n"; + dbgs() << "\t(2-LHS) E is a mask\n"; + dbgs() << "\t(3-LHS) (offset(B)) >= (width(E) + offset(E))\n"; + dbgs() << "\t(3-RHS) (offset(E)) >= (width(B) + offset(B))\n"; + } + if ((lhsMaskVal || rhsMaskVal) && !(lhsMaskVal ^ rhsMaskVal)) { + if (mDebug) { + dbgs() << lhsMaskVal << " ^ " << rhsMaskVal; + dbgs() << " = " << (lhsMaskVal ^ rhsMaskVal) << "\n"; + dbgs() << "Failed constraint 1!\n"; + } + return false; + } + if (mDebug) { + dbgs() << "LHS = " << lhsMaskOffset << ""; + dbgs() << " >= (" << rhsMaskWidth << " + " << rhsMaskOffset << ") = "; + dbgs() << (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)); + dbgs() << "\nRHS = " << rhsMaskOffset << ""; + dbgs() << " >= (" << lhsMaskWidth << " + " << lhsMaskOffset << ") = "; + dbgs() << (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)); + dbgs() << "\n"; + } + if (lhsMaskOffset >= (rhsMaskWidth + rhsMaskOffset)) { + offset = ConstantInt::get(aType, lhsMaskOffset, false); + width = ConstantInt::get(aType, lhsMaskWidth, false); + RHSSrc = RHS; + if (!isMask_32(lhsMaskVal) && !isShiftedMask_32(lhsMaskVal)) { + if (mDebug) { + dbgs() << "Value is not a Mask: " << lhsMaskVal << "\n"; + dbgs() << "Failed constraint 2!\n"; + } + return false; + } + if (!LHSShift) { + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, + "MaskShr", LHS); + } else if (lhsShiftVal != lhsMaskOffset) { + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, + "MaskShr", LHS); + } + if (mDebug) { + dbgs() << 
"Optimizing LHS!\n"; + } + } else if (rhsMaskOffset >= (lhsMaskWidth + lhsMaskOffset)) { + offset = ConstantInt::get(aType, rhsMaskOffset, false); + width = ConstantInt::get(aType, rhsMaskWidth, false); + LHSSrc = RHSSrc; + RHSSrc = LHS; + if (!isMask_32(rhsMaskVal) && !isShiftedMask_32(rhsMaskVal)) { + if (mDebug) { + dbgs() << "Non-Mask: " << rhsMaskVal << "\n"; + dbgs() << "Failed constraint 2!\n"; + } + return false; + } + if (!RHSShift) { + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, + "MaskShr", RHS); + } else if (rhsShiftVal != rhsMaskOffset) { + LHSSrc = BinaryOperator::Create(Instruction::LShr, LHSSrc, offset, + "MaskShr", RHS); + } + if (mDebug) { + dbgs() << "Optimizing RHS!\n"; + } + } else { + if (mDebug) { + dbgs() << "Failed constraint 3!\n"; + } + return false; + } + if (mDebug) { + dbgs() << "Width: "; if (width) { width->dump(); } else { dbgs() << "(0)\n"; } + dbgs() << "Offset: "; if (offset) { offset->dump(); } else { dbgs() << "(0)\n"; } + dbgs() << "LHSSrc: "; if (LHSSrc) { LHSSrc->dump(); } else { dbgs() << "(0)\n"; } + dbgs() << "RHSSrc: "; if (RHSSrc) { RHSSrc->dump(); } else { dbgs() << "(0)\n"; } + } + if (!offset || !width) { + if (mDebug) { + dbgs() << "Either width or offset are NULL, failed detection!\n"; + } + return false; + } + // Lets create the function signature. 
+ std::vector callTypes; + callTypes.push_back(aType); + callTypes.push_back(aType); + callTypes.push_back(aType); + callTypes.push_back(aType); + FunctionType *funcType = FunctionType::get(aType, callTypes, false); + std::string name = "__amdil_ubit_insert"; + if (isVector) { name += "_v" + itostr(numEle) + "u32"; } else { name += "_u32"; } + Function *Func = + dyn_cast(inst->getParent()->getParent()->getParent()-> + getOrInsertFunction(llvm::StringRef(name), funcType)); + Value *Operands[4] = { + width, + offset, + LHSSrc, + RHSSrc + }; + CallInst *CI = CallInst::Create(Func, Operands, "BitInsertOpt"); + if (mDebug) { + dbgs() << "Old Inst: "; + inst->dump(); + dbgs() << "New Inst: "; + CI->dump(); + dbgs() << "\n\n"; + } + CI->insertBefore(inst); + inst->replaceAllUsesWith(CI); + return true; +} + +bool +AMDILPeepholeOpt::optimizeBitExtract(Instruction *inst) +{ + if (!inst) { + return false; + } + if (!inst->isBinaryOp()) { + return false; + } + if (inst->getOpcode() != Instruction::And) { + return false; + } + if (optLevel == CodeGenOpt::None) { + return false; + } + // We want to do some simple optimizations on Shift right/And patterns. The + // basic optimization is to turn (A >> B) & C where A is a 32bit type, B is a + // value smaller than 32 and C is a mask. If C is a constant value, then the + // following transformation can occur. For signed integers, it turns into the + // function call dst = __amdil_ibit_extract(log2(C), B, A) For unsigned + // integers, it turns into the function call dst = + // __amdil_ubit_extract(log2(C), B, A) The function __amdil_[u|i]bit_extract + // can be found in Section 7.9 of the ATI IL spec of the stream SDK for + // Evergreen hardware. + if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { + // This does not work on HD4XXX hardware. 
+ return false; + } + Type *aType = inst->getType(); + bool isVector = aType->isVectorTy(); + + // XXX Support vector types + if (isVector) { + return false; + } + int numEle = 1; + // This only works on 32bit integers + if (aType->getScalarType() + != Type::getInt32Ty(inst->getContext())) { + return false; + } + if (isVector) { + const VectorType *VT = dyn_cast(aType); + numEle = VT->getNumElements(); + // We currently cannot support more than 4 elements in a intrinsic and we + // cannot support Vec3 types. + if (numEle > 4 || numEle == 3) { + return false; + } + } + BinaryOperator *ShiftInst = dyn_cast(inst->getOperand(0)); + // If the first operand is not a shift instruction, then we can return as it + // doesn't match this pattern. + if (!ShiftInst || !ShiftInst->isShift()) { + return false; + } + // If we are a shift left, then we need don't match this pattern. + if (ShiftInst->getOpcode() == Instruction::Shl) { + return false; + } + bool isSigned = ShiftInst->isArithmeticShift(); + Constant *AndMask = dyn_cast(inst->getOperand(1)); + Constant *ShrVal = dyn_cast(ShiftInst->getOperand(1)); + // Lets make sure that the shift value and the and mask are constant integers. 
+ if (!AndMask || !ShrVal) { + return false; + } + Constant *newMaskConst; + Constant *shiftValConst; + if (isVector) { + // Handle the vector case + std::vector maskVals; + std::vector shiftVals; + ConstantVector *AndMaskVec = dyn_cast(AndMask); + ConstantVector *ShrValVec = dyn_cast(ShrVal); + Type *scalarType = AndMaskVec->getType()->getScalarType(); + assert(AndMaskVec->getNumOperands() == + ShrValVec->getNumOperands() && "cannot have a " + "combination where the number of elements to a " + "shift and an and are different!"); + for (size_t x = 0, y = AndMaskVec->getNumOperands(); x < y; ++x) { + ConstantInt *AndCI = dyn_cast(AndMaskVec->getOperand(x)); + ConstantInt *ShiftIC = dyn_cast(ShrValVec->getOperand(x)); + if (!AndCI || !ShiftIC) { + return false; + } + uint32_t maskVal = (uint32_t)AndCI->getZExtValue(); + if (!isMask_32(maskVal)) { + return false; + } + maskVal = (uint32_t)CountTrailingOnes_32(maskVal); + uint32_t shiftVal = (uint32_t)ShiftIC->getZExtValue(); + // If the mask or shiftval is greater than the bitcount, then break out. + if (maskVal >= 32 || shiftVal >= 32) { + return false; + } + // If the mask val is greater than the the number of original bits left + // then this optimization is invalid. + if (maskVal > (32 - shiftVal)) { + return false; + } + maskVals.push_back(ConstantInt::get(scalarType, maskVal, isSigned)); + shiftVals.push_back(ConstantInt::get(scalarType, shiftVal, isSigned)); + } + newMaskConst = ConstantVector::get(maskVals); + shiftValConst = ConstantVector::get(shiftVals); + } else { + // Handle the scalar case + uint32_t maskVal = (uint32_t)dyn_cast(AndMask)->getZExtValue(); + // This must be a mask value where all lower bits are set to 1 and then any + // bit higher is set to 0. + if (!isMask_32(maskVal)) { + return false; + } + maskVal = (uint32_t)CountTrailingOnes_32(maskVal); + // Count the number of bits set in the mask, this is the width of the + // resulting bit set that is extracted from the source value. 
+ uint32_t shiftVal = (uint32_t)dyn_cast(ShrVal)->getZExtValue(); + // If the mask or shift val is greater than the bitcount, then break out. + if (maskVal >= 32 || shiftVal >= 32) { + return false; + } + // If the mask val is greater than the the number of original bits left then + // this optimization is invalid. + if (maskVal > (32 - shiftVal)) { + return false; + } + newMaskConst = ConstantInt::get(aType, maskVal, isSigned); + shiftValConst = ConstantInt::get(aType, shiftVal, isSigned); + } + // Lets create the function signature. + std::vector callTypes; + callTypes.push_back(aType); + callTypes.push_back(aType); + callTypes.push_back(aType); + FunctionType *funcType = FunctionType::get(aType, callTypes, false); + std::string name = "llvm.AMDIL.bit.extract.u32"; + if (isVector) { + name += ".v" + itostr(numEle) + "i32"; + } else { + name += "."; + } + // Lets create the function. + Function *Func = + dyn_cast(inst->getParent()->getParent()->getParent()-> + getOrInsertFunction(llvm::StringRef(name), funcType)); + Value *Operands[3] = { + ShiftInst->getOperand(0), + shiftValConst, + newMaskConst + }; + // Lets create the Call with the operands + CallInst *CI = CallInst::Create(Func, Operands, "ByteExtractOpt"); + CI->setDoesNotAccessMemory(); + CI->insertBefore(inst); + inst->replaceAllUsesWith(CI); + return true; +} + +bool +AMDILPeepholeOpt::expandBFI(CallInst *CI) +{ + if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) { + return false; + } + Value *LHS = CI->getOperand(CI->getNumOperands() - 1); + if (!LHS->getName().startswith("__amdil_bfi")) { + return false; + } + Type* type = CI->getOperand(0)->getType(); + Constant *negOneConst = NULL; + if (type->isVectorTy()) { + std::vector negOneVals; + negOneConst = ConstantInt::get(CI->getContext(), + APInt(32, StringRef("-1"), 10)); + for (size_t x = 0, + y = dyn_cast(type)->getNumElements(); x < y; ++x) { + negOneVals.push_back(negOneConst); + } + negOneConst = ConstantVector::get(negOneVals); + } else { + 
negOneConst = ConstantInt::get(CI->getContext(), + APInt(32, StringRef("-1"), 10)); + } + // __amdil_bfi => (A & B) | (~A & C) + BinaryOperator *lhs = + BinaryOperator::Create(Instruction::And, CI->getOperand(0), + CI->getOperand(1), "bfi_and", CI); + BinaryOperator *rhs = + BinaryOperator::Create(Instruction::Xor, CI->getOperand(0), negOneConst, + "bfi_not", CI); + rhs = BinaryOperator::Create(Instruction::And, rhs, CI->getOperand(2), + "bfi_and", CI); + lhs = BinaryOperator::Create(Instruction::Or, lhs, rhs, "bfi_or", CI); + CI->replaceAllUsesWith(lhs); + return true; +} + +bool +AMDILPeepholeOpt::expandBFM(CallInst *CI) +{ + if (!CI || mSTM->calVersion() <= CAL_VERSION_SC_150) { + return false; + } + Value *LHS = CI->getOperand(CI->getNumOperands() - 1); + if (!LHS->getName().startswith("__amdil_bfm")) { + return false; + } + // __amdil_bfm => ((1 << (src0 & 0x1F)) - 1) << (src1 & 0x1f) + Constant *newMaskConst = NULL; + Constant *newShiftConst = NULL; + Type* type = CI->getOperand(0)->getType(); + if (type->isVectorTy()) { + std::vector newMaskVals, newShiftVals; + newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); + newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); + for (size_t x = 0, + y = dyn_cast(type)->getNumElements(); x < y; ++x) { + newMaskVals.push_back(newMaskConst); + newShiftVals.push_back(newShiftConst); + } + newMaskConst = ConstantVector::get(newMaskVals); + newShiftConst = ConstantVector::get(newShiftVals); + } else { + newMaskConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 0x1F); + newShiftConst = ConstantInt::get(Type::getInt32Ty(*mCTX), 1); + } + BinaryOperator *lhs = + BinaryOperator::Create(Instruction::And, CI->getOperand(0), + newMaskConst, "bfm_mask", CI); + lhs = BinaryOperator::Create(Instruction::Shl, newShiftConst, + lhs, "bfm_shl", CI); + lhs = BinaryOperator::Create(Instruction::Sub, lhs, + newShiftConst, "bfm_sub", CI); + BinaryOperator *rhs = + BinaryOperator::Create(Instruction::And, 
CI->getOperand(1), + newMaskConst, "bfm_mask", CI); + lhs = BinaryOperator::Create(Instruction::Shl, lhs, rhs, "bfm_shl", CI); + CI->replaceAllUsesWith(lhs); + return true; +} + +bool +AMDILPeepholeOpt::instLevelOptimizations(BasicBlock::iterator *bbb) +{ + Instruction *inst = (*bbb); + if (optimizeCallInst(bbb)) { + return true; + } + if (optimizeBitExtract(inst)) { + return false; + } + if (optimizeBitInsert(inst)) { + return false; + } + if (correctMisalignedMemOp(inst)) { + return false; + } + return false; +} +bool +AMDILPeepholeOpt::correctMisalignedMemOp(Instruction *inst) +{ + LoadInst *linst = dyn_cast(inst); + StoreInst *sinst = dyn_cast(inst); + unsigned alignment; + Type* Ty = inst->getType(); + if (linst) { + alignment = linst->getAlignment(); + Ty = inst->getType(); + } else if (sinst) { + alignment = sinst->getAlignment(); + Ty = sinst->getValueOperand()->getType(); + } else { + return false; + } + unsigned size = getTypeSize(Ty); + if (size == alignment || size < alignment) { + return false; + } + if (!Ty->isStructTy()) { + return false; + } + if (alignment < 4) { + if (linst) { + linst->setAlignment(0); + return true; + } else if (sinst) { + sinst->setAlignment(0); + return true; + } + } + return false; +} +bool +AMDILPeepholeOpt::isSigned24BitOps(CallInst *CI) +{ + if (!CI) { + return false; + } + Value *LHS = CI->getOperand(CI->getNumOperands() - 1); + std::string namePrefix = LHS->getName().substr(0, 14); + if (namePrefix != "__amdil_imad24" && namePrefix != "__amdil_imul24" + && namePrefix != "__amdil__imul24_high") { + return false; + } + if (mSTM->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) { + return false; + } + return true; +} + +void +AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI) +{ + assert(isSigned24BitOps(CI) && "Must be a " + "signed 24 bit operation to call this function!"); + Value *LHS = CI->getOperand(CI->getNumOperands()-1); + // On 7XX and 8XX we do not have signed 24bit, so we need to + // expand it to the 
following: + // imul24 turns into 32bit imul + // imad24 turns into 32bit imad + // imul24_high turns into 32bit imulhigh + if (LHS->getName().substr(0, 14) == "__amdil_imad24") { + Type *aType = CI->getOperand(0)->getType(); + bool isVector = aType->isVectorTy(); + int numEle = isVector ? dyn_cast(aType)->getNumElements() : 1; + std::vector callTypes; + callTypes.push_back(CI->getOperand(0)->getType()); + callTypes.push_back(CI->getOperand(1)->getType()); + callTypes.push_back(CI->getOperand(2)->getType()); + FunctionType *funcType = + FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); + std::string name = "__amdil_imad"; + if (isVector) { + name += "_v" + itostr(numEle) + "i32"; + } else { + name += "_i32"; + } + Function *Func = dyn_cast( + CI->getParent()->getParent()->getParent()-> + getOrInsertFunction(llvm::StringRef(name), funcType)); + Value *Operands[3] = { + CI->getOperand(0), + CI->getOperand(1), + CI->getOperand(2) + }; + CallInst *nCI = CallInst::Create(Func, Operands, "imad24"); + nCI->insertBefore(CI); + CI->replaceAllUsesWith(nCI); + } else if (LHS->getName().substr(0, 14) == "__amdil_imul24") { + BinaryOperator *mulOp = + BinaryOperator::Create(Instruction::Mul, CI->getOperand(0), + CI->getOperand(1), "imul24", CI); + CI->replaceAllUsesWith(mulOp); + } else if (LHS->getName().substr(0, 19) == "__amdil_imul24_high") { + Type *aType = CI->getOperand(0)->getType(); + + bool isVector = aType->isVectorTy(); + int numEle = isVector ? 
dyn_cast(aType)->getNumElements() : 1; + std::vector callTypes; + callTypes.push_back(CI->getOperand(0)->getType()); + callTypes.push_back(CI->getOperand(1)->getType()); + FunctionType *funcType = + FunctionType::get(CI->getOperand(0)->getType(), callTypes, false); + std::string name = "__amdil_imul_high"; + if (isVector) { + name += "_v" + itostr(numEle) + "i32"; + } else { + name += "_i32"; + } + Function *Func = dyn_cast( + CI->getParent()->getParent()->getParent()-> + getOrInsertFunction(llvm::StringRef(name), funcType)); + Value *Operands[2] = { + CI->getOperand(0), + CI->getOperand(1) + }; + CallInst *nCI = CallInst::Create(Func, Operands, "imul24_high"); + nCI->insertBefore(CI); + CI->replaceAllUsesWith(nCI); + } +} + +bool +AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI) +{ + return (CI != NULL + && CI->getOperand(CI->getNumOperands() - 1)->getName() + == "__amdil_get_local_size_int"); +} + +bool +AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI) +{ + if (!CI) { + return false; + } + if (mSTM->device()->getGeneration() == AMDILDeviceInfo::HD6XXX + && (mSTM->getDeviceName() == "cayman")) { + return false; + } + return CI->getOperand(CI->getNumOperands() - 1)->getName().substr(0, 20) + == "__amdil_improved_div"; +} + +void +AMDILPeepholeOpt::expandAccurateDivide(CallInst *CI) +{ + assert(convertAccurateDivide(CI) + && "expanding accurate divide can only happen if it is expandable!"); + BinaryOperator *divOp = + BinaryOperator::Create(Instruction::FDiv, CI->getOperand(0), + CI->getOperand(1), "fdiv32", CI); + CI->replaceAllUsesWith(divOp); +} + +bool +AMDILPeepholeOpt::propagateSamplerInst(CallInst *CI) +{ + if (optLevel != CodeGenOpt::None) { + return false; + } + + if (!CI) { + return false; + } + + unsigned funcNameIdx = 0; + funcNameIdx = CI->getNumOperands() - 1; + StringRef calleeName = CI->getOperand(funcNameIdx)->getName(); + if (calleeName != "__amdil_image2d_read_norm" + && calleeName != "__amdil_image2d_read_unnorm" + && calleeName != 
"__amdil_image3d_read_norm" + && calleeName != "__amdil_image3d_read_unnorm") { + return false; + } + + unsigned samplerIdx = 2; + samplerIdx = 1; + Value *sampler = CI->getOperand(samplerIdx); + LoadInst *lInst = dyn_cast(sampler); + if (!lInst) { + return false; + } + + if (lInst->getPointerAddressSpace() != AMDILAS::PRIVATE_ADDRESS) { + return false; + } + + GlobalVariable *gv = dyn_cast(lInst->getPointerOperand()); + // If we are loading from what is not a global value, then we + // fail and return. + if (!gv) { + return false; + } + + // If we don't have an initializer or we have an initializer and + // the initializer is not a 32bit integer, we fail. + if (!gv->hasInitializer() + || !gv->getInitializer()->getType()->isIntegerTy(32)) { + return false; + } + + // Now that we have the global variable initializer, lets replace + // all uses of the load instruction with the samplerVal and + // reparse the __amdil_is_constant() function. + Constant *samplerVal = gv->getInitializer(); + lInst->replaceAllUsesWith(samplerVal); + return true; +} + +bool +AMDILPeepholeOpt::doInitialization(Module &M) +{ + return false; +} + +bool +AMDILPeepholeOpt::doFinalization(Module &M) +{ + return false; +} + +void +AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const +{ + AU.addRequired(); + FunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); +} + +size_t AMDILPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) { + size_t size = 0; + if (!T) { + return size; + } + switch (T->getTypeID()) { + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + case Type::LabelTyID: + assert(0 && "These types are not supported by this backend"); + default: + case Type::FloatTyID: + case Type::DoubleTyID: + size = T->getPrimitiveSizeInBits() >> 3; + break; + case Type::PointerTyID: + size = getTypeSize(dyn_cast(T), dereferencePtr); + break; + case Type::IntegerTyID: + size = getTypeSize(dyn_cast(T), dereferencePtr); + break; + case Type::StructTyID: 
+ size = getTypeSize(dyn_cast(T), dereferencePtr); + break; + case Type::ArrayTyID: + size = getTypeSize(dyn_cast(T), dereferencePtr); + break; + case Type::FunctionTyID: + size = getTypeSize(dyn_cast(T), dereferencePtr); + break; + case Type::VectorTyID: + size = getTypeSize(dyn_cast(T), dereferencePtr); + break; + }; + return size; +} + +size_t AMDILPeepholeOpt::getTypeSize(StructType * const ST, + bool dereferencePtr) { + size_t size = 0; + if (!ST) { + return size; + } + Type *curType; + StructType::element_iterator eib; + StructType::element_iterator eie; + for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { + curType = *eib; + size += getTypeSize(curType, dereferencePtr); + } + return size; +} + +size_t AMDILPeepholeOpt::getTypeSize(IntegerType * const IT, + bool dereferencePtr) { + return IT ? (IT->getBitWidth() >> 3) : 0; +} + +size_t AMDILPeepholeOpt::getTypeSize(FunctionType * const FT, + bool dereferencePtr) { + assert(0 && "Should not be able to calculate the size of an function type"); + return 0; +} + +size_t AMDILPeepholeOpt::getTypeSize(ArrayType * const AT, + bool dereferencePtr) { + return (size_t)(AT ? (getTypeSize(AT->getElementType(), + dereferencePtr) * AT->getNumElements()) + : 0); +} + +size_t AMDILPeepholeOpt::getTypeSize(VectorType * const VT, + bool dereferencePtr) { + return VT ? 
(VT->getBitWidth() >> 3) : 0; +} + +size_t AMDILPeepholeOpt::getTypeSize(PointerType * const PT, + bool dereferencePtr) { + if (!PT) { + return 0; + } + Type *CT = PT->getElementType(); + if (CT->getTypeID() == Type::StructTyID && + PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { + return getTypeSize(dyn_cast(CT)); + } else if (dereferencePtr) { + size_t size = 0; + for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { + size += getTypeSize(PT->getContainedType(x), dereferencePtr); + } + return size; + } else { + return 4; + } +} + +size_t AMDILPeepholeOpt::getTypeSize(OpaqueType * const OT, + bool dereferencePtr) { + //assert(0 && "Should not be able to calculate the size of an opaque type"); + return 4; +} diff --git a/lib/Target/AMDGPU/AMDILProfiles.td b/lib/Target/AMDGPU/AMDILProfiles.td new file mode 100644 index 0000000..60435a8 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILProfiles.td @@ -0,0 +1,174 @@ +//===- AMDILProfiles.td - AMD IL Profiles ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +// These are used for custom selection dag type profiles + +//===----------------------------------------------------------------------===// +// Custom Selection DAG Type Profiles +//===----------------------------------------------------------------------===// +// SDTCisDP - The specified operand has double type +// Tablegen needs to be hacked to get this constraint to work +//class SDTCisDP<int OpNum> : SDTypeConstraint<OpNum>; + +//===----------------------------------------------------------------------===// +// Generic Profile Types +//===----------------------------------------------------------------------===// + +def SDTIL_GenUnaryOp : SDTypeProfile<1, 1, [ + SDTCisSameAs<0, 1> + ]>; +def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> + ]>; +def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3> + ]>; +def SDTIL_GenCMovLog : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisInt<1> + ]>; +def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [ + SDTCisEltOfVec<1, 0> + ]>; + +def SDTIL_GenVecExtract : SDTypeProfile<1, 2, [ + SDTCisEltOfVec<0, 1>, SDTCisVT<2, i32> + ]>; + +def SDTIL_GenVecInsert : SDTypeProfile<1, 4, [ + SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, + SDTCisVT<3, i32>, SDTCisVT<4, i32> + ]>; + +def SDTIL_GenVecShuffle : SDTypeProfile <1, 2, [ + SDTCisSameAs<0, 1>, SDTCisVT<2, i32> + ]>; + +def SDTIL_GenVecConcat : SDTypeProfile <1, 2, [ + SDTCisSameAs<1, 2> + ]>; +//===----------------------------------------------------------------------===// +// Conversion Profile Types +//===----------------------------------------------------------------------===// +def SDTIL_DPToFPOp : SDTypeProfile<1, 1, [ + SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1> + ]>; // d2f + +def SDTIL_AnyToInt : SDTypeProfile<1, 1, [ + SDTCisInt<0> + ]>; +def SDTIL_IntToAny : SDTypeProfile<1, 1, [ + 
SDTCisInt<1> + ]>; +def SDTIL_GenBitConv : SDTypeProfile<1, 1, []>; +//===----------------------------------------------------------------------===// +// Scalar Profile Types +//===----------------------------------------------------------------------===// + +// Add instruction pattern to handle offsets of memory operations +def SDTIL_AddAddrri: SDTypeProfile<1, 2, [ + SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisSameAs<0, 2> + ]>; +def SDTIL_AddAddrir : SDTypeProfile<1, 2, [ + SDTCisInt<0>, SDTCisPtrTy<2>, SDTCisSameAs<0, 1> + ]>; + +def SDTIL_LCreate : SDTypeProfile<1, 2, [ + SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2> + ]>; +def SDTIL_LCreate2 : SDTypeProfile<1, 2, [ + SDTCisVT<0, v2i64>, SDTCisVT<1, v2i32>, SDTCisSameAs<1, 2> + ]>; +def SDTIL_LComp : SDTypeProfile<1, 1, [ + SDTCisVT<0, i32>, SDTCisVT<1, i64> + ]>; +def SDTIL_LComp2 : SDTypeProfile<1, 1, [ + SDTCisVT<0, v2i32>, SDTCisVT<1, v2i64> + ]>; +def SDTIL_DCreate : SDTypeProfile<1, 2, [ + SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2> + ]>; +def SDTIL_DComp : SDTypeProfile<1, 1, [ + SDTCisVT<0, i32>, SDTCisVT<1, f64> + ]>; +def SDTIL_DCreate2 : SDTypeProfile<1, 2, [ + SDTCisVT<0, v2f64>, SDTCisVT<1, v2i32>, SDTCisSameAs<1, 2> + ]>; +def SDTIL_DComp2 : SDTypeProfile<1, 1, [ + SDTCisVT<0, v2i32>, SDTCisVT<1, v2f64> + ]>; +//===----------------------------------------------------------------------===// +// Flow Control Profile Types +//===----------------------------------------------------------------------===// +// Profile for Normal Call +def SDTIL_Call : SDTypeProfile<0, 1, [ + SDTCisVT<0, i32> + ]>; +// Branch instruction where second and third are basic blocks +def SDTIL_BRCond : SDTypeProfile<0, 2, [ + SDTCisVT<0, OtherVT> + ]>; +// Comparison instruction +def SDTIL_Cmp : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 2>, SDTCisSameAs<2,3>, SDTCisVT<1, i32> + ]>; + + +//===----------------------------------------------------------------------===// +// Call Sequence Profiles 
+//===----------------------------------------------------------------------===// +def SDTIL_CallSeqStart : SDCallSeqStart< [ + SDTCisVT<0, i32> + ]>; +def SDTIL_CallSeqEnd : SDCallSeqEnd< [ + SDTCisVT<0, i32>, SDTCisVT<1, i32> + ]>; + +//===----------------------------------------------------------------------===// +// Image Operation Profiles +//===----------------------------------------------------------------------===// +def SDTIL_ImageRead : SDTypeProfile<1, 3, + [SDTCisVT<0, v4i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVT<3, v4f32>]>; +def SDTIL_ImageWrite : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, SDTCisVT<1, v2i32>, SDTCisVT<2, v4i32>]>; +def SDTIL_ImageWrite3D : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>]>; +def SDTIL_ImageInfo : SDTypeProfile<1, 1, + [SDTCisVT<0, v4i32>, SDTCisPtrTy<1>]>; +//===----------------------------------------------------------------------===// +// Atomic Operation Profiles +//===----------------------------------------------------------------------===// +def SDTIL_UniAtomNoRet : SDTypeProfile<0, 2, [ + SDTCisPtrTy<0>, SDTCisVT<1, i32> + ]>; +def SDTIL_BinAtomNoRet : SDTypeProfile<0, 3, [ + SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32> + ]>; +def SDTIL_TriAtomNoRet : SDTypeProfile<0, 4, [ + SDTCisPtrTy<0>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32> + ]>; +def SDTIL_UniAtom : SDTypeProfile<1, 2, [ + SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32> + ]>; +def SDTIL_BinAtom : SDTypeProfile<1, 3, [ + SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, SDTCisVT<3, i32> + ]>; +def SDTIL_TriAtom : SDTypeProfile<1, 4, [ + SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i32>, + SDTCisVT<3, i32>, SDTCisVT<4, i32> + ]>; + +def SDTIL_BinAtomFloat : SDTypeProfile<1, 3, [ + SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, f32>, SDTCisVT<3, f32> + ]>; +def SDTIL_BinAtomNoRetFloat : SDTypeProfile<0, 3, [ + SDTCisPtrTy<0>, SDTCisVT<1, f32>, SDTCisVT<2, f32> + ]>; + +def SDTIL_Append : 
SDTypeProfile<1, 1, [ + SDTCisVT<0, i32>, SDTCisPtrTy<1> + ]>; diff --git a/lib/Target/AMDGPU/AMDILRegisterInfo.cpp b/lib/Target/AMDGPU/AMDILRegisterInfo.cpp new file mode 100644 index 0000000..989ccd9 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILRegisterInfo.cpp @@ -0,0 +1,162 @@ +//===- AMDILRegisterInfo.cpp - AMDIL Register Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the AMDIL implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "AMDILRegisterInfo.h" +#include "AMDIL.h" +#include "AMDILInstrInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +AMDILRegisterInfo::AMDILRegisterInfo(TargetMachine &tm, + const TargetInstrInfo &tii) +: AMDILGenRegisterInfo(0), // RA??? + TM(tm), TII(tii) +{ + baseOffset = 0; + nextFuncOffset = 0; +} + +const uint16_t* +AMDILRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const +{ + static const uint16_t CalleeSavedRegs[] = { 0 }; + // TODO: Does IL need to actually have any callee saved regs? 
+ // I don't think we do since we can just use sequential registers + // Maybe this would be easier if every function call was inlined first + // and then there would be no callee issues to deal with + //TODO(getCalleeSavedRegs); + return CalleeSavedRegs; +} + +BitVector +AMDILRegisterInfo::getReservedRegs(const MachineFunction &MF) const +{ + BitVector Reserved(getNumRegs()); + // We reserve the first getNumRegs() registers as they are the ones passed + // in live-in/live-out + // and therefore cannot be killed by the scheduler. This works around a bug + // discovered + // that was causing the linearscan register allocator to kill registers + // inside of the + // function that were also passed as LiveIn registers. + for (unsigned int x = 0, y = 256; x < y; ++x) { + Reserved.set(x); + } + return Reserved; +} + +BitVector +AMDILRegisterInfo::getAllocatableSet(const MachineFunction &MF, + const TargetRegisterClass *RC = NULL) const +{ + BitVector Allocatable(getNumRegs()); + Allocatable.clear(); + return Allocatable; +} + +const TargetRegisterClass* const* +AMDILRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const +{ + static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; + // TODO: Keep in sync with getCalleeSavedRegs + //TODO(getCalleeSavedRegClasses); + return CalleeSavedRegClasses; +} +void +AMDILRegisterInfo::eliminateCallFramePseudoInstr( + MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const +{ + MBB.erase(I); +} + +// For each frame index we find, we store the offset in the stack which is +// being pushed back into the global buffer. The offset into the stack where +// the value is stored is copied into a new register and the frame index is +// then replaced with that register.
+void +AMDILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, + RegScavenger *RS) const +{ + assert(!"Implement"); +} + +void +AMDILRegisterInfo::processFunctionBeforeFrameFinalized( + MachineFunction &MF) const +{ + //TODO(processFunctionBeforeFrameFinalized); + // Here we keep track of the amount of stack that the current function + // uses so + // that we can set the offset to the end of the stack and any other + // function call + // will not overwrite any stack variables. + // baseOffset = nextFuncOffset; + MachineFrameInfo *MFI = MF.getFrameInfo(); + + for (uint32_t x = 0, y = MFI->getNumObjects(); x < y; ++x) { + int64_t size = MFI->getObjectSize(x); + if (!(size % 4) && size > 1) { + nextFuncOffset += size; + } else { + nextFuncOffset += 16; + } + } +} +unsigned int +AMDILRegisterInfo::getRARegister() const +{ + return AMDGPU::RA; +} + +unsigned int +AMDILRegisterInfo::getFrameRegister(const MachineFunction &MF) const +{ + return AMDGPU::FP; +} + +unsigned int +AMDILRegisterInfo::getEHExceptionRegister() const +{ + assert(0 && "What is the exception register"); + return 0; +} + +unsigned int +AMDILRegisterInfo::getEHHandlerRegister() const +{ + assert(0 && "What is the exception handler register"); + return 0; +} + +int64_t +AMDILRegisterInfo::getStackSize() const +{ + return nextFuncOffset - baseOffset; +} + +#define GET_REGINFO_TARGET_DESC +#include "AMDGPUGenRegisterInfo.inc" + diff --git a/lib/Target/AMDGPU/AMDILRegisterInfo.h b/lib/Target/AMDGPU/AMDILRegisterInfo.h new file mode 100644 index 0000000..97db1ae --- /dev/null +++ b/lib/Target/AMDGPU/AMDILRegisterInfo.h @@ -0,0 +1,95 @@ +//===- AMDILRegisterInfo.h - AMDIL Register Information Impl ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//==-----------------------------------------------------------------------===// +// +// This file contains the AMDIL implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef AMDILREGISTERINFO_H_ +#define AMDILREGISTERINFO_H_ + +#include "llvm/Target/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "AMDGPUGenRegisterInfo.inc" +// See header file for explanation + +namespace llvm +{ + + class TargetInstrInfo; + class Type; + + /// DWARFFlavour - Flavour of dwarf regnumbers + /// + namespace DWARFFlavour { + enum { + AMDIL_Generic = 0 + }; + } + + struct AMDILRegisterInfo : public AMDILGenRegisterInfo + { + TargetMachine &TM; + const TargetInstrInfo &TII; + + AMDILRegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii); + /// Code Generation virtual methods... + const uint16_t * getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + const TargetRegisterClass* const* + getCalleeSavedRegClasses( + const MachineFunction *MF = 0) const; + + BitVector + getReservedRegs(const MachineFunction &MF) const; + BitVector + getAllocatableSet(const MachineFunction &MF, + const TargetRegisterClass *RC) const; + + void + eliminateCallFramePseudoInstr( + MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + void + eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + void + processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + + // Debug information queries. + unsigned int + getRARegister() const; + + unsigned int + getFrameRegister(const MachineFunction &MF) const; + + // Exception handling queries. 
+ unsigned int + getEHExceptionRegister() const; + unsigned int + getEHHandlerRegister() const; + + int64_t + getStackSize() const; + + virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) + const { + return &AMDGPU::GPRI32RegClass; + } + private: + mutable int64_t baseOffset; + mutable int64_t nextFuncOffset; + }; + +} // end namespace llvm + +#endif // AMDILREGISTERINFO_H_ diff --git a/lib/Target/AMDGPU/AMDILRegisterInfo.td b/lib/Target/AMDGPU/AMDILRegisterInfo.td new file mode 100644 index 0000000..42235ff --- /dev/null +++ b/lib/Target/AMDGPU/AMDILRegisterInfo.td @@ -0,0 +1,110 @@ +//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Declarations that describe the AMDIL register file +// +//===----------------------------------------------------------------------===// + +class AMDILReg num, string n> : Register { + field bits<16> Value; + let Value = num; + let Namespace = "AMDGPU"; +} + +// We will start with 8 registers for each class before expanding to more +// Since the swizzle is added based on the register class, we can leave it +// off here and just specify different registers for different register classes +def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>; +def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>; +def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>; +def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>; +def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>; +def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>; +def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>; +def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>; +def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>; +def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>; +def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>; +def R12 : AMDILReg<12, "r12">, 
DwarfRegNum<[12]>; +def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>; +def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>; +def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>; +def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>; +def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>; +def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>; +def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>; +def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>; + +// All registers between 1000 and 1024 are reserved and cannot be used +// unless commented in this section +// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's +// r1020 is used to hold the frame index for local arrays +// r1019 is used to hold the dynamic stack allocation pointer +// r1018 is used as a temporary register for handwritten code +// r1017 is used as a temporary register for handwritten code +// r1016 is used as a temporary register for load/store code +// r1015 is used as a temporary register for data segment offset +// r1014 is used as a temporary register for store code +// r1013 is used as the section data pointer register +// r1012-r1010 and r1001-r1008 are used for temporary I/O registers +// r1009 is used as the frame pointer register +// r999 is used as the mem register. +// r998 is used as the return address register. 
+//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>; +//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>; +//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>; +//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>; +//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>; +//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>; +def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>; +def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>; +def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>; +def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>; +def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>; +def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>; +def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>; +def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>; +def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>; +def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>; +def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>; +def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>; +def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>; +def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>; +def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>; +def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>; +def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>; +def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>; +def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>; +def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>; +def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>; +def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>; +def GPRI16 : RegisterClass<"AMDGPU", [i16], 16, + (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> +{ + let AltOrders = [(add (sequence "R%u", 1, 20))]; + let AltOrderSelect = [{ + return 1; + }]; + } +def GPRI32 : RegisterClass<"AMDGPU", [i32], 32, + (add (sequence "R%u", 1, 20), RA, 
SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> +{ + let AltOrders = [(add (sequence "R%u", 1, 20))]; + let AltOrderSelect = [{ + return 1; + }]; + } +def GPRF32 : RegisterClass<"AMDGPU", [f32], 32, + (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> +{ + let AltOrders = [(add (sequence "R%u", 1, 20))]; + let AltOrderSelect = [{ + return 1; + }]; + } diff --git a/lib/Target/AMDGPU/AMDILSIDevice.cpp b/lib/Target/AMDGPU/AMDILSIDevice.cpp new file mode 100644 index 0000000..ae402a5 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILSIDevice.cpp @@ -0,0 +1,49 @@ +//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +#include "AMDILSIDevice.h" +#include "AMDILEvergreenDevice.h" +#include "AMDILNIDevice.h" +#include "AMDILSubtarget.h" + +using namespace llvm; + +AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST) + : AMDILEvergreenDevice(ST) +{ +} +AMDILSIDevice::~AMDILSIDevice() +{ +} + +size_t +AMDILSIDevice::getMaxLDSSize() const +{ + if (usesHardware(AMDILDeviceInfo::LocalMem)) { + return MAX_LDS_SIZE_900; + } else { + return 0; + } +} + +uint32_t +AMDILSIDevice::getGeneration() const +{ + return AMDILDeviceInfo::HD7XXX; +} + +std::string +AMDILSIDevice::getDataLayout() const +{ + return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16" + "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" + "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" + "-v512:512:512-v1024:1024:1024-v2048:2048:2048" + "-n8:16:32:64"); +} diff --git a/lib/Target/AMDGPU/AMDILSIDevice.h 
b/lib/Target/AMDGPU/AMDILSIDevice.h new file mode 100644 index 0000000..b272af7 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILSIDevice.h @@ -0,0 +1,45 @@ +//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// Interface for the subtarget data classes. +// +//===---------------------------------------------------------------------===// +// This file will define the interface that each generation needs to +// implement in order to correctly answer queries on the capabilities of the +// specific hardware. +//===---------------------------------------------------------------------===// +#ifndef _AMDILSIDEVICE_H_ +#define _AMDILSIDEVICE_H_ +#include "AMDILEvergreenDevice.h" +#include "AMDILSubtarget.h" + +namespace llvm { + class AMDILSubtarget; +//===---------------------------------------------------------------------===// +// SI generation of devices and their respective sub classes +//===---------------------------------------------------------------------===// + +// The AMDILSIDevice is the base class for all Southern Islands series of +// cards. It is very similar to the AMDILEvergreenDevice, with the major +// exception being differences in wavefront size and hardware capabilities.
The +// SI devices are all 64 wide wavefronts and also add support for signed 24 bit +// integer operations + + class AMDILSIDevice : public AMDILEvergreenDevice { + public: + AMDILSIDevice(AMDILSubtarget*); + virtual ~AMDILSIDevice(); + virtual size_t getMaxLDSSize() const; + virtual uint32_t getGeneration() const; + virtual std::string getDataLayout() const; + protected: + }; // AMDILSIDevice + +} // namespace llvm +#endif // _AMDILSIDEVICE_H_ diff --git a/lib/Target/AMDGPU/AMDILSubtarget.cpp b/lib/Target/AMDGPU/AMDILSubtarget.cpp new file mode 100644 index 0000000..723037e --- /dev/null +++ b/lib/Target/AMDGPU/AMDILSubtarget.cpp @@ -0,0 +1,178 @@ +//===- AMDILSubtarget.cpp - AMDIL Subtarget Information -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file implements the AMD IL specific subclass of TargetSubtarget. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDILSubtarget.h" +#include "AMDIL.h" +#include "AMDILDevices.h" +#include "AMDILUtilityFunctions.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/SubtargetFeature.h" + +using namespace llvm; + +#define GET_SUBTARGETINFO_ENUM +#define GET_SUBTARGETINFO_CTOR +#define GET_SUBTARGETINFO_TARGET_DESC +#include "AMDGPUGenSubtargetInfo.inc" + +AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS ), + mDumpCode(false) +{ + memset(CapsOverride, 0, sizeof(*CapsOverride) + * AMDILDeviceInfo::MaxNumberCapabilities); + // Default card + std::string GPU = "rv770"; + GPU = CPU; + mIs64bit = false; + mVersion = 0; + SmallVector Features; + SplitString(FS, Features, ","); + mDefaultSize[0] = 64; + mDefaultSize[1] = 1; + mDefaultSize[2] = 1; + std::string newFeatures = ""; +#if defined(_DEBUG) || defined(DEBUG) + bool useTest = false; +#endif + for (size_t x = 0; x < Features.size(); ++x) { + if (Features[x].startswith("+mwgs")) { + SmallVector sizes; + SplitString(Features[x], sizes, "-"); + size_t mDim = ::atoi(sizes[1].data()); + if (mDim > 3) { + mDim = 3; + } + for (size_t y = 0; y < mDim; ++y) { + mDefaultSize[y] = ::atoi(sizes[y+2].data()); + } +#if defined(_DEBUG) || defined(DEBUG) + } else if (!Features[x].compare("test")) { + useTest = true; +#endif + } else if (Features[x].startswith("+cal")) { + SmallVector version; + SplitString(Features[x], version, "="); + mVersion = ::atoi(version[1].data()); + } else { + GPU = CPU; + if (x > 0) newFeatures += ','; + newFeatures += Features[x]; + } + } + // If we don't have a version then set it to + // -1 which enables everything. This is for + // offline devices. 
+ if (!mVersion) { + mVersion = (uint32_t)-1; + } + for (int x = 0; x < 3; ++x) { + if (!mDefaultSize[x]) { + mDefaultSize[x] = 1; + } + } +#if defined(_DEBUG) || defined(DEBUG) + if (useTest) { + GPU = "kauai"; + } +#endif + ParseSubtargetFeatures(GPU, newFeatures); +#if defined(_DEBUG) || defined(DEBUG) + if (useTest) { + GPU = "test"; + } +#endif + mDevName = GPU; + mDevice = AMDILDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit); +} +AMDILSubtarget::~AMDILSubtarget() +{ + delete mDevice; +} +bool +AMDILSubtarget::isOverride(AMDILDeviceInfo::Caps caps) const +{ + assert(caps < AMDILDeviceInfo::MaxNumberCapabilities && + "Caps index is out of bounds!"); + return CapsOverride[caps]; +} +bool +AMDILSubtarget::is64bit() const +{ + return mIs64bit; +} +bool +AMDILSubtarget::isTargetELF() const +{ + return false; +} +size_t +AMDILSubtarget::getDefaultSize(uint32_t dim) const +{ + if (dim > 3) { + return 1; + } else { + return mDefaultSize[dim]; + } +} +uint32_t +AMDILSubtarget::calVersion() const +{ + return mVersion; +} + +AMDILGlobalManager* +AMDILSubtarget::getGlobalManager() const +{ + return mGM; +} +void +AMDILSubtarget::setGlobalManager(AMDILGlobalManager *gm) const +{ + mGM = gm; +} + +AMDILKernelManager* +AMDILSubtarget::getKernelManager() const +{ + return mKM; +} +void +AMDILSubtarget::setKernelManager(AMDILKernelManager *km) const +{ + mKM = km; +} +std::string +AMDILSubtarget::getDataLayout() const +{ + if (!mDevice) { + return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" + "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" + "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" + "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64"); + } + return mDevice->getDataLayout(); +} + +std::string +AMDILSubtarget::getDeviceName() const +{ + return mDevName; +} +const AMDILDevice * +AMDILSubtarget::device() const +{ + return mDevice; +} diff --git a/lib/Target/AMDGPU/AMDILSubtarget.h 
b/lib/Target/AMDGPU/AMDILSubtarget.h new file mode 100644 index 0000000..e3d8c81 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILSubtarget.h @@ -0,0 +1,77 @@ +//=====-- AMDILSubtarget.h - Define Subtarget for the AMDIL ----*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file declares the AMDIL specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef _AMDILSUBTARGET_H_ +#define _AMDILSUBTARGET_H_ + +#include "AMDILDevice.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#include +#include + +#define GET_SUBTARGETINFO_HEADER +#include "AMDGPUGenSubtargetInfo.inc" + +#define MAX_CB_SIZE (1 << 16) +namespace llvm { + class Module; + class AMDILKernelManager; + class AMDILGlobalManager; + class AMDILDevice; + class AMDILSubtarget : public AMDILGenSubtargetInfo { + private: + bool CapsOverride[AMDILDeviceInfo::MaxNumberCapabilities]; + mutable AMDILGlobalManager *mGM; + mutable AMDILKernelManager *mKM; + const AMDILDevice *mDevice; + size_t mDefaultSize[3]; + size_t mMinimumSize[3]; + std::string mDevName; + uint32_t mVersion; + bool mIs64bit; + bool mIs32on64bit; + bool mDumpCode; + public: + AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS); + virtual ~AMDILSubtarget(); + bool isOverride(AMDILDeviceInfo::Caps) const; + bool is64bit() const; + + // Helper functions to simplify if statements + bool isTargetELF() const; + AMDILGlobalManager* getGlobalManager() const; + void setGlobalManager(AMDILGlobalManager *gm) const; + AMDILKernelManager* getKernelManager() const; + void setKernelManager(AMDILKernelManager *gm) const; + const AMDILDevice* device() const; + std::string getDataLayout() const; + std::string 
getDeviceName() const; + virtual size_t getDefaultSize(uint32_t dim) const; + // Return the version of CAL that the backend should target. + uint32_t calVersion() const; + // ParseSubtargetFeatures - Parses features string setting specified + // subtarget options. Definition of function is + //auto generated by tblgen. + void + ParseSubtargetFeatures( + llvm::StringRef CPU, + llvm::StringRef FS); + bool dumpCode() const { return mDumpCode; } + + }; + +} // end namespace llvm + +#endif // AMDILSUBTARGET_H_ diff --git a/lib/Target/AMDGPU/AMDILTokenDesc.td b/lib/Target/AMDGPU/AMDILTokenDesc.td new file mode 100644 index 0000000..2dafb2c --- /dev/null +++ b/lib/Target/AMDGPU/AMDILTokenDesc.td @@ -0,0 +1,120 @@ +//===-- AMDILTokenDesc.td - AMDIL Token Definitions --*- tablegen -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// + +include "AMDILEnumeratedTypes.td" + +// Each token is 32 bits as specified in section 2.1 of the IL spec +class ILToken n> { + field bits<32> _bits = n; +} + +// Section 2.2.1 - IL Language Token +class ILLang client_type> : ILToken<0> { + let _bits{0-7} = client_type; +} + +// Section 2.2.2 - IL Version Token +class ILVersion minor_version, bits<8> major_version, ILShader shader_type> : ILToken<0> { + let _bits{0-7} = minor_version; + let _bits{8-15} = major_version; + let _bits{16-23} = shader_type.Value; +} + +// Section 2.2.3 - IL Opcode Token +class ILOpcode control, bit sec_mod_pre, bit pri_mod_pre> : ILToken<0> { + let _bits{0-15} = opcode.Value; + let _bits{16-29} = control; + let _bits{30} = sec_mod_pre; + let _bits{31} = pri_mod_pre; +} + +// Section 2.2.4 - IL Destination Token +class ILDst relative_address, bit dimension, bit immediate_pre, bit extended> : ILToken<0> { + let _bits{0-15} = register_num.Value; + let _bits{16-21} = 
register_type.Value; + let _bits{22} = mod_pre; + let _bits{23-24} = relative_address; + let _bits{25} = dimension; + let _bits{26} = immediate_pre; + let _bits{31} = extended; +} + +// Section 2.2.5 - IL Destination Modifier Token +class ILDstMod : ILToken<0> { + let _bits{0-1} = x.Value; + let _bits{2-3} = y.Value; + let _bits{4-5} = z.Value; + let _bits{6-7} = w.Value; + let _bits{8} = clamp; + //let _bits{9-12} = shift_scale; +} + +// Section 2.2.6 - IL Source Token +class ILSrc relative_address, bit dimension, bit immediate_pre, bit extended> : ILToken<0> { + let _bits{0-15} = register_num.Value; + let _bits{16-21} = register_type.Value; + let _bits{22} = mod_pre; + let _bits{23-24} = relative_address; + let _bits{25} = dimension; + let _bits{26} = immediate_pre; + let _bits{31} = extended; +} + +// Section 2.2.7 - IL Source Modifier Token +class ILSrcMod clamp> : ILToken<0> { + let _bits{0-2} = swizzle_x.Value; + let _bits{3} = negate_x; + let _bits{4-6} = swizzle_y.Value; + let _bits{7} = negate_y; + let _bits{8-10} = swizzle_z.Value; + let _bits{11} = negate_z; + let _bits{12-14} = swizzle_w.Value; + let _bits{15} = negate_w; + let _bits{16} = invert; + let _bits{17} = bias; + let _bits{18} = x2; + let _bits{19} = sign; + let _bits{20} = abs; + let _bits{21-23} = divComp.Value; + let _bits{24-31} = clamp; +} + +// Section 2.2.8 - IL Relative Address Token +class ILRelAddr : ILToken<0> { + let _bits{0-15} = address_register.Value; + let _bits{16} = loop_relative; + let _bits{17-19} = component.Value; +} + +// IL Literal Token +class ILLiteral val> : ILToken<0> { + let _bits = val; +} + +// All tokens required for a destination register +class ILDstReg { + ILDst reg = Reg; + ILDstMod mod = Mod; + ILRelAddr rel = Rel; + ILSrc reg_rel = Reg_Rel; + ILSrcMod reg_rel_mod = Reg_Rel_Mod; +} + +// All tokens required for a source register +class ILSrcReg { + ILSrc reg = Reg; + ILSrcMod mod = Mod; + ILRelAddr rel = Rel; + ILSrc reg_rel = Reg_Rel; + ILSrcMod 
reg_rel_mod = Reg_Rel_Mod; +} + diff --git a/lib/Target/AMDGPU/AMDILUtilityFunctions.h b/lib/Target/AMDGPU/AMDILUtilityFunctions.h new file mode 100644 index 0000000..e6666f9 --- /dev/null +++ b/lib/Target/AMDGPU/AMDILUtilityFunctions.h @@ -0,0 +1,75 @@ +//===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file provides helper macros for expanding case statements. +// +//===----------------------------------------------------------------------===// +#ifndef AMDILUTILITYFUNCTIONS_H_ +#define AMDILUTILITYFUNCTIONS_H_ + +// Macros that are used to help with switch statements for various data types. +// However, these macros do not return anything, unlike the second set below. +#define ExpandCaseTo32bitIntTypes(Instr) \ +case Instr##_i32: + +#define ExpandCaseTo32bitIntTruncTypes(Instr) \ +case Instr##_i32i8: \ +case Instr##_i32i16: + +#define ExpandCaseToIntTypes(Instr) \ + ExpandCaseTo32bitIntTypes(Instr) + +#define ExpandCaseToIntTruncTypes(Instr) \ + ExpandCaseTo32bitIntTruncTypes(Instr) + +#define ExpandCaseToFloatTypes(Instr) \ + case Instr##_f32: + +#define ExpandCaseTo32bitScalarTypes(Instr) \ + ExpandCaseTo32bitIntTypes(Instr) \ +case Instr##_f32: + +#define ExpandCaseToAllScalarTypes(Instr) \ + ExpandCaseToFloatTypes(Instr) \ +ExpandCaseToIntTypes(Instr) + +#define ExpandCaseToAllScalarTruncTypes(Instr) \ + ExpandCaseToFloatTruncTypes(Instr) \ +ExpandCaseToIntTruncTypes(Instr) + +#define ExpandCaseToAllTypes(Instr) \ +ExpandCaseToAllScalarTypes(Instr) + +#define ExpandCaseToAllTruncTypes(Instr) \ +ExpandCaseToAllScalarTruncTypes(Instr) + +// Macros that expand into statements with return values +#define ExpandCaseTo32bitIntReturn(Instr, Return) \ +case Instr##_i32: return
Return##_i32; + +#define ExpandCaseToIntReturn(Instr, Return) \ + ExpandCaseTo32bitIntReturn(Instr, Return) + +#define ExpandCaseToFloatReturn(Instr, Return) \ + case Instr##_f32: return Return##_f32;\ + +#define ExpandCaseToAllScalarReturn(Instr, Return) \ + ExpandCaseToFloatReturn(Instr, Return) \ +ExpandCaseToIntReturn(Instr, Return) + +// These macros expand to common groupings of RegClass ID's +#define ExpandCaseTo1CompRegID \ +case AMDGPU::GPRI32RegClassID: \ +case AMDGPU::GPRF32RegClassID: + +#define ExpandCaseTo32BitType(Instr) \ +case Instr##_i32: \ +case Instr##_f32: + +#endif // AMDILUTILITYFUNCTIONS_H_ diff --git a/lib/Target/AMDGPU/AMDILVersion.td b/lib/Target/AMDGPU/AMDILVersion.td new file mode 100644 index 0000000..158ae9e --- /dev/null +++ b/lib/Target/AMDGPU/AMDILVersion.td @@ -0,0 +1,58 @@ +//===-- AMDILVersion.td - Barrier Instruction/Intrinsic definitions------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===--------------------------------------------------------------------===// +// Intrinsic operation support +//===--------------------------------------------------------------------===// +let TargetPrefix = "AMDIL", isTarget = 1 in { +def int_AMDIL_barrier : GCCBuiltin<"barrier">, + BinaryIntNoRetInt; +def int_AMDIL_barrier_global : GCCBuiltin<"barrierGlobal">, + BinaryIntNoRetInt; +def int_AMDIL_barrier_local : GCCBuiltin<"barrierLocal">, + BinaryIntNoRetInt; +def int_AMDIL_barrier_region : GCCBuiltin<"barrierRegion">, + BinaryIntNoRetInt; +def int_AMDIL_get_region_id : GCCBuiltin<"__amdil_get_region_id_int">, + Intrinsic<[llvm_v4i32_ty], [], []>; +def int_AMDIL_get_region_local_id : GCCBuiltin<"__amdil_get_region_local_id_int">, + Intrinsic<[llvm_v4i32_ty], [], []>; +def int_AMDIL_get_num_regions : GCCBuiltin<"__amdil_get_num_regions_int">, + Intrinsic<[llvm_v4i32_ty], [], []>; +def int_AMDIL_get_region_size : GCCBuiltin<"__amdil_get_region_size_int">, + Intrinsic<[llvm_v4i32_ty], [], []>; +} + +let isCall=1, isNotDuplicable=1 in { + let Predicates=[hasRegionAS] in { +def BARRIER_EGNI : BinaryOpNoRet; +} +let Predicates=[noRegionAS] in { +def BARRIER_7XX : BinaryOpNoRet; +} + +def BARRIER_LOCAL : BinaryOpNoRet; + +def BARRIER_GLOBAL : BinaryOpNoRet; + +def BARRIER_REGION : BinaryOpNoRet; +} diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt new file mode 100644 index 0000000..1dc2c70 --- /dev/null +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -0,0 +1,50 @@ +set(LLVM_TARGET_DEFINITIONS AMDGPU.td) + +tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv) +tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic) +tablegen(LLVM AMDGPUGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM AMDGPUGenDFAPacketizer.inc 
-gen-dfa-packetizer) +add_public_tablegen_target(AMDGPUCommonTableGen) + +add_llvm_target(AMDGPUCodeGen + AMDIL7XXDevice.cpp + AMDILCFGStructurizer.cpp + AMDILDevice.cpp + AMDILDeviceInfo.cpp + AMDILEvergreenDevice.cpp + AMDILFrameLowering.cpp + AMDILInstrInfo.cpp + AMDILIntrinsicInfo.cpp + AMDILISelDAGToDAG.cpp + AMDILISelLowering.cpp + AMDILNIDevice.cpp + AMDILPeepholeOptimizer.cpp + AMDILRegisterInfo.cpp + AMDILSIDevice.cpp + AMDILSubtarget.cpp + AMDGPUTargetMachine.cpp + AMDGPUISelLowering.cpp + AMDGPUConvertToISA.cpp + AMDGPUInstrInfo.cpp + AMDGPURegisterInfo.cpp + AMDGPUUtil.cpp + R600CodeEmitter.cpp + R600InstrInfo.cpp + R600ISelLowering.cpp + R600KernelParameters.cpp + R600MachineFunctionInfo.cpp + R600RegisterInfo.cpp + SIAssignInterpRegs.cpp + SICodeEmitter.cpp + SIInstrInfo.cpp + SIISelLowering.cpp + SIMachineFunctionInfo.cpp + SIRegisterInfo.cpp + ) + +add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/AMDGPU/GENERATED_FILES b/lib/Target/AMDGPU/GENERATED_FILES new file mode 100644 index 0000000..9fa63b9 --- /dev/null +++ b/lib/Target/AMDGPU/GENERATED_FILES @@ -0,0 +1,13 @@ +There are 3 files used by this backend that are generated by perl scripts: + +- R600RegisterInfo.td + + Generated with: + perl R600GenRegisterInfo.pl > R600RegisterInfo.td + +- R600HwRegInfo.include + + Generated with: + perl R600GenRegisterInfo.pl + +- SIRegisterInfo.td + + Generated with: + perl SIGenRegisterInfo.pl > SIRegisterInfo.td diff --git a/lib/Target/AMDGPU/LLVMBuild.txt b/lib/Target/AMDGPU/LLVMBuild.txt new file mode 100644 index 0000000..55afac5 --- /dev/null +++ b/lib/Target/AMDGPU/LLVMBuild.txt @@ -0,0 +1,32 @@ +;===- ./lib/Target/AMDIL/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = MCTargetDesc TargetInfo + +[component_0] +type = TargetGroup +name = AMDGPU +parent = Target +has_asmprinter = 0 + +[component_1] +type = Library +name = AMDGPUCodeGen +parent = AMDGPU +required_libraries = AsmPrinter CodeGen Core SelectionDAG Support Target MC AMDGPUInfo AMDGPUDesc +add_to_library_groups = AMDGPU diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp new file mode 100644 index 0000000..9e18887 --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -0,0 +1,104 @@ +//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUMCAsmInfo.h" +#ifndef NULL +#define NULL 0 +#endif + +using namespace llvm; +AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() +{ + //===------------------------------------------------------------------===// + HasSubsectionsViaSymbols = true; + HasMachoZeroFillDirective = false; + HasMachoTBSSDirective = false; + HasStaticCtorDtorReferenceInStaticMode = false; + LinkerRequiresNonEmptyDwarfLines = true; + MaxInstLength = 16; + PCSymbol = "$"; + SeparatorString = "\n"; + CommentColumn = 40; + CommentString = ";"; + LabelSuffix = ":"; + GlobalPrefix = "@"; + PrivateGlobalPrefix = ";."; + LinkerPrivateGlobalPrefix = "!"; + InlineAsmStart = ";#ASMSTART"; + InlineAsmEnd = ";#ASMEND"; + AssemblerDialect = 0; + AllowQuotesInName = false; + AllowNameToStartWithDigit = false; + AllowPeriodsInName = false; + + //===--- Data Emission Directives -------------------------------------===// + ZeroDirective = ".zero"; + AsciiDirective = ".ascii\t"; + AscizDirective = ".asciz\t"; + Data8bitsDirective = ".byte\t"; + Data16bitsDirective = ".short\t"; + Data32bitsDirective = ".long\t"; + Data64bitsDirective = ".quad\t"; + GPRel32Directive = NULL; + SunStyleELFSectionSwitchSyntax = true; + UsesELFSectionDirectiveForBSS = true; + HasMicrosoftFastStdCallMangling = false; + + //===--- Alignment Information ----------------------------------------===// + AlignDirective = ".align\t"; + AlignmentIsInBytes = true; + TextAlignFillValue = 0; + + //===--- Global Variable Emission Directives --------------------------===// + GlobalDirective = ".global"; + ExternDirective = ".extern"; + HasSetDirective = false; + HasAggressiveSymbolFolding = true; + LCOMMDirectiveType = LCOMM::None; + COMMDirectiveAlignmentIsInBytes = false; + 
HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = true; + HasNoDeadStrip = true; + HasSymbolResolver = false; + WeakRefDirective = ".weakref\t"; + WeakDefDirective = ".weakdef\t"; + LinkOnceDirective = NULL; + HiddenVisibilityAttr = MCSA_Hidden; + HiddenDeclarationVisibilityAttr = MCSA_Hidden; + ProtectedVisibilityAttr = MCSA_Protected; + + //===--- Dwarf Emission Directives -----------------------------------===// + HasLEB128 = true; + SupportsDebugInformation = true; + ExceptionsType = ExceptionHandling::None; + DwarfUsesInlineInfoSection = false; + DwarfSectionOffsetDirective = ".offset"; + +} +const char* +AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const +{ + switch (AS) { + default: + return NULL; + case 0: + return NULL; + }; + return NULL; +} + +const MCSection* +AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const +{ + return NULL; +} diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h new file mode 100644 index 0000000..0ca264b --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h @@ -0,0 +1,30 @@ +//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: Add full description +// +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUMCASMINFO_H_ +#define AMDGPUMCASMINFO_H_ + +#include "llvm/MC/MCAsmInfo.h" +namespace llvm { + class Target; + class StringRef; + + class AMDGPUMCAsmInfo : public MCAsmInfo { + public: + explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT); + const char* + getDataASDirective(unsigned int Size, unsigned int AS) const; + const MCSection* getNonexecutableStackSection(MCContext &CTX) const; + }; +} // namespace llvm +#endif // AMDGPUMCASMINFO_H_ diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp new file mode 100644 index 0000000..5c6d13c --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -0,0 +1,61 @@ +#include "AMDGPUMCTargetDesc.h" +#include "AMDGPUMCAsmInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "AMDGPUGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "AMDGPUGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "AMDGPUGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createAMDGPUMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitAMDILMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitAMDILMCRegisterInfo(X, 0); + return X; +} + +static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo * X = new MCSubtargetInfo(); + InitAMDILMCSubtargetInfo(X, TT, CPU, FS); + return X; +} 
+ +static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +extern "C" void LLVMInitializeAMDGPUTargetMC() { + + RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget); + + TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo); + + TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo); + + TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo); + + TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo); + +} diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h new file mode 100644 index 0000000..ed858b3 --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -0,0 +1,35 @@ +//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides AMDGPU specific target descriptions. 
+// +//===----------------------------------------------------------------------===// +// + +#ifndef AMDGPUMCTARGETDESC_H +#define AMDGPUMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; + +extern Target TheAMDGPUTarget; + +} // End llvm namespace + +#define GET_REGINFO_ENUM +#include "AMDGPUGenRegisterInfo.inc" + +#define GET_INSTRINFO_ENUM +#include "AMDGPUGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "AMDGPUGenSubtargetInfo.inc" + +#endif // AMDGPUMCTARGETDESC_H diff --git a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000..2c0d5af --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,7 @@ + +add_llvm_library(LLVMAMDGPUDesc + AMDGPUMCTargetDesc.cpp + AMDGPUMCAsmInfo.cpp + ) + +add_dependencies(LLVMAMDGPUDesc AMDGPUCommonTableGen) diff --git a/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..c7745d6 --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AMDGPUDesc +parent = AMDGPU +required_libraries = AMDGPUInfo MC +add_to_library_groups = AMDGPU diff --git a/lib/Target/AMDGPU/MCTargetDesc/Makefile b/lib/Target/AMDGPU/MCTargetDesc/Makefile new file mode 100644 index 0000000..5ad6866 --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/AMDGPU/TargetDesc/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMAMDGPUDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AMDGPU/Makefile b/lib/Target/AMDGPU/Makefile new file mode 100644 index 0000000..36c8126 --- /dev/null +++ b/lib/Target/AMDGPU/Makefile @@ -0,0 +1,22 @@ +##===- lib/Target/AMDGPU/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMAMDGPUCodeGen +TARGET = AMDGPU + +# Make sure that tblgen is run, first thing. 
+BUILT_SOURCES = AMDGPUGenRegisterInfo.inc AMDGPUGenInstrInfo.inc \ + AMDGPUGenDAGISel.inc \ AMDGPUGenSubtargetInfo.inc \ + AMDGPUGenCodeEmitter.inc \ AMDGPUGenCallingConv.inc \ + AMDGPUGenIntrinsics.inc AMDGPUGenDFAPacketizer.inc \ + +DIRS = TargetInfo MCTargetDesc + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td new file mode 100644 index 0000000..410d887 --- /dev/null +++ b/lib/Target/AMDGPU/Processors.td @@ -0,0 +1,27 @@ +//===-- Processors.td - TODO: Add brief description -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// AMDIL processors supported. +// +//===----------------------------------------------------------------------===// + +class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features> +: Processor<Name, itin, Features>; +def : Proc<"rv710", R600_EG_Itin, []>; +def : Proc<"rv730", R600_EG_Itin, []>; +def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>; +def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; +def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; +def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; +def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; +def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; +def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; +def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>; +def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>; +def : Proc<"SI", SI_Itin, []>; diff --git a/lib/Target/AMDGPU/R600CodeEmitter.cpp b/lib/Target/AMDGPU/R600CodeEmitter.cpp new file mode 100644 index 0000000..076ca5b --- /dev/null +++ b/lib/Target/AMDGPU/R600CodeEmitter.cpp @@ -0,0 +1,613 @@ +//===-- R600CodeEmitter.cpp - Code 
Emitter for R600->Cayman GPU families --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This code emitter outputs bytecode that is understood by the r600g driver +// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA, +// except that the sizes of the instruction fields are rounded up to the +// nearest byte. +// +// [1] http://www.mesa3d.org/ +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUUtil.h" +#include "AMDILCodeEmitter.h" +#include "AMDILInstrInfo.h" +#include "AMDILUtilityFunctions.h" +#include "R600InstrInfo.h" +#include "R600RegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetMachine.h" + +#include + +#define SRC_BYTE_COUNT 11 +#define DST_BYTE_COUNT 5 + +using namespace llvm; + +namespace { + +class R600CodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter { + +private: + + static char ID; + formatted_raw_ostream &_OS; + const TargetMachine * TM; + const MachineRegisterInfo * MRI; + const R600RegisterInfo * TRI; + + bool isCube; + bool isReduction; + bool isVector; + unsigned currentElement; + bool isLast; + + unsigned section_start; + +public: + + R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID), + _OS(OS), TM(NULL), isCube(false), isReduction(false), isVector(false), + isLast(true) { } + + const char *getPassName() const { return "AMDGPU Machine Code Emitter"; } + + bool runOnMachineFunction(MachineFunction &MF); + virtual uint64_t getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) 
const; + +private: + + void emitALUInstr(MachineInstr &MI); + void emitSrc(const MachineOperand & MO, int chan_override = -1); + void emitDst(const MachineOperand & MO); + void emitALU(MachineInstr &MI, unsigned numSrc); + void emitTexInstr(MachineInstr &MI); + void emitFCInstr(MachineInstr &MI); + + void emitNullBytes(unsigned int byteCount); + + void emitByte(unsigned int byte); + + void emitTwoBytes(uint32_t bytes); + + void emit(uint32_t value); + void emit(uint64_t value); + + unsigned getHWReg(unsigned regNo) const; + +}; + +} // End anonymous namespace + +enum RegElement { + ELEMENT_X = 0, + ELEMENT_Y, + ELEMENT_Z, + ELEMENT_W +}; + +enum InstrTypes { + INSTR_ALU = 0, + INSTR_TEX, + INSTR_FC, + INSTR_NATIVE, + INSTR_VTX +}; + +enum FCInstr { + FC_IF = 0, + FC_ELSE, + FC_ENDIF, + FC_BGNLOOP, + FC_ENDLOOP, + FC_BREAK, + FC_BREAK_NZ_INT, + FC_CONTINUE, + FC_BREAK_Z_INT +}; + +enum TextureTypes { + TEXTURE_1D = 1, + TEXTURE_2D, + TEXTURE_3D, + TEXTURE_CUBE, + TEXTURE_RECT, + TEXTURE_SHADOW1D, + TEXTURE_SHADOW2D, + TEXTURE_SHADOWRECT, + TEXTURE_1D_ARRAY, + TEXTURE_2D_ARRAY, + TEXTURE_SHADOW1D_ARRAY, + TEXTURE_SHADOW2D_ARRAY +}; + +char R600CodeEmitter::ID = 0; + +FunctionPass *llvm::createR600CodeEmitterPass(formatted_raw_ostream &OS) { + return new R600CodeEmitter(OS); +} + +bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { + + TM = &MF.getTarget(); + MRI = &MF.getRegInfo(); + TRI = static_cast<const R600RegisterInfo *>(TM->getRegisterInfo()); + const R600InstrInfo * TII = static_cast<const R600InstrInfo *>(TM->getInstrInfo()); + const AMDILSubtarget &STM = TM->getSubtarget<AMDILSubtarget>(); + std::string gpu = STM.getDeviceName(); + + if (STM.dumpCode()) { + MF.dump(); + } + + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + MachineInstr &MI = *I; + isReduction = AMDGPU::isReductionOp(MI.getOpcode()); + isVector = TII->isVector(MI); + isCube = 
AMDGPU::isCubeOp(MI.getOpcode()); + if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) { + continue; + } + if (AMDGPU::isTexOp(MI.getOpcode())) { + emitTexInstr(MI); + } else if (AMDGPU::isFCOp(MI.getOpcode())){ + emitFCInstr(MI); + } else if (isReduction || isVector || isCube) { + isLast = false; + // XXX: On Cayman, some (all?) of the vector instructions only need + // to fill the first three slots. + for (currentElement = 0; currentElement < 4; currentElement++) { + isLast = (currentElement == 3); + emitALUInstr(MI); + } + isReduction = false; + isVector = false; + isCube = false; + } else if (MI.getOpcode() == AMDGPU::RETURN || + MI.getOpcode() == AMDGPU::BUNDLE || + MI.getOpcode() == AMDGPU::KILL) { + continue; + } else { + switch(MI.getOpcode()) { + case AMDGPU::RAT_WRITE_CACHELESS_eg: + { + uint64_t inst = getBinaryCodeForInstr(MI); + // Set End Of Program bit + // XXX: Need better check of end of program. EOP should be + // encoded in one of the operands of the MI, and it should be + // set in a prior pass. + MachineBasicBlock::iterator NextI = llvm::next(I); + MachineInstr &NextMI = *NextI; + if (NextMI.getOpcode() == AMDGPU::RETURN) { + inst |= (((uint64_t)1) << 53); + } + emitByte(INSTR_NATIVE); + emit(inst); + break; + } + case AMDGPU::VTX_READ_PARAM_eg: + case AMDGPU::VTX_READ_GLOBAL_eg: + case AMDGPU::VTX_READ_GLOBAL_128_eg: + { + uint64_t InstWord01 = getBinaryCodeForInstr(MI); + uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset + + emitByte(INSTR_VTX); + emit(InstWord01); + emit(InstWord2); + break; + } + + default: + emitALUInstr(MI); + break; + } + } + } + } + return false; +} + +void R600CodeEmitter::emitALUInstr(MachineInstr &MI) +{ + + unsigned numOperands = MI.getNumExplicitOperands(); + + // Some instructions are just place holder instructions that represent + // operations that the GPU does automatically. They should be ignored. 
+ if (AMDGPU::isPlaceHolderOpcode(MI.getOpcode())) { + return; + } + + // XXX Check if instruction writes a result + if (numOperands < 1) { + return; + } + const MachineOperand dstOp = MI.getOperand(0); + + // Emit instruction type + emitByte(0); + + if (isCube) { + static const int cube_src_swz[] = {2, 2, 0, 1}; + emitSrc(MI.getOperand(1), cube_src_swz[currentElement]); + emitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]); + emitNullBytes(SRC_BYTE_COUNT); + } else { + unsigned int opIndex; + for (opIndex = 1; opIndex < numOperands; opIndex++) { + // Literal constants are always stored as the last operand. + if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { + break; + } + emitSrc(MI.getOperand(opIndex)); + } + + // Emit zeros for unused sources + for ( ; opIndex < 4; opIndex++) { + emitNullBytes(SRC_BYTE_COUNT); + } + } + + emitDst(dstOp); + + emitALU(MI, numOperands - 1); +} + +void R600CodeEmitter::emitSrc(const MachineOperand & MO, int chan_override) +{ + uint32_t value = 0; + // Emit the source select (2 bytes). For GPRs, this is the register index. + // For other potential instruction operands, (e.g. constant registers) the + // value of the source select is defined in the r600isa docs. + if (MO.isReg()) { + unsigned reg = MO.getReg(); + emitTwoBytes(getHWReg(reg)); + if (reg == AMDGPU::ALU_LITERAL_X) { + const MachineInstr * parent = MO.getParent(); + unsigned immOpIndex = parent->getNumExplicitOperands() - 1; + MachineOperand immOp = parent->getOperand(immOpIndex); + if (immOp.isFPImm()) { + value = immOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue(); + } else { + assert(immOp.isImm()); + value = immOp.getImm(); + } + } + } else { + // XXX: Handle other operand types. 
+ emitTwoBytes(0); + } + + // Emit the source channel (1 byte) + if (chan_override != -1) { + emitByte(chan_override); + } else if (isReduction) { + emitByte(currentElement); + } else if (MO.isReg()) { + emitByte(TRI->getHWRegChan(MO.getReg())); + } else { + emitByte(0); + } + + // XXX: Emit isNegated (1 byte) + if ((!(MO.getTargetFlags() & MO_FLAG_ABS)) + && (MO.getTargetFlags() & MO_FLAG_NEG || + (MO.isReg() && + (MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){ + emitByte(1); + } else { + emitByte(0); + } + + // Emit isAbsolute (1 byte) + if (MO.getTargetFlags() & MO_FLAG_ABS) { + emitByte(1); + } else { + emitByte(0); + } + + // XXX: Emit relative addressing mode (1 byte) + emitByte(0); + + // Emit kc_bank, This will be adjusted later by r600_asm + emitByte(0); + + // Emit the literal value, if applicable (4 bytes). + emit(value); + +} + +void R600CodeEmitter::emitDst(const MachineOperand & MO) +{ + if (MO.isReg()) { + // Emit the destination register index (1 byte) + emitByte(getHWReg(MO.getReg())); + + // Emit the element of the destination register (1 byte) + if (isReduction || isCube || isVector) { + emitByte(currentElement); + } else { + emitByte(TRI->getHWRegChan(MO.getReg())); + } + + // Emit isClamped (1 byte) + if (MO.getTargetFlags() & MO_FLAG_CLAMP) { + emitByte(1); + } else { + emitByte(0); + } + + // Emit writemask (1 byte). + if (((isReduction || isVector) && + currentElement != TRI->getHWRegChan(MO.getReg())) + || MO.getTargetFlags() & MO_FLAG_MASK) { + emitByte(0); + } else { + emitByte(1); + } + + // XXX: Emit relative addressing mode + emitByte(0); + } else { + // XXX: Handle other operand types. Are there any for destination regs? 
+ emitNullBytes(DST_BYTE_COUNT); + } +} + +void R600CodeEmitter::emitALU(MachineInstr &MI, unsigned numSrc) +{ + // Emit the instruction (2 bytes) + emitTwoBytes(getBinaryCodeForInstr(MI)); + + // Emit isLast (for this instruction group) (1 byte) + if (isLast) { + emitByte(1); + } else { + emitByte(0); + } + // Emit isOp3 (1 byte) + if (numSrc == 3) { + emitByte(1); + } else { + emitByte(0); + } + + // XXX: Emit predicate (1 byte) + emitByte(0); + + // XXX: Emit bank swizzle. (1 byte) Do we need this? It looks like + // r600_asm.c sets it. + emitByte(0); + + // XXX: Emit bank_swizzle_force (1 byte) Not sure what this is for. + emitByte(0); + + // XXX: Emit OMOD (1 byte) Not implemented. + emitByte(0); + + // XXX: Emit index_mode. I think this is for indirect addressing, so we + // don't need to worry about it. + emitByte(0); +} + +void R600CodeEmitter::emitTexInstr(MachineInstr &MI) +{ + + unsigned opcode = MI.getOpcode(); + bool hasOffsets = (opcode == AMDGPU::TEX_LD); + unsigned op_offset = hasOffsets ? 3 : 0; + int64_t sampler = MI.getOperand(op_offset+2).getImm(); + int64_t textureType = MI.getOperand(op_offset+3).getImm(); + unsigned srcSelect[4] = {0, 1, 2, 3}; + + // Emit instruction type + emitByte(1); + + // Emit instruction + emitByte(getBinaryCodeForInstr(MI)); + + // XXX: Emit resource id r600_shader.c uses sampler + 1. Why? 
+ emitByte(sampler + 1 + 1); + + // Emit source register + emitByte(getHWReg(MI.getOperand(1).getReg())); + + // XXX: Emit src isRelativeAddress + emitByte(0); + + // Emit destination register + emitByte(getHWReg(MI.getOperand(0).getReg())); + + // XXX: Emit dst isRelativeAddress + emitByte(0); + + // XXX: Emit dst select + emitByte(0); // X + emitByte(1); // Y + emitByte(2); // Z + emitByte(3); // W + + // XXX: Emit lod bias + emitByte(0); + + // XXX: Emit coord types + unsigned coordType[4] = {1, 1, 1, 1}; + + if (textureType == TEXTURE_RECT + || textureType == TEXTURE_SHADOWRECT) { + coordType[ELEMENT_X] = 0; + coordType[ELEMENT_Y] = 0; + } + + if (textureType == TEXTURE_1D_ARRAY + || textureType == TEXTURE_SHADOW1D_ARRAY) { + if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) { + coordType[ELEMENT_Y] = 0; + } else { + coordType[ELEMENT_Z] = 0; + srcSelect[ELEMENT_Z] = ELEMENT_Y; + } + } else if (textureType == TEXTURE_2D_ARRAY + || textureType == TEXTURE_SHADOW2D_ARRAY) { + coordType[ELEMENT_Z] = 0; + } + + for (unsigned i = 0; i < 4; i++) { + emitByte(coordType[i]); + } + + // XXX: Emit offsets + if (hasOffsets) + for (unsigned i = 2; i < 5; i++) + emitByte(MI.getOperand(i).getImm()<<1); + else + emitNullBytes(3); + + // Emit sampler id + emitByte(sampler); + + // XXX: Emit source select + if ((textureType == TEXTURE_SHADOW1D + || textureType == TEXTURE_SHADOW2D + || textureType == TEXTURE_SHADOWRECT + || textureType == TEXTURE_SHADOW1D_ARRAY) + && opcode != AMDGPU::TEX_SAMPLE_C_L + && opcode != AMDGPU::TEX_SAMPLE_C_LB) { + srcSelect[ELEMENT_W] = ELEMENT_Z; + } + + for (unsigned i = 0; i < 4; i++) { + emitByte(srcSelect[i]); + } +} + +void R600CodeEmitter::emitFCInstr(MachineInstr &MI) +{ + // Emit instruction type + emitByte(INSTR_FC); + + // Emit SRC + unsigned numOperands = MI.getNumOperands(); + if (numOperands > 0) { + assert(numOperands == 1); + emitSrc(MI.getOperand(0)); + } else { + emitNullBytes(SRC_BYTE_COUNT); + } + + // Emit 
FC Instruction + enum FCInstr instr; + switch (MI.getOpcode()) { + case AMDGPU::BREAK_LOGICALZ_f32: + instr = FC_BREAK; + break; + case AMDGPU::BREAK_LOGICALNZ_f32: + case AMDGPU::BREAK_LOGICALNZ_i32: + instr = FC_BREAK_NZ_INT; + break; + case AMDGPU::BREAK_LOGICALZ_i32: + instr = FC_BREAK_Z_INT; + break; + case AMDGPU::CONTINUE_LOGICALNZ_f32: + case AMDGPU::CONTINUE_LOGICALNZ_i32: + instr = FC_CONTINUE; + break; + case AMDGPU::IF_LOGICALNZ_f32: + case AMDGPU::IF_LOGICALNZ_i32: + instr = FC_IF; + break; + case AMDGPU::IF_LOGICALZ_f32: + abort(); + break; + case AMDGPU::ELSE: + instr = FC_ELSE; + break; + case AMDGPU::ENDIF: + instr = FC_ENDIF; + break; + case AMDGPU::ENDLOOP: + instr = FC_ENDLOOP; + break; + case AMDGPU::WHILELOOP: + instr = FC_BGNLOOP; + break; + default: + abort(); + break; + } + emitByte(instr); +} + +void R600CodeEmitter::emitNullBytes(unsigned int byteCount) +{ + for (unsigned int i = 0; i < byteCount; i++) { + emitByte(0); + } +} + +void R600CodeEmitter::emitByte(unsigned int byte) +{ + _OS.write((uint8_t) byte & 0xff); +} +void R600CodeEmitter::emitTwoBytes(unsigned int bytes) +{ + _OS.write((uint8_t) (bytes & 0xff)); + _OS.write((uint8_t) ((bytes >> 8) & 0xff)); +} + +void R600CodeEmitter::emit(uint32_t value) +{ + for (unsigned i = 0; i < 4; i++) { + _OS.write((uint8_t) ((value >> (8 * i)) & 0xff)); + } +} + +void R600CodeEmitter::emit(uint64_t value) +{ + for (unsigned i = 0; i < 8; i++) { + emitByte((value >> (8 * i)) & 0xff); + } +} + +unsigned R600CodeEmitter::getHWReg(unsigned regNo) const +{ + unsigned hwReg; + + hwReg = TRI->getEncodingValue(regNo); + if (AMDGPU::R600_CReg32RegClass.contains(regNo)) { + hwReg += 512; + } + return hwReg; +} + +uint64_t R600CodeEmitter::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) const +{ + if (MO.isReg()) { + return getHWReg(MO.getReg()); + } else { + return MO.getImm(); + } +} + +#include "AMDGPUGenCodeEmitter.inc" + diff --git a/lib/Target/AMDGPU/R600GenRegisterInfo.pl 
b/lib/Target/AMDGPU/R600GenRegisterInfo.pl new file mode 100644 index 0000000..e286975 --- /dev/null +++ b/lib/Target/AMDGPU/R600GenRegisterInfo.pl @@ -0,0 +1,190 @@ +#===-- R600GenRegisterInfo.pl - Script for generating register info files --===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# +# +# This perl script prints to stdout .td code to be used as R600RegisterInfo.td +# it also generates a file called R600HwRegInfo.include, which contains helper +# functions for determining the hw encoding of registers. +# +#===------------------------------------------------------------------------===# + +use strict; +use warnings; + +use constant CONST_REG_COUNT => 100; +use constant TEMP_REG_COUNT => 128; + +my $CREG_MAX = CONST_REG_COUNT - 1; +my $TREG_MAX = TEMP_REG_COUNT - 1; + +print < encoding> : Register { + let Namespace = "AMDGPU"; + let HWEncoding = encoding; +} + +class R600Reg_128 subregs, bits<16> encoding> : + RegisterWithSubRegs { + let Namespace = "AMDGPU"; + let SubRegIndices = [sel_x, sel_y, sel_z, sel_w]; + let HWEncoding = encoding; +} + +STRING + +my $i; + +### REG DEFS ### + +my @creg_list = print_reg_defs(CONST_REG_COUNT * 4, "C"); +my @treg_list = print_reg_defs(TEMP_REG_COUNT * 4, "T"); + +my @t128reg; +my @treg_x; +for (my $i = 0; $i < TEMP_REG_COUNT; $i++) { + my $name = "T$i\_XYZW"; + print qq{def $name : R600Reg_128 <"T$i.XYZW", [T$i\_X, T$i\_Y, T$i\_Z, T$i\_W], $i >;\n}; + $t128reg[$i] = $name; + $treg_x[$i] = "T$i\_X"; + if ($i % 10 == 0) { + $t128reg[$i] .= "\n"; + $treg_x[$i] .= "\n"; + } +} + +my $treg_string = join(",", @treg_list); +my $creg_list = join(",", @creg_list); +my $t128_string = join(",", @t128reg); +my $treg_x_string = join(",", @treg_x); +print < { + dag set = s; +} + +def ZERO : R600Reg<"0.0", 248>; +def ONE : R600Reg<"1.0", 249>; 
+def NEG_ONE : R600Reg<"-1.0", 249>; +def ONE_INT : R600Reg<"1", 250>; +def HALF : R600Reg<"0.5", 252>; +def NEG_HALF : R600Reg<"-0.5", 252>; +def PV_X : R600Reg<"pv.x", 254>; +def ALU_LITERAL_X : R600Reg<"literal.x", 253>; + +def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add + $creg_list)>; + +def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add + $treg_string)>; + +def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add + $treg_x_string)>; + +def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add + R600_TReg32, + R600_CReg32, + ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; + +def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add + $t128_string)> +{ + let CopyCost = -1; +} + +STRING + +my %index_map; +my %chan_map; + +for ($i = 0; $i <= $#creg_list; $i++) { + push(@{$index_map{get_hw_index($i)}}, $creg_list[$i]); + push(@{$chan_map{get_chan_str($i)}}, $creg_list[$i]); +} + +for ($i = 0; $i <= $#treg_list; $i++) { + push(@{$index_map{get_hw_index($i)}}, $treg_list[$i]); + push(@{$chan_map{get_chan_str($i)}}, $treg_list[$i]); +} + +for ($i = 0; $i <= $#t128reg; $i++) { + push(@{$index_map{$i}}, $t128reg[$i]); + push(@{$chan_map{'X'}}, $t128reg[$i]); +} + +open(OUTFILE, ">", "R600HwRegInfo.include"); + +print OUTFILE <;\n}; + $reg_list[$i] = $name; + if ($i % 10 == 0) { + $reg_list[$i] .= "\n"; + } + } + return @reg_list; +} + +#Helper functions +sub get_hw_index { + my ($index) = @_; + return int($index / 4); +} + +sub get_chan_str { + my ($index) = @_; + my $chan = $index % 4; + if ($chan == 0 ) { + return 'X'; + } elsif ($chan == 1) { + return 'Y'; + } elsif ($chan == 2) { + return 'Z'; + } elsif ($chan == 3) { + return 'W'; + } else { + die("Unknown chan value: $chan"); + } +} diff --git a/lib/Target/AMDGPU/R600HwRegInfo.include b/lib/Target/AMDGPU/R600HwRegInfo.include new file mode 100644 index 0000000..e830a4c --- /dev/null +++ b/lib/Target/AMDGPU/R600HwRegInfo.include @@ -0,0 
+1,1056 @@ + +unsigned R600RegisterInfo::getHWRegChanGen(unsigned reg) const +{ + switch(reg) { + default: assert(!"Unknown register"); return 0; + case AMDGPU::C0_Z: + case AMDGPU::C1_Z: + case AMDGPU::C2_Z: + case AMDGPU::C3_Z: + case AMDGPU::C4_Z: + case AMDGPU::C5_Z: + case AMDGPU::C6_Z: + case AMDGPU::C7_Z: + case AMDGPU::C8_Z: + case AMDGPU::C9_Z: + case AMDGPU::C10_Z: + case AMDGPU::C11_Z: + case AMDGPU::C12_Z: + case AMDGPU::C13_Z: + case AMDGPU::C14_Z: + case AMDGPU::C15_Z: + case AMDGPU::C16_Z: + case AMDGPU::C17_Z: + case AMDGPU::C18_Z: + case AMDGPU::C19_Z: + case AMDGPU::C20_Z: + case AMDGPU::C21_Z: + case AMDGPU::C22_Z: + case AMDGPU::C23_Z: + case AMDGPU::C24_Z: + case AMDGPU::C25_Z: + case AMDGPU::C26_Z: + case AMDGPU::C27_Z: + case AMDGPU::C28_Z: + case AMDGPU::C29_Z: + case AMDGPU::C30_Z: + case AMDGPU::C31_Z: + case AMDGPU::C32_Z: + case AMDGPU::C33_Z: + case AMDGPU::C34_Z: + case AMDGPU::C35_Z: + case AMDGPU::C36_Z: + case AMDGPU::C37_Z: + case AMDGPU::C38_Z: + case AMDGPU::C39_Z: + case AMDGPU::C40_Z: + case AMDGPU::C41_Z: + case AMDGPU::C42_Z: + case AMDGPU::C43_Z: + case AMDGPU::C44_Z: + case AMDGPU::C45_Z: + case AMDGPU::C46_Z: + case AMDGPU::C47_Z: + case AMDGPU::C48_Z: + case AMDGPU::C49_Z: + case AMDGPU::C50_Z: + case AMDGPU::C51_Z: + case AMDGPU::C52_Z: + case AMDGPU::C53_Z: + case AMDGPU::C54_Z: + case AMDGPU::C55_Z: + case AMDGPU::C56_Z: + case AMDGPU::C57_Z: + case AMDGPU::C58_Z: + case AMDGPU::C59_Z: + case AMDGPU::C60_Z: + case AMDGPU::C61_Z: + case AMDGPU::C62_Z: + case AMDGPU::C63_Z: + case AMDGPU::C64_Z: + case AMDGPU::C65_Z: + case AMDGPU::C66_Z: + case AMDGPU::C67_Z: + case AMDGPU::C68_Z: + case AMDGPU::C69_Z: + case AMDGPU::C70_Z: + case AMDGPU::C71_Z: + case AMDGPU::C72_Z: + case AMDGPU::C73_Z: + case AMDGPU::C74_Z: + case AMDGPU::C75_Z: + case AMDGPU::C76_Z: + case AMDGPU::C77_Z: + case AMDGPU::C78_Z: + case AMDGPU::C79_Z: + case AMDGPU::C80_Z: + case AMDGPU::C81_Z: + case AMDGPU::C82_Z: + case AMDGPU::C83_Z: + case 
AMDGPU::C84_Z: + case AMDGPU::C85_Z: + case AMDGPU::C86_Z: + case AMDGPU::C87_Z: + case AMDGPU::C88_Z: + case AMDGPU::C89_Z: + case AMDGPU::C90_Z: + case AMDGPU::C91_Z: + case AMDGPU::C92_Z: + case AMDGPU::C93_Z: + case AMDGPU::C94_Z: + case AMDGPU::C95_Z: + case AMDGPU::C96_Z: + case AMDGPU::C97_Z: + case AMDGPU::C98_Z: + case AMDGPU::C99_Z: + case AMDGPU::T0_Z: + case AMDGPU::T1_Z: + case AMDGPU::T2_Z: + case AMDGPU::T3_Z: + case AMDGPU::T4_Z: + case AMDGPU::T5_Z: + case AMDGPU::T6_Z: + case AMDGPU::T7_Z: + case AMDGPU::T8_Z: + case AMDGPU::T9_Z: + case AMDGPU::T10_Z: + case AMDGPU::T11_Z: + case AMDGPU::T12_Z: + case AMDGPU::T13_Z: + case AMDGPU::T14_Z: + case AMDGPU::T15_Z: + case AMDGPU::T16_Z: + case AMDGPU::T17_Z: + case AMDGPU::T18_Z: + case AMDGPU::T19_Z: + case AMDGPU::T20_Z: + case AMDGPU::T21_Z: + case AMDGPU::T22_Z: + case AMDGPU::T23_Z: + case AMDGPU::T24_Z: + case AMDGPU::T25_Z: + case AMDGPU::T26_Z: + case AMDGPU::T27_Z: + case AMDGPU::T28_Z: + case AMDGPU::T29_Z: + case AMDGPU::T30_Z: + case AMDGPU::T31_Z: + case AMDGPU::T32_Z: + case AMDGPU::T33_Z: + case AMDGPU::T34_Z: + case AMDGPU::T35_Z: + case AMDGPU::T36_Z: + case AMDGPU::T37_Z: + case AMDGPU::T38_Z: + case AMDGPU::T39_Z: + case AMDGPU::T40_Z: + case AMDGPU::T41_Z: + case AMDGPU::T42_Z: + case AMDGPU::T43_Z: + case AMDGPU::T44_Z: + case AMDGPU::T45_Z: + case AMDGPU::T46_Z: + case AMDGPU::T47_Z: + case AMDGPU::T48_Z: + case AMDGPU::T49_Z: + case AMDGPU::T50_Z: + case AMDGPU::T51_Z: + case AMDGPU::T52_Z: + case AMDGPU::T53_Z: + case AMDGPU::T54_Z: + case AMDGPU::T55_Z: + case AMDGPU::T56_Z: + case AMDGPU::T57_Z: + case AMDGPU::T58_Z: + case AMDGPU::T59_Z: + case AMDGPU::T60_Z: + case AMDGPU::T61_Z: + case AMDGPU::T62_Z: + case AMDGPU::T63_Z: + case AMDGPU::T64_Z: + case AMDGPU::T65_Z: + case AMDGPU::T66_Z: + case AMDGPU::T67_Z: + case AMDGPU::T68_Z: + case AMDGPU::T69_Z: + case AMDGPU::T70_Z: + case AMDGPU::T71_Z: + case AMDGPU::T72_Z: + case AMDGPU::T73_Z: + case AMDGPU::T74_Z: + case 
AMDGPU::T75_Z: + case AMDGPU::T76_Z: + case AMDGPU::T77_Z: + case AMDGPU::T78_Z: + case AMDGPU::T79_Z: + case AMDGPU::T80_Z: + case AMDGPU::T81_Z: + case AMDGPU::T82_Z: + case AMDGPU::T83_Z: + case AMDGPU::T84_Z: + case AMDGPU::T85_Z: + case AMDGPU::T86_Z: + case AMDGPU::T87_Z: + case AMDGPU::T88_Z: + case AMDGPU::T89_Z: + case AMDGPU::T90_Z: + case AMDGPU::T91_Z: + case AMDGPU::T92_Z: + case AMDGPU::T93_Z: + case AMDGPU::T94_Z: + case AMDGPU::T95_Z: + case AMDGPU::T96_Z: + case AMDGPU::T97_Z: + case AMDGPU::T98_Z: + case AMDGPU::T99_Z: + case AMDGPU::T100_Z: + case AMDGPU::T101_Z: + case AMDGPU::T102_Z: + case AMDGPU::T103_Z: + case AMDGPU::T104_Z: + case AMDGPU::T105_Z: + case AMDGPU::T106_Z: + case AMDGPU::T107_Z: + case AMDGPU::T108_Z: + case AMDGPU::T109_Z: + case AMDGPU::T110_Z: + case AMDGPU::T111_Z: + case AMDGPU::T112_Z: + case AMDGPU::T113_Z: + case AMDGPU::T114_Z: + case AMDGPU::T115_Z: + case AMDGPU::T116_Z: + case AMDGPU::T117_Z: + case AMDGPU::T118_Z: + case AMDGPU::T119_Z: + case AMDGPU::T120_Z: + case AMDGPU::T121_Z: + case AMDGPU::T122_Z: + case AMDGPU::T123_Z: + case AMDGPU::T124_Z: + case AMDGPU::T125_Z: + case AMDGPU::T126_Z: + case AMDGPU::T127_Z: + return 2; + + case AMDGPU::C0_W: + case AMDGPU::C1_W: + case AMDGPU::C2_W: + case AMDGPU::C3_W: + case AMDGPU::C4_W: + case AMDGPU::C5_W: + case AMDGPU::C6_W: + case AMDGPU::C7_W: + case AMDGPU::C8_W: + case AMDGPU::C9_W: + case AMDGPU::C10_W: + case AMDGPU::C11_W: + case AMDGPU::C12_W: + case AMDGPU::C13_W: + case AMDGPU::C14_W: + case AMDGPU::C15_W: + case AMDGPU::C16_W: + case AMDGPU::C17_W: + case AMDGPU::C18_W: + case AMDGPU::C19_W: + case AMDGPU::C20_W: + case AMDGPU::C21_W: + case AMDGPU::C22_W: + case AMDGPU::C23_W: + case AMDGPU::C24_W: + case AMDGPU::C25_W: + case AMDGPU::C26_W: + case AMDGPU::C27_W: + case AMDGPU::C28_W: + case AMDGPU::C29_W: + case AMDGPU::C30_W: + case AMDGPU::C31_W: + case AMDGPU::C32_W: + case AMDGPU::C33_W: + case AMDGPU::C34_W: + case AMDGPU::C35_W: + case 
AMDGPU::C36_W: + case AMDGPU::C37_W: + case AMDGPU::C38_W: + case AMDGPU::C39_W: + case AMDGPU::C40_W: + case AMDGPU::C41_W: + case AMDGPU::C42_W: + case AMDGPU::C43_W: + case AMDGPU::C44_W: + case AMDGPU::C45_W: + case AMDGPU::C46_W: + case AMDGPU::C47_W: + case AMDGPU::C48_W: + case AMDGPU::C49_W: + case AMDGPU::C50_W: + case AMDGPU::C51_W: + case AMDGPU::C52_W: + case AMDGPU::C53_W: + case AMDGPU::C54_W: + case AMDGPU::C55_W: + case AMDGPU::C56_W: + case AMDGPU::C57_W: + case AMDGPU::C58_W: + case AMDGPU::C59_W: + case AMDGPU::C60_W: + case AMDGPU::C61_W: + case AMDGPU::C62_W: + case AMDGPU::C63_W: + case AMDGPU::C64_W: + case AMDGPU::C65_W: + case AMDGPU::C66_W: + case AMDGPU::C67_W: + case AMDGPU::C68_W: + case AMDGPU::C69_W: + case AMDGPU::C70_W: + case AMDGPU::C71_W: + case AMDGPU::C72_W: + case AMDGPU::C73_W: + case AMDGPU::C74_W: + case AMDGPU::C75_W: + case AMDGPU::C76_W: + case AMDGPU::C77_W: + case AMDGPU::C78_W: + case AMDGPU::C79_W: + case AMDGPU::C80_W: + case AMDGPU::C81_W: + case AMDGPU::C82_W: + case AMDGPU::C83_W: + case AMDGPU::C84_W: + case AMDGPU::C85_W: + case AMDGPU::C86_W: + case AMDGPU::C87_W: + case AMDGPU::C88_W: + case AMDGPU::C89_W: + case AMDGPU::C90_W: + case AMDGPU::C91_W: + case AMDGPU::C92_W: + case AMDGPU::C93_W: + case AMDGPU::C94_W: + case AMDGPU::C95_W: + case AMDGPU::C96_W: + case AMDGPU::C97_W: + case AMDGPU::C98_W: + case AMDGPU::C99_W: + case AMDGPU::T0_W: + case AMDGPU::T1_W: + case AMDGPU::T2_W: + case AMDGPU::T3_W: + case AMDGPU::T4_W: + case AMDGPU::T5_W: + case AMDGPU::T6_W: + case AMDGPU::T7_W: + case AMDGPU::T8_W: + case AMDGPU::T9_W: + case AMDGPU::T10_W: + case AMDGPU::T11_W: + case AMDGPU::T12_W: + case AMDGPU::T13_W: + case AMDGPU::T14_W: + case AMDGPU::T15_W: + case AMDGPU::T16_W: + case AMDGPU::T17_W: + case AMDGPU::T18_W: + case AMDGPU::T19_W: + case AMDGPU::T20_W: + case AMDGPU::T21_W: + case AMDGPU::T22_W: + case AMDGPU::T23_W: + case AMDGPU::T24_W: + case AMDGPU::T25_W: + case AMDGPU::T26_W: + case 
AMDGPU::T27_W: + case AMDGPU::T28_W: + case AMDGPU::T29_W: + case AMDGPU::T30_W: + case AMDGPU::T31_W: + case AMDGPU::T32_W: + case AMDGPU::T33_W: + case AMDGPU::T34_W: + case AMDGPU::T35_W: + case AMDGPU::T36_W: + case AMDGPU::T37_W: + case AMDGPU::T38_W: + case AMDGPU::T39_W: + case AMDGPU::T40_W: + case AMDGPU::T41_W: + case AMDGPU::T42_W: + case AMDGPU::T43_W: + case AMDGPU::T44_W: + case AMDGPU::T45_W: + case AMDGPU::T46_W: + case AMDGPU::T47_W: + case AMDGPU::T48_W: + case AMDGPU::T49_W: + case AMDGPU::T50_W: + case AMDGPU::T51_W: + case AMDGPU::T52_W: + case AMDGPU::T53_W: + case AMDGPU::T54_W: + case AMDGPU::T55_W: + case AMDGPU::T56_W: + case AMDGPU::T57_W: + case AMDGPU::T58_W: + case AMDGPU::T59_W: + case AMDGPU::T60_W: + case AMDGPU::T61_W: + case AMDGPU::T62_W: + case AMDGPU::T63_W: + case AMDGPU::T64_W: + case AMDGPU::T65_W: + case AMDGPU::T66_W: + case AMDGPU::T67_W: + case AMDGPU::T68_W: + case AMDGPU::T69_W: + case AMDGPU::T70_W: + case AMDGPU::T71_W: + case AMDGPU::T72_W: + case AMDGPU::T73_W: + case AMDGPU::T74_W: + case AMDGPU::T75_W: + case AMDGPU::T76_W: + case AMDGPU::T77_W: + case AMDGPU::T78_W: + case AMDGPU::T79_W: + case AMDGPU::T80_W: + case AMDGPU::T81_W: + case AMDGPU::T82_W: + case AMDGPU::T83_W: + case AMDGPU::T84_W: + case AMDGPU::T85_W: + case AMDGPU::T86_W: + case AMDGPU::T87_W: + case AMDGPU::T88_W: + case AMDGPU::T89_W: + case AMDGPU::T90_W: + case AMDGPU::T91_W: + case AMDGPU::T92_W: + case AMDGPU::T93_W: + case AMDGPU::T94_W: + case AMDGPU::T95_W: + case AMDGPU::T96_W: + case AMDGPU::T97_W: + case AMDGPU::T98_W: + case AMDGPU::T99_W: + case AMDGPU::T100_W: + case AMDGPU::T101_W: + case AMDGPU::T102_W: + case AMDGPU::T103_W: + case AMDGPU::T104_W: + case AMDGPU::T105_W: + case AMDGPU::T106_W: + case AMDGPU::T107_W: + case AMDGPU::T108_W: + case AMDGPU::T109_W: + case AMDGPU::T110_W: + case AMDGPU::T111_W: + case AMDGPU::T112_W: + case AMDGPU::T113_W: + case AMDGPU::T114_W: + case AMDGPU::T115_W: + case AMDGPU::T116_W: + case 
AMDGPU::T117_W: + case AMDGPU::T118_W: + case AMDGPU::T119_W: + case AMDGPU::T120_W: + case AMDGPU::T121_W: + case AMDGPU::T122_W: + case AMDGPU::T123_W: + case AMDGPU::T124_W: + case AMDGPU::T125_W: + case AMDGPU::T126_W: + case AMDGPU::T127_W: + return 3; + + case AMDGPU::C0_X: + case AMDGPU::C1_X: + case AMDGPU::C2_X: + case AMDGPU::C3_X: + case AMDGPU::C4_X: + case AMDGPU::C5_X: + case AMDGPU::C6_X: + case AMDGPU::C7_X: + case AMDGPU::C8_X: + case AMDGPU::C9_X: + case AMDGPU::C10_X: + case AMDGPU::C11_X: + case AMDGPU::C12_X: + case AMDGPU::C13_X: + case AMDGPU::C14_X: + case AMDGPU::C15_X: + case AMDGPU::C16_X: + case AMDGPU::C17_X: + case AMDGPU::C18_X: + case AMDGPU::C19_X: + case AMDGPU::C20_X: + case AMDGPU::C21_X: + case AMDGPU::C22_X: + case AMDGPU::C23_X: + case AMDGPU::C24_X: + case AMDGPU::C25_X: + case AMDGPU::C26_X: + case AMDGPU::C27_X: + case AMDGPU::C28_X: + case AMDGPU::C29_X: + case AMDGPU::C30_X: + case AMDGPU::C31_X: + case AMDGPU::C32_X: + case AMDGPU::C33_X: + case AMDGPU::C34_X: + case AMDGPU::C35_X: + case AMDGPU::C36_X: + case AMDGPU::C37_X: + case AMDGPU::C38_X: + case AMDGPU::C39_X: + case AMDGPU::C40_X: + case AMDGPU::C41_X: + case AMDGPU::C42_X: + case AMDGPU::C43_X: + case AMDGPU::C44_X: + case AMDGPU::C45_X: + case AMDGPU::C46_X: + case AMDGPU::C47_X: + case AMDGPU::C48_X: + case AMDGPU::C49_X: + case AMDGPU::C50_X: + case AMDGPU::C51_X: + case AMDGPU::C52_X: + case AMDGPU::C53_X: + case AMDGPU::C54_X: + case AMDGPU::C55_X: + case AMDGPU::C56_X: + case AMDGPU::C57_X: + case AMDGPU::C58_X: + case AMDGPU::C59_X: + case AMDGPU::C60_X: + case AMDGPU::C61_X: + case AMDGPU::C62_X: + case AMDGPU::C63_X: + case AMDGPU::C64_X: + case AMDGPU::C65_X: + case AMDGPU::C66_X: + case AMDGPU::C67_X: + case AMDGPU::C68_X: + case AMDGPU::C69_X: + case AMDGPU::C70_X: + case AMDGPU::C71_X: + case AMDGPU::C72_X: + case AMDGPU::C73_X: + case AMDGPU::C74_X: + case AMDGPU::C75_X: + case AMDGPU::C76_X: + case AMDGPU::C77_X: + case AMDGPU::C78_X: + case 
AMDGPU::C79_X: + case AMDGPU::C80_X: + case AMDGPU::C81_X: + case AMDGPU::C82_X: + case AMDGPU::C83_X: + case AMDGPU::C84_X: + case AMDGPU::C85_X: + case AMDGPU::C86_X: + case AMDGPU::C87_X: + case AMDGPU::C88_X: + case AMDGPU::C89_X: + case AMDGPU::C90_X: + case AMDGPU::C91_X: + case AMDGPU::C92_X: + case AMDGPU::C93_X: + case AMDGPU::C94_X: + case AMDGPU::C95_X: + case AMDGPU::C96_X: + case AMDGPU::C97_X: + case AMDGPU::C98_X: + case AMDGPU::C99_X: + case AMDGPU::T0_X: + case AMDGPU::T1_X: + case AMDGPU::T2_X: + case AMDGPU::T3_X: + case AMDGPU::T4_X: + case AMDGPU::T5_X: + case AMDGPU::T6_X: + case AMDGPU::T7_X: + case AMDGPU::T8_X: + case AMDGPU::T9_X: + case AMDGPU::T10_X: + case AMDGPU::T11_X: + case AMDGPU::T12_X: + case AMDGPU::T13_X: + case AMDGPU::T14_X: + case AMDGPU::T15_X: + case AMDGPU::T16_X: + case AMDGPU::T17_X: + case AMDGPU::T18_X: + case AMDGPU::T19_X: + case AMDGPU::T20_X: + case AMDGPU::T21_X: + case AMDGPU::T22_X: + case AMDGPU::T23_X: + case AMDGPU::T24_X: + case AMDGPU::T25_X: + case AMDGPU::T26_X: + case AMDGPU::T27_X: + case AMDGPU::T28_X: + case AMDGPU::T29_X: + case AMDGPU::T30_X: + case AMDGPU::T31_X: + case AMDGPU::T32_X: + case AMDGPU::T33_X: + case AMDGPU::T34_X: + case AMDGPU::T35_X: + case AMDGPU::T36_X: + case AMDGPU::T37_X: + case AMDGPU::T38_X: + case AMDGPU::T39_X: + case AMDGPU::T40_X: + case AMDGPU::T41_X: + case AMDGPU::T42_X: + case AMDGPU::T43_X: + case AMDGPU::T44_X: + case AMDGPU::T45_X: + case AMDGPU::T46_X: + case AMDGPU::T47_X: + case AMDGPU::T48_X: + case AMDGPU::T49_X: + case AMDGPU::T50_X: + case AMDGPU::T51_X: + case AMDGPU::T52_X: + case AMDGPU::T53_X: + case AMDGPU::T54_X: + case AMDGPU::T55_X: + case AMDGPU::T56_X: + case AMDGPU::T57_X: + case AMDGPU::T58_X: + case AMDGPU::T59_X: + case AMDGPU::T60_X: + case AMDGPU::T61_X: + case AMDGPU::T62_X: + case AMDGPU::T63_X: + case AMDGPU::T64_X: + case AMDGPU::T65_X: + case AMDGPU::T66_X: + case AMDGPU::T67_X: + case AMDGPU::T68_X: + case AMDGPU::T69_X: + case 
AMDGPU::T70_X: + case AMDGPU::T71_X: + case AMDGPU::T72_X: + case AMDGPU::T73_X: + case AMDGPU::T74_X: + case AMDGPU::T75_X: + case AMDGPU::T76_X: + case AMDGPU::T77_X: + case AMDGPU::T78_X: + case AMDGPU::T79_X: + case AMDGPU::T80_X: + case AMDGPU::T81_X: + case AMDGPU::T82_X: + case AMDGPU::T83_X: + case AMDGPU::T84_X: + case AMDGPU::T85_X: + case AMDGPU::T86_X: + case AMDGPU::T87_X: + case AMDGPU::T88_X: + case AMDGPU::T89_X: + case AMDGPU::T90_X: + case AMDGPU::T91_X: + case AMDGPU::T92_X: + case AMDGPU::T93_X: + case AMDGPU::T94_X: + case AMDGPU::T95_X: + case AMDGPU::T96_X: + case AMDGPU::T97_X: + case AMDGPU::T98_X: + case AMDGPU::T99_X: + case AMDGPU::T100_X: + case AMDGPU::T101_X: + case AMDGPU::T102_X: + case AMDGPU::T103_X: + case AMDGPU::T104_X: + case AMDGPU::T105_X: + case AMDGPU::T106_X: + case AMDGPU::T107_X: + case AMDGPU::T108_X: + case AMDGPU::T109_X: + case AMDGPU::T110_X: + case AMDGPU::T111_X: + case AMDGPU::T112_X: + case AMDGPU::T113_X: + case AMDGPU::T114_X: + case AMDGPU::T115_X: + case AMDGPU::T116_X: + case AMDGPU::T117_X: + case AMDGPU::T118_X: + case AMDGPU::T119_X: + case AMDGPU::T120_X: + case AMDGPU::T121_X: + case AMDGPU::T122_X: + case AMDGPU::T123_X: + case AMDGPU::T124_X: + case AMDGPU::T125_X: + case AMDGPU::T126_X: + case AMDGPU::T127_X: + case AMDGPU::T0_XYZW: + case AMDGPU::T1_XYZW: + case AMDGPU::T2_XYZW: + case AMDGPU::T3_XYZW: + case AMDGPU::T4_XYZW: + case AMDGPU::T5_XYZW: + case AMDGPU::T6_XYZW: + case AMDGPU::T7_XYZW: + case AMDGPU::T8_XYZW: + case AMDGPU::T9_XYZW: + case AMDGPU::T10_XYZW: + case AMDGPU::T11_XYZW: + case AMDGPU::T12_XYZW: + case AMDGPU::T13_XYZW: + case AMDGPU::T14_XYZW: + case AMDGPU::T15_XYZW: + case AMDGPU::T16_XYZW: + case AMDGPU::T17_XYZW: + case AMDGPU::T18_XYZW: + case AMDGPU::T19_XYZW: + case AMDGPU::T20_XYZW: + case AMDGPU::T21_XYZW: + case AMDGPU::T22_XYZW: + case AMDGPU::T23_XYZW: + case AMDGPU::T24_XYZW: + case AMDGPU::T25_XYZW: + case AMDGPU::T26_XYZW: + case AMDGPU::T27_XYZW: + case 
AMDGPU::T28_XYZW: + case AMDGPU::T29_XYZW: + case AMDGPU::T30_XYZW: + case AMDGPU::T31_XYZW: + case AMDGPU::T32_XYZW: + case AMDGPU::T33_XYZW: + case AMDGPU::T34_XYZW: + case AMDGPU::T35_XYZW: + case AMDGPU::T36_XYZW: + case AMDGPU::T37_XYZW: + case AMDGPU::T38_XYZW: + case AMDGPU::T39_XYZW: + case AMDGPU::T40_XYZW: + case AMDGPU::T41_XYZW: + case AMDGPU::T42_XYZW: + case AMDGPU::T43_XYZW: + case AMDGPU::T44_XYZW: + case AMDGPU::T45_XYZW: + case AMDGPU::T46_XYZW: + case AMDGPU::T47_XYZW: + case AMDGPU::T48_XYZW: + case AMDGPU::T49_XYZW: + case AMDGPU::T50_XYZW: + case AMDGPU::T51_XYZW: + case AMDGPU::T52_XYZW: + case AMDGPU::T53_XYZW: + case AMDGPU::T54_XYZW: + case AMDGPU::T55_XYZW: + case AMDGPU::T56_XYZW: + case AMDGPU::T57_XYZW: + case AMDGPU::T58_XYZW: + case AMDGPU::T59_XYZW: + case AMDGPU::T60_XYZW: + case AMDGPU::T61_XYZW: + case AMDGPU::T62_XYZW: + case AMDGPU::T63_XYZW: + case AMDGPU::T64_XYZW: + case AMDGPU::T65_XYZW: + case AMDGPU::T66_XYZW: + case AMDGPU::T67_XYZW: + case AMDGPU::T68_XYZW: + case AMDGPU::T69_XYZW: + case AMDGPU::T70_XYZW: + case AMDGPU::T71_XYZW: + case AMDGPU::T72_XYZW: + case AMDGPU::T73_XYZW: + case AMDGPU::T74_XYZW: + case AMDGPU::T75_XYZW: + case AMDGPU::T76_XYZW: + case AMDGPU::T77_XYZW: + case AMDGPU::T78_XYZW: + case AMDGPU::T79_XYZW: + case AMDGPU::T80_XYZW: + case AMDGPU::T81_XYZW: + case AMDGPU::T82_XYZW: + case AMDGPU::T83_XYZW: + case AMDGPU::T84_XYZW: + case AMDGPU::T85_XYZW: + case AMDGPU::T86_XYZW: + case AMDGPU::T87_XYZW: + case AMDGPU::T88_XYZW: + case AMDGPU::T89_XYZW: + case AMDGPU::T90_XYZW: + case AMDGPU::T91_XYZW: + case AMDGPU::T92_XYZW: + case AMDGPU::T93_XYZW: + case AMDGPU::T94_XYZW: + case AMDGPU::T95_XYZW: + case AMDGPU::T96_XYZW: + case AMDGPU::T97_XYZW: + case AMDGPU::T98_XYZW: + case AMDGPU::T99_XYZW: + case AMDGPU::T100_XYZW: + case AMDGPU::T101_XYZW: + case AMDGPU::T102_XYZW: + case AMDGPU::T103_XYZW: + case AMDGPU::T104_XYZW: + case AMDGPU::T105_XYZW: + case AMDGPU::T106_XYZW: + case 
AMDGPU::T107_XYZW: + case AMDGPU::T108_XYZW: + case AMDGPU::T109_XYZW: + case AMDGPU::T110_XYZW: + case AMDGPU::T111_XYZW: + case AMDGPU::T112_XYZW: + case AMDGPU::T113_XYZW: + case AMDGPU::T114_XYZW: + case AMDGPU::T115_XYZW: + case AMDGPU::T116_XYZW: + case AMDGPU::T117_XYZW: + case AMDGPU::T118_XYZW: + case AMDGPU::T119_XYZW: + case AMDGPU::T120_XYZW: + case AMDGPU::T121_XYZW: + case AMDGPU::T122_XYZW: + case AMDGPU::T123_XYZW: + case AMDGPU::T124_XYZW: + case AMDGPU::T125_XYZW: + case AMDGPU::T126_XYZW: + case AMDGPU::T127_XYZW: + return 0; + + case AMDGPU::C0_Y: + case AMDGPU::C1_Y: + case AMDGPU::C2_Y: + case AMDGPU::C3_Y: + case AMDGPU::C4_Y: + case AMDGPU::C5_Y: + case AMDGPU::C6_Y: + case AMDGPU::C7_Y: + case AMDGPU::C8_Y: + case AMDGPU::C9_Y: + case AMDGPU::C10_Y: + case AMDGPU::C11_Y: + case AMDGPU::C12_Y: + case AMDGPU::C13_Y: + case AMDGPU::C14_Y: + case AMDGPU::C15_Y: + case AMDGPU::C16_Y: + case AMDGPU::C17_Y: + case AMDGPU::C18_Y: + case AMDGPU::C19_Y: + case AMDGPU::C20_Y: + case AMDGPU::C21_Y: + case AMDGPU::C22_Y: + case AMDGPU::C23_Y: + case AMDGPU::C24_Y: + case AMDGPU::C25_Y: + case AMDGPU::C26_Y: + case AMDGPU::C27_Y: + case AMDGPU::C28_Y: + case AMDGPU::C29_Y: + case AMDGPU::C30_Y: + case AMDGPU::C31_Y: + case AMDGPU::C32_Y: + case AMDGPU::C33_Y: + case AMDGPU::C34_Y: + case AMDGPU::C35_Y: + case AMDGPU::C36_Y: + case AMDGPU::C37_Y: + case AMDGPU::C38_Y: + case AMDGPU::C39_Y: + case AMDGPU::C40_Y: + case AMDGPU::C41_Y: + case AMDGPU::C42_Y: + case AMDGPU::C43_Y: + case AMDGPU::C44_Y: + case AMDGPU::C45_Y: + case AMDGPU::C46_Y: + case AMDGPU::C47_Y: + case AMDGPU::C48_Y: + case AMDGPU::C49_Y: + case AMDGPU::C50_Y: + case AMDGPU::C51_Y: + case AMDGPU::C52_Y: + case AMDGPU::C53_Y: + case AMDGPU::C54_Y: + case AMDGPU::C55_Y: + case AMDGPU::C56_Y: + case AMDGPU::C57_Y: + case AMDGPU::C58_Y: + case AMDGPU::C59_Y: + case AMDGPU::C60_Y: + case AMDGPU::C61_Y: + case AMDGPU::C62_Y: + case AMDGPU::C63_Y: + case AMDGPU::C64_Y: + case AMDGPU::C65_Y: + 
case AMDGPU::C66_Y: + case AMDGPU::C67_Y: + case AMDGPU::C68_Y: + case AMDGPU::C69_Y: + case AMDGPU::C70_Y: + case AMDGPU::C71_Y: + case AMDGPU::C72_Y: + case AMDGPU::C73_Y: + case AMDGPU::C74_Y: + case AMDGPU::C75_Y: + case AMDGPU::C76_Y: + case AMDGPU::C77_Y: + case AMDGPU::C78_Y: + case AMDGPU::C79_Y: + case AMDGPU::C80_Y: + case AMDGPU::C81_Y: + case AMDGPU::C82_Y: + case AMDGPU::C83_Y: + case AMDGPU::C84_Y: + case AMDGPU::C85_Y: + case AMDGPU::C86_Y: + case AMDGPU::C87_Y: + case AMDGPU::C88_Y: + case AMDGPU::C89_Y: + case AMDGPU::C90_Y: + case AMDGPU::C91_Y: + case AMDGPU::C92_Y: + case AMDGPU::C93_Y: + case AMDGPU::C94_Y: + case AMDGPU::C95_Y: + case AMDGPU::C96_Y: + case AMDGPU::C97_Y: + case AMDGPU::C98_Y: + case AMDGPU::C99_Y: + case AMDGPU::T0_Y: + case AMDGPU::T1_Y: + case AMDGPU::T2_Y: + case AMDGPU::T3_Y: + case AMDGPU::T4_Y: + case AMDGPU::T5_Y: + case AMDGPU::T6_Y: + case AMDGPU::T7_Y: + case AMDGPU::T8_Y: + case AMDGPU::T9_Y: + case AMDGPU::T10_Y: + case AMDGPU::T11_Y: + case AMDGPU::T12_Y: + case AMDGPU::T13_Y: + case AMDGPU::T14_Y: + case AMDGPU::T15_Y: + case AMDGPU::T16_Y: + case AMDGPU::T17_Y: + case AMDGPU::T18_Y: + case AMDGPU::T19_Y: + case AMDGPU::T20_Y: + case AMDGPU::T21_Y: + case AMDGPU::T22_Y: + case AMDGPU::T23_Y: + case AMDGPU::T24_Y: + case AMDGPU::T25_Y: + case AMDGPU::T26_Y: + case AMDGPU::T27_Y: + case AMDGPU::T28_Y: + case AMDGPU::T29_Y: + case AMDGPU::T30_Y: + case AMDGPU::T31_Y: + case AMDGPU::T32_Y: + case AMDGPU::T33_Y: + case AMDGPU::T34_Y: + case AMDGPU::T35_Y: + case AMDGPU::T36_Y: + case AMDGPU::T37_Y: + case AMDGPU::T38_Y: + case AMDGPU::T39_Y: + case AMDGPU::T40_Y: + case AMDGPU::T41_Y: + case AMDGPU::T42_Y: + case AMDGPU::T43_Y: + case AMDGPU::T44_Y: + case AMDGPU::T45_Y: + case AMDGPU::T46_Y: + case AMDGPU::T47_Y: + case AMDGPU::T48_Y: + case AMDGPU::T49_Y: + case AMDGPU::T50_Y: + case AMDGPU::T51_Y: + case AMDGPU::T52_Y: + case AMDGPU::T53_Y: + case AMDGPU::T54_Y: + case AMDGPU::T55_Y: + case AMDGPU::T56_Y: + case 
AMDGPU::T57_Y: + case AMDGPU::T58_Y: + case AMDGPU::T59_Y: + case AMDGPU::T60_Y: + case AMDGPU::T61_Y: + case AMDGPU::T62_Y: + case AMDGPU::T63_Y: + case AMDGPU::T64_Y: + case AMDGPU::T65_Y: + case AMDGPU::T66_Y: + case AMDGPU::T67_Y: + case AMDGPU::T68_Y: + case AMDGPU::T69_Y: + case AMDGPU::T70_Y: + case AMDGPU::T71_Y: + case AMDGPU::T72_Y: + case AMDGPU::T73_Y: + case AMDGPU::T74_Y: + case AMDGPU::T75_Y: + case AMDGPU::T76_Y: + case AMDGPU::T77_Y: + case AMDGPU::T78_Y: + case AMDGPU::T79_Y: + case AMDGPU::T80_Y: + case AMDGPU::T81_Y: + case AMDGPU::T82_Y: + case AMDGPU::T83_Y: + case AMDGPU::T84_Y: + case AMDGPU::T85_Y: + case AMDGPU::T86_Y: + case AMDGPU::T87_Y: + case AMDGPU::T88_Y: + case AMDGPU::T89_Y: + case AMDGPU::T90_Y: + case AMDGPU::T91_Y: + case AMDGPU::T92_Y: + case AMDGPU::T93_Y: + case AMDGPU::T94_Y: + case AMDGPU::T95_Y: + case AMDGPU::T96_Y: + case AMDGPU::T97_Y: + case AMDGPU::T98_Y: + case AMDGPU::T99_Y: + case AMDGPU::T100_Y: + case AMDGPU::T101_Y: + case AMDGPU::T102_Y: + case AMDGPU::T103_Y: + case AMDGPU::T104_Y: + case AMDGPU::T105_Y: + case AMDGPU::T106_Y: + case AMDGPU::T107_Y: + case AMDGPU::T108_Y: + case AMDGPU::T109_Y: + case AMDGPU::T110_Y: + case AMDGPU::T111_Y: + case AMDGPU::T112_Y: + case AMDGPU::T113_Y: + case AMDGPU::T114_Y: + case AMDGPU::T115_Y: + case AMDGPU::T116_Y: + case AMDGPU::T117_Y: + case AMDGPU::T118_Y: + case AMDGPU::T119_Y: + case AMDGPU::T120_Y: + case AMDGPU::T121_Y: + case AMDGPU::T122_Y: + case AMDGPU::T123_Y: + case AMDGPU::T124_Y: + case AMDGPU::T125_Y: + case AMDGPU::T126_Y: + case AMDGPU::T127_Y: + return 1; + + } +} + diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp new file mode 100644 index 0000000..45be660 --- /dev/null +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -0,0 +1,286 @@ +//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of 
Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file +// is mostly EmitInstrWithCustomInserter(). +// +//===----------------------------------------------------------------------===// + +#include "R600ISelLowering.h" +#include "AMDGPUUtil.h" +#include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +R600TargetLowering::R600TargetLowering(TargetMachine &TM) : + AMDGPUTargetLowering(TM), + TII(static_cast(TM.getInstrInfo())) +{ + setOperationAction(ISD::MUL, MVT::i64, Expand); + addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); + addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); + addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); + computeRegisterProperties(); + + setOperationAction(ISD::FSUB, MVT::f32, Expand); + + setOperationAction(ISD::ROTL, MVT::i32, Custom); + + setSchedulingPreference(Sched::VLIW); +} + +MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( + MachineInstr * MI, MachineBasicBlock * BB) const +{ + MachineFunction * MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineBasicBlock::iterator I = *MI; + + switch (MI->getOpcode()) { + default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case AMDGPU::TGID_X: + addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_X); + break; + case AMDGPU::TGID_Y: + addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Y); + break; + case AMDGPU::TGID_Z: + addLiveIn(MI, MF, MRI, TII, AMDGPU::T1_Z); + break; + case AMDGPU::TIDIG_X: + addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_X); + break; + case AMDGPU::TIDIG_Y: + addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Y); + break; + case AMDGPU::TIDIG_Z: + addLiveIn(MI, MF, MRI, TII, AMDGPU::T0_Z); + break; + case 
AMDGPU::NGROUPS_X: + lowerImplicitParameter(MI, *BB, MRI, 0); + break; + case AMDGPU::NGROUPS_Y: + lowerImplicitParameter(MI, *BB, MRI, 1); + break; + case AMDGPU::NGROUPS_Z: + lowerImplicitParameter(MI, *BB, MRI, 2); + break; + case AMDGPU::GLOBAL_SIZE_X: + lowerImplicitParameter(MI, *BB, MRI, 3); + break; + case AMDGPU::GLOBAL_SIZE_Y: + lowerImplicitParameter(MI, *BB, MRI, 4); + break; + case AMDGPU::GLOBAL_SIZE_Z: + lowerImplicitParameter(MI, *BB, MRI, 5); + break; + case AMDGPU::LOCAL_SIZE_X: + lowerImplicitParameter(MI, *BB, MRI, 6); + break; + case AMDGPU::LOCAL_SIZE_Y: + lowerImplicitParameter(MI, *BB, MRI, 7); + break; + case AMDGPU::LOCAL_SIZE_Z: + lowerImplicitParameter(MI, *BB, MRI, 8); + break; + + case AMDGPU::CLAMP_R600: + MI->getOperand(0).addTargetFlag(MO_FLAG_CLAMP); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)); + break; + + case AMDGPU::FABS_R600: + MI->getOperand(1).addTargetFlag(MO_FLAG_ABS); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)); + break; + + case AMDGPU::FNEG_R600: + MI->getOperand(1).addTargetFlag(MO_FLAG_NEG); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)); + break; + + case AMDGPU::R600_LOAD_CONST: + { + int64_t RegIndex = MI->getOperand(1).getImm(); + unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY)) + .addOperand(MI->getOperand(0)) + .addReg(ConstantReg); + break; + } + + case AMDGPU::LOAD_INPUT: + { + int64_t RegIndex = MI->getOperand(1).getImm(); + addLiveIn(MI, MF, MRI, TII, + AMDGPU::R600_TReg32RegClass.getRegister(RegIndex)); + break; + } + + case AMDGPU::MASK_WRITE: + { + unsigned maskedRegister = MI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); + 
MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); + MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister); + def->addTargetFlag(MO_FLAG_MASK); + // Return early so the instruction is not erased + return BB; + } + + case AMDGPU::RAT_WRITE_CACHELESS_eg: + { + // Convert to DWORD address + unsigned NewAddr = MRI.createVirtualRegister( + &AMDGPU::R600_TReg32_XRegClass); + unsigned ShiftValue = MRI.createVirtualRegister( + &AMDGPU::R600_TReg32RegClass); + + // XXX In theory, we should be able to pass ShiftValue directly to + // the LSHR_eg instruction as an inline literal, but I tried doing it + // this way and it didn't produce the correct results. + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV), ShiftValue) + .addReg(AMDGPU::ALU_LITERAL_X) + .addImm(2); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr) + .addOperand(MI->getOperand(1)) + .addReg(ShiftValue); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) + .addOperand(MI->getOperand(0)) + .addReg(NewAddr); + break; + } + + case AMDGPU::STORE_OUTPUT: + { + int64_t OutputIndex = MI->getOperand(1).getImm(); + unsigned OutputReg = AMDGPU::R600_TReg32RegClass.getRegister(OutputIndex); + + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY), OutputReg) + .addOperand(MI->getOperand(0)); + + if (!MRI.isLiveOut(OutputReg)) { + MRI.addLiveOut(OutputReg); + } + break; + } + + case AMDGPU::RESERVE_REG: + { + R600MachineFunctionInfo * MFI = MF->getInfo(); + int64_t ReservedIndex = MI->getOperand(0).getImm(); + unsigned ReservedReg = + AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex); + MFI->ReservedRegs.push_back(ReservedReg); + break; + } + + case AMDGPU::TXD: + { + unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); + unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); + + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0) + .addOperand(MI->getOperand(3)) + 
.addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1) + .addOperand(MI->getOperand(2)) + .addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)) + .addReg(t0, RegState::Implicit) + .addReg(t1, RegState::Implicit); + break; + } + case AMDGPU::TXD_SHADOW: + { + unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); + unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); + + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0) + .addOperand(MI->getOperand(3)) + .addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1) + .addOperand(MI->getOperand(2)) + .addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(4)) + .addOperand(MI->getOperand(5)) + .addReg(t0, RegState::Implicit) + .addReg(t1, RegState::Implicit); + break; + } + + + } + + MI->eraseFromParent(); + return BB; +} + +void R600TargetLowering::lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, + MachineRegisterInfo & MRI, unsigned dword_offset) const +{ + MachineBasicBlock::iterator I = *MI; + unsigned PtrReg = MRI.createVirtualRegister(&AMDGPU::R600_TReg32_XRegClass); + MRI.setRegClass(MI->getOperand(0).getReg(), &AMDGPU::R600_TReg32_XRegClass); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::MOV), PtrReg) + .addReg(AMDGPU::ALU_LITERAL_X) + .addImm(dword_offset * 4); + + BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::VTX_READ_PARAM_eg)) + 
.addOperand(MI->getOperand(0)) + .addReg(PtrReg) + .addImm(0); +} + +//===----------------------------------------------------------------------===// +// Custom DAG Lowering Operations +//===----------------------------------------------------------------------===// + + +SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const +{ + switch (Op.getOpcode()) { + default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); + case ISD::ROTL: return LowerROTL(Op, DAG); + } +} + +SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const +{ + DebugLoc DL = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + + return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT, + Op.getOperand(0), + Op.getOperand(0), + DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(32, MVT::i32), + Op.getOperand(1))); +} diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h new file mode 100644 index 0000000..7b91373 --- /dev/null +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -0,0 +1,48 @@ +//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// R600 DAG Lowering interface definition +// +//===----------------------------------------------------------------------===// + +#ifndef R600ISELLOWERING_H +#define R600ISELLOWERING_H + +#include "AMDGPUISelLowering.h" + +namespace llvm { + +class R600InstrInfo; + +class R600TargetLowering : public AMDGPUTargetLowering +{ +public: + R600TargetLowering(TargetMachine &TM); + virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock * BB) const; + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + +private: + const R600InstrInfo * TII; + + /// lowerImplicitParameter - Each OpenCL kernel has nine implicit parameters + /// that are stored in the first nine dwords of a Vertex Buffer. These + /// implicit parameters are represented by pseudo instructions, which are + /// lowered to VTX_READ instructions by this function. + void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, + MachineRegisterInfo & MRI, unsigned dword_offset) const; + + /// LowerROTL - Lower ROTL opcode to BITALIGN + SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; + +}; + +} // End namespace llvm; + +#endif // R600ISELLOWERING_H diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp new file mode 100644 index 0000000..77679ab --- /dev/null +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -0,0 +1,106 @@ +//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// R600 Implementation of TargetInstrInfo. 
+// +//===----------------------------------------------------------------------===// + +#include "R600InstrInfo.h" +#include "AMDGPUTargetMachine.h" +#include "AMDILSubtarget.h" +#include "R600RegisterInfo.h" + +#define GET_INSTRINFO_CTOR +#include "AMDGPUGenDFAPacketizer.inc" + +using namespace llvm; + +R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) + : AMDGPUInstrInfo(tm), + RI(tm, *this), + TM(tm) + { } + +const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const +{ + return RI; +} + +bool R600InstrInfo::isTrig(const MachineInstr &MI) const +{ + return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG; +} + +bool R600InstrInfo::isVector(const MachineInstr &MI) const +{ + return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; +} + +void +R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const +{ + + unsigned subRegMap[4] = {AMDGPU::sel_x, AMDGPU::sel_y, + AMDGPU::sel_z, AMDGPU::sel_w}; + + if (AMDGPU::R600_Reg128RegClass.contains(DestReg) + && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { + for (unsigned i = 0; i < 4; i++) { + BuildMI(MBB, MI, DL, get(AMDGPU::MOV)) + .addReg(RI.getSubReg(DestReg, subRegMap[i]), RegState::Define) + .addReg(RI.getSubReg(SrcReg, subRegMap[i])) + .addReg(DestReg, RegState::Define | RegState::Implicit); + } + } else { + + /* We can't copy vec4 registers */ + assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg) + && !AMDGPU::R600_Reg128RegClass.contains(SrcReg)); + + BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } +} + +MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF, + unsigned DstReg, int64_t Imm) const +{ + MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc()); + MachineInstrBuilder(MI).addReg(DstReg, RegState::Define); + MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X); + MachineInstrBuilder(MI).addImm(Imm); + + return MI; 
+} + +unsigned R600InstrInfo::getIEQOpcode() const +{ + return AMDGPU::SETE_INT; +} + +bool R600InstrInfo::isMov(unsigned Opcode) const +{ + switch(Opcode) { + default: return false; + case AMDGPU::MOV: + case AMDGPU::MOV_IMM_F32: + case AMDGPU::MOV_IMM_I32: + return true; + } +} + +DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, + const ScheduleDAG *DAG) const +{ + const InstrItineraryData *II = TM->getInstrItineraryData(); + return TM->getSubtarget().createDFAPacketizer(II); +} diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h new file mode 100644 index 0000000..b9cbcc8 --- /dev/null +++ b/lib/Target/AMDGPU/R600InstrInfo.h @@ -0,0 +1,76 @@ +//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Interface definition for R600InstrInfo +// +//===----------------------------------------------------------------------===// + +#ifndef R600INSTRUCTIONINFO_H_ +#define R600INSTRUCTIONINFO_H_ + +#include "AMDIL.h" +#include "AMDILInstrInfo.h" +#include "R600RegisterInfo.h" + +#include + +namespace llvm { + + class AMDGPUTargetMachine; + class DFAPacketizer; + class ScheduleDAG; + class MachineFunction; + class MachineInstr; + class MachineInstrBuilder; + + class R600InstrInfo : public AMDGPUInstrInfo { + private: + const R600RegisterInfo RI; + AMDGPUTargetMachine &TM; + + public: + explicit R600InstrInfo(AMDGPUTargetMachine &tm); + + const R600RegisterInfo &getRegisterInfo() const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + bool isTrig(const MachineInstr &MI) const; + + /// isVector - Vector instructions are 
instructions that must fill all + /// instruction slots within an instruction group. + bool isVector(const MachineInstr &MI) const; + + virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg, + int64_t Imm) const; + + virtual unsigned getIEQOpcode() const; + virtual bool isMov(unsigned Opcode) const; + + DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM, + const ScheduleDAG *DAG) const; +}; + +} // End llvm namespace + +namespace R600_InstFlag { + enum TIF { + TRANS_ONLY = (1 << 0), + TEX = (1 << 1), + REDUCTION = (1 << 2), + FC = (1 << 3), + TRIG = (1 << 4), + OP3 = (1 << 5), + VECTOR = (1 << 6) + }; +} + +#endif // R600INSTRUCTIONINFO_H_ diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td new file mode 100644 index 0000000..a7d29fe --- /dev/null +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -0,0 +1,1300 @@ +//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// R600 Tablegen instruction definitions +// +//===----------------------------------------------------------------------===// + +include "R600Intrinsics.td" + +class InstR600 inst, dag outs, dag ins, string asm, list pattern, + InstrItinClass itin> + : AMDGPUInst { + + field bits<32> Inst; + bit Trig = 0; + bit Op3 = 0; + bit isVector = 0; + + let Inst = inst; + let Namespace = "AMDGPU"; + let OutOperandList = outs; + let InOperandList = ins; + let AsmString = asm; + let Pattern = pattern; + let Itinerary = itin; + + let TSFlags{4} = Trig; + let TSFlags{5} = Op3; + + // Vector instructions are instructions that must fill all slots in an + // instruction group + let TSFlags{6} = isVector; +} + +class InstR600ISA pattern> : + AMDGPUInst +{ + field bits<64> Inst; + + let Namespace = "AMDGPU"; +} + +def MEMxi : Operand { + let MIOperandInfo = (ops R600_TReg32_X:$ptr, i32imm:$index); +} + +def MEMrr : Operand { + let MIOperandInfo = (ops R600_Reg32:$ptr, R600_Reg32:$index); +} + +def ADDRParam : ComplexPattern; +def ADDRDWord : ComplexPattern; +def ADDRVTX_READ : ComplexPattern; + +class R600_ALU { + + bits<7> DST_GPR = 0; + bits<9> SRC0_SEL = 0; + bits<1> SRC0_NEG = 0; + bits<9> SRC1_SEL = 0; + bits<1> SRC1_NEG = 0; + bits<1> CLAMP = 0; + +} + + +class R600_1OP inst, string opName, list pattern, + InstrItinClass itin = AnyALU> : + InstR600 ; + +class R600_2OP inst, string opName, list pattern, + InstrItinClass itin = AnyALU> : + InstR600 ; + +class R600_3OP inst, string opName, list pattern, + InstrItinClass itin = AnyALU> : + InstR600 { + + let Op3 = 1; + } + +class R600_REDUCTION inst, dag ins, string asm, list pattern, + InstrItinClass itin = VecALU> : + InstR600 ; + +class R600_TEX inst, string opName, list pattern, + InstrItinClass itin = AnyALU> : + InstR600 ; + +def TEX_SHADOW : PatLeaf< + (imm), + [{uint32_t TType = (uint32_t)N->getZExtValue(); + return (TType >= 6 && 
TType <= 8) || TType == 11 || TType == 12; + }] +>; + +def COND_EQ : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOEQ: case ISD::SETUEQ: + case ISD::SETEQ: return true;}}}] +>; + +def COND_NE : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETONE: case ISD::SETUNE: + case ISD::SETNE: return true;}}}] +>; +def COND_GT : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOGT: case ISD::SETUGT: + case ISD::SETGT: return true;}}}] +>; + +def COND_GE : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOGE: case ISD::SETUGE: + case ISD::SETGE: return true;}}}] +>; + +def COND_LT : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOLT: case ISD::SETULT: + case ISD::SETLT: return true;}}}] +>; + +def COND_LE : PatLeaf < + (cond), + [{switch(N->get()){{default: return false; + case ISD::SETOLE: case ISD::SETULE: + case ISD::SETLE: return true;}}}] +>; + +class EG_CF_RAT cf_inst, bits <6> rat_inst, bits<4> rat_id, dag outs, + dag ins, string asm, list pattern> : + InstR600ISA +{ + bits<7> RW_GPR; + bits<7> INDEX_GPR; + + bits<2> RIM; + bits<2> TYPE; + bits<1> RW_REL; + bits<2> ELEM_SIZE; + + bits<12> ARRAY_SIZE; + bits<4> COMP_MASK; + bits<4> BURST_COUNT; + bits<1> VPM; + bits<1> EOP; + bits<1> MARK; + bits<1> BARRIER; + + /* CF_ALLOC_EXPORT_WORD0_RAT */ + let Inst{3-0} = rat_id; + let Inst{9-4} = rat_inst; + let Inst{10} = 0; /* Reserved */ + let Inst{12-11} = RIM; + let Inst{14-13} = TYPE; + let Inst{21-15} = RW_GPR; + let Inst{22} = RW_REL; + let Inst{29-23} = INDEX_GPR; + let Inst{31-30} = ELEM_SIZE; + + /* CF_ALLOC_EXPORT_WORD1_BUF */ + let Inst{43-32} = ARRAY_SIZE; + let Inst{47-44} = COMP_MASK; + let Inst{51-48} = BURST_COUNT; + let Inst{52} = VPM; + let Inst{53} = EOP; + let Inst{61-54} = cf_inst; + let Inst{62} = MARK; + let Inst{63} = BARRIER; +} + +/* +def store_global : PatFrag<(ops node:$value, node:$ptr), + 
(store node:$value, node:$ptr), + [{ + const Value *Src; + const PointerType *Type; + if ((src = cast(N)->getSrcValue() && + PT = dyn_cast(Src->getType()))) { + return PT->getAddressSpace() == 1; + } + return false; + }]>; + +*/ + +def load_param : PatFrag<(ops node:$ptr), + (load node:$ptr), + [{ + const Value *Src = cast(N)->getSrcValue(); + if (Src) { + PointerType * PT = dyn_cast(Src->getType()); + return PT && PT->getAddressSpace() == AMDILAS::PARAM_I_ADDRESS; + } + return false; + }]>; + +//class EG_CF inst, string asm> : +// InstR600 ; + +/* XXX: We will use this when we emit the real ISA. + bits<24> ADDR = 0; + bits<3> JTS = 0; + + bits<3> PC = 0; + bits<5> CF_CONS = 0; + bits<2> COND = 0; + bits<6> COUNT = 0; + bits<1> VPM = 0; + bits<1> EOP = 0; + bits<8> CF_INST = 0; + bits<1> WQM = 0; + bits<1> B = 0; + + let Inst{23-0} = ADDR; + let Inst{26-24} = JTS; + let Inst{34-32} = PC; + let Inst{39-35} = CF_CONST; + let Inst{41-40} = COND; + let Inst{47-42} = COUNT; + let Inst{52} = VPM; + let Inst{53} = EOP; + let Inst{61-54} = CF_INST; + let Inst{62} = WQM; + let Inst{63} = B; +//} +*/ +def isR600 : Predicate<"Subtarget.device()" + "->getGeneration() == AMDILDeviceInfo::HD4XXX">; +def isR700 : Predicate<"Subtarget.device()" + "->getGeneration() == AMDILDeviceInfo::HD4XXX &&" + "Subtarget.device()->getDeviceFlag()" + ">= OCL_DEVICE_RV710">; +def isEG : Predicate<"Subtarget.device()" + "->getGeneration() >= AMDILDeviceInfo::HD5XXX && " + "Subtarget.device()->getDeviceFlag() != OCL_DEVICE_CAYMAN">; +def isCayman : Predicate<"Subtarget.device()" + "->getDeviceFlag() == OCL_DEVICE_CAYMAN">; +def isEGorCayman : Predicate<"Subtarget.device()" + "->getGeneration() == AMDILDeviceInfo::HD5XXX" + "|| Subtarget.device()->getGeneration() ==" + "AMDILDeviceInfo::HD6XXX">; + +def isR600toCayman : Predicate< + "Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX">; + + +let Predicates = [isR600toCayman] in { + +/* ------------------------------------------- */ +/* 
Common Instructions R600, R700, Evergreen, Cayman */ +/* ------------------------------------------- */ +def ADD : R600_2OP < + 0x0, "ADD", + [(set R600_Reg32:$dst, (fadd R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +// Non-IEEE MUL: 0 * anything = 0 +def MUL : R600_2OP < + 0x1, "MUL NON-IEEE", + [(set R600_Reg32:$dst, (int_AMDGPU_mul R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +def MUL_IEEE : R600_2OP < + 0x2, "MUL_IEEE", + [(set R600_Reg32:$dst, (fmul R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +def MAX : R600_2OP < + 0x3, "MAX", + [(set R600_Reg32:$dst, (AMDGPUfmax R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +def MIN : R600_2OP < + 0x4, "MIN", + [(set R600_Reg32:$dst, (AMDGPUfmin R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +/* For the SET* instructions there is a naming conflict in TargetSelectionDAG.td, + * so some of the instruction names don't match the asm string. + * XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics. + */ + +def SETE : R600_2OP < + 0x08, "SETE", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, + COND_EQ))] +>; + +def SGT : R600_2OP < + 0x09, "SETGT", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, + COND_GT))] +>; + +def SGE : R600_2OP < + 0xA, "SETGE", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, + COND_GE))] +>; + +def SNE : R600_2OP < + 0xB, "SETNE", + [(set R600_Reg32:$dst, + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, + COND_NE))] +>; + +def FRACT : R600_1OP < + 0x10, "FRACT", + [(set R600_Reg32:$dst, (AMDGPUfract R600_Reg32:$src))] +>; + +def TRUNC : R600_1OP < + 0x11, "TRUNC", + [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))] +>; + +def CEIL : R600_1OP < + 0x12, "CEIL", + [(set R600_Reg32:$dst, (fceil R600_Reg32:$src))] +>; + +def RNDNE : R600_1OP < + 0x13, "RNDNE", + [(set R600_Reg32:$dst, (frint R600_Reg32:$src))] +>; + +def FLOOR : 
R600_1OP < + 0x14, "FLOOR", + [(set R600_Reg32:$dst, (int_AMDGPU_floor R600_Reg32:$src))] +>; + +def MOV : R600_1OP <0x19, "MOV", []>; + +class MOV_IMM : InstR600 <0x19, + (outs R600_Reg32:$dst), + (ins R600_Reg32:$alu_literal, immType:$imm), + "MOV_IMM $dst, $imm", + [], AnyALU +>; + +def MOV_IMM_I32 : MOV_IMM; +def : Pat < + (imm:$val), + (MOV_IMM_I32 (i32 ALU_LITERAL_X), imm:$val) +>; + +def MOV_IMM_F32 : MOV_IMM; +def : Pat < + (fpimm:$val), + (MOV_IMM_F32 (i32 ALU_LITERAL_X), fpimm:$val) +>; + +def KILLGT : R600_2OP < + 0x2D, "KILLGT", + [] +>; + +def AND_INT : R600_2OP < + 0x30, "AND_INT", + [(set R600_Reg32:$dst, (and R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +def OR_INT : R600_2OP < + 0x31, "OR_INT", + [(set R600_Reg32:$dst, (or R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +def XOR_INT : R600_2OP < + 0x32, "XOR_INT", + [(set R600_Reg32:$dst, (xor R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +def NOT_INT : R600_1OP < + 0x33, "NOT_INT", + [(set R600_Reg32:$dst, (not R600_Reg32:$src))] +>; + +def ADD_INT : R600_2OP < + 0x34, "ADD_INT", + [(set R600_Reg32:$dst, (add R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +def SUB_INT : R600_2OP < + 0x35, "SUB_INT", + [(set R600_Reg32:$dst, (sub R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +def MAX_INT : R600_2OP < + 0x36, "MAX_INT", + [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))]>; + +def MIN_INT : R600_2OP < + 0x37, "MIN_INT", + [(set R600_Reg32:$dst, (AMDGPUsmin R600_Reg32:$src0, R600_Reg32:$src1))]>; + +def MAX_UINT : R600_2OP < + 0x38, "MAX_UINT", + [(set R600_Reg32:$dst, (AMDGPUsmax R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +def MIN_UINT : R600_2OP < + 0x39, "MIN_UINT", + [(set R600_Reg32:$dst, (AMDGPUumin R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +def SETE_INT : R600_2OP < + 0x3A, "SETE_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETEQ))] +>; + +def SETGT_INT : R600_2OP < + 0x3B, "SGT_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc 
(i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGT))] +>; + +def SETGE_INT : R600_2OP < + 0x3C, "SETGE_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETGE))] +>; + +def SETNE_INT : R600_2OP < + 0x3D, "SETNE_INT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETNE))] +>; + +def SETGT_UINT : R600_2OP < + 0x3E, "SETGT_UINT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGT))] +>; + +def SETGE_UINT : R600_2OP < + 0x3F, "SETGE_UINT", + [(set (i32 R600_Reg32:$dst), + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETUGE))] +>; + +def CNDE_INT : R600_3OP < + 0x1C, "CNDE_INT", + [(set (i32 R600_Reg32:$dst), + (IL_cmov_logical R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))] +>; + +/* Texture instructions */ + + +def TEX_LD : R600_TEX < + 0x03, "TEX_LD", + [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$src4, imm:$src5))] +> { +let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $src4, $src5"; +let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5); +} + +def TEX_GET_TEXTURE_RESINFO : R600_TEX < + 0x04, "TEX_GET_TEXTURE_RESINFO", + [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + +def TEX_GET_GRADIENTS_H : R600_TEX < + 0x07, "TEX_GET_GRADIENTS_H", + [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + +def TEX_GET_GRADIENTS_V : R600_TEX < + 0x08, "TEX_GET_GRADIENTS_V", + [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + +def TEX_SET_GRADIENTS_H : R600_TEX < + 0x0B, "TEX_SET_GRADIENTS_H", + [] +>; + +def TEX_SET_GRADIENTS_V : R600_TEX < + 0x0C, "TEX_SET_GRADIENTS_V", + [] +>; + +def TEX_SAMPLE : R600_TEX < + 0x10, "TEX_SAMPLE", + [(set R600_Reg128:$dst, (int_AMDGPU_tex 
R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + +def TEX_SAMPLE_C : R600_TEX < + 0x18, "TEX_SAMPLE_C", + [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))] +>; + +def TEX_SAMPLE_L : R600_TEX < + 0x11, "TEX_SAMPLE_L", + [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + +def TEX_SAMPLE_C_L : R600_TEX < + 0x19, "TEX_SAMPLE_C_L", + [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))] +>; + +def TEX_SAMPLE_LB : R600_TEX < + 0x12, "TEX_SAMPLE_LB", + [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + +def TEX_SAMPLE_C_LB : R600_TEX < + 0x1A, "TEX_SAMPLE_C_LB", + [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$src1, TEX_SHADOW:$src2))] +>; + +def TEX_SAMPLE_G : R600_TEX < + 0x14, "TEX_SAMPLE_G", + [] +>; + +def TEX_SAMPLE_C_G : R600_TEX < + 0x1C, "TEX_SAMPLE_C_G", + [] +>; + +/* Helper classes for common instructions */ + +class MUL_LIT_Common inst> : R600_3OP < + inst, "MUL_LIT", + [] +>; + +class MULADD_Common inst> : R600_3OP < + inst, "MULADD", + [(set (f32 R600_Reg32:$dst), + (IL_mad R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] +>; + +class CNDE_Common inst> : R600_3OP < + inst, "CNDE", + [(set (f32 R600_Reg32:$dst), + (IL_cmov_logical R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))] +>; + +class CNDGT_Common inst> : R600_3OP < + inst, "CNDGT", + [] +>; + +class CNDGE_Common inst> : R600_3OP < + inst, "CNDGE", + [(set R600_Reg32:$dst, (int_AMDGPU_cndlt R600_Reg32:$src0, R600_Reg32:$src2, R600_Reg32:$src1))] +>; + +class DOT4_Common inst> : R600_REDUCTION < + inst, + (ins R600_Reg128:$src0, R600_Reg128:$src1), + "DOT4 $dst $src0, $src1", + [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))] +>; + +class CUBE_Common inst> : InstR600 < + inst, + (outs R600_Reg128:$dst), + (ins R600_Reg128:$src), + "CUBE $dst $src", + [(set R600_Reg128:$dst, (int_AMDGPU_cube 
R600_Reg128:$src))], + VecALU +>; + +class EXP_IEEE_Common inst> : R600_1OP < + inst, "EXP_IEEE", + [(set R600_Reg32:$dst, (fexp2 R600_Reg32:$src))] +>; + +class FLT_TO_INT_Common inst> : R600_1OP < + inst, "FLT_TO_INT", + [(set R600_Reg32:$dst, (fp_to_sint R600_Reg32:$src))] +>; + +class INT_TO_FLT_Common inst> : R600_1OP < + inst, "INT_TO_FLT", + [(set R600_Reg32:$dst, (sint_to_fp R600_Reg32:$src))] +>; + +class FLT_TO_UINT_Common inst> : R600_1OP < + inst, "FLT_TO_UINT", + [(set R600_Reg32:$dst, (fp_to_uint R600_Reg32:$src))] +>; + +class UINT_TO_FLT_Common inst> : R600_1OP < + inst, "UINT_TO_FLT", + [(set R600_Reg32:$dst, (uint_to_fp R600_Reg32:$src))] +>; + +class LOG_CLAMPED_Common inst> : R600_1OP < + inst, "LOG_CLAMPED", + [] +>; + +class LOG_IEEE_Common inst> : R600_1OP < + inst, "LOG_IEEE", + [(set R600_Reg32:$dst, (int_AMDIL_log R600_Reg32:$src))] +>; + +class LSHL_Common inst> : R600_2OP < + inst, "LSHL $dst, $src0, $src1", + [(set R600_Reg32:$dst, (shl R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +class LSHR_Common inst> : R600_2OP < + inst, "LSHR $dst, $src0, $src1", + [(set R600_Reg32:$dst, (srl R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +class ASHR_Common inst> : R600_2OP < + inst, "ASHR $dst, $src0, $src1", + [(set R600_Reg32:$dst, (sra R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +class MULHI_INT_Common inst> : R600_2OP < + inst, "MULHI_INT $dst, $src0, $src1", + [(set R600_Reg32:$dst, (mulhs R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +class MULHI_UINT_Common inst> : R600_2OP < + inst, "MULHI $dst, $src0, $src1", + [(set R600_Reg32:$dst, (mulhu R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +class MULLO_INT_Common inst> : R600_2OP < + inst, "MULLO_INT $dst, $src0, $src1", + [(set R600_Reg32:$dst, (mul R600_Reg32:$src0, R600_Reg32:$src1))] +>; + +class MULLO_UINT_Common inst> : R600_2OP < + inst, "MULLO_UINT $dst, $src0, $src1", + [] +>; + +class RECIP_CLAMPED_Common inst> : R600_1OP < + inst, "RECIP_CLAMPED", + [] +>; + +class RECIP_IEEE_Common inst> 
: R600_1OP < + inst, "RECIP_IEEE", + [(set R600_Reg32:$dst, (int_AMDGPU_rcp R600_Reg32:$src))] +>; + +class RECIP_UINT_Common inst> : R600_1OP < + inst, "RECIP_INT $dst, $src", + [(set R600_Reg32:$dst, (AMDGPUurecip R600_Reg32:$src))] +>; + +class RECIPSQRT_CLAMPED_Common inst> : R600_1OP < + inst, "RECIPSQRT_CLAMPED", + [(set R600_Reg32:$dst, (int_AMDGPU_rsq R600_Reg32:$src))] +>; + +class RECIPSQRT_IEEE_Common inst> : R600_1OP < + inst, "RECIPSQRT_IEEE", + [] +>; + +class SIN_Common inst> : R600_1OP < + inst, "SIN", []>{ + let Trig = 1; +} + +class COS_Common inst> : R600_1OP < + inst, "COS", []> { + let Trig = 1; +} + +/* Helper patterns for complex intrinsics */ +/* -------------------------------------- */ + +class DIV_Common : Pat< + (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1), + (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) +>; + +class SSG_Common : Pat < + (int_AMDGPU_ssg R600_Reg32:$src), + (cndgt R600_Reg32:$src, (f32 ONE), (cndge R600_Reg32:$src, (f32 ZERO), (f32 NEG_ONE))) +>; + +class TGSI_LIT_Z_Common : Pat < + (int_TGSI_lit_z R600_Reg32:$src_x, R600_Reg32:$src_y, R600_Reg32:$src_w), + (exp_ieee (mul_lit (log_clamped (MAX R600_Reg32:$src_y, (f32 ZERO))), R600_Reg32:$src_w, R600_Reg32:$src_x)) +>; + +/* ---------------------- */ +/* R600 / R700 Only Instructions */ +/* ---------------------- */ + +let Predicates = [isR600] in { + + def MUL_LIT_r600 : MUL_LIT_Common<0x0C>; + def MULADD_r600 : MULADD_Common<0x10>; + def CNDE_r600 : CNDE_Common<0x18>; + def CNDGT_r600 : CNDGT_Common<0x19>; + def CNDGE_r600 : CNDGE_Common<0x1A>; + def DOT4_r600 : DOT4_Common<0x50>; + def CUBE_r600 : CUBE_Common<0x52>; + def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; + def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; + def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>; + def RECIP_CLAMPED_r600 : RECIP_CLAMPED_Common<0x64>; + def RECIP_IEEE_r600 : RECIP_IEEE_Common<0x66>; + def RECIPSQRT_CLAMPED_r600 : RECIPSQRT_CLAMPED_Common<0x67>; + def RECIPSQRT_IEEE_r600 : 
RECIPSQRT_IEEE_Common<0x69>; + def FLT_TO_INT_r600 : FLT_TO_INT_Common<0x6b>; + def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>; + def FLT_TO_UINT_r600 : FLT_TO_UINT_Common<0x79>; + def UINT_TO_FLT_r600 : UINT_TO_FLT_Common<0x6d>; + def SIN_r600 : SIN_Common<0x6E>; + def COS_r600 : COS_Common<0x6F>; + def ASHR_r600 : ASHR_Common<0x70>; + def LSHR_r600 : LSHR_Common<0x71>; + def LSHL_r600 : LSHL_Common<0x72>; + def MULLO_INT_r600 : MULLO_INT_Common<0x73>; + def MULHI_INT_r600 : MULHI_INT_Common<0x74>; + def MULLO_UINT_r600 : MULLO_UINT_Common<0x75>; + def MULHI_UINT_r600 : MULHI_UINT_Common<0x76>; + def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>; + + def DIV_r600 : DIV_Common; + def POW_r600 : POW_Common; + def SSG_r600 : SSG_Common; + def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common; + +} + +// Helper pattern for normalizing inputs to trigonometric instructions for R700+ +// cards. +class TRIG_eg : Pat< + (intr R600_Reg32:$src), + (trig (MUL (MOV_IMM_I32 (i32 ALU_LITERAL_X), CONST.TWO_PI_INV), R600_Reg32:$src)) +>; + +//===----------------------------------------------------------------------===// +// R700 Only instructions +//===----------------------------------------------------------------------===// + +let Predicates = [isR700] in { + def SIN_r700 : SIN_Common<0x6E>; + def COS_r700 : COS_Common<0x6F>; + + // R700 normalizes inputs to SIN/COS the same as EG + def : TRIG_eg ; + def : TRIG_eg ; +} + +//===----------------------------------------------------------------------===// +// Evergreen Only instructions +//===----------------------------------------------------------------------===// + +let Predicates = [isEG] in { + +def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; + +def MULLO_INT_eg : MULLO_INT_Common<0x8F>; +def MULHI_INT_eg : MULHI_INT_Common<0x90>; +def MULLO_UINT_eg : MULLO_UINT_Common<0x91>; +def MULHI_UINT_eg : MULHI_UINT_Common<0x92>; +def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; + +} // End Predicates = [isEG] + +/* ------------------------------- */ +/* 
Evergreen / Cayman Instructions */ +/* ------------------------------- */ + +let Predicates = [isEGorCayman] in { + + // BFE_UINT - bit_extract, an optimization for mask and shift + // Src0 = Input + // Src1 = Offset + // Src2 = Width + // + // bit_extract = (Input << (32 - Offset - Width)) >> (32 - Width) + // + // Example Usage: + // (Offset, Width) + // + // (0, 8) = (Input << 24) >> 24 = (Input & 0xff) >> 0 + // (8, 8) = (Input << 16) >> 24 = (Input & 0xffff) >> 8 + // (16,8) = (Input << 8) >> 24 = (Input & 0xffffff) >> 16 + // (24,8) = (Input << 0) >> 24 = (Input & 0xffffffff) >> 24 + def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", + [(set R600_Reg32:$dst, (int_AMDIL_bit_extract_u32 R600_Reg32:$src0, + R600_Reg32:$src1, + R600_Reg32:$src2))], + VecALU + >; + + def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", + [(set R600_Reg32:$dst, (AMDGPUbitalign R600_Reg32:$src0, R600_Reg32:$src1, + R600_Reg32:$src2))], + VecALU + >; + + def MULADD_eg : MULADD_Common<0x14>; + def ASHR_eg : ASHR_Common<0x15>; + def LSHR_eg : LSHR_Common<0x16>; + def LSHL_eg : LSHL_Common<0x17>; + def CNDE_eg : CNDE_Common<0x19>; + def CNDGT_eg : CNDGT_Common<0x1A>; + def CNDGE_eg : CNDGE_Common<0x1B>; + def MUL_LIT_eg : MUL_LIT_Common<0x1F>; + def EXP_IEEE_eg : EXP_IEEE_Common<0x81>; + def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>; + def LOG_IEEE_eg : LOG_IEEE_Common<0x83>; + def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>; + def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>; + def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; + def SIN_eg : SIN_Common<0x8D>; + def COS_eg : COS_Common<0x8E>; + def DOT4_eg : DOT4_Common<0xBE>; + def CUBE_eg : CUBE_Common<0xC0>; + + def DIV_eg : DIV_Common; + def POW_eg : POW_Common; + def SSG_eg : SSG_Common; + def TGSI_LIT_Z_eg : TGSI_LIT_Z_Common; + + def : TRIG_eg ; + def : TRIG_eg ; + + def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> { + let Pattern = []; + } + + def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; + + def FLT_TO_UINT_eg : 
FLT_TO_UINT_Common<0x9A> { + let Pattern = []; + } + + def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; + + def : Pat<(fp_to_sint R600_Reg32:$src), + (FLT_TO_INT_eg (TRUNC R600_Reg32:$src))>; + + def : Pat<(fp_to_uint R600_Reg32:$src), + (FLT_TO_UINT_eg (TRUNC R600_Reg32:$src))>; + +//===----------------------------------------------------------------------===// +// Memory read/write instructions +//===----------------------------------------------------------------------===// + +let usesCustomInserter = 1 in { + +def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, 0, (outs), + (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr), + "RAT_WRITE_CACHELESS_eg $rw_gpr, $index_gpr", + [(global_store (i32 R600_TReg32_X:$rw_gpr), R600_TReg32_X:$index_gpr)]> +{ + let RIM = 0; + /* XXX: Have a separate instruction for non-indexed writes. */ + let TYPE = 1; + let RW_REL = 0; + let ELEM_SIZE = 0; + + let ARRAY_SIZE = 0; + let COMP_MASK = 1; + let BURST_COUNT = 0; + let VPM = 0; + let EOP = 0; + let MARK = 0; + let BARRIER = 1; +} + +} // End usesCustomInserter = 1 + +// Floating point global_store +def : Pat < + (global_store (f32 R600_TReg32_X:$val), R600_TReg32_X:$ptr), + (RAT_WRITE_CACHELESS_eg R600_TReg32_X:$val, R600_TReg32_X:$ptr) +>; + +class VTX_READ_eg buffer_id, dag outs, list pattern> + : InstR600ISA { + + // Operands + bits<7> DST_GPR; + bits<7> SRC_GPR; + + // Static fields + bits<5> VC_INST = 0; + bits<2> FETCH_TYPE = 2; + bits<1> FETCH_WHOLE_QUAD = 0; + bits<8> BUFFER_ID = buffer_id; + bits<1> SRC_REL = 0; + // XXX: We can infer this field based on the SRC_GPR. This would allow us + // to store vertex addresses in any channel, not just X. 
+ bits<2> SRC_SEL_X = 0; + bits<6> MEGA_FETCH_COUNT; + bits<1> DST_REL = 0; + bits<3> DST_SEL_X; + bits<3> DST_SEL_Y; + bits<3> DST_SEL_Z; + bits<3> DST_SEL_W; + // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, + // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, + // however, based on my testing if USE_CONST_FIELDS is set, then all + // these fields need to be set to 0. + bits<1> USE_CONST_FIELDS = 0; + bits<6> DATA_FORMAT; + bits<2> NUM_FORMAT_ALL = 1; + bits<1> FORMAT_COMP_ALL = 0; + bits<1> SRF_MODE_ALL = 0; + + // LLVM can only encode 64-bit instructions, so these fields are manually + // encoded in R600CodeEmitter + // + // bits<16> OFFSET; + // bits<2> ENDIAN_SWAP = 0; + // bits<1> CONST_BUF_NO_STRIDE = 0; + // bits<1> MEGA_FETCH = 0; + // bits<1> ALT_CONST = 0; + // bits<2> BUFFER_INDEX_MODE = 0; + + // VTX_WORD0 + let Inst{4-0} = VC_INST; + let Inst{6-5} = FETCH_TYPE; + let Inst{7} = FETCH_WHOLE_QUAD; + let Inst{15-8} = BUFFER_ID; + let Inst{22-16} = SRC_GPR; + let Inst{23} = SRC_REL; + let Inst{25-24} = SRC_SEL_X; + let Inst{31-26} = MEGA_FETCH_COUNT; + + // VTX_WORD1_GPR + let Inst{38-32} = DST_GPR; + let Inst{39} = DST_REL; + let Inst{40} = 0; // Reserved + let Inst{43-41} = DST_SEL_X; + let Inst{46-44} = DST_SEL_Y; + let Inst{49-47} = DST_SEL_Z; + let Inst{52-50} = DST_SEL_W; + let Inst{53} = USE_CONST_FIELDS; + let Inst{59-54} = DATA_FORMAT; + let Inst{61-60} = NUM_FORMAT_ALL; + let Inst{62} = FORMAT_COMP_ALL; + let Inst{63} = SRF_MODE_ALL; + + // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding + // is done in R600CodeEmitter) + // + // Inst{79-64} = OFFSET; + // Inst{81-80} = ENDIAN_SWAP; + // Inst{82} = CONST_BUF_NO_STRIDE; + // Inst{83} = MEGA_FETCH; + // Inst{84} = ALT_CONST; + // Inst{86-85} = BUFFER_INDEX_MODE; + // Inst{95-87} = 0; Reserved + + // VTX_WORD3 (Padding) + // + // Inst{127-96} = 0; +} + +class VTX_READ_32_eg buffer_id, list pattern> + : VTX_READ_eg { + + let 
MEGA_FETCH_COUNT = 4; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 0xD; // COLOR_32 +} + +def VTX_READ_PARAM_eg : VTX_READ_32_eg <0, + [(set (i32 R600_TReg32_X:$dst), (load_param ADDRVTX_READ:$ptr))] +>; + +def VTX_READ_GLOBAL_eg : VTX_READ_32_eg <1, + [(set (i32 R600_TReg32_X:$dst), (global_load ADDRVTX_READ:$ptr))] +>; + +class VTX_READ_128_eg buffer_id, list pattern> + : VTX_READ_eg { + + let MEGA_FETCH_COUNT = 16; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 +} + +def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, + [(set (v4i32 R600_Reg128:$dst), (global_load ADDRVTX_READ:$ptr))] +>; + +} + +let Predicates = [isCayman] in { + +let isVector = 1 in { + +def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; + +def MULLO_INT_cm : MULLO_INT_Common<0x8F>; +def MULHI_INT_cm : MULHI_INT_Common<0x90>; +def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; +def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; + +} // End isVector = 1 + +// RECIP_UINT emulation for Cayman +def : Pat < + (AMDGPUurecip R600_Reg32:$src0), + (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg R600_Reg32:$src0)), + (MOV_IMM_I32 (i32 ALU_LITERAL_X), 0x4f800000))) +>; + +} // End isCayman + +/* Other Instructions */ + +let isCodeGenOnly = 1 in { +/* + def SWIZZLE : AMDGPUShaderInst < + (outs GPRV4F32:$dst), + (ins GPRV4F32:$src0, i32imm:$src1), + "SWIZZLE $dst, $src0, $src1", + [(set GPRV4F32:$dst, (int_AMDGPU_swizzle GPRV4F32:$src0, imm:$src1))] + >; +*/ + + def LAST : AMDGPUShaderInst < + (outs), + (ins), + "LAST", + [] + >; + + def GET_CHAN : AMDGPUShaderInst < + (outs R600_Reg32:$dst), + (ins R600_Reg128:$src0, i32imm:$src1), + "GET_CHAN $dst, $src0, $src1", + [] + >; + + def MULLIT : AMDGPUShaderInst < + (outs R600_Reg128:$dst), + (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2), + "MULLIT $dst, $src0, $src1", + [(set 
R600_Reg128:$dst, (int_AMDGPU_mullit R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2))] + >; + +let usesCustomInserter = 1, isPseudo = 1 in { + +class R600PreloadInst : AMDGPUInst < + (outs R600_TReg32:$dst), + (ins), + asm, + [(set R600_TReg32:$dst, (intr))] +>; + +def TGID_X : R600PreloadInst <"TGID_X", int_r600_read_tgid_x>; +def TGID_Y : R600PreloadInst <"TGID_Y", int_r600_read_tgid_y>; +def TGID_Z : R600PreloadInst <"TGID_Z", int_r600_read_tgid_z>; + +def TIDIG_X : R600PreloadInst <"TIDIG_X", int_r600_read_tidig_x>; +def TIDIG_Y : R600PreloadInst <"TIDIG_Y", int_r600_read_tidig_y>; +def TIDIG_Z : R600PreloadInst <"TIDIG_Z", int_r600_read_tidig_z>; + +def NGROUPS_X : R600PreloadInst <"NGROUPS_X", int_r600_read_ngroups_x>; +def NGROUPS_Y : R600PreloadInst <"NGROUPS_Y", int_r600_read_ngroups_y>; +def NGROUPS_Z : R600PreloadInst <"NGROUPS_Z", int_r600_read_ngroups_z>; + +def GLOBAL_SIZE_X : R600PreloadInst <"GLOBAL_SIZE_X", + int_r600_read_global_size_x>; +def GLOBAL_SIZE_Y : R600PreloadInst <"GLOBAL_SIZE_Y", + int_r600_read_global_size_y>; +def GLOBAL_SIZE_Z : R600PreloadInst <"GLOBAL_SIZE_Z", + int_r600_read_global_size_z>; + +def LOCAL_SIZE_X : R600PreloadInst <"LOCAL_SIZE_X", + int_r600_read_local_size_x>; +def LOCAL_SIZE_Y : R600PreloadInst <"LOCAL_SIZE_Y", + int_r600_read_local_size_y>; +def LOCAL_SIZE_Z : R600PreloadInst <"LOCAL_SIZE_Z", + int_r600_read_local_size_z>; + +def R600_LOAD_CONST : AMDGPUShaderInst < + (outs R600_Reg32:$dst), + (ins i32imm:$src0), + "R600_LOAD_CONST $dst, $src0", + [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))] +>; + +def LOAD_INPUT : AMDGPUShaderInst < + (outs R600_Reg32:$dst), + (ins i32imm:$src), + "LOAD_INPUT $dst, $src", + [(set R600_Reg32:$dst, (int_R600_load_input imm:$src))] +>; + +def RESERVE_REG : AMDGPUShaderInst < + (outs), + (ins i32imm:$src), + "RESERVE_REG $src", + [(int_AMDGPU_reserve_reg imm:$src)] +>; + +def STORE_OUTPUT: AMDGPUShaderInst < + (outs), + (ins R600_Reg32:$src0, i32imm:$src1), + 
"STORE_OUTPUT $src0, $src1", + [(int_AMDGPU_store_output R600_Reg32:$src0, imm:$src1)] +>; + +def TXD: AMDGPUShaderInst < + (outs R600_Reg128:$dst), + (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4), + "TXD $dst, $src0, $src1, $src2, $src3, $src4", + [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, imm:$src4))] +>; + +def TXD_SHADOW: AMDGPUShaderInst < + (outs R600_Reg128:$dst), + (ins R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, i32imm:$src3, i32imm:$src4), + "TXD_SHADOW $dst, $src0, $src1, $src2, $src3, $src4", + [(set R600_Reg128:$dst, (int_AMDGPU_txd R600_Reg128:$src0, R600_Reg128:$src1, R600_Reg128:$src2, imm:$src3, TEX_SHADOW:$src4))] +>; + +} // End usesCustomInserter = 1, isPseudo = 1 + +} // End isCodeGenOnly = 1 + +def CLAMP_R600 : CLAMP ; +def FABS_R600 : FABS; +def FNEG_R600 : FNEG; + +let usesCustomInserter = 1 in { + +def MASK_WRITE : AMDGPUShaderInst < + (outs), + (ins R600_Reg32:$src), + "MASK_WRITE $src", + [] +>; + +} // End usesCustomInserter = 1 + +//===----------------------------------------------------------------------===// +// ISel Patterns +//===----------------------------------------------------------------------===// + +// KIL Patterns +def KILP : Pat < + (int_AMDGPU_kilp), + (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO))) +>; + +def KIL : Pat < + (int_AMDGPU_kill R600_Reg32:$src0), + (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0))) +>; + +// SGT Reverse args +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LT), + (SGT R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SGE Reverse args +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, COND_LE), + (SGE R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGT_INT reverse args +def : Pat < + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLT), + (SETGT_INT R600_Reg32:$src1, R600_Reg32:$src0) +>; + 
+// SETGE_INT reverse args +def : Pat < + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETLE), + (SETGE_INT R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGT_UINT reverse args +def : Pat < + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULT), + (SETGT_UINT R600_Reg32:$src1, R600_Reg32:$src0) +>; + +// SETGE_UINT reverse args +def : Pat < + (selectcc (i32 R600_Reg32:$src0), R600_Reg32:$src1, -1, 0, SETULE), + (SETGE_UINT R600_Reg32:$src0, R600_Reg32:$src1) +>; + +// The next two patterns are special cases for handling 'true if ordered' and +// 'true if unordered' conditionals. The assumption here is that the behavior of +// SETE and SNE conforms to the Direct3D 10 rules for floating point values +// described here: +// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit +// We assume that SETE returns false when one of the operands is NAN and +// SNE returns true when one of the operands is NAN + +//SETE - 'true if ordered' +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETO), + (SETE R600_Reg32:$src0, R600_Reg32:$src1) +>; + +//SNE - 'true if unordered' +def : Pat < + (selectcc (f32 R600_Reg32:$src0), R600_Reg32:$src1, FP_ONE, FP_ZERO, SETUO), + (SNE R600_Reg32:$src0, R600_Reg32:$src1) +>; + +def : Extract_Element ; +def : Extract_Element ; +def : Extract_Element ; +def : Extract_Element ; + +def : Insert_Element ; +def : Insert_Element ; +def : Insert_Element ; +def : Insert_Element ; + +def : Extract_Element ; +def : Extract_Element ; +def : Extract_Element ; +def : Extract_Element ; + +def : Insert_Element ; +def : Insert_Element ; +def : Insert_Element ; +def : Insert_Element ; + +def : Vector_Build ; +def : Vector_Build ; + +// bitconvert patterns + +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; + +} // End isR600toCayman Predicate diff --git a/lib/Target/AMDGPU/R600Intrinsics.td b/lib/Target/AMDGPU/R600Intrinsics.td new file mode 100644 index 
0000000..0265388 --- /dev/null +++ b/lib/Target/AMDGPU/R600Intrinsics.td @@ -0,0 +1,16 @@ +//===-- R600Intrinsics.td - R600 Intrinsic defs --------*- tablegen -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// R600 Intrinsic Definitions +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "R600", isTarget = 1 in { + def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; +} diff --git a/lib/Target/AMDGPU/R600KernelParameters.cpp b/lib/Target/AMDGPU/R600KernelParameters.cpp new file mode 100644 index 0000000..e810023 --- /dev/null +++ b/lib/Target/AMDGPU/R600KernelParameters.cpp @@ -0,0 +1,546 @@ +//===-- R600KernelParameters.cpp - Lower kernel function arguments --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass lowers kernel function arguments to loads from the vertex buffer. +// +// Kernel arguments are stored in the vertex buffer at an offset of 9 dwords, +// so arg0 needs to be loaded from VTX_BUFFER[9] and arg1 is loaded from +// VTX_BUFFER[10], etc. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDIL.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/Metadata.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/TypeBuilder.h" + +#include +#include + +using namespace llvm; + +namespace { + +#define CONSTANT_CACHE_SIZE_DW 127 + +class R600KernelParameters : public FunctionPass +{ + const TargetData * TD; + LLVMContext* Context; + Module *mod; + + struct param + { + param() : val(NULL), ptr_val(NULL), offset_in_dw(0), size_in_dw(0), + indirect(true), specialID(0) {} + + Value* val; + Value* ptr_val; + int offset_in_dw; + int size_in_dw; + + bool indirect; + + std::string specialType; + int specialID; + + int end() { return offset_in_dw + size_in_dw; } + // The first 9 dwords are reserved for the grid sizes. 
+ int get_rat_offset() { return 9 + offset_in_dw; } + }; + + std::vector params; + + bool isOpenCLKernel(const Function* fun); + int getLastSpecialID(const std::string& TypeName); + + int getListSize(); + void AddParam(Argument* arg); + int calculateArgumentSize(Argument* arg); + void RunAna(Function* fun); + void Replace(Function* fun); + bool isIndirect(Value* val, std::set& visited); + void Propagate(Function* fun); + void Propagate(Value* v, const Twine& name, bool indirect = true); + Value* ConstantRead(Function* fun, param& p); + Value* handleSpecial(Function* fun, param& p); + bool isSpecialType(Type*); + std::string getSpecialTypeName(Type*); +public: + static char ID; + R600KernelParameters() : FunctionPass(ID) {}; + R600KernelParameters(const TargetData* TD) : FunctionPass(ID), TD(TD) {} + bool runOnFunction (Function &F); + void getAnalysisUsage(AnalysisUsage &AU) const; + const char *getPassName() const; + bool doInitialization(Module &M); + bool doFinalization(Module &M); +}; + +char R600KernelParameters::ID = 0; + +static RegisterPass X("kerparam", + "OpenCL Kernel Parameter conversion", false, false); + +bool R600KernelParameters::isOpenCLKernel(const Function* fun) +{ + Module *mod = const_cast(fun)->getParent(); + NamedMDNode * md = mod->getOrInsertNamedMetadata("opencl.kernels"); + + if (!md or !md->getNumOperands()) + { + return false; + } + + for (int i = 0; i < int(md->getNumOperands()); i++) + { + if (!md->getOperand(i) or !md->getOperand(i)->getOperand(0)) + { + continue; + } + + assert(md->getOperand(i)->getNumOperands() == 1); + + if (md->getOperand(i)->getOperand(0)->getName() == fun->getName()) + { + return true; + } + } + + return false; +} + +int R600KernelParameters::getLastSpecialID(const std::string& TypeName) +{ + int lastID = -1; + + for (std::vector::iterator i = params.begin(); i != params.end(); i++) + { + if (i->specialType == TypeName) + { + lastID = i->specialID; + } + } + + return lastID; +} + +int 
R600KernelParameters::getListSize() +{ + if (params.size() == 0) + { + return 0; + } + + return params.back().end(); +} + +bool R600KernelParameters::isIndirect(Value* val, std::set& visited) +{ + //XXX Direct parameters are not supported yet, so return true here. + return true; +#if 0 + if (isa(val)) + { + return false; + } + + if (isa(val->getType())) + { + assert(0 and "Internal error"); + return false; + } + + if (visited.count(val)) + { + return false; + } + + visited.insert(val); + + if (isa(val)) + { + GetElementPtrInst* GEP = dyn_cast(val); + GetElementPtrInst::op_iterator i = GEP->op_begin(); + + for (i++; i != GEP->op_end(); i++) + { + if (!isa(*i)) + { + return true; + } + } + } + + for (Value::use_iterator i = val->use_begin(); i != val->use_end(); i++) + { + Value* v2 = dyn_cast(*i); + + if (v2) + { + if (isIndirect(v2, visited)) + { + return true; + } + } + } + + return false; +#endif +} + +void R600KernelParameters::AddParam(Argument* arg) +{ + param p; + + p.val = dyn_cast(arg); + p.offset_in_dw = getListSize(); + p.size_in_dw = calculateArgumentSize(arg); + + if (isa(arg->getType()) and arg->hasByValAttr()) + { + std::set visited; + p.indirect = isIndirect(p.val, visited); + } + + params.push_back(p); +} + +int R600KernelParameters::calculateArgumentSize(Argument* arg) +{ + Type* t = arg->getType(); + + if (arg->hasByValAttr() and dyn_cast(t)) + { + t = dyn_cast(t)->getElementType(); + } + + int store_size_in_dw = (TD->getTypeStoreSize(t) + 3)/4; + + assert(store_size_in_dw); + + return store_size_in_dw; +} + + +void R600KernelParameters::RunAna(Function* fun) +{ + assert(isOpenCLKernel(fun)); + + for (Function::arg_iterator i = fun->arg_begin(); i != fun->arg_end(); i++) + { + AddParam(i); + } + +} + +void R600KernelParameters::Replace(Function* fun) +{ + for (std::vector::iterator i = params.begin(); i != params.end(); i++) + { + Value *new_val; + + if (isSpecialType(i->val->getType())) + { + new_val = handleSpecial(fun, *i); + } + else + { + 
new_val = ConstantRead(fun, *i); + } + if (new_val) + { + i->val->replaceAllUsesWith(new_val); + } + } +} + +void R600KernelParameters::Propagate(Function* fun) +{ + for (std::vector::iterator i = params.begin(); i != params.end(); i++) + { + if (i->ptr_val) + { + Propagate(i->ptr_val, i->val->getName(), i->indirect); + } + } +} + +void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect) +{ + LoadInst* load = dyn_cast(v); + GetElementPtrInst *GEP = dyn_cast(v); + + unsigned addrspace; + + if (indirect) + { + addrspace = AMDILAS::PARAM_I_ADDRESS; + } + else + { + addrspace = AMDILAS::PARAM_D_ADDRESS; + } + + if (GEP and GEP->getType()->getAddressSpace() != addrspace) + { + Value* op = GEP->getPointerOperand(); + + if (dyn_cast(op->getType())->getAddressSpace() != addrspace) + { + op = new BitCastInst(op, PointerType::get(dyn_cast( + op->getType())->getElementType(), addrspace), + name, dyn_cast(v)); + } + + std::vector params(GEP->idx_begin(), GEP->idx_end()); + + GetElementPtrInst* GEP2 = GetElementPtrInst::Create(op, params, name, + dyn_cast(v)); + GEP2->setIsInBounds(GEP->isInBounds()); + v = dyn_cast(GEP2); + GEP->replaceAllUsesWith(GEP2); + GEP->eraseFromParent(); + load = NULL; + } + + if (load) + { + ///normally at this point we have the right address space + if (load->getPointerAddressSpace() != addrspace) + { + Value *orig_ptr = load->getPointerOperand(); + PointerType *orig_ptr_type = dyn_cast(orig_ptr->getType()); + + Type* new_ptr_type = PointerType::get(orig_ptr_type->getElementType(), + addrspace); + + Value* new_ptr = orig_ptr; + + if (orig_ptr->getType() != new_ptr_type) + { + new_ptr = new BitCastInst(orig_ptr, new_ptr_type, "prop_cast", load); + } + + Value* new_load = new LoadInst(new_ptr, name, load); + load->replaceAllUsesWith(new_load); + load->eraseFromParent(); + } + + return; + } + + std::vector users(v->use_begin(), v->use_end()); + + for (int i = 0; i < int(users.size()); i++) + { + Value* v2 = dyn_cast(users[i]); + 
+ + if (v2) + { + Propagate(v2, name, indirect); + } + } +} + +Value* R600KernelParameters::ConstantRead(Function* fun, param& p) +{ + assert(fun->front().begin() != fun->front().end()); + + Instruction *first_inst = fun->front().begin(); + IRBuilder <> builder (first_inst); +/* First 3 dwords are reserved for the dimension info */ + + if (!p.val->hasNUsesOrMore(1)) + { + return NULL; + } + unsigned addrspace; + + if (p.indirect) + { + addrspace = AMDILAS::PARAM_I_ADDRESS; + } + else + { + addrspace = AMDILAS::PARAM_D_ADDRESS; + } + + Argument *arg = dyn_cast(p.val); + Type * argType = p.val->getType(); + PointerType * argPtrType = dyn_cast(p.val->getType()); + + if (argPtrType and arg->hasByValAttr()) + { + Value* param_addr_space_ptr = ConstantPointerNull::get( + PointerType::get(Type::getInt32Ty(*Context), + addrspace)); + Value* param_ptr = GetElementPtrInst::Create(param_addr_space_ptr, + ConstantInt::get(Type::getInt32Ty(*Context), + p.get_rat_offset()), arg->getName(), + first_inst); + param_ptr = new BitCastInst(param_ptr, + PointerType::get(argPtrType->getElementType(), + addrspace), + arg->getName(), first_inst); + p.ptr_val = param_ptr; + return param_ptr; + } + else + { + Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get( + argType, addrspace)); + + Value* param_ptr = builder.CreateGEP(param_addr_space_ptr, + ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), + arg->getName()); + + Value* param_value = builder.CreateLoad(param_ptr, arg->getName()); + + return param_value; + } +} + +Value* R600KernelParameters::handleSpecial(Function* fun, param& p) +{ + std::string name = getSpecialTypeName(p.val->getType()); + int ID; + + assert(!name.empty()); + + if (name == "image2d_t" or name == "image3d_t") + { + int lastID = std::max(getLastSpecialID("image2d_t"), + getLastSpecialID("image3d_t")); + + if (lastID == -1) + { + ID = 2; ///ID0 and ID1 are used internally by the driver + } + else + { + ID = lastID + 1; + } + } + 
else if (name == "sampler_t") + { + int lastID = getLastSpecialID("sampler_t"); + + if (lastID == -1) + { + ID = 0; + } + else + { + ID = lastID + 1; + } + } + else + { + ///TODO: give some error message + return NULL; + } + + p.specialType = name; + p.specialID = ID; + + Instruction *first_inst = fun->front().begin(); + + return new IntToPtrInst(ConstantInt::get(Type::getInt32Ty(*Context), + p.specialID), p.val->getType(), + "resourceID", first_inst); +} + + +bool R600KernelParameters::isSpecialType(Type* t) +{ + return !getSpecialTypeName(t).empty(); +} + +std::string R600KernelParameters::getSpecialTypeName(Type* t) +{ + PointerType *pt = dyn_cast(t); + StructType *st = NULL; + + if (pt) + { + st = dyn_cast(pt->getElementType()); + } + + if (st) + { + std::string prefix = "struct.opencl_builtin_type_"; + + std::string name = st->getName().str(); + + if (name.substr(0, prefix.length()) == prefix) + { + return name.substr(prefix.length(), name.length()); + } + } + + return ""; +} + + +bool R600KernelParameters::runOnFunction (Function &F) +{ + if (!isOpenCLKernel(&F)) + { + return false; + } + + RunAna(&F); + Replace(&F); + Propagate(&F); + + return false; +} + +void R600KernelParameters::getAnalysisUsage(AnalysisUsage &AU) const +{ + FunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); +} + +const char *R600KernelParameters::getPassName() const +{ + return "OpenCL Kernel parameter conversion to memory"; +} + +bool R600KernelParameters::doInitialization(Module &M) +{ + Context = &M.getContext(); + mod = &M; + + return false; +} + +bool R600KernelParameters::doFinalization(Module &M) +{ + return false; +} + +} // End anonymous namespace + +FunctionPass* llvm::createR600KernelParametersPass(const TargetData* TD) +{ + FunctionPass *p = new R600KernelParameters(TD); + + return p; +} + + diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp new file mode 100644 index 0000000..48443fb --- /dev/null +++ 
b/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp @@ -0,0 +1,16 @@ +//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "R600MachineFunctionInfo.h" + +using namespace llvm; + +R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) + : MachineFunctionInfo() + { } diff --git a/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/lib/Target/AMDGPU/R600MachineFunctionInfo.h new file mode 100644 index 0000000..948e192 --- /dev/null +++ b/lib/Target/AMDGPU/R600MachineFunctionInfo.h @@ -0,0 +1,33 @@ +//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// R600MachineFunctionInfo is used for keeping track of which registers have +// been reserved by the llvm.AMDGPU.reserve.reg intrinsic. 
+// +//===----------------------------------------------------------------------===// + +#ifndef R600MACHINEFUNCTIONINFO_H +#define R600MACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" +#include + +namespace llvm { + +class R600MachineFunctionInfo : public MachineFunctionInfo { + +public: + R600MachineFunctionInfo(const MachineFunction &MF); + std::vector ReservedRegs; + +}; + +} // End llvm namespace + +#endif //R600MACHINEFUNCTIONINFO_H diff --git a/lib/Target/AMDGPU/R600RegisterInfo.cpp b/lib/Target/AMDGPU/R600RegisterInfo.cpp new file mode 100644 index 0000000..7ae702c --- /dev/null +++ b/lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -0,0 +1,88 @@ +//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The file contains the R600 implementation of the TargetRegisterInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "R600RegisterInfo.h" +#include "AMDGPUTargetMachine.h" +#include "R600MachineFunctionInfo.h" + +using namespace llvm; + +R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm, + const TargetInstrInfo &tii) +: AMDGPURegisterInfo(tm, tii), + TM(tm), + TII(tii) + { } + +BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const +{ + BitVector Reserved(getNumRegs()); + const R600MachineFunctionInfo * MFI = MF.getInfo(); + + Reserved.set(AMDGPU::ZERO); + Reserved.set(AMDGPU::HALF); + Reserved.set(AMDGPU::ONE); + Reserved.set(AMDGPU::ONE_INT); + Reserved.set(AMDGPU::NEG_HALF); + Reserved.set(AMDGPU::NEG_ONE); + Reserved.set(AMDGPU::PV_X); + Reserved.set(AMDGPU::ALU_LITERAL_X); + + for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(), + E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) { + Reserved.set(*I); + } + + for (std::vector::const_iterator I = MFI->ReservedRegs.begin(), + E = MFI->ReservedRegs.end(); I != E; ++I) { + Reserved.set(*I); + } + + return Reserved; +} + +const TargetRegisterClass * +R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const +{ + switch (rc->getID()) { + case AMDGPU::GPRF32RegClassID: + case AMDGPU::GPRI32RegClassID: + return &AMDGPU::R600_Reg32RegClass; + default: return rc; + } +} + +unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const +{ + switch(reg) { + case AMDGPU::ZERO: + case AMDGPU::ONE: + case AMDGPU::ONE_INT: + case AMDGPU::NEG_ONE: + case AMDGPU::HALF: + case AMDGPU::NEG_HALF: + case AMDGPU::ALU_LITERAL_X: + return 0; + default: return getHWRegChanGen(reg); + } +} + +const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass( + MVT VT) const +{ + switch(VT.SimpleTy) { + default: + case MVT::i32: return &AMDGPU::R600_TReg32RegClass; + } +} +#include "R600HwRegInfo.include" diff --git a/lib/Target/AMDGPU/R600RegisterInfo.h 
b/lib/Target/AMDGPU/R600RegisterInfo.h new file mode 100644 index 0000000..0df667b --- /dev/null +++ b/lib/Target/AMDGPU/R600RegisterInfo.h @@ -0,0 +1,54 @@ +//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Interface definition for R600RegisterInfo +// +//===----------------------------------------------------------------------===// + +#ifndef R600REGISTERINFO_H_ +#define R600REGISTERINFO_H_ + +#include "AMDGPUTargetMachine.h" +#include "AMDILRegisterInfo.h" + +namespace llvm { + +class R600TargetMachine; +class TargetInstrInfo; + +struct R600RegisterInfo : public AMDGPURegisterInfo +{ + AMDGPUTargetMachine &TM; + const TargetInstrInfo &TII; + + R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii); + + virtual BitVector getReservedRegs(const MachineFunction &MF) const; + + /// getISARegClass - rc is an AMDIL reg class. This function returns the + /// R600 reg class that is equivalent to the given AMDIL reg class. + virtual const TargetRegisterClass * getISARegClass( + const TargetRegisterClass * rc) const; + + /// getHWRegChan - get the HW encoding for a register's channel. + unsigned getHWRegChan(unsigned reg) const; + + /// getCFGStructurizerRegClass - get the register class of the specified + /// type to use in the CFGStructurizer + virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; + +private: + /// getHWRegChanGen - Generated function returns a register's channel + /// encoding. 
+ unsigned getHWRegChanGen(unsigned reg) const; +}; + +} // End namespace llvm + +#endif // AMDIDSAREGISTERINFO_H_ diff --git a/lib/Target/AMDGPU/R600RegisterInfo.td b/lib/Target/AMDGPU/R600RegisterInfo.td new file mode 100644 index 0000000..a866a6b --- /dev/null +++ b/lib/Target/AMDGPU/R600RegisterInfo.td @@ -0,0 +1,5271 @@ + +class R600Reg encoding> : Register { + let Namespace = "AMDGPU"; + let HWEncoding = encoding; +} + +class R600Reg_128 subregs, bits<16> encoding> : + RegisterWithSubRegs { + let Namespace = "AMDGPU"; + let SubRegIndices = [sel_x, sel_y, sel_z, sel_w]; + let HWEncoding = encoding; +} + +def C0_X : R600Reg <"C0.X", 0>; +def C0_Y : R600Reg <"C0.Y", 0>; +def C0_Z : R600Reg <"C0.Z", 0>; +def C0_W : R600Reg <"C0.W", 0>; +def C1_X : R600Reg <"C1.X", 1>; +def C1_Y : R600Reg <"C1.Y", 1>; +def C1_Z : R600Reg <"C1.Z", 1>; +def C1_W : R600Reg <"C1.W", 1>; +def C2_X : R600Reg <"C2.X", 2>; +def C2_Y : R600Reg <"C2.Y", 2>; +def C2_Z : R600Reg <"C2.Z", 2>; +def C2_W : R600Reg <"C2.W", 2>; +def C3_X : R600Reg <"C3.X", 3>; +def C3_Y : R600Reg <"C3.Y", 3>; +def C3_Z : R600Reg <"C3.Z", 3>; +def C3_W : R600Reg <"C3.W", 3>; +def C4_X : R600Reg <"C4.X", 4>; +def C4_Y : R600Reg <"C4.Y", 4>; +def C4_Z : R600Reg <"C4.Z", 4>; +def C4_W : R600Reg <"C4.W", 4>; +def C5_X : R600Reg <"C5.X", 5>; +def C5_Y : R600Reg <"C5.Y", 5>; +def C5_Z : R600Reg <"C5.Z", 5>; +def C5_W : R600Reg <"C5.W", 5>; +def C6_X : R600Reg <"C6.X", 6>; +def C6_Y : R600Reg <"C6.Y", 6>; +def C6_Z : R600Reg <"C6.Z", 6>; +def C6_W : R600Reg <"C6.W", 6>; +def C7_X : R600Reg <"C7.X", 7>; +def C7_Y : R600Reg <"C7.Y", 7>; +def C7_Z : R600Reg <"C7.Z", 7>; +def C7_W : R600Reg <"C7.W", 7>; +def C8_X : R600Reg <"C8.X", 8>; +def C8_Y : R600Reg <"C8.Y", 8>; +def C8_Z : R600Reg <"C8.Z", 8>; +def C8_W : R600Reg <"C8.W", 8>; +def C9_X : R600Reg <"C9.X", 9>; +def C9_Y : R600Reg <"C9.Y", 9>; +def C9_Z : R600Reg <"C9.Z", 9>; +def C9_W : R600Reg <"C9.W", 9>; +def C10_X : R600Reg <"C10.X", 10>; +def C10_Y : R600Reg 
<"C10.Y", 10>; +def C10_Z : R600Reg <"C10.Z", 10>; +def C10_W : R600Reg <"C10.W", 10>; +def C11_X : R600Reg <"C11.X", 11>; +def C11_Y : R600Reg <"C11.Y", 11>; +def C11_Z : R600Reg <"C11.Z", 11>; +def C11_W : R600Reg <"C11.W", 11>; +def C12_X : R600Reg <"C12.X", 12>; +def C12_Y : R600Reg <"C12.Y", 12>; +def C12_Z : R600Reg <"C12.Z", 12>; +def C12_W : R600Reg <"C12.W", 12>; +def C13_X : R600Reg <"C13.X", 13>; +def C13_Y : R600Reg <"C13.Y", 13>; +def C13_Z : R600Reg <"C13.Z", 13>; +def C13_W : R600Reg <"C13.W", 13>; +def C14_X : R600Reg <"C14.X", 14>; +def C14_Y : R600Reg <"C14.Y", 14>; +def C14_Z : R600Reg <"C14.Z", 14>; +def C14_W : R600Reg <"C14.W", 14>; +def C15_X : R600Reg <"C15.X", 15>; +def C15_Y : R600Reg <"C15.Y", 15>; +def C15_Z : R600Reg <"C15.Z", 15>; +def C15_W : R600Reg <"C15.W", 15>; +def C16_X : R600Reg <"C16.X", 16>; +def C16_Y : R600Reg <"C16.Y", 16>; +def C16_Z : R600Reg <"C16.Z", 16>; +def C16_W : R600Reg <"C16.W", 16>; +def C17_X : R600Reg <"C17.X", 17>; +def C17_Y : R600Reg <"C17.Y", 17>; +def C17_Z : R600Reg <"C17.Z", 17>; +def C17_W : R600Reg <"C17.W", 17>; +def C18_X : R600Reg <"C18.X", 18>; +def C18_Y : R600Reg <"C18.Y", 18>; +def C18_Z : R600Reg <"C18.Z", 18>; +def C18_W : R600Reg <"C18.W", 18>; +def C19_X : R600Reg <"C19.X", 19>; +def C19_Y : R600Reg <"C19.Y", 19>; +def C19_Z : R600Reg <"C19.Z", 19>; +def C19_W : R600Reg <"C19.W", 19>; +def C20_X : R600Reg <"C20.X", 20>; +def C20_Y : R600Reg <"C20.Y", 20>; +def C20_Z : R600Reg <"C20.Z", 20>; +def C20_W : R600Reg <"C20.W", 20>; +def C21_X : R600Reg <"C21.X", 21>; +def C21_Y : R600Reg <"C21.Y", 21>; +def C21_Z : R600Reg <"C21.Z", 21>; +def C21_W : R600Reg <"C21.W", 21>; +def C22_X : R600Reg <"C22.X", 22>; +def C22_Y : R600Reg <"C22.Y", 22>; +def C22_Z : R600Reg <"C22.Z", 22>; +def C22_W : R600Reg <"C22.W", 22>; +def C23_X : R600Reg <"C23.X", 23>; +def C23_Y : R600Reg <"C23.Y", 23>; +def C23_Z : R600Reg <"C23.Z", 23>; +def C23_W : R600Reg <"C23.W", 23>; +def C24_X : R600Reg <"C24.X", 24>; +def 
C24_Y : R600Reg <"C24.Y", 24>; +def C24_Z : R600Reg <"C24.Z", 24>; +def C24_W : R600Reg <"C24.W", 24>; +def C25_X : R600Reg <"C25.X", 25>; +def C25_Y : R600Reg <"C25.Y", 25>; +def C25_Z : R600Reg <"C25.Z", 25>; +def C25_W : R600Reg <"C25.W", 25>; +def C26_X : R600Reg <"C26.X", 26>; +def C26_Y : R600Reg <"C26.Y", 26>; +def C26_Z : R600Reg <"C26.Z", 26>; +def C26_W : R600Reg <"C26.W", 26>; +def C27_X : R600Reg <"C27.X", 27>; +def C27_Y : R600Reg <"C27.Y", 27>; +def C27_Z : R600Reg <"C27.Z", 27>; +def C27_W : R600Reg <"C27.W", 27>; +def C28_X : R600Reg <"C28.X", 28>; +def C28_Y : R600Reg <"C28.Y", 28>; +def C28_Z : R600Reg <"C28.Z", 28>; +def C28_W : R600Reg <"C28.W", 28>; +def C29_X : R600Reg <"C29.X", 29>; +def C29_Y : R600Reg <"C29.Y", 29>; +def C29_Z : R600Reg <"C29.Z", 29>; +def C29_W : R600Reg <"C29.W", 29>; +def C30_X : R600Reg <"C30.X", 30>; +def C30_Y : R600Reg <"C30.Y", 30>; +def C30_Z : R600Reg <"C30.Z", 30>; +def C30_W : R600Reg <"C30.W", 30>; +def C31_X : R600Reg <"C31.X", 31>; +def C31_Y : R600Reg <"C31.Y", 31>; +def C31_Z : R600Reg <"C31.Z", 31>; +def C31_W : R600Reg <"C31.W", 31>; +def C32_X : R600Reg <"C32.X", 32>; +def C32_Y : R600Reg <"C32.Y", 32>; +def C32_Z : R600Reg <"C32.Z", 32>; +def C32_W : R600Reg <"C32.W", 32>; +def C33_X : R600Reg <"C33.X", 33>; +def C33_Y : R600Reg <"C33.Y", 33>; +def C33_Z : R600Reg <"C33.Z", 33>; +def C33_W : R600Reg <"C33.W", 33>; +def C34_X : R600Reg <"C34.X", 34>; +def C34_Y : R600Reg <"C34.Y", 34>; +def C34_Z : R600Reg <"C34.Z", 34>; +def C34_W : R600Reg <"C34.W", 34>; +def C35_X : R600Reg <"C35.X", 35>; +def C35_Y : R600Reg <"C35.Y", 35>; +def C35_Z : R600Reg <"C35.Z", 35>; +def C35_W : R600Reg <"C35.W", 35>; +def C36_X : R600Reg <"C36.X", 36>; +def C36_Y : R600Reg <"C36.Y", 36>; +def C36_Z : R600Reg <"C36.Z", 36>; +def C36_W : R600Reg <"C36.W", 36>; +def C37_X : R600Reg <"C37.X", 37>; +def C37_Y : R600Reg <"C37.Y", 37>; +def C37_Z : R600Reg <"C37.Z", 37>; +def C37_W : R600Reg <"C37.W", 37>; +def C38_X : R600Reg 
<"C38.X", 38>; +def C38_Y : R600Reg <"C38.Y", 38>; +def C38_Z : R600Reg <"C38.Z", 38>; +def C38_W : R600Reg <"C38.W", 38>; +def C39_X : R600Reg <"C39.X", 39>; +def C39_Y : R600Reg <"C39.Y", 39>; +def C39_Z : R600Reg <"C39.Z", 39>; +def C39_W : R600Reg <"C39.W", 39>; +def C40_X : R600Reg <"C40.X", 40>; +def C40_Y : R600Reg <"C40.Y", 40>; +def C40_Z : R600Reg <"C40.Z", 40>; +def C40_W : R600Reg <"C40.W", 40>; +def C41_X : R600Reg <"C41.X", 41>; +def C41_Y : R600Reg <"C41.Y", 41>; +def C41_Z : R600Reg <"C41.Z", 41>; +def C41_W : R600Reg <"C41.W", 41>; +def C42_X : R600Reg <"C42.X", 42>; +def C42_Y : R600Reg <"C42.Y", 42>; +def C42_Z : R600Reg <"C42.Z", 42>; +def C42_W : R600Reg <"C42.W", 42>; +def C43_X : R600Reg <"C43.X", 43>; +def C43_Y : R600Reg <"C43.Y", 43>; +def C43_Z : R600Reg <"C43.Z", 43>; +def C43_W : R600Reg <"C43.W", 43>; +def C44_X : R600Reg <"C44.X", 44>; +def C44_Y : R600Reg <"C44.Y", 44>; +def C44_Z : R600Reg <"C44.Z", 44>; +def C44_W : R600Reg <"C44.W", 44>; +def C45_X : R600Reg <"C45.X", 45>; +def C45_Y : R600Reg <"C45.Y", 45>; +def C45_Z : R600Reg <"C45.Z", 45>; +def C45_W : R600Reg <"C45.W", 45>; +def C46_X : R600Reg <"C46.X", 46>; +def C46_Y : R600Reg <"C46.Y", 46>; +def C46_Z : R600Reg <"C46.Z", 46>; +def C46_W : R600Reg <"C46.W", 46>; +def C47_X : R600Reg <"C47.X", 47>; +def C47_Y : R600Reg <"C47.Y", 47>; +def C47_Z : R600Reg <"C47.Z", 47>; +def C47_W : R600Reg <"C47.W", 47>; +def C48_X : R600Reg <"C48.X", 48>; +def C48_Y : R600Reg <"C48.Y", 48>; +def C48_Z : R600Reg <"C48.Z", 48>; +def C48_W : R600Reg <"C48.W", 48>; +def C49_X : R600Reg <"C49.X", 49>; +def C49_Y : R600Reg <"C49.Y", 49>; +def C49_Z : R600Reg <"C49.Z", 49>; +def C49_W : R600Reg <"C49.W", 49>; +def C50_X : R600Reg <"C50.X", 50>; +def C50_Y : R600Reg <"C50.Y", 50>; +def C50_Z : R600Reg <"C50.Z", 50>; +def C50_W : R600Reg <"C50.W", 50>; +def C51_X : R600Reg <"C51.X", 51>; +def C51_Y : R600Reg <"C51.Y", 51>; +def C51_Z : R600Reg <"C51.Z", 51>; +def C51_W : R600Reg <"C51.W", 51>; +def 
C52_X : R600Reg <"C52.X", 52>; +def C52_Y : R600Reg <"C52.Y", 52>; +def C52_Z : R600Reg <"C52.Z", 52>; +def C52_W : R600Reg <"C52.W", 52>; +def C53_X : R600Reg <"C53.X", 53>; +def C53_Y : R600Reg <"C53.Y", 53>; +def C53_Z : R600Reg <"C53.Z", 53>; +def C53_W : R600Reg <"C53.W", 53>; +def C54_X : R600Reg <"C54.X", 54>; +def C54_Y : R600Reg <"C54.Y", 54>; +def C54_Z : R600Reg <"C54.Z", 54>; +def C54_W : R600Reg <"C54.W", 54>; +def C55_X : R600Reg <"C55.X", 55>; +def C55_Y : R600Reg <"C55.Y", 55>; +def C55_Z : R600Reg <"C55.Z", 55>; +def C55_W : R600Reg <"C55.W", 55>; +def C56_X : R600Reg <"C56.X", 56>; +def C56_Y : R600Reg <"C56.Y", 56>; +def C56_Z : R600Reg <"C56.Z", 56>; +def C56_W : R600Reg <"C56.W", 56>; +def C57_X : R600Reg <"C57.X", 57>; +def C57_Y : R600Reg <"C57.Y", 57>; +def C57_Z : R600Reg <"C57.Z", 57>; +def C57_W : R600Reg <"C57.W", 57>; +def C58_X : R600Reg <"C58.X", 58>; +def C58_Y : R600Reg <"C58.Y", 58>; +def C58_Z : R600Reg <"C58.Z", 58>; +def C58_W : R600Reg <"C58.W", 58>; +def C59_X : R600Reg <"C59.X", 59>; +def C59_Y : R600Reg <"C59.Y", 59>; +def C59_Z : R600Reg <"C59.Z", 59>; +def C59_W : R600Reg <"C59.W", 59>; +def C60_X : R600Reg <"C60.X", 60>; +def C60_Y : R600Reg <"C60.Y", 60>; +def C60_Z : R600Reg <"C60.Z", 60>; +def C60_W : R600Reg <"C60.W", 60>; +def C61_X : R600Reg <"C61.X", 61>; +def C61_Y : R600Reg <"C61.Y", 61>; +def C61_Z : R600Reg <"C61.Z", 61>; +def C61_W : R600Reg <"C61.W", 61>; +def C62_X : R600Reg <"C62.X", 62>; +def C62_Y : R600Reg <"C62.Y", 62>; +def C62_Z : R600Reg <"C62.Z", 62>; +def C62_W : R600Reg <"C62.W", 62>; +def C63_X : R600Reg <"C63.X", 63>; +def C63_Y : R600Reg <"C63.Y", 63>; +def C63_Z : R600Reg <"C63.Z", 63>; +def C63_W : R600Reg <"C63.W", 63>; +def C64_X : R600Reg <"C64.X", 64>; +def C64_Y : R600Reg <"C64.Y", 64>; +def C64_Z : R600Reg <"C64.Z", 64>; +def C64_W : R600Reg <"C64.W", 64>; +def C65_X : R600Reg <"C65.X", 65>; +def C65_Y : R600Reg <"C65.Y", 65>; +def C65_Z : R600Reg <"C65.Z", 65>; +def C65_W : R600Reg 
<"C65.W", 65>; +def C66_X : R600Reg <"C66.X", 66>; +def C66_Y : R600Reg <"C66.Y", 66>; +def C66_Z : R600Reg <"C66.Z", 66>; +def C66_W : R600Reg <"C66.W", 66>; +def C67_X : R600Reg <"C67.X", 67>; +def C67_Y : R600Reg <"C67.Y", 67>; +def C67_Z : R600Reg <"C67.Z", 67>; +def C67_W : R600Reg <"C67.W", 67>; +def C68_X : R600Reg <"C68.X", 68>; +def C68_Y : R600Reg <"C68.Y", 68>; +def C68_Z : R600Reg <"C68.Z", 68>; +def C68_W : R600Reg <"C68.W", 68>; +def C69_X : R600Reg <"C69.X", 69>; +def C69_Y : R600Reg <"C69.Y", 69>; +def C69_Z : R600Reg <"C69.Z", 69>; +def C69_W : R600Reg <"C69.W", 69>; +def C70_X : R600Reg <"C70.X", 70>; +def C70_Y : R600Reg <"C70.Y", 70>; +def C70_Z : R600Reg <"C70.Z", 70>; +def C70_W : R600Reg <"C70.W", 70>; +def C71_X : R600Reg <"C71.X", 71>; +def C71_Y : R600Reg <"C71.Y", 71>; +def C71_Z : R600Reg <"C71.Z", 71>; +def C71_W : R600Reg <"C71.W", 71>; +def C72_X : R600Reg <"C72.X", 72>; +def C72_Y : R600Reg <"C72.Y", 72>; +def C72_Z : R600Reg <"C72.Z", 72>; +def C72_W : R600Reg <"C72.W", 72>; +def C73_X : R600Reg <"C73.X", 73>; +def C73_Y : R600Reg <"C73.Y", 73>; +def C73_Z : R600Reg <"C73.Z", 73>; +def C73_W : R600Reg <"C73.W", 73>; +def C74_X : R600Reg <"C74.X", 74>; +def C74_Y : R600Reg <"C74.Y", 74>; +def C74_Z : R600Reg <"C74.Z", 74>; +def C74_W : R600Reg <"C74.W", 74>; +def C75_X : R600Reg <"C75.X", 75>; +def C75_Y : R600Reg <"C75.Y", 75>; +def C75_Z : R600Reg <"C75.Z", 75>; +def C75_W : R600Reg <"C75.W", 75>; +def C76_X : R600Reg <"C76.X", 76>; +def C76_Y : R600Reg <"C76.Y", 76>; +def C76_Z : R600Reg <"C76.Z", 76>; +def C76_W : R600Reg <"C76.W", 76>; +def C77_X : R600Reg <"C77.X", 77>; +def C77_Y : R600Reg <"C77.Y", 77>; +def C77_Z : R600Reg <"C77.Z", 77>; +def C77_W : R600Reg <"C77.W", 77>; +def C78_X : R600Reg <"C78.X", 78>; +def C78_Y : R600Reg <"C78.Y", 78>; +def C78_Z : R600Reg <"C78.Z", 78>; +def C78_W : R600Reg <"C78.W", 78>; +def C79_X : R600Reg <"C79.X", 79>; +def C79_Y : R600Reg <"C79.Y", 79>; +def C79_Z : R600Reg <"C79.Z", 79>; +def 
C79_W : R600Reg <"C79.W", 79>; +def C80_X : R600Reg <"C80.X", 80>; +def C80_Y : R600Reg <"C80.Y", 80>; +def C80_Z : R600Reg <"C80.Z", 80>; +def C80_W : R600Reg <"C80.W", 80>; +def C81_X : R600Reg <"C81.X", 81>; +def C81_Y : R600Reg <"C81.Y", 81>; +def C81_Z : R600Reg <"C81.Z", 81>; +def C81_W : R600Reg <"C81.W", 81>; +def C82_X : R600Reg <"C82.X", 82>; +def C82_Y : R600Reg <"C82.Y", 82>; +def C82_Z : R600Reg <"C82.Z", 82>; +def C82_W : R600Reg <"C82.W", 82>; +def C83_X : R600Reg <"C83.X", 83>; +def C83_Y : R600Reg <"C83.Y", 83>; +def C83_Z : R600Reg <"C83.Z", 83>; +def C83_W : R600Reg <"C83.W", 83>; +def C84_X : R600Reg <"C84.X", 84>; +def C84_Y : R600Reg <"C84.Y", 84>; +def C84_Z : R600Reg <"C84.Z", 84>; +def C84_W : R600Reg <"C84.W", 84>; +def C85_X : R600Reg <"C85.X", 85>; +def C85_Y : R600Reg <"C85.Y", 85>; +def C85_Z : R600Reg <"C85.Z", 85>; +def C85_W : R600Reg <"C85.W", 85>; +def C86_X : R600Reg <"C86.X", 86>; +def C86_Y : R600Reg <"C86.Y", 86>; +def C86_Z : R600Reg <"C86.Z", 86>; +def C86_W : R600Reg <"C86.W", 86>; +def C87_X : R600Reg <"C87.X", 87>; +def C87_Y : R600Reg <"C87.Y", 87>; +def C87_Z : R600Reg <"C87.Z", 87>; +def C87_W : R600Reg <"C87.W", 87>; +def C88_X : R600Reg <"C88.X", 88>; +def C88_Y : R600Reg <"C88.Y", 88>; +def C88_Z : R600Reg <"C88.Z", 88>; +def C88_W : R600Reg <"C88.W", 88>; +def C89_X : R600Reg <"C89.X", 89>; +def C89_Y : R600Reg <"C89.Y", 89>; +def C89_Z : R600Reg <"C89.Z", 89>; +def C89_W : R600Reg <"C89.W", 89>; +def C90_X : R600Reg <"C90.X", 90>; +def C90_Y : R600Reg <"C90.Y", 90>; +def C90_Z : R600Reg <"C90.Z", 90>; +def C90_W : R600Reg <"C90.W", 90>; +def C91_X : R600Reg <"C91.X", 91>; +def C91_Y : R600Reg <"C91.Y", 91>; +def C91_Z : R600Reg <"C91.Z", 91>; +def C91_W : R600Reg <"C91.W", 91>; +def C92_X : R600Reg <"C92.X", 92>; +def C92_Y : R600Reg <"C92.Y", 92>; +def C92_Z : R600Reg <"C92.Z", 92>; +def C92_W : R600Reg <"C92.W", 92>; +def C93_X : R600Reg <"C93.X", 93>; +def C93_Y : R600Reg <"C93.Y", 93>; +def C93_Z : R600Reg 
<"C93.Z", 93>; +def C93_W : R600Reg <"C93.W", 93>; +def C94_X : R600Reg <"C94.X", 94>; +def C94_Y : R600Reg <"C94.Y", 94>; +def C94_Z : R600Reg <"C94.Z", 94>; +def C94_W : R600Reg <"C94.W", 94>; +def C95_X : R600Reg <"C95.X", 95>; +def C95_Y : R600Reg <"C95.Y", 95>; +def C95_Z : R600Reg <"C95.Z", 95>; +def C95_W : R600Reg <"C95.W", 95>; +def C96_X : R600Reg <"C96.X", 96>; +def C96_Y : R600Reg <"C96.Y", 96>; +def C96_Z : R600Reg <"C96.Z", 96>; +def C96_W : R600Reg <"C96.W", 96>; +def C97_X : R600Reg <"C97.X", 97>; +def C97_Y : R600Reg <"C97.Y", 97>; +def C97_Z : R600Reg <"C97.Z", 97>; +def C97_W : R600Reg <"C97.W", 97>; +def C98_X : R600Reg <"C98.X", 98>; +def C98_Y : R600Reg <"C98.Y", 98>; +def C98_Z : R600Reg <"C98.Z", 98>; +def C98_W : R600Reg <"C98.W", 98>; +def C99_X : R600Reg <"C99.X", 99>; +def C99_Y : R600Reg <"C99.Y", 99>; +def C99_Z : R600Reg <"C99.Z", 99>; +def C99_W : R600Reg <"C99.W", 99>; +def C100_X : R600Reg <"C100.X", 100>; +def C100_Y : R600Reg <"C100.Y", 100>; +def C100_Z : R600Reg <"C100.Z", 100>; +def C100_W : R600Reg <"C100.W", 100>; +def C101_X : R600Reg <"C101.X", 101>; +def C101_Y : R600Reg <"C101.Y", 101>; +def C101_Z : R600Reg <"C101.Z", 101>; +def C101_W : R600Reg <"C101.W", 101>; +def C102_X : R600Reg <"C102.X", 102>; +def C102_Y : R600Reg <"C102.Y", 102>; +def C102_Z : R600Reg <"C102.Z", 102>; +def C102_W : R600Reg <"C102.W", 102>; +def C103_X : R600Reg <"C103.X", 103>; +def C103_Y : R600Reg <"C103.Y", 103>; +def C103_Z : R600Reg <"C103.Z", 103>; +def C103_W : R600Reg <"C103.W", 103>; +def C104_X : R600Reg <"C104.X", 104>; +def C104_Y : R600Reg <"C104.Y", 104>; +def C104_Z : R600Reg <"C104.Z", 104>; +def C104_W : R600Reg <"C104.W", 104>; +def C105_X : R600Reg <"C105.X", 105>; +def C105_Y : R600Reg <"C105.Y", 105>; +def C105_Z : R600Reg <"C105.Z", 105>; +def C105_W : R600Reg <"C105.W", 105>; +def C106_X : R600Reg <"C106.X", 106>; +def C106_Y : R600Reg <"C106.Y", 106>; +def C106_Z : R600Reg <"C106.Z", 106>; +def C106_W : R600Reg 
<"C106.W", 106>; +def C107_X : R600Reg <"C107.X", 107>; +def C107_Y : R600Reg <"C107.Y", 107>; +def C107_Z : R600Reg <"C107.Z", 107>; +def C107_W : R600Reg <"C107.W", 107>; +def C108_X : R600Reg <"C108.X", 108>; +def C108_Y : R600Reg <"C108.Y", 108>; +def C108_Z : R600Reg <"C108.Z", 108>; +def C108_W : R600Reg <"C108.W", 108>; +def C109_X : R600Reg <"C109.X", 109>; +def C109_Y : R600Reg <"C109.Y", 109>; +def C109_Z : R600Reg <"C109.Z", 109>; +def C109_W : R600Reg <"C109.W", 109>; +def C110_X : R600Reg <"C110.X", 110>; +def C110_Y : R600Reg <"C110.Y", 110>; +def C110_Z : R600Reg <"C110.Z", 110>; +def C110_W : R600Reg <"C110.W", 110>; +def C111_X : R600Reg <"C111.X", 111>; +def C111_Y : R600Reg <"C111.Y", 111>; +def C111_Z : R600Reg <"C111.Z", 111>; +def C111_W : R600Reg <"C111.W", 111>; +def C112_X : R600Reg <"C112.X", 112>; +def C112_Y : R600Reg <"C112.Y", 112>; +def C112_Z : R600Reg <"C112.Z", 112>; +def C112_W : R600Reg <"C112.W", 112>; +def C113_X : R600Reg <"C113.X", 113>; +def C113_Y : R600Reg <"C113.Y", 113>; +def C113_Z : R600Reg <"C113.Z", 113>; +def C113_W : R600Reg <"C113.W", 113>; +def C114_X : R600Reg <"C114.X", 114>; +def C114_Y : R600Reg <"C114.Y", 114>; +def C114_Z : R600Reg <"C114.Z", 114>; +def C114_W : R600Reg <"C114.W", 114>; +def C115_X : R600Reg <"C115.X", 115>; +def C115_Y : R600Reg <"C115.Y", 115>; +def C115_Z : R600Reg <"C115.Z", 115>; +def C115_W : R600Reg <"C115.W", 115>; +def C116_X : R600Reg <"C116.X", 116>; +def C116_Y : R600Reg <"C116.Y", 116>; +def C116_Z : R600Reg <"C116.Z", 116>; +def C116_W : R600Reg <"C116.W", 116>; +def C117_X : R600Reg <"C117.X", 117>; +def C117_Y : R600Reg <"C117.Y", 117>; +def C117_Z : R600Reg <"C117.Z", 117>; +def C117_W : R600Reg <"C117.W", 117>; +def C118_X : R600Reg <"C118.X", 118>; +def C118_Y : R600Reg <"C118.Y", 118>; +def C118_Z : R600Reg <"C118.Z", 118>; +def C118_W : R600Reg <"C118.W", 118>; +def C119_X : R600Reg <"C119.X", 119>; +def C119_Y : R600Reg <"C119.Y", 119>; +def C119_Z : R600Reg <"C119.Z", 
119>; +def C119_W : R600Reg <"C119.W", 119>; +def C120_X : R600Reg <"C120.X", 120>; +def C120_Y : R600Reg <"C120.Y", 120>; +def C120_Z : R600Reg <"C120.Z", 120>; +def C120_W : R600Reg <"C120.W", 120>; +def C121_X : R600Reg <"C121.X", 121>; +def C121_Y : R600Reg <"C121.Y", 121>; +def C121_Z : R600Reg <"C121.Z", 121>; +def C121_W : R600Reg <"C121.W", 121>; +def C122_X : R600Reg <"C122.X", 122>; +def C122_Y : R600Reg <"C122.Y", 122>; +def C122_Z : R600Reg <"C122.Z", 122>; +def C122_W : R600Reg <"C122.W", 122>; +def C123_X : R600Reg <"C123.X", 123>; +def C123_Y : R600Reg <"C123.Y", 123>; +def C123_Z : R600Reg <"C123.Z", 123>; +def C123_W : R600Reg <"C123.W", 123>; +def C124_X : R600Reg <"C124.X", 124>; +def C124_Y : R600Reg <"C124.Y", 124>; +def C124_Z : R600Reg <"C124.Z", 124>; +def C124_W : R600Reg <"C124.W", 124>; +def C125_X : R600Reg <"C125.X", 125>; +def C125_Y : R600Reg <"C125.Y", 125>; +def C125_Z : R600Reg <"C125.Z", 125>; +def C125_W : R600Reg <"C125.W", 125>; +def C126_X : R600Reg <"C126.X", 126>; +def C126_Y : R600Reg <"C126.Y", 126>; +def C126_Z : R600Reg <"C126.Z", 126>; +def C126_W : R600Reg <"C126.W", 126>; +def C127_X : R600Reg <"C127.X", 127>; +def C127_Y : R600Reg <"C127.Y", 127>; +def C127_Z : R600Reg <"C127.Z", 127>; +def C127_W : R600Reg <"C127.W", 127>; +def C128_X : R600Reg <"C128.X", 128>; +def C128_Y : R600Reg <"C128.Y", 128>; +def C128_Z : R600Reg <"C128.Z", 128>; +def C128_W : R600Reg <"C128.W", 128>; +def C129_X : R600Reg <"C129.X", 129>; +def C129_Y : R600Reg <"C129.Y", 129>; +def C129_Z : R600Reg <"C129.Z", 129>; +def C129_W : R600Reg <"C129.W", 129>; +def C130_X : R600Reg <"C130.X", 130>; +def C130_Y : R600Reg <"C130.Y", 130>; +def C130_Z : R600Reg <"C130.Z", 130>; +def C130_W : R600Reg <"C130.W", 130>; +def C131_X : R600Reg <"C131.X", 131>; +def C131_Y : R600Reg <"C131.Y", 131>; +def C131_Z : R600Reg <"C131.Z", 131>; +def C131_W : R600Reg <"C131.W", 131>; +def C132_X : R600Reg <"C132.X", 132>; +def C132_Y : R600Reg <"C132.Y", 132>; +def 
C132_Z : R600Reg <"C132.Z", 132>; +def C132_W : R600Reg <"C132.W", 132>; +def C133_X : R600Reg <"C133.X", 133>; +def C133_Y : R600Reg <"C133.Y", 133>; +def C133_Z : R600Reg <"C133.Z", 133>; +def C133_W : R600Reg <"C133.W", 133>; +def C134_X : R600Reg <"C134.X", 134>; +def C134_Y : R600Reg <"C134.Y", 134>; +def C134_Z : R600Reg <"C134.Z", 134>; +def C134_W : R600Reg <"C134.W", 134>; +def C135_X : R600Reg <"C135.X", 135>; +def C135_Y : R600Reg <"C135.Y", 135>; +def C135_Z : R600Reg <"C135.Z", 135>; +def C135_W : R600Reg <"C135.W", 135>; +def C136_X : R600Reg <"C136.X", 136>; +def C136_Y : R600Reg <"C136.Y", 136>; +def C136_Z : R600Reg <"C136.Z", 136>; +def C136_W : R600Reg <"C136.W", 136>; +def C137_X : R600Reg <"C137.X", 137>; +def C137_Y : R600Reg <"C137.Y", 137>; +def C137_Z : R600Reg <"C137.Z", 137>; +def C137_W : R600Reg <"C137.W", 137>; +def C138_X : R600Reg <"C138.X", 138>; +def C138_Y : R600Reg <"C138.Y", 138>; +def C138_Z : R600Reg <"C138.Z", 138>; +def C138_W : R600Reg <"C138.W", 138>; +def C139_X : R600Reg <"C139.X", 139>; +def C139_Y : R600Reg <"C139.Y", 139>; +def C139_Z : R600Reg <"C139.Z", 139>; +def C139_W : R600Reg <"C139.W", 139>; +def C140_X : R600Reg <"C140.X", 140>; +def C140_Y : R600Reg <"C140.Y", 140>; +def C140_Z : R600Reg <"C140.Z", 140>; +def C140_W : R600Reg <"C140.W", 140>; +def C141_X : R600Reg <"C141.X", 141>; +def C141_Y : R600Reg <"C141.Y", 141>; +def C141_Z : R600Reg <"C141.Z", 141>; +def C141_W : R600Reg <"C141.W", 141>; +def C142_X : R600Reg <"C142.X", 142>; +def C142_Y : R600Reg <"C142.Y", 142>; +def C142_Z : R600Reg <"C142.Z", 142>; +def C142_W : R600Reg <"C142.W", 142>; +def C143_X : R600Reg <"C143.X", 143>; +def C143_Y : R600Reg <"C143.Y", 143>; +def C143_Z : R600Reg <"C143.Z", 143>; +def C143_W : R600Reg <"C143.W", 143>; +def C144_X : R600Reg <"C144.X", 144>; +def C144_Y : R600Reg <"C144.Y", 144>; +def C144_Z : R600Reg <"C144.Z", 144>; +def C144_W : R600Reg <"C144.W", 144>; +def C145_X : R600Reg <"C145.X", 145>; +def C145_Y : 
R600Reg <"C145.Y", 145>; +def C145_Z : R600Reg <"C145.Z", 145>; +def C145_W : R600Reg <"C145.W", 145>; +def C146_X : R600Reg <"C146.X", 146>; +def C146_Y : R600Reg <"C146.Y", 146>; +def C146_Z : R600Reg <"C146.Z", 146>; +def C146_W : R600Reg <"C146.W", 146>; +def C147_X : R600Reg <"C147.X", 147>; +def C147_Y : R600Reg <"C147.Y", 147>; +def C147_Z : R600Reg <"C147.Z", 147>; +def C147_W : R600Reg <"C147.W", 147>; +def C148_X : R600Reg <"C148.X", 148>; +def C148_Y : R600Reg <"C148.Y", 148>; +def C148_Z : R600Reg <"C148.Z", 148>; +def C148_W : R600Reg <"C148.W", 148>; +def C149_X : R600Reg <"C149.X", 149>; +def C149_Y : R600Reg <"C149.Y", 149>; +def C149_Z : R600Reg <"C149.Z", 149>; +def C149_W : R600Reg <"C149.W", 149>; +def C150_X : R600Reg <"C150.X", 150>; +def C150_Y : R600Reg <"C150.Y", 150>; +def C150_Z : R600Reg <"C150.Z", 150>; +def C150_W : R600Reg <"C150.W", 150>; +def C151_X : R600Reg <"C151.X", 151>; +def C151_Y : R600Reg <"C151.Y", 151>; +def C151_Z : R600Reg <"C151.Z", 151>; +def C151_W : R600Reg <"C151.W", 151>; +def C152_X : R600Reg <"C152.X", 152>; +def C152_Y : R600Reg <"C152.Y", 152>; +def C152_Z : R600Reg <"C152.Z", 152>; +def C152_W : R600Reg <"C152.W", 152>; +def C153_X : R600Reg <"C153.X", 153>; +def C153_Y : R600Reg <"C153.Y", 153>; +def C153_Z : R600Reg <"C153.Z", 153>; +def C153_W : R600Reg <"C153.W", 153>; +def C154_X : R600Reg <"C154.X", 154>; +def C154_Y : R600Reg <"C154.Y", 154>; +def C154_Z : R600Reg <"C154.Z", 154>; +def C154_W : R600Reg <"C154.W", 154>; +def C155_X : R600Reg <"C155.X", 155>; +def C155_Y : R600Reg <"C155.Y", 155>; +def C155_Z : R600Reg <"C155.Z", 155>; +def C155_W : R600Reg <"C155.W", 155>; +def C156_X : R600Reg <"C156.X", 156>; +def C156_Y : R600Reg <"C156.Y", 156>; +def C156_Z : R600Reg <"C156.Z", 156>; +def C156_W : R600Reg <"C156.W", 156>; +def C157_X : R600Reg <"C157.X", 157>; +def C157_Y : R600Reg <"C157.Y", 157>; +def C157_Z : R600Reg <"C157.Z", 157>; +def C157_W : R600Reg <"C157.W", 157>; +def C158_X : R600Reg 
<"C158.X", 158>; +def C158_Y : R600Reg <"C158.Y", 158>; +def C158_Z : R600Reg <"C158.Z", 158>; +def C158_W : R600Reg <"C158.W", 158>; +def C159_X : R600Reg <"C159.X", 159>; +def C159_Y : R600Reg <"C159.Y", 159>; +def C159_Z : R600Reg <"C159.Z", 159>; +def C159_W : R600Reg <"C159.W", 159>; +def C160_X : R600Reg <"C160.X", 160>; +def C160_Y : R600Reg <"C160.Y", 160>; +def C160_Z : R600Reg <"C160.Z", 160>; +def C160_W : R600Reg <"C160.W", 160>; +def C161_X : R600Reg <"C161.X", 161>; +def C161_Y : R600Reg <"C161.Y", 161>; +def C161_Z : R600Reg <"C161.Z", 161>; +def C161_W : R600Reg <"C161.W", 161>; +def C162_X : R600Reg <"C162.X", 162>; +def C162_Y : R600Reg <"C162.Y", 162>; +def C162_Z : R600Reg <"C162.Z", 162>; +def C162_W : R600Reg <"C162.W", 162>; +def C163_X : R600Reg <"C163.X", 163>; +def C163_Y : R600Reg <"C163.Y", 163>; +def C163_Z : R600Reg <"C163.Z", 163>; +def C163_W : R600Reg <"C163.W", 163>; +def C164_X : R600Reg <"C164.X", 164>; +def C164_Y : R600Reg <"C164.Y", 164>; +def C164_Z : R600Reg <"C164.Z", 164>; +def C164_W : R600Reg <"C164.W", 164>; +def C165_X : R600Reg <"C165.X", 165>; +def C165_Y : R600Reg <"C165.Y", 165>; +def C165_Z : R600Reg <"C165.Z", 165>; +def C165_W : R600Reg <"C165.W", 165>; +def C166_X : R600Reg <"C166.X", 166>; +def C166_Y : R600Reg <"C166.Y", 166>; +def C166_Z : R600Reg <"C166.Z", 166>; +def C166_W : R600Reg <"C166.W", 166>; +def C167_X : R600Reg <"C167.X", 167>; +def C167_Y : R600Reg <"C167.Y", 167>; +def C167_Z : R600Reg <"C167.Z", 167>; +def C167_W : R600Reg <"C167.W", 167>; +def C168_X : R600Reg <"C168.X", 168>; +def C168_Y : R600Reg <"C168.Y", 168>; +def C168_Z : R600Reg <"C168.Z", 168>; +def C168_W : R600Reg <"C168.W", 168>; +def C169_X : R600Reg <"C169.X", 169>; +def C169_Y : R600Reg <"C169.Y", 169>; +def C169_Z : R600Reg <"C169.Z", 169>; +def C169_W : R600Reg <"C169.W", 169>; +def C170_X : R600Reg <"C170.X", 170>; +def C170_Y : R600Reg <"C170.Y", 170>; +def C170_Z : R600Reg <"C170.Z", 170>; +def C170_W : R600Reg <"C170.W", 
170>; +def C171_X : R600Reg <"C171.X", 171>; +def C171_Y : R600Reg <"C171.Y", 171>; +def C171_Z : R600Reg <"C171.Z", 171>; +def C171_W : R600Reg <"C171.W", 171>; +def C172_X : R600Reg <"C172.X", 172>; +def C172_Y : R600Reg <"C172.Y", 172>; +def C172_Z : R600Reg <"C172.Z", 172>; +def C172_W : R600Reg <"C172.W", 172>; +def C173_X : R600Reg <"C173.X", 173>; +def C173_Y : R600Reg <"C173.Y", 173>; +def C173_Z : R600Reg <"C173.Z", 173>; +def C173_W : R600Reg <"C173.W", 173>; +def C174_X : R600Reg <"C174.X", 174>; +def C174_Y : R600Reg <"C174.Y", 174>; +def C174_Z : R600Reg <"C174.Z", 174>; +def C174_W : R600Reg <"C174.W", 174>; +def C175_X : R600Reg <"C175.X", 175>; +def C175_Y : R600Reg <"C175.Y", 175>; +def C175_Z : R600Reg <"C175.Z", 175>; +def C175_W : R600Reg <"C175.W", 175>; +def C176_X : R600Reg <"C176.X", 176>; +def C176_Y : R600Reg <"C176.Y", 176>; +def C176_Z : R600Reg <"C176.Z", 176>; +def C176_W : R600Reg <"C176.W", 176>; +def C177_X : R600Reg <"C177.X", 177>; +def C177_Y : R600Reg <"C177.Y", 177>; +def C177_Z : R600Reg <"C177.Z", 177>; +def C177_W : R600Reg <"C177.W", 177>; +def C178_X : R600Reg <"C178.X", 178>; +def C178_Y : R600Reg <"C178.Y", 178>; +def C178_Z : R600Reg <"C178.Z", 178>; +def C178_W : R600Reg <"C178.W", 178>; +def C179_X : R600Reg <"C179.X", 179>; +def C179_Y : R600Reg <"C179.Y", 179>; +def C179_Z : R600Reg <"C179.Z", 179>; +def C179_W : R600Reg <"C179.W", 179>; +def C180_X : R600Reg <"C180.X", 180>; +def C180_Y : R600Reg <"C180.Y", 180>; +def C180_Z : R600Reg <"C180.Z", 180>; +def C180_W : R600Reg <"C180.W", 180>; +def C181_X : R600Reg <"C181.X", 181>; +def C181_Y : R600Reg <"C181.Y", 181>; +def C181_Z : R600Reg <"C181.Z", 181>; +def C181_W : R600Reg <"C181.W", 181>; +def C182_X : R600Reg <"C182.X", 182>; +def C182_Y : R600Reg <"C182.Y", 182>; +def C182_Z : R600Reg <"C182.Z", 182>; +def C182_W : R600Reg <"C182.W", 182>; +def C183_X : R600Reg <"C183.X", 183>; +def C183_Y : R600Reg <"C183.Y", 183>; +def C183_Z : R600Reg <"C183.Z", 183>; +def 
C183_W : R600Reg <"C183.W", 183>; +def C184_X : R600Reg <"C184.X", 184>; +def C184_Y : R600Reg <"C184.Y", 184>; +def C184_Z : R600Reg <"C184.Z", 184>; +def C184_W : R600Reg <"C184.W", 184>; +def C185_X : R600Reg <"C185.X", 185>; +def C185_Y : R600Reg <"C185.Y", 185>; +def C185_Z : R600Reg <"C185.Z", 185>; +def C185_W : R600Reg <"C185.W", 185>; +def C186_X : R600Reg <"C186.X", 186>; +def C186_Y : R600Reg <"C186.Y", 186>; +def C186_Z : R600Reg <"C186.Z", 186>; +def C186_W : R600Reg <"C186.W", 186>; +def C187_X : R600Reg <"C187.X", 187>; +def C187_Y : R600Reg <"C187.Y", 187>; +def C187_Z : R600Reg <"C187.Z", 187>; +def C187_W : R600Reg <"C187.W", 187>; +def C188_X : R600Reg <"C188.X", 188>; +def C188_Y : R600Reg <"C188.Y", 188>; +def C188_Z : R600Reg <"C188.Z", 188>; +def C188_W : R600Reg <"C188.W", 188>; +def C189_X : R600Reg <"C189.X", 189>; +def C189_Y : R600Reg <"C189.Y", 189>; +def C189_Z : R600Reg <"C189.Z", 189>; +def C189_W : R600Reg <"C189.W", 189>; +def C190_X : R600Reg <"C190.X", 190>; +def C190_Y : R600Reg <"C190.Y", 190>; +def C190_Z : R600Reg <"C190.Z", 190>; +def C190_W : R600Reg <"C190.W", 190>; +def C191_X : R600Reg <"C191.X", 191>; +def C191_Y : R600Reg <"C191.Y", 191>; +def C191_Z : R600Reg <"C191.Z", 191>; +def C191_W : R600Reg <"C191.W", 191>; +def C192_X : R600Reg <"C192.X", 192>; +def C192_Y : R600Reg <"C192.Y", 192>; +def C192_Z : R600Reg <"C192.Z", 192>; +def C192_W : R600Reg <"C192.W", 192>; +def C193_X : R600Reg <"C193.X", 193>; +def C193_Y : R600Reg <"C193.Y", 193>; +def C193_Z : R600Reg <"C193.Z", 193>; +def C193_W : R600Reg <"C193.W", 193>; +def C194_X : R600Reg <"C194.X", 194>; +def C194_Y : R600Reg <"C194.Y", 194>; +def C194_Z : R600Reg <"C194.Z", 194>; +def C194_W : R600Reg <"C194.W", 194>; +def C195_X : R600Reg <"C195.X", 195>; +def C195_Y : R600Reg <"C195.Y", 195>; +def C195_Z : R600Reg <"C195.Z", 195>; +def C195_W : R600Reg <"C195.W", 195>; +def C196_X : R600Reg <"C196.X", 196>; +def C196_Y : R600Reg <"C196.Y", 196>; +def C196_Z : 
R600Reg <"C196.Z", 196>; +def C196_W : R600Reg <"C196.W", 196>; +def C197_X : R600Reg <"C197.X", 197>; +def C197_Y : R600Reg <"C197.Y", 197>; +def C197_Z : R600Reg <"C197.Z", 197>; +def C197_W : R600Reg <"C197.W", 197>; +def C198_X : R600Reg <"C198.X", 198>; +def C198_Y : R600Reg <"C198.Y", 198>; +def C198_Z : R600Reg <"C198.Z", 198>; +def C198_W : R600Reg <"C198.W", 198>; +def C199_X : R600Reg <"C199.X", 199>; +def C199_Y : R600Reg <"C199.Y", 199>; +def C199_Z : R600Reg <"C199.Z", 199>; +def C199_W : R600Reg <"C199.W", 199>; +def C200_X : R600Reg <"C200.X", 200>; +def C200_Y : R600Reg <"C200.Y", 200>; +def C200_Z : R600Reg <"C200.Z", 200>; +def C200_W : R600Reg <"C200.W", 200>; +def C201_X : R600Reg <"C201.X", 201>; +def C201_Y : R600Reg <"C201.Y", 201>; +def C201_Z : R600Reg <"C201.Z", 201>; +def C201_W : R600Reg <"C201.W", 201>; +def C202_X : R600Reg <"C202.X", 202>; +def C202_Y : R600Reg <"C202.Y", 202>; +def C202_Z : R600Reg <"C202.Z", 202>; +def C202_W : R600Reg <"C202.W", 202>; +def C203_X : R600Reg <"C203.X", 203>; +def C203_Y : R600Reg <"C203.Y", 203>; +def C203_Z : R600Reg <"C203.Z", 203>; +def C203_W : R600Reg <"C203.W", 203>; +def C204_X : R600Reg <"C204.X", 204>; +def C204_Y : R600Reg <"C204.Y", 204>; +def C204_Z : R600Reg <"C204.Z", 204>; +def C204_W : R600Reg <"C204.W", 204>; +def C205_X : R600Reg <"C205.X", 205>; +def C205_Y : R600Reg <"C205.Y", 205>; +def C205_Z : R600Reg <"C205.Z", 205>; +def C205_W : R600Reg <"C205.W", 205>; +def C206_X : R600Reg <"C206.X", 206>; +def C206_Y : R600Reg <"C206.Y", 206>; +def C206_Z : R600Reg <"C206.Z", 206>; +def C206_W : R600Reg <"C206.W", 206>; +def C207_X : R600Reg <"C207.X", 207>; +def C207_Y : R600Reg <"C207.Y", 207>; +def C207_Z : R600Reg <"C207.Z", 207>; +def C207_W : R600Reg <"C207.W", 207>; +def C208_X : R600Reg <"C208.X", 208>; +def C208_Y : R600Reg <"C208.Y", 208>; +def C208_Z : R600Reg <"C208.Z", 208>; +def C208_W : R600Reg <"C208.W", 208>; +def C209_X : R600Reg <"C209.X", 209>; +def C209_Y : R600Reg 
<"C209.Y", 209>; +def C209_Z : R600Reg <"C209.Z", 209>; +def C209_W : R600Reg <"C209.W", 209>; +def C210_X : R600Reg <"C210.X", 210>; +def C210_Y : R600Reg <"C210.Y", 210>; +def C210_Z : R600Reg <"C210.Z", 210>; +def C210_W : R600Reg <"C210.W", 210>; +def C211_X : R600Reg <"C211.X", 211>; +def C211_Y : R600Reg <"C211.Y", 211>; +def C211_Z : R600Reg <"C211.Z", 211>; +def C211_W : R600Reg <"C211.W", 211>; +def C212_X : R600Reg <"C212.X", 212>; +def C212_Y : R600Reg <"C212.Y", 212>; +def C212_Z : R600Reg <"C212.Z", 212>; +def C212_W : R600Reg <"C212.W", 212>; +def C213_X : R600Reg <"C213.X", 213>; +def C213_Y : R600Reg <"C213.Y", 213>; +def C213_Z : R600Reg <"C213.Z", 213>; +def C213_W : R600Reg <"C213.W", 213>; +def C214_X : R600Reg <"C214.X", 214>; +def C214_Y : R600Reg <"C214.Y", 214>; +def C214_Z : R600Reg <"C214.Z", 214>; +def C214_W : R600Reg <"C214.W", 214>; +def C215_X : R600Reg <"C215.X", 215>; +def C215_Y : R600Reg <"C215.Y", 215>; +def C215_Z : R600Reg <"C215.Z", 215>; +def C215_W : R600Reg <"C215.W", 215>; +def C216_X : R600Reg <"C216.X", 216>; +def C216_Y : R600Reg <"C216.Y", 216>; +def C216_Z : R600Reg <"C216.Z", 216>; +def C216_W : R600Reg <"C216.W", 216>; +def C217_X : R600Reg <"C217.X", 217>; +def C217_Y : R600Reg <"C217.Y", 217>; +def C217_Z : R600Reg <"C217.Z", 217>; +def C217_W : R600Reg <"C217.W", 217>; +def C218_X : R600Reg <"C218.X", 218>; +def C218_Y : R600Reg <"C218.Y", 218>; +def C218_Z : R600Reg <"C218.Z", 218>; +def C218_W : R600Reg <"C218.W", 218>; +def C219_X : R600Reg <"C219.X", 219>; +def C219_Y : R600Reg <"C219.Y", 219>; +def C219_Z : R600Reg <"C219.Z", 219>; +def C219_W : R600Reg <"C219.W", 219>; +def C220_X : R600Reg <"C220.X", 220>; +def C220_Y : R600Reg <"C220.Y", 220>; +def C220_Z : R600Reg <"C220.Z", 220>; +def C220_W : R600Reg <"C220.W", 220>; +def C221_X : R600Reg <"C221.X", 221>; +def C221_Y : R600Reg <"C221.Y", 221>; +def C221_Z : R600Reg <"C221.Z", 221>; +def C221_W : R600Reg <"C221.W", 221>; +def C222_X : R600Reg <"C222.X", 
222>; +def C222_Y : R600Reg <"C222.Y", 222>; +def C222_Z : R600Reg <"C222.Z", 222>; +def C222_W : R600Reg <"C222.W", 222>; +def C223_X : R600Reg <"C223.X", 223>; +def C223_Y : R600Reg <"C223.Y", 223>; +def C223_Z : R600Reg <"C223.Z", 223>; +def C223_W : R600Reg <"C223.W", 223>; +def C224_X : R600Reg <"C224.X", 224>; +def C224_Y : R600Reg <"C224.Y", 224>; +def C224_Z : R600Reg <"C224.Z", 224>; +def C224_W : R600Reg <"C224.W", 224>; +def C225_X : R600Reg <"C225.X", 225>; +def C225_Y : R600Reg <"C225.Y", 225>; +def C225_Z : R600Reg <"C225.Z", 225>; +def C225_W : R600Reg <"C225.W", 225>; +def C226_X : R600Reg <"C226.X", 226>; +def C226_Y : R600Reg <"C226.Y", 226>; +def C226_Z : R600Reg <"C226.Z", 226>; +def C226_W : R600Reg <"C226.W", 226>; +def C227_X : R600Reg <"C227.X", 227>; +def C227_Y : R600Reg <"C227.Y", 227>; +def C227_Z : R600Reg <"C227.Z", 227>; +def C227_W : R600Reg <"C227.W", 227>; +def C228_X : R600Reg <"C228.X", 228>; +def C228_Y : R600Reg <"C228.Y", 228>; +def C228_Z : R600Reg <"C228.Z", 228>; +def C228_W : R600Reg <"C228.W", 228>; +def C229_X : R600Reg <"C229.X", 229>; +def C229_Y : R600Reg <"C229.Y", 229>; +def C229_Z : R600Reg <"C229.Z", 229>; +def C229_W : R600Reg <"C229.W", 229>; +def C230_X : R600Reg <"C230.X", 230>; +def C230_Y : R600Reg <"C230.Y", 230>; +def C230_Z : R600Reg <"C230.Z", 230>; +def C230_W : R600Reg <"C230.W", 230>; +def C231_X : R600Reg <"C231.X", 231>; +def C231_Y : R600Reg <"C231.Y", 231>; +def C231_Z : R600Reg <"C231.Z", 231>; +def C231_W : R600Reg <"C231.W", 231>; +def C232_X : R600Reg <"C232.X", 232>; +def C232_Y : R600Reg <"C232.Y", 232>; +def C232_Z : R600Reg <"C232.Z", 232>; +def C232_W : R600Reg <"C232.W", 232>; +def C233_X : R600Reg <"C233.X", 233>; +def C233_Y : R600Reg <"C233.Y", 233>; +def C233_Z : R600Reg <"C233.Z", 233>; +def C233_W : R600Reg <"C233.W", 233>; +def C234_X : R600Reg <"C234.X", 234>; +def C234_Y : R600Reg <"C234.Y", 234>; +def C234_Z : R600Reg <"C234.Z", 234>; +def C234_W : R600Reg <"C234.W", 234>; +def 
C235_X : R600Reg <"C235.X", 235>; +def C235_Y : R600Reg <"C235.Y", 235>; +def C235_Z : R600Reg <"C235.Z", 235>; +def C235_W : R600Reg <"C235.W", 235>; +def C236_X : R600Reg <"C236.X", 236>; +def C236_Y : R600Reg <"C236.Y", 236>; +def C236_Z : R600Reg <"C236.Z", 236>; +def C236_W : R600Reg <"C236.W", 236>; +def C237_X : R600Reg <"C237.X", 237>; +def C237_Y : R600Reg <"C237.Y", 237>; +def C237_Z : R600Reg <"C237.Z", 237>; +def C237_W : R600Reg <"C237.W", 237>; +def C238_X : R600Reg <"C238.X", 238>; +def C238_Y : R600Reg <"C238.Y", 238>; +def C238_Z : R600Reg <"C238.Z", 238>; +def C238_W : R600Reg <"C238.W", 238>; +def C239_X : R600Reg <"C239.X", 239>; +def C239_Y : R600Reg <"C239.Y", 239>; +def C239_Z : R600Reg <"C239.Z", 239>; +def C239_W : R600Reg <"C239.W", 239>; +def C240_X : R600Reg <"C240.X", 240>; +def C240_Y : R600Reg <"C240.Y", 240>; +def C240_Z : R600Reg <"C240.Z", 240>; +def C240_W : R600Reg <"C240.W", 240>; +def C241_X : R600Reg <"C241.X", 241>; +def C241_Y : R600Reg <"C241.Y", 241>; +def C241_Z : R600Reg <"C241.Z", 241>; +def C241_W : R600Reg <"C241.W", 241>; +def C242_X : R600Reg <"C242.X", 242>; +def C242_Y : R600Reg <"C242.Y", 242>; +def C242_Z : R600Reg <"C242.Z", 242>; +def C242_W : R600Reg <"C242.W", 242>; +def C243_X : R600Reg <"C243.X", 243>; +def C243_Y : R600Reg <"C243.Y", 243>; +def C243_Z : R600Reg <"C243.Z", 243>; +def C243_W : R600Reg <"C243.W", 243>; +def C244_X : R600Reg <"C244.X", 244>; +def C244_Y : R600Reg <"C244.Y", 244>; +def C244_Z : R600Reg <"C244.Z", 244>; +def C244_W : R600Reg <"C244.W", 244>; +def C245_X : R600Reg <"C245.X", 245>; +def C245_Y : R600Reg <"C245.Y", 245>; +def C245_Z : R600Reg <"C245.Z", 245>; +def C245_W : R600Reg <"C245.W", 245>; +def C246_X : R600Reg <"C246.X", 246>; +def C246_Y : R600Reg <"C246.Y", 246>; +def C246_Z : R600Reg <"C246.Z", 246>; +def C246_W : R600Reg <"C246.W", 246>; +def C247_X : R600Reg <"C247.X", 247>; +def C247_Y : R600Reg <"C247.Y", 247>; +def C247_Z : R600Reg <"C247.Z", 247>; +def C247_W : 
R600Reg <"C247.W", 247>; +def C248_X : R600Reg <"C248.X", 248>; +def C248_Y : R600Reg <"C248.Y", 248>; +def C248_Z : R600Reg <"C248.Z", 248>; +def C248_W : R600Reg <"C248.W", 248>; +def C249_X : R600Reg <"C249.X", 249>; +def C249_Y : R600Reg <"C249.Y", 249>; +def C249_Z : R600Reg <"C249.Z", 249>; +def C249_W : R600Reg <"C249.W", 249>; +def C250_X : R600Reg <"C250.X", 250>; +def C250_Y : R600Reg <"C250.Y", 250>; +def C250_Z : R600Reg <"C250.Z", 250>; +def C250_W : R600Reg <"C250.W", 250>; +def C251_X : R600Reg <"C251.X", 251>; +def C251_Y : R600Reg <"C251.Y", 251>; +def C251_Z : R600Reg <"C251.Z", 251>; +def C251_W : R600Reg <"C251.W", 251>; +def C252_X : R600Reg <"C252.X", 252>; +def C252_Y : R600Reg <"C252.Y", 252>; +def C252_Z : R600Reg <"C252.Z", 252>; +def C252_W : R600Reg <"C252.W", 252>; +def C253_X : R600Reg <"C253.X", 253>; +def C253_Y : R600Reg <"C253.Y", 253>; +def C253_Z : R600Reg <"C253.Z", 253>; +def C253_W : R600Reg <"C253.W", 253>; +def C254_X : R600Reg <"C254.X", 254>; +def C254_Y : R600Reg <"C254.Y", 254>; +def C254_Z : R600Reg <"C254.Z", 254>; +def C254_W : R600Reg <"C254.W", 254>; +def C255_X : R600Reg <"C255.X", 255>; +def C255_Y : R600Reg <"C255.Y", 255>; +def C255_Z : R600Reg <"C255.Z", 255>; +def C255_W : R600Reg <"C255.W", 255>; +def C256_X : R600Reg <"C256.X", 256>; +def C256_Y : R600Reg <"C256.Y", 256>; +def C256_Z : R600Reg <"C256.Z", 256>; +def C256_W : R600Reg <"C256.W", 256>; +def C257_X : R600Reg <"C257.X", 257>; +def C257_Y : R600Reg <"C257.Y", 257>; +def C257_Z : R600Reg <"C257.Z", 257>; +def C257_W : R600Reg <"C257.W", 257>; +def C258_X : R600Reg <"C258.X", 258>; +def C258_Y : R600Reg <"C258.Y", 258>; +def C258_Z : R600Reg <"C258.Z", 258>; +def C258_W : R600Reg <"C258.W", 258>; +def C259_X : R600Reg <"C259.X", 259>; +def C259_Y : R600Reg <"C259.Y", 259>; +def C259_Z : R600Reg <"C259.Z", 259>; +def C259_W : R600Reg <"C259.W", 259>; +def C260_X : R600Reg <"C260.X", 260>; +def C260_Y : R600Reg <"C260.Y", 260>; +def C260_Z : R600Reg 
<"C260.Z", 260>; +def C260_W : R600Reg <"C260.W", 260>; +def C261_X : R600Reg <"C261.X", 261>; +def C261_Y : R600Reg <"C261.Y", 261>; +def C261_Z : R600Reg <"C261.Z", 261>; +def C261_W : R600Reg <"C261.W", 261>; +def C262_X : R600Reg <"C262.X", 262>; +def C262_Y : R600Reg <"C262.Y", 262>; +def C262_Z : R600Reg <"C262.Z", 262>; +def C262_W : R600Reg <"C262.W", 262>; +def C263_X : R600Reg <"C263.X", 263>; +def C263_Y : R600Reg <"C263.Y", 263>; +def C263_Z : R600Reg <"C263.Z", 263>; +def C263_W : R600Reg <"C263.W", 263>; +def C264_X : R600Reg <"C264.X", 264>; +def C264_Y : R600Reg <"C264.Y", 264>; +def C264_Z : R600Reg <"C264.Z", 264>; +def C264_W : R600Reg <"C264.W", 264>; +def C265_X : R600Reg <"C265.X", 265>; +def C265_Y : R600Reg <"C265.Y", 265>; +def C265_Z : R600Reg <"C265.Z", 265>; +def C265_W : R600Reg <"C265.W", 265>; +def C266_X : R600Reg <"C266.X", 266>; +def C266_Y : R600Reg <"C266.Y", 266>; +def C266_Z : R600Reg <"C266.Z", 266>; +def C266_W : R600Reg <"C266.W", 266>; +def C267_X : R600Reg <"C267.X", 267>; +def C267_Y : R600Reg <"C267.Y", 267>; +def C267_Z : R600Reg <"C267.Z", 267>; +def C267_W : R600Reg <"C267.W", 267>; +def C268_X : R600Reg <"C268.X", 268>; +def C268_Y : R600Reg <"C268.Y", 268>; +def C268_Z : R600Reg <"C268.Z", 268>; +def C268_W : R600Reg <"C268.W", 268>; +def C269_X : R600Reg <"C269.X", 269>; +def C269_Y : R600Reg <"C269.Y", 269>; +def C269_Z : R600Reg <"C269.Z", 269>; +def C269_W : R600Reg <"C269.W", 269>; +def C270_X : R600Reg <"C270.X", 270>; +def C270_Y : R600Reg <"C270.Y", 270>; +def C270_Z : R600Reg <"C270.Z", 270>; +def C270_W : R600Reg <"C270.W", 270>; +def C271_X : R600Reg <"C271.X", 271>; +def C271_Y : R600Reg <"C271.Y", 271>; +def C271_Z : R600Reg <"C271.Z", 271>; +def C271_W : R600Reg <"C271.W", 271>; +def C272_X : R600Reg <"C272.X", 272>; +def C272_Y : R600Reg <"C272.Y", 272>; +def C272_Z : R600Reg <"C272.Z", 272>; +def C272_W : R600Reg <"C272.W", 272>; +def C273_X : R600Reg <"C273.X", 273>; +def C273_Y : R600Reg <"C273.Y", 
273>; +def C273_Z : R600Reg <"C273.Z", 273>; +def C273_W : R600Reg <"C273.W", 273>; +def C274_X : R600Reg <"C274.X", 274>; +def C274_Y : R600Reg <"C274.Y", 274>; +def C274_Z : R600Reg <"C274.Z", 274>; +def C274_W : R600Reg <"C274.W", 274>; +def C275_X : R600Reg <"C275.X", 275>; +def C275_Y : R600Reg <"C275.Y", 275>; +def C275_Z : R600Reg <"C275.Z", 275>; +def C275_W : R600Reg <"C275.W", 275>; +def C276_X : R600Reg <"C276.X", 276>; +def C276_Y : R600Reg <"C276.Y", 276>; +def C276_Z : R600Reg <"C276.Z", 276>; +def C276_W : R600Reg <"C276.W", 276>; +def C277_X : R600Reg <"C277.X", 277>; +def C277_Y : R600Reg <"C277.Y", 277>; +def C277_Z : R600Reg <"C277.Z", 277>; +def C277_W : R600Reg <"C277.W", 277>; +def C278_X : R600Reg <"C278.X", 278>; +def C278_Y : R600Reg <"C278.Y", 278>; +def C278_Z : R600Reg <"C278.Z", 278>; +def C278_W : R600Reg <"C278.W", 278>; +def C279_X : R600Reg <"C279.X", 279>; +def C279_Y : R600Reg <"C279.Y", 279>; +def C279_Z : R600Reg <"C279.Z", 279>; +def C279_W : R600Reg <"C279.W", 279>; +def C280_X : R600Reg <"C280.X", 280>; +def C280_Y : R600Reg <"C280.Y", 280>; +def C280_Z : R600Reg <"C280.Z", 280>; +def C280_W : R600Reg <"C280.W", 280>; +def C281_X : R600Reg <"C281.X", 281>; +def C281_Y : R600Reg <"C281.Y", 281>; +def C281_Z : R600Reg <"C281.Z", 281>; +def C281_W : R600Reg <"C281.W", 281>; +def C282_X : R600Reg <"C282.X", 282>; +def C282_Y : R600Reg <"C282.Y", 282>; +def C282_Z : R600Reg <"C282.Z", 282>; +def C282_W : R600Reg <"C282.W", 282>; +def C283_X : R600Reg <"C283.X", 283>; +def C283_Y : R600Reg <"C283.Y", 283>; +def C283_Z : R600Reg <"C283.Z", 283>; +def C283_W : R600Reg <"C283.W", 283>; +def C284_X : R600Reg <"C284.X", 284>; +def C284_Y : R600Reg <"C284.Y", 284>; +def C284_Z : R600Reg <"C284.Z", 284>; +def C284_W : R600Reg <"C284.W", 284>; +def C285_X : R600Reg <"C285.X", 285>; +def C285_Y : R600Reg <"C285.Y", 285>; +def C285_Z : R600Reg <"C285.Z", 285>; +def C285_W : R600Reg <"C285.W", 285>; +def C286_X : R600Reg <"C286.X", 286>; +def 
C286_Y : R600Reg <"C286.Y", 286>; +def C286_Z : R600Reg <"C286.Z", 286>; +def C286_W : R600Reg <"C286.W", 286>; +def C287_X : R600Reg <"C287.X", 287>; +def C287_Y : R600Reg <"C287.Y", 287>; +def C287_Z : R600Reg <"C287.Z", 287>; +def C287_W : R600Reg <"C287.W", 287>; +def C288_X : R600Reg <"C288.X", 288>; +def C288_Y : R600Reg <"C288.Y", 288>; +def C288_Z : R600Reg <"C288.Z", 288>; +def C288_W : R600Reg <"C288.W", 288>; +def C289_X : R600Reg <"C289.X", 289>; +def C289_Y : R600Reg <"C289.Y", 289>; +def C289_Z : R600Reg <"C289.Z", 289>; +def C289_W : R600Reg <"C289.W", 289>; +def C290_X : R600Reg <"C290.X", 290>; +def C290_Y : R600Reg <"C290.Y", 290>; +def C290_Z : R600Reg <"C290.Z", 290>; +def C290_W : R600Reg <"C290.W", 290>; +def C291_X : R600Reg <"C291.X", 291>; +def C291_Y : R600Reg <"C291.Y", 291>; +def C291_Z : R600Reg <"C291.Z", 291>; +def C291_W : R600Reg <"C291.W", 291>; +def C292_X : R600Reg <"C292.X", 292>; +def C292_Y : R600Reg <"C292.Y", 292>; +def C292_Z : R600Reg <"C292.Z", 292>; +def C292_W : R600Reg <"C292.W", 292>; +def C293_X : R600Reg <"C293.X", 293>; +def C293_Y : R600Reg <"C293.Y", 293>; +def C293_Z : R600Reg <"C293.Z", 293>; +def C293_W : R600Reg <"C293.W", 293>; +def C294_X : R600Reg <"C294.X", 294>; +def C294_Y : R600Reg <"C294.Y", 294>; +def C294_Z : R600Reg <"C294.Z", 294>; +def C294_W : R600Reg <"C294.W", 294>; +def C295_X : R600Reg <"C295.X", 295>; +def C295_Y : R600Reg <"C295.Y", 295>; +def C295_Z : R600Reg <"C295.Z", 295>; +def C295_W : R600Reg <"C295.W", 295>; +def C296_X : R600Reg <"C296.X", 296>; +def C296_Y : R600Reg <"C296.Y", 296>; +def C296_Z : R600Reg <"C296.Z", 296>; +def C296_W : R600Reg <"C296.W", 296>; +def C297_X : R600Reg <"C297.X", 297>; +def C297_Y : R600Reg <"C297.Y", 297>; +def C297_Z : R600Reg <"C297.Z", 297>; +def C297_W : R600Reg <"C297.W", 297>; +def C298_X : R600Reg <"C298.X", 298>; +def C298_Y : R600Reg <"C298.Y", 298>; +def C298_Z : R600Reg <"C298.Z", 298>; +def C298_W : R600Reg <"C298.W", 298>; +def C299_X : 
R600Reg <"C299.X", 299>; +def C299_Y : R600Reg <"C299.Y", 299>; +def C299_Z : R600Reg <"C299.Z", 299>; +def C299_W : R600Reg <"C299.W", 299>; +def C300_X : R600Reg <"C300.X", 300>; +def C300_Y : R600Reg <"C300.Y", 300>; +def C300_Z : R600Reg <"C300.Z", 300>; +def C300_W : R600Reg <"C300.W", 300>; +def C301_X : R600Reg <"C301.X", 301>; +def C301_Y : R600Reg <"C301.Y", 301>; +def C301_Z : R600Reg <"C301.Z", 301>; +def C301_W : R600Reg <"C301.W", 301>; +def C302_X : R600Reg <"C302.X", 302>; +def C302_Y : R600Reg <"C302.Y", 302>; +def C302_Z : R600Reg <"C302.Z", 302>; +def C302_W : R600Reg <"C302.W", 302>; +def C303_X : R600Reg <"C303.X", 303>; +def C303_Y : R600Reg <"C303.Y", 303>; +def C303_Z : R600Reg <"C303.Z", 303>; +def C303_W : R600Reg <"C303.W", 303>; +def C304_X : R600Reg <"C304.X", 304>; +def C304_Y : R600Reg <"C304.Y", 304>; +def C304_Z : R600Reg <"C304.Z", 304>; +def C304_W : R600Reg <"C304.W", 304>; +def C305_X : R600Reg <"C305.X", 305>; +def C305_Y : R600Reg <"C305.Y", 305>; +def C305_Z : R600Reg <"C305.Z", 305>; +def C305_W : R600Reg <"C305.W", 305>; +def C306_X : R600Reg <"C306.X", 306>; +def C306_Y : R600Reg <"C306.Y", 306>; +def C306_Z : R600Reg <"C306.Z", 306>; +def C306_W : R600Reg <"C306.W", 306>; +def C307_X : R600Reg <"C307.X", 307>; +def C307_Y : R600Reg <"C307.Y", 307>; +def C307_Z : R600Reg <"C307.Z", 307>; +def C307_W : R600Reg <"C307.W", 307>; +def C308_X : R600Reg <"C308.X", 308>; +def C308_Y : R600Reg <"C308.Y", 308>; +def C308_Z : R600Reg <"C308.Z", 308>; +def C308_W : R600Reg <"C308.W", 308>; +def C309_X : R600Reg <"C309.X", 309>; +def C309_Y : R600Reg <"C309.Y", 309>; +def C309_Z : R600Reg <"C309.Z", 309>; +def C309_W : R600Reg <"C309.W", 309>; +def C310_X : R600Reg <"C310.X", 310>; +def C310_Y : R600Reg <"C310.Y", 310>; +def C310_Z : R600Reg <"C310.Z", 310>; +def C310_W : R600Reg <"C310.W", 310>; +def C311_X : R600Reg <"C311.X", 311>; +def C311_Y : R600Reg <"C311.Y", 311>; +def C311_Z : R600Reg <"C311.Z", 311>; +def C311_W : R600Reg 
<"C311.W", 311>; +def C312_X : R600Reg <"C312.X", 312>; +def C312_Y : R600Reg <"C312.Y", 312>; +def C312_Z : R600Reg <"C312.Z", 312>; +def C312_W : R600Reg <"C312.W", 312>; +def C313_X : R600Reg <"C313.X", 313>; +def C313_Y : R600Reg <"C313.Y", 313>; +def C313_Z : R600Reg <"C313.Z", 313>; +def C313_W : R600Reg <"C313.W", 313>; +def C314_X : R600Reg <"C314.X", 314>; +def C314_Y : R600Reg <"C314.Y", 314>; +def C314_Z : R600Reg <"C314.Z", 314>; +def C314_W : R600Reg <"C314.W", 314>; +def C315_X : R600Reg <"C315.X", 315>; +def C315_Y : R600Reg <"C315.Y", 315>; +def C315_Z : R600Reg <"C315.Z", 315>; +def C315_W : R600Reg <"C315.W", 315>; +def C316_X : R600Reg <"C316.X", 316>; +def C316_Y : R600Reg <"C316.Y", 316>; +def C316_Z : R600Reg <"C316.Z", 316>; +def C316_W : R600Reg <"C316.W", 316>; +def C317_X : R600Reg <"C317.X", 317>; +def C317_Y : R600Reg <"C317.Y", 317>; +def C317_Z : R600Reg <"C317.Z", 317>; +def C317_W : R600Reg <"C317.W", 317>; +def C318_X : R600Reg <"C318.X", 318>; +def C318_Y : R600Reg <"C318.Y", 318>; +def C318_Z : R600Reg <"C318.Z", 318>; +def C318_W : R600Reg <"C318.W", 318>; +def C319_X : R600Reg <"C319.X", 319>; +def C319_Y : R600Reg <"C319.Y", 319>; +def C319_Z : R600Reg <"C319.Z", 319>; +def C319_W : R600Reg <"C319.W", 319>; +def C320_X : R600Reg <"C320.X", 320>; +def C320_Y : R600Reg <"C320.Y", 320>; +def C320_Z : R600Reg <"C320.Z", 320>; +def C320_W : R600Reg <"C320.W", 320>; +def C321_X : R600Reg <"C321.X", 321>; +def C321_Y : R600Reg <"C321.Y", 321>; +def C321_Z : R600Reg <"C321.Z", 321>; +def C321_W : R600Reg <"C321.W", 321>; +def C322_X : R600Reg <"C322.X", 322>; +def C322_Y : R600Reg <"C322.Y", 322>; +def C322_Z : R600Reg <"C322.Z", 322>; +def C322_W : R600Reg <"C322.W", 322>; +def C323_X : R600Reg <"C323.X", 323>; +def C323_Y : R600Reg <"C323.Y", 323>; +def C323_Z : R600Reg <"C323.Z", 323>; +def C323_W : R600Reg <"C323.W", 323>; +def C324_X : R600Reg <"C324.X", 324>; +def C324_Y : R600Reg <"C324.Y", 324>; +def C324_Z : R600Reg <"C324.Z", 
324>; +def C324_W : R600Reg <"C324.W", 324>; +def C325_X : R600Reg <"C325.X", 325>; +def C325_Y : R600Reg <"C325.Y", 325>; +def C325_Z : R600Reg <"C325.Z", 325>; +def C325_W : R600Reg <"C325.W", 325>; +def C326_X : R600Reg <"C326.X", 326>; +def C326_Y : R600Reg <"C326.Y", 326>; +def C326_Z : R600Reg <"C326.Z", 326>; +def C326_W : R600Reg <"C326.W", 326>; +def C327_X : R600Reg <"C327.X", 327>; +def C327_Y : R600Reg <"C327.Y", 327>; +def C327_Z : R600Reg <"C327.Z", 327>; +def C327_W : R600Reg <"C327.W", 327>; +def C328_X : R600Reg <"C328.X", 328>; +def C328_Y : R600Reg <"C328.Y", 328>; +def C328_Z : R600Reg <"C328.Z", 328>; +def C328_W : R600Reg <"C328.W", 328>; +def C329_X : R600Reg <"C329.X", 329>; +def C329_Y : R600Reg <"C329.Y", 329>; +def C329_Z : R600Reg <"C329.Z", 329>; +def C329_W : R600Reg <"C329.W", 329>; +def C330_X : R600Reg <"C330.X", 330>; +def C330_Y : R600Reg <"C330.Y", 330>; +def C330_Z : R600Reg <"C330.Z", 330>; +def C330_W : R600Reg <"C330.W", 330>; +def C331_X : R600Reg <"C331.X", 331>; +def C331_Y : R600Reg <"C331.Y", 331>; +def C331_Z : R600Reg <"C331.Z", 331>; +def C331_W : R600Reg <"C331.W", 331>; +def C332_X : R600Reg <"C332.X", 332>; +def C332_Y : R600Reg <"C332.Y", 332>; +def C332_Z : R600Reg <"C332.Z", 332>; +def C332_W : R600Reg <"C332.W", 332>; +def C333_X : R600Reg <"C333.X", 333>; +def C333_Y : R600Reg <"C333.Y", 333>; +def C333_Z : R600Reg <"C333.Z", 333>; +def C333_W : R600Reg <"C333.W", 333>; +def C334_X : R600Reg <"C334.X", 334>; +def C334_Y : R600Reg <"C334.Y", 334>; +def C334_Z : R600Reg <"C334.Z", 334>; +def C334_W : R600Reg <"C334.W", 334>; +def C335_X : R600Reg <"C335.X", 335>; +def C335_Y : R600Reg <"C335.Y", 335>; +def C335_Z : R600Reg <"C335.Z", 335>; +def C335_W : R600Reg <"C335.W", 335>; +def C336_X : R600Reg <"C336.X", 336>; +def C336_Y : R600Reg <"C336.Y", 336>; +def C336_Z : R600Reg <"C336.Z", 336>; +def C336_W : R600Reg <"C336.W", 336>; +def C337_X : R600Reg <"C337.X", 337>; +def C337_Y : R600Reg <"C337.Y", 337>; +def 
C337_Z : R600Reg <"C337.Z", 337>; +def C337_W : R600Reg <"C337.W", 337>; +def C338_X : R600Reg <"C338.X", 338>; +def C338_Y : R600Reg <"C338.Y", 338>; +def C338_Z : R600Reg <"C338.Z", 338>; +def C338_W : R600Reg <"C338.W", 338>; +def C339_X : R600Reg <"C339.X", 339>; +def C339_Y : R600Reg <"C339.Y", 339>; +def C339_Z : R600Reg <"C339.Z", 339>; +def C339_W : R600Reg <"C339.W", 339>; +def C340_X : R600Reg <"C340.X", 340>; +def C340_Y : R600Reg <"C340.Y", 340>; +def C340_Z : R600Reg <"C340.Z", 340>; +def C340_W : R600Reg <"C340.W", 340>; +def C341_X : R600Reg <"C341.X", 341>; +def C341_Y : R600Reg <"C341.Y", 341>; +def C341_Z : R600Reg <"C341.Z", 341>; +def C341_W : R600Reg <"C341.W", 341>; +def C342_X : R600Reg <"C342.X", 342>; +def C342_Y : R600Reg <"C342.Y", 342>; +def C342_Z : R600Reg <"C342.Z", 342>; +def C342_W : R600Reg <"C342.W", 342>; +def C343_X : R600Reg <"C343.X", 343>; +def C343_Y : R600Reg <"C343.Y", 343>; +def C343_Z : R600Reg <"C343.Z", 343>; +def C343_W : R600Reg <"C343.W", 343>; +def C344_X : R600Reg <"C344.X", 344>; +def C344_Y : R600Reg <"C344.Y", 344>; +def C344_Z : R600Reg <"C344.Z", 344>; +def C344_W : R600Reg <"C344.W", 344>; +def C345_X : R600Reg <"C345.X", 345>; +def C345_Y : R600Reg <"C345.Y", 345>; +def C345_Z : R600Reg <"C345.Z", 345>; +def C345_W : R600Reg <"C345.W", 345>; +def C346_X : R600Reg <"C346.X", 346>; +def C346_Y : R600Reg <"C346.Y", 346>; +def C346_Z : R600Reg <"C346.Z", 346>; +def C346_W : R600Reg <"C346.W", 346>; +def C347_X : R600Reg <"C347.X", 347>; +def C347_Y : R600Reg <"C347.Y", 347>; +def C347_Z : R600Reg <"C347.Z", 347>; +def C347_W : R600Reg <"C347.W", 347>; +def C348_X : R600Reg <"C348.X", 348>; +def C348_Y : R600Reg <"C348.Y", 348>; +def C348_Z : R600Reg <"C348.Z", 348>; +def C348_W : R600Reg <"C348.W", 348>; +def C349_X : R600Reg <"C349.X", 349>; +def C349_Y : R600Reg <"C349.Y", 349>; +def C349_Z : R600Reg <"C349.Z", 349>; +def C349_W : R600Reg <"C349.W", 349>; +def C350_X : R600Reg <"C350.X", 350>; +def C350_Y : 
R600Reg <"C350.Y", 350>; +def C350_Z : R600Reg <"C350.Z", 350>; +def C350_W : R600Reg <"C350.W", 350>; +def C351_X : R600Reg <"C351.X", 351>; +def C351_Y : R600Reg <"C351.Y", 351>; +def C351_Z : R600Reg <"C351.Z", 351>; +def C351_W : R600Reg <"C351.W", 351>; +def C352_X : R600Reg <"C352.X", 352>; +def C352_Y : R600Reg <"C352.Y", 352>; +def C352_Z : R600Reg <"C352.Z", 352>; +def C352_W : R600Reg <"C352.W", 352>; +def C353_X : R600Reg <"C353.X", 353>; +def C353_Y : R600Reg <"C353.Y", 353>; +def C353_Z : R600Reg <"C353.Z", 353>; +def C353_W : R600Reg <"C353.W", 353>; +def C354_X : R600Reg <"C354.X", 354>; +def C354_Y : R600Reg <"C354.Y", 354>; +def C354_Z : R600Reg <"C354.Z", 354>; +def C354_W : R600Reg <"C354.W", 354>; +def C355_X : R600Reg <"C355.X", 355>; +def C355_Y : R600Reg <"C355.Y", 355>; +def C355_Z : R600Reg <"C355.Z", 355>; +def C355_W : R600Reg <"C355.W", 355>; +def C356_X : R600Reg <"C356.X", 356>; +def C356_Y : R600Reg <"C356.Y", 356>; +def C356_Z : R600Reg <"C356.Z", 356>; +def C356_W : R600Reg <"C356.W", 356>; +def C357_X : R600Reg <"C357.X", 357>; +def C357_Y : R600Reg <"C357.Y", 357>; +def C357_Z : R600Reg <"C357.Z", 357>; +def C357_W : R600Reg <"C357.W", 357>; +def C358_X : R600Reg <"C358.X", 358>; +def C358_Y : R600Reg <"C358.Y", 358>; +def C358_Z : R600Reg <"C358.Z", 358>; +def C358_W : R600Reg <"C358.W", 358>; +def C359_X : R600Reg <"C359.X", 359>; +def C359_Y : R600Reg <"C359.Y", 359>; +def C359_Z : R600Reg <"C359.Z", 359>; +def C359_W : R600Reg <"C359.W", 359>; +def C360_X : R600Reg <"C360.X", 360>; +def C360_Y : R600Reg <"C360.Y", 360>; +def C360_Z : R600Reg <"C360.Z", 360>; +def C360_W : R600Reg <"C360.W", 360>; +def C361_X : R600Reg <"C361.X", 361>; +def C361_Y : R600Reg <"C361.Y", 361>; +def C361_Z : R600Reg <"C361.Z", 361>; +def C361_W : R600Reg <"C361.W", 361>; +def C362_X : R600Reg <"C362.X", 362>; +def C362_Y : R600Reg <"C362.Y", 362>; +def C362_Z : R600Reg <"C362.Z", 362>; +def C362_W : R600Reg <"C362.W", 362>; +def C363_X : R600Reg 
<"C363.X", 363>; +def C363_Y : R600Reg <"C363.Y", 363>; +def C363_Z : R600Reg <"C363.Z", 363>; +def C363_W : R600Reg <"C363.W", 363>; +def C364_X : R600Reg <"C364.X", 364>; +def C364_Y : R600Reg <"C364.Y", 364>; +def C364_Z : R600Reg <"C364.Z", 364>; +def C364_W : R600Reg <"C364.W", 364>; +def C365_X : R600Reg <"C365.X", 365>; +def C365_Y : R600Reg <"C365.Y", 365>; +def C365_Z : R600Reg <"C365.Z", 365>; +def C365_W : R600Reg <"C365.W", 365>; +def C366_X : R600Reg <"C366.X", 366>; +def C366_Y : R600Reg <"C366.Y", 366>; +def C366_Z : R600Reg <"C366.Z", 366>; +def C366_W : R600Reg <"C366.W", 366>; +def C367_X : R600Reg <"C367.X", 367>; +def C367_Y : R600Reg <"C367.Y", 367>; +def C367_Z : R600Reg <"C367.Z", 367>; +def C367_W : R600Reg <"C367.W", 367>; +def C368_X : R600Reg <"C368.X", 368>; +def C368_Y : R600Reg <"C368.Y", 368>; +def C368_Z : R600Reg <"C368.Z", 368>; +def C368_W : R600Reg <"C368.W", 368>; +def C369_X : R600Reg <"C369.X", 369>; +def C369_Y : R600Reg <"C369.Y", 369>; +def C369_Z : R600Reg <"C369.Z", 369>; +def C369_W : R600Reg <"C369.W", 369>; +def C370_X : R600Reg <"C370.X", 370>; +def C370_Y : R600Reg <"C370.Y", 370>; +def C370_Z : R600Reg <"C370.Z", 370>; +def C370_W : R600Reg <"C370.W", 370>; +def C371_X : R600Reg <"C371.X", 371>; +def C371_Y : R600Reg <"C371.Y", 371>; +def C371_Z : R600Reg <"C371.Z", 371>; +def C371_W : R600Reg <"C371.W", 371>; +def C372_X : R600Reg <"C372.X", 372>; +def C372_Y : R600Reg <"C372.Y", 372>; +def C372_Z : R600Reg <"C372.Z", 372>; +def C372_W : R600Reg <"C372.W", 372>; +def C373_X : R600Reg <"C373.X", 373>; +def C373_Y : R600Reg <"C373.Y", 373>; +def C373_Z : R600Reg <"C373.Z", 373>; +def C373_W : R600Reg <"C373.W", 373>; +def C374_X : R600Reg <"C374.X", 374>; +def C374_Y : R600Reg <"C374.Y", 374>; +def C374_Z : R600Reg <"C374.Z", 374>; +def C374_W : R600Reg <"C374.W", 374>; +def C375_X : R600Reg <"C375.X", 375>; +def C375_Y : R600Reg <"C375.Y", 375>; +def C375_Z : R600Reg <"C375.Z", 375>; +def C375_W : R600Reg <"C375.W", 
375>; +def C376_X : R600Reg <"C376.X", 376>; +def C376_Y : R600Reg <"C376.Y", 376>; +def C376_Z : R600Reg <"C376.Z", 376>; +def C376_W : R600Reg <"C376.W", 376>; +def C377_X : R600Reg <"C377.X", 377>; +def C377_Y : R600Reg <"C377.Y", 377>; +def C377_Z : R600Reg <"C377.Z", 377>; +def C377_W : R600Reg <"C377.W", 377>; +def C378_X : R600Reg <"C378.X", 378>; +def C378_Y : R600Reg <"C378.Y", 378>; +def C378_Z : R600Reg <"C378.Z", 378>; +def C378_W : R600Reg <"C378.W", 378>; +def C379_X : R600Reg <"C379.X", 379>; +def C379_Y : R600Reg <"C379.Y", 379>; +def C379_Z : R600Reg <"C379.Z", 379>; +def C379_W : R600Reg <"C379.W", 379>; +def C380_X : R600Reg <"C380.X", 380>; +def C380_Y : R600Reg <"C380.Y", 380>; +def C380_Z : R600Reg <"C380.Z", 380>; +def C380_W : R600Reg <"C380.W", 380>; +def C381_X : R600Reg <"C381.X", 381>; +def C381_Y : R600Reg <"C381.Y", 381>; +def C381_Z : R600Reg <"C381.Z", 381>; +def C381_W : R600Reg <"C381.W", 381>; +def C382_X : R600Reg <"C382.X", 382>; +def C382_Y : R600Reg <"C382.Y", 382>; +def C382_Z : R600Reg <"C382.Z", 382>; +def C382_W : R600Reg <"C382.W", 382>; +def C383_X : R600Reg <"C383.X", 383>; +def C383_Y : R600Reg <"C383.Y", 383>; +def C383_Z : R600Reg <"C383.Z", 383>; +def C383_W : R600Reg <"C383.W", 383>; +def C384_X : R600Reg <"C384.X", 384>; +def C384_Y : R600Reg <"C384.Y", 384>; +def C384_Z : R600Reg <"C384.Z", 384>; +def C384_W : R600Reg <"C384.W", 384>; +def C385_X : R600Reg <"C385.X", 385>; +def C385_Y : R600Reg <"C385.Y", 385>; +def C385_Z : R600Reg <"C385.Z", 385>; +def C385_W : R600Reg <"C385.W", 385>; +def C386_X : R600Reg <"C386.X", 386>; +def C386_Y : R600Reg <"C386.Y", 386>; +def C386_Z : R600Reg <"C386.Z", 386>; +def C386_W : R600Reg <"C386.W", 386>; +def C387_X : R600Reg <"C387.X", 387>; +def C387_Y : R600Reg <"C387.Y", 387>; +def C387_Z : R600Reg <"C387.Z", 387>; +def C387_W : R600Reg <"C387.W", 387>; +def C388_X : R600Reg <"C388.X", 388>; +def C388_Y : R600Reg <"C388.Y", 388>; +def C388_Z : R600Reg <"C388.Z", 388>; +def 
C388_W : R600Reg <"C388.W", 388>; +def C389_X : R600Reg <"C389.X", 389>; +def C389_Y : R600Reg <"C389.Y", 389>; +def C389_Z : R600Reg <"C389.Z", 389>; +def C389_W : R600Reg <"C389.W", 389>; +def C390_X : R600Reg <"C390.X", 390>; +def C390_Y : R600Reg <"C390.Y", 390>; +def C390_Z : R600Reg <"C390.Z", 390>; +def C390_W : R600Reg <"C390.W", 390>; +def C391_X : R600Reg <"C391.X", 391>; +def C391_Y : R600Reg <"C391.Y", 391>; +def C391_Z : R600Reg <"C391.Z", 391>; +def C391_W : R600Reg <"C391.W", 391>; +def C392_X : R600Reg <"C392.X", 392>; +def C392_Y : R600Reg <"C392.Y", 392>; +def C392_Z : R600Reg <"C392.Z", 392>; +def C392_W : R600Reg <"C392.W", 392>; +def C393_X : R600Reg <"C393.X", 393>; +def C393_Y : R600Reg <"C393.Y", 393>; +def C393_Z : R600Reg <"C393.Z", 393>; +def C393_W : R600Reg <"C393.W", 393>; +def C394_X : R600Reg <"C394.X", 394>; +def C394_Y : R600Reg <"C394.Y", 394>; +def C394_Z : R600Reg <"C394.Z", 394>; +def C394_W : R600Reg <"C394.W", 394>; +def C395_X : R600Reg <"C395.X", 395>; +def C395_Y : R600Reg <"C395.Y", 395>; +def C395_Z : R600Reg <"C395.Z", 395>; +def C395_W : R600Reg <"C395.W", 395>; +def C396_X : R600Reg <"C396.X", 396>; +def C396_Y : R600Reg <"C396.Y", 396>; +def C396_Z : R600Reg <"C396.Z", 396>; +def C396_W : R600Reg <"C396.W", 396>; +def C397_X : R600Reg <"C397.X", 397>; +def C397_Y : R600Reg <"C397.Y", 397>; +def C397_Z : R600Reg <"C397.Z", 397>; +def C397_W : R600Reg <"C397.W", 397>; +def C398_X : R600Reg <"C398.X", 398>; +def C398_Y : R600Reg <"C398.Y", 398>; +def C398_Z : R600Reg <"C398.Z", 398>; +def C398_W : R600Reg <"C398.W", 398>; +def C399_X : R600Reg <"C399.X", 399>; +def C399_Y : R600Reg <"C399.Y", 399>; +def C399_Z : R600Reg <"C399.Z", 399>; +def C399_W : R600Reg <"C399.W", 399>; +def C400_X : R600Reg <"C400.X", 400>; +def C400_Y : R600Reg <"C400.Y", 400>; +def C400_Z : R600Reg <"C400.Z", 400>; +def C400_W : R600Reg <"C400.W", 400>; +def C401_X : R600Reg <"C401.X", 401>; +def C401_Y : R600Reg <"C401.Y", 401>; +def C401_Z : 
R600Reg <"C401.Z", 401>; +def C401_W : R600Reg <"C401.W", 401>; +def C402_X : R600Reg <"C402.X", 402>; +def C402_Y : R600Reg <"C402.Y", 402>; +def C402_Z : R600Reg <"C402.Z", 402>; +def C402_W : R600Reg <"C402.W", 402>; +def C403_X : R600Reg <"C403.X", 403>; +def C403_Y : R600Reg <"C403.Y", 403>; +def C403_Z : R600Reg <"C403.Z", 403>; +def C403_W : R600Reg <"C403.W", 403>; +def C404_X : R600Reg <"C404.X", 404>; +def C404_Y : R600Reg <"C404.Y", 404>; +def C404_Z : R600Reg <"C404.Z", 404>; +def C404_W : R600Reg <"C404.W", 404>; +def C405_X : R600Reg <"C405.X", 405>; +def C405_Y : R600Reg <"C405.Y", 405>; +def C405_Z : R600Reg <"C405.Z", 405>; +def C405_W : R600Reg <"C405.W", 405>; +def C406_X : R600Reg <"C406.X", 406>; +def C406_Y : R600Reg <"C406.Y", 406>; +def C406_Z : R600Reg <"C406.Z", 406>; +def C406_W : R600Reg <"C406.W", 406>; +def C407_X : R600Reg <"C407.X", 407>; +def C407_Y : R600Reg <"C407.Y", 407>; +def C407_Z : R600Reg <"C407.Z", 407>; +def C407_W : R600Reg <"C407.W", 407>; +def C408_X : R600Reg <"C408.X", 408>; +def C408_Y : R600Reg <"C408.Y", 408>; +def C408_Z : R600Reg <"C408.Z", 408>; +def C408_W : R600Reg <"C408.W", 408>; +def C409_X : R600Reg <"C409.X", 409>; +def C409_Y : R600Reg <"C409.Y", 409>; +def C409_Z : R600Reg <"C409.Z", 409>; +def C409_W : R600Reg <"C409.W", 409>; +def C410_X : R600Reg <"C410.X", 410>; +def C410_Y : R600Reg <"C410.Y", 410>; +def C410_Z : R600Reg <"C410.Z", 410>; +def C410_W : R600Reg <"C410.W", 410>; +def C411_X : R600Reg <"C411.X", 411>; +def C411_Y : R600Reg <"C411.Y", 411>; +def C411_Z : R600Reg <"C411.Z", 411>; +def C411_W : R600Reg <"C411.W", 411>; +def C412_X : R600Reg <"C412.X", 412>; +def C412_Y : R600Reg <"C412.Y", 412>; +def C412_Z : R600Reg <"C412.Z", 412>; +def C412_W : R600Reg <"C412.W", 412>; +def C413_X : R600Reg <"C413.X", 413>; +def C413_Y : R600Reg <"C413.Y", 413>; +def C413_Z : R600Reg <"C413.Z", 413>; +def C413_W : R600Reg <"C413.W", 413>; +def C414_X : R600Reg <"C414.X", 414>; +def C414_Y : R600Reg 
<"C414.Y", 414>; +def C414_Z : R600Reg <"C414.Z", 414>; +def C414_W : R600Reg <"C414.W", 414>; +def C415_X : R600Reg <"C415.X", 415>; +def C415_Y : R600Reg <"C415.Y", 415>; +def C415_Z : R600Reg <"C415.Z", 415>; +def C415_W : R600Reg <"C415.W", 415>; +def C416_X : R600Reg <"C416.X", 416>; +def C416_Y : R600Reg <"C416.Y", 416>; +def C416_Z : R600Reg <"C416.Z", 416>; +def C416_W : R600Reg <"C416.W", 416>; +def C417_X : R600Reg <"C417.X", 417>; +def C417_Y : R600Reg <"C417.Y", 417>; +def C417_Z : R600Reg <"C417.Z", 417>; +def C417_W : R600Reg <"C417.W", 417>; +def C418_X : R600Reg <"C418.X", 418>; +def C418_Y : R600Reg <"C418.Y", 418>; +def C418_Z : R600Reg <"C418.Z", 418>; +def C418_W : R600Reg <"C418.W", 418>; +def C419_X : R600Reg <"C419.X", 419>; +def C419_Y : R600Reg <"C419.Y", 419>; +def C419_Z : R600Reg <"C419.Z", 419>; +def C419_W : R600Reg <"C419.W", 419>; +def C420_X : R600Reg <"C420.X", 420>; +def C420_Y : R600Reg <"C420.Y", 420>; +def C420_Z : R600Reg <"C420.Z", 420>; +def C420_W : R600Reg <"C420.W", 420>; +def C421_X : R600Reg <"C421.X", 421>; +def C421_Y : R600Reg <"C421.Y", 421>; +def C421_Z : R600Reg <"C421.Z", 421>; +def C421_W : R600Reg <"C421.W", 421>; +def C422_X : R600Reg <"C422.X", 422>; +def C422_Y : R600Reg <"C422.Y", 422>; +def C422_Z : R600Reg <"C422.Z", 422>; +def C422_W : R600Reg <"C422.W", 422>; +def C423_X : R600Reg <"C423.X", 423>; +def C423_Y : R600Reg <"C423.Y", 423>; +def C423_Z : R600Reg <"C423.Z", 423>; +def C423_W : R600Reg <"C423.W", 423>; +def C424_X : R600Reg <"C424.X", 424>; +def C424_Y : R600Reg <"C424.Y", 424>; +def C424_Z : R600Reg <"C424.Z", 424>; +def C424_W : R600Reg <"C424.W", 424>; +def C425_X : R600Reg <"C425.X", 425>; +def C425_Y : R600Reg <"C425.Y", 425>; +def C425_Z : R600Reg <"C425.Z", 425>; +def C425_W : R600Reg <"C425.W", 425>; +def C426_X : R600Reg <"C426.X", 426>; +def C426_Y : R600Reg <"C426.Y", 426>; +def C426_Z : R600Reg <"C426.Z", 426>; +def C426_W : R600Reg <"C426.W", 426>; +def C427_X : R600Reg <"C427.X", 
427>; +def C427_Y : R600Reg <"C427.Y", 427>; +def C427_Z : R600Reg <"C427.Z", 427>; +def C427_W : R600Reg <"C427.W", 427>; +def C428_X : R600Reg <"C428.X", 428>; +def C428_Y : R600Reg <"C428.Y", 428>; +def C428_Z : R600Reg <"C428.Z", 428>; +def C428_W : R600Reg <"C428.W", 428>; +def C429_X : R600Reg <"C429.X", 429>; +def C429_Y : R600Reg <"C429.Y", 429>; +def C429_Z : R600Reg <"C429.Z", 429>; +def C429_W : R600Reg <"C429.W", 429>; +def C430_X : R600Reg <"C430.X", 430>; +def C430_Y : R600Reg <"C430.Y", 430>; +def C430_Z : R600Reg <"C430.Z", 430>; +def C430_W : R600Reg <"C430.W", 430>; +def C431_X : R600Reg <"C431.X", 431>; +def C431_Y : R600Reg <"C431.Y", 431>; +def C431_Z : R600Reg <"C431.Z", 431>; +def C431_W : R600Reg <"C431.W", 431>; +def C432_X : R600Reg <"C432.X", 432>; +def C432_Y : R600Reg <"C432.Y", 432>; +def C432_Z : R600Reg <"C432.Z", 432>; +def C432_W : R600Reg <"C432.W", 432>; +def C433_X : R600Reg <"C433.X", 433>; +def C433_Y : R600Reg <"C433.Y", 433>; +def C433_Z : R600Reg <"C433.Z", 433>; +def C433_W : R600Reg <"C433.W", 433>; +def C434_X : R600Reg <"C434.X", 434>; +def C434_Y : R600Reg <"C434.Y", 434>; +def C434_Z : R600Reg <"C434.Z", 434>; +def C434_W : R600Reg <"C434.W", 434>; +def C435_X : R600Reg <"C435.X", 435>; +def C435_Y : R600Reg <"C435.Y", 435>; +def C435_Z : R600Reg <"C435.Z", 435>; +def C435_W : R600Reg <"C435.W", 435>; +def C436_X : R600Reg <"C436.X", 436>; +def C436_Y : R600Reg <"C436.Y", 436>; +def C436_Z : R600Reg <"C436.Z", 436>; +def C436_W : R600Reg <"C436.W", 436>; +def C437_X : R600Reg <"C437.X", 437>; +def C437_Y : R600Reg <"C437.Y", 437>; +def C437_Z : R600Reg <"C437.Z", 437>; +def C437_W : R600Reg <"C437.W", 437>; +def C438_X : R600Reg <"C438.X", 438>; +def C438_Y : R600Reg <"C438.Y", 438>; +def C438_Z : R600Reg <"C438.Z", 438>; +def C438_W : R600Reg <"C438.W", 438>; +def C439_X : R600Reg <"C439.X", 439>; +def C439_Y : R600Reg <"C439.Y", 439>; +def C439_Z : R600Reg <"C439.Z", 439>; +def C439_W : R600Reg <"C439.W", 439>; +def 
C440_X : R600Reg <"C440.X", 440>; +def C440_Y : R600Reg <"C440.Y", 440>; +def C440_Z : R600Reg <"C440.Z", 440>; +def C440_W : R600Reg <"C440.W", 440>; +def C441_X : R600Reg <"C441.X", 441>; +def C441_Y : R600Reg <"C441.Y", 441>; +def C441_Z : R600Reg <"C441.Z", 441>; +def C441_W : R600Reg <"C441.W", 441>; +def C442_X : R600Reg <"C442.X", 442>; +def C442_Y : R600Reg <"C442.Y", 442>; +def C442_Z : R600Reg <"C442.Z", 442>; +def C442_W : R600Reg <"C442.W", 442>; +def C443_X : R600Reg <"C443.X", 443>; +def C443_Y : R600Reg <"C443.Y", 443>; +def C443_Z : R600Reg <"C443.Z", 443>; +def C443_W : R600Reg <"C443.W", 443>; +def C444_X : R600Reg <"C444.X", 444>; +def C444_Y : R600Reg <"C444.Y", 444>; +def C444_Z : R600Reg <"C444.Z", 444>; +def C444_W : R600Reg <"C444.W", 444>; +def C445_X : R600Reg <"C445.X", 445>; +def C445_Y : R600Reg <"C445.Y", 445>; +def C445_Z : R600Reg <"C445.Z", 445>; +def C445_W : R600Reg <"C445.W", 445>; +def C446_X : R600Reg <"C446.X", 446>; +def C446_Y : R600Reg <"C446.Y", 446>; +def C446_Z : R600Reg <"C446.Z", 446>; +def C446_W : R600Reg <"C446.W", 446>; +def C447_X : R600Reg <"C447.X", 447>; +def C447_Y : R600Reg <"C447.Y", 447>; +def C447_Z : R600Reg <"C447.Z", 447>; +def C447_W : R600Reg <"C447.W", 447>; +def C448_X : R600Reg <"C448.X", 448>; +def C448_Y : R600Reg <"C448.Y", 448>; +def C448_Z : R600Reg <"C448.Z", 448>; +def C448_W : R600Reg <"C448.W", 448>; +def C449_X : R600Reg <"C449.X", 449>; +def C449_Y : R600Reg <"C449.Y", 449>; +def C449_Z : R600Reg <"C449.Z", 449>; +def C449_W : R600Reg <"C449.W", 449>; +def C450_X : R600Reg <"C450.X", 450>; +def C450_Y : R600Reg <"C450.Y", 450>; +def C450_Z : R600Reg <"C450.Z", 450>; +def C450_W : R600Reg <"C450.W", 450>; +def C451_X : R600Reg <"C451.X", 451>; +def C451_Y : R600Reg <"C451.Y", 451>; +def C451_Z : R600Reg <"C451.Z", 451>; +def C451_W : R600Reg <"C451.W", 451>; +def C452_X : R600Reg <"C452.X", 452>; +def C452_Y : R600Reg <"C452.Y", 452>; +def C452_Z : R600Reg <"C452.Z", 452>; +def C452_W : 
R600Reg <"C452.W", 452>; +def C453_X : R600Reg <"C453.X", 453>; +def C453_Y : R600Reg <"C453.Y", 453>; +def C453_Z : R600Reg <"C453.Z", 453>; +def C453_W : R600Reg <"C453.W", 453>; +def C454_X : R600Reg <"C454.X", 454>; +def C454_Y : R600Reg <"C454.Y", 454>; +def C454_Z : R600Reg <"C454.Z", 454>; +def C454_W : R600Reg <"C454.W", 454>; +def C455_X : R600Reg <"C455.X", 455>; +def C455_Y : R600Reg <"C455.Y", 455>; +def C455_Z : R600Reg <"C455.Z", 455>; +def C455_W : R600Reg <"C455.W", 455>; +def C456_X : R600Reg <"C456.X", 456>; +def C456_Y : R600Reg <"C456.Y", 456>; +def C456_Z : R600Reg <"C456.Z", 456>; +def C456_W : R600Reg <"C456.W", 456>; +def C457_X : R600Reg <"C457.X", 457>; +def C457_Y : R600Reg <"C457.Y", 457>; +def C457_Z : R600Reg <"C457.Z", 457>; +def C457_W : R600Reg <"C457.W", 457>; +def C458_X : R600Reg <"C458.X", 458>; +def C458_Y : R600Reg <"C458.Y", 458>; +def C458_Z : R600Reg <"C458.Z", 458>; +def C458_W : R600Reg <"C458.W", 458>; +def C459_X : R600Reg <"C459.X", 459>; +def C459_Y : R600Reg <"C459.Y", 459>; +def C459_Z : R600Reg <"C459.Z", 459>; +def C459_W : R600Reg <"C459.W", 459>; +def C460_X : R600Reg <"C460.X", 460>; +def C460_Y : R600Reg <"C460.Y", 460>; +def C460_Z : R600Reg <"C460.Z", 460>; +def C460_W : R600Reg <"C460.W", 460>; +def C461_X : R600Reg <"C461.X", 461>; +def C461_Y : R600Reg <"C461.Y", 461>; +def C461_Z : R600Reg <"C461.Z", 461>; +def C461_W : R600Reg <"C461.W", 461>; +def C462_X : R600Reg <"C462.X", 462>; +def C462_Y : R600Reg <"C462.Y", 462>; +def C462_Z : R600Reg <"C462.Z", 462>; +def C462_W : R600Reg <"C462.W", 462>; +def C463_X : R600Reg <"C463.X", 463>; +def C463_Y : R600Reg <"C463.Y", 463>; +def C463_Z : R600Reg <"C463.Z", 463>; +def C463_W : R600Reg <"C463.W", 463>; +def C464_X : R600Reg <"C464.X", 464>; +def C464_Y : R600Reg <"C464.Y", 464>; +def C464_Z : R600Reg <"C464.Z", 464>; +def C464_W : R600Reg <"C464.W", 464>; +def C465_X : R600Reg <"C465.X", 465>; +def C465_Y : R600Reg <"C465.Y", 465>; +def C465_Z : R600Reg 
<"C465.Z", 465>; +def C465_W : R600Reg <"C465.W", 465>; +def C466_X : R600Reg <"C466.X", 466>; +def C466_Y : R600Reg <"C466.Y", 466>; +def C466_Z : R600Reg <"C466.Z", 466>; +def C466_W : R600Reg <"C466.W", 466>; +def C467_X : R600Reg <"C467.X", 467>; +def C467_Y : R600Reg <"C467.Y", 467>; +def C467_Z : R600Reg <"C467.Z", 467>; +def C467_W : R600Reg <"C467.W", 467>; +def C468_X : R600Reg <"C468.X", 468>; +def C468_Y : R600Reg <"C468.Y", 468>; +def C468_Z : R600Reg <"C468.Z", 468>; +def C468_W : R600Reg <"C468.W", 468>; +def C469_X : R600Reg <"C469.X", 469>; +def C469_Y : R600Reg <"C469.Y", 469>; +def C469_Z : R600Reg <"C469.Z", 469>; +def C469_W : R600Reg <"C469.W", 469>; +def C470_X : R600Reg <"C470.X", 470>; +def C470_Y : R600Reg <"C470.Y", 470>; +def C470_Z : R600Reg <"C470.Z", 470>; +def C470_W : R600Reg <"C470.W", 470>; +def C471_X : R600Reg <"C471.X", 471>; +def C471_Y : R600Reg <"C471.Y", 471>; +def C471_Z : R600Reg <"C471.Z", 471>; +def C471_W : R600Reg <"C471.W", 471>; +def C472_X : R600Reg <"C472.X", 472>; +def C472_Y : R600Reg <"C472.Y", 472>; +def C472_Z : R600Reg <"C472.Z", 472>; +def C472_W : R600Reg <"C472.W", 472>; +def C473_X : R600Reg <"C473.X", 473>; +def C473_Y : R600Reg <"C473.Y", 473>; +def C473_Z : R600Reg <"C473.Z", 473>; +def C473_W : R600Reg <"C473.W", 473>; +def C474_X : R600Reg <"C474.X", 474>; +def C474_Y : R600Reg <"C474.Y", 474>; +def C474_Z : R600Reg <"C474.Z", 474>; +def C474_W : R600Reg <"C474.W", 474>; +def C475_X : R600Reg <"C475.X", 475>; +def C475_Y : R600Reg <"C475.Y", 475>; +def C475_Z : R600Reg <"C475.Z", 475>; +def C475_W : R600Reg <"C475.W", 475>; +def C476_X : R600Reg <"C476.X", 476>; +def C476_Y : R600Reg <"C476.Y", 476>; +def C476_Z : R600Reg <"C476.Z", 476>; +def C476_W : R600Reg <"C476.W", 476>; +def C477_X : R600Reg <"C477.X", 477>; +def C477_Y : R600Reg <"C477.Y", 477>; +def C477_Z : R600Reg <"C477.Z", 477>; +def C477_W : R600Reg <"C477.W", 477>; +def C478_X : R600Reg <"C478.X", 478>; +def C478_Y : R600Reg <"C478.Y", 
478>; +def C478_Z : R600Reg <"C478.Z", 478>; +def C478_W : R600Reg <"C478.W", 478>; +def C479_X : R600Reg <"C479.X", 479>; +def C479_Y : R600Reg <"C479.Y", 479>; +def C479_Z : R600Reg <"C479.Z", 479>; +def C479_W : R600Reg <"C479.W", 479>; +def C480_X : R600Reg <"C480.X", 480>; +def C480_Y : R600Reg <"C480.Y", 480>; +def C480_Z : R600Reg <"C480.Z", 480>; +def C480_W : R600Reg <"C480.W", 480>; +def C481_X : R600Reg <"C481.X", 481>; +def C481_Y : R600Reg <"C481.Y", 481>; +def C481_Z : R600Reg <"C481.Z", 481>; +def C481_W : R600Reg <"C481.W", 481>; +def C482_X : R600Reg <"C482.X", 482>; +def C482_Y : R600Reg <"C482.Y", 482>; +def C482_Z : R600Reg <"C482.Z", 482>; +def C482_W : R600Reg <"C482.W", 482>; +def C483_X : R600Reg <"C483.X", 483>; +def C483_Y : R600Reg <"C483.Y", 483>; +def C483_Z : R600Reg <"C483.Z", 483>; +def C483_W : R600Reg <"C483.W", 483>; +def C484_X : R600Reg <"C484.X", 484>; +def C484_Y : R600Reg <"C484.Y", 484>; +def C484_Z : R600Reg <"C484.Z", 484>; +def C484_W : R600Reg <"C484.W", 484>; +def C485_X : R600Reg <"C485.X", 485>; +def C485_Y : R600Reg <"C485.Y", 485>; +def C485_Z : R600Reg <"C485.Z", 485>; +def C485_W : R600Reg <"C485.W", 485>; +def C486_X : R600Reg <"C486.X", 486>; +def C486_Y : R600Reg <"C486.Y", 486>; +def C486_Z : R600Reg <"C486.Z", 486>; +def C486_W : R600Reg <"C486.W", 486>; +def C487_X : R600Reg <"C487.X", 487>; +def C487_Y : R600Reg <"C487.Y", 487>; +def C487_Z : R600Reg <"C487.Z", 487>; +def C487_W : R600Reg <"C487.W", 487>; +def C488_X : R600Reg <"C488.X", 488>; +def C488_Y : R600Reg <"C488.Y", 488>; +def C488_Z : R600Reg <"C488.Z", 488>; +def C488_W : R600Reg <"C488.W", 488>; +def C489_X : R600Reg <"C489.X", 489>; +def C489_Y : R600Reg <"C489.Y", 489>; +def C489_Z : R600Reg <"C489.Z", 489>; +def C489_W : R600Reg <"C489.W", 489>; +def C490_X : R600Reg <"C490.X", 490>; +def C490_Y : R600Reg <"C490.Y", 490>; +def C490_Z : R600Reg <"C490.Z", 490>; +def C490_W : R600Reg <"C490.W", 490>; +def C491_X : R600Reg <"C491.X", 491>; +def 
C491_Y : R600Reg <"C491.Y", 491>; +def C491_Z : R600Reg <"C491.Z", 491>; +def C491_W : R600Reg <"C491.W", 491>; +def C492_X : R600Reg <"C492.X", 492>; +def C492_Y : R600Reg <"C492.Y", 492>; +def C492_Z : R600Reg <"C492.Z", 492>; +def C492_W : R600Reg <"C492.W", 492>; +def C493_X : R600Reg <"C493.X", 493>; +def C493_Y : R600Reg <"C493.Y", 493>; +def C493_Z : R600Reg <"C493.Z", 493>; +def C493_W : R600Reg <"C493.W", 493>; +def C494_X : R600Reg <"C494.X", 494>; +def C494_Y : R600Reg <"C494.Y", 494>; +def C494_Z : R600Reg <"C494.Z", 494>; +def C494_W : R600Reg <"C494.W", 494>; +def C495_X : R600Reg <"C495.X", 495>; +def C495_Y : R600Reg <"C495.Y", 495>; +def C495_Z : R600Reg <"C495.Z", 495>; +def C495_W : R600Reg <"C495.W", 495>; +def C496_X : R600Reg <"C496.X", 496>; +def C496_Y : R600Reg <"C496.Y", 496>; +def C496_Z : R600Reg <"C496.Z", 496>; +def C496_W : R600Reg <"C496.W", 496>; +def C497_X : R600Reg <"C497.X", 497>; +def C497_Y : R600Reg <"C497.Y", 497>; +def C497_Z : R600Reg <"C497.Z", 497>; +def C497_W : R600Reg <"C497.W", 497>; +def C498_X : R600Reg <"C498.X", 498>; +def C498_Y : R600Reg <"C498.Y", 498>; +def C498_Z : R600Reg <"C498.Z", 498>; +def C498_W : R600Reg <"C498.W", 498>; +def C499_X : R600Reg <"C499.X", 499>; +def C499_Y : R600Reg <"C499.Y", 499>; +def C499_Z : R600Reg <"C499.Z", 499>; +def C499_W : R600Reg <"C499.W", 499>; +def C500_X : R600Reg <"C500.X", 500>; +def C500_Y : R600Reg <"C500.Y", 500>; +def C500_Z : R600Reg <"C500.Z", 500>; +def C500_W : R600Reg <"C500.W", 500>; +def C501_X : R600Reg <"C501.X", 501>; +def C501_Y : R600Reg <"C501.Y", 501>; +def C501_Z : R600Reg <"C501.Z", 501>; +def C501_W : R600Reg <"C501.W", 501>; +def C502_X : R600Reg <"C502.X", 502>; +def C502_Y : R600Reg <"C502.Y", 502>; +def C502_Z : R600Reg <"C502.Z", 502>; +def C502_W : R600Reg <"C502.W", 502>; +def C503_X : R600Reg <"C503.X", 503>; +def C503_Y : R600Reg <"C503.Y", 503>; +def C503_Z : R600Reg <"C503.Z", 503>; +def C503_W : R600Reg <"C503.W", 503>; +def C504_X : 
R600Reg <"C504.X", 504>; +def C504_Y : R600Reg <"C504.Y", 504>; +def C504_Z : R600Reg <"C504.Z", 504>; +def C504_W : R600Reg <"C504.W", 504>; +def C505_X : R600Reg <"C505.X", 505>; +def C505_Y : R600Reg <"C505.Y", 505>; +def C505_Z : R600Reg <"C505.Z", 505>; +def C505_W : R600Reg <"C505.W", 505>; +def C506_X : R600Reg <"C506.X", 506>; +def C506_Y : R600Reg <"C506.Y", 506>; +def C506_Z : R600Reg <"C506.Z", 506>; +def C506_W : R600Reg <"C506.W", 506>; +def C507_X : R600Reg <"C507.X", 507>; +def C507_Y : R600Reg <"C507.Y", 507>; +def C507_Z : R600Reg <"C507.Z", 507>; +def C507_W : R600Reg <"C507.W", 507>; +def C508_X : R600Reg <"C508.X", 508>; +def C508_Y : R600Reg <"C508.Y", 508>; +def C508_Z : R600Reg <"C508.Z", 508>; +def C508_W : R600Reg <"C508.W", 508>; +def C509_X : R600Reg <"C509.X", 509>; +def C509_Y : R600Reg <"C509.Y", 509>; +def C509_Z : R600Reg <"C509.Z", 509>; +def C509_W : R600Reg <"C509.W", 509>; +def C510_X : R600Reg <"C510.X", 510>; +def C510_Y : R600Reg <"C510.Y", 510>; +def C510_Z : R600Reg <"C510.Z", 510>; +def C510_W : R600Reg <"C510.W", 510>; +def C511_X : R600Reg <"C511.X", 511>; +def C511_Y : R600Reg <"C511.Y", 511>; +def C511_Z : R600Reg <"C511.Z", 511>; +def C511_W : R600Reg <"C511.W", 511>; +def C512_X : R600Reg <"C512.X", 512>; +def C512_Y : R600Reg <"C512.Y", 512>; +def C512_Z : R600Reg <"C512.Z", 512>; +def C512_W : R600Reg <"C512.W", 512>; +def C513_X : R600Reg <"C513.X", 513>; +def C513_Y : R600Reg <"C513.Y", 513>; +def C513_Z : R600Reg <"C513.Z", 513>; +def C513_W : R600Reg <"C513.W", 513>; +def C514_X : R600Reg <"C514.X", 514>; +def C514_Y : R600Reg <"C514.Y", 514>; +def C514_Z : R600Reg <"C514.Z", 514>; +def C514_W : R600Reg <"C514.W", 514>; +def C515_X : R600Reg <"C515.X", 515>; +def C515_Y : R600Reg <"C515.Y", 515>; +def C515_Z : R600Reg <"C515.Z", 515>; +def C515_W : R600Reg <"C515.W", 515>; +def C516_X : R600Reg <"C516.X", 516>; +def C516_Y : R600Reg <"C516.Y", 516>; +def C516_Z : R600Reg <"C516.Z", 516>; +def C516_W : R600Reg 
<"C516.W", 516>; +def C517_X : R600Reg <"C517.X", 517>; +def C517_Y : R600Reg <"C517.Y", 517>; +def C517_Z : R600Reg <"C517.Z", 517>; +def C517_W : R600Reg <"C517.W", 517>; +def C518_X : R600Reg <"C518.X", 518>; +def C518_Y : R600Reg <"C518.Y", 518>; +def C518_Z : R600Reg <"C518.Z", 518>; +def C518_W : R600Reg <"C518.W", 518>; +def C519_X : R600Reg <"C519.X", 519>; +def C519_Y : R600Reg <"C519.Y", 519>; +def C519_Z : R600Reg <"C519.Z", 519>; +def C519_W : R600Reg <"C519.W", 519>; +def C520_X : R600Reg <"C520.X", 520>; +def C520_Y : R600Reg <"C520.Y", 520>; +def C520_Z : R600Reg <"C520.Z", 520>; +def C520_W : R600Reg <"C520.W", 520>; +def C521_X : R600Reg <"C521.X", 521>; +def C521_Y : R600Reg <"C521.Y", 521>; +def C521_Z : R600Reg <"C521.Z", 521>; +def C521_W : R600Reg <"C521.W", 521>; +def C522_X : R600Reg <"C522.X", 522>; +def C522_Y : R600Reg <"C522.Y", 522>; +def C522_Z : R600Reg <"C522.Z", 522>; +def C522_W : R600Reg <"C522.W", 522>; +def C523_X : R600Reg <"C523.X", 523>; +def C523_Y : R600Reg <"C523.Y", 523>; +def C523_Z : R600Reg <"C523.Z", 523>; +def C523_W : R600Reg <"C523.W", 523>; +def C524_X : R600Reg <"C524.X", 524>; +def C524_Y : R600Reg <"C524.Y", 524>; +def C524_Z : R600Reg <"C524.Z", 524>; +def C524_W : R600Reg <"C524.W", 524>; +def C525_X : R600Reg <"C525.X", 525>; +def C525_Y : R600Reg <"C525.Y", 525>; +def C525_Z : R600Reg <"C525.Z", 525>; +def C525_W : R600Reg <"C525.W", 525>; +def C526_X : R600Reg <"C526.X", 526>; +def C526_Y : R600Reg <"C526.Y", 526>; +def C526_Z : R600Reg <"C526.Z", 526>; +def C526_W : R600Reg <"C526.W", 526>; +def C527_X : R600Reg <"C527.X", 527>; +def C527_Y : R600Reg <"C527.Y", 527>; +def C527_Z : R600Reg <"C527.Z", 527>; +def C527_W : R600Reg <"C527.W", 527>; +def C528_X : R600Reg <"C528.X", 528>; +def C528_Y : R600Reg <"C528.Y", 528>; +def C528_Z : R600Reg <"C528.Z", 528>; +def C528_W : R600Reg <"C528.W", 528>; +def C529_X : R600Reg <"C529.X", 529>; +def C529_Y : R600Reg <"C529.Y", 529>; +def C529_Z : R600Reg <"C529.Z", 
529>; +def C529_W : R600Reg <"C529.W", 529>; +def C530_X : R600Reg <"C530.X", 530>; +def C530_Y : R600Reg <"C530.Y", 530>; +def C530_Z : R600Reg <"C530.Z", 530>; +def C530_W : R600Reg <"C530.W", 530>; +def C531_X : R600Reg <"C531.X", 531>; +def C531_Y : R600Reg <"C531.Y", 531>; +def C531_Z : R600Reg <"C531.Z", 531>; +def C531_W : R600Reg <"C531.W", 531>; +def C532_X : R600Reg <"C532.X", 532>; +def C532_Y : R600Reg <"C532.Y", 532>; +def C532_Z : R600Reg <"C532.Z", 532>; +def C532_W : R600Reg <"C532.W", 532>; +def C533_X : R600Reg <"C533.X", 533>; +def C533_Y : R600Reg <"C533.Y", 533>; +def C533_Z : R600Reg <"C533.Z", 533>; +def C533_W : R600Reg <"C533.W", 533>; +def C534_X : R600Reg <"C534.X", 534>; +def C534_Y : R600Reg <"C534.Y", 534>; +def C534_Z : R600Reg <"C534.Z", 534>; +def C534_W : R600Reg <"C534.W", 534>; +def C535_X : R600Reg <"C535.X", 535>; +def C535_Y : R600Reg <"C535.Y", 535>; +def C535_Z : R600Reg <"C535.Z", 535>; +def C535_W : R600Reg <"C535.W", 535>; +def C536_X : R600Reg <"C536.X", 536>; +def C536_Y : R600Reg <"C536.Y", 536>; +def C536_Z : R600Reg <"C536.Z", 536>; +def C536_W : R600Reg <"C536.W", 536>; +def C537_X : R600Reg <"C537.X", 537>; +def C537_Y : R600Reg <"C537.Y", 537>; +def C537_Z : R600Reg <"C537.Z", 537>; +def C537_W : R600Reg <"C537.W", 537>; +def C538_X : R600Reg <"C538.X", 538>; +def C538_Y : R600Reg <"C538.Y", 538>; +def C538_Z : R600Reg <"C538.Z", 538>; +def C538_W : R600Reg <"C538.W", 538>; +def C539_X : R600Reg <"C539.X", 539>; +def C539_Y : R600Reg <"C539.Y", 539>; +def C539_Z : R600Reg <"C539.Z", 539>; +def C539_W : R600Reg <"C539.W", 539>; +def C540_X : R600Reg <"C540.X", 540>; +def C540_Y : R600Reg <"C540.Y", 540>; +def C540_Z : R600Reg <"C540.Z", 540>; +def C540_W : R600Reg <"C540.W", 540>; +def C541_X : R600Reg <"C541.X", 541>; +def C541_Y : R600Reg <"C541.Y", 541>; +def C541_Z : R600Reg <"C541.Z", 541>; +def C541_W : R600Reg <"C541.W", 541>; +def C542_X : R600Reg <"C542.X", 542>; +def C542_Y : R600Reg <"C542.Y", 542>; +def 
C542_Z : R600Reg <"C542.Z", 542>; +def C542_W : R600Reg <"C542.W", 542>; +def C543_X : R600Reg <"C543.X", 543>; +def C543_Y : R600Reg <"C543.Y", 543>; +def C543_Z : R600Reg <"C543.Z", 543>; +def C543_W : R600Reg <"C543.W", 543>; +def C544_X : R600Reg <"C544.X", 544>; +def C544_Y : R600Reg <"C544.Y", 544>; +def C544_Z : R600Reg <"C544.Z", 544>; +def C544_W : R600Reg <"C544.W", 544>; +def C545_X : R600Reg <"C545.X", 545>; +def C545_Y : R600Reg <"C545.Y", 545>; +def C545_Z : R600Reg <"C545.Z", 545>; +def C545_W : R600Reg <"C545.W", 545>; +def C546_X : R600Reg <"C546.X", 546>; +def C546_Y : R600Reg <"C546.Y", 546>; +def C546_Z : R600Reg <"C546.Z", 546>; +def C546_W : R600Reg <"C546.W", 546>; +def C547_X : R600Reg <"C547.X", 547>; +def C547_Y : R600Reg <"C547.Y", 547>; +def C547_Z : R600Reg <"C547.Z", 547>; +def C547_W : R600Reg <"C547.W", 547>; +def C548_X : R600Reg <"C548.X", 548>; +def C548_Y : R600Reg <"C548.Y", 548>; +def C548_Z : R600Reg <"C548.Z", 548>; +def C548_W : R600Reg <"C548.W", 548>; +def C549_X : R600Reg <"C549.X", 549>; +def C549_Y : R600Reg <"C549.Y", 549>; +def C549_Z : R600Reg <"C549.Z", 549>; +def C549_W : R600Reg <"C549.W", 549>; +def C550_X : R600Reg <"C550.X", 550>; +def C550_Y : R600Reg <"C550.Y", 550>; +def C550_Z : R600Reg <"C550.Z", 550>; +def C550_W : R600Reg <"C550.W", 550>; +def C551_X : R600Reg <"C551.X", 551>; +def C551_Y : R600Reg <"C551.Y", 551>; +def C551_Z : R600Reg <"C551.Z", 551>; +def C551_W : R600Reg <"C551.W", 551>; +def C552_X : R600Reg <"C552.X", 552>; +def C552_Y : R600Reg <"C552.Y", 552>; +def C552_Z : R600Reg <"C552.Z", 552>; +def C552_W : R600Reg <"C552.W", 552>; +def C553_X : R600Reg <"C553.X", 553>; +def C553_Y : R600Reg <"C553.Y", 553>; +def C553_Z : R600Reg <"C553.Z", 553>; +def C553_W : R600Reg <"C553.W", 553>; +def C554_X : R600Reg <"C554.X", 554>; +def C554_Y : R600Reg <"C554.Y", 554>; +def C554_Z : R600Reg <"C554.Z", 554>; +def C554_W : R600Reg <"C554.W", 554>; +def C555_X : R600Reg <"C555.X", 555>; +def C555_Y : 
R600Reg <"C555.Y", 555>; +def C555_Z : R600Reg <"C555.Z", 555>; +def C555_W : R600Reg <"C555.W", 555>; +def C556_X : R600Reg <"C556.X", 556>; +def C556_Y : R600Reg <"C556.Y", 556>; +def C556_Z : R600Reg <"C556.Z", 556>; +def C556_W : R600Reg <"C556.W", 556>; +def C557_X : R600Reg <"C557.X", 557>; +def C557_Y : R600Reg <"C557.Y", 557>; +def C557_Z : R600Reg <"C557.Z", 557>; +def C557_W : R600Reg <"C557.W", 557>; +def C558_X : R600Reg <"C558.X", 558>; +def C558_Y : R600Reg <"C558.Y", 558>; +def C558_Z : R600Reg <"C558.Z", 558>; +def C558_W : R600Reg <"C558.W", 558>; +def C559_X : R600Reg <"C559.X", 559>; +def C559_Y : R600Reg <"C559.Y", 559>; +def C559_Z : R600Reg <"C559.Z", 559>; +def C559_W : R600Reg <"C559.W", 559>; +def C560_X : R600Reg <"C560.X", 560>; +def C560_Y : R600Reg <"C560.Y", 560>; +def C560_Z : R600Reg <"C560.Z", 560>; +def C560_W : R600Reg <"C560.W", 560>; +def C561_X : R600Reg <"C561.X", 561>; +def C561_Y : R600Reg <"C561.Y", 561>; +def C561_Z : R600Reg <"C561.Z", 561>; +def C561_W : R600Reg <"C561.W", 561>; +def C562_X : R600Reg <"C562.X", 562>; +def C562_Y : R600Reg <"C562.Y", 562>; +def C562_Z : R600Reg <"C562.Z", 562>; +def C562_W : R600Reg <"C562.W", 562>; +def C563_X : R600Reg <"C563.X", 563>; +def C563_Y : R600Reg <"C563.Y", 563>; +def C563_Z : R600Reg <"C563.Z", 563>; +def C563_W : R600Reg <"C563.W", 563>; +def C564_X : R600Reg <"C564.X", 564>; +def C564_Y : R600Reg <"C564.Y", 564>; +def C564_Z : R600Reg <"C564.Z", 564>; +def C564_W : R600Reg <"C564.W", 564>; +def C565_X : R600Reg <"C565.X", 565>; +def C565_Y : R600Reg <"C565.Y", 565>; +def C565_Z : R600Reg <"C565.Z", 565>; +def C565_W : R600Reg <"C565.W", 565>; +def C566_X : R600Reg <"C566.X", 566>; +def C566_Y : R600Reg <"C566.Y", 566>; +def C566_Z : R600Reg <"C566.Z", 566>; +def C566_W : R600Reg <"C566.W", 566>; +def C567_X : R600Reg <"C567.X", 567>; +def C567_Y : R600Reg <"C567.Y", 567>; +def C567_Z : R600Reg <"C567.Z", 567>; +def C567_W : R600Reg <"C567.W", 567>; +def C568_X : R600Reg 
<"C568.X", 568>; +def C568_Y : R600Reg <"C568.Y", 568>; +def C568_Z : R600Reg <"C568.Z", 568>; +def C568_W : R600Reg <"C568.W", 568>; +def C569_X : R600Reg <"C569.X", 569>; +def C569_Y : R600Reg <"C569.Y", 569>; +def C569_Z : R600Reg <"C569.Z", 569>; +def C569_W : R600Reg <"C569.W", 569>; +def C570_X : R600Reg <"C570.X", 570>; +def C570_Y : R600Reg <"C570.Y", 570>; +def C570_Z : R600Reg <"C570.Z", 570>; +def C570_W : R600Reg <"C570.W", 570>; +def C571_X : R600Reg <"C571.X", 571>; +def C571_Y : R600Reg <"C571.Y", 571>; +def C571_Z : R600Reg <"C571.Z", 571>; +def C571_W : R600Reg <"C571.W", 571>; +def C572_X : R600Reg <"C572.X", 572>; +def C572_Y : R600Reg <"C572.Y", 572>; +def C572_Z : R600Reg <"C572.Z", 572>; +def C572_W : R600Reg <"C572.W", 572>; +def C573_X : R600Reg <"C573.X", 573>; +def C573_Y : R600Reg <"C573.Y", 573>; +def C573_Z : R600Reg <"C573.Z", 573>; +def C573_W : R600Reg <"C573.W", 573>; +def C574_X : R600Reg <"C574.X", 574>; +def C574_Y : R600Reg <"C574.Y", 574>; +def C574_Z : R600Reg <"C574.Z", 574>; +def C574_W : R600Reg <"C574.W", 574>; +def C575_X : R600Reg <"C575.X", 575>; +def C575_Y : R600Reg <"C575.Y", 575>; +def C575_Z : R600Reg <"C575.Z", 575>; +def C575_W : R600Reg <"C575.W", 575>; +def C576_X : R600Reg <"C576.X", 576>; +def C576_Y : R600Reg <"C576.Y", 576>; +def C576_Z : R600Reg <"C576.Z", 576>; +def C576_W : R600Reg <"C576.W", 576>; +def C577_X : R600Reg <"C577.X", 577>; +def C577_Y : R600Reg <"C577.Y", 577>; +def C577_Z : R600Reg <"C577.Z", 577>; +def C577_W : R600Reg <"C577.W", 577>; +def C578_X : R600Reg <"C578.X", 578>; +def C578_Y : R600Reg <"C578.Y", 578>; +def C578_Z : R600Reg <"C578.Z", 578>; +def C578_W : R600Reg <"C578.W", 578>; +def C579_X : R600Reg <"C579.X", 579>; +def C579_Y : R600Reg <"C579.Y", 579>; +def C579_Z : R600Reg <"C579.Z", 579>; +def C579_W : R600Reg <"C579.W", 579>; +def C580_X : R600Reg <"C580.X", 580>; +def C580_Y : R600Reg <"C580.Y", 580>; +def C580_Z : R600Reg <"C580.Z", 580>; +def C580_W : R600Reg <"C580.W", 
580>; +def C581_X : R600Reg <"C581.X", 581>; +def C581_Y : R600Reg <"C581.Y", 581>; +def C581_Z : R600Reg <"C581.Z", 581>; +def C581_W : R600Reg <"C581.W", 581>; +def C582_X : R600Reg <"C582.X", 582>; +def C582_Y : R600Reg <"C582.Y", 582>; +def C582_Z : R600Reg <"C582.Z", 582>; +def C582_W : R600Reg <"C582.W", 582>; +def C583_X : R600Reg <"C583.X", 583>; +def C583_Y : R600Reg <"C583.Y", 583>; +def C583_Z : R600Reg <"C583.Z", 583>; +def C583_W : R600Reg <"C583.W", 583>; +def C584_X : R600Reg <"C584.X", 584>; +def C584_Y : R600Reg <"C584.Y", 584>; +def C584_Z : R600Reg <"C584.Z", 584>; +def C584_W : R600Reg <"C584.W", 584>; +def C585_X : R600Reg <"C585.X", 585>; +def C585_Y : R600Reg <"C585.Y", 585>; +def C585_Z : R600Reg <"C585.Z", 585>; +def C585_W : R600Reg <"C585.W", 585>; +def C586_X : R600Reg <"C586.X", 586>; +def C586_Y : R600Reg <"C586.Y", 586>; +def C586_Z : R600Reg <"C586.Z", 586>; +def C586_W : R600Reg <"C586.W", 586>; +def C587_X : R600Reg <"C587.X", 587>; +def C587_Y : R600Reg <"C587.Y", 587>; +def C587_Z : R600Reg <"C587.Z", 587>; +def C587_W : R600Reg <"C587.W", 587>; +def C588_X : R600Reg <"C588.X", 588>; +def C588_Y : R600Reg <"C588.Y", 588>; +def C588_Z : R600Reg <"C588.Z", 588>; +def C588_W : R600Reg <"C588.W", 588>; +def C589_X : R600Reg <"C589.X", 589>; +def C589_Y : R600Reg <"C589.Y", 589>; +def C589_Z : R600Reg <"C589.Z", 589>; +def C589_W : R600Reg <"C589.W", 589>; +def C590_X : R600Reg <"C590.X", 590>; +def C590_Y : R600Reg <"C590.Y", 590>; +def C590_Z : R600Reg <"C590.Z", 590>; +def C590_W : R600Reg <"C590.W", 590>; +def C591_X : R600Reg <"C591.X", 591>; +def C591_Y : R600Reg <"C591.Y", 591>; +def C591_Z : R600Reg <"C591.Z", 591>; +def C591_W : R600Reg <"C591.W", 591>; +def C592_X : R600Reg <"C592.X", 592>; +def C592_Y : R600Reg <"C592.Y", 592>; +def C592_Z : R600Reg <"C592.Z", 592>; +def C592_W : R600Reg <"C592.W", 592>; +def C593_X : R600Reg <"C593.X", 593>; +def C593_Y : R600Reg <"C593.Y", 593>; +def C593_Z : R600Reg <"C593.Z", 593>; +def 
C593_W : R600Reg <"C593.W", 593>; +def C594_X : R600Reg <"C594.X", 594>; +def C594_Y : R600Reg <"C594.Y", 594>; +def C594_Z : R600Reg <"C594.Z", 594>; +def C594_W : R600Reg <"C594.W", 594>; +def C595_X : R600Reg <"C595.X", 595>; +def C595_Y : R600Reg <"C595.Y", 595>; +def C595_Z : R600Reg <"C595.Z", 595>; +def C595_W : R600Reg <"C595.W", 595>; +def C596_X : R600Reg <"C596.X", 596>; +def C596_Y : R600Reg <"C596.Y", 596>; +def C596_Z : R600Reg <"C596.Z", 596>; +def C596_W : R600Reg <"C596.W", 596>; +def C597_X : R600Reg <"C597.X", 597>; +def C597_Y : R600Reg <"C597.Y", 597>; +def C597_Z : R600Reg <"C597.Z", 597>; +def C597_W : R600Reg <"C597.W", 597>; +def C598_X : R600Reg <"C598.X", 598>; +def C598_Y : R600Re