1 //===----------------------Hexagon builtin routine ------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is dual licensed under the MIT and the University of Illinois Open
6 // Source Licenses. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // An optimized version of a memcpy which is equivalent to the following loop:
12 // volatile unsigned *dest; const unsigned *src;
15 // for (i = 0; i < num_words; ++i) dest[i] = src[i];
18 // The corresponding C prototype for this function would be
19 // void hexagon_memcpy_forward_vp4cp4n2(volatile unsigned *dest,
20 // const unsigned *src,
21 // unsigned num_words);
23 // *** Both dest and src must be aligned to 32-bit boundaries. ***
24 // The code does not perform any runtime checks for this, and will fail
25 // in bad ways if this requirement is not met.
27 // The "forward" in the name refers to the fact that the function copies
28 // the words going forward in memory. It is incorrect to use this function
29 // for cases where the original code copied words in any other order.
31 // *** This function is only for use by the compiler. ***
32 // The only intended use is for the LLVM compiler to generate calls to
33 // this function, when a mem-copy loop, like the one above, is detected.
// ---------------------------------------------------------------------------
// hexagon_memcpy_forward_vp4cp4n2
//
// NOTE(review): only part of this routine is visible in this listing. Packet
// braces, loop setup, the loads and the prefetch instructions themselves are
// not shown, and the branch targets .Lskipprolog / .Lskipmain are defined
// outside the visible lines.
//
// Register usage, as far as the visible lines establish it:
//   r0  - store address; post-incremented by 4 bytes per word written.
//   r1  - an address whose page offset sizes the prolog (presumably the
//         source pointer -- confirm against the C prototype / ABI).
//   r2  - number of words still to be copied.
//   r3  - scratch: words left in the current page, then whole pages left,
//         then a prefetch-info constant.
//   r4  - scratch: 32-byte block count for prefetch; also the register whose
//         new value is stored through r0.
//   p0  - predicate guarding the conditional branch and the early return.
//   r31 - target of the early return jump (presumably the return address
//         under the Hexagon calling convention -- confirm).
//
// Overall shape suggested by the labels and comments: a prolog that runs up
// to the next page boundary, a main loop over whole 1024-word pages, and a
// final part handling the remaining (r2 & 1023) words.
// ---------------------------------------------------------------------------
42 .globl hexagon_memcpy_forward_vp4cp4n2
44 .type hexagon_memcpy_forward_vp4cp4n2,@function
45 hexagon_memcpy_forward_vp4cp4n2:
47 // Compute r3 to be the number of words remaining in the current page.
48 // At the same time, compute r4 to be the number of 32-byte blocks
49 // remaining in the page (for prefetch).
55 // The word count before end-of-page is in the 12 lowest bits of r3.
56 // (If the address in r1 was already page-aligned, the bits are 0.)
// NOTE(review): the two extracts below take a 10-bit field starting at bit 2
// and a 7-bit field starting at bit 5. That matches "words" and "32-byte
// blocks" only if r3 holds a byte distance here and both instructions read
// the same (pre-update) r3, i.e. they share a packet -- the instruction that
// sets r3 and the packet braces are not visible, so confirm.
57 r3 = extractu(r3, #10, #2)
58 r4 = extractu(r3, #7, #5)
// Merge the block count in r4 into the low bits of the prefetch-info
// constant. NOTE(review): the constant's field layout is not shown here;
// presumably it is the descriptor consumed by a prefetch instruction that is
// outside the visible lines -- confirm.
65 r4 = or(r4, ##2105344) // 2105344 = 0x202000
// Skip the prolog when p0 is set. p0 is consumed as p0.new, so it is produced
// in the same packet by a compare that is not visible here (presumably
// "no words before the page boundary" -- confirm).
67 if (p0.new) jump:nt .Lskipprolog
72 r2 = sub(r2, r3) // r2 = number of words left after the prolog.
82 // Let r3 = number of whole pages left (page = 1024 words).
// Compare-and-branch on the r3 value produced in this packet: bypass the
// per-page loop entirely when there is no whole page left to copy.
84 if (cmp.eq(r3.new, #0)) jump:nt .Lskipmain
// Keep only the words that remain once all whole pages have been handled.
88 r2 = extractu(r2, #10, #0) // r2 = r2 & 1023
// Same constant as above with 0x80 (128) in the low bits; 128 blocks of
// 32 bytes is one 4096-byte page, consistent with "prefetch each page".
89 r3 = ##2105472 // r3 = 0x202080 (prefetch info)
91 // Iterate over pages.
94 // Prefetch each individual page.
// After the page loop: rebuild the prefetch info for the leftover words.
106 r3 = ##2105344 // r3 = 0x202000 (prefetch info)
// r2 is a word count, so shifting right by 3 converts it to 8-word
// (32-byte) blocks.
107 r4 = lsr(r2, #3) // r4 = number of 32-byte blocks remaining.
// Early exit through r31 when p0 (set in this same packet by a compare that
// is not visible here) is true -- presumably when no words remain; confirm.
109 if (p0.new) jumpr:nt r31
// Store the word produced into r4 within this packet (its producer is not
// visible here) at the address in r0, then advance r0 by 4 bytes.
120 memw(r0++#4) = r4.new
// Record the symbol size: distance from the entry label to this point.
125 .size hexagon_memcpy_forward_vp4cp4n2, . - hexagon_memcpy_forward_vp4cp4n2