]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - 6/sys/dev/drm/r300_cmdbuf.c
merge fix for boot-time hang on centos' xen
[FreeBSD/FreeBSD.git] / 6 / sys / dev / drm / r300_cmdbuf.c
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  */
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include "dev/drm/drmP.h"
38 #include "dev/drm/drm.h"
39 #include "dev/drm/radeon_drm.h"
40 #include "dev/drm/radeon_drv.h"
41 #include "dev/drm/r300_reg.h"
42
#define R300_SIMULTANEOUS_CLIPRECTS		4

/*
 * R300_RE_CLIPRECT_CNTL values, indexed by (number of active cliprects - 1).
 * r300_emit_cliprects() looks the value up with r300_cliprect_cntl[nr - 1].
 */
static const int r300_cliprect_cntl[R300_SIMULTANEOUS_CLIPRECTS] = {
	0xAAAA,			/* 1 cliprect */
	0xEEEE,			/* 2 cliprects */
	0xFEFE,			/* 3 cliprects */
	0xFFFE			/* 4 cliprects */
};
53
54 /**
55  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
56  * buffer, starting with index n.
57  */
58 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
59                                drm_radeon_kcmd_buffer_t *cmdbuf, int n)
60 {
61         drm_clip_rect_t box;
62         int nr;
63         int i;
64         RING_LOCALS;
65
66         nr = cmdbuf->nbox - n;
67         if (nr > R300_SIMULTANEOUS_CLIPRECTS)
68                 nr = R300_SIMULTANEOUS_CLIPRECTS;
69
70         DRM_DEBUG("%i cliprects\n", nr);
71
72         if (nr) {
73                 BEGIN_RING(6 + nr * 2);
74                 OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
75
76                 for (i = 0; i < nr; ++i) {
77                         if (DRM_COPY_FROM_USER_UNCHECKED
78                             (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
79                                 DRM_ERROR("copy cliprect faulted\n");
80                                 return DRM_ERR(EFAULT);
81                         }
82
83                         box.x1 =
84                             (box.x1 +
85                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
86                         box.y1 =
87                             (box.y1 +
88                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
89                         box.x2 =
90                             (box.x2 +
91                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
92                         box.y2 =
93                             (box.y2 +
94                              R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
95
96                         OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
97                                  (box.y1 << R300_CLIPRECT_Y_SHIFT));
98                         OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
99                                  (box.y2 << R300_CLIPRECT_Y_SHIFT));
100                 }
101
102                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
103
104                 /* TODO/SECURITY: Force scissors to a safe value, otherwise the
105                  * client might be able to trample over memory.
106                  * The impact should be very limited, but I'd rather be safe than
107                  * sorry.
108                  */
109                 OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
110                 OUT_RING(0);
111                 OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
112                 ADVANCE_RING();
113         } else {
114                 /* Why we allow zero cliprect rendering:
115                  * There are some commands in a command buffer that must be submitted
116                  * even when there are no cliprects, e.g. DMA buffer discard
117                  * or state setting (though state setting could be avoided by
118                  * simulating a loss of context).
119                  *
120                  * Now since the cmdbuf interface is so chaotic right now (and is
121                  * bound to remain that way for a bit until things settle down),
122                  * it is basically impossible to filter out the commands that are
123                  * necessary and those that aren't.
124                  *
125                  * So I choose the safe way and don't do any filtering at all;
126                  * instead, I simply set up the engine so that all rendering
127                  * can't produce any fragments.
128                  */
129                 BEGIN_RING(2);
130                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
131                 ADVANCE_RING();
132         }
133
134         return 0;
135 }
136
137 static u8 r300_reg_flags[0x10000 >> 2];
138
139 void r300_init_reg_flags(void)
140 {
141         int i;
142         memset(r300_reg_flags, 0, 0x10000 >> 2);
143 #define ADD_RANGE_MARK(reg, count,mark) \
144                 for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
145                         r300_reg_flags[i]|=(mark);
146
147 #define MARK_SAFE               1
148 #define MARK_CHECK_OFFSET       2
149
150 #define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)
151
152         /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
153         ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
154         ADD_RANGE(0x2080, 1);
155         ADD_RANGE(R300_SE_VTE_CNTL, 2);
156         ADD_RANGE(0x2134, 2);
157         ADD_RANGE(0x2140, 1);
158         ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
159         ADD_RANGE(0x21DC, 1);
160         ADD_RANGE(0x221C, 1);
161         ADD_RANGE(0x2220, 4);
162         ADD_RANGE(0x2288, 1);
163         ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
164         ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
165         ADD_RANGE(R300_GB_ENABLE, 1);
166         ADD_RANGE(R300_GB_MSPOS0, 5);
167         ADD_RANGE(R300_TX_CNTL, 1);
168         ADD_RANGE(R300_TX_ENABLE, 1);
169         ADD_RANGE(0x4200, 4);
170         ADD_RANGE(0x4214, 1);
171         ADD_RANGE(R300_RE_POINTSIZE, 1);
172         ADD_RANGE(0x4230, 3);
173         ADD_RANGE(R300_RE_LINE_CNT, 1);
174         ADD_RANGE(0x4238, 1);
175         ADD_RANGE(0x4260, 3);
176         ADD_RANGE(0x4274, 4);
177         ADD_RANGE(0x4288, 5);
178         ADD_RANGE(0x42A0, 1);
179         ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
180         ADD_RANGE(0x42B4, 1);
181         ADD_RANGE(R300_RE_CULL_CNTL, 1);
182         ADD_RANGE(0x42C0, 2);
183         ADD_RANGE(R300_RS_CNTL_0, 2);
184         ADD_RANGE(R300_RS_INTERP_0, 8);
185         ADD_RANGE(R300_RS_ROUTE_0, 8);
186         ADD_RANGE(0x43A4, 2);
187         ADD_RANGE(0x43E8, 1);
188         ADD_RANGE(R300_PFS_CNTL_0, 3);
189         ADD_RANGE(R300_PFS_NODE_0, 4);
190         ADD_RANGE(R300_PFS_TEXI_0, 64);
191         ADD_RANGE(0x46A4, 5);
192         ADD_RANGE(R300_PFS_INSTR0_0, 64);
193         ADD_RANGE(R300_PFS_INSTR1_0, 64);
194         ADD_RANGE(R300_PFS_INSTR2_0, 64);
195         ADD_RANGE(R300_PFS_INSTR3_0, 64);
196         ADD_RANGE(0x4BC0, 1);
197         ADD_RANGE(0x4BC8, 3);
198         ADD_RANGE(R300_PP_ALPHA_TEST, 2);
199         ADD_RANGE(0x4BD8, 1);
200         ADD_RANGE(R300_PFS_PARAM_0_X, 64);
201         ADD_RANGE(0x4E00, 1);
202         ADD_RANGE(R300_RB3D_CBLEND, 2);
203         ADD_RANGE(R300_RB3D_COLORMASK, 1);
204         ADD_RANGE(0x4E10, 3);
205         ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);   /* check offset */
206         ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
207         ADD_RANGE(0x4E50, 9);
208         ADD_RANGE(0x4E88, 1);
209         ADD_RANGE(0x4EA0, 2);
210         ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
211         ADD_RANGE(0x4F10, 4);
212         ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);    /* check offset */
213         ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
214         ADD_RANGE(0x4F28, 1);
215         ADD_RANGE(0x4F30, 2);
216         ADD_RANGE(0x4F44, 1);
217         ADD_RANGE(0x4F54, 1);
218
219         ADD_RANGE(R300_TX_FILTER_0, 16);
220         ADD_RANGE(R300_TX_FILTER1_0, 16);
221         ADD_RANGE(R300_TX_SIZE_0, 16);
222         ADD_RANGE(R300_TX_FORMAT_0, 16);
223         ADD_RANGE(R300_TX_PITCH_0, 16);
224         /* Texture offset is dangerous and needs more checking */
225         ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
226         ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
227         ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
228
229         /* Sporadic registers used as primitives are emitted */
230         ADD_RANGE(0x4f18, 1);
231         ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
232         ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
233         ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
234
235 }
236
237 static __inline__ int r300_check_range(unsigned reg, int count)
238 {
239         int i;
240         if (reg & ~0xffff)
241                 return -1;
242         for (i = (reg >> 2); i < (reg >> 2) + count; i++)
243                 if (r300_reg_flags[i] != MARK_SAFE)
244                         return 1;
245         return 0;
246 }
247
248 /*
249  * we expect offsets passed to the framebuffer to be either within video 
250  * memory or within AGP space 
251  */
252 static __inline__ int r300_check_offset(drm_radeon_private_t *dev_priv,
253                                         u32 offset)
254 {
255         /* we realy want to check against end of video aperture
256            but this value is not being kept.
257            This code is correct for now (does the same thing as the
258            code that sets MC_FB_LOCATION) in radeon_cp.c */
259         if (offset >= dev_priv->fb_location &&
260             offset < (dev_priv->fb_location + dev_priv->fb_size))
261                 return 0;
262         if (offset >= dev_priv->gart_vm_start &&
263             offset < (dev_priv->gart_vm_start + dev_priv->gart_size))
264                 return 0;
265         return 1;
266 }
267
268 static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
269                                                           dev_priv,
270                                                           drm_radeon_kcmd_buffer_t
271                                                           * cmdbuf,
272                                                           drm_r300_cmd_header_t
273                                                           header)
274 {
275         int reg;
276         int sz;
277         int i;
278         int values[64];
279         RING_LOCALS;
280
281         sz = header.packet0.count;
282         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
283
284         if ((sz > 64) || (sz < 0)) {
285                 DRM_ERROR
286                     ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
287                      reg, sz);
288                 return DRM_ERR(EINVAL);
289         }
290         for (i = 0; i < sz; i++) {
291                 values[i] = ((int *)cmdbuf->buf)[i];
292                 switch (r300_reg_flags[(reg >> 2) + i]) {
293                 case MARK_SAFE:
294                         break;
295                 case MARK_CHECK_OFFSET:
296                         if (r300_check_offset(dev_priv, (u32) values[i])) {
297                                 DRM_ERROR
298                                     ("Offset failed range check (reg=%04x sz=%d)\n",
299                                      reg, sz);
300                                 return DRM_ERR(EINVAL);
301                         }
302                         break;
303                 default:
304                         DRM_ERROR("Register %04x failed check as flag=%02x\n",
305                                   reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
306                         return DRM_ERR(EINVAL);
307                 }
308         }
309
310         BEGIN_RING(1 + sz);
311         OUT_RING(CP_PACKET0(reg, sz - 1));
312         OUT_RING_TABLE(values, sz);
313         ADVANCE_RING();
314
315         cmdbuf->buf += sz * 4;
316         cmdbuf->bufsz -= sz * 4;
317
318         return 0;
319 }
320
321 /**
322  * Emits a packet0 setting arbitrary registers.
323  * Called by r300_do_cp_cmdbuf.
324  *
325  * Note that checks are performed on contents and addresses of the registers
326  */
327 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
328                                         drm_radeon_kcmd_buffer_t *cmdbuf,
329                                         drm_r300_cmd_header_t header)
330 {
331         int reg;
332         int sz;
333         RING_LOCALS;
334
335         sz = header.packet0.count;
336         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
337
338         if (!sz)
339                 return 0;
340
341         if (sz * 4 > cmdbuf->bufsz)
342                 return DRM_ERR(EINVAL);
343
344         if (reg + sz * 4 >= 0x10000) {
345                 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
346                           sz);
347                 return DRM_ERR(EINVAL);
348         }
349
350         if (r300_check_range(reg, sz)) {
351                 /* go and check everything */
352                 return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
353                                                            header);
354         }
355         /* the rest of the data is safe to emit, whatever the values the user passed */
356
357         BEGIN_RING(1 + sz);
358         OUT_RING(CP_PACKET0(reg, sz - 1));
359         OUT_RING_TABLE((int *)cmdbuf->buf, sz);
360         ADVANCE_RING();
361
362         cmdbuf->buf += sz * 4;
363         cmdbuf->bufsz -= sz * 4;
364
365         return 0;
366 }
367
368 /**
369  * Uploads user-supplied vertex program instructions or parameters onto
370  * the graphics card.
371  * Called by r300_do_cp_cmdbuf.
372  */
373 static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
374                                     drm_radeon_kcmd_buffer_t *cmdbuf,
375                                     drm_r300_cmd_header_t header)
376 {
377         int sz;
378         int addr;
379         RING_LOCALS;
380
381         sz = header.vpu.count;
382         addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
383
384         if (!sz)
385                 return 0;
386         if (sz * 16 > cmdbuf->bufsz)
387                 return DRM_ERR(EINVAL);
388
389         BEGIN_RING(5 + sz * 4);
390         /* Wait for VAP to come to senses.. */
391         /* there is no need to emit it multiple times, (only once before VAP is programmed,
392            but this optimization is for later */
393         OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
394         OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
395         OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
396         OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
397
398         ADVANCE_RING();
399
400         cmdbuf->buf += sz * 16;
401         cmdbuf->bufsz -= sz * 16;
402
403         return 0;
404 }
405
406 /**
407  * Emit a clear packet from userspace.
408  * Called by r300_emit_packet3.
409  */
410 static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
411                                       drm_radeon_kcmd_buffer_t *cmdbuf)
412 {
413         RING_LOCALS;
414
415         if (8 * 4 > cmdbuf->bufsz)
416                 return DRM_ERR(EINVAL);
417
418         BEGIN_RING(10);
419         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
420         OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
421                  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
422         OUT_RING_TABLE((int *)cmdbuf->buf, 8);
423         ADVANCE_RING();
424
425         cmdbuf->buf += 8 * 4;
426         cmdbuf->bufsz -= 8 * 4;
427
428         return 0;
429 }
430
431 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
432                                                drm_radeon_kcmd_buffer_t *cmdbuf,
433                                                u32 header)
434 {
435         int count, i, k;
436 #define MAX_ARRAY_PACKET  64
437         u32 payload[MAX_ARRAY_PACKET];
438         u32 narrays;
439         RING_LOCALS;
440
441         count = (header >> 16) & 0x3fff;
442
443         if ((count + 1) > MAX_ARRAY_PACKET) {
444                 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
445                           count);
446                 return DRM_ERR(EINVAL);
447         }
448         memset(payload, 0, MAX_ARRAY_PACKET * 4);
449         memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
450
451         /* carefully check packet contents */
452
453         narrays = payload[0];
454         k = 0;
455         i = 1;
456         while ((k < narrays) && (i < (count + 1))) {
457                 i++;            /* skip attribute field */
458                 if (r300_check_offset(dev_priv, payload[i])) {
459                         DRM_ERROR
460                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
461                              k, i);
462                         return DRM_ERR(EINVAL);
463                 }
464                 k++;
465                 i++;
466                 if (k == narrays)
467                         break;
468                 /* have one more to process, they come in pairs */
469                 if (r300_check_offset(dev_priv, payload[i])) {
470                         DRM_ERROR
471                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
472                              k, i);
473                         return DRM_ERR(EINVAL);
474                 }
475                 k++;
476                 i++;
477         }
478         /* do the counts match what we expect ? */
479         if ((k != narrays) || (i != (count + 1))) {
480                 DRM_ERROR
481                     ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
482                      k, i, narrays, count + 1);
483                 return DRM_ERR(EINVAL);
484         }
485
486         /* all clear, output packet */
487
488         BEGIN_RING(count + 2);
489         OUT_RING(header);
490         OUT_RING_TABLE(payload, count + 1);
491         ADVANCE_RING();
492
493         cmdbuf->buf += (count + 2) * 4;
494         cmdbuf->bufsz -= (count + 2) * 4;
495
496         return 0;
497 }
498
499 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
500                                              drm_radeon_kcmd_buffer_t *cmdbuf)
501 {
502         u32 *cmd = (u32 *) cmdbuf->buf;
503         int count, ret;
504         RING_LOCALS;
505
506         count=(cmd[0]>>16) & 0x3fff;
507
508         if (cmd[0] & 0x8000) {
509                 u32 offset;
510
511                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL 
512                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
513                         offset = cmd[2] << 10;
514                         ret = r300_check_offset(dev_priv, offset);
515                         if (ret) {
516                                 DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
517                                 return DRM_ERR(EINVAL);
518                         }
519                 }
520
521                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
522                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
523                         offset = cmd[3] << 10;
524                         ret = r300_check_offset(dev_priv, offset);
525                         if (ret) {
526                                 DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
527                                 return DRM_ERR(EINVAL);
528                         }
529                         
530                 }
531         }
532
533         BEGIN_RING(count+2);
534         OUT_RING(cmd[0]);
535         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
536         ADVANCE_RING();
537
538         cmdbuf->buf += (count+2)*4;
539         cmdbuf->bufsz -= (count+2)*4;
540
541         return 0;
542 }
543
544 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
545                                             drm_radeon_kcmd_buffer_t *cmdbuf)
546 {
547         u32 header;
548         int count;
549         RING_LOCALS;
550
551         if (4 > cmdbuf->bufsz)
552                 return DRM_ERR(EINVAL);
553
554         /* Fixme !! This simply emits a packet without much checking.
555            We need to be smarter. */
556
557         /* obtain first word - actual packet3 header */
558         header = *(u32 *) cmdbuf->buf;
559
560         /* Is it packet 3 ? */
561         if ((header >> 30) != 0x3) {
562                 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
563                 return DRM_ERR(EINVAL);
564         }
565
566         count = (header >> 16) & 0x3fff;
567
568         /* Check again now that we know how much data to expect */
569         if ((count + 2) * 4 > cmdbuf->bufsz) {
570                 DRM_ERROR
571                     ("Expected packet3 of length %d but have only %d bytes left\n",
572                      (count + 2) * 4, cmdbuf->bufsz);
573                 return DRM_ERR(EINVAL);
574         }
575
576         /* Is it a packet type we know about ? */
577         switch (header & 0xff00) {
578         case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
579                 return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
580
581         case RADEON_CNTL_BITBLT_MULTI:
582                 return r300_emit_bitblt_multi(dev_priv, cmdbuf);
583
584         case RADEON_CP_3D_DRAW_IMMD_2:  /* triggers drawing using in-packet vertex data */
585         case RADEON_CP_3D_DRAW_VBUF_2:  /* triggers drawing of vertex buffers setup elsewhere */
586         case RADEON_CP_3D_DRAW_INDX_2:  /* triggers drawing using indices to vertex buffer */
587         case RADEON_CP_INDX_BUFFER:     /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
588         case RADEON_WAIT_FOR_IDLE:
589         case RADEON_CP_NOP:
590                 /* these packets are safe */
591                 break;
592         default:
593                 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
594                 return DRM_ERR(EINVAL);
595         }
596
597         BEGIN_RING(count + 2);
598         OUT_RING(header);
599         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
600         ADVANCE_RING();
601
602         cmdbuf->buf += (count + 2) * 4;
603         cmdbuf->bufsz -= (count + 2) * 4;
604
605         return 0;
606 }
607
608 /**
609  * Emit a rendering packet3 from userspace.
610  * Called by r300_do_cp_cmdbuf.
611  */
612 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
613                                         drm_radeon_kcmd_buffer_t *cmdbuf,
614                                         drm_r300_cmd_header_t header)
615 {
616         int n;
617         int ret;
618         char *orig_buf = cmdbuf->buf;
619         int orig_bufsz = cmdbuf->bufsz;
620
621         /* This is a do-while-loop so that we run the interior at least once,
622          * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
623          */
624         n = 0;
625         do {
626                 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
627                         ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
628                         if (ret)
629                                 return ret;
630
631                         cmdbuf->buf = orig_buf;
632                         cmdbuf->bufsz = orig_bufsz;
633                 }
634
635                 switch (header.packet3.packet) {
636                 case R300_CMD_PACKET3_CLEAR:
637                         DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
638                         ret = r300_emit_clear(dev_priv, cmdbuf);
639                         if (ret) {
640                                 DRM_ERROR("r300_emit_clear failed\n");
641                                 return ret;
642                         }
643                         break;
644
645                 case R300_CMD_PACKET3_RAW:
646                         DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
647                         ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
648                         if (ret) {
649                                 DRM_ERROR("r300_emit_raw_packet3 failed\n");
650                                 return ret;
651                         }
652                         break;
653
654                 default:
655                         DRM_ERROR("bad packet3 type %i at %p\n",
656                                   header.packet3.packet,
657                                   cmdbuf->buf - sizeof(header));
658                         return DRM_ERR(EINVAL);
659                 }
660
661                 n += R300_SIMULTANEOUS_CLIPRECTS;
662         } while (n < cmdbuf->nbox);
663
664         return 0;
665 }
666
667 /* Some of the R300 chips seem to be extremely touchy about the two registers
668  * that are configured in r300_pacify.
669  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
670  * sends a command buffer that contains only state setting commands and a
671  * vertex program/parameter upload sequence, this will eventually lead to a
672  * lockup, unless the sequence is bracketed by calls to r300_pacify.
673  * So we should take great care to *always* call r300_pacify before
674  * *anything* 3D related, and again afterwards. This is what the
675  * call bracket in r300_do_cp_cmdbuf is for.
676  */
677
678 /**
679  * Emit the sequence to pacify R300.
680  */
681 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
682 {
683         RING_LOCALS;
684
685         BEGIN_RING(6);
686         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
687         OUT_RING(0xa);
688         OUT_RING(CP_PACKET0(0x4f18, 0));
689         OUT_RING(0x3);
690         OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
691         OUT_RING(0x0);
692         ADVANCE_RING();
693 }
694
695 /**
696  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
697  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
698  * be careful about how this function is called.
699  */
700 static void r300_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
701 {
702         drm_radeon_private_t *dev_priv = dev->dev_private;
703         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
704
705         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
706         buf->pending = 1;
707         buf->used = 0;
708 }
709
710 static int r300_scratch(drm_radeon_private_t *dev_priv,
711                         drm_radeon_kcmd_buffer_t *cmdbuf,
712                         drm_r300_cmd_header_t header)
713 {
714         u32 *ref_age_base;
715         u32 i, buf_idx, h_pending;
716         RING_LOCALS;
717         
718         if (cmdbuf->bufsz < sizeof(uint64_t) + header.scratch.n_bufs * sizeof(buf_idx) ) {
719                 return DRM_ERR(EINVAL);
720         }
721         
722         if (header.scratch.reg >= 5) {
723                 return DRM_ERR(EINVAL);
724         }
725         
726         dev_priv->scratch_ages[header.scratch.reg] ++;
727         
728         ref_age_base = (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);
729         
730         cmdbuf->buf += sizeof(uint64_t);
731         cmdbuf->bufsz -= sizeof(uint64_t);
732         
733         for (i=0; i < header.scratch.n_bufs; i++) {
734                 buf_idx = *(u32 *)cmdbuf->buf;
735                 buf_idx *= 2; /* 8 bytes per buf */
736                 
737                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
738                         return DRM_ERR(EINVAL);
739                 }
740                                         
741                 if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
742                         return DRM_ERR(EINVAL);
743                 }
744                                         
745                 if (h_pending == 0) {
746                         return DRM_ERR(EINVAL);
747                 }
748                                         
749                 h_pending--;
750                                                 
751                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
752                         return DRM_ERR(EINVAL);
753                 }
754                                         
755                 cmdbuf->buf += sizeof(buf_idx);
756                 cmdbuf->bufsz -= sizeof(buf_idx);
757         }
758         
759         BEGIN_RING(2);
760         OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
761         OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
762         ADVANCE_RING();
763         
764         return 0;
765 }
766
767 /**
768  * Parses and validates a user-supplied command buffer and emits appropriate
769  * commands on the DMA ring buffer.
770  * Called by the ioctl handler function radeon_cp_cmdbuf.
771  */
772 int r300_do_cp_cmdbuf(drm_device_t *dev,
773                       DRMFILE filp,
774                       drm_file_t *filp_priv,
775                       drm_radeon_kcmd_buffer_t *cmdbuf)
776 {
777         drm_radeon_private_t *dev_priv = dev->dev_private;
778         drm_device_dma_t *dma = dev->dma;
779         drm_buf_t *buf = NULL;
780         int emit_dispatch_age = 0;
781         int ret = 0;
782
783         DRM_DEBUG("\n");
784
785         /* See the comment above r300_emit_begin3d for why this call must be here,
786          * and what the cleanup gotos are for. */
787         r300_pacify(dev_priv);
788
789         if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
790                 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
791                 if (ret)
792                         goto cleanup;
793         }
794
795         while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
796                 int idx;
797                 drm_r300_cmd_header_t header;
798
799                 header.u = *(unsigned int *)cmdbuf->buf;
800
801                 cmdbuf->buf += sizeof(header);
802                 cmdbuf->bufsz -= sizeof(header);
803
804                 switch (header.header.cmd_type) {
805                 case R300_CMD_PACKET0:
806                         DRM_DEBUG("R300_CMD_PACKET0\n");
807                         ret = r300_emit_packet0(dev_priv, cmdbuf, header);
808                         if (ret) {
809                                 DRM_ERROR("r300_emit_packet0 failed\n");
810                                 goto cleanup;
811                         }
812                         break;
813
814                 case R300_CMD_VPU:
815                         DRM_DEBUG("R300_CMD_VPU\n");
816                         ret = r300_emit_vpu(dev_priv, cmdbuf, header);
817                         if (ret) {
818                                 DRM_ERROR("r300_emit_vpu failed\n");
819                                 goto cleanup;
820                         }
821                         break;
822
823                 case R300_CMD_PACKET3:
824                         DRM_DEBUG("R300_CMD_PACKET3\n");
825                         ret = r300_emit_packet3(dev_priv, cmdbuf, header);
826                         if (ret) {
827                                 DRM_ERROR("r300_emit_packet3 failed\n");
828                                 goto cleanup;
829                         }
830                         break;
831
832                 case R300_CMD_END3D:
833                         DRM_DEBUG("R300_CMD_END3D\n");
834                         /* TODO:
835                            Ideally userspace driver should not need to issue this call,
836                            i.e. the drm driver should issue it automatically and prevent
837                            lockups.
838
839                            In practice, we do not understand why this call is needed and what
840                            it does (except for some vague guesses that it has to do with cache
841                            coherence) and so the user space driver does it.
842
843                            Once we are sure which uses prevent lockups the code could be moved
844                            into the kernel and the userspace driver will not
845                            need to use this command.
846
847                            Note that issuing this command does not hurt anything
848                            except, possibly, performance */
849                         r300_pacify(dev_priv);
850                         break;
851
852                 case R300_CMD_CP_DELAY:
853                         /* simple enough, we can do it here */
854                         DRM_DEBUG("R300_CMD_CP_DELAY\n");
855                         {
856                                 int i;
857                                 RING_LOCALS;
858
859                                 BEGIN_RING(header.delay.count);
860                                 for (i = 0; i < header.delay.count; i++)
861                                         OUT_RING(RADEON_CP_PACKET2);
862                                 ADVANCE_RING();
863                         }
864                         break;
865
866                 case R300_CMD_DMA_DISCARD:
867                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
868                         idx = header.dma.buf_idx;
869                         if (idx < 0 || idx >= dma->buf_count) {
870                                 DRM_ERROR("buffer index %d (of %d max)\n",
871                                           idx, dma->buf_count - 1);
872                                 ret = DRM_ERR(EINVAL);
873                                 goto cleanup;
874                         }
875
876                         buf = dma->buflist[idx];
877                         if (buf->filp != filp || buf->pending) {
878                                 DRM_ERROR("bad buffer %p %p %d\n",
879                                           buf->filp, filp, buf->pending);
880                                 ret = DRM_ERR(EINVAL);
881                                 goto cleanup;
882                         }
883
884                         emit_dispatch_age = 1;
885                         r300_discard_buffer(dev, buf);
886                         break;
887
888                 case R300_CMD_WAIT:
889                         /* simple enough, we can do it here */
890                         DRM_DEBUG("R300_CMD_WAIT\n");
891                         if (header.wait.flags == 0)
892                                 break;  /* nothing to do */
893
894                         {
895                                 RING_LOCALS;
896
897                                 BEGIN_RING(2);
898                                 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
899                                 OUT_RING((header.wait.flags & 0xf) << 14);
900                                 ADVANCE_RING();
901                         }
902                         break;
903
904                 case R300_CMD_SCRATCH:
905                         DRM_DEBUG("R300_CMD_SCRATCH\n");
906                         ret = r300_scratch(dev_priv, cmdbuf, header);
907                         if (ret) {
908                                 DRM_ERROR("r300_scratch failed\n");
909                                 goto cleanup;
910                         }
911                         break;
912                         
913                 default:
914                         DRM_ERROR("bad cmd_type %i at %p\n",
915                                   header.header.cmd_type,
916                                   cmdbuf->buf - sizeof(header));
917                         ret = DRM_ERR(EINVAL);
918                         goto cleanup;
919                 }
920         }
921
922         DRM_DEBUG("END\n");
923
924       cleanup:
925         r300_pacify(dev_priv);
926
927         /* We emit the vertex buffer age here, outside the pacifier "brackets"
928          * for two reasons:
929          *  (1) This may coalesce multiple age emissions into a single one and
930          *  (2) more importantly, some chips lock up hard when scratch registers
931          *      are written inside the pacifier bracket.
932          */
933         if (emit_dispatch_age) {
934                 RING_LOCALS;
935
936                 /* Emit the vertex buffer age */
937                 BEGIN_RING(2);
938                 RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
939                 ADVANCE_RING();
940         }
941
942         COMMIT_RING();
943
944         return ret;
945 }