]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/drm/r300_cmdbuf.c
Use compile-time detection of 64-bit addressing.
[FreeBSD/FreeBSD.git] / sys / dev / drm / r300_cmdbuf.c
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  */
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include "dev/drm/drmP.h"
38 #include "dev/drm/drm.h"
39 #include "dev/drm/radeon_drm.h"
40 #include "dev/drm/radeon_drv.h"
41 #include "dev/drm/r300_reg.h"
42
43
44 #define R300_SIMULTANEOUS_CLIPRECTS             4
45
/* R300_RE_CLIPRECT_CNTL values, indexed by (number of active cliprects - 1).
 */
static const int r300_cliprect_cntl[4] = {
	[0] = 0xAAAA,
	[1] = 0xEEEE,
	[2] = 0xFEFE,
	[3] = 0xFFFE,
};
54
55
56 /**
57  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
58  * buffer, starting with index n.
59  */
60 static int r300_emit_cliprects(drm_radeon_private_t* dev_priv,
61                                drm_radeon_cmd_buffer_t* cmdbuf,
62                                int n)
63 {
64         drm_clip_rect_t box;
65         int nr;
66         int i;
67         RING_LOCALS;
68
69         nr = cmdbuf->nbox - n;
70         if (nr > R300_SIMULTANEOUS_CLIPRECTS)
71                 nr = R300_SIMULTANEOUS_CLIPRECTS;
72
73         DRM_DEBUG("%i cliprects\n", nr);
74
75         if (nr) {
76                 BEGIN_RING(6 + nr*2);
77                 OUT_RING( CP_PACKET0( R300_RE_CLIPRECT_TL_0, nr*2 - 1 ) );
78
79                 for(i = 0; i < nr; ++i) {
80                         if (DRM_COPY_FROM_USER_UNCHECKED(&box, &cmdbuf->boxes[n+i], sizeof(box))) {
81                                 DRM_ERROR("copy cliprect faulted\n");
82                                 return DRM_ERR(EFAULT);
83                         }
84
85                         box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
86                         box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
87                         box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
88                         box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
89
90                         OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
91                                         (box.y1 << R300_CLIPRECT_Y_SHIFT));
92                         OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
93                                         (box.y2 << R300_CLIPRECT_Y_SHIFT));
94                 }
95
96                 OUT_RING_REG( R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr-1] );
97
98                 /* TODO/SECURITY: Force scissors to a safe value, otherwise the
99                 * client might be able to trample over memory.
100                 * The impact should be very limited, but I'd rather be safe than
101                 * sorry.
102                 */
103                 OUT_RING( CP_PACKET0( R300_RE_SCISSORS_TL, 1 ) );
104                 OUT_RING( 0 );
105                 OUT_RING( R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK );
106                 ADVANCE_RING();
107                 } else {
108                 /* Why we allow zero cliprect rendering:
109                  * There are some commands in a command buffer that must be submitted
110                  * even when there are no cliprects, e.g. DMA buffer discard
111                  * or state setting (though state setting could be avoided by
112                  * simulating a loss of context).
113                  *
114                  * Now since the cmdbuf interface is so chaotic right now (and is
115                  * bound to remain that way for a bit until things settle down),
116                  * it is basically impossible to filter out the commands that are
117                  * necessary and those that aren't.
118                  *
119                  * So I choose the safe way and don't do any filtering at all;
120                  * instead, I simply set up the engine so that all rendering
121                  * can't produce any fragments.
122                  */
123                 BEGIN_RING(2);
124                 OUT_RING_REG( R300_RE_CLIPRECT_CNTL, 0 );
125                 ADVANCE_RING();
126                 }
127
128         return 0;
129 }
130
131 u8  r300_reg_flags[0x10000>>2];
132
133
134 void r300_init_reg_flags(void)
135 {
136         int i;
137         memset(r300_reg_flags, 0, 0x10000>>2);
138         #define ADD_RANGE_MARK(reg, count,mark) \
139                 for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
140                         r300_reg_flags[i]|=(mark);
141         
142         #define MARK_SAFE               1
143         #define MARK_CHECK_OFFSET       2
144         
145         #define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)
146
147         /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
148         ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
149         ADD_RANGE(0x2080, 1);
150         ADD_RANGE(R300_SE_VTE_CNTL, 2);
151         ADD_RANGE(0x2134, 2);
152         ADD_RANGE(0x2140, 1);
153         ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
154         ADD_RANGE(0x21DC, 1);
155         ADD_RANGE(0x221C, 1);
156         ADD_RANGE(0x2220, 4);
157         ADD_RANGE(0x2288, 1);
158         ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
159         ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
160         ADD_RANGE(R300_GB_ENABLE, 1);
161         ADD_RANGE(R300_GB_MSPOS0, 5);
162         ADD_RANGE(R300_TX_ENABLE, 1);
163         ADD_RANGE(0x4200, 4);
164         ADD_RANGE(0x4214, 1);
165         ADD_RANGE(R300_RE_POINTSIZE, 1);
166         ADD_RANGE(0x4230, 3);
167         ADD_RANGE(R300_RE_LINE_CNT, 1);
168         ADD_RANGE(0x4238, 1);
169         ADD_RANGE(0x4260, 3);
170         ADD_RANGE(0x4274, 4);
171         ADD_RANGE(0x4288, 5);
172         ADD_RANGE(0x42A0, 1);
173         ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
174         ADD_RANGE(0x42B4, 1);
175         ADD_RANGE(R300_RE_CULL_CNTL, 1);
176         ADD_RANGE(0x42C0, 2);
177         ADD_RANGE(R300_RS_CNTL_0, 2);
178         ADD_RANGE(R300_RS_INTERP_0, 8);
179         ADD_RANGE(R300_RS_ROUTE_0, 8);
180         ADD_RANGE(0x43A4, 2);
181         ADD_RANGE(0x43E8, 1);
182         ADD_RANGE(R300_PFS_CNTL_0, 3);
183         ADD_RANGE(R300_PFS_NODE_0, 4);
184         ADD_RANGE(R300_PFS_TEXI_0, 64);
185         ADD_RANGE(0x46A4, 5);
186         ADD_RANGE(R300_PFS_INSTR0_0, 64);
187         ADD_RANGE(R300_PFS_INSTR1_0, 64);
188         ADD_RANGE(R300_PFS_INSTR2_0, 64);
189         ADD_RANGE(R300_PFS_INSTR3_0, 64);
190         ADD_RANGE(0x4BC0, 1);
191         ADD_RANGE(0x4BC8, 3);
192         ADD_RANGE(R300_PP_ALPHA_TEST, 2);
193         ADD_RANGE(0x4BD8, 1);
194         ADD_RANGE(R300_PFS_PARAM_0_X, 64);
195         ADD_RANGE(0x4E00, 1);
196         ADD_RANGE(R300_RB3D_CBLEND, 2);
197         ADD_RANGE(R300_RB3D_COLORMASK, 1);
198         ADD_RANGE(0x4E10, 3);
199         ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET); /* check offset */
200         ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
201         ADD_RANGE(0x4E50, 9);
202         ADD_RANGE(0x4E88, 1);
203         ADD_RANGE(0x4EA0, 2);
204         ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
205         ADD_RANGE(0x4F10, 4);
206         ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET); /* check offset */
207         ADD_RANGE(R300_RB3D_DEPTHPITCH, 1); 
208         ADD_RANGE(0x4F28, 1);
209         ADD_RANGE(0x4F30, 2);
210         ADD_RANGE(0x4F44, 1);
211         ADD_RANGE(0x4F54, 1);
212
213         ADD_RANGE(R300_TX_FILTER_0, 16);
214         ADD_RANGE(R300_TX_UNK1_0, 16);
215         ADD_RANGE(R300_TX_SIZE_0, 16);
216         ADD_RANGE(R300_TX_FORMAT_0, 16);
217                 /* Texture offset is dangerous and needs more checking */
218         ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
219         ADD_RANGE(R300_TX_UNK4_0, 16);
220         ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
221
222         /* Sporadic registers used as primitives are emitted */
223         ADD_RANGE(0x4f18, 1);
224         ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
225         ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
226         ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
227
228 }
229
230 static __inline__ int r300_check_range(unsigned  reg, int count)
231 {
232         int i;
233         if(reg & ~0xffff)return -1;
234         for(i=(reg>>2);i<(reg>>2)+count;i++)
235                 if(r300_reg_flags[i]!=MARK_SAFE)return 1;
236         return 0;
237 }
238
239   /* we expect offsets passed to the framebuffer to be either within video memory or
240       within AGP space */
241 static __inline__ int r300_check_offset(drm_radeon_private_t* dev_priv, u32 offset)
242 {
243         /* we realy want to check against end of video aperture
244                 but this value is not being kept. 
245                 This code is correct for now (does the same thing as the
246                 code that sets MC_FB_LOCATION) in radeon_cp.c */
247         if((offset>=dev_priv->fb_location) && 
248                 (offset<dev_priv->gart_vm_start))return 0;
249         if((offset>=dev_priv->gart_vm_start) &&
250                  (offset<dev_priv->gart_vm_start+dev_priv->gart_size))return 0;
251         return 1;
252 }
253
254 static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t* dev_priv,
255                                                 drm_radeon_cmd_buffer_t* cmdbuf,
256                                                 drm_r300_cmd_header_t header)
257 {
258         int reg;
259         int sz;
260         int i;
261         int values[64];
262         RING_LOCALS;
263
264         sz = header.packet0.count;
265         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
266         
267         if((sz>64)||(sz<0)){
268                 DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n", reg, sz);
269                 return DRM_ERR(EINVAL);
270                 }
271         for(i=0;i<sz;i++){
272                 values[i]=((int __user*)cmdbuf->buf)[i];
273                 switch(r300_reg_flags[(reg>>2)+i]){
274                 case MARK_SAFE:
275                         break;
276                 case MARK_CHECK_OFFSET:
277                         if(r300_check_offset(dev_priv, (u32)values[i])){
278                                 DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n", reg, sz);
279                                 return DRM_ERR(EINVAL);
280                                 }
281                         break;
282                 default:
283                         DRM_ERROR("Register %04x failed check as flag=%02x\n", reg+i*4, r300_reg_flags[(reg>>2)+i]);
284                         return DRM_ERR(EINVAL);
285                         }
286                 }
287                 
288         BEGIN_RING(1+sz);
289         OUT_RING( CP_PACKET0( reg, sz-1 ) );
290         OUT_RING_TABLE( values, sz );
291         ADVANCE_RING();
292
293         cmdbuf->buf += sz*4;
294         cmdbuf->bufsz -= sz*4;
295
296         return 0;
297 }
298
299 /**
300  * Emits a packet0 setting arbitrary registers.
301  * Called by r300_do_cp_cmdbuf.
302  *
303  * Note that checks are performed on contents and addresses of the registers
304  */
305 static __inline__ int r300_emit_packet0(drm_radeon_private_t* dev_priv,
306                                                 drm_radeon_cmd_buffer_t* cmdbuf,
307                                                 drm_r300_cmd_header_t header)
308 {
309         int reg;
310         int sz;
311         RING_LOCALS;
312
313         sz = header.packet0.count;
314         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
315
316         if (!sz)
317                 return 0;
318
319         if (sz*4 > cmdbuf->bufsz)
320                 return DRM_ERR(EINVAL);
321                 
322         if (reg+sz*4 >= 0x10000){
323                 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg, sz);
324                 return DRM_ERR(EINVAL);
325                 }
326
327         if(r300_check_range(reg, sz)){
328                 /* go and check everything */
329                 return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf, header);
330                 }
331         /* the rest of the data is safe to emit, whatever the values the user passed */
332
333         BEGIN_RING(1+sz);
334         OUT_RING( CP_PACKET0( reg, sz-1 ) );
335         OUT_RING_TABLE( (int __user*)cmdbuf->buf, sz );
336         ADVANCE_RING();
337
338         cmdbuf->buf += sz*4;
339         cmdbuf->bufsz -= sz*4;
340
341         return 0;
342 }
343
344
345 /**
346  * Uploads user-supplied vertex program instructions or parameters onto
347  * the graphics card.
348  * Called by r300_do_cp_cmdbuf.
349  */
350 static __inline__ int r300_emit_vpu(drm_radeon_private_t* dev_priv,
351                                     drm_radeon_cmd_buffer_t* cmdbuf,
352                                     drm_r300_cmd_header_t header)
353 {
354         int sz;
355         int addr;
356         RING_LOCALS;
357
358         sz = header.vpu.count;
359         addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
360
361         if (!sz)
362                 return 0;
363         if (sz*16 > cmdbuf->bufsz)
364                 return DRM_ERR(EINVAL);
365
366         BEGIN_RING(5+sz*4);
367         /* Wait for VAP to come to senses.. */
368         /* there is no need to emit it multiple times, (only once before VAP is programmed,
369            but this optimization is for later */
370         OUT_RING_REG( R300_VAP_PVS_WAITIDLE, 0 );
371         OUT_RING_REG( R300_VAP_PVS_UPLOAD_ADDRESS, addr );
372         OUT_RING( CP_PACKET0_TABLE( R300_VAP_PVS_UPLOAD_DATA, sz*4 - 1 ) );
373         OUT_RING_TABLE( (int __user*)cmdbuf->buf, sz*4 );
374
375         ADVANCE_RING();
376
377         cmdbuf->buf += sz*16;
378         cmdbuf->bufsz -= sz*16;
379
380         return 0;
381 }
382
383
384 /**
385  * Emit a clear packet from userspace.
386  * Called by r300_emit_packet3.
387  */
388 static __inline__ int r300_emit_clear(drm_radeon_private_t* dev_priv,
389                                       drm_radeon_cmd_buffer_t* cmdbuf)
390 {
391         RING_LOCALS;
392
393         if (8*4 > cmdbuf->bufsz)
394                 return DRM_ERR(EINVAL);
395
396         BEGIN_RING(10);
397         OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 8 ) );
398         OUT_RING( R300_PRIM_TYPE_POINT|R300_PRIM_WALK_RING|
399                   (1<<R300_PRIM_NUM_VERTICES_SHIFT) );
400         OUT_RING_TABLE( (int __user*)cmdbuf->buf, 8 );
401         ADVANCE_RING();
402
403         cmdbuf->buf += 8*4;
404         cmdbuf->bufsz -= 8*4;
405
406         return 0;
407 }
408
409 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t* dev_priv,
410                                       drm_radeon_cmd_buffer_t* cmdbuf,
411                                       u32 header)
412 {
413         int count, i,k;
414         #define MAX_ARRAY_PACKET  64
415         u32 payload[MAX_ARRAY_PACKET];
416         u32 narrays;
417         RING_LOCALS;
418
419         count=(header>>16) & 0x3fff;
420         
421         if((count+1)>MAX_ARRAY_PACKET){
422                 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n", count);
423                 return DRM_ERR(EINVAL);
424                 }
425         memset(payload, 0, MAX_ARRAY_PACKET*4);
426         memcpy(payload, cmdbuf->buf+4, (count+1)*4);    
427         
428         /* carefully check packet contents */
429         
430         narrays=payload[0];
431         k=0;
432         i=1;
433         while((k<narrays) && (i<(count+1))){
434                 i++; /* skip attribute field */
435                 if(r300_check_offset(dev_priv, payload[i])){
436                         DRM_ERROR("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", k, i);
437                         return DRM_ERR(EINVAL);
438                         }
439                 k++;
440                 i++;
441                 if(k==narrays)break;
442                 /* have one more to process, they come in pairs */
443                 if(r300_check_offset(dev_priv, payload[i])){
444                         DRM_ERROR("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", k, i);
445                         return DRM_ERR(EINVAL);
446                         }
447                 k++;
448                 i++;                    
449                 }
450         /* do the counts match what we expect ? */
451         if((k!=narrays) || (i!=(count+1))){
452                 DRM_ERROR("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n", k, i, narrays, count+1);
453                 return DRM_ERR(EINVAL);
454                 }
455
456         /* all clear, output packet */
457
458         BEGIN_RING(count+2);
459         OUT_RING(header);
460         OUT_RING_TABLE(payload, count+1);
461         ADVANCE_RING();
462
463         cmdbuf->buf += (count+2)*4;
464         cmdbuf->bufsz -= (count+2)*4;
465
466         return 0;
467 }
468
469 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t* dev_priv,
470                                       drm_radeon_cmd_buffer_t* cmdbuf)
471 {
472         u32 header;
473         int count;
474         RING_LOCALS;
475
476         if (4 > cmdbuf->bufsz)
477                 return DRM_ERR(EINVAL);
478
479         /* Fixme !! This simply emits a packet without much checking.
480            We need to be smarter. */
481
482         /* obtain first word - actual packet3 header */
483         header = *(u32 __user*)cmdbuf->buf;
484
485         /* Is it packet 3 ? */
486         if( (header>>30)!=0x3 ) {
487                 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
488                 return DRM_ERR(EINVAL);
489                 }
490
491         count=(header>>16) & 0x3fff;
492
493         /* Check again now that we know how much data to expect */
494         if ((count+2)*4 > cmdbuf->bufsz){
495                 DRM_ERROR("Expected packet3 of length %d but have only %d bytes left\n",
496                         (count+2)*4, cmdbuf->bufsz);
497                 return DRM_ERR(EINVAL);
498                 }
499
500         /* Is it a packet type we know about ? */
501         switch(header & 0xff00){
502         case RADEON_3D_LOAD_VBPNTR: /* load vertex array pointers */
503                 return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
504
505         case RADEON_CP_3D_DRAW_IMMD_2: /* triggers drawing using in-packet vertex data */
506         case RADEON_CP_3D_DRAW_VBUF_2: /* triggers drawing of vertex buffers setup elsewhere */
507         case RADEON_CP_3D_DRAW_INDX_2: /* triggers drawing using indices to vertex buffer */
508         case RADEON_CP_INDX_BUFFER: /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
509         case RADEON_WAIT_FOR_IDLE:
510         case RADEON_CP_NOP:
511                 /* these packets are safe */
512                 break;
513         default:
514                 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
515                 return DRM_ERR(EINVAL);
516                 }
517
518
519         BEGIN_RING(count+2);
520         OUT_RING(header);
521         OUT_RING_TABLE( (int __user*)(cmdbuf->buf+4), count+1);
522         ADVANCE_RING();
523
524         cmdbuf->buf += (count+2)*4;
525         cmdbuf->bufsz -= (count+2)*4;
526
527         return 0;
528 }
529
530
531 /**
532  * Emit a rendering packet3 from userspace.
533  * Called by r300_do_cp_cmdbuf.
534  */
535 static __inline__ int r300_emit_packet3(drm_radeon_private_t* dev_priv,
536                                         drm_radeon_cmd_buffer_t* cmdbuf,
537                                         drm_r300_cmd_header_t header)
538 {
539         int n;
540         int ret;
541         char __user* orig_buf = cmdbuf->buf;
542         int orig_bufsz = cmdbuf->bufsz;
543
544         /* This is a do-while-loop so that we run the interior at least once,
545          * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
546          */
547         n = 0;
548         do {
549                 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
550                         ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
551                         if (ret)
552                                 return ret;
553
554                         cmdbuf->buf = orig_buf;
555                         cmdbuf->bufsz = orig_bufsz;
556                         }
557
558                 switch(header.packet3.packet) {
559                 case R300_CMD_PACKET3_CLEAR:
560                         DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
561                         ret = r300_emit_clear(dev_priv, cmdbuf);
562                         if (ret) {
563                                 DRM_ERROR("r300_emit_clear failed\n");
564                                 return ret;
565                                 }
566                         break;
567
568                 case R300_CMD_PACKET3_RAW:
569                         DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
570                         ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
571                         if (ret) {
572                                 DRM_ERROR("r300_emit_raw_packet3 failed\n");
573                                 return ret;
574                                 }
575                         break;
576
577                 default:
578                         DRM_ERROR("bad packet3 type %i at %p\n",
579                                 header.packet3.packet,
580                                 cmdbuf->buf - sizeof(header));
581                         return DRM_ERR(EINVAL);
582                         }
583
584                 n += R300_SIMULTANEOUS_CLIPRECTS;
585         } while(n < cmdbuf->nbox);
586
587         return 0;
588 }
589
590 /* Some of the R300 chips seem to be extremely touchy about the two registers
591  * that are configured in r300_pacify.
592  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
593  * sends a command buffer that contains only state setting commands and a
594  * vertex program/parameter upload sequence, this will eventually lead to a
595  * lockup, unless the sequence is bracketed by calls to r300_pacify.
596  * So we should take great care to *always* call r300_pacify before
597  * *anything* 3D related, and again afterwards. This is what the
598  * call bracket in r300_do_cp_cmdbuf is for.
599  */
600
601 /**
602  * Emit the sequence to pacify R300.
603  */
604 static __inline__ void r300_pacify(drm_radeon_private_t* dev_priv)
605 {
606         RING_LOCALS;
607
608         BEGIN_RING(6);
609         OUT_RING( CP_PACKET0( R300_RB3D_DSTCACHE_CTLSTAT, 0 ) );
610         OUT_RING( 0xa );
611         OUT_RING( CP_PACKET0( 0x4f18, 0 ) );
612         OUT_RING( 0x3 );
613         OUT_RING( CP_PACKET3( RADEON_CP_NOP, 0 ) );
614         OUT_RING( 0x0 );
615         ADVANCE_RING();
616 }
617
618
619 /**
620  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
621  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
622  * be careful about how this function is called.
623  */
624 static void r300_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
625 {
626         drm_radeon_private_t *dev_priv = dev->dev_private;
627         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
628
629         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
630         buf->pending = 1;
631         buf->used = 0;
632 }
633
634
635 /**
636  * Parses and validates a user-supplied command buffer and emits appropriate
637  * commands on the DMA ring buffer.
638  * Called by the ioctl handler function radeon_cp_cmdbuf.
639  */
640 int r300_do_cp_cmdbuf(drm_device_t* dev,
641                           DRMFILE filp,
642                       drm_file_t* filp_priv,
643                       drm_radeon_cmd_buffer_t* cmdbuf)
644 {
645         drm_radeon_private_t *dev_priv = dev->dev_private;
646         drm_device_dma_t *dma = dev->dma;
647         drm_buf_t *buf = NULL;
648         int emit_dispatch_age = 0;
649         int ret = 0;
650
651         DRM_DEBUG("\n");
652
653         /* See the comment above r300_emit_begin3d for why this call must be here,
654          * and what the cleanup gotos are for. */
655         r300_pacify(dev_priv);
656
657         if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
658                 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
659                 if (ret)
660                         goto cleanup;
661                 }
662
663         while(cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
664                 int idx;
665                 drm_r300_cmd_header_t header;
666
667                 header.u = *(unsigned int *)cmdbuf->buf;
668
669                 cmdbuf->buf += sizeof(header);
670                 cmdbuf->bufsz -= sizeof(header);
671
672                 switch(header.header.cmd_type) {
673                 case R300_CMD_PACKET0: 
674                         DRM_DEBUG("R300_CMD_PACKET0\n");
675                         ret = r300_emit_packet0(dev_priv, cmdbuf, header);
676                         if (ret) {
677                                 DRM_ERROR("r300_emit_packet0 failed\n");
678                                 goto cleanup;
679                                 }
680                         break;
681
682                 case R300_CMD_VPU:
683                         DRM_DEBUG("R300_CMD_VPU\n");
684                         ret = r300_emit_vpu(dev_priv, cmdbuf, header);
685                         if (ret) {
686                                 DRM_ERROR("r300_emit_vpu failed\n");
687                                 goto cleanup;
688                                 }
689                         break;
690
691                 case R300_CMD_PACKET3:
692                         DRM_DEBUG("R300_CMD_PACKET3\n");
693                         ret = r300_emit_packet3(dev_priv, cmdbuf, header);
694                         if (ret) {
695                                 DRM_ERROR("r300_emit_packet3 failed\n");
696                                 goto cleanup;
697                                 }
698                         break;
699
700                 case R300_CMD_END3D:
701                         DRM_DEBUG("R300_CMD_END3D\n");
702                         /* TODO: 
703                                 Ideally userspace driver should not need to issue this call, 
704                                 i.e. the drm driver should issue it automatically and prevent
705                                 lockups.
706                                 
707                                 In practice, we do not understand why this call is needed and what
708                                 it does (except for some vague guesses that it has to do with cache
709                                 coherence) and so the user space driver does it. 
710                                 
711                                 Once we are sure which uses prevent lockups the code could be moved
712                                 into the kernel and the userspace driver will not
713                                 need to use this command.
714
715                                 Note that issuing this command does not hurt anything
716                                 except, possibly, performance */
717                         r300_pacify(dev_priv);
718                         break;
719
720                 case R300_CMD_CP_DELAY:
721                         /* simple enough, we can do it here */
722                         DRM_DEBUG("R300_CMD_CP_DELAY\n");
723                         {
724                                 int i;
725                                 RING_LOCALS;
726
727                                 BEGIN_RING(header.delay.count);
728                                 for(i=0;i<header.delay.count;i++)
729                                         OUT_RING(RADEON_CP_PACKET2);
730                                 ADVANCE_RING();
731                         }
732                         break;
733
734                 case R300_CMD_DMA_DISCARD:
735                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
736                         idx = header.dma.buf_idx;
737                         if (idx < 0 || idx >= dma->buf_count) {
738                                 DRM_ERROR("buffer index %d (of %d max)\n",
739                                         idx, dma->buf_count - 1);
740                                 ret = DRM_ERR(EINVAL);
741                                 goto cleanup;
742                                 }
743
744                         buf = dma->buflist[idx];
745                         if (buf->filp != filp || buf->pending) {
746                                 DRM_ERROR("bad buffer %p %p %d\n",
747                                 buf->filp, filp, buf->pending);
748                                 ret = DRM_ERR(EINVAL);
749                                 goto cleanup;
750                                 }
751
752                         emit_dispatch_age = 1;
753                         r300_discard_buffer(dev, buf);
754                         break;
755
756                 case R300_CMD_WAIT:
757                         /* simple enough, we can do it here */
758                         DRM_DEBUG("R300_CMD_WAIT\n");
759                         if(header.wait.flags==0)break; /* nothing to do */
760
761                         {
762                                 RING_LOCALS;
763
764                                 BEGIN_RING(2);
765                                 OUT_RING( CP_PACKET0( RADEON_WAIT_UNTIL, 0 ) );
766                                 OUT_RING( (header.wait.flags & 0xf)<<14 );
767                                 ADVANCE_RING();
768                         }
769                         break;
770
771                 default:
772                         DRM_ERROR("bad cmd_type %i at %p\n",
773                                   header.header.cmd_type,
774                                   cmdbuf->buf - sizeof(header));
775                         ret = DRM_ERR(EINVAL);
776                         goto cleanup;
777                         }
778         }
779
780         DRM_DEBUG("END\n");
781
782 cleanup:
783         r300_pacify(dev_priv);
784
785         /* We emit the vertex buffer age here, outside the pacifier "brackets"
786          * for two reasons:
787          *  (1) This may coalesce multiple age emissions into a single one and
788          *  (2) more importantly, some chips lock up hard when scratch registers
789          *      are written inside the pacifier bracket.
790          */
791         if (emit_dispatch_age) {
792                 RING_LOCALS;
793
794                 /* Emit the vertex buffer age */
795                 BEGIN_RING(2);
796                 RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
797                 ADVANCE_RING();
798                 }
799
800         COMMIT_RING();
801
802         return ret;
803 }
804