FreeBSD releng/10.0: sys/dev/drm/radeon_state.c
1 /*-
2  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Gareth Hughes <gareth@valinux.com>
26  *    Kevin E. Martin <martin@valinux.com>
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include "dev/drm/drmP.h"
33 #include "dev/drm/drm.h"
34 #include "dev/drm/drm_sarea.h"
35 #include "dev/drm/radeon_drm.h"
36 #include "dev/drm/radeon_drv.h"
37
38 /* ================================================================
39  * Helper functions for client state checking and fixup
40  */
41
42 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
43                                                     dev_priv,
44                                                     struct drm_file *file_priv,
45                                                     u32 *offset)
46 {
47         u64 off = *offset;
48         u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
49         struct drm_radeon_driver_file_fields *radeon_priv;
50
51         /* Hrm ... the story of the offset ... So this function converts
52          * the various ideas of what userland clients might have for an
53          * offset in the card address space into an offset into the card
54          * address space :) So with a sane client, it should just keep
55          * the value intact and just do some boundary checking. However,
56          * not all clients are sane. Some older clients pass us 0 based
57          * offsets relative to the start of the framebuffer and some may
58          * assume the AGP aperture is appended to the framebuffer, so we
59          * try to detect those cases and fix them up.
60          *
61          * Note: It might be a good idea here to make sure the offset lands
62          * in some "allowed" area to protect things like the PCIE GART...
63          */
64
65         /* First, the best case, the offset already lands in either the
66          * framebuffer or the GART mapped space
67          */
68         if (radeon_check_offset(dev_priv, off))
69                 return 0;
70
71         /* Ok, that didn't happen... now check if we have a zero based
72          * offset that fits in the framebuffer + gart space, apply the
73          * magic offset we get from SETPARAM or calculated from fb_location
74          */
75         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
76                 radeon_priv = file_priv->driver_priv;
77                 off += radeon_priv->radeon_fb_delta;
78         }
79
80         /* Finally, assume we aimed at a GART offset if beyond the fb */
81         if (off > fb_end)
82                 off = off - fb_end - 1 + dev_priv->gart_vm_start;
83
84         /* Now recheck and fail if out of bounds */
85         if (radeon_check_offset(dev_priv, off)) {
86                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
87                 *offset = off;
88                 return 0;
89         }
90         return -EINVAL;
91 }
92
93 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
94                                                      dev_priv,
95                                                      struct drm_file *file_priv,
96                                                      int id, u32 *data)
97 {
98         switch (id) {
99
100         case RADEON_EMIT_PP_MISC:
101                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
102                     &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
103                         DRM_ERROR("Invalid depth buffer offset\n");
104                         return -EINVAL;
105                 }
106                 break;
107
108         case RADEON_EMIT_PP_CNTL:
109                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
110                     &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
111                         DRM_ERROR("Invalid colour buffer offset\n");
112                         return -EINVAL;
113                 }
114                 break;
115
116         case R200_EMIT_PP_TXOFFSET_0:
117         case R200_EMIT_PP_TXOFFSET_1:
118         case R200_EMIT_PP_TXOFFSET_2:
119         case R200_EMIT_PP_TXOFFSET_3:
120         case R200_EMIT_PP_TXOFFSET_4:
121         case R200_EMIT_PP_TXOFFSET_5:
122                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
123                                                   &data[0])) {
124                         DRM_ERROR("Invalid R200 texture offset\n");
125                         return -EINVAL;
126                 }
127                 break;
128
129         case RADEON_EMIT_PP_TXFILTER_0:
130         case RADEON_EMIT_PP_TXFILTER_1:
131         case RADEON_EMIT_PP_TXFILTER_2:
132                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
133                     &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
134                         DRM_ERROR("Invalid R100 texture offset\n");
135                         return -EINVAL;
136                 }
137                 break;
138
139         case R200_EMIT_PP_CUBIC_OFFSETS_0:
140         case R200_EMIT_PP_CUBIC_OFFSETS_1:
141         case R200_EMIT_PP_CUBIC_OFFSETS_2:
142         case R200_EMIT_PP_CUBIC_OFFSETS_3:
143         case R200_EMIT_PP_CUBIC_OFFSETS_4:
144         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
145                         int i;
146                         for (i = 0; i < 5; i++) {
147                                 if (radeon_check_and_fixup_offset(dev_priv,
148                                                                   file_priv,
149                                                                   &data[i])) {
150                                         DRM_ERROR
151                                             ("Invalid R200 cubic texture offset\n");
152                                         return -EINVAL;
153                                 }
154                         }
155                         break;
156                 }
157
158         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
159         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
160         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
161                         int i;
162                         for (i = 0; i < 5; i++) {
163                                 if (radeon_check_and_fixup_offset(dev_priv,
164                                                                   file_priv,
165                                                                   &data[i])) {
166                                         DRM_ERROR
167                                             ("Invalid R100 cubic texture offset\n");
168                                         return -EINVAL;
169                                 }
170                         }
171                 }
172                 break;
173
174         case R200_EMIT_VAP_CTL: {
175                         RING_LOCALS;
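                        /* Emit a TCL state flush ahead of the VAP_CTL update;
                         * the register data itself is written later by the
                         * caller's packet emit path.
                         */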
176                         BEGIN_RING(2);
177                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
178                         ADVANCE_RING();
179                 }
180                 break;
181
182         case RADEON_EMIT_RB3D_COLORPITCH:
183         case RADEON_EMIT_RE_LINE_PATTERN:
184         case RADEON_EMIT_SE_LINE_WIDTH:
185         case RADEON_EMIT_PP_LUM_MATRIX:
186         case RADEON_EMIT_PP_ROT_MATRIX_0:
187         case RADEON_EMIT_RB3D_STENCILREFMASK:
188         case RADEON_EMIT_SE_VPORT_XSCALE:
189         case RADEON_EMIT_SE_CNTL:
190         case RADEON_EMIT_SE_CNTL_STATUS:
191         case RADEON_EMIT_RE_MISC:
192         case RADEON_EMIT_PP_BORDER_COLOR_0:
193         case RADEON_EMIT_PP_BORDER_COLOR_1:
194         case RADEON_EMIT_PP_BORDER_COLOR_2:
195         case RADEON_EMIT_SE_ZBIAS_FACTOR:
196         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
197         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
198         case R200_EMIT_PP_TXCBLEND_0:
199         case R200_EMIT_PP_TXCBLEND_1:
200         case R200_EMIT_PP_TXCBLEND_2:
201         case R200_EMIT_PP_TXCBLEND_3:
202         case R200_EMIT_PP_TXCBLEND_4:
203         case R200_EMIT_PP_TXCBLEND_5:
204         case R200_EMIT_PP_TXCBLEND_6:
205         case R200_EMIT_PP_TXCBLEND_7:
206         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
207         case R200_EMIT_TFACTOR_0:
208         case R200_EMIT_VTX_FMT_0:
209         case R200_EMIT_MATRIX_SELECT_0:
210         case R200_EMIT_TEX_PROC_CTL_2:
211         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
212         case R200_EMIT_PP_TXFILTER_0:
213         case R200_EMIT_PP_TXFILTER_1:
214         case R200_EMIT_PP_TXFILTER_2:
215         case R200_EMIT_PP_TXFILTER_3:
216         case R200_EMIT_PP_TXFILTER_4:
217         case R200_EMIT_PP_TXFILTER_5:
218         case R200_EMIT_VTE_CNTL:
219         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
220         case R200_EMIT_PP_TAM_DEBUG3:
221         case R200_EMIT_PP_CNTL_X:
222         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
223         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
224         case R200_EMIT_RE_SCISSOR_TL_0:
225         case R200_EMIT_RE_SCISSOR_TL_1:
226         case R200_EMIT_RE_SCISSOR_TL_2:
227         case R200_EMIT_SE_VAP_CNTL_STATUS:
228         case R200_EMIT_SE_VTX_STATE_CNTL:
229         case R200_EMIT_RE_POINTSIZE:
230         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
231         case R200_EMIT_PP_CUBIC_FACES_0:
232         case R200_EMIT_PP_CUBIC_FACES_1:
233         case R200_EMIT_PP_CUBIC_FACES_2:
234         case R200_EMIT_PP_CUBIC_FACES_3:
235         case R200_EMIT_PP_CUBIC_FACES_4:
236         case R200_EMIT_PP_CUBIC_FACES_5:
237         case RADEON_EMIT_PP_TEX_SIZE_0:
238         case RADEON_EMIT_PP_TEX_SIZE_1:
239         case RADEON_EMIT_PP_TEX_SIZE_2:
240         case R200_EMIT_RB3D_BLENDCOLOR:
241         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
242         case RADEON_EMIT_PP_CUBIC_FACES_0:
243         case RADEON_EMIT_PP_CUBIC_FACES_1:
244         case RADEON_EMIT_PP_CUBIC_FACES_2:
245         case R200_EMIT_PP_TRI_PERF_CNTL:
246         case R200_EMIT_PP_AFS_0:
247         case R200_EMIT_PP_AFS_1:
248         case R200_EMIT_ATF_TFACTOR:
249         case R200_EMIT_PP_TXCTLALL_0:
250         case R200_EMIT_PP_TXCTLALL_1:
251         case R200_EMIT_PP_TXCTLALL_2:
252         case R200_EMIT_PP_TXCTLALL_3:
253         case R200_EMIT_PP_TXCTLALL_4:
254         case R200_EMIT_PP_TXCTLALL_5:
255         case R200_EMIT_VAP_PVS_CNTL:
256                 /* These packets don't contain memory offsets */
257                 break;
258
259         default:
260                 DRM_ERROR("Unknown state packet ID %d\n", id);
261                 return -EINVAL;
262         }
263
264         return 0;
265 }
266
267 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
268                                                      dev_priv,
269                                                      struct drm_file *file_priv,
270                                                      drm_radeon_kcmd_buffer_t *
271                                                      cmdbuf,
272                                                      unsigned int *cmdsz)
273 {
274         u32 *cmd = (u32 *) cmdbuf->buf;
275         u32 offset, narrays;
276         int count, i, k;
277
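        /* A type-3 packet is the header dword plus (count + 1) payload
         * dwords, with count taken from bits 29:16 of the header.
         */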
278         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
279
280         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
281                 DRM_ERROR("Not a type 3 packet\n");
282                 return -EINVAL;
283         }
284
285         if (4 * *cmdsz > cmdbuf->bufsz) {
286                 DRM_ERROR("Packet size larger than size of data provided\n");
287                 return -EINVAL;
288         }
289
290         switch(cmd[0] & 0xff00) {
291         /* XXX Are there old drivers needing other packets? */
292
293         case RADEON_3D_DRAW_IMMD:
294         case RADEON_3D_DRAW_VBUF:
295         case RADEON_3D_DRAW_INDX:
296         case RADEON_WAIT_FOR_IDLE:
297         case RADEON_CP_NOP:
298         case RADEON_3D_CLEAR_ZMASK:
299 /*      case RADEON_CP_NEXT_CHAR:
300         case RADEON_CP_PLY_NEXTSCAN:
301         case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
302                 /* these packets are safe */
303                 break;
304
305         case RADEON_CP_3D_DRAW_IMMD_2:
306         case RADEON_CP_3D_DRAW_VBUF_2:
307         case RADEON_CP_3D_DRAW_INDX_2:
308         case RADEON_3D_CLEAR_HIZ:
309                 /* safe but r200 only */
310                 if (dev_priv->microcode_version != UCODE_R200) {
311                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
312                         return -EINVAL;
313                 }
314                 break;
315
316         case RADEON_3D_LOAD_VBPNTR:
317                 count = (cmd[0] >> 16) & 0x3fff;
318
319                 if (count > 18) { /* 12 arrays max */
320                         DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
321                                   count);
322                         return -EINVAL;
323                 }
324
325                 /* carefully check packet contents */
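                /* Payload layout: cmd[1] holds the array count, then each
                 * pair of arrays shares one packed attribute dword followed
                 * by one offset dword per array, so offsets come in pairs.
                 */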
326                 narrays = cmd[1] & ~0xc000;
327                 k = 0;
328                 i = 2;
329                 while ((k < narrays) && (i < (count + 2))) {
330                         i++;            /* skip attribute field */
331                         if (radeon_check_and_fixup_offset(dev_priv, file_priv,
332                                                           &cmd[i])) {
333                                 DRM_ERROR
334                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
335                                      k, i);
336                                 return -EINVAL;
337                         }
338                         k++;
339                         i++;
340                         if (k == narrays)
341                                 break;
342                         /* have one more to process, they come in pairs */
343                         if (radeon_check_and_fixup_offset(dev_priv,
344                                                           file_priv, &cmd[i]))
345                         {
346                                 DRM_ERROR
347                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
348                                      k, i);
349                                 return -EINVAL;
350                         }
351                         k++;
352                         i++;
353                 }
354                 /* do the counts match what we expect ? */
355                 if ((k != narrays) || (i != (count + 2))) {
356                         DRM_ERROR
357                             ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
358                               k, i, narrays, count + 1);
359                         return -EINVAL;
360                 }
361                 break;
362
363         case RADEON_3D_RNDR_GEN_INDX_PRIM:
364                 if (dev_priv->microcode_version != UCODE_R100) {
365                         DRM_ERROR("Invalid 3d packet for r200-class chip\n");
366                         return -EINVAL;
367                 }
368                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
369                                 DRM_ERROR("Invalid rndr_gen_indx offset\n");
370                                 return -EINVAL;
371                 }
372                 break;
373
374         case RADEON_CP_INDX_BUFFER:
375                 if (dev_priv->microcode_version != UCODE_R200) {
376                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
377                         return -EINVAL;
378                 }
379                 if ((cmd[1] & 0x8000ffff) != 0x80000810) {
380                         DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
381                         return -EINVAL;
382                 }
383                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
384                         DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
385                         return -EINVAL;
386                 }
387                 break;
388
389         case RADEON_CNTL_HOSTDATA_BLT:
390         case RADEON_CNTL_PAINT_MULTI:
391         case RADEON_CNTL_BITBLT_MULTI:
392                 /* MSB of opcode: next DWORD GUI_CNTL */
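                /* The pitch/offset dwords keep the byte offset divided by
                 * 1024 in their low 22 bits; the upper 10 bits carry the
                 * pitch and are preserved when the fixed-up offset is
                 * written back.
                 */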
393                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
394                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
395                         offset = cmd[2] << 10;
396                         if (radeon_check_and_fixup_offset
397                             (dev_priv, file_priv, &offset)) {
398                                 DRM_ERROR("Invalid first packet offset\n");
399                                 return -EINVAL;
400                         }
401                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
402                 }
403
404                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
405                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
406                         offset = cmd[3] << 10;
407                         if (radeon_check_and_fixup_offset
408                             (dev_priv, file_priv, &offset)) {
409                                 DRM_ERROR("Invalid second packet offset\n");
410                                 return -EINVAL;
411                         }
412                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
413                 }
414                 break;
415
416         default:
417                 DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
418                 return -EINVAL;
419         }
420
421         return 0;
422 }
423
424 /* ================================================================
425  * CP hardware state programming functions
426  */
427
428 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
429                                              struct drm_clip_rect * box)
430 {
431         RING_LOCALS;
432
433         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
434                   box->x1, box->y1, box->x2, box->y2);
435
436         BEGIN_RING(4);
437         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
438         OUT_RING((box->y1 << 16) | box->x1);
439         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
440         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
441         ADVANCE_RING();
442 }
443
444 /* Emit 1.1 state
445  */
446 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
447                              struct drm_file *file_priv,
448                              drm_radeon_context_regs_t * ctx,
449                              drm_radeon_texture_regs_t * tex,
450                              unsigned int dirty)
451 {
452         RING_LOCALS;
453         DRM_DEBUG("dirty=0x%08x\n", dirty);
454
455         if (dirty & RADEON_UPLOAD_CONTEXT) {
456                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
457                                                   &ctx->rb3d_depthoffset)) {
458                         DRM_ERROR("Invalid depth buffer offset\n");
459                         return -EINVAL;
460                 }
461
462                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
463                                                   &ctx->rb3d_coloroffset)) {
464                         DRM_ERROR("Invalid colour buffer offset\n");
465                         return -EINVAL;
466                 }
467
468                 BEGIN_RING(14);
469                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
470                 OUT_RING(ctx->pp_misc);
471                 OUT_RING(ctx->pp_fog_color);
472                 OUT_RING(ctx->re_solid_color);
473                 OUT_RING(ctx->rb3d_blendcntl);
474                 OUT_RING(ctx->rb3d_depthoffset);
475                 OUT_RING(ctx->rb3d_depthpitch);
476                 OUT_RING(ctx->rb3d_zstencilcntl);
477                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
478                 OUT_RING(ctx->pp_cntl);
479                 OUT_RING(ctx->rb3d_cntl);
480                 OUT_RING(ctx->rb3d_coloroffset);
481                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
482                 OUT_RING(ctx->rb3d_colorpitch);
483                 ADVANCE_RING();
484         }
485
486         if (dirty & RADEON_UPLOAD_VERTFMT) {
487                 BEGIN_RING(2);
488                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
489                 OUT_RING(ctx->se_coord_fmt);
490                 ADVANCE_RING();
491         }
492
493         if (dirty & RADEON_UPLOAD_LINE) {
494                 BEGIN_RING(5);
495                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
496                 OUT_RING(ctx->re_line_pattern);
497                 OUT_RING(ctx->re_line_state);
498                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
499                 OUT_RING(ctx->se_line_width);
500                 ADVANCE_RING();
501         }
502
503         if (dirty & RADEON_UPLOAD_BUMPMAP) {
504                 BEGIN_RING(5);
505                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
506                 OUT_RING(ctx->pp_lum_matrix);
507                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
508                 OUT_RING(ctx->pp_rot_matrix_0);
509                 OUT_RING(ctx->pp_rot_matrix_1);
510                 ADVANCE_RING();
511         }
512
513         if (dirty & RADEON_UPLOAD_MASKS) {
514                 BEGIN_RING(4);
515                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
516                 OUT_RING(ctx->rb3d_stencilrefmask);
517                 OUT_RING(ctx->rb3d_ropcntl);
518                 OUT_RING(ctx->rb3d_planemask);
519                 ADVANCE_RING();
520         }
521
522         if (dirty & RADEON_UPLOAD_VIEWPORT) {
523                 BEGIN_RING(7);
524                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
525                 OUT_RING(ctx->se_vport_xscale);
526                 OUT_RING(ctx->se_vport_xoffset);
527                 OUT_RING(ctx->se_vport_yscale);
528                 OUT_RING(ctx->se_vport_yoffset);
529                 OUT_RING(ctx->se_vport_zscale);
530                 OUT_RING(ctx->se_vport_zoffset);
531                 ADVANCE_RING();
532         }
533
534         if (dirty & RADEON_UPLOAD_SETUP) {
535                 BEGIN_RING(4);
536                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
537                 OUT_RING(ctx->se_cntl);
538                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
539                 OUT_RING(ctx->se_cntl_status);
540                 ADVANCE_RING();
541         }
542
543         if (dirty & RADEON_UPLOAD_MISC) {
544                 BEGIN_RING(2);
545                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
546                 OUT_RING(ctx->re_misc);
547                 ADVANCE_RING();
548         }
549
550         if (dirty & RADEON_UPLOAD_TEX0) {
551                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
552                                                   &tex[0].pp_txoffset)) {
553                         DRM_ERROR("Invalid texture offset for unit 0\n");
554                         return -EINVAL;
555                 }
556
557                 BEGIN_RING(9);
558                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
559                 OUT_RING(tex[0].pp_txfilter);
560                 OUT_RING(tex[0].pp_txformat);
561                 OUT_RING(tex[0].pp_txoffset);
562                 OUT_RING(tex[0].pp_txcblend);
563                 OUT_RING(tex[0].pp_txablend);
564                 OUT_RING(tex[0].pp_tfactor);
565                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
566                 OUT_RING(tex[0].pp_border_color);
567                 ADVANCE_RING();
568         }
569
570         if (dirty & RADEON_UPLOAD_TEX1) {
571                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
572                                                   &tex[1].pp_txoffset)) {
573                         DRM_ERROR("Invalid texture offset for unit 1\n");
574                         return -EINVAL;
575                 }
576
577                 BEGIN_RING(9);
578                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
579                 OUT_RING(tex[1].pp_txfilter);
580                 OUT_RING(tex[1].pp_txformat);
581                 OUT_RING(tex[1].pp_txoffset);
582                 OUT_RING(tex[1].pp_txcblend);
583                 OUT_RING(tex[1].pp_txablend);
584                 OUT_RING(tex[1].pp_tfactor);
585                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
586                 OUT_RING(tex[1].pp_border_color);
587                 ADVANCE_RING();
588         }
589
590         if (dirty & RADEON_UPLOAD_TEX2) {
591                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
592                                                   &tex[2].pp_txoffset)) {
593                         DRM_ERROR("Invalid texture offset for unit 2\n");
594                         return -EINVAL;
595                 }
596
597                 BEGIN_RING(9);
598                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
599                 OUT_RING(tex[2].pp_txfilter);
600                 OUT_RING(tex[2].pp_txformat);
601                 OUT_RING(tex[2].pp_txoffset);
602                 OUT_RING(tex[2].pp_txcblend);
603                 OUT_RING(tex[2].pp_txablend);
604                 OUT_RING(tex[2].pp_tfactor);
605                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
606                 OUT_RING(tex[2].pp_border_color);
607                 ADVANCE_RING();
608         }
609
610         return 0;
611 }
612
613 /* Emit 1.2 state
614  */
615 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
616                               struct drm_file *file_priv,
617                               drm_radeon_state_t * state)
618 {
619         RING_LOCALS;
620
621         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
622                 BEGIN_RING(3);
623                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
624                 OUT_RING(state->context2.se_zbias_factor);
625                 OUT_RING(state->context2.se_zbias_constant);
626                 ADVANCE_RING();
627         }
628
629         return radeon_emit_state(dev_priv, file_priv, &state->context,
630                                  state->tex, state->dirty);
631 }
632
633 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
634  * 1.3 cmdbuffers allow all previous state to be updated as well as
635  * the tcl scalar and vector areas.
636  */
637 static struct {
638         int start;
639         int len;
640         const char *name;
641 } packet[RADEON_MAX_STATE_PACKETS] = {
642         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
643         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
644         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
645         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
646         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
647         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
648         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
649         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
650         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
651         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
652         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
653         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
654         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
655         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
656         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
657         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
658         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
659         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
660         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
661         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
662         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
663                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
664         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
665         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
666         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
667         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
668         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
669         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
670         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
671         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
672         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
673         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
674         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
675         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
676         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
677         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
678         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
679         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
680         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
681         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
682         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
683         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
684         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
685         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
686         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
687         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
688         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
689         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
690         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
691         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
692         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
693          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
694         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
695         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
696         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
697         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
698         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
699         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
700         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
701         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
702         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
703         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
704         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
705                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
706         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
707         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
708         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
709         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
710         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
711         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
712         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
713         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
714         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
715         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
716         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
717         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
718         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
719         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
720         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
721         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
722         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
723         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
724         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
725         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
726         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
727         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
728         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
729         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
730         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
731         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
732         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
733         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
734         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
735         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
736         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
737         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
738         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
739         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
740 };
741
742 /* ================================================================
743  * Performance monitoring functions
744  */
745
746 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
747                              int x, int y, int w, int h, int r, int g, int b)
748 {
749         u32 color;
750         RING_LOCALS;
751
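        /* The x/y passed in are relative to the first clip rectangle, so
         * offset them by its top-left corner before drawing.
         */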
752         x += dev_priv->sarea_priv->boxes[0].x1;
753         y += dev_priv->sarea_priv->boxes[0].y1;
754
755         switch (dev_priv->color_fmt) {
756         case RADEON_COLOR_FORMAT_RGB565:
757                 color = (((r & 0xf8) << 8) |
758                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
759                 break;
760         case RADEON_COLOR_FORMAT_ARGB8888:
761         default:
762                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
763                 break;
764         }
765
766         BEGIN_RING(4);
767         RADEON_WAIT_UNTIL_3D_IDLE();
768         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
769         OUT_RING(0xffffffff);
770         ADVANCE_RING();
771
772         BEGIN_RING(6);
773
774         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
775         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
776                  RADEON_GMC_BRUSH_SOLID_COLOR |
777                  (dev_priv->color_fmt << 8) |
778                  RADEON_GMC_SRC_DATATYPE_COLOR |
779                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
780
781         if (dev_priv->sarea_priv->pfCurrentPage == 1) {
782                 OUT_RING(dev_priv->front_pitch_offset);
783         } else {
784                 OUT_RING(dev_priv->back_pitch_offset);
785         }
786
787         OUT_RING(color);
788
789         OUT_RING((x << 16) | y);
790         OUT_RING((w << 16) | h);
791
792         ADVANCE_RING();
793 }
794
795 static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv)
796 {
797         /* Collapse various things into a wait flag -- trying to
798          * guess if userspace slept -- better just to have them tell us.
799          */
800         if (dev_priv->stats.last_frame_reads > 1 ||
801             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
802                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
803         }
804
805         if (dev_priv->stats.freelist_loops) {
806                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
807         }
808
809         /* Purple box for page flipping
810          */
811         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
812                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
813
814         /* Red box if we have to wait for idle at any point
815          */
816         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
817                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
818
819         /* Blue box: lost context?
820          */
821
822         /* Yellow box for texture swaps
823          */
824         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
825                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
826
827         /* Green box if hardware never idles (as far as we can tell)
828          */
829         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
830                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
831
832         /* Draw bars indicating number of buffers allocated
833          * (not a great measure, easily confused)
834          */
835         if (dev_priv->stats.requested_bufs) {
836                 if (dev_priv->stats.requested_bufs > 100)
837                         dev_priv->stats.requested_bufs = 100;
838
839                 radeon_clear_box(dev_priv, 4, 16,
840                                  dev_priv->stats.requested_bufs, 4,
841                                  196, 128, 128);
842         }
843
844         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
845
846 }
847
848 /* ================================================================
849  * CP command dispatch functions
850  */
851
852 static void radeon_cp_dispatch_clear(struct drm_device * dev,
853                                      drm_radeon_clear_t * clear,
854                                      drm_radeon_clear_rect_t * depth_boxes)
855 {
856         drm_radeon_private_t *dev_priv = dev->dev_private;
857         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
858         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
859         int nbox = sarea_priv->nbox;
860         struct drm_clip_rect *pbox = sarea_priv->boxes;
861         unsigned int flags = clear->flags;
862         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
863         int i;
864         RING_LOCALS;
865         DRM_DEBUG("flags = 0x%x\n", flags);
866
867         dev_priv->stats.clears++;
868
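        /* When page 1 is currently displayed the logical front and back
         * buffers are swapped, so swap the FRONT/BACK flags to hit the
         * right surface.
         */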
869         if (sarea_priv->pfCurrentPage == 1) {
870                 unsigned int tmp = flags;
871
872                 flags &= ~(RADEON_FRONT | RADEON_BACK);
873                 if (tmp & RADEON_FRONT)
874                         flags |= RADEON_BACK;
875                 if (tmp & RADEON_BACK)
876                         flags |= RADEON_FRONT;
877         }
878
879         if (flags & (RADEON_FRONT | RADEON_BACK)) {
880
881                 BEGIN_RING(4);
882
883                 /* Ensure the 3D stream is idle before doing a
884                  * 2D fill to clear the front or back buffer.
885                  */
886                 RADEON_WAIT_UNTIL_3D_IDLE();
887
888                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
889                 OUT_RING(clear->color_mask);
890
891                 ADVANCE_RING();
892
893                 /* Make sure we restore the 3D state next time.
894                  */
895                 sarea_priv->ctx_owner = 0;
896
897                 for (i = 0; i < nbox; i++) {
898                         int x = pbox[i].x1;
899                         int y = pbox[i].y1;
900                         int w = pbox[i].x2 - x;
901                         int h = pbox[i].y2 - y;
902
903                         DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
904                                   x, y, w, h, flags);
905
906                         if (flags & RADEON_FRONT) {
907                                 BEGIN_RING(6);
908
909                                 OUT_RING(CP_PACKET3
910                                          (RADEON_CNTL_PAINT_MULTI, 4));
911                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
912                                          RADEON_GMC_BRUSH_SOLID_COLOR |
913                                          (dev_priv->
914                                           color_fmt << 8) |
915                                          RADEON_GMC_SRC_DATATYPE_COLOR |
916                                          RADEON_ROP3_P |
917                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
918
919                                 OUT_RING(dev_priv->front_pitch_offset);
920                                 OUT_RING(clear->clear_color);
921
922                                 OUT_RING((x << 16) | y);
923                                 OUT_RING((w << 16) | h);
924
925                                 ADVANCE_RING();
926                         }
927
928                         if (flags & RADEON_BACK) {
929                                 BEGIN_RING(6);
930
931                                 OUT_RING(CP_PACKET3
932                                          (RADEON_CNTL_PAINT_MULTI, 4));
933                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
934                                          RADEON_GMC_BRUSH_SOLID_COLOR |
935                                          (dev_priv->
936                                           color_fmt << 8) |
937                                          RADEON_GMC_SRC_DATATYPE_COLOR |
938                                          RADEON_ROP3_P |
939                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
940
941                                 OUT_RING(dev_priv->back_pitch_offset);
942                                 OUT_RING(clear->clear_color);
943
944                                 OUT_RING((x << 16) | y);
945                                 OUT_RING((w << 16) | h);
946
947                                 ADVANCE_RING();
948                         }
949                 }
950         }
951
952         /* hyper z clear */
953         /* no docs available, based on reverse engineering by Stephane Marchesin */
954         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
955             && (flags & RADEON_CLEAR_FASTZ)) {
956
957                 int i;
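                /* depth_pitch is in bytes; divide by the pixel size (2 for
                 * 16-bit Z, 4 for the 24/32-bit formats) to get pixels per
                 * line.
                 */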
958                 int depthpixperline =
959                     dev_priv->depth_fmt ==
960                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
961                                                        2) : (dev_priv->
962                                                              depth_pitch / 4);
963
964                 u32 clearmask;
965
966                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
967                     ((clear->depth_mask & 0xff) << 24);
968
969                 /* Make sure we restore the 3D state next time.
970                  * we haven't touched any "normal" state - still need this?
971                  */
972                 sarea_priv->ctx_owner = 0;
973
974                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
975                     && (flags & RADEON_USE_HIERZ)) {
976                         /* FIXME : reverse engineer that for Rx00 cards */
977                         /* FIXME : the mask supposedly contains low-res z values. So can't set
978                            just to the max (0xff? or actually 0x3fff?), need to take z clear
979                            value into account? */
980                         /* pattern seems to work for r100, though we get slight
981                            rendering errors with glxgears. If hierz is not enabled for r100,
982                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
983                            other ones are ignored, and the same clear mask can be used. That's
984                            very different behaviour from R200, which needs a different clear mask
985                            and a different number of tiles to clear if hierz is enabled or not !?!
986                          */
987                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
988                 } else {
989                         /* clear mask : chooses the clearing pattern.
990                            rv250: could be used to clear only parts of macrotiles
991                            (but that would get really complicated...)?
992                            bit 0 and 1 (either or both of them ?!?!) are used to
993                            not clear tile (or maybe one of the bits indicates if the tile is
994                            compressed or not), bit 2 and 3 to not clear tile 1,...,.
995                            Pattern is as follows:
996                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
997                            bits -------------------------------------------------
998                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
999                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
1000                            covers 256 pixels ?!?
1001                          */
1002                         clearmask = 0x0;
1003                 }
1004
1005                 BEGIN_RING(8);
1006                 RADEON_WAIT_UNTIL_2D_IDLE();
1007                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1008                              tempRB3D_DEPTHCLEARVALUE);
1009                 /* what offset is this exactly ? */
1010                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1011                 /* need ctlstat, otherwise get some strange black flickering */
1012                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1013                              RADEON_RB3D_ZC_FLUSH_ALL);
1014                 ADVANCE_RING();
1015
1016                 for (i = 0; i < nbox; i++) {
1017                         int tileoffset, nrtilesx, nrtilesy, j;
1018                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1019                         if ((dev_priv->flags & RADEON_HAS_HIERZ)
1020                             && !(dev_priv->microcode_version == UCODE_R200)) {
1021                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
1022                                    maybe r200 actually doesn't need to put the low-res z value into
1023                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
1024                                    Works for R100, both with hierz and without.
1025                                    R100 seems to operate on 2x1 8x8 tiles, but...
1026                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1027                                    problematic with resolutions which are not 64 pix aligned? */
1028                                 tileoffset =
1029                                     ((pbox[i].y1 >> 3) * depthpixperline +
1030                                      pbox[i].x1) >> 6;
1031                                 nrtilesx =
1032                                     ((pbox[i].x2 & ~63) -
1033                                      (pbox[i].x1 & ~63)) >> 4;
1034                                 nrtilesy =
1035                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1036                                 for (j = 0; j <= nrtilesy; j++) {
1037                                         BEGIN_RING(4);
1038                                         OUT_RING(CP_PACKET3
1039                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1040                                         /* first tile */
1041                                         OUT_RING(tileoffset * 8);
1042                                         /* the number of tiles to clear */
1043                                         OUT_RING(nrtilesx + 4);
1044                                         /* clear mask : chooses the clearing pattern. */
1045                                         OUT_RING(clearmask);
1046                                         ADVANCE_RING();
1047                                         tileoffset += depthpixperline >> 6;
1048                                 }
1049                         } else if (dev_priv->microcode_version == UCODE_R200) {
1050                                 /* works for rv250. */
1051                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
1052                                 tileoffset =
1053                                     ((pbox[i].y1 >> 3) * depthpixperline +
1054                                      pbox[i].x1) >> 5;
1055                                 nrtilesx =
1056                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1057                                 nrtilesy =
1058                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1059                                 for (j = 0; j <= nrtilesy; j++) {
1060                                         BEGIN_RING(4);
1061                                         OUT_RING(CP_PACKET3
1062                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1063                                         /* first tile */
1064                                         /* judging by the first tile offset needed, could possibly
1065                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
1066                                            macro tiles, though would still need clear mask for
1067                                            right/bottom if truly 4x4 granularity is desired ? */
1068                                         OUT_RING(tileoffset * 16);
1069                                         /* the number of tiles to clear */
1070                                         OUT_RING(nrtilesx + 1);
1071                                         /* clear mask : chooses the clearing pattern. */
1072                                         OUT_RING(clearmask);
1073                                         ADVANCE_RING();
1074                                         tileoffset += depthpixperline >> 5;
1075                                 }
1076                         } else {        /* rv 100 */
1077                                 /* rv100 might not need 64 pix alignment, who knows */
1078                                 /* offsets are, hmm, weird */
1079                                 tileoffset =
1080                                     ((pbox[i].y1 >> 4) * depthpixperline +
1081                                      pbox[i].x1) >> 6;
1082                                 nrtilesx =
1083                                     ((pbox[i].x2 & ~63) -
1084                                      (pbox[i].x1 & ~63)) >> 4;
1085                                 nrtilesy =
1086                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1087                                 for (j = 0; j <= nrtilesy; j++) {
1088                                         BEGIN_RING(4);
1089                                         OUT_RING(CP_PACKET3
1090                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1091                                         OUT_RING(tileoffset * 128);
1092                                         /* the number of tiles to clear */
1093                                         OUT_RING(nrtilesx + 4);
1094                                         /* clear mask : chooses the clearing pattern. */
1095                                         OUT_RING(clearmask);
1096                                         ADVANCE_RING();
1097                                         tileoffset += depthpixperline >> 6;
1098                                 }
1099                         }
1100                 }
1101
1102                 /* TODO don't always clear all hi-level z tiles */
1103                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1104                     && (dev_priv->microcode_version == UCODE_R200)
1105                     && (flags & RADEON_USE_HIERZ))
1106                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1107                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1108                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1109                            value into account? */
1110                 {
1111                         BEGIN_RING(4);
1112                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1113                         OUT_RING(0x0);  /* First tile */
1114                         OUT_RING(0x3cc0);
1115                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1116                         ADVANCE_RING();
1117                 }
1118         }
1119
1120         /* We have to clear the depth and/or stencil buffers by
1121          * rendering a quad into just those buffers.  Thus, we have to
1122          * make sure the 3D engine is configured correctly.
1123          */
1124         else if ((dev_priv->microcode_version == UCODE_R200) &&
1125                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1126
1127                 int tempPP_CNTL;
1128                 int tempRE_CNTL;
1129                 int tempRB3D_CNTL;
1130                 int tempRB3D_ZSTENCILCNTL;
1131                 int tempRB3D_STENCILREFMASK;
1132                 int tempRB3D_PLANEMASK;
1133                 int tempSE_CNTL;
1134                 int tempSE_VTE_CNTL;
1135                 int tempSE_VTX_FMT_0;
1136                 int tempSE_VTX_FMT_1;
1137                 int tempSE_VAP_CNTL;
1138                 int tempRE_AUX_SCISSOR_CNTL;
1139
1140                 tempPP_CNTL = 0;
1141                 tempRE_CNTL = 0;
1142
1143                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1144
1145                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1146                 tempRB3D_STENCILREFMASK = 0x0;
1147
1148                 tempSE_CNTL = depth_clear->se_cntl;
1149
1150                 /* Disable TCL */
1151
1152                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1153                                           (0x9 <<
1154                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1155
1156                 tempRB3D_PLANEMASK = 0x0;
1157
1158                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1159
1160                 tempSE_VTE_CNTL =
1161                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1162
1163                 /* Vertex format (X, Y, Z, W) */
1164                 tempSE_VTX_FMT_0 =
1165                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1166                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1167                 tempSE_VTX_FMT_1 = 0x0;
1168
1169                 /*
1170                  * Depth buffer specific enables
1171                  */
1172                 if (flags & RADEON_DEPTH) {
1173                         /* Enable depth buffer */
1174                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1175                 } else {
1176                         /* Disable depth buffer */
1177                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1178                 }
1179
1180                 /*
1181                  * Stencil buffer specific enables
1182                  */
1183                 if (flags & RADEON_STENCIL) {
1184                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1185                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1186                 } else {
1187                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1188                         tempRB3D_STENCILREFMASK = 0x00000000;
1189                 }
1190
1191                 if (flags & RADEON_USE_COMP_ZBUF) {
1192                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1193                             RADEON_Z_DECOMPRESSION_ENABLE;
1194                 }
1195                 if (flags & RADEON_USE_HIERZ) {
1196                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1197                 }
1198
1199                 BEGIN_RING(26);
1200                 RADEON_WAIT_UNTIL_2D_IDLE();
1201
1202                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1203                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1204                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1205                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1206                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1207                              tempRB3D_STENCILREFMASK);
1208                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1209                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1210                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1211                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1212                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1213                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1214                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1215                 ADVANCE_RING();
1216
1217                 /* Make sure we restore the 3D state next time.
1218                  */
1219                 sarea_priv->ctx_owner = 0;
1220
1221                 for (i = 0; i < nbox; i++) {
1222
1223                         /* Funny that this should be required --
1224                          *  sets top-left?
1225                          */
1226                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1227
1228                         BEGIN_RING(14);
1229                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1230                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1231                                   RADEON_PRIM_WALK_RING |
1232                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
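                             /* Three corners of the clear rectangle follow as (x, y, z, w)
                              * vertices; 0x3f800000 is 1.0f for the W coordinate.
                              */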
1233                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1234                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1235                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1236                         OUT_RING(0x3f800000);
1237                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1238                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1239                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1240                         OUT_RING(0x3f800000);
1241                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1242                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1243                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1244                         OUT_RING(0x3f800000);
1245                         ADVANCE_RING();
1246                 }
1247         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1248
1249                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1250
1251                 rb3d_cntl = depth_clear->rb3d_cntl;
1252
1253                 if (flags & RADEON_DEPTH) {
1254                         rb3d_cntl |= RADEON_Z_ENABLE;
1255                 } else {
1256                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1257                 }
1258
1259                 if (flags & RADEON_STENCIL) {
1260                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1261                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1262                 } else {
1263                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1264                         rb3d_stencilrefmask = 0x00000000;
1265                 }
1266
1267                 if (flags & RADEON_USE_COMP_ZBUF) {
1268                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1269                             RADEON_Z_DECOMPRESSION_ENABLE;
1270                 }
1271                 if (flags & RADEON_USE_HIERZ) {
1272                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1273                 }
1274
1275                 BEGIN_RING(13);
1276                 RADEON_WAIT_UNTIL_2D_IDLE();
1277
1278                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1279                 OUT_RING(0x00000000);
1280                 OUT_RING(rb3d_cntl);
1281
1282                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1283                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1284                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1285                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1286                 ADVANCE_RING();
1287
1288                 /* Make sure we restore the 3D state next time.
1289                  */
1290                 sarea_priv->ctx_owner = 0;
1291
1292                 for (i = 0; i < nbox; i++) {
1293
1294                         /* Funny that this should be required --
1295                          *  sets top-left?
1296                          */
1297                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1298
1299                         BEGIN_RING(15);
1300
1301                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1302                         OUT_RING(RADEON_VTX_Z_PRESENT |
1303                                  RADEON_VTX_PKCOLOR_PRESENT);
1304                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1305                                   RADEON_PRIM_WALK_RING |
1306                                   RADEON_MAOS_ENABLE |
1307                                   RADEON_VTX_FMT_RADEON_MODE |
1308                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
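                             /* Three (x, y, z, packed color) vertices for the clear
                              * rectangle; the color word is irrelevant here (the plane
                              * mask is zero) and is left as 0x0.
                              */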
1309
1310                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1311                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1312                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1313                         OUT_RING(0x0);
1314
1315                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1316                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1317                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1318                         OUT_RING(0x0);
1319
1320                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1321                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1322                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1323                         OUT_RING(0x0);
1324
1325                         ADVANCE_RING();
1326                 }
1327         }
1328
1329         /* Increment the clear counter.  The client-side 3D driver must
1330          * wait on this value before performing the clear ioctl.  We
1331          * need this because the card's so damned fast...
1332          */
1333         sarea_priv->last_clear++;
1334
1335         BEGIN_RING(4);
1336
1337         RADEON_CLEAR_AGE(sarea_priv->last_clear);
1338         RADEON_WAIT_UNTIL_IDLE();
1339
1340         ADVANCE_RING();
1341 }
1342
1343 static void radeon_cp_dispatch_swap(struct drm_device *dev)
1344 {
1345         drm_radeon_private_t *dev_priv = dev->dev_private;
1346         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1347         int nbox = sarea_priv->nbox;
1348         struct drm_clip_rect *pbox = sarea_priv->boxes;
1349         int i;
1350         RING_LOCALS;
1351         DRM_DEBUG("\n");
1352
1353         /* Do some trivial performance monitoring...
1354          */
1355         if (dev_priv->do_boxes)
1356                 radeon_cp_performance_boxes(dev_priv);
1357
1358         /* Wait for the 3D stream to idle before dispatching the bitblt.
1359          * This will prevent data corruption between the two streams.
1360          */
1361         BEGIN_RING(2);
1362
1363         RADEON_WAIT_UNTIL_3D_IDLE();
1364
1365         ADVANCE_RING();
1366
1367         for (i = 0; i < nbox; i++) {
1368                 int x = pbox[i].x1;
1369                 int y = pbox[i].y1;
1370                 int w = pbox[i].x2 - x;
1371                 int h = pbox[i].y2 - y;
1372
1373                 DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1374
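                     /* Straight-copy blit (ROP3 source copy) of this cliprect from
                      * the back buffer to the front buffer; source and destination
                      * pitch/offset are supplied in the packet below.
                      */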
1375                 BEGIN_RING(9);
1376
1377                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1378                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1379                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1380                          RADEON_GMC_BRUSH_NONE |
1381                          (dev_priv->color_fmt << 8) |
1382                          RADEON_GMC_SRC_DATATYPE_COLOR |
1383                          RADEON_ROP3_S |
1384                          RADEON_DP_SRC_SOURCE_MEMORY |
1385                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1386
1387                 /* Make this work even if front & back are flipped:
1388                  */
1389                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1390                 if (sarea_priv->pfCurrentPage == 0) {
1391                         OUT_RING(dev_priv->back_pitch_offset);
1392                         OUT_RING(dev_priv->front_pitch_offset);
1393                 } else {
1394                         OUT_RING(dev_priv->front_pitch_offset);
1395                         OUT_RING(dev_priv->back_pitch_offset);
1396                 }
1397
1398                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1399                 OUT_RING((x << 16) | y);
1400                 OUT_RING((x << 16) | y);
1401                 OUT_RING((w << 16) | h);
1402
1403                 ADVANCE_RING();
1404         }
1405
1406         /* Increment the frame counter.  The client-side 3D driver must
1407          * throttle the framerate by waiting for this value before
1408          * performing the swapbuffer ioctl.
1409          */
1410         sarea_priv->last_frame++;
1411
1412         BEGIN_RING(4);
1413
1414         RADEON_FRAME_AGE(sarea_priv->last_frame);
1415         RADEON_WAIT_UNTIL_2D_IDLE();
1416
1417         ADVANCE_RING();
1418 }
1419
1420 static void radeon_cp_dispatch_flip(struct drm_device *dev)
1421 {
1422         drm_radeon_private_t *dev_priv = dev->dev_private;
1423         struct drm_sarea *sarea = (struct drm_sarea *)dev_priv->sarea->virtual;
1424         int offset = (dev_priv->sarea_priv->pfCurrentPage == 1)
1425             ? dev_priv->front_offset : dev_priv->back_offset;
1426         RING_LOCALS;
1427         DRM_DEBUG("pfCurrentPage=%d\n",
1428                   dev_priv->sarea_priv->pfCurrentPage);
1429
1430         /* Do some trivial performance monitoring...
1431          */
1432         if (dev_priv->do_boxes) {
1433                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1434                 radeon_cp_performance_boxes(dev_priv);
1435         }
1436
1437         /* Update the frame offsets for both CRTCs
1438          */
1439         BEGIN_RING(6);
1440
1441         RADEON_WAIT_UNTIL_3D_IDLE();
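             /* (color_fmt - 2) happens to equal the bytes per pixel for the
              * RGB565 and ARGB8888 formats this driver programs, so this
              * computes the 8-byte-aligned byte offset of the frame
              * rectangle's origin within the selected buffer.
              */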
1442         OUT_RING_REG(RADEON_CRTC_OFFSET,
1443                      ((sarea->frame.y * dev_priv->front_pitch +
1444                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1445                      + offset);
1446         OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1447                      + offset);
1448
1449         ADVANCE_RING();
1450
1451         /* Increment the frame counter.  The client-side 3D driver must
1452          * throttle the framerate by waiting for this value before
1453          * performing the swapbuffer ioctl.
1454          */
1455         dev_priv->sarea_priv->last_frame++;
1456         dev_priv->sarea_priv->pfCurrentPage =
1457                 1 - dev_priv->sarea_priv->pfCurrentPage;
1458
1459         BEGIN_RING(2);
1460
1461         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1462
1463         ADVANCE_RING();
1464 }
1465
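     /* Return nonzero if "nr" is not a valid vertex count for the given
      * primitive type (e.g. line lists need an even, nonzero count;
      * triangle and rect lists need a nonzero multiple of three).
      */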
1466 static int bad_prim_vertex_nr(int primitive, int nr)
1467 {
1468         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1469         case RADEON_PRIM_TYPE_NONE:
1470         case RADEON_PRIM_TYPE_POINT:
1471                 return nr < 1;
1472         case RADEON_PRIM_TYPE_LINE:
1473                 return (nr & 1) || nr == 0;
1474         case RADEON_PRIM_TYPE_LINE_STRIP:
1475                 return nr < 2;
1476         case RADEON_PRIM_TYPE_TRI_LIST:
1477         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1478         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1479         case RADEON_PRIM_TYPE_RECT_LIST:
1480                 return nr % 3 || nr == 0;
1481         case RADEON_PRIM_TYPE_TRI_FAN:
1482         case RADEON_PRIM_TYPE_TRI_STRIP:
1483                 return nr < 3;
1484         default:
1485                 return 1;
1486         }
1487 }
1488
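     /* One primitive dispatch: the byte range within the buffer, the
      * hardware primitive type, the vertex count and format, and (for
      * indexed primitives) the offset of the vertex data.
      */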
1489 typedef struct {
1490         unsigned int start;
1491         unsigned int finish;
1492         unsigned int prim;
1493         unsigned int numverts;
1494         unsigned int offset;
1495         unsigned int vc_format;
1496 } drm_radeon_tcl_prim_t;
1497
1498 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1499                                       struct drm_buf * buf,
1500                                       drm_radeon_tcl_prim_t * prim)
1501 {
1502         drm_radeon_private_t *dev_priv = dev->dev_private;
1503         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1504         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1505         int numverts = (int)prim->numverts;
1506         int nbox = sarea_priv->nbox;
1507         int i = 0;
1508         RING_LOCALS;
1509
1510         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1511                   prim->prim,
1512                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1513
1514         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1515                 DRM_ERROR("bad prim %x numverts %d\n",
1516                           prim->prim, prim->numverts);
1517                 return;
1518         }
1519
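         /* Emit the draw once per cliprect; the loop still runs once even
          * when no cliprects are pending.
          */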
1520         do {
1521                 /* Emit the next cliprect */
1522                 if (i < nbox) {
1523                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1524                 }
1525
1526                 /* Emit the vertex buffer rendering commands */
1527                 BEGIN_RING(5);
1528
1529                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1530                 OUT_RING(offset);
1531                 OUT_RING(numverts);
1532                 OUT_RING(prim->vc_format);
1533                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1534                          RADEON_COLOR_ORDER_RGBA |
1535                          RADEON_VTX_FMT_RADEON_MODE |
1536                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1537
1538                 ADVANCE_RING();
1539
1540                 i++;
1541         } while (i < nbox);
1542 }
1543
1544 void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_buf *buf)
1545 {
1546         drm_radeon_private_t *dev_priv = dev->dev_private;
1547         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1548         RING_LOCALS;
1549
1550         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1551
1552         /* Emit the vertex buffer age */
1553         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1554                 BEGIN_RING(3);
1555                 R600_DISPATCH_AGE(buf_priv->age);
1556                 ADVANCE_RING();
1557         } else {
1558                 BEGIN_RING(2);
1559                 RADEON_DISPATCH_AGE(buf_priv->age);
1560                 ADVANCE_RING();
1561         }
1562
1563         buf->pending = 1;
1564         buf->used = 0;
1565 }
1566
1567 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1568                                         struct drm_buf * buf, int start, int end)
1569 {
1570         drm_radeon_private_t *dev_priv = dev->dev_private;
1571         RING_LOCALS;
1572         DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1573
1574         if (start != end) {
1575                 int offset = (dev_priv->gart_buffers_offset
1576                               + buf->offset + start);
1577                 int dwords = (end - start + 3) / sizeof(u32);
1578
1579                 /* Indirect buffer data must be an even number of
1580                  * dwords, so if we've been given an odd number we must
1581                  * pad the data with a Type-2 CP packet.
1582                  */
1583                 if (dwords & 1) {
1584                         u32 *data = (u32 *)
1585                             ((char *)dev->agp_buffer_map->virtual
1586                              + buf->offset + start);
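                             /* A Type-2 packet is a CP no-op, so it is safe filler
                              * for reaching an even dword count.
                              */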
1587                         data[dwords++] = RADEON_CP_PACKET2;
1588                 }
1589
1590                 /* Fire off the indirect buffer */
1591                 BEGIN_RING(3);
1592
1593                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1594                 OUT_RING(offset);
1595                 OUT_RING(dwords);
1596
1597                 ADVANCE_RING();
1598         }
1599 }
1600
1601 static void radeon_cp_dispatch_indices(struct drm_device *dev,
1602                                        struct drm_buf * elt_buf,
1603                                        drm_radeon_tcl_prim_t * prim)
1604 {
1605         drm_radeon_private_t *dev_priv = dev->dev_private;
1606         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1607         int offset = dev_priv->gart_buffers_offset + prim->offset;
1608         u32 *data;
1609         int dwords;
1610         int i = 0;
1611         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1612         int count = (prim->finish - start) / sizeof(u16);
1613         int nbox = sarea_priv->nbox;
1614
1615         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1616                   prim->prim,
1617                   prim->vc_format,
1618                   prim->start, prim->finish, prim->offset, prim->numverts);
1619
1620         if (bad_prim_vertex_nr(prim->prim, count)) {
1621                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1622                 return;
1623         }
1624
1625         if (start >= prim->finish || (prim->start & 0x7)) {
1626                 DRM_ERROR("buffer prim %d\n", prim->prim);
1627                 return;
1628         }
1629
1630         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1631
1632         data = (u32 *) ((char *)dev->agp_buffer_map->virtual +
1633                         elt_buf->offset + prim->start);
1634
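         /* Build the RNDR_GEN_INDX_PRIM packet header in the first five
          * dwords of the element buffer itself; the indices that follow it
          * are then fired as an indirect buffer, once per cliprect.
          */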
1635         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1636         data[1] = offset;
1637         data[2] = prim->numverts;
1638         data[3] = prim->vc_format;
1639         data[4] = (prim->prim |
1640                    RADEON_PRIM_WALK_IND |
1641                    RADEON_COLOR_ORDER_RGBA |
1642                    RADEON_VTX_FMT_RADEON_MODE |
1643                    (count << RADEON_NUM_VERTICES_SHIFT));
1644
1645         do {
1646                 if (i < nbox)
1647                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1648
1649                 radeon_cp_dispatch_indirect(dev, elt_buf,
1650                                             prim->start, prim->finish);
1651
1652                 i++;
1653         } while (i < nbox);
1654
1655 }
1656
1657 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1658
1659 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1660                                       struct drm_file *file_priv,
1661                                       drm_radeon_texture_t * tex,
1662                                       drm_radeon_tex_image_t * image)
1663 {
1664         drm_radeon_private_t *dev_priv = dev->dev_private;
1665         struct drm_buf *buf;
1666         u32 format;
1667         u32 *buffer;
1668         const u8 __user *data;
1669         int size, dwords, tex_width, blit_width, spitch;
1670         u32 height;
1671         int i;
1672         u32 texpitch, microtile;
1673         u32 offset, byte_offset;
1674         RING_LOCALS;
1675
1676         if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1677                 DRM_ERROR("Invalid destination offset\n");
1678                 return -EINVAL;
1679         }
1680
1681         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1682
1683         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1684          * up with the texture data from the host data blit, otherwise
1685          * part of the texture image may be corrupted.
1686          */
1687         BEGIN_RING(4);
1688         RADEON_FLUSH_CACHE();
1689         RADEON_WAIT_UNTIL_IDLE();
1690         ADVANCE_RING();
1691
1692         /* The compiler won't optimize away a division by a variable,
1693          * even if the only legal values are powers of two.  Thus, we'll
1694          * use a shift instead.
1695          */
1696         switch (tex->format) {
1697         case RADEON_TXFORMAT_ARGB8888:
1698         case RADEON_TXFORMAT_RGBA8888:
1699                 format = RADEON_COLOR_FORMAT_ARGB8888;
1700                 tex_width = tex->width * 4;
1701                 blit_width = image->width * 4;
1702                 break;
1703         case RADEON_TXFORMAT_AI88:
1704         case RADEON_TXFORMAT_ARGB1555:
1705         case RADEON_TXFORMAT_RGB565:
1706         case RADEON_TXFORMAT_ARGB4444:
1707         case RADEON_TXFORMAT_VYUY422:
1708         case RADEON_TXFORMAT_YVYU422:
1709                 format = RADEON_COLOR_FORMAT_RGB565;
1710                 tex_width = tex->width * 2;
1711                 blit_width = image->width * 2;
1712                 break;
1713         case RADEON_TXFORMAT_I8:
1714         case RADEON_TXFORMAT_RGB332:
1715                 format = RADEON_COLOR_FORMAT_CI8;
1716                 tex_width = tex->width * 1;
1717                 blit_width = image->width * 1;
1718                 break;
1719         default:
1720                 DRM_ERROR("invalid texture format %d\n", tex->format);
1721                 return -EINVAL;
1722         }
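         /* The source pitch is given to the blitter in 64-byte units, hence
          * the shift by 6 (e.g. a 256-byte-wide blit gives spitch = 4).
          */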
1723         spitch = blit_width >> 6;
1724         if (spitch == 0 && image->height > 1)
1725                 return -EINVAL;
1726
1727         texpitch = tex->pitch;
1728         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1729                 microtile = 1;
1730                 if (tex_width < 64) {
1731                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1732                         /* we got tiled coordinates, untile them */
1733                         image->x *= 2;
1734                 }
1735         } else
1736                 microtile = 0;
1737
1738         /* this might fail for zero-sized uploads - are those illegal? */
1739         if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1740                                 blit_width - 1)) {
1741                 DRM_ERROR("Invalid final destination offset\n");
1742                 return -EINVAL;
1743         }
1744
1745         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1746
1747         do {
1748                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%d y=%d w=%d h=%d\n",
1749                           tex->offset >> 10, tex->pitch, tex->format,
1750                           image->x, image->y, image->width, image->height);
1751
1752                 /* Make a copy of some parameters in case we have to
1753                  * update them for a multi-pass texture blit.
1754                  */
1755                 height = image->height;
1756                 data = (const u8 __user *)image->data;
1757
1758                 size = height * blit_width;
1759
1760                 if (size > RADEON_MAX_TEXTURE_SIZE) {
1761                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1762                         size = height * blit_width;
1763                 } else if (size < 4 && size > 0) {
1764                         size = 4;
1765                 } else if (size == 0) {
1766                         return 0;
1767                 }
1768
1769                 buf = radeon_freelist_get(dev);
1770                 if (0 && !buf) {
1771                         radeon_do_cp_idle(dev_priv);
1772                         buf = radeon_freelist_get(dev);
1773                 }
1774                 if (!buf) {
1775                         DRM_DEBUG("EAGAIN\n");
1776                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1777                                 return -EFAULT;
1778                         return -EAGAIN;
1779                 }
1780
1781                 /* Dispatch the indirect buffer.
1782                  */
1783                 buffer =
1784                     (u32 *) ((char *)dev->agp_buffer_map->virtual + buf->offset);
1785                 dwords = size / 4;
1786
1787 #define RADEON_COPY_MT(_buf, _data, _width) \
1788         do { \
1789                 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1790                         DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1791                         return -EFAULT; \
1792                 } \
1793         } while(0)
1794
1795                 if (microtile) {
1796                         /* Texture micro tiling is in use, so the minimum texture width is
1797                            16 bytes.  However, we cannot use the blitter directly for a
1798                            texture width < 64 bytes, since the minimum texture pitch is 64
1799                            bytes and it must match the texture width or the blitter will
1800                            tile it wrong.  Thus, tile manually in that case.  We also need
1801                            to special-case tex height == 1, since our actual image will
1802                            have height 2 and we must not read beyond the texture size
1803                            from user space. */
1804                         if (tex->height == 1) {
1805                                 if (tex_width >= 64 || tex_width <= 16) {
1806                                         RADEON_COPY_MT(buffer, data,
1807                                                 (int)(tex_width * sizeof(u32)));
1808                                 } else if (tex_width == 32) {
1809                                         RADEON_COPY_MT(buffer, data, 16);
1810                                         RADEON_COPY_MT(buffer + 8,
1811                                                        data + 16, 16);
1812                                 }
1813                         } else if (tex_width >= 64 || tex_width == 16) {
1814                                 RADEON_COPY_MT(buffer, data,
1815                                                (int)(dwords * sizeof(u32)));
1816                         } else if (tex_width < 16) {
1817                                 for (i = 0; i < tex->height; i++) {
1818                                         RADEON_COPY_MT(buffer, data, tex_width);
1819                                         buffer += 4;
1820                                         data += tex_width;
1821                                 }
1822                         } else if (tex_width == 32) {
1823                                 /* TODO: make sure this works when not fitting in one buffer
1824                                    (i.e. 32 bytes x 2048...) */
1825                                 for (i = 0; i < tex->height; i += 2) {
1826                                         RADEON_COPY_MT(buffer, data, 16);
1827                                         data += 16;
1828                                         RADEON_COPY_MT(buffer + 8, data, 16);
1829                                         data += 16;
1830                                         RADEON_COPY_MT(buffer + 4, data, 16);
1831                                         data += 16;
1832                                         RADEON_COPY_MT(buffer + 12, data, 16);
1833                                         data += 16;
1834                                         buffer += 16;
1835                                 }
1836                         }
1837                 } else {
1838                         if (tex_width >= 32) {
1839                                 /* Texture image width is larger than the minimum, so we
1840                                  * can upload it directly.
1841                                  */
1842                                 RADEON_COPY_MT(buffer, data,
1843                                                (int)(dwords * sizeof(u32)));
1844                         } else {
1845                                 /* Texture image width is less than the minimum, so we
1846                                  * need to pad out each image scanline to the minimum
1847                                  * width.
1848                                  */
1849                                 for (i = 0; i < tex->height; i++) {
1850                                         RADEON_COPY_MT(buffer, data, tex_width);
1851                                         buffer += 8;
1852                                         data += tex_width;
1853                                 }
1854                         }
1855                 }
1856
1857 #undef RADEON_COPY_MT
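                 /* The blit destination Y coordinate only ranges over 0..2047,
                  * so whole multiples of 2048 lines are folded into the
                  * destination byte offset instead.
                  */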
1858                 byte_offset = (image->y & ~2047) * blit_width;
1859                 buf->file_priv = file_priv;
1860                 buf->used = size;
1861                 offset = dev_priv->gart_buffers_offset + buf->offset;
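                 /* One CNTL_BITBLT_MULTI packet: GMC flags, source pitch/offset
                  * (the DMA buffer just filled), destination pitch/offset (the
                  * texture), source x/y, destination x/y, and blit width/height.
                  */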
1862                 BEGIN_RING(9);
1863                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1864                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1865                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1866                          RADEON_GMC_BRUSH_NONE |
1867                          (format << 8) |
1868                          RADEON_GMC_SRC_DATATYPE_COLOR |
1869                          RADEON_ROP3_S |
1870                          RADEON_DP_SRC_SOURCE_MEMORY |
1871                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1872                 OUT_RING((spitch << 22) | (offset >> 10));
1873                 OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1874                 OUT_RING(0);
1875                 OUT_RING((image->x << 16) | (image->y % 2048));
1876                 OUT_RING((image->width << 16) | height);
1877                 RADEON_WAIT_UNTIL_2D_IDLE();
1878                 ADVANCE_RING();
1879                 COMMIT_RING();
1880
1881                 radeon_cp_discard_buffer(dev, buf);
1882
1883                 /* Update the input parameters for next time */
1884                 image->y += height;
1885                 image->height -= height;
1886                 image->data = (const u8 __user *)image->data + size;
1887         } while (image->height > 0);
1888
1889         /* Flush the pixel cache after the blit completes.  This ensures
1890          * the texture data is written out to memory before rendering
1891          * continues.
1892          */
1893         BEGIN_RING(4);
1894         RADEON_FLUSH_CACHE();
1895         RADEON_WAIT_UNTIL_2D_IDLE();
1896         ADVANCE_RING();
1897         COMMIT_RING();
1898
1899         return 0;
1900 }
1901
1902 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1903 {
1904         drm_radeon_private_t *dev_priv = dev->dev_private;
1905         int i;
1906         RING_LOCALS;
1907         DRM_DEBUG("\n");
1908
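         /* Upload the 32x32 stipple pattern: reset the stipple address to
          * row 0, then write all 32 rows through RE_STIPPLE_DATA.
          */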
1909         BEGIN_RING(35);
1910
1911         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1912         OUT_RING(0x00000000);
1913
1914         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1915         for (i = 0; i < 32; i++) {
1916                 OUT_RING(stipple[i]);
1917         }
1918
1919         ADVANCE_RING();
1920 }
1921
1922 static void radeon_apply_surface_regs(int surf_index,
1923                                       drm_radeon_private_t *dev_priv)
1924 {
1925         if (!dev_priv->mmio)
1926                 return;
1927
1928         radeon_do_cp_idle(dev_priv);
1929
1930         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1931                      dev_priv->surfaces[surf_index].flags);
1932         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1933                      dev_priv->surfaces[surf_index].lower);
1934         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1935                      dev_priv->surfaces[surf_index].upper);
1936 }
1937
1938 /* Allocates a virtual surface
1939  * It doesn't always allocate a real surface; it will stretch an existing
1940  * surface when possible.
1941  *
1942  * Note that refcount can be at most 2: if it could reach 3, freeing a
1943  * surface might force us to allocate a new real surface, which might
1944  * not always be available.
1945  * For example: we allocate three contiguous surfaces ABC. If B is
1946  * freed, we suddenly need two surfaces to store A and C, which might
1947  * not always be available.
1948  */
1949 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1950                          drm_radeon_private_t *dev_priv,
1951                          struct drm_file *file_priv)
1952 {
1953         struct radeon_virt_surface *s;
1954         int i;
1955         int virt_surface_index;
1956         uint32_t new_upper, new_lower;
1957
1958         new_lower = new->address;
1959         new_upper = new_lower + new->size - 1;
1960
1961         /* sanity check */
1962         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1963             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1964              RADEON_SURF_ADDRESS_FIXED_MASK)
1965             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1966                 return -1;
1967
1968         /* make sure there is no overlap with existing surfaces */
1969         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1970                 if ((dev_priv->surfaces[i].refcount != 0) &&
1971                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1972                       (new_lower < dev_priv->surfaces[i].upper)) ||
1973                      ((new_lower < dev_priv->surfaces[i].lower) &&
1974                       (new_upper > dev_priv->surfaces[i].lower)))) {
1975                         return -1;
1976                 }
1977         }
1978
1979         /* find a virtual surface */
1980         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1981                 if (dev_priv->virt_surfaces[i].file_priv == 0)
1982                         break;
1983         if (i == 2 * RADEON_MAX_SURFACES) {
1984                 return -1;
1985         }
1986         virt_surface_index = i;
1987
1988         /* try to reuse an existing surface */
1989         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1990                 /* extend before */
1991                 if ((dev_priv->surfaces[i].refcount == 1) &&
1992                     (new->flags == dev_priv->surfaces[i].flags) &&
1993                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1994                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1995                         s->surface_index = i;
1996                         s->lower = new_lower;
1997                         s->upper = new_upper;
1998                         s->flags = new->flags;
1999                         s->file_priv = file_priv;
2000                         dev_priv->surfaces[i].refcount++;
2001                         dev_priv->surfaces[i].lower = s->lower;
2002                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2003                         return virt_surface_index;
2004                 }
2005
2006                 /* extend after */
2007                 if ((dev_priv->surfaces[i].refcount == 1) &&
2008                     (new->flags == dev_priv->surfaces[i].flags) &&
2009                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
2010                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2011                         s->surface_index = i;
2012                         s->lower = new_lower;
2013                         s->upper = new_upper;
2014                         s->flags = new->flags;
2015                         s->file_priv = file_priv;
2016                         dev_priv->surfaces[i].refcount++;
2017                         dev_priv->surfaces[i].upper = s->upper;
2018                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2019                         return virt_surface_index;
2020                 }
2021         }
2022
2023         /* okay, we need a new one */
2024         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2025                 if (dev_priv->surfaces[i].refcount == 0) {
2026                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2027                         s->surface_index = i;
2028                         s->lower = new_lower;
2029                         s->upper = new_upper;
2030                         s->flags = new->flags;
2031                         s->file_priv = file_priv;
2032                         dev_priv->surfaces[i].refcount = 1;
2033                         dev_priv->surfaces[i].lower = s->lower;
2034                         dev_priv->surfaces[i].upper = s->upper;
2035                         dev_priv->surfaces[i].flags = s->flags;
2036                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2037                         return virt_surface_index;
2038                 }
2039         }
2040
2041         /* we didn't find anything */
2042         return -1;
2043 }
2044
2045 static int free_surface(struct drm_file *file_priv,
2046                         drm_radeon_private_t * dev_priv,
2047                         int lower)
2048 {
2049         struct radeon_virt_surface *s;
2050         int i;
2051         /* find the virtual surface */
2052         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2053                 s = &(dev_priv->virt_surfaces[i]);
2054                 if (s->file_priv) {
2055                         if ((lower == s->lower) && (file_priv == s->file_priv))
2056                         {
2057                                 if (dev_priv->surfaces[s->surface_index].
2058                                     lower == s->lower)
2059                                         dev_priv->surfaces[s->surface_index].
2060                                             lower = s->upper;
2061
2062                                 if (dev_priv->surfaces[s->surface_index].
2063                                     upper == s->upper)
2064                                         dev_priv->surfaces[s->surface_index].
2065                                             upper = s->lower;
2066
2067                                 dev_priv->surfaces[s->surface_index].refcount--;
2068                                 if (dev_priv->surfaces[s->surface_index].
2069                                     refcount == 0)
2070                                         dev_priv->surfaces[s->surface_index].
2071                                             flags = 0;
2072                                 s->file_priv = NULL;
2073                                 radeon_apply_surface_regs(s->surface_index,
2074                                                           dev_priv);
2075                                 return 0;
2076                         }
2077                 }
2078         }
2079         return 1;
2080 }
2081
2082 static void radeon_surfaces_release(struct drm_file *file_priv,
2083                                     drm_radeon_private_t * dev_priv)
2084 {
2085         int i;
2086         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2087                 if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2088                         free_surface(file_priv, dev_priv,
2089                                      dev_priv->virt_surfaces[i].lower);
2090         }
2091 }
2092
2093 /* ================================================================
2094  * IOCTL functions
2095  */
2096 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2097 {
2098         drm_radeon_private_t *dev_priv = dev->dev_private;
2099         drm_radeon_surface_alloc_t *alloc = data;
2100
2101         if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2102                 return -EINVAL;
2103         else
2104                 return 0;
2105 }
2106
2107 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2108 {
2109         drm_radeon_private_t *dev_priv = dev->dev_private;
2110         drm_radeon_surface_free_t *memfree = data;
2111
2112         if (free_surface(file_priv, dev_priv, memfree->address))
2113                 return -EINVAL;
2114         else
2115                 return 0;
2116 }
2117
2118 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2119 {
2120         drm_radeon_private_t *dev_priv = dev->dev_private;
2121         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2122         drm_radeon_clear_t *clear = data;
2123         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2124         DRM_DEBUG("\n");
2125
2126         LOCK_TEST_WITH_RETURN(dev, file_priv);
2127
2128         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2129
2130         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2131                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2132
2133         if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2134                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2135                 return -EFAULT;
2136
2137         radeon_cp_dispatch_clear(dev, clear, depth_boxes);
2138
2139         COMMIT_RING();
2140         return 0;
2141 }
2142
2143 /* Not sure why this isn't set all the time:
2144  */
2145 static int radeon_do_init_pageflip(struct drm_device *dev)
2146 {
2147         drm_radeon_private_t *dev_priv = dev->dev_private;
2148         RING_LOCALS;
2149
2150         DRM_DEBUG("\n");
2151
2152         BEGIN_RING(6);
2153         RADEON_WAIT_UNTIL_3D_IDLE();
2154         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2155         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2156                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2157         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2158         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2159                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2160         ADVANCE_RING();
2161
2162         dev_priv->page_flipping = 1;
2163
2164         if (dev_priv->sarea_priv->pfCurrentPage != 1)
2165                 dev_priv->sarea_priv->pfCurrentPage = 0;
2166
2167         return 0;
2168 }
2169
2170 /* Swapping and flipping are different operations, need different ioctls.
2171  * They can & should be intermixed to support multiple 3d windows.
2172  */
2173 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2174 {
2175         drm_radeon_private_t *dev_priv = dev->dev_private;
2176         DRM_DEBUG("\n");
2177
2178         LOCK_TEST_WITH_RETURN(dev, file_priv);
2179
2180         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2181
2182         if (!dev_priv->page_flipping)
2183                 radeon_do_init_pageflip(dev);
2184
2185         radeon_cp_dispatch_flip(dev);
2186
2187         COMMIT_RING();
2188         return 0;
2189 }
2190
2191 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2192 {
2193         drm_radeon_private_t *dev_priv = dev->dev_private;
2194         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2195
2196         DRM_DEBUG("\n");
2197
2198         LOCK_TEST_WITH_RETURN(dev, file_priv);
2199
2200         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2201
2202         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2203                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2204
2205         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2206                 r600_cp_dispatch_swap(dev);
2207         else
2208                 radeon_cp_dispatch_swap(dev);
2209         sarea_priv->ctx_owner = 0;
2210
2211         COMMIT_RING();
2212         return 0;
2213 }
2214
2215 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2216 {
2217         drm_radeon_private_t *dev_priv = dev->dev_private;
2218         drm_radeon_sarea_t *sarea_priv;
2219         struct drm_device_dma *dma = dev->dma;
2220         struct drm_buf *buf;
2221         drm_radeon_vertex_t *vertex = data;
2222         drm_radeon_tcl_prim_t prim;
2223
2224         LOCK_TEST_WITH_RETURN(dev, file_priv);
2225
2226         sarea_priv = dev_priv->sarea_priv;
2227
2228         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2229                   DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2230
2231         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2232                 DRM_ERROR("buffer index %d (of %d max)\n",
2233                           vertex->idx, dma->buf_count - 1);
2234                 return -EINVAL;
2235         }
2236         if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2237                 DRM_ERROR("buffer prim %d\n", vertex->prim);
2238                 return -EINVAL;
2239         }
2240
2241         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2242         VB_AGE_TEST_WITH_RETURN(dev_priv);
2243
2244         buf = dma->buflist[vertex->idx];
2245
2246         if (buf->file_priv != file_priv) {
2247                 DRM_ERROR("process %d using buffer owned by %p\n",
2248                           DRM_CURRENTPID, buf->file_priv);
2249                 return -EINVAL;
2250         }
2251         if (buf->pending) {
2252                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2253                 return -EINVAL;
2254         }
2255
2256         /* Build up a prim_t record:
2257          */
2258         if (vertex->count) {
2259                 buf->used = vertex->count;      /* not used? */
2260
2261                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2262                         if (radeon_emit_state(dev_priv, file_priv,
2263                                               &sarea_priv->context_state,
2264                                               sarea_priv->tex_state,
2265                                               sarea_priv->dirty)) {
2266                                 DRM_ERROR("radeon_emit_state failed\n");
2267                                 return -EINVAL;
2268                         }
2269
2270                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2271                                                RADEON_UPLOAD_TEX1IMAGES |
2272                                                RADEON_UPLOAD_TEX2IMAGES |
2273                                                RADEON_REQUIRE_QUIESCENCE);
2274                 }
2275
2276                 prim.start = 0;
2277                 prim.finish = vertex->count;    /* unused */
2278                 prim.prim = vertex->prim;
2279                 prim.numverts = vertex->count;
2280                 prim.vc_format = sarea_priv->vc_format;
2281
2282                 radeon_cp_dispatch_vertex(dev, buf, &prim);
2283         }
2284
2285         if (vertex->discard) {
2286                 radeon_cp_discard_buffer(dev, buf);
2287         }
2288
2289         COMMIT_RING();
2290         return 0;
2291 }
2292
2293 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2294 {
2295         drm_radeon_private_t *dev_priv = dev->dev_private;
2296         drm_radeon_sarea_t *sarea_priv;
2297         struct drm_device_dma *dma = dev->dma;
2298         struct drm_buf *buf;
2299         drm_radeon_indices_t *elts = data;
2300         drm_radeon_tcl_prim_t prim;
2301         int count;
2302
2303         LOCK_TEST_WITH_RETURN(dev, file_priv);
2304
2305         sarea_priv = dev_priv->sarea_priv;
2306
2307         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2308                   DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2309                   elts->discard);
2310
2311         if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2312                 DRM_ERROR("buffer index %d (of %d max)\n",
2313                           elts->idx, dma->buf_count - 1);
2314                 return -EINVAL;
2315         }
2316         if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2317                 DRM_ERROR("buffer prim %d\n", elts->prim);
2318                 return -EINVAL;
2319         }
2320
2321         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2322         VB_AGE_TEST_WITH_RETURN(dev_priv);
2323
2324         buf = dma->buflist[elts->idx];
2325
2326         if (buf->file_priv != file_priv) {
2327                 DRM_ERROR("process %d using buffer owned by %p\n",
2328                           DRM_CURRENTPID, buf->file_priv);
2329                 return -EINVAL;
2330         }
2331         if (buf->pending) {
2332                 DRM_ERROR("sending pending buffer %d\n", elts->idx);
2333                 return -EINVAL;
2334         }
2335
2336         count = (elts->end - elts->start) / sizeof(u16);
2337         elts->start -= RADEON_INDEX_PRIM_OFFSET;
2338
2339         if (elts->start & 0x7) {
2340                 DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2341                 return -EINVAL;
2342         }
2343         if (elts->start < buf->used) {
2344                 DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2345                 return -EINVAL;
2346         }
2347
2348         buf->used = elts->end;
2349
2350         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2351                 if (radeon_emit_state(dev_priv, file_priv,
2352                                       &sarea_priv->context_state,
2353                                       sarea_priv->tex_state,
2354                                       sarea_priv->dirty)) {
2355                         DRM_ERROR("radeon_emit_state failed\n");
2356                         return -EINVAL;
2357                 }
2358
2359                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2360                                        RADEON_UPLOAD_TEX1IMAGES |
2361                                        RADEON_UPLOAD_TEX2IMAGES |
2362                                        RADEON_REQUIRE_QUIESCENCE);
2363         }
2364
2365         /* Build up a prim_t record:
2366          */
2367         prim.start = elts->start;
2368         prim.finish = elts->end;
2369         prim.prim = elts->prim;
2370         prim.offset = 0;        /* offset from start of dma buffers */
2371         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2372         prim.vc_format = sarea_priv->vc_format;
2373
2374         radeon_cp_dispatch_indices(dev, buf, &prim);
2375         if (elts->discard) {
2376                 radeon_cp_discard_buffer(dev, buf);
2377         }
2378
2379         COMMIT_RING();
2380         return 0;
2381 }
2382
2383 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2384 {
2385         drm_radeon_private_t *dev_priv = dev->dev_private;
2386         drm_radeon_texture_t *tex = data;
2387         drm_radeon_tex_image_t image;
2388         int ret;
2389
2390         LOCK_TEST_WITH_RETURN(dev, file_priv);
2391
2392         if (tex->image == NULL) {
2393                 DRM_ERROR("null texture image!\n");
2394                 return -EINVAL;
2395         }
2396
2397         if (DRM_COPY_FROM_USER(&image,
2398                                (drm_radeon_tex_image_t __user *) tex->image,
2399                                sizeof(image)))
2400                 return -EFAULT;
2401
2402         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2403         VB_AGE_TEST_WITH_RETURN(dev_priv);
2404
2405         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2406                 ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
2407         else
2408                 ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2409
2410         return ret;
2411 }
2412
2413 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2414 {
2415         drm_radeon_private_t *dev_priv = dev->dev_private;
2416         drm_radeon_stipple_t *stipple = data;
2417         u32 mask[32];
2418
2419         LOCK_TEST_WITH_RETURN(dev, file_priv);
2420
2421         if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2422                 return -EFAULT;
2423
2424         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2425
2426         radeon_cp_dispatch_stipple(dev, mask);
2427
2428         COMMIT_RING();
2429         return 0;
2430 }
2431
2432 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2433 {
2434         drm_radeon_private_t *dev_priv = dev->dev_private;
2435         struct drm_device_dma *dma = dev->dma;
2436         struct drm_buf *buf;
2437         drm_radeon_indirect_t *indirect = data;
2438         RING_LOCALS;
2439
2440         LOCK_TEST_WITH_RETURN(dev, file_priv);
2441
2442         if (!dev_priv) {
2443                 DRM_ERROR("called with no initialization\n");
2444                 return -EINVAL;
2445         }
2446
2447         DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2448                   indirect->idx, indirect->start, indirect->end,
2449                   indirect->discard);
2450
2451         if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2452                 DRM_ERROR("buffer index %d (of %d max)\n",
2453                           indirect->idx, dma->buf_count - 1);
2454                 return -EINVAL;
2455         }
2456
2457         buf = dma->buflist[indirect->idx];
2458
2459         if (buf->file_priv != file_priv) {
2460                 DRM_ERROR("process %d using buffer owned by %p\n",
2461                           DRM_CURRENTPID, buf->file_priv);
2462                 return -EINVAL;
2463         }
2464         if (buf->pending) {
2465                 DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2466                 return -EINVAL;
2467         }
2468
2469         if (indirect->start < buf->used) {
2470                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2471                           indirect->start, buf->used);
2472                 return -EINVAL;
2473         }
2474
2475         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2476         VB_AGE_TEST_WITH_RETURN(dev_priv);
2477
2478         buf->used = indirect->end;
2479
2480         /* Dispatch the indirect buffer full of commands from the
2481          * X server.  This is insecure and is thus only available to
2482          * privileged clients.
2483          */
2484         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2485                 r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2486         else {
2487                 /* Wait for the 3D stream to idle before the indirect buffer
2488                  * containing 2D acceleration commands is processed.
2489                  */
2490                 BEGIN_RING(2);
2491                 RADEON_WAIT_UNTIL_3D_IDLE();
2492                 ADVANCE_RING();
2493                 radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2494         }
2495
2496         if (indirect->discard)
2497                 radeon_cp_discard_buffer(dev, buf);
2498
2499         COMMIT_RING();
2500         return 0;
2501 }
2502
2503 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2504 {
2505         drm_radeon_private_t *dev_priv = dev->dev_private;
2506         drm_radeon_sarea_t *sarea_priv;
2507         struct drm_device_dma *dma = dev->dma;
2508         struct drm_buf *buf;
2509         drm_radeon_vertex2_t *vertex = data;
2510         int i;
2511         unsigned char laststate;
2512
2513         LOCK_TEST_WITH_RETURN(dev, file_priv);
2514
2515         sarea_priv = dev_priv->sarea_priv;
2516
2517         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2518                   DRM_CURRENTPID, vertex->idx, vertex->discard);
2519
2520         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2521                 DRM_ERROR("buffer index %d (of %d max)\n",
2522                           vertex->idx, dma->buf_count - 1);
2523                 return -EINVAL;
2524         }
2525
2526         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2527         VB_AGE_TEST_WITH_RETURN(dev_priv);
2528
2529         buf = dma->buflist[vertex->idx];
2530
2531         if (buf->file_priv != file_priv) {
2532                 DRM_ERROR("process %d using buffer owned by %p\n",
2533                           DRM_CURRENTPID, buf->file_priv);
2534                 return -EINVAL;
2535         }
2536
2537         if (buf->pending) {
2538                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2539                 return -EINVAL;
2540         }
2541
2542         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2543                 return -EINVAL;
2544
2545         for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2546                 drm_radeon_prim_t prim;
2547                 drm_radeon_tcl_prim_t tclprim;
2548
2549                 if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2550                         return -EFAULT;
2551
2552                 if (prim.stateidx != laststate) {
2553                         drm_radeon_state_t state;
2554
2555                         if (DRM_COPY_FROM_USER(&state,
2556                                                &vertex->state[prim.stateidx],
2557                                                sizeof(state)))
2558                                 return -EFAULT;
2559
2560                         if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2561                                 DRM_ERROR("radeon_emit_state2 failed\n");
2562                                 return -EINVAL;
2563                         }
2564
2565                         laststate = prim.stateidx;
2566                 }
2567
2568                 tclprim.start = prim.start;
2569                 tclprim.finish = prim.finish;
2570                 tclprim.prim = prim.prim;
2571                 tclprim.vc_format = prim.vc_format;
2572
2573                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2574                         tclprim.offset = prim.numverts * 64;
2575                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2576
2577                         radeon_cp_dispatch_indices(dev, buf, &tclprim);
2578                 } else {
2579                         tclprim.numverts = prim.numverts;
2580                         tclprim.offset = 0;     /* not used */
2581
2582                         radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2583                 }
2584
2585                 if (sarea_priv->nbox == 1)
2586                         sarea_priv->nbox = 0;
2587         }
2588
2589         if (vertex->discard) {
2590                 radeon_cp_discard_buffer(dev, buf);
2591         }
2592
2593         COMMIT_RING();
2594         return 0;
2595 }
2596
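/* RADEON_CMD_PACKET: emit one register-write (CP type-0) packet.  The
 * packet_id selects an entry in the global packet[] table, which supplies
 * the start register and dword count; the payload is range-checked and
 * fixed up before being copied onto the ring.
 */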
2597 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2598                                struct drm_file *file_priv,
2599                                drm_radeon_cmd_header_t header,
2600                                drm_radeon_kcmd_buffer_t *cmdbuf)
2601 {
2602         int id = (int)header.packet.packet_id;
2603         int sz, reg;
2604         int *data = (int *)cmdbuf->buf;
2605         RING_LOCALS;
2606
2607         if (id >= RADEON_MAX_STATE_PACKETS)
2608                 return -EINVAL;
2609
2610         sz = packet[id].len;
2611         reg = packet[id].start;
2612
2613         if (sz * sizeof(int) > cmdbuf->bufsz) {
2614                 DRM_ERROR("Packet size provided larger than data provided\n");
2615                 return -EINVAL;
2616         }
2617
2618         if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
2619                 DRM_ERROR("Packet verification failed\n");
2620                 return -EINVAL;
2621         }
2622
2623         BEGIN_RING(sz + 1);
2624         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2625         OUT_RING_TABLE(data, sz);
2626         ADVANCE_RING();
2627
2628         cmdbuf->buf += sz * sizeof(int);
2629         cmdbuf->bufsz -= sz * sizeof(int);
2630         return 0;
2631 }
2632
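/* RADEON_CMD_SCALARS: upload TCL scalar state.  Programs the scalar index
 * register with the start offset and dword stride, then streams the payload
 * through the auto-incrementing scalar data register.
 */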
2633 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2634                                           drm_radeon_cmd_header_t header,
2635                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2636 {
2637         int sz = header.scalars.count;
2638         int start = header.scalars.offset;
2639         int stride = header.scalars.stride;
2640         RING_LOCALS;
2641
2642         BEGIN_RING(3 + sz);
2643         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2644         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2645         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2646         OUT_RING_TABLE(cmdbuf->buf, sz);
2647         ADVANCE_RING();
2648         cmdbuf->buf += sz * sizeof(int);
2649         cmdbuf->bufsz -= sz * sizeof(int);
2650         return 0;
2651 }
2652
2653 /* God this is ugly: same as radeon_emit_scalars(), but the 8-bit header
2654  * offset is biased by 0x100 to reach higher scalar register indices. */
2655 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2656                                            drm_radeon_cmd_header_t header,
2657                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2658 {
2659         int sz = header.scalars.count;
2660         int start = ((unsigned int)header.scalars.offset) + 0x100;
2661         int stride = header.scalars.stride;
2662         RING_LOCALS;
2663
2664         BEGIN_RING(3 + sz);
2665         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2666         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2667         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2668         OUT_RING_TABLE(cmdbuf->buf, sz);
2669         ADVANCE_RING();
2670         cmdbuf->buf += sz * sizeof(int);
2671         cmdbuf->bufsz -= sz * sizeof(int);
2672         return 0;
2673 }
2674
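/* RADEON_CMD_VECTORS: upload TCL vector state.  A TCL state flush is issued
 * first, then the vector index register is programmed (octword stride) and
 * the payload is streamed through the vector data register.
 */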
2675 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2676                                           drm_radeon_cmd_header_t header,
2677                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2678 {
2679         int sz = header.vectors.count;
2680         int start = header.vectors.offset;
2681         int stride = header.vectors.stride;
2682         RING_LOCALS;
2683
2684         BEGIN_RING(5 + sz);
2685         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2686         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2687         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2688         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2689         OUT_RING_TABLE(cmdbuf->buf, sz);
2690         ADVANCE_RING();
2691
2692         cmdbuf->buf += sz * sizeof(int);
2693         cmdbuf->bufsz -= sz * sizeof(int);
2694         return 0;
2695 }
2696
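/* RADEON_CMD_VECLINEAR: linear vector upload.  The count in the header is
 * in octwords (hence the * 4 to get dwords), and the destination index is
 * assembled from the split addr_lo/addr_hi header fields.
 */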
2697 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2698                                           drm_radeon_cmd_header_t header,
2699                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2700 {
2701         int sz = header.veclinear.count * 4;
2702         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2703         RING_LOCALS;
2704
2705         if (!sz)
2706                 return 0;
2707         if (sz * 4 > cmdbuf->bufsz)
2708                 return -EINVAL;
2709
2710         BEGIN_RING(5 + sz);
2711         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2712         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2713         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2714         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2715         OUT_RING_TABLE(cmdbuf->buf, sz);
2716         ADVANCE_RING();
2717
2718         cmdbuf->buf += sz * sizeof(int);
2719         cmdbuf->bufsz -= sz * sizeof(int);
2720         return 0;
2721 }
2722
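/* RADEON_CMD_PACKET3: pass a single client-supplied CP type-3 packet through
 * to the ring after it has been verified (and, where necessary, fixed up) by
 * radeon_check_and_fixup_packet3().
 */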
2723 static int radeon_emit_packet3(struct drm_device * dev,
2724                                struct drm_file *file_priv,
2725                                drm_radeon_kcmd_buffer_t *cmdbuf)
2726 {
2727         drm_radeon_private_t *dev_priv = dev->dev_private;
2728         unsigned int cmdsz;
2729         int ret;
2730         RING_LOCALS;
2731
2732         DRM_DEBUG("\n");
2733
2734         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2735                                                   cmdbuf, &cmdsz))) {
2736                 DRM_ERROR("Packet verification failed\n");
2737                 return ret;
2738         }
2739
2740         BEGIN_RING(cmdsz);
2741         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2742         ADVANCE_RING();
2743
2744         cmdbuf->buf += cmdsz * 4;
2745         cmdbuf->bufsz -= cmdsz * 4;
2746         return 0;
2747 }
2748
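/* RADEON_CMD_PACKET3_CLIP: like RADEON_CMD_PACKET3, but the verified packet
 * is re-emitted once per cliprect, with the scissor state reprogrammed via
 * radeon_emit_clip_rect() before each emission.
 */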
2749 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
2750                                         struct drm_file *file_priv,
2751                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2752                                         int orig_nbox)
2753 {
2754         drm_radeon_private_t *dev_priv = dev->dev_private;
2755         struct drm_clip_rect box;
2756         unsigned int cmdsz;
2757         int ret;
2758         struct drm_clip_rect __user *boxes = cmdbuf->boxes;
2759         int i = 0;
2760         RING_LOCALS;
2761
2762         DRM_DEBUG("\n");
2763
2764         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2765                                                   cmdbuf, &cmdsz))) {
2766                 DRM_ERROR("Packet verification failed\n");
2767                 return ret;
2768         }
2769
2770         if (!orig_nbox)
2771                 goto out;
2772
2773         do {
2774                 if (i < cmdbuf->nbox) {
2775                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2776                                 return -EFAULT;
2777                         /* FIXME The second and subsequent times round
2778                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2779                          * calling emit_clip_rect(). This fixes a
2780                          * lockup on fast machines when sending
2781                          * several cliprects with a cmdbuf, as when
2782                          * waving a 2D window over a 3D
2783                          * window. Something in the commands from user
2784                          * space seems to hang the card when they're
2785                          * sent several times in a row. That would be
2786                          * the correct place to fix it but this works
2787                          * around it until I can figure that out - Tim
2788                          * Smith */
2789                         if (i) {
2790                                 BEGIN_RING(2);
2791                                 RADEON_WAIT_UNTIL_3D_IDLE();
2792                                 ADVANCE_RING();
2793                         }
2794                         radeon_emit_clip_rect(dev_priv, &box);
2795                 }
2796
2797                 BEGIN_RING(cmdsz);
2798                 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2799                 ADVANCE_RING();
2800
2801         } while (++i < cmdbuf->nbox);
2802         if (cmdbuf->nbox == 1)
2803                 cmdbuf->nbox = 0;
2804
2805       out:
2806         cmdbuf->buf += cmdsz * 4;
2807         cmdbuf->bufsz -= cmdsz * 4;
2808         return 0;
2809 }
2810
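/* RADEON_CMD_WAIT: translate the wait flags from the command stream into the
 * corresponding WAIT_UNTIL idle request (2D, 3D, or both).
 */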
2811 static int radeon_emit_wait(struct drm_device * dev, int flags)
2812 {
2813         drm_radeon_private_t *dev_priv = dev->dev_private;
2814         RING_LOCALS;
2815
2816         DRM_DEBUG("%x\n", flags);
2817         switch (flags) {
2818         case RADEON_WAIT_2D:
2819                 BEGIN_RING(2);
2820                 RADEON_WAIT_UNTIL_2D_IDLE();
2821                 ADVANCE_RING();
2822                 break;
2823         case RADEON_WAIT_3D:
2824                 BEGIN_RING(2);
2825                 RADEON_WAIT_UNTIL_3D_IDLE();
2826                 ADVANCE_RING();
2827                 break;
2828         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2829                 BEGIN_RING(2);
2830                 RADEON_WAIT_UNTIL_IDLE();
2831                 ADVANCE_RING();
2832                 break;
2833         default:
2834                 return -EINVAL;
2835         }
2836
2837         return 0;
2838 }
2839
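/* DRM_RADEON_CMDBUF ioctl: the main command-submission path for pre-r300
 * microcode.  The user stream is copied into a kernel buffer (capped at 64KB)
 * and then consumed one drm_radeon_cmd_header_t at a time, dispatching to the
 * emit helpers above.
 *
 * A minimal sketch of how a client might lay out such a stream (field names
 * are those of the header union used below; RADEON_EMIT_PP_MISC is used
 * purely as an illustrative packet id, and buffer management is omitted):
 *
 *      drm_radeon_cmd_header_t h;
 *      h.i = 0;
 *      h.packet.cmd_type  = RADEON_CMD_PACKET;
 *      h.packet.packet_id = RADEON_EMIT_PP_MISC;
 *      memcpy(buf, &h, sizeof(h));
 *      // ... followed by packet[packet_id].len register dwords, then
 *      // further headers, typically ending with RADEON_CMD_DMA_DISCARD ...
 */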
2840 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
2841 {
2842         drm_radeon_private_t *dev_priv = dev->dev_private;
2843         struct drm_device_dma *dma = dev->dma;
2844         struct drm_buf *buf = NULL;
2845         int idx;
2846         drm_radeon_kcmd_buffer_t *cmdbuf = data;
2847         drm_radeon_cmd_header_t header;
2848         int orig_nbox, orig_bufsz;
2849         char *kbuf = NULL;
2850
2851         LOCK_TEST_WITH_RETURN(dev, file_priv);
2852
2853         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2854         VB_AGE_TEST_WITH_RETURN(dev_priv);
2855
2856         if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2857                 return -EINVAL;
2858         }
2859
2860         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2861          * races between checking values and using those values in other code,
2862          * and simply to avoid a lot of function calls to copy in data.
2863          */
2864         orig_bufsz = cmdbuf->bufsz;
2865         if (orig_bufsz != 0) {
2866                 kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
2867                 if (kbuf == NULL)
2868                         return -ENOMEM;
2869                 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
2870                                        cmdbuf->bufsz)) {
2871                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2872                         return -EFAULT;
2873                 }
2874                 cmdbuf->buf = kbuf;
2875         }
2876
2877         orig_nbox = cmdbuf->nbox;
2878
2879         if (dev_priv->microcode_version == UCODE_R300) {
2880                 int temp;
2881                 temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2882
2883                 if (orig_bufsz != 0)
2884                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2885
2886                 return temp;
2887         }
2888
2889         /* microcode_version != r300 */
2890         while (cmdbuf->bufsz >= sizeof(header)) {
2891
2892                 header.i = *(int *)cmdbuf->buf;
2893                 cmdbuf->buf += sizeof(header);
2894                 cmdbuf->bufsz -= sizeof(header);
2895
2896                 switch (header.header.cmd_type) {
2897                 case RADEON_CMD_PACKET:
2898                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2899                         if (radeon_emit_packets
2900                             (dev_priv, file_priv, header, cmdbuf)) {
2901                                 DRM_ERROR("radeon_emit_packets failed\n");
2902                                 goto err;
2903                         }
2904                         break;
2905
2906                 case RADEON_CMD_SCALARS:
2907                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2908                         if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
2909                                 DRM_ERROR("radeon_emit_scalars failed\n");
2910                                 goto err;
2911                         }
2912                         break;
2913
2914                 case RADEON_CMD_VECTORS:
2915                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2916                         if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
2917                                 DRM_ERROR("radeon_emit_vectors failed\n");
2918                                 goto err;
2919                         }
2920                         break;
2921
2922                 case RADEON_CMD_DMA_DISCARD:
2923                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2924                         idx = header.dma.buf_idx;
2925                         if (idx < 0 || idx >= dma->buf_count) {
2926                                 DRM_ERROR("buffer index %d (of %d max)\n",
2927                                           idx, dma->buf_count - 1);
2928                                 goto err;
2929                         }
2930
2931                         buf = dma->buflist[idx];
2932                         if (buf->file_priv != file_priv || buf->pending) {
2933                                 DRM_ERROR("bad buffer %p %p %d\n",
2934                                           buf->file_priv, file_priv,
2935                                           buf->pending);
2936                                 goto err;
2937                         }
2938
2939                         radeon_cp_discard_buffer(dev, buf);
2940                         break;
2941
2942                 case RADEON_CMD_PACKET3:
2943                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2944                         if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2945                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2946                                 goto err;
2947                         }
2948                         break;
2949
2950                 case RADEON_CMD_PACKET3_CLIP:
2951                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2952                         if (radeon_emit_packet3_cliprect
2953                             (dev, file_priv, cmdbuf, orig_nbox)) {
2954                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2955                                 goto err;
2956                         }
2957                         break;
2958
2959                 case RADEON_CMD_SCALARS2:
2960                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2961                         if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
2962                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2963                                 goto err;
2964                         }
2965                         break;
2966
2967                 case RADEON_CMD_WAIT:
2968                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2969                         if (radeon_emit_wait(dev, header.wait.flags)) {
2970                                 DRM_ERROR("radeon_emit_wait failed\n");
2971                                 goto err;
2972                         }
2973                         break;
2974                 case RADEON_CMD_VECLINEAR:
2975                         DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
2976                         if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
2977                                 DRM_ERROR("radeon_emit_veclinear failed\n");
2978                                 goto err;
2979                         }
2980                         break;
2981
2982                 default:
2983                         DRM_ERROR("bad cmd_type %d at %p\n",
2984                                   header.header.cmd_type,
2985                                   cmdbuf->buf - sizeof(header));
2986                         goto err;
2987                 }
2988         }
2989
2990         if (orig_bufsz != 0)
2991                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2992
2993         DRM_DEBUG("DONE\n");
2994         COMMIT_RING();
2995         return 0;
2996
2997       err:
2998         if (orig_bufsz != 0)
2999                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3000         return -EINVAL;
3001 }
3002
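/* DRM_RADEON_GETPARAM ioctl: report a single int-sized driver/hardware value
 * to userspace through the pointer in param->value.
 *
 * A minimal sketch of the userspace side, assuming the libdrm
 * drmCommandWriteRead() wrapper (error handling omitted):
 *
 *      drm_radeon_getparam_t gp;
 *      int num_pipes = 0;
 *
 *      gp.param = RADEON_PARAM_NUM_GB_PIPES;
 *      gp.value = &num_pipes;
 *      drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
 */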
3003 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3004 {
3005         drm_radeon_private_t *dev_priv = dev->dev_private;
3006         drm_radeon_getparam_t *param = data;
3007         int value;
3008
3009         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3010
3011         switch (param->param) {
3012         case RADEON_PARAM_GART_BUFFER_OFFSET:
3013                 value = dev_priv->gart_buffers_offset;
3014                 break;
3015         case RADEON_PARAM_LAST_FRAME:
3016                 dev_priv->stats.last_frame_reads++;
3017                 value = GET_SCRATCH(dev_priv, 0);
3018                 break;
3019         case RADEON_PARAM_LAST_DISPATCH:
3020                 value = GET_SCRATCH(dev_priv, 1);
3021                 break;
3022         case RADEON_PARAM_LAST_CLEAR:
3023                 dev_priv->stats.last_clear_reads++;
3024                 value = GET_SCRATCH(dev_priv, 2);
3025                 break;
3026         case RADEON_PARAM_IRQ_NR:
3027                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3028                         value = 0;
3029                 else
3030                         value = dev->irq;
3031                 break;
3032         case RADEON_PARAM_GART_BASE:
3033                 value = dev_priv->gart_vm_start;
3034                 break;
3035         case RADEON_PARAM_REGISTER_HANDLE:
3036                 value = dev_priv->mmio->offset;
3037                 break;
3038         case RADEON_PARAM_STATUS_HANDLE:
3039                 value = dev_priv->ring_rptr_offset;
3040                 break;
3041 #ifndef __LP64__
3042                 /*
3043                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3044                  * pointer which can't fit into an int-sized variable.  According to
3045                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3046                  * not supporting it shouldn't be a problem.  If the same functionality
3047                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
3048                  * so backwards-compatibility for the embedded platforms can be
3049                  * maintained.  --davidm 4-Feb-2004.
3050                  */
3051         case RADEON_PARAM_SAREA_HANDLE:
3052                 /* The lock is the first dword in the sarea. */
3053                 value = (long)dev->lock.hw_lock;
3054                 break;
3055 #endif
3056         case RADEON_PARAM_GART_TEX_HANDLE:
3057                 value = dev_priv->gart_textures_offset;
3058                 break;
3059         case RADEON_PARAM_SCRATCH_OFFSET:
3060                 if (!dev_priv->writeback_works)
3061                         return -EINVAL;
3062                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3063                         value = R600_SCRATCH_REG_OFFSET;
3064                 else
3065                         value = RADEON_SCRATCH_REG_OFFSET;
3066                 break;
3067         case RADEON_PARAM_CARD_TYPE:
3068                 if (dev_priv->flags & RADEON_IS_PCIE)
3069                         value = RADEON_CARD_PCIE;
3070                 else if (dev_priv->flags & RADEON_IS_AGP)
3071                         value = RADEON_CARD_AGP;
3072                 else
3073                         value = RADEON_CARD_PCI;
3074                 break;
3075         case RADEON_PARAM_VBLANK_CRTC:
3076                 value = radeon_vblank_crtc_get(dev);
3077                 break;
3078         case RADEON_PARAM_FB_LOCATION:
3079                 value = radeon_read_fb_location(dev_priv);
3080                 break;
3081         case RADEON_PARAM_NUM_GB_PIPES:
3082                 value = dev_priv->num_gb_pipes;
3083                 break;
3084         case RADEON_PARAM_NUM_Z_PIPES:
3085                 value = dev_priv->num_z_pipes;
3086                 break;
3087         default:
3088                 DRM_DEBUG("Invalid parameter %d\n", param->param);
3089                 return -EINVAL;
3090         }
3091
3092         if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3093                 DRM_ERROR("copy_to_user\n");
3094                 return -EFAULT;
3095         }
3096
3097         return 0;
3098 }
3099
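/* DRM_RADEON_SETPARAM ioctl: let a client adjust driver state that cannot be
 * derived at init time (framebuffer location delta, color tiling, PCI GART
 * placement and table size, memory-map mode, vblank CRTC selection).
 */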
3100 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3101 {
3102         drm_radeon_private_t *dev_priv = dev->dev_private;
3103         drm_radeon_setparam_t *sp = data;
3104         struct drm_radeon_driver_file_fields *radeon_priv;
3105
3106         switch (sp->param) {
3107         case RADEON_SETPARAM_FB_LOCATION:
3108                 radeon_priv = file_priv->driver_priv;
3109                 radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3110                     sp->value;
3111                 break;
3112         case RADEON_SETPARAM_SWITCH_TILING:
3113                 if (sp->value == 0) {
3114                         DRM_DEBUG("color tiling disabled\n");
3115                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3116                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3117                         if (dev_priv->sarea_priv)
3118                                 dev_priv->sarea_priv->tiling_enabled = 0;
3119                 } else if (sp->value == 1) {
3120                         DRM_DEBUG("color tiling enabled\n");
3121                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3122                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3123                         if (dev_priv->sarea_priv)
3124                                 dev_priv->sarea_priv->tiling_enabled = 1;
3125                 }
3126                 break;
3127         case RADEON_SETPARAM_PCIGART_LOCATION:
3128                 dev_priv->pcigart_offset = sp->value;
3129                 dev_priv->pcigart_offset_set = 1;
3130                 break;
3131         case RADEON_SETPARAM_NEW_MEMMAP:
3132                 dev_priv->new_memmap = sp->value;
3133                 break;
3134         case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3135                 dev_priv->gart_info.table_size = sp->value;
3136                 if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3137                         dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3138                 break;
3139         case RADEON_SETPARAM_VBLANK_CRTC:
3140                 return radeon_vblank_crtc_set(dev, sp->value);
3141                 break;
3142         default:
3143                 DRM_DEBUG("Invalid parameter %d\n", sp->param);
3144                 return -EINVAL;
3145         }
3146
3147         return 0;
3148 }
3149
3150 /* When a client dies:
3151  *    - Check for and clean up flipped page state
3152  *    - Free any allocated GART memory.
3153  *    - Free any allocated radeon surfaces.
3154  *
3155  * DRM infrastructure takes care of reclaiming dma buffers.
3156  */
3157 void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
3158 {
3159         if (dev->dev_private) {
3160                 drm_radeon_private_t *dev_priv = dev->dev_private;
3161                 dev_priv->page_flipping = 0;
3162                 radeon_mem_release(file_priv, dev_priv->gart_heap);
3163                 radeon_mem_release(file_priv, dev_priv->fb_heap);
3164                 radeon_surfaces_release(file_priv, dev_priv);
3165         }
3166 }
3167
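/* Last close of the device: release surfaces held under the PCI GART file
 * token, flip back to page 0 if a client left the other page displayed, and
 * tear down CP/engine state via radeon_do_release().
 */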
3168 void radeon_driver_lastclose(struct drm_device *dev)
3169 {
3170         radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
3171         if (dev->dev_private) {
3172                 drm_radeon_private_t *dev_priv = dev->dev_private;
3173
3174                 if (dev_priv->sarea_priv &&
3175                     dev_priv->sarea_priv->pfCurrentPage != 0)
3176                         radeon_cp_dispatch_flip(dev);
3177         }
3178
3179         radeon_do_release(dev);
3180 }
3181
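/* Per-open-file setup: allocate the driver_priv block and record the current
 * framebuffer location as the initial radeon_fb_delta (later refined by
 * RADEON_SETPARAM_FB_LOCATION).
 */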
3182 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3183 {
3184         drm_radeon_private_t *dev_priv = dev->dev_private;
3185         struct drm_radeon_driver_file_fields *radeon_priv;
3186
3187         DRM_DEBUG("\n");
3188         radeon_priv =
3189             (struct drm_radeon_driver_file_fields *)
3190             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3191
3192         if (!radeon_priv)
3193                 return -ENOMEM;
3194
3195         file_priv->driver_priv = radeon_priv;
3196
3197         if (dev_priv)
3198                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3199         else
3200                 radeon_priv->radeon_fb_delta = 0;
3201         return 0;
3202 }
3203
3204 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3205 {
3206         struct drm_radeon_driver_file_fields *radeon_priv =
3207             file_priv->driver_priv;
3208
3209         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3210 }
3211
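/* Ioctl dispatch table.  DRM_AUTH entries require an authenticated client;
 * entries also marked DRM_MASTER and/or DRM_ROOT_ONLY (CP setup, heap init,
 * the indirect buffer path) are limited to the master client or root,
 * matching the "insecure" warning on the indirect dispatch above.
 */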
3212 struct drm_ioctl_desc radeon_ioctls[] = {
3213         DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3214         DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3215         DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3216         DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3217         DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
3218         DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
3219         DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
3220         DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
3221         DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
3222         DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
3223         DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
3224         DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
3225         DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
3226         DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
3227         DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_ROOT_ONLY),
3228         DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
3229         DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
3230         DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
3231         DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
3232         DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
3233         DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
3234         DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3235         DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
3236         DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
3237         DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
3238         DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
3239         DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
3240         DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH)
3241 };
3242
3243 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);