1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <dev/drm2/drmP.h>
34 #include <dev/drm2/drm_buffer.h>
35 #include <dev/drm2/radeon/radeon_drm.h>
36 #include "radeon_drv.h"
37
38 /* ================================================================
39  * Helper functions for client state checking and fixup
40  */
41
42 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
43                                                     dev_priv,
44                                                     struct drm_file * file_priv,
45                                                     u32 *offset)
46 {
47         u64 off = *offset;
48         u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
49         struct drm_radeon_driver_file_fields *radeon_priv;
50
51         /* Hrm ... the story of the offset ... So this function converts
52          * the various ideas of what userland clients might have for an
53          * offset in the card address space into an offset into the card
54          * address space :) So with a sane client, it should just keep
55          * the value intact and just do some boundary checking. However,
56          * not all clients are sane. Some older clients pass us 0 based
57          * offsets relative to the start of the framebuffer and some may
58          * assume the AGP aperture is appended to the framebuffer, so we
59          * try to detect those cases and fix them up.
60          *
61          * Note: It might be a good idea here to make sure the offset lands
62          * in some "allowed" area to protect things like the PCIE GART...
63          */
64
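        /* Illustrative example (hypothetical numbers, not from real hardware):
         * a legacy client hands us off = 0 for a buffer at the start of the
         * framebuffer.  radeon_check_offset() rejects it, but because it fits
         * inside fb_size + gart_size we add radeon_fb_delta (normally the
         * framebuffer location, or whatever SETPARAM established) and the
         * final recheck succeeds.  An offset just past fb_end is instead
         * rebased onto gart_vm_start.
         */
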
65         /* First, the best case, the offset already lands in either the
66          * framebuffer or the GART mapped space
67          */
68         if (radeon_check_offset(dev_priv, off))
69                 return 0;
70
71         /* Ok, that didn't happen... now check if we have a zero based
72          * offset that fits in the framebuffer + gart space, apply the
73          * magic offset we get from SETPARAM or calculated from fb_location
74          */
75         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
76                 radeon_priv = file_priv->driver_priv;
77                 off += radeon_priv->radeon_fb_delta;
78         }
79
80         /* Finally, assume we aimed at a GART offset if beyond the fb */
81         if (off > fb_end)
82                 off = off - fb_end - 1 + dev_priv->gart_vm_start;
83
84         /* Now recheck and fail if out of bounds */
85         if (radeon_check_offset(dev_priv, off)) {
86                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
87                 *offset = off;
88                 return 0;
89         }
90         return -EINVAL;
91 }
92
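/* Validate one RADEON_EMIT / R200_EMIT state packet: fix up any GPU offsets
 * it carries (depth, colour, texture and cubic map offsets), emit a TCL
 * state flush for R200_EMIT_VAP_CTL, and reject unknown packet ids.
 */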
93 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
94                                                      dev_priv,
95                                                      struct drm_file *file_priv,
96                                                      int id, struct drm_buffer *buf)
97 {
98         u32 *data;
99         switch (id) {
100
101         case RADEON_EMIT_PP_MISC:
102                 data = drm_buffer_pointer_to_dword(buf,
103                         (RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4);
104
105                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
106                         DRM_ERROR("Invalid depth buffer offset\n");
107                         return -EINVAL;
108                 }
109                 dev_priv->have_z_offset = 1;
110                 break;
111
112         case RADEON_EMIT_PP_CNTL:
113                 data = drm_buffer_pointer_to_dword(buf,
114                         (RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4);
115
116                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
117                         DRM_ERROR("Invalid colour buffer offset\n");
118                         return -EINVAL;
119                 }
120                 break;
121
122         case R200_EMIT_PP_TXOFFSET_0:
123         case R200_EMIT_PP_TXOFFSET_1:
124         case R200_EMIT_PP_TXOFFSET_2:
125         case R200_EMIT_PP_TXOFFSET_3:
126         case R200_EMIT_PP_TXOFFSET_4:
127         case R200_EMIT_PP_TXOFFSET_5:
128                 data = drm_buffer_pointer_to_dword(buf, 0);
129                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
130                         DRM_ERROR("Invalid R200 texture offset\n");
131                         return -EINVAL;
132                 }
133                 break;
134
135         case RADEON_EMIT_PP_TXFILTER_0:
136         case RADEON_EMIT_PP_TXFILTER_1:
137         case RADEON_EMIT_PP_TXFILTER_2:
138                 data = drm_buffer_pointer_to_dword(buf,
139                         (RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4);
140                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, data)) {
141                         DRM_ERROR("Invalid R100 texture offset\n");
142                         return -EINVAL;
143                 }
144                 break;
145
146         case R200_EMIT_PP_CUBIC_OFFSETS_0:
147         case R200_EMIT_PP_CUBIC_OFFSETS_1:
148         case R200_EMIT_PP_CUBIC_OFFSETS_2:
149         case R200_EMIT_PP_CUBIC_OFFSETS_3:
150         case R200_EMIT_PP_CUBIC_OFFSETS_4:
151         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
152                         int i;
153                         for (i = 0; i < 5; i++) {
154                                 data = drm_buffer_pointer_to_dword(buf, i);
155                                 if (radeon_check_and_fixup_offset(dev_priv,
156                                                                   file_priv,
157                                                                   data)) {
158                                         DRM_ERROR
159                                             ("Invalid R200 cubic texture offset\n");
160                                         return -EINVAL;
161                                 }
162                         }
163                         break;
164                 }
165
166         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
167         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
168         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
169                         int i;
170                         for (i = 0; i < 5; i++) {
171                                 data = drm_buffer_pointer_to_dword(buf, i);
172                                 if (radeon_check_and_fixup_offset(dev_priv,
173                                                                   file_priv,
174                                                                   data)) {
175                                         DRM_ERROR
176                                             ("Invalid R100 cubic texture offset\n");
177                                         return -EINVAL;
178                                 }
179                         }
180                 }
181                 break;
182
183         case R200_EMIT_VAP_CTL:{
184                         RING_LOCALS;
185                         BEGIN_RING(2);
186                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
187                         ADVANCE_RING();
188                 }
189                 break;
190
191         case RADEON_EMIT_RB3D_COLORPITCH:
192         case RADEON_EMIT_RE_LINE_PATTERN:
193         case RADEON_EMIT_SE_LINE_WIDTH:
194         case RADEON_EMIT_PP_LUM_MATRIX:
195         case RADEON_EMIT_PP_ROT_MATRIX_0:
196         case RADEON_EMIT_RB3D_STENCILREFMASK:
197         case RADEON_EMIT_SE_VPORT_XSCALE:
198         case RADEON_EMIT_SE_CNTL:
199         case RADEON_EMIT_SE_CNTL_STATUS:
200         case RADEON_EMIT_RE_MISC:
201         case RADEON_EMIT_PP_BORDER_COLOR_0:
202         case RADEON_EMIT_PP_BORDER_COLOR_1:
203         case RADEON_EMIT_PP_BORDER_COLOR_2:
204         case RADEON_EMIT_SE_ZBIAS_FACTOR:
205         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
206         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
207         case R200_EMIT_PP_TXCBLEND_0:
208         case R200_EMIT_PP_TXCBLEND_1:
209         case R200_EMIT_PP_TXCBLEND_2:
210         case R200_EMIT_PP_TXCBLEND_3:
211         case R200_EMIT_PP_TXCBLEND_4:
212         case R200_EMIT_PP_TXCBLEND_5:
213         case R200_EMIT_PP_TXCBLEND_6:
214         case R200_EMIT_PP_TXCBLEND_7:
215         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
216         case R200_EMIT_TFACTOR_0:
217         case R200_EMIT_VTX_FMT_0:
218         case R200_EMIT_MATRIX_SELECT_0:
219         case R200_EMIT_TEX_PROC_CTL_2:
220         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
221         case R200_EMIT_PP_TXFILTER_0:
222         case R200_EMIT_PP_TXFILTER_1:
223         case R200_EMIT_PP_TXFILTER_2:
224         case R200_EMIT_PP_TXFILTER_3:
225         case R200_EMIT_PP_TXFILTER_4:
226         case R200_EMIT_PP_TXFILTER_5:
227         case R200_EMIT_VTE_CNTL:
228         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
229         case R200_EMIT_PP_TAM_DEBUG3:
230         case R200_EMIT_PP_CNTL_X:
231         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
232         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
233         case R200_EMIT_RE_SCISSOR_TL_0:
234         case R200_EMIT_RE_SCISSOR_TL_1:
235         case R200_EMIT_RE_SCISSOR_TL_2:
236         case R200_EMIT_SE_VAP_CNTL_STATUS:
237         case R200_EMIT_SE_VTX_STATE_CNTL:
238         case R200_EMIT_RE_POINTSIZE:
239         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
240         case R200_EMIT_PP_CUBIC_FACES_0:
241         case R200_EMIT_PP_CUBIC_FACES_1:
242         case R200_EMIT_PP_CUBIC_FACES_2:
243         case R200_EMIT_PP_CUBIC_FACES_3:
244         case R200_EMIT_PP_CUBIC_FACES_4:
245         case R200_EMIT_PP_CUBIC_FACES_5:
246         case RADEON_EMIT_PP_TEX_SIZE_0:
247         case RADEON_EMIT_PP_TEX_SIZE_1:
248         case RADEON_EMIT_PP_TEX_SIZE_2:
249         case R200_EMIT_RB3D_BLENDCOLOR:
250         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
251         case RADEON_EMIT_PP_CUBIC_FACES_0:
252         case RADEON_EMIT_PP_CUBIC_FACES_1:
253         case RADEON_EMIT_PP_CUBIC_FACES_2:
254         case R200_EMIT_PP_TRI_PERF_CNTL:
255         case R200_EMIT_PP_AFS_0:
256         case R200_EMIT_PP_AFS_1:
257         case R200_EMIT_ATF_TFACTOR:
258         case R200_EMIT_PP_TXCTLALL_0:
259         case R200_EMIT_PP_TXCTLALL_1:
260         case R200_EMIT_PP_TXCTLALL_2:
261         case R200_EMIT_PP_TXCTLALL_3:
262         case R200_EMIT_PP_TXCTLALL_4:
263         case R200_EMIT_PP_TXCTLALL_5:
264         case R200_EMIT_VAP_PVS_CNTL:
265                 /* These packets don't contain memory offsets */
266                 break;
267
268         default:
269                 DRM_ERROR("Unknown state packet ID %d\n", id);
270                 return -EINVAL;
271         }
272
273         return 0;
274 }
275
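/* Sanity-check the type-3 CP packet at the head of cmdbuf: verify the packet
 * header and its advertised size against the buffer, fix up any offsets
 * carried by draw/index/blit opcodes, and return the packet length in dwords
 * through *cmdsz.
 */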
276 static int radeon_check_and_fixup_packet3(drm_radeon_private_t *
277                                           dev_priv,
278                                           struct drm_file *file_priv,
279                                           drm_radeon_kcmd_buffer_t *
280                                           cmdbuf,
281                                           unsigned int *cmdsz)
282 {
283         u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
284         u32 offset, narrays;
285         int count, i, k;
286
287         count = ((*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16);
288         *cmdsz = 2 + count;
289
290         if ((*cmd & 0xc0000000) != RADEON_CP_PACKET3) {
291                 DRM_ERROR("Not a type 3 packet\n");
292                 return -EINVAL;
293         }
294
295         if (4 * *cmdsz > drm_buffer_unprocessed(cmdbuf->buffer)) {
296                 DRM_ERROR("Packet size larger than size of data provided\n");
297                 return -EINVAL;
298         }
299
300         switch (*cmd & 0xff00) {
301         /* XXX Are there old drivers needing other packets? */
302
303         case RADEON_3D_DRAW_IMMD:
304         case RADEON_3D_DRAW_VBUF:
305         case RADEON_3D_DRAW_INDX:
306         case RADEON_WAIT_FOR_IDLE:
307         case RADEON_CP_NOP:
308         case RADEON_3D_CLEAR_ZMASK:
309 /*      case RADEON_CP_NEXT_CHAR:
310         case RADEON_CP_PLY_NEXTSCAN:
311         case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
312                 /* these packets are safe */
313                 break;
314
315         case RADEON_CP_3D_DRAW_IMMD_2:
316         case RADEON_CP_3D_DRAW_VBUF_2:
317         case RADEON_CP_3D_DRAW_INDX_2:
318         case RADEON_3D_CLEAR_HIZ:
319                 /* safe but r200 only */
320                 if (dev_priv->microcode_version != UCODE_R200) {
321                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
322                         return -EINVAL;
323                 }
324                 break;
325
326         case RADEON_3D_LOAD_VBPNTR:
327
328                 if (count > 18) { /* 12 arrays max */
329                         DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
330                                   count);
331                         return -EINVAL;
332                 }
333
334                 /* carefully check packet contents */
335                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
336
337                 narrays = *cmd & ~0xc000;
338                 k = 0;
339                 i = 2;
340                 while ((k < narrays) && (i < (count + 2))) {
341                         i++;            /* skip attribute field */
342                         cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
343                         if (radeon_check_and_fixup_offset(dev_priv, file_priv,
344                                                           cmd)) {
345                                 DRM_ERROR
346                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
347                                      k, i);
348                                 return -EINVAL;
349                         }
350                         k++;
351                         i++;
352                         if (k == narrays)
353                                 break;
354                         /* have one more to process, they come in pairs */
355                         cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
356
357                         if (radeon_check_and_fixup_offset(dev_priv,
358                                                           file_priv, cmd))
359                         {
360                                 DRM_ERROR
361                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
362                                      k, i);
363                                 return -EINVAL;
364                         }
365                         k++;
366                         i++;
367                 }
368                 /* do the counts match what we expect ? */
369                 if ((k != narrays) || (i != (count + 2))) {
370                         DRM_ERROR
371                             ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
372                               k, i, narrays, count + 1);
373                         return -EINVAL;
374                 }
375                 break;
376
377         case RADEON_3D_RNDR_GEN_INDX_PRIM:
378                 if (dev_priv->microcode_version != UCODE_R100) {
379                         DRM_ERROR("Invalid 3d packet for r200-class chip\n");
380                         return -EINVAL;
381                 }
382
383                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
384                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
385                                 DRM_ERROR("Invalid rndr_gen_indx offset\n");
386                                 return -EINVAL;
387                 }
388                 break;
389
390         case RADEON_CP_INDX_BUFFER:
391                 if (dev_priv->microcode_version != UCODE_R200) {
392                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
393                         return -EINVAL;
394                 }
395
396                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
397                 if ((*cmd & 0x8000ffff) != 0x80000810) {
398                         DRM_ERROR("Invalid indx_buffer reg address %08X\n", *cmd);
399                         return -EINVAL;
400                 }
401                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
402                 if (radeon_check_and_fixup_offset(dev_priv, file_priv, cmd)) {
403                         DRM_ERROR("Invalid indx_buffer offset is %08X\n", *cmd);
404                         return -EINVAL;
405                 }
406                 break;
407
408         case RADEON_CNTL_HOSTDATA_BLT:
409         case RADEON_CNTL_PAINT_MULTI:
410         case RADEON_CNTL_BITBLT_MULTI:
411                 /* MSB of opcode: next DWORD GUI_CNTL */
412                 cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
413                 if (*cmd & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
414                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
415                         u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
416                         offset = *cmd2 << 10;
417                         if (radeon_check_and_fixup_offset
418                             (dev_priv, file_priv, &offset)) {
419                                 DRM_ERROR("Invalid first packet offset\n");
420                                 return -EINVAL;
421                         }
422                         *cmd2 = (*cmd2 & 0xffc00000) | offset >> 10;
423                 }
424
425                 if ((*cmd & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
426                     (*cmd & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
427                         u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
428                         offset = *cmd3 << 10;
429                         if (radeon_check_and_fixup_offset
430                             (dev_priv, file_priv, &offset)) {
431                                 DRM_ERROR("Invalid second packet offset\n");
432                                 return -EINVAL;
433                         }
434                         *cmd3 = (*cmd3 & 0xffc00000) | offset >> 10;
435                 }
436                 break;
437
438         default:
439                 DRM_ERROR("Invalid packet type %x\n", *cmd & 0xff00);
440                 return -EINVAL;
441         }
442
443         return 0;
444 }
445
446 /* ================================================================
447  * CP hardware state programming functions
448  */
449
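/* Clamp rendering to one cliprect by writing its top-left corner to
 * RADEON_RE_TOP_LEFT and its inclusive bottom-right corner to
 * RADEON_RE_WIDTH_HEIGHT.
 */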
450 static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
451                                   struct drm_clip_rect * box)
452 {
453         RING_LOCALS;
454
455         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
456                   box->x1, box->y1, box->x2, box->y2);
457
458         BEGIN_RING(4);
459         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
460         OUT_RING((box->y1 << 16) | box->x1);
461         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
462         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
463         ADVANCE_RING();
464 }
465
466 /* Emit 1.1 state
467  */
468 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
469                              struct drm_file *file_priv,
470                              drm_radeon_context_regs_t * ctx,
471                              drm_radeon_texture_regs_t * tex,
472                              unsigned int dirty)
473 {
474         RING_LOCALS;
475         DRM_DEBUG("dirty=0x%08x\n", dirty);
476
477         if (dirty & RADEON_UPLOAD_CONTEXT) {
478                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
479                                                   &ctx->rb3d_depthoffset)) {
480                         DRM_ERROR("Invalid depth buffer offset\n");
481                         return -EINVAL;
482                 }
483
484                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
485                                                   &ctx->rb3d_coloroffset)) {
486                         DRM_ERROR("Invalid colour buffer offset\n");
487                         return -EINVAL;
488                 }
489
490                 BEGIN_RING(14);
491                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
492                 OUT_RING(ctx->pp_misc);
493                 OUT_RING(ctx->pp_fog_color);
494                 OUT_RING(ctx->re_solid_color);
495                 OUT_RING(ctx->rb3d_blendcntl);
496                 OUT_RING(ctx->rb3d_depthoffset);
497                 OUT_RING(ctx->rb3d_depthpitch);
498                 OUT_RING(ctx->rb3d_zstencilcntl);
499                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
500                 OUT_RING(ctx->pp_cntl);
501                 OUT_RING(ctx->rb3d_cntl);
502                 OUT_RING(ctx->rb3d_coloroffset);
503                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
504                 OUT_RING(ctx->rb3d_colorpitch);
505                 ADVANCE_RING();
506         }
507
508         if (dirty & RADEON_UPLOAD_VERTFMT) {
509                 BEGIN_RING(2);
510                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
511                 OUT_RING(ctx->se_coord_fmt);
512                 ADVANCE_RING();
513         }
514
515         if (dirty & RADEON_UPLOAD_LINE) {
516                 BEGIN_RING(5);
517                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
518                 OUT_RING(ctx->re_line_pattern);
519                 OUT_RING(ctx->re_line_state);
520                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
521                 OUT_RING(ctx->se_line_width);
522                 ADVANCE_RING();
523         }
524
525         if (dirty & RADEON_UPLOAD_BUMPMAP) {
526                 BEGIN_RING(5);
527                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
528                 OUT_RING(ctx->pp_lum_matrix);
529                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
530                 OUT_RING(ctx->pp_rot_matrix_0);
531                 OUT_RING(ctx->pp_rot_matrix_1);
532                 ADVANCE_RING();
533         }
534
535         if (dirty & RADEON_UPLOAD_MASKS) {
536                 BEGIN_RING(4);
537                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
538                 OUT_RING(ctx->rb3d_stencilrefmask);
539                 OUT_RING(ctx->rb3d_ropcntl);
540                 OUT_RING(ctx->rb3d_planemask);
541                 ADVANCE_RING();
542         }
543
544         if (dirty & RADEON_UPLOAD_VIEWPORT) {
545                 BEGIN_RING(7);
546                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
547                 OUT_RING(ctx->se_vport_xscale);
548                 OUT_RING(ctx->se_vport_xoffset);
549                 OUT_RING(ctx->se_vport_yscale);
550                 OUT_RING(ctx->se_vport_yoffset);
551                 OUT_RING(ctx->se_vport_zscale);
552                 OUT_RING(ctx->se_vport_zoffset);
553                 ADVANCE_RING();
554         }
555
556         if (dirty & RADEON_UPLOAD_SETUP) {
557                 BEGIN_RING(4);
558                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
559                 OUT_RING(ctx->se_cntl);
560                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
561                 OUT_RING(ctx->se_cntl_status);
562                 ADVANCE_RING();
563         }
564
565         if (dirty & RADEON_UPLOAD_MISC) {
566                 BEGIN_RING(2);
567                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
568                 OUT_RING(ctx->re_misc);
569                 ADVANCE_RING();
570         }
571
572         if (dirty & RADEON_UPLOAD_TEX0) {
573                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
574                                                   &tex[0].pp_txoffset)) {
575                         DRM_ERROR("Invalid texture offset for unit 0\n");
576                         return -EINVAL;
577                 }
578
579                 BEGIN_RING(9);
580                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
581                 OUT_RING(tex[0].pp_txfilter);
582                 OUT_RING(tex[0].pp_txformat);
583                 OUT_RING(tex[0].pp_txoffset);
584                 OUT_RING(tex[0].pp_txcblend);
585                 OUT_RING(tex[0].pp_txablend);
586                 OUT_RING(tex[0].pp_tfactor);
587                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
588                 OUT_RING(tex[0].pp_border_color);
589                 ADVANCE_RING();
590         }
591
592         if (dirty & RADEON_UPLOAD_TEX1) {
593                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
594                                                   &tex[1].pp_txoffset)) {
595                         DRM_ERROR("Invalid texture offset for unit 1\n");
596                         return -EINVAL;
597                 }
598
599                 BEGIN_RING(9);
600                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
601                 OUT_RING(tex[1].pp_txfilter);
602                 OUT_RING(tex[1].pp_txformat);
603                 OUT_RING(tex[1].pp_txoffset);
604                 OUT_RING(tex[1].pp_txcblend);
605                 OUT_RING(tex[1].pp_txablend);
606                 OUT_RING(tex[1].pp_tfactor);
607                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
608                 OUT_RING(tex[1].pp_border_color);
609                 ADVANCE_RING();
610         }
611
612         if (dirty & RADEON_UPLOAD_TEX2) {
613                 if (radeon_check_and_fixup_offset(dev_priv, file_priv,
614                                                   &tex[2].pp_txoffset)) {
615                         DRM_ERROR("Invalid texture offset for unit 2\n");
616                         return -EINVAL;
617                 }
618
619                 BEGIN_RING(9);
620                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
621                 OUT_RING(tex[2].pp_txfilter);
622                 OUT_RING(tex[2].pp_txformat);
623                 OUT_RING(tex[2].pp_txoffset);
624                 OUT_RING(tex[2].pp_txcblend);
625                 OUT_RING(tex[2].pp_txablend);
626                 OUT_RING(tex[2].pp_tfactor);
627                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
628                 OUT_RING(tex[2].pp_border_color);
629                 ADVANCE_RING();
630         }
631
632         return 0;
633 }
634
635 /* Emit 1.2 state
636  */
637 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
638                               struct drm_file *file_priv,
639                               drm_radeon_state_t * state)
640 {
641         RING_LOCALS;
642
643         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
644                 BEGIN_RING(3);
645                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
646                 OUT_RING(state->context2.se_zbias_factor);
647                 OUT_RING(state->context2.se_zbias_constant);
648                 ADVANCE_RING();
649         }
650
651         return radeon_emit_state(dev_priv, file_priv, &state->context,
652                                  state->tex, state->dirty);
653 }
654
655 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
656  * 1.3 cmdbuffers allow all previous state to be updated as well as
657  * the tcl scalar and vector areas.
658  */
659 static struct {
660         int start;
661         int len;
662         const char *name;
663 } packet[RADEON_MAX_STATE_PACKETS] = {
664         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
665         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
666         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
667         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
668         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
669         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
670         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
671         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
672         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
673         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
674         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
675         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
676         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
677         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
678         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
679         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
680         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
681         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
682         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
683         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
684         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
685                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
686         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
687         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
688         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
689         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
690         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
691         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
692         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
693         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
694         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
695         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
696         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
697         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
698         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
699         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
700         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
701         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
702         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
703         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
704         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
705         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
706         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
707         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
708         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
709         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
710         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
711         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
712         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
713         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
714         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
715          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
716         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
717         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
718         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
719         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
720         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
721         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
722         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
723         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
724         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
725         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
726         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
727                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
728         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
729         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
730         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
731         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
732         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
733         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
734         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
735         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
736         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
737         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
738         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
739         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
740         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
741         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
742         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
743         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
744         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
745         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
746         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
747         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
748         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
749         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
750         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
751         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
752         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
753         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
754         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
755         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
756         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
757         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
758         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
759         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
760         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
761         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
762 };
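
/* Each entry above pairs a RADEON_EMIT_ / R200_EMIT_ id with the first
 * register it touches, the number of dwords it carries, and a name for debug
 * output.  Roughly (an illustrative sketch, not the exact emit path), a
 * consumer does:
 *
 *      sz  = packet[id].len;
 *      reg = packet[id].start;
 *      ... radeon_check_and_fixup_packets(dev_priv, file_priv, id, buf) ...
 *      OUT_RING(CP_PACKET0(reg, sz - 1));
 *      ... copy sz payload dwords from the command buffer ...
 */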
763
764 /* ================================================================
765  * Performance monitoring functions
766  */
767
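/* Paint a small solid w x h box at (x, y), relative to the first cliprect,
 * into the front or back buffer (depending on the current page-flip state);
 * used to draw the performance boxes below.
 */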
768 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
769                              struct drm_radeon_master_private *master_priv,
770                              int x, int y, int w, int h, int r, int g, int b)
771 {
772         u32 color;
773         RING_LOCALS;
774
775         x += master_priv->sarea_priv->boxes[0].x1;
776         y += master_priv->sarea_priv->boxes[0].y1;
777
778         switch (dev_priv->color_fmt) {
779         case RADEON_COLOR_FORMAT_RGB565:
780                 color = (((r & 0xf8) << 8) |
781                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
782                 break;
783         case RADEON_COLOR_FORMAT_ARGB8888:
784         default:
785                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
786                 break;
787         }
788
789         BEGIN_RING(4);
790         RADEON_WAIT_UNTIL_3D_IDLE();
791         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
792         OUT_RING(0xffffffff);
793         ADVANCE_RING();
794
795         BEGIN_RING(6);
796
797         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
798         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
799                  RADEON_GMC_BRUSH_SOLID_COLOR |
800                  (dev_priv->color_fmt << 8) |
801                  RADEON_GMC_SRC_DATATYPE_COLOR |
802                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
803
804         if (master_priv->sarea_priv->pfCurrentPage == 1) {
805                 OUT_RING(dev_priv->front_pitch_offset);
806         } else {
807                 OUT_RING(dev_priv->back_pitch_offset);
808         }
809
810         OUT_RING(color);
811
812         OUT_RING((x << 16) | y);
813         OUT_RING((w << 16) | h);
814
815         ADVANCE_RING();
816 }
817
818 static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv, struct drm_radeon_master_private *master_priv)
819 {
820         /* Collapse various things into a wait flag -- trying to
821          * guess if userspace slept -- better just to have them tell us.
822          */
823         if (dev_priv->stats.last_frame_reads > 1 ||
824             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
825                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
826         }
827
828         if (dev_priv->stats.freelist_loops) {
829                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
830         }
831
832         /* Purple box for page flipping
833          */
834         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
835                 radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);
836
837         /* Red box if we have to wait for idle at any point
838          */
839         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
840                 radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);
841
842         /* Blue box: lost context?
843          */
844
845         /* Yellow box for texture swaps
846          */
847         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
848                 radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);
849
850         /* Green box if hardware never idles (as far as we can tell)
851          */
852         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
853                 radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);
854
855         /* Draw bars indicating number of buffers allocated
856          * (not a great measure, easily confused)
857          */
858         if (dev_priv->stats.requested_bufs) {
859                 if (dev_priv->stats.requested_bufs > 100)
860                         dev_priv->stats.requested_bufs = 100;
861
862                 radeon_clear_box(dev_priv, master_priv, 4, 16,
863                                  dev_priv->stats.requested_bufs, 4,
864                                  196, 128, 128);
865         }
866
867         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
868
869 }
870
871 /* ================================================================
872  * CP command dispatch functions
873  */
874
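/* Dispatch a clear request: swap FRONT/BACK when the page is flipped, clear
 * the colour buffers with 2D solid fills per cliprect, do a hyper-z fast
 * clear of the z mask when requested, and otherwise clear depth/stencil by
 * rendering a quad with the 3D engine.
 */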
875 static void radeon_cp_dispatch_clear(struct drm_device * dev,
876                                      struct drm_master *master,
877                                      drm_radeon_clear_t * clear,
878                                      drm_radeon_clear_rect_t * depth_boxes)
879 {
880         drm_radeon_private_t *dev_priv = dev->dev_private;
881         struct drm_radeon_master_private *master_priv = master->driver_priv;
882         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
883         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
884         int nbox = sarea_priv->nbox;
885         struct drm_clip_rect *pbox = sarea_priv->boxes;
886         unsigned int flags = clear->flags;
887         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
888         int i;
889         RING_LOCALS;
890         DRM_DEBUG("flags = 0x%x\n", flags);
891
892         dev_priv->stats.clears++;
893
894         if (sarea_priv->pfCurrentPage == 1) {
895                 unsigned int tmp = flags;
896
897                 flags &= ~(RADEON_FRONT | RADEON_BACK);
898                 if (tmp & RADEON_FRONT)
899                         flags |= RADEON_BACK;
900                 if (tmp & RADEON_BACK)
901                         flags |= RADEON_FRONT;
902         }
903         if (flags & (RADEON_DEPTH|RADEON_STENCIL)) {
904                 if (!dev_priv->have_z_offset) {
905                         DRM_ERROR("radeon: illegal depth clear request. Buggy mesa detected - please update.\n");
906                         flags &= ~(RADEON_DEPTH | RADEON_STENCIL);
907                 }
908         }
909
910         if (flags & (RADEON_FRONT | RADEON_BACK)) {
911
912                 BEGIN_RING(4);
913
914                 /* Ensure the 3D stream is idle before doing a
915                  * 2D fill to clear the front or back buffer.
916                  */
917                 RADEON_WAIT_UNTIL_3D_IDLE();
918
919                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
920                 OUT_RING(clear->color_mask);
921
922                 ADVANCE_RING();
923
924                 /* Make sure we restore the 3D state next time.
925                  */
926                 sarea_priv->ctx_owner = 0;
927
928                 for (i = 0; i < nbox; i++) {
929                         int x = pbox[i].x1;
930                         int y = pbox[i].y1;
931                         int w = pbox[i].x2 - x;
932                         int h = pbox[i].y2 - y;
933
934                         DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
935                                   x, y, w, h, flags);
936
937                         if (flags & RADEON_FRONT) {
938                                 BEGIN_RING(6);
939
940                                 OUT_RING(CP_PACKET3
941                                          (RADEON_CNTL_PAINT_MULTI, 4));
942                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
943                                          RADEON_GMC_BRUSH_SOLID_COLOR |
944                                          (dev_priv->
945                                           color_fmt << 8) |
946                                          RADEON_GMC_SRC_DATATYPE_COLOR |
947                                          RADEON_ROP3_P |
948                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
949
950                                 OUT_RING(dev_priv->front_pitch_offset);
951                                 OUT_RING(clear->clear_color);
952
953                                 OUT_RING((x << 16) | y);
954                                 OUT_RING((w << 16) | h);
955
956                                 ADVANCE_RING();
957                         }
958
959                         if (flags & RADEON_BACK) {
960                                 BEGIN_RING(6);
961
962                                 OUT_RING(CP_PACKET3
963                                          (RADEON_CNTL_PAINT_MULTI, 4));
964                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
965                                          RADEON_GMC_BRUSH_SOLID_COLOR |
966                                          (dev_priv->
967                                           color_fmt << 8) |
968                                          RADEON_GMC_SRC_DATATYPE_COLOR |
969                                          RADEON_ROP3_P |
970                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
971
972                                 OUT_RING(dev_priv->back_pitch_offset);
973                                 OUT_RING(clear->clear_color);
974
975                                 OUT_RING((x << 16) | y);
976                                 OUT_RING((w << 16) | h);
977
978                                 ADVANCE_RING();
979                         }
980                 }
981         }
982
983         /* hyper z clear */
984         /* no docs available, based on reverse engineering by Stephane Marchesin */
985         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
986             && (flags & RADEON_CLEAR_FASTZ)) {
987
988                 int i;
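                /* depth_pitch is in bytes; convert it to pixels per line
                 * (2 bytes per pixel for 16-bit Z, 4 bytes otherwise).
                 */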
989                 int depthpixperline =
990                     dev_priv->depth_fmt ==
991                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
992                                                        2) : (dev_priv->
993                                                              depth_pitch / 4);
994
995                 u32 clearmask;
996
997                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
998                     ((clear->depth_mask & 0xff) << 24);
999
1000                 /* Make sure we restore the 3D state next time.
1001                  * we haven't touched any "normal" state - still need this?
1002                  */
1003                 sarea_priv->ctx_owner = 0;
1004
1005                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1006                     && (flags & RADEON_USE_HIERZ)) {
1007                         /* FIXME : reverse engineer that for Rx00 cards */
1008                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1009                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1010                            value into account? */
1011                         /* pattern seems to work for r100, though get slight
1012                            rendering errors with glxgears. If hierz is not enabled for r100,
1013                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
1014                            other ones are ignored, and the same clear mask can be used. That's
1015                            very different behaviour than R200 which needs different clear mask
1016                            and different number of tiles to clear if hierz is enabled or not !?!
1017                          */
1018                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
1019                 } else {
1020                         /* clear mask : chooses the clearing pattern.
1021                            rv250: could be used to clear only parts of macrotiles
1022                            (but that would get really complicated...)?
1023                            bit 0 and 1 (either or both of them ?!?!) are used to
1024                            not clear tile (or maybe one of the bits indicates if the tile is
1025                            compressed or not), bit 2 and 3 to not clear tile 1,...,.
1026                            Pattern is as follows:
1027                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
1028                            bits -------------------------------------------------
1029                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
1030                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
1031                            covers 256 pixels ?!?
1032                          */
1033                         clearmask = 0x0;
1034                 }
1035
1036                 BEGIN_RING(8);
1037                 RADEON_WAIT_UNTIL_2D_IDLE();
1038                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1039                              tempRB3D_DEPTHCLEARVALUE);
1040                 /* what offset is this exactly ? */
1041                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1042                 /* need ctlstat, otherwise get some strange black flickering */
1043                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1044                              RADEON_RB3D_ZC_FLUSH_ALL);
1045                 ADVANCE_RING();
1046
1047                 for (i = 0; i < nbox; i++) {
1048                         int tileoffset, nrtilesx, nrtilesy, j;
1049                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1050                         if ((dev_priv->flags & RADEON_HAS_HIERZ)
1051                             && !(dev_priv->microcode_version == UCODE_R200)) {
1052                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
1053                                    maybe r200 actually doesn't need to put the low-res z value into
1054                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
1055                                    Works for R100, both with hierz and without.
1056                                    R100 seems to operate on 2x1 8x8 tiles, but...
1057                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1058                                    problematic with resolutions which are not 64 pix aligned? */
1059                                 tileoffset =
1060                                     ((pbox[i].y1 >> 3) * depthpixperline +
1061                                      pbox[i].x1) >> 6;
1062                                 nrtilesx =
1063                                     ((pbox[i].x2 & ~63) -
1064                                      (pbox[i].x1 & ~63)) >> 4;
1065                                 nrtilesy =
1066                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1067                                 for (j = 0; j <= nrtilesy; j++) {
1068                                         BEGIN_RING(4);
1069                                         OUT_RING(CP_PACKET3
1070                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1071                                         /* first tile */
1072                                         OUT_RING(tileoffset * 8);
1073                                         /* the number of tiles to clear */
1074                                         OUT_RING(nrtilesx + 4);
1075                                         /* clear mask : chooses the clearing pattern. */
1076                                         OUT_RING(clearmask);
1077                                         ADVANCE_RING();
1078                                         tileoffset += depthpixperline >> 6;
1079                                 }
1080                         } else if (dev_priv->microcode_version == UCODE_R200) {
1081                                 /* works for rv250. */
1082                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
1083                                 tileoffset =
1084                                     ((pbox[i].y1 >> 3) * depthpixperline +
1085                                      pbox[i].x1) >> 5;
1086                                 nrtilesx =
1087                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1088                                 nrtilesy =
1089                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1090                                 for (j = 0; j <= nrtilesy; j++) {
1091                                         BEGIN_RING(4);
1092                                         OUT_RING(CP_PACKET3
1093                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1094                                         /* first tile */
1095                                         /* judging by the first tile offset needed, could possibly
1096                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
1097                                            macro tiles, though would still need clear mask for
1098                                            right/bottom if truly 4x4 granularity is desired ? */
1099                                         OUT_RING(tileoffset * 16);
1100                                         /* the number of tiles to clear */
1101                                         OUT_RING(nrtilesx + 1);
1102                                         /* clear mask : chooses the clearing pattern. */
1103                                         OUT_RING(clearmask);
1104                                         ADVANCE_RING();
1105                                         tileoffset += depthpixperline >> 5;
1106                                 }
1107                         } else {        /* rv 100 */
1108                                 /* rv100 might not need 64 pix alignment, who knows */
1109                                 /* offsets are, hmm, weird */
1110                                 tileoffset =
1111                                     ((pbox[i].y1 >> 4) * depthpixperline +
1112                                      pbox[i].x1) >> 6;
1113                                 nrtilesx =
1114                                     ((pbox[i].x2 & ~63) -
1115                                      (pbox[i].x1 & ~63)) >> 4;
1116                                 nrtilesy =
1117                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1118                                 for (j = 0; j <= nrtilesy; j++) {
1119                                         BEGIN_RING(4);
1120                                         OUT_RING(CP_PACKET3
1121                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1122                                         OUT_RING(tileoffset * 128);
1123                                         /* the number of tiles to clear */
1124                                         OUT_RING(nrtilesx + 4);
1125                                         /* clear mask : chooses the clearing pattern. */
1126                                         OUT_RING(clearmask);
1127                                         ADVANCE_RING();
1128                                         tileoffset += depthpixperline >> 6;
1129                                 }
1130                         }
1131                 }
1132
1133                 /* TODO don't always clear all hi-level z tiles */
1134                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1135                     && (dev_priv->microcode_version == UCODE_R200)
1136                     && (flags & RADEON_USE_HIERZ))
1137                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1138                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1139                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1140                            value into account? */
1141                 {
1142                         BEGIN_RING(4);
1143                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1144                         OUT_RING(0x0);  /* First tile */
1145                         OUT_RING(0x3cc0);
1146                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1147                         ADVANCE_RING();
1148                 }
1149         }
1150
1151         /* We have to clear the depth and/or stencil buffers by
1152          * rendering a quad into just those buffers.  Thus, we have to
1153          * make sure the 3D engine is configured correctly.
1154          */
1155         else if ((dev_priv->microcode_version == UCODE_R200) &&
1156                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1157
1158                 int tempPP_CNTL;
1159                 int tempRE_CNTL;
1160                 int tempRB3D_CNTL;
1161                 int tempRB3D_ZSTENCILCNTL;
1162                 int tempRB3D_STENCILREFMASK;
1163                 int tempRB3D_PLANEMASK;
1164                 int tempSE_CNTL;
1165                 int tempSE_VTE_CNTL;
1166                 int tempSE_VTX_FMT_0;
1167                 int tempSE_VTX_FMT_1;
1168                 int tempSE_VAP_CNTL;
1169                 int tempRE_AUX_SCISSOR_CNTL;
1170
1171                 tempPP_CNTL = 0;
1172                 tempRE_CNTL = 0;
1173
1174                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1175
1176                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1177                 tempRB3D_STENCILREFMASK = 0x0;
1178
1179                 tempSE_CNTL = depth_clear->se_cntl;
1180
1181                 /* Disable TCL */
1182
1183                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1184                                           (0x9 <<
1185                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1186
1187                 tempRB3D_PLANEMASK = 0x0;
1188
1189                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1190
1191                 tempSE_VTE_CNTL =
1192                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1193
1194                 /* Vertex format (X, Y, Z, W) */
1195                 tempSE_VTX_FMT_0 =
1196                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1197                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1198                 tempSE_VTX_FMT_1 = 0x0;
1199
1200                 /*
1201                  * Depth buffer specific enables
1202                  */
1203                 if (flags & RADEON_DEPTH) {
1204                         /* Enable depth buffer */
1205                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1206                 } else {
1207                         /* Disable depth buffer */
1208                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1209                 }
1210
1211                 /*
1212                  * Stencil buffer specific enables
1213                  */
1214                 if (flags & RADEON_STENCIL) {
1215                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1216                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1217                 } else {
1218                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1219                         tempRB3D_STENCILREFMASK = 0x00000000;
1220                 }
1221
1222                 if (flags & RADEON_USE_COMP_ZBUF) {
1223                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1224                             RADEON_Z_DECOMPRESSION_ENABLE;
1225                 }
1226                 if (flags & RADEON_USE_HIERZ) {
1227                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1228                 }
1229
1230                 BEGIN_RING(26);
1231                 RADEON_WAIT_UNTIL_2D_IDLE();
1232
1233                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1234                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1235                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1236                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1237                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1238                              tempRB3D_STENCILREFMASK);
1239                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1240                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1241                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1242                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1243                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1244                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1245                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1246                 ADVANCE_RING();
1247
1248                 /* Make sure we restore the 3D state next time.
1249                  */
1250                 sarea_priv->ctx_owner = 0;
1251
1252                 for (i = 0; i < nbox; i++) {
1253
1254                         /* Funny that this should be required --
1255                          *  sets top-left?
1256                          */
1257                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1258
1259                         BEGIN_RING(14);
1260                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1261                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1262                                   RADEON_PRIM_WALK_RING |
1263                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1264                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1265                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1266                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1267                         OUT_RING(0x3f800000);
1268                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1269                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1270                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1271                         OUT_RING(0x3f800000);
1272                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1273                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1274                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1275                         OUT_RING(0x3f800000);
1276                         ADVANCE_RING();
1277                 }
1278         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1279
1280                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1281
1282                 rb3d_cntl = depth_clear->rb3d_cntl;
1283
1284                 if (flags & RADEON_DEPTH) {
1285                         rb3d_cntl |= RADEON_Z_ENABLE;
1286                 } else {
1287                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1288                 }
1289
1290                 if (flags & RADEON_STENCIL) {
1291                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1292                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1293                 } else {
1294                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1295                         rb3d_stencilrefmask = 0x00000000;
1296                 }
1297
1298                 if (flags & RADEON_USE_COMP_ZBUF) {
1299                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1300                             RADEON_Z_DECOMPRESSION_ENABLE;
1301                 }
1302                 if (flags & RADEON_USE_HIERZ) {
1303                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1304                 }
1305
1306                 BEGIN_RING(13);
1307                 RADEON_WAIT_UNTIL_2D_IDLE();
1308
1309                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1310                 OUT_RING(0x00000000);
1311                 OUT_RING(rb3d_cntl);
1312
1313                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1314                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1315                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1316                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1317                 ADVANCE_RING();
1318
1319                 /* Make sure we restore the 3D state next time.
1320                  */
1321                 sarea_priv->ctx_owner = 0;
1322
1323                 for (i = 0; i < nbox; i++) {
1324
1325                         /* Funny that this should be required --
1326                          *  sets top-left?
1327                          */
1328                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1329
1330                         BEGIN_RING(15);
1331
1332                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1333                         OUT_RING(RADEON_VTX_Z_PRESENT |
1334                                  RADEON_VTX_PKCOLOR_PRESENT);
1335                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1336                                   RADEON_PRIM_WALK_RING |
1337                                   RADEON_MAOS_ENABLE |
1338                                   RADEON_VTX_FMT_RADEON_MODE |
1339                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1340
1341                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1342                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1343                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1344                         OUT_RING(0x0);
1345
1346                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1347                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1348                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1349                         OUT_RING(0x0);
1350
1351                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1352                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1353                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1354                         OUT_RING(0x0);
1355
1356                         ADVANCE_RING();
1357                 }
1358         }
1359
1360         /* Increment the clear counter.  The client-side 3D driver must
1361          * wait on this value before performing the clear ioctl.  We
1362          * need this because the card's so damned fast...
1363          */
1364         sarea_priv->last_clear++;
1365
1366         BEGIN_RING(4);
1367
1368         RADEON_CLEAR_AGE(sarea_priv->last_clear);
1369         RADEON_WAIT_UNTIL_IDLE();
1370
1371         ADVANCE_RING();
1372 }
1373
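/* Blit the back buffer to the front buffer for each cliprect.  The 3D
 * stream is idled first so the 2D blit cannot corrupt in-flight 3D work,
 * and the source/destination pitch+offset pair is chosen according to
 * pfCurrentPage so the copy is still correct when the buffers are flipped.
 * last_frame is incremented at the end so clients can throttle on it.
 */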
1374 static void radeon_cp_dispatch_swap(struct drm_device *dev, struct drm_master *master)
1375 {
1376         drm_radeon_private_t *dev_priv = dev->dev_private;
1377         struct drm_radeon_master_private *master_priv = master->driver_priv;
1378         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1379         int nbox = sarea_priv->nbox;
1380         struct drm_clip_rect *pbox = sarea_priv->boxes;
1381         int i;
1382         RING_LOCALS;
1383         DRM_DEBUG("\n");
1384
1385         /* Do some trivial performance monitoring...
1386          */
1387         if (dev_priv->do_boxes)
1388                 radeon_cp_performance_boxes(dev_priv, master_priv);
1389
1390         /* Wait for the 3D stream to idle before dispatching the bitblt.
1391          * This will prevent data corruption between the two streams.
1392          */
1393         BEGIN_RING(2);
1394
1395         RADEON_WAIT_UNTIL_3D_IDLE();
1396
1397         ADVANCE_RING();
1398
1399         for (i = 0; i < nbox; i++) {
1400                 int x = pbox[i].x1;
1401                 int y = pbox[i].y1;
1402                 int w = pbox[i].x2 - x;
1403                 int h = pbox[i].y2 - y;
1404
1405                 DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);
1406
1407                 BEGIN_RING(9);
1408
1409                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1410                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1411                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1412                          RADEON_GMC_BRUSH_NONE |
1413                          (dev_priv->color_fmt << 8) |
1414                          RADEON_GMC_SRC_DATATYPE_COLOR |
1415                          RADEON_ROP3_S |
1416                          RADEON_DP_SRC_SOURCE_MEMORY |
1417                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1418
1419                 /* Make this work even if front & back are flipped:
1420                  */
1421                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1422                 if (sarea_priv->pfCurrentPage == 0) {
1423                         OUT_RING(dev_priv->back_pitch_offset);
1424                         OUT_RING(dev_priv->front_pitch_offset);
1425                 } else {
1426                         OUT_RING(dev_priv->front_pitch_offset);
1427                         OUT_RING(dev_priv->back_pitch_offset);
1428                 }
1429
1430                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1431                 OUT_RING((x << 16) | y);
1432                 OUT_RING((x << 16) | y);
1433                 OUT_RING((w << 16) | h);
1434
1435                 ADVANCE_RING();
1436         }
1437
1438         /* Increment the frame counter.  The client-side 3D driver must
1439          * throttle the framerate by waiting for this value before
1440          * performing the swapbuffer ioctl.
1441          */
1442         sarea_priv->last_frame++;
1443
1444         BEGIN_RING(4);
1445
1446         RADEON_FRAME_AGE(sarea_priv->last_frame);
1447         RADEON_WAIT_UNTIL_2D_IDLE();
1448
1449         ADVANCE_RING();
1450 }
1451
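/* Perform a page flip instead of a blit: reprogram RADEON_CRTC_OFFSET
 * (and RADEON_CRTC2_OFFSET) so the CRTCs scan out from the other buffer,
 * then toggle pfCurrentPage and bump last_frame for client throttling.
 */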
1452 void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master)
1453 {
1454         drm_radeon_private_t *dev_priv = dev->dev_private;
1455         struct drm_radeon_master_private *master_priv = master->driver_priv;
1456         struct drm_sarea *sarea = (struct drm_sarea *)master_priv->sarea->handle;
1457         int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
1458             ? dev_priv->front_offset : dev_priv->back_offset;
1459         RING_LOCALS;
1460         DRM_DEBUG("pfCurrentPage=%d\n",
1461                   master_priv->sarea_priv->pfCurrentPage);
1462
1463         /* Do some trivial performance monitoring...
1464          */
1465         if (dev_priv->do_boxes) {
1466                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1467                 radeon_cp_performance_boxes(dev_priv, master_priv);
1468         }
1469
1470         /* Update the frame offsets for both CRTCs
1471          */
1472         BEGIN_RING(6);
1473
1474         RADEON_WAIT_UNTIL_3D_IDLE();
1475         OUT_RING_REG(RADEON_CRTC_OFFSET,
1476                      ((sarea->frame.y * dev_priv->front_pitch +
1477                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1478                      + offset);
1479         OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
1480                      + offset);
1481
1482         ADVANCE_RING();
1483
1484         /* Increment the frame counter.  The client-side 3D driver must
1485          * throttle the framerate by waiting for this value before
1486          * performing the swapbuffer ioctl.
1487          */
1488         master_priv->sarea_priv->last_frame++;
1489         master_priv->sarea_priv->pfCurrentPage =
1490                 1 - master_priv->sarea_priv->pfCurrentPage;
1491
1492         BEGIN_RING(2);
1493
1494         RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);
1495
1496         ADVANCE_RING();
1497 }
1498
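/* Sanity-check the vertex count for a given primitive type.  Returns
 * nonzero if the count is illegal, e.g. a line list needs a nonzero even
 * count and a triangle/rect list needs a nonzero multiple of 3.
 */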
1499 static int bad_prim_vertex_nr(int primitive, int nr)
1500 {
1501         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1502         case RADEON_PRIM_TYPE_NONE:
1503         case RADEON_PRIM_TYPE_POINT:
1504                 return nr < 1;
1505         case RADEON_PRIM_TYPE_LINE:
1506                 return (nr & 1) || nr == 0;
1507         case RADEON_PRIM_TYPE_LINE_STRIP:
1508                 return nr < 2;
1509         case RADEON_PRIM_TYPE_TRI_LIST:
1510         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1511         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1512         case RADEON_PRIM_TYPE_RECT_LIST:
1513                 return nr % 3 || nr == 0;
1514         case RADEON_PRIM_TYPE_TRI_FAN:
1515         case RADEON_PRIM_TYPE_TRI_STRIP:
1516                 return nr < 3;
1517         default:
1518                 return 1;
1519         }
1520 }
1521
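/* Parameters for one TCL primitive dispatch: the start/finish range
 * within the DMA buffer, the hardware primitive type, the vertex count,
 * the offset of the vertex data and the vertex format.
 */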
1522 typedef struct {
1523         unsigned int start;
1524         unsigned int finish;
1525         unsigned int prim;
1526         unsigned int numverts;
1527         unsigned int offset;
1528         unsigned int vc_format;
1529 } drm_radeon_tcl_prim_t;
1530
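/* Emit a vertex-buffer primitive.  The same 3D_RNDR_GEN_INDX_PRIM packet
 * (PRIM_WALK_LIST, i.e. vertices fetched straight from the buffer at
 * `offset') is replayed once per cliprect, with the clip rectangle
 * updated before each pass.
 */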
1531 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1532                                       struct drm_file *file_priv,
1533                                       struct drm_buf * buf,
1534                                       drm_radeon_tcl_prim_t * prim)
1535 {
1536         drm_radeon_private_t *dev_priv = dev->dev_private;
1537         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
1538         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1539         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1540         int numverts = (int)prim->numverts;
1541         int nbox = sarea_priv->nbox;
1542         int i = 0;
1543         RING_LOCALS;
1544
1545         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1546                   prim->prim,
1547                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1548
1549         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1550                 DRM_ERROR("bad prim %x numverts %d\n",
1551                           prim->prim, prim->numverts);
1552                 return;
1553         }
1554
1555         do {
1556                 /* Emit the next cliprect */
1557                 if (i < nbox) {
1558                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1559                 }
1560
1561                 /* Emit the vertex buffer rendering commands */
1562                 BEGIN_RING(5);
1563
1564                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1565                 OUT_RING(offset);
1566                 OUT_RING(numverts);
1567                 OUT_RING(prim->vc_format);
1568                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1569                          RADEON_COLOR_ORDER_RGBA |
1570                          RADEON_VTX_FMT_RADEON_MODE |
1571                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1572
1573                 ADVANCE_RING();
1574
1575                 i++;
1576         } while (i < nbox);
1577 }
1578
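/* Mark a DMA buffer as pending: stamp it with a new dispatch age and
 * emit the matching DISPATCH_AGE packet so the buffer can be reclaimed
 * from the freelist once the engine has passed that age.
 */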
1579 void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
1580 {
1581         drm_radeon_private_t *dev_priv = dev->dev_private;
1582         struct drm_radeon_master_private *master_priv = master->driver_priv;
1583         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1584         RING_LOCALS;
1585
1586         buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
1587
1588         /* Emit the vertex buffer age */
1589         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
1590                 BEGIN_RING(3);
1591                 R600_DISPATCH_AGE(buf_priv->age);
1592                 ADVANCE_RING();
1593         } else {
1594                 BEGIN_RING(2);
1595                 RADEON_DISPATCH_AGE(buf_priv->age);
1596                 ADVANCE_RING();
1597         }
1598
1599         buf->pending = 1;
1600         buf->used = 0;
1601 }
1602
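/* Fire off an indirect buffer: hand the CP the GART offset and length
 * (in dwords) of the range [start, end) within `buf'.  The CP requires
 * an even number of dwords, so an odd-length buffer is first padded with
 * a Type-2 (NOP) packet.  For example, a 28-byte range rounds up to
 * (28 + 3) / 4 = 7 dwords and gets one RADEON_CP_PACKET2 appended.
 */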
1603 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1604                                         struct drm_buf * buf, int start, int end)
1605 {
1606         drm_radeon_private_t *dev_priv = dev->dev_private;
1607         RING_LOCALS;
1608         DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1609
1610         if (start != end) {
1611                 int offset = (dev_priv->gart_buffers_offset
1612                               + buf->offset + start);
1613                 int dwords = (end - start + 3) / sizeof(u32);
1614
1615                 /* Indirect buffer data must be an even number of
1616                  * dwords, so if we've been given an odd number we must
1617                  * pad the data with a Type-2 CP packet.
1618                  */
1619                 if (dwords & 1) {
1620                         u32 *data = (u32 *)
1621                             ((char *)dev->agp_buffer_map->handle
1622                              + buf->offset + start);
1623                         data[dwords++] = RADEON_CP_PACKET2;
1624                 }
1625
1626                 /* Fire off the indirect buffer */
1627                 BEGIN_RING(3);
1628
1629                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1630                 OUT_RING(offset);
1631                 OUT_RING(dwords);
1632
1633                 ADVANCE_RING();
1634         }
1635 }
1636
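/* Emit an indexed primitive.  The 3D_RNDR_GEN_INDX_PRIM header
 * (PRIM_WALK_IND) is written directly into the element buffer in front
 * of the index data, and the whole range is then dispatched as an
 * indirect buffer once per cliprect.
 */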
1637 static void radeon_cp_dispatch_indices(struct drm_device *dev,
1638                                        struct drm_master *master,
1639                                        struct drm_buf * elt_buf,
1640                                        drm_radeon_tcl_prim_t * prim)
1641 {
1642         drm_radeon_private_t *dev_priv = dev->dev_private;
1643         struct drm_radeon_master_private *master_priv = master->driver_priv;
1644         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
1645         int offset = dev_priv->gart_buffers_offset + prim->offset;
1646         u32 *data;
1647         int dwords;
1648         int i = 0;
1649         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1650         int count = (prim->finish - start) / sizeof(u16);
1651         int nbox = sarea_priv->nbox;
1652
1653         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1654                   prim->prim,
1655                   prim->vc_format,
1656                   prim->start, prim->finish, prim->offset, prim->numverts);
1657
1658         if (bad_prim_vertex_nr(prim->prim, count)) {
1659                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1660                 return;
1661         }
1662
1663         if (start >= prim->finish || (prim->start & 0x7)) {
1664                 DRM_ERROR("buffer prim %d\n", prim->prim);
1665                 return;
1666         }
1667
1668         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1669
1670         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1671                         elt_buf->offset + prim->start);
1672
1673         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1674         data[1] = offset;
1675         data[2] = prim->numverts;
1676         data[3] = prim->vc_format;
1677         data[4] = (prim->prim |
1678                    RADEON_PRIM_WALK_IND |
1679                    RADEON_COLOR_ORDER_RGBA |
1680                    RADEON_VTX_FMT_RADEON_MODE |
1681                    (count << RADEON_NUM_VERTICES_SHIFT));
1682
1683         do {
1684                 if (i < nbox)
1685                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1686
1687                 radeon_cp_dispatch_indirect(dev, elt_buf,
1688                                             prim->start, prim->finish);
1689
1690                 i++;
1691         } while (i < nbox);
1692
1693 }
1694
1695 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1696
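/* Upload a texture image from user space by blitting it through a DMA
 * buffer.  Uploads larger than RADEON_MAX_TEXTURE_SIZE are split into
 * multiple passes (image->y, height and data are advanced between
 * passes), and narrow micro-tiled textures (width < 64 bytes) are
 * re-tiled by hand because the blitter's minimum pitch is 64 bytes.
 */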
1697 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1698                                       struct drm_file *file_priv,
1699                                       drm_radeon_texture_t * tex,
1700                                       drm_radeon_tex_image_t * image)
1701 {
1702         drm_radeon_private_t *dev_priv = dev->dev_private;
1703         struct drm_buf *buf;
1704         u32 format;
1705         u32 *buffer;
1706         const u8 __user *data;
1707         int size, dwords, tex_width, blit_width, spitch;
1708         u32 height;
1709         int i;
1710         u32 texpitch, microtile;
1711         u32 offset, byte_offset;
1712         RING_LOCALS;
1713
1714         if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1715                 DRM_ERROR("Invalid destination offset\n");
1716                 return -EINVAL;
1717         }
1718
1719         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1720
1721         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1722          * up with the texture data from the host data blit, otherwise
1723          * part of the texture image may be corrupted.
1724          */
1725         BEGIN_RING(4);
1726         RADEON_FLUSH_CACHE();
1727         RADEON_WAIT_UNTIL_IDLE();
1728         ADVANCE_RING();
1729
1730         /* The compiler won't optimize away a division by a variable,
1731          * even if the only legal values are powers of two.  Thus, we'll
1732          * use a shift instead.
1733          */
1734         switch (tex->format) {
1735         case RADEON_TXFORMAT_ARGB8888:
1736         case RADEON_TXFORMAT_RGBA8888:
1737                 format = RADEON_COLOR_FORMAT_ARGB8888;
1738                 tex_width = tex->width * 4;
1739                 blit_width = image->width * 4;
1740                 break;
1741         case RADEON_TXFORMAT_AI88:
1742         case RADEON_TXFORMAT_ARGB1555:
1743         case RADEON_TXFORMAT_RGB565:
1744         case RADEON_TXFORMAT_ARGB4444:
1745         case RADEON_TXFORMAT_VYUY422:
1746         case RADEON_TXFORMAT_YVYU422:
1747                 format = RADEON_COLOR_FORMAT_RGB565;
1748                 tex_width = tex->width * 2;
1749                 blit_width = image->width * 2;
1750                 break;
1751         case RADEON_TXFORMAT_I8:
1752         case RADEON_TXFORMAT_RGB332:
1753                 format = RADEON_COLOR_FORMAT_CI8;
1754                 tex_width = tex->width * 1;
1755                 blit_width = image->width * 1;
1756                 break;
1757         default:
1758                 DRM_ERROR("invalid texture format %d\n", tex->format);
1759                 return -EINVAL;
1760         }
1761         spitch = blit_width >> 6;
1762         if (spitch == 0 && image->height > 1)
1763                 return -EINVAL;
1764
1765         texpitch = tex->pitch;
1766         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1767                 microtile = 1;
1768                 if (tex_width < 64) {
1769                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1770                         /* we got tiled coordinates, untile them */
1771                         image->x *= 2;
1772                 }
1773         } else
1774                 microtile = 0;
1775
1776         /* this might fail for zero-sized uploads - are those illegal? */
1777         if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1778                                 blit_width - 1)) {
1779                 DRM_ERROR("Invalid final destination offset\n");
1780                 return -EINVAL;
1781         }
1782
1783         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1784
1785         do {
1786                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%u y=%u w=%u h=%u\n",
1787                           tex->offset >> 10, tex->pitch, tex->format,
1788                           image->x, image->y, image->width, image->height);
1789
1790                 /* Make a copy of some parameters in case we have to
1791                  * update them for a multi-pass texture blit.
1792                  */
1793                 height = image->height;
1794                 data = (const u8 __user *)image->data;
1795
1796                 size = height * blit_width;
1797
1798                 if (size > RADEON_MAX_TEXTURE_SIZE) {
1799                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1800                         size = height * blit_width;
1801                 } else if (size < 4 && size > 0) {
1802                         size = 4;
1803                 } else if (size == 0) {
1804                         return 0;
1805                 }
1806
1807                 buf = radeon_freelist_get(dev);
1808                 if (0 && !buf) {
1809                         radeon_do_cp_idle(dev_priv);
1810                         buf = radeon_freelist_get(dev);
1811                 }
1812                 if (!buf) {
1813                         DRM_DEBUG("EAGAIN\n");
1814                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1815                                 return -EFAULT;
1816                         return -EAGAIN;
1817                 }
1818
1819                 /* Dispatch the indirect buffer.
1820                  */
1821                 buffer =
1822                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1823                 dwords = size / 4;
1824
1825 #define RADEON_COPY_MT(_buf, _data, _width) \
1826         do { \
1827                 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1828                         DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1829                         return -EFAULT; \
1830                 } \
1831         } while(0)
1832
1833                 if (microtile) {
1834                         /* Texture micro-tiling is in use, so the minimum
1835                            texture width is 16 bytes.  However, we cannot use the
1836                            blitter directly for texture widths below 64 bytes: the
1837                            minimum tex pitch is 64 bytes and it must match the
1838                            texture width, otherwise the blitter tiles it wrong.
1839                            So tile manually in that case.  We also special-case
1840                            tex height == 1, since the actual image will have height 2
1841                            and we must not read beyond the texture size from user space. */
1842                         if (tex->height == 1) {
1843                                 if (tex_width >= 64 || tex_width <= 16) {
1844                                         RADEON_COPY_MT(buffer, data,
1845                                                 (int)(tex_width * sizeof(u32)));
1846                                 } else if (tex_width == 32) {
1847                                         RADEON_COPY_MT(buffer, data, 16);
1848                                         RADEON_COPY_MT(buffer + 8,
1849                                                        data + 16, 16);
1850                                 }
1851                         } else if (tex_width >= 64 || tex_width == 16) {
1852                                 RADEON_COPY_MT(buffer, data,
1853                                                (int)(dwords * sizeof(u32)));
1854                         } else if (tex_width < 16) {
1855                                 for (i = 0; i < tex->height; i++) {
1856                                         RADEON_COPY_MT(buffer, data, tex_width);
1857                                         buffer += 4;
1858                                         data += tex_width;
1859                                 }
1860                         } else if (tex_width == 32) {
1861                                 /* TODO: make sure this works when not fitting in one buffer
1862                                    (i.e. 32 bytes x 2048...) */
1863                                 for (i = 0; i < tex->height; i += 2) {
1864                                         RADEON_COPY_MT(buffer, data, 16);
1865                                         data += 16;
1866                                         RADEON_COPY_MT(buffer + 8, data, 16);
1867                                         data += 16;
1868                                         RADEON_COPY_MT(buffer + 4, data, 16);
1869                                         data += 16;
1870                                         RADEON_COPY_MT(buffer + 12, data, 16);
1871                                         data += 16;
1872                                         buffer += 16;
1873                                 }
1874                         }
1875                 } else {
1876                         if (tex_width >= 32) {
1877                                 /* Texture image width is larger than the minimum, so we
1878                                  * can upload it directly.
1879                                  */
1880                                 RADEON_COPY_MT(buffer, data,
1881                                                (int)(dwords * sizeof(u32)));
1882                         } else {
1883                                 /* Texture image width is less than the minimum, so we
1884                                  * need to pad out each image scanline to the minimum
1885                                  * width.
1886                                  */
1887                                 for (i = 0; i < tex->height; i++) {
1888                                         RADEON_COPY_MT(buffer, data, tex_width);
1889                                         buffer += 8;
1890                                         data += tex_width;
1891                                 }
1892                         }
1893                 }
1894
1895 #undef RADEON_COPY_MT
1896                 byte_offset = (image->y & ~2047) * blit_width;
1897                 buf->file_priv = file_priv;
1898                 buf->used = size;
1899                 offset = dev_priv->gart_buffers_offset + buf->offset;
1900                 BEGIN_RING(9);
1901                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1902                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1903                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1904                          RADEON_GMC_BRUSH_NONE |
1905                          (format << 8) |
1906                          RADEON_GMC_SRC_DATATYPE_COLOR |
1907                          RADEON_ROP3_S |
1908                          RADEON_DP_SRC_SOURCE_MEMORY |
1909                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1910                 OUT_RING((spitch << 22) | (offset >> 10));
1911                 OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
1912                 OUT_RING(0);
1913                 OUT_RING((image->x << 16) | (image->y % 2048));
1914                 OUT_RING((image->width << 16) | height);
1915                 RADEON_WAIT_UNTIL_2D_IDLE();
1916                 ADVANCE_RING();
1917                 COMMIT_RING();
1918
1919                 radeon_cp_discard_buffer(dev, file_priv->masterp, buf);
1920
1921                 /* Update the input parameters for next time */
1922                 image->y += height;
1923                 image->height -= height;
1924                 image->data = (const u8 __user *)image->data + size;
1925         } while (image->height > 0);
1926
1927         /* Flush the pixel cache after the blit completes.  This ensures
1928          * the texture data is written out to memory before rendering
1929          * continues.
1930          */
1931         BEGIN_RING(4);
1932         RADEON_FLUSH_CACHE();
1933         RADEON_WAIT_UNTIL_2D_IDLE();
1934         ADVANCE_RING();
1935         COMMIT_RING();
1936
1937         return 0;
1938 }
1939
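/* Load the 32x32 polygon stipple pattern: RE_STIPPLE_ADDR is reset to 0
 * and all 32 rows are then written to RE_STIPPLE_DATA in one packet.
 */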
1940 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1941 {
1942         drm_radeon_private_t *dev_priv = dev->dev_private;
1943         int i;
1944         RING_LOCALS;
1945         DRM_DEBUG("\n");
1946
1947         BEGIN_RING(35);
1948
1949         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1950         OUT_RING(0x00000000);
1951
1952         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1953         for (i = 0; i < 32; i++) {
1954                 OUT_RING(stipple[i]);
1955         }
1956
1957         ADVANCE_RING();
1958 }
1959
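/* Write one surface's setup to the hardware: idle the CP, then program
 * the SURFACE<n>_INFO / LOWER_BOUND / UPPER_BOUND registers (16 bytes
 * apart per surface) from dev_priv->surfaces[surf_index].
 */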
1960 static void radeon_apply_surface_regs(int surf_index,
1961                                       drm_radeon_private_t *dev_priv)
1962 {
1963         if (!dev_priv->mmio)
1964                 return;
1965
1966         radeon_do_cp_idle(dev_priv);
1967
1968         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1969                      dev_priv->surfaces[surf_index].flags);
1970         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1971                      dev_priv->surfaces[surf_index].lower);
1972         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1973                      dev_priv->surfaces[surf_index].upper);
1974 }
1975
1976 /* Allocate a virtual surface.
1977  * This does not always allocate a real hardware surface; it will extend
1978  * an existing surface when possible.
1979  *
1980  * Note that the refcount can be at most 2: if it could reach 3, freeing
1981  * one virtual surface might force us to allocate a new hardware surface,
1982  * which might not be available.
1983  * For example, if three contiguous surfaces A, B and C shared one
1984  * hardware surface and B were freed, we would suddenly need two hardware
1985  * surfaces to hold A and C, and those might not be available.
1986  */
1987 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1988                          drm_radeon_private_t *dev_priv,
1989                          struct drm_file *file_priv)
1990 {
1991         struct radeon_virt_surface *s;
1992         int i;
1993         int virt_surface_index;
1994         uint32_t new_upper, new_lower;
1995
1996         new_lower = new->address;
1997         new_upper = new_lower + new->size - 1;
1998
1999         /* sanity check */
2000         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
2001             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
2002              RADEON_SURF_ADDRESS_FIXED_MASK)
2003             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
2004                 return -1;
2005
2006         /* make sure there is no overlap with existing surfaces */
2007         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2008                 if ((dev_priv->surfaces[i].refcount != 0) &&
2009                     (((new_lower >= dev_priv->surfaces[i].lower) &&
2010                       (new_lower < dev_priv->surfaces[i].upper)) ||
2011                      ((new_lower < dev_priv->surfaces[i].lower) &&
2012                       (new_upper > dev_priv->surfaces[i].lower)))) {
2013                         return -1;
2014                 }
2015         }
2016
2017         /* find a virtual surface */
2018         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
2019                 if (dev_priv->virt_surfaces[i].file_priv == NULL)
2020                         break;
2021         if (i == 2 * RADEON_MAX_SURFACES) {
2022                 return -1;
2023         }
2024         virt_surface_index = i;
2025
2026         /* try to reuse an existing surface */
2027         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2028                 /* extend before */
2029                 if ((dev_priv->surfaces[i].refcount == 1) &&
2030                     (new->flags == dev_priv->surfaces[i].flags) &&
2031                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
2032                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2033                         s->surface_index = i;
2034                         s->lower = new_lower;
2035                         s->upper = new_upper;
2036                         s->flags = new->flags;
2037                         s->file_priv = file_priv;
2038                         dev_priv->surfaces[i].refcount++;
2039                         dev_priv->surfaces[i].lower = s->lower;
2040                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2041                         return virt_surface_index;
2042                 }
2043
2044                 /* extend after */
2045                 if ((dev_priv->surfaces[i].refcount == 1) &&
2046                     (new->flags == dev_priv->surfaces[i].flags) &&
2047                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
2048                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2049                         s->surface_index = i;
2050                         s->lower = new_lower;
2051                         s->upper = new_upper;
2052                         s->flags = new->flags;
2053                         s->file_priv = file_priv;
2054                         dev_priv->surfaces[i].refcount++;
2055                         dev_priv->surfaces[i].upper = s->upper;
2056                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2057                         return virt_surface_index;
2058                 }
2059         }
2060
2061         /* okay, we need a new one */
2062         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2063                 if (dev_priv->surfaces[i].refcount == 0) {
2064                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2065                         s->surface_index = i;
2066                         s->lower = new_lower;
2067                         s->upper = new_upper;
2068                         s->flags = new->flags;
2069                         s->file_priv = file_priv;
2070                         dev_priv->surfaces[i].refcount = 1;
2071                         dev_priv->surfaces[i].lower = s->lower;
2072                         dev_priv->surfaces[i].upper = s->upper;
2073                         dev_priv->surfaces[i].flags = s->flags;
2074                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2075                         return virt_surface_index;
2076                 }
2077         }
2078
2079         /* we didn't find anything */
2080         return -1;
2081 }
2082
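/* Free the virtual surface owned by `file_priv' that starts at `lower':
 * shrink the backing hardware surface's bounds, drop its refcount
 * (clearing its flags when it reaches zero) and rewrite the surface
 * registers.  Returns 0 on success, 1 if no matching surface was found.
 */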
2083 static int free_surface(struct drm_file *file_priv,
2084                         drm_radeon_private_t * dev_priv,
2085                         int lower)
2086 {
2087         struct radeon_virt_surface *s;
2088         int i;
2089         /* find the virtual surface */
2090         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2091                 s = &(dev_priv->virt_surfaces[i]);
2092                 if (s->file_priv) {
2093                         if ((lower == s->lower) && (file_priv == s->file_priv))
2094                         {
2095                                 if (dev_priv->surfaces[s->surface_index].
2096                                     lower == s->lower)
2097                                         dev_priv->surfaces[s->surface_index].
2098                                             lower = s->upper;
2099
2100                                 if (dev_priv->surfaces[s->surface_index].
2101                                     upper == s->upper)
2102                                         dev_priv->surfaces[s->surface_index].
2103                                             upper = s->lower;
2104
2105                                 dev_priv->surfaces[s->surface_index].refcount--;
2106                                 if (dev_priv->surfaces[s->surface_index].
2107                                     refcount == 0)
2108                                         dev_priv->surfaces[s->surface_index].
2109                                             flags = 0;
2110                                 s->file_priv = NULL;
2111                                 radeon_apply_surface_regs(s->surface_index,
2112                                                           dev_priv);
2113                                 return 0;
2114                         }
2115                 }
2116         }
2117         return 1;
2118 }
2119
2120 static void radeon_surfaces_release(struct drm_file *file_priv,
2121                                     drm_radeon_private_t * dev_priv)
2122 {
2123         int i;
2124         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2125                 if (dev_priv->virt_surfaces[i].file_priv == file_priv)
2126                         free_surface(file_priv, dev_priv,
2127                                      dev_priv->virt_surfaces[i].lower);
2128         }
2129 }
2130
2131 /* ================================================================
2132  * IOCTL functions
2133  */
2134 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
2135 {
2136         drm_radeon_private_t *dev_priv = dev->dev_private;
2137         drm_radeon_surface_alloc_t *alloc = data;
2138
2139         if (alloc_surface(alloc, dev_priv, file_priv) == -1)
2140                 return -EINVAL;
2141         else
2142                 return 0;
2143 }
2144
2145 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
2146 {
2147         drm_radeon_private_t *dev_priv = dev->dev_private;
2148         drm_radeon_surface_free_t *memfree = data;
2149
2150         if (free_surface(file_priv, dev_priv, memfree->address))
2151                 return -EINVAL;
2152         else
2153                 return 0;
2154 }
2155
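/* Ioctl handler for the clear path: clamp the cliprect count, copy the
 * caller's per-rect depth clear values from user space and hand
 * everything to radeon_cp_dispatch_clear().
 */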
2156 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
2157 {
2158         drm_radeon_private_t *dev_priv = dev->dev_private;
2159         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
2160         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2161         drm_radeon_clear_t *clear = data;
2162         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2163         DRM_DEBUG("\n");
2164
2165         LOCK_TEST_WITH_RETURN(dev, file_priv);
2166
2167         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2168
2169         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2170                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2171
2172         if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
2173                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2174                 return -EFAULT;
2175
2176         radeon_cp_dispatch_clear(dev, file_priv->masterp, clear, depth_boxes);
2177
2178         COMMIT_RING();
2179         return 0;
2180 }
2181
2182 /* Not sure why this isn't set all the time:
2183  */
2184 static int radeon_do_init_pageflip(struct drm_device *dev, struct drm_master *master)
2185 {
2186         drm_radeon_private_t *dev_priv = dev->dev_private;
2187         struct drm_radeon_master_private *master_priv = master->driver_priv;
2188         RING_LOCALS;
2189
2190         DRM_DEBUG("\n");
2191
2192         BEGIN_RING(6);
2193         RADEON_WAIT_UNTIL_3D_IDLE();
2194         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2195         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2196                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2197         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2198         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2199                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2200         ADVANCE_RING();
2201
2202         dev_priv->page_flipping = 1;
2203
2204         if (master_priv->sarea_priv->pfCurrentPage != 1)
2205                 master_priv->sarea_priv->pfCurrentPage = 0;
2206
2207         return 0;
2208 }
2209
2210 /* Swapping and flipping are different operations, need different ioctls.
2211  * They can & should be intermixed to support multiple 3d windows.
2212  */
2213 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
2214 {
2215         drm_radeon_private_t *dev_priv = dev->dev_private;
2216         DRM_DEBUG("\n");
2217
2218         LOCK_TEST_WITH_RETURN(dev, file_priv);
2219
2220         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2221
2222         if (!dev_priv->page_flipping)
2223                 radeon_do_init_pageflip(dev, file_priv->masterp);
2224
2225         radeon_cp_dispatch_flip(dev, file_priv->masterp);
2226
2227         COMMIT_RING();
2228         return 0;
2229 }
2230
2231 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
2232 {
2233         drm_radeon_private_t *dev_priv = dev->dev_private;
2234         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
2235         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
2236
2237         DRM_DEBUG("\n");
2238
2239         LOCK_TEST_WITH_RETURN(dev, file_priv);
2240
2241         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2242
2243         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2244                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2245
2246         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2247                 r600_cp_dispatch_swap(dev, file_priv);
2248         else
2249                 radeon_cp_dispatch_swap(dev, file_priv->masterp);
2250         sarea_priv->ctx_owner = 0;
2251
2252         COMMIT_RING();
2253         return 0;
2254 }
2255
2256 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
2257 {
2258         drm_radeon_private_t *dev_priv = dev->dev_private;
2259         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
2260         drm_radeon_sarea_t *sarea_priv;
2261         struct drm_device_dma *dma = dev->dma;
2262         struct drm_buf *buf;
2263         drm_radeon_vertex_t *vertex = data;
2264         drm_radeon_tcl_prim_t prim;
2265
2266         LOCK_TEST_WITH_RETURN(dev, file_priv);
2267
2268         sarea_priv = master_priv->sarea_priv;
2269
2270         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2271                   DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
2272
2273         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2274                 DRM_ERROR("buffer index %d (of %d max)\n",
2275                           vertex->idx, dma->buf_count - 1);
2276                 return -EINVAL;
2277         }
2278         if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2279                 DRM_ERROR("buffer prim %d\n", vertex->prim);
2280                 return -EINVAL;
2281         }
2282
2283         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2284         VB_AGE_TEST_WITH_RETURN(dev_priv);
2285
2286         buf = dma->buflist[vertex->idx];
2287
2288         if (buf->file_priv != file_priv) {
2289                 DRM_ERROR("process %d using buffer owned by %p\n",
2290                           DRM_CURRENTPID, buf->file_priv);
2291                 return -EINVAL;
2292         }
2293         if (buf->pending) {
2294                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2295                 return -EINVAL;
2296         }
2297
2298         /* Build up a prim_t record:
2299          */
2300         if (vertex->count) {
2301                 buf->used = vertex->count;      /* not used? */
2302
2303                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2304                         if (radeon_emit_state(dev_priv, file_priv,
2305                                               &sarea_priv->context_state,
2306                                               sarea_priv->tex_state,
2307                                               sarea_priv->dirty)) {
2308                                 DRM_ERROR("radeon_emit_state failed\n");
2309                                 return -EINVAL;
2310                         }
2311
2312                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2313                                                RADEON_UPLOAD_TEX1IMAGES |
2314                                                RADEON_UPLOAD_TEX2IMAGES |
2315                                                RADEON_REQUIRE_QUIESCENCE);
2316                 }
2317
2318                 prim.start = 0;
2319                 prim.finish = vertex->count;    /* unused */
2320                 prim.prim = vertex->prim;
2321                 prim.numverts = vertex->count;
2322                 prim.vc_format = sarea_priv->vc_format;
2323
2324                 radeon_cp_dispatch_vertex(dev, file_priv, buf, &prim);
2325         }
2326
2327         if (vertex->discard) {
2328                 radeon_cp_discard_buffer(dev, file_priv->masterp, buf);
2329         }
2330
2331         COMMIT_RING();
2332         return 0;
2333 }
2334
2335 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
2336 {
2337         drm_radeon_private_t *dev_priv = dev->dev_private;
2338         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
2339         drm_radeon_sarea_t *sarea_priv;
2340         struct drm_device_dma *dma = dev->dma;
2341         struct drm_buf *buf;
2342         drm_radeon_indices_t *elts = data;
2343         drm_radeon_tcl_prim_t prim;
2344         int count;
2345
2346         LOCK_TEST_WITH_RETURN(dev, file_priv);
2347
2348         sarea_priv = master_priv->sarea_priv;
2349
2350         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2351                   DRM_CURRENTPID, elts->idx, elts->start, elts->end,
2352                   elts->discard);
2353
2354         if (elts->idx < 0 || elts->idx >= dma->buf_count) {
2355                 DRM_ERROR("buffer index %d (of %d max)\n",
2356                           elts->idx, dma->buf_count - 1);
2357                 return -EINVAL;
2358         }
2359         if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2360                 DRM_ERROR("buffer prim %d\n", elts->prim);
2361                 return -EINVAL;
2362         }
2363
2364         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2365         VB_AGE_TEST_WITH_RETURN(dev_priv);
2366
2367         buf = dma->buflist[elts->idx];
2368
2369         if (buf->file_priv != file_priv) {
2370                 DRM_ERROR("process %d using buffer owned by %p\n",
2371                           DRM_CURRENTPID, buf->file_priv);
2372                 return -EINVAL;
2373         }
2374         if (buf->pending) {
2375                 DRM_ERROR("sending pending buffer %d\n", elts->idx);
2376                 return -EINVAL;
2377         }
2378
2379         count = (elts->end - elts->start) / sizeof(u16);
2380         elts->start -= RADEON_INDEX_PRIM_OFFSET;
2381
2382         if (elts->start & 0x7) {
2383                 DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
2384                 return -EINVAL;
2385         }
2386         if (elts->start < buf->used) {
2387                 DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
2388                 return -EINVAL;
2389         }
2390
2391         buf->used = elts->end;
2392
2393         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2394                 if (radeon_emit_state(dev_priv, file_priv,
2395                                       &sarea_priv->context_state,
2396                                       sarea_priv->tex_state,
2397                                       sarea_priv->dirty)) {
2398                         DRM_ERROR("radeon_emit_state failed\n");
2399                         return -EINVAL;
2400                 }
2401
2402                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2403                                        RADEON_UPLOAD_TEX1IMAGES |
2404                                        RADEON_UPLOAD_TEX2IMAGES |
2405                                        RADEON_REQUIRE_QUIESCENCE);
2406         }
2407
2408         /* Build up a prim_t record:
2409          */
2410         prim.start = elts->start;
2411         prim.finish = elts->end;
2412         prim.prim = elts->prim;
2413         prim.offset = 0;        /* offset from start of dma buffers */
2414         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2415         prim.vc_format = sarea_priv->vc_format;
2416
2417         radeon_cp_dispatch_indices(dev, file_priv->masterp, buf, &prim);
2418         if (elts->discard) {
2419                 radeon_cp_discard_buffer(dev, file_priv->masterp, buf);
2420         }
2421
2422         COMMIT_RING();
2423         return 0;
2424 }
2425
2426 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
2427 {
2428         drm_radeon_private_t *dev_priv = dev->dev_private;
2429         drm_radeon_texture_t *tex = data;
2430         drm_radeon_tex_image_t image;
2431         int ret;
2432
2433         LOCK_TEST_WITH_RETURN(dev, file_priv);
2434
2435         if (tex->image == NULL) {
2436                 DRM_ERROR("null texture image!\n");
2437                 return -EINVAL;
2438         }
2439
2440         if (DRM_COPY_FROM_USER(&image,
2441                                (drm_radeon_tex_image_t __user *) tex->image,
2442                                sizeof(image)))
2443                 return -EFAULT;
2444
2445         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2446         VB_AGE_TEST_WITH_RETURN(dev_priv);
2447
2448         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2449                 ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
2450         else
2451                 ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
2452
2453         return ret;
2454 }
2455
2456 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
2457 {
2458         drm_radeon_private_t *dev_priv = dev->dev_private;
2459         drm_radeon_stipple_t *stipple = data;
2460         u32 mask[32];
2461
2462         LOCK_TEST_WITH_RETURN(dev, file_priv);
2463
2464         if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
2465                 return -EFAULT;
2466
2467         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2468
2469         radeon_cp_dispatch_stipple(dev, mask);
2470
2471         COMMIT_RING();
2472         return 0;
2473 }
2474
2475 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
2476 {
2477         drm_radeon_private_t *dev_priv = dev->dev_private;
2478         struct drm_device_dma *dma = dev->dma;
2479         struct drm_buf *buf;
2480         drm_radeon_indirect_t *indirect = data;
2481         RING_LOCALS;
2482
2483         LOCK_TEST_WITH_RETURN(dev, file_priv);
2484
2485         DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
2486                   indirect->idx, indirect->start, indirect->end,
2487                   indirect->discard);
2488
2489         if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
2490                 DRM_ERROR("buffer index %d (of %d max)\n",
2491                           indirect->idx, dma->buf_count - 1);
2492                 return -EINVAL;
2493         }
2494
2495         buf = dma->buflist[indirect->idx];
2496
2497         if (buf->file_priv != file_priv) {
2498                 DRM_ERROR("process %d using buffer owned by %p\n",
2499                           DRM_CURRENTPID, buf->file_priv);
2500                 return -EINVAL;
2501         }
2502         if (buf->pending) {
2503                 DRM_ERROR("sending pending buffer %d\n", indirect->idx);
2504                 return -EINVAL;
2505         }
2506
2507         if (indirect->start < buf->used) {
2508                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2509                           indirect->start, buf->used);
2510                 return -EINVAL;
2511         }
2512
2513         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2514         VB_AGE_TEST_WITH_RETURN(dev_priv);
2515
2516         buf->used = indirect->end;
2517
2518         /* Dispatch the indirect buffer full of commands from the
2519          * X server.  This is insecure and is thus only available to
2520          * privileged clients.
2521          */
2522         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
2523                 r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2524         else {
2525                 /* Wait for the 3D stream to idle before the indirect buffer
2526                  * containing 2D acceleration commands is processed.
2527                  */
2528                 BEGIN_RING(2);
2529                 RADEON_WAIT_UNTIL_3D_IDLE();
2530                 ADVANCE_RING();
2531                 radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
2532         }
2533
2534         if (indirect->discard) {
2535                 radeon_cp_discard_buffer(dev, file_priv->masterp, buf);
2536         }
2537
2538         COMMIT_RING();
2539         return 0;
2540 }
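/* How a privileged client (typically the X server) drives the indirect
 * path above: it writes raw CP command dwords into one of the mapped
 * DMA buffers and then describes the valid byte range with a
 * drm_radeon_indirect_t, where idx names the DMA buffer, start/end
 * delimit the command bytes, and discard releases the buffer once it
 * has been dispatched.  Illustrative sketch only; buf_idx and nbytes
 * stand in for the client's own bookkeeping and the libdrm wrapper
 * name is assumed:
 *
 *	drm_radeon_indirect_t ind;
 *
 *	ind.idx = buf_idx;
 *	ind.start = 0;
 *	ind.end = nbytes;
 *	ind.discard = 1;
 *	drmCommandWriteRead(fd, DRM_RADEON_INDIRECT, &ind, sizeof(ind));
 */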
2541
2542 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
2543 {
2544         drm_radeon_private_t *dev_priv = dev->dev_private;
2545         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
2546         drm_radeon_sarea_t *sarea_priv;
2547         struct drm_device_dma *dma = dev->dma;
2548         struct drm_buf *buf;
2549         drm_radeon_vertex2_t *vertex = data;
2550         int i;
2551         unsigned char laststate;
2552
2553         LOCK_TEST_WITH_RETURN(dev, file_priv);
2554
2555         sarea_priv = master_priv->sarea_priv;
2556
2557         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2558                   DRM_CURRENTPID, vertex->idx, vertex->discard);
2559
2560         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
2561                 DRM_ERROR("buffer index %d (of %d max)\n",
2562                           vertex->idx, dma->buf_count - 1);
2563                 return -EINVAL;
2564         }
2565
2566         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2567         VB_AGE_TEST_WITH_RETURN(dev_priv);
2568
2569         buf = dma->buflist[vertex->idx];
2570
2571         if (buf->file_priv != file_priv) {
2572                 DRM_ERROR("process %d using buffer owned by %p\n",
2573                           DRM_CURRENTPID, buf->file_priv);
2574                 return -EINVAL;
2575         }
2576
2577         if (buf->pending) {
2578                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
2579                 return -EINVAL;
2580         }
2581
2582         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2583                 return -EINVAL;
2584
2585         for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
2586                 drm_radeon_prim_t prim;
2587                 drm_radeon_tcl_prim_t tclprim;
2588
2589                 if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
2590                         return -EFAULT;
2591
2592                 if (prim.stateidx != laststate) {
2593                         drm_radeon_state_t state;
2594
2595                         if (DRM_COPY_FROM_USER(&state,
2596                                                &vertex->state[prim.stateidx],
2597                                                sizeof(state)))
2598                                 return -EFAULT;
2599
2600                         if (radeon_emit_state2(dev_priv, file_priv, &state)) {
2601                                 DRM_ERROR("radeon_emit_state2 failed\n");
2602                                 return -EINVAL;
2603                         }
2604
2605                         laststate = prim.stateidx;
2606                 }
2607
2608                 tclprim.start = prim.start;
2609                 tclprim.finish = prim.finish;
2610                 tclprim.prim = prim.prim;
2611                 tclprim.vc_format = prim.vc_format;
2612
2613                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2614                         tclprim.offset = prim.numverts * 64;
2615                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2616
2617                         radeon_cp_dispatch_indices(dev, file_priv->masterp, buf, &tclprim);
2618                 } else {
2619                         tclprim.numverts = prim.numverts;
2620                         tclprim.offset = 0;     /* not used */
2621
2622                         radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
2623                 }
2624
2625                 if (sarea_priv->nbox == 1)
2626                         sarea_priv->nbox = 0;
2627         }
2628
2629         if (vertex->discard) {
2630                 radeon_cp_discard_buffer(dev, file_priv->masterp, buf);
2631         }
2632
2633         COMMIT_RING();
2634         return 0;
2635 }
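/* A note on the loop above: laststate caches the index of the last
 * state block emitted, so consecutive primitives that share a state
 * index only trigger one radeon_emit_state2() call; primitives flagged
 * RADEON_PRIM_WALK_IND go through the indexed dispatch path, everything
 * else goes straight to radeon_cp_dispatch_vertex().
 */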
2636
2637 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2638                                struct drm_file *file_priv,
2639                                drm_radeon_cmd_header_t header,
2640                                drm_radeon_kcmd_buffer_t *cmdbuf)
2641 {
2642         int id = (int)header.packet.packet_id;
2643         int sz, reg;
2644         RING_LOCALS;
2645
2646         if (id >= RADEON_MAX_STATE_PACKETS)
2647                 return -EINVAL;
2648
2649         sz = packet[id].len;
2650         reg = packet[id].start;
2651
2652         if (sz * sizeof(u32) > drm_buffer_unprocessed(cmdbuf->buffer)) {
2653                 DRM_ERROR("Packet size provided larger than data provided\n");
2654                 return -EINVAL;
2655         }
2656
2657         if (radeon_check_and_fixup_packets(dev_priv, file_priv, id,
2658                                 cmdbuf->buffer)) {
2659                 DRM_ERROR("Packet verification failed\n");
2660                 return -EINVAL;
2661         }
2662
2663         BEGIN_RING(sz + 1);
2664         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2665         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2666         ADVANCE_RING();
2667
2668         return 0;
2669 }
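/* The emission above uses a type-0 CP packet: CP_PACKET0(reg, n) builds
 * a header that writes n + 1 consecutive dwords starting at register
 * reg, so CP_PACKET0(reg, sz - 1) followed by sz payload dwords
 * programs packet[id].len registers beginning at packet[id].start, once
 * the payload has passed radeon_check_and_fixup_packets().
 */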
2670
2671 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2672                                           drm_radeon_cmd_header_t header,
2673                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2674 {
2675         int sz = header.scalars.count;
2676         int start = header.scalars.offset;
2677         int stride = header.scalars.stride;
2678         RING_LOCALS;
2679
2680         BEGIN_RING(3 + sz);
2681         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2682         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2683         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2684         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2685         ADVANCE_RING();
2686         return 0;
2687 }
2688
2689 /* God this is ugly
2690  */
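/* (SCALARS2 appears to exist because the scalars command header only
 * has an 8-bit offset field: rebasing the index by 0x100 lets clients
 * reach scalar state beyond offset 255.)
 */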
2691 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2692                                            drm_radeon_cmd_header_t header,
2693                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2694 {
2695         int sz = header.scalars.count;
2696         int start = ((unsigned int)header.scalars.offset) + 0x100;
2697         int stride = header.scalars.stride;
2698         RING_LOCALS;
2699
2700         BEGIN_RING(3 + sz);
2701         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2702         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2703         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2704         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2705         ADVANCE_RING();
2706         return 0;
2707 }
2708
2709 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2710                                           drm_radeon_cmd_header_t header,
2711                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2712 {
2713         int sz = header.vectors.count;
2714         int start = header.vectors.offset;
2715         int stride = header.vectors.stride;
2716         RING_LOCALS;
2717
2718         BEGIN_RING(5 + sz);
2719         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2720         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2721         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2722         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2723         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2724         ADVANCE_RING();
2725
2726         return 0;
2727 }
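/* Vector state upload, as done above: a TCL state flush, a write to the
 * VECTOR_INDX register selecting the starting vector and octword
 * stride, then a CP_PACKET0_TABLE burst of sz payload dwords into the
 * VECTOR_DATA register.
 */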
2728
2729 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2730                                           drm_radeon_cmd_header_t header,
2731                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2732 {
2733         int sz = header.veclinear.count * 4;
2734         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2735         RING_LOCALS;
2736
2737         if (!sz)
2738                 return 0;
2739         if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
2740                 return -EINVAL;
2741
2742         BEGIN_RING(5 + sz);
2743         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2744         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2745         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2746         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2747         OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
2748         ADVANCE_RING();
2749
2750         return 0;
2751 }
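/* The veclinear variant differs from radeon_emit_vectors() in that the
 * starting index is assembled from the split addr_lo/addr_hi fields and
 * the size is given in whole vectors (header.veclinear.count, four
 * dwords each), which is why sz is count * 4 dwords and the bounds
 * check compares sz * 4 bytes against the unprocessed command data.
 */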
2752
2753 static int radeon_emit_packet3(struct drm_device * dev,
2754                                struct drm_file *file_priv,
2755                                drm_radeon_kcmd_buffer_t *cmdbuf)
2756 {
2757         drm_radeon_private_t *dev_priv = dev->dev_private;
2758         unsigned int cmdsz;
2759         int ret;
2760         RING_LOCALS;
2761
2762         DRM_DEBUG("\n");
2763
2764         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2765                                                   cmdbuf, &cmdsz))) {
2766                 DRM_ERROR("Packet verification failed\n");
2767                 return ret;
2768         }
2769
2770         BEGIN_RING(cmdsz);
2771         OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
2772         ADVANCE_RING();
2773
2774         return 0;
2775 }
2776
2777 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
2778                                         struct drm_file *file_priv,
2779                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2780                                         int orig_nbox)
2781 {
2782         drm_radeon_private_t *dev_priv = dev->dev_private;
2783         struct drm_clip_rect box;
2784         unsigned int cmdsz;
2785         int ret;
2786         struct drm_clip_rect __user *boxes = cmdbuf->boxes;
2787         int i = 0;
2788         RING_LOCALS;
2789
2790         DRM_DEBUG("\n");
2791
2792         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
2793                                                   cmdbuf, &cmdsz))) {
2794                 DRM_ERROR("Packet verification failed\n");
2795                 return ret;
2796         }
2797
2798         if (!orig_nbox)
2799                 goto out;
2800
2801         do {
2802                 if (i < cmdbuf->nbox) {
2803                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2804                                 return -EFAULT;
2805                         /* FIXME The second and subsequent times round
2806                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2807                          * calling emit_clip_rect(). This fixes a
2808                          * lockup on fast machines when sending
2809                          * several cliprects with a cmdbuf, as when
2810                          * waving a 2D window over a 3D
2811                          * window. Something in the commands from user
2812                          * space seems to hang the card when they're
2813                          * sent several times in a row. That would be
2814                          * the correct place to fix it but this works
2815                          * around it until I can figure that out - Tim
2816                          * Smith */
2817                         if (i) {
2818                                 BEGIN_RING(2);
2819                                 RADEON_WAIT_UNTIL_3D_IDLE();
2820                                 ADVANCE_RING();
2821                         }
2822                         radeon_emit_clip_rect(dev_priv, &box);
2823                 }
2824
2825                 BEGIN_RING(cmdsz);
2826                 OUT_RING_DRM_BUFFER(cmdbuf->buffer, cmdsz);
2827                 ADVANCE_RING();
2828
2829         } while (++i < cmdbuf->nbox);
2830         if (cmdbuf->nbox == 1)
2831                 cmdbuf->nbox = 0;
2832
2833         return 0;
2834       out:
2835         drm_buffer_advance(cmdbuf->buffer, cmdsz * 4);
2836         return 0;
2837 }
2838
2839 static int radeon_emit_wait(struct drm_device * dev, int flags)
2840 {
2841         drm_radeon_private_t *dev_priv = dev->dev_private;
2842         RING_LOCALS;
2843
2844         DRM_DEBUG("%x\n", flags);
2845         switch (flags) {
2846         case RADEON_WAIT_2D:
2847                 BEGIN_RING(2);
2848                 RADEON_WAIT_UNTIL_2D_IDLE();
2849                 ADVANCE_RING();
2850                 break;
2851         case RADEON_WAIT_3D:
2852                 BEGIN_RING(2);
2853                 RADEON_WAIT_UNTIL_3D_IDLE();
2854                 ADVANCE_RING();
2855                 break;
2856         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2857                 BEGIN_RING(2);
2858                 RADEON_WAIT_UNTIL_IDLE();
2859                 ADVANCE_RING();
2860                 break;
2861         default:
2862                 return -EINVAL;
2863         }
2864
2865         return 0;
2866 }
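/* A RADEON_CMD_WAIT request is a single command header with no payload;
 * a client that wants the engine fully idle submits a header whose
 * wait.flags is RADEON_WAIT_2D | RADEON_WAIT_3D, which maps to the
 * RADEON_WAIT_UNTIL_IDLE() case above.
 */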
2867
2868 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data,
2869                 struct drm_file *file_priv)
2870 {
2871         drm_radeon_private_t *dev_priv = dev->dev_private;
2872         struct drm_device_dma *dma = dev->dma;
2873         struct drm_buf *buf = NULL;
2874         drm_radeon_cmd_header_t stack_header;
2875         int idx;
2876         drm_radeon_kcmd_buffer_t *cmdbuf = data;
2877         int orig_nbox;
2878
2879         LOCK_TEST_WITH_RETURN(dev, file_priv);
2880
2881         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2882         VB_AGE_TEST_WITH_RETURN(dev_priv);
2883
2884         if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
2885                 return -EINVAL;
2886         }
2887
2888         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2889          * races between checking values and using those values in other code,
2890          * and simply to avoid a lot of function calls to copy in data.
2891          */
2892         if (cmdbuf->bufsz != 0) {
2893                 int rv;
2894                 void __user *buffer = cmdbuf->buffer;
2895                 rv = drm_buffer_alloc(&cmdbuf->buffer, cmdbuf->bufsz);
2896                 if (rv)
2897                         return rv;
2898                 rv = drm_buffer_copy_from_user(cmdbuf->buffer, buffer,
2899                                                 cmdbuf->bufsz);
2900                 if (rv) {
2901                         drm_buffer_free(cmdbuf->buffer);
2902                         return rv;
2903                 }
2904         } else
2905                 goto done;
2906
2907         orig_nbox = cmdbuf->nbox;
2908
2909         if (dev_priv->microcode_version == UCODE_R300) {
2910                 int temp;
2911                 temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
2912
2913                 drm_buffer_free(cmdbuf->buffer);
2914
2915                 return temp;
2916         }
2917
2918         /* microcode_version != r300 */
2919         while (drm_buffer_unprocessed(cmdbuf->buffer) >= sizeof(stack_header)) {
2920
2921                 drm_radeon_cmd_header_t *header;
2922                 header = drm_buffer_read_object(cmdbuf->buffer,
2923                                 sizeof(stack_header), &stack_header);
2924
2925                 switch (header->header.cmd_type) {
2926                 case RADEON_CMD_PACKET:
2927                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2928                         if (radeon_emit_packets
2929                             (dev_priv, file_priv, *header, cmdbuf)) {
2930                                 DRM_ERROR("radeon_emit_packets failed\n");
2931                                 goto err;
2932                         }
2933                         break;
2934
2935                 case RADEON_CMD_SCALARS:
2936                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2937                         if (radeon_emit_scalars(dev_priv, *header, cmdbuf)) {
2938                                 DRM_ERROR("radeon_emit_scalars failed\n");
2939                                 goto err;
2940                         }
2941                         break;
2942
2943                 case RADEON_CMD_VECTORS:
2944                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2945                         if (radeon_emit_vectors(dev_priv, *header, cmdbuf)) {
2946                                 DRM_ERROR("radeon_emit_vectors failed\n");
2947                                 goto err;
2948                         }
2949                         break;
2950
2951                 case RADEON_CMD_DMA_DISCARD:
2952                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2953                         idx = header->dma.buf_idx;
2954                         if (idx < 0 || idx >= dma->buf_count) {
2955                                 DRM_ERROR("buffer index %d (of %d max)\n",
2956                                           idx, dma->buf_count - 1);
2957                                 goto err;
2958                         }
2959
2960                         buf = dma->buflist[idx];
2961                         if (buf->file_priv != file_priv || buf->pending) {
2962                                 DRM_ERROR("bad buffer %p %p %d\n",
2963                                           buf->file_priv, file_priv,
2964                                           buf->pending);
2965                                 goto err;
2966                         }
2967
2968                         radeon_cp_discard_buffer(dev, file_priv->masterp, buf);
2969                         break;
2970
2971                 case RADEON_CMD_PACKET3:
2972                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2973                         if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
2974                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2975                                 goto err;
2976                         }
2977                         break;
2978
2979                 case RADEON_CMD_PACKET3_CLIP:
2980                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2981                         if (radeon_emit_packet3_cliprect
2982                             (dev, file_priv, cmdbuf, orig_nbox)) {
2983                                 DRM_ERROR("radeon_emit_packet3_cliprect failed\n");
2984                                 goto err;
2985                         }
2986                         break;
2987
2988                 case RADEON_CMD_SCALARS2:
2989                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2990                         if (radeon_emit_scalars2(dev_priv, *header, cmdbuf)) {
2991                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2992                                 goto err;
2993                         }
2994                         break;
2995
2996                 case RADEON_CMD_WAIT:
2997                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2998                         if (radeon_emit_wait(dev, header->wait.flags)) {
2999                                 DRM_ERROR("radeon_emit_wait failed\n");
3000                                 goto err;
3001                         }
3002                         break;
3003                 case RADEON_CMD_VECLINEAR:
3004                         DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
3005                         if (radeon_emit_veclinear(dev_priv, *header, cmdbuf)) {
3006                                 DRM_ERROR("radeon_emit_veclinear failed\n");
3007                                 goto err;
3008                         }
3009                         break;
3010
3011                 default:
3012                         DRM_ERROR("bad cmd_type %d at byte %d\n",
3013                                   header->header.cmd_type,
3014                                   cmdbuf->buffer->iterator);
3015                         goto err;
3016                 }
3017         }
3018
3019         drm_buffer_free(cmdbuf->buffer);
3020
3021       done:
3022         DRM_DEBUG("DONE\n");
3023         COMMIT_RING();
3024         return 0;
3025
3026       err:
3027         drm_buffer_free(cmdbuf->buffer);
3028         return -EINVAL;
3029 }
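/* Layout of the command stream parsed above: the buffer is a packed
 * sequence of 32-bit drm_radeon_cmd_header_t words, each optionally
 * followed by payload dwords (register data for RADEON_CMD_PACKET,
 * scalar/vector data for the SCALARS/VECTORS/VECLINEAR commands, raw
 * packet3 dwords for RADEON_CMD_PACKET3*), while RADEON_CMD_DMA_DISCARD
 * and RADEON_CMD_WAIT carry everything in the header itself.  A
 * hypothetical stream meaning "wait for idle, then discard buffer 3"
 * is therefore just two headers (union member names other than those
 * used in this file are assumed from radeon_drm.h):
 *
 *	u32 stream[2];
 *	drm_radeon_cmd_header_t h;
 *
 *	h.i = 0;
 *	h.header.cmd_type = RADEON_CMD_WAIT;
 *	h.wait.flags = RADEON_WAIT_2D | RADEON_WAIT_3D;
 *	stream[0] = h.i;
 *
 *	h.i = 0;
 *	h.header.cmd_type = RADEON_CMD_DMA_DISCARD;
 *	h.dma.buf_idx = 3;
 *	stream[1] = h.i;
 */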
3030
3031 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3032 {
3033         drm_radeon_private_t *dev_priv = dev->dev_private;
3034         drm_radeon_getparam_t *param = data;
3035         int value;
3036
3037         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3038
3039         switch (param->param) {
3040         case RADEON_PARAM_GART_BUFFER_OFFSET:
3041                 value = dev_priv->gart_buffers_offset;
3042                 break;
3043         case RADEON_PARAM_LAST_FRAME:
3044                 dev_priv->stats.last_frame_reads++;
3045                 value = GET_SCRATCH(dev_priv, 0);
3046                 break;
3047         case RADEON_PARAM_LAST_DISPATCH:
3048                 value = GET_SCRATCH(dev_priv, 1);
3049                 break;
3050         case RADEON_PARAM_LAST_CLEAR:
3051                 dev_priv->stats.last_clear_reads++;
3052                 value = GET_SCRATCH(dev_priv, 2);
3053                 break;
3054         case RADEON_PARAM_IRQ_NR:
3055                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3056                         value = 0;
3057                 else
3058                         value = dev->irq;
3059                 break;
3060         case RADEON_PARAM_GART_BASE:
3061                 value = dev_priv->gart_vm_start;
3062                 break;
3063         case RADEON_PARAM_REGISTER_HANDLE:
3064                 value = dev_priv->mmio->offset;
3065                 break;
3066         case RADEON_PARAM_STATUS_HANDLE:
3067                 value = dev_priv->ring_rptr_offset;
3068                 break;
3069 #ifndef __LP64__
3070                 /*
3071                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3072                  * pointer which can't fit into an int-sized variable.  According to
3073                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3074                  * not supporting it shouldn't be a problem.  If the same functionality
3075                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
3076                  * so backwards-compatibility for the embedded platforms can be
3077                  * maintained.  --davidm 4-Feb-2004.
3078                  */
3079         case RADEON_PARAM_SAREA_HANDLE:
3080                 /* The lock is the first dword in the sarea. */
3081                 /* no users of this parameter */
3082                 break;
3083 #endif
3084         case RADEON_PARAM_GART_TEX_HANDLE:
3085                 value = dev_priv->gart_textures_offset;
3086                 break;
3087         case RADEON_PARAM_SCRATCH_OFFSET:
3088                 if (!dev_priv->writeback_works)
3089                         return -EINVAL;
3090                 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
3091                         value = R600_SCRATCH_REG_OFFSET;
3092                 else
3093                         value = RADEON_SCRATCH_REG_OFFSET;
3094                 break;
3095         case RADEON_PARAM_CARD_TYPE:
3096                 if (dev_priv->flags & RADEON_IS_PCIE)
3097                         value = RADEON_CARD_PCIE;
3098                 else if (dev_priv->flags & RADEON_IS_AGP)
3099                         value = RADEON_CARD_AGP;
3100                 else
3101                         value = RADEON_CARD_PCI;
3102                 break;
3103         case RADEON_PARAM_VBLANK_CRTC:
3104                 value = radeon_vblank_crtc_get(dev);
3105                 break;
3106         case RADEON_PARAM_FB_LOCATION:
3107                 value = radeon_read_fb_location(dev_priv);
3108                 break;
3109         case RADEON_PARAM_NUM_GB_PIPES:
3110                 value = dev_priv->num_gb_pipes;
3111                 break;
3112         case RADEON_PARAM_NUM_Z_PIPES:
3113                 value = dev_priv->num_z_pipes;
3114                 break;
3115         default:
3116                 DRM_DEBUG("Invalid parameter %d\n", param->param);
3117                 return -EINVAL;
3118         }
3119
3120         if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
3121                 DRM_ERROR("copy_to_user\n");
3122                 return -EFAULT;
3123         }
3124
3125         return 0;
3126 }
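/* Userspace queries these values with a drm_radeon_getparam_t whose
 * value member points at an int to be filled in.  Roughly (libdrm
 * wrapper name assumed):
 *
 *	drm_radeon_getparam_t gp;
 *	int gb_pipes = 0;
 *
 *	gp.param = RADEON_PARAM_NUM_GB_PIPES;
 *	gp.value = &gb_pipes;
 *	drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
 */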
3127
3128 static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
3129 {
3130         drm_radeon_private_t *dev_priv = dev->dev_private;
3131         struct drm_radeon_master_private *master_priv = file_priv->masterp->driver_priv;
3132         drm_radeon_setparam_t *sp = data;
3133         struct drm_radeon_driver_file_fields *radeon_priv;
3134
3135         switch (sp->param) {
3136         case RADEON_SETPARAM_FB_LOCATION:
3137                 radeon_priv = file_priv->driver_priv;
3138                 radeon_priv->radeon_fb_delta = dev_priv->fb_location -
3139                     sp->value;
3140                 break;
3141         case RADEON_SETPARAM_SWITCH_TILING:
3142                 if (sp->value == 0) {
3143                         DRM_DEBUG("color tiling disabled\n");
3144                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3145                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3146                         if (master_priv->sarea_priv)
3147                                 master_priv->sarea_priv->tiling_enabled = 0;
3148                 } else if (sp->value == 1) {
3149                         DRM_DEBUG("color tiling enabled\n");
3150                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3151                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3152                         if (master_priv->sarea_priv)
3153                                 master_priv->sarea_priv->tiling_enabled = 1;
3154                 }
3155                 break;
3156         case RADEON_SETPARAM_PCIGART_LOCATION:
3157                 dev_priv->pcigart_offset = sp->value;
3158                 dev_priv->pcigart_offset_set = 1;
3159                 break;
3160         case RADEON_SETPARAM_NEW_MEMMAP:
3161                 dev_priv->new_memmap = sp->value;
3162                 break;
3163         case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3164                 dev_priv->gart_info.table_size = sp->value;
3165                 if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3166                         dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3167                 break;
3168         case RADEON_SETPARAM_VBLANK_CRTC:
3169                 return radeon_vblank_crtc_set(dev, sp->value);
3171         default:
3172                 DRM_DEBUG("Invalid parameter %d\n", sp->param);
3173                 return -EINVAL;
3174         }
3175
3176         return 0;
3177 }
3178
3179 /* When a client dies:
3180  *    - Check for and clean up flipped page state
3181  *    - Free any alloced GART memory.
3182  *    - Free any alloced radeon surfaces.
3183  *
3184  * DRM infrastructure takes care of reclaiming dma buffers.
3185  */
3186 void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
3187 {
3188         if (dev->dev_private) {
3189                 drm_radeon_private_t *dev_priv = dev->dev_private;
3190                 dev_priv->page_flipping = 0;
3191                 radeon_mem_release(file_priv, dev_priv->gart_heap);
3192                 radeon_mem_release(file_priv, dev_priv->fb_heap);
3193                 radeon_surfaces_release(file_priv, dev_priv);
3194         }
3195 }
3196
3197 void radeon_driver_lastclose(struct drm_device *dev)
3198 {
3199         radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
3200         radeon_do_release(dev);
3201 }
3202
3203 int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
3204 {
3205         drm_radeon_private_t *dev_priv = dev->dev_private;
3206         struct drm_radeon_driver_file_fields *radeon_priv;
3207
3208         DRM_DEBUG("\n");
3209         radeon_priv = malloc(sizeof(*radeon_priv), DRM_MEM_DRIVER, M_WAITOK);
3210
3211         if (!radeon_priv)
3212                 return -ENOMEM;
3213
3214         file_priv->driver_priv = radeon_priv;
3215
3216         if (dev_priv)
3217                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3218         else
3219                 radeon_priv->radeon_fb_delta = 0;
3220         return 0;
3221 }
3222
3223 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
3224 {
3225         struct drm_radeon_driver_file_fields *radeon_priv =
3226             file_priv->driver_priv;
3227
3228         free(radeon_priv, DRM_MEM_DRIVER);
3229 }
3230
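/* Permission flags below follow the usual DRM conventions: DRM_AUTH
 * requires the file to be authenticated against the current master,
 * DRM_MASTER restricts the ioctl to the master itself (normally the X
 * server), and DRM_ROOT_ONLY additionally requires root privileges,
 * which is why the CP setup, heap init and indirect-submission ioctls
 * carry all three.
 */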
3231 struct drm_ioctl_desc radeon_ioctls[] = {
3232         DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3233         DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3234         DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3235         DRM_IOCTL_DEF_DRV(RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3236         DRM_IOCTL_DEF_DRV(RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
3237         DRM_IOCTL_DEF_DRV(RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
3238         DRM_IOCTL_DEF_DRV(RADEON_RESET, radeon_engine_reset, DRM_AUTH),
3239         DRM_IOCTL_DEF_DRV(RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
3240         DRM_IOCTL_DEF_DRV(RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
3241         DRM_IOCTL_DEF_DRV(RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
3242         DRM_IOCTL_DEF_DRV(RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
3243         DRM_IOCTL_DEF_DRV(RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
3244         DRM_IOCTL_DEF_DRV(RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
3245         DRM_IOCTL_DEF_DRV(RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
3246         DRM_IOCTL_DEF_DRV(RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3247         DRM_IOCTL_DEF_DRV(RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
3248         DRM_IOCTL_DEF_DRV(RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
3249         DRM_IOCTL_DEF_DRV(RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
3250         DRM_IOCTL_DEF_DRV(RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
3251         DRM_IOCTL_DEF_DRV(RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
3252         DRM_IOCTL_DEF_DRV(RADEON_FREE, radeon_mem_free, DRM_AUTH),
3253         DRM_IOCTL_DEF_DRV(RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
3254         DRM_IOCTL_DEF_DRV(RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
3255         DRM_IOCTL_DEF_DRV(RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
3256         DRM_IOCTL_DEF_DRV(RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
3257         DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
3258         DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
3259         DRM_IOCTL_DEF_DRV(RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
3260 };
3261
3262 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);