]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/drm/radeon_state.c
This commit was generated by cvs2svn to compensate for changes in r152058,
[FreeBSD/FreeBSD.git] / sys / dev / drm / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*-
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  *
29  * $FreeBSD$
30  */
31
32 #include "dev/drm/drmP.h"
33 #include "dev/drm/drm.h"
34 #include "dev/drm/drm_sarea.h"
35 #include "dev/drm/radeon_drm.h"
36 #include "dev/drm/radeon_drv.h"
37
38 /* ================================================================
39  * Helper functions for client state checking and fixup
40  */
41
42 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
43                                                     dev_priv,
44                                                     drm_file_t * filp_priv,
45                                                     u32 * offset)
46 {
47         u32 off = *offset;
48         struct drm_radeon_driver_file_fields *radeon_priv;
49
50         if (off >= dev_priv->fb_location &&
51             off < (dev_priv->gart_vm_start + dev_priv->gart_size))
52                 return 0;
53
54         radeon_priv = filp_priv->driver_priv;
55
56         off += radeon_priv->radeon_fb_delta;
57
58         DRM_DEBUG("offset fixed up to 0x%x\n", off);
59
60         if (off < dev_priv->fb_location ||
61             off >= (dev_priv->gart_vm_start + dev_priv->gart_size))
62                 return DRM_ERR(EINVAL);
63
64         *offset = off;
65
66         return 0;
67 }
68
/* Check (and fix up where necessary) the memory offsets embedded in one
 * client-supplied register-write state packet before it is emitted to the
 * ring.  "id" selects the packet layout (RADEON_EMIT_* / R200_EMIT_*);
 * "data" points at the packet's payload dwords.
 *
 * Returns 0 on success, DRM_ERR(EINVAL) on an invalid offset or an
 * unknown packet ID.
 *
 * NOTE(review): "data" is declared __user but is dereferenced directly
 * via radeon_check_and_fixup_offset() -- presumably the payload has
 * already been copied into kernel space by the caller; confirm.
 */
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     int id, u32 __user * data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		/* Depth buffer offset sits partway into the PP_MISC range;
		 * index it relative to the packet's base register. */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		/* Colour buffer offset sits partway into the PP_CNTL range. */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		/* R200 texture offset packets carry the offset as their
		 * first (and only) payload dword. */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		/* R100 texture offset is embedded in the TXFILTER range. */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			/* 5 cube-face offsets per packet (F1..F5); matches
			 * the len-5 entry for R200_PP_CUBIC_OFFSET_F1_* in
			 * the packet[] table below. */
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			/* Same shape for the R100 cubic offset packets. */
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_VAP_CTL:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return DRM_ERR(EINVAL);
	}

	return 0;
}
225
226 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
227                                                      dev_priv,
228                                                      drm_file_t * filp_priv,
229                                                      drm_radeon_cmd_buffer_t *
230                                                      cmdbuf,
231                                                      unsigned int *cmdsz)
232 {
233         u32 *cmd = (u32 *) cmdbuf->buf;
234
235         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
236
237         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
238                 DRM_ERROR("Not a type 3 packet\n");
239                 return DRM_ERR(EINVAL);
240         }
241
242         if (4 * *cmdsz > cmdbuf->bufsz) {
243                 DRM_ERROR("Packet size larger than size of data provided\n");
244                 return DRM_ERR(EINVAL);
245         }
246
247         /* Check client state and fix it up if necessary */
248         if (cmd[0] & 0x8000) {  /* MSB of opcode: next DWORD GUI_CNTL */
249                 u32 offset;
250
251                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
252                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
253                         offset = cmd[2] << 10;
254                         if (radeon_check_and_fixup_offset
255                             (dev_priv, filp_priv, &offset)) {
256                                 DRM_ERROR("Invalid first packet offset\n");
257                                 return DRM_ERR(EINVAL);
258                         }
259                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
260                 }
261
262                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
263                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
264                         offset = cmd[3] << 10;
265                         if (radeon_check_and_fixup_offset
266                             (dev_priv, filp_priv, &offset)) {
267                                 DRM_ERROR("Invalid second packet offset\n");
268                                 return DRM_ERR(EINVAL);
269                         }
270                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
271                 }
272         }
273
274         return 0;
275 }
276
277 /* ================================================================
278  * CP hardware state programming functions
279  */
280
/* Emit a scissor/clip rectangle to the ring so subsequent rendering is
 * clipped to "box".  Writes RE_TOP_LEFT with (x1,y1) and RE_WIDTH_HEIGHT
 * with (x2-1, y2-1) -- presumably the hardware takes an inclusive
 * bottom-right corner, hence the -1; confirm against the register docs.
 */
static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     drm_clip_rect_t * box)
{
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);	/* y in high 16 bits */
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}
296
297 /* Emit 1.1 state
298  */
299 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
300                              drm_file_t * filp_priv,
301                              drm_radeon_context_regs_t * ctx,
302                              drm_radeon_texture_regs_t * tex,
303                              unsigned int dirty)
304 {
305         RING_LOCALS;
306         DRM_DEBUG("dirty=0x%08x\n", dirty);
307
308         if (dirty & RADEON_UPLOAD_CONTEXT) {
309                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
310                                                   &ctx->rb3d_depthoffset)) {
311                         DRM_ERROR("Invalid depth buffer offset\n");
312                         return DRM_ERR(EINVAL);
313                 }
314
315                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
316                                                   &ctx->rb3d_coloroffset)) {
317                         DRM_ERROR("Invalid depth buffer offset\n");
318                         return DRM_ERR(EINVAL);
319                 }
320
321                 BEGIN_RING(14);
322                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
323                 OUT_RING(ctx->pp_misc);
324                 OUT_RING(ctx->pp_fog_color);
325                 OUT_RING(ctx->re_solid_color);
326                 OUT_RING(ctx->rb3d_blendcntl);
327                 OUT_RING(ctx->rb3d_depthoffset);
328                 OUT_RING(ctx->rb3d_depthpitch);
329                 OUT_RING(ctx->rb3d_zstencilcntl);
330                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
331                 OUT_RING(ctx->pp_cntl);
332                 OUT_RING(ctx->rb3d_cntl);
333                 OUT_RING(ctx->rb3d_coloroffset);
334                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
335                 OUT_RING(ctx->rb3d_colorpitch);
336                 ADVANCE_RING();
337         }
338
339         if (dirty & RADEON_UPLOAD_VERTFMT) {
340                 BEGIN_RING(2);
341                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
342                 OUT_RING(ctx->se_coord_fmt);
343                 ADVANCE_RING();
344         }
345
346         if (dirty & RADEON_UPLOAD_LINE) {
347                 BEGIN_RING(5);
348                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
349                 OUT_RING(ctx->re_line_pattern);
350                 OUT_RING(ctx->re_line_state);
351                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
352                 OUT_RING(ctx->se_line_width);
353                 ADVANCE_RING();
354         }
355
356         if (dirty & RADEON_UPLOAD_BUMPMAP) {
357                 BEGIN_RING(5);
358                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
359                 OUT_RING(ctx->pp_lum_matrix);
360                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
361                 OUT_RING(ctx->pp_rot_matrix_0);
362                 OUT_RING(ctx->pp_rot_matrix_1);
363                 ADVANCE_RING();
364         }
365
366         if (dirty & RADEON_UPLOAD_MASKS) {
367                 BEGIN_RING(4);
368                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
369                 OUT_RING(ctx->rb3d_stencilrefmask);
370                 OUT_RING(ctx->rb3d_ropcntl);
371                 OUT_RING(ctx->rb3d_planemask);
372                 ADVANCE_RING();
373         }
374
375         if (dirty & RADEON_UPLOAD_VIEWPORT) {
376                 BEGIN_RING(7);
377                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
378                 OUT_RING(ctx->se_vport_xscale);
379                 OUT_RING(ctx->se_vport_xoffset);
380                 OUT_RING(ctx->se_vport_yscale);
381                 OUT_RING(ctx->se_vport_yoffset);
382                 OUT_RING(ctx->se_vport_zscale);
383                 OUT_RING(ctx->se_vport_zoffset);
384                 ADVANCE_RING();
385         }
386
387         if (dirty & RADEON_UPLOAD_SETUP) {
388                 BEGIN_RING(4);
389                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
390                 OUT_RING(ctx->se_cntl);
391                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
392                 OUT_RING(ctx->se_cntl_status);
393                 ADVANCE_RING();
394         }
395
396         if (dirty & RADEON_UPLOAD_MISC) {
397                 BEGIN_RING(2);
398                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
399                 OUT_RING(ctx->re_misc);
400                 ADVANCE_RING();
401         }
402
403         if (dirty & RADEON_UPLOAD_TEX0) {
404                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
405                                                   &tex[0].pp_txoffset)) {
406                         DRM_ERROR("Invalid texture offset for unit 0\n");
407                         return DRM_ERR(EINVAL);
408                 }
409
410                 BEGIN_RING(9);
411                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
412                 OUT_RING(tex[0].pp_txfilter);
413                 OUT_RING(tex[0].pp_txformat);
414                 OUT_RING(tex[0].pp_txoffset);
415                 OUT_RING(tex[0].pp_txcblend);
416                 OUT_RING(tex[0].pp_txablend);
417                 OUT_RING(tex[0].pp_tfactor);
418                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
419                 OUT_RING(tex[0].pp_border_color);
420                 ADVANCE_RING();
421         }
422
423         if (dirty & RADEON_UPLOAD_TEX1) {
424                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
425                                                   &tex[1].pp_txoffset)) {
426                         DRM_ERROR("Invalid texture offset for unit 1\n");
427                         return DRM_ERR(EINVAL);
428                 }
429
430                 BEGIN_RING(9);
431                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
432                 OUT_RING(tex[1].pp_txfilter);
433                 OUT_RING(tex[1].pp_txformat);
434                 OUT_RING(tex[1].pp_txoffset);
435                 OUT_RING(tex[1].pp_txcblend);
436                 OUT_RING(tex[1].pp_txablend);
437                 OUT_RING(tex[1].pp_tfactor);
438                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
439                 OUT_RING(tex[1].pp_border_color);
440                 ADVANCE_RING();
441         }
442
443         if (dirty & RADEON_UPLOAD_TEX2) {
444                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
445                                                   &tex[2].pp_txoffset)) {
446                         DRM_ERROR("Invalid texture offset for unit 2\n");
447                         return DRM_ERR(EINVAL);
448                 }
449
450                 BEGIN_RING(9);
451                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
452                 OUT_RING(tex[2].pp_txfilter);
453                 OUT_RING(tex[2].pp_txformat);
454                 OUT_RING(tex[2].pp_txoffset);
455                 OUT_RING(tex[2].pp_txcblend);
456                 OUT_RING(tex[2].pp_txablend);
457                 OUT_RING(tex[2].pp_tfactor);
458                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
459                 OUT_RING(tex[2].pp_border_color);
460                 ADVANCE_RING();
461         }
462
463         return 0;
464 }
465
466 /* Emit 1.2 state
467  */
/* Emit 1.2 state
 *
 * Like radeon_emit_state(), but also handles the zbias registers added
 * by the 1.2 interface; all other dirty state is delegated to the 1.1
 * path.  Returns whatever radeon_emit_state() returns.
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      drm_file_t * filp_priv,
			      drm_radeon_state_t * state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	/* Hand the remaining dirty bits to the 1.1 state emitter. */
	return radeon_emit_state(dev_priv, filp_priv, &state->context,
				 state->tex, state->dirty);
}
485
486 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
487  * 1.3 cmdbuffers allow all previous state to be updated as well as
488  * the tcl scalar and vector areas.
489  */
/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
/* Table describing each state packet: base register, payload length in
 * dwords, and a name for diagnostics.  NOTE(review): entries are indexed
 * by the RADEON_EMIT_* / R200_EMIT_* packet IDs used in
 * radeon_check_and_fixup_packets(), so the order here must match those
 * ID definitions exactly -- do not reorder or insert entries.
 */
static struct {
	int start;		/* first register of the range */
	int len;		/* number of payload dwords */
	const char *name;	/* for error/debug messages */
} packet[RADEON_MAX_STATE_PACKETS] = {
	{
	RADEON_PP_MISC, 7, "RADEON_PP_MISC"}, {
	RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"}, {
	RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"}, {
	RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"}, {
	RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"}, {
	RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"}, {
	RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"}, {
	RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"}, {
	RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"}, {
	RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"}, {
	RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"}, {
	RADEON_RE_MISC, 1, "RADEON_RE_MISC"}, {
	RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"}, {
	RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"}, {
	RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"}, {
	RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"}, {
	RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"}, {
	RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"}, {
	RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"}, {
	RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"}, {
	RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"}, {
	R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"}, {
	R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"}, {
	R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"}, {
	R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"}, {
	R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"}, {
	R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"}, {
	R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"}, {
	R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"}, {
	R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{
	R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"}, {
	R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"}, {
	R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"}, {
	R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"}, {
	R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"}, {
	R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{
	R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"}, {
	R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"}, {
	R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"}, {
	R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"}, {
	R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"}, {
	R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"}, {
	R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"}, {
	R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"}, {
	R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"}, {
	R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"}, {
	R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"}, {
	R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"}, {
	R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"}, {
	R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{
	R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"}, {
	R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"}, {
	R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"}, {
	R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"}, {
	R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"}, {
	R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"}, {
	R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"}, {
	R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"}, {
	R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"}, {
	R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"}, {
	R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"}, {
	R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{
	R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"},	/* 62 */
	{
	R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"}, {
	R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"}, {
	R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"}, {
	R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"}, {
	R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"}, {
	R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"}, {
	R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"}, {
	R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"}, {
	R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"}, {
	R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"}, {
	RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"}, {
	RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"}, {
	RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"}, {
	R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"}, {
	R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{
	RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"}, {
	RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"}, {
	RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"}, {
	RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"}, {
	RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"}, {
	RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"}, {
	R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
};
590
591 /* ================================================================
592  * Performance monitoring functions
593  */
594
/* Fill a small solid-colour rectangle on the current render target --
 * used only by the performance-box overlay below.  (x, y) is relative to
 * the first cliprect; (r, g, b) are 8-bit channels packed to match the
 * card's colour format.
 */
static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	/* Position the box relative to the client's first cliprect. */
	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	/* Pack the RGB triple into the framebuffer's pixel format. */
	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		/* Opaque alpha. */
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	/* Let 3D rendering finish, then open up the full write mask. */
	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	/* Draw into whichever buffer is currently being displayed. */
	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		OUT_RING(dev_priv->front_pitch_offset);
	} else {
		OUT_RING(dev_priv->back_pitch_offset);
	}

	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}
643
644 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
645 {
646         /* Collapse various things into a wait flag -- trying to
647          * guess if userspase slept -- better just to have them tell us.
648          */
649         if (dev_priv->stats.last_frame_reads > 1 ||
650             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
651                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
652         }
653
654         if (dev_priv->stats.freelist_loops) {
655                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
656         }
657
658         /* Purple box for page flipping
659          */
660         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
661                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
662
663         /* Red box if we have to wait for idle at any point
664          */
665         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
666                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
667
668         /* Blue box: lost context?
669          */
670
671         /* Yellow box for texture swaps
672          */
673         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
674                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
675
676         /* Green box if hardware never idles (as far as we can tell)
677          */
678         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
679                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
680
681         /* Draw bars indicating number of buffers allocated
682          * (not a great measure, easily confused)
683          */
684         if (dev_priv->stats.requested_bufs) {
685                 if (dev_priv->stats.requested_bufs > 100)
686                         dev_priv->stats.requested_bufs = 100;
687
688                 radeon_clear_box(dev_priv, 4, 16,
689                                  dev_priv->stats.requested_bufs, 4,
690                                  196, 128, 128);
691         }
692
693         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
694
695 }
696
697 /* ================================================================
698  * CP command dispatch functions
699  */
700
/* Emit CP commands that clear the requested buffers (front/back color,
 * depth, stencil) for every cliprect currently in the SAREA.
 *
 * Color buffers are cleared with 2D solid-color fills.  Depth/stencil
 * is cleared one of three ways, chosen below:
 *   - hyper-z fast clear via 3D_CLEAR_ZMASK tile packets (when the
 *     client passes RADEON_CLEAR_FASTZ),
 *   - on R200-class microcode, by rendering quads with TCL disabled,
 *   - otherwise by rendering quads with the legacy 3D_DRAW_IMMD packet.
 *
 * @dev          DRM device; dev->dev_private must be the radeon private.
 * @clear        client-supplied clear parameters (flags, colors, masks).
 * @depth_boxes  per-cliprect quad coordinates/depth, pre-converted by
 *               the ioctl layer to the raw register format (ui[] words).
 *
 * Side effects: bumps stats.clears, resets sarea ctx_owner so 3D state
 * is re-emitted later, and increments sarea last_clear at the end.
 */
static void radeon_cp_dispatch_clear(drm_device_t * dev,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	/* While page-flipped onto page 1 the roles of the front and back
	 * buffers are exchanged, so swap the FRONT/BACK request bits to
	 * hit the buffers the client actually means.
	 */
	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}

	/* Color buffer clears: one 2D PAINT_MULTI fill per cliprect. */
	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->
					  color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->
					  color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}

	/* hyper z clear */
	/* no docs available, based on reverse engeneering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {

		int i;	/* NOTE(review): shadows the function-scope 'i' */
		/* depth pixels per scanline: 16-bit Z is 2 bytes/pixel,
		 * everything else here is treated as 4 bytes/pixel.
		 */
		int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z? 
			(dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4);
		
		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
			((clear->depth_mask & 0xff) << 24);
	
		
		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
		/* FIXME : reverse engineer that for Rx00 cards */
		/* FIXME : the mask supposedly contains low-res z values. So can't set
		   just to the max (0xff? or actually 0x3fff?), need to take z clear
		   value into account? */
		/* pattern seems to work for r100, though get slight
		   rendering errors with glxgears. If hierz is not enabled for r100,
		   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
		   other ones are ignored, and the same clear mask can be used. That's
		   very different behaviour than R200 which needs different clear mask
		   and different number of tiles to clear if hierz is enabled or not !?!
		*/
			clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
		}
		else {
		/* clear mask : chooses the clearing pattern.
		   rv250: could be used to clear only parts of macrotiles
		   (but that would get really complicated...)?
		   bit 0 and 1 (either or both of them ?!?!) are used to
		   not clear tile (or maybe one of the bits indicates if the tile is
		   compressed or not), bit 2 and 3 to not clear tile 1,...,.
		   Pattern is as follows:
			| 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
		   bits -------------------------------------------------
			| 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
		   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
		   covers 256 pixels ?!?
		*/
			clearmask = 0x0;
		}

		BEGIN_RING( 8 );
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
			tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
		ADVANCE_RING();

		/* Emit one CLEAR_ZMASK packet per tile row of each cliprect;
		 * tile geometry differs per chip family (branches below).
		 */
		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					/* first tile */
					OUT_RING( tileoffset * 8 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 4 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
			else if (dev_priv->microcode_version==UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
				nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truely 4x4 granularity is desired ? */
					OUT_RING( tileoffset * 16 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 1 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			}
			else { /* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
				nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					OUT_RING( tileoffset * 128 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 4 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200)
			&& (flags & RADEON_USE_HIERZ))
		/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
		/* FIXME : the mask supposedly contains low-res z values. So can't set
		   just to the max (0xff? or actually 0x3fff?), need to take z clear
		   value into account? */
		{
			BEGIN_RING( 4 );
			OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
			OUT_RING( 0x0 ); /* First tile */
			OUT_RING( 0x3cc0 );
			OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->microcode_version == UCODE_R200) &&
		(flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		/* Writes to the color buffer are masked off entirely so the
		 * quads only touch depth/stencil.
		 */
		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
				RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		/* One immediate-mode triangle (rect list) per cliprect;
		 * 0x3f800000 is 1.0f as raw bits for the W coordinate.
		 */
		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
		/* Pre-R200 path: clear depth/stencil by drawing quads with
		 * the legacy 3D_DRAW_IMMD packet.
		 */
		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		
		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
				RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
1170
/* Copy the back buffer to the front buffer (a "swap" by blit) for
 * every cliprect in the SAREA, then emit the frame-age marker that
 * clients throttle against.  If page flipping is active and the
 * current page is the back buffer, the blit direction is reversed so
 * the visible buffer is always the destination.
 */
static void radeon_cp_dispatch_swap(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes(dev_priv);

	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);

		BEGIN_RING(7);

		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (dev_priv->color_fmt << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

		/* Make this work even if front & back are flipped:
		 */
		if (dev_priv->current_page == 0) {
			OUT_RING(dev_priv->back_pitch_offset);
			OUT_RING(dev_priv->front_pitch_offset);
		} else {
			OUT_RING(dev_priv->front_pitch_offset);
			OUT_RING(dev_priv->back_pitch_offset);
		}

		/* src x/y, dst x/y (identical), then width/height */
		OUT_RING((x << 16) | y);
		OUT_RING((x << 16) | y);
		OUT_RING((w << 16) | h);

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING(4);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}
1245
/* Perform a page flip: repoint both CRTC scanout offsets at the other
 * color buffer, toggle current_page/pfCurrentPage, and emit the
 * frame-age marker clients throttle against.
 */
static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
	/* Offset of the buffer we are flipping TO (the one not currently
	 * being scanned out).
	 */
	int offset = (dev_priv->current_page == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
		  __FUNCTION__,
		  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	/* CRTC1 offset includes the frame's x/y position within the buffer;
	 * (color_fmt - 2) is used here as the bytes-per-pixel multiplier
	 * and the result is 8-byte aligned by the ~7 mask.
	 */
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
	    1 - dev_priv->current_page;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}
1292
1293 static int bad_prim_vertex_nr(int primitive, int nr)
1294 {
1295         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1296         case RADEON_PRIM_TYPE_NONE:
1297         case RADEON_PRIM_TYPE_POINT:
1298                 return nr < 1;
1299         case RADEON_PRIM_TYPE_LINE:
1300                 return (nr & 1) || nr == 0;
1301         case RADEON_PRIM_TYPE_LINE_STRIP:
1302                 return nr < 2;
1303         case RADEON_PRIM_TYPE_TRI_LIST:
1304         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1305         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1306         case RADEON_PRIM_TYPE_RECT_LIST:
1307                 return nr % 3 || nr == 0;
1308         case RADEON_PRIM_TYPE_TRI_FAN:
1309         case RADEON_PRIM_TYPE_TRI_STRIP:
1310                 return nr < 3;
1311         default:
1312                 return 1;
1313         }
1314 }
1315
/* Description of one TCL primitive to dispatch from a vertex buffer. */
typedef struct {
	unsigned int start;	/* byte offset of the primitive's data within the buffer */
	unsigned int finish;	/* byte offset just past the primitive's data */
	unsigned int prim;	/* RADEON_PRIM_TYPE_* | walk/format flags */
	unsigned int numverts;	/* number of vertices to render */
	unsigned int offset;	/* buffer offset; only used for indexed dispatch */
	unsigned int vc_format;	/* vertex format word written to the CP */
} drm_radeon_tcl_prim_t;
1324
/* Emit CP commands to render a non-indexed primitive from a DMA
 * vertex buffer, re-issuing the draw once per SAREA cliprect (at
 * least once even when no cliprects are present).  Rejects primitives
 * with vertex counts that are illegal for their type.
 */
static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GPU address of the vertex data: GART buffer base + this
	 * buffer's offset + the primitive's start offset.
	 */
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	/* do/while so the draw is emitted once even when nbox == 0. */
	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}
1370
/* Mark a DMA buffer as pending and stamp it with a new dispatch age.
 * The buffer can be reused once the emitted age marker has passed
 * through the ring (i.e. the hardware is done reading it).
 */
static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}
1387
/* Fire the [start, end) byte range of a DMA buffer as an indirect CP
 * buffer.  Does nothing when the range is empty.  The range is padded
 * to an even number of dwords with a Type-2 no-op packet because the
 * CP requires indirect buffers to be dword-pair aligned.
 */
static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		/* GPU address of the command data within the GART buffers. */
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		/* Round the byte length up to whole dwords. */
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}
1421
/* Dispatch an indexed primitive.  The client's element buffer is patched
 * in place with a RNDR_GEN_INDX_PRIM packet header (5 dwords), then the
 * whole span is fired as an indirect buffer once per SAREA cliprect.
 */
static void radeon_cp_dispatch_indices(drm_device_t * dev,
                                       drm_buf_t * elt_buf,
                                       drm_radeon_tcl_prim_t * prim)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        /* GPU address of the vertex data the indices refer to. */
        int offset = dev_priv->gart_buffers_offset + prim->offset;
        u32 *data;
        int dwords;
        int i = 0;
        /* First index lives just past the packet header we write below. */
        int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
        /* Number of 16-bit indices between header and finish. */
        int count = (prim->finish - start) / sizeof(u16);
        int nbox = sarea_priv->nbox;

        DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
                  prim->prim,
                  prim->vc_format,
                  prim->start, prim->finish, prim->offset, prim->numverts);

        if (bad_prim_vertex_nr(prim->prim, count)) {
                DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
                return;
        }

        /* Reject empty ranges and starts not 8-byte aligned (header
         * must land on an even-dword boundary).
         */
        if (start >= prim->finish || (prim->start & 0x7)) {
                DRM_ERROR("buffer prim %d\n", prim->prim);
                return;
        }

        dwords = (prim->finish - prim->start + 3) / sizeof(u32);

        /* CPU view of the element buffer, used to patch in the header. */
        data = (u32 *) ((char *)dev->agp_buffer_map->handle +
                        elt_buf->offset + prim->start);

        data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
        data[1] = offset;
        data[2] = prim->numverts;
        data[3] = prim->vc_format;
        data[4] = (prim->prim |
                   RADEON_PRIM_WALK_IND |
                   RADEON_COLOR_ORDER_RGBA |
                   RADEON_VTX_FMT_RADEON_MODE |
                   (count << RADEON_NUM_VERTICES_SHIFT));

        /* Replay the patched buffer once per cliprect (at least once
         * even when nbox == 0).
         */
        do {
                if (i < nbox)
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                radeon_cp_dispatch_indirect(dev, elt_buf,
                                            prim->start, prim->finish);

                i++;
        } while (i < nbox);

}
1477
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

/* Upload a texture image from user space into video memory using the 2D
 * blitter.  Images larger than RADEON_MAX_TEXTURE_SIZE bytes are split
 * into multiple passes; on each pass the data is staged through a DMA
 * buffer and blitted to the destination offset.  If no DMA buffer is
 * available the updated image parameters are copied back to user space
 * and EAGAIN is returned so the client can retry the remainder.
 * Returns 0 on success or a DRM_ERR code.
 */
static int radeon_cp_dispatch_texture(DRMFILE filp,
                                      drm_device_t * dev,
                                      drm_radeon_texture_t * tex,
                                      drm_radeon_tex_image_t * image)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_file_t *filp_priv;
        drm_buf_t *buf;
        u32 format;
        u32 *buffer;
        const u8 __user *data;
        int size, dwords, tex_width, blit_width, spitch;
        u32 height;
        int i;
        u32 texpitch, microtile;
        u32 offset;
        RING_LOCALS;

        DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

        /* Validate/relocate the destination offset before letting the
         * blitter write there.
         */
        if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
                DRM_ERROR("Invalid destination offset\n");
                return DRM_ERR(EINVAL);
        }

        dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

        /* Flush the pixel cache.  This ensures no pixel data gets mixed
         * up with the texture data from the host data blit, otherwise
         * part of the texture image may be corrupted.
         */
        BEGIN_RING(4);
        RADEON_FLUSH_CACHE();
        RADEON_WAIT_UNTIL_IDLE();
        ADVANCE_RING();

        /* The compiler won't optimize away a division by a variable,
         * even if the only legal values are powers of two.  Thus, we'll
         * use a shift instead.
         */
        switch (tex->format) {
        case RADEON_TXFORMAT_ARGB8888:
        case RADEON_TXFORMAT_RGBA8888:
                format = RADEON_COLOR_FORMAT_ARGB8888;
                tex_width = tex->width * 4;
                blit_width = image->width * 4;
                break;
        case RADEON_TXFORMAT_AI88:
        case RADEON_TXFORMAT_ARGB1555:
        case RADEON_TXFORMAT_RGB565:
        case RADEON_TXFORMAT_ARGB4444:
        case RADEON_TXFORMAT_VYUY422:
        case RADEON_TXFORMAT_YVYU422:
                format = RADEON_COLOR_FORMAT_RGB565;
                tex_width = tex->width * 2;
                blit_width = image->width * 2;
                break;
        case RADEON_TXFORMAT_I8:
        case RADEON_TXFORMAT_RGB332:
                format = RADEON_COLOR_FORMAT_CI8;
                tex_width = tex->width * 1;
                blit_width = image->width * 1;
                break;
        default:
                DRM_ERROR("invalid texture format %d\n", tex->format);
                return DRM_ERR(EINVAL);
        }
        /* Source pitch in 64-byte units for the blit packet; a zero
         * pitch is only legal for single-scanline images.
         */
        spitch = blit_width >> 6;
        if (spitch == 0 && image->height > 1)
                return DRM_ERR(EINVAL);

        texpitch = tex->pitch;
        if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
                microtile = 1;
                if (tex_width < 64) {
                        texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
                        /* we got tiled coordinates, untile them */
                        image->x *= 2;
                }
        }
        else microtile = 0;

        DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

        do {
                DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
                          tex->offset >> 10, tex->pitch, tex->format,
                          image->x, image->y, image->width, image->height);

                /* Make a copy of some parameters in case we have to
                 * update them for a multi-pass texture blit.
                 */
                height = image->height;
                data = (const u8 __user *)image->data;

                size = height * blit_width;

                if (size > RADEON_MAX_TEXTURE_SIZE) {
                        /* Clamp this pass to as many whole scanlines as
                         * fit in one DMA buffer.
                         */
                        height = RADEON_MAX_TEXTURE_SIZE / blit_width;
                        size = height * blit_width;
                } else if (size < 4 && size > 0) {
                        size = 4;
                } else if (size == 0) {
                        return 0;
                }

                buf = radeon_freelist_get(dev);
                /* NOTE(review): the idle-and-retry path below is
                 * deliberately disabled by the "0 &&"; on freelist
                 * exhaustion we fall straight through to EAGAIN.
                 */
                if (0 && !buf) {
                        radeon_do_cp_idle(dev_priv);
                        buf = radeon_freelist_get(dev);
                }
                if (!buf) {
                        DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
                        /* Hand the updated parameters back so user space
                         * can resume the upload where we left off.
                         */
                        if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
                                return DRM_ERR(EFAULT);
                        return DRM_ERR(EAGAIN);
                }

                /* Dispatch the indirect buffer.
                 */
                buffer =
                    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
                dwords = size / 4;

                if (microtile) {
                        /* texture micro tiling in use, minimum texture width is thus 16 bytes.
                           however, we cannot use blitter directly for texture width < 64 bytes,
                           since minimum tex pitch is 64 bytes and we need this to match
                           the texture width, otherwise the blitter will tile it wrong.
                           Thus, tiling manually in this case. Additionally, need to special
                           case tex height = 1, since our actual image will have height 2
                           and we need to ensure we don't read beyond the texture size
                           from user space. */
                        if (tex->height == 1) {
                                if (tex_width >= 64 || tex_width <= 16) {
                                        if (DRM_COPY_FROM_USER(buffer, data,
                                                       tex_width * sizeof(u32))) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                } else if (tex_width == 32) {
                                        /* Untile the single row: first 16
                                         * bytes, then the second half at
                                         * a 32-byte offset in the buffer.
                                         */
                                        if (DRM_COPY_FROM_USER(buffer, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        if (DRM_COPY_FROM_USER(buffer + 8, data + 16, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                }
                        } else if (tex_width >= 64 || tex_width == 16) {
                                /* Widths the blitter can tile itself:
                                 * straight bulk copy.
                                 */
                                if (DRM_COPY_FROM_USER(buffer, data,
                                                       dwords * sizeof(u32))) {
                                        DRM_ERROR("EFAULT on data, %d dwords\n",
                                                  dwords);
                                        return DRM_ERR(EFAULT);
                                }
                        } else if (tex_width < 16) {
                                /* Narrow rows: one row per 16-byte slot. */
                                for (i = 0; i < tex->height; i++) {
                                        if (DRM_COPY_FROM_USER(buffer, data, tex_width)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        buffer += 4;
                                        data += tex_width;
                                }
                        } else if (tex_width == 32) {
                        /* TODO: make sure this works when not fitting in one buffer
                                (i.e. 32bytes x 2048...) */
                                /* Manually interleave pairs of rows into
                                 * the 2x2 microtile layout.
                                 */
                                for (i = 0; i < tex->height; i += 2) {
                                        if (DRM_COPY_FROM_USER(buffer, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        data += 16;
                                        if (DRM_COPY_FROM_USER(buffer + 8, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        data += 16;
                                        if (DRM_COPY_FROM_USER(buffer + 4, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        data += 16;
                                        if (DRM_COPY_FROM_USER(buffer + 12, data, 16)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        data += 16;
                                        buffer += 16;
                                }
                        }
                }
                else {
                        if (tex_width >= 32) {
                                /* Texture image width is larger than the minimum, so we
                                 * can upload it directly.
                                 */
                                if (DRM_COPY_FROM_USER(buffer, data,
                                                       dwords * sizeof(u32))) {
                                        DRM_ERROR("EFAULT on data, %d dwords\n",
                                                  dwords);
                                        return DRM_ERR(EFAULT);
                                }
                        } else {
                                /* Texture image width is less than the minimum, so we
                                 * need to pad out each image scanline to the minimum
                                 * width.
                                 */
                                for (i = 0; i < tex->height; i++) {
                                        if (DRM_COPY_FROM_USER(buffer, data, tex_width)) {
                                                DRM_ERROR("EFAULT on pad, %d bytes\n",
                                                          tex_width);
                                                return DRM_ERR(EFAULT);
                                        }
                                        buffer += 8;
                                        data += tex_width;
                                }
                        }
                }

                buf->filp = filp;
                buf->used = size;
                offset = dev_priv->gart_buffers_offset + buf->offset;
                /* Blit the staged data from the DMA buffer into the
                 * destination surface.
                 */
                BEGIN_RING(9);
                OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
                OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
                         RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                         RADEON_GMC_BRUSH_NONE |
                         (format << 8) |
                         RADEON_GMC_SRC_DATATYPE_COLOR |
                         RADEON_ROP3_S |
                         RADEON_DP_SRC_SOURCE_MEMORY |
                         RADEON_GMC_CLR_CMP_CNTL_DIS |
                         RADEON_GMC_WR_MSK_DIS );
                OUT_RING((spitch << 22) | (offset >> 10));
                OUT_RING((texpitch << 22) | (tex->offset >> 10));
                OUT_RING(0);
                OUT_RING((image->x << 16) | image->y);
                OUT_RING((image->width << 16) | height);
                RADEON_WAIT_UNTIL_2D_IDLE();
                ADVANCE_RING();

                radeon_cp_discard_buffer(dev, buf);

                /* Update the input parameters for next time */
                image->y += height;
                image->height -= height;
                image->data = (const u8 __user *)image->data + size;
        } while (image->height > 0);

        /* Flush the pixel cache after the blit completes.  This ensures
         * the texture data is written out to memory before rendering
         * continues.
         */
        BEGIN_RING(4);
        RADEON_FLUSH_CACHE();
        RADEON_WAIT_UNTIL_2D_IDLE();
        ADVANCE_RING();
        return 0;
}
1750
1751 static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
1752 {
1753         drm_radeon_private_t *dev_priv = dev->dev_private;
1754         int i;
1755         RING_LOCALS;
1756         DRM_DEBUG("\n");
1757
1758         BEGIN_RING(35);
1759
1760         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1761         OUT_RING(0x00000000);
1762
1763         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1764         for (i = 0; i < 32; i++) {
1765                 OUT_RING(stipple[i]);
1766         }
1767
1768         ADVANCE_RING();
1769 }
1770
1771 static void radeon_apply_surface_regs(int surf_index, drm_radeon_private_t *dev_priv)
1772 {
1773         if (!dev_priv->mmio)
1774                 return;
1775
1776         radeon_do_cp_idle(dev_priv);
1777
1778         RADEON_WRITE(RADEON_SURFACE0_INFO + 16*surf_index,
1779                 dev_priv->surfaces[surf_index].flags);
1780         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16*surf_index,
1781                 dev_priv->surfaces[surf_index].lower);
1782         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16*surf_index,
1783                 dev_priv->surfaces[surf_index].upper);
1784 }
1785
/* Allocates a virtual surface
 * doesn't always allocate a real surface, will stretch an existing 
 * surface when possible.
 *
 * Returns the index of the virtual surface on success, -1 on failure
 * (bad parameters, overlap with an existing surface, or no free slot).
 *
 * Note that refcount can be at most 2, since during a free refcount=3
 * might mean we have to allocate a new surface which might not always
 * be available.
 * For example : we allocate three contiguous surfaces ABC. If B is 
 * freed, we suddenly need two surfaces to store A and C, which might
 * not always be available.
 */
static int alloc_surface(drm_radeon_surface_alloc_t* new, drm_radeon_private_t *dev_priv, DRMFILE filp)
{
        struct radeon_virt_surface *s;
        int i;
        int virt_surface_index;
        uint32_t new_upper, new_lower;

        /* Bounds are inclusive: upper is the last byte of the range. */
        new_lower = new->address;
        new_upper = new_lower + new->size - 1;

        /* sanity check: non-empty range, non-zero flags/size, and both
         * bounds aligned per RADEON_SURF_ADDRESS_FIXED_MASK.
         */
        if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
                ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) != RADEON_SURF_ADDRESS_FIXED_MASK) ||
                ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
                return -1;

        /* make sure there is no overlap with existing surfaces */
        for (i = 0; i < RADEON_MAX_SURFACES; i++) {
                if ((dev_priv->surfaces[i].refcount != 0) &&
                (( (new_lower >= dev_priv->surfaces[i].lower) &&
                        (new_lower < dev_priv->surfaces[i].upper) ) ||
                 ( (new_lower < dev_priv->surfaces[i].lower) &&
                        (new_upper > dev_priv->surfaces[i].lower) )) ){
                return -1;}
        }

        /* find a virtual surface: a free entry has a zero filp */
        for (i = 0; i < 2*RADEON_MAX_SURFACES; i++)
                if (dev_priv->virt_surfaces[i].filp == 0)
                        break;
        if (i == 2*RADEON_MAX_SURFACES) {
                return -1;}
        virt_surface_index = i;

        /* try to reuse an existing surface */
        for (i = 0; i < RADEON_MAX_SURFACES; i++) {
                /* extend before: new range ends exactly where an existing
                 * single-reference surface with identical flags begins.
                 */
                if ((dev_priv->surfaces[i].refcount == 1) &&
                  (new->flags == dev_priv->surfaces[i].flags) &&
                  (new_upper + 1 == dev_priv->surfaces[i].lower)) {
                        s = &(dev_priv->virt_surfaces[virt_surface_index]);
                        s->surface_index = i;
                        s->lower = new_lower;
                        s->upper = new_upper;
                        s->flags = new->flags;
                        s->filp = filp;
                        dev_priv->surfaces[i].refcount++;
                        dev_priv->surfaces[i].lower = s->lower;
                        radeon_apply_surface_regs(s->surface_index, dev_priv);
                        return virt_surface_index;
                }

                /* extend after: new range begins exactly where the
                 * existing surface ends.
                 */
                if ((dev_priv->surfaces[i].refcount == 1) &&
                  (new->flags == dev_priv->surfaces[i].flags) &&
                  (new_lower == dev_priv->surfaces[i].upper + 1)) {
                        s = &(dev_priv->virt_surfaces[virt_surface_index]);
                        s->surface_index = i;
                        s->lower = new_lower;
                        s->upper = new_upper;
                        s->flags = new->flags;
                        s->filp = filp;
                        dev_priv->surfaces[i].refcount++;
                        dev_priv->surfaces[i].upper = s->upper;
                        radeon_apply_surface_regs(s->surface_index, dev_priv);
                        return virt_surface_index;
                }
        }

        /* okay, we need a new one: claim the first unreferenced slot */
        for (i = 0; i < RADEON_MAX_SURFACES; i++) {
                if (dev_priv->surfaces[i].refcount == 0) {
                        s = &(dev_priv->virt_surfaces[virt_surface_index]);
                        s->surface_index = i;
                        s->lower = new_lower;
                        s->upper = new_upper;
                        s->flags = new->flags;
                        s->filp = filp;
                        dev_priv->surfaces[i].refcount = 1;
                        dev_priv->surfaces[i].lower = s->lower;
                        dev_priv->surfaces[i].upper = s->upper;
                        dev_priv->surfaces[i].flags = s->flags;
                        radeon_apply_surface_regs(s->surface_index, dev_priv);
                        return virt_surface_index;
                }
        }

        /* we didn't find anything */
        return -1;
}
1887
1888 static int free_surface(DRMFILE filp, drm_radeon_private_t *dev_priv, int lower)
1889 {
1890         struct radeon_virt_surface *s;
1891         int i;
1892         /* find the virtual surface */
1893         for(i = 0; i < 2*RADEON_MAX_SURFACES; i++) {
1894                 s = &(dev_priv->virt_surfaces[i]);
1895                 if (s->filp) {
1896                         if ((lower == s->lower) && (filp == s->filp)) {
1897                                 if (dev_priv->surfaces[s->surface_index].lower == s->lower)
1898                                         dev_priv->surfaces[s->surface_index].lower = s->upper;
1899
1900                                 if (dev_priv->surfaces[s->surface_index].upper == s->upper)
1901                                         dev_priv->surfaces[s->surface_index].upper = s->lower;
1902
1903                                 dev_priv->surfaces[s->surface_index].refcount--;
1904                                 if (dev_priv->surfaces[s->surface_index].refcount == 0)
1905                                         dev_priv->surfaces[s->surface_index].flags = 0;
1906                                 s->filp = 0;
1907                                 radeon_apply_surface_regs(s->surface_index, dev_priv);
1908                                 return 0;
1909                         }
1910                 }
1911         }
1912         return 1;
1913 }
1914
1915 static void radeon_surfaces_release(DRMFILE filp, drm_radeon_private_t *dev_priv)
1916 {
1917         int i;
1918         for( i = 0; i < 2*RADEON_MAX_SURFACES; i++)
1919         {
1920                 if (dev_priv->virt_surfaces[i].filp == filp)
1921                         free_surface(filp, dev_priv, dev_priv->virt_surfaces[i].lower);
1922         }
1923 }
1924
1925 /* ================================================================
1926  * IOCTL functions
1927  */
1928
1929 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1930 {
1931         DRM_DEVICE;
1932         drm_radeon_private_t *dev_priv = dev->dev_private;
1933         drm_radeon_surface_alloc_t alloc;
1934
1935         if (!dev_priv) {
1936                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1937                 return DRM_ERR(EINVAL);
1938         }
1939
1940         DRM_COPY_FROM_USER_IOCTL(alloc, (drm_radeon_surface_alloc_t __user *)data,
1941                                   sizeof(alloc));
1942
1943         if (alloc_surface(&alloc, dev_priv, filp) == -1)
1944                 return DRM_ERR(EINVAL);
1945         else
1946                 return 0;
1947 }
1948
1949 static int radeon_surface_free(DRM_IOCTL_ARGS)
1950 {
1951         DRM_DEVICE;
1952         drm_radeon_private_t *dev_priv = dev->dev_private;
1953         drm_radeon_surface_free_t memfree;
1954
1955         if (!dev_priv) {
1956                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1957                 return DRM_ERR(EINVAL);
1958         }
1959
1960         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *)data,
1961                                   sizeof(memfree) );
1962
1963         if (free_surface(filp, dev_priv, memfree.address))
1964                 return DRM_ERR(EINVAL);
1965         else
1966                 return 0;
1967 }
1968
/* DRM_RADEON_CLEAR ioctl: clear color/depth/stencil within the current
 * SAREA cliprects.  Depth clear rectangles are copied in from user space
 * (one per cliprect) and handed to radeon_cp_dispatch_clear().
 */
static int radeon_cp_clear(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        drm_radeon_clear_t clear;
        drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
        DRM_DEBUG("\n");

        LOCK_TEST_WITH_RETURN(dev, filp);

        DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
                                 sizeof(clear));

        RING_SPACE_TEST_WITH_RETURN(dev_priv);

        /* Clamp the client-supplied box count to the SAREA limit so the
         * copy below cannot overrun depth_boxes.
         */
        if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
                sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

        if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
                               sarea_priv->nbox * sizeof(depth_boxes[0])))
                return DRM_ERR(EFAULT);

        radeon_cp_dispatch_clear(dev, &clear, depth_boxes);

        COMMIT_RING();
        return 0;
}
1997
/* Enable page flipping: set the FLIP_CNTL bit on both CRTC offset control
 * registers so subsequent flips can switch the scanout base address.
 * (Not sure why this isn't set all the time.)
 */
static int radeon_do_init_pageflip(drm_device_t * dev)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        RING_LOCALS;

        DRM_DEBUG("\n");

        BEGIN_RING(6);
        /* Let 3D rendering drain before touching CRTC control registers. */
        RADEON_WAIT_UNTIL_3D_IDLE();
        OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
        OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
                 RADEON_CRTC_OFFSET_FLIP_CNTL);
        OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
        OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
                 RADEON_CRTC_OFFSET_FLIP_CNTL);
        ADVANCE_RING();

        dev_priv->page_flipping = 1;
        /* Start from page 0 and publish the current page to the SAREA. */
        dev_priv->current_page = 0;
        dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

        return 0;
}
2023
/* Called whenever a client dies, from drm_release.
 * NOTE:  Lock isn't necessarily held when this is called!
 * Ensures the front page (page 0) is being scanned out again before page
 * flipping is disabled.
 */
static int radeon_do_cleanup_pageflip(drm_device_t * dev)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        DRM_DEBUG("\n");

        /* If the back page is currently displayed, flip back to page 0. */
        if (dev_priv->current_page != 0)
                radeon_cp_dispatch_flip(dev);

        dev_priv->page_flipping = 0;
        return 0;
}
2038
/* Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.
 */
/* DRM_RADEON_FLIP ioctl: flip the scanout to the other page, lazily
 * enabling page flipping on first use.
 */
static int radeon_cp_flip(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
        DRM_DEBUG("\n");

        LOCK_TEST_WITH_RETURN(dev, filp);

        RING_SPACE_TEST_WITH_RETURN(dev_priv);

        /* Lazy init: turn on CRTC flip control the first time through. */
        if (!dev_priv->page_flipping)
                radeon_do_init_pageflip(dev);

        radeon_cp_dispatch_flip(dev);

        COMMIT_RING();
        return 0;
}
2060
/* DRM_RADEON_SWAP ioctl: blit the back buffer to the front buffer within
 * the current SAREA cliprects, then release context ownership.
 */
static int radeon_cp_swap(DRM_IOCTL_ARGS)
{
        DRM_DEVICE;
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        DRM_DEBUG("\n");

        LOCK_TEST_WITH_RETURN(dev, filp);

        RING_SPACE_TEST_WITH_RETURN(dev_priv);

        /* Clamp the client-supplied box count to the SAREA limit. */
        if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
                sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

        radeon_cp_dispatch_swap(dev);
        dev_priv->sarea_priv->ctx_owner = 0;

        COMMIT_RING();
        return 0;
}
2081
/* Vertex ioctl: validate a client-submitted vertex buffer request,
 * flush any dirty context/texture state, and dispatch the vertices
 * to the CP ring.
 *
 * All fields of the drm_radeon_vertex_t argument originate in
 * userland and are range-checked before use.  Returns 0 on success,
 * EINVAL on any validation failure, EFAULT via the copy macros.
 */
static int radeon_cp_vertex(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}
	sarea_priv = dev_priv->sarea_priv;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);

	/* Reject out-of-range buffer indices and primitive types. */
	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	/* The buffer must belong to the calling file handle and must not
	 * already be queued to the hardware.
	 */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if (vertex.count) {
		buf->used = vertex.count;	/* not used? */

		/* Emit any dirty state (except cliprects, which are
		 * handled per primitive at dispatch time).
		 */
		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, filp_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return DRM_ERR(EINVAL);
			}

			/* Texture image uploads and quiescence are one-shot
			 * requests; clear them once serviced.
			 */
			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count;	/* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, buf, &prim);
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2170
2171 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2172 {
2173         DRM_DEVICE;
2174         drm_radeon_private_t *dev_priv = dev->dev_private;
2175         drm_file_t *filp_priv;
2176         drm_radeon_sarea_t *sarea_priv;
2177         drm_device_dma_t *dma = dev->dma;
2178         drm_buf_t *buf;
2179         drm_radeon_indices_t elts;
2180         drm_radeon_tcl_prim_t prim;
2181         int count;
2182
2183         LOCK_TEST_WITH_RETURN(dev, filp);
2184
2185         if (!dev_priv) {
2186                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2187                 return DRM_ERR(EINVAL);
2188         }
2189         sarea_priv = dev_priv->sarea_priv;
2190
2191         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2192
2193         DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2194                                  sizeof(elts));
2195
2196         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2197                   DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2198
2199         if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2200                 DRM_ERROR("buffer index %d (of %d max)\n",
2201                           elts.idx, dma->buf_count - 1);
2202                 return DRM_ERR(EINVAL);
2203         }
2204         if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2205                 DRM_ERROR("buffer prim %d\n", elts.prim);
2206                 return DRM_ERR(EINVAL);
2207         }
2208
2209         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2210         VB_AGE_TEST_WITH_RETURN(dev_priv);
2211
2212         buf = dma->buflist[elts.idx];
2213
2214         if (buf->filp != filp) {
2215                 DRM_ERROR("process %d using buffer owned by %p\n",
2216                           DRM_CURRENTPID, buf->filp);
2217                 return DRM_ERR(EINVAL);
2218         }
2219         if (buf->pending) {
2220                 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2221                 return DRM_ERR(EINVAL);
2222         }
2223
2224         count = (elts.end - elts.start) / sizeof(u16);
2225         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2226
2227         if (elts.start & 0x7) {
2228                 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2229                 return DRM_ERR(EINVAL);
2230         }
2231         if (elts.start < buf->used) {
2232                 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2233                 return DRM_ERR(EINVAL);
2234         }
2235
2236         buf->used = elts.end;
2237
2238         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2239                 if (radeon_emit_state(dev_priv, filp_priv,
2240                                       &sarea_priv->context_state,
2241                                       sarea_priv->tex_state,
2242                                       sarea_priv->dirty)) {
2243                         DRM_ERROR("radeon_emit_state failed\n");
2244                         return DRM_ERR(EINVAL);
2245                 }
2246
2247                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2248                                        RADEON_UPLOAD_TEX1IMAGES |
2249                                        RADEON_UPLOAD_TEX2IMAGES |
2250                                        RADEON_REQUIRE_QUIESCENCE);
2251         }
2252
2253         /* Build up a prim_t record:
2254          */
2255         prim.start = elts.start;
2256         prim.finish = elts.end;
2257         prim.prim = elts.prim;
2258         prim.offset = 0;        /* offset from start of dma buffers */
2259         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2260         prim.vc_format = dev_priv->sarea_priv->vc_format;
2261
2262         radeon_cp_dispatch_indices(dev, buf, &prim);
2263         if (elts.discard) {
2264                 radeon_cp_discard_buffer(dev, buf);
2265         }
2266
2267         COMMIT_RING();
2268         return 0;
2269 }
2270
2271 static int radeon_cp_texture(DRM_IOCTL_ARGS)
2272 {
2273         DRM_DEVICE;
2274         drm_radeon_private_t *dev_priv = dev->dev_private;
2275         drm_radeon_texture_t tex;
2276         drm_radeon_tex_image_t image;
2277         int ret;
2278
2279         LOCK_TEST_WITH_RETURN(dev, filp);
2280
2281         DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
2282                                  sizeof(tex));
2283
2284         if (tex.image == NULL) {
2285                 DRM_ERROR("null texture image!\n");
2286                 return DRM_ERR(EINVAL);
2287         }
2288
2289         if (DRM_COPY_FROM_USER(&image,
2290                                (drm_radeon_tex_image_t __user *) tex.image,
2291                                sizeof(image)))
2292                 return DRM_ERR(EFAULT);
2293
2294         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2295         VB_AGE_TEST_WITH_RETURN(dev_priv);
2296
2297         ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);
2298
2299         COMMIT_RING();
2300         return ret;
2301 }
2302
2303 static int radeon_cp_stipple(DRM_IOCTL_ARGS)
2304 {
2305         DRM_DEVICE;
2306         drm_radeon_private_t *dev_priv = dev->dev_private;
2307         drm_radeon_stipple_t stipple;
2308         u32 mask[32];
2309
2310         LOCK_TEST_WITH_RETURN(dev, filp);
2311
2312         DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
2313                                  sizeof(stipple));
2314
2315         if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
2316                 return DRM_ERR(EFAULT);
2317
2318         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2319
2320         radeon_cp_dispatch_stipple(dev, mask);
2321
2322         COMMIT_RING();
2323         return 0;
2324 }
2325
/* Indirect-buffer ioctl: dispatch a raw command buffer prepared by a
 * privileged client (the X server).  The buffer contents are NOT
 * verified, which is why access to this ioctl must be restricted to
 * privileged clients (enforced elsewhere — presumably in the ioctl
 * table flags; confirm against the driver's ioctl registration).
 *
 * Returns 0 on success, EINVAL on validation failure.
 */
static int radeon_cp_indirect(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL(indirect,
				 (drm_radeon_indirect_t __user *) data,
				 sizeof(indirect));

	DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
		  indirect.idx, indirect.start, indirect.end, indirect.discard);

	/* idx comes from userland; bound it against the buffer list. */
	if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	/* The buffer must belong to the caller and not be in flight. */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect.idx);
		return DRM_ERR(EINVAL);
	}

	/* start must not rewind into a region already submitted. */
	if (indirect.start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
	if (indirect.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2399
/* Second-generation vertex ioctl: a single DMA buffer may carry
 * several primitives, each with an optional state block.  For each
 * primitive we copy its descriptor (and, when the state index
 * changes, its state block) from userland, emit the state, and
 * dispatch either an indexed or a direct vertex draw.
 *
 * Returns 0 on success, EINVAL on validation/emit failure, EFAULT on
 * a failed user copy.
 */
static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}
	sarea_priv = dev_priv->sarea_priv;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.discard);

	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	/* Caller must own the buffer and it must not be in flight. */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}

	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	/* Client-writable cliprect count must be in range. */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return DRM_ERR(EINVAL);

	/* 0xff is a sentinel: stateidx is an unsigned char, so the first
	 * primitive whose stateidx differs triggers a state emit.
	 */
	for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
			return DRM_ERR(EFAULT);

		/* Only re-emit state when it changes between prims. */
		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			if (DRM_COPY_FROM_USER(&state,
					       &vertex.state[prim.stateidx],
					       sizeof(state)))
				return DRM_ERR(EFAULT);

			if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return DRM_ERR(EINVAL);
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if (prim.prim & RADEON_PRIM_WALK_IND) {
			/* Indexed draw: indices follow the vertex data. */
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
		}

		/* A single cliprect is consumed by the first prim. */
		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2504
2505 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2506                                drm_file_t * filp_priv,
2507                                drm_radeon_cmd_header_t header,
2508                                drm_radeon_cmd_buffer_t * cmdbuf)
2509 {
2510         int id = (int)header.packet.packet_id;
2511         int sz, reg;
2512         int *data = (int *)cmdbuf->buf;
2513         RING_LOCALS;
2514
2515         if (id >= RADEON_MAX_STATE_PACKETS)
2516                 return DRM_ERR(EINVAL);
2517
2518         sz = packet[id].len;
2519         reg = packet[id].start;
2520
2521         if (sz * sizeof(int) > cmdbuf->bufsz) {
2522                 DRM_ERROR("Packet size provided larger than data provided\n");
2523                 return DRM_ERR(EINVAL);
2524         }
2525
2526         if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
2527                 DRM_ERROR("Packet verification failed\n");
2528                 return DRM_ERR(EINVAL);
2529         }
2530
2531         BEGIN_RING(sz + 1);
2532         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2533         OUT_RING_TABLE(data, sz);
2534         ADVANCE_RING();
2535
2536         cmdbuf->buf += sz * sizeof(int);
2537         cmdbuf->bufsz -= sz * sizeof(int);
2538         return 0;
2539 }
2540
2541 static __inline__ int radeon_emit_scalars(drm_radeon_private_t * dev_priv,
2542                                           drm_radeon_cmd_header_t header,
2543                                           drm_radeon_cmd_buffer_t * cmdbuf)
2544 {
2545         int sz = header.scalars.count;
2546         int start = header.scalars.offset;
2547         int stride = header.scalars.stride;
2548         RING_LOCALS;
2549
2550         BEGIN_RING(3 + sz);
2551         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2552         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2553         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2554         OUT_RING_TABLE(cmdbuf->buf, sz);
2555         ADVANCE_RING();
2556         cmdbuf->buf += sz * sizeof(int);
2557         cmdbuf->bufsz -= sz * sizeof(int);
2558         return 0;
2559 }
2560
2561 /* God this is ugly
2562  */
2563 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t * dev_priv,
2564                                            drm_radeon_cmd_header_t header,
2565                                            drm_radeon_cmd_buffer_t * cmdbuf)
2566 {
2567         int sz = header.scalars.count;
2568         int start = ((unsigned int)header.scalars.offset) + 0x100;
2569         int stride = header.scalars.stride;
2570         RING_LOCALS;
2571
2572         BEGIN_RING(3 + sz);
2573         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2574         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2575         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2576         OUT_RING_TABLE(cmdbuf->buf, sz);
2577         ADVANCE_RING();
2578         cmdbuf->buf += sz * sizeof(int);
2579         cmdbuf->bufsz -= sz * sizeof(int);
2580         return 0;
2581 }
2582
2583 static __inline__ int radeon_emit_vectors(drm_radeon_private_t * dev_priv,
2584                                           drm_radeon_cmd_header_t header,
2585                                           drm_radeon_cmd_buffer_t * cmdbuf)
2586 {
2587         int sz = header.vectors.count;
2588         int start = header.vectors.offset;
2589         int stride = header.vectors.stride;
2590         RING_LOCALS;
2591
2592         BEGIN_RING(3 + sz);
2593         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2594         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2595         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2596         OUT_RING_TABLE(cmdbuf->buf, sz);
2597         ADVANCE_RING();
2598
2599         cmdbuf->buf += sz * sizeof(int);
2600         cmdbuf->bufsz -= sz * sizeof(int);
2601         return 0;
2602 }
2603
2604 static int radeon_emit_packet3(drm_device_t * dev,
2605                                drm_file_t * filp_priv,
2606                                drm_radeon_cmd_buffer_t * cmdbuf)
2607 {
2608         drm_radeon_private_t *dev_priv = dev->dev_private;
2609         unsigned int cmdsz;
2610         int ret;
2611         RING_LOCALS;
2612
2613         DRM_DEBUG("\n");
2614
2615         if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2616                                                   cmdbuf, &cmdsz))) {
2617                 DRM_ERROR("Packet verification failed\n");
2618                 return ret;
2619         }
2620
2621         BEGIN_RING(cmdsz);
2622         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2623         ADVANCE_RING();
2624
2625         cmdbuf->buf += cmdsz * 4;
2626         cmdbuf->bufsz -= cmdsz * 4;
2627         return 0;
2628 }
2629
/* Emit a type-3 packet once per cliprect.  The packet is verified
 * once, then re-emitted for each cliprect in cmdbuf->boxes, with the
 * scissor state updated between emissions.  If the original request
 * had no cliprects (orig_nbox == 0) the packet is skipped entirely
 * but the cursor is still advanced.
 */
static int radeon_emit_packet3_cliprect(drm_device_t * dev,
					drm_file_t * filp_priv,
					drm_radeon_cmd_buffer_t * cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	/* No cliprects in the original request: skip the emit but still
	 * consume the packet from the command buffer.
	 */
	if (!orig_nbox)
		goto out;

	/* do-while so the packet is emitted at least once even when the
	 * current nbox is 0 (but orig_nbox was not).
	 */
	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	/* A single cliprect is consumed by the first packet. */
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	/* cmdsz is a dword count; advance the cursor by bytes. */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2691
2692 static int radeon_emit_wait(drm_device_t * dev, int flags)
2693 {
2694         drm_radeon_private_t *dev_priv = dev->dev_private;
2695         RING_LOCALS;
2696
2697         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2698         switch (flags) {
2699         case RADEON_WAIT_2D:
2700                 BEGIN_RING(2);
2701                 RADEON_WAIT_UNTIL_2D_IDLE();
2702                 ADVANCE_RING();
2703                 break;
2704         case RADEON_WAIT_3D:
2705                 BEGIN_RING(2);
2706                 RADEON_WAIT_UNTIL_3D_IDLE();
2707                 ADVANCE_RING();
2708                 break;
2709         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2710                 BEGIN_RING(2);
2711                 RADEON_WAIT_UNTIL_IDLE();
2712                 ADVANCE_RING();
2713                 break;
2714         default:
2715                 return DRM_ERR(EINVAL);
2716         }
2717
2718         return 0;
2719 }
2720
/* Command-buffer ioctl: copy a client command stream into a kernel
 * buffer, then walk it header by header, dispatching each command to
 * the matching emit helper.  R300-class chips take a separate path
 * (r300_do_cp_cmdbuf).
 *
 * The kernel-side copy exists to prevent TOCTOU races: all validation
 * in the emit helpers operates on data userland can no longer modify.
 *
 * Returns 0 on success, EINVAL on any malformed command, ENOMEM if
 * the kernel copy cannot be allocated, EFAULT on a failed user copy.
 */
static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int idx;
	drm_radeon_cmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(cmdbuf,
				 (drm_radeon_cmd_buffer_t __user *) data,
				 sizeof(cmdbuf));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	/* Cap the stream at 64KB and reject negative sizes (bufsz is
	 * signed); this also keeps the drm_alloc below bounded.
	 */
	if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
		return DRM_ERR(EINVAL);
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf.bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return DRM_ERR(ENOMEM);
		if (DRM_COPY_FROM_USER(kbuf, cmdbuf.buf, cmdbuf.bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return DRM_ERR(EFAULT);
		}
		cmdbuf.buf = kbuf;
	}

	orig_nbox = cmdbuf.nbox;
	
	/* R300 command streams use a different format; hand the whole
	 * buffer to the r300 parser and return its status.
	 */
	if(dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp=r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);
	
		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	
		return temp;
	}
	
	/* microcode_version != r300 */
	/* Each helper advances cmdbuf.buf/bufsz past the data it
	 * consumes; the loop ends when less than a header remains.
	 * NOTE(review): the loop guard relies on the helpers never
	 * decrementing bufsz below zero — verify each helper bounds its
	 * consumption against bufsz.
	 */
	while (cmdbuf.bufsz >= sizeof(header)) {
		header.i = *(int *)cmdbuf.buf;
		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, filp_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			/* The buffer must belong to the caller and not
			 * already be queued.
			 */
			buf = dma->buflist[idx];
			if (buf->filp != filp || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->filp, filp, buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, filp_priv, &cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header.wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	DRM_DEBUG("DONE\n");
	COMMIT_RING();

	return 0;

      /* Common cleanup: free the kernel copy on any malformed command. */
err:
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return DRM_ERR(EINVAL);
}
2885
2886 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
2887 {
2888         DRM_DEVICE;
2889         drm_radeon_private_t *dev_priv = dev->dev_private;
2890         drm_radeon_getparam_t param;
2891         int value;
2892
2893         if (!dev_priv) {
2894                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2895                 return DRM_ERR(EINVAL);
2896         }
2897
2898         DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
2899                                  sizeof(param));
2900
2901         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
2902
2903         switch (param.param) {
2904         case RADEON_PARAM_GART_BUFFER_OFFSET:
2905                 value = dev_priv->gart_buffers_offset;
2906                 break;
2907         case RADEON_PARAM_LAST_FRAME:
2908                 dev_priv->stats.last_frame_reads++;
2909                 value = GET_SCRATCH(0);
2910                 break;
2911         case RADEON_PARAM_LAST_DISPATCH:
2912                 value = GET_SCRATCH(1);
2913                 break;
2914         case RADEON_PARAM_LAST_CLEAR:
2915                 dev_priv->stats.last_clear_reads++;
2916                 value = GET_SCRATCH(2);
2917                 break;
2918         case RADEON_PARAM_IRQ_NR:
2919                 value = dev->irq;
2920                 break;
2921         case RADEON_PARAM_GART_BASE:
2922                 value = dev_priv->gart_vm_start;
2923                 break;
2924         case RADEON_PARAM_REGISTER_HANDLE:
2925                 value = dev_priv->mmio_offset;
2926                 break;
2927         case RADEON_PARAM_STATUS_HANDLE:
2928                 value = dev_priv->ring_rptr_offset;
2929                 break;
2930 #if BITS_PER_LONG == 32
2931                 /*
2932                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2933                  * pointer which can't fit into an int-sized variable.  According to
2934                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2935                  * not supporting it shouldn't be a problem.  If the same functionality
2936                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
2937                  * so backwards-compatibility for the embedded platforms can be
2938                  * maintained.  --davidm 4-Feb-2004.
2939                  */
2940         case RADEON_PARAM_SAREA_HANDLE:
2941                 /* The lock is the first dword in the sarea. */
2942                 value = (long)dev->lock.hw_lock;
2943                 break;
2944 #endif
2945         case RADEON_PARAM_GART_TEX_HANDLE:
2946                 value = dev_priv->gart_textures_offset;
2947                 break;
2948         default:
2949                 return DRM_ERR(EINVAL);
2950         }
2951
2952         if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
2953                 DRM_ERROR("copy_to_user\n");
2954                 return DRM_ERR(EFAULT);
2955         }
2956
2957         return 0;
2958 }
2959
2960 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
2961 {
2962         DRM_DEVICE;
2963         drm_radeon_private_t *dev_priv = dev->dev_private;
2964         drm_file_t *filp_priv;
2965         drm_radeon_setparam_t sp;
2966         struct drm_radeon_driver_file_fields *radeon_priv;
2967
2968         if (!dev_priv) {
2969                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2970                 return DRM_ERR(EINVAL);
2971         }
2972
2973         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2974
2975         DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
2976                                  sizeof(sp));
2977
2978         switch (sp.param) {
2979         case RADEON_SETPARAM_FB_LOCATION:
2980                 radeon_priv = filp_priv->driver_priv;
2981                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
2982                 break;
2983         case RADEON_SETPARAM_SWITCH_TILING:
2984                 if (sp.value == 0) {
2985                         DRM_DEBUG( "color tiling disabled\n" );
2986                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
2987                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
2988                         dev_priv->sarea_priv->tiling_enabled = 0;
2989                 }
2990                 else if (sp.value == 1) {
2991                         DRM_DEBUG( "color tiling enabled\n" );
2992                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
2993                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
2994                         dev_priv->sarea_priv->tiling_enabled = 1;
2995                 }
2996                 break;
2997         default:
2998                 DRM_DEBUG("Invalid parameter %d\n", sp.param);
2999                 return DRM_ERR(EINVAL);
3000         }
3001
3002         return 0;
3003 }
3004
3005 /* When a client dies:
3006  *    - Check for and clean up flipped page state
3007  *    - Free any alloced GART memory.
3008  *    - Free any alloced radeon surfaces.
3009  *
3010  * DRM infrastructure takes care of reclaiming dma buffers.
3011  */
3012 void radeon_driver_prerelease(drm_device_t * dev, DRMFILE filp)
3013 {
3014         if (dev->dev_private) {
3015                 drm_radeon_private_t *dev_priv = dev->dev_private;
3016                 if (dev_priv->page_flipping) {
3017                         radeon_do_cleanup_pageflip(dev);
3018                 }
3019                 radeon_mem_release(filp, dev_priv->gart_heap);
3020                 radeon_mem_release(filp, dev_priv->fb_heap);
3021                 radeon_surfaces_release(filp, dev_priv);
3022         }
3023 }
3024
3025 void radeon_driver_pretakedown(drm_device_t * dev)
3026 {
3027         radeon_do_release(dev);
3028 }
3029
3030 int radeon_driver_open_helper(drm_device_t * dev, drm_file_t * filp_priv)
3031 {
3032         drm_radeon_private_t *dev_priv = dev->dev_private;
3033         struct drm_radeon_driver_file_fields *radeon_priv;
3034
3035         DRM_DEBUG("\n");
3036         radeon_priv =
3037             (struct drm_radeon_driver_file_fields *)
3038             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3039
3040         if (!radeon_priv)
3041                 return -ENOMEM;
3042
3043         filp_priv->driver_priv = radeon_priv;
3044
3045         if (dev_priv)
3046                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3047         else
3048                 radeon_priv->radeon_fb_delta = 0;
3049         return 0;
3050 }
3051
3052 void radeon_driver_free_filp_priv(drm_device_t * dev, drm_file_t * filp_priv)
3053 {
3054         struct drm_radeon_driver_file_fields *radeon_priv =
3055             filp_priv->driver_priv;
3056
3057         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3058 }
3059
3060 drm_ioctl_desc_t radeon_ioctls[] = {
3061         [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, 1, 1},
3062         [DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, 1, 1},
3063         [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, 1, 1},
3064         [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, 1, 1},
3065         [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, 1, 0},
3066         [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, 1, 0},
3067         [DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, 1, 0},
3068         [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, 1, 0},
3069         [DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, 1, 0},
3070         [DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, 1, 0},
3071         [DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, 1, 0},
3072         [DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, 1, 0},
3073         [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, 1, 0},
3074         [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, 1, 0},
3075         [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, 1, 1},
3076         [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, 1, 0},
3077         [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, 1, 0},
3078         [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, 1, 0},
3079         [DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, 1, 0},
3080         [DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, 1, 0},
3081         [DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, 1, 0},
3082         [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, 1, 1},
3083         [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, 1, 0},
3084         [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, 1, 0},
3085         [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, 1, 0},
3086         [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, 1, 0},
3087         [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, 1, 0}
3088 };
3089
3090 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);