2 * Copyright 2009 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 * Alex Deucher <alexander.deucher@amd.com>
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
30 #include "dev/drm/drmP.h"
31 #include "dev/drm/drm.h"
32 #include "dev/drm/radeon_drm.h"
33 #include "dev/drm/radeon_drv.h"
/*
 * Static u32 tables consumed further down in this file:
 *   - set_default_state() streams r7xx_default_state or r6xx_default_state
 *     into the ring, one dword per OUT_RING().
 *   - set_shaders() sizes its copy loops with sizeof(r6xx_vs) / 4 and
 *     sizeof(r6xx_ps) / 4.
 * Only the declarators are visible in this view; the initializer lists are
 * not shown.
 */
35 static u32 r6xx_default_state[] =
559 static u32 r7xx_default_state[] =
1070 /* same for r6xx/r7xx */
1071 static u32 r6xx_vs[] =
1087 static u32 r6xx_ps[] =
/*
 * Draw-initiator values emitted by draw_auto(): DI_PT_RECTLIST is written to
 * R600_VGT_PRIMITIVE_TYPE, DI_INDEX_SIZE_16_BIT follows the INDEX_TYPE packet
 * and DI_SRC_SEL_AUTO_INDEX follows the DRAW_INDEX_AUTO packet.
 */
1099 #define DI_PT_RECTLIST 0x11
1100 #define DI_INDEX_SIZE_16_BIT 0x0
1101 #define DI_SRC_SEL_AUTO_INDEX 0x2
/*
 * Surface format codes.  FMT_* values are passed as the `format` argument of
 * set_tex_resource(); COLOR_* values are passed as the `format` argument of
 * set_render_target().
 * NOTE(review): FMT_8, FMT_5_6_5 and COLOR_8 are used later in this file but
 * their definitions are not visible in this view - confirm they exist.
 */
1105 #define FMT_8_8_8_8 0x1a
1107 #define COLOR_5_6_5 8
1108 #define COLOR_8_8_8_8 0x1a
/*
 * Bit flags OR-ed together to form the `sync_type` argument of
 * cp_set_surface_sync().
 */
1110 #define R600_CB0_DEST_BASE_ENA (1 << 6)
1111 #define R600_TC_ACTION_ENA (1 << 23)
1112 #define R600_VC_ACTION_ENA (1 << 24)
1113 #define R600_CB_ACTION_ENA (1 << 25)
1114 #define R600_DB_ACTION_ENA (1 << 26)
1115 #define R600_SH_ACTION_ENA (1 << 27)
1116 #define R600_SMX_ACTION_ENA (1 << 28)
/*
 * Colour-buffer 0 register addresses.  Callers in this file emit them as
 * (address - R600_SET_CONTEXT_REG_OFFSET) >> 2 after a SET_CONTEXT_REG packet.
 * NOTE(review): R600_CB_COLOR0_BASE is used by set_render_target() but is not
 * defined in the visible lines - presumably it comes from an included header.
 */
1118 #define R600_CB_COLOR0_SIZE 0x28060
1119 #define R600_CB_COLOR0_VIEW 0x28080
1120 #define R600_CB_COLOR0_INFO 0x280a0
1121 #define R600_CB_COLOR0_TILE 0x280c0
1122 #define R600_CB_COLOR0_FRAG 0x280e0
1123 #define R600_CB_COLOR0_MASK 0x28100
/* Shader program register addresses, written by set_shaders(). */
1125 #define R600_SQ_PGM_START_VS 0x28858
1126 #define R600_SQ_PGM_RESOURCES_VS 0x28868
1127 #define R600_SQ_PGM_CF_OFFSET_VS 0x288d0
1128 #define R600_SQ_PGM_START_PS 0x28840
1129 #define R600_SQ_PGM_RESOURCES_PS 0x28850
1130 #define R600_SQ_PGM_EXPORTS_PS 0x28854
1131 #define R600_SQ_PGM_CF_OFFSET_PS 0x288cc
/*
 * Unlike the registers above, this one is emitted by draw_auto() relative to
 * R600_SET_CONFIG_REG_OFFSET with a SET_CONFIG_REG packet.
 */
1133 #define R600_VGT_PRIMITIVE_TYPE 0x8958
/* Scissor register addresses, written by set_scissors(). */
1135 #define R600_PA_SC_SCREEN_SCISSOR_TL 0x28030
1136 #define R600_PA_SC_GENERIC_SCISSOR_TL 0x28240
1137 #define R600_PA_SC_WINDOW_SCISSOR_TL 0x28204
/*
 * Resource type codes.  set_vtx_resource() and set_tex_resource() shift the
 * VALID_BUFFER / VALID_TEXTURE values left by 30 before emitting them as part
 * of a SET_RESOURCE packet.
 */
1139 #define R600_SQ_TEX_VTX_INVALID_TEXTURE 0x0
1140 #define R600_SQ_TEX_VTX_INVALID_BUFFER 0x1
1141 #define R600_SQ_TEX_VTX_VALID_TEXTURE 0x2
1142 #define R600_SQ_TEX_VTX_VALID_BUFFER 0x3
/*
 * Register-address windows as OFFSET/END pairs.  In the visible code only
 * R600_SET_CONFIG_REG_OFFSET and R600_SET_CONTEXT_REG_OFFSET are referenced:
 * each is subtracted from a register address and the difference is shifted
 * right by 2 before being written to the ring.
 * From RESOURCE through BOOL_CONST the windows are contiguous: each END value
 * equals the following OFFSET value.
 */
1144 /* packet 3 type offsets */
1145 #define R600_SET_CONFIG_REG_OFFSET 0x00008000
1146 #define R600_SET_CONFIG_REG_END 0x0000ac00
1147 #define R600_SET_CONTEXT_REG_OFFSET 0x00028000
1148 #define R600_SET_CONTEXT_REG_END 0x00029000
1149 #define R600_SET_ALU_CONST_OFFSET 0x00030000
1150 #define R600_SET_ALU_CONST_END 0x00032000
1151 #define R600_SET_RESOURCE_OFFSET 0x00038000
1152 #define R600_SET_RESOURCE_END 0x0003c000
1153 #define R600_SET_SAMPLER_OFFSET 0x0003c000
1154 #define R600_SET_SAMPLER_END 0x0003cff0
1155 #define R600_SET_CTL_CONST_OFFSET 0x0003cff0
1156 #define R600_SET_CTL_CONST_END 0x0003e200
1157 #define R600_SET_LOOP_CONST_OFFSET 0x0003e200
1158 #define R600_SET_LOOP_CONST_END 0x0003e380
1159 #define R600_SET_BOOL_CONST_OFFSET 0x0003e380
1160 #define R600_SET_BOOL_CONST_END 0x00040000
/*
 * Opcode values passed as the first argument of CP_PACKET3() throughout this
 * file.  The list is in ascending numeric order except for
 * R600_IT_INDIRECT_BUFFER (0x3200), which sits after R600_IT_MEM_WRITE.
 */
1162 /* Packet 3 types */
1163 #define R600_IT_INDIRECT_BUFFER_END 0x00001700
1164 #define R600_IT_SET_PREDICATION 0x00002000
1165 #define R600_IT_REG_RMW 0x00002100
1166 #define R600_IT_COND_EXEC 0x00002200
1167 #define R600_IT_PRED_EXEC 0x00002300
1168 #define R600_IT_START_3D_CMDBUF 0x00002400
1169 #define R600_IT_DRAW_INDEX_2 0x00002700
1170 #define R600_IT_CONTEXT_CONTROL 0x00002800
1171 #define R600_IT_DRAW_INDEX_IMMD_BE 0x00002900
1172 #define R600_IT_INDEX_TYPE 0x00002A00
1173 #define R600_IT_DRAW_INDEX 0x00002B00
1174 #define R600_IT_DRAW_INDEX_AUTO 0x00002D00
1175 #define R600_IT_DRAW_INDEX_IMMD 0x00002E00
1176 #define R600_IT_NUM_INSTANCES 0x00002F00
1177 #define R600_IT_STRMOUT_BUFFER_UPDATE 0x00003400
1178 #define R600_IT_INDIRECT_BUFFER_MP 0x00003800
1179 #define R600_IT_MEM_SEMAPHORE 0x00003900
1180 #define R600_IT_MPEG_INDEX 0x00003A00
1181 #define R600_IT_WAIT_REG_MEM 0x00003C00
1182 #define R600_IT_MEM_WRITE 0x00003D00
1183 #define R600_IT_INDIRECT_BUFFER 0x00003200
1184 #define R600_IT_CP_INTERRUPT 0x00004000
1185 #define R600_IT_SURFACE_SYNC 0x00004300
1186 #define R600_IT_ME_INITIALIZE 0x00004400
1187 #define R600_IT_COND_WRITE 0x00004500
1188 #define R600_IT_EVENT_WRITE 0x00004600
1189 #define R600_IT_EVENT_WRITE_EOP 0x00004700
1190 #define R600_IT_ONE_REG_WRITE 0x00005700
1191 #define R600_IT_SET_CONFIG_REG 0x00006800
1192 #define R600_IT_SET_CONTEXT_REG 0x00006900
1193 #define R600_IT_SET_ALU_CONST 0x00006A00
1194 #define R600_IT_SET_BOOL_CONST 0x00006B00
1195 #define R600_IT_SET_LOOP_CONST 0x00006C00
1196 #define R600_IT_SET_RESOURCE 0x00006D00
1197 #define R600_IT_SET_SAMPLER 0x00006E00
1198 #define R600_IT_SET_CTL_CONST 0x00006F00
1199 #define R600_IT_SURFACE_BASE_UPDATE 0x00007300
/*
 * set_render_target - emit the CB_COLOR0 register writes for a blit target.
 *
 * dev_priv: device private; its flags select the chip-family path below.
 * format:   colour format code, placed at bit 2 of the CB_COLOR0_INFO value.
 * w, h:     used only to derive the pitch and slice fields.
 * gpu_addr: target address; written to CB_COLOR0_BASE shifted right by 8.
 *
 * NOTE(review): the return type, braces, ring begin/advance calls and the
 * payloads of the VIEW, TILE, FRAG and MASK writes are not visible in this
 * view.
 */
1202 set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
/* INFO word: format at bit 2 plus bit 27 (meaning of bit 27 is not shown here). */
1213 cb_color_info = ((format << 2) | (1 << 27));
/* Both size fields are stored minus one: pitch from w / 8, slice from w * h / 64. */
1214 pitch = (w / 8) - 1;
1215 slice = ((w * h) / 64) - 1;
/*
 * Families strictly between CHIP_R600 and CHIP_RV770 follow the base-address
 * write with a SURFACE_BASE_UPDATE packet.
 */
1217 if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
1218 ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
1220 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1221 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1222 OUT_RING(gpu_addr >> 8);
1223 OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
/*
 * Same base-address write without SURFACE_BASE_UPDATE; presumably the else
 * branch for all other families (the `else` is not visible) - TODO confirm.
 */
1227 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1228 OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1229 OUT_RING(gpu_addr >> 8);
/* SIZE word: pitch in the low bits, slice starting at bit 10. */
1232 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1233 OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1234 OUT_RING((pitch << 0) | (slice << 10));
1236 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1237 OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1240 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1241 OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1242 OUT_RING(cb_color_info);
1244 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1245 OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1248 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1249 OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1252 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1253 OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
/*
 * cp_set_surface_sync - emit a SURFACE_SYNC packet.
 *
 * dev_priv:  device private used by the ring macros.
 * sync_type: OR of the R600_*_ACTION_ENA / R600_CB0_DEST_BASE_ENA flags;
 *            emitted unchanged.
 * size:      byte count to cover; 0xffffffff is passed through as-is, any
 *            other value is rounded up to 256-byte units.
 * mc_addr:   start address; emitted shifted right by 8.
 *
 * NOTE(review): size + 255 is computed in u32, so values just below
 * 0xffffffff would wrap - confirm callers never pass such sizes.
 */
1260 cp_set_surface_sync(drm_radeon_private_t *dev_priv,
1261 u32 sync_type, u32 size, u64 mc_addr)
1267 if (size == 0xffffffff)
1268 cp_coher_size = 0xffffffff;
/* Presumably the else branch (the `else` is not visible): round up to 256-byte units. */
1270 cp_coher_size = ((size + 255) >> 8);
1273 OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
1274 OUT_RING(sync_type);
1275 OUT_RING(cp_coher_size);
1276 OUT_RING((mc_addr >> 8));
1277 OUT_RING(10); /* poll interval */
/*
 * set_shaders - place the blit shaders in the blit vertex buffer and program
 * the VS/PS shader registers to point at them.
 *
 * dev: DRM device; dev->dev_private supplies blit_vb and gart_buffers_offset.
 *
 * NOTE(review): the loop bodies that fill `vs` and `ps`, and the payloads of
 * the CF_OFFSET and EXPORTS writes, are not visible in this view.
 */
1282 set_shaders(struct drm_device *dev)
1284 drm_radeon_private_t *dev_priv = dev->dev_private;
1288 uint32_t sq_pgm_resources;
/* CPU-side pointers into the blit buffer: VS at its start, PS 256 bytes in. */
1293 vs = (u32 *) ((char *)dev->agp_buffer_map->virtual + dev_priv->blit_vb->offset);
1294 ps = (u32 *) ((char *)dev->agp_buffer_map->virtual + dev_priv->blit_vb->offset + 256);
/* One iteration per dword of each shader table (loop bodies not shown). */
1296 shader_size = sizeof(r6xx_vs) / 4;
1297 for (i= 0; i < shader_size; i++)
1299 shader_size = sizeof(r6xx_ps) / 4;
1300 for (i= 0; i < shader_size; i++)
/* Mark the first 512 bytes of the buffer as used (two 256-byte shader slots). */
1303 dev_priv->blit_vb->used = 512;
/* GPU-visible address of the same buffer. */
1305 gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
1307 /* setup shader regs */
1308 sq_pgm_resources = (1 << 0);
/* Vertex shader: start address (in 256-byte units), resources, CF offset. */
1312 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1313 OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1314 OUT_RING(gpu_addr >> 8);
1316 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1317 OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1318 OUT_RING(sq_pgm_resources);
1320 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1321 OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
/* Pixel shader: same sequence at gpu_addr + 256, with bit 28 added to resources. */
1325 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1326 OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1327 OUT_RING((gpu_addr + 256) >> 8);
1329 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1330 OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1331 OUT_RING(sq_pgm_resources | (1 << 28));
1333 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1334 OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1337 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
1338 OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
/* Surface sync with SH_ACTION_ENA over the 512 bytes that hold both shaders. */
1342 cp_set_surface_sync(dev_priv,
1343 R600_SH_ACTION_ENA, 512, gpu_addr);
/*
 * set_vtx_resource - emit a SET_RESOURCE packet describing the vertex buffer
 * at gpu_addr, then issue a surface sync over 48 bytes of it.
 *
 * dev_priv: device private; its flags select which sync flag is used.
 * gpu_addr: GPU address of the vertex data.
 *
 * NOTE(review): several dwords of the SET_RESOURCE payload are not visible
 * in this view.
 */
1347 set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
1349 uint32_t sq_vtx_constant_word2;
/*
 * Word 2: bits 32..39 of the address in the low byte, plus 16 at bit 8
 * (presumably a per-vertex stride of 16 bytes - TODO confirm).
 */
1353 sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
1356 OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
/* Low 32 bits of the address. */
1358 OUT_RING(gpu_addr & 0xffffffff);
1360 OUT_RING(sq_vtx_constant_word2);
1364 OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
/*
 * RV610, RV620, RS780, RS880 and RV710 sync with TC_ACTION_ENA; the second
 * call uses VC_ACTION_ENA and is presumably the else branch (the `else` is
 * not visible).  48 bytes matches the 12 dwords each blit adds to the buffer.
 */
1367 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
1368 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
1369 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
1370 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
1371 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
1372 cp_set_surface_sync(dev_priv,
1373 R600_TC_ACTION_ENA, 48, gpu_addr);
1375 cp_set_surface_sync(dev_priv,
1376 R600_VC_ACTION_ENA, 48, gpu_addr);
/*
 * set_tex_resource - emit a SET_RESOURCE packet describing the source
 * texture of a blit.
 *
 * dev_priv: device private used by the ring macros.
 * format:   texture format code, placed at bit 26 of word 1.
 * w:        texture width (its use is not visible in this view).
 * h:        texture height; stored minus one in the low bits of word 1.
 * pitch:    stored as (pitch >> 3) - 1 at bit 8 of word 0.
 * gpu_addr: texture address; emitted twice, shifted right by 8.
 *
 * NOTE(review): the continuation lines of the word 0 and word 4 expressions
 * and some payload dwords are not visible in this view.
 */
1380 set_tex_resource(drm_radeon_private_t *dev_priv,
1381 int format, int w, int h, int pitch, u64 gpu_addr)
1383 uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
1390 sq_tex_resource_word0 = (1 << 0);
1391 sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
1394 sq_tex_resource_word1 = (format << 26);
1395 sq_tex_resource_word1 |= ((h - 1) << 0);
1397 sq_tex_resource_word4 = ((1 << 14) |
1404 OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
1406 OUT_RING(sq_tex_resource_word0);
1407 OUT_RING(sq_tex_resource_word1);
/* The same 256-byte-unit address is written in two consecutive dwords. */
1408 OUT_RING(gpu_addr >> 8);
1409 OUT_RING(gpu_addr >> 8);
1410 OUT_RING(sq_tex_resource_word4);
1412 OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
/*
 * set_scissors - program the screen, generic and window scissor rectangles
 * with the same corners.
 *
 * dev_priv: device private used by the ring macros.
 * x1, y1:   first corner; packed as x1 | (y1 << 16).
 * x2, y2:   second corner; packed as x2 | (y2 << 16).
 *
 * Each SET_CONTEXT_REG packet carries two values starting at the *_TL
 * register, so the second value lands in the register that follows it.
 *
 * NOTE(review): (1 << 31) shifts into the sign bit of a signed int; an
 * unsigned constant would avoid relying on that - confirm before changing.
 */
1418 set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
1424 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
1425 OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1426 OUT_RING((x1 << 0) | (y1 << 16));
1427 OUT_RING((x2 << 0) | (y2 << 16));
/* The generic and window scissors additionally set bit 31 in the first value. */
1429 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
1430 OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1431 OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
1432 OUT_RING((x2 << 0) | (y2 << 16));
1434 OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
1435 OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
1436 OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
1437 OUT_RING((x2 << 0) | (y2 << 16));
/*
 * draw_auto - emit the packets that trigger the blit draw.
 *
 * dev_priv: device private used by the ring macros.
 *
 * Sequence visible here: primitive type set to DI_PT_RECTLIST, index type
 * set to DI_INDEX_SIZE_16_BIT, a NUM_INSTANCES packet, then DRAW_INDEX_AUTO
 * with DI_SRC_SEL_AUTO_INDEX.
 *
 * NOTE(review): the NUM_INSTANCES payload and the first DRAW_INDEX_AUTO dword
 * are not visible in this view.
 */
1442 draw_auto(drm_radeon_private_t *dev_priv)
/* The primitive type is a config register, hence SET_CONFIG_REG and its offset. */
1448 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
1449 OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
1450 OUT_RING(DI_PT_RECTLIST);
1452 OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
1453 OUT_RING(DI_INDEX_SIZE_16_BIT);
1455 OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
1458 OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
1460 OUT_RING(DI_SRC_SEL_AUTO_INDEX);
/*
 * set_default_state - stream the family-specific default state table into
 * the ring and program the SQ resource-management registers.
 *
 * dev_priv: device private; its flags select the per-family values and the
 *           r6xx or r7xx default state table.
 *
 * NOTE(review): the switch case labels, the assignments to the *_gprs
 * variables and to num_gs_threads / num_es_threads, and parts of the
 * sq_config computation are not visible in this view.
 */
1467 set_default_state(drm_radeon_private_t *dev_priv)
1469 int default_state_dw, i;
1470 u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
1471 u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
1472 int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
1473 int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
1474 int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
/*
 * Per-family thread and stack-entry counts.  Each group of six assignments
 * below presumably belongs to one switch case (labels not visible).
 */
1477 switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
1484 num_ps_threads = 136;
1485 num_vs_threads = 48;
1488 num_ps_stack_entries = 128;
1489 num_vs_stack_entries = 128;
1490 num_gs_stack_entries = 0;
1491 num_es_stack_entries = 0;
1500 num_ps_threads = 144;
1501 num_vs_threads = 40;
1504 num_ps_stack_entries = 40;
1505 num_vs_stack_entries = 40;
1506 num_gs_stack_entries = 32;
1507 num_es_stack_entries = 16;
1519 num_ps_threads = 136;
1520 num_vs_threads = 48;
1523 num_ps_stack_entries = 40;
1524 num_vs_stack_entries = 40;
1525 num_gs_stack_entries = 32;
1526 num_es_stack_entries = 16;
1534 num_ps_threads = 136;
1535 num_vs_threads = 48;
1538 num_ps_stack_entries = 40;
1539 num_vs_stack_entries = 40;
1540 num_gs_stack_entries = 32;
1541 num_es_stack_entries = 16;
1549 num_ps_threads = 188;
1550 num_vs_threads = 60;
1553 num_ps_stack_entries = 256;
1554 num_vs_stack_entries = 256;
1555 num_gs_stack_entries = 0;
1556 num_es_stack_entries = 0;
1565 num_ps_threads = 188;
1566 num_vs_threads = 60;
1569 num_ps_stack_entries = 128;
1570 num_vs_stack_entries = 128;
1571 num_gs_stack_entries = 0;
1572 num_es_stack_entries = 0;
1580 num_ps_threads = 144;
1581 num_vs_threads = 48;
1584 num_ps_stack_entries = 128;
1585 num_vs_stack_entries = 128;
1586 num_gs_stack_entries = 0;
1587 num_es_stack_entries = 0;
/*
 * Same five families that set_vtx_resource() syncs with TC_ACTION_ENA.  The
 * statement taken when the test is true is not visible; R600_VC_ENABLE is
 * presumably assigned in the else branch - TODO confirm.
 */
1591 if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
1592 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
1593 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
1594 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
1595 ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
1598 sq_config = R600_VC_ENABLE;
1600 sq_config |= (R600_DX9_CONSTS |
1601 R600_ALU_INST_PREFER_VECTOR |
/* Pack the per-family counts into the five SQ resource-management words. */
1607 sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
1608 R600_NUM_VS_GPRS(num_vs_gprs) |
1609 R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
1610 sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
1611 R600_NUM_ES_GPRS(num_es_gprs));
1612 sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
1613 R600_NUM_VS_THREADS(num_vs_threads) |
1614 R600_NUM_GS_THREADS(num_gs_threads) |
1615 R600_NUM_ES_THREADS(num_es_threads));
1616 sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
1617 R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
1618 sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
1619 R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
/*
 * RV770 and later stream the r7xx table; the r6xx sequence that follows is
 * presumably the else branch.  The extra 10 dwords reserved in BEGIN_RING
 * match the ten OUT_RING() calls after the table: 2 for EVENT_WRITE and 8
 * for the SET_CONFIG_REG packet.
 */
1621 if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
1622 default_state_dw = sizeof(r7xx_default_state) / 4;
1623 BEGIN_RING(default_state_dw + 10);
1624 for (i = 0; i < default_state_dw; i++)
1625 OUT_RING(r7xx_default_state[i]);
1627 default_state_dw = sizeof(r6xx_default_state) / 4;
1628 BEGIN_RING(default_state_dw + 10);
1629 for (i = 0; i < default_state_dw; i++)
1630 OUT_RING(r6xx_default_state[i]);
1632 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
1633 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
/* Six consecutive config registers starting at R600_SQ_CONFIG. */
1635 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
1636 OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
1637 OUT_RING(sq_config);
1638 OUT_RING(sq_gpr_resource_mgmt_1);
1639 OUT_RING(sq_gpr_resource_mgmt_2);
1640 OUT_RING(sq_thread_resource_mgmt);
1641 OUT_RING(sq_stack_resource_mgmt_1);
1642 OUT_RING(sq_stack_resource_mgmt_2);
/*
 * i2f - build a 32-bit pattern with an exponent field at bit 23 and a 23-bit
 * fraction from a small unsigned integer; presumably the IEEE-754
 * single-precision encoding of the value (the exponent comment below speaks
 * of a bias of 127).  Callers store the result in the blit vertex buffer.
 *
 * input: only the low 14 bits are used (mask 0x3fff), i.e. values 0..16383.
 *
 * NOTE(review): the else, the loop exit and the return statement are not
 * visible in this view.
 */
1646 static inline uint32_t i2f(uint32_t input)
1648 u32 result, i, exponent, fraction;
1650 if ((input & 0x3fff) == 0)
1651 result = 0; /* 0 is a special case */
/*
 * 140 = 127 + 13: after the shift by 10, bit 13 of the input lands on
 * bit 23, which is the position tested in the loop below.
 * NOTE(review): the mask keeps 14 bits, so the range handled is below 2^14,
 * not 2^15 as the original comment says.
 */
1653 exponent = 140; /* exponent biased by 127; */
1654 fraction = (input & 0x3fff) << 10; /* cheat and only
1655 handle numbers below 2^^15 */
/* Normalise: each left shift of the fraction lowers the exponent by one. */
1656 for (i = 0; i < 14; i++) {
1657 if (fraction & 0x800000)
1660 fraction = fraction << 1; /* keep
1661 shifting left until top bit = 1 */
1662 exponent = exponent -1;
1665 result = exponent << 23 | (fraction & 0x7fffff); /* mask
1666 off top bit; assumed 1 */
/*
 * r600_prepare_blit_copy - obtain a buffer for blit vertex data and emit the
 * default state.
 *
 * dev: DRM device; the buffer is stored in dev_priv->blit_vb.
 *
 * NOTE(review): the return type, what happens after the error message, and
 * any calls after set_default_state() are not visible in this view.
 */
1672 r600_prepare_blit_copy(struct drm_device *dev)
1674 drm_radeon_private_t *dev_priv = dev->dev_private;
/* Take a buffer from the freelist; a NULL result is reported as an error. */
1677 dev_priv->blit_vb = radeon_freelist_get(dev);
1678 if (!dev_priv->blit_vb) {
1679 DRM_ERROR("Unable to allocate vertex buffer for blit\n");
1683 set_default_state(dev_priv);
/*
 * r600_done_blit_copy - finish a blit sequence: emit a cache flush and
 * invalidate event, wait for the 3D engine to go idle, then hand the blit
 * vertex buffer to radeon_cp_discard_buffer().
 *
 * dev: DRM device whose dev_priv->blit_vb is released.
 *
 * NOTE(review): the ring begin/advance calls are not visible in this view.
 */
1690 r600_done_blit_copy(struct drm_device *dev)
1692 drm_radeon_private_t *dev_priv = dev->dev_private;
1697 OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
1698 OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
1699 /* wait for 3D idle clean */
1700 OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
1701 OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
1702 OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
/* Reset the fill level before discarding the buffer. */
1707 dev_priv->blit_vb->used = 0;
1708 radeon_cp_discard_buffer(dev, dev_priv->blit_vb);
/*
 * r600_blit_copy - copy size_bytes bytes from src_gpu_addr to dst_gpu_addr
 * by drawing textured rectangles, one per loop iteration.
 *
 * dev:          DRM device; dev_priv->blit_vb receives the vertex data.
 * src_gpu_addr: source address.
 * dst_gpu_addr: destination address.
 * size_bytes:   byte count (its declaration line is not visible here).
 *
 * Two loops are visible.  The first runs when the size or either address has
 * any of its low two bits set, and uses the FMT_8 / COLOR_8 formats with
 * byte coordinates.  The second sets max_bytes to 8192 * 4, uses the
 * 8_8_8_8 formats and divides x coordinates by 4.
 *
 * NOTE(review): the else between the two loops, the first loop's max_bytes,
 * most vb[] stores, the height clamping and the action taken when no new
 * buffer is available are not visible in this view.
 */
1712 r600_blit_copy(struct drm_device *dev,
1713 uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
1716 drm_radeon_private_t *dev_priv = dev->dev_private;
/* CPU pointer to the next free position in the blit vertex buffer. */
1721 vb = (u32 *) ((char *)dev->agp_buffer_map->virtual +
1722 dev_priv->blit_vb->offset + dev_priv->blit_vb->used);
1723 DRM_DEBUG("src=0x%016jx, dst=0x%016jx, size=%d\n",
1724 src_gpu_addr, dst_gpu_addr, size_bytes);
/* Path for sizes or addresses that are not multiples of 4. */
1726 if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
1729 while (size_bytes) {
1730 int cur_size = size_bytes;
/* Split each address into a 256-byte aligned base and an x offset within it. */
1731 int src_x = src_gpu_addr & 255;
1732 int dst_x = dst_gpu_addr & 255;
1734 src_gpu_addr = src_gpu_addr & ~255;
1735 dst_gpu_addr = dst_gpu_addr & ~255;
/* With both offsets zero, several full rows of max_bytes can go in one draw. */
1737 if (!src_x && !dst_x) {
1738 h = (cur_size / max_bytes);
1744 cur_size = max_bytes;
/* Otherwise clamp the row so neither source nor destination exceeds max_bytes. */
1746 if (cur_size > max_bytes)
1747 cur_size = max_bytes;
1748 if (cur_size > (max_bytes - dst_x))
1749 cur_size = (max_bytes - dst_x);
1750 if (cur_size > (max_bytes - src_x))
1751 cur_size = (max_bytes - src_x);
/* Each draw needs 48 bytes of vertex data; swap buffers when the current one is full. */
1754 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
1755 dev_priv->blit_vb->used = 0;
1756 radeon_cp_discard_buffer(dev, dev_priv->blit_vb);
1757 dev_priv->blit_vb = radeon_freelist_get(dev);
1758 if (!dev_priv->blit_vb)
1761 vb = (u32 *) ((char *)dev->agp_buffer_map->virtual +
1762 dev_priv->blit_vb->offset + dev_priv->blit_vb->used);
1775 vb[8] = i2f(dst_x + cur_size);
1777 vb[10] = i2f(src_x + cur_size);
1781 set_tex_resource(dev_priv, FMT_8,
1782 src_x + cur_size, h, src_x + cur_size,
/*
 * NOTE(review): the sync size is src_x + (cur_size * h), not
 * (src_x + cur_size) * h - confirm which is intended.
 */
1785 cp_set_surface_sync(dev_priv,
1786 R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
1789 set_render_target(dev_priv, COLOR_8,
1790 dst_x + cur_size, h,
1794 set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
1796 /* Vertex buffer setup */
1797 vb_addr = dev_priv->gart_buffers_offset +
1798 dev_priv->blit_vb->offset +
1799 dev_priv->blit_vb->used;
1800 set_vtx_resource(dev_priv, vb_addr);
1803 draw_auto(dev_priv);
1805 cp_set_surface_sync(dev_priv,
1806 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
1807 cur_size * h, dst_gpu_addr);
/* 12 dwords of vertex data consumed by this draw. */
1810 dev_priv->blit_vb->used += 12 * 4;
/* Advance past the bytes just copied. */
1812 src_gpu_addr += cur_size * h;
1813 dst_gpu_addr += cur_size * h;
1814 size_bytes -= cur_size * h;
/*
 * Second path (presumably the else branch for 4-byte aligned copies): same
 * structure as the loop above, with x coordinates expressed in 4-byte texels.
 */
1817 max_bytes = 8192 * 4;
1819 while (size_bytes) {
1820 int cur_size = size_bytes;
1821 int src_x = (src_gpu_addr & 255);
1822 int dst_x = (dst_gpu_addr & 255);
1824 src_gpu_addr = src_gpu_addr & ~255;
1825 dst_gpu_addr = dst_gpu_addr & ~255;
1827 if (!src_x && !dst_x) {
1828 h = (cur_size / max_bytes);
1834 cur_size = max_bytes;
1836 if (cur_size > max_bytes)
1837 cur_size = max_bytes;
1838 if (cur_size > (max_bytes - dst_x))
1839 cur_size = (max_bytes - dst_x);
1840 if (cur_size > (max_bytes - src_x))
1841 cur_size = (max_bytes - src_x);
1844 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
1845 dev_priv->blit_vb->used = 0;
1846 radeon_cp_discard_buffer(dev, dev_priv->blit_vb);
1847 dev_priv->blit_vb = radeon_freelist_get(dev);
1848 if (!dev_priv->blit_vb)
1851 vb = (u32 *) ((char *)dev->agp_buffer_map->virtual +
1852 dev_priv->blit_vb->offset + dev_priv->blit_vb->used);
/* Vertex x coordinates: byte offsets divided by 4, converted with i2f(). */
1855 vb[0] = i2f(dst_x / 4);
1857 vb[2] = i2f(src_x / 4);
1860 vb[4] = i2f(dst_x / 4);
1862 vb[6] = i2f(src_x / 4);
1865 vb[8] = i2f((dst_x + cur_size) / 4);
1867 vb[10] = i2f((src_x + cur_size) / 4);
1871 set_tex_resource(dev_priv, FMT_8_8_8_8,
1872 (src_x + cur_size) / 4,
1873 h, (src_x + cur_size) / 4,
1876 cp_set_surface_sync(dev_priv,
1877 R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
1880 set_render_target(dev_priv, COLOR_8_8_8_8,
1881 (dst_x + cur_size) / 4, h,
/*
 * NOTE(review): the right edge here is dst_x + (cur_size / 4), whereas the
 * vertex and render-target code above use (dst_x + cur_size) / 4.  The two
 * differ whenever dst_x is non-zero - confirm which is intended.
 */
1885 set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
1887 /* Vertex buffer setup */
1888 vb_addr = dev_priv->gart_buffers_offset +
1889 dev_priv->blit_vb->offset +
1890 dev_priv->blit_vb->used;
1891 set_vtx_resource(dev_priv, vb_addr);
1894 draw_auto(dev_priv);
1896 cp_set_surface_sync(dev_priv,
1897 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
1898 cur_size * h, dst_gpu_addr);
1901 dev_priv->blit_vb->used += 12 * 4;
1903 src_gpu_addr += cur_size * h;
1904 dst_gpu_addr += cur_size * h;
1905 size_bytes -= cur_size * h;
/*
 * r600_blit_swap - draw one textured rectangle from a source surface to a
 * destination surface.
 *
 * dev:                  DRM device; dev_priv->blit_vb receives vertex data.
 * src_gpu_addr:         source surface address.
 * dst_gpu_addr:         destination surface address.
 * sx, sy, dx, dy:       source and destination origin coordinates.
 * w, h:                 rectangle size.
 * src_pitch, dst_pitch: surface pitches; divided by cpp before being passed
 *                       to the texture and render-target setup.
 * cpp:                  divisor for the pitches (presumably bytes per pixel
 *                       - TODO confirm).
 *
 * NOTE(review): the computation of sx2/sy2/dx2/dy2, the vb[] stores, the
 * statement that selects the format pair and the action taken when no new
 * buffer is available are not visible in this view.
 */
1911 r600_blit_swap(struct drm_device *dev,
1912 uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
1913 int sx, int sy, int dx, int dy,
1914 int w, int h, int src_pitch, int dst_pitch, int cpp)
1916 drm_radeon_private_t *dev_priv = dev->dev_private;
1917 int cb_format, tex_format;
1918 int sx2, sy2, dx2, dy2;
/* The draw needs 48 bytes of vertex data; swap buffers when the current one is full. */
1922 if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
1923 dev_priv->blit_vb->used = 0;
1924 radeon_cp_discard_buffer(dev, dev_priv->blit_vb);
1925 dev_priv->blit_vb = radeon_freelist_get(dev);
1926 if (!dev_priv->blit_vb)
1930 vb = (u32 *) ((char *)dev->agp_buffer_map->virtual +
1931 dev_priv->blit_vb->offset + dev_priv->blit_vb->used);
/*
 * Three colour/texture format pairs; the selecting statement is not visible
 * (presumably a switch on cpp - TODO confirm).
 */
1955 cb_format = COLOR_8_8_8_8;
1956 tex_format = FMT_8_8_8_8;
1959 cb_format = COLOR_5_6_5;
1960 tex_format = FMT_5_6_5;
1963 cb_format = COLOR_8;
1969 set_tex_resource(dev_priv, tex_format,
1971 sy2, src_pitch / cpp,
/* Surface sync over src_pitch * sy2 bytes of the source. */
1974 cp_set_surface_sync(dev_priv,
1975 R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr);
1978 set_render_target(dev_priv, cb_format,
1979 dst_pitch / cpp, dy2,
1983 set_scissors(dev_priv, dx, dy, dx2, dy2);
1985 /* Vertex buffer setup */
1986 vb_addr = dev_priv->gart_buffers_offset +
1987 dev_priv->blit_vb->offset +
1988 dev_priv->blit_vb->used;
1989 set_vtx_resource(dev_priv, vb_addr);
1992 draw_auto(dev_priv);
/* Surface sync over dst_pitch * dy2 bytes of the destination. */
1994 cp_set_surface_sync(dev_priv,
1995 R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
1996 dst_pitch * dy2, dst_gpu_addr);
/* 12 dwords of vertex data consumed by this draw. */
1998 dev_priv->blit_vb->used += 12 * 4;