sys/compat/ndis/subr_hal.c

   1 /*-
   2  * Copyright (c) 2003
   3  *      Bill Paul <wpaul@windriver.com>.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *      This product includes software developed by Bill Paul.
  16  * 4. Neither the name of the author nor the names of any co-contributors
  17  *    may be used to endorse or promote products derived from this software
  18  *    without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
  21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  23  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
  24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  30  * THE POSSIBILITY OF SUCH DAMAGE.
  31  */
  32
  33 #include <sys/cdefs.h>
  34 __FBSDID("$FreeBSD$");
  35
  36 #include <sys/param.h>
  37 #include <sys/types.h>
  38 #include <sys/errno.h>
  39
  40 #include <sys/callout.h>
  41 #include <sys/kernel.h>
  42 #include <sys/lock.h>
  43 #include <sys/mutex.h>
  44 #include <sys/proc.h>
  45 #include <sys/sched.h>
  46 #include <sys/module.h>
  47
  48 #include <sys/systm.h>
  49 #include <machine/bus.h>
  50
  51 #include <sys/bus.h>
  52 #include <sys/rman.h>
  53
  54 #include <compat/ndis/pe_var.h>
  55 #include <compat/ndis/resource_var.h>
  56 #include <compat/ndis/cfg_var.h>
  57 #include <compat/ndis/ntoskrnl_var.h>
  58 #include <compat/ndis/hal_var.h>
  59
  60 static void KeStallExecutionProcessor(uint32_t);
  61 static void WRITE_PORT_BUFFER_ULONG(uint32_t *,
  62         uint32_t *, uint32_t);
  63 static void WRITE_PORT_BUFFER_USHORT(uint16_t *,
  64         uint16_t *, uint32_t);
  65 static void WRITE_PORT_BUFFER_UCHAR(uint8_t *,
  66         uint8_t *, uint32_t);
  67 static void WRITE_PORT_ULONG(uint32_t *, uint32_t);
  68 static void WRITE_PORT_USHORT(uint16_t *, uint16_t);
  69 static void WRITE_PORT_UCHAR(uint8_t *, uint8_t);
  70 static uint32_t READ_PORT_ULONG(uint32_t *);
  71 static uint16_t READ_PORT_USHORT(uint16_t *);
  72 static uint8_t READ_PORT_UCHAR(uint8_t *);
  73 static void READ_PORT_BUFFER_ULONG(uint32_t *,
  74         uint32_t *, uint32_t);
  75 static void READ_PORT_BUFFER_USHORT(uint16_t *,
  76         uint16_t *, uint32_t);
  77 static void READ_PORT_BUFFER_UCHAR(uint8_t *,
  78         uint8_t *, uint32_t);
  79 static uint64_t KeQueryPerformanceCounter(uint64_t *);
  80 static void _KeLowerIrql(uint8_t);
  81 static uint8_t KeRaiseIrqlToDpcLevel(void);
  82 static void dummy (void);
  83
  84 #define NDIS_MAXCPUS 64
  85 static struct mtx disp_lock[NDIS_MAXCPUS];
  86
  87 int
  88 hal_libinit()
  89 {
  90         image_patch_table       *patch;
  91         int                     i;
  92
  93         for (i = 0; i < NDIS_MAXCPUS; i++)
  94                 mtx_init(&disp_lock[i], "HAL preemption lock",
  95                     "HAL lock", MTX_RECURSE|MTX_DEF);
  96
  97         patch = hal_functbl;
  98         while (patch->ipt_func != NULL) {
  99                 windrv_wrap((funcptr)patch->ipt_func,
 100                     (funcptr *)&patch->ipt_wrap,
 101                     patch->ipt_argcnt, patch->ipt_ftype);
 102                 patch++;
 103         }
 104
 105
 106         return(0);
 107 }
 108
 109 int
 110 hal_libfini()
 111 {
 112         image_patch_table       *patch;
 113         int                     i;
 114
 115         for (i = 0; i < NDIS_MAXCPUS; i++)
 116                 mtx_destroy(&disp_lock[i]);
 117
 118         patch = hal_functbl;
 119         while (patch->ipt_func != NULL) {
 120                 windrv_unwrap(patch->ipt_wrap);
 121                 patch++;
 122         }
 123
 124         return(0);
 125 }
 126
 127 static void
 128 KeStallExecutionProcessor(usecs)
 129         uint32_t                usecs;
 130 {
 131         DELAY(usecs);
 132         return;
 133 }
 134
 135 static void
 136 WRITE_PORT_ULONG(port, val)
 137         uint32_t                *port;
 138         uint32_t                val;
 139 {
 140         bus_space_write_4(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val);
 141         return;
 142 }
 143
 144 static void
 145 WRITE_PORT_USHORT(port, val)
 146         uint16_t                *port;
 147         uint16_t                val;
 148 {
 149         bus_space_write_2(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val);
 150         return;
 151 }
 152
 153 static void
 154 WRITE_PORT_UCHAR(port, val)
 155         uint8_t                 *port;
 156         uint8_t                 val;
 157 {
 158         bus_space_write_1(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val);
 159         return;
 160 }
 161
 162 static void
 163 WRITE_PORT_BUFFER_ULONG(port, val, cnt)
 164         uint32_t                *port;
 165         uint32_t                *val;
 166         uint32_t                cnt;
 167 {
 168         bus_space_write_multi_4(NDIS_BUS_SPACE_IO, 0x0,
 169             (bus_size_t)port, val, cnt);
 170         return;
 171 }
 172
 173 static void
 174 WRITE_PORT_BUFFER_USHORT(port, val, cnt)
 175         uint16_t                *port;
 176         uint16_t                *val;
 177         uint32_t                cnt;
 178 {
 179         bus_space_write_multi_2(NDIS_BUS_SPACE_IO, 0x0,
 180             (bus_size_t)port, val, cnt);
 181         return;
 182 }
 183
 184 static void
 185 WRITE_PORT_BUFFER_UCHAR(port, val, cnt)
 186         uint8_t                 *port;
 187         uint8_t                 *val;
 188         uint32_t                cnt;
 189 {
 190         bus_space_write_multi_1(NDIS_BUS_SPACE_IO, 0x0,
 191             (bus_size_t)port, val, cnt);
 192         return;
 193 }
 194
 195 static uint16_t
 196 READ_PORT_USHORT(port)
 197         uint16_t                *port;
 198 {
 199         return(bus_space_read_2(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port));
 200 }
 201
 202 static uint32_t
 203 READ_PORT_ULONG(port)
 204         uint32_t                *port;
 205 {
 206         return(bus_space_read_4(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port));
 207 }
 208
 209 static uint8_t
 210 READ_PORT_UCHAR(port)
 211         uint8_t                 *port;
 212 {
 213         return(bus_space_read_1(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port));
 214 }
 215
 216 static void
 217 READ_PORT_BUFFER_ULONG(port, val, cnt)
 218         uint32_t                *port;
 219         uint32_t                *val;
 220         uint32_t                cnt;
 221 {
 222         bus_space_read_multi_4(NDIS_BUS_SPACE_IO, 0x0,
 223             (bus_size_t)port, val, cnt);
 224         return;
 225 }
 226
 227 static void
 228 READ_PORT_BUFFER_USHORT(port, val, cnt)
 229         uint16_t                *port;
 230         uint16_t                *val;
 231         uint32_t                cnt;
 232 {
 233         bus_space_read_multi_2(NDIS_BUS_SPACE_IO, 0x0,
 234             (bus_size_t)port, val, cnt);
 235         return;
 236 }
 237
 238 static void
 239 READ_PORT_BUFFER_UCHAR(port, val, cnt)
 240         uint8_t                 *port;
 241         uint8_t                 *val;
 242         uint32_t                cnt;
 243 {
 244         bus_space_read_multi_1(NDIS_BUS_SPACE_IO, 0x0,
 245             (bus_size_t)port, val, cnt);
 246         return;
 247 }
 248
 249 /*
 250  * The spinlock implementation in Windows differs from that of FreeBSD.
 251  * The basic operation of spinlocks involves two steps: 1) spin in a
 252  * tight loop while trying to acquire a lock, 2) after obtaining the
 253  * lock, disable preemption. (Note that on uniprocessor systems, you're
 254  * allowed to skip the first step and just lock out pre-emption, since
 255  * it's not possible for you to be in contention with another running
 256  * thread.) Later, you release the lock then re-enable preemption.
 257  * The difference between Windows and FreeBSD lies in how preemption
 258  * is disabled. In FreeBSD, it's done using critical_enter(), which on
 259  * the x86 arch translates to a cli instruction. This masks off all
 260  * interrupts, and effectively stops the scheduler from ever running
 261  * so _nothing_ can execute except the current thread. In Windows,
 262  * preemption is disabled by raising the processor IRQL to DISPATCH_LEVEL.
 263  * This stops other threads from running, but does _not_ block device
 264  * interrupts. This means ISRs can still run, and they can make other
 265  * threads runable, but those other threads won't be able to execute
 266  * until the current thread lowers the IRQL to something less than
 267  * DISPATCH_LEVEL.
 268  *
 269  * There's another commonly used IRQL in Windows, which is APC_LEVEL.
 270  * An APC is an Asynchronous Procedure Call, which differs from a DPC
 271  * (Defered Procedure Call) in that a DPC is queued up to run in
 272  * another thread, while an APC runs in the thread that scheduled
 273  * it (similar to a signal handler in a UNIX process). We don't
 274  * actually support the notion of APCs in FreeBSD, so for now, the
 275  * only IRQLs we're interested in are DISPATCH_LEVEL and PASSIVE_LEVEL.
 276  *
 277  * To simulate DISPATCH_LEVEL, we raise the current thread's priority
 278  * to PI_REALTIME, which is the highest we can give it. This should,
 279  * if I understand things correctly, prevent anything except for an
 280  * interrupt thread from preempting us. PASSIVE_LEVEL is basically
 281  * everything else.
 282  *
 283  * Be aware that, at least on the x86 arch, the Windows spinlock
 284  * functions are divided up in peculiar ways. The actual spinlock
 285  * functions are KfAcquireSpinLock() and KfReleaseSpinLock(), and
 286  * they live in HAL.dll. Meanwhile, KeInitializeSpinLock(),
 287  * KefAcquireSpinLockAtDpcLevel() and KefReleaseSpinLockFromDpcLevel()
 288  * live in ntoskrnl.exe. Most Windows source code will call
 289  * KeAcquireSpinLock() and KeReleaseSpinLock(), but these are just
 290  * macros that call KfAcquireSpinLock() and KfReleaseSpinLock().
 291  * KefAcquireSpinLockAtDpcLevel() and KefReleaseSpinLockFromDpcLevel()
 292  * perform the lock aquisition/release functions without doing the
 293  * IRQL manipulation, and are used when one is already running at
 294  * DISPATCH_LEVEL. Make sense? Good.
 295  *
 296  * According to the Microsoft documentation, any thread that calls
 297  * KeAcquireSpinLock() must be running at IRQL <= DISPATCH_LEVEL. If
 298  * we detect someone trying to acquire a spinlock from DEVICE_LEVEL
 299  * or HIGH_LEVEL, we panic.
 300  *
 301  * Alternate sleep-lock-based spinlock implementation
 302  * --------------------------------------------------
 303  *
 304  * The earlier spinlock implementation was arguably a bit of a hack
 305  * and presented several problems. It was basically designed to provide
 306  * the functionality of spinlocks without incurring the wrath of
 307  * WITNESS. We could get away with using both our spinlock implementation
 308  * and FreeBSD sleep locks at the same time, but if WITNESS knew what
 309  * we were really up to, it would have spanked us rather severely.
 310  *
 311  * There's another method we can use based entirely on sleep locks.
 312  * First, it's important to realize that everything we're locking
 313  * resides inside Project Evil itself: any critical data being locked
 314  * by drivers belongs to the drivers, and should not be referenced
 315  * by any other OS code outside of the NDISulator. The priority-based
 316  * locking scheme has system-wide effects, just like real spinlocks
 317  * (blocking preemption affects the whole CPU), but since we keep all
 318  * our critical data private, we can use a simpler mechanism that
 319  * affects only code/threads directly related to Project Evil.
 320  *
 321  * The idea is to create a sleep lock mutex for each CPU in the system.
 322  * When a CPU running in the NDISulator wants to acquire a spinlock, it
 323  * does the following:
 324  * - Pin ourselves to the current CPU
 325  * - Acquire the mutex for the current CPU
 326  * - Spin on the spinlock variable using atomic test and set, just like
 327  *   a real spinlock.
 328  * - Once we have the lock, we execute our critical code
 329  *
 330  * To give up the lock, we do:
 331  * - Clear the spinlock variable with an atomic op
 332  * - Release the per-CPU mutex
 333  * - Unpin ourselves from the current CPU.
 334  *
 335  * On a uniprocessor system, this means all threads that access protected
 336  * data are serialized through the per-CPU mutex. After one thread
 337  * acquires the 'spinlock,' any other thread that uses a spinlock on the
 338  * current CPU will block on the per-CPU mutex, which has the same general
 339  * effect of blocking pre-emption, but _only_ for those threads that are
 340  * running NDISulator code.
 341  *
 342  * On a multiprocessor system, threads on different CPUs all block on
 343  * their respective per-CPU mutex, and the atomic test/set operation
 344  * on the spinlock variable provides inter-CPU synchronization, though
 345  * only for threads running NDISulator code.
 346  *
 347  * This method solves an important problem. In Windows, you're allowed
 348  * to do an ExAllocatePoolWithTag() with a spinlock held, provided you
 349  * allocate from NonPagedPool. This implies an atomic heap allocation
 350  * that will not cause the current thread to sleep. (You can't sleep
 351  * while holding real spinlock: clowns will eat you.) But in FreeBSD,
 352  * malloc(9) _always_ triggers the acquisition of a sleep lock, even
 353  * when you use M_NOWAIT. This is not a problem for FreeBSD native
 354  * code: you're allowed to sleep in things like interrupt threads. But
 355  * it is a problem with the old priority-based spinlock implementation:
 356  * even though we get away with it most of the time, we really can't
 357  * do a malloc(9) after doing a KeAcquireSpinLock() or KeRaiseIrql().
 358  * With the new implementation, it's not a problem: you're allowed to
 359  * acquire more than one sleep lock (as long as you avoid lock order
 360  * reversals).
 361  *
 362  * The one drawback to this approach is that now we have a lot of
 363  * contention on one per-CPU mutex within the NDISulator code. Whether
 364  * or not this is preferable to the expected Windows spinlock behavior
 365  * of blocking pre-emption is debatable.
 366  */
 367
 368 uint8_t
 369 KfAcquireSpinLock(lock)
 370         kspin_lock              *lock;
 371 {
 372         uint8_t                 oldirql;
 373
 374         KeRaiseIrql(DISPATCH_LEVEL, &oldirql);
 375         KeAcquireSpinLockAtDpcLevel(lock);
 376
 377         return(oldirql);
 378 }
 379
 380 void
 381 KfReleaseSpinLock(lock, newirql)
 382         kspin_lock              *lock;
 383         uint8_t                 newirql;
 384 {
 385         KeReleaseSpinLockFromDpcLevel(lock);
 386         KeLowerIrql(newirql);
 387
 388         return;
 389 }
 390
 391 uint8_t
 392 KeGetCurrentIrql()
 393 {
 394         if (mtx_owned(&disp_lock[curthread->td_oncpu]))
 395                 return(DISPATCH_LEVEL);
 396         return(PASSIVE_LEVEL);
 397 }
 398
 399 static uint64_t
 400 KeQueryPerformanceCounter(freq)
 401         uint64_t                *freq;
 402 {
 403         if (freq != NULL)
 404                 *freq = hz;
 405
 406         return((uint64_t)ticks);
 407 }
 408
 409 uint8_t
 410 KfRaiseIrql(irql)
 411         uint8_t                 irql;
 412 {
 413         uint8_t                 oldirql;
 414
 415         oldirql = KeGetCurrentIrql();
 416
 417         /* I am so going to hell for this. */
 418         if (oldirql > irql)
 419                 panic("IRQL_NOT_LESS_THAN");
 420
 421         if (oldirql != DISPATCH_LEVEL) {
 422                 sched_pin();
 423                 mtx_lock(&disp_lock[curthread->td_oncpu]);
 424         }
 425 /*printf("RAISE IRQL: %d %d\n", irql, oldirql);*/
 426
 427         return(oldirql);
 428 }
 429
 430 void
 431 KfLowerIrql(oldirql)
 432         uint8_t                 oldirql;
 433 {
 434         if (oldirql == DISPATCH_LEVEL)
 435                 return;
 436
 437         if (KeGetCurrentIrql() != DISPATCH_LEVEL)
 438                 panic("IRQL_NOT_GREATER_THAN");
 439
 440         mtx_unlock(&disp_lock[curthread->td_oncpu]);
 441         sched_unpin();
 442
 443         return;
 444 }
 445
 446 static uint8_t
 447 KeRaiseIrqlToDpcLevel(void)
 448 {
 449         uint8_t                 irql;
 450
 451         KeRaiseIrql(DISPATCH_LEVEL, &irql);
 452         return(irql);
 453 }
 454
 455 static void
 456 _KeLowerIrql(oldirql)
 457         uint8_t                 oldirql;
 458 {
 459         KeLowerIrql(oldirql);
 460         return;
 461 }
 462
 463 static void dummy()
 464 {
 465         printf ("hal dummy called...\n");
 466         return;
 467 }
 468
 469 image_patch_table hal_functbl[] = {
 470         IMPORT_SFUNC(KeStallExecutionProcessor, 1),
 471         IMPORT_SFUNC(WRITE_PORT_ULONG, 2),
 472         IMPORT_SFUNC(WRITE_PORT_USHORT, 2),
 473         IMPORT_SFUNC(WRITE_PORT_UCHAR, 2),
 474         IMPORT_SFUNC(WRITE_PORT_BUFFER_ULONG, 3),
 475         IMPORT_SFUNC(WRITE_PORT_BUFFER_USHORT, 3),
 476         IMPORT_SFUNC(WRITE_PORT_BUFFER_UCHAR, 3),
 477         IMPORT_SFUNC(READ_PORT_ULONG, 1),
 478         IMPORT_SFUNC(READ_PORT_USHORT, 1),
 479         IMPORT_SFUNC(READ_PORT_UCHAR, 1),
 480         IMPORT_SFUNC(READ_PORT_BUFFER_ULONG, 3),
 481         IMPORT_SFUNC(READ_PORT_BUFFER_USHORT, 3),
 482         IMPORT_SFUNC(READ_PORT_BUFFER_UCHAR, 3),
 483         IMPORT_FFUNC(KfAcquireSpinLock, 1),
 484         IMPORT_FFUNC(KfReleaseSpinLock, 1),
 485         IMPORT_SFUNC(KeGetCurrentIrql, 0),
 486         IMPORT_SFUNC(KeQueryPerformanceCounter, 1),
 487         IMPORT_FFUNC(KfLowerIrql, 1),
 488         IMPORT_FFUNC(KfRaiseIrql, 1),
 489         IMPORT_SFUNC(KeRaiseIrqlToDpcLevel, 0),
 490 #undef KeLowerIrql
 491         IMPORT_SFUNC_MAP(KeLowerIrql, _KeLowerIrql, 1),
 492
 493         /*
 494          * This last entry is a catch-all for any function we haven't
 495          * implemented yet. The PE import list patching routine will
 496          * use it for any function that doesn't have an explicit match
 497          * in this table.
 498          */
 499
 500         { NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL },
 501
 502         /* End of list. */
 503
 504         { NULL, NULL, NULL }
 505 };