2 * Copyright (C) 2010 Lawrence Livermore National Security, LLC.
3 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
4 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
7 * This file is part of the SPL, Solaris Porting Layer.
8 * For details, see <http://zfsonlinux.org/>.
10 * The SPL is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; either version 2 of the License, or (at your
13 * option) any later version.
15 * The SPL is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 * You should have received a copy of the GNU General Public License along
21 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
24 * Solaris Porting Layer (SPL) Thread Specific Data Implementation.
26 * Thread specific data is implemented using a hash table; this avoids
27 * the need to add a member to the task structure and allows maximum
28 * portability between kernels. This implementation has been optimized
29 * to keep the tsd_set() and tsd_get() times as small as possible.
31 * The majority of the entries in the hash table are for specific tsd
32 * entries. These entries are hashed by the product of their key and
33 * pid because by design the key and pid are guaranteed to be unique.
34 * Their product also has the desirable property that it will be uniformly
35 * distributed over the hash bins provided neither the pid nor key is zero.
36 * Under Linux pid zero belongs to the idle (swapper) task and thus won't
37 * be used, and this implementation is careful never to assign a zero key.
38 * By default the hash table is sized to 512 bins which is expected to
39 * be sufficient for light to moderate usage of thread specific data.
41 * The hash table contains two additional types of entries. The first
42 * type of entry is called a 'key' entry and it is added to the hash during
43 * tsd_create(). It is used to store the address of the destructor function
44 * and it is used as an anchor point. All tsd entries which use the same
45 * key will be linked to this entry. This is used during tsd_destroy() to
46 * quickly call the destructor function for all tsd associated with the key.
47 * The 'key' entry may be looked up with tsd_hash_search() by passing the
48 * key you wish to lookup and DTOR_PID constant as the pid.
50 * The second type of entry is called a 'pid' entry and it is added to the
51 * hash the first time a process sets a key. The 'pid' entry is also used
52 * as an anchor and all tsd for the process will be linked to it. This
53 * list is used during tsd_exit() to ensure all registered destructors
54 * are run for the process. The 'pid' entry may be looked up with
55 * tsd_hash_search() by passing the PID_KEY constant as the key, and
56 * the process pid. Note that tsd_exit() is called by thread_exit()
57 * so if you are using the Solaris thread API you should not need to call
58 * tsd_exit() directly.
63 #include <sys/thread.h>
65 #include <linux/hash.h>
67 typedef struct tsd_hash_bin {
69 struct hlist_head hb_head;
72 typedef struct tsd_hash_table {
76 tsd_hash_bin_t *ht_bins;
79 typedef struct tsd_hash_entry {
84 struct hlist_node he_list;
85 struct list_head he_key_list;
86 struct list_head he_pid_list;
89 static tsd_hash_table_t *tsd_hash_table = NULL;
93 * tsd_hash_search - searches hash table for tsd_hash_entry
98 static tsd_hash_entry_t *
99 tsd_hash_search(tsd_hash_table_t *table, uint_t key, pid_t pid)
101 struct hlist_node *node = NULL;
102 tsd_hash_entry_t *entry;
106 hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
107 bin = &table->ht_bins[hash];
108 spin_lock(&bin->hb_lock);
109 hlist_for_each(node, &bin->hb_head) {
110 entry = list_entry(node, tsd_hash_entry_t, he_list);
111 if ((entry->he_key == key) && (entry->he_pid == pid)) {
112 spin_unlock(&bin->hb_lock);
117 spin_unlock(&bin->hb_lock);
122 * tsd_hash_dtor - call the destructor and free all entries on the list
123 * @work: list of hash entries
125 * For a list of entries which have all already been removed from the
126 * hash call their registered destructor then free the associated memory.
129 tsd_hash_dtor(struct hlist_head *work)
131 tsd_hash_entry_t *entry;
133 while (!hlist_empty(work)) {
134 entry = hlist_entry(work->first, tsd_hash_entry_t, he_list);
135 hlist_del(&entry->he_list);
137 if (entry->he_dtor && entry->he_pid != DTOR_PID)
138 entry->he_dtor(entry->he_value);
140 kmem_free(entry, sizeof (tsd_hash_entry_t));
145 * tsd_hash_add - adds an entry to hash table
150 * The caller is responsible for ensuring the unique key/pid do not
151 * already exist in the hash table. This is possible because all entries
152 * are thread specific thus a concurrent thread will never attempt to
153 * add this key/pid. Because multiple bins must be checked to add
154 * links to the dtor and pid entries the entire table is locked.
157 tsd_hash_add(tsd_hash_table_t *table, uint_t key, pid_t pid, void *value)
159 tsd_hash_entry_t *entry, *dtor_entry, *pid_entry;
164 ASSERT3P(tsd_hash_search(table, key, pid), ==, NULL);
166 /* New entry allocate structure, set value, and add to hash */
167 entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
173 entry->he_value = value;
174 INIT_HLIST_NODE(&entry->he_list);
175 INIT_LIST_HEAD(&entry->he_key_list);
176 INIT_LIST_HEAD(&entry->he_pid_list);
178 spin_lock(&table->ht_lock);
180 /* Destructor entry must exist for all valid keys */
181 dtor_entry = tsd_hash_search(table, entry->he_key, DTOR_PID);
182 ASSERT3P(dtor_entry, !=, NULL);
183 entry->he_dtor = dtor_entry->he_dtor;
185 /* Process entry must exist for all valid processes */
186 pid_entry = tsd_hash_search(table, PID_KEY, entry->he_pid);
187 ASSERT3P(pid_entry, !=, NULL);
189 hash = hash_long((ulong_t)key * (ulong_t)pid, table->ht_bits);
190 bin = &table->ht_bins[hash];
191 spin_lock(&bin->hb_lock);
193 /* Add to the hash, key, and pid lists */
194 hlist_add_head(&entry->he_list, &bin->hb_head);
195 list_add(&entry->he_key_list, &dtor_entry->he_key_list);
196 list_add(&entry->he_pid_list, &pid_entry->he_pid_list);
198 spin_unlock(&bin->hb_lock);
199 spin_unlock(&table->ht_lock);
205 * tsd_hash_add_key - adds a destructor entry to the hash table
208 * @dtor: key destructor
210 * For every unique key there is a single entry in the hash which is used
211 * as anchor. All other thread specific entries for this key are linked
212 * to this anchor via the 'he_key_list' list head. On return the keyp
213 * will be set to the next available key for the hash table.
216 tsd_hash_add_key(tsd_hash_table_t *table, uint_t *keyp, dtor_func_t dtor)
218 tsd_hash_entry_t *tmp_entry, *entry;
221 int keys_checked = 0;
223 ASSERT3P(table, !=, NULL);
225 /* Allocate entry to be used as a destructor for this key */
226 entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
230 /* Determine next available key value */
231 spin_lock(&table->ht_lock);
233 /* Limited to TSD_KEYS_MAX concurrent unique keys */
234 if (table->ht_key++ > TSD_KEYS_MAX)
237 /* Ensure failure when all TSD_KEYS_MAX keys are in use */
238 if (keys_checked++ >= TSD_KEYS_MAX) {
239 spin_unlock(&table->ht_lock);
243 tmp_entry = tsd_hash_search(table, table->ht_key, DTOR_PID);
246 /* Add destructor entry in to hash table */
247 entry->he_key = *keyp = table->ht_key;
248 entry->he_pid = DTOR_PID;
249 entry->he_dtor = dtor;
250 entry->he_value = NULL;
251 INIT_HLIST_NODE(&entry->he_list);
252 INIT_LIST_HEAD(&entry->he_key_list);
253 INIT_LIST_HEAD(&entry->he_pid_list);
255 hash = hash_long((ulong_t)*keyp * (ulong_t)DTOR_PID, table->ht_bits);
256 bin = &table->ht_bins[hash];
257 spin_lock(&bin->hb_lock);
259 hlist_add_head(&entry->he_list, &bin->hb_head);
261 spin_unlock(&bin->hb_lock);
262 spin_unlock(&table->ht_lock);
268 * tsd_hash_add_pid - adds a process entry to the hash table
272 * For every process there is a single entry in the hash which is used
273 * as anchor. All other thread specific entries for this process are
274 * linked to this anchor via the 'he_pid_list' list head.
277 tsd_hash_add_pid(tsd_hash_table_t *table, pid_t pid)
279 tsd_hash_entry_t *entry;
283 /* Allocate entry to be used as the process reference */
284 entry = kmem_alloc(sizeof (tsd_hash_entry_t), KM_PUSHPAGE);
288 spin_lock(&table->ht_lock);
289 entry->he_key = PID_KEY;
291 entry->he_dtor = NULL;
292 entry->he_value = NULL;
293 INIT_HLIST_NODE(&entry->he_list);
294 INIT_LIST_HEAD(&entry->he_key_list);
295 INIT_LIST_HEAD(&entry->he_pid_list);
297 hash = hash_long((ulong_t)PID_KEY * (ulong_t)pid, table->ht_bits);
298 bin = &table->ht_bins[hash];
299 spin_lock(&bin->hb_lock);
301 hlist_add_head(&entry->he_list, &bin->hb_head);
303 spin_unlock(&bin->hb_lock);
304 spin_unlock(&table->ht_lock);
310 * tsd_hash_del - delete an entry from hash table, key, and pid lists
316 tsd_hash_del(tsd_hash_table_t *table, tsd_hash_entry_t *entry)
318 hlist_del(&entry->he_list);
319 list_del_init(&entry->he_key_list);
320 list_del_init(&entry->he_pid_list);
324 * tsd_hash_table_init - allocate a hash table
325 * @bits: hash table size
327 * A hash table with 2^bits bins will be created, it may not be resized
328 * after the fact and must be free'd with tsd_hash_table_fini().
330 static tsd_hash_table_t *
331 tsd_hash_table_init(uint_t bits)
333 tsd_hash_table_t *table;
334 int hash, size = (1 << bits);
336 table = kmem_zalloc(sizeof (tsd_hash_table_t), KM_SLEEP);
340 table->ht_bins = kmem_zalloc(sizeof (tsd_hash_bin_t) * size, KM_SLEEP);
341 if (table->ht_bins == NULL) {
342 kmem_free(table, sizeof (tsd_hash_table_t));
346 for (hash = 0; hash < size; hash++) {
347 spin_lock_init(&table->ht_bins[hash].hb_lock);
348 INIT_HLIST_HEAD(&table->ht_bins[hash].hb_head);
351 spin_lock_init(&table->ht_lock);
352 table->ht_bits = bits;
359 * tsd_hash_table_fini - free a hash table
362 * Free a hash table allocated by tsd_hash_table_init(). If the hash
363 * table is not empty this function will call the proper destructor for
364 * all remaining entries before freeing the memory used by those entries.
367 tsd_hash_table_fini(tsd_hash_table_t *table)
371 tsd_hash_entry_t *entry;
374 ASSERT3P(table, !=, NULL);
375 spin_lock(&table->ht_lock);
376 for (i = 0, size = (1 << table->ht_bits); i < size; i++) {
377 bin = &table->ht_bins[i];
378 spin_lock(&bin->hb_lock);
379 while (!hlist_empty(&bin->hb_head)) {
380 entry = hlist_entry(bin->hb_head.first,
381 tsd_hash_entry_t, he_list);
382 tsd_hash_del(table, entry);
383 hlist_add_head(&entry->he_list, &work);
385 spin_unlock(&bin->hb_lock);
387 spin_unlock(&table->ht_lock);
389 tsd_hash_dtor(&work);
390 kmem_free(table->ht_bins, sizeof (tsd_hash_bin_t)*(1<<table->ht_bits));
391 kmem_free(table, sizeof (tsd_hash_table_t));
395 * tsd_remove_entry - remove a tsd entry for this thread
396 * @entry: entry to remove
398 * Remove the thread specific data @entry for this thread.
399 * If this is the last entry for this thread, also remove the PID entry.
402 tsd_remove_entry(tsd_hash_entry_t *entry)
405 tsd_hash_table_t *table;
406 tsd_hash_entry_t *pid_entry;
407 tsd_hash_bin_t *pid_entry_bin, *entry_bin;
410 table = tsd_hash_table;
411 ASSERT3P(table, !=, NULL);
412 ASSERT3P(entry, !=, NULL);
414 spin_lock(&table->ht_lock);
416 hash = hash_long((ulong_t)entry->he_key *
417 (ulong_t)entry->he_pid, table->ht_bits);
418 entry_bin = &table->ht_bins[hash];
420 /* save the possible pid_entry */
421 pid_entry = list_entry(entry->he_pid_list.next, tsd_hash_entry_t,
425 spin_lock(&entry_bin->hb_lock);
426 tsd_hash_del(table, entry);
427 hlist_add_head(&entry->he_list, &work);
428 spin_unlock(&entry_bin->hb_lock);
430 /* if pid_entry is indeed pid_entry, then remove it if it's empty */
431 if (pid_entry->he_key == PID_KEY &&
432 list_empty(&pid_entry->he_pid_list)) {
433 hash = hash_long((ulong_t)pid_entry->he_key *
434 (ulong_t)pid_entry->he_pid, table->ht_bits);
435 pid_entry_bin = &table->ht_bins[hash];
437 spin_lock(&pid_entry_bin->hb_lock);
438 tsd_hash_del(table, pid_entry);
439 hlist_add_head(&pid_entry->he_list, &work);
440 spin_unlock(&pid_entry_bin->hb_lock);
443 spin_unlock(&table->ht_lock);
445 tsd_hash_dtor(&work);
449 * tsd_set - set thread specific data
451 * @value: value to set
453 * Caller must prevent racing tsd_create() or tsd_destroy(), protected
454 * from racing tsd_get() or tsd_set() because it is thread specific.
455 * This function has been optimized to be fast for the update case.
456 * When setting the tsd initially it will be slower due to additional
457 * required locking and potential memory allocations.
460 tsd_set(uint_t key, void *value)
462 tsd_hash_table_t *table;
463 tsd_hash_entry_t *entry;
466 /* mark remove if value is NULL */
467 boolean_t remove = (value == NULL);
469 table = tsd_hash_table;
470 pid = curthread->pid;
471 ASSERT3P(table, !=, NULL);
473 if ((key == 0) || (key > TSD_KEYS_MAX))
476 /* Entry already exists in hash table update value */
477 entry = tsd_hash_search(table, key, pid);
479 entry->he_value = value;
480 /* remove the entry */
482 tsd_remove_entry(entry);
486 /* don't create entry if value is NULL */
490 /* Add a process entry to the hash if not yet exists */
491 entry = tsd_hash_search(table, PID_KEY, pid);
493 rc = tsd_hash_add_pid(table, pid);
498 rc = tsd_hash_add(table, key, pid, value);
501 EXPORT_SYMBOL(tsd_set);
504 * tsd_get - get thread specific data
507 * Caller must prevent racing tsd_create() or tsd_destroy(). This
508 * implementation is designed to be fast and scalable, it does not
509 * lock the entire table only a single hash bin.
514 tsd_hash_entry_t *entry;
516 ASSERT3P(tsd_hash_table, !=, NULL);
518 if ((key == 0) || (key > TSD_KEYS_MAX))
521 entry = tsd_hash_search(tsd_hash_table, key, curthread->pid);
525 return (entry->he_value);
527 EXPORT_SYMBOL(tsd_get);
530 * tsd_get_by_thread - get thread specific data for specified thread
532 * @thread: thread to lookup
534 * Caller must prevent racing tsd_create() or tsd_destroy(). This
535 * implementation is designed to be fast and scalable, it does not
536 * lock the entire table only a single hash bin.
539 tsd_get_by_thread(uint_t key, kthread_t *thread)
541 tsd_hash_entry_t *entry;
543 ASSERT3P(tsd_hash_table, !=, NULL);
545 if ((key == 0) || (key > TSD_KEYS_MAX))
548 entry = tsd_hash_search(tsd_hash_table, key, thread->pid);
552 return (entry->he_value);
554 EXPORT_SYMBOL(tsd_get_by_thread);
557 * tsd_create - create thread specific data key
558 * @keyp: lookup key address
559 * @dtor: destructor called during tsd_destroy() or tsd_exit()
561 * Provided key must be set to 0 or it is assumed to be already in use.
562 * The dtor is allowed to be NULL in which case no additional cleanup
563 * for the data is performed during tsd_destroy() or tsd_exit().
565 * Caller must prevent racing tsd_set() or tsd_get(), this function is
566 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
569 tsd_create(uint_t *keyp, dtor_func_t dtor)
571 ASSERT3P(keyp, !=, NULL);
575 (void) tsd_hash_add_key(tsd_hash_table, keyp, dtor);
577 EXPORT_SYMBOL(tsd_create);
580 * tsd_destroy - destroy thread specific data
581 * @keyp: lookup key address
583 * Destroys the thread specific data on all threads which use this key.
585 * Caller must prevent racing tsd_set() or tsd_get(), this function is
586 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
589 tsd_destroy(uint_t *keyp)
592 tsd_hash_table_t *table;
593 tsd_hash_entry_t *dtor_entry, *entry;
594 tsd_hash_bin_t *dtor_entry_bin, *entry_bin;
597 table = tsd_hash_table;
598 ASSERT3P(table, !=, NULL);
600 spin_lock(&table->ht_lock);
601 dtor_entry = tsd_hash_search(table, *keyp, DTOR_PID);
602 if (dtor_entry == NULL) {
603 spin_unlock(&table->ht_lock);
608 * All threads which use this key must be linked off of the
609 * DTOR_PID entry. They are removed from the hash table and
610 * linked in to a private working list to be destroyed.
612 while (!list_empty(&dtor_entry->he_key_list)) {
613 entry = list_entry(dtor_entry->he_key_list.next,
614 tsd_hash_entry_t, he_key_list);
615 ASSERT3U(dtor_entry->he_key, ==, entry->he_key);
616 ASSERT3P(dtor_entry->he_dtor, ==, entry->he_dtor);
618 hash = hash_long((ulong_t)entry->he_key *
619 (ulong_t)entry->he_pid, table->ht_bits);
620 entry_bin = &table->ht_bins[hash];
622 spin_lock(&entry_bin->hb_lock);
623 tsd_hash_del(table, entry);
624 hlist_add_head(&entry->he_list, &work);
625 spin_unlock(&entry_bin->hb_lock);
628 hash = hash_long((ulong_t)dtor_entry->he_key *
629 (ulong_t)dtor_entry->he_pid, table->ht_bits);
630 dtor_entry_bin = &table->ht_bins[hash];
632 spin_lock(&dtor_entry_bin->hb_lock);
633 tsd_hash_del(table, dtor_entry);
634 hlist_add_head(&dtor_entry->he_list, &work);
635 spin_unlock(&dtor_entry_bin->hb_lock);
636 spin_unlock(&table->ht_lock);
638 tsd_hash_dtor(&work);
641 EXPORT_SYMBOL(tsd_destroy);
644 * tsd_exit - destroys all thread specific data for this thread
646 * Destroys all the thread specific data for this thread.
648 * Caller must prevent racing tsd_set() or tsd_get(), this function is
649 * safe from racing tsd_create(), tsd_destroy(), and tsd_exit().
655 tsd_hash_table_t *table;
656 tsd_hash_entry_t *pid_entry, *entry;
657 tsd_hash_bin_t *pid_entry_bin, *entry_bin;
660 table = tsd_hash_table;
661 ASSERT3P(table, !=, NULL);
663 spin_lock(&table->ht_lock);
664 pid_entry = tsd_hash_search(table, PID_KEY, curthread->pid);
665 if (pid_entry == NULL) {
666 spin_unlock(&table->ht_lock);
671 * All keys associated with this pid must be linked off of the
672 * PID_KEY entry. They are removed from the hash table and
673 * linked in to a private working list to be destroyed.
676 while (!list_empty(&pid_entry->he_pid_list)) {
677 entry = list_entry(pid_entry->he_pid_list.next,
678 tsd_hash_entry_t, he_pid_list);
679 ASSERT3U(pid_entry->he_pid, ==, entry->he_pid);
681 hash = hash_long((ulong_t)entry->he_key *
682 (ulong_t)entry->he_pid, table->ht_bits);
683 entry_bin = &table->ht_bins[hash];
685 spin_lock(&entry_bin->hb_lock);
686 tsd_hash_del(table, entry);
687 hlist_add_head(&entry->he_list, &work);
688 spin_unlock(&entry_bin->hb_lock);
691 hash = hash_long((ulong_t)pid_entry->he_key *
692 (ulong_t)pid_entry->he_pid, table->ht_bits);
693 pid_entry_bin = &table->ht_bins[hash];
695 spin_lock(&pid_entry_bin->hb_lock);
696 tsd_hash_del(table, pid_entry);
697 hlist_add_head(&pid_entry->he_list, &work);
698 spin_unlock(&pid_entry_bin->hb_lock);
699 spin_unlock(&table->ht_lock);
701 tsd_hash_dtor(&work);
703 EXPORT_SYMBOL(tsd_exit);
708 tsd_hash_table = tsd_hash_table_init(TSD_HASH_TABLE_BITS_DEFAULT);
709 if (tsd_hash_table == NULL)
718 tsd_hash_table_fini(tsd_hash_table);
719 tsd_hash_table = NULL;