/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at legal-notices/CDDLv1_0.txt
 * or http://forgerock.org/license/CDDLv1.0.html.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at legal-notices/CDDLv1_0.txt.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information:
 *      Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 *
 *      Copyright 2015 ForgeRock AS.
 */
package org.opends.server.types;

import java.util.Iterator;
import java.util.LinkedList;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.forgerock.util.Reject;

/**
 * A lock manager coordinates directory update operations so that the DIT structure remains in a
 * consistent state and provides repeatable read isolation. When accessing entries, components must
 * ensure that they hold the appropriate lock:
 * <ul>
 * <li>repeatable reads: repeatable read isolation is rarely needed in practice, since all backend
 * reads are guaranteed to be performed with read-committed isolation, which is normally sufficient.
 * Specifically, read-only operations such as compare and search do not require any additional
 * locking. If repeatable read isolation is required, then lock the entry using
 * {@link #tryReadLockEntry(DN)}
 * <li>modifying an entry: acquire an entry write lock for the target entry using
 * {@link #tryWriteLockEntry(DN)}. Updates are typically performed using a read-modify-write cycle,
 * so the write lock should be acquired before performing the initial read in order to ensure
 * consistency (see the example below)
 * <li>adding an entry: client code must acquire an entry write lock for the target entry using
 * {@link #tryWriteLockEntry(DN)}. The parent entry will automatically be protected from deletion by
 * an implicit subtree read lock on the parent
 * <li>deleting an entry: client code must acquire a subtree write lock for the target entry using
 * {@link #tryWriteLockSubtree(DN)}
 * <li>renaming an entry: client code must acquire a subtree write lock for the old entry and a
 * subtree write lock for the new entry using {@link #tryWriteLockSubtree(DN)}. Care should be taken
 * to avoid deadlocks, e.g. by locking the DN which sorts first.
 * </ul>
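 * <p>
 * For example, a modify operation might lock, update, and unlock an entry as follows. This is an
 * illustrative sketch only: {@code lockManager} and {@code targetDN} stand for whatever lock
 * manager instance and entry DN are in scope, and error handling is omitted.
 *
 * <pre>
 * final DNLock lock = lockManager.tryWriteLockEntry(targetDN);
 * if (lock == null)
 * {
 *   // The lock attempt timed out: fail the operation.
 * }
 * else
 * {
 *   try
 *   {
 *     // Read the entry, apply the modifications, and write the result back.
 *   }
 *   finally
 *   {
 *     lock.unlock();
 *   }
 * }
 * </pre>
 * <p>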
 * In addition, backend implementations may choose to use their own lock manager for enforcing
 * atomicity and isolation. This is typically the case for backends which cannot take advantage of
 * atomicity guarantees provided by an underlying DB (the task backend is one such example).
 * <p>
 * <b>Implementation Notes</b>
 * <p>
 * The lock table is conceptually a cache of locks keyed on DN, i.e. a {@code Map<DN, DNLock>}.
 * Locks must be kept in the cache while they are locked, but may be removed once they are no longer
 * locked by any threads. Locks are represented using a pair of read-write locks: the first lock is
 * the "subtree" lock and the second is the "entry" lock.
 * <p>
 * In order to lock an entry for read or write, a <b>subtree</b> read lock is first acquired on each
 * of the parent entries from the root DN down to the immediate parent of the entry to be locked, as
 * well as on the entry itself. Then the appropriate read or write <b>entry</b> lock is acquired for
 * the target entry. Subtree write locking is performed by acquiring a <b>subtree</b> read lock on
 * each of the parent entries from the root DN down to the immediate parent of the subtree to be
 * locked. Then a <b>subtree</b> write lock, together with the <b>entry</b> write lock, is acquired
 * for the target subtree.
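 * <p>
 * For example (the DNs are hypothetical; any ancestors above {@code dc=example,dc=com} would be
 * subtree read locked first in the same way and are omitted here), write-locking the entry
 * {@code uid=bjensen,ou=people,dc=example,dc=com} acquires locks in the following top-down order:
 *
 * <pre>
 * "dc=example,dc=com"                          subtree read lock
 * "ou=people,dc=example,dc=com"                subtree read lock
 * "uid=bjensen,ou=people,dc=example,dc=com"    subtree read lock, then entry write lock
 * </pre>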
 * <p>
 * The lock table itself is not represented using a {@code ConcurrentHashMap} because the JDK6/7
 * APIs do not provide the ability to atomically add-and-lock or unlock-and-remove locks (this
 * capability is provided in JDK8). Instead, we provide our own implementation comprising a fixed
 * number of buckets, a bucket being a {@code LinkedList} of {@code DNLock}s. In addition, it is
 * important to be able to efficiently iterate up and down a chain of hierarchically related locks,
 * so each lock maintains a reference to its parent lock. Modern directories tend to have a flat
 * structure, so it is also important to avoid contention on "hot" parent DNs. Typically, a lock
 * attempt against a DN will involve a cache miss for the target DN and a cache hit for the parent,
 * but the parent will be the same parent for all lock requests, resulting in a lot of contention on
 * the same lock bucket. To avoid this, the lock manager maintains a small thread local cache of
 * locks, so that parent locks can be acquired using a lock-free algorithm.
 * <p>
 * Since the thread local cache may reference locks which are not actively locked by anyone, a
 * reference counting mechanism is used in order to prevent cached locks from being removed from the
 * underlying lock table. The reference counting mechanism is also used for references between a
 * lock and its parent lock. To summarize, locking a DN involves the following steps:
 * <ul>
 * <li>get the lock from the thread local cache. If the lock was not in the thread local cache then
 * try fetching it from the lock table:
 * <ul>
 * <li><i>found</i> - store it in the thread local cache and bump the reference count
 * <li><i>not found</i> - create a new lock. First fetch the parent lock using the same process,
 * i.e. looking in the thread local cache, etc. Then create a new lock referencing the parent lock
 * (which bumps the reference count for the parent lock), and store it in the lock table and the
 * thread local cache with a reference count of 1.
 * </ul>
 * <li>return the lock to the application and increment its reference count, since the application
 * now also has a reference to the lock.
 * </ul>
 * Locks are dereferenced when they are unlocked, when they are evicted from a thread local cache,
 * and when a child lock's reference count reaches zero. A lock is completely removed from the lock
 * table once its reference count reaches zero.
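 * <p>
 * As an illustration (hypothetical DNs, with only the immediate parent shown), after a single
 * thread has locked and then unlocked an entry, the lock table typically still contains entries
 * such as:
 *
 * <pre>
 * "ou=people,dc=example,dc=com" : 1   (referenced by the thread local cache)
 * "dc=example,dc=com"           : 2   (referenced by the thread local cache and by the child lock)
 * </pre>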
 */
@org.opends.server.types.PublicAPI(stability = org.opends.server.types.StabilityLevel.UNCOMMITTED,
    mayInstantiate = false, mayExtend = false, mayInvoke = true)
public final class LockManager
{
  /**
   * A lock on an entry or subtree. A lock can only be unlocked once.
   */
  public final class DNLock
  {
    private final DNLockHolder lock;
    private final Lock subtreeLock;
    private final Lock entryLock;
    private boolean isLocked = true;

    private DNLock(final DNLockHolder lock, final Lock subtreeLock, final Lock entryLock)
    {
      this.lock = lock;
      this.subtreeLock = subtreeLock;
      this.entryLock = entryLock;
    }

    @Override
    public String toString()
    {
      return lock.toString();
    }

    /**
     * Unlocks this lock and releases any blocked threads.
     *
     * @throws IllegalStateException
     *           If this lock has already been unlocked.
     */
    public void unlock()
    {
      if (!isLocked)
      {
        throw new IllegalStateException("Already unlocked");
      }
      lock.releaseParentSubtreeReadLock();
      subtreeLock.unlock();
      entryLock.unlock();
      dereference(lock);
      isLocked = false;
    }

    // For unit testing.
    int refCount()
    {
      return lock.refCount.get();
    }
  }

  /**
   * Lock implementation.
   */
  private final class DNLockHolder
  {
    private final AtomicInteger refCount = new AtomicInteger();
    private final DNLockHolder parent;
    private final DN dn;
    private final int dnHashCode;
    private final ReentrantReadWriteLock subtreeLock = new ReentrantReadWriteLock();
    private final ReentrantReadWriteLock entryLock = new ReentrantReadWriteLock();

    DNLockHolder(final DNLockHolder parent, final DN dn, final int dnHashCode)
    {
      this.parent = parent;
      this.dn = dn;
      this.dnHashCode = dnHashCode;
    }

    @Override
    public String toString()
    {
      return "\"" + dn + "\" : " + refCount;
    }

    /**
     * Unlocks the subtree read lock from the parent of this lock up to the root.
     */
    void releaseParentSubtreeReadLock()
    {
      for (DNLockHolder lock = parent; lock != null; lock = lock.parent)
      {
        lock.subtreeLock.readLock().unlock();
      }
    }

    DNLock tryReadLockEntry()
    {
      return tryLock(subtreeLock.readLock(), entryLock.readLock());
    }

    DNLock tryWriteLockEntry()
    {
      return tryLock(subtreeLock.readLock(), entryLock.writeLock());
    }

    DNLock tryWriteLockSubtree()
    {
      return tryLock(subtreeLock.writeLock(), entryLock.writeLock());
    }

    /**
     * Locks the subtree read lock from the root down to the parent of this lock.
     */
    private boolean tryAcquireParentSubtreeReadLock()
    {
      // First lock the parents of the parent.
      if (parent == null)
      {
        return true;
      }

      if (!parent.tryAcquireParentSubtreeReadLock())
      {
        return false;
      }

      // Then lock the parent of this lock.
      if (tryLockWithTimeout(parent.subtreeLock.readLock()))
      {
        return true;
      }

      // Failed to grab the parent lock within the timeout, so roll back the other locks.
      releaseParentSubtreeReadLock();
      return false;
    }

    private DNLock tryLock(final Lock subtreeLock, final Lock entryLock)
    {
      if (tryAcquireParentSubtreeReadLock())
      {
        if (tryLockWithTimeout(subtreeLock))
        {
          if (tryLockWithTimeout(entryLock))
          {
            return new DNLock(this, subtreeLock, entryLock);
          }
          subtreeLock.unlock();
        }
        releaseParentSubtreeReadLock();
      }
      // Failed to acquire all the necessary locks within the timeout.
      dereference(this);
      return null;
    }

    private boolean tryLockWithTimeout(final Lock lock)
    {
      try
      {
        return lock.tryLock(lockTimeout, lockTimeoutUnits);
      }
      catch (final InterruptedException e)
      {
        // Unable to handle interrupts here.
        Thread.currentThread().interrupt();
        return false;
      }
    }
  }

  private static final long DEFAULT_LOCK_TIMEOUT = 9;
  private static final TimeUnit DEFAULT_LOCK_TIMEOUT_UNITS = TimeUnit.SECONDS;
  private static final int MINIMUM_NUMBER_OF_BUCKETS = 64;
  private static final int THREAD_LOCAL_CACHE_SIZE = 8;

  private final int numberOfBuckets;
  private final LinkedList<DNLockHolder>[] lockTable;
  private final long lockTimeout;
  private final TimeUnit lockTimeoutUnits;

  // Avoid sub-classing in order to work around class leaks in app servers.
  private final ThreadLocal<LinkedList<DNLockHolder>> threadLocalCache = new ThreadLocal<>();

  /**
   * Creates a new lock manager with a lock timeout of 9 seconds and an automatically chosen number
   * of lock table buckets based on the number of processors.
   */
  public LockManager()
  {
    this(DEFAULT_LOCK_TIMEOUT, DEFAULT_LOCK_TIMEOUT_UNITS);
  }

  /**
   * Creates a new lock manager with the specified lock timeout and an automatically chosen number
   * of lock table buckets based on the number of processors.
   *
   * @param lockTimeout
   *          The lock timeout.
   * @param lockTimeoutUnit
   *          The lock timeout units.
   */
  public LockManager(final long lockTimeout, final TimeUnit lockTimeoutUnit)
  {
    this(lockTimeout, lockTimeoutUnit, Runtime.getRuntime().availableProcessors() * 8);
  }

  /**
   * Creates a new lock manager with the provided configuration.
   *
   * @param lockTimeout
   *          The lock timeout.
   * @param lockTimeoutUnit
   *          The lock timeout units.
   * @param numberOfBuckets
   *          The number of buckets to use in the lock table. The minimum number of buckets is 64.
   */
  @SuppressWarnings("unchecked")
  public LockManager(final long lockTimeout, final TimeUnit lockTimeoutUnit, final int numberOfBuckets)
  {
    Reject.ifFalse(lockTimeout >= 0, "lockTimeout must be a non-negative integer");
    Reject.ifNull(lockTimeoutUnit, "lockTimeoutUnit must be non-null");
    Reject.ifFalse(numberOfBuckets > 0, "numberOfBuckets must be a positive integer");

    this.lockTimeout = lockTimeout;
    this.lockTimeoutUnits = lockTimeoutUnit;
    this.numberOfBuckets = getNumberOfBuckets(numberOfBuckets);
    this.lockTable = new LinkedList[this.numberOfBuckets];
    for (int i = 0; i < this.numberOfBuckets; i++)
    {
      this.lockTable[i] = new LinkedList<>();
    }
  }

  @Override
  public String toString()
  {
    final StringBuilder builder = new StringBuilder();
    for (int i = 0; i < numberOfBuckets; i++)
    {
      final LinkedList<DNLockHolder> bucket = lockTable[i];
      synchronized (bucket)
      {
        for (final DNLockHolder lock : bucket)
        {
          builder.append(lock);
          builder.append('\n');
        }
      }
    }
    return builder.toString();
  }

  /**
   * Acquires the read lock for the specified entry. This method will block if the entry is already
   * write locked or if the entry, or any of its parents, has the subtree write lock taken.
   *
   * @param entry
   *          The entry whose read lock is required.
   * @return The lock, or {@code null} if the lock attempt timed out.
   */
  public DNLock tryReadLockEntry(final DN entry)
  {
    return acquireLockFromCache(entry).tryReadLockEntry();
  }

  /**
   * Acquires the write lock for the specified entry. This method will block if the entry is already
   * read or write locked or if the entry, or any of its parents, has the subtree write lock taken.
   *
   * @param entry
   *          The entry whose write lock is required.
   * @return The lock, or {@code null} if the lock attempt timed out.
   */
  public DNLock tryWriteLockEntry(final DN entry)
  {
    return acquireLockFromCache(entry).tryWriteLockEntry();
  }

  /**
   * Acquires the write lock for the specified subtree. This method will block if any entry or
   * subtree within the subtree is already read or write locked or if any of the parent entries of
   * the subtree have the subtree write lock taken.
   *
   * @param subtree
   *          The subtree whose write lock is required.
   * @return The lock, or {@code null} if the lock attempt timed out.
   */
  public DNLock tryWriteLockSubtree(final DN subtree)
  {
    return acquireLockFromCache(subtree).tryWriteLockSubtree();
  }

  // For unit testing.
  int getLockTableRefCountFor(final DN dn)
  {
    final int dnHashCode = dn.hashCode();
    final LinkedList<DNLockHolder> bucket = getBucket(dnHashCode);
    synchronized (bucket)
    {
      for (final DNLockHolder lock : bucket)
      {
        if (lock.dnHashCode == dnHashCode && lock.dn.equals(dn))
        {
          return lock.refCount.get();
        }
      }
      return -1;
    }
  }

  // For unit testing.
  int getThreadLocalCacheRefCountFor(final DN dn)
  {
    final LinkedList<DNLockHolder> cache = threadLocalCache.get();
    if (cache == null)
    {
      return -1;
    }
    final int dnHashCode = dn.hashCode();
    for (final DNLockHolder lock : cache)
    {
      if (lock.dnHashCode == dnHashCode && lock.dn.equals(dn))
      {
        return lock.refCount.get();
      }
    }
    return -1;
  }

  private DNLockHolder acquireLockFromCache(final DN dn)
  {
    LinkedList<DNLockHolder> cache = threadLocalCache.get();
    if (cache == null)
    {
      cache = new LinkedList<>();
      threadLocalCache.set(cache);
    }
    return acquireLockFromCache0(dn, cache);
  }

  private DNLockHolder acquireLockFromCache0(final DN dn, final LinkedList<DNLockHolder> cache)
  {
    final int dnHashCode = dn.hashCode();
    DNLockHolder lock = removeLock(cache, dn, dnHashCode);
    if (lock == null)
    {
      lock = acquireLockFromLockTable(dn, dnHashCode, cache);
      if (cache.size() >= THREAD_LOCAL_CACHE_SIZE)
      {
        // Cache too big: evict oldest entry.
        dereference(cache.removeLast());
      }
    }
    cache.addFirst(lock); // optimize for LRU
    lock.refCount.incrementAndGet(); // count the reference handed back to the caller
    return lock;
  }

  private DNLockHolder acquireLockFromLockTable(final DN dn, final int dnHashCode, final LinkedList<DNLockHolder> cache)
  {
    /*
     * The lock doesn't exist yet so we'll have to create a new one referencing its parent lock. The
     * parent lock may not yet exist in the lock table either so acquire it before locking the
     * bucket in order to avoid deadlocks resulting from reentrant bucket locks. Note that we
     * pre-emptively fetch the parent lock because experiments show that the requested child lock is
     * almost never in the lock-table. Specifically, this method is only called if we are already on
     * the slow path due to a cache miss in the thread-local cache.
     */
    final DN parentDN = dn.parent();
    final DNLockHolder parentLock = parentDN != null ? acquireLockFromCache0(parentDN, cache) : null;
    boolean parentLockWasUsed = false;
    try
    {
      final LinkedList<DNLockHolder> bucket = getBucket(dnHashCode);
      synchronized (bucket)
      {
        DNLockHolder lock = removeLock(bucket, dn, dnHashCode);
        if (lock == null)
        {
          lock = new DNLockHolder(parentLock, dn, dnHashCode);
          parentLockWasUsed = true;
        }
        bucket.addFirst(lock); // optimize for LRU
        lock.refCount.incrementAndGet();
        return lock;
      }
    }
    finally
    {
      if (!parentLockWasUsed && parentLock != null)
      {
        dereference(parentLock);
      }
    }
  }

  private void dereference(final DNLockHolder lock)
  {
    if (lock.refCount.decrementAndGet() <= 0)
    {
      final LinkedList<DNLockHolder> bucket = getBucket(lock.dnHashCode);
      boolean lockWasRemoved = false;
      synchronized (bucket)
      {
        // Double check: another thread could have acquired the lock since we decremented it to zero.
        if (lock.refCount.get() <= 0)
        {
          removeLock(bucket, lock.dn, lock.dnHashCode);
          lockWasRemoved = true;
        }
      }

      /*
       * Dereference the parent outside of the bucket lock to avoid potential deadlocks due to
       * reentrant bucket locks.
       */
      if (lockWasRemoved && lock.parent != null)
      {
        dereference(lock.parent);
      }
    }
  }

  private LinkedList<DNLockHolder> getBucket(final int dnHashCode)
  {
    // numberOfBuckets is always a power of two, so masking maps the hash code to a bucket index.
    return lockTable[dnHashCode & (numberOfBuckets - 1)];
  }

  /*
   * Ensure that the number of buckets is a power of 2 in order to make it easier to map hash codes
   * to bucket indexes.
   */
  private int getNumberOfBuckets(final int buckets)
  {
    // Enforce the documented minimum of 64 buckets, then round up to the next power of 2.
    final int roundedNumberOfBuckets = Math.max(buckets, MINIMUM_NUMBER_OF_BUCKETS);
    int powerOf2 = 1;
    while (powerOf2 < roundedNumberOfBuckets)
    {
      powerOf2 <<= 1;
    }
    return powerOf2;
  }

  private DNLockHolder removeLock(final LinkedList<DNLockHolder> lockList, final DN dn, final int dnHashCode)
  {
    final Iterator<DNLockHolder> iterator = lockList.iterator();
    while (iterator.hasNext())
    {
      final DNLockHolder lock = iterator.next();
      if (lock.dnHashCode == dnHashCode && lock.dn.equals(dn))
      {
        // Found: remove the lock because it will be moved to the front of the list.
        iterator.remove();
        return lock;
      }
    }
    return null;
  }
}