Mirror of https://github.com/ossrs/srs.git, synced 2025-03-09 15:49:59 +00:00
Squash: Fix bugs
This commit is contained in:
parent 10d188faab
commit 716e578a19
382 changed files with 170096 additions and 220 deletions
510  trunk/3rdparty/gperftools-2-fit/src/thread_cache.h  (vendored, normal file)
@@ -0,0 +1,510 @@
// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>

#ifndef TCMALLOC_THREAD_CACHE_H_
#define TCMALLOC_THREAD_CACHE_H_

#include <config.h>
#ifdef HAVE_PTHREAD
#include <pthread.h>                    // for pthread_t, pthread_key_t
#endif
#include <stddef.h>                     // for size_t, NULL
#ifdef HAVE_STDINT_H
#include <stdint.h>                     // for uint32_t, uint64_t
#endif
#include <sys/types.h>                  // for ssize_t
#include "base/commandlineflags.h"
#include "common.h"
#include "linked_list.h"
#include "maybe_threads.h"
#include "page_heap_allocator.h"
#include "sampler.h"
#include "static_vars.h"

#include "common.h"               // for SizeMap, kMaxSize, etc
#include "internal_logging.h"     // for ASSERT, etc
#include "linked_list.h"          // for SLL_Pop, SLL_PopRange, etc
#include "page_heap_allocator.h"  // for PageHeapAllocator
#include "sampler.h"              // for Sampler
#include "static_vars.h"          // for Static

DECLARE_int64(tcmalloc_sample_parameter);

namespace tcmalloc {

//-------------------------------------------------------------------
// Data kept per thread
//-------------------------------------------------------------------

class ThreadCache {
 public:
#ifdef HAVE_TLS
  enum { have_tls = true };
#else
  enum { have_tls = false };
#endif

  void Init(pthread_t tid);
  void Cleanup();

  // Accessors (mostly just for printing stats)
  int freelist_length(uint32 cl) const { return list_[cl].length(); }

  // Total byte size in cache
  size_t Size() const { return size_; }

  // Allocate an object of the given size and class. The size given
  // must be the same as the size of the class in the size map.
  void* Allocate(size_t size, uint32 cl, void *(*oom_handler)(size_t size));
  void Deallocate(void* ptr, uint32 size_class);

  void Scavenge();

  int GetSamplePeriod();

  // Record allocation of "k" bytes. Return true iff allocation
  // should be sampled
  bool SampleAllocation(size_t k);

  bool TryRecordAllocationFast(size_t k);

  static void InitModule();
  static void InitTSD();
  static ThreadCache* GetThreadHeap();
  static ThreadCache* GetCache();
  static ThreadCache* GetCacheIfPresent();
  static ThreadCache* GetFastPathCache();
  static ThreadCache* GetCacheWhichMustBePresent();
  static ThreadCache* CreateCacheIfNecessary();
  static void BecomeIdle();
  static void BecomeTemporarilyIdle();
  static void SetUseEmergencyMalloc();
  static void ResetUseEmergencyMalloc();
  static bool IsUseEmergencyMalloc();

  // Return the number of thread heaps in use.
  static inline int HeapsInUse();

  // Adds to *total_bytes the total number of bytes used by all thread heaps.
  // Also, if class_count is not NULL, it must be an array of size kNumClasses,
  // and this function will increment each element of class_count by the number
  // of items in all thread-local freelists of the corresponding size class.
  // REQUIRES: Static::pageheap_lock is held.
  static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count);

  // Sets the total thread cache size to new_size, recomputing the
  // individual thread cache sizes as necessary.
  // REQUIRES: Static::pageheap_lock is held.
  static void set_overall_thread_cache_size(size_t new_size);
  static size_t overall_thread_cache_size() {
    return overall_thread_cache_size_;
  }

 private:
  class FreeList {
   private:
    void*    list_;       // Linked list of nodes

#ifdef _LP64
    // On 64-bit hardware, manipulating 16-bit values may be slightly slow.
    uint32_t length_;      // Current length.
    uint32_t lowater_;     // Low water mark for list length.
    uint32_t max_length_;  // Dynamic max list length based on usage.
    // Tracks the number of times a deallocation has caused
    // length_ > max_length_. After the kMaxOverages'th time, max_length_
    // shrinks and length_overages_ is reset to zero.
    uint32_t length_overages_;
#else
    // If we aren't using 64-bit pointers then pack these into less space.
    uint16_t length_;
    uint16_t lowater_;
    uint16_t max_length_;
    uint16_t length_overages_;
#endif

    int32_t size_;

   public:
    void Init(size_t size) {
      list_ = NULL;
      length_ = 0;
      lowater_ = 0;
      max_length_ = 1;
      length_overages_ = 0;
      size_ = size;
    }

    // Return current length of list
    size_t length() const {
      return length_;
    }

    int32_t object_size() const {
      return size_;
    }

    // Return the maximum length of the list.
    size_t max_length() const {
      return max_length_;
    }

    // Set the maximum length of the list. If 'new_max' > length(), the
    // client is responsible for removing objects from the list.
    void set_max_length(size_t new_max) {
      max_length_ = new_max;
    }

    // Return the number of times that length() has gone over max_length().
    size_t length_overages() const {
      return length_overages_;
    }

    void set_length_overages(size_t new_count) {
      length_overages_ = new_count;
    }

    // Is list empty?
    bool empty() const {
      return list_ == NULL;
    }

    // Low-water mark management
    int lowwatermark() const { return lowater_; }
    void clear_lowwatermark() { lowater_ = length_; }

    uint32_t Push(void* ptr) {
      uint32_t length = length_ + 1;
      SLL_Push(&list_, ptr);
      length_ = length;
      return length;
    }

    void* Pop() {
      ASSERT(list_ != NULL);
      length_--;
      if (length_ < lowater_) lowater_ = length_;
      return SLL_Pop(&list_);
    }

    bool TryPop(void **rv) {
      if (SLL_TryPop(&list_, rv)) {
        length_--;
        if (PREDICT_FALSE(length_ < lowater_)) lowater_ = length_;
        return true;
      }
      return false;
    }

    void* Next() {
      return SLL_Next(&list_);
    }

    void PushRange(int N, void *start, void *end) {
      SLL_PushRange(&list_, start, end);
      length_ += N;
    }

    void PopRange(int N, void **start, void **end) {
      SLL_PopRange(&list_, N, start, end);
      ASSERT(length_ >= N);
      length_ -= N;
      if (length_ < lowater_) lowater_ = length_;
    }
  };

  // Gets and returns an object from the central cache, and, if possible,
  // also adds some objects of that size class to this thread cache.
  void* FetchFromCentralCache(uint32 cl, int32_t byte_size,
                              void *(*oom_handler)(size_t size));

  void ListTooLong(void* ptr, uint32 cl);

  // Releases some number of items from src. Adjusts the list's max_length
  // to eventually converge on num_objects_to_move(cl).
  void ListTooLong(FreeList* src, uint32 cl);

  // Releases N items from this thread cache.
  void ReleaseToCentralCache(FreeList* src, uint32 cl, int N);

  void SetMaxSize(int32 new_max_size);

  // Increase max_size_ by reducing unclaimed_cache_space_ or by
  // reducing the max_size_ of some other thread. In both cases,
  // the delta is kStealAmount.
  void IncreaseCacheLimit();
  // Same as above but requires Static::pageheap_lock() is held.
  void IncreaseCacheLimitLocked();

  // If TLS is available, we also store a copy of the per-thread object
  // in a __thread variable since __thread variables are faster to read
  // than pthread_getspecific(). We still need pthread_setspecific()
  // because __thread variables provide no way to run cleanup code when
  // a thread is destroyed.
  // We also give a hint to the compiler to use the "initial exec" TLS
  // model. This is faster than the default TLS model, at the cost that
  // you cannot dlopen this library. (To see the difference, look at
  // the CPU use of __tls_get_addr with and without this attribute.)
  // Since we don't really use dlopen in google code -- and using dlopen
  // on a malloc replacement is asking for trouble in any case -- that's
  // a good tradeoff for us.
#ifdef HAVE_TLS
  struct ThreadLocalData {
    ThreadCache* fast_path_heap;
    ThreadCache* heap;
    bool use_emergency_malloc;
  };
  static __thread ThreadLocalData threadlocal_data_
      CACHELINE_ALIGNED ATTR_INITIAL_EXEC;

#endif

  // Thread-specific key. Initialization here is somewhat tricky
  // because some Linux startup code invokes malloc() before it
  // is in a good enough state to handle pthread_keycreate().
  // Therefore, we use TSD keys only after tsd_inited is set to true.
  // Until then, we use a slow path to get the heap object.
  static ATTRIBUTE_HIDDEN bool tsd_inited_;
  static pthread_key_t heap_key_;

  // Linked list of heap objects. Protected by Static::pageheap_lock.
  static ThreadCache* thread_heaps_;
  static int thread_heap_count_;

  // A pointer to one of the objects in thread_heaps_. Represents
  // the next ThreadCache from which a thread over its max_size_ should
  // steal memory limit. Round-robin through all of the objects in
  // thread_heaps_. Protected by Static::pageheap_lock.
  static ThreadCache* next_memory_steal_;

  // Overall thread cache size. Protected by Static::pageheap_lock.
  static size_t overall_thread_cache_size_;

  // Global per-thread cache size. Writes are protected by
  // Static::pageheap_lock. Reads are done without any locking, which should be
  // fine as long as size_t can be written atomically and we don't place
  // invariants between this variable and other pieces of state.
  static volatile size_t per_thread_cache_size_;

  // Represents overall_thread_cache_size_ minus the sum of max_size_
  // across all ThreadCaches. Protected by Static::pageheap_lock.
  static ssize_t unclaimed_cache_space_;

  // This class is laid out with the most frequently used fields
  // first so that hot elements are placed on the same cache line.

  FreeList list_[kClassSizesMax];     // Array indexed by size-class

  int32 size_;                        // Combined size of data
  int32 max_size_;                    // size_ > max_size_ --> Scavenge()

  // We sample allocations, biased by the size of the allocation
  Sampler sampler_;                   // A sampler

  pthread_t tid_;                     // Which thread owns it
  bool in_setspecific_;               // In call to pthread_setspecific?

  // Allocate a new heap. REQUIRES: Static::pageheap_lock is held.
  static ThreadCache* NewHeap(pthread_t tid);

  // Use only as pthread thread-specific destructor function.
  static void DestroyThreadCache(void* ptr);

  static void DeleteCache(ThreadCache* heap);
  static void RecomputePerThreadCacheSize();

 public:

  // All ThreadCache objects are kept in a linked list (for stats collection)
  ThreadCache* next_;
  ThreadCache* prev_;

  // Ensure that this class is cacheline-aligned. This is critical for
  // performance, as false sharing would negate many of the benefits
  // of a per-thread cache.
} CACHELINE_ALIGNED;

// Allocator for thread heaps
// This is logically part of the ThreadCache class, but MSVC, at
// least, does not like using ThreadCache as a template argument
// before the class is fully defined. So we put it outside the class.
extern PageHeapAllocator<ThreadCache> threadcache_allocator;

inline int ThreadCache::HeapsInUse() {
  return threadcache_allocator.inuse();
}

inline ATTRIBUTE_ALWAYS_INLINE void* ThreadCache::Allocate(
    size_t size, uint32 cl, void *(*oom_handler)(size_t size)) {
  FreeList* list = &list_[cl];

#ifdef NO_TCMALLOC_SAMPLES
  size = list->object_size();
#endif

  ASSERT(size <= kMaxSize);
  ASSERT(size != 0);
  ASSERT(size == 0 || size == Static::sizemap()->ByteSizeForClass(cl));

  void* rv;
  if (!list->TryPop(&rv)) {
    return FetchFromCentralCache(cl, size, oom_handler);
  }
  size_ -= size;
  return rv;
}

inline ATTRIBUTE_ALWAYS_INLINE void ThreadCache::Deallocate(void* ptr, uint32 cl) {
  ASSERT(list_[cl].max_length() > 0);
  FreeList* list = &list_[cl];

  // This catches back-to-back frees of allocs in the same size
  // class. A more comprehensive (and expensive) test would be to walk
  // the entire freelist. But this might be enough to find some bugs.
  ASSERT(ptr != list->Next());

  uint32_t length = list->Push(ptr);

  if (PREDICT_FALSE(length > list->max_length())) {
    ListTooLong(list, cl);
    return;
  }

  size_ += list->object_size();
  if (PREDICT_FALSE(size_ > max_size_)) {
    Scavenge();
  }
}

inline ThreadCache* ThreadCache::GetThreadHeap() {
#ifdef HAVE_TLS
  return threadlocal_data_.heap;
#else
  return reinterpret_cast<ThreadCache *>(
      perftools_pthread_getspecific(heap_key_));
#endif
}

inline ThreadCache* ThreadCache::GetCacheWhichMustBePresent() {
#ifdef HAVE_TLS
  ASSERT(threadlocal_data_.heap);
  return threadlocal_data_.heap;
#else
  ASSERT(perftools_pthread_getspecific(heap_key_));
  return reinterpret_cast<ThreadCache *>(
      perftools_pthread_getspecific(heap_key_));
#endif
}

inline ThreadCache* ThreadCache::GetCache() {
#ifdef HAVE_TLS
  ThreadCache* ptr = GetThreadHeap();
#else
  ThreadCache* ptr = NULL;
  if (PREDICT_TRUE(tsd_inited_)) {
    ptr = GetThreadHeap();
  }
#endif
  if (ptr == NULL) ptr = CreateCacheIfNecessary();
  return ptr;
}

// In deletion paths, we do not try to create a thread-cache. This is
// because we may be in the thread destruction code and may have
// already cleaned up the cache for this thread.
inline ThreadCache* ThreadCache::GetCacheIfPresent() {
#ifndef HAVE_TLS
  if (PREDICT_FALSE(!tsd_inited_)) return NULL;
#endif
  return GetThreadHeap();
}

inline ThreadCache* ThreadCache::GetFastPathCache() {
#ifndef HAVE_TLS
  return GetCacheIfPresent();
#else
  return threadlocal_data_.fast_path_heap;
#endif
}

inline void ThreadCache::SetUseEmergencyMalloc() {
#ifdef HAVE_TLS
  threadlocal_data_.fast_path_heap = NULL;
  threadlocal_data_.use_emergency_malloc = true;
#endif
}

inline void ThreadCache::ResetUseEmergencyMalloc() {
#ifdef HAVE_TLS
  ThreadCache *heap = threadlocal_data_.heap;
  threadlocal_data_.fast_path_heap = heap;
  threadlocal_data_.use_emergency_malloc = false;
#endif
}

inline bool ThreadCache::IsUseEmergencyMalloc() {
#if defined(HAVE_TLS) && defined(ENABLE_EMERGENCY_MALLOC)
  return PREDICT_FALSE(threadlocal_data_.use_emergency_malloc);
#else
  return false;
#endif
}

inline void ThreadCache::SetMaxSize(int32 new_max_size) {
  max_size_ = new_max_size;
}

#ifndef NO_TCMALLOC_SAMPLES

inline bool ThreadCache::SampleAllocation(size_t k) {
  return !sampler_.RecordAllocation(k);
}

inline bool ThreadCache::TryRecordAllocationFast(size_t k) {
  return sampler_.TryRecordAllocationFast(k);
}

#else

inline bool ThreadCache::SampleAllocation(size_t k) {
  return false;
}

inline bool ThreadCache::TryRecordAllocationFast(size_t k) {
  return true;
}

#endif

}  // namespace tcmalloc

#endif  // TCMALLOC_THREAD_CACHE_H_
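The FreeList comments above describe an adaptive policy: each per-size-class list starts with max_length_ at 1, grows while deallocations keep overflowing it, and shrinks after kMaxOverages overflows so idle size classes hand memory back. The following standalone sketch illustrates that idea only; ToyFreeList, kBatchSize, and kMaxOverages are hypothetical stand-ins, not the vendored code (the real values come from SizeMap::num_objects_to_move(cl) and an internal constant, and the real list is intrusive, not a vector). Build with, e.g., g++ -std=c++17 toy_freelist.cc.

// toy_freelist.cc -- illustrative sketch, not part of thread_cache.h.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

class ToyFreeList {
 public:
  static constexpr size_t kBatchSize = 32;   // assumed num_objects_to_move(cl)
  static constexpr size_t kMaxOverages = 3;  // assumed shrink threshold

  // Push one freed object. Returns how many objects the caller should
  // release to the central cache (0 on the fast path), mimicking the
  // Deallocate -> ListTooLong -> ReleaseToCentralCache flow.
  size_t Push(void* ptr) {
    nodes_.push_back(ptr);
    if (nodes_.size() <= max_length_) return 0;

    if (max_length_ < kBatchSize) {
      // Slow start: a busy size class quickly earns a whole batch of slack.
      max_length_ = std::min(max_length_ * 2, kBatchSize);
      overages_ = 0;
    } else if (++overages_ > kMaxOverages) {
      // Persistent overflow: shrink back toward one batch.
      max_length_ = std::max(max_length_ - kBatchSize, kBatchSize);
      overages_ = 0;
    }
    // Release up to one batch back to the (imaginary) central cache.
    size_t n = std::min(nodes_.size(), kBatchSize);
    nodes_.resize(nodes_.size() - n);
    return n;
  }

  size_t max_length() const { return max_length_; }

 private:
  std::vector<void*> nodes_;   // stand-in for the intrusive singly linked list
  size_t max_length_ = 1;      // starts small, grows on demand
  size_t overages_ = 0;        // counterpart of length_overages_
};

int main() {
  ToyFreeList list;
  char dummy[1];
  for (int i = 0; i < 200; ++i) {
    if (size_t released = list.Push(dummy)) {
      std::cout << "push " << i << ": released " << released
                << ", max_length now " << list.max_length() << "\n";
    }
  }
}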
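The long comment ahead of ThreadLocalData explains why the header keeps both a __thread pointer and a pthread key: the TLS variable makes lookups a single load, while the key exists only so a destructor runs at thread exit. Here is a minimal sketch of that pattern, assuming GCC/Clang and POSIX threads; Cache, GetCache, and DestroyCache are hypothetical illustration names, not the header's API. Build with, e.g., g++ -std=c++17 -pthread tls_fastpath.cc.

// tls_fastpath.cc -- illustrative sketch, not part of thread_cache.h.
#include <pthread.h>
#include <atomic>
#include <cstdio>

struct Cache { int id; };

// Fast path: one TLS load. The initial-exec model mirrors the header's
// ATTR_INITIAL_EXEC hint (faster, but the library cannot be dlopen()ed).
static __thread Cache* tls_cache
    __attribute__((tls_model("initial-exec"))) = nullptr;

static pthread_key_t cache_key;
static pthread_once_t key_once = PTHREAD_ONCE_INIT;
static std::atomic<int> next_id{0};

// Runs at thread exit via the pthread key; a plain __thread variable has
// no way to trigger cleanup, which is why the key is still needed.
static void DestroyCache(void* ptr) {
  Cache* c = static_cast<Cache*>(ptr);
  std::printf("destroying cache %d\n", c->id);
  delete c;
}

static void MakeKey() { pthread_key_create(&cache_key, DestroyCache); }

static Cache* GetCache() {
  if (tls_cache != nullptr) return tls_cache;  // fast path
  pthread_once(&key_once, MakeKey);
  Cache* c = new Cache{next_id.fetch_add(1)};
  pthread_setspecific(cache_key, c);  // registers DestroyCache for this thread
  tls_cache = c;                      // later lookups skip pthread_getspecific
  return c;
}

static void* Worker(void*) {
  std::printf("worker cache id %d\n", GetCache()->id);
  return nullptr;  // DestroyCache fires as this thread exits
}

int main() {
  pthread_t t;
  pthread_create(&t, nullptr, Worker, nullptr);
  pthread_join(t, nullptr);
  std::printf("main cache id %d\n", GetCache()->id);
  // Note: TSD destructors generally do not run for the main thread on exit.
}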