548 lines
24 KiB
OCaml
548 lines
24 KiB
OCaml
(**************************************************************************)
|
|
(* *)
|
|
(* OCaml *)
|
|
(* *)
|
|
(* Damien Doligez, projet Para, INRIA Rocquencourt *)
|
|
(* Jacques-Henri Jourdan, projet Gallium, INRIA Paris *)
|
|
(* *)
|
|
(* Copyright 1996-2016 Institut National de Recherche en Informatique *)
|
|
(* et en Automatique. *)
|
|
(* *)
|
|
(* All rights reserved. This file is distributed under the terms of *)
|
|
(* the GNU Lesser General Public License version 2.1, with the *)
|
|
(* special exception on linking described in the file LICENSE. *)
|
|
(* *)
|
|
(**************************************************************************)
|
|
|
|
(** Memory management control and statistics; finalised values. *)
|
|
|
|
type stat =
|
|
{ minor_words : float;
|
|
(** Number of words allocated in the minor heap since
|
|
the program was started. *)
|
|
|
|
promoted_words : float;
|
|
(** Number of words allocated in the minor heap that
|
|
survived a minor collection and were moved to the major heap
|
|
since the program was started. *)
|
|
|
|
major_words : float;
|
|
(** Number of words allocated in the major heap, including
|
|
the promoted words, since the program was started. *)
|
|
|
|
minor_collections : int;
|
|
(** Number of minor collections since the program was started. *)
|
|
|
|
major_collections : int;
|
|
(** Number of major collection cycles completed since the program
|
|
was started. *)
|
|
|
|
heap_words : int;
|
|
(** Total size of the major heap, in words. *)
|
|
|
|
heap_chunks : int;
|
|
(** Number of contiguous pieces of memory that make up the major heap. *)
|
|
|
|
live_words : int;
|
|
(** Number of words of live data in the major heap, including the header
|
|
words. *)
|
|
|
|
live_blocks : int;
|
|
(** Number of live blocks in the major heap. *)
|
|
|
|
free_words : int;
|
|
(** Number of words in the free list. *)
|
|
|
|
free_blocks : int;
|
|
(** Number of blocks in the free list. *)
|
|
|
|
largest_free : int;
|
|
(** Size (in words) of the largest block in the free list. *)
|
|
|
|
fragments : int;
|
|
(** Number of wasted words due to fragmentation. These are
|
|
1-word free blocks placed between two live blocks. They
|
|
are not available for allocation. *)
|
|
|
|
compactions : int;
|
|
(** Number of heap compactions since the program was started. *)
|
|
|
|
top_heap_words : int;
|
|
(** Maximum size reached by the major heap, in words. *)
|
|
|
|
stack_size: int;
|
|
(** Current size of the stack, in words. @since 3.12.0 *)
|
|
|
|
forced_major_collections: int;
|
|
(** Number of forced full major collections completed since the program
|
|
was started. @since 4.12.0 *)
|
|
}
|
|
(** The memory management counters are returned in a [stat] record.
|
|
|
|
The total amount of memory allocated by the program since it was started
|
|
is (in words) [minor_words + major_words - promoted_words]. Multiply by
|
|
the word size (4 on a 32-bit machine, 8 on a 64-bit machine) to get
|
|
the number of bytes.
|
|
*)
|
|
|
|
type control =
|
|
{ mutable minor_heap_size : int;
|
|
[@ocaml.deprecated_mutable "Use {(Gc.get()) with Gc.minor_heap_size = ...}"]
|
|
(** The size (in words) of the minor heap. Changing
|
|
this parameter will trigger a minor collection. Default: 256k. *)
|
|
|
|
mutable major_heap_increment : int;
|
|
[@ocaml.deprecated_mutable
|
|
"Use {(Gc.get()) with Gc.major_heap_increment = ...}"]
|
|
(** How much to add to the major heap when increasing it. If this
|
|
number is less than or equal to 1000, it is a percentage of
|
|
the current heap size (i.e. setting it to 100 will double the heap
|
|
size at each increase). If it is more than 1000, it is a fixed
|
|
number of words that will be added to the heap. Default: 15. *)
|
|
|
|
mutable space_overhead : int;
|
|
[@ocaml.deprecated_mutable "Use {(Gc.get()) with Gc.space_overhead = ...}"]
|
|
(** The major GC speed is computed from this parameter.
|
|
This is the memory that will be "wasted" because the GC does not
|
|
immediately collect unreachable blocks. It is expressed as a
|
|
percentage of the memory used for live data.
|
|
The GC will work more (use more CPU time and collect
|
|
blocks more eagerly) if [space_overhead] is smaller.
|
|
Default: 80. *)
|
|
|
|
mutable verbose : int;
|
|
[@ocaml.deprecated_mutable "Use {(Gc.get()) with Gc.verbose = ...}"]
|
|
(** This value controls the GC messages on standard error output.
|
|
It is a sum of some of the following flags, to print messages
|
|
on the corresponding events:
|
|
- [0x001] Start and end of major GC cycle.
|
|
- [0x002] Minor collection and major GC slice.
|
|
- [0x004] Growing and shrinking of the heap.
|
|
- [0x008] Resizing of stacks and memory manager tables.
|
|
- [0x010] Heap compaction.
|
|
- [0x020] Change of GC parameters.
|
|
- [0x040] Computation of major GC slice size.
|
|
- [0x080] Calling of finalisation functions.
|
|
- [0x100] Bytecode executable and shared library search at start-up.
|
|
- [0x200] Computation of compaction-triggering condition.
|
|
- [0x400] Output GC statistics at program exit.
|
|
Default: 0. *)
|
|
|
|
mutable max_overhead : int;
|
|
[@ocaml.deprecated_mutable "Use {(Gc.get()) with Gc.max_overhead = ...}"]
|
|
(** Heap compaction is triggered when the estimated amount
|
|
of "wasted" memory is more than [max_overhead] percent of the
|
|
amount of live data. If [max_overhead] is set to 0, heap
|
|
compaction is triggered at the end of each major GC cycle
|
|
(this setting is intended for testing purposes only).
|
|
If [max_overhead >= 1000000], compaction is never triggered.
|
|
If compaction is permanently disabled, it is strongly suggested
|
|
to set [allocation_policy] to 2.
|
|
Default: 500. *)
|
|
|
|
mutable stack_limit : int;
|
|
[@ocaml.deprecated_mutable "Use {(Gc.get()) with Gc.stack_limit = ...}"]
|
|
(** The maximum size of the stack (in words). This is only
|
|
relevant to the byte-code runtime, as the native code runtime
|
|
uses the operating system's stack. Default: 1024k. *)
|
|
|
|
mutable allocation_policy : int;
|
|
[@ocaml.deprecated_mutable
|
|
"Use {(Gc.get()) with Gc.allocation_policy = ...}"]
|
|
(** The policy used for allocating in the major heap.
|
|
Possible values are 0, 1 and 2.
|
|
|
|
- 0 is the next-fit policy, which is usually fast but can
|
|
result in fragmentation, increasing memory consumption.
|
|
|
|
- 1 is the first-fit policy, which avoids fragmentation but
|
|
has corner cases (in certain realistic workloads) where it
|
|
is noticeably slower.
|
|
|
|
- 2 is the best-fit policy, which is fast and avoids
|
|
fragmentation. In our experiments it is faster and uses less
|
|
memory than both next-fit and first-fit.
|
|
(since OCaml 4.10)
|
|
|
|
The current default is next-fit, as the best-fit policy is new
|
|
and not yet widely tested. We expect best-fit to become the
|
|
default in the future.
|
|
|
|
On one example that was known to be bad for next-fit and first-fit,
|
|
next-fit takes 28s using 855Mio of memory,
|
|
first-fit takes 47s using 566Mio of memory,
|
|
best-fit takes 27s using 545Mio of memory.
|
|
|
|
Note: When changing to a low-fragmentation policy, you may
|
|
need to increase the [space_overhead] setting, for example
|
|
using [100] instead of the default [80] which is tuned for
|
|
next-fit. Indeed, the difference in fragmentation behavior
|
|
means that different policies will have different proportions
|
|
of "wasted space" for a given program. Less fragmentation
|
|
means a smaller heap so, for the same amount of wasted space,
|
|
a higher proportion of wasted space. This makes the GC work
|
|
harder, unless you relax it by increasing [space_overhead].
|
|
|
|
Note: changing the allocation policy at run-time forces
|
|
a heap compaction, which is a lengthy operation unless the
|
|
heap is small (e.g. at the start of the program).
|
|
|
|
Default: 0.
|
|
|
|
@since 3.11.0 *)
|
|
|
|
window_size : int;
|
|
(** The size of the window used by the major GC for smoothing
|
|
out variations in its workload. This is an integer between
|
|
1 and 50.
|
|
Default: 1. @since 4.03.0 *)
|
|
|
|
custom_major_ratio : int;
|
|
(** Target ratio of floating garbage to major heap size for
|
|
out-of-heap memory held by custom values located in the major
|
|
heap. The GC speed is adjusted to try to use this much memory
|
|
for dead values that are not yet collected. Expressed as a
|
|
percentage of major heap size. The default value keeps the
|
|
out-of-heap floating garbage about the same size as the
|
|
in-heap overhead.
|
|
Note: this only applies to values allocated with
|
|
[caml_alloc_custom_mem] (e.g. bigarrays).
|
|
Default: 44.
|
|
@since 4.08.0 *)
|
|
|
|
custom_minor_ratio : int;
|
|
(** Bound on floating garbage for out-of-heap memory held by
|
|
custom values in the minor heap. A minor GC is triggered when
|
|
this much memory is held by custom values located in the minor
|
|
heap. Expressed as a percentage of minor heap size.
|
|
Note: this only applies to values allocated with
|
|
[caml_alloc_custom_mem] (e.g. bigarrays).
|
|
Default: 100.
|
|
@since 4.08.0 *)
|
|
|
|
custom_minor_max_size : int;
|
|
(** Maximum amount of out-of-heap memory for each custom value
|
|
allocated in the minor heap. When a custom value is allocated
|
|
on the minor heap and holds more than this many bytes, only
|
|
this value is counted against [custom_minor_ratio] and the
|
|
rest is directly counted against [custom_major_ratio].
|
|
Note: this only applies to values allocated with
|
|
[caml_alloc_custom_mem] (e.g. bigarrays).
|
|
Default: 8192 bytes.
|
|
@since 4.08.0 *)
|
|
}
|
|
(** The GC parameters are given as a [control] record. Note that
|
|
these parameters can also be initialised by setting the
|
|
OCAMLRUNPARAM environment variable. See the documentation of
|
|
[ocamlrun]. *)
|
|
|
|
external stat : unit -> stat = "caml_gc_stat"
|
|
(** Return the current values of the memory management counters in a
|
|
[stat] record. This function examines every heap block to get the
|
|
statistics. *)
|
|
|
|
external quick_stat : unit -> stat = "caml_gc_quick_stat"
|
|
(** Same as [stat] except that [live_words], [live_blocks], [free_words],
|
|
[free_blocks], [largest_free], and [fragments] are set to 0. This
|
|
function is much faster than [stat] because it does not need to go
|
|
through the heap. *)
|
|
|
|
external counters : unit -> float * float * float = "caml_gc_counters"
|
|
(** Return [(minor_words, promoted_words, major_words)]. This function
|
|
is as fast as [quick_stat]. *)
|
|
|
|
external minor_words : unit -> (float [@unboxed])
|
|
= "caml_gc_minor_words" "caml_gc_minor_words_unboxed"
|
|
(** Number of words allocated in the minor heap since the program was
|
|
started. This number is accurate in byte-code programs, but only an
|
|
approximation in programs compiled to native code.
|
|
|
|
In native code this function does not allocate.
|
|
|
|
@since 4.04 *)
|
|
|
|
external get : unit -> control = "caml_gc_get"
|
|
(** Return the current values of the GC parameters in a [control] record. *)
|
|
|
|
external set : control -> unit = "caml_gc_set"
|
|
(** [set r] changes the GC parameters according to the [control] record [r].
|
|
The normal usage is: [Gc.set { (Gc.get()) with Gc.verbose = 0x00d }] *)
|
|
|
|
external minor : unit -> unit = "caml_gc_minor"
|
|
(** Trigger a minor collection. *)
|
|
|
|
external major_slice : int -> int = "caml_gc_major_slice"
|
|
(** [major_slice n]
|
|
Do a minor collection and a slice of major collection. [n] is the
|
|
size of the slice: the GC will do enough work to free (on average)
|
|
[n] words of memory. If [n] = 0, the GC will try to do enough work
|
|
to ensure that the next automatic slice has no work to do.
|
|
This function returns an unspecified integer (currently: 0). *)
|
|
|
|
external major : unit -> unit = "caml_gc_major"
|
|
(** Do a minor collection and finish the current major collection cycle. *)
|
|
|
|
external full_major : unit -> unit = "caml_gc_full_major"
|
|
(** Do a minor collection, finish the current major collection cycle,
|
|
and perform a complete new cycle. This will collect all currently
|
|
unreachable blocks. *)
|
|
|
|
external compact : unit -> unit = "caml_gc_compaction"
|
|
(** Perform a full major collection and compact the heap. Note that heap
|
|
compaction is a lengthy operation. *)
|
|
|
|
val print_stat : out_channel -> unit
|
|
(** Print the current values of the memory management counters (in
|
|
human-readable form) into the channel argument. *)
|
|
|
|
val allocated_bytes : unit -> float
|
|
(** Return the total number of bytes allocated since the program was
|
|
started. It is returned as a [float] to avoid overflow problems
|
|
with [int] on 32-bit machines. *)
|
|
|
|
external get_minor_free : unit -> int = "caml_get_minor_free"
|
|
(** Return the current size of the free space inside the minor heap.
|
|
|
|
@since 4.03.0 *)
|
|
|
|
external get_bucket : int -> int = "caml_get_major_bucket" [@@noalloc]
|
|
(** [get_bucket n] returns the current size of the [n]-th future bucket
|
|
of the GC smoothing system. The unit is one millionth of a full GC.
|
|
Returns 0 if [n] is larger than the smoothing window.
|
|
@raise Invalid_argument if [n] is negative.
|
|
|
|
@since 4.03.0 *)
|
|
|
|
external get_credit : unit -> int = "caml_get_major_credit" [@@noalloc]
|
|
(** [get_credit ()] returns the current size of the "work done in advance"
|
|
counter of the GC smoothing system. The unit is one millionth of a
|
|
full GC.
|
|
|
|
@since 4.03.0 *)
|
|
|
|
external huge_fallback_count : unit -> int = "caml_gc_huge_fallback_count"
|
|
(** Return the number of times we tried to map huge pages and had to fall
|
|
back to small pages. This is always 0 if [OCAMLRUNPARAM] contains [H=1].
|
|
@since 4.03.0 *)
|
|
|
|
val finalise : ('a -> unit) -> 'a -> unit
|
|
(** [finalise f v] registers [f] as a finalisation function for [v].
|
|
[v] must be heap-allocated. [f] will be called with [v] as
|
|
argument at some point between the first time [v] becomes unreachable
|
|
(including through weak pointers) and the time [v] is collected by
|
|
the GC. Several functions can
|
|
be registered for the same value, or even several instances of the
|
|
same function. Each instance will be called once (or never,
|
|
if the program terminates before [v] becomes unreachable).
|
|
|
|
The GC will call the finalisation functions in the order of
|
|
deallocation. When several values become unreachable at the
|
|
same time (i.e. during the same GC cycle), the finalisation
|
|
functions will be called in the reverse order of the corresponding
|
|
calls to [finalise]. If [finalise] is called in the same order
|
|
as the values are allocated, that means each value is finalised
|
|
before the values it depends upon. Of course, this becomes
|
|
false if additional dependencies are introduced by assignments.
|
|
|
|
In the presence of multiple OCaml threads it should be assumed that
|
|
any particular finaliser may be executed in any of the threads.
|
|
|
|
Anything reachable from the closure of finalisation functions
|
|
is considered reachable, so the following code will not work
|
|
as expected:
|
|
- [ let v = ... in Gc.finalise (fun _ -> ...v...) v ]
|
|
|
|
Instead you should make sure that [v] is not in the closure of
|
|
the finalisation function by writing:
|
|
- [ let f = fun x -> ... let v = ... in Gc.finalise f v ]
|
|
|
|
|
|
The [f] function can use all features of OCaml, including
|
|
assignments that make the value reachable again. It can also
|
|
loop forever (in this case, the other
|
|
finalisation functions will not be called during the execution of [f],
|
|
unless it calls [finalise_release]).
|
|
It can call [finalise] on [v] or other values to register other
|
|
functions or even itself. It can raise an exception; in this case
|
|
the exception will interrupt whatever the program was doing when
|
|
the function was called.
|
|
|
|
|
|
[finalise] will raise [Invalid_argument] if [v] is not
|
|
guaranteed to be heap-allocated. Some examples of values that are not
|
|
heap-allocated are integers, constant constructors, booleans,
|
|
the empty array, the empty list, the unit value. The exact list
|
|
of what is heap-allocated or not is implementation-dependent.
|
|
Some constant values can be heap-allocated but never deallocated
|
|
during the lifetime of the program, for example a list of integer
|
|
constants; this is also implementation-dependent.
|
|
Note that values of type [float] are sometimes allocated and
|
|
sometimes not, so finalising them is unsafe, and [finalise] will
|
|
also raise [Invalid_argument] for them. Values of type ['a Lazy.t]
|
|
(for any ['a]) are like [float] in this respect, except that the
|
|
compiler sometimes optimizes them in a way that prevents [finalise]
|
|
from detecting them. In this case, it will not raise
|
|
[Invalid_argument], but you should still avoid calling [finalise]
|
|
on lazy values.
|
|
|
|
|
|
The results of calling {!String.make}, {!Bytes.make}, {!Bytes.create},
|
|
{!Array.make}, and {!Stdlib.ref} are guaranteed to be
|
|
heap-allocated and non-constant except when the length argument is [0].
|
|
*)
|
|
|
|
val finalise_last : (unit -> unit) -> 'a -> unit
|
|
(** Same as {!finalise} except the value is not given as argument. So
|
|
you can't use the given value for the computation of the
|
|
finalisation function. The benefit is that the function is called
|
|
after the value is unreachable for the last time instead of the
|
|
first time. So contrary to {!finalise} the value will never be
|
|
reachable again or used again. In particular every weak pointer
|
|
and ephemeron that contained this value as key or data is unset
|
|
before running the finalisation function. Moreover the finalisation
|
|
functions attached with {!finalise} are always called before the
|
|
finalisation functions attached with {!finalise_last}.
|
|
|
|
@since 4.04
|
|
*)
|
|
|
|
val finalise_release : unit -> unit
|
|
(** A finalisation function may call [finalise_release] to tell the
|
|
GC that it can launch the next finalisation function without waiting
|
|
for the current one to return. *)
|
|
|
|
type alarm
|
|
(** An alarm is a piece of data that calls a user function at the end of
|
|
each major GC cycle. The following functions are provided to create
|
|
and delete alarms. *)
|
|
|
|
val create_alarm : (unit -> unit) -> alarm
|
|
(** [create_alarm f] will arrange for [f] to be called at the end of each
|
|
major GC cycle, starting with the current cycle or the next one.
|
|
A value of type [alarm] is returned that you can
|
|
use to call [delete_alarm]. *)
|
|
|
|
val delete_alarm : alarm -> unit
|
|
(** [delete_alarm a] will stop the calls to the function associated
|
|
with [a]. Calling [delete_alarm a] again has no effect. *)
|
|
|
|
external eventlog_pause : unit -> unit = "caml_eventlog_pause"
|
|
(** [eventlog_pause ()] will pause the collection of traces in the
|
|
runtime.
|
|
Traces are collected if the program is linked to the instrumented runtime
|
|
and started with the environment variable OCAML_EVENTLOG_ENABLED.
|
|
Events are flushed to disk after pausing, and no new events will be
|
|
recorded until [eventlog_resume] is called. *)
|
|
|
|
external eventlog_resume : unit -> unit = "caml_eventlog_resume"
|
|
(** [eventlog_resume ()] will resume the collection of traces in the
|
|
runtime.
|
|
Traces are collected if the program is linked to the instrumented runtime
|
|
and started with the environment variable OCAML_EVENTLOG_ENABLED.
|
|
This call can be used after calling [eventlog_pause], or if the program
|
|
was started with OCAML_EVENTLOG_ENABLED=p (which pauses the collection of
|
|
traces before the first event). *)
|
|
|
|
|
|
(** [Memprof] is a sampling engine for allocated memory words. Every
|
|
allocated word has a probability of being sampled equal to a
|
|
configurable sampling rate. Once a block is sampled, it becomes
|
|
tracked. A tracked block triggers a user-defined callback as soon
|
|
as it is allocated, promoted or deallocated.
|
|
|
|
Since blocks are composed of several words, a block can potentially
|
|
be sampled several times. If a block is sampled several times, then
|
|
each of the callbacks is called once for each event of this block:
|
|
the multiplicity is given in the [n_samples] field of the
|
|
[allocation] structure.
|
|
|
|
This engine makes it possible to implement a low-overhead memory
|
|
profiler as an OCaml library.
|
|
|
|
Note: this API is EXPERIMENTAL. It may change without prior
|
|
notice. *)
|
|
module Memprof :
|
|
sig
|
|
type allocation = private
|
|
{ n_samples : int;
|
|
(** The number of samples in this block (>= 1). *)
|
|
|
|
size : int;
|
|
(** The size of the block, in words, excluding the header. *)
|
|
|
|
unmarshalled : bool;
|
|
(** Whether the block comes from unmarshalling. *)
|
|
|
|
callstack : Printexc.raw_backtrace
|
|
(** The callstack for the allocation. *)
|
|
}
|
|
(** The type of metadata associated with allocations. This is the
|
|
type of records passed to the callback triggered by the
|
|
sampling of an allocation. *)
|
|
|
|
type ('minor, 'major) tracker = {
|
|
alloc_minor: allocation -> 'minor option;
|
|
alloc_major: allocation -> 'major option;
|
|
promote: 'minor -> 'major option;
|
|
dealloc_minor: 'minor -> unit;
|
|
dealloc_major: 'major -> unit;
|
|
}
|
|
(**
|
|
A [('minor, 'major) tracker] describes how memprof should track
|
|
sampled blocks over their lifetime, keeping a user-defined piece
|
|
of metadata for each of them: ['minor] is the type of metadata
|
|
to keep for minor blocks, and ['major] the type of metadata
|
|
for major blocks.
|
|
|
|
If an allocation-tracking or promotion-tracking function returns [None],
|
|
memprof stops tracking the corresponding value.
|
|
*)
|
|
|
|
val null_tracker: ('minor, 'major) tracker
|
|
(** Default callbacks simply return [None] or [()]. *)
|
|
|
|
val start :
|
|
sampling_rate:float ->
|
|
?callstack_size:int ->
|
|
('minor, 'major) tracker ->
|
|
unit
|
|
(** Start the sampling with the given parameters. Fails if
|
|
sampling is already active.
|
|
|
|
The parameter [sampling_rate] is the sampling rate in samples
|
|
per word (including headers). Usually, with cheap callbacks, a
|
|
rate of 1e-4 has no visible effect on performance, and 1e-3
|
|
causes the program to run a few percent slower.
|
|
|
|
The parameter [callstack_size] is the length of the callstack
|
|
recorded at every sample. Its default is [max_int].
|
|
|
|
The parameter [tracker] determines how to track sampled blocks
|
|
over their lifetime in the minor and major heap.
|
|
|
|
Sampling is temporarily disabled when calling a callback
|
|
for the current thread. So they do not need to be reentrant if
|
|
the program is single-threaded. However, if threads are used,
|
|
it is possible that a context switch occurs during a callback;
|
|
in this case the callback functions must be reentrant.
|
|
|
|
Note that the callback can be postponed slightly after the
|
|
actual event. The callstack passed to the callback is always
|
|
accurate, but the program state may have evolved.
|
|
|
|
Calling [Thread.exit] in a callback is currently unsafe and can
|
|
result in undefined behavior. *)
|
|
|
|
val stop : unit -> unit
|
|
(** Stop the sampling. Fails if sampling is not active.
|
|
|
|
This function does not allocate memory, but tries to run the
|
|
postponed callbacks for already allocated memory blocks (of
|
|
course, these callbacks may allocate).
|
|
|
|
All the already tracked blocks are discarded.
|
|
|
|
Calling [stop] when a callback is running can lead to
|
|
callbacks not being called even though some events happened. *)
|
|
end
|